From 649655d870c8ff3cbb1cc45eec8eaf6e241e2382 Mon Sep 17 00:00:00 2001 From: desertaxle Date: Fri, 15 Mar 2024 13:51:17 +0000 Subject: [PATCH] =?UTF-8?q?Deploying=20to=20docs=20from=20@=20PrefectHQ/pr?= =?UTF-8?q?efect-gcp@b88d61933415a8f94be8d445498a1bf2bc6ccfb1=20?= =?UTF-8?q?=F0=9F=9A=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- 404.html | 2 +- aiplatform/index.html | 163 +++++++++-------- bigquery/index.html | 2 +- blocks_catalog/index.html | 2 +- cloud_run/index.html | 321 +++++++++++++++++---------------- cloud_run_worker/index.html | 2 +- cloud_run_worker_v2/index.html | 2 +- cloud_storage/index.html | 2 +- contributing/index.html | 2 +- credentials/index.html | 2 +- deployments/steps/index.html | 2 +- examples_catalog/index.html | 316 ++++++++++++++++---------------- gcp-worker-guide/index.html | 2 +- index.html | 2 +- search/search_index.json | 2 +- secret_manager/index.html | 2 +- sitemap.xml | 30 +-- sitemap.xml.gz | Bin 352 -> 352 bytes vertex_worker/index.html | 2 +- 19 files changed, 448 insertions(+), 410 deletions(-) diff --git a/404.html b/404.html index 1a36947..72ba69a 100644 --- a/404.html +++ b/404.html @@ -12,7 +12,7 @@ - + diff --git a/aiplatform/index.html b/aiplatform/index.html index 46970b6..014b435 100644 --- a/aiplatform/index.html +++ b/aiplatform/index.html @@ -18,7 +18,7 @@ - + @@ -1040,7 +1040,10 @@

-

+

DEPRECATION WARNING:

+

This module is deprecated as of March 2024 and will not be available after September 2024. +It has been replaced by the Vertex AI worker, which offers enhanced functionality and better performance.

+

For upgrade instructions, see https://docs.prefect.io/latest/guides/upgrade-guide-agents-to-workers/.

Integrations with Google AI Platform.

Note that this module is experimental. The interfaces within may change without notice.

Examples:

@@ -1120,13 +1123,7 @@

Source code in prefect_gcp/aiplatform.py -
110
-111
-112
-113
-114
-115
-116
+              
116
 117
 118
 119
@@ -1574,7 +1571,29 @@ 

561 562 563 -564

class VertexAICustomTrainingJob(Infrastructure):
+564
+565
+566
+567
+568
+569
+570
+571
+572
+573
+574
+575
+576
+577
+578
@deprecated_class(
+    start_date="Mar 2024",
+    help=(
+        "Use the Vertex AI worker instead."
+        " Refer to the upgrade guide for more information:"
+        " https://docs.prefect.io/latest/guides/upgrade-guide-agents-to-workers/."
+    ),
+)
+class VertexAICustomTrainingJob(Infrastructure):
     """
     Infrastructure block used to run Vertex AI custom training jobs.
     """
@@ -2090,21 +2109,7 @@ 
Source code in prefect_gcp/aiplatform.py -
261
-262
-263
-264
-265
-266
-267
-268
-269
-270
-271
-272
-273
-274
-275
+            
275
 276
 277
 278
@@ -2148,7 +2153,21 @@ 
316 317 318 -319
async def generate_work_pool_base_job_template(self) -> dict:
+319
+320
+321
+322
+323
+324
+325
+326
+327
+328
+329
+330
+331
+332
+333
async def generate_work_pool_base_job_template(self) -> dict:
     """
     Generate a base job template for a `Vertex AI` work pool with the same
     configuration as this block.
@@ -2230,9 +2249,9 @@ 
Source code in prefect_gcp/aiplatform.py -
257
-258
-259
def get_corresponding_worker_type(self) -> str:
+            
271
+272
+273
def get_corresponding_worker_type(self) -> str:
     """Return the corresponding worker type for this infrastructure block."""
     return "vertex-ai"
 
@@ -2317,21 +2336,7 @@
Source code in prefect_gcp/aiplatform.py -
510
-511
-512
-513
-514
-515
-516
-517
-518
-519
-520
-521
-522
-523
-524
+            
524
 525
 526
 527
@@ -2340,7 +2345,21 @@ 
530 531 532 -533
@sync_compatible
+533
+534
+535
+536
+537
+538
+539
+540
+541
+542
+543
+544
+545
+546
+547
@sync_compatible
 async def kill(self, identifier: str, grace_seconds: int = 30) -> None:
     """
    Kill a job running on Vertex AI.
@@ -2387,15 +2406,15 @@ 
Source code in prefect_gcp/aiplatform.py -
247
-248
-249
-250
-251
-252
-253
-254
-255
def preview(self) -> str:
+            
261
+262
+263
+264
+265
+266
+267
+268
+269
def preview(self) -> str:
     """Generate a preview of the job definition that will be sent to GCP."""
     job_spec = self._build_job_spec()
     custom_job = CustomJob(
@@ -2485,21 +2504,7 @@ 
Source code in prefect_gcp/aiplatform.py -
461
-462
-463
-464
-465
-466
-467
-468
-469
-470
-471
-472
-473
-474
-475
+            
475
 476
 477
 478
@@ -2532,7 +2537,21 @@ 
505 506 507 -508
@sync_compatible
+508
+509
+510
+511
+512
+513
+514
+515
+516
+517
+518
+519
+520
+521
+522
@sync_compatible
 async def run(
     self, task_status: Optional["TaskStatus"] = None
 ) -> VertexAICustomTrainingJobResult:
@@ -2615,8 +2634,8 @@ 

Source code in prefect_gcp/aiplatform.py -
106
-107
class VertexAICustomTrainingJobResult(InfrastructureResult):
+              
112
+113
class VertexAICustomTrainingJobResult(InfrastructureResult):
     """Result from a Vertex AI custom training job."""
 
diff --git a/bigquery/index.html b/bigquery/index.html index 43662a3..60c9b94 100644 --- a/bigquery/index.html +++ b/bigquery/index.html @@ -18,7 +18,7 @@ - + diff --git a/blocks_catalog/index.html b/blocks_catalog/index.html index 9b95757..3b36386 100644 --- a/blocks_catalog/index.html +++ b/blocks_catalog/index.html @@ -18,7 +18,7 @@ - + diff --git a/cloud_run/index.html b/cloud_run/index.html index da1b43f..910a546 100644 --- a/cloud_run/index.html +++ b/cloud_run/index.html @@ -18,7 +18,7 @@ - + @@ -1358,7 +1358,10 @@

-

+

DEPRECATION WARNING:

+

This module is deprecated as of March 2024 and will not be available after September 2024. +It has been replaced by the Cloud Run and Cloud Run V2 workers, which offer enhanced functionality and better performance.

+

For upgrade instructions, see https://docs.prefect.io/latest/guides/upgrade-guide-agents-to-workers/.

Integrations with Google Cloud Run Job.

Note that this module is experimental. The interfaces within may change without notice.

Examples:

@@ -1417,13 +1420,7 @@

Source code in prefect_gcp/cloud_run.py -
213
-214
-215
-216
-217
-218
-219
+              
219
 220
 221
 222
@@ -2043,7 +2040,29 @@ 

836 837 838 -839

class CloudRunJob(Infrastructure):
+839
+840
+841
+842
+843
+844
+845
+846
+847
+848
+849
+850
+851
+852
+853
@deprecated_class(
+    start_date="Mar 2024",
+    help=(
+        "Use the Cloud Run or Cloud Run v2 worker instead."
+        " Refer to the upgrade guide for more information:"
+        " https://docs.prefect.io/latest/guides/upgrade-guide-agents-to-workers/."
+    ),
+)
+class CloudRunJob(Infrastructure):
     """
     <span class="badge-api experimental"/>
 
@@ -2775,21 +2794,7 @@ 
Source code in prefect_gcp/cloud_run.py -
365
-366
-367
-368
-369
-370
-371
-372
-373
-374
-375
-376
-377
-378
-379
+            
379
 380
 381
 382
@@ -2844,7 +2849,21 @@ 
431 432 433 -434
async def generate_work_pool_base_job_template(self) -> dict:
+434
+435
+436
+437
+438
+439
+440
+441
+442
+443
+444
+445
+446
+447
+448
async def generate_work_pool_base_job_template(self) -> dict:
     """
     Generate a base job template for a cloud-run work pool with the same
     configuration as this block.
@@ -2937,9 +2956,9 @@ 
Source code in prefect_gcp/cloud_run.py -
361
-362
-363
def get_corresponding_worker_type(self) -> str:
+            
375
+376
+377
def get_corresponding_worker_type(self) -> str:
     """Return the corresponding worker type for this infrastructure block."""
     return "cloud-run"
 
@@ -3000,21 +3019,7 @@
Source code in prefect_gcp/cloud_run.py -
502
-503
-504
-505
-506
-507
-508
-509
-510
-511
-512
-513
-514
-515
-516
+            
516
 517
 518
 519
@@ -3022,7 +3027,21 @@ 
521 522 523 -524
@sync_compatible
+524
+525
+526
+527
+528
+529
+530
+531
+532
+533
+534
+535
+536
+537
+538
@sync_compatible
 async def kill(self, identifier: str, grace_seconds: int = 30) -> None:
     """
     Kill a task running Cloud Run.
@@ -3068,18 +3087,18 @@ 
Source code in prefect_gcp/cloud_run.py -
701
-702
-703
-704
-705
-706
-707
-708
-709
-710
-711
-712
def preview(self) -> str:
+            
715
+716
+717
+718
+719
+720
+721
+722
+723
+724
+725
+726
def preview(self) -> str:
     """Generate a preview of the job definition that will be sent to GCP."""
     body = self._jobs_body()
     container_settings = body["spec"]["template"]["spec"]["template"]["spec"][
@@ -3118,27 +3137,27 @@ 
Source code in prefect_gcp/cloud_run.py -
480
-481
-482
-483
-484
-485
-486
-487
-488
-489
-490
-491
-492
-493
-494
+            
494
 495
 496
 497
 498
 499
-500
@sync_compatible
+500
+501
+502
+503
+504
+505
+506
+507
+508
+509
+510
+511
+512
+513
+514
@sync_compatible
 async def run(self, task_status: Optional[TaskStatus] = None):
     """Run the configured job on a Google Cloud Run Job."""
     with self._get_client() as client:
@@ -3194,8 +3213,8 @@ 

Source code in prefect_gcp/cloud_run.py -
209
-210
class CloudRunJobResult(InfrastructureResult):
+              
215
+216
class CloudRunJobResult(InfrastructureResult):
     """Result from a Cloud Run Job."""
 
@@ -3226,13 +3245,7 @@

Source code in prefect_gcp/cloud_run.py -
157
-158
-159
-160
-161
-162
-163
+              
163
 164
 165
 166
@@ -3275,7 +3288,13 @@ 

203 204 205 -206

class Execution(BaseModel):
+206
+207
+208
+209
+210
+211
+212
class Execution(BaseModel):
     """
     Utility class to call GCP `executions` API and
     interact with the returned objects.
@@ -3357,11 +3376,11 @@ 
Source code in prefect_gcp/cloud_run.py -
174
-175
-176
-177
-178
def condition_after_completion(self):
+            
180
+181
+182
+183
+184
def condition_after_completion(self):
     """Returns Execution condition if Execution has completed."""
     for condition in self.status["conditions"]:
         if condition["type"] == "Completed":
@@ -3394,13 +3413,7 @@ 
Source code in prefect_gcp/cloud_run.py -
188
-189
-190
-191
-192
-193
-194
+            
194
 195
 196
 197
@@ -3412,7 +3425,13 @@ 
203 204 205 -206
@classmethod
+206
+207
+208
+209
+210
+211
+212
@classmethod
 def get(cls, client: Resource, namespace: str, execution_name: str):
     """
     Make a get request to the GCP executions API
@@ -3454,9 +3473,9 @@ 
Source code in prefect_gcp/cloud_run.py -
170
-171
-172
def is_running(self) -> bool:
+            
176
+177
+178
def is_running(self) -> bool:
     """Returns True if Execution is not completed."""
     return self.status.get("completionTime") is None
 
@@ -3482,13 +3501,13 @@
Source code in prefect_gcp/cloud_run.py -
180
-181
-182
-183
-184
-185
-186
def succeeded(self):
+            
186
+187
+188
+189
+190
+191
+192
def succeeded(self):
     """Whether or not the Execution completed is a successful state."""
     completed_condition = self.condition_after_completion()
     if completed_condition and completed_condition["status"] == "True":
@@ -3531,13 +3550,7 @@ 

Source code in prefect_gcp/cloud_run.py -
 64
- 65
- 66
- 67
- 68
- 69
- 70
+              
 70
  71
  72
  73
@@ -3621,7 +3634,13 @@ 

151 152 153 -154

class Job(BaseModel):
+154
+155
+156
+157
+158
+159
+160
class Job(BaseModel):
     """
     Utility class to call GCP `jobs` API and
     interact with the returned objects.
@@ -3748,12 +3767,12 @@ 
Source code in prefect_gcp/cloud_run.py -
135
-136
-137
-138
-139
-140
@staticmethod
+            
141
+142
+143
+144
+145
+146
@staticmethod
 def create(client: Resource, namespace: str, body: dict):
     """Make a create request to the GCP jobs API."""
     request = client.jobs().create(parent=f"namespaces/{namespace}", body=body)
@@ -3786,12 +3805,12 @@ 
Source code in prefect_gcp/cloud_run.py -
142
-143
-144
-145
-146
-147
@staticmethod
+            
148
+149
+150
+151
+152
+153
@staticmethod
 def delete(client: Resource, namespace: str, job_name: str):
     """Make a delete request to the GCP jobs API."""
     request = client.jobs().delete(name=f"namespaces/{namespace}/jobs/{job_name}")
@@ -3824,20 +3843,20 @@ 
Source code in prefect_gcp/cloud_run.py -
120
-121
-122
-123
-124
-125
-126
+            
126
 127
 128
 129
 130
 131
 132
-133
@classmethod
+133
+134
+135
+136
+137
+138
+139
@classmethod
 def get(cls, client: Resource, namespace: str, job_name: str):
     """Make a get request to the GCP jobs API and return a Job instance."""
     request = client.jobs().get(name=f"namespaces/{namespace}/jobs/{job_name}")
@@ -3874,12 +3893,12 @@ 
Source code in prefect_gcp/cloud_run.py -
 95
- 96
- 97
- 98
- 99
-100
def has_execution_in_progress(self) -> bool:
+            
101
+102
+103
+104
+105
+106
def has_execution_in_progress(self) -> bool:
     """See if job has a run in progress."""
     return (
         self.execution_status == {}
@@ -3908,11 +3927,11 @@ 
Source code in prefect_gcp/cloud_run.py -
89
-90
-91
-92
-93
def is_ready(self) -> bool:
+            
95
+96
+97
+98
+99
def is_ready(self) -> bool:
     """Whether a job is finished registering and ready to be executed"""
     if self._is_missing_container():
         raise Exception(f"{self.ready_condition['message']}")
@@ -3944,12 +3963,12 @@ 
Source code in prefect_gcp/cloud_run.py -
149
-150
-151
-152
-153
-154
@staticmethod
+            
155
+156
+157
+158
+159
+160
@staticmethod
 def run(client: Resource, namespace: str, job_name: str):
     """Make a run request to the GCP jobs API."""
     request = client.jobs().run(name=f"namespaces/{namespace}/jobs/{job_name}")
diff --git a/cloud_run_worker/index.html b/cloud_run_worker/index.html
index 9edb698..eca0091 100644
--- a/cloud_run_worker/index.html
+++ b/cloud_run_worker/index.html
@@ -18,7 +18,7 @@
       
       
       
-      
+      
     
     
       
diff --git a/cloud_run_worker_v2/index.html b/cloud_run_worker_v2/index.html
index 14bb9f7..40d812a 100644
--- a/cloud_run_worker_v2/index.html
+++ b/cloud_run_worker_v2/index.html
@@ -18,7 +18,7 @@
       
       
       
-      
+      
     
     
       
diff --git a/cloud_storage/index.html b/cloud_storage/index.html
index 722fd2c..89a47a1 100644
--- a/cloud_storage/index.html
+++ b/cloud_storage/index.html
@@ -18,7 +18,7 @@
       
       
       
-      
+      
     
     
       
diff --git a/contributing/index.html b/contributing/index.html
index 56a6b4f..33a49eb 100644
--- a/contributing/index.html
+++ b/contributing/index.html
@@ -18,7 +18,7 @@
       
       
       
-      
+      
     
     
       
diff --git a/credentials/index.html b/credentials/index.html
index 054aa53..020e981 100644
--- a/credentials/index.html
+++ b/credentials/index.html
@@ -18,7 +18,7 @@
       
       
       
-      
+      
     
     
       
diff --git a/deployments/steps/index.html b/deployments/steps/index.html
index 761d4d4..ccd981c 100644
--- a/deployments/steps/index.html
+++ b/deployments/steps/index.html
@@ -18,7 +18,7 @@
       
       
       
-      
+      
     
     
       
diff --git a/examples_catalog/index.html b/examples_catalog/index.html
index 51bcdb8..804322e 100644
--- a/examples_catalog/index.html
+++ b/examples_catalog/index.html
@@ -18,7 +18,7 @@
       
       
       
-      
+      
     
     
       
@@ -851,6 +851,28 @@ 

''' warehouse.execute(operation, parameters={"limit": 5})

+
from prefect import flow
+from prefect_gcp import GcpCredentials
+from prefect_gcp.bigquery import bigquery_insert_stream
+from google.cloud.bigquery import SchemaField
+
+@flow
+def example_bigquery_insert_stream_flow():
+    gcp_credentials = GcpCredentials(project="project")
+    records = [
+        {"number": 1, "text": "abc", "bool": True},
+        {"number": 2, "text": "def", "bool": False},
+    ]
+    result = bigquery_insert_stream(
+        dataset="integrations",
+        table="test_table",
+        records=records,
+        gcp_credentials=gcp_credentials
+    )
+    return result
+
+example_bigquery_insert_stream_flow()
+
Queries the public names database, returning 10 results.
+
from prefect import flow
+from prefect_gcp import GcpCredentials
+from prefect_gcp.bigquery import bigquery_load_file
 from google.cloud.bigquery import SchemaField
 
 @flow
-def example_bigquery_insert_stream_flow():
+def example_bigquery_load_file_flow():
     gcp_credentials = GcpCredentials(project="project")
-    records = [
-        {"number": 1, "text": "abc", "bool": True},
-        {"number": 2, "text": "def", "bool": False},
-    ]
-    result = bigquery_insert_stream(
-        dataset="integrations",
+    result = bigquery_load_file(
+        dataset="dataset",
         table="test_table",
-        records=records,
+        path="path",
         gcp_credentials=gcp_credentials
     )
     return result
 
-example_bigquery_insert_stream_flow()
+example_bigquery_load_file_flow()
 
Create mytable in mydataset and insert two rows into it:
-
from prefect import flow
-from prefect_gcp import GcpCredentials
-from prefect_gcp.bigquery import bigquery_load_cloud_storage
-
-@flow
-def example_bigquery_load_cloud_storage_flow():
-    gcp_credentials = GcpCredentials(project="project")
-    result = bigquery_load_cloud_storage(
-        dataset="dataset",
-        table="test_table",
-        uri="uri",
-        gcp_credentials=gcp_credentials
-    )
-    return result
-
-example_bigquery_load_cloud_storage_flow()
-
-Execute operation with parameters, fetching all rows: +Execute operation with parameters, fetching one new row at a time:
-Execute operation with parameters, fetching two new rows at a time: +Execute operation with parameters, fetching all rows: -Execute operation with parameters, fetching one new row at a time: +Execute operation with parameters, fetching two new rows at a time: -
from prefect import flow
-from prefect_gcp import GcpCredentials
-from prefect_gcp.bigquery import bigquery_load_file
-from google.cloud.bigquery import SchemaField
-
-@flow
-def example_bigquery_load_file_flow():
-    gcp_credentials = GcpCredentials(project="project")
-    result = bigquery_load_file(
-        dataset="dataset",
-        table="test_table",
-        path="path",
-        gcp_credentials=gcp_credentials
-    )
-    return result
-
-example_bigquery_load_file_flow()
 

Cloud Storage Module

-

Uploads blob to bucket. +

Creates a bucket named "prefect".

from prefect import flow
 from prefect_gcp import GcpCredentials
-from prefect_gcp.cloud_storage import cloud_storage_upload_blob_from_file
+from prefect_gcp.cloud_storage import cloud_storage_create_bucket
 
 @flow()
-def example_cloud_storage_upload_blob_from_file_flow():
+def example_cloud_storage_create_bucket_flow():
     gcp_credentials = GcpCredentials(
         service_account_file="/path/to/service/account/keyfile.json")
-    blob = cloud_storage_upload_blob_from_file(
-        "/path/somewhere", "bucket", "blob", gcp_credentials)
-    return blob
+    bucket = cloud_storage_create_bucket("prefect", gcp_credentials)
 
-example_cloud_storage_upload_blob_from_file_flow()
+example_cloud_storage_create_bucket_flow()
 
-Download my_folder/notes.txt object to notes.txt. +Upload local folder my_folder to the bucket's folder my_folder.
from prefect_gcp.cloud_storage import GcsBucket
 
 gcs_bucket = GcsBucket.load("my-bucket")
-gcs_bucket.download_object_to_path("my_folder/notes.txt", "notes.txt")
+gcs_bucket.upload_from_folder("my_folder")
 
-Create a bucket. -
from prefect_gcp.cloud_storage import GcsBucket
+Downloads blob from bucket.
+
from prefect import flow
+from prefect_gcp import GcpCredentials
+from prefect_gcp.cloud_storage import cloud_storage_download_blob_as_bytes
 
-gcs_bucket = GcsBucket(bucket="my-bucket")
-gcs_bucket.create_bucket()
+@flow()
+def example_cloud_storage_download_blob_flow():
+    gcp_credentials = GcpCredentials(
+        service_account_file="/path/to/service/account/keyfile.json")
+    contents = cloud_storage_download_blob_as_bytes(
+        "bucket", "blob", gcp_credentials)
+    return contents
+
+example_cloud_storage_download_blob_flow()
 
-Download my_folder/notes.txt object to a BytesIO object. -
from io import BytesIO
-from prefect_gcp.cloud_storage import GcsBucket
+Get all folders from a bucket named "my-bucket".
+
from prefect_gcp.cloud_storage import GcsBucket
 
 gcs_bucket = GcsBucket.load("my-bucket")
-with BytesIO() as buf:
-    gcs_bucket.download_object_to_file_object("my_folder/notes.txt", buf)
+gcs_bucket.list_folders()
 

-

Download my_folder/notes.txt object to a BufferedWriter. -

    from prefect_gcp.cloud_storage import GcsBucket
-
-    gcs_bucket = GcsBucket.load("my-bucket")
-    with open("notes.txt", "wb") as f:
-        gcs_bucket.download_object_to_file_object("my_folder/notes.txt", f)
-
-Upload notes.txt to my_folder/notes.txt. +

Get all folders from a folder called years

from prefect_gcp.cloud_storage import GcsBucket
 
 gcs_bucket = GcsBucket.load("my-bucket")
-gcs_bucket.upload_from_path("notes.txt", "my_folder/notes.txt")
+gcs_bucket.list_folders("years")
 
Download my_folder to a local folder named my_folder.
from prefect_gcp.cloud_storage import GcsBucket
@@ -1078,61 +1076,59 @@ 

gcs_bucket = GcsBucket.load("my-bucket") gcs_bucket.download_folder_to_path("my_folder", "my_folder")

-Creates a bucket named "prefect". +Uploads blob to bucket.
from prefect import flow
 from prefect_gcp import GcpCredentials
-from prefect_gcp.cloud_storage import cloud_storage_create_bucket
+from prefect_gcp.cloud_storage import cloud_storage_upload_blob_from_file
 
 @flow()
-def example_cloud_storage_create_bucket_flow():
+def example_cloud_storage_upload_blob_from_file_flow():
     gcp_credentials = GcpCredentials(
         service_account_file="/path/to/service/account/keyfile.json")
-    bucket = cloud_storage_create_bucket("prefect", gcp_credentials)
-
-example_cloud_storage_create_bucket_flow()
-
-Get the bucket object. -
from prefect_gcp.cloud_storage import GcsBucket
+    blob = cloud_storage_upload_blob_from_file(
+        "/path/somewhere", "bucket", "blob", gcp_credentials)
+    return blob
 
-gcs_bucket = GcsBucket.load("my-bucket")
-gcs_bucket.get_bucket()
+example_cloud_storage_upload_blob_from_file_flow()
 
-Copies blob from one bucket to another. +Uploads blob to bucket.
from prefect import flow
 from prefect_gcp import GcpCredentials
-from prefect_gcp.cloud_storage import cloud_storage_copy_blob
+from prefect_gcp.cloud_storage import cloud_storage_upload_blob_from_string
 
 @flow()
-def example_cloud_storage_copy_blob_flow():
+def example_cloud_storage_upload_blob_from_string_flow():
     gcp_credentials = GcpCredentials(
         service_account_file="/path/to/service/account/keyfile.json")
-    blob = cloud_storage_copy_blob(
-        "source_bucket",
-        "dest_bucket",
-        "source_blob",
-        gcp_credentials
-    )
+    blob = cloud_storage_upload_blob_from_string(
+        "data", "bucket", "blob", gcp_credentials)
     return blob
 
-example_cloud_storage_copy_blob_flow()
+example_cloud_storage_upload_blob_from_string_flow()
 
-Get all folders from a bucket named "my-bucket". +Create a bucket. +
from prefect_gcp.cloud_storage import GcsBucket
+
+gcs_bucket = GcsBucket(bucket="my-bucket")
+gcs_bucket.create_bucket()
+
+Download my_folder/notes.txt object to notes.txt.
from prefect_gcp.cloud_storage import GcsBucket
 
 gcs_bucket = GcsBucket.load("my-bucket")
-gcs_bucket.list_folders()
-

-

Get all folders from a folder called years +gcs_bucket.download_object_to_path("my_folder/notes.txt", "notes.txt") +

+Get the bucket object.
from prefect_gcp.cloud_storage import GcsBucket
 
 gcs_bucket = GcsBucket.load("my-bucket")
-gcs_bucket.list_folders("years")
+gcs_bucket.get_bucket()
 
-Upload local folder my_folder to the bucket's folder my_folder. +Get all blobs from a folder named "prefect".
from prefect_gcp.cloud_storage import GcsBucket
 
 gcs_bucket = GcsBucket.load("my-bucket")
-gcs_bucket.upload_from_folder("my_folder")
+gcs_bucket.list_blobs("prefect")
 
Upload my_folder/notes.txt object to a BytesIO object.
-Downloads blob from bucket. -
from prefect import flow
-from prefect_gcp import GcpCredentials
-from prefect_gcp.cloud_storage import cloud_storage_download_blob_to_file
+Download my_folder/notes.txt object to a BytesIO object.
+
from io import BytesIO
+from prefect_gcp.cloud_storage import GcsBucket
 
-@flow()
-def example_cloud_storage_download_blob_flow():
-    gcp_credentials = GcpCredentials(
-        service_account_file="/path/to/service/account/keyfile.json")
-    path = cloud_storage_download_blob_to_file(
-        "bucket", "blob", "file_path", gcp_credentials)
-    return path
+gcs_bucket = GcsBucket.load("my-bucket")
+with BytesIO() as buf:
+    gcs_bucket.download_object_to_file_object("my_folder/notes.txt", buf)
+

+

Download my_folder/notes.txt object to a BufferedWriter. +

    from prefect_gcp.cloud_storage import GcsBucket
 
-example_cloud_storage_download_blob_flow()
+    gcs_bucket = GcsBucket.load("my-bucket")
+    with open("notes.txt", "wb") as f:
+        gcs_bucket.download_object_to_file_object("my_folder/notes.txt", f)
 
-Downloads blob from bucket. +Copies blob from one bucket to another.
from prefect import flow
 from prefect_gcp import GcpCredentials
-from prefect_gcp.cloud_storage import cloud_storage_download_blob_as_bytes
+from prefect_gcp.cloud_storage import cloud_storage_copy_blob
 
 @flow()
-def example_cloud_storage_download_blob_flow():
+def example_cloud_storage_copy_blob_flow():
     gcp_credentials = GcpCredentials(
         service_account_file="/path/to/service/account/keyfile.json")
-    contents = cloud_storage_download_blob_as_bytes(
-        "bucket", "blob", gcp_credentials)
-    return contents
+    blob = cloud_storage_copy_blob(
+        "source_bucket",
+        "dest_bucket",
+        "source_blob",
+        gcp_credentials
+    )
+    return blob
 
-example_cloud_storage_download_blob_flow()
+example_cloud_storage_copy_blob_flow()
 
-Uploads blob to bucket. +Downloads blob from bucket.
from prefect import flow
 from prefect_gcp import GcpCredentials
-from prefect_gcp.cloud_storage import cloud_storage_upload_blob_from_string
+from prefect_gcp.cloud_storage import cloud_storage_download_blob_to_file
 
 @flow()
-def example_cloud_storage_upload_blob_from_string_flow():
+def example_cloud_storage_download_blob_flow():
     gcp_credentials = GcpCredentials(
         service_account_file="/path/to/service/account/keyfile.json")
-    blob = cloud_storage_upload_blob_from_string(
-        "data", "bucket", "blob", gcp_credentials)
-    return blob
+    path = cloud_storage_download_blob_to_file(
+        "bucket", "blob", "file_path", gcp_credentials)
+    return path
 
-example_cloud_storage_upload_blob_from_string_flow()
+example_cloud_storage_download_blob_flow()
 
-Get all blobs from a folder named "prefect". +Upload notes.txt to my_folder/notes.txt.
from prefect_gcp.cloud_storage import GcsBucket
 
 gcs_bucket = GcsBucket.load("my-bucket")
-gcs_bucket.list_blobs("prefect")
+gcs_bucket.upload_from_path("notes.txt", "my_folder/notes.txt")
 

Credentials Module

-

Gets a GCP Secret Manager client from a path. +

Gets a GCP Cloud Storage client from a path.

Gets a GCP Cloud Storage client from a dictionary. @@ -1236,10 +1236,10 @@

} client = GcpCredentials( service_account_info=service_account_info - ).get_secret_manager_client() + ).get_cloud_storage_client() example_get_client_flow()

-Gets a GCP BigQuery client from a path. +Gets a GCP Job Service client from a path.

-

Gets a GCP BigQuery client from a dictionary. +

Gets a GCP Cloud Storage client from a dictionary.

-Gets a GCP Cloud Storage client from a path. +Gets a GCP Secret Manager client from a path.

Gets a GCP Cloud Storage client from a dictionary. @@ -1307,10 +1308,10 @@

} client = GcpCredentials( service_account_info=service_account_info - ).get_cloud_storage_client() + ).get_secret_manager_client() example_get_client_flow()

-Gets a GCP Job Service client from a path. +Gets a GCP BigQuery client from a path.

-

Gets a GCP Cloud Storage client from a dictionary. +

Gets a GCP BigQuery client from a dictionary.

diff --git a/gcp-worker-guide/index.html b/gcp-worker-guide/index.html index 079c0f1..cc14bdc 100644 --- a/gcp-worker-guide/index.html +++ b/gcp-worker-guide/index.html @@ -18,7 +18,7 @@ - + diff --git a/index.html b/index.html index 8045440..89e2c1f 100644 --- a/index.html +++ b/index.html @@ -16,7 +16,7 @@ - + diff --git a/search/search_index.json b/search/search_index.json index 68db609..2991f46 100644 --- a/search/search_index.json +++ b/search/search_index.json @@ -1 +1 @@ -{"config":{"lang":["en"],"separator":"[\\s\\-]+","pipeline":["stopWordFilter"]},"docs":[{"location":"","title":"prefect-gcp","text":"

prefect-gcp makes it easy to leverage the capabilities of Google Cloud Platform (GCP) in your flows, featuring support for Vertex AI, Cloud Run, BigQuery, Cloud Storage, and Secret Manager.

"},{"location":"#getting-started","title":"Getting Started","text":""},{"location":"#saving-credentials-to-a-block","title":"Saving credentials to a block","text":"

To use prefect-gcp, you will first need to install it and authenticate with a service account.

prefect-gcp can safely save and load the service account so it can be reused across the collection! Simply follow the steps below.

  1. Refer to the GCP service account documentation on how to create and download a service account key file.
  2. Copy the JSON contents.
  3. Create a short script, replacing the placeholders with your information.
from prefect_gcp import GcpCredentials\n\n# replace this PLACEHOLDER dict with your own service account info\nservice_account_info = {\n  \"type\": \"service_account\",\n  \"project_id\": \"PROJECT_ID\",\n  \"private_key_id\": \"KEY_ID\",\n  \"private_key\": \"-----BEGIN PRIVATE KEY-----\\nPRIVATE_KEY\\n-----END PRIVATE KEY-----\\n\",\n  \"client_email\": \"SERVICE_ACCOUNT_EMAIL\",\n  \"client_id\": \"CLIENT_ID\",\n  \"auth_uri\": \"https://accounts.google.com/o/oauth2/auth\",\n  \"token_uri\": \"https://accounts.google.com/o/oauth2/token\",\n  \"auth_provider_x509_cert_url\": \"https://www.googleapis.com/oauth2/v1/certs\",\n  \"client_x509_cert_url\": \"https://www.googleapis.com/robot/v1/metadata/x509/SERVICE_ACCOUNT_EMAIL\"\n}\n\nGcpCredentials(\n    service_account_info=service_account_info\n).save(\"BLOCK-NAME-PLACEHOLDER\")\n

service_account_info vs service_account_file

The advantage of using service_account_info, instead of service_account_file, is that it is accessible across containers.

If service_account_file is used, the provided file path must be available in the container executing the flow.

Congrats! You can now easily load the saved block, which holds your credentials:

from prefect_gcp import GcpCredentials\nGcpCredentials.load(\"BLOCK-NAME-PLACEHOLDER\")\n

Registering blocks

Register blocks in this module to view and edit them on Prefect Cloud:

prefect block register -m prefect_gcp\n
"},{"location":"#using-prefect-with-google-cloud-run","title":"Using Prefect with Google Cloud Run","text":"

Is your local computer or server running out of memory or taking too long to complete a job?

prefect_gcp offers a solution by enabling you to execute your Prefect flows remotely, on demand, through Google Cloud Run.

The following code snippets demonstrate how prefect_gcp can be used to run a job on Cloud Run, either as part of a Prefect deployment's infrastructure or within a flow.

"},{"location":"#as-infrastructure","title":"As Infrastructure","text":"

Below is a simple walkthrough for how to use Google Cloud Run as infrastructure for a deployment.

"},{"location":"#set-variables","title":"Set variables","text":"

To expedite copy/paste without needing to update placeholders manually, update and execute the following.

export CREDENTIALS_BLOCK_NAME=\"BLOCK-NAME-PLACEHOLDER\"\nexport CLOUD_RUN_JOB_BLOCK_NAME=\"cloud-run-job-example\"\nexport CLOUD_RUN_JOB_REGION=\"us-central1\"\nexport GCS_BUCKET_BLOCK_NAME=\"cloud-run-job-bucket-example\"\nexport GCP_PROJECT_ID=$(gcloud config get-value project)\n
"},{"location":"#build-an-image","title":"Build an image","text":"

First, find an existing image within the Google Artifact Registry. Ensure it has Python and prefect-gcp[cloud_storage] installed, or follow the instructions below to set one up.

Create a Dockerfile.

FROM prefecthq/prefect:2-python3.11\nRUN pip install \"prefect-gcp[cloud_storage]\"\n

Then push to the Google Artifact Registry.

gcloud artifacts repositories create test-example-repository --repository-format=docker --location=us\ngcloud auth configure-docker us-docker.pkg.dev\ndocker build -t us-docker.pkg.dev/${GCP_PROJECT_ID}/test-example-repository/prefect-gcp:2-python3.11 .\ndocker push us-docker.pkg.dev/${GCP_PROJECT_ID}/test-example-repository/prefect-gcp:2-python3.11\n
"},{"location":"#save-an-infrastructure-and-storage-block","title":"Save an infrastructure and storage block","text":"

Save a custom infrastructure and storage block by executing the following snippet.

import os\nfrom prefect_gcp import GcpCredentials, CloudRunJob, GcsBucket\n\ngcp_credentials = GcpCredentials.load(os.environ[\"CREDENTIALS_BLOCK_NAME\"])\n\n# must be from GCR and have Python + Prefect\nimage = f\"us-docker.pkg.dev/{os.environ['GCP_PROJECT_ID']}/test-example-repository/prefect-gcp:2-python3.11\"  # noqa\n\ncloud_run_job = CloudRunJob(\n    image=image,\n    credentials=gcp_credentials,\n    region=os.environ[\"CLOUD_RUN_JOB_REGION\"],\n)\ncloud_run_job.save(os.environ[\"CLOUD_RUN_JOB_BLOCK_NAME\"], overwrite=True)\n\nbucket_name = \"cloud-run-job-bucket\"\ncloud_storage_client = gcp_credentials.get_cloud_storage_client()\ncloud_storage_client.create_bucket(bucket_name)\ngcs_bucket = GcsBucket(\n    bucket=bucket_name,\n    gcp_credentials=gcp_credentials,\n)\ngcs_bucket.save(os.environ[\"GCS_BUCKET_BLOCK_NAME\"], overwrite=True)\n
"},{"location":"#write-a-flow","title":"Write a flow","text":"

Then, use an existing flow to create a deployment with, or use the flow below if you don't have an existing flow handy.

from prefect import flow\n\n@flow(log_prints=True)\ndef cloud_run_job_flow():\n    print(\"Hello, Prefect!\")\n\nif __name__ == \"__main__\":\n    cloud_run_job_flow()\n
"},{"location":"#create-a-deployment","title":"Create a deployment","text":"

If the script was named \"cloud_run_job_script.py\", build a deployment manifest with the following command.

prefect deployment build cloud_run_job_script.py:cloud_run_job_flow \\\n    -n cloud-run-deployment \\\n    -ib cloud-run-job/${CLOUD_RUN_JOB_BLOCK_NAME} \\\n    -sb gcs-bucket/${GCS_BUCKET_BLOCK_NAME}\n

Now apply the deployment!

prefect deployment apply cloud_run_job_flow-deployment.yaml\n
"},{"location":"#test-the-deployment","title":"Test the deployment","text":"

Start up an agent in a separate terminal. The agent will poll the Prefect API for scheduled flow runs that are ready to run.

prefect agent start -q 'default'\n

Run the deployment once to test.

prefect deployment run cloud-run-job-flow/cloud-run-deployment\n

Once the flow run has completed, you will see Hello, Prefect! logged in the Prefect UI.

No class found for dispatch key

If you encounter an error message like KeyError: \"No class found for dispatch key 'cloud-run-job' in registry for type 'Block'.\", ensure prefect-gcp is installed in the environment in which your agent is running!

"},{"location":"#within-flow","title":"Within Flow","text":"

You can execute commands through Cloud Run Job directly within a Prefect flow.

from prefect import flow\nfrom prefect_gcp import GcpCredentials\nfrom prefect_gcp.cloud_run import CloudRunJob\n\n@flow\ndef cloud_run_job_flow():\n    cloud_run_job = CloudRunJob(\n        image=\"us-docker.pkg.dev/cloudrun/container/job:latest\",\n        credentials=GcpCredentials.load(\"BLOCK-NAME-PLACEHOLDER\"),\n        region=\"us-central1\",\n        command=[\"echo\", \"Hello, Prefect!\"],\n    )\n    return cloud_run_job.run()\n
"},{"location":"#using-prefect-with-google-vertex-ai","title":"Using Prefect with Google Vertex AI","text":"

prefect_gcp also enables you to execute your Prefect flows remotely, on demand, using Google Vertex AI!

Be sure to additionally install the AI Platform extra!

Setting up a Vertex AI job is extremely similar to setting up a Cloud Run Job, but replace CloudRunJob with the following snippet.

from prefect_gcp import GcpCredentials, VertexAICustomTrainingJob, GcsBucket\n\ngcp_credentials = GcpCredentials.load(\"BLOCK-NAME-PLACEHOLDER\")\n\nvertex_ai_job = VertexAICustomTrainingJob(\n    image=\"IMAGE-NAME-PLACEHOLDER\",  # must be from GCR and have Python + Prefect\n    credentials=gcp_credentials,\n    region=\"us-central1\",\n)\nvertex_ai_job.save(\"test-example\")\n

Cloud Run Job vs Vertex AI

With Vertex AI, you can allocate computational resources on-the-fly for your executions, much like Cloud Run.

However, unlike Cloud Run, you have the flexibility to provision instances with higher CPU, GPU, TPU, and RAM capacities.

Additionally, jobs can run for up to 7 days, which is significantly longer than the maximum duration allowed on Cloud Run.

"},{"location":"#using-prefect-with-google-bigquery","title":"Using Prefect with Google BigQuery","text":"

Got big data in BigQuery? prefect_gcp allows you to steadily stream data from and write to Google BigQuery within your Prefect flows!

Be sure to install prefect-gcp with the BigQuery extra!

The provided code snippet shows how you can use prefect_gcp to create a new dataset in BigQuery, define a table, insert rows, and fetch data from the table.

from prefect import flow\nfrom prefect_gcp.bigquery import GcpCredentials, BigQueryWarehouse\n\n@flow\ndef bigquery_flow():\n    all_rows = []\n    gcp_credentials = GcpCredentials.load(\"BLOCK-NAME-PLACEHOLDER\")\n\n    client = gcp_credentials.get_bigquery_client()\n    client.create_dataset(\"test_example\", exists_ok=True)\n\n    with BigQueryWarehouse(gcp_credentials=gcp_credentials) as warehouse:\n        warehouse.execute(\n            \"CREATE TABLE IF NOT EXISTS test_example.customers (name STRING, address STRING);\"\n        )\n        warehouse.execute_many(\n            \"INSERT INTO test_example.customers (name, address) VALUES (%(name)s, %(address)s);\",\n            seq_of_parameters=[\n                {\"name\": \"Marvin\", \"address\": \"Highway 42\"},\n                {\"name\": \"Ford\", \"address\": \"Highway 42\"},\n                {\"name\": \"Unknown\", \"address\": \"Highway 42\"},\n            ],\n        )\n        while True:\n            # Repeated fetch* calls using the same operation will\n            # skip re-executing and instead return the next set of results\n            new_rows = warehouse.fetch_many(\"SELECT * FROM test_example.customers\", size=2)\n            if len(new_rows) == 0:\n                break\n            all_rows.extend(new_rows)\n    return all_rows\n\nbigquery_flow()\n
"},{"location":"#using-prefect-with-google-cloud-storage","title":"Using Prefect with Google Cloud Storage","text":"

With prefect_gcp, your Prefect flows can not only seamlessly upload and download objects to and from Google Cloud Storage, but also log these actions.

Be sure to additionally install prefect-gcp with the Cloud Storage extra!

The provided code snippet shows how you can use prefect_gcp to upload a file to a Google Cloud Storage bucket and download the same file under a different file name.

from pathlib import Path\nfrom prefect import flow\nfrom prefect_gcp import GcpCredentials, GcsBucket\n\n\n@flow\ndef cloud_storage_flow():\n    # create a dummy file to upload\n    file_path = Path(\"test-example.txt\")\n    file_path.write_text(\"Hello, Prefect!\")\n\n    gcp_credentials = GcpCredentials.load(\"BLOCK-NAME-PLACEHOLDER\")\n    gcs_bucket = GcsBucket(\n        bucket=\"BUCKET-NAME-PLACEHOLDER\",\n        gcp_credentials=gcp_credentials\n    )\n\n    gcs_bucket_path = gcs_bucket.upload_from_path(file_path)\n    downloaded_file_path = gcs_bucket.download_object_to_path(\n        gcs_bucket_path, \"downloaded-test-example.txt\"\n    )\n    return downloaded_file_path.read_text()\n\n\ncloud_storage_flow()\n

Upload and download directories

GcsBucket supports uploading and downloading entire directories. To view examples, check out the Examples Catalog!
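As a quick illustration (a minimal sketch assuming a saved GcsBucket block named \"my-bucket\" and a local directory named my_folder):
from prefect_gcp.cloud_storage import GcsBucket\n\ngcs_bucket = GcsBucket.load(\"my-bucket\")\n\n# upload the local directory my_folder to the bucket's my_folder prefix\ngcs_bucket.upload_from_folder(\"my_folder\")\n\n# download it back into a local folder of the same name\ngcs_bucket.download_folder_to_path(\"my_folder\", \"my_folder\")\n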

"},{"location":"#using-prefect-with-google-secret-manager","title":"Using Prefect with Google Secret Manager","text":"

Do you already have secrets available on Google Secret Manager? There's no need to migrate them!

prefect_gcp allows you to read and write secrets with Google Secret Manager within your Prefect flows.

Be sure to install prefect-gcp with the Secret Manager extra!

The provided code snippet shows how you can use prefect_gcp to write a secret to the Secret Manager, read the secret data, delete the secret, and finally return the secret data.

from prefect import flow\nfrom prefect_gcp import GcpCredentials, GcpSecret\n\n\n@flow\ndef secret_manager_flow():\n    gcp_credentials = GcpCredentials.load(\"BLOCK-NAME-PLACEHOLDER\")\n    gcp_secret = GcpSecret(secret_name=\"test-example\", gcp_credentials=gcp_credentials)\n    gcp_secret.write_secret(secret_data=b\"Hello, Prefect!\")\n    secret_data = gcp_secret.read_secret()\n    gcp_secret.delete_secret()\n    return secret_data\n\nsecret_manager_flow()\n
"},{"location":"#accessing-google-credentials-or-clients-from-gcpcredentials","title":"Accessing Google credentials or clients from GcpCredentials","text":"

In the case that prefect-gcp is missing a feature, feel free to submit an issue.

In the meantime, you may want to access the underlying Google Cloud credentials or clients, which prefect-gcp exposes via the GcpCredentials block.

The provided code snippet shows how you can use prefect_gcp to instantiate a Google Cloud client, like bigquery.Client.

Note that a GcpCredentials object is NOT a valid input to the underlying BigQuery client. Use the get_credentials_from_service_account method to access and pass an actual google.auth.Credentials object.

from google.cloud import bigquery\nfrom prefect import flow\nfrom prefect_gcp import GcpCredentials\n\n@flow\ndef create_bigquery_client():\n    gcp_credentials = GcpCredentials.load(\"BLOCK-NAME-PLACEHOLDER\")\n    google_auth_credentials = gcp_credentials.get_credentials_from_service_account()\n    bigquery_client = bigquery.Client(credentials=google_auth_credentials)\n

If you simply want to access the underlying client, prefect-gcp exposes a get_client method from GcpCredentials.

from prefect import flow\nfrom prefect_gcp import GcpCredentials\n\n@flow\ndef create_bigquery_client():\n    gcp_credentials = GcpCredentials.load(\"BLOCK-NAME-PLACEHOLDER\")\n    bigquery_client = gcp_credentials.get_client(\"bigquery\")\n
"},{"location":"#resources","title":"Resources","text":"

For more tips on how to use tasks and flows in a Collection, check out Using Collections!

"},{"location":"#installation","title":"Installation","text":"

To use prefect-gcp and Cloud Run:

pip install prefect-gcp\n

To use Cloud Storage:

pip install \"prefect-gcp[cloud_storage]\"\n

To use BigQuery:

pip install \"prefect-gcp[bigquery]\"\n

To use Secret Manager:

pip install \"prefect-gcp[secret_manager]\"\n

To use Vertex AI:

pip install \"prefect-gcp[aiplatform]\"\n

A list of available blocks in prefect-gcp and their setup instructions can be found here.

Requires an installation of Python 3.7+.

We recommend using a Python virtual environment manager such as pipenv, conda or virtualenv.

These tasks are designed to work with Prefect 2. For more information about how to use Prefect, please refer to the Prefect documentation.

"},{"location":"#feedback","title":"Feedback","text":"

If you encounter any bugs while using prefect-gcp, feel free to open an issue in the prefect-gcp repository.

If you have any questions or issues while using prefect-gcp, you can find help in either the Prefect Discourse forum or the Prefect Slack community.

Feel free to star or watch prefect-gcp for updates too!

"},{"location":"aiplatform/","title":"AI Platform","text":""},{"location":"aiplatform/#prefect_gcp.aiplatform","title":"prefect_gcp.aiplatform","text":"

Integrations with Google AI Platform.

Note that this module is experimental. The interfaces within may change without notice.

Examples:

Run a job using Vertex AI Custom Training:\n```python\nfrom prefect_gcp.credentials import GcpCredentials\nfrom prefect_gcp.aiplatform import VertexAICustomTrainingJob\n\ngcp_credentials = GcpCredentials.load(\"BLOCK_NAME\")\njob = VertexAICustomTrainingJob(\n    region=\"us-east1\",\n    image=\"us-docker.pkg.dev/cloudrun/container/job:latest\",\n    gcp_credentials=gcp_credentials,\n)\njob.run()\n```\n\nRun a job that runs the command `echo hello world` using Google Cloud Run Jobs:\n```python\nfrom prefect_gcp.credentials import GcpCredentials\nfrom prefect_gcp.aiplatform import VertexAICustomTrainingJob\n\ngcp_credentials = GcpCredentials.load(\"BLOCK_NAME\")\njob = VertexAICustomTrainingJob(\n    command=[\"echo\", \"hello world\"],\n    region=\"us-east1\",\n    image=\"us-docker.pkg.dev/cloudrun/container/job:latest\",\n    gcp_credentials=gcp_credentials,\n)\njob.run()\n```\n\nPreview job specs:\n```python\nfrom prefect_gcp.credentials import GcpCredentials\nfrom prefect_gcp.aiplatform import VertexAICustomTrainingJob\n\ngcp_credentials = GcpCredentials.load(\"BLOCK_NAME\")\njob = VertexAICustomTrainingJob(\n    command=[\"echo\", \"hello world\"],\n    region=\"us-east1\",\n    image=\"us-docker.pkg.dev/cloudrun/container/job:latest\",\n    gcp_credentials=gcp_credentials,\n)\njob.preview()\n```\n
"},{"location":"aiplatform/#prefect_gcp.aiplatform-classes","title":"Classes","text":""},{"location":"aiplatform/#prefect_gcp.aiplatform.VertexAICustomTrainingJob","title":"VertexAICustomTrainingJob","text":"

Bases: Infrastructure

Infrastructure block used to run Vertex AI custom training jobs.

Source code in prefect_gcp/aiplatform.py
class VertexAICustomTrainingJob(Infrastructure):\n    \"\"\"\n    Infrastructure block used to run Vertex AI custom training jobs.\n    \"\"\"\n\n    _block_type_name = \"Vertex AI Custom Training Job\"\n    _block_type_slug = \"vertex-ai-custom-training-job\"\n    _logo_url = \"https://cdn.sanity.io/images/3ugk85nk/production/10424e311932e31c477ac2b9ef3d53cefbaad708-250x250.png\"  # noqa\n    _documentation_url = \"https://prefecthq.github.io/prefect-gcp/aiplatform/#prefect_gcp.aiplatform.VertexAICustomTrainingJob\"  # noqa: E501\n\n    type: Literal[\"vertex-ai-custom-training-job\"] = Field(\n        \"vertex-ai-custom-training-job\", description=\"The slug for this task type.\"\n    )\n\n    gcp_credentials: GcpCredentials = Field(\n        default_factory=GcpCredentials,\n        description=(\n            \"GCP credentials to use when running the configured Vertex AI custom \"\n            \"training job. If not provided, credentials will be inferred from the \"\n            \"environment. See `GcpCredentials` for details.\"\n        ),\n    )\n    region: str = Field(\n        default=...,\n        description=\"The region where the Vertex AI custom training job resides.\",\n    )\n    image: str = Field(\n        default=...,\n        title=\"Image Name\",\n        description=(\n            \"The image to use for a new Vertex AI custom training job. This value must \"\n            \"refer to an image within either Google Container Registry \"\n            \"or Google Artifact Registry, like `gcr.io/<project_name>/<repo>/`.\"\n        ),\n    )\n    env: Dict[str, str] = Field(\n        default_factory=dict,\n        title=\"Environment Variables\",\n        description=\"Environment variables to be passed to your Cloud Run Job.\",\n    )\n    machine_type: str = Field(\n        default=\"n1-standard-4\",\n        description=\"The machine type to use for the run, which controls the available \"\n        \"CPU and memory.\",\n    )\n    accelerator_type: Optional[str] = Field(\n        default=None, description=\"The type of accelerator to attach to the machine.\"\n    )\n    accelerator_count: Optional[int] = Field(\n        default=None, description=\"The number of accelerators to attach to the machine.\"\n    )\n    boot_disk_type: str = Field(\n        default=\"pd-ssd\",\n        title=\"Boot Disk Type\",\n        description=\"The type of boot disk to attach to the machine.\",\n    )\n    boot_disk_size_gb: int = Field(\n        default=100,\n        title=\"Boot Disk Size\",\n        description=\"The size of the boot disk to attach to the machine, in gigabytes.\",\n    )\n    maximum_run_time: datetime.timedelta = Field(\n        default=datetime.timedelta(days=7), description=\"The maximum job running time.\"\n    )\n    network: Optional[str] = Field(\n        default=None,\n        description=\"The full name of the Compute Engine network\"\n        \"to which the Job should be peered. Private services access must \"\n        \"already be configured for the network. If left unspecified, the job \"\n        \"is not peered with any network.\",\n    )\n    reserved_ip_ranges: Optional[List[str]] = Field(\n        default=None,\n        description=\"A list of names for the reserved ip ranges under the VPC \"\n        \"network that can be used for this job. If set, we will deploy the job \"\n        \"within the provided ip ranges. 
Otherwise, the job will be deployed to \"\n        \"any ip ranges under the provided VPC network.\",\n    )\n    service_account: Optional[str] = Field(\n        default=None,\n        description=(\n            \"Specifies the service account to use \"\n            \"as the run-as account in Vertex AI. The agent submitting jobs must have \"\n            \"act-as permission on this run-as account. If unspecified, the AI \"\n            \"Platform Custom Code Service Agent for the CustomJob's project is \"\n            \"used. Takes precedence over the service account found in gcp_credentials, \"\n            \"and required if a service account cannot be detected in gcp_credentials.\"\n        ),\n    )\n    job_watch_poll_interval: float = Field(\n        default=5.0,\n        description=(\n            \"The amount of time to wait between GCP API calls while monitoring the \"\n            \"state of a Vertex AI Job.\"\n        ),\n    )\n\n    @property\n    def job_name(self):\n        \"\"\"\n        The name can be up to 128 characters long and can be consist of any UTF-8 characters. Reference:\n        https://cloud.google.com/python/docs/reference/aiplatform/latest/google.cloud.aiplatform.CustomJob#google_cloud_aiplatform_CustomJob_display_name\n        \"\"\"  # noqa\n        try:\n            base_name = self.name or self.image.split(\"/\")[2]\n            return f\"{base_name}-{uuid4().hex}\"\n        except IndexError:\n            raise ValueError(\n                \"The provided image must be from either Google Container Registry \"\n                \"or Google Artifact Registry\"\n            )\n\n    def _get_compatible_labels(self) -> Dict[str, str]:\n        \"\"\"\n        Ensures labels are compatible with GCP label requirements.\n        https://cloud.google.com/resource-manager/docs/creating-managing-labels\n\n        Ex: the Prefect provided key of prefect.io/flow-name -> prefect-io_flow-name\n        \"\"\"\n        compatible_labels = {}\n        for key, val in self.labels.items():\n            new_key = slugify(\n                key,\n                lowercase=True,\n                replacements=[(\"/\", \"_\"), (\".\", \"-\")],\n                max_length=63,\n                regex_pattern=_DISALLOWED_GCP_LABEL_CHARACTERS,\n            )\n            compatible_labels[new_key] = slugify(\n                val,\n                lowercase=True,\n                replacements=[(\"/\", \"_\"), (\".\", \"-\")],\n                max_length=63,\n                regex_pattern=_DISALLOWED_GCP_LABEL_CHARACTERS,\n            )\n        return compatible_labels\n\n    def preview(self) -> str:\n        \"\"\"Generate a preview of the job definition that will be sent to GCP.\"\"\"\n        job_spec = self._build_job_spec()\n        custom_job = CustomJob(\n            display_name=self.job_name,\n            job_spec=job_spec,\n            labels=self._get_compatible_labels(),\n        )\n        return str(custom_job)  # outputs a json string\n\n    def get_corresponding_worker_type(self) -> str:\n        \"\"\"Return the corresponding worker type for this infrastructure block.\"\"\"\n        return \"vertex-ai\"\n\n    async def generate_work_pool_base_job_template(self) -> dict:\n        \"\"\"\n        Generate a base job template for a `Vertex AI` work pool with the same\n        configuration as this block.\n        Returns:\n            - dict: a base job template for a `Vertex AI` work pool\n        \"\"\"\n        base_job_template = await 
get_default_base_job_template_for_infrastructure_type(\n            self.get_corresponding_worker_type(),\n        )\n        assert (\n            base_job_template is not None\n        ), \"Failed to generate default base job template for Cloud Run worker.\"\n        for key, value in self.dict(exclude_unset=True, exclude_defaults=True).items():\n            if key == \"command\":\n                base_job_template[\"variables\"][\"properties\"][\"command\"][\n                    \"default\"\n                ] = shlex.join(value)\n            elif key in [\n                \"type\",\n                \"block_type_slug\",\n                \"_block_document_id\",\n                \"_block_document_name\",\n                \"_is_anonymous\",\n            ]:\n                continue\n            elif key == \"gcp_credentials\":\n                if not self.gcp_credentials._block_document_id:\n                    raise BlockNotSavedError(\n                        \"It looks like you are trying to use a block that\"\n                        \" has not been saved. Please call `.save` on your block\"\n                        \" before publishing it as a work pool.\"\n                    )\n                base_job_template[\"variables\"][\"properties\"][\"credentials\"][\n                    \"default\"\n                ] = {\n                    \"$ref\": {\n                        \"block_document_id\": str(\n                            self.gcp_credentials._block_document_id\n                        )\n                    }\n                }\n            elif key == \"maximum_run_time\":\n                base_job_template[\"variables\"][\"properties\"][\"maximum_run_time_hours\"][\n                    \"default\"\n                ] = round(value.total_seconds() / 3600)\n            elif key == \"service_account\":\n                base_job_template[\"variables\"][\"properties\"][\"service_account_name\"][\n                    \"default\"\n                ] = value\n            elif key in base_job_template[\"variables\"][\"properties\"]:\n                base_job_template[\"variables\"][\"properties\"][key][\"default\"] = value\n            else:\n                self.logger.warning(\n                    f\"Variable {key!r} is not supported by `Vertex AI` work pools.\"\n                    \" Skipping.\"\n                )\n\n        return base_job_template\n\n    def _build_job_spec(self) -> \"CustomJobSpec\":\n        \"\"\"\n        Builds a job spec by gathering details.\n        \"\"\"\n        # gather worker pool spec\n        env_list = [\n            {\"name\": name, \"value\": value}\n            for name, value in {\n                **self._base_environment(),\n                **self.env,\n            }.items()\n        ]\n        container_spec = ContainerSpec(\n            image_uri=self.image, command=self.command, args=[], env=env_list\n        )\n        machine_spec = MachineSpec(\n            machine_type=self.machine_type,\n            accelerator_type=self.accelerator_type,\n            accelerator_count=self.accelerator_count,\n        )\n        worker_pool_spec = WorkerPoolSpec(\n            container_spec=container_spec,\n            machine_spec=machine_spec,\n            replica_count=1,\n            disk_spec=DiskSpec(\n                boot_disk_type=self.boot_disk_type,\n                boot_disk_size_gb=self.boot_disk_size_gb,\n            ),\n        )\n        # look for service account\n        service_account = (\n            self.service_account or 
self.gcp_credentials._service_account_email\n        )\n        if service_account is None:\n            raise ValueError(\n                \"A service account is required for the Vertex job. \"\n                \"A service account could not be detected in the attached credentials; \"\n                \"please set a service account explicitly, e.g. \"\n                '`VertexAICustomTrainingJob(service_acount=\"...\")`'\n            )\n\n        # build custom job specs\n        timeout = Duration().FromTimedelta(td=self.maximum_run_time)\n        scheduling = Scheduling(timeout=timeout)\n        job_spec = CustomJobSpec(\n            worker_pool_specs=[worker_pool_spec],\n            service_account=service_account,\n            scheduling=scheduling,\n            network=self.network,\n            reserved_ip_ranges=self.reserved_ip_ranges,\n        )\n        return job_spec\n\n    async def _create_and_begin_job(\n        self, job_spec: \"CustomJobSpec\", job_service_client: \"JobServiceClient\"\n    ) -> \"CustomJob\":\n        \"\"\"\n        Builds a custom job and begins running it.\n        \"\"\"\n        # create custom job\n        custom_job = CustomJob(\n            display_name=self.job_name,\n            job_spec=job_spec,\n            labels=self._get_compatible_labels(),\n        )\n\n        # run job\n        self.logger.info(\n            f\"{self._log_prefix}: Job {self.job_name!r} starting to run \"\n            f\"the command {' '.join(self.command)!r} in region \"\n            f\"{self.region!r} using image {self.image!r}\"\n        )\n\n        project = self.gcp_credentials.project\n        resource_name = f\"projects/{project}/locations/{self.region}\"\n\n        retry_policy = retry(\n            stop=stop_after_attempt(3), wait=wait_fixed(1) + wait_random(0, 3)\n        )\n\n        custom_job_run = await run_sync_in_worker_thread(\n            retry_policy(job_service_client.create_custom_job),\n            parent=resource_name,\n            custom_job=custom_job,\n        )\n\n        self.logger.info(\n            f\"{self._log_prefix}: Job {self.job_name!r} has successfully started; \"\n            f\"the full job name is {custom_job_run.name!r}\"\n        )\n\n        return custom_job_run\n\n    async def _watch_job_run(\n        self,\n        full_job_name: str,  # different from self.job_name\n        job_service_client: \"JobServiceClient\",\n        current_state: \"JobState\",\n        until_states: Tuple[\"JobState\"],\n        timeout: int = None,\n    ) -> \"CustomJob\":\n        \"\"\"\n        Polls job run to see if status changed.\n        \"\"\"\n        state = JobState.JOB_STATE_UNSPECIFIED\n        last_state = current_state\n        t0 = time.time()\n\n        while state not in until_states:\n            job_run = await run_sync_in_worker_thread(\n                job_service_client.get_custom_job,\n                name=full_job_name,\n            )\n            state = job_run.state\n            if state != last_state:\n                state_label = (\n                    state.name.replace(\"_\", \" \")\n                    .lower()\n                    .replace(\"state\", \"state is now:\")\n                )\n                # results in \"New job state is now: succeeded\"\n                self.logger.info(\n                    f\"{self._log_prefix}: {self.job_name} has new {state_label}\"\n                )\n                last_state = state\n            else:\n                # Intermittently, the job will not be described. 
We want to respect the\n                # watch timeout though.\n                self.logger.debug(f\"{self._log_prefix}: Job not found.\")\n\n            elapsed_time = time.time() - t0\n            if timeout is not None and elapsed_time > timeout:\n                raise RuntimeError(\n                    f\"Timed out after {elapsed_time}s while watching job for states \"\n                    \"{until_states!r}\"\n                )\n            time.sleep(self.job_watch_poll_interval)\n\n        return job_run\n\n    @sync_compatible\n    async def run(\n        self, task_status: Optional[\"TaskStatus\"] = None\n    ) -> VertexAICustomTrainingJobResult:\n        \"\"\"\n        Run the configured task on VertexAI.\n\n        Args:\n            task_status: An optional `TaskStatus` to update when the container starts.\n\n        Returns:\n            The `VertexAICustomTrainingJobResult`.\n        \"\"\"\n        client_options = ClientOptions(\n            api_endpoint=f\"{self.region}-aiplatform.googleapis.com\"\n        )\n\n        job_spec = self._build_job_spec()\n        with self.gcp_credentials.get_job_service_client(\n            client_options=client_options\n        ) as job_service_client:\n            job_run = await self._create_and_begin_job(job_spec, job_service_client)\n\n            if task_status:\n                task_status.started(self.job_name)\n\n            final_job_run = await self._watch_job_run(\n                full_job_name=job_run.name,\n                job_service_client=job_service_client,\n                current_state=job_run.state,\n                until_states=(\n                    JobState.JOB_STATE_SUCCEEDED,\n                    JobState.JOB_STATE_FAILED,\n                    JobState.JOB_STATE_CANCELLED,\n                    JobState.JOB_STATE_EXPIRED,\n                ),\n                timeout=self.maximum_run_time.total_seconds(),\n            )\n\n        error_msg = final_job_run.error.message\n        if error_msg:\n            raise RuntimeError(f\"{self._log_prefix}: {error_msg}\")\n\n        status_code = 0 if final_job_run.state == JobState.JOB_STATE_SUCCEEDED else 1\n\n        return VertexAICustomTrainingJobResult(\n            identifier=final_job_run.display_name, status_code=status_code\n        )\n\n    @sync_compatible\n    async def kill(self, identifier: str, grace_seconds: int = 30) -> None:\n        \"\"\"\n        Kill a job running Cloud Run.\n\n        Args:\n            identifier: The Vertex AI full job name, formatted like\n                \"projects/{project}/locations/{location}/customJobs/{custom_job}\".\n\n        Returns:\n            The `VertexAICustomTrainingJobResult`.\n        \"\"\"\n        client_options = ClientOptions(\n            api_endpoint=f\"{self.region}-aiplatform.googleapis.com\"\n        )\n        with self.gcp_credentials.get_job_service_client(\n            client_options=client_options\n        ) as job_service_client:\n            await run_sync_in_worker_thread(\n                self._kill_job,\n                job_service_client=job_service_client,\n                full_job_name=identifier,\n            )\n            self.logger.info(f\"Requested to cancel {identifier}...\")\n\n    def _kill_job(\n        self, job_service_client: \"JobServiceClient\", full_job_name: str\n    ) -> None:\n        \"\"\"\n        Thin wrapper around Job.delete, wrapping a try/except since\n        Job is an independent class that doesn't have knowledge of\n        CloudRunJob and its associated logic.\n 
       \"\"\"\n        cancel_custom_job_request = CancelCustomJobRequest(name=full_job_name)\n        try:\n            job_service_client.cancel_custom_job(\n                request=cancel_custom_job_request,\n            )\n        except Exception as exc:\n            if \"does not exist\" in str(exc):\n                raise InfrastructureNotFound(\n                    f\"Cannot stop Vertex AI job; the job name {full_job_name!r} \"\n                    \"could not be found.\"\n                ) from exc\n            raise\n\n    @property\n    def _log_prefix(self) -> str:\n        \"\"\"\n        Internal property for generating a prefix for logs where `name` may be null\n        \"\"\"\n        if self.name is not None:\n            return f\"VertexAICustomTrainingJob {self.name!r}\"\n        else:\n            return \"VertexAICustomTrainingJob\"\n
"},{"location":"aiplatform/#prefect_gcp.aiplatform.VertexAICustomTrainingJob-attributes","title":"Attributes","text":""},{"location":"aiplatform/#prefect_gcp.aiplatform.VertexAICustomTrainingJob.job_name","title":"job_name property","text":"

The name can be up to 128 characters long and can consist of any UTF-8 characters. Reference: https://cloud.google.com/python/docs/reference/aiplatform/latest/google.cloud.aiplatform.CustomJob#google_cloud_aiplatform_CustomJob_display_name

"},{"location":"aiplatform/#prefect_gcp.aiplatform.VertexAICustomTrainingJob-functions","title":"Functions","text":""},{"location":"aiplatform/#prefect_gcp.aiplatform.VertexAICustomTrainingJob.generate_work_pool_base_job_template","title":"generate_work_pool_base_job_template async","text":"

Generate a base job template for a Vertex AI work pool with the same configuration as this block. Returns: dict: a base job template for a Vertex AI work pool.

Source code in prefect_gcp/aiplatform.py
async def generate_work_pool_base_job_template(self) -> dict:\n    \"\"\"\n    Generate a base job template for a `Vertex AI` work pool with the same\n    configuration as this block.\n    Returns:\n        - dict: a base job template for a `Vertex AI` work pool\n    \"\"\"\n    base_job_template = await get_default_base_job_template_for_infrastructure_type(\n        self.get_corresponding_worker_type(),\n    )\n    assert (\n        base_job_template is not None\n    ), \"Failed to generate default base job template for Cloud Run worker.\"\n    for key, value in self.dict(exclude_unset=True, exclude_defaults=True).items():\n        if key == \"command\":\n            base_job_template[\"variables\"][\"properties\"][\"command\"][\n                \"default\"\n            ] = shlex.join(value)\n        elif key in [\n            \"type\",\n            \"block_type_slug\",\n            \"_block_document_id\",\n            \"_block_document_name\",\n            \"_is_anonymous\",\n        ]:\n            continue\n        elif key == \"gcp_credentials\":\n            if not self.gcp_credentials._block_document_id:\n                raise BlockNotSavedError(\n                    \"It looks like you are trying to use a block that\"\n                    \" has not been saved. Please call `.save` on your block\"\n                    \" before publishing it as a work pool.\"\n                )\n            base_job_template[\"variables\"][\"properties\"][\"credentials\"][\n                \"default\"\n            ] = {\n                \"$ref\": {\n                    \"block_document_id\": str(\n                        self.gcp_credentials._block_document_id\n                    )\n                }\n            }\n        elif key == \"maximum_run_time\":\n            base_job_template[\"variables\"][\"properties\"][\"maximum_run_time_hours\"][\n                \"default\"\n            ] = round(value.total_seconds() / 3600)\n        elif key == \"service_account\":\n            base_job_template[\"variables\"][\"properties\"][\"service_account_name\"][\n                \"default\"\n            ] = value\n        elif key in base_job_template[\"variables\"][\"properties\"]:\n            base_job_template[\"variables\"][\"properties\"][key][\"default\"] = value\n        else:\n            self.logger.warning(\n                f\"Variable {key!r} is not supported by `Vertex AI` work pools.\"\n                \" Skipping.\"\n            )\n\n    return base_job_template\n
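A minimal, hypothetical sketch of generating the template from a previously saved block; the block name "my-vertex-job" is an assumption, and the coroutine is awaited explicitly because the method is async:

```python
import asyncio

from prefect_gcp.aiplatform import VertexAICustomTrainingJob


async def main():
    # "my-vertex-job" is an assumed, previously saved block name
    job = await VertexAICustomTrainingJob.load("my-vertex-job")
    template = await job.generate_work_pool_base_job_template()
    # defaults configured on the block are folded into the template's variables
    print(list(template["variables"]["properties"]))


asyncio.run(main())
```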
"},{"location":"aiplatform/#prefect_gcp.aiplatform.VertexAICustomTrainingJob.get_corresponding_worker_type","title":"get_corresponding_worker_type","text":"

Return the corresponding worker type for this infrastructure block.

Source code in prefect_gcp/aiplatform.py
def get_corresponding_worker_type(self) -> str:\n    \"\"\"Return the corresponding worker type for this infrastructure block.\"\"\"\n    return \"vertex-ai\"\n
"},{"location":"aiplatform/#prefect_gcp.aiplatform.VertexAICustomTrainingJob.kill","title":"kill async","text":"

Kill a running Vertex AI custom training job.

Parameters:

Name Type Description Default identifier str

The Vertex AI full job name, formatted like \"projects/{project}/locations/{location}/customJobs/{custom_job}\".

required

Returns:

Type Description None

None; a cancellation request is submitted for the job.

Source code in prefect_gcp/aiplatform.py
@sync_compatible\nasync def kill(self, identifier: str, grace_seconds: int = 30) -> None:\n    \"\"\"\n    Kill a job running Cloud Run.\n\n    Args:\n        identifier: The Vertex AI full job name, formatted like\n            \"projects/{project}/locations/{location}/customJobs/{custom_job}\".\n\n    Returns:\n        The `VertexAICustomTrainingJobResult`.\n    \"\"\"\n    client_options = ClientOptions(\n        api_endpoint=f\"{self.region}-aiplatform.googleapis.com\"\n    )\n    with self.gcp_credentials.get_job_service_client(\n        client_options=client_options\n    ) as job_service_client:\n        await run_sync_in_worker_thread(\n            self._kill_job,\n            job_service_client=job_service_client,\n            full_job_name=identifier,\n        )\n        self.logger.info(f\"Requested to cancel {identifier}...\")\n
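A minimal sketch of cancelling a run with this method; the block name and the fully qualified job name below are placeholders:

```python
from prefect_gcp.aiplatform import VertexAICustomTrainingJob

# "my-vertex-job" is an assumed, previously saved block name
job = VertexAICustomTrainingJob.load("my-vertex-job")

# the identifier is the full Vertex AI job name returned when the job was created
job.kill(
    identifier="projects/my-project/locations/us-central1/customJobs/1234567890"
)
```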
"},{"location":"aiplatform/#prefect_gcp.aiplatform.VertexAICustomTrainingJob.preview","title":"preview","text":"

Generate a preview of the job definition that will be sent to GCP.

Source code in prefect_gcp/aiplatform.py
def preview(self) -> str:\n    \"\"\"Generate a preview of the job definition that will be sent to GCP.\"\"\"\n    job_spec = self._build_job_spec()\n    custom_job = CustomJob(\n        display_name=self.job_name,\n        job_spec=job_spec,\n        labels=self._get_compatible_labels(),\n    )\n    return str(custom_job)  # outputs a json string\n
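A minimal sketch of previewing the job definition without submitting anything to GCP; every value below (project, image, region, service account) is a placeholder:

```python
from prefect_gcp.aiplatform import VertexAICustomTrainingJob
from prefect_gcp.credentials import GcpCredentials

# all configuration values are hypothetical
job = VertexAICustomTrainingJob(
    image="us-docker.pkg.dev/my-project/my-repo/trainer:latest",
    region="us-central1",
    service_account="my-sa@my-project.iam.gserviceaccount.com",
    gcp_credentials=GcpCredentials(project="my-project"),
)

# prints a JSON-style dump of the CustomJob that would be submitted
print(job.preview())
```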
"},{"location":"aiplatform/#prefect_gcp.aiplatform.VertexAICustomTrainingJob.run","title":"run async","text":"

Run the configured task on VertexAI.

Parameters:

Name Type Description Default task_status Optional[TaskStatus]

An optional TaskStatus to update when the container starts.

None

Returns:

Type Description VertexAICustomTrainingJobResult

The VertexAICustomTrainingJobResult.

Source code in prefect_gcp/aiplatform.py
@sync_compatible\nasync def run(\n    self, task_status: Optional[\"TaskStatus\"] = None\n) -> VertexAICustomTrainingJobResult:\n    \"\"\"\n    Run the configured task on VertexAI.\n\n    Args:\n        task_status: An optional `TaskStatus` to update when the container starts.\n\n    Returns:\n        The `VertexAICustomTrainingJobResult`.\n    \"\"\"\n    client_options = ClientOptions(\n        api_endpoint=f\"{self.region}-aiplatform.googleapis.com\"\n    )\n\n    job_spec = self._build_job_spec()\n    with self.gcp_credentials.get_job_service_client(\n        client_options=client_options\n    ) as job_service_client:\n        job_run = await self._create_and_begin_job(job_spec, job_service_client)\n\n        if task_status:\n            task_status.started(self.job_name)\n\n        final_job_run = await self._watch_job_run(\n            full_job_name=job_run.name,\n            job_service_client=job_service_client,\n            current_state=job_run.state,\n            until_states=(\n                JobState.JOB_STATE_SUCCEEDED,\n                JobState.JOB_STATE_FAILED,\n                JobState.JOB_STATE_CANCELLED,\n                JobState.JOB_STATE_EXPIRED,\n            ),\n            timeout=self.maximum_run_time.total_seconds(),\n        )\n\n    error_msg = final_job_run.error.message\n    if error_msg:\n        raise RuntimeError(f\"{self._log_prefix}: {error_msg}\")\n\n    status_code = 0 if final_job_run.state == JobState.JOB_STATE_SUCCEEDED else 1\n\n    return VertexAICustomTrainingJobResult(\n        identifier=final_job_run.display_name, status_code=status_code\n    )\n
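A minimal sketch of running the block directly; the image, command, service account, and saved credentials block name are placeholders:

```python
from prefect_gcp.aiplatform import VertexAICustomTrainingJob
from prefect_gcp.credentials import GcpCredentials

# configuration values are hypothetical
job = VertexAICustomTrainingJob(
    command=["python", "-m", "my_package.train"],
    image="us-docker.pkg.dev/my-project/my-repo/trainer:latest",
    region="us-central1",
    service_account="my-sa@my-project.iam.gserviceaccount.com",
    gcp_credentials=GcpCredentials.load("my-gcp-creds"),
)

# blocks until the Vertex AI job reaches a terminal state
result = job.run()
print(result.identifier, result.status_code)
```

Because `run` is sync-compatible, the same call can be awaited from an async flow.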
"},{"location":"aiplatform/#prefect_gcp.aiplatform.VertexAICustomTrainingJobResult","title":"VertexAICustomTrainingJobResult","text":"

Bases: InfrastructureResult

Result from a Vertex AI custom training job.

Source code in prefect_gcp/aiplatform.py
class VertexAICustomTrainingJobResult(InfrastructureResult):\n    \"\"\"Result from a Vertex AI custom training job.\"\"\"\n
"},{"location":"bigquery/","title":"BigQuery","text":""},{"location":"bigquery/#prefect_gcp.bigquery","title":"prefect_gcp.bigquery","text":"

Tasks for interacting with GCP BigQuery

"},{"location":"bigquery/#prefect_gcp.bigquery-classes","title":"Classes","text":""},{"location":"bigquery/#prefect_gcp.bigquery.BigQueryWarehouse","title":"BigQueryWarehouse","text":"

Bases: DatabaseBlock

A block for querying a database with BigQuery.

Upon instantiation, a connection to BigQuery is established and maintained for the life of the object until the close method is called.

It is recommended to use this block as a context manager, which will automatically close the connection and its cursors when the context is exited.

It is also recommended that this block is loaded and consumed within a single task or flow because if the block is passed across separate tasks and flows, the state of the block's connection and cursor could be lost.

Attributes:

Name Type Description gcp_credentials GcpCredentials

The credentials to use to authenticate.

fetch_size int

The number of rows to fetch at a time when calling fetch_many. Note, this parameter is executed on the client side and is not passed to the database. To limit on the server side, add the LIMIT clause, or the dialect's equivalent clause, like TOP, to the query.

Source code in prefect_gcp/bigquery.py
class BigQueryWarehouse(DatabaseBlock):\n    \"\"\"\n    A block for querying a database with BigQuery.\n\n    Upon instantiating, a connection to BigQuery is established\n    and maintained for the life of the object until the close method is called.\n\n    It is recommended to use this block as a context manager, which will automatically\n    close the connection and its cursors when the context is exited.\n\n    It is also recommended that this block is loaded and consumed within a single task\n    or flow because if the block is passed across separate tasks and flows,\n    the state of the block's connection and cursor could be lost.\n\n    Attributes:\n        gcp_credentials: The credentials to use to authenticate.\n        fetch_size: The number of rows to fetch at a time when calling fetch_many.\n            Note, this parameter is executed on the client side and is not\n            passed to the database. To limit on the server side, add the `LIMIT`\n            clause, or the dialect's equivalent clause, like `TOP`, to the query.\n    \"\"\"  # noqa\n\n    _block_type_name = \"BigQuery Warehouse\"\n    _logo_url = \"https://cdn.sanity.io/images/3ugk85nk/production/10424e311932e31c477ac2b9ef3d53cefbaad708-250x250.png\"  # noqa\n    _documentation_url = \"https://prefecthq.github.io/prefect-gcp/bigquery/#prefect_gcp.bigquery.BigQueryWarehouse\"  # noqa: E501\n\n    gcp_credentials: GcpCredentials\n    fetch_size: int = Field(\n        default=1, description=\"The number of rows to fetch at a time.\"\n    )\n\n    _connection: Optional[\"Connection\"] = None\n    _unique_cursors: Dict[str, \"Cursor\"] = None\n\n    def _start_connection(self):\n        \"\"\"\n        Starts a connection.\n        \"\"\"\n        with self.gcp_credentials.get_bigquery_client() as client:\n            self._connection = Connection(client=client)\n\n    def block_initialization(self) -> None:\n        super().block_initialization()\n        if self._connection is None:\n            self._start_connection()\n\n        if self._unique_cursors is None:\n            self._unique_cursors = {}\n\n    def get_connection(self) -> \"Connection\":\n        \"\"\"\n        Get the opened connection to BigQuery.\n        \"\"\"\n        return self._connection\n\n    def _get_cursor(self, inputs: Dict[str, Any]) -> Tuple[bool, \"Cursor\"]:\n        \"\"\"\n        Get a BigQuery cursor.\n\n        Args:\n            inputs: The inputs to generate a unique hash, used to decide\n                whether a new cursor should be used.\n\n        Returns:\n            Whether a cursor is new and a BigQuery cursor.\n        \"\"\"\n        input_hash = hash_objects(inputs)\n        assert input_hash is not None, (\n            \"We were not able to hash your inputs, \"\n            \"which resulted in an unexpected data return; \"\n            \"please open an issue with a reproducible example.\"\n        )\n        if input_hash not in self._unique_cursors.keys():\n            new_cursor = self._connection.cursor()\n            self._unique_cursors[input_hash] = new_cursor\n            return True, new_cursor\n        else:\n            existing_cursor = self._unique_cursors[input_hash]\n            return False, existing_cursor\n\n    def reset_cursors(self) -> None:\n        \"\"\"\n        Tries to close all opened cursors.\n        \"\"\"\n        input_hashes = tuple(self._unique_cursors.keys())\n        for input_hash in input_hashes:\n            cursor = self._unique_cursors.pop(input_hash)\n            try:\n   
             cursor.close()\n            except Exception as exc:\n                self.logger.warning(\n                    f\"Failed to close cursor for input hash {input_hash!r}: {exc}\"\n                )\n\n    @sync_compatible\n    async def fetch_one(\n        self,\n        operation: str,\n        parameters: Optional[Dict[str, Any]] = None,\n        **execution_options: Dict[str, Any],\n    ) -> \"Row\":\n        \"\"\"\n        Fetch a single result from the database.\n\n        Repeated calls using the same inputs to *any* of the fetch methods of this\n        block will skip executing the operation again, and instead,\n        return the next set of results from the previous execution,\n        until the reset_cursors method is called.\n\n        Args:\n            operation: The SQL query or other operation to be executed.\n            parameters: The parameters for the operation.\n            **execution_options: Additional options to pass to `connection.execute`.\n\n        Returns:\n            A tuple containing the data returned by the database,\n                where each row is a tuple and each column is a value in the tuple.\n\n        Examples:\n            Execute operation with parameters, fetching one new row at a time:\n            ```python\n            from prefect_gcp.bigquery import BigQueryWarehouse\n\n            with BigQueryWarehouse.load(\"BLOCK_NAME\") as warehouse:\n                operation = '''\n                    SELECT word, word_count\n                    FROM `bigquery-public-data.samples.shakespeare`\n                    WHERE corpus = %(corpus)s\n                    AND word_count >= %(min_word_count)s\n                    ORDER BY word_count DESC\n                    LIMIT 3;\n                '''\n                parameters = {\n                    \"corpus\": \"romeoandjuliet\",\n                    \"min_word_count\": 250,\n                }\n                for _ in range(0, 3):\n                    result = warehouse.fetch_one(operation, parameters=parameters)\n                    print(result)\n            ```\n        \"\"\"\n        inputs = dict(\n            operation=operation,\n            parameters=parameters,\n            **execution_options,\n        )\n        new, cursor = self._get_cursor(inputs)\n        if new:\n            await run_sync_in_worker_thread(cursor.execute, **inputs)\n\n        result = await run_sync_in_worker_thread(cursor.fetchone)\n        return result\n\n    @sync_compatible\n    async def fetch_many(\n        self,\n        operation: str,\n        parameters: Optional[Dict[str, Any]] = None,\n        size: Optional[int] = None,\n        **execution_options: Dict[str, Any],\n    ) -> List[\"Row\"]:\n        \"\"\"\n        Fetch a limited number of results from the database.\n\n        Repeated calls using the same inputs to *any* of the fetch methods of this\n        block will skip executing the operation again, and instead,\n        return the next set of results from the previous execution,\n        until the reset_cursors method is called.\n\n        Args:\n            operation: The SQL query or other operation to be executed.\n            parameters: The parameters for the operation.\n            size: The number of results to return; if None or 0, uses the value of\n                `fetch_size` configured on the block.\n            **execution_options: Additional options to pass to `connection.execute`.\n\n        Returns:\n            A list of tuples containing the data returned by the 
database,\n                where each row is a tuple and each column is a value in the tuple.\n\n        Examples:\n            Execute operation with parameters, fetching two new rows at a time:\n            ```python\n            from prefect_gcp.bigquery import BigQueryWarehouse\n\n            with BigQueryWarehouse.load(\"BLOCK_NAME\") as warehouse:\n                operation = '''\n                    SELECT word, word_count\n                    FROM `bigquery-public-data.samples.shakespeare`\n                    WHERE corpus = %(corpus)s\n                    AND word_count >= %(min_word_count)s\n                    ORDER BY word_count DESC\n                    LIMIT 6;\n                '''\n                parameters = {\n                    \"corpus\": \"romeoandjuliet\",\n                    \"min_word_count\": 250,\n                }\n                for _ in range(0, 3):\n                    result = warehouse.fetch_many(\n                        operation,\n                        parameters=parameters,\n                        size=2\n                    )\n                    print(result)\n            ```\n        \"\"\"\n        inputs = dict(\n            operation=operation,\n            parameters=parameters,\n            **execution_options,\n        )\n        new, cursor = self._get_cursor(inputs)\n        if new:\n            await run_sync_in_worker_thread(cursor.execute, **inputs)\n\n        size = size or self.fetch_size\n        result = await run_sync_in_worker_thread(cursor.fetchmany, size=size)\n        return result\n\n    @sync_compatible\n    async def fetch_all(\n        self,\n        operation: str,\n        parameters: Optional[Dict[str, Any]] = None,\n        **execution_options: Dict[str, Any],\n    ) -> List[\"Row\"]:\n        \"\"\"\n        Fetch all results from the database.\n\n        Repeated calls using the same inputs to *any* of the fetch methods of this\n        block will skip executing the operation again, and instead,\n        return the next set of results from the previous execution,\n        until the reset_cursors method is called.\n\n        Args:\n            operation: The SQL query or other operation to be executed.\n            parameters: The parameters for the operation.\n            **execution_options: Additional options to pass to `connection.execute`.\n\n        Returns:\n            A list of tuples containing the data returned by the database,\n                where each row is a tuple and each column is a value in the tuple.\n\n        Examples:\n            Execute operation with parameters, fetching all rows:\n            ```python\n            from prefect_gcp.bigquery import BigQueryWarehouse\n\n            with BigQueryWarehouse.load(\"BLOCK_NAME\") as warehouse:\n                operation = '''\n                    SELECT word, word_count\n                    FROM `bigquery-public-data.samples.shakespeare`\n                    WHERE corpus = %(corpus)s\n                    AND word_count >= %(min_word_count)s\n                    ORDER BY word_count DESC\n                    LIMIT 3;\n                '''\n                parameters = {\n                    \"corpus\": \"romeoandjuliet\",\n                    \"min_word_count\": 250,\n                }\n                result = warehouse.fetch_all(operation, parameters=parameters)\n            ```\n        \"\"\"\n        inputs = dict(\n            operation=operation,\n            parameters=parameters,\n            **execution_options,\n        )\n        new, 
cursor = self._get_cursor(inputs)\n        if new:\n            await run_sync_in_worker_thread(cursor.execute, **inputs)\n\n        result = await run_sync_in_worker_thread(cursor.fetchall)\n        return result\n\n    @sync_compatible\n    async def execute(\n        self,\n        operation: str,\n        parameters: Optional[Dict[str, Any]] = None,\n        **execution_options: Dict[str, Any],\n    ) -> None:\n        \"\"\"\n        Executes an operation on the database. This method is intended to be used\n        for operations that do not return data, such as INSERT, UPDATE, or DELETE.\n\n        Unlike the fetch methods, this method will always execute the operation\n        upon calling.\n\n        Args:\n            operation: The SQL query or other operation to be executed.\n            parameters: The parameters for the operation.\n            **execution_options: Additional options to pass to `connection.execute`.\n\n        Examples:\n            Execute operation with parameters:\n            ```python\n            from prefect_gcp.bigquery import BigQueryWarehouse\n\n            with BigQueryWarehouse.load(\"BLOCK_NAME\") as warehouse:\n                operation = '''\n                    CREATE TABLE mydataset.trips AS (\n                    SELECT\n                        bikeid,\n                        start_time,\n                        duration_minutes\n                    FROM\n                        bigquery-public-data.austin_bikeshare.bikeshare_trips\n                    LIMIT %(limit)s\n                    );\n                '''\n                warehouse.execute(operation, parameters={\"limit\": 5})\n            ```\n        \"\"\"\n        inputs = dict(\n            operation=operation,\n            parameters=parameters,\n            **execution_options,\n        )\n        cursor = self._get_cursor(inputs)[1]\n        await run_sync_in_worker_thread(cursor.execute, **inputs)\n\n    @sync_compatible\n    async def execute_many(\n        self,\n        operation: str,\n        seq_of_parameters: List[Dict[str, Any]],\n    ) -> None:\n        \"\"\"\n        Executes many operations on the database. 
This method is intended to be used\n        for operations that do not return data, such as INSERT, UPDATE, or DELETE.\n\n        Unlike the fetch methods, this method will always execute the operations\n        upon calling.\n\n        Args:\n            operation: The SQL query or other operation to be executed.\n            seq_of_parameters: The sequence of parameters for the operation.\n\n        Examples:\n            Create mytable in mydataset and insert two rows into it:\n            ```python\n            from prefect_gcp.bigquery import BigQueryWarehouse\n\n            with BigQueryWarehouse.load(\"bigquery\") as warehouse:\n                create_operation = '''\n                CREATE TABLE IF NOT EXISTS mydataset.mytable (\n                    col1 STRING,\n                    col2 INTEGER,\n                    col3 BOOLEAN\n                )\n                '''\n                warehouse.execute(create_operation)\n                insert_operation = '''\n                INSERT INTO mydataset.mytable (col1, col2, col3) VALUES (%s, %s, %s)\n                '''\n                seq_of_parameters = [\n                    (\"a\", 1, True),\n                    (\"b\", 2, False),\n                ]\n                warehouse.execute_many(\n                    insert_operation,\n                    seq_of_parameters=seq_of_parameters\n                )\n            ```\n        \"\"\"\n        inputs = dict(\n            operation=operation,\n            seq_of_parameters=seq_of_parameters,\n        )\n        cursor = self._get_cursor(inputs)[1]\n        await run_sync_in_worker_thread(cursor.executemany, **inputs)\n\n    def close(self):\n        \"\"\"\n        Closes connection and its cursors.\n        \"\"\"\n        try:\n            self.reset_cursors()\n        finally:\n            if self._connection is not None:\n                self._connection.close()\n                self._connection = None\n\n    def __enter__(self):\n        \"\"\"\n        Start a connection upon entry.\n        \"\"\"\n        return self\n\n    def __exit__(self, *args):\n        \"\"\"\n        Closes connection and its cursors upon exit.\n        \"\"\"\n        self.close()\n\n    def __getstate__(self):\n        \"\"\" \"\"\"\n        data = self.__dict__.copy()\n        data.update({k: None for k in {\"_connection\", \"_unique_cursors\"}})\n        return data\n\n    def __setstate__(self, data: dict):\n        \"\"\" \"\"\"\n        self.__dict__.update(data)\n        self._unique_cursors = {}\n        self._start_connection()\n
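A minimal sketch of the recommended context-manager usage described above; the block name "my-bigquery-block" is assumed to have been saved beforehand:

```python
from prefect_gcp.bigquery import BigQueryWarehouse

# "my-bigquery-block" is an assumed, previously saved block name
with BigQueryWarehouse.load("my-bigquery-block") as warehouse:
    rows = warehouse.fetch_all("SELECT 1 AS n")
    print(rows)
# the connection and any open cursors are closed automatically on exit
```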
"},{"location":"bigquery/#prefect_gcp.bigquery.BigQueryWarehouse-functions","title":"Functions","text":""},{"location":"bigquery/#prefect_gcp.bigquery.BigQueryWarehouse.close","title":"close","text":"

Closes connection and its cursors.

Source code in prefect_gcp/bigquery.py
def close(self):\n    \"\"\"\n    Closes connection and its cursors.\n    \"\"\"\n    try:\n        self.reset_cursors()\n    finally:\n        if self._connection is not None:\n            self._connection.close()\n            self._connection = None\n
"},{"location":"bigquery/#prefect_gcp.bigquery.BigQueryWarehouse.execute","title":"execute async","text":"

Executes an operation on the database. This method is intended to be used for operations that do not return data, such as INSERT, UPDATE, or DELETE.

Unlike the fetch methods, this method will always execute the operation upon calling.

Parameters:

Name Type Description Default operation str

The SQL query or other operation to be executed.

required parameters Optional[Dict[str, Any]]

The parameters for the operation.

None **execution_options Dict[str, Any]

Additional options to pass to connection.execute.

{}

Examples:

Execute operation with parameters:

from prefect_gcp.bigquery import BigQueryWarehouse\n\nwith BigQueryWarehouse.load(\"BLOCK_NAME\") as warehouse:\n    operation = '''\n        CREATE TABLE mydataset.trips AS (\n        SELECT\n            bikeid,\n            start_time,\n            duration_minutes\n        FROM\n            bigquery-public-data.austin_bikeshare.bikeshare_trips\n        LIMIT %(limit)s\n        );\n    '''\n    warehouse.execute(operation, parameters={\"limit\": 5})\n

Source code in prefect_gcp/bigquery.py
@sync_compatible\nasync def execute(\n    self,\n    operation: str,\n    parameters: Optional[Dict[str, Any]] = None,\n    **execution_options: Dict[str, Any],\n) -> None:\n    \"\"\"\n    Executes an operation on the database. This method is intended to be used\n    for operations that do not return data, such as INSERT, UPDATE, or DELETE.\n\n    Unlike the fetch methods, this method will always execute the operation\n    upon calling.\n\n    Args:\n        operation: The SQL query or other operation to be executed.\n        parameters: The parameters for the operation.\n        **execution_options: Additional options to pass to `connection.execute`.\n\n    Examples:\n        Execute operation with parameters:\n        ```python\n        from prefect_gcp.bigquery import BigQueryWarehouse\n\n        with BigQueryWarehouse.load(\"BLOCK_NAME\") as warehouse:\n            operation = '''\n                CREATE TABLE mydataset.trips AS (\n                SELECT\n                    bikeid,\n                    start_time,\n                    duration_minutes\n                FROM\n                    bigquery-public-data.austin_bikeshare.bikeshare_trips\n                LIMIT %(limit)s\n                );\n            '''\n            warehouse.execute(operation, parameters={\"limit\": 5})\n        ```\n    \"\"\"\n    inputs = dict(\n        operation=operation,\n        parameters=parameters,\n        **execution_options,\n    )\n    cursor = self._get_cursor(inputs)[1]\n    await run_sync_in_worker_thread(cursor.execute, **inputs)\n
"},{"location":"bigquery/#prefect_gcp.bigquery.BigQueryWarehouse.execute_many","title":"execute_many async","text":"

Executes many operations on the database. This method is intended to be used for operations that do not return data, such as INSERT, UPDATE, or DELETE.

Unlike the fetch methods, this method will always execute the operations upon calling.

Parameters:

Name Type Description Default operation str

The SQL query or other operation to be executed.

required seq_of_parameters List[Dict[str, Any]]

The sequence of parameters for the operation.

required

Examples:

Create mytable in mydataset and insert two rows into it:

from prefect_gcp.bigquery import BigQueryWarehouse\n\nwith BigQueryWarehouse.load(\"bigquery\") as warehouse:\n    create_operation = '''\n    CREATE TABLE IF NOT EXISTS mydataset.mytable (\n        col1 STRING,\n        col2 INTEGER,\n        col3 BOOLEAN\n    )\n    '''\n    warehouse.execute(create_operation)\n    insert_operation = '''\n    INSERT INTO mydataset.mytable (col1, col2, col3) VALUES (%s, %s, %s)\n    '''\n    seq_of_parameters = [\n        (\"a\", 1, True),\n        (\"b\", 2, False),\n    ]\n    warehouse.execute_many(\n        insert_operation,\n        seq_of_parameters=seq_of_parameters\n    )\n

Source code in prefect_gcp/bigquery.py
@sync_compatible\nasync def execute_many(\n    self,\n    operation: str,\n    seq_of_parameters: List[Dict[str, Any]],\n) -> None:\n    \"\"\"\n    Executes many operations on the database. This method is intended to be used\n    for operations that do not return data, such as INSERT, UPDATE, or DELETE.\n\n    Unlike the fetch methods, this method will always execute the operations\n    upon calling.\n\n    Args:\n        operation: The SQL query or other operation to be executed.\n        seq_of_parameters: The sequence of parameters for the operation.\n\n    Examples:\n        Create mytable in mydataset and insert two rows into it:\n        ```python\n        from prefect_gcp.bigquery import BigQueryWarehouse\n\n        with BigQueryWarehouse.load(\"bigquery\") as warehouse:\n            create_operation = '''\n            CREATE TABLE IF NOT EXISTS mydataset.mytable (\n                col1 STRING,\n                col2 INTEGER,\n                col3 BOOLEAN\n            )\n            '''\n            warehouse.execute(create_operation)\n            insert_operation = '''\n            INSERT INTO mydataset.mytable (col1, col2, col3) VALUES (%s, %s, %s)\n            '''\n            seq_of_parameters = [\n                (\"a\", 1, True),\n                (\"b\", 2, False),\n            ]\n            warehouse.execute_many(\n                insert_operation,\n                seq_of_parameters=seq_of_parameters\n            )\n        ```\n    \"\"\"\n    inputs = dict(\n        operation=operation,\n        seq_of_parameters=seq_of_parameters,\n    )\n    cursor = self._get_cursor(inputs)[1]\n    await run_sync_in_worker_thread(cursor.executemany, **inputs)\n
"},{"location":"bigquery/#prefect_gcp.bigquery.BigQueryWarehouse.fetch_all","title":"fetch_all async","text":"

Fetch all results from the database.

Repeated calls using the same inputs to any of the fetch methods of this block will skip executing the operation again, and instead, return the next set of results from the previous execution, until the reset_cursors method is called.

Parameters:

Name Type Description Default operation str

The SQL query or other operation to be executed.

required parameters Optional[Dict[str, Any]]

The parameters for the operation.

None **execution_options Dict[str, Any]

Additional options to pass to connection.execute.

{}

Returns:

Type Description List[Row]

A list of tuples containing the data returned by the database, where each row is a tuple and each column is a value in the tuple.

Examples:

Execute operation with parameters, fetching all rows:

from prefect_gcp.bigquery import BigQueryWarehouse\n\nwith BigQueryWarehouse.load(\"BLOCK_NAME\") as warehouse:\n    operation = '''\n        SELECT word, word_count\n        FROM `bigquery-public-data.samples.shakespeare`\n        WHERE corpus = %(corpus)s\n        AND word_count >= %(min_word_count)s\n        ORDER BY word_count DESC\n        LIMIT 3;\n    '''\n    parameters = {\n        \"corpus\": \"romeoandjuliet\",\n        \"min_word_count\": 250,\n    }\n    result = warehouse.fetch_all(operation, parameters=parameters)\n

Source code in prefect_gcp/bigquery.py
@sync_compatible\nasync def fetch_all(\n    self,\n    operation: str,\n    parameters: Optional[Dict[str, Any]] = None,\n    **execution_options: Dict[str, Any],\n) -> List[\"Row\"]:\n    \"\"\"\n    Fetch all results from the database.\n\n    Repeated calls using the same inputs to *any* of the fetch methods of this\n    block will skip executing the operation again, and instead,\n    return the next set of results from the previous execution,\n    until the reset_cursors method is called.\n\n    Args:\n        operation: The SQL query or other operation to be executed.\n        parameters: The parameters for the operation.\n        **execution_options: Additional options to pass to `connection.execute`.\n\n    Returns:\n        A list of tuples containing the data returned by the database,\n            where each row is a tuple and each column is a value in the tuple.\n\n    Examples:\n        Execute operation with parameters, fetching all rows:\n        ```python\n        from prefect_gcp.bigquery import BigQueryWarehouse\n\n        with BigQueryWarehouse.load(\"BLOCK_NAME\") as warehouse:\n            operation = '''\n                SELECT word, word_count\n                FROM `bigquery-public-data.samples.shakespeare`\n                WHERE corpus = %(corpus)s\n                AND word_count >= %(min_word_count)s\n                ORDER BY word_count DESC\n                LIMIT 3;\n            '''\n            parameters = {\n                \"corpus\": \"romeoandjuliet\",\n                \"min_word_count\": 250,\n            }\n            result = warehouse.fetch_all(operation, parameters=parameters)\n        ```\n    \"\"\"\n    inputs = dict(\n        operation=operation,\n        parameters=parameters,\n        **execution_options,\n    )\n    new, cursor = self._get_cursor(inputs)\n    if new:\n        await run_sync_in_worker_thread(cursor.execute, **inputs)\n\n    result = await run_sync_in_worker_thread(cursor.fetchall)\n    return result\n
"},{"location":"bigquery/#prefect_gcp.bigquery.BigQueryWarehouse.fetch_many","title":"fetch_many async","text":"

Fetch a limited number of results from the database.

Repeated calls using the same inputs to any of the fetch methods of this block will skip executing the operation again, and instead, return the next set of results from the previous execution, until the reset_cursors method is called.

Parameters:

Name Type Description Default operation str

The SQL query or other operation to be executed.

required parameters Optional[Dict[str, Any]]

The parameters for the operation.

None size Optional[int]

The number of results to return; if None or 0, uses the value of fetch_size configured on the block.

None **execution_options Dict[str, Any]

Additional options to pass to connection.execute.

{}

Returns:

Type Description List[Row]

A list of tuples containing the data returned by the database, where each row is a tuple and each column is a value in the tuple.

Examples:

Execute operation with parameters, fetching two new rows at a time:

from prefect_gcp.bigquery import BigQueryWarehouse\n\nwith BigQueryWarehouse.load(\"BLOCK_NAME\") as warehouse:\n    operation = '''\n        SELECT word, word_count\n        FROM `bigquery-public-data.samples.shakespeare`\n        WHERE corpus = %(corpus)s\n        AND word_count >= %(min_word_count)s\n        ORDER BY word_count DESC\n        LIMIT 6;\n    '''\n    parameters = {\n        \"corpus\": \"romeoandjuliet\",\n        \"min_word_count\": 250,\n    }\n    for _ in range(0, 3):\n        result = warehouse.fetch_many(\n            operation,\n            parameters=parameters,\n            size=2\n        )\n        print(result)\n

Source code in prefect_gcp/bigquery.py
@sync_compatible\nasync def fetch_many(\n    self,\n    operation: str,\n    parameters: Optional[Dict[str, Any]] = None,\n    size: Optional[int] = None,\n    **execution_options: Dict[str, Any],\n) -> List[\"Row\"]:\n    \"\"\"\n    Fetch a limited number of results from the database.\n\n    Repeated calls using the same inputs to *any* of the fetch methods of this\n    block will skip executing the operation again, and instead,\n    return the next set of results from the previous execution,\n    until the reset_cursors method is called.\n\n    Args:\n        operation: The SQL query or other operation to be executed.\n        parameters: The parameters for the operation.\n        size: The number of results to return; if None or 0, uses the value of\n            `fetch_size` configured on the block.\n        **execution_options: Additional options to pass to `connection.execute`.\n\n    Returns:\n        A list of tuples containing the data returned by the database,\n            where each row is a tuple and each column is a value in the tuple.\n\n    Examples:\n        Execute operation with parameters, fetching two new rows at a time:\n        ```python\n        from prefect_gcp.bigquery import BigQueryWarehouse\n\n        with BigQueryWarehouse.load(\"BLOCK_NAME\") as warehouse:\n            operation = '''\n                SELECT word, word_count\n                FROM `bigquery-public-data.samples.shakespeare`\n                WHERE corpus = %(corpus)s\n                AND word_count >= %(min_word_count)s\n                ORDER BY word_count DESC\n                LIMIT 6;\n            '''\n            parameters = {\n                \"corpus\": \"romeoandjuliet\",\n                \"min_word_count\": 250,\n            }\n            for _ in range(0, 3):\n                result = warehouse.fetch_many(\n                    operation,\n                    parameters=parameters,\n                    size=2\n                )\n                print(result)\n        ```\n    \"\"\"\n    inputs = dict(\n        operation=operation,\n        parameters=parameters,\n        **execution_options,\n    )\n    new, cursor = self._get_cursor(inputs)\n    if new:\n        await run_sync_in_worker_thread(cursor.execute, **inputs)\n\n    size = size or self.fetch_size\n    result = await run_sync_in_worker_thread(cursor.fetchmany, size=size)\n    return result\n
"},{"location":"bigquery/#prefect_gcp.bigquery.BigQueryWarehouse.fetch_one","title":"fetch_one async","text":"

Fetch a single result from the database.

Repeated calls using the same inputs to any of the fetch methods of this block will skip executing the operation again, and instead, return the next set of results from the previous execution, until the reset_cursors method is called.

Parameters:

Name Type Description Default operation str

The SQL query or other operation to be executed.

required parameters Optional[Dict[str, Any]]

The parameters for the operation.

None **execution_options Dict[str, Any]

Additional options to pass to connection.execute.

{}

Returns:

Type Description Row

A tuple containing the data returned by the database, where each row is a tuple and each column is a value in the tuple.

Examples:

Execute operation with parameters, fetching one new row at a time:

from prefect_gcp.bigquery import BigQueryWarehouse\n\nwith BigQueryWarehouse.load(\"BLOCK_NAME\") as warehouse:\n    operation = '''\n        SELECT word, word_count\n        FROM `bigquery-public-data.samples.shakespeare`\n        WHERE corpus = %(corpus)s\n        AND word_count >= %(min_word_count)s\n        ORDER BY word_count DESC\n        LIMIT 3;\n    '''\n    parameters = {\n        \"corpus\": \"romeoandjuliet\",\n        \"min_word_count\": 250,\n    }\n    for _ in range(0, 3):\n        result = warehouse.fetch_one(operation, parameters=parameters)\n        print(result)\n

Source code in prefect_gcp/bigquery.py
@sync_compatible\nasync def fetch_one(\n    self,\n    operation: str,\n    parameters: Optional[Dict[str, Any]] = None,\n    **execution_options: Dict[str, Any],\n) -> \"Row\":\n    \"\"\"\n    Fetch a single result from the database.\n\n    Repeated calls using the same inputs to *any* of the fetch methods of this\n    block will skip executing the operation again, and instead,\n    return the next set of results from the previous execution,\n    until the reset_cursors method is called.\n\n    Args:\n        operation: The SQL query or other operation to be executed.\n        parameters: The parameters for the operation.\n        **execution_options: Additional options to pass to `connection.execute`.\n\n    Returns:\n        A tuple containing the data returned by the database,\n            where each row is a tuple and each column is a value in the tuple.\n\n    Examples:\n        Execute operation with parameters, fetching one new row at a time:\n        ```python\n        from prefect_gcp.bigquery import BigQueryWarehouse\n\n        with BigQueryWarehouse.load(\"BLOCK_NAME\") as warehouse:\n            operation = '''\n                SELECT word, word_count\n                FROM `bigquery-public-data.samples.shakespeare`\n                WHERE corpus = %(corpus)s\n                AND word_count >= %(min_word_count)s\n                ORDER BY word_count DESC\n                LIMIT 3;\n            '''\n            parameters = {\n                \"corpus\": \"romeoandjuliet\",\n                \"min_word_count\": 250,\n            }\n            for _ in range(0, 3):\n                result = warehouse.fetch_one(operation, parameters=parameters)\n                print(result)\n        ```\n    \"\"\"\n    inputs = dict(\n        operation=operation,\n        parameters=parameters,\n        **execution_options,\n    )\n    new, cursor = self._get_cursor(inputs)\n    if new:\n        await run_sync_in_worker_thread(cursor.execute, **inputs)\n\n    result = await run_sync_in_worker_thread(cursor.fetchone)\n    return result\n
"},{"location":"bigquery/#prefect_gcp.bigquery.BigQueryWarehouse.get_connection","title":"get_connection","text":"

Get the opened connection to BigQuery.

Source code in prefect_gcp/bigquery.py
def get_connection(self) -> \"Connection\":\n    \"\"\"\n    Get the opened connection to BigQuery.\n    \"\"\"\n    return self._connection\n
"},{"location":"bigquery/#prefect_gcp.bigquery.BigQueryWarehouse.reset_cursors","title":"reset_cursors","text":"

Tries to close all opened cursors.

Source code in prefect_gcp/bigquery.py
def reset_cursors(self) -> None:\n    \"\"\"\n    Tries to close all opened cursors.\n    \"\"\"\n    input_hashes = tuple(self._unique_cursors.keys())\n    for input_hash in input_hashes:\n        cursor = self._unique_cursors.pop(input_hash)\n        try:\n            cursor.close()\n        except Exception as exc:\n            self.logger.warning(\n                f\"Failed to close cursor for input hash {input_hash!r}: {exc}\"\n            )\n
"},{"location":"bigquery/#prefect_gcp.bigquery-functions","title":"Functions","text":""},{"location":"bigquery/#prefect_gcp.bigquery.bigquery_create_table","title":"bigquery_create_table async","text":"

Creates a table in BigQuery. Args: dataset: Name of the dataset in which the table will be created. table: Name of the table to create. schema: Schema to use when creating the table. gcp_credentials: Credentials to use for authentication with GCP. clustering_fields: List of fields to cluster the table by. time_partitioning: bigquery.TimePartitioning object specifying a partitioning of the newly created table. project: Project to initialize the BigQuery Client with; if not provided, will default to the one inferred from your credentials. location: The location of the dataset that will be written to. external_config: The external data source. Returns: Table name. Example:

from prefect import flow\nfrom prefect_gcp import GcpCredentials\nfrom prefect_gcp.bigquery import bigquery_create_table\nfrom google.cloud.bigquery import SchemaField\n@flow\ndef example_bigquery_create_table_flow():\n    gcp_credentials = GcpCredentials(project=\"project\")\n    schema = [\n        SchemaField(\"number\", field_type=\"INTEGER\", mode=\"REQUIRED\"),\n        SchemaField(\"text\", field_type=\"STRING\", mode=\"REQUIRED\"),\n        SchemaField(\"bool\", field_type=\"BOOLEAN\")\n    ]\n    result = bigquery_create_table(\n        dataset=\"dataset\",\n        table=\"test_table\",\n        schema=schema,\n        gcp_credentials=gcp_credentials\n    )\n    return result\nexample_bigquery_create_table_flow()\n

Source code in prefect_gcp/bigquery.py
@task\nasync def bigquery_create_table(\n    dataset: str,\n    table: str,\n    gcp_credentials: GcpCredentials,\n    schema: Optional[List[\"SchemaField\"]] = None,\n    clustering_fields: List[str] = None,\n    time_partitioning: \"TimePartitioning\" = None,\n    project: Optional[str] = None,\n    location: str = \"US\",\n    external_config: Optional[\"ExternalConfig\"] = None,\n) -> str:\n    \"\"\"\n    Creates table in BigQuery.\n    Args:\n        dataset: Name of a dataset in that the table will be created.\n        table: Name of a table to create.\n        schema: Schema to use when creating the table.\n        gcp_credentials: Credentials to use for authentication with GCP.\n        clustering_fields: List of fields to cluster the table by.\n        time_partitioning: `bigquery.TimePartitioning` object specifying a partitioning\n            of the newly created table\n        project: Project to initialize the BigQuery Client with; if\n            not provided, will default to the one inferred from your credentials.\n        location: The location of the dataset that will be written to.\n        external_config: The [external data source](https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/bigquery_table#nested_external_data_configuration).  # noqa\n    Returns:\n        Table name.\n    Example:\n        ```python\n        from prefect import flow\n        from prefect_gcp import GcpCredentials\n        from prefect_gcp.bigquery import bigquery_create_table\n        from google.cloud.bigquery import SchemaField\n        @flow\n        def example_bigquery_create_table_flow():\n            gcp_credentials = GcpCredentials(project=\"project\")\n            schema = [\n                SchemaField(\"number\", field_type=\"INTEGER\", mode=\"REQUIRED\"),\n                SchemaField(\"text\", field_type=\"STRING\", mode=\"REQUIRED\"),\n                SchemaField(\"bool\", field_type=\"BOOLEAN\")\n            ]\n            result = bigquery_create_table(\n                dataset=\"dataset\",\n                table=\"test_table\",\n                schema=schema,\n                gcp_credentials=gcp_credentials\n            )\n            return result\n        example_bigquery_create_table_flow()\n        ```\n    \"\"\"\n    logger = get_run_logger()\n    logger.info(\"Creating %s.%s\", dataset, table)\n\n    if not external_config and not schema:\n        raise ValueError(\"Either a schema or an external config must be provided.\")\n\n    client = gcp_credentials.get_bigquery_client(project=project, location=location)\n    try:\n        partial_get_dataset = partial(client.get_dataset, dataset)\n        dataset_ref = await to_thread.run_sync(partial_get_dataset)\n    except NotFound:\n        logger.debug(\"Dataset %s not found, creating\", dataset)\n        partial_create_dataset = partial(client.create_dataset, dataset)\n        dataset_ref = await to_thread.run_sync(partial_create_dataset)\n\n    table_ref = dataset_ref.table(table)\n    try:\n        partial_get_table = partial(client.get_table, table_ref)\n        await to_thread.run_sync(partial_get_table)\n        logger.info(\"%s.%s already exists\", dataset, table)\n    except NotFound:\n        logger.debug(\"Table %s not found, creating\", table)\n        table_obj = Table(table_ref, schema=schema)\n\n        # external data configuration\n        if external_config:\n            table_obj.external_data_configuration = external_config\n\n        # cluster for optimal data sorting/access\n     
   if clustering_fields:\n            table_obj.clustering_fields = clustering_fields\n\n        # partitioning\n        if time_partitioning:\n            table_obj.time_partitioning = time_partitioning\n\n        partial_create_table = partial(client.create_table, table_obj)\n        await to_thread.run_sync(partial_create_table)\n\n    return table\n
"},{"location":"bigquery/#prefect_gcp.bigquery.bigquery_insert_stream","title":"bigquery_insert_stream async","text":"

Insert records in a Google BigQuery table via the streaming API.

Parameters:

Name Type Description Default dataset str

Name of a dataset where the records will be written to.

required table str

Name of a table to write to.

required records List[dict]

The list of records to insert as rows into the BigQuery table; each item in the list should be a dictionary whose keys correspond to columns in the table.

required gcp_credentials GcpCredentials

Credentials to use for authentication with GCP.

required project Optional[str]

The project to initialize the BigQuery Client with; if not provided, will default to the one inferred from your credentials.

None location str

Location of the dataset that will be written to.

'US'

Returns:

Type Description List

List of inserted rows.

Example
from prefect import flow\nfrom prefect_gcp import GcpCredentials\nfrom prefect_gcp.bigquery import bigquery_insert_stream\nfrom google.cloud.bigquery import SchemaField\n\n@flow\ndef example_bigquery_insert_stream_flow():\n    gcp_credentials = GcpCredentials(project=\"project\")\n    records = [\n        {\"number\": 1, \"text\": \"abc\", \"bool\": True},\n        {\"number\": 2, \"text\": \"def\", \"bool\": False},\n    ]\n    result = bigquery_insert_stream(\n        dataset=\"integrations\",\n        table=\"test_table\",\n        records=records,\n        gcp_credentials=gcp_credentials\n    )\n    return result\n\nexample_bigquery_insert_stream_flow()\n
Source code in prefect_gcp/bigquery.py
@task\nasync def bigquery_insert_stream(\n    dataset: str,\n    table: str,\n    records: List[dict],\n    gcp_credentials: GcpCredentials,\n    project: Optional[str] = None,\n    location: str = \"US\",\n) -> List:\n    \"\"\"\n    Insert records in a Google BigQuery table via the [streaming\n    API](https://cloud.google.com/bigquery/streaming-data-into-bigquery).\n\n    Args:\n        dataset: Name of a dataset where the records will be written to.\n        table: Name of a table to write to.\n        records: The list of records to insert as rows into the BigQuery table;\n            each item in the list should be a dictionary whose keys correspond to\n            columns in the table.\n        gcp_credentials: Credentials to use for authentication with GCP.\n        project: The project to initialize the BigQuery Client with; if\n            not provided, will default to the one inferred from your credentials.\n        location: Location of the dataset that will be written to.\n\n    Returns:\n        List of inserted rows.\n\n    Example:\n        ```python\n        from prefect import flow\n        from prefect_gcp import GcpCredentials\n        from prefect_gcp.bigquery import bigquery_insert_stream\n        from google.cloud.bigquery import SchemaField\n\n        @flow\n        def example_bigquery_insert_stream_flow():\n            gcp_credentials = GcpCredentials(project=\"project\")\n            records = [\n                {\"number\": 1, \"text\": \"abc\", \"bool\": True},\n                {\"number\": 2, \"text\": \"def\", \"bool\": False},\n            ]\n            result = bigquery_insert_stream(\n                dataset=\"integrations\",\n                table=\"test_table\",\n                records=records,\n                gcp_credentials=gcp_credentials\n            )\n            return result\n\n        example_bigquery_insert_stream_flow()\n        ```\n    \"\"\"\n    logger = get_run_logger()\n    logger.info(\"Inserting into %s.%s as a stream\", dataset, table)\n\n    client = gcp_credentials.get_bigquery_client(project=project, location=location)\n    table_ref = client.dataset(dataset).table(table)\n    partial_insert = partial(\n        client.insert_rows_json, table=table_ref, json_rows=records\n    )\n    response = await to_thread.run_sync(partial_insert)\n\n    errors = []\n    output = []\n    for row in response:\n        output.append(row)\n        if \"errors\" in row:\n            errors.append(row[\"errors\"])\n\n    if errors:\n        raise ValueError(errors)\n\n    return output\n
"},{"location":"bigquery/#prefect_gcp.bigquery.bigquery_load_cloud_storage","title":"bigquery_load_cloud_storage async","text":"

Loads data into a BigQuery table from Google Cloud Storage.

Parameters:

Name Type Description Default uri str

GCS path to load data from.

required dataset str

The ID of a destination dataset to write the records to.

required table str

The name of a destination table to write the records to.

required gcp_credentials GcpCredentials

Credentials to use for authentication with GCP.

required schema Optional[List[SchemaField]]

The schema to use when creating the table.

None job_config Optional[dict]

Dictionary of job configuration parameters; note that the parameters provided here must be pickleable (e.g., dataset references will be rejected).

None project Optional[str]

The project to initialize the BigQuery Client with; if not provided, will default to the one inferred from your credentials.

None location str

Location of the dataset that will be written to.

'US'

Returns:

Type Description LoadJob

The response from load_table_from_uri.

Example
from prefect import flow\nfrom prefect_gcp import GcpCredentials\nfrom prefect_gcp.bigquery import bigquery_load_cloud_storage\n\n@flow\ndef example_bigquery_load_cloud_storage_flow():\n    gcp_credentials = GcpCredentials(project=\"project\")\n    result = bigquery_load_cloud_storage(\n        dataset=\"dataset\",\n        table=\"test_table\",\n        uri=\"uri\",\n        gcp_credentials=gcp_credentials\n    )\n    return result\n\nexample_bigquery_load_cloud_storage_flow()\n
Source code in prefect_gcp/bigquery.py
@task\nasync def bigquery_load_cloud_storage(\n    dataset: str,\n    table: str,\n    uri: str,\n    gcp_credentials: GcpCredentials,\n    schema: Optional[List[\"SchemaField\"]] = None,\n    job_config: Optional[dict] = None,\n    project: Optional[str] = None,\n    location: str = \"US\",\n) -> \"LoadJob\":\n    \"\"\"\n    Run method for this Task.  Invoked by _calling_ this\n    Task within a Flow context, after initialization.\n    Args:\n        uri: GCS path to load data from.\n        dataset: The id of a destination dataset to write the records to.\n        table: The name of a destination table to write the records to.\n        gcp_credentials: Credentials to use for authentication with GCP.\n        schema: The schema to use when creating the table.\n        job_config: Dictionary of job configuration parameters;\n            note that the parameters provided here must be pickleable\n            (e.g., dataset references will be rejected).\n        project: The project to initialize the BigQuery Client with; if\n            not provided, will default to the one inferred from your credentials.\n        location: Location of the dataset that will be written to.\n\n    Returns:\n        The response from `load_table_from_uri`.\n\n    Example:\n        ```python\n        from prefect import flow\n        from prefect_gcp import GcpCredentials\n        from prefect_gcp.bigquery import bigquery_load_cloud_storage\n\n        @flow\n        def example_bigquery_load_cloud_storage_flow():\n            gcp_credentials = GcpCredentials(project=\"project\")\n            result = bigquery_load_cloud_storage(\n                dataset=\"dataset\",\n                table=\"test_table\",\n                uri=\"uri\",\n                gcp_credentials=gcp_credentials\n            )\n            return result\n\n        example_bigquery_load_cloud_storage_flow()\n        ```\n    \"\"\"\n    logger = get_run_logger()\n    logger.info(\"Loading into %s.%s from cloud storage\", dataset, table)\n\n    client = gcp_credentials.get_bigquery_client(project=project, location=location)\n    table_ref = client.dataset(dataset).table(table)\n\n    job_config = job_config or {}\n    if \"autodetect\" not in job_config:\n        job_config[\"autodetect\"] = True\n    job_config = LoadJobConfig(**job_config)\n    if schema:\n        job_config.schema = schema\n\n    result = None\n    try:\n        partial_load = partial(\n            _result_sync,\n            client.load_table_from_uri,\n            uri,\n            table_ref,\n            job_config=job_config,\n        )\n        result = await to_thread.run_sync(partial_load)\n    except Exception as exception:\n        logger.exception(exception)\n        if result is not None and result.errors is not None:\n            for error in result.errors:\n                logger.exception(error)\n        raise\n\n    if result is not None:\n        # remove unpickleable attributes\n        result._client = None\n        result._completion_lock = None\n\n    return result\n
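Because schema accepts SchemaField objects and job_config is unpacked into a LoadJobConfig, an explicit schema and write disposition can be supplied. The following is a minimal sketch, assuming placeholder dataset, table, and bucket URI names:

```python
from prefect import flow
from prefect_gcp import GcpCredentials
from prefect_gcp.bigquery import bigquery_load_cloud_storage
from google.cloud.bigquery import SchemaField

@flow
def example_load_with_schema_flow():
    gcp_credentials = GcpCredentials(project="project")
    # Hypothetical schema; job_config keys must be pickleable
    schema = [
        SchemaField("word", field_type="STRING"),
        SchemaField("word_count", field_type="INT64"),
    ]
    result = bigquery_load_cloud_storage(
        dataset="dataset",
        table="test_table",
        uri="gs://my-bucket/words.csv",  # placeholder URI
        gcp_credentials=gcp_credentials,
        schema=schema,
        job_config={"write_disposition": "WRITE_TRUNCATE"},
    )
    return result

example_load_with_schema_flow()
```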
"},{"location":"bigquery/#prefect_gcp.bigquery.bigquery_load_file","title":"bigquery_load_file async","text":"

Loads a file into BigQuery.

Parameters:

Name Type Description Default dataset str

ID of a destination dataset to write the records to; if not provided here, will default to the one provided at initialization.

required table str

Name of a destination table to write the records to; if not provided here, will default to the one provided at initialization.

required path Union[str, Path]

A string or path-like object of the file to be loaded.

required gcp_credentials GcpCredentials

Credentials to use for authentication with GCP.

required schema Optional[List[SchemaField]]

Schema to use when creating the table.

None job_config Optional[dict]

An optional dictionary of job configuration parameters; note that the parameters provided here must be pickleable (e.g., dataset references will be rejected).

None rewind bool

If True, seek to the beginning of the file handle before reading the file.

False size Optional[int]

Number of bytes to read from the file handle. If size is None or large, resumable upload will be used. Otherwise, multipart upload will be used.

None project Optional[str]

Project to initialize the BigQuery Client with; if not provided, will default to the one inferred from your credentials.

None location str

Location of the dataset that will be written to.

'US'

Returns:

Type Description LoadJob

The response from load_table_from_file.

Example
from prefect import flow\nfrom prefect_gcp import GcpCredentials\nfrom prefect_gcp.bigquery import bigquery_load_file\nfrom google.cloud.bigquery import SchemaField\n\n@flow\ndef example_bigquery_load_file_flow():\n    gcp_credentials = GcpCredentials(project=\"project\")\n    result = bigquery_load_file(\n        dataset=\"dataset\",\n        table=\"test_table\",\n        path=\"path\",\n        gcp_credentials=gcp_credentials\n    )\n    return result\n\nexample_bigquery_load_file_flow()\n
Source code in prefect_gcp/bigquery.py
@task\nasync def bigquery_load_file(\n    dataset: str,\n    table: str,\n    path: Union[str, Path],\n    gcp_credentials: GcpCredentials,\n    schema: Optional[List[\"SchemaField\"]] = None,\n    job_config: Optional[dict] = None,\n    rewind: bool = False,\n    size: Optional[int] = None,\n    project: Optional[str] = None,\n    location: str = \"US\",\n) -> \"LoadJob\":\n    \"\"\"\n    Loads file into BigQuery.\n\n    Args:\n        dataset: ID of a destination dataset to write the records to;\n            if not provided here, will default to the one provided at initialization.\n        table: Name of a destination table to write the records to;\n            if not provided here, will default to the one provided at initialization.\n        path: A string or path-like object of the file to be loaded.\n        gcp_credentials: Credentials to use for authentication with GCP.\n        schema: Schema to use when creating the table.\n        job_config: An optional dictionary of job configuration parameters;\n            note that the parameters provided here must be pickleable\n            (e.g., dataset references will be rejected).\n        rewind: if True, seek to the beginning of the file handle\n            before reading the file.\n        size: Number of bytes to read from the file handle. If size is None or large,\n            resumable upload will be used. Otherwise, multipart upload will be used.\n        project: Project to initialize the BigQuery Client with; if\n            not provided, will default to the one inferred from your credentials.\n        location: location of the dataset that will be written to.\n\n    Returns:\n        The response from `load_table_from_file`.\n\n    Example:\n        ```python\n        from prefect import flow\n        from prefect_gcp import GcpCredentials\n        from prefect_gcp.bigquery import bigquery_load_file\n        from google.cloud.bigquery import SchemaField\n\n        @flow\n        def example_bigquery_load_file_flow():\n            gcp_credentials = GcpCredentials(project=\"project\")\n            result = bigquery_load_file(\n                dataset=\"dataset\",\n                table=\"test_table\",\n                path=\"path\",\n                gcp_credentials=gcp_credentials\n            )\n            return result\n\n        example_bigquery_load_file_flow()\n        ```\n    \"\"\"\n    logger = get_run_logger()\n    logger.info(\"Loading into %s.%s from file\", dataset, table)\n\n    if not os.path.exists(path):\n        raise ValueError(f\"{path} does not exist\")\n    elif not os.path.isfile(path):\n        raise ValueError(f\"{path} is not a file\")\n\n    client = gcp_credentials.get_bigquery_client(project=project)\n    table_ref = client.dataset(dataset).table(table)\n\n    job_config = job_config or {}\n    if \"autodetect\" not in job_config:\n        job_config[\"autodetect\"] = True\n        # TODO: test if autodetect is needed when schema is passed\n    job_config = LoadJobConfig(**job_config)\n    if schema:\n        # TODO: test if schema can be passed directly in job_config\n        job_config.schema = schema\n\n    try:\n        with open(path, \"rb\") as file_obj:\n            partial_load = partial(\n                _result_sync,\n                client.load_table_from_file,\n                file_obj,\n                table_ref,\n                rewind=rewind,\n                size=size,\n                location=location,\n                job_config=job_config,\n            )\n            result = 
await to_thread.run_sync(partial_load)\n    except IOError:\n        logger.exception(f\"Could not open and read from {path}\")\n        raise\n\n    if result is not None:\n        # remove unpickleable attributes\n        result._client = None\n        result._completion_lock = None\n\n    return result\n
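A minimal sketch of loading a local newline-delimited JSON file with an explicit schema; the file path, schema, and table names are placeholders:

```python
from prefect import flow
from prefect_gcp import GcpCredentials
from prefect_gcp.bigquery import bigquery_load_file
from google.cloud.bigquery import SchemaField

@flow
def example_load_json_file_flow():
    gcp_credentials = GcpCredentials(project="project")
    # Hypothetical local file and schema
    result = bigquery_load_file(
        dataset="dataset",
        table="test_table",
        path="records.jsonl",
        gcp_credentials=gcp_credentials,
        schema=[
            SchemaField("name", field_type="STRING"),
            SchemaField("age", field_type="INT64"),
        ],
        job_config={"source_format": "NEWLINE_DELIMITED_JSON"},
    )
    return result

example_load_json_file_flow()
```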
"},{"location":"bigquery/#prefect_gcp.bigquery.bigquery_query","title":"bigquery_query async","text":"

Runs a BigQuery query.

Parameters:

Name Type Description Default query str

String of the query to execute.

required gcp_credentials GcpCredentials

Credentials to use for authentication with GCP.

required query_params Optional[List[tuple]]

List of 3-tuples specifying BigQuery query parameters; currently only scalar query parameters are supported. See the Google documentation for more details on how both the query and the query parameters should be formatted.

None dry_run_max_bytes Optional[int]

If provided, the maximum number of bytes the query is allowed to process; this will be determined by executing a dry run and raising a RuntimeError if the maximum is exceeded.

None dataset Optional[str]

Name of a destination dataset to write the query results to, if you don't want them returned; if provided, table must also be provided.

None table Optional[str]

Name of a destination table to write the query results to, if you don't want them returned; if provided, dataset must also be provided.

None to_dataframe bool

If True, returns the results of the query as a pandas DataFrame instead of a list of bigquery.table.Row objects.

False job_config Optional[dict]

Dictionary of job configuration parameters; note that the parameters provided here must be pickleable (e.g., dataset references will be rejected).

None project Optional[str]

The project to initialize the BigQuery Client with; if not provided, will default to the one inferred from your credentials.

None result_transformer Optional[Callable[[List[Row]], Any]]

Function that can be passed to transform the result of a query before returning. The function will be passed the list of rows returned by BigQuery for the given query.

None location str

Location of the dataset that will be queried.

'US'

Returns:

Type Description Any

A list of rows, or a pandas DataFrame if to_dataframe is True, matching the query criteria.

Example

Queries the public Shakespeare sample dataset for word counts in the romeoandjuliet corpus.

from prefect import flow\nfrom prefect_gcp import GcpCredentials\nfrom prefect_gcp.bigquery import bigquery_query\n\n@flow\ndef example_bigquery_query_flow():\n    gcp_credentials = GcpCredentials(\n        service_account_file=\"/path/to/service/account/keyfile.json\",\n        project=\"project\"\n    )\n    query = '''\n        SELECT word, word_count\n        FROM `bigquery-public-data.samples.shakespeare`\n        WHERE corpus = @corpus\n        AND word_count >= @min_word_count\n        ORDER BY word_count DESC;\n    '''\n    query_params = [\n        (\"corpus\", \"STRING\", \"romeoandjuliet\"),\n        (\"min_word_count\", \"INT64\", 250)\n    ]\n    result = bigquery_query(\n        query, gcp_credentials, query_params=query_params\n    )\n    return result\n\nexample_bigquery_query_flow()\n

Source code in prefect_gcp/bigquery.py
@task\nasync def bigquery_query(\n    query: str,\n    gcp_credentials: GcpCredentials,\n    query_params: Optional[List[tuple]] = None,  # 3-tuples\n    dry_run_max_bytes: Optional[int] = None,\n    dataset: Optional[str] = None,\n    table: Optional[str] = None,\n    to_dataframe: bool = False,\n    job_config: Optional[dict] = None,\n    project: Optional[str] = None,\n    result_transformer: Optional[Callable[[List[\"Row\"]], Any]] = None,\n    location: str = \"US\",\n) -> Any:\n    \"\"\"\n    Runs a BigQuery query.\n\n    Args:\n        query: String of the query to execute.\n        gcp_credentials: Credentials to use for authentication with GCP.\n        query_params: List of 3-tuples specifying BigQuery query parameters; currently\n            only scalar query parameters are supported.  See the\n            [Google documentation](https://cloud.google.com/bigquery/docs/parameterized-queries#bigquery-query-params-python)\n            for more details on how both the query and the query parameters should be formatted.\n        dry_run_max_bytes: If provided, the maximum number of bytes the query\n            is allowed to process; this will be determined by executing a dry run\n            and raising a `ValueError` if the maximum is exceeded.\n        dataset: Name of a destination dataset to write the query results to,\n            if you don't want them returned; if provided, `table` must also be provided.\n        table: Name of a destination table to write the query results to,\n            if you don't want them returned; if provided, `dataset` must also be provided.\n        to_dataframe: If provided, returns the results of the query as a pandas\n            dataframe instead of a list of `bigquery.table.Row` objects.\n        job_config: Dictionary of job configuration parameters;\n            note that the parameters provided here must be pickleable\n            (e.g., dataset references will be rejected).\n        project: The project to initialize the BigQuery Client with; if not\n            provided, will default to the one inferred from your credentials.\n        result_transformer: Function that can be passed to transform the result of a query before returning. 
The function will be passed the list of rows returned by BigQuery for the given query.\n        location: Location of the dataset that will be queried.\n\n    Returns:\n        A list of rows, or pandas DataFrame if to_dataframe,\n        matching the query criteria.\n\n    Example:\n        Queries the public names database, returning 10 results.\n        ```python\n        from prefect import flow\n        from prefect_gcp import GcpCredentials\n        from prefect_gcp.bigquery import bigquery_query\n\n        @flow\n        def example_bigquery_query_flow():\n            gcp_credentials = GcpCredentials(\n                service_account_file=\"/path/to/service/account/keyfile.json\",\n                project=\"project\"\n            )\n            query = '''\n                SELECT word, word_count\n                FROM `bigquery-public-data.samples.shakespeare`\n                WHERE corpus = @corpus\n                AND word_count >= @min_word_count\n                ORDER BY word_count DESC;\n            '''\n            query_params = [\n                (\"corpus\", \"STRING\", \"romeoandjuliet\"),\n                (\"min_word_count\", \"INT64\", 250)\n            ]\n            result = bigquery_query(\n                query, gcp_credentials, query_params=query_params\n            )\n            return result\n\n        example_bigquery_query_flow()\n        ```\n    \"\"\"  # noqa\n    logger = get_run_logger()\n    logger.info(\"Running BigQuery query\")\n\n    client = gcp_credentials.get_bigquery_client(project=project, location=location)\n\n    # setup job config\n    job_config = QueryJobConfig(**job_config or {})\n    if query_params is not None:\n        job_config.query_parameters = [ScalarQueryParameter(*qp) for qp in query_params]\n\n    # perform dry_run if requested\n    if dry_run_max_bytes is not None:\n        saved_info = dict(\n            dry_run=job_config.dry_run, use_query_cache=job_config.use_query_cache\n        )\n        job_config.dry_run = True\n        job_config.use_query_cache = False\n        partial_query = partial(client.query, query, job_config=job_config)\n        response = await to_thread.run_sync(partial_query)\n        total_bytes_processed = response.total_bytes_processed\n        if total_bytes_processed > dry_run_max_bytes:\n            raise RuntimeError(\n                f\"Query will process {total_bytes_processed} bytes which is above \"\n                f\"the set maximum of {dry_run_max_bytes} for this task.\"\n            )\n        job_config.dry_run = saved_info[\"dry_run\"]\n        job_config.use_query_cache = saved_info[\"use_query_cache\"]\n\n    # if writing to a destination table\n    if dataset is not None:\n        table_ref = client.dataset(dataset).table(table)\n        job_config.destination = table_ref\n\n    partial_query = partial(\n        _result_sync,\n        client.query,\n        query,\n        job_config=job_config,\n    )\n    result = await to_thread.run_sync(partial_query)\n\n    if to_dataframe:\n        return result.to_dataframe()\n    else:\n        if result_transformer:\n            return result_transformer(result)\n        else:\n            return list(result)\n
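A minimal sketch combining dry_run_max_bytes and to_dataframe; the credentials, query, and byte limit are placeholders:

```python
from prefect import flow
from prefect_gcp import GcpCredentials
from prefect_gcp.bigquery import bigquery_query

@flow
def example_query_to_dataframe_flow():
    gcp_credentials = GcpCredentials(project="project")
    # Cap the bytes the query may process and return the result as a DataFrame
    df = bigquery_query(
        "SELECT word, word_count FROM `bigquery-public-data.samples.shakespeare` LIMIT 10",
        gcp_credentials,
        dry_run_max_bytes=10 * 1024 * 1024,  # fail fast above 10 MB (arbitrary limit)
        to_dataframe=True,
    )
    return df

example_query_to_dataframe_flow()
```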
"},{"location":"blocks_catalog/","title":"Blocks Catalog","text":"

Below is a list of Blocks available for registration in prefect-gcp.

To register the blocks in this module so you can view and edit them on Prefect Cloud, first install the required packages, then run:

prefect block register -m prefect_gcp\n
Note: to use the load method on Blocks, you must already have a block document saved, either through code (as sketched below) or through the UI.
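For example, a minimal sketch of saving a GcpCredentials block document through code; the block name and key file path are placeholders:

```python
from prefect_gcp import GcpCredentials

# Save a block document so it can later be loaded by name
GcpCredentials(
    service_account_file="/path/to/service/account/keyfile.json",  # placeholder path
    project="project",
).save("MY_BLOCK_NAME")
```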

"},{"location":"blocks_catalog/#credentials-module","title":"Credentials Module","text":"

GcpCredentials

Block used to manage authentication with GCP. Google authentication is handled via the google.oauth2 module or through the CLI. Specify either one of service_account_file or service_account_info; if neither is specified, the client will try to detect the credentials following Google's Application Default Credentials. See Google's Authentication documentation for details on inference and recommended authentication patterns.

To load the GcpCredentials:

from prefect import flow\nfrom prefect_gcp.credentials import GcpCredentials\n\n@flow\ndef my_flow():\n    my_block = GcpCredentials.load(\"MY_BLOCK_NAME\")\n\nmy_flow()\n
For additional examples, check out the Credentials Module under Examples Catalog.

"},{"location":"blocks_catalog/#aiplatform-module","title":"Aiplatform Module","text":"

VertexAICustomTrainingJob

Infrastructure block used to run Vertex AI custom training jobs.

To load the VertexAICustomTrainingJob:

from prefect import flow\nfrom prefect_gcp.aiplatform import VertexAICustomTrainingJob\n\n@flow\ndef my_flow():\n    my_block = VertexAICustomTrainingJob.load(\"MY_BLOCK_NAME\")\n\nmy_flow()\n
For additional examples, check out the Aiplatform Module under Examples Catalog.

"},{"location":"blocks_catalog/#bigquery-module","title":"Bigquery Module","text":"

BigQueryWarehouse

A block for querying a database with BigQuery.

Upon instantiation, a connection to BigQuery is established and maintained for the life of the object until the close method is called.

It is recommended to use this block as a context manager, which will automatically close the connection and its cursors when the context is exited.

It is also recommended that this block is loaded and consumed within a single task or flow because if the block is passed across separate tasks and flows, the state of the block's connection and cursor could be lost.

To load the BigQueryWarehouse:

from prefect import flow\nfrom prefect_gcp.bigquery import BigQueryWarehouse\n\n@flow\ndef my_flow():\n    my_block = BigQueryWarehouse.load(\"MY_BLOCK_NAME\")\n\nmy_flow()\n
For additional examples, check out the Bigquery Module under Examples Catalog.
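A minimal sketch of the recommended context-manager usage, assuming a saved block named MY_BLOCK_NAME and using the block's fetch_one method:

```python
from prefect import flow
from prefect_gcp.bigquery import BigQueryWarehouse

@flow
def example_warehouse_flow():
    # The connection and its cursors are closed automatically on exit
    with BigQueryWarehouse.load("MY_BLOCK_NAME") as warehouse:
        row = warehouse.fetch_one(
            "SELECT word FROM `bigquery-public-data.samples.shakespeare` LIMIT 1"
        )
    return row

example_warehouse_flow()
```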

"},{"location":"blocks_catalog/#cloud-run-module","title":"Cloud Run Module","text":"

CloudRunJob

Infrastructure block used to run GCP Cloud Run Jobs. Note this block is experimental. The interface may change without notice.

To load the CloudRunJob:

from prefect import flow\nfrom prefect_gcp.cloud_run import CloudRunJob\n\n@flow\ndef my_flow():\n    my_block = CloudRunJob.load(\"MY_BLOCK_NAME\")\n\nmy_flow()\n
For additional examples, check out the Cloud Run Module under Examples Catalog.

"},{"location":"blocks_catalog/#cloud-storage-module","title":"Cloud Storage Module","text":"

GcsBucket

Block used to store data using GCP Cloud Storage Buckets.

Note! GcsBucket in prefect-gcp is a unique block, separate from GCS in core Prefect. GcsBucket does not use gcsfs under the hood, instead using the google-cloud-storage package, and offers more configuration and functionality.

To load the GcsBucket:

from prefect import flow\nfrom prefect_gcp.cloud_storage import GcsBucket\n\n@flow\ndef my_flow():\n    my_block = GcsBucket.load(\"MY_BLOCK_NAME\")\n\nmy_flow()\n
For additional examples, check out the Cloud Storage Module under Examples Catalog.
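A minimal sketch of uploading and downloading a file with a saved GcsBucket block; the block name and paths are placeholders:

```python
from prefect import flow
from prefect_gcp.cloud_storage import GcsBucket

@flow
def example_gcs_bucket_flow():
    gcs_bucket = GcsBucket.load("MY_BLOCK_NAME")
    # Upload a local file, then download the object to another local path
    gcs_bucket.upload_from_path("local-file.txt", "folder/remote-file.txt")
    gcs_bucket.download_object_to_path("folder/remote-file.txt", "downloaded.txt")

example_gcs_bucket_flow()
```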

"},{"location":"blocks_catalog/#secret-manager-module","title":"Secret Manager Module","text":"

GcpSecret

Manages a secret in Google Cloud Platform's Secret Manager.

To load the GcpSecret:

from prefect import flow\nfrom prefect_gcp.secret_manager import GcpSecret\n\n@flow\ndef my_flow():\n    my_block = GcpSecret.load(\"MY_BLOCK_NAME\")\n\nmy_flow()\n
For additional examples, check out the Secret Manager Module under Examples Catalog.
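A minimal sketch of writing and reading a secret with a saved GcpSecret block; the block name and secret value are placeholders:

```python
from prefect import flow
from prefect_gcp.secret_manager import GcpSecret

@flow
def example_gcp_secret_flow():
    gcp_secret = GcpSecret.load("MY_BLOCK_NAME")
    gcp_secret.write_secret(b"my-secret-value")  # stores a new secret version
    return gcp_secret.read_secret()

example_gcp_secret_flow()
```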

"},{"location":"cloud_run/","title":"Cloud Run","text":""},{"location":"cloud_run/#prefect_gcp.cloud_run","title":"prefect_gcp.cloud_run","text":"

Integrations with Google Cloud Run Job.

Note this module is experimental. The interfaces within may change without notice.

Examples:

Run a job using Google Cloud Run Jobs:\n```python\nCloudRunJob(\n    image=\"gcr.io/my-project/my-image\",\n    region=\"us-east1\",\n    credentials=my_gcp_credentials\n).run()\n```\n\nRun a job that runs the command `echo hello world` using Google Cloud Run Jobs:\n```python\nCloudRunJob(\n    image=\"gcr.io/my-project/my-image\",\n    region=\"us-east1\",\n    credentials=my_gcp_credentials,\n    command=[\"echo\", \"hello world\"]\n).run()\n```\n
"},{"location":"cloud_run/#prefect_gcp.cloud_run-classes","title":"Classes","text":""},{"location":"cloud_run/#prefect_gcp.cloud_run.CloudRunJob","title":"CloudRunJob","text":"

Bases: Infrastructure

Infrastructure block used to run GCP Cloud Run Jobs.

Project name information is provided by the Credentials object, and should always be correct as long as the Credentials object is for the correct project.

Note this block is experimental. The interface may change without notice.

Source code in prefect_gcp/cloud_run.py
class CloudRunJob(Infrastructure):\n    \"\"\"\n    <span class=\"badge-api experimental\"/>\n\n    Infrastructure block used to run GCP Cloud Run Jobs.\n\n    Project name information is provided by the Credentials object, and should always\n    be correct as long as the Credentials object is for the correct project.\n\n    Note this block is experimental. The interface may change without notice.\n    \"\"\"\n\n    _block_type_slug = \"cloud-run-job\"\n    _block_type_name = \"GCP Cloud Run Job\"\n    _description = \"Infrastructure block used to run GCP Cloud Run Jobs. Note this block is experimental. The interface may change without notice.\"  # noqa\n    _logo_url = \"https://cdn.sanity.io/images/3ugk85nk/production/10424e311932e31c477ac2b9ef3d53cefbaad708-250x250.png\"  # noqa\n    _documentation_url = \"https://prefecthq.github.io/prefect-gcp/cloud_run/#prefect_gcp.cloud_run.CloudRunJob\"  # noqa: E501\n\n    type: Literal[\"cloud-run-job\"] = Field(\n        \"cloud-run-job\", description=\"The slug for this task type.\"\n    )\n    image: str = Field(\n        ...,\n        title=\"Image Name\",\n        description=(\n            \"The image to use for a new Cloud Run Job. This value must \"\n            \"refer to an image within either Google Container Registry \"\n            \"or Google Artifact Registry, like `gcr.io/<project_name>/<repo>/`.\"\n        ),\n    )\n    region: str = Field(..., description=\"The region where the Cloud Run Job resides.\")\n    credentials: GcpCredentials  # cannot be Field; else it shows as Json\n\n    # Job settings\n    cpu: Optional[int] = Field(\n        default=None,\n        title=\"CPU\",\n        description=(\n            \"The amount of compute allocated to the Cloud Run Job. \"\n            \"The int must be valid based on the rules specified at \"\n            \"https://cloud.google.com/run/docs/configuring/cpu#setting-jobs .\"\n        ),\n    )\n    memory: Optional[int] = Field(\n        default=None,\n        title=\"Memory\",\n        description=\"The amount of memory allocated to the Cloud Run Job.\",\n    )\n    memory_unit: Optional[Literal[\"G\", \"Gi\", \"M\", \"Mi\"]] = Field(\n        default=None,\n        title=\"Memory Units\",\n        description=(\n            \"The unit of memory. 
See \"\n            \"https://cloud.google.com/run/docs/configuring/memory-limits#setting \"\n            \"for additional details.\"\n        ),\n    )\n    vpc_connector_name: Optional[str] = Field(\n        default=None,\n        title=\"VPC Connector Name\",\n        description=\"The name of the VPC connector to use for the Cloud Run Job.\",\n    )\n    args: Optional[List[str]] = Field(\n        default=None,\n        description=(\n            \"Arguments to be passed to your Cloud Run Job's entrypoint command.\"\n        ),\n    )\n    env: Dict[str, str] = Field(\n        default_factory=dict,\n        description=\"Environment variables to be passed to your Cloud Run Job.\",\n    )\n\n    # Cleanup behavior\n    keep_job: Optional[bool] = Field(\n        default=False,\n        title=\"Keep Job After Completion\",\n        description=\"Keep the completed Cloud Run Job on Google Cloud Platform.\",\n    )\n    timeout: Optional[int] = Field(\n        default=600,\n        gt=0,\n        le=3600,\n        title=\"Job Timeout\",\n        description=(\n            \"The length of time that Prefect will wait for a Cloud Run Job to complete \"\n            \"before raising an exception.\"\n        ),\n    )\n    max_retries: Optional[int] = Field(\n        default=3,\n        ge=0,\n        le=10,\n        title=\"Max Retries\",\n        description=(\n            \"The maximum retries setting specifies the number of times a task is \"\n            \"allowed to restart in case of failure before being failed permanently.\"\n        ),\n    )\n    # For private use\n    _job_name: str = None\n    _execution: Optional[Execution] = None\n\n    @property\n    def job_name(self):\n        \"\"\"Create a unique and valid job name.\"\"\"\n\n        if self._job_name is None:\n            # get `repo` from `gcr.io/<project_name>/repo/other`\n            components = self.image.split(\"/\")\n            image_name = components[2]\n            # only alphanumeric and '-' allowed for a job name\n            modified_image_name = image_name.replace(\":\", \"-\").replace(\".\", \"-\")\n            # make 50 char limit for final job name, which will be '<name>-<uuid>'\n            if len(modified_image_name) > 17:\n                modified_image_name = modified_image_name[:17]\n            name = f\"{modified_image_name}-{uuid4().hex}\"\n            self._job_name = name\n\n        return self._job_name\n\n    @property\n    def memory_string(self):\n        \"\"\"Returns the string expected for memory resources argument.\"\"\"\n        if self.memory and self.memory_unit:\n            return str(self.memory) + self.memory_unit\n        return None\n\n    @validator(\"image\")\n    def _remove_image_spaces(cls, value):\n        \"\"\"Deal with spaces in image names.\"\"\"\n        if value is not None:\n            return value.strip()\n\n    @root_validator\n    def _check_valid_memory(cls, values):\n        \"\"\"Make sure memory conforms to expected values for API.\n        See: https://cloud.google.com/run/docs/configuring/memory-limits#setting\n        \"\"\"  # noqa\n        if (values.get(\"memory\") is not None and values.get(\"memory_unit\") is None) or (\n            values.get(\"memory_unit\") is not None and values.get(\"memory\") is None\n        ):\n            raise ValueError(\n                \"A memory value and unit must both be supplied to specify a memory\"\n                \" value other than the default memory value.\"\n            )\n        return values\n\n    def 
get_corresponding_worker_type(self) -> str:\n        \"\"\"Return the corresponding worker type for this infrastructure block.\"\"\"\n        return \"cloud-run\"\n\n    async def generate_work_pool_base_job_template(self) -> dict:\n        \"\"\"\n        Generate a base job template for a cloud-run work pool with the same\n        configuration as this block.\n\n        Returns:\n            - dict: a base job template for a cloud-run work pool\n        \"\"\"\n        base_job_template = await get_default_base_job_template_for_infrastructure_type(\n            self.get_corresponding_worker_type(),\n        )\n        assert (\n            base_job_template is not None\n        ), \"Failed to generate default base job template for Cloud Run worker.\"\n        for key, value in self.dict(exclude_unset=True, exclude_defaults=True).items():\n            if key == \"command\":\n                base_job_template[\"variables\"][\"properties\"][\"command\"][\n                    \"default\"\n                ] = shlex.join(value)\n            elif key in [\n                \"type\",\n                \"block_type_slug\",\n                \"_block_document_id\",\n                \"_block_document_name\",\n                \"_is_anonymous\",\n                \"memory_unit\",\n            ]:\n                continue\n            elif key == \"credentials\":\n                if not self.credentials._block_document_id:\n                    raise BlockNotSavedError(\n                        \"It looks like you are trying to use a block that\"\n                        \" has not been saved. Please call `.save` on your block\"\n                        \" before publishing it as a work pool.\"\n                    )\n                base_job_template[\"variables\"][\"properties\"][\"credentials\"][\n                    \"default\"\n                ] = {\n                    \"$ref\": {\n                        \"block_document_id\": str(self.credentials._block_document_id)\n                    }\n                }\n            elif key == \"memory\" and self.memory_string:\n                base_job_template[\"variables\"][\"properties\"][\"memory\"][\n                    \"default\"\n                ] = self.memory_string\n            elif key == \"cpu\" and self.cpu is not None:\n                base_job_template[\"variables\"][\"properties\"][\"cpu\"][\n                    \"default\"\n                ] = f\"{self.cpu * 1000}m\"\n            elif key == \"args\":\n                # Not a default variable, but we can add it to the template\n                base_job_template[\"variables\"][\"properties\"][\"args\"] = {\n                    \"title\": \"Arguments\",\n                    \"type\": \"string\",\n                    \"description\": \"Arguments to be passed to your Cloud Run Job's entrypoint command.\",  # noqa\n                    \"default\": value,\n                }\n                base_job_template[\"job_configuration\"][\"job_body\"][\"spec\"][\"template\"][\n                    \"spec\"\n                ][\"template\"][\"spec\"][\"containers\"][0][\"args\"] = \"{{ args }}\"\n            elif key in base_job_template[\"variables\"][\"properties\"]:\n                base_job_template[\"variables\"][\"properties\"][key][\"default\"] = value\n            else:\n                self.logger.warning(\n                    f\"Variable {key!r} is not supported by Cloud Run work pools.\"\n                    \" Skipping.\"\n                )\n\n        return base_job_template\n\n    def 
_create_job_error(self, exc):\n        \"\"\"Provides a nicer error for 404s when trying to create a Cloud Run Job.\"\"\"\n        # TODO consider lookup table instead of the if/else,\n        # also check for documented errors\n        if exc.status_code == 404:\n            raise RuntimeError(\n                f\"Failed to find resources at {exc.uri}. Confirm that region\"\n                f\" '{self.region}' is the correct region for your Cloud Run Job and\"\n                f\" that {self.credentials.project} is the correct GCP project. If\"\n                f\" your project ID is not correct, you are using a Credentials block\"\n                f\" with permissions for the wrong project.\"\n            ) from exc\n        raise exc\n\n    def _job_run_submission_error(self, exc):\n        \"\"\"Provides a nicer error for 404s when submitting job runs.\"\"\"\n        if exc.status_code == 404:\n            pat1 = r\"The requested URL [^ ]+ was not found on this server\"\n            # pat2 = (\n            #     r\"Resource '[^ ]+' of kind 'JOB' in region '[\\w\\-0-9]+' \"\n            #     r\"in project '[\\w\\-0-9]+' does not exist\"\n            # )\n            if re.findall(pat1, str(exc)):\n                raise RuntimeError(\n                    f\"Failed to find resources at {exc.uri}. \"\n                    f\"Confirm that region '{self.region}' is \"\n                    f\"the correct region for your Cloud Run Job \"\n                    f\"and that '{self.credentials.project}' is the \"\n                    f\"correct GCP project. If your project ID is not \"\n                    f\"correct, you are using a Credentials \"\n                    f\"block with permissions for the wrong project.\"\n                ) from exc\n            else:\n                raise exc\n\n        raise exc\n\n    def _cpu_as_k8s_quantity(self) -> str:\n        \"\"\"Return the CPU integer in the format expected by GCP Cloud Run Jobs API.\n        See: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/\n        See also: https://cloud.google.com/run/docs/configuring/cpu#setting-jobs\n        \"\"\"  # noqa\n        return str(self.cpu * 1000) + \"m\"\n\n    @sync_compatible\n    async def run(self, task_status: Optional[TaskStatus] = None):\n        \"\"\"Run the configured job on a Google Cloud Run Job.\"\"\"\n        with self._get_client() as client:\n            await run_sync_in_worker_thread(\n                self._create_job_and_wait_for_registration, client\n            )\n            job_execution = await run_sync_in_worker_thread(\n                self._begin_job_execution, client\n            )\n\n            if task_status:\n                task_status.started(self.job_name)\n\n            result = await run_sync_in_worker_thread(\n                self._watch_job_execution_and_get_result,\n                client,\n                job_execution,\n                5,\n            )\n            return result\n\n    @sync_compatible\n    async def kill(self, identifier: str, grace_seconds: int = 30) -> None:\n        \"\"\"\n        Kill a task running Cloud Run.\n\n        Args:\n            identifier: The Cloud Run Job name. This should match a\n                value yielded by CloudRunJob.run.\n        \"\"\"\n        if grace_seconds != 30:\n            self.logger.warning(\n                f\"Kill grace period of {grace_seconds}s requested, but GCP does not \"\n                \"support dynamic grace period configuration. 
See here for more info: \"\n                \"https://cloud.google.com/run/docs/reference/rest/v1/namespaces.jobs/delete\"  # noqa\n            )\n\n        with self._get_client() as client:\n            await run_sync_in_worker_thread(\n                self._kill_job,\n                client=client,\n                namespace=self.credentials.project,\n                job_name=identifier,\n            )\n\n    def _kill_job(self, client: Resource, namespace: str, job_name: str) -> None:\n        \"\"\"\n        Thin wrapper around Job.delete, wrapping a try/except since\n        Job is an independent class that doesn't have knowledge of\n        CloudRunJob and its associated logic.\n        \"\"\"\n        try:\n            Job.delete(client=client, namespace=namespace, job_name=job_name)\n        except Exception as exc:\n            if \"does not exist\" in str(exc):\n                raise InfrastructureNotFound(\n                    f\"Cannot stop Cloud Run Job; the job name {job_name!r} \"\n                    \"could not be found.\"\n                ) from exc\n            raise\n\n    def _create_job_and_wait_for_registration(self, client: Resource) -> None:\n        \"\"\"Create a new job wait for it to finish registering.\"\"\"\n        try:\n            self.logger.info(f\"Creating Cloud Run Job {self.job_name}\")\n            Job.create(\n                client=client,\n                namespace=self.credentials.project,\n                body=self._jobs_body(),\n            )\n        except googleapiclient.errors.HttpError as exc:\n            self._create_job_error(exc)\n\n        try:\n            self._wait_for_job_creation(client=client, timeout=self.timeout)\n        except Exception:\n            self.logger.exception(\n                \"Encountered an exception while waiting for job run creation\"\n            )\n            if not self.keep_job:\n                self.logger.info(\n                    f\"Deleting Cloud Run Job {self.job_name} from Google Cloud Run.\"\n                )\n                try:\n                    Job.delete(\n                        client=client,\n                        namespace=self.credentials.project,\n                        job_name=self.job_name,\n                    )\n                except Exception:\n                    self.logger.exception(\n                        \"Received an unexpected exception while attempting to delete\"\n                        f\" Cloud Run Job {self.job_name!r}\"\n                    )\n            raise\n\n    def _begin_job_execution(self, client: Resource) -> Execution:\n        \"\"\"Submit a job run for execution and return the execution object.\"\"\"\n        try:\n            self.logger.info(\n                f\"Submitting Cloud Run Job {self.job_name!r} for execution.\"\n            )\n            submission = Job.run(\n                client=client,\n                namespace=self.credentials.project,\n                job_name=self.job_name,\n            )\n\n            job_execution = Execution.get(\n                client=client,\n                namespace=submission[\"metadata\"][\"namespace\"],\n                execution_name=submission[\"metadata\"][\"name\"],\n            )\n\n            command = (\n                \" \".join(self.command) if self.command else \"default container command\"\n            )\n\n            self.logger.info(\n                f\"Cloud Run Job {self.job_name!r}: Running command {command!r}\"\n            )\n        except Exception as exc:\n           
 self._job_run_submission_error(exc)\n\n        return job_execution\n\n    def _watch_job_execution_and_get_result(\n        self, client: Resource, execution: Execution, poll_interval: int\n    ) -> CloudRunJobResult:\n        \"\"\"Wait for execution to complete and then return result.\"\"\"\n        try:\n            job_execution = self._watch_job_execution(\n                client=client,\n                job_execution=execution,\n                timeout=self.timeout,\n                poll_interval=poll_interval,\n            )\n        except Exception:\n            self.logger.exception(\n                \"Received an unexpected exception while monitoring Cloud Run Job \"\n                f\"{self.job_name!r}\"\n            )\n            raise\n\n        if job_execution.succeeded():\n            status_code = 0\n            self.logger.info(f\"Job Run {self.job_name} completed successfully\")\n        else:\n            status_code = 1\n            error_msg = job_execution.condition_after_completion()[\"message\"]\n            self.logger.error(\n                f\"Job Run {self.job_name} did not complete successfully. {error_msg}\"\n            )\n\n        self.logger.info(\n            f\"Job Run logs can be found on GCP at: {job_execution.log_uri}\"\n        )\n\n        if not self.keep_job:\n            self.logger.info(\n                f\"Deleting completed Cloud Run Job {self.job_name!r} from Google Cloud\"\n                \" Run...\"\n            )\n            try:\n                Job.delete(\n                    client=client,\n                    namespace=self.credentials.project,\n                    job_name=self.job_name,\n                )\n            except Exception:\n                self.logger.exception(\n                    \"Received an unexpected exception while attempting to delete Cloud\"\n                    f\" Run Job {self.job_name}\"\n                )\n\n        return CloudRunJobResult(identifier=self.job_name, status_code=status_code)\n\n    def _jobs_body(self) -> dict:\n        \"\"\"Create properly formatted body used for a Job CREATE request.\n        See: https://cloud.google.com/run/docs/reference/rest/v1/namespaces.jobs\n        \"\"\"\n        jobs_metadata = {\"name\": self.job_name}\n\n        annotations = {\n            # See: https://cloud.google.com/run/docs/troubleshooting#launch-stage-validation  # noqa\n            \"run.googleapis.com/launch-stage\": \"BETA\",\n        }\n        # add vpc connector if specified\n        if self.vpc_connector_name:\n            annotations[\n                \"run.googleapis.com/vpc-access-connector\"\n            ] = self.vpc_connector_name\n\n        # env and command here\n        containers = [self._add_container_settings({\"image\": self.image})]\n\n        # apply this timeout to each task\n        timeout_seconds = str(self.timeout)\n\n        body = {\n            \"apiVersion\": \"run.googleapis.com/v1\",\n            \"kind\": \"Job\",\n            \"metadata\": jobs_metadata,\n            \"spec\": {  # JobSpec\n                \"template\": {  # ExecutionTemplateSpec\n                    \"metadata\": {\"annotations\": annotations},\n                    \"spec\": {  # ExecutionSpec\n                        \"template\": {  # TaskTemplateSpec\n                            \"spec\": {\n                                \"containers\": containers,\n                                \"timeoutSeconds\": timeout_seconds,\n                                \"maxRetries\": self.max_retries,\n   
                         }  # TaskSpec\n                        }\n                    },\n                }\n            },\n        }\n        return body\n\n    def preview(self) -> str:\n        \"\"\"Generate a preview of the job definition that will be sent to GCP.\"\"\"\n        body = self._jobs_body()\n        container_settings = body[\"spec\"][\"template\"][\"spec\"][\"template\"][\"spec\"][\n            \"containers\"\n        ][0][\"env\"]\n        body[\"spec\"][\"template\"][\"spec\"][\"template\"][\"spec\"][\"containers\"][0][\"env\"] = [\n            container_setting\n            for container_setting in container_settings\n            if container_setting[\"name\"] != \"PREFECT_API_KEY\"\n        ]\n        return json.dumps(body, indent=2)\n\n    def _watch_job_execution(\n        self, client, job_execution: Execution, timeout: int, poll_interval: int = 5\n    ):\n        \"\"\"\n        Update job_execution status until it is no longer running or timeout is reached.\n        \"\"\"\n        t0 = time.time()\n        while job_execution.is_running():\n            job_execution = Execution.get(\n                client=client,\n                namespace=job_execution.namespace,\n                execution_name=job_execution.name,\n            )\n\n            elapsed_time = time.time() - t0\n            if timeout is not None and elapsed_time > timeout:\n                raise RuntimeError(\n                    f\"Timed out after {elapsed_time}s while waiting for Cloud Run Job \"\n                    \"execution to complete. Your job may still be running on GCP.\"\n                )\n\n            time.sleep(poll_interval)\n\n        return job_execution\n\n    def _wait_for_job_creation(\n        self, client: Resource, timeout: int, poll_interval: int = 5\n    ):\n        \"\"\"Give created job time to register.\"\"\"\n        job = Job.get(\n            client=client, namespace=self.credentials.project, job_name=self.job_name\n        )\n\n        t0 = time.time()\n        while not job.is_ready():\n            ready_condition = (\n                job.ready_condition\n                if job.ready_condition\n                else \"waiting for condition update\"\n            )\n            self.logger.info(\n                f\"Job is not yet ready... Current condition: {ready_condition}\"\n            )\n            job = Job.get(\n                client=client,\n                namespace=self.credentials.project,\n                job_name=self.job_name,\n            )\n\n            elapsed_time = time.time() - t0\n            if timeout is not None and elapsed_time > timeout:\n                raise RuntimeError(\n                    f\"Timed out after {elapsed_time}s while waiting for Cloud Run Job \"\n                    \"execution to complete. 
Your job may still be running on GCP.\"\n                )\n\n            time.sleep(poll_interval)\n\n    def _get_client(self) -> Resource:\n        \"\"\"Get the base client needed for interacting with GCP APIs.\"\"\"\n        # region needed for 'v1' API\n        api_endpoint = f\"https://{self.region}-run.googleapis.com\"\n        gcp_creds = self.credentials.get_credentials_from_service_account()\n        options = ClientOptions(api_endpoint=api_endpoint)\n\n        return discovery.build(\n            \"run\", \"v1\", client_options=options, credentials=gcp_creds\n        ).namespaces()\n\n    # CONTAINER SETTINGS\n    def _add_container_settings(self, base_settings: Dict[str, Any]) -> Dict[str, Any]:\n        \"\"\"\n        Add settings related to containers for Cloud Run Jobs to a dictionary.\n        Includes environment variables, entrypoint command, entrypoint arguments,\n        and cpu and memory limits.\n        See: https://cloud.google.com/run/docs/reference/rest/v1/Container\n        and https://cloud.google.com/run/docs/reference/rest/v1/Container#ResourceRequirements\n        \"\"\"  # noqa\n        container_settings = base_settings.copy()\n        container_settings.update(self._add_env())\n        container_settings.update(self._add_resources())\n        container_settings.update(self._add_command())\n        container_settings.update(self._add_args())\n        return container_settings\n\n    def _add_args(self) -> dict:\n        \"\"\"Set the arguments that will be passed to the entrypoint for a Cloud Run Job.\n        See: https://cloud.google.com/run/docs/reference/rest/v1/Container\n        \"\"\"  # noqa\n        return {\"args\": self.args} if self.args else {}\n\n    def _add_command(self) -> dict:\n        \"\"\"Set the command that a container will run for a Cloud Run Job.\n        See: https://cloud.google.com/run/docs/reference/rest/v1/Container\n        \"\"\"  # noqa\n        return {\"command\": self.command}\n\n    def _add_resources(self) -> dict:\n        \"\"\"Set specified resources limits for a Cloud Run Job.\n        See: https://cloud.google.com/run/docs/reference/rest/v1/Container#ResourceRequirements\n        See also: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/\n        \"\"\"  # noqa\n        resources = {\"limits\": {}, \"requests\": {}}\n\n        if self.cpu is not None:\n            cpu = self._cpu_as_k8s_quantity()\n            resources[\"limits\"][\"cpu\"] = cpu\n            resources[\"requests\"][\"cpu\"] = cpu\n        if self.memory_string is not None:\n            resources[\"limits\"][\"memory\"] = self.memory_string\n            resources[\"requests\"][\"memory\"] = self.memory_string\n\n        return {\"resources\": resources} if resources[\"requests\"] else {}\n\n    def _add_env(self) -> dict:\n        \"\"\"Add environment variables for a Cloud Run Job.\n\n        Method `self._base_environment()` gets necessary Prefect environment variables\n        from the config.\n\n        See: https://cloud.google.com/run/docs/reference/rest/v1/Container#envvar for\n        how environment variables are specified for Cloud Run Jobs.\n        \"\"\"  # noqa\n        env = {**self._base_environment(), **self.env}\n        cloud_run_env = [{\"name\": k, \"value\": v} for k, v in env.items()]\n        return {\"env\": cloud_run_env}\n
"},{"location":"cloud_run/#prefect_gcp.cloud_run.CloudRunJob-attributes","title":"Attributes","text":""},{"location":"cloud_run/#prefect_gcp.cloud_run.CloudRunJob.job_name","title":"job_name property","text":"

Create a unique and valid job name.

"},{"location":"cloud_run/#prefect_gcp.cloud_run.CloudRunJob.memory_string","title":"memory_string property","text":"

Returns the string expected for memory resources argument.

"},{"location":"cloud_run/#prefect_gcp.cloud_run.CloudRunJob-functions","title":"Functions","text":""},{"location":"cloud_run/#prefect_gcp.cloud_run.CloudRunJob.generate_work_pool_base_job_template","title":"generate_work_pool_base_job_template async","text":"

Generate a base job template for a cloud-run work pool with the same configuration as this block.

Returns:

Type Description dict
  • dict: a base job template for a cloud-run work pool
Source code in prefect_gcp/cloud_run.py
async def generate_work_pool_base_job_template(self) -> dict:\n    \"\"\"\n    Generate a base job template for a cloud-run work pool with the same\n    configuration as this block.\n\n    Returns:\n        - dict: a base job template for a cloud-run work pool\n    \"\"\"\n    base_job_template = await get_default_base_job_template_for_infrastructure_type(\n        self.get_corresponding_worker_type(),\n    )\n    assert (\n        base_job_template is not None\n    ), \"Failed to generate default base job template for Cloud Run worker.\"\n    for key, value in self.dict(exclude_unset=True, exclude_defaults=True).items():\n        if key == \"command\":\n            base_job_template[\"variables\"][\"properties\"][\"command\"][\n                \"default\"\n            ] = shlex.join(value)\n        elif key in [\n            \"type\",\n            \"block_type_slug\",\n            \"_block_document_id\",\n            \"_block_document_name\",\n            \"_is_anonymous\",\n            \"memory_unit\",\n        ]:\n            continue\n        elif key == \"credentials\":\n            if not self.credentials._block_document_id:\n                raise BlockNotSavedError(\n                    \"It looks like you are trying to use a block that\"\n                    \" has not been saved. Please call `.save` on your block\"\n                    \" before publishing it as a work pool.\"\n                )\n            base_job_template[\"variables\"][\"properties\"][\"credentials\"][\n                \"default\"\n            ] = {\n                \"$ref\": {\n                    \"block_document_id\": str(self.credentials._block_document_id)\n                }\n            }\n        elif key == \"memory\" and self.memory_string:\n            base_job_template[\"variables\"][\"properties\"][\"memory\"][\n                \"default\"\n            ] = self.memory_string\n        elif key == \"cpu\" and self.cpu is not None:\n            base_job_template[\"variables\"][\"properties\"][\"cpu\"][\n                \"default\"\n            ] = f\"{self.cpu * 1000}m\"\n        elif key == \"args\":\n            # Not a default variable, but we can add it to the template\n            base_job_template[\"variables\"][\"properties\"][\"args\"] = {\n                \"title\": \"Arguments\",\n                \"type\": \"string\",\n                \"description\": \"Arguments to be passed to your Cloud Run Job's entrypoint command.\",  # noqa\n                \"default\": value,\n            }\n            base_job_template[\"job_configuration\"][\"job_body\"][\"spec\"][\"template\"][\n                \"spec\"\n            ][\"template\"][\"spec\"][\"containers\"][0][\"args\"] = \"{{ args }}\"\n        elif key in base_job_template[\"variables\"][\"properties\"]:\n            base_job_template[\"variables\"][\"properties\"][key][\"default\"] = value\n        else:\n            self.logger.warning(\n                f\"Variable {key!r} is not supported by Cloud Run work pools.\"\n                \" Skipping.\"\n            )\n\n    return base_job_template\n
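Because the method is async, a minimal sketch of generating and inspecting the template outside of a flow; it assumes the referenced credentials block has already been saved (otherwise BlockNotSavedError is raised), and the image, region, and block name are placeholders:

```python
import asyncio
import json

from prefect_gcp import GcpCredentials
from prefect_gcp.cloud_run import CloudRunJob

async def main():
    # The credentials block must already be saved, or BlockNotSavedError is raised
    credentials = await GcpCredentials.load("MY_CREDENTIALS_BLOCK")
    job = CloudRunJob(
        image="gcr.io/my-project/my-image",
        region="us-east1",
        credentials=credentials,
    )
    template = await job.generate_work_pool_base_job_template()
    print(json.dumps(template, indent=2))

asyncio.run(main())
```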
"},{"location":"cloud_run/#prefect_gcp.cloud_run.CloudRunJob.get_corresponding_worker_type","title":"get_corresponding_worker_type","text":"

Return the corresponding worker type for this infrastructure block.

Source code in prefect_gcp/cloud_run.py
def get_corresponding_worker_type(self) -> str:\n    \"\"\"Return the corresponding worker type for this infrastructure block.\"\"\"\n    return \"cloud-run\"\n
"},{"location":"cloud_run/#prefect_gcp.cloud_run.CloudRunJob.kill","title":"kill async","text":"

Kill a task running Cloud Run.

Parameters:

Name Type Description Default identifier str

The Cloud Run Job name. This should match a value yielded by CloudRunJob.run.

required Source code in prefect_gcp/cloud_run.py
@sync_compatible\nasync def kill(self, identifier: str, grace_seconds: int = 30) -> None:\n    \"\"\"\n    Kill a task running Cloud Run.\n\n    Args:\n        identifier: The Cloud Run Job name. This should match a\n            value yielded by CloudRunJob.run.\n    \"\"\"\n    if grace_seconds != 30:\n        self.logger.warning(\n            f\"Kill grace period of {grace_seconds}s requested, but GCP does not \"\n            \"support dynamic grace period configuration. See here for more info: \"\n            \"https://cloud.google.com/run/docs/reference/rest/v1/namespaces.jobs/delete\"  # noqa\n        )\n\n    with self._get_client() as client:\n        await run_sync_in_worker_thread(\n            self._kill_job,\n            client=client,\n            namespace=self.credentials.project,\n            job_name=identifier,\n        )\n
"},{"location":"cloud_run/#prefect_gcp.cloud_run.CloudRunJob.preview","title":"preview","text":"

Generate a preview of the job definition that will be sent to GCP.

Source code in prefect_gcp/cloud_run.py
def preview(self) -> str:\n    \"\"\"Generate a preview of the job definition that will be sent to GCP.\"\"\"\n    body = self._jobs_body()\n    container_settings = body[\"spec\"][\"template\"][\"spec\"][\"template\"][\"spec\"][\n        \"containers\"\n    ][0][\"env\"]\n    body[\"spec\"][\"template\"][\"spec\"][\"template\"][\"spec\"][\"containers\"][0][\"env\"] = [\n        container_setting\n        for container_setting in container_settings\n        if container_setting[\"name\"] != \"PREFECT_API_KEY\"\n    ]\n    return json.dumps(body, indent=2)\n
"},{"location":"cloud_run/#prefect_gcp.cloud_run.CloudRunJob.run","title":"run async","text":"

Run the configured job on a Google Cloud Run Job.

Source code in prefect_gcp/cloud_run.py
@sync_compatible\nasync def run(self, task_status: Optional[TaskStatus] = None):\n    \"\"\"Run the configured job on a Google Cloud Run Job.\"\"\"\n    with self._get_client() as client:\n        await run_sync_in_worker_thread(\n            self._create_job_and_wait_for_registration, client\n        )\n        job_execution = await run_sync_in_worker_thread(\n            self._begin_job_execution, client\n        )\n\n        if task_status:\n            task_status.started(self.job_name)\n\n        result = await run_sync_in_worker_thread(\n            self._watch_job_execution_and_get_result,\n            client,\n            job_execution,\n            5,\n        )\n        return result\n
"},{"location":"cloud_run/#prefect_gcp.cloud_run.CloudRunJobResult","title":"CloudRunJobResult","text":"

Bases: InfrastructureResult

Result from a Cloud Run Job.

Source code in prefect_gcp/cloud_run.py
class CloudRunJobResult(InfrastructureResult):\n    \"\"\"Result from a Cloud Run Job.\"\"\"\n
"},{"location":"cloud_run/#prefect_gcp.cloud_run.Execution","title":"Execution","text":"

Bases: BaseModel

Utility class to call GCP executions API and interact with the returned objects.

Source code in prefect_gcp/cloud_run.py
class Execution(BaseModel):\n    \"\"\"\n    Utility class to call GCP `executions` API and\n    interact with the returned objects.\n    \"\"\"\n\n    name: str\n    namespace: str\n    metadata: dict\n    spec: dict\n    status: dict\n    log_uri: str\n\n    def is_running(self) -> bool:\n        \"\"\"Returns True if Execution is not completed.\"\"\"\n        return self.status.get(\"completionTime\") is None\n\n    def condition_after_completion(self):\n        \"\"\"Returns Execution condition if Execution has completed.\"\"\"\n        for condition in self.status[\"conditions\"]:\n            if condition[\"type\"] == \"Completed\":\n                return condition\n\n    def succeeded(self):\n        \"\"\"Whether or not the Execution completed is a successful state.\"\"\"\n        completed_condition = self.condition_after_completion()\n        if completed_condition and completed_condition[\"status\"] == \"True\":\n            return True\n\n        return False\n\n    @classmethod\n    def get(cls, client: Resource, namespace: str, execution_name: str):\n        \"\"\"\n        Make a get request to the GCP executions API\n        and return an Execution instance.\n        \"\"\"\n        request = client.executions().get(\n            name=f\"namespaces/{namespace}/executions/{execution_name}\"\n        )\n        response = request.execute()\n\n        return cls(\n            name=response[\"metadata\"][\"name\"],\n            namespace=response[\"metadata\"][\"namespace\"],\n            metadata=response[\"metadata\"],\n            spec=response[\"spec\"],\n            status=response[\"status\"],\n            log_uri=response[\"status\"][\"logUri\"],\n        )\n
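
As a rough, hypothetical sketch (the credentials block name and execution name below are placeholders), this utility can be exercised directly against the v1 API by building a client the same way the worker in this collection does:

from google.api_core.client_options import ClientOptions
from googleapiclient import discovery
from prefect_gcp.cloud_run import Execution
from prefect_gcp.credentials import GcpCredentials

creds = GcpCredentials.load("my-gcp-creds")  # hypothetical saved block name
client = discovery.build(
    "run",
    "v1",
    client_options=ClientOptions(api_endpoint="https://us-central1-run.googleapis.com"),
    credentials=creds.get_credentials_from_service_account(),
).namespaces()

# Fetch an existing execution and inspect its state.
execution = Execution.get(
    client=client,
    namespace=creds.project,
    execution_name="my-job-abc123-xyz",  # hypothetical execution name
)
print(execution.is_running(), execution.log_uri)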
"},{"location":"cloud_run/#prefect_gcp.cloud_run.Execution-functions","title":"Functions","text":""},{"location":"cloud_run/#prefect_gcp.cloud_run.Execution.condition_after_completion","title":"condition_after_completion","text":"

Returns Execution condition if Execution has completed.

Source code in prefect_gcp/cloud_run.py
def condition_after_completion(self):\n    \"\"\"Returns Execution condition if Execution has completed.\"\"\"\n    for condition in self.status[\"conditions\"]:\n        if condition[\"type\"] == \"Completed\":\n            return condition\n
"},{"location":"cloud_run/#prefect_gcp.cloud_run.Execution.get","title":"get classmethod","text":"

Make a get request to the GCP executions API and return an Execution instance.

Source code in prefect_gcp/cloud_run.py
@classmethod\ndef get(cls, client: Resource, namespace: str, execution_name: str):\n    \"\"\"\n    Make a get request to the GCP executions API\n    and return an Execution instance.\n    \"\"\"\n    request = client.executions().get(\n        name=f\"namespaces/{namespace}/executions/{execution_name}\"\n    )\n    response = request.execute()\n\n    return cls(\n        name=response[\"metadata\"][\"name\"],\n        namespace=response[\"metadata\"][\"namespace\"],\n        metadata=response[\"metadata\"],\n        spec=response[\"spec\"],\n        status=response[\"status\"],\n        log_uri=response[\"status\"][\"logUri\"],\n    )\n
"},{"location":"cloud_run/#prefect_gcp.cloud_run.Execution.is_running","title":"is_running","text":"

Returns True if Execution is not completed.

Source code in prefect_gcp/cloud_run.py
def is_running(self) -> bool:\n    \"\"\"Returns True if Execution is not completed.\"\"\"\n    return self.status.get(\"completionTime\") is None\n
"},{"location":"cloud_run/#prefect_gcp.cloud_run.Execution.succeeded","title":"succeeded","text":"

Whether or not the Execution completed in a successful state.

Source code in prefect_gcp/cloud_run.py
def succeeded(self):\n    \"\"\"Whether or not the Execution completed is a successful state.\"\"\"\n    completed_condition = self.condition_after_completion()\n    if completed_condition and completed_condition[\"status\"] == \"True\":\n        return True\n\n    return False\n
"},{"location":"cloud_run/#prefect_gcp.cloud_run.Job","title":"Job","text":"

Bases: BaseModel

Utility class to call GCP jobs API and interact with the returned objects.

Source code in prefect_gcp/cloud_run.py
class Job(BaseModel):\n    \"\"\"\n    Utility class to call GCP `jobs` API and\n    interact with the returned objects.\n    \"\"\"\n\n    metadata: dict\n    spec: dict\n    status: dict\n    name: str\n    ready_condition: dict\n    execution_status: dict\n\n    def _is_missing_container(self):\n        \"\"\"\n        Check if Job status is not ready because\n        the specified container cannot be found.\n        \"\"\"\n        if (\n            self.ready_condition.get(\"status\") == \"False\"\n            and self.ready_condition.get(\"reason\") == \"ContainerMissing\"\n        ):\n            return True\n        return False\n\n    def is_ready(self) -> bool:\n        \"\"\"Whether a job is finished registering and ready to be executed\"\"\"\n        if self._is_missing_container():\n            raise Exception(f\"{self.ready_condition['message']}\")\n        return self.ready_condition.get(\"status\") == \"True\"\n\n    def has_execution_in_progress(self) -> bool:\n        \"\"\"See if job has a run in progress.\"\"\"\n        return (\n            self.execution_status == {}\n            or self.execution_status.get(\"completionTimestamp\") is None\n        )\n\n    @staticmethod\n    def _get_ready_condition(job: dict) -> dict:\n        \"\"\"Utility to access JSON field containing ready condition.\"\"\"\n        if job[\"status\"].get(\"conditions\"):\n            for condition in job[\"status\"][\"conditions\"]:\n                if condition[\"type\"] == \"Ready\":\n                    return condition\n\n        return {}\n\n    @staticmethod\n    def _get_execution_status(job: dict):\n        \"\"\"Utility to access JSON field containing execution status.\"\"\"\n        if job[\"status\"].get(\"latestCreatedExecution\"):\n            return job[\"status\"][\"latestCreatedExecution\"]\n\n        return {}\n\n    @classmethod\n    def get(cls, client: Resource, namespace: str, job_name: str):\n        \"\"\"Make a get request to the GCP jobs API and return a Job instance.\"\"\"\n        request = client.jobs().get(name=f\"namespaces/{namespace}/jobs/{job_name}\")\n        response = request.execute()\n\n        return cls(\n            metadata=response[\"metadata\"],\n            spec=response[\"spec\"],\n            status=response[\"status\"],\n            name=response[\"metadata\"][\"name\"],\n            ready_condition=cls._get_ready_condition(response),\n            execution_status=cls._get_execution_status(response),\n        )\n\n    @staticmethod\n    def create(client: Resource, namespace: str, body: dict):\n        \"\"\"Make a create request to the GCP jobs API.\"\"\"\n        request = client.jobs().create(parent=f\"namespaces/{namespace}\", body=body)\n        response = request.execute()\n        return response\n\n    @staticmethod\n    def delete(client: Resource, namespace: str, job_name: str):\n        \"\"\"Make a delete request to the GCP jobs API.\"\"\"\n        request = client.jobs().delete(name=f\"namespaces/{namespace}/jobs/{job_name}\")\n        response = request.execute()\n        return response\n\n    @staticmethod\n    def run(client: Resource, namespace: str, job_name: str):\n        \"\"\"Make a run request to the GCP jobs API.\"\"\"\n        request = client.jobs().run(name=f\"namespaces/{namespace}/jobs/{job_name}\")\n        response = request.execute()\n        return response\n
"},{"location":"cloud_run/#prefect_gcp.cloud_run.Job-functions","title":"Functions","text":""},{"location":"cloud_run/#prefect_gcp.cloud_run.Job.create","title":"create staticmethod","text":"

Make a create request to the GCP jobs API.

Source code in prefect_gcp/cloud_run.py
@staticmethod\ndef create(client: Resource, namespace: str, body: dict):\n    \"\"\"Make a create request to the GCP jobs API.\"\"\"\n    request = client.jobs().create(parent=f\"namespaces/{namespace}\", body=body)\n    response = request.execute()\n    return response\n
"},{"location":"cloud_run/#prefect_gcp.cloud_run.Job.delete","title":"delete staticmethod","text":"

Make a delete request to the GCP jobs API.

Source code in prefect_gcp/cloud_run.py
@staticmethod\ndef delete(client: Resource, namespace: str, job_name: str):\n    \"\"\"Make a delete request to the GCP jobs API.\"\"\"\n    request = client.jobs().delete(name=f\"namespaces/{namespace}/jobs/{job_name}\")\n    response = request.execute()\n    return response\n
"},{"location":"cloud_run/#prefect_gcp.cloud_run.Job.get","title":"get classmethod","text":"

Make a get request to the GCP jobs API and return a Job instance.

Source code in prefect_gcp/cloud_run.py
@classmethod\ndef get(cls, client: Resource, namespace: str, job_name: str):\n    \"\"\"Make a get request to the GCP jobs API and return a Job instance.\"\"\"\n    request = client.jobs().get(name=f\"namespaces/{namespace}/jobs/{job_name}\")\n    response = request.execute()\n\n    return cls(\n        metadata=response[\"metadata\"],\n        spec=response[\"spec\"],\n        status=response[\"status\"],\n        name=response[\"metadata\"][\"name\"],\n        ready_condition=cls._get_ready_condition(response),\n        execution_status=cls._get_execution_status(response),\n    )\n
"},{"location":"cloud_run/#prefect_gcp.cloud_run.Job.has_execution_in_progress","title":"has_execution_in_progress","text":"

See if the job has a run in progress.

Source code in prefect_gcp/cloud_run.py
def has_execution_in_progress(self) -> bool:\n    \"\"\"See if job has a run in progress.\"\"\"\n    return (\n        self.execution_status == {}\n        or self.execution_status.get(\"completionTimestamp\") is None\n    )\n
"},{"location":"cloud_run/#prefect_gcp.cloud_run.Job.is_ready","title":"is_ready","text":"

Whether a job has finished registering and is ready to be executed.

Source code in prefect_gcp/cloud_run.py
def is_ready(self) -> bool:\n    \"\"\"Whether a job is finished registering and ready to be executed\"\"\"\n    if self._is_missing_container():\n        raise Exception(f\"{self.ready_condition['message']}\")\n    return self.ready_condition.get(\"status\") == \"True\"\n
"},{"location":"cloud_run/#prefect_gcp.cloud_run.Job.run","title":"run staticmethod","text":"

Make a run request to the GCP jobs API.

Source code in prefect_gcp/cloud_run.py
@staticmethod\ndef run(client: Resource, namespace: str, job_name: str):\n    \"\"\"Make a run request to the GCP jobs API.\"\"\"\n    request = client.jobs().run(name=f\"namespaces/{namespace}/jobs/{job_name}\")\n    response = request.execute()\n    return response\n
"},{"location":"cloud_run_worker/","title":"Cloud Run","text":""},{"location":"cloud_run_worker/#prefect_gcp.workers.cloud_run","title":"prefect_gcp.workers.cloud_run","text":"

Module containing the Cloud Run worker used for executing flow runs as Cloud Run jobs.

Get started by creating a Cloud Run work pool:

prefect work-pool create 'my-cloud-run-pool' --type cloud-run\n

Then start a Cloud Run worker with the following command:

prefect worker start --pool 'my-cloud-run-pool'\n
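
With the work pool and worker in place, a flow can be deployed so that its runs execute as Cloud Run jobs. The snippet below is a minimal sketch, not part of this collection's API reference; the flow, deployment name, and image path are hypothetical placeholders:

from prefect import flow

@flow(log_prints=True)
def my_flow():
    print("Hello from Cloud Run!")

if __name__ == "__main__":
    # Deploy to the 'my-cloud-run-pool' work pool created above.
    # The image reference is a placeholder; use a registry your GCP project can pull from.
    my_flow.deploy(
        name="my-cloud-run-deployment",
        work_pool_name="my-cloud-run-pool",
        image="us-docker.pkg.dev/my-project/my-repo/my-flow:latest",
    )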
"},{"location":"cloud_run_worker/#prefect_gcp.workers.cloud_run--configuration","title":"Configuration","text":"

Read more about configuring work pools here.

"},{"location":"cloud_run_worker/#prefect_gcp.workers.cloud_run--advanced-configuration","title":"Advanced Configuration","text":"

Using a custom Cloud Run job template

Below is the default job body template used by the Cloud Run Worker:

{\n    \"apiVersion\": \"run.googleapis.com/v1\",\n    \"kind\": \"Job\",\n    \"metadata\":\n        {\n            \"name\": \"{{ name }}\",\n            \"annotations\":\n            {\n                \"run.googleapis.com/launch-stage\": \"BETA\",\n            }\n        },\n        \"spec\":\n        {\n            \"template\":\n            {\n                \"spec\":\n                {\n                    \"template\":\n                    {\n                        \"spec\":\n                        {\n                            \"containers\":\n                            [\n                                {\n                                    \"image\": \"{{ image }}\",\n                                    \"args\": \"{{ args }}\",\n                                    \"resources\":\n                                    {\n                                        \"limits\":\n                                        {\n                                            \"cpu\": \"{{ cpu }}\",\n                                            \"memory\": \"{{ memory }}\"\n                                        },\n                                        \"requests\":\n                                        {\n                                            \"cpu\": \"{{ cpu }}\",\n                                            \"memory\": \"{{ memory }}\"\n                                        }\n                                    }\n                                }\n                            ],\n                            \"timeoutSeconds\": \"{{ timeout }}\",\n                            \"serviceAccountName\": \"{{ service_account_name }}\"\n                        }\n                    }\n                }\n                }\n            },\n            \"metadata\":\n            {\n                \"annotations\":\n                {\n                    \"run.googleapis.com/vpc-access-connector\": \"{{ vpc_connector_name }}\"\n                }\n            }\n        },\n    },\n    \"timeout\": \"{{ timeout }}\",\n    \"keep_job\": \"{{ keep_job }}\"\n}\n
Each value enclosed in {{ }} is a placeholder that will be replaced with a value at runtime on a per-deployment basis. The values that can be used as placeholders are defined by the variables schema defined in the base job template.

The default job body template and available variables can be customized on a per-work-pool basis. By editing the default job body template, you can:

  • Add additional placeholders to the default job template
  • Remove placeholders from the default job template
  • Pass values to Cloud Run that are not defined in the variables schema
"},{"location":"cloud_run_worker/#prefect_gcp.workers.cloud_run--adding-additional-placeholders","title":"Adding additional placeholders","text":"

For example, to allow for extra customization of a new annotation not described in the default job template, you can add the following:

{\n    \"apiVersion\": \"run.googleapis.com/v1\",\n    \"kind\": \"Job\",\n    \"metadata\":\n    {\n        \"name\": \"{{ name }}\",\n        \"annotations\":\n        {\n            \"run.googleapis.com/my-custom-annotation\": \"{{ my_custom_annotation }}\",\n            \"run.googleapis.com/launch-stage\": \"BETA\",\n        },\n      ...\n    },\n  ...\n}\n
my_custom_annotation can now be used as a placeholder in the job template and set on a per-deployment basis.

# deployment.yaml\n...\ninfra_overrides: {\"my_custom_annotation\": \"my-custom-value\"}\n

Additionally, fields can be hard-coded to prevent configuration at the deployment level. For example, to fix the vpc_connector_name field, the placeholder can be removed and replaced with an actual value. All deployments that point to this work pool will then use the same vpc_connector_name value.

{\n    \"apiVersion\": \"run.googleapis.com/v1\",\n    \"kind\": \"Job\",\n    \"spec\":\n    {\n        \"template\":\n        {\n            \"metadata\":\n            {\n                \"annotations\":\n                {\n                    \"run.googleapis.com/vpc-access-connector\": \"my-vpc-connector\"\n                }\n            },\n            ...\n        },\n        ...\n    }\n}\n
"},{"location":"cloud_run_worker/#prefect_gcp.workers.cloud_run-classes","title":"Classes","text":""},{"location":"cloud_run_worker/#prefect_gcp.workers.cloud_run.CloudRunWorker","title":"CloudRunWorker","text":"

Bases: BaseWorker

Prefect worker that executes flow runs within Cloud Run Jobs.

Source code in prefect_gcp/workers/cloud_run.py
class CloudRunWorker(BaseWorker):\n    \"\"\"Prefect worker that executes flow runs within Cloud Run Jobs.\"\"\"\n\n    type = \"cloud-run\"\n    job_configuration = CloudRunWorkerJobConfiguration\n    job_configuration_variables = CloudRunWorkerVariables\n    _description = (\n        \"Execute flow runs within containers on Google Cloud Run. Requires \"\n        \"a Google Cloud Platform account.\"\n    )\n    _display_name = \"Google Cloud Run\"\n    _documentation_url = \"https://prefecthq.github.io/prefect-gcp/cloud_run_worker/\"\n    _logo_url = \"https://cdn.sanity.io/images/3ugk85nk/production/10424e311932e31c477ac2b9ef3d53cefbaad708-250x250.png\"  # noqa\n\n    def _create_job_error(self, exc, configuration):\n        \"\"\"Provides a nicer error for 404s when trying to create a Cloud Run Job.\"\"\"\n        # TODO consider lookup table instead of the if/else,\n        # also check for documented errors\n        if exc.status_code == 404:\n            raise RuntimeError(\n                f\"Failed to find resources at {exc.uri}. Confirm that region\"\n                f\" '{self.region}' is the correct region for your Cloud Run Job and\"\n                f\" that {configuration.project} is the correct GCP project. If\"\n                f\" your project ID is not correct, you are using a Credentials block\"\n                f\" with permissions for the wrong project.\"\n            ) from exc\n        raise exc\n\n    def _job_run_submission_error(self, exc, configuration):\n        \"\"\"Provides a nicer error for 404s when submitting job runs.\"\"\"\n        if exc.status_code == 404:\n            pat1 = r\"The requested URL [^ ]+ was not found on this server\"\n            # pat2 = (\n            #     r\"Resource '[^ ]+' of kind 'JOB' in region '[\\w\\-0-9]+' \"\n            #     r\"in project '[\\w\\-0-9]+' does not exist\"\n            # )\n            if re.findall(pat1, str(exc)):\n                raise RuntimeError(\n                    f\"Failed to find resources at {exc.uri}. \"\n                    f\"Confirm that region '{self.region}' is \"\n                    f\"the correct region for your Cloud Run Job \"\n                    f\"and that '{configuration.project}' is the \"\n                    f\"correct GCP project. If your project ID is not \"\n                    f\"correct, you are using a Credentials \"\n                    f\"block with permissions for the wrong project.\"\n                ) from exc\n            else:\n                raise exc\n\n        raise exc\n\n    async def run(\n        self,\n        flow_run: \"FlowRun\",\n        configuration: CloudRunWorkerJobConfiguration,\n        task_status: Optional[anyio.abc.TaskStatus] = None,\n    ) -> CloudRunWorkerResult:\n        \"\"\"\n        Executes a flow run within a Cloud Run Job and waits for the flow run\n        to complete.\n\n        Args:\n            flow_run: The flow run to execute\n            configuration: The configuration to use when executing the flow run.\n            task_status: The task status object for the current flow run. 
If provided,\n                the task will be marked as started.\n\n        Returns:\n            CloudRunWorkerResult: A result object containing information about the\n                final state of the flow run\n        \"\"\"\n\n        logger = self.get_flow_run_logger(flow_run)\n\n        with self._get_client(configuration) as client:\n            await run_sync_in_worker_thread(\n                self._create_job_and_wait_for_registration,\n                configuration,\n                client,\n                logger,\n            )\n            job_execution = await run_sync_in_worker_thread(\n                self._begin_job_execution, configuration, client, logger\n            )\n\n            if task_status:\n                task_status.started(configuration.job_name)\n\n            result = await run_sync_in_worker_thread(\n                self._watch_job_execution_and_get_result,\n                configuration,\n                client,\n                job_execution,\n                logger,\n            )\n            return result\n\n    def _get_client(self, configuration: CloudRunWorkerJobConfiguration) -> Resource:\n        \"\"\"Get the base client needed for interacting with GCP APIs.\"\"\"\n        # region needed for 'v1' API\n        api_endpoint = f\"https://{configuration.region}-run.googleapis.com\"\n        gcp_creds = configuration.credentials.get_credentials_from_service_account()\n        options = ClientOptions(api_endpoint=api_endpoint)\n\n        return discovery.build(\n            \"run\", \"v1\", client_options=options, credentials=gcp_creds\n        ).namespaces()\n\n    def _create_job_and_wait_for_registration(\n        self,\n        configuration: CloudRunWorkerJobConfiguration,\n        client: Resource,\n        logger: PrefectLogAdapter,\n    ) -> None:\n        \"\"\"Create a new job wait for it to finish registering.\"\"\"\n        try:\n            logger.info(f\"Creating Cloud Run Job {configuration.job_name}\")\n\n            Job.create(\n                client=client,\n                namespace=configuration.credentials.project,\n                body=configuration.job_body,\n            )\n        except googleapiclient.errors.HttpError as exc:\n            self._create_job_error(exc, configuration)\n\n        try:\n            self._wait_for_job_creation(\n                client=client, configuration=configuration, logger=logger\n            )\n        except Exception:\n            logger.exception(\n                \"Encountered an exception while waiting for job run creation\"\n            )\n            if not configuration.keep_job:\n                logger.info(\n                    f\"Deleting Cloud Run Job {configuration.job_name} from \"\n                    \"Google Cloud Run.\"\n                )\n                try:\n                    Job.delete(\n                        client=client,\n                        namespace=configuration.credentials.project,\n                        job_name=configuration.job_name,\n                    )\n                except Exception:\n                    logger.exception(\n                        \"Received an unexpected exception while attempting to delete\"\n                        f\" Cloud Run Job {configuration.job_name!r}\"\n                    )\n            raise\n\n    def _begin_job_execution(\n        self,\n        configuration: CloudRunWorkerJobConfiguration,\n        client: Resource,\n        logger: PrefectLogAdapter,\n    ) -> Execution:\n        \"\"\"Submit a job run 
for execution and return the execution object.\"\"\"\n        try:\n            logger.info(\n                f\"Submitting Cloud Run Job {configuration.job_name!r} for execution.\"\n            )\n            submission = Job.run(\n                client=client,\n                namespace=configuration.project,\n                job_name=configuration.job_name,\n            )\n\n            job_execution = Execution.get(\n                client=client,\n                namespace=submission[\"metadata\"][\"namespace\"],\n                execution_name=submission[\"metadata\"][\"name\"],\n            )\n        except Exception as exc:\n            self._job_run_submission_error(exc, configuration)\n\n        return job_execution\n\n    def _watch_job_execution_and_get_result(\n        self,\n        configuration: CloudRunWorkerJobConfiguration,\n        client: Resource,\n        execution: Execution,\n        logger: PrefectLogAdapter,\n        poll_interval: int = 5,\n    ) -> CloudRunWorkerResult:\n        \"\"\"Wait for execution to complete and then return result.\"\"\"\n        try:\n            job_execution = self._watch_job_execution(\n                client=client,\n                job_execution=execution,\n                timeout=configuration.timeout,\n                poll_interval=poll_interval,\n            )\n        except Exception:\n            logger.exception(\n                \"Received an unexpected exception while monitoring Cloud Run Job \"\n                f\"{configuration.job_name!r}\"\n            )\n            raise\n\n        if job_execution.succeeded():\n            status_code = 0\n            logger.info(f\"Job Run {configuration.job_name} completed successfully\")\n        else:\n            status_code = 1\n            error_msg = job_execution.condition_after_completion()[\"message\"]\n            logger.error(\n                \"Job Run {configuration.job_name} did not complete successfully. 
\"\n                f\"{error_msg}\"\n            )\n\n        logger.info(f\"Job Run logs can be found on GCP at: {job_execution.log_uri}\")\n\n        if not configuration.keep_job:\n            logger.info(\n                f\"Deleting completed Cloud Run Job {configuration.job_name!r} \"\n                \"from Google Cloud Run...\"\n            )\n            try:\n                Job.delete(\n                    client=client,\n                    namespace=configuration.project,\n                    job_name=configuration.job_name,\n                )\n            except Exception:\n                logger.exception(\n                    \"Received an unexpected exception while attempting to delete Cloud\"\n                    f\" Run Job {configuration.job_name}\"\n                )\n\n        return CloudRunWorkerResult(\n            identifier=configuration.job_name, status_code=status_code\n        )\n\n    def _watch_job_execution(\n        self, client, job_execution: Execution, timeout: int, poll_interval: int = 5\n    ):\n        \"\"\"\n        Update job_execution status until it is no longer running or timeout is reached.\n        \"\"\"\n        t0 = time.time()\n        while job_execution.is_running():\n            job_execution = Execution.get(\n                client=client,\n                namespace=job_execution.namespace,\n                execution_name=job_execution.name,\n            )\n\n            elapsed_time = time.time() - t0\n            if timeout is not None and elapsed_time > timeout:\n                raise RuntimeError(\n                    f\"Timed out after {elapsed_time}s while waiting for Cloud Run Job \"\n                    \"execution to complete. Your job may still be running on GCP.\"\n                )\n\n            time.sleep(poll_interval)\n\n        return job_execution\n\n    def _wait_for_job_creation(\n        self,\n        client: Resource,\n        configuration: CloudRunWorkerJobConfiguration,\n        logger: PrefectLogAdapter,\n        poll_interval: int = 5,\n    ):\n        \"\"\"Give created job time to register.\"\"\"\n        job = Job.get(\n            client=client,\n            namespace=configuration.project,\n            job_name=configuration.job_name,\n        )\n\n        t0 = time.time()\n        while not job.is_ready():\n            ready_condition = (\n                job.ready_condition\n                if job.ready_condition\n                else \"waiting for condition update\"\n            )\n            logger.info(f\"Job is not yet ready... Current condition: {ready_condition}\")\n            job = Job.get(\n                client=client,\n                namespace=configuration.project,\n                job_name=configuration.job_name,\n            )\n\n            elapsed_time = time.time() - t0\n            if (\n                configuration.timeout is not None\n                and elapsed_time > configuration.timeout\n            ):\n                raise RuntimeError(\n                    f\"Timed out after {elapsed_time}s while waiting for Cloud Run Job \"\n                    \"execution to complete. 
Your job may still be running on GCP.\"\n                )\n\n            time.sleep(poll_interval)\n\n    async def kill_infrastructure(\n        self,\n        infrastructure_pid: str,\n        configuration: CloudRunWorkerJobConfiguration,\n        grace_seconds: int = 30,\n    ):\n        \"\"\"\n        Stops a job for a cancelled flow run based on the provided infrastructure PID\n        and run configuration.\n        \"\"\"\n        if grace_seconds != 30:\n            self._logger.warning(\n                f\"Kill grace period of {grace_seconds}s requested, but GCP does not \"\n                \"support dynamic grace period configuration. See here for more info: \"\n                \"https://cloud.google.com/run/docs/reference/rest/v1/namespaces.jobs/delete\"  # noqa\n            )\n\n        with self._get_client(configuration) as client:\n            await run_sync_in_worker_thread(\n                self._stop_job,\n                client=client,\n                namespace=configuration.project,\n                job_name=infrastructure_pid,\n            )\n\n    def _stop_job(self, client: Resource, namespace: str, job_name: str):\n        try:\n            Job.delete(client=client, namespace=namespace, job_name=job_name)\n        except Exception as exc:\n            if \"does not exist\" in str(exc):\n                raise InfrastructureNotFound(\n                    f\"Cannot stop Cloud Run Job; the job name {job_name!r} \"\n                    \"could not be found.\"\n                ) from exc\n            raise\n
"},{"location":"cloud_run_worker/#prefect_gcp.workers.cloud_run.CloudRunWorker-functions","title":"Functions","text":""},{"location":"cloud_run_worker/#prefect_gcp.workers.cloud_run.CloudRunWorker.kill_infrastructure","title":"kill_infrastructure async","text":"

Stops a job for a cancelled flow run based on the provided infrastructure PID and run configuration.

Source code in prefect_gcp/workers/cloud_run.py
async def kill_infrastructure(\n    self,\n    infrastructure_pid: str,\n    configuration: CloudRunWorkerJobConfiguration,\n    grace_seconds: int = 30,\n):\n    \"\"\"\n    Stops a job for a cancelled flow run based on the provided infrastructure PID\n    and run configuration.\n    \"\"\"\n    if grace_seconds != 30:\n        self._logger.warning(\n            f\"Kill grace period of {grace_seconds}s requested, but GCP does not \"\n            \"support dynamic grace period configuration. See here for more info: \"\n            \"https://cloud.google.com/run/docs/reference/rest/v1/namespaces.jobs/delete\"  # noqa\n        )\n\n    with self._get_client(configuration) as client:\n        await run_sync_in_worker_thread(\n            self._stop_job,\n            client=client,\n            namespace=configuration.project,\n            job_name=infrastructure_pid,\n        )\n
"},{"location":"cloud_run_worker/#prefect_gcp.workers.cloud_run.CloudRunWorker.run","title":"run async","text":"

Executes a flow run within a Cloud Run Job and waits for the flow run to complete.

Parameters:

flow_run (FlowRun), required: The flow run to execute.

configuration (CloudRunWorkerJobConfiguration), required: The configuration to use when executing the flow run.

task_status (Optional[TaskStatus]), default None: The task status object for the current flow run. If provided, the task will be marked as started.

Returns:

CloudRunWorkerResult: A result object containing information about the final state of the flow run.

Source code in prefect_gcp/workers/cloud_run.py
async def run(\n    self,\n    flow_run: \"FlowRun\",\n    configuration: CloudRunWorkerJobConfiguration,\n    task_status: Optional[anyio.abc.TaskStatus] = None,\n) -> CloudRunWorkerResult:\n    \"\"\"\n    Executes a flow run within a Cloud Run Job and waits for the flow run\n    to complete.\n\n    Args:\n        flow_run: The flow run to execute\n        configuration: The configuration to use when executing the flow run.\n        task_status: The task status object for the current flow run. If provided,\n            the task will be marked as started.\n\n    Returns:\n        CloudRunWorkerResult: A result object containing information about the\n            final state of the flow run\n    \"\"\"\n\n    logger = self.get_flow_run_logger(flow_run)\n\n    with self._get_client(configuration) as client:\n        await run_sync_in_worker_thread(\n            self._create_job_and_wait_for_registration,\n            configuration,\n            client,\n            logger,\n        )\n        job_execution = await run_sync_in_worker_thread(\n            self._begin_job_execution, configuration, client, logger\n        )\n\n        if task_status:\n            task_status.started(configuration.job_name)\n\n        result = await run_sync_in_worker_thread(\n            self._watch_job_execution_and_get_result,\n            configuration,\n            client,\n            job_execution,\n            logger,\n        )\n        return result\n
"},{"location":"cloud_run_worker/#prefect_gcp.workers.cloud_run.CloudRunWorkerJobConfiguration","title":"CloudRunWorkerJobConfiguration","text":"

Bases: BaseJobConfiguration

Configuration class used by the Cloud Run Worker to create a Cloud Run Job.

An instance of this class is passed to the Cloud Run worker's run method for each flow run. It contains all information necessary to execute the flow run as a Cloud Run Job.

Attributes:

region (str): The region where the Cloud Run Job resides.

credentials (Optional[GcpCredentials]): The GCP Credentials used to connect to Cloud Run.

job_body (Dict[str, Any]): The job body used to create the Cloud Run Job.

timeout (Optional[int]): The length of time that Prefect will wait for a Cloud Run Job to complete before raising an exception.

keep_job (Optional[bool]): Whether to keep the completed Cloud Run Job on Google Cloud Platform instead of deleting it.

Source code in prefect_gcp/workers/cloud_run.py
class CloudRunWorkerJobConfiguration(BaseJobConfiguration):\n    \"\"\"\n    Configuration class used by the Cloud Run Worker to create a Cloud Run Job.\n\n    An instance of this class is passed to the Cloud Run worker's `run` method\n    for each flow run. It contains all information necessary to execute\n    the flow run as a Cloud Run Job.\n\n    Attributes:\n        region: The region where the Cloud Run Job resides.\n        credentials: The GCP Credentials used to connect to Cloud Run.\n        job_body: The job body used to create the Cloud Run Job.\n        timeout: The length of time that Prefect will wait for a Cloud Run Job.\n        keep_job: Whether to delete the Cloud Run Job after it completes.\n    \"\"\"\n\n    region: str = Field(\n        default=\"us-central1\", description=\"The region where the Cloud Run Job resides.\"\n    )\n    credentials: Optional[GcpCredentials] = Field(\n        title=\"GCP Credentials\",\n        default_factory=GcpCredentials,\n        description=\"The GCP Credentials used to connect to Cloud Run. \"\n        \"If not provided credentials will be inferred from \"\n        \"the local environment.\",\n    )\n    job_body: Dict[str, Any] = Field(template=_get_default_job_body_template())\n    timeout: Optional[int] = Field(\n        default=600,\n        gt=0,\n        le=3600,\n        title=\"Job Timeout\",\n        description=(\n            \"The length of time that Prefect will wait for a Cloud Run Job to complete \"\n            \"before raising an exception.\"\n        ),\n    )\n    keep_job: Optional[bool] = Field(\n        default=False,\n        title=\"Keep Job After Completion\",\n        description=\"Keep the completed Cloud Run Job on Google Cloud Platform.\",\n    )\n\n    @property\n    def project(self) -> str:\n        \"\"\"property for accessing the project from the credentials.\"\"\"\n        return self.credentials.project\n\n    @property\n    def job_name(self) -> str:\n        \"\"\"property for accessing the name from the job metadata.\"\"\"\n        return self.job_body[\"metadata\"][\"name\"]\n\n    def prepare_for_flow_run(\n        self,\n        flow_run: \"FlowRun\",\n        deployment: Optional[\"DeploymentResponse\"] = None,\n        flow: Optional[\"Flow\"] = None,\n    ):\n        \"\"\"\n        Prepares the job configuration for a flow run.\n\n        Ensures that necessary values are present in the job body and that the\n        job body is valid.\n\n        Args:\n            flow_run: The flow run to prepare the job configuration for\n            deployment: The deployment associated with the flow run used for\n                preparation.\n            flow: The flow associated with the flow run used for preparation.\n        \"\"\"\n        super().prepare_for_flow_run(flow_run, deployment, flow)\n\n        self._populate_envs()\n        self._populate_or_format_command()\n        self._format_args_if_present()\n        self._populate_image_if_not_present()\n        self._populate_name_if_not_present()\n\n    def _populate_envs(self):\n        \"\"\"Populate environment variables. BaseWorker.prepare_for_flow_run handles\n        putting the environment variables in the `env` attribute. 
This method\n        moves them into the jobs body\"\"\"\n        envs = [{\"name\": k, \"value\": v} for k, v in self.env.items()]\n        self.job_body[\"spec\"][\"template\"][\"spec\"][\"template\"][\"spec\"][\"containers\"][0][\n            \"env\"\n        ] = envs\n\n    def _populate_name_if_not_present(self):\n        \"\"\"Adds the flow run name to the job if one is not already provided.\"\"\"\n        try:\n            if \"name\" not in self.job_body[\"metadata\"]:\n                base_job_name = slugify_name(self.name)\n                job_name = f\"{base_job_name}-{uuid4().hex}\"\n                self.job_body[\"metadata\"][\"name\"] = job_name\n        except KeyError:\n            raise ValueError(\"Unable to verify name due to invalid job body template.\")\n\n    def _populate_image_if_not_present(self):\n        \"\"\"Adds the latest prefect image to the job if one is not already provided.\"\"\"\n        try:\n            if (\n                \"image\"\n                not in self.job_body[\"spec\"][\"template\"][\"spec\"][\"template\"][\"spec\"][\n                    \"containers\"\n                ][0]\n            ):\n                self.job_body[\"spec\"][\"template\"][\"spec\"][\"template\"][\"spec\"][\n                    \"containers\"\n                ][0][\"image\"] = f\"docker.io/{get_prefect_image_name()}\"\n        except KeyError:\n            raise ValueError(\"Unable to verify image due to invalid job body template.\")\n\n    def _populate_or_format_command(self):\n        \"\"\"\n        Ensures that the command is present in the job manifest. Populates the command\n        with the `prefect -m prefect.engine` if a command is not present.\n        \"\"\"\n        try:\n            command = self.job_body[\"spec\"][\"template\"][\"spec\"][\"template\"][\"spec\"][\n                \"containers\"\n            ][0].get(\"command\")\n            if command is None:\n                self.job_body[\"spec\"][\"template\"][\"spec\"][\"template\"][\"spec\"][\n                    \"containers\"\n                ][0][\"command\"] = shlex.split(self._base_flow_run_command())\n            elif isinstance(command, str):\n                self.job_body[\"spec\"][\"template\"][\"spec\"][\"template\"][\"spec\"][\n                    \"containers\"\n                ][0][\"command\"] = shlex.split(command)\n        except KeyError:\n            raise ValueError(\n                \"Unable to verify command due to invalid job body template.\"\n            )\n\n    def _format_args_if_present(self):\n        try:\n            args = self.job_body[\"spec\"][\"template\"][\"spec\"][\"template\"][\"spec\"][\n                \"containers\"\n            ][0].get(\"args\")\n            if args is not None and isinstance(args, str):\n                self.job_body[\"spec\"][\"template\"][\"spec\"][\"template\"][\"spec\"][\n                    \"containers\"\n                ][0][\"args\"] = shlex.split(args)\n        except KeyError:\n            raise ValueError(\"Unable to verify args due to invalid job body template.\")\n\n    @validator(\"job_body\")\n    def _ensure_job_includes_all_required_components(cls, value: Dict[str, Any]):\n        \"\"\"\n        Ensures that the job body includes all required components.\n        \"\"\"\n        patch = JsonPatch.from_diff(value, _get_base_job_body())\n        missing_paths = sorted([op[\"path\"] for op in patch if op[\"op\"] == \"add\"])\n        if missing_paths:\n            raise ValueError(\n                \"Job is missing 
required attributes at the following paths: \"\n                f\"{', '.join(missing_paths)}\"\n            )\n        return value\n\n    @validator(\"job_body\")\n    def _ensure_job_has_compatible_values(cls, value: Dict[str, Any]):\n        \"\"\"Ensure that the job body has compatible values.\"\"\"\n        patch = JsonPatch.from_diff(value, _get_base_job_body())\n        incompatible = sorted(\n            [\n                f\"{op['path']} must have value {op['value']!r}\"\n                for op in patch\n                if op[\"op\"] == \"replace\"\n            ]\n        )\n        if incompatible:\n            raise ValueError(\n                \"Job has incompatible values for the following attributes: \"\n                f\"{', '.join(incompatible)}\"\n            )\n        return value\n
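
Deployments targeting a Cloud Run work pool can override these settings through job variables. The following is a minimal sketch, assuming a hypothetical flow object my_flow and the work pool created earlier; the values simply illustrate the documented formats:

from prefect import flow

@flow
def my_flow():
    ...

# Hypothetical deployment-level overrides for a 'cloud-run' work pool.
my_flow.deploy(
    name="cloud-run-overrides",
    work_pool_name="my-cloud-run-pool",
    image="us-docker.pkg.dev/my-project/my-repo/my-flow:latest",
    job_variables={
        "region": "europe-west1",  # where the Cloud Run Job runs
        "timeout": 1800,           # seconds Prefect waits for the job to complete
        "keep_job": True,          # keep the finished job on GCP for inspection
        "cpu": "1000m",            # CPU format expected by the work pool variables
        "memory": "512Mi",         # memory format expected by the work pool variables
    },
)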
"},{"location":"cloud_run_worker/#prefect_gcp.workers.cloud_run.CloudRunWorkerJobConfiguration-attributes","title":"Attributes","text":""},{"location":"cloud_run_worker/#prefect_gcp.workers.cloud_run.CloudRunWorkerJobConfiguration.job_name","title":"job_name: str property","text":"

property for accessing the name from the job metadata.

"},{"location":"cloud_run_worker/#prefect_gcp.workers.cloud_run.CloudRunWorkerJobConfiguration.project","title":"project: str property","text":"

property for accessing the project from the credentials.

"},{"location":"cloud_run_worker/#prefect_gcp.workers.cloud_run.CloudRunWorkerJobConfiguration-functions","title":"Functions","text":""},{"location":"cloud_run_worker/#prefect_gcp.workers.cloud_run.CloudRunWorkerJobConfiguration.prepare_for_flow_run","title":"prepare_for_flow_run","text":"

Prepares the job configuration for a flow run.

Ensures that necessary values are present in the job body and that the job body is valid.

Parameters:

flow_run (FlowRun), required: The flow run to prepare the job configuration for.

deployment (Optional[DeploymentResponse]), default None: The deployment associated with the flow run used for preparation.

flow (Optional[Flow]), default None: The flow associated with the flow run used for preparation.

Source code in prefect_gcp/workers/cloud_run.py
def prepare_for_flow_run(\n    self,\n    flow_run: \"FlowRun\",\n    deployment: Optional[\"DeploymentResponse\"] = None,\n    flow: Optional[\"Flow\"] = None,\n):\n    \"\"\"\n    Prepares the job configuration for a flow run.\n\n    Ensures that necessary values are present in the job body and that the\n    job body is valid.\n\n    Args:\n        flow_run: The flow run to prepare the job configuration for\n        deployment: The deployment associated with the flow run used for\n            preparation.\n        flow: The flow associated with the flow run used for preparation.\n    \"\"\"\n    super().prepare_for_flow_run(flow_run, deployment, flow)\n\n    self._populate_envs()\n    self._populate_or_format_command()\n    self._format_args_if_present()\n    self._populate_image_if_not_present()\n    self._populate_name_if_not_present()\n
"},{"location":"cloud_run_worker/#prefect_gcp.workers.cloud_run.CloudRunWorkerResult","title":"CloudRunWorkerResult","text":"

Bases: BaseWorkerResult

Contains information about the final state of a completed process

Source code in prefect_gcp/workers/cloud_run.py
class CloudRunWorkerResult(BaseWorkerResult):\n    \"\"\"Contains information about the final state of a completed process\"\"\"\n
"},{"location":"cloud_run_worker/#prefect_gcp.workers.cloud_run.CloudRunWorkerVariables","title":"CloudRunWorkerVariables","text":"

Bases: BaseVariables

Default variables for the Cloud Run worker.

The schema for this class is used to populate the variables section of the default base job template.

Source code in prefect_gcp/workers/cloud_run.py
class CloudRunWorkerVariables(BaseVariables):\n    \"\"\"\n    Default variables for the Cloud Run worker.\n\n    The schema for this class is used to populate the `variables` section of the default\n    base job template.\n    \"\"\"\n\n    region: str = Field(\n        default=\"us-central1\",\n        description=\"The region where the Cloud Run Job resides.\",\n        example=\"us-central1\",\n    )\n    credentials: Optional[GcpCredentials] = Field(\n        title=\"GCP Credentials\",\n        default_factory=GcpCredentials,\n        description=\"The GCP Credentials used to initiate the \"\n        \"Cloud Run Job. If not provided credentials will be \"\n        \"inferred from the local environment.\",\n    )\n    image: Optional[str] = Field(\n        default=None,\n        title=\"Image Name\",\n        description=(\n            \"The image to use for a new Cloud Run Job. \"\n            \"If not set, the latest Prefect image will be used. \"\n            \"See https://cloud.google.com/run/docs/deploying#images.\"\n        ),\n        example=\"docker.io/prefecthq/prefect:2-latest\",\n    )\n    cpu: Optional[str] = Field(\n        default=None,\n        title=\"CPU\",\n        description=(\n            \"The amount of compute allocated to the Cloud Run Job. \"\n            \"(1000m = 1 CPU). See \"\n            \"https://cloud.google.com/run/docs/configuring/cpu#setting-jobs.\"\n        ),\n        example=\"1000m\",\n        regex=r\"^(\\d*000)m$\",\n    )\n    memory: Optional[str] = Field(\n        default=None,\n        title=\"Memory\",\n        description=(\n            \"The amount of memory allocated to the Cloud Run Job. \"\n            \"Must be specified in units of 'G', 'Gi', 'M', or 'Mi'. \"\n            \"See https://cloud.google.com/run/docs/configuring/memory-limits#setting.\"\n        ),\n        example=\"512Mi\",\n        regex=r\"^\\d+(?:G|Gi|M|Mi)$\",\n    )\n    vpc_connector_name: Optional[str] = Field(\n        default=None,\n        title=\"VPC Connector Name\",\n        description=\"The name of the VPC connector to use for the Cloud Run Job.\",\n    )\n    service_account_name: Optional[str] = Field(\n        default=None,\n        title=\"Service Account Name\",\n        description=\"The name of the service account to use for the task execution \"\n        \"of Cloud Run Job. By default Cloud Run jobs run as the default \"\n        \"Compute Engine Service Account. \",\n        example=\"service-account@example.iam.gserviceaccount.com\",\n    )\n    keep_job: Optional[bool] = Field(\n        default=False,\n        title=\"Keep Job After Completion\",\n        description=\"Keep the completed Cloud Run Job after it has run.\",\n    )\n    timeout: Optional[int] = Field(\n        default=600,\n        gt=0,\n        le=3600,\n        title=\"Job Timeout\",\n        description=(\n            \"The length of time that Prefect will wait for Cloud Run Job state changes.\"\n        ),\n    )\n
"},{"location":"cloud_run_worker/#prefect_gcp.workers.cloud_run-functions","title":"Functions","text":""},{"location":"cloud_run_worker_v2/","title":"Cloud Run V2","text":""},{"location":"cloud_run_worker_v2/#prefect_gcp.workers.cloud_run_v2","title":"prefect_gcp.workers.cloud_run_v2","text":""},{"location":"cloud_run_worker_v2/#prefect_gcp.workers.cloud_run_v2-classes","title":"Classes","text":""},{"location":"cloud_run_worker_v2/#prefect_gcp.workers.cloud_run_v2.CloudRunWorkerJobV2Configuration","title":"CloudRunWorkerJobV2Configuration","text":"

Bases: BaseJobConfiguration

The configuration for the Cloud Run worker V2.

The schema for this class is used to populate the job_body section of the default base job template.

Source code in prefect_gcp/workers/cloud_run_v2.py
class CloudRunWorkerJobV2Configuration(BaseJobConfiguration):\n    \"\"\"\n    The configuration for the Cloud Run worker V2.\n\n    The schema for this class is used to populate the `job_body` section of the\n    default base job template.\n    \"\"\"\n\n    credentials: GcpCredentials = Field(\n        title=\"GCP Credentials\",\n        default_factory=GcpCredentials,\n        description=(\n            \"The GCP Credentials used to connect to Cloud Run. \"\n            \"If not provided credentials will be inferred from \"\n            \"the local environment.\"\n        ),\n    )\n    job_body: Dict[str, Any] = Field(\n        template=_get_default_job_body_template(),\n    )\n    keep_job: bool = Field(\n        default=False,\n        title=\"Keep Job After Completion\",\n        description=\"Keep the completed Cloud run job on Google Cloud Platform.\",\n    )\n    region: str = Field(\n        default=\"us-central1\",\n        description=\"The region in which to run the Cloud Run job\",\n    )\n    timeout: int = Field(\n        default=600,\n        gt=0,\n        le=86400,\n        description=(\n            \"The length of time that Prefect will wait for a Cloud Run Job to \"\n            \"complete before raising an exception.\"\n        ),\n    )\n    _job_name: str = PrivateAttr(default=None)\n\n    @property\n    def project(self) -> str:\n        \"\"\"\n        Returns the GCP project associated with the credentials.\n\n        Returns:\n            str: The GCP project associated with the credentials.\n        \"\"\"\n        return self.credentials.project\n\n    @property\n    def job_name(self) -> str:\n        \"\"\"\n        Returns the name of the job.\n\n        Returns:\n            str: The name of the job.\n        \"\"\"\n        if self._job_name is None:\n            base_job_name = slugify_name(self.name)\n            job_name = f\"{base_job_name}-{uuid4().hex}\"\n            self._job_name = job_name\n\n        return self._job_name\n\n    def prepare_for_flow_run(\n        self,\n        flow_run: \"FlowRun\",\n        deployment: Optional[\"DeploymentResponse\"] = None,\n        flow: Optional[\"Flow\"] = None,\n    ):\n        \"\"\"\n        Prepares the job configuration for a flow run.\n\n        Ensures that necessary values are present in the job body and that the\n        job body is valid.\n\n        Args:\n            flow_run: The flow run to prepare the job configuration for\n            deployment: The deployment associated with the flow run used for\n                preparation.\n            flow: The flow associated with the flow run used for preparation.\n        \"\"\"\n        super().prepare_for_flow_run(\n            flow_run=flow_run,\n            deployment=deployment,\n            flow=flow,\n        )\n\n        self._populate_env()\n        self._populate_or_format_command()\n        self._format_args_if_present()\n        self._populate_image_if_not_present()\n        self._populate_timeout()\n        self._populate_vpc_if_present()\n\n    def _populate_timeout(self):\n        \"\"\"\n        Populates the job body with the timeout.\n        \"\"\"\n        self.job_body[\"template\"][\"template\"][\"timeout\"] = f\"{self.timeout}s\"\n\n    def _populate_env(self):\n        \"\"\"\n        Populates the job body with environment variables.\n        \"\"\"\n        envs = [{\"name\": k, \"value\": v} for k, v in self.env.items()]\n\n        self.job_body[\"template\"][\"template\"][\"containers\"][0][\"env\"] = envs\n\n    def 
_populate_image_if_not_present(self):\n        \"\"\"\n        Populates the job body with the image if not present.\n        \"\"\"\n        if \"image\" not in self.job_body[\"template\"][\"template\"][\"containers\"][0]:\n            self.job_body[\"template\"][\"template\"][\"containers\"][0][\n                \"image\"\n            ] = f\"docker.io/{get_prefect_image_name()}\"\n\n    def _populate_or_format_command(self):\n        \"\"\"\n        Populates the job body with the command if not present.\n        \"\"\"\n        command = self.job_body[\"template\"][\"template\"][\"containers\"][0].get(\"command\")\n\n        if command is None:\n            self.job_body[\"template\"][\"template\"][\"containers\"][0][\n                \"command\"\n            ] = shlex.split(self._base_flow_run_command())\n        elif isinstance(command, str):\n            self.job_body[\"template\"][\"template\"][\"containers\"][0][\n                \"command\"\n            ] = shlex.split(command)\n\n    def _format_args_if_present(self):\n        \"\"\"\n        Formats the job body args if present.\n        \"\"\"\n        args = self.job_body[\"template\"][\"template\"][\"containers\"][0].get(\"args\")\n\n        if args is not None and isinstance(args, str):\n            self.job_body[\"template\"][\"template\"][\"containers\"][0][\n                \"args\"\n            ] = shlex.split(args)\n\n    def _populate_vpc_if_present(self):\n        \"\"\"\n        Populates the job body with the VPC connector if present.\n        \"\"\"\n        if self.job_body[\"template\"][\"template\"].get(\"vpcAccess\") is not None:\n            self.job_body[\"template\"][\"template\"][\"vpcAccess\"] = {\n                \"connector\": self.job_body[\"template\"][\"template\"][\"vpcAccess\"],\n            }\n\n    # noinspection PyMethodParameters\n    @validator(\"job_body\")\n    def _ensure_job_includes_all_required_components(cls, value: Dict[str, Any]):\n        \"\"\"\n        Ensures that the job body includes all required components.\n\n        Args:\n            value: The job body to validate.\n        Returns:\n            The validated job body.\n        \"\"\"\n        patch = JsonPatch.from_diff(value, _get_base_job_body())\n\n        missing_paths = sorted([op[\"path\"] for op in patch if op[\"op\"] == \"add\"])\n\n        if missing_paths:\n            raise ValueError(\n                f\"Job body is missing required components: {', '.join(missing_paths)}\"\n            )\n\n        return value\n\n    # noinspection PyMethodParameters\n    @validator(\"job_body\")\n    def _ensure_job_has_compatible_values(cls, value: Dict[str, Any]):\n        \"\"\"Ensure that the job body has compatible values.\"\"\"\n        patch = JsonPatch.from_diff(value, _get_base_job_body())\n        incompatible = sorted(\n            [\n                f\"{op['path']} must have value {op['value']!r}\"\n                for op in patch\n                if op[\"op\"] == \"replace\"\n            ]\n        )\n        if incompatible:\n            raise ValueError(\n                \"Job has incompatible values for the following attributes: \"\n                f\"{', '.join(incompatible)}\"\n            )\n        return value\n
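
The V2 configuration accepts per-deployment overrides in the same way as the v1 worker, with a higher timeout ceiling of 86400 seconds. A brief, hypothetical sketch (pool and image names are placeholders):

from prefect import flow

@flow
def my_flow():
    ...

# Assumes a work pool of type 'cloud-run-v2' named 'my-cloud-run-v2-pool' exists.
my_flow.deploy(
    name="cloud-run-v2-deployment",
    work_pool_name="my-cloud-run-v2-pool",
    image="us-docker.pkg.dev/my-project/my-repo/my-flow:latest",
    job_variables={
        "region": "us-east1",
        "timeout": 3600,    # the V2 configuration allows up to 86400 seconds
        "keep_job": False,
    },
)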
"},{"location":"cloud_run_worker_v2/#prefect_gcp.workers.cloud_run_v2.CloudRunWorkerJobV2Configuration-attributes","title":"Attributes","text":""},{"location":"cloud_run_worker_v2/#prefect_gcp.workers.cloud_run_v2.CloudRunWorkerJobV2Configuration.job_name","title":"job_name: str property","text":"

Returns the name of the job.

Returns:

str: The name of the job.

"},{"location":"cloud_run_worker_v2/#prefect_gcp.workers.cloud_run_v2.CloudRunWorkerJobV2Configuration.project","title":"project: str property","text":"

Returns the GCP project associated with the credentials.

Returns:

str: The GCP project associated with the credentials.

"},{"location":"cloud_run_worker_v2/#prefect_gcp.workers.cloud_run_v2.CloudRunWorkerJobV2Configuration-functions","title":"Functions","text":""},{"location":"cloud_run_worker_v2/#prefect_gcp.workers.cloud_run_v2.CloudRunWorkerJobV2Configuration.prepare_for_flow_run","title":"prepare_for_flow_run","text":"

Prepares the job configuration for a flow run.

Ensures that necessary values are present in the job body and that the job body is valid.

Parameters:

flow_run (FlowRun, required): The flow run to prepare the job configuration for.

deployment (Optional[DeploymentResponse], default None): The deployment associated with the flow run used for preparation.

flow (Optional[Flow], default None): The flow associated with the flow run used for preparation.

Source code in prefect_gcp/workers/cloud_run_v2.py
def prepare_for_flow_run(\n    self,\n    flow_run: \"FlowRun\",\n    deployment: Optional[\"DeploymentResponse\"] = None,\n    flow: Optional[\"Flow\"] = None,\n):\n    \"\"\"\n    Prepares the job configuration for a flow run.\n\n    Ensures that necessary values are present in the job body and that the\n    job body is valid.\n\n    Args:\n        flow_run: The flow run to prepare the job configuration for\n        deployment: The deployment associated with the flow run used for\n            preparation.\n        flow: The flow associated with the flow run used for preparation.\n    \"\"\"\n    super().prepare_for_flow_run(\n        flow_run=flow_run,\n        deployment=deployment,\n        flow=flow,\n    )\n\n    self._populate_env()\n    self._populate_or_format_command()\n    self._format_args_if_present()\n    self._populate_image_if_not_present()\n    self._populate_timeout()\n    self._populate_vpc_if_present()\n
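prepare_for_flow_run normalizes the first container before submission: a missing image falls back to the default Prefect image, and string commands or args are split into lists with shlex. A small sketch of that normalization with illustrative values; the default flow-run command shown is an assumption, not taken from this page.

```python
# Sketch of the string-to-list normalization applied to the container spec;
# all values are illustrative only.
import shlex

container = {
    "command": "python -m prefect.engine",  # assumed default flow-run command
    "args": "--log-level INFO",
}

if isinstance(container.get("command"), str):
    container["command"] = shlex.split(container["command"])
if isinstance(container.get("args"), str):
    container["args"] = shlex.split(container["args"])

print(container["command"])  # ['python', '-m', 'prefect.engine']
print(container["args"])     # ['--log-level', 'INFO']
```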
"},{"location":"cloud_run_worker_v2/#prefect_gcp.workers.cloud_run_v2.CloudRunWorkerV2","title":"CloudRunWorkerV2","text":"

Bases: BaseWorker

The Cloud Run worker V2.

Source code in prefect_gcp/workers/cloud_run_v2.py
class CloudRunWorkerV2(BaseWorker):\n    \"\"\"\n    The Cloud Run worker V2.\n    \"\"\"\n\n    type = \"cloud-run-v2\"\n    job_configuration = CloudRunWorkerJobV2Configuration\n    job_configuration_variables = CloudRunWorkerV2Variables\n    _description = \"Execute flow runs within containers on Google Cloud Run (V2 API). Requires a Google Cloud Platform account.\"  # noqa\n    _display_name = \"Google Cloud Run V2\"\n    _documentation_url = \"https://prefecthq.github.io/prefect-gcp/worker_v2/\"\n    _logo_url = \"https://images.ctfassets.net/gm98wzqotmnx/4SpnOBvMYkHp6z939MDKP6/549a91bc1ce9afd4fb12c68db7b68106/social-icon-google-cloud-1200-630.png?h=250\"  # noqa\n\n    async def run(\n        self,\n        flow_run: \"FlowRun\",\n        configuration: CloudRunWorkerJobV2Configuration,\n        task_status: Optional[TaskStatus] = None,\n    ) -> CloudRunJobV2Result:\n        \"\"\"\n        Runs the flow run on Cloud Run and waits for it to complete.\n\n        Args:\n            flow_run: The flow run to run.\n            configuration: The configuration for the job.\n            task_status: The task status to update.\n\n        Returns:\n            The result of the job.\n        \"\"\"\n        logger = self.get_flow_run_logger(flow_run)\n\n        with self._get_client(configuration=configuration) as cr_client:\n            await run_sync_in_worker_thread(\n                self._create_job_and_wait_for_registration,\n                configuration=configuration,\n                cr_client=cr_client,\n                logger=logger,\n            )\n\n            execution = await run_sync_in_worker_thread(\n                self._begin_job_execution,\n                configuration=configuration,\n                cr_client=cr_client,\n                logger=logger,\n            )\n\n            if task_status:\n                task_status.started(configuration.job_name)\n\n            result = await run_sync_in_worker_thread(\n                self._watch_job_execution_and_get_result,\n                configuration=configuration,\n                cr_client=cr_client,\n                execution=execution,\n                logger=logger,\n            )\n\n            return result\n\n    async def kill_infrastructure(\n        self,\n        infrastructure_pid: str,\n        configuration: CloudRunWorkerJobV2Configuration,\n        grace_seconds: int = 30,\n    ):\n        \"\"\"\n        Stops the Cloud Run job.\n\n        Args:\n            infrastructure_pid: The ID of the infrastructure to stop.\n            configuration: The configuration for the job.\n            grace_seconds: The number of seconds to wait before stopping the job.\n        \"\"\"\n        if grace_seconds != 30:\n            self._logger.warning(\n                f\"Kill grace period of {grace_seconds}s requested, but GCP does not \"\n                \"support dynamic grace period configuration. 
See here for more info: \"\n                \"https://cloud.google.com/run/docs/reference/rest/v1/namespaces.jobs/delete\"  # noqa\n            )\n\n        with self._get_client(configuration=configuration) as cr_client:\n            await run_sync_in_worker_thread(\n                self._stop_job,\n                cr_client=cr_client,\n                configuration=configuration,\n                job_name=infrastructure_pid,\n            )\n\n    @staticmethod\n    def _get_client(\n        configuration: CloudRunWorkerJobV2Configuration,\n    ) -> ResourceWarning:\n        \"\"\"\n        Get the base client needed for interacting with GCP Cloud Run V2 API.\n\n        Returns:\n            Resource: The base client needed for interacting with GCP Cloud Run V2 API.\n        \"\"\"\n        api_endpoint = \"https://run.googleapis.com\"\n        gcp_creds = configuration.credentials.get_credentials_from_service_account()\n\n        options = ClientOptions(api_endpoint=api_endpoint)\n\n        return (\n            discovery.build(\n                \"run\",\n                \"v2\",\n                client_options=options,\n                credentials=gcp_creds,\n                num_retries=3,  # Set to 3 in case of intermittent/connection issues\n            )\n            .projects()\n            .locations()\n        )\n\n    def _create_job_and_wait_for_registration(\n        self,\n        configuration: CloudRunWorkerJobV2Configuration,\n        cr_client: Resource,\n        logger: PrefectLogAdapter,\n    ):\n        \"\"\"\n        Creates the Cloud Run job and waits for it to register.\n\n        Args:\n            configuration: The configuration for the job.\n            cr_client: The Cloud Run client.\n            logger: The logger to use.\n        \"\"\"\n        try:\n            logger.info(f\"Creating Cloud Run JobV2 {configuration.job_name}\")\n\n            JobV2.create(\n                cr_client=cr_client,\n                project=configuration.project,\n                location=configuration.region,\n                job_id=configuration.job_name,\n                body=configuration.job_body,\n            )\n        except HttpError as exc:\n            self._create_job_error(\n                exc=exc,\n                configuration=configuration,\n            )\n\n        try:\n            self._wait_for_job_creation(\n                cr_client=cr_client,\n                configuration=configuration,\n                logger=logger,\n            )\n        except Exception as exc:\n            logger.critical(\n                f\"Failed to create Cloud Run JobV2 {configuration.job_name}.\\n{exc}\"\n            )\n\n            if not configuration.keep_job:\n                try:\n                    JobV2.delete(\n                        cr_client=cr_client,\n                        project=configuration.project,\n                        location=configuration.region,\n                        job_name=configuration.job_name,\n                    )\n                except Exception as exc2:\n                    logger.critical(\n                        f\"Failed to delete Cloud Run JobV2 {configuration.job_name}.\"\n                        f\"\\n{exc2}\"\n                    )\n\n            raise\n\n    @staticmethod\n    def _wait_for_job_creation(\n        cr_client: Resource,\n        configuration: CloudRunWorkerJobV2Configuration,\n        logger: PrefectLogAdapter,\n        poll_interval: int = 5,\n    ):\n        \"\"\"\n        Waits for the Cloud Run job to be 
created.\n\n        Args:\n            cr_client: The Cloud Run client.\n            configuration: The configuration for the job.\n            logger: The logger to use.\n            poll_interval: The interval to poll the Cloud Run job, defaults to 5\n                seconds.\n        \"\"\"\n        job = JobV2.get(\n            cr_client=cr_client,\n            project=configuration.project,\n            location=configuration.region,\n            job_name=configuration.job_name,\n        )\n\n        t0 = time.time()\n\n        while not job.is_ready():\n            if not (ready_condition := job.get_ready_condition()):\n                ready_condition = \"waiting for condition update\"\n\n            logger.info(f\"Current Job Condition: {ready_condition}\")\n\n            job = JobV2.get(\n                cr_client=cr_client,\n                project=configuration.project,\n                location=configuration.region,\n                job_name=configuration.job_name,\n            )\n\n            elapsed_time = time.time() - t0\n\n            if elapsed_time > configuration.timeout:\n                raise RuntimeError(\n                    f\"Timeout of {configuration.timeout} seconds reached while \"\n                    f\"waiting for Cloud Run Job V2 {configuration.job_name} to be \"\n                    \"created.\"\n                )\n\n            time.sleep(poll_interval)\n\n    @staticmethod\n    def _create_job_error(\n        exc: HttpError,\n        configuration: CloudRunWorkerJobV2Configuration,\n    ):\n        \"\"\"\n        Creates a formatted error message for the Cloud Run V2 API errors\n        \"\"\"\n        # noinspection PyUnresolvedReferences\n        if exc.status_code == 404:\n            raise RuntimeError(\n                f\"Failed to find resources at {exc.uri}. Confirm that region\"\n                f\" '{configuration.region}' is the correct region for your Cloud\"\n                f\" Run Job and that {configuration.project} is the correct GCP \"\n                f\" project. 
If your project ID is not correct, you are using a \"\n                f\"Credentials block with permissions for the wrong project.\"\n            ) from exc\n\n        raise exc\n\n    def _begin_job_execution(\n        self,\n        cr_client: Resource,\n        configuration: CloudRunWorkerJobV2Configuration,\n        logger: PrefectLogAdapter,\n    ) -> ExecutionV2:\n        \"\"\"\n        Begins the Cloud Run job execution.\n\n        Args:\n            cr_client: The Cloud Run client.\n            configuration: The configuration for the job.\n            logger: The logger to use.\n\n        Returns:\n            The Cloud Run job execution.\n        \"\"\"\n        try:\n            logger.info(\n                f\"Submitting Cloud Run Job V2 {configuration.job_name} for execution...\"\n            )\n\n            submission = JobV2.run(\n                cr_client=cr_client,\n                project=configuration.project,\n                location=configuration.region,\n                job_name=configuration.job_name,\n            )\n\n            job_execution = ExecutionV2.get(\n                cr_client=cr_client,\n                execution_id=submission[\"metadata\"][\"name\"],\n            )\n\n            command = (\n                \" \".join(configuration.command)\n                if configuration.command\n                else \"default container command\"\n            )\n\n            logger.info(\n                f\"Cloud Run Job V2 {configuration.job_name} submitted for execution \"\n                f\"with command: {command}\"\n            )\n\n            return job_execution\n        except Exception as exc:\n            self._job_run_submission_error(\n                exc=exc,\n                configuration=configuration,\n            )\n            raise\n\n    def _watch_job_execution_and_get_result(\n        self,\n        cr_client: Resource,\n        configuration: CloudRunWorkerJobV2Configuration,\n        execution: ExecutionV2,\n        logger: PrefectLogAdapter,\n        poll_interval: int = 5,\n    ) -> CloudRunJobV2Result:\n        \"\"\"\n        Watch the job execution and get the result.\n\n        Args:\n            cr_client (Resource): The base client needed for interacting with GCP\n                Cloud Run V2 API.\n            configuration (CloudRunWorkerJobV2Configuration): The configuration for\n                the job.\n            execution (ExecutionV2): The execution to watch.\n            logger (PrefectLogAdapter): The logger to use.\n            poll_interval (int): The number of seconds to wait between polls.\n                Defaults to 5 seconds.\n\n        Returns:\n            The result of the job.\n        \"\"\"\n        try:\n            execution = self._watch_job_execution(\n                cr_client=cr_client,\n                configuration=configuration,\n                execution=execution,\n                poll_interval=poll_interval,\n            )\n        except Exception as exc:\n            logger.critical(\n                f\"Encountered an exception while waiting for job run completion - \"\n                f\"{exc}\"\n            )\n            raise\n\n        if execution.succeeded():\n            status_code = 0\n            logger.info(f\"Cloud Run Job V2 {configuration.job_name} succeeded\")\n        else:\n            status_code = 1\n            error_mg = execution.condition_after_completion().get(\"message\")\n            logger.error(\n                f\"Cloud Run Job V2 {configuration.job_name} 
failed - {error_mg}\"\n            )\n\n        logger.info(f\"Job run logs can be found on GCP at: {execution.logUri}\")\n\n        if not configuration.keep_job:\n            logger.info(\n                f\"Deleting completed Cloud Run Job {configuration.job_name!r} from \"\n                \"Google Cloud Run...\"\n            )\n\n            try:\n                JobV2.delete(\n                    cr_client=cr_client,\n                    project=configuration.project,\n                    location=configuration.region,\n                    job_name=configuration.job_name,\n                )\n            except Exception as exc:\n                logger.critical(\n                    \"Received an exception while deleting the Cloud Run Job V2 \"\n                    f\"- {configuration.job_name} - {exc}\"\n                )\n\n        return CloudRunJobV2Result(\n            identifier=configuration.job_name,\n            status_code=status_code,\n        )\n\n    # noinspection DuplicatedCode\n    @staticmethod\n    def _watch_job_execution(\n        cr_client: Resource,\n        configuration: CloudRunWorkerJobV2Configuration,\n        execution: ExecutionV2,\n        poll_interval: int,\n    ) -> ExecutionV2:\n        \"\"\"\n        Update execution status until it is no longer running or timeout is reached.\n\n        Args:\n            cr_client (Resource): The base client needed for interacting with GCP\n                Cloud Run V2 API.\n            configuration (CloudRunWorkerJobV2Configuration): The configuration for\n                the job.\n            execution (ExecutionV2): The execution to watch.\n            poll_interval (int): The number of seconds to wait between polls.\n\n        Returns:\n            The execution.\n        \"\"\"\n        t0 = time.time()\n\n        while execution.is_running():\n            execution = ExecutionV2.get(\n                cr_client=cr_client,\n                execution_id=execution.name,\n            )\n\n            elapsed_time = time.time() - t0\n\n            if elapsed_time > configuration.timeout:\n                raise RuntimeError(\n                    f\"Timeout of {configuration.timeout} seconds reached while \"\n                    f\"waiting for Cloud Run Job V2 {configuration.job_name} to \"\n                    \"complete.\"\n                )\n\n            time.sleep(poll_interval)\n\n        return execution\n\n    @staticmethod\n    def _job_run_submission_error(\n        exc: Exception,\n        configuration: CloudRunWorkerJobV2Configuration,\n    ):\n        \"\"\"\n        Creates a formatted error message for the Cloud Run V2 API errors\n\n        Args:\n            exc: The exception to format.\n            configuration: The configuration for the job.\n        \"\"\"\n        # noinspection PyUnresolvedReferences\n        if exc.status_code == 404:\n            pat1 = r\"The requested URL [^ ]+ was not found on this server\"\n\n            if re.findall(pat1, str(exc)):\n                # noinspection PyUnresolvedReferences\n                raise RuntimeError(\n                    f\"Failed to find resources at {exc.uri}. \"\n                    f\"Confirm that region '{configuration.region}' is \"\n                    f\"the correct region for your Cloud Run Job \"\n                    f\"and that '{configuration.project}' is the \"\n                    f\"correct GCP project. 
If your project ID is not \"\n                    f\"correct, you are using a Credentials \"\n                    f\"block with permissions for the wrong project.\"\n                ) from exc\n            else:\n                raise exc\n\n    @staticmethod\n    def _stop_job(\n        cr_client: Resource,\n        configuration: CloudRunWorkerJobV2Configuration,\n        job_name: str,\n    ):\n        \"\"\"\n        Stops/deletes the Cloud Run job.\n\n        Args:\n            cr_client: The Cloud Run client.\n            configuration: The configuration for the job.\n            job_name: The name of the job to stop.\n        \"\"\"\n        try:\n            JobV2.delete(\n                cr_client=cr_client,\n                project=configuration.project,\n                location=configuration.region,\n                job_name=job_name,\n            )\n        except Exception as exc:\n            if \"does not exist\" in str(exc):\n                raise InfrastructureNotFound(\n                    f\"Cannot stop Cloud Run Job; the job name {job_name!r} \"\n                    \"could not be found.\"\n                ) from exc\n            raise\n
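A hedged usage sketch: the worker registers under the type string cloud-run-v2 shown above, so it serves work pools of that type. The pool name below is an assumption, and the start(run_once=True) call follows the generic BaseWorker interface rather than anything specific to this class.

```python
# Sketch only: polls an existing cloud-run-v2 work pool once and submits any
# scheduled flow runs to Cloud Run V2. The pool name is illustrative.
import asyncio

from prefect_gcp.workers.cloud_run_v2 import CloudRunWorkerV2


async def main() -> None:
    worker = CloudRunWorkerV2(work_pool_name="my-cloud-run-pool")
    # run_once=True performs a single polling pass instead of running forever.
    await worker.start(run_once=True)


if __name__ == "__main__":
    asyncio.run(main())
```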
"},{"location":"cloud_run_worker_v2/#prefect_gcp.workers.cloud_run_v2.CloudRunWorkerV2-functions","title":"Functions","text":""},{"location":"cloud_run_worker_v2/#prefect_gcp.workers.cloud_run_v2.CloudRunWorkerV2.kill_infrastructure","title":"kill_infrastructure async","text":"

Stops the Cloud Run job.

Parameters:

infrastructure_pid (str, required): The ID of the infrastructure to stop.

configuration (CloudRunWorkerJobV2Configuration, required): The configuration for the job.

grace_seconds (int, default 30): The number of seconds to wait before stopping the job.

Source code in prefect_gcp/workers/cloud_run_v2.py
async def kill_infrastructure(\n    self,\n    infrastructure_pid: str,\n    configuration: CloudRunWorkerJobV2Configuration,\n    grace_seconds: int = 30,\n):\n    \"\"\"\n    Stops the Cloud Run job.\n\n    Args:\n        infrastructure_pid: The ID of the infrastructure to stop.\n        configuration: The configuration for the job.\n        grace_seconds: The number of seconds to wait before stopping the job.\n    \"\"\"\n    if grace_seconds != 30:\n        self._logger.warning(\n            f\"Kill grace period of {grace_seconds}s requested, but GCP does not \"\n            \"support dynamic grace period configuration. See here for more info: \"\n            \"https://cloud.google.com/run/docs/reference/rest/v1/namespaces.jobs/delete\"  # noqa\n        )\n\n    with self._get_client(configuration=configuration) as cr_client:\n        await run_sync_in_worker_thread(\n            self._stop_job,\n            cr_client=cr_client,\n            configuration=configuration,\n            job_name=infrastructure_pid,\n        )\n
"},{"location":"cloud_run_worker_v2/#prefect_gcp.workers.cloud_run_v2.CloudRunWorkerV2.run","title":"run async","text":"

Runs the flow run on Cloud Run and waits for it to complete.

Parameters:

flow_run (FlowRun, required): The flow run to run.

configuration (CloudRunWorkerJobV2Configuration, required): The configuration for the job.

task_status (Optional[TaskStatus], default None): The task status to update.

Returns:

CloudRunJobV2Result: The result of the job.

Source code in prefect_gcp/workers/cloud_run_v2.py
async def run(\n    self,\n    flow_run: \"FlowRun\",\n    configuration: CloudRunWorkerJobV2Configuration,\n    task_status: Optional[TaskStatus] = None,\n) -> CloudRunJobV2Result:\n    \"\"\"\n    Runs the flow run on Cloud Run and waits for it to complete.\n\n    Args:\n        flow_run: The flow run to run.\n        configuration: The configuration for the job.\n        task_status: The task status to update.\n\n    Returns:\n        The result of the job.\n    \"\"\"\n    logger = self.get_flow_run_logger(flow_run)\n\n    with self._get_client(configuration=configuration) as cr_client:\n        await run_sync_in_worker_thread(\n            self._create_job_and_wait_for_registration,\n            configuration=configuration,\n            cr_client=cr_client,\n            logger=logger,\n        )\n\n        execution = await run_sync_in_worker_thread(\n            self._begin_job_execution,\n            configuration=configuration,\n            cr_client=cr_client,\n            logger=logger,\n        )\n\n        if task_status:\n            task_status.started(configuration.job_name)\n\n        result = await run_sync_in_worker_thread(\n            self._watch_job_execution_and_get_result,\n            configuration=configuration,\n            cr_client=cr_client,\n            execution=execution,\n            logger=logger,\n        )\n\n        return result\n
"},{"location":"cloud_run_worker_v2/#prefect_gcp.workers.cloud_run_v2.CloudRunWorkerV2Result","title":"CloudRunWorkerV2Result","text":"

Bases: BaseWorkerResult

The result of a Cloud Run worker V2 job.

Source code in prefect_gcp/workers/cloud_run_v2.py
class CloudRunWorkerV2Result(BaseWorkerResult):\n    \"\"\"\n    The result of a Cloud Run worker V2 job.\n    \"\"\"\n
"},{"location":"cloud_run_worker_v2/#prefect_gcp.workers.cloud_run_v2.CloudRunWorkerV2Variables","title":"CloudRunWorkerV2Variables","text":"

Bases: BaseVariables

Default variables for the Cloud Run worker V2.

The schema for this class is used to populate the variables section of the default base job template.

Source code in prefect_gcp/workers/cloud_run_v2.py
class CloudRunWorkerV2Variables(BaseVariables):\n    \"\"\"\n    Default variables for the Cloud Run worker V2.\n\n    The schema for this class is used to populate the `variables` section of the\n    default base job template.\n    \"\"\"\n\n    credentials: GcpCredentials = Field(\n        title=\"GCP Credentials\",\n        default_factory=GcpCredentials,\n        description=(\n            \"The GCP Credentials used to connect to Cloud Run. \"\n            \"If not provided credentials will be inferred from \"\n            \"the local environment.\"\n        ),\n    )\n    region: str = Field(\n        default=\"us-central1\",\n        description=\"The region in which to run the Cloud Run job\",\n    )\n    image: Optional[str] = Field(\n        default=\"prefecthq/prefect:2-latest\",\n        title=\"Image Name\",\n        description=(\n            \"The image to use for the Cloud Run job. \"\n            \"If not provided the default Prefect image will be used.\"\n        ),\n    )\n    args: List[str] = Field(\n        default_factory=list,\n        description=(\n            \"The arguments to pass to the Cloud Run Job V2's entrypoint command.\"\n        ),\n    )\n    keep_job: bool = Field(\n        default=False,\n        title=\"Keep Job After Completion\",\n        description=\"Keep the completed Cloud run job on Google Cloud Platform.\",\n    )\n    launch_stage: Literal[\n        \"ALPHA\",\n        \"BETA\",\n        \"GA\",\n        \"DEPRECATED\",\n        \"EARLY_ACCESS\",\n        \"PRELAUNCH\",\n        \"UNIMPLEMENTED\",\n        \"LAUNCH_TAG_UNSPECIFIED\",\n    ] = Field(\n        \"BETA\",\n        description=(\n            \"The launch stage of the Cloud Run Job V2. \"\n            \"See https://cloud.google.com/run/docs/about-features-categories \"\n            \"for additional details.\"\n        ),\n    )\n    max_retries: int = Field(\n        default=0,\n        title=\"Max Retries\",\n        description=\"The number of times to retry the Cloud Run job.\",\n    )\n    cpu: str = Field(\n        default=\"1000m\",\n        title=\"CPU\",\n        description=\"The CPU to allocate to the Cloud Run job.\",\n    )\n    memory: str = Field(\n        default=\"512Mi\",\n        title=\"Memory\",\n        description=(\n            \"The memory to allocate to the Cloud Run job along with the units, which\"\n            \"could be: G, Gi, M, Mi.\"\n        ),\n        example=\"512Mi\",\n        pattern=r\"^\\d+(?:G|Gi|M|Mi)$\",\n    )\n    timeout: int = Field(\n        default=600,\n        gt=0,\n        le=86400,\n        title=\"Job Timeout\",\n        description=(\n            \"The length of time that Prefect will wait for a Cloud Run Job to \"\n            \"complete before raising an exception (maximum of 86400 seconds, 1 day).\"\n        ),\n    )\n    vpc_connector_name: Optional[str] = Field(\n        default=None,\n        title=\"VPC Connector Name\",\n        description=\"The name of the VPC connector to use for the Cloud Run job.\",\n    )\n    service_account_name: Optional[str] = Field(\n        default=None,\n        title=\"Service Account Name\",\n        description=(\n            \"The name of the service account to use for the task execution \"\n            \"of Cloud Run Job. By default Cloud Run jobs run as the default \"\n            \"Compute Engine Service Account.\"\n        ),\n        example=\"service-account@example.iam.gserviceaccount.com\",\n    )\n
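The field names above double as per-deployment overrides via job_variables. A hedged sketch follows; the flow, pool, image, and registry names are assumptions, and flow.deploy with job_variables is the general Prefect deployment API rather than anything specific to this page.

```python
# Sketch only: registers a deployment against a cloud-run-v2 work pool and
# overrides a few of the variables documented above. All names are illustrative.
from prefect import flow


@flow(log_prints=True)
def my_flow():
    print("Hello from Cloud Run V2")


if __name__ == "__main__":
    my_flow.deploy(
        name="cloud-run-v2-example",
        work_pool_name="my-cloud-run-pool",
        image="us-docker.pkg.dev/my-project/my-repo/my-flow:latest",
        build=False,  # assume the image already exists in the registry
        push=False,
        job_variables={
            "region": "europe-west1",
            "cpu": "2000m",
            "memory": "1Gi",
            "timeout": 1800,
            "keep_job": False,
        },
    )
```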
"},{"location":"cloud_run_worker_v2/#prefect_gcp.workers.cloud_run_v2-functions","title":"Functions","text":""},{"location":"cloud_storage/","title":"Cloud Storage","text":""},{"location":"cloud_storage/#prefect_gcp.cloud_storage","title":"prefect_gcp.cloud_storage","text":"

Tasks for interacting with GCP Cloud Storage.

"},{"location":"cloud_storage/#prefect_gcp.cloud_storage-classes","title":"Classes","text":""},{"location":"cloud_storage/#prefect_gcp.cloud_storage.DataFrameSerializationFormat","title":"DataFrameSerializationFormat","text":"

Bases: Enum

An enumeration class to represent different file formats and compression options for upload_from_dataframe.

Attributes:

CSV: Representation for 'csv' file format with no compression and its related content type and suffix.

CSV_GZIP: Representation for 'csv' file format with 'gzip' compression and its related content type and suffix.

PARQUET: Representation for 'parquet' file format with no compression and its related content type and suffix.

PARQUET_SNAPPY: Representation for 'parquet' file format with 'snappy' compression and its related content type and suffix.

PARQUET_GZIP: Representation for 'parquet' file format with 'gzip' compression and its related content type and suffix.

Source code in prefect_gcp/cloud_storage.py
class DataFrameSerializationFormat(Enum):\n    \"\"\"\n    An enumeration class to represent different file formats,\n    compression options for upload_from_dataframe\n\n    Attributes:\n        CSV: Representation for 'csv' file format with no compression\n            and its related content type and suffix.\n\n        CSV_GZIP: Representation for 'csv' file format with 'gzip' compression\n            and its related content type and suffix.\n\n        PARQUET: Representation for 'parquet' file format with no compression\n            and its related content type and suffix.\n\n        PARQUET_SNAPPY: Representation for 'parquet' file format\n            with 'snappy' compression and its related content type and suffix.\n\n        PARQUET_GZIP: Representation for 'parquet' file format\n            with 'gzip' compression and its related content type and suffix.\n    \"\"\"\n\n    CSV = (\"csv\", None, \"text/csv\", \".csv\")\n    CSV_GZIP = (\"csv\", \"gzip\", \"application/x-gzip\", \".csv.gz\")\n    PARQUET = (\"parquet\", None, \"application/octet-stream\", \".parquet\")\n    PARQUET_SNAPPY = (\n        \"parquet\",\n        \"snappy\",\n        \"application/octet-stream\",\n        \".snappy.parquet\",\n    )\n    PARQUET_GZIP = (\"parquet\", \"gzip\", \"application/octet-stream\", \".gz.parquet\")\n\n    @property\n    def format(self) -> str:\n        \"\"\"The file format of the current instance.\"\"\"\n        return self.value[0]\n\n    @property\n    def compression(self) -> Union[str, None]:\n        \"\"\"The compression type of the current instance.\"\"\"\n        return self.value[1]\n\n    @property\n    def content_type(self) -> str:\n        \"\"\"The content type of the current instance.\"\"\"\n        return self.value[2]\n\n    @property\n    def suffix(self) -> str:\n        \"\"\"The suffix of the file format of the current instance.\"\"\"\n        return self.value[3]\n\n    def fix_extension_with(self, gcs_blob_path: str) -> str:\n        \"\"\"Fix the extension of a GCS blob.\n\n        Args:\n            gcs_blob_path: The path to the GCS blob to be modified.\n\n        Returns:\n            The modified path to the GCS blob with the new extension.\n        \"\"\"\n        gcs_blob_path = PurePosixPath(gcs_blob_path)\n        folder = gcs_blob_path.parent\n        filename = PurePosixPath(gcs_blob_path.stem).with_suffix(self.suffix)\n        return str(folder.joinpath(filename))\n
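Each member carries its format, compression, content type, and suffix as a tuple, exposed through the properties below. A short usage sketch based on the values defined above:

```python
from prefect_gcp.cloud_storage import DataFrameSerializationFormat

fmt = DataFrameSerializationFormat.CSV_GZIP
print(fmt.format)        # 'csv'
print(fmt.compression)   # 'gzip'
print(fmt.content_type)  # 'application/x-gzip'
print(fmt.suffix)        # '.csv.gz'

# fix_extension_with rewrites a blob path so its extension matches the format.
print(fmt.fix_extension_with("exports/daily.csv"))  # 'exports/daily.csv.gz'
```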
"},{"location":"cloud_storage/#prefect_gcp.cloud_storage.DataFrameSerializationFormat-attributes","title":"Attributes","text":""},{"location":"cloud_storage/#prefect_gcp.cloud_storage.DataFrameSerializationFormat.compression","title":"compression: Union[str, None] property","text":"

The compression type of the current instance.

"},{"location":"cloud_storage/#prefect_gcp.cloud_storage.DataFrameSerializationFormat.content_type","title":"content_type: str property","text":"

The content type of the current instance.

"},{"location":"cloud_storage/#prefect_gcp.cloud_storage.DataFrameSerializationFormat.format","title":"format: str property","text":"

The file format of the current instance.

"},{"location":"cloud_storage/#prefect_gcp.cloud_storage.DataFrameSerializationFormat.suffix","title":"suffix: str property","text":"

The suffix of the file format of the current instance.

"},{"location":"cloud_storage/#prefect_gcp.cloud_storage.DataFrameSerializationFormat-functions","title":"Functions","text":""},{"location":"cloud_storage/#prefect_gcp.cloud_storage.DataFrameSerializationFormat.fix_extension_with","title":"fix_extension_with","text":"

Fix the extension of a GCS blob.

Parameters:

gcs_blob_path (str, required): The path to the GCS blob to be modified.

Returns:

str: The modified path to the GCS blob with the new extension.

Source code in prefect_gcp/cloud_storage.py
def fix_extension_with(self, gcs_blob_path: str) -> str:\n    \"\"\"Fix the extension of a GCS blob.\n\n    Args:\n        gcs_blob_path: The path to the GCS blob to be modified.\n\n    Returns:\n        The modified path to the GCS blob with the new extension.\n    \"\"\"\n    gcs_blob_path = PurePosixPath(gcs_blob_path)\n    folder = gcs_blob_path.parent\n    filename = PurePosixPath(gcs_blob_path.stem).with_suffix(self.suffix)\n    return str(folder.joinpath(filename))\n
"},{"location":"cloud_storage/#prefect_gcp.cloud_storage.GcsBucket","title":"GcsBucket","text":"

Bases: WritableDeploymentStorage, WritableFileSystem, ObjectStorageBlock

Block used to store data using GCP Cloud Storage Buckets.

Note! GcsBucket in prefect-gcp is a unique block, separate from GCS in core Prefect. GcsBucket does not use gcsfs under the hood, instead using the google-cloud-storage package, and offers more configuration and functionality.

Attributes:

bucket (str): Name of the bucket.

gcp_credentials (GcpCredentials): The credentials to authenticate with GCP.

bucket_folder (str): A default path to a folder within the GCS bucket to use for reading and writing objects.

Example

Load stored GCP Cloud Storage Bucket:

from prefect_gcp.cloud_storage import GcsBucket\ngcp_cloud_storage_bucket_block = GcsBucket.load(\"BLOCK_NAME\")\n
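A hedged sketch of creating and saving the block in the first place; the bucket, folder, and credentials block names are assumptions:

```python
from prefect_gcp.cloud_storage import GcsBucket
from prefect_gcp.credentials import GcpCredentials

gcs_bucket = GcsBucket(
    bucket="my-bucket",                                   # illustrative bucket name
    gcp_credentials=GcpCredentials.load("my-gcp-creds"),  # illustrative credentials block
    bucket_folder="prefect-data",                         # optional prefix for reads/writes
)
gcs_bucket.save("BLOCK_NAME", overwrite=True)
```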

Source code in prefect_gcp/cloud_storage.py
class GcsBucket(WritableDeploymentStorage, WritableFileSystem, ObjectStorageBlock):\n    \"\"\"\n    Block used to store data using GCP Cloud Storage Buckets.\n\n    Note! `GcsBucket` in `prefect-gcp` is a unique block, separate from `GCS`\n    in core Prefect. `GcsBucket` does not use `gcsfs` under the hood,\n    instead using the `google-cloud-storage` package, and offers more configuration\n    and functionality.\n\n    Attributes:\n        bucket: Name of the bucket.\n        gcp_credentials: The credentials to authenticate with GCP.\n        bucket_folder: A default path to a folder within the GCS bucket to use\n            for reading and writing objects.\n\n    Example:\n        Load stored GCP Cloud Storage Bucket:\n        ```python\n        from prefect_gcp.cloud_storage import GcsBucket\n        gcp_cloud_storage_bucket_block = GcsBucket.load(\"BLOCK_NAME\")\n        ```\n    \"\"\"\n\n    _logo_url = \"https://cdn.sanity.io/images/3ugk85nk/production/10424e311932e31c477ac2b9ef3d53cefbaad708-250x250.png\"  # noqa\n    _block_type_name = \"GCS Bucket\"\n    _documentation_url = \"https://prefecthq.github.io/prefect-gcp/cloud_storage/#prefect_gcp.cloud_storage.GcsBucket\"  # noqa: E501\n\n    bucket: str = Field(..., description=\"Name of the bucket.\")\n    gcp_credentials: GcpCredentials = Field(\n        default_factory=GcpCredentials,\n        description=\"The credentials to authenticate with GCP.\",\n    )\n    bucket_folder: str = Field(\n        default=\"\",\n        description=(\n            \"A default path to a folder within the GCS bucket to use \"\n            \"for reading and writing objects.\"\n        ),\n    )\n\n    @property\n    def basepath(self) -> str:\n        \"\"\"\n        Read-only property that mirrors the bucket folder.\n\n        Used for deployment.\n        \"\"\"\n        return self.bucket_folder\n\n    @validator(\"bucket_folder\", pre=True, always=True)\n    def _bucket_folder_suffix(cls, value):\n        \"\"\"\n        Ensures that the bucket folder is suffixed with a forward slash.\n        \"\"\"\n        if value != \"\" and not value.endswith(\"/\"):\n            value = f\"{value}/\"\n        return value\n\n    def _resolve_path(self, path: str) -> str:\n        \"\"\"\n        A helper function used in write_path to join `self.bucket_folder` and `path`.\n\n        Args:\n            path: Name of the key, e.g. \"file1\". Each object in your\n                bucket has a unique key (or key name).\n\n        Returns:\n            The joined path.\n        \"\"\"\n        # If bucket_folder provided, it means we won't write to the root dir of\n        # the bucket. So we need to add it on the front of the path.\n        path = (\n            str(PurePosixPath(self.bucket_folder, path)) if self.bucket_folder else path\n        )\n        if path in [\"\", \".\", \"/\"]:\n            # client.bucket.list_blobs(prefix=None) is the proper way\n            # of specifying the root folder of the bucket\n            path = None\n        return path\n\n    @sync_compatible\n    async def get_directory(\n        self, from_path: Optional[str] = None, local_path: Optional[str] = None\n    ) -> List[Union[str, Path]]:\n        \"\"\"\n        Copies a folder from the configured GCS bucket to a local directory.\n        Defaults to copying the entire contents of the block's bucket_folder\n        to the current working directory.\n\n        Args:\n            from_path: Path in GCS bucket to download from. 
Defaults to the block's\n                configured bucket_folder.\n            local_path: Local path to download GCS bucket contents to.\n                Defaults to the current working directory.\n\n        Returns:\n            A list of downloaded file paths.\n        \"\"\"\n        from_path = (\n            self.bucket_folder if from_path is None else self._resolve_path(from_path)\n        )\n\n        if local_path is None:\n            local_path = os.path.abspath(\".\")\n        else:\n            local_path = os.path.abspath(os.path.expanduser(local_path))\n\n        project = self.gcp_credentials.project\n        client = self.gcp_credentials.get_cloud_storage_client(project=project)\n\n        blobs = await run_sync_in_worker_thread(\n            client.list_blobs, self.bucket, prefix=from_path\n        )\n\n        file_paths = []\n        for blob in blobs:\n            blob_path = blob.name\n            if blob_path[-1] == \"/\":\n                # object is a folder and will be created if it contains any objects\n                continue\n            local_file_path = os.path.join(local_path, blob_path)\n            os.makedirs(os.path.dirname(local_file_path), exist_ok=True)\n\n            with disable_run_logger():\n                file_path = await cloud_storage_download_blob_to_file.fn(\n                    bucket=self.bucket,\n                    blob=blob_path,\n                    path=local_file_path,\n                    gcp_credentials=self.gcp_credentials,\n                )\n                file_paths.append(file_path)\n        return file_paths\n\n    @sync_compatible\n    async def put_directory(\n        self,\n        local_path: Optional[str] = None,\n        to_path: Optional[str] = None,\n        ignore_file: Optional[str] = None,\n    ) -> int:\n        \"\"\"\n        Uploads a directory from a given local path to the configured GCS bucket in a\n        given folder.\n\n        Defaults to uploading the entire contents the current working directory to the\n        block's bucket_folder.\n\n        Args:\n            local_path: Path to local directory to upload from.\n            to_path: Path in GCS bucket to upload to. 
Defaults to block's configured\n                bucket_folder.\n            ignore_file: Path to file containing gitignore style expressions for\n                filepaths to ignore.\n\n        Returns:\n            The number of files uploaded.\n        \"\"\"\n        if local_path is None:\n            local_path = os.path.abspath(\".\")\n        else:\n            local_path = os.path.expanduser(local_path)\n\n        to_path = self.bucket_folder if to_path is None else self._resolve_path(to_path)\n\n        included_files = None\n        if ignore_file:\n            with open(ignore_file, \"r\") as f:\n                ignore_patterns = f.readlines()\n            included_files = filter_files(local_path, ignore_patterns)\n\n        uploaded_file_count = 0\n        for local_file_path in Path(local_path).rglob(\"*\"):\n            if (\n                included_files is not None\n                and local_file_path.name not in included_files\n            ):\n                continue\n            elif not local_file_path.is_dir():\n                remote_file_path = str(\n                    PurePosixPath(to_path, local_file_path.relative_to(local_path))\n                )\n                local_file_content = local_file_path.read_bytes()\n                await self.write_path(remote_file_path, content=local_file_content)\n                uploaded_file_count += 1\n\n        return uploaded_file_count\n\n    @sync_compatible\n    async def read_path(self, path: str) -> bytes:\n        \"\"\"\n        Read specified path from GCS and return contents. Provide the entire\n        path to the key in GCS.\n\n        Args:\n            path: Entire path to (and including) the key.\n\n        Returns:\n            A bytes or string representation of the blob object.\n        \"\"\"\n        path = self._resolve_path(path)\n        with disable_run_logger():\n            contents = await cloud_storage_download_blob_as_bytes.fn(\n                bucket=self.bucket, blob=path, gcp_credentials=self.gcp_credentials\n            )\n        return contents\n\n    @sync_compatible\n    async def write_path(self, path: str, content: bytes) -> str:\n        \"\"\"\n        Writes to an GCS bucket.\n\n        Args:\n            path: The key name. 
Each object in your bucket has a unique\n                key (or key name).\n            content: What you are uploading to GCS Bucket.\n\n        Returns:\n            The path that the contents were written to.\n        \"\"\"\n        path = self._resolve_path(path)\n        with disable_run_logger():\n            await cloud_storage_upload_blob_from_string.fn(\n                data=content,\n                bucket=self.bucket,\n                blob=path,\n                gcp_credentials=self.gcp_credentials,\n            )\n        return path\n\n    # NEW BLOCK INTERFACE METHODS BELOW\n    def _join_bucket_folder(self, bucket_path: str = \"\") -> str:\n        \"\"\"\n        Joins the base bucket folder to the bucket path.\n\n        NOTE: If a method reuses another method in this class, be careful to not\n        call this  twice because it'll join the bucket folder twice.\n        See https://github.com/PrefectHQ/prefect-aws/issues/141 for a past issue.\n        \"\"\"\n        bucket_path = str(bucket_path)\n        if self.bucket_folder != \"\" and bucket_path.startswith(self.bucket_folder):\n            self.logger.info(\n                f\"Bucket path {bucket_path!r} is already prefixed with \"\n                f\"bucket folder {self.bucket_folder!r}; is this intentional?\"\n            )\n\n        bucket_path = str(PurePosixPath(self.bucket_folder) / bucket_path)\n        if bucket_path in [\"\", \".\", \"/\"]:\n            # client.bucket.list_blobs(prefix=None) is the proper way\n            # of specifying the root folder of the bucket\n            bucket_path = None\n        return bucket_path\n\n    @sync_compatible\n    async def create_bucket(\n        self, location: Optional[str] = None, **create_kwargs\n    ) -> \"Bucket\":\n        \"\"\"\n        Creates a bucket.\n\n        Args:\n            location: The location of the bucket.\n            **create_kwargs: Additional keyword arguments to pass to the\n                `create_bucket` method.\n\n        Returns:\n            The bucket object.\n\n        Examples:\n            Create a bucket.\n            ```python\n            from prefect_gcp.cloud_storage import GcsBucket\n\n            gcs_bucket = GcsBucket(bucket=\"my-bucket\")\n            gcs_bucket.create_bucket()\n            ```\n        \"\"\"\n        self.logger.info(f\"Creating bucket {self.bucket!r}.\")\n        client = self.gcp_credentials.get_cloud_storage_client()\n        bucket = await run_sync_in_worker_thread(\n            client.create_bucket, self.bucket, location=location, **create_kwargs\n        )\n        return bucket\n\n    @sync_compatible\n    async def get_bucket(self) -> \"Bucket\":\n        \"\"\"\n        Returns the bucket object.\n\n        Returns:\n            The bucket object.\n\n        Examples:\n            Get the bucket object.\n            ```python\n            from prefect_gcp.cloud_storage import GcsBucket\n\n            gcs_bucket = GcsBucket.load(\"my-bucket\")\n            gcs_bucket.get_bucket()\n            ```\n        \"\"\"\n        self.logger.info(f\"Getting bucket {self.bucket!r}.\")\n        client = self.gcp_credentials.get_cloud_storage_client()\n        bucket = await run_sync_in_worker_thread(client.get_bucket, self.bucket)\n        return bucket\n\n    @sync_compatible\n    async def list_blobs(self, folder: str = \"\") -> List[\"Blob\"]:\n        \"\"\"\n        Lists all blobs in the bucket that are in a folder.\n        Folders are not included in the output.\n\n        Args:\n      
      folder: The folder to list blobs from.\n\n        Returns:\n            A list of Blob objects.\n\n        Examples:\n            Get all blobs from a folder named \"prefect\".\n            ```python\n            from prefect_gcp.cloud_storage import GcsBucket\n\n            gcs_bucket = GcsBucket.load(\"my-bucket\")\n            gcs_bucket.list_blobs(\"prefect\")\n            ```\n        \"\"\"\n        client = self.gcp_credentials.get_cloud_storage_client()\n\n        bucket_path = self._join_bucket_folder(folder)\n        if bucket_path is None:\n            self.logger.info(f\"Listing blobs in bucket {self.bucket!r}.\")\n        else:\n            self.logger.info(\n                f\"Listing blobs in folder {bucket_path!r} in bucket {self.bucket!r}.\"\n            )\n        blobs = await run_sync_in_worker_thread(\n            client.list_blobs, self.bucket, prefix=bucket_path\n        )\n\n        # Ignore folders\n        return [blob for blob in blobs if not blob.name.endswith(\"/\")]\n\n    @sync_compatible\n    async def list_folders(self, folder: str = \"\") -> List[str]:\n        \"\"\"\n        Lists all folders and subfolders in the bucket.\n\n        Args:\n            folder: List all folders and subfolders inside given folder.\n\n        Returns:\n            A list of folders.\n\n        Examples:\n            Get all folders from a bucket named \"my-bucket\".\n            ```python\n            from prefect_gcp.cloud_storage import GcsBucket\n\n            gcs_bucket = GcsBucket.load(\"my-bucket\")\n            gcs_bucket.list_folders()\n            ```\n\n            Get all folders from a folder called years\n            ```python\n            from prefect_gcp.cloud_storage import GcsBucket\n\n            gcs_bucket = GcsBucket.load(\"my-bucket\")\n            gcs_bucket.list_folders(\"years\")\n            ```\n        \"\"\"\n\n        # Beware of calling _join_bucket_folder twice, see note in method.\n        # However, we just want to use it to check if we are listing the root folder\n        bucket_path = self._join_bucket_folder(folder)\n        if bucket_path is None:\n            self.logger.info(f\"Listing folders in bucket {self.bucket!r}.\")\n        else:\n            self.logger.info(\n                f\"Listing folders in {bucket_path!r} in bucket {self.bucket!r}.\"\n            )\n\n        blobs = await self.list_blobs(folder)\n        # gets all folders with full path\n        folders = {str(PurePosixPath(blob.name).parent) for blob in blobs}\n\n        return [folder for folder in folders if folder != \".\"]\n\n    @sync_compatible\n    async def download_object_to_path(\n        self,\n        from_path: str,\n        to_path: Optional[Union[str, Path]] = None,\n        **download_kwargs: Dict[str, Any],\n    ) -> Path:\n        \"\"\"\n        Downloads an object from the object storage service to a path.\n\n        Args:\n            from_path: The path to the blob to download; this gets prefixed\n                with the bucket_folder.\n            to_path: The path to download the blob to. 
If not provided, the\n                blob's name will be used.\n            **download_kwargs: Additional keyword arguments to pass to\n                `Blob.download_to_filename`.\n\n        Returns:\n            The absolute path that the object was downloaded to.\n\n        Examples:\n            Download my_folder/notes.txt object to notes.txt.\n            ```python\n            from prefect_gcp.cloud_storage import GcsBucket\n\n            gcs_bucket = GcsBucket.load(\"my-bucket\")\n            gcs_bucket.download_object_to_path(\"my_folder/notes.txt\", \"notes.txt\")\n            ```\n        \"\"\"\n        if to_path is None:\n            to_path = Path(from_path).name\n\n        # making path absolute, but converting back to str here\n        # since !r looks nicer that way and filename arg expects str\n        to_path = str(Path(to_path).absolute())\n\n        bucket = await self.get_bucket()\n        bucket_path = self._join_bucket_folder(from_path)\n        blob = bucket.blob(bucket_path)\n        self.logger.info(\n            f\"Downloading blob from bucket {self.bucket!r} path {bucket_path!r}\"\n            f\"to {to_path!r}.\"\n        )\n\n        await run_sync_in_worker_thread(\n            blob.download_to_filename, filename=to_path, **download_kwargs\n        )\n        return Path(to_path)\n\n    @sync_compatible\n    async def download_object_to_file_object(\n        self,\n        from_path: str,\n        to_file_object: BinaryIO,\n        **download_kwargs: Dict[str, Any],\n    ) -> BinaryIO:\n        \"\"\"\n        Downloads an object from the object storage service to a file-like object,\n        which can be a BytesIO object or a BufferedWriter.\n\n        Args:\n            from_path: The path to the blob to download from; this gets prefixed\n                with the bucket_folder.\n            to_file_object: The file-like object to download the blob to.\n            **download_kwargs: Additional keyword arguments to pass to\n                `Blob.download_to_file`.\n\n        Returns:\n            The file-like object that the object was downloaded to.\n\n        Examples:\n            Download my_folder/notes.txt object to a BytesIO object.\n            ```python\n            from io import BytesIO\n            from prefect_gcp.cloud_storage import GcsBucket\n\n            gcs_bucket = GcsBucket.load(\"my-bucket\")\n            with BytesIO() as buf:\n                gcs_bucket.download_object_to_file_object(\"my_folder/notes.txt\", buf)\n            ```\n\n            Download my_folder/notes.txt object to a BufferedWriter.\n            ```python\n                from prefect_gcp.cloud_storage import GcsBucket\n\n                gcs_bucket = GcsBucket.load(\"my-bucket\")\n                with open(\"notes.txt\", \"wb\") as f:\n                    gcs_bucket.download_object_to_file_object(\"my_folder/notes.txt\", f)\n            ```\n        \"\"\"\n        bucket = await self.get_bucket()\n\n        bucket_path = self._join_bucket_folder(from_path)\n        blob = bucket.blob(bucket_path)\n        self.logger.info(\n            f\"Downloading blob from bucket {self.bucket!r} path {bucket_path!r}\"\n            f\"to file object.\"\n        )\n\n        await run_sync_in_worker_thread(\n            blob.download_to_file, file_obj=to_file_object, **download_kwargs\n        )\n        return to_file_object\n\n    @sync_compatible\n    async def download_folder_to_path(\n        self,\n        from_folder: str,\n        to_folder: Optional[Union[str, Path]] 
= None,\n        **download_kwargs: Dict[str, Any],\n    ) -> Path:\n        \"\"\"\n        Downloads objects *within* a folder (excluding the folder itself)\n        from the object storage service to a folder.\n\n        Args:\n            from_folder: The path to the folder to download from; this gets prefixed\n                with the bucket_folder.\n            to_folder: The path to download the folder to. If not provided, will default\n                to the current directory.\n            **download_kwargs: Additional keyword arguments to pass to\n                `Blob.download_to_filename`.\n\n        Returns:\n            The absolute path that the folder was downloaded to.\n\n        Examples:\n            Download my_folder to a local folder named my_folder.\n            ```python\n            from prefect_gcp.cloud_storage import GcsBucket\n\n            gcs_bucket = GcsBucket.load(\"my-bucket\")\n            gcs_bucket.download_folder_to_path(\"my_folder\", \"my_folder\")\n            ```\n        \"\"\"\n        if to_folder is None:\n            to_folder = \"\"\n        to_folder = Path(to_folder).absolute()\n\n        blobs = await self.list_blobs(folder=from_folder)\n        if len(blobs) == 0:\n            self.logger.warning(\n                f\"No blobs were downloaded from \"\n                f\"bucket {self.bucket!r} path {from_folder!r}.\"\n            )\n            return to_folder\n\n        # do not call self._join_bucket_folder for list_blobs\n        # because it's built-in to that method already!\n        # however, we still need to do it because we're using relative_to\n        bucket_folder = self._join_bucket_folder(from_folder)\n\n        async_coros = []\n        for blob in blobs:\n            bucket_path = PurePosixPath(blob.name).relative_to(bucket_folder)\n            if str(bucket_path).endswith(\"/\"):\n                continue\n            to_path = to_folder / bucket_path\n            to_path.parent.mkdir(parents=True, exist_ok=True)\n            self.logger.info(\n                f\"Downloading blob from bucket {self.bucket!r} path \"\n                f\"{str(bucket_path)!r} to {to_path}.\"\n            )\n            async_coros.append(\n                run_sync_in_worker_thread(\n                    blob.download_to_filename, filename=str(to_path), **download_kwargs\n                )\n            )\n        await asyncio.gather(*async_coros)\n\n        return to_folder\n\n    @sync_compatible\n    async def upload_from_path(\n        self,\n        from_path: Union[str, Path],\n        to_path: Optional[str] = None,\n        **upload_kwargs: Dict[str, Any],\n    ) -> str:\n        \"\"\"\n        Uploads an object from a path to the object storage service.\n\n        Args:\n            from_path: The path to the file to upload from.\n            to_path: The path to upload the file to. 
If not provided, will use\n                the file name of from_path; this gets prefixed\n                with the bucket_folder.\n            **upload_kwargs: Additional keyword arguments to pass to\n                `Blob.upload_from_filename`.\n\n        Returns:\n            The path that the object was uploaded to.\n\n        Examples:\n            Upload notes.txt to my_folder/notes.txt.\n            ```python\n            from prefect_gcp.cloud_storage import GcsBucket\n\n            gcs_bucket = GcsBucket.load(\"my-bucket\")\n            gcs_bucket.upload_from_path(\"notes.txt\", \"my_folder/notes.txt\")\n            ```\n        \"\"\"\n        if to_path is None:\n            to_path = Path(from_path).name\n\n        bucket_path = self._join_bucket_folder(to_path)\n        bucket = await self.get_bucket()\n        blob = bucket.blob(bucket_path)\n        self.logger.info(\n            f\"Uploading from {from_path!r} to the bucket \"\n            f\"{self.bucket!r} path {bucket_path!r}.\"\n        )\n\n        await run_sync_in_worker_thread(\n            blob.upload_from_filename, filename=from_path, **upload_kwargs\n        )\n        return bucket_path\n\n    @sync_compatible\n    async def upload_from_file_object(\n        self, from_file_object: BinaryIO, to_path: str, **upload_kwargs\n    ) -> str:\n        \"\"\"\n        Uploads an object to the object storage service from a file-like object,\n        which can be a BytesIO object or a BufferedReader.\n\n        Args:\n            from_file_object: The file-like object to upload from.\n            to_path: The path to upload the object to; this gets prefixed\n                with the bucket_folder.\n            **upload_kwargs: Additional keyword arguments to pass to\n                `Blob.upload_from_file`.\n\n        Returns:\n            The path that the object was uploaded to.\n\n        Examples:\n            Upload my_folder/notes.txt object to a BytesIO object.\n            ```python\n            from io import BytesIO\n            from prefect_gcp.cloud_storage import GcsBucket\n\n            gcs_bucket = GcsBucket.load(\"my-bucket\")\n            with open(\"notes.txt\", \"rb\") as f:\n                gcs_bucket.upload_from_file_object(f, \"my_folder/notes.txt\")\n            ```\n\n            Upload BufferedReader object to my_folder/notes.txt.\n            ```python\n            from io import BufferedReader\n            from prefect_gcp.cloud_storage import GcsBucket\n\n            gcs_bucket = GcsBucket.load(\"my-bucket\")\n            with open(\"notes.txt\", \"rb\") as f:\n                gcs_bucket.upload_from_file_object(\n                    BufferedReader(f), \"my_folder/notes.txt\"\n                )\n            ```\n        \"\"\"\n        bucket = await self.get_bucket()\n\n        bucket_path = self._join_bucket_folder(to_path)\n        blob = bucket.blob(bucket_path)\n        self.logger.info(\n            f\"Uploading from file object to the bucket \"\n            f\"{self.bucket!r} path {bucket_path!r}.\"\n        )\n\n        await run_sync_in_worker_thread(\n            blob.upload_from_file, from_file_object, **upload_kwargs\n        )\n        return bucket_path\n\n    @sync_compatible\n    async def upload_from_folder(\n        self,\n        from_folder: Union[str, Path],\n        to_folder: Optional[str] = None,\n        **upload_kwargs: Dict[str, Any],\n    ) -> str:\n        \"\"\"\n        Uploads files *within* a folder (excluding the folder itself)\n        to the object storage 
service folder.\n\n        Args:\n            from_folder: The path to the folder to upload from.\n            to_folder: The path to upload the folder to. If not provided, will default\n                to bucket_folder or the base directory of the bucket.\n            **upload_kwargs: Additional keyword arguments to pass to\n                `Blob.upload_from_filename`.\n\n        Returns:\n            The path that the folder was uploaded to.\n\n        Examples:\n            Upload local folder my_folder to the bucket's folder my_folder.\n            ```python\n            from prefect_gcp.cloud_storage import GcsBucket\n\n            gcs_bucket = GcsBucket.load(\"my-bucket\")\n            gcs_bucket.upload_from_folder(\"my_folder\")\n            ```\n        \"\"\"\n        from_folder = Path(from_folder)\n        # join bucket folder expects string for the first input\n        # when it returns None, we need to convert it back to empty string\n        # so relative_to works\n        bucket_folder = self._join_bucket_folder(to_folder or \"\") or \"\"\n\n        num_uploaded = 0\n        bucket = await self.get_bucket()\n\n        async_coros = []\n        for from_path in from_folder.rglob(\"**/*\"):\n            if from_path.is_dir():\n                continue\n            bucket_path = str(Path(bucket_folder) / from_path.relative_to(from_folder))\n            self.logger.info(\n                f\"Uploading from {str(from_path)!r} to the bucket \"\n                f\"{self.bucket!r} path {bucket_path!r}.\"\n            )\n            blob = bucket.blob(bucket_path)\n            async_coros.append(\n                run_sync_in_worker_thread(\n                    blob.upload_from_filename, filename=from_path, **upload_kwargs\n                )\n            )\n            num_uploaded += 1\n        await asyncio.gather(*async_coros)\n        if num_uploaded == 0:\n            self.logger.warning(f\"No files were uploaded from {from_folder}.\")\n        return bucket_folder\n\n    @sync_compatible\n    async def upload_from_dataframe(\n        self,\n        df: \"DataFrame\",\n        to_path: str,\n        serialization_format: Union[\n            str, DataFrameSerializationFormat\n        ] = DataFrameSerializationFormat.CSV_GZIP,\n        **upload_kwargs: Dict[str, Any],\n    ) -> str:\n        \"\"\"Upload a Pandas DataFrame to Google Cloud Storage in various formats.\n\n        This function uploads the data in a Pandas DataFrame to Google Cloud Storage\n        in a specified format, such as .csv, .csv.gz, .parquet,\n        .parquet.snappy, and .parquet.gz.\n\n        Args:\n            df: The Pandas DataFrame to be uploaded.\n            to_path: The destination path for the uploaded DataFrame.\n            serialization_format: The format to serialize the DataFrame into.\n                When passed as a `str`, the valid options are:\n                'csv', 'csv_gzip',  'parquet', 'parquet_snappy', 'parquet_gzip'.\n                Defaults to `DataFrameSerializationFormat.CSV_GZIP`.\n            **upload_kwargs: Additional keyword arguments to pass to the underlying\n            `Blob.upload_from_dataframe` method.\n\n        Returns:\n            The path that the object was uploaded to.\n        \"\"\"\n        if isinstance(serialization_format, str):\n            serialization_format = DataFrameSerializationFormat[\n                serialization_format.upper()\n            ]\n\n        with BytesIO() as bytes_buffer:\n            if serialization_format.format == 
\"parquet\":\n                df.to_parquet(\n                    path=bytes_buffer,\n                    compression=serialization_format.compression,\n                    index=False,\n                )\n            elif serialization_format.format == \"csv\":\n                df.to_csv(\n                    path_or_buf=bytes_buffer,\n                    compression=serialization_format.compression,\n                    index=False,\n                )\n\n            bytes_buffer.seek(0)\n            to_path = serialization_format.fix_extension_with(gcs_blob_path=to_path)\n\n            return await self.upload_from_file_object(\n                from_file_object=bytes_buffer,\n                to_path=to_path,\n                **{\"content_type\": serialization_format.content_type, **upload_kwargs},\n            )\n
"},{"location":"cloud_storage/#prefect_gcp.cloud_storage.GcsBucket-attributes","title":"Attributes","text":""},{"location":"cloud_storage/#prefect_gcp.cloud_storage.GcsBucket.basepath","title":"basepath: str property","text":"

Read-only property that mirrors the bucket folder.

Used for deployment.
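
Example:

A minimal usage sketch (assuming a GcsBucket block named \"my-bucket\" has already been saved):

from prefect_gcp.cloud_storage import GcsBucket\n\ngcs_bucket = GcsBucket.load(\"my-bucket\")\n# mirrors the configured bucket_folder (used for deployment)\nprint(gcs_bucket.basepath)\n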

"},{"location":"cloud_storage/#prefect_gcp.cloud_storage.GcsBucket-functions","title":"Functions","text":""},{"location":"cloud_storage/#prefect_gcp.cloud_storage.GcsBucket.create_bucket","title":"create_bucket async","text":"

Creates a bucket.

Parameters:

Name Type Description Default location Optional[str]

The location of the bucket.

None **create_kwargs

Additional keyword arguments to pass to the create_bucket method.

{}

Returns:

Type Description Bucket

The bucket object.

Examples:

Create a bucket.

from prefect_gcp.cloud_storage import GcsBucket\n\ngcs_bucket = GcsBucket(bucket=\"my-bucket\")\ngcs_bucket.create_bucket()\n

Source code in prefect_gcp/cloud_storage.py
@sync_compatible\nasync def create_bucket(\n    self, location: Optional[str] = None, **create_kwargs\n) -> \"Bucket\":\n    \"\"\"\n    Creates a bucket.\n\n    Args:\n        location: The location of the bucket.\n        **create_kwargs: Additional keyword arguments to pass to the\n            `create_bucket` method.\n\n    Returns:\n        The bucket object.\n\n    Examples:\n        Create a bucket.\n        ```python\n        from prefect_gcp.cloud_storage import GcsBucket\n\n        gcs_bucket = GcsBucket(bucket=\"my-bucket\")\n        gcs_bucket.create_bucket()\n        ```\n    \"\"\"\n    self.logger.info(f\"Creating bucket {self.bucket!r}.\")\n    client = self.gcp_credentials.get_cloud_storage_client()\n    bucket = await run_sync_in_worker_thread(\n        client.create_bucket, self.bucket, location=location, **create_kwargs\n    )\n    return bucket\n
"},{"location":"cloud_storage/#prefect_gcp.cloud_storage.GcsBucket.download_folder_to_path","title":"download_folder_to_path async","text":"

Downloads objects within a folder (excluding the folder itself) from the object storage service to a folder.

Parameters:

Name Type Description Default from_folder str

The path to the folder to download from; this gets prefixed with the bucket_folder.

required to_folder Optional[Union[str, Path]]

The path to download the folder to. If not provided, will default to the current directory.

None **download_kwargs Dict[str, Any]

Additional keyword arguments to pass to Blob.download_to_filename.

{}

Returns:

Type Description Path

The absolute path that the folder was downloaded to.

Examples:

Download my_folder to a local folder named my_folder.

from prefect_gcp.cloud_storage import GcsBucket\n\ngcs_bucket = GcsBucket.load(\"my-bucket\")\ngcs_bucket.download_folder_to_path(\"my_folder\", \"my_folder\")\n

Source code in prefect_gcp/cloud_storage.py
@sync_compatible\nasync def download_folder_to_path(\n    self,\n    from_folder: str,\n    to_folder: Optional[Union[str, Path]] = None,\n    **download_kwargs: Dict[str, Any],\n) -> Path:\n    \"\"\"\n    Downloads objects *within* a folder (excluding the folder itself)\n    from the object storage service to a folder.\n\n    Args:\n        from_folder: The path to the folder to download from; this gets prefixed\n            with the bucket_folder.\n        to_folder: The path to download the folder to. If not provided, will default\n            to the current directory.\n        **download_kwargs: Additional keyword arguments to pass to\n            `Blob.download_to_filename`.\n\n    Returns:\n        The absolute path that the folder was downloaded to.\n\n    Examples:\n        Download my_folder to a local folder named my_folder.\n        ```python\n        from prefect_gcp.cloud_storage import GcsBucket\n\n        gcs_bucket = GcsBucket.load(\"my-bucket\")\n        gcs_bucket.download_folder_to_path(\"my_folder\", \"my_folder\")\n        ```\n    \"\"\"\n    if to_folder is None:\n        to_folder = \"\"\n    to_folder = Path(to_folder).absolute()\n\n    blobs = await self.list_blobs(folder=from_folder)\n    if len(blobs) == 0:\n        self.logger.warning(\n            f\"No blobs were downloaded from \"\n            f\"bucket {self.bucket!r} path {from_folder!r}.\"\n        )\n        return to_folder\n\n    # do not call self._join_bucket_folder for list_blobs\n    # because it's built-in to that method already!\n    # however, we still need to do it because we're using relative_to\n    bucket_folder = self._join_bucket_folder(from_folder)\n\n    async_coros = []\n    for blob in blobs:\n        bucket_path = PurePosixPath(blob.name).relative_to(bucket_folder)\n        if str(bucket_path).endswith(\"/\"):\n            continue\n        to_path = to_folder / bucket_path\n        to_path.parent.mkdir(parents=True, exist_ok=True)\n        self.logger.info(\n            f\"Downloading blob from bucket {self.bucket!r} path \"\n            f\"{str(bucket_path)!r} to {to_path}.\"\n        )\n        async_coros.append(\n            run_sync_in_worker_thread(\n                blob.download_to_filename, filename=str(to_path), **download_kwargs\n            )\n        )\n    await asyncio.gather(*async_coros)\n\n    return to_folder\n
"},{"location":"cloud_storage/#prefect_gcp.cloud_storage.GcsBucket.download_object_to_file_object","title":"download_object_to_file_object async","text":"

Downloads an object from the object storage service to a file-like object, which can be a BytesIO object or a BufferedWriter.

Parameters:

Name Type Description Default from_path str

The path to the blob to download from; this gets prefixed with the bucket_folder.

required to_file_object BinaryIO

The file-like object to download the blob to.

required **download_kwargs Dict[str, Any]

Additional keyword arguments to pass to Blob.download_to_file.

{}

Returns:

Type Description BinaryIO

The file-like object that the object was downloaded to.

Examples:

Download my_folder/notes.txt object to a BytesIO object.

from io import BytesIO\nfrom prefect_gcp.cloud_storage import GcsBucket\n\ngcs_bucket = GcsBucket.load(\"my-bucket\")\nwith BytesIO() as buf:\n    gcs_bucket.download_object_to_file_object(\"my_folder/notes.txt\", buf)\n

Download my_folder/notes.txt object to a BufferedWriter.

from prefect_gcp.cloud_storage import GcsBucket\n\ngcs_bucket = GcsBucket.load(\"my-bucket\")\nwith open(\"notes.txt\", \"wb\") as f:\n    gcs_bucket.download_object_to_file_object(\"my_folder/notes.txt\", f)\n

Source code in prefect_gcp/cloud_storage.py
@sync_compatible\nasync def download_object_to_file_object(\n    self,\n    from_path: str,\n    to_file_object: BinaryIO,\n    **download_kwargs: Dict[str, Any],\n) -> BinaryIO:\n    \"\"\"\n    Downloads an object from the object storage service to a file-like object,\n    which can be a BytesIO object or a BufferedWriter.\n\n    Args:\n        from_path: The path to the blob to download from; this gets prefixed\n            with the bucket_folder.\n        to_file_object: The file-like object to download the blob to.\n        **download_kwargs: Additional keyword arguments to pass to\n            `Blob.download_to_file`.\n\n    Returns:\n        The file-like object that the object was downloaded to.\n\n    Examples:\n        Download my_folder/notes.txt object to a BytesIO object.\n        ```python\n        from io import BytesIO\n        from prefect_gcp.cloud_storage import GcsBucket\n\n        gcs_bucket = GcsBucket.load(\"my-bucket\")\n        with BytesIO() as buf:\n            gcs_bucket.download_object_to_file_object(\"my_folder/notes.txt\", buf)\n        ```\n\n        Download my_folder/notes.txt object to a BufferedWriter.\n        ```python\n            from prefect_gcp.cloud_storage import GcsBucket\n\n            gcs_bucket = GcsBucket.load(\"my-bucket\")\n            with open(\"notes.txt\", \"wb\") as f:\n                gcs_bucket.download_object_to_file_object(\"my_folder/notes.txt\", f)\n        ```\n    \"\"\"\n    bucket = await self.get_bucket()\n\n    bucket_path = self._join_bucket_folder(from_path)\n    blob = bucket.blob(bucket_path)\n    self.logger.info(\n        f\"Downloading blob from bucket {self.bucket!r} path {bucket_path!r}\"\n        f\"to file object.\"\n    )\n\n    await run_sync_in_worker_thread(\n        blob.download_to_file, file_obj=to_file_object, **download_kwargs\n    )\n    return to_file_object\n
"},{"location":"cloud_storage/#prefect_gcp.cloud_storage.GcsBucket.download_object_to_path","title":"download_object_to_path async","text":"

Downloads an object from the object storage service to a path.

Parameters:

Name Type Description Default from_path str

The path to the blob to download; this gets prefixed with the bucket_folder.

required to_path Optional[Union[str, Path]]

The path to download the blob to. If not provided, the blob's name will be used.

None **download_kwargs Dict[str, Any]

Additional keyword arguments to pass to Blob.download_to_filename.

{}

Returns:

Type Description Path

The absolute path that the object was downloaded to.

Examples:

Download my_folder/notes.txt object to notes.txt.

from prefect_gcp.cloud_storage import GcsBucket\n\ngcs_bucket = GcsBucket.load(\"my-bucket\")\ngcs_bucket.download_object_to_path(\"my_folder/notes.txt\", \"notes.txt\")\n

Source code in prefect_gcp/cloud_storage.py
@sync_compatible\nasync def download_object_to_path(\n    self,\n    from_path: str,\n    to_path: Optional[Union[str, Path]] = None,\n    **download_kwargs: Dict[str, Any],\n) -> Path:\n    \"\"\"\n    Downloads an object from the object storage service to a path.\n\n    Args:\n        from_path: The path to the blob to download; this gets prefixed\n            with the bucket_folder.\n        to_path: The path to download the blob to. If not provided, the\n            blob's name will be used.\n        **download_kwargs: Additional keyword arguments to pass to\n            `Blob.download_to_filename`.\n\n    Returns:\n        The absolute path that the object was downloaded to.\n\n    Examples:\n        Download my_folder/notes.txt object to notes.txt.\n        ```python\n        from prefect_gcp.cloud_storage import GcsBucket\n\n        gcs_bucket = GcsBucket.load(\"my-bucket\")\n        gcs_bucket.download_object_to_path(\"my_folder/notes.txt\", \"notes.txt\")\n        ```\n    \"\"\"\n    if to_path is None:\n        to_path = Path(from_path).name\n\n    # making path absolute, but converting back to str here\n    # since !r looks nicer that way and filename arg expects str\n    to_path = str(Path(to_path).absolute())\n\n    bucket = await self.get_bucket()\n    bucket_path = self._join_bucket_folder(from_path)\n    blob = bucket.blob(bucket_path)\n    self.logger.info(\n        f\"Downloading blob from bucket {self.bucket!r} path {bucket_path!r}\"\n        f\"to {to_path!r}.\"\n    )\n\n    await run_sync_in_worker_thread(\n        blob.download_to_filename, filename=to_path, **download_kwargs\n    )\n    return Path(to_path)\n
"},{"location":"cloud_storage/#prefect_gcp.cloud_storage.GcsBucket.get_bucket","title":"get_bucket async","text":"

Returns the bucket object.

Returns:

Type Description Bucket

The bucket object.

Examples:

Get the bucket object.

from prefect_gcp.cloud_storage import GcsBucket\n\ngcs_bucket = GcsBucket.load(\"my-bucket\")\ngcs_bucket.get_bucket()\n

Source code in prefect_gcp/cloud_storage.py
@sync_compatible\nasync def get_bucket(self) -> \"Bucket\":\n    \"\"\"\n    Returns the bucket object.\n\n    Returns:\n        The bucket object.\n\n    Examples:\n        Get the bucket object.\n        ```python\n        from prefect_gcp.cloud_storage import GcsBucket\n\n        gcs_bucket = GcsBucket.load(\"my-bucket\")\n        gcs_bucket.get_bucket()\n        ```\n    \"\"\"\n    self.logger.info(f\"Getting bucket {self.bucket!r}.\")\n    client = self.gcp_credentials.get_cloud_storage_client()\n    bucket = await run_sync_in_worker_thread(client.get_bucket, self.bucket)\n    return bucket\n
"},{"location":"cloud_storage/#prefect_gcp.cloud_storage.GcsBucket.get_directory","title":"get_directory async","text":"

Copies a folder from the configured GCS bucket to a local directory. Defaults to copying the entire contents of the block's bucket_folder to the current working directory.

Parameters:

Name Type Description Default from_path Optional[str]

Path in GCS bucket to download from. Defaults to the block's configured bucket_folder.

None local_path Optional[str]

Local path to download GCS bucket contents to. Defaults to the current working directory.

None

Returns:

Type Description List[Union[str, Path]]

A list of downloaded file paths.
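
Example:

A minimal usage sketch (assuming a saved block named \"my-bucket\" with objects under a \"my_folder\" prefix):

from prefect_gcp.cloud_storage import GcsBucket\n\ngcs_bucket = GcsBucket.load(\"my-bucket\")\n# copy everything under the bucket's \"my_folder\" prefix into ./local_folder\ndownloaded_files = gcs_bucket.get_directory(from_path=\"my_folder\", local_path=\"local_folder\")\n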

Source code in prefect_gcp/cloud_storage.py
@sync_compatible\nasync def get_directory(\n    self, from_path: Optional[str] = None, local_path: Optional[str] = None\n) -> List[Union[str, Path]]:\n    \"\"\"\n    Copies a folder from the configured GCS bucket to a local directory.\n    Defaults to copying the entire contents of the block's bucket_folder\n    to the current working directory.\n\n    Args:\n        from_path: Path in GCS bucket to download from. Defaults to the block's\n            configured bucket_folder.\n        local_path: Local path to download GCS bucket contents to.\n            Defaults to the current working directory.\n\n    Returns:\n        A list of downloaded file paths.\n    \"\"\"\n    from_path = (\n        self.bucket_folder if from_path is None else self._resolve_path(from_path)\n    )\n\n    if local_path is None:\n        local_path = os.path.abspath(\".\")\n    else:\n        local_path = os.path.abspath(os.path.expanduser(local_path))\n\n    project = self.gcp_credentials.project\n    client = self.gcp_credentials.get_cloud_storage_client(project=project)\n\n    blobs = await run_sync_in_worker_thread(\n        client.list_blobs, self.bucket, prefix=from_path\n    )\n\n    file_paths = []\n    for blob in blobs:\n        blob_path = blob.name\n        if blob_path[-1] == \"/\":\n            # object is a folder and will be created if it contains any objects\n            continue\n        local_file_path = os.path.join(local_path, blob_path)\n        os.makedirs(os.path.dirname(local_file_path), exist_ok=True)\n\n        with disable_run_logger():\n            file_path = await cloud_storage_download_blob_to_file.fn(\n                bucket=self.bucket,\n                blob=blob_path,\n                path=local_file_path,\n                gcp_credentials=self.gcp_credentials,\n            )\n            file_paths.append(file_path)\n    return file_paths\n
"},{"location":"cloud_storage/#prefect_gcp.cloud_storage.GcsBucket.list_blobs","title":"list_blobs async","text":"

Lists all blobs in the bucket that are in a folder. Folders are not included in the output.

Parameters:

Name Type Description Default folder str

The folder to list blobs from.

''

Returns:

Type Description List[Blob]

A list of Blob objects.

Examples:

Get all blobs from a folder named \"prefect\".

from prefect_gcp.cloud_storage import GcsBucket\n\ngcs_bucket = GcsBucket.load(\"my-bucket\")\ngcs_bucket.list_blobs(\"prefect\")\n

Source code in prefect_gcp/cloud_storage.py
@sync_compatible\nasync def list_blobs(self, folder: str = \"\") -> List[\"Blob\"]:\n    \"\"\"\n    Lists all blobs in the bucket that are in a folder.\n    Folders are not included in the output.\n\n    Args:\n        folder: The folder to list blobs from.\n\n    Returns:\n        A list of Blob objects.\n\n    Examples:\n        Get all blobs from a folder named \"prefect\".\n        ```python\n        from prefect_gcp.cloud_storage import GcsBucket\n\n        gcs_bucket = GcsBucket.load(\"my-bucket\")\n        gcs_bucket.list_blobs(\"prefect\")\n        ```\n    \"\"\"\n    client = self.gcp_credentials.get_cloud_storage_client()\n\n    bucket_path = self._join_bucket_folder(folder)\n    if bucket_path is None:\n        self.logger.info(f\"Listing blobs in bucket {self.bucket!r}.\")\n    else:\n        self.logger.info(\n            f\"Listing blobs in folder {bucket_path!r} in bucket {self.bucket!r}.\"\n        )\n    blobs = await run_sync_in_worker_thread(\n        client.list_blobs, self.bucket, prefix=bucket_path\n    )\n\n    # Ignore folders\n    return [blob for blob in blobs if not blob.name.endswith(\"/\")]\n
"},{"location":"cloud_storage/#prefect_gcp.cloud_storage.GcsBucket.list_folders","title":"list_folders async","text":"

Lists all folders and subfolders in the bucket.

Parameters:

Name Type Description Default folder str

List all folders and subfolders inside given folder.

''

Returns:

Type Description List[str]

A list of folders.

Examples:

Get all folders from a bucket named \"my-bucket\".

from prefect_gcp.cloud_storage import GcsBucket\n\ngcs_bucket = GcsBucket.load(\"my-bucket\")\ngcs_bucket.list_folders()\n

Get all folders from a folder called years.

from prefect_gcp.cloud_storage import GcsBucket\n\ngcs_bucket = GcsBucket.load(\"my-bucket\")\ngcs_bucket.list_folders(\"years\")\n

Source code in prefect_gcp/cloud_storage.py
@sync_compatible\nasync def list_folders(self, folder: str = \"\") -> List[str]:\n    \"\"\"\n    Lists all folders and subfolders in the bucket.\n\n    Args:\n        folder: List all folders and subfolders inside given folder.\n\n    Returns:\n        A list of folders.\n\n    Examples:\n        Get all folders from a bucket named \"my-bucket\".\n        ```python\n        from prefect_gcp.cloud_storage import GcsBucket\n\n        gcs_bucket = GcsBucket.load(\"my-bucket\")\n        gcs_bucket.list_folders()\n        ```\n\n        Get all folders from a folder called years\n        ```python\n        from prefect_gcp.cloud_storage import GcsBucket\n\n        gcs_bucket = GcsBucket.load(\"my-bucket\")\n        gcs_bucket.list_folders(\"years\")\n        ```\n    \"\"\"\n\n    # Beware of calling _join_bucket_folder twice, see note in method.\n    # However, we just want to use it to check if we are listing the root folder\n    bucket_path = self._join_bucket_folder(folder)\n    if bucket_path is None:\n        self.logger.info(f\"Listing folders in bucket {self.bucket!r}.\")\n    else:\n        self.logger.info(\n            f\"Listing folders in {bucket_path!r} in bucket {self.bucket!r}.\"\n        )\n\n    blobs = await self.list_blobs(folder)\n    # gets all folders with full path\n    folders = {str(PurePosixPath(blob.name).parent) for blob in blobs}\n\n    return [folder for folder in folders if folder != \".\"]\n
"},{"location":"cloud_storage/#prefect_gcp.cloud_storage.GcsBucket.put_directory","title":"put_directory async","text":"

Uploads a directory from a given local path to the configured GCS bucket in a given folder.

Defaults to uploading the entire contents of the current working directory to the block's bucket_folder.

Parameters:

Name Type Description Default local_path Optional[str]

Path to local directory to upload from.

None to_path Optional[str]

Path in GCS bucket to upload to. Defaults to block's configured bucket_folder.

None ignore_file Optional[str]

Path to file containing gitignore style expressions for filepaths to ignore.

None

Returns:

Type Description int

The number of files uploaded.
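
Example:

A minimal usage sketch (assuming a saved block named \"my-bucket\" and a local directory named \"my_folder\"):

from prefect_gcp.cloud_storage import GcsBucket\n\ngcs_bucket = GcsBucket.load(\"my-bucket\")\n# upload the contents of ./my_folder to the block's configured bucket_folder\nuploaded_count = gcs_bucket.put_directory(local_path=\"my_folder\")\n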

Source code in prefect_gcp/cloud_storage.py
@sync_compatible\nasync def put_directory(\n    self,\n    local_path: Optional[str] = None,\n    to_path: Optional[str] = None,\n    ignore_file: Optional[str] = None,\n) -> int:\n    \"\"\"\n    Uploads a directory from a given local path to the configured GCS bucket in a\n    given folder.\n\n    Defaults to uploading the entire contents the current working directory to the\n    block's bucket_folder.\n\n    Args:\n        local_path: Path to local directory to upload from.\n        to_path: Path in GCS bucket to upload to. Defaults to block's configured\n            bucket_folder.\n        ignore_file: Path to file containing gitignore style expressions for\n            filepaths to ignore.\n\n    Returns:\n        The number of files uploaded.\n    \"\"\"\n    if local_path is None:\n        local_path = os.path.abspath(\".\")\n    else:\n        local_path = os.path.expanduser(local_path)\n\n    to_path = self.bucket_folder if to_path is None else self._resolve_path(to_path)\n\n    included_files = None\n    if ignore_file:\n        with open(ignore_file, \"r\") as f:\n            ignore_patterns = f.readlines()\n        included_files = filter_files(local_path, ignore_patterns)\n\n    uploaded_file_count = 0\n    for local_file_path in Path(local_path).rglob(\"*\"):\n        if (\n            included_files is not None\n            and local_file_path.name not in included_files\n        ):\n            continue\n        elif not local_file_path.is_dir():\n            remote_file_path = str(\n                PurePosixPath(to_path, local_file_path.relative_to(local_path))\n            )\n            local_file_content = local_file_path.read_bytes()\n            await self.write_path(remote_file_path, content=local_file_content)\n            uploaded_file_count += 1\n\n    return uploaded_file_count\n
"},{"location":"cloud_storage/#prefect_gcp.cloud_storage.GcsBucket.read_path","title":"read_path async","text":"

Read specified path from GCS and return contents. Provide the entire path to the key in GCS.

Parameters:

Name Type Description Default path str

Entire path to (and including) the key.

required

Returns:

Type Description bytes

A bytes or string representation of the blob object.
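
Example:

A minimal usage sketch (assuming a saved block named \"my-bucket\" and an existing blob at \"my_folder/notes.txt\"):

from prefect_gcp.cloud_storage import GcsBucket\n\ngcs_bucket = GcsBucket.load(\"my-bucket\")\n# returns the blob's contents as bytes\ncontents = gcs_bucket.read_path(\"my_folder/notes.txt\")\n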

Source code in prefect_gcp/cloud_storage.py
@sync_compatible\nasync def read_path(self, path: str) -> bytes:\n    \"\"\"\n    Read specified path from GCS and return contents. Provide the entire\n    path to the key in GCS.\n\n    Args:\n        path: Entire path to (and including) the key.\n\n    Returns:\n        A bytes or string representation of the blob object.\n    \"\"\"\n    path = self._resolve_path(path)\n    with disable_run_logger():\n        contents = await cloud_storage_download_blob_as_bytes.fn(\n            bucket=self.bucket, blob=path, gcp_credentials=self.gcp_credentials\n        )\n    return contents\n
"},{"location":"cloud_storage/#prefect_gcp.cloud_storage.GcsBucket.upload_from_dataframe","title":"upload_from_dataframe async","text":"

Upload a Pandas DataFrame to Google Cloud Storage in various formats.

This function uploads the data in a Pandas DataFrame to Google Cloud Storage in a specified format, such as .csv, .csv.gz, .parquet, .parquet.snappy, and .parquet.gz.

Parameters:

Name Type Description Default df DataFrame

The Pandas DataFrame to be uploaded.

required to_path str

The destination path for the uploaded DataFrame.

required serialization_format Union[str, DataFrameSerializationFormat]

The format to serialize the DataFrame into. When passed as a str, the valid options are: 'csv', 'csv_gzip', 'parquet', 'parquet_snappy', 'parquet_gzip'. Defaults to DataFrameSerializationFormat.CSV_GZIP.

CSV_GZIP **upload_kwargs Dict[str, Any]

Additional keyword arguments to pass to the underlying Blob.upload_from_dataframe method.

{}

Returns:

Type Description str

The path that the object was uploaded to.
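
Example:

A minimal usage sketch (assuming pandas is installed and a saved block named \"my-bucket\"):

import pandas as pd\nfrom prefect_gcp.cloud_storage import GcsBucket\n\ngcs_bucket = GcsBucket.load(\"my-bucket\")\ndf = pd.DataFrame({\"id\": [1, 2], \"value\": [\"a\", \"b\"]})\n# serialized as gzip-compressed CSV; the blob extension is adjusted to match\npath = gcs_bucket.upload_from_dataframe(df, \"my_folder/data\", serialization_format=\"csv_gzip\")\n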

Source code in prefect_gcp/cloud_storage.py
@sync_compatible\nasync def upload_from_dataframe(\n    self,\n    df: \"DataFrame\",\n    to_path: str,\n    serialization_format: Union[\n        str, DataFrameSerializationFormat\n    ] = DataFrameSerializationFormat.CSV_GZIP,\n    **upload_kwargs: Dict[str, Any],\n) -> str:\n    \"\"\"Upload a Pandas DataFrame to Google Cloud Storage in various formats.\n\n    This function uploads the data in a Pandas DataFrame to Google Cloud Storage\n    in a specified format, such as .csv, .csv.gz, .parquet,\n    .parquet.snappy, and .parquet.gz.\n\n    Args:\n        df: The Pandas DataFrame to be uploaded.\n        to_path: The destination path for the uploaded DataFrame.\n        serialization_format: The format to serialize the DataFrame into.\n            When passed as a `str`, the valid options are:\n            'csv', 'csv_gzip',  'parquet', 'parquet_snappy', 'parquet_gzip'.\n            Defaults to `DataFrameSerializationFormat.CSV_GZIP`.\n        **upload_kwargs: Additional keyword arguments to pass to the underlying\n        `Blob.upload_from_dataframe` method.\n\n    Returns:\n        The path that the object was uploaded to.\n    \"\"\"\n    if isinstance(serialization_format, str):\n        serialization_format = DataFrameSerializationFormat[\n            serialization_format.upper()\n        ]\n\n    with BytesIO() as bytes_buffer:\n        if serialization_format.format == \"parquet\":\n            df.to_parquet(\n                path=bytes_buffer,\n                compression=serialization_format.compression,\n                index=False,\n            )\n        elif serialization_format.format == \"csv\":\n            df.to_csv(\n                path_or_buf=bytes_buffer,\n                compression=serialization_format.compression,\n                index=False,\n            )\n\n        bytes_buffer.seek(0)\n        to_path = serialization_format.fix_extension_with(gcs_blob_path=to_path)\n\n        return await self.upload_from_file_object(\n            from_file_object=bytes_buffer,\n            to_path=to_path,\n            **{\"content_type\": serialization_format.content_type, **upload_kwargs},\n        )\n
"},{"location":"cloud_storage/#prefect_gcp.cloud_storage.GcsBucket.upload_from_file_object","title":"upload_from_file_object async","text":"

Uploads an object to the object storage service from a file-like object, which can be a BytesIO object or a BufferedReader.

Parameters:

Name Type Description Default from_file_object BinaryIO

The file-like object to upload from.

required to_path str

The path to upload the object to; this gets prefixed with the bucket_folder.

required **upload_kwargs

Additional keyword arguments to pass to Blob.upload_from_file.

{}

Returns:

Type Description str

The path that the object was uploaded to.

Examples:

Upload the contents of notes.txt to my_folder/notes.txt from an open file object.

from io import BytesIO\nfrom prefect_gcp.cloud_storage import GcsBucket\n\ngcs_bucket = GcsBucket.load(\"my-bucket\")\nwith open(\"notes.txt\", \"rb\") as f:\n    gcs_bucket.upload_from_file_object(f, \"my_folder/notes.txt\")\n

Upload BufferedReader object to my_folder/notes.txt.

from io import BufferedReader\nfrom prefect_gcp.cloud_storage import GcsBucket\n\ngcs_bucket = GcsBucket.load(\"my-bucket\")\nwith open(\"notes.txt\", \"rb\") as f:\n    gcs_bucket.upload_from_file_object(\n        BufferedReader(f), \"my_folder/notes.txt\"\n    )\n

Source code in prefect_gcp/cloud_storage.py
@sync_compatible\nasync def upload_from_file_object(\n    self, from_file_object: BinaryIO, to_path: str, **upload_kwargs\n) -> str:\n    \"\"\"\n    Uploads an object to the object storage service from a file-like object,\n    which can be a BytesIO object or a BufferedReader.\n\n    Args:\n        from_file_object: The file-like object to upload from.\n        to_path: The path to upload the object to; this gets prefixed\n            with the bucket_folder.\n        **upload_kwargs: Additional keyword arguments to pass to\n            `Blob.upload_from_file`.\n\n    Returns:\n        The path that the object was uploaded to.\n\n    Examples:\n        Upload my_folder/notes.txt object to a BytesIO object.\n        ```python\n        from io import BytesIO\n        from prefect_gcp.cloud_storage import GcsBucket\n\n        gcs_bucket = GcsBucket.load(\"my-bucket\")\n        with open(\"notes.txt\", \"rb\") as f:\n            gcs_bucket.upload_from_file_object(f, \"my_folder/notes.txt\")\n        ```\n\n        Upload BufferedReader object to my_folder/notes.txt.\n        ```python\n        from io import BufferedReader\n        from prefect_gcp.cloud_storage import GcsBucket\n\n        gcs_bucket = GcsBucket.load(\"my-bucket\")\n        with open(\"notes.txt\", \"rb\") as f:\n            gcs_bucket.upload_from_file_object(\n                BufferedReader(f), \"my_folder/notes.txt\"\n            )\n        ```\n    \"\"\"\n    bucket = await self.get_bucket()\n\n    bucket_path = self._join_bucket_folder(to_path)\n    blob = bucket.blob(bucket_path)\n    self.logger.info(\n        f\"Uploading from file object to the bucket \"\n        f\"{self.bucket!r} path {bucket_path!r}.\"\n    )\n\n    await run_sync_in_worker_thread(\n        blob.upload_from_file, from_file_object, **upload_kwargs\n    )\n    return bucket_path\n
"},{"location":"cloud_storage/#prefect_gcp.cloud_storage.GcsBucket.upload_from_folder","title":"upload_from_folder async","text":"

Uploads files within a folder (excluding the folder itself) to the object storage service folder.

Parameters:

Name Type Description Default from_folder Union[str, Path]

The path to the folder to upload from.

required to_folder Optional[str]

The path to upload the folder to. If not provided, will default to bucket_folder or the base directory of the bucket.

None **upload_kwargs Dict[str, Any]

Additional keyword arguments to pass to Blob.upload_from_filename.

{}

Returns:

Type Description str

The path that the folder was uploaded to.

Examples:

Upload local folder my_folder to the bucket's folder my_folder.

from prefect_gcp.cloud_storage import GcsBucket\n\ngcs_bucket = GcsBucket.load(\"my-bucket\")\ngcs_bucket.upload_from_folder(\"my_folder\")\n

Source code in prefect_gcp/cloud_storage.py
@sync_compatible\nasync def upload_from_folder(\n    self,\n    from_folder: Union[str, Path],\n    to_folder: Optional[str] = None,\n    **upload_kwargs: Dict[str, Any],\n) -> str:\n    \"\"\"\n    Uploads files *within* a folder (excluding the folder itself)\n    to the object storage service folder.\n\n    Args:\n        from_folder: The path to the folder to upload from.\n        to_folder: The path to upload the folder to. If not provided, will default\n            to bucket_folder or the base directory of the bucket.\n        **upload_kwargs: Additional keyword arguments to pass to\n            `Blob.upload_from_filename`.\n\n    Returns:\n        The path that the folder was uploaded to.\n\n    Examples:\n        Upload local folder my_folder to the bucket's folder my_folder.\n        ```python\n        from prefect_gcp.cloud_storage import GcsBucket\n\n        gcs_bucket = GcsBucket.load(\"my-bucket\")\n        gcs_bucket.upload_from_folder(\"my_folder\")\n        ```\n    \"\"\"\n    from_folder = Path(from_folder)\n    # join bucket folder expects string for the first input\n    # when it returns None, we need to convert it back to empty string\n    # so relative_to works\n    bucket_folder = self._join_bucket_folder(to_folder or \"\") or \"\"\n\n    num_uploaded = 0\n    bucket = await self.get_bucket()\n\n    async_coros = []\n    for from_path in from_folder.rglob(\"**/*\"):\n        if from_path.is_dir():\n            continue\n        bucket_path = str(Path(bucket_folder) / from_path.relative_to(from_folder))\n        self.logger.info(\n            f\"Uploading from {str(from_path)!r} to the bucket \"\n            f\"{self.bucket!r} path {bucket_path!r}.\"\n        )\n        blob = bucket.blob(bucket_path)\n        async_coros.append(\n            run_sync_in_worker_thread(\n                blob.upload_from_filename, filename=from_path, **upload_kwargs\n            )\n        )\n        num_uploaded += 1\n    await asyncio.gather(*async_coros)\n    if num_uploaded == 0:\n        self.logger.warning(f\"No files were uploaded from {from_folder}.\")\n    return bucket_folder\n
"},{"location":"cloud_storage/#prefect_gcp.cloud_storage.GcsBucket.upload_from_path","title":"upload_from_path async","text":"

Uploads an object from a path to the object storage service.

Parameters:

Name Type Description Default from_path Union[str, Path]

The path to the file to upload from.

required to_path Optional[str]

The path to upload the file to. If not provided, will use the file name of from_path; this gets prefixed with the bucket_folder.

None **upload_kwargs Dict[str, Any]

Additional keyword arguments to pass to Blob.upload_from_filename.

{}

Returns:

Type Description str

The path that the object was uploaded to.

Examples:

Upload notes.txt to my_folder/notes.txt.

from prefect_gcp.cloud_storage import GcsBucket\n\ngcs_bucket = GcsBucket.load(\"my-bucket\")\ngcs_bucket.upload_from_path(\"notes.txt\", \"my_folder/notes.txt\")\n

Source code in prefect_gcp/cloud_storage.py
@sync_compatible\nasync def upload_from_path(\n    self,\n    from_path: Union[str, Path],\n    to_path: Optional[str] = None,\n    **upload_kwargs: Dict[str, Any],\n) -> str:\n    \"\"\"\n    Uploads an object from a path to the object storage service.\n\n    Args:\n        from_path: The path to the file to upload from.\n        to_path: The path to upload the file to. If not provided, will use\n            the file name of from_path; this gets prefixed\n            with the bucket_folder.\n        **upload_kwargs: Additional keyword arguments to pass to\n            `Blob.upload_from_filename`.\n\n    Returns:\n        The path that the object was uploaded to.\n\n    Examples:\n        Upload notes.txt to my_folder/notes.txt.\n        ```python\n        from prefect_gcp.cloud_storage import GcsBucket\n\n        gcs_bucket = GcsBucket.load(\"my-bucket\")\n        gcs_bucket.upload_from_path(\"notes.txt\", \"my_folder/notes.txt\")\n        ```\n    \"\"\"\n    if to_path is None:\n        to_path = Path(from_path).name\n\n    bucket_path = self._join_bucket_folder(to_path)\n    bucket = await self.get_bucket()\n    blob = bucket.blob(bucket_path)\n    self.logger.info(\n        f\"Uploading from {from_path!r} to the bucket \"\n        f\"{self.bucket!r} path {bucket_path!r}.\"\n    )\n\n    await run_sync_in_worker_thread(\n        blob.upload_from_filename, filename=from_path, **upload_kwargs\n    )\n    return bucket_path\n
"},{"location":"cloud_storage/#prefect_gcp.cloud_storage.GcsBucket.write_path","title":"write_path async","text":"

Writes to a GCS bucket.

Parameters:

Name Type Description Default path str

The key name. Each object in your bucket has a unique key (or key name).

required content bytes

What you are uploading to the GCS bucket.

required

Returns:

Type Description str

The path that the contents were written to.
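
Example:

A minimal usage sketch (assuming a saved block named \"my-bucket\"):

from prefect_gcp.cloud_storage import GcsBucket\n\ngcs_bucket = GcsBucket.load(\"my-bucket\")\n# upload raw bytes under the given key\npath = gcs_bucket.write_path(\"my_folder/notes.txt\", content=b\"hello, world\")\n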

Source code in prefect_gcp/cloud_storage.py
@sync_compatible\nasync def write_path(self, path: str, content: bytes) -> str:\n    \"\"\"\n    Writes to an GCS bucket.\n\n    Args:\n        path: The key name. Each object in your bucket has a unique\n            key (or key name).\n        content: What you are uploading to GCS Bucket.\n\n    Returns:\n        The path that the contents were written to.\n    \"\"\"\n    path = self._resolve_path(path)\n    with disable_run_logger():\n        await cloud_storage_upload_blob_from_string.fn(\n            data=content,\n            bucket=self.bucket,\n            blob=path,\n            gcp_credentials=self.gcp_credentials,\n        )\n    return path\n
"},{"location":"cloud_storage/#prefect_gcp.cloud_storage-functions","title":"Functions","text":""},{"location":"cloud_storage/#prefect_gcp.cloud_storage.cloud_storage_copy_blob","title":"cloud_storage_copy_blob async","text":"

Copies data from one Google Cloud Storage bucket to another, without downloading it locally.

Parameters:

Name Type Description Default source_bucket str

Source bucket name.

required dest_bucket str

Destination bucket name.

required source_blob str

Source blob name.

required gcp_credentials GcpCredentials

Credentials to use for authentication with GCP.

required dest_blob Optional[str]

Destination blob name; if not provided, defaults to source_blob.

None timeout Union[float, Tuple[float, float]]

The number of seconds the transport should wait for the server response. Can also be passed as a tuple (connect_timeout, read_timeout).

60 project Optional[str]

Name of the project to use; overrides the gcp_credentials project if provided.

None **copy_kwargs Dict[str, Any]

Additional keyword arguments to pass to Bucket.copy_blob.

{}

Returns:

Type Description str

Destination blob name.

Example

Copies blob from one bucket to another.

from prefect import flow\nfrom prefect_gcp import GcpCredentials\nfrom prefect_gcp.cloud_storage import cloud_storage_copy_blob\n\n@flow()\ndef example_cloud_storage_copy_blob_flow():\n    gcp_credentials = GcpCredentials(\n        service_account_file=\"/path/to/service/account/keyfile.json\")\n    blob = cloud_storage_copy_blob(\n        \"source_bucket\",\n        \"dest_bucket\",\n        \"source_blob\",\n        gcp_credentials\n    )\n    return blob\n\nexample_cloud_storage_copy_blob_flow()\n

Source code in prefect_gcp/cloud_storage.py
@task\nasync def cloud_storage_copy_blob(\n    source_bucket: str,\n    dest_bucket: str,\n    source_blob: str,\n    gcp_credentials: GcpCredentials,\n    dest_blob: Optional[str] = None,\n    timeout: Union[float, Tuple[float, float]] = 60,\n    project: Optional[str] = None,\n    **copy_kwargs: Dict[str, Any],\n) -> str:\n    \"\"\"\n    Copies data from one Google Cloud Storage bucket to another,\n    without downloading it locally.\n\n    Args:\n        source_bucket: Source bucket name.\n        dest_bucket: Destination bucket name.\n        source_blob: Source blob name.\n        gcp_credentials: Credentials to use for authentication with GCP.\n        dest_blob: Destination blob name; if not provided, defaults to source_blob.\n        timeout: The number of seconds the transport should wait\n            for the server response. Can also be passed as a tuple\n            (connect_timeout, read_timeout).\n        project: Name of the project to use; overrides the\n            gcp_credentials project if provided.\n        **copy_kwargs: Additional keyword arguments to pass to\n            `Bucket.copy_blob`.\n\n    Returns:\n        Destination blob name.\n\n    Example:\n        Copies blob from one bucket to another.\n        ```python\n        from prefect import flow\n        from prefect_gcp import GcpCredentials\n        from prefect_gcp.cloud_storage import cloud_storage_copy_blob\n\n        @flow()\n        def example_cloud_storage_copy_blob_flow():\n            gcp_credentials = GcpCredentials(\n                service_account_file=\"/path/to/service/account/keyfile.json\")\n            blob = cloud_storage_copy_blob(\n                \"source_bucket\",\n                \"dest_bucket\",\n                \"source_blob\",\n                gcp_credentials\n            )\n            return blob\n\n        example_cloud_storage_copy_blob_flow()\n        ```\n    \"\"\"\n    logger = get_run_logger()\n    logger.info(\n        \"Copying blob named %s from the %s bucket to the %s bucket\",\n        source_blob,\n        source_bucket,\n        dest_bucket,\n    )\n\n    source_bucket_obj = await _get_bucket(\n        source_bucket, gcp_credentials, project=project\n    )\n\n    dest_bucket_obj = await _get_bucket(dest_bucket, gcp_credentials, project=project)\n    if dest_blob is None:\n        dest_blob = source_blob\n\n    source_blob_obj = source_bucket_obj.blob(source_blob)\n    await run_sync_in_worker_thread(\n        source_bucket_obj.copy_blob,\n        blob=source_blob_obj,\n        destination_bucket=dest_bucket_obj,\n        new_name=dest_blob,\n        timeout=timeout,\n        **copy_kwargs,\n    )\n\n    return dest_blob\n
"},{"location":"cloud_storage/#prefect_gcp.cloud_storage.cloud_storage_create_bucket","title":"cloud_storage_create_bucket async","text":"

Creates a bucket.

Parameters:

Name Type Description Default bucket str

Name of the bucket.

required gcp_credentials GcpCredentials

Credentials to use for authentication with GCP.

required project Optional[str]

Name of the project to use; overrides the gcp_credentials project if provided.

None location Optional[str]

Location of the bucket.

None **create_kwargs Dict[str, Any]

Additional keyword arguments to pass to client.create_bucket.

{}

Returns:

Type Description str

The bucket name.

Example

Creates a bucket named \"prefect\".

from prefect import flow\nfrom prefect_gcp import GcpCredentials\nfrom prefect_gcp.cloud_storage import cloud_storage_create_bucket\n\n@flow()\ndef example_cloud_storage_create_bucket_flow():\n    gcp_credentials = GcpCredentials(\n        service_account_file=\"/path/to/service/account/keyfile.json\")\n    bucket = cloud_storage_create_bucket(\"prefect\", gcp_credentials)\n\nexample_cloud_storage_create_bucket_flow()\n

Source code in prefect_gcp/cloud_storage.py
@task\nasync def cloud_storage_create_bucket(\n    bucket: str,\n    gcp_credentials: GcpCredentials,\n    project: Optional[str] = None,\n    location: Optional[str] = None,\n    **create_kwargs: Dict[str, Any],\n) -> str:\n    \"\"\"\n    Creates a bucket.\n\n    Args:\n        bucket: Name of the bucket.\n        gcp_credentials: Credentials to use for authentication with GCP.\n        project: Name of the project to use; overrides the\n            gcp_credentials project if provided.\n        location: Location of the bucket.\n        **create_kwargs: Additional keyword arguments to pass to `client.create_bucket`.\n\n    Returns:\n        The bucket name.\n\n    Example:\n        Creates a bucket named \"prefect\".\n        ```python\n        from prefect import flow\n        from prefect_gcp import GcpCredentials\n        from prefect_gcp.cloud_storage import cloud_storage_create_bucket\n\n        @flow()\n        def example_cloud_storage_create_bucket_flow():\n            gcp_credentials = GcpCredentials(\n                service_account_file=\"/path/to/service/account/keyfile.json\")\n            bucket = cloud_storage_create_bucket(\"prefect\", gcp_credentials)\n\n        example_cloud_storage_create_bucket_flow()\n        ```\n    \"\"\"\n    logger = get_run_logger()\n    logger.info(\"Creating %s bucket\", bucket)\n\n    client = gcp_credentials.get_cloud_storage_client(project=project)\n    await run_sync_in_worker_thread(\n        client.create_bucket, bucket, location=location, **create_kwargs\n    )\n    return bucket\n
"},{"location":"cloud_storage/#prefect_gcp.cloud_storage.cloud_storage_download_blob_as_bytes","title":"cloud_storage_download_blob_as_bytes async","text":"

Downloads a blob as bytes.

Parameters:

Name Type Description Default bucket str

Name of the bucket.

required blob str

Name of the Cloud Storage blob.

required gcp_credentials GcpCredentials

Credentials to use for authentication with GCP.

required chunk_size int

The size of a chunk of data whenever iterating (in bytes). This must be a multiple of 256 KB per the API specification.

None encryption_key Optional[str]

An encryption key.

None timeout Union[float, Tuple[float, float]]

The number of seconds the transport should wait for the server response. Can also be passed as a tuple (connect_timeout, read_timeout).

60 project Optional[str]

Name of the project to use; overrides the gcp_credentials project if provided.

None **download_kwargs Dict[str, Any]

Additional keyword arguments to pass to Blob.download_as_bytes.

{}

Returns:

Type Description bytes

A bytes or string representation of the blob object.

Example

Downloads blob from bucket.

from prefect import flow\nfrom prefect_gcp import GcpCredentials\nfrom prefect_gcp.cloud_storage import cloud_storage_download_blob_as_bytes\n\n@flow()\ndef example_cloud_storage_download_blob_flow():\n    gcp_credentials = GcpCredentials(\n        service_account_file=\"/path/to/service/account/keyfile.json\")\n    contents = cloud_storage_download_blob_as_bytes(\n        \"bucket\", \"blob\", gcp_credentials)\n    return contents\n\nexample_cloud_storage_download_blob_flow()\n

Source code in prefect_gcp/cloud_storage.py
@task\nasync def cloud_storage_download_blob_as_bytes(\n    bucket: str,\n    blob: str,\n    gcp_credentials: GcpCredentials,\n    chunk_size: Optional[int] = None,\n    encryption_key: Optional[str] = None,\n    timeout: Union[float, Tuple[float, float]] = 60,\n    project: Optional[str] = None,\n    **download_kwargs: Dict[str, Any],\n) -> bytes:\n    \"\"\"\n    Downloads a blob as bytes.\n\n    Args:\n        bucket: Name of the bucket.\n        blob: Name of the Cloud Storage blob.\n        gcp_credentials: Credentials to use for authentication with GCP.\n        chunk_size (int, optional): The size of a chunk of data whenever\n            iterating (in bytes). This must be a multiple of 256 KB\n            per the API specification.\n        encryption_key: An encryption key.\n        timeout: The number of seconds the transport should wait\n            for the server response. Can also be passed as a tuple\n            (connect_timeout, read_timeout).\n        project: Name of the project to use; overrides the\n            gcp_credentials project if provided.\n        **download_kwargs: Additional keyword arguments to pass to\n            `Blob.download_as_bytes`.\n\n    Returns:\n        A bytes or string representation of the blob object.\n\n    Example:\n        Downloads blob from bucket.\n        ```python\n        from prefect import flow\n        from prefect_gcp import GcpCredentials\n        from prefect_gcp.cloud_storage import cloud_storage_download_blob_as_bytes\n\n        @flow()\n        def example_cloud_storage_download_blob_flow():\n            gcp_credentials = GcpCredentials(\n                service_account_file=\"/path/to/service/account/keyfile.json\")\n            contents = cloud_storage_download_blob_as_bytes(\n                \"bucket\", \"blob\", gcp_credentials)\n            return contents\n\n        example_cloud_storage_download_blob_flow()\n        ```\n    \"\"\"\n    logger = get_run_logger()\n    logger.info(\"Downloading blob named %s from the %s bucket\", blob, bucket)\n\n    bucket_obj = await _get_bucket(bucket, gcp_credentials, project=project)\n    blob_obj = bucket_obj.blob(\n        blob, chunk_size=chunk_size, encryption_key=encryption_key\n    )\n\n    contents = await run_sync_in_worker_thread(\n        blob_obj.download_as_bytes, timeout=timeout, **download_kwargs\n    )\n    return contents\n
"},{"location":"cloud_storage/#prefect_gcp.cloud_storage.cloud_storage_download_blob_to_file","title":"cloud_storage_download_blob_to_file async","text":"

Downloads a blob to a file path.

Parameters:

Name Type Description Default bucket str

Name of the bucket.

required blob str

Name of the Cloud Storage blob.

required path Union[str, Path]

Downloads the contents to the provided file path; if the path is a directory, automatically joins the blob name.

required gcp_credentials GcpCredentials

Credentials to use for authentication with GCP.

required chunk_size int

The size of a chunk of data whenever iterating (in bytes). This must be a multiple of 256 KB per the API specification.

None encryption_key Optional[str]

An encryption key.

None timeout Union[float, Tuple[float, float]]

The number of seconds the transport should wait for the server response. Can also be passed as a tuple (connect_timeout, read_timeout).

60 project Optional[str]

Name of the project to use; overrides the gcp_credentials project if provided.

None **download_kwargs Dict[str, Any]

Additional keyword arguments to pass to Blob.download_to_filename.

{}

Returns:

Type Description Union[str, Path]

The path to the blob object.

Example

Downloads blob from bucket.

from prefect import flow\nfrom prefect_gcp import GcpCredentials\nfrom prefect_gcp.cloud_storage import cloud_storage_download_blob_to_file\n\n@flow()\ndef example_cloud_storage_download_blob_flow():\n    gcp_credentials = GcpCredentials(\n        service_account_file=\"/path/to/service/account/keyfile.json\")\n    path = cloud_storage_download_blob_to_file(\n        \"bucket\", \"blob\", \"file_path\", gcp_credentials)\n    return path\n\nexample_cloud_storage_download_blob_flow()\n

Source code in prefect_gcp/cloud_storage.py
@task\nasync def cloud_storage_download_blob_to_file(\n    bucket: str,\n    blob: str,\n    path: Union[str, Path],\n    gcp_credentials: GcpCredentials,\n    chunk_size: Optional[int] = None,\n    encryption_key: Optional[str] = None,\n    timeout: Union[float, Tuple[float, float]] = 60,\n    project: Optional[str] = None,\n    **download_kwargs: Dict[str, Any],\n) -> Union[str, Path]:\n    \"\"\"\n    Downloads a blob to a file path.\n\n    Args:\n        bucket: Name of the bucket.\n        blob: Name of the Cloud Storage blob.\n        path: Downloads the contents to the provided file path;\n            if the path is a directory, automatically joins the blob name.\n        gcp_credentials: Credentials to use for authentication with GCP.\n        chunk_size (int, optional): The size of a chunk of data whenever\n            iterating (in bytes). This must be a multiple of 256 KB\n            per the API specification.\n        encryption_key: An encryption key.\n        timeout: The number of seconds the transport should wait\n            for the server response. Can also be passed as a tuple\n            (connect_timeout, read_timeout).\n        project: Name of the project to use; overrides the\n            gcp_credentials project if provided.\n        **download_kwargs: Additional keyword arguments to pass to\n            `Blob.download_to_filename`.\n\n    Returns:\n        The path to the blob object.\n\n    Example:\n        Downloads blob from bucket.\n        ```python\n        from prefect import flow\n        from prefect_gcp import GcpCredentials\n        from prefect_gcp.cloud_storage import cloud_storage_download_blob_to_file\n\n        @flow()\n        def example_cloud_storage_download_blob_flow():\n            gcp_credentials = GcpCredentials(\n                service_account_file=\"/path/to/service/account/keyfile.json\")\n            path = cloud_storage_download_blob_to_file(\n                \"bucket\", \"blob\", \"file_path\", gcp_credentials)\n            return path\n\n        example_cloud_storage_download_blob_flow()\n        ```\n    \"\"\"\n    logger = get_run_logger()\n    logger.info(\n        \"Downloading blob named %s from the %s bucket to %s\", blob, bucket, path\n    )\n\n    bucket_obj = await _get_bucket(bucket, gcp_credentials, project=project)\n    blob_obj = bucket_obj.blob(\n        blob, chunk_size=chunk_size, encryption_key=encryption_key\n    )\n\n    if os.path.isdir(path):\n        if isinstance(path, Path):\n            path = path.joinpath(blob)  # keep as Path if Path is passed\n        else:\n            path = os.path.join(path, blob)  # keep as str if a str is passed\n\n    await run_sync_in_worker_thread(\n        blob_obj.download_to_filename, path, timeout=timeout, **download_kwargs\n    )\n    return path\n
"},{"location":"cloud_storage/#prefect_gcp.cloud_storage.cloud_storage_upload_blob_from_file","title":"cloud_storage_upload_blob_from_file async","text":"

Uploads a blob from a file path or a file-like object. Passing a file-like object is useful when the data was downloaded from the web, since it can bypass writing to disk and upload directly to Cloud Storage.

Parameters:

Name Type Description Default file Union[str, Path, BytesIO]

Path to data or file like object to upload.

required bucket str

Name of the bucket.

required blob str

Name of the Cloud Storage blob.

required gcp_credentials GcpCredentials

Credentials to use for authentication with GCP.

required content_type Optional[str]

Type of content being uploaded.

None chunk_size Optional[int]

The size of a chunk of data whenever iterating (in bytes). This must be a multiple of 256 KB per the API specification.

None encryption_key Optional[str]

An encryption key.

None timeout Union[float, Tuple[float, float]]

The number of seconds the transport should wait for the server response. Can also be passed as a tuple (connect_timeout, read_timeout).

60 project Optional[str]

Name of the project to use; overrides the gcp_credentials project if provided.

None **upload_kwargs Dict[str, Any]

Additional keyword arguments to pass to Blob.upload_from_file or Blob.upload_from_filename.

{}

Returns:

Type Description str

The blob name.

Example

Uploads blob to bucket.

from prefect import flow\nfrom prefect_gcp import GcpCredentials\nfrom prefect_gcp.cloud_storage import cloud_storage_upload_blob_from_file\n\n@flow()\ndef example_cloud_storage_upload_blob_from_file_flow():\n    gcp_credentials = GcpCredentials(\n        service_account_file=\"/path/to/service/account/keyfile.json\")\n    blob = cloud_storage_upload_blob_from_file(\n        \"/path/somewhere\", \"bucket\", \"blob\", gcp_credentials)\n    return blob\n\nexample_cloud_storage_upload_blob_from_file_flow()\n

Source code in prefect_gcp/cloud_storage.py
@task\nasync def cloud_storage_upload_blob_from_file(\n    file: Union[str, Path, BytesIO],\n    bucket: str,\n    blob: str,\n    gcp_credentials: GcpCredentials,\n    content_type: Optional[str] = None,\n    chunk_size: Optional[int] = None,\n    encryption_key: Optional[str] = None,\n    timeout: Union[float, Tuple[float, float]] = 60,\n    project: Optional[str] = None,\n    **upload_kwargs: Dict[str, Any],\n) -> str:\n    \"\"\"\n    Uploads a blob from file path or file-like object. Usage for passing in\n    file-like object is if the data was downloaded from the web;\n    can bypass writing to disk and directly upload to Cloud Storage.\n\n    Args:\n        file: Path to data or file like object to upload.\n        bucket: Name of the bucket.\n        blob: Name of the Cloud Storage blob.\n        gcp_credentials: Credentials to use for authentication with GCP.\n        content_type: Type of content being uploaded.\n        chunk_size: The size of a chunk of data whenever\n            iterating (in bytes). This must be a multiple of 256 KB\n            per the API specification.\n        encryption_key: An encryption key.\n        timeout: The number of seconds the transport should wait\n            for the server response. Can also be passed as a tuple\n            (connect_timeout, read_timeout).\n        project: Name of the project to use; overrides the\n            gcp_credentials project if provided.\n        **upload_kwargs: Additional keyword arguments to pass to\n            `Blob.upload_from_file` or `Blob.upload_from_filename`.\n\n    Returns:\n        The blob name.\n\n    Example:\n        Uploads blob to bucket.\n        ```python\n        from prefect import flow\n        from prefect_gcp import GcpCredentials\n        from prefect_gcp.cloud_storage import cloud_storage_upload_blob_from_file\n\n        @flow()\n        def example_cloud_storage_upload_blob_from_file_flow():\n            gcp_credentials = GcpCredentials(\n                service_account_file=\"/path/to/service/account/keyfile.json\")\n            blob = cloud_storage_upload_blob_from_file(\n                \"/path/somewhere\", \"bucket\", \"blob\", gcp_credentials)\n            return blob\n\n        example_cloud_storage_upload_blob_from_file_flow()\n        ```\n    \"\"\"\n    logger = get_run_logger()\n    logger.info(\"Uploading blob named %s to the %s bucket\", blob, bucket)\n\n    bucket_obj = await _get_bucket(bucket, gcp_credentials, project=project)\n    blob_obj = bucket_obj.blob(\n        blob, chunk_size=chunk_size, encryption_key=encryption_key\n    )\n\n    if isinstance(file, BytesIO):\n        await run_sync_in_worker_thread(\n            blob_obj.upload_from_file,\n            file,\n            content_type=content_type,\n            timeout=timeout,\n            **upload_kwargs,\n        )\n    else:\n        await run_sync_in_worker_thread(\n            blob_obj.upload_from_filename,\n            file,\n            content_type=content_type,\n            timeout=timeout,\n            **upload_kwargs,\n        )\n    return blob\n
"},{"location":"cloud_storage/#prefect_gcp.cloud_storage.cloud_storage_upload_blob_from_string","title":"cloud_storage_upload_blob_from_string async","text":"

Uploads a blob from a string or bytes representation of data.

Parameters:

Name Type Description Default data Union[str, bytes]

String or bytes representation of data to upload.

required bucket str

Name of the bucket.

required blob str

Name of the Cloud Storage blob.

required gcp_credentials GcpCredentials

Credentials to use for authentication with GCP.

required content_type Optional[str]

Type of content being uploaded.

None chunk_size Optional[int]

The size of a chunk of data whenever iterating (in bytes). This must be a multiple of 256 KB per the API specification.

None encryption_key Optional[str]

An encryption key.

None timeout Union[float, Tuple[float, float]]

The number of seconds the transport should wait for the server response. Can also be passed as a tuple (connect_timeout, read_timeout).

60 project Optional[str]

Name of the project to use; overrides the gcp_credentials project if provided.

None **upload_kwargs Dict[str, Any]

Additional keyword arguments to pass to Blob.upload_from_string.

{}

Returns:

Type Description str

The blob name.

Example

Uploads blob to bucket.

from prefect import flow\nfrom prefect_gcp import GcpCredentials\nfrom prefect_gcp.cloud_storage import cloud_storage_upload_blob_from_string\n\n@flow()\ndef example_cloud_storage_upload_blob_from_string_flow():\n    gcp_credentials = GcpCredentials(\n        service_account_file=\"/path/to/service/account/keyfile.json\")\n    blob = cloud_storage_upload_blob_from_string(\n        \"data\", \"bucket\", \"blob\", gcp_credentials)\n    return blob\n\nexample_cloud_storage_upload_blob_from_string_flow()\n
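Bytes data can be uploaded the same way, optionally with an explicit content type. A brief sketch, using placeholder bucket and blob names:

from prefect import flow
from prefect_gcp import GcpCredentials
from prefect_gcp.cloud_storage import cloud_storage_upload_blob_from_string

@flow()
def example_upload_bytes_blob_flow():
    gcp_credentials = GcpCredentials(
        service_account_file="/path/to/service/account/keyfile.json")
    # Upload raw bytes and label them as JSON; names are placeholders.
    blob = cloud_storage_upload_blob_from_string(
        b'{"key": "value"}', "bucket", "blob", gcp_credentials,
        content_type="application/json")
    return blob

example_upload_bytes_blob_flow()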

Source code in prefect_gcp/cloud_storage.py
@task\nasync def cloud_storage_upload_blob_from_string(\n    data: Union[str, bytes],\n    bucket: str,\n    blob: str,\n    gcp_credentials: GcpCredentials,\n    content_type: Optional[str] = None,\n    chunk_size: Optional[int] = None,\n    encryption_key: Optional[str] = None,\n    timeout: Union[float, Tuple[float, float]] = 60,\n    project: Optional[str] = None,\n    **upload_kwargs: Dict[str, Any],\n) -> str:\n    \"\"\"\n    Uploads a blob from a string or bytes representation of data.\n\n    Args:\n        data: String or bytes representation of data to upload.\n        bucket: Name of the bucket.\n        blob: Name of the Cloud Storage blob.\n        gcp_credentials: Credentials to use for authentication with GCP.\n        content_type: Type of content being uploaded.\n        chunk_size: The size of a chunk of data whenever\n            iterating (in bytes). This must be a multiple of 256 KB\n            per the API specification.\n        encryption_key: An encryption key.\n        timeout: The number of seconds the transport should wait\n            for the server response. Can also be passed as a tuple\n            (connect_timeout, read_timeout).\n        project: Name of the project to use; overrides the\n            gcp_credentials project if provided.\n        **upload_kwargs: Additional keyword arguments to pass to\n            `Blob.upload_from_string`.\n\n    Returns:\n        The blob name.\n\n    Example:\n        Uploads blob to bucket.\n        ```python\n        from prefect import flow\n        from prefect_gcp import GcpCredentials\n        from prefect_gcp.cloud_storage import cloud_storage_upload_blob_from_string\n\n        @flow()\n        def example_cloud_storage_upload_blob_from_string_flow():\n            gcp_credentials = GcpCredentials(\n                service_account_file=\"/path/to/service/account/keyfile.json\")\n            blob = cloud_storage_upload_blob_from_string(\n                \"data\", \"bucket\", \"blob\", gcp_credentials)\n            return blob\n\n        example_cloud_storage_upload_blob_from_string_flow()\n        ```\n    \"\"\"\n    logger = get_run_logger()\n    logger.info(\"Uploading blob named %s to the %s bucket\", blob, bucket)\n\n    bucket_obj = await _get_bucket(bucket, gcp_credentials, project=project)\n    blob_obj = bucket_obj.blob(\n        blob, chunk_size=chunk_size, encryption_key=encryption_key\n    )\n\n    await run_sync_in_worker_thread(\n        blob_obj.upload_from_string,\n        data,\n        content_type=content_type,\n        timeout=timeout,\n        **upload_kwargs,\n    )\n    return blob\n
"},{"location":"contributing/","title":"Contributing","text":"

If you'd like to contribute a fix for an issue or add a feature to prefect-gcp, please propose changes through a pull request from a fork of the repository.

Here are the steps:

  1. Fork the repository
  2. Clone the forked repository
  3. Install the repository and its dependencies:
    pip install -e \".[dev]\"\n
  4. Make desired changes
  5. Add tests
  6. Add an entry to CHANGELOG.md
  7. Install pre-commit to perform quality checks prior to commit:
    pre-commit install\n
  8. git commit, git push, and create a pull request
"},{"location":"credentials/","title":"Credentials","text":""},{"location":"credentials/#prefect_gcp.credentials","title":"prefect_gcp.credentials","text":"

Module handling GCP credentials.

"},{"location":"credentials/#prefect_gcp.credentials-classes","title":"Classes","text":""},{"location":"credentials/#prefect_gcp.credentials.GcpCredentials","title":"GcpCredentials","text":"

Bases: CredentialsBlock

Block used to manage authentication with GCP. Google authentication is handled via the google.oauth2 module or through the CLI. Specify either service_account_file or service_account_info; if neither is specified, the client will try to detect the credentials following Google's Application Default Credentials. See Google's Authentication documentation for details on inference and recommended authentication patterns.

Attributes:

Name Type Description service_account_file Optional[Path]

Path to the service account JSON keyfile.

service_account_info Optional[SecretDict]

The contents of the keyfile as a dict.

Example

Load GCP credentials stored in a GCP Credentials Block:

from prefect_gcp import GcpCredentials\ngcp_credentials_block = GcpCredentials.load(\"BLOCK_NAME\")\n
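Because credentials can be inferred, a block can also be configured without a keyfile. A minimal sketch, assuming Application Default Credentials are already set up (for example via gcloud auth application-default login) and that "my-project" is a placeholder project name:

from prefect_gcp import GcpCredentials

# No service_account_file or service_account_info provided, so the client
# falls back to Google's Application Default Credentials.
gcp_credentials = GcpCredentials(project="my-project")
storage_client = gcp_credentials.get_cloud_storage_client()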

Source code in prefect_gcp/credentials.py
class GcpCredentials(CredentialsBlock):\n    \"\"\"\n    Block used to manage authentication with GCP. Google authentication is\n    handled via the `google.oauth2` module or through the CLI.\n    Specify either one of service `account_file` or `service_account_info`; if both\n    are not specified, the client will try to detect the credentials following Google's\n    [Application Default Credentials](https://cloud.google.com/docs/authentication/application-default-credentials).\n    See Google's [Authentication documentation](https://cloud.google.com/docs/authentication#service-accounts)\n    for details on inference and recommended authentication patterns.\n\n    Attributes:\n        service_account_file: Path to the service account JSON keyfile.\n        service_account_info: The contents of the keyfile as a dict.\n\n    Example:\n        Load GCP credentials stored in a `GCP Credentials` Block:\n        ```python\n        from prefect_gcp import GcpCredentials\n        gcp_credentials_block = GcpCredentials.load(\"BLOCK_NAME\")\n        ```\n    \"\"\"  # noqa\n\n    _logo_url = \"https://cdn.sanity.io/images/3ugk85nk/production/10424e311932e31c477ac2b9ef3d53cefbaad708-250x250.png\"  # noqa\n    _block_type_name = \"GCP Credentials\"\n    _documentation_url = \"https://prefecthq.github.io/prefect-gcp/credentials/#prefect_gcp.credentials.GcpCredentials\"  # noqa: E501\n\n    service_account_file: Optional[Path] = Field(\n        default=None, description=\"Path to the service account JSON keyfile.\"\n    )\n    service_account_info: Optional[SecretDict] = Field(\n        default=None, description=\"The contents of the keyfile as a dict.\"\n    )\n    project: Optional[str] = Field(\n        default=None, description=\"The GCP project to use for the client.\"\n    )\n\n    _service_account_email: Optional[str] = None\n\n    @root_validator\n    def _provide_one_service_account_source(cls, values):\n        \"\"\"\n        Ensure that only a service account file or service account info ias provided.\n        \"\"\"\n        both_service_account = (\n            values.get(\"service_account_info\") is not None\n            and values.get(\"service_account_file\") is not None\n        )\n        if both_service_account:\n            raise ValueError(\n                \"Only one of service_account_info or service_account_file \"\n                \"can be specified at once\"\n            )\n        return values\n\n    @validator(\"service_account_file\")\n    def _check_service_account_file(cls, file):\n        \"\"\"Get full path of provided file and make sure that it exists.\"\"\"\n        if not file:\n            return file\n\n        service_account_file = Path(file).expanduser()\n        if not service_account_file.exists():\n            raise ValueError(\"The provided path to the service account is invalid\")\n        return service_account_file\n\n    @validator(\"service_account_info\", pre=True)\n    def _convert_json_string_json_service_account_info(cls, value):\n        \"\"\"\n        Converts service account info provided as a json formatted string\n        to a dictionary\n        \"\"\"\n        if isinstance(value, str):\n            try:\n                service_account_info = json.loads(value)\n                return service_account_info\n            except Exception:\n                raise ValueError(\"Unable to decode service_account_info\")\n        else:\n            return value\n\n    def block_initialization(self):\n        credentials = 
self.get_credentials_from_service_account()\n        if self.project is None:\n            if self.service_account_info or self.service_account_file:\n                credentials_project = credentials.project_id\n            # google.auth.default using gcloud auth application-default login\n            elif credentials.quota_project_id:\n                credentials_project = credentials.quota_project_id\n            # compute-assigned service account via GCP metadata server\n            else:\n                _, credentials_project = google.auth.default()\n            self.project = credentials_project\n\n        if hasattr(credentials, \"service_account_email\"):\n            self._service_account_email = credentials.service_account_email\n\n    def get_credentials_from_service_account(self) -> Credentials:\n        \"\"\"\n        Helper method to serialize credentials by using either\n        service_account_file or service_account_info.\n        \"\"\"\n        if self.service_account_info:\n            credentials = Credentials.from_service_account_info(\n                self.service_account_info.get_secret_value(),\n                scopes=[\"https://www.googleapis.com/auth/cloud-platform\"],\n            )\n        elif self.service_account_file:\n            credentials = Credentials.from_service_account_file(\n                self.service_account_file,\n                scopes=[\"https://www.googleapis.com/auth/cloud-platform\"],\n            )\n        else:\n            credentials, _ = google.auth.default()\n        return credentials\n\n    @sync_compatible\n    async def get_access_token(self):\n        \"\"\"\n        See: https://stackoverflow.com/a/69107745\n        Also: https://www.jhanley.com/google-cloud-creating-oauth-access-tokens-for-rest-api-calls/\n        \"\"\"  # noqa\n        request = google.auth.transport.requests.Request()\n        credentials = self.get_credentials_from_service_account()\n        await run_sync_in_worker_thread(credentials.refresh, request)\n        return credentials.token\n\n    def get_client(\n        self,\n        client_type: Union[str, ClientType],\n        **get_client_kwargs: Dict[str, Any],\n    ) -> Any:\n        \"\"\"\n        Helper method to dynamically get a client type.\n\n        Args:\n            client_type: The name of the client to get.\n            **get_client_kwargs: Additional keyword arguments to pass to the\n                `get_*_client` method.\n\n        Returns:\n            An authenticated client.\n\n        Raises:\n            ValueError: if the client is not supported.\n        \"\"\"\n        if isinstance(client_type, str):\n            client_type = ClientType(client_type)\n        client_type = client_type.value\n        get_client_method = getattr(self, f\"get_{client_type}_client\")\n        return get_client_method(**get_client_kwargs)\n\n    @_raise_help_msg(\"cloud_storage\")\n    def get_cloud_storage_client(\n        self, project: Optional[str] = None\n    ) -> \"StorageClient\":\n        \"\"\"\n        Gets an authenticated Cloud Storage client.\n\n        Args:\n            project: Name of the project to use; overrides the base\n                class's project if provided.\n\n        Returns:\n            An authenticated Cloud Storage client.\n\n        Examples:\n            Gets a GCP Cloud Storage client from a path.\n            ```python\n            from prefect import flow\n            from prefect_gcp.credentials import GcpCredentials\n\n            @flow()\n            def 
example_get_client_flow():\n                service_account_file = \"~/.secrets/prefect-service-account.json\"\n                client = GcpCredentials(\n                    service_account_file=service_account_file\n                ).get_cloud_storage_client()\n            example_get_client_flow()\n            ```\n\n            Gets a GCP Cloud Storage client from a dictionary.\n            ```python\n            from prefect import flow\n            from prefect_gcp.credentials import GcpCredentials\n\n            @flow()\n            def example_get_client_flow():\n                service_account_info = {\n                    \"type\": \"service_account\",\n                    \"project_id\": \"project_id\",\n                    \"private_key_id\": \"private_key_id\",\n                    \"private_key\": \"private_key\",\n                    \"client_email\": \"client_email\",\n                    \"client_id\": \"client_id\",\n                    \"auth_uri\": \"auth_uri\",\n                    \"token_uri\": \"token_uri\",\n                    \"auth_provider_x509_cert_url\": \"auth_provider_x509_cert_url\",\n                    \"client_x509_cert_url\": \"client_x509_cert_url\"\n                }\n                client = GcpCredentials(\n                    service_account_info=service_account_info\n                ).get_cloud_storage_client()\n            example_get_client_flow()\n            ```\n        \"\"\"\n        credentials = self.get_credentials_from_service_account()\n\n        # override class project if method project is provided\n        project = project or self.project\n        storage_client = StorageClient(credentials=credentials, project=project)\n        return storage_client\n\n    @_raise_help_msg(\"bigquery\")\n    def get_bigquery_client(\n        self, project: str = None, location: str = None\n    ) -> \"BigQueryClient\":\n        \"\"\"\n        Gets an authenticated BigQuery client.\n\n        Args:\n            project: Name of the project to use; overrides the base\n                class's project if provided.\n            location: Location to use.\n\n        Returns:\n            An authenticated BigQuery client.\n\n        Examples:\n            Gets a GCP BigQuery client from a path.\n            ```python\n            from prefect import flow\n            from prefect_gcp.credentials import GcpCredentials\n\n            @flow()\n            def example_get_client_flow():\n                service_account_file = \"~/.secrets/prefect-service-account.json\"\n                client = GcpCredentials(\n                    service_account_file=service_account_file\n                ).get_bigquery_client()\n            example_get_client_flow()\n            ```\n\n            Gets a GCP BigQuery client from a dictionary.\n            ```python\n            from prefect import flow\n            from prefect_gcp.credentials import GcpCredentials\n\n            @flow()\n            def example_get_client_flow():\n                service_account_info = {\n                    \"type\": \"service_account\",\n                    \"project_id\": \"project_id\",\n                    \"private_key_id\": \"private_key_id\",\n                    \"private_key\": \"private_key\",\n                    \"client_email\": \"client_email\",\n                    \"client_id\": \"client_id\",\n                    \"auth_uri\": \"auth_uri\",\n                    \"token_uri\": \"token_uri\",\n                    \"auth_provider_x509_cert_url\": 
\"auth_provider_x509_cert_url\",\n                    \"client_x509_cert_url\": \"client_x509_cert_url\"\n                }\n                client = GcpCredentials(\n                    service_account_info=service_account_info\n                ).get_bigquery_client()\n\n            example_get_client_flow()\n            ```\n        \"\"\"\n        credentials = self.get_credentials_from_service_account()\n\n        # override class project if method project is provided\n        project = project or self.project\n        big_query_client = BigQueryClient(\n            credentials=credentials, project=project, location=location\n        )\n        return big_query_client\n\n    @_raise_help_msg(\"secret_manager\")\n    def get_secret_manager_client(self) -> \"SecretManagerServiceClient\":\n        \"\"\"\n        Gets an authenticated Secret Manager Service client.\n\n        Returns:\n            An authenticated Secret Manager Service client.\n\n        Examples:\n            Gets a GCP Secret Manager client from a path.\n            ```python\n            from prefect import flow\n            from prefect_gcp.credentials import GcpCredentials\n\n            @flow()\n            def example_get_client_flow():\n                service_account_file = \"~/.secrets/prefect-service-account.json\"\n                client = GcpCredentials(\n                    service_account_file=service_account_file\n                ).get_secret_manager_client()\n            example_get_client_flow()\n            ```\n\n            Gets a GCP Cloud Storage client from a dictionary.\n            ```python\n            from prefect import flow\n            from prefect_gcp.credentials import GcpCredentials\n\n            @flow()\n            def example_get_client_flow():\n                service_account_info = {\n                    \"type\": \"service_account\",\n                    \"project_id\": \"project_id\",\n                    \"private_key_id\": \"private_key_id\",\n                    \"private_key\": \"private_key\",\n                    \"client_email\": \"client_email\",\n                    \"client_id\": \"client_id\",\n                    \"auth_uri\": \"auth_uri\",\n                    \"token_uri\": \"token_uri\",\n                    \"auth_provider_x509_cert_url\": \"auth_provider_x509_cert_url\",\n                    \"client_x509_cert_url\": \"client_x509_cert_url\"\n                }\n                client = GcpCredentials(\n                    service_account_info=service_account_info\n                ).get_secret_manager_client()\n            example_get_client_flow()\n            ```\n        \"\"\"\n        credentials = self.get_credentials_from_service_account()\n\n        # doesn't accept project; must pass in project in tasks\n        secret_manager_client = SecretManagerServiceClient(credentials=credentials)\n        return secret_manager_client\n\n    @_raise_help_msg(\"aiplatform\")\n    def get_job_service_client(\n        self, client_options: Dict[str, Any] = None\n    ) -> \"JobServiceClient\":\n        \"\"\"\n        Gets an authenticated Job Service client for Vertex AI.\n\n        Returns:\n            An authenticated Job Service client.\n\n        Examples:\n            Gets a GCP Job Service client from a path.\n            ```python\n            from prefect import flow\n            from prefect_gcp.credentials import GcpCredentials\n\n            @flow()\n            def example_get_client_flow():\n                service_account_file = 
\"~/.secrets/prefect-service-account.json\"\n                client = GcpCredentials(\n                    service_account_file=service_account_file\n                ).get_job_service_client()\n\n            example_get_client_flow()\n            ```\n\n            Gets a GCP Cloud Storage client from a dictionary.\n            ```python\n            from prefect import flow\n            from prefect_gcp.credentials import GcpCredentials\n\n            @flow()\n            def example_get_client_flow():\n                service_account_info = {\n                    \"type\": \"service_account\",\n                    \"project_id\": \"project_id\",\n                    \"private_key_id\": \"private_key_id\",\n                    \"private_key\": \"private_key\",\n                    \"client_email\": \"client_email\",\n                    \"client_id\": \"client_id\",\n                    \"auth_uri\": \"auth_uri\",\n                    \"token_uri\": \"token_uri\",\n                    \"auth_provider_x509_cert_url\": \"auth_provider_x509_cert_url\",\n                    \"client_x509_cert_url\": \"client_x509_cert_url\"\n                }\n                client = GcpCredentials(\n                    service_account_info=service_account_info\n                ).get_job_service_client()\n\n            example_get_client_flow()\n            ```\n        \"\"\"\n        credentials = self.get_credentials_from_service_account()\n        job_service_client = JobServiceClient(\n            credentials=credentials, client_options=client_options\n        )\n        return job_service_client\n
"},{"location":"credentials/#prefect_gcp.credentials.GcpCredentials-functions","title":"Functions","text":""},{"location":"credentials/#prefect_gcp.credentials.GcpCredentials.get_access_token","title":"get_access_token async","text":"Source code in prefect_gcp/credentials.py
@sync_compatible\nasync def get_access_token(self):\n    \"\"\"\n    See: https://stackoverflow.com/a/69107745\n    Also: https://www.jhanley.com/google-cloud-creating-oauth-access-tokens-for-rest-api-calls/\n    \"\"\"  # noqa\n    request = google.auth.transport.requests.Request()\n    credentials = self.get_credentials_from_service_account()\n    await run_sync_in_worker_thread(credentials.refresh, request)\n    return credentials.token\n
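A brief usage sketch, assuming a GcpCredentials block saved under the placeholder name BLOCK_NAME; because the method is decorated with sync_compatible, it can be called directly from synchronous code:

from prefect_gcp import GcpCredentials

gcp_credentials = GcpCredentials.load("BLOCK_NAME")  # placeholder block name
# Refreshes the underlying credentials and returns a bearer token string.
token = gcp_credentials.get_access_token()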
"},{"location":"credentials/#prefect_gcp.credentials.GcpCredentials.get_bigquery_client","title":"get_bigquery_client","text":"

Gets an authenticated BigQuery client.

Parameters:

Name Type Description Default project str

Name of the project to use; overrides the base class's project if provided.

None location str

Location to use.

None

Returns:

Type Description Client

An authenticated BigQuery client.

Examples:

Gets a GCP BigQuery client from a path.

from prefect import flow\nfrom prefect_gcp.credentials import GcpCredentials\n\n@flow()\ndef example_get_client_flow():\n    service_account_file = \"~/.secrets/prefect-service-account.json\"\n    client = GcpCredentials(\n        service_account_file=service_account_file\n    ).get_bigquery_client()\nexample_get_client_flow()\n

Gets a GCP BigQuery client from a dictionary.

from prefect import flow\nfrom prefect_gcp.credentials import GcpCredentials\n\n@flow()\ndef example_get_client_flow():\n    service_account_info = {\n        \"type\": \"service_account\",\n        \"project_id\": \"project_id\",\n        \"private_key_id\": \"private_key_id\",\n        \"private_key\": \"private_key\",\n        \"client_email\": \"client_email\",\n        \"client_id\": \"client_id\",\n        \"auth_uri\": \"auth_uri\",\n        \"token_uri\": \"token_uri\",\n        \"auth_provider_x509_cert_url\": \"auth_provider_x509_cert_url\",\n        \"client_x509_cert_url\": \"client_x509_cert_url\"\n    }\n    client = GcpCredentials(\n        service_account_info=service_account_info\n    ).get_bigquery_client()\n\nexample_get_client_flow()\n

Source code in prefect_gcp/credentials.py
@_raise_help_msg(\"bigquery\")\ndef get_bigquery_client(\n    self, project: str = None, location: str = None\n) -> \"BigQueryClient\":\n    \"\"\"\n    Gets an authenticated BigQuery client.\n\n    Args:\n        project: Name of the project to use; overrides the base\n            class's project if provided.\n        location: Location to use.\n\n    Returns:\n        An authenticated BigQuery client.\n\n    Examples:\n        Gets a GCP BigQuery client from a path.\n        ```python\n        from prefect import flow\n        from prefect_gcp.credentials import GcpCredentials\n\n        @flow()\n        def example_get_client_flow():\n            service_account_file = \"~/.secrets/prefect-service-account.json\"\n            client = GcpCredentials(\n                service_account_file=service_account_file\n            ).get_bigquery_client()\n        example_get_client_flow()\n        ```\n\n        Gets a GCP BigQuery client from a dictionary.\n        ```python\n        from prefect import flow\n        from prefect_gcp.credentials import GcpCredentials\n\n        @flow()\n        def example_get_client_flow():\n            service_account_info = {\n                \"type\": \"service_account\",\n                \"project_id\": \"project_id\",\n                \"private_key_id\": \"private_key_id\",\n                \"private_key\": \"private_key\",\n                \"client_email\": \"client_email\",\n                \"client_id\": \"client_id\",\n                \"auth_uri\": \"auth_uri\",\n                \"token_uri\": \"token_uri\",\n                \"auth_provider_x509_cert_url\": \"auth_provider_x509_cert_url\",\n                \"client_x509_cert_url\": \"client_x509_cert_url\"\n            }\n            client = GcpCredentials(\n                service_account_info=service_account_info\n            ).get_bigquery_client()\n\n        example_get_client_flow()\n        ```\n    \"\"\"\n    credentials = self.get_credentials_from_service_account()\n\n    # override class project if method project is provided\n    project = project or self.project\n    big_query_client = BigQueryClient(\n        credentials=credentials, project=project, location=location\n    )\n    return big_query_client\n
"},{"location":"credentials/#prefect_gcp.credentials.GcpCredentials.get_client","title":"get_client","text":"

Helper method to dynamically get a client type.

Parameters:

Name Type Description Default client_type Union[str, ClientType]

The name of the client to get.

required **get_client_kwargs Dict[str, Any]

Additional keyword arguments to pass to the get_*_client method.

{}

Returns:

Type Description Any

An authenticated client.

Raises:

Type Description ValueError

if the client is not supported.
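A minimal sketch, assuming a saved block with the placeholder name BLOCK_NAME; the string client type is resolved through ClientType and dispatched to the matching get_*_client method:

from prefect_gcp import GcpCredentials

gcp_credentials = GcpCredentials.load("BLOCK_NAME")  # placeholder block name
# Equivalent to calling gcp_credentials.get_bigquery_client()
bigquery_client = gcp_credentials.get_client("bigquery")
# Extra keyword arguments are forwarded to the underlying method;
# "my-project" is a placeholder project name.
storage_client = gcp_credentials.get_client("cloud_storage", project="my-project")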

Source code in prefect_gcp/credentials.py
def get_client(\n    self,\n    client_type: Union[str, ClientType],\n    **get_client_kwargs: Dict[str, Any],\n) -> Any:\n    \"\"\"\n    Helper method to dynamically get a client type.\n\n    Args:\n        client_type: The name of the client to get.\n        **get_client_kwargs: Additional keyword arguments to pass to the\n            `get_*_client` method.\n\n    Returns:\n        An authenticated client.\n\n    Raises:\n        ValueError: if the client is not supported.\n    \"\"\"\n    if isinstance(client_type, str):\n        client_type = ClientType(client_type)\n    client_type = client_type.value\n    get_client_method = getattr(self, f\"get_{client_type}_client\")\n    return get_client_method(**get_client_kwargs)\n
"},{"location":"credentials/#prefect_gcp.credentials.GcpCredentials.get_cloud_storage_client","title":"get_cloud_storage_client","text":"

Gets an authenticated Cloud Storage client.

Parameters:

Name Type Description Default project Optional[str]

Name of the project to use; overrides the base class's project if provided.

None

Returns:

Type Description Client

An authenticated Cloud Storage client.

Examples:

Gets a GCP Cloud Storage client from a path.

from prefect import flow\nfrom prefect_gcp.credentials import GcpCredentials\n\n@flow()\ndef example_get_client_flow():\n    service_account_file = \"~/.secrets/prefect-service-account.json\"\n    client = GcpCredentials(\n        service_account_file=service_account_file\n    ).get_cloud_storage_client()\nexample_get_client_flow()\n

Gets a GCP Cloud Storage client from a dictionary.

from prefect import flow\nfrom prefect_gcp.credentials import GcpCredentials\n\n@flow()\ndef example_get_client_flow():\n    service_account_info = {\n        \"type\": \"service_account\",\n        \"project_id\": \"project_id\",\n        \"private_key_id\": \"private_key_id\",\n        \"private_key\": \"private_key\",\n        \"client_email\": \"client_email\",\n        \"client_id\": \"client_id\",\n        \"auth_uri\": \"auth_uri\",\n        \"token_uri\": \"token_uri\",\n        \"auth_provider_x509_cert_url\": \"auth_provider_x509_cert_url\",\n        \"client_x509_cert_url\": \"client_x509_cert_url\"\n    }\n    client = GcpCredentials(\n        service_account_info=service_account_info\n    ).get_cloud_storage_client()\nexample_get_client_flow()\n

Source code in prefect_gcp/credentials.py
@_raise_help_msg(\"cloud_storage\")\ndef get_cloud_storage_client(\n    self, project: Optional[str] = None\n) -> \"StorageClient\":\n    \"\"\"\n    Gets an authenticated Cloud Storage client.\n\n    Args:\n        project: Name of the project to use; overrides the base\n            class's project if provided.\n\n    Returns:\n        An authenticated Cloud Storage client.\n\n    Examples:\n        Gets a GCP Cloud Storage client from a path.\n        ```python\n        from prefect import flow\n        from prefect_gcp.credentials import GcpCredentials\n\n        @flow()\n        def example_get_client_flow():\n            service_account_file = \"~/.secrets/prefect-service-account.json\"\n            client = GcpCredentials(\n                service_account_file=service_account_file\n            ).get_cloud_storage_client()\n        example_get_client_flow()\n        ```\n\n        Gets a GCP Cloud Storage client from a dictionary.\n        ```python\n        from prefect import flow\n        from prefect_gcp.credentials import GcpCredentials\n\n        @flow()\n        def example_get_client_flow():\n            service_account_info = {\n                \"type\": \"service_account\",\n                \"project_id\": \"project_id\",\n                \"private_key_id\": \"private_key_id\",\n                \"private_key\": \"private_key\",\n                \"client_email\": \"client_email\",\n                \"client_id\": \"client_id\",\n                \"auth_uri\": \"auth_uri\",\n                \"token_uri\": \"token_uri\",\n                \"auth_provider_x509_cert_url\": \"auth_provider_x509_cert_url\",\n                \"client_x509_cert_url\": \"client_x509_cert_url\"\n            }\n            client = GcpCredentials(\n                service_account_info=service_account_info\n            ).get_cloud_storage_client()\n        example_get_client_flow()\n        ```\n    \"\"\"\n    credentials = self.get_credentials_from_service_account()\n\n    # override class project if method project is provided\n    project = project or self.project\n    storage_client = StorageClient(credentials=credentials, project=project)\n    return storage_client\n
"},{"location":"credentials/#prefect_gcp.credentials.GcpCredentials.get_credentials_from_service_account","title":"get_credentials_from_service_account","text":"

Helper method to construct credentials from either service_account_file or service_account_info.
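A short sketch, assuming a keyfile exists at the placeholder path shown:

from prefect_gcp import GcpCredentials

gcp_credentials = GcpCredentials(
    service_account_file="/path/to/service/account/keyfile.json")
# Returns a google.auth credentials object built from the keyfile.
credentials = gcp_credentials.get_credentials_from_service_account()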

Source code in prefect_gcp/credentials.py
def get_credentials_from_service_account(self) -> Credentials:\n    \"\"\"\n    Helper method to serialize credentials by using either\n    service_account_file or service_account_info.\n    \"\"\"\n    if self.service_account_info:\n        credentials = Credentials.from_service_account_info(\n            self.service_account_info.get_secret_value(),\n            scopes=[\"https://www.googleapis.com/auth/cloud-platform\"],\n        )\n    elif self.service_account_file:\n        credentials = Credentials.from_service_account_file(\n            self.service_account_file,\n            scopes=[\"https://www.googleapis.com/auth/cloud-platform\"],\n        )\n    else:\n        credentials, _ = google.auth.default()\n    return credentials\n
"},{"location":"credentials/#prefect_gcp.credentials.GcpCredentials.get_job_service_client","title":"get_job_service_client","text":"

Gets an authenticated Job Service client for Vertex AI.

Returns:

Type Description JobServiceClient

An authenticated Job Service client.

Examples:

Gets a GCP Job Service client from a path.

from prefect import flow\nfrom prefect_gcp.credentials import GcpCredentials\n\n@flow()\ndef example_get_client_flow():\n    service_account_file = \"~/.secrets/prefect-service-account.json\"\n    client = GcpCredentials(\n        service_account_file=service_account_file\n    ).get_job_service_client()\n\nexample_get_client_flow()\n

Gets a GCP Cloud Storage client from a dictionary.

from prefect import flow\nfrom prefect_gcp.credentials import GcpCredentials\n\n@flow()\ndef example_get_client_flow():\n    service_account_info = {\n        \"type\": \"service_account\",\n        \"project_id\": \"project_id\",\n        \"private_key_id\": \"private_key_id\",\n        \"private_key\": \"private_key\",\n        \"client_email\": \"client_email\",\n        \"client_id\": \"client_id\",\n        \"auth_uri\": \"auth_uri\",\n        \"token_uri\": \"token_uri\",\n        \"auth_provider_x509_cert_url\": \"auth_provider_x509_cert_url\",\n        \"client_x509_cert_url\": \"client_x509_cert_url\"\n    }\n    client = GcpCredentials(\n        service_account_info=service_account_info\n    ).get_job_service_client()\n\nexample_get_client_flow()\n

Source code in prefect_gcp/credentials.py
@_raise_help_msg(\"aiplatform\")\ndef get_job_service_client(\n    self, client_options: Dict[str, Any] = None\n) -> \"JobServiceClient\":\n    \"\"\"\n    Gets an authenticated Job Service client for Vertex AI.\n\n    Returns:\n        An authenticated Job Service client.\n\n    Examples:\n        Gets a GCP Job Service client from a path.\n        ```python\n        from prefect import flow\n        from prefect_gcp.credentials import GcpCredentials\n\n        @flow()\n        def example_get_client_flow():\n            service_account_file = \"~/.secrets/prefect-service-account.json\"\n            client = GcpCredentials(\n                service_account_file=service_account_file\n            ).get_job_service_client()\n\n        example_get_client_flow()\n        ```\n\n        Gets a GCP Cloud Storage client from a dictionary.\n        ```python\n        from prefect import flow\n        from prefect_gcp.credentials import GcpCredentials\n\n        @flow()\n        def example_get_client_flow():\n            service_account_info = {\n                \"type\": \"service_account\",\n                \"project_id\": \"project_id\",\n                \"private_key_id\": \"private_key_id\",\n                \"private_key\": \"private_key\",\n                \"client_email\": \"client_email\",\n                \"client_id\": \"client_id\",\n                \"auth_uri\": \"auth_uri\",\n                \"token_uri\": \"token_uri\",\n                \"auth_provider_x509_cert_url\": \"auth_provider_x509_cert_url\",\n                \"client_x509_cert_url\": \"client_x509_cert_url\"\n            }\n            client = GcpCredentials(\n                service_account_info=service_account_info\n            ).get_job_service_client()\n\n        example_get_client_flow()\n        ```\n    \"\"\"\n    credentials = self.get_credentials_from_service_account()\n    job_service_client = JobServiceClient(\n        credentials=credentials, client_options=client_options\n    )\n    return job_service_client\n
"},{"location":"credentials/#prefect_gcp.credentials.GcpCredentials.get_secret_manager_client","title":"get_secret_manager_client","text":"

Gets an authenticated Secret Manager Service client.

Returns:

Type Description SecretManagerServiceClient

An authenticated Secret Manager Service client.

Examples:

Gets a GCP Secret Manager client from a path.

from prefect import flow\nfrom prefect_gcp.credentials import GcpCredentials\n\n@flow()\ndef example_get_client_flow():\n    service_account_file = \"~/.secrets/prefect-service-account.json\"\n    client = GcpCredentials(\n        service_account_file=service_account_file\n    ).get_secret_manager_client()\nexample_get_client_flow()\n

Gets a GCP Cloud Storage client from a dictionary.

from prefect import flow\nfrom prefect_gcp.credentials import GcpCredentials\n\n@flow()\ndef example_get_client_flow():\n    service_account_info = {\n        \"type\": \"service_account\",\n        \"project_id\": \"project_id\",\n        \"private_key_id\": \"private_key_id\",\n        \"private_key\": \"private_key\",\n        \"client_email\": \"client_email\",\n        \"client_id\": \"client_id\",\n        \"auth_uri\": \"auth_uri\",\n        \"token_uri\": \"token_uri\",\n        \"auth_provider_x509_cert_url\": \"auth_provider_x509_cert_url\",\n        \"client_x509_cert_url\": \"client_x509_cert_url\"\n    }\n    client = GcpCredentials(\n        service_account_info=service_account_info\n    ).get_secret_manager_client()\nexample_get_client_flow()\n

Source code in prefect_gcp/credentials.py
@_raise_help_msg(\"secret_manager\")\ndef get_secret_manager_client(self) -> \"SecretManagerServiceClient\":\n    \"\"\"\n    Gets an authenticated Secret Manager Service client.\n\n    Returns:\n        An authenticated Secret Manager Service client.\n\n    Examples:\n        Gets a GCP Secret Manager client from a path.\n        ```python\n        from prefect import flow\n        from prefect_gcp.credentials import GcpCredentials\n\n        @flow()\n        def example_get_client_flow():\n            service_account_file = \"~/.secrets/prefect-service-account.json\"\n            client = GcpCredentials(\n                service_account_file=service_account_file\n            ).get_secret_manager_client()\n        example_get_client_flow()\n        ```\n\n        Gets a GCP Cloud Storage client from a dictionary.\n        ```python\n        from prefect import flow\n        from prefect_gcp.credentials import GcpCredentials\n\n        @flow()\n        def example_get_client_flow():\n            service_account_info = {\n                \"type\": \"service_account\",\n                \"project_id\": \"project_id\",\n                \"private_key_id\": \"private_key_id\",\n                \"private_key\": \"private_key\",\n                \"client_email\": \"client_email\",\n                \"client_id\": \"client_id\",\n                \"auth_uri\": \"auth_uri\",\n                \"token_uri\": \"token_uri\",\n                \"auth_provider_x509_cert_url\": \"auth_provider_x509_cert_url\",\n                \"client_x509_cert_url\": \"client_x509_cert_url\"\n            }\n            client = GcpCredentials(\n                service_account_info=service_account_info\n            ).get_secret_manager_client()\n        example_get_client_flow()\n        ```\n    \"\"\"\n    credentials = self.get_credentials_from_service_account()\n\n    # doesn't accept project; must pass in project in tasks\n    secret_manager_client = SecretManagerServiceClient(credentials=credentials)\n    return secret_manager_client\n
"},{"location":"examples_catalog/","title":"Examples Catalog","text":"

Below is a list of examples for prefect-gcp.

"},{"location":"examples_catalog/#bigquery-module","title":"Bigquery Module","text":"

Execute operation with parameters:

from prefect_gcp.bigquery import BigQueryWarehouse\n\nwith BigQueryWarehouse.load(\"BLOCK_NAME\") as warehouse:\n    operation = '''\n        CREATE TABLE mydataset.trips AS (\n        SELECT\n            bikeid,\n            start_time,\n            duration_minutes\n        FROM\n            bigquery-public-data.austin_bikeshare.bikeshare_trips\n        LIMIT %(limit)s\n        );\n    '''\n    warehouse.execute(operation, parameters={\"limit\": 5})\n
Queries the public Shakespeare samples dataset using query parameters.
from prefect import flow\nfrom prefect_gcp import GcpCredentials\nfrom prefect_gcp.bigquery import bigquery_query\n\n@flow\ndef example_bigquery_query_flow():\n    gcp_credentials = GcpCredentials(\n        service_account_file=\"/path/to/service/account/keyfile.json\",\n        project=\"project\"\n    )\n    query = '''\n        SELECT word, word_count\n        FROM `bigquery-public-data.samples.shakespeare`\n        WHERE corpus = @corpus\n        AND word_count >= @min_word_count\n        ORDER BY word_count DESC;\n    '''\n    query_params = [\n        (\"corpus\", \"STRING\", \"romeoandjuliet\"),\n        (\"min_word_count\", \"INT64\", 250)\n    ]\n    result = bigquery_query(\n        query, gcp_credentials, query_params=query_params\n    )\n    return result\n\nexample_bigquery_query_flow()\n
from prefect import flow\nfrom prefect_gcp import GcpCredentials\nfrom prefect_gcp.bigquery import bigquery_insert_stream\nfrom google.cloud.bigquery import SchemaField\n\n@flow\ndef example_bigquery_insert_stream_flow():\n    gcp_credentials = GcpCredentials(project=\"project\")\n    records = [\n        {\"number\": 1, \"text\": \"abc\", \"bool\": True},\n        {\"number\": 2, \"text\": \"def\", \"bool\": False},\n    ]\n    result = bigquery_insert_stream(\n        dataset=\"integrations\",\n        table=\"test_table\",\n        records=records,\n        gcp_credentials=gcp_credentials\n    )\n    return result\n\nexample_bigquery_insert_stream_flow()\n
Create mytable in mydataset and insert two rows into it:
from prefect_gcp.bigquery import BigQueryWarehouse\n\nwith BigQueryWarehouse.load(\"bigquery\") as warehouse:\n    create_operation = '''\n    CREATE TABLE IF NOT EXISTS mydataset.mytable (\n        col1 STRING,\n        col2 INTEGER,\n        col3 BOOLEAN\n    )\n    '''\n    warehouse.execute(create_operation)\n    insert_operation = '''\n    INSERT INTO mydataset.mytable (col1, col2, col3) VALUES (%s, %s, %s)\n    '''\n    seq_of_parameters = [\n        (\"a\", 1, True),\n        (\"b\", 2, False),\n    ]\n    warehouse.execute_many(\n        insert_operation,\n        seq_of_parameters=seq_of_parameters\n    )\n
from prefect import flow\nfrom prefect_gcp import GcpCredentials\nfrom prefect_gcp.bigquery import bigquery_load_cloud_storage\n\n@flow\ndef example_bigquery_load_cloud_storage_flow():\n    gcp_credentials = GcpCredentials(project=\"project\")\n    result = bigquery_load_cloud_storage(\n        dataset=\"dataset\",\n        table=\"test_table\",\n        uri=\"uri\",\n        gcp_credentials=gcp_credentials\n    )\n    return result\n\nexample_bigquery_load_cloud_storage_flow()\n
Execute operation with parameters, fetching all rows:
from prefect_gcp.bigquery import BigQueryWarehouse\n\nwith BigQueryWarehouse.load(\"BLOCK_NAME\") as warehouse:\n    operation = '''\n        SELECT word, word_count\n        FROM `bigquery-public-data.samples.shakespeare`\n        WHERE corpus = %(corpus)s\n        AND word_count >= %(min_word_count)s\n        ORDER BY word_count DESC\n        LIMIT 3;\n    '''\n    parameters = {\n        \"corpus\": \"romeoandjuliet\",\n        \"min_word_count\": 250,\n    }\n    result = warehouse.fetch_all(operation, parameters=parameters)\n
Execute operation with parameters, fetching two new rows at a time:
from prefect_gcp.bigquery import BigQueryWarehouse\n\nwith BigQueryWarehouse.load(\"BLOCK_NAME\") as warehouse:\n    operation = '''\n        SELECT word, word_count\n        FROM `bigquery-public-data.samples.shakespeare`\n        WHERE corpus = %(corpus)s\n        AND word_count >= %(min_word_count)s\n        ORDER BY word_count DESC\n        LIMIT 6;\n    '''\n    parameters = {\n        \"corpus\": \"romeoandjuliet\",\n        \"min_word_count\": 250,\n    }\n    for _ in range(0, 3):\n        result = warehouse.fetch_many(\n            operation,\n            parameters=parameters,\n            size=2\n        )\n        print(result)\n
Execute operation with parameters, fetching one new row at a time:
from prefect_gcp.bigquery import BigQueryWarehouse\n\nwith BigQueryWarehouse.load(\"BLOCK_NAME\") as warehouse:\n    operation = '''\n        SELECT word, word_count\n        FROM `bigquery-public-data.samples.shakespeare`\n        WHERE corpus = %(corpus)s\n        AND word_count >= %(min_word_count)s\n        ORDER BY word_count DESC\n        LIMIT 3;\n    '''\n    parameters = {\n        \"corpus\": \"romeoandjuliet\",\n        \"min_word_count\": 250,\n    }\n    for _ in range(0, 3):\n        result = warehouse.fetch_one(operation, parameters=parameters)\n        print(result)\n
from prefect import flow\nfrom prefect_gcp import GcpCredentials\nfrom prefect_gcp.bigquery import bigquery_load_file\nfrom google.cloud.bigquery import SchemaField\n\n@flow\ndef example_bigquery_load_file_flow():\n    gcp_credentials = GcpCredentials(project=\"project\")\n    result = bigquery_load_file(\n        dataset=\"dataset\",\n        table=\"test_table\",\n        path=\"path\",\n        gcp_credentials=gcp_credentials\n    )\n    return result\n\nexample_bigquery_load_file_flow()\n

"},{"location":"examples_catalog/#cloud-storage-module","title":"Cloud Storage Module","text":"

Uploads blob to bucket.

from prefect import flow\nfrom prefect_gcp import GcpCredentials\nfrom prefect_gcp.cloud_storage import cloud_storage_upload_blob_from_file\n\n@flow()\ndef example_cloud_storage_upload_blob_from_file_flow():\n    gcp_credentials = GcpCredentials(\n        service_account_file=\"/path/to/service/account/keyfile.json\")\n    blob = cloud_storage_upload_blob_from_file(\n        \"/path/somewhere\", \"bucket\", \"blob\", gcp_credentials)\n    return blob\n\nexample_cloud_storage_upload_blob_from_file_flow()\n
Download my_folder/notes.txt object to notes.txt.
from prefect_gcp.cloud_storage import GcsBucket\n\ngcs_bucket = GcsBucket.load(\"my-bucket\")\ngcs_bucket.download_object_to_path(\"my_folder/notes.txt\", \"notes.txt\")\n
Create a bucket.
from prefect_gcp.cloud_storage import GcsBucket\n\ngcs_bucket = GcsBucket(bucket=\"my-bucket\")\ngcs_bucket.create_bucket()\n
Download my_folder/notes.txt object to a BytesIO object.
from io import BytesIO\nfrom prefect_gcp.cloud_storage import GcsBucket\n\ngcs_bucket = GcsBucket.load(\"my-bucket\")\nwith BytesIO() as buf:\n    gcs_bucket.download_object_to_file_object(\"my_folder/notes.txt\", buf)\n

Download my_folder/notes.txt object to a BufferedWriter.

    from prefect_gcp.cloud_storage import GcsBucket\n\n    gcs_bucket = GcsBucket.load(\"my-bucket\")\n    with open(\"notes.txt\", \"wb\") as f:\n        gcs_bucket.download_object_to_file_object(\"my_folder/notes.txt\", f)\n
Upload notes.txt to my_folder/notes.txt.
from prefect_gcp.cloud_storage import GcsBucket\n\ngcs_bucket = GcsBucket.load(\"my-bucket\")\ngcs_bucket.upload_from_path(\"notes.txt\", \"my_folder/notes.txt\")\n
Download my_folder to a local folder named my_folder.
from prefect_gcp.cloud_storage import GcsBucket\n\ngcs_bucket = GcsBucket.load(\"my-bucket\")\ngcs_bucket.download_folder_to_path(\"my_folder\", \"my_folder\")\n
Creates a bucket named \"prefect\".
from prefect import flow\nfrom prefect_gcp import GcpCredentials\nfrom prefect_gcp.cloud_storage import cloud_storage_create_bucket\n\n@flow()\ndef example_cloud_storage_create_bucket_flow():\n    gcp_credentials = GcpCredentials(\n        service_account_file=\"/path/to/service/account/keyfile.json\")\n    bucket = cloud_storage_create_bucket(\"prefect\", gcp_credentials)\n\nexample_cloud_storage_create_bucket_flow()\n
Get the bucket object.
from prefect_gcp.cloud_storage import GcsBucket\n\ngcs_bucket = GcsBucket.load(\"my-bucket\")\ngcs_bucket.get_bucket()\n
Copies blob from one bucket to another.
from prefect import flow\nfrom prefect_gcp import GcpCredentials\nfrom prefect_gcp.cloud_storage import cloud_storage_copy_blob\n\n@flow()\ndef example_cloud_storage_copy_blob_flow():\n    gcp_credentials = GcpCredentials(\n        service_account_file=\"/path/to/service/account/keyfile.json\")\n    blob = cloud_storage_copy_blob(\n        \"source_bucket\",\n        \"dest_bucket\",\n        \"source_blob\",\n        gcp_credentials\n    )\n    return blob\n\nexample_cloud_storage_copy_blob_flow()\n
Get all folders from a bucket named \"my-bucket\".
from prefect_gcp.cloud_storage import GcsBucket\n\ngcs_bucket = GcsBucket.load(\"my-bucket\")\ngcs_bucket.list_folders()\n

Get all folders from a folder called years.

from prefect_gcp.cloud_storage import GcsBucket\n\ngcs_bucket = GcsBucket.load(\"my-bucket\")\ngcs_bucket.list_folders(\"years\")\n
Upload local folder my_folder to the bucket's folder my_folder.
from prefect_gcp.cloud_storage import GcsBucket\n\ngcs_bucket = GcsBucket.load(\"my-bucket\")\ngcs_bucket.upload_from_folder(\"my_folder\")\n
Upload the contents of notes.txt to my_folder/notes.txt from a file object.
from io import BytesIO\nfrom prefect_gcp.cloud_storage import GcsBucket\n\ngcs_bucket = GcsBucket.load(\"my-bucket\")\nwith open(\"notes.txt\", \"rb\") as f:\n    gcs_bucket.upload_from_file_object(f, \"my_folder/notes.txt\")\n

Upload BufferedReader object to my_folder/notes.txt.

from io import BufferedReader\nfrom prefect_gcp.cloud_storage import GcsBucket\n\ngcs_bucket = GcsBucket.load(\"my-bucket\")\nwith open(\"notes.txt\", \"rb\") as f:\n    gcs_bucket.upload_from_file_object(\n        BufferedReader(f), \"my_folder/notes.txt\"\n    )\n
Downloads blob from bucket.
from prefect import flow\nfrom prefect_gcp import GcpCredentials\nfrom prefect_gcp.cloud_storage import cloud_storage_download_blob_to_file\n\n@flow()\ndef example_cloud_storage_download_blob_flow():\n    gcp_credentials = GcpCredentials(\n        service_account_file=\"/path/to/service/account/keyfile.json\")\n    path = cloud_storage_download_blob_to_file(\n        \"bucket\", \"blob\", \"file_path\", gcp_credentials)\n    return path\n\nexample_cloud_storage_download_blob_flow()\n
Downloads blob from bucket.
from prefect import flow\nfrom prefect_gcp import GcpCredentials\nfrom prefect_gcp.cloud_storage import cloud_storage_download_blob_as_bytes\n\n@flow()\ndef example_cloud_storage_download_blob_flow():\n    gcp_credentials = GcpCredentials(\n        service_account_file=\"/path/to/service/account/keyfile.json\")\n    contents = cloud_storage_download_blob_as_bytes(\n        \"bucket\", \"blob\", gcp_credentials)\n    return contents\n\nexample_cloud_storage_download_blob_flow()\n
Uploads blob to bucket.
from prefect import flow\nfrom prefect_gcp import GcpCredentials\nfrom prefect_gcp.cloud_storage import cloud_storage_upload_blob_from_string\n\n@flow()\ndef example_cloud_storage_upload_blob_from_string_flow():\n    gcp_credentials = GcpCredentials(\n        service_account_file=\"/path/to/service/account/keyfile.json\")\n    blob = cloud_storage_upload_blob_from_string(\n        \"data\", \"bucket\", \"blob\", gcp_credentials)\n    return blob\n\nexample_cloud_storage_upload_blob_from_string_flow()\n
Get all blobs from a folder named \"prefect\".
from prefect_gcp.cloud_storage import GcsBucket\n\ngcs_bucket = GcsBucket.load(\"my-bucket\")\ngcs_bucket.list_blobs(\"prefect\")\n

"},{"location":"examples_catalog/#credentials-module","title":"Credentials Module","text":"

Gets a GCP Secret Manager client from a path.

from prefect import flow\nfrom prefect_gcp.credentials import GcpCredentials\n\n@flow()\ndef example_get_client_flow():\n    service_account_file = \"~/.secrets/prefect-service-account.json\"\n    client = GcpCredentials(\n        service_account_file=service_account_file\n    ).get_secret_manager_client()\nexample_get_client_flow()\n

Gets a GCP Cloud Storage client from a dictionary.

from prefect import flow\nfrom prefect_gcp.credentials import GcpCredentials\n\n@flow()\ndef example_get_client_flow():\n    service_account_info = {\n        \"type\": \"service_account\",\n        \"project_id\": \"project_id\",\n        \"private_key_id\": \"private_key_id\",\n        \"private_key\": \"private_key\",\n        \"client_email\": \"client_email\",\n        \"client_id\": \"client_id\",\n        \"auth_uri\": \"auth_uri\",\n        \"token_uri\": \"token_uri\",\n        \"auth_provider_x509_cert_url\": \"auth_provider_x509_cert_url\",\n        \"client_x509_cert_url\": \"client_x509_cert_url\"\n    }\n    client = GcpCredentials(\n        service_account_info=service_account_info\n    ).get_secret_manager_client()\nexample_get_client_flow()\n
Gets a GCP BigQuery client from a path.
from prefect import flow\nfrom prefect_gcp.credentials import GcpCredentials\n\n@flow()\ndef example_get_client_flow():\n    service_account_file = \"~/.secrets/prefect-service-account.json\"\n    client = GcpCredentials(\n        service_account_file=service_account_file\n    ).get_bigquery_client()\nexample_get_client_flow()\n

Gets a GCP BigQuery client from a dictionary.

from prefect import flow\nfrom prefect_gcp.credentials import GcpCredentials\n\n@flow()\ndef example_get_client_flow():\n    service_account_info = {\n        \"type\": \"service_account\",\n        \"project_id\": \"project_id\",\n        \"private_key_id\": \"private_key_id\",\n        \"private_key\": \"private_key\",\n        \"client_email\": \"client_email\",\n        \"client_id\": \"client_id\",\n        \"auth_uri\": \"auth_uri\",\n        \"token_uri\": \"token_uri\",\n        \"auth_provider_x509_cert_url\": \"auth_provider_x509_cert_url\",\n        \"client_x509_cert_url\": \"client_x509_cert_url\"\n    }\n    client = GcpCredentials(\n        service_account_info=service_account_info\n    ).get_bigquery_client()\n\nexample_get_client_flow()\n
Gets a GCP Cloud Storage client from a path.
from prefect import flow\nfrom prefect_gcp.credentials import GcpCredentials\n\n@flow()\ndef example_get_client_flow():\n    service_account_file = \"~/.secrets/prefect-service-account.json\"\n    client = GcpCredentials(\n        service_account_file=service_account_file\n    ).get_cloud_storage_client()\nexample_get_client_flow()\n

Gets a GCP Cloud Storage client from a dictionary.

from prefect import flow\nfrom prefect_gcp.credentials import GcpCredentials\n\n@flow()\ndef example_get_client_flow():\n    service_account_info = {\n        \"type\": \"service_account\",\n        \"project_id\": \"project_id\",\n        \"private_key_id\": \"private_key_id\",\n        \"private_key\": \"private_key\",\n        \"client_email\": \"client_email\",\n        \"client_id\": \"client_id\",\n        \"auth_uri\": \"auth_uri\",\n        \"token_uri\": \"token_uri\",\n        \"auth_provider_x509_cert_url\": \"auth_provider_x509_cert_url\",\n        \"client_x509_cert_url\": \"client_x509_cert_url\"\n    }\n    client = GcpCredentials(\n        service_account_info=service_account_info\n    ).get_cloud_storage_client()\nexample_get_client_flow()\n
Gets a GCP Job Service client from a path.
from prefect import flow\nfrom prefect_gcp.credentials import GcpCredentials\n\n@flow()\ndef example_get_client_flow():\n    service_account_file = \"~/.secrets/prefect-service-account.json\"\n    client = GcpCredentials(\n        service_account_file=service_account_file\n    ).get_job_service_client()\n\nexample_get_client_flow()\n

Gets a GCP Job Service client from a dictionary.

from prefect import flow\nfrom prefect_gcp.credentials import GcpCredentials\n\n@flow()\ndef example_get_client_flow():\n    service_account_info = {\n        \"type\": \"service_account\",\n        \"project_id\": \"project_id\",\n        \"private_key_id\": \"private_key_id\",\n        \"private_key\": \"private_key\",\n        \"client_email\": \"client_email\",\n        \"client_id\": \"client_id\",\n        \"auth_uri\": \"auth_uri\",\n        \"token_uri\": \"token_uri\",\n        \"auth_provider_x509_cert_url\": \"auth_provider_x509_cert_url\",\n        \"client_x509_cert_url\": \"client_x509_cert_url\"\n    }\n    client = GcpCredentials(\n        service_account_info=service_account_info\n    ).get_job_service_client()\n\nexample_get_client_flow()\n

"},{"location":"gcp-worker-guide/","title":"Google Cloud Run Worker Guide","text":""},{"location":"gcp-worker-guide/#why-use-google-cloud-run-for-flow-run-execution","title":"Why use Google Cloud Run for flow run execution?","text":"

Google Cloud Run is a fully managed compute platform that automatically scales your containerized applications.

  1. Serverless architecture: Cloud Run follows a serverless architecture, which means you don't need to manage any underlying infrastructure. Google Cloud Run automatically handles the scaling and availability of your flow run infrastructure, allowing you to focus on developing and deploying your code.

  2. Scalability: Cloud Run can automatically scale your pipeline to handle varying workloads and traffic. It can quickly respond to increased demand and scale back down during low activity periods, ensuring efficient resource utilization.

  3. Integration with Google Cloud services: Google Cloud Run easily integrates with other Google Cloud services, such as Google Cloud Storage, Google Cloud Pub/Sub, and Google Cloud Build. This interoperability enables you to build end-to-end data pipelines that use a variety of services.

  4. Portability: Since Cloud Run uses container images, you can develop your pipelines locally using Docker and then deploy them on Google Cloud Run without significant modifications. This portability allows you to run the same pipeline in different environments.

"},{"location":"gcp-worker-guide/#google-cloud-run-guide","title":"Google Cloud Run guide","text":"

After completing this guide, you will have:

  1. Created a Google Cloud Service Account
  2. Created a Prefect Work Pool
  3. Deployed a Prefect Worker as a Cloud Run Service
  4. Deployed a Flow
  5. Executed the Flow as a Google Cloud Run Job

If you're looking for a general introduction to workers, work pools, and deployments, check out the workers and work pools tutorial.

"},{"location":"gcp-worker-guide/#prerequisites","title":"Prerequisites","text":"

Before starting this guide, make sure you have:

  • A Google Cloud Platform (GCP) account.
  • A project on your GCP account where you have the necessary permissions to create Cloud Run Services and Service Accounts.
  • The gcloud CLI installed on your local machine. You can follow Google Cloud's installation guide. If you're using a Mac (or a Linux system), you can also use Homebrew for installation.
  • Docker installed on your local machine.
  • A Prefect server instance. You can sign up for a forever free Prefect Cloud Account or, alternatively, self-host a Prefect server.
"},{"location":"gcp-worker-guide/#step-1-create-a-google-cloud-service-account","title":"Step 1. Create a Google Cloud service account","text":"

First, open a terminal or command prompt on your local machine where gcloud is installed. If you haven't already authenticated with gcloud, run the following command and follow the instructions to log in to your GCP account.

gcloud auth login\n

Next, you'll set the project where you'd like to create the service account. Use the following command and replace <PROJECT-ID> with your GCP project's ID.

gcloud config set project <PROJECT-ID>\n

For example, if your project's ID is prefect-project, the command will look like this:

gcloud config set project prefect-project\n

Now you're ready to create the service account. To do so, run the following command:

gcloud iam service-accounts create <SERVICE-ACCOUNT-NAME> --display-name=\"<DISPLAY-NAME>\"\n

Here's a ready-to-use version of the command above, with the service account name and display name already provided. An additional option to give the service account a description has also been added:

gcloud iam service-accounts create prefect-service-account \\\n    --description=\"service account to use for the prefect worker\" \\\n    --display-name=\"prefect-service-account\"\n

The last step of this process is to make sure the service account has the proper permissions to execute flow runs as Cloud Run jobs. Run the following commands to grant the necessary permissions:

gcloud projects add-iam-policy-binding <PROJECT-ID> \\\n    --member=\"serviceAccount:<SERVICE-ACCOUNT-NAME>@<PROJECT-ID>.iam.gserviceaccount.com\" \\\n    --role=\"roles/iam.serviceAccountUser\"\n
gcloud projects add-iam-policy-binding <PROJECT-ID> \\\n    --member=\"serviceAccount:<SERVICE-ACCOUNT-NAME>@<PROJECT-ID>.iam.gserviceaccount.com\" \\\n    --role=\"roles/run.admin\"\n

"},{"location":"gcp-worker-guide/#step-2-create-a-cloud-run-work-pool","title":"Step 2. Create a Cloud Run work pool","text":"

Let's walk through the process of creating a Cloud Run work pool.

"},{"location":"gcp-worker-guide/#fill-out-the-work-pool-base-job-template","title":"Fill out the work pool base job template","text":"

You can create a new work pool using the Prefect UI or CLI. The following command creates a work pool of type cloud-run via the CLI (you'll want to replace <WORK-POOL-NAME> with the name of your work pool):

prefect work-pool create --type cloud-run <WORK-POOL-NAME>\n

Once the work pool is created, find the work pool in the UI and edit it.

There are many ways to customize the base job template for the work pool. Modifying the template influences the infrastructure configuration that the worker provisions for flow runs submitted to the work pool. For this guide we are going to modify just a few of the available fields.

Specify the region for the Cloud Run job.

Save the name of the service account created in the first step of this guide.
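
If you'd like to review the available fields before editing in the UI, recent versions of the Prefect CLI can print a work pool type's default base job template. This is a convenience sketch; the exact command availability depends on your Prefect version:

prefect work-pool get-default-base-job-template --type cloud-run\n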

Your work pool is now ready to receive scheduled flow runs!

"},{"location":"gcp-worker-guide/#step-3-deploy-a-cloud-run-worker","title":"Step 3. Deploy a Cloud Run worker","text":"

Now you can launch a Cloud Run service to host the Cloud Run worker. This worker will poll the work pool that you created in the previous step.

Navigate back to your terminal and run the following commands to set your Prefect API key and URL as environment variables. Be sure to replace <ACCOUNT-ID> and <WORKSPACE-ID> with your Prefect account and workspace IDs (both will be available in the URL of the UI when previewing the workspace dashboard). You'll want to replace <YOUR-API-KEY> with an active API key as well.

export PREFECT_API_URL='https://api.prefect.cloud/api/accounts/<ACCOUNT-ID>/workspaces/<WORKSPACE-ID>'\nexport PREFECT_API_KEY='<YOUR-API-KEY>'\n

Once those variables are set, run the following shell command to deploy your worker as a service. Don't forget to replace <YOUR-SERVICE-ACCOUNT-NAME> with the name of the service account you created in the first step of this guide, and replace <WORK-POOL-NAME> with the name of the work pool you created in the second step.

gcloud run deploy prefect-worker --image=prefecthq/prefect:2-latest \\\n--set-env-vars PREFECT_API_URL=$PREFECT_API_URL,PREFECT_API_KEY=$PREFECT_API_KEY \\\n--service-account <YOUR-SERVICE-ACCOUNT-NAME> \\\n--no-cpu-throttling \\\n--min-instances 1 \\\n--args \"prefect\",\"worker\",\"start\",\"--install-policy\",\"always\",\"--with-healthcheck\",\"-p\",\"<WORK-POOL-NAME>\",\"-t\",\"cloud-run\"\n

After running this command, you'll be prompted to specify a region. Choose the same region that you selected when creating the Cloud Run work pool in the second step of this guide. The next prompt will ask if you'd like to allow unauthenticated invocations to your worker. For this guide, you can select \"No\".

After a few seconds, you'll be able to see your new prefect-worker service by navigating to the Cloud Run page of your Google Cloud console. Additionally, you should be able to see a record of this worker in the Prefect UI on the work pool's page by navigating to the Workers tab. Let's not leave our worker hanging; it's time to give it a job.

"},{"location":"gcp-worker-guide/#step-4-deploy-a-flow","title":"Step 4. Deploy a flow","text":"

Let's prepare a flow to run as a Cloud Run job. In this section of the guide, we'll \"bake\" our code into a Docker image, and push that image to Google Artifact Registry.

"},{"location":"gcp-worker-guide/#create-a-registry","title":"Create a registry","text":"

Let's create a Docker repository in your Google Artifact Registry to host your custom image. If you already have a registry and are authenticated to it, skip ahead to the Write a flow section.

The following command creates a repository using the gcloud CLI. You'll want to replace <REPOSITORY-NAME> with your own value:

gcloud artifacts repositories create <REPOSITORY-NAME> \\\n--repository-format=docker --location=us\n

Now you can authenticate to Artifact Registry:

gcloud auth configure-docker us-docker.pkg.dev\n

"},{"location":"gcp-worker-guide/#write-a-flow","title":"Write a flow","text":"

First, create a new directory. This will serve as the root of your project's repository. Within the directory, create a sub-directory called flows. Navigate to the flows subdirectory and create a new file for your flow. Feel free to write your own flow, but here's a ready-made one for your convenience:

import httpx\nfrom prefect import flow, task\nfrom prefect.artifacts import create_markdown_artifact\n\n@task\ndef mark_it_down(temp):\n    markdown_report = f\"\"\"# Weather Report\n## Recent weather\n\n| Time        | Temperature |\n|:--------------|-------:|\n| Now | {temp} |\n| In 1 hour       | {temp + 2} |\n\"\"\"\n    create_markdown_artifact(\n        key=\"weather-report\",\n        markdown=markdown_report,\n        description=\"Very scientific weather report\",\n    )\n\n\n@flow\ndef fetch_weather(lat: float, lon: float):\n    base_url = \"https://api.open-meteo.com/v1/forecast/\"\n    weather = httpx.get(\n        base_url,\n        params=dict(latitude=lat, longitude=lon, hourly=\"temperature_2m\"),\n    )\n    most_recent_temp = float(weather.json()[\"hourly\"][\"temperature_2m\"][0])\n    mark_it_down(most_recent_temp)\n\n\nif __name__ == \"__main__\":\n    fetch_weather(38.9, -77.0)\n

In the remainder of this guide, this script will be referred to as weather_flow.py, but you can name yours whatever you'd like.

"},{"location":"gcp-worker-guide/#creating-a-prefectyaml-file","title":"Creating a prefect.yaml file","text":"

Now we're ready to make a prefect.yaml file, which will be responsible for managing the deployments of this repository. Navigate back to the root of your directory, and run the following command to create a prefect.yaml file using Prefect's docker deployment recipe.

prefect init --recipe docker\n

You'll receive a prompt to enter values for the image name and tag. Since we will be pushing the image to Google Artifact Registry, the name of your image should be prefixed with the path to the Docker repository you created within the registry. For example: us-docker.pkg.dev/<PROJECT-ID>/<REPOSITORY-NAME>/. You'll want to replace <PROJECT-ID> with the ID of your project in GCP. This should match the ID of the project you used in the first step of this guide. Here is an example of what this could look like:

image_name: us-docker.pkg.dev/prefect-project/my-artifact-registry/gcp-weather-image\ntag: latest\n

At this point, there will be a new prefect.yaml file available at the root of your project. The contents will look similar to the example below; however, I've added a combination of YAML templating options and Prefect deployment actions to build out a simple CI/CD process. Feel free to copy the contents and paste them into your prefect.yaml:

# Welcome to your prefect.yaml file! You can you this file for storing and managing\n# configuration for deploying your flows. We recommend committing this file to source\n# control along with your flow code.\n\n# Generic metadata about this project\nname: <WORKING-DIRECTORY>\nprefect-version: 2.13.4\n\n# build section allows you to manage and build docker image\nbuild:\n- prefect_docker.deployments.steps.build_docker_image:\n    id: build_image\n    requires: prefect-docker>=0.3.1\n    image_name: <PATH-TO-ARTIFACT-REGISTRY>/gcp-weather-image\n    tag: latest\n    dockerfile: auto\n    platform: linux/amd64\n\n# push section allows you to manage if and how this project is uploaded to remote locations\npush:\n- prefect_docker.deployments.steps.push_docker_image:\n    requires: prefect-docker>=0.3.1\n    image_name: '{{ build_image.image_name }}'\n    tag: '{{ build_image.tag }}'\n\n# pull section allows you to provide instructions for cloning this project in remote locations\npull:\n- prefect.deployments.steps.set_working_directory:\n    directory: /opt/prefect/<WORKING-DIRECTORY>\n\n# the deployments section allows you to provide configuration for deploying flows\ndeployments:\n- name: gcp-weather-deploy\n  version: null\n  tags: []\n  description: null\n  schedule: {}\n  flow_name: null\n  entrypoint: flows/weather_flow.py:fetch_weather\n  parameters:\n    lat: 14.5994\n    lon: 28.6731\n  work_pool:\n    name: my-cloud-run-pool\n    work_queue_name: default\n    job_variables:\n      image: '{{ build_image.image }}'\n

Tip

After copying the example above, don't forget to replace <WORKING-DIRECTORY> with the name of the directory where your flow folder and prefect.yaml live. You'll also need to replace <PATH-TO-ARTIFACT-REGISTRY> with the path to the Docker repository in your Google Artifact Registry.

To get a better understanding of the different components of the prefect.yaml file above and what they do, feel free to read this next section. Otherwise, you can skip ahead to Flow Deployment.

In the build section of the prefect.yaml the following step is executed at deployment build time:

  1. prefect_docker.deployments.steps.build_docker_image: automatically builds a Docker image using the name and tag chosen previously.

Warning

If you are using an ARM-based chip (such as an M1 or M2 Mac), you'll want to add platform: linux/amd64 to your build_docker_image step to ensure that your Docker image uses an AMD64 architecture. For example:

- prefect_docker.deployments.steps.build_docker_image:\nid: build_image\nrequires: prefect-docker>=0.3.1\nimage_name: us-docker.pkg.dev/prefect-project/my-docker-repository/gcp-weather-image\ntag: latest\ndockerfile: auto\nplatform: linux/amd64\n

The push section sends the Docker image to the Docker repository in your Google Artifact Registry, so that it can be easily accessed by the worker for flow run execution.

The pull section sets the working directory for the process prior to importing your flow.

In the deployments section of the prefect.yaml file above, you'll see that there is a deployment declaration named gcp-weather-deploy. Within the declaration, the entrypoint for the flow is specified along with some default parameters that will be passed to the flow at runtime. Last but not least, the name of the work pool that we created in step 2 of this guide is specified.

"},{"location":"gcp-worker-guide/#flow-deployment","title":"Flow deployment","text":"

Once you're happy with the specifications in the prefect.yaml file, run the following command in the terminal to deploy your flow:

prefect deploy --name gcp-weather-deploy\n

Once the flow is deployed to Prefect Cloud or your local Prefect Server, it's time to queue up a flow run!

"},{"location":"gcp-worker-guide/#step-5-flow-execution","title":"Step 5. Flow execution","text":"

Find your deployment in the UI, and hit the Quick Run button. You have now successfully submitted a flow run to your Cloud Run worker! If you used the flow script provided in this guide, check the Artifacts tab for the flow run once it completes. You'll have a nice little weather report waiting for you there. Hope your day is a sunny one!
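
As an alternative to the Quick Run button, you can also kick off a run from the CLI. Assuming the flow and deployment names used in this guide, the command would look something like this:

prefect deployment run 'fetch-weather/gcp-weather-deploy'\n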

"},{"location":"gcp-worker-guide/#recap-and-next-steps","title":"Recap and next steps","text":"

Congratulations on completing this guide! Looking back on our journey, you have:

  1. Created a Google Cloud service account
  2. Created a Cloud Run work pool
  3. Deployed a Cloud Run worker
  4. Deployed a flow
  5. Executed a flow

For next steps, you could:

  • Take a look at some of the other work pools Prefect has to offer
  • Do a deep dive into Prefect concepts
  • Try out another guide to explore new deployment patterns and recipes

The world is your oyster \ud83e\uddaa\u2728.

"},{"location":"secret_manager/","title":"Secret Manager","text":""},{"location":"secret_manager/#prefect_gcp.secret_manager","title":"prefect_gcp.secret_manager","text":""},{"location":"secret_manager/#prefect_gcp.secret_manager-classes","title":"Classes","text":""},{"location":"secret_manager/#prefect_gcp.secret_manager.GcpSecret","title":"GcpSecret","text":"

Bases: SecretBlock

Manages a secret in Google Cloud Platform's Secret Manager.

Attributes:

Name Type Description gcp_credentials GcpCredentials

Credentials to use for authentication with GCP.

secret_name str

Name of the secret to manage.

secret_version str

Version number of the secret to use, or \"latest\".
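
Example

A minimal usage sketch based on the attributes and methods shown in the source below; the keyfile path, secret name, and secret value are placeholders:

from prefect_gcp import GcpCredentials\nfrom prefect_gcp.secret_manager import GcpSecret\n\n# authenticate with a service account keyfile (placeholder path)\ngcp_credentials = GcpCredentials(\n    service_account_file=\"/path/to/service/account/keyfile.json\")\n\n# manage a secret named \"my-secret\" (placeholder name)\ngcp_secret = GcpSecret(gcp_credentials=gcp_credentials, secret_name=\"my-secret\")\n\n# write a new version of the secret, then read it back\ngcp_secret.write_secret(b\"my-secret-value\")\nprint(gcp_secret.read_secret())\n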

Source code in prefect_gcp/secret_manager.py
class GcpSecret(SecretBlock):\n    \"\"\"\n    Manages a secret in Google Cloud Platform's Secret Manager.\n\n    Attributes:\n        gcp_credentials: Credentials to use for authentication with GCP.\n        secret_name: Name of the secret to manage.\n        secret_version: Version number of the secret to use, or \"latest\".\n    \"\"\"\n\n    _logo_url = \"https://cdn.sanity.io/images/3ugk85nk/production/10424e311932e31c477ac2b9ef3d53cefbaad708-250x250.png\"  # noqa\n    _documentation_url = \"https://prefecthq.github.io/prefect-gcp/secret_manager/#prefect_gcp.secret_manager.GcpSecret\"  # noqa: E501\n\n    gcp_credentials: GcpCredentials\n    secret_name: str = Field(default=..., description=\"Name of the secret to manage.\")\n    secret_version: str = Field(\n        default=\"latest\", description=\"Version number of the secret to use.\"\n    )\n\n    @sync_compatible\n    async def read_secret(self) -> bytes:\n        \"\"\"\n        Reads the secret data from the secret storage service.\n\n        Returns:\n            The secret data as bytes.\n        \"\"\"\n        client = self.gcp_credentials.get_secret_manager_client()\n        project = self.gcp_credentials.project\n        name = f\"projects/{project}/secrets/{self.secret_name}/versions/{self.secret_version}\"  # noqa\n        request = AccessSecretVersionRequest(name=name)\n\n        self.logger.debug(f\"Preparing to read secret data from {name!r}.\")\n        response = await run_sync_in_worker_thread(\n            client.access_secret_version, request=request\n        )\n        secret = response.payload.data\n        self.logger.info(f\"The secret {name!r} data was successfully read.\")\n        return secret\n\n    @sync_compatible\n    async def write_secret(self, secret_data: bytes) -> str:\n        \"\"\"\n        Writes the secret data to the secret storage service; if it doesn't exist\n        it will be created.\n\n        Args:\n            secret_data: The secret to write.\n\n        Returns:\n            The path that the secret was written to.\n        \"\"\"\n        client = self.gcp_credentials.get_secret_manager_client()\n        project = self.gcp_credentials.project\n        parent = f\"projects/{project}/secrets/{self.secret_name}\"\n        payload = SecretPayload(data=secret_data)\n        add_request = AddSecretVersionRequest(parent=parent, payload=payload)\n\n        self.logger.debug(f\"Preparing to write secret data to {parent!r}.\")\n        try:\n            response = await run_sync_in_worker_thread(\n                client.add_secret_version, request=add_request\n            )\n        except NotFound:\n            self.logger.info(\n                f\"The secret {parent!r} does not exist yet, creating it now.\"\n            )\n            create_parent = f\"projects/{project}\"\n            secret_id = self.secret_name\n            secret = Secret(replication=Replication(automatic=Replication.Automatic()))\n            create_request = CreateSecretRequest(\n                parent=create_parent, secret_id=secret_id, secret=secret\n            )\n            await run_sync_in_worker_thread(\n                client.create_secret, request=create_request\n            )\n\n            self.logger.debug(f\"Preparing to write secret data to {parent!r} again.\")\n            response = await run_sync_in_worker_thread(\n                client.add_secret_version, request=add_request\n            )\n\n        self.logger.info(f\"The secret data was written successfully to {parent!r}.\")\n        return 
response.name\n\n    @sync_compatible\n    async def delete_secret(self) -> str:\n        \"\"\"\n        Deletes the secret from the secret storage service.\n\n        Returns:\n            The path that the secret was deleted from.\n        \"\"\"\n        client = self.gcp_credentials.get_secret_manager_client()\n        project = self.gcp_credentials.project\n\n        name = f\"projects/{project}/secrets/{self.secret_name}\"\n        request = DeleteSecretRequest(name=name)\n\n        self.logger.debug(f\"Preparing to delete the secret {name!r}.\")\n        await run_sync_in_worker_thread(client.delete_secret, request=request)\n        self.logger.info(f\"The secret {name!r} was successfully deleted.\")\n        return name\n
"},{"location":"secret_manager/#prefect_gcp.secret_manager.GcpSecret-functions","title":"Functions","text":""},{"location":"secret_manager/#prefect_gcp.secret_manager.GcpSecret.delete_secret","title":"delete_secret async","text":"

Deletes the secret from the secret storage service.

Returns:

Type Description str

The path that the secret was deleted from.

Source code in prefect_gcp/secret_manager.py
@sync_compatible\nasync def delete_secret(self) -> str:\n    \"\"\"\n    Deletes the secret from the secret storage service.\n\n    Returns:\n        The path that the secret was deleted from.\n    \"\"\"\n    client = self.gcp_credentials.get_secret_manager_client()\n    project = self.gcp_credentials.project\n\n    name = f\"projects/{project}/secrets/{self.secret_name}\"\n    request = DeleteSecretRequest(name=name)\n\n    self.logger.debug(f\"Preparing to delete the secret {name!r}.\")\n    await run_sync_in_worker_thread(client.delete_secret, request=request)\n    self.logger.info(f\"The secret {name!r} was successfully deleted.\")\n    return name\n
"},{"location":"secret_manager/#prefect_gcp.secret_manager.GcpSecret.read_secret","title":"read_secret async","text":"

Reads the secret data from the secret storage service.

Returns:

Type Description bytes

The secret data as bytes.

Source code in prefect_gcp/secret_manager.py
@sync_compatible\nasync def read_secret(self) -> bytes:\n    \"\"\"\n    Reads the secret data from the secret storage service.\n\n    Returns:\n        The secret data as bytes.\n    \"\"\"\n    client = self.gcp_credentials.get_secret_manager_client()\n    project = self.gcp_credentials.project\n    name = f\"projects/{project}/secrets/{self.secret_name}/versions/{self.secret_version}\"  # noqa\n    request = AccessSecretVersionRequest(name=name)\n\n    self.logger.debug(f\"Preparing to read secret data from {name!r}.\")\n    response = await run_sync_in_worker_thread(\n        client.access_secret_version, request=request\n    )\n    secret = response.payload.data\n    self.logger.info(f\"The secret {name!r} data was successfully read.\")\n    return secret\n
"},{"location":"secret_manager/#prefect_gcp.secret_manager.GcpSecret.write_secret","title":"write_secret async","text":"

Writes the secret data to the secret storage service; if it doesn't exist it will be created.

Parameters:

Name Type Description Default secret_data bytes

The secret to write.

required

Returns:

Type Description str

The path that the secret was written to.

Source code in prefect_gcp/secret_manager.py
@sync_compatible\nasync def write_secret(self, secret_data: bytes) -> str:\n    \"\"\"\n    Writes the secret data to the secret storage service; if it doesn't exist\n    it will be created.\n\n    Args:\n        secret_data: The secret to write.\n\n    Returns:\n        The path that the secret was written to.\n    \"\"\"\n    client = self.gcp_credentials.get_secret_manager_client()\n    project = self.gcp_credentials.project\n    parent = f\"projects/{project}/secrets/{self.secret_name}\"\n    payload = SecretPayload(data=secret_data)\n    add_request = AddSecretVersionRequest(parent=parent, payload=payload)\n\n    self.logger.debug(f\"Preparing to write secret data to {parent!r}.\")\n    try:\n        response = await run_sync_in_worker_thread(\n            client.add_secret_version, request=add_request\n        )\n    except NotFound:\n        self.logger.info(\n            f\"The secret {parent!r} does not exist yet, creating it now.\"\n        )\n        create_parent = f\"projects/{project}\"\n        secret_id = self.secret_name\n        secret = Secret(replication=Replication(automatic=Replication.Automatic()))\n        create_request = CreateSecretRequest(\n            parent=create_parent, secret_id=secret_id, secret=secret\n        )\n        await run_sync_in_worker_thread(\n            client.create_secret, request=create_request\n        )\n\n        self.logger.debug(f\"Preparing to write secret data to {parent!r} again.\")\n        response = await run_sync_in_worker_thread(\n            client.add_secret_version, request=add_request\n        )\n\n    self.logger.info(f\"The secret data was written successfully to {parent!r}.\")\n    return response.name\n
"},{"location":"secret_manager/#prefect_gcp.secret_manager-functions","title":"Functions","text":""},{"location":"secret_manager/#prefect_gcp.secret_manager.create_secret","title":"create_secret async","text":"

Creates a secret in Google Cloud Platform's Secret Manager.

Parameters:

Name Type Description Default secret_name str

Name of the secret to retrieve.

required gcp_credentials GcpCredentials

Credentials to use for authentication with GCP.

required timeout float

The number of seconds the transport should wait for the server response.

60 project Optional[str]

Name of the project to use; overrides the gcp_credentials project if provided.

None

Returns:

Type Description str

The path of the created secret.

Example
from prefect import flow\nfrom prefect_gcp import GcpCredentials\nfrom prefect_gcp.secret_manager import create_secret\n\n@flow()\ndef example_cloud_storage_create_secret_flow():\n    gcp_credentials = GcpCredentials(project=\"project\")\n    secret_path = create_secret(\"secret_name\", gcp_credentials)\n    return secret_path\n\nexample_cloud_storage_create_secret_flow()\n
Source code in prefect_gcp/secret_manager.py
@task\nasync def create_secret(\n    secret_name: str,\n    gcp_credentials: \"GcpCredentials\",\n    timeout: float = 60,\n    project: Optional[str] = None,\n) -> str:\n    \"\"\"\n    Creates a secret in Google Cloud Platform's Secret Manager.\n\n    Args:\n        secret_name: Name of the secret to retrieve.\n        gcp_credentials: Credentials to use for authentication with GCP.\n        timeout: The number of seconds the transport should wait\n            for the server response.\n        project: Name of the project to use; overrides the\n            gcp_credentials project if provided.\n\n    Returns:\n        The path of the created secret.\n\n    Example:\n        ```python\n        from prefect import flow\n        from prefect_gcp import GcpCredentials\n        from prefect_gcp.secret_manager import create_secret\n\n        @flow()\n        def example_cloud_storage_create_secret_flow():\n            gcp_credentials = GcpCredentials(project=\"project\")\n            secret_path = create_secret(\"secret_name\", gcp_credentials)\n            return secret_path\n\n        example_cloud_storage_create_secret_flow()\n        ```\n    \"\"\"\n    logger = get_run_logger()\n    logger.info(\"Creating the %s secret\", secret_name)\n\n    client = gcp_credentials.get_secret_manager_client()\n    project = project or gcp_credentials.project\n\n    parent = f\"projects/{project}\"\n    secret_settings = {\"replication\": {\"automatic\": {}}}\n\n    partial_create = partial(\n        client.create_secret,\n        parent=parent,\n        secret_id=secret_name,\n        secret=secret_settings,\n        timeout=timeout,\n    )\n    response = await to_thread.run_sync(partial_create)\n    return response.name\n
"},{"location":"secret_manager/#prefect_gcp.secret_manager.delete_secret","title":"delete_secret async","text":"

Deletes the specified secret from Google Cloud Platform's Secret Manager.

Parameters:

Name Type Description Default secret_name str

Name of the secret to delete.

required gcp_credentials GcpCredentials

Credentials to use for authentication with GCP.

required timeout float

The number of seconds the transport should wait for the server response.

60 project Optional[str]

Name of the project to use; overrides the gcp_credentials project if provided.

None

Returns:

Type Description str

The path of the deleted secret.

Example
from prefect import flow\nfrom prefect_gcp import GcpCredentials\nfrom prefect_gcp.secret_manager import delete_secret\n\n@flow()\ndef example_cloud_storage_delete_secret_flow():\n    gcp_credentials = GcpCredentials(project=\"project\")\n    secret_path = delete_secret(\"secret_name\", gcp_credentials)\n    return secret_path\n\nexample_cloud_storage_delete_secret_flow()\n
Source code in prefect_gcp/secret_manager.py
@task\nasync def delete_secret(\n    secret_name: str,\n    gcp_credentials: \"GcpCredentials\",\n    timeout: float = 60,\n    project: Optional[str] = None,\n) -> str:\n    \"\"\"\n    Deletes the specified secret from Google Cloud Platform's Secret Manager.\n\n    Args:\n        secret_name: Name of the secret to delete.\n        gcp_credentials: Credentials to use for authentication with GCP.\n        timeout: The number of seconds the transport should wait\n            for the server response.\n        project: Name of the project to use; overrides the\n            gcp_credentials project if provided.\n\n    Returns:\n        The path of the deleted secret.\n\n    Example:\n        ```python\n        from prefect import flow\n        from prefect_gcp import GcpCredentials\n        from prefect_gcp.secret_manager import delete_secret\n\n        @flow()\n        def example_cloud_storage_delete_secret_flow():\n            gcp_credentials = GcpCredentials(project=\"project\")\n            secret_path = delete_secret(\"secret_name\", gcp_credentials)\n            return secret_path\n\n        example_cloud_storage_delete_secret_flow()\n        ```\n    \"\"\"\n    logger = get_run_logger()\n    logger.info(\"Deleting %s secret\", secret_name)\n\n    client = gcp_credentials.get_secret_manager_client()\n    project = project or gcp_credentials.project\n\n    name = f\"projects/{project}/secrets/{secret_name}/\"\n    partial_delete = partial(client.delete_secret, name=name, timeout=timeout)\n    await to_thread.run_sync(partial_delete)\n    return name\n
"},{"location":"secret_manager/#prefect_gcp.secret_manager.delete_secret_version","title":"delete_secret_version async","text":"

Deletes a version of a given secret from Google Cloud Platform's Secret Manager.

Parameters:

Name Type Description Default secret_name str

Name of the secret to retrieve.

required version_id int

Version number of the secret to use; \"latest\" can NOT be used.

required gcp_credentials GcpCredentials

Credentials to use for authentication with GCP.

required timeout float

The number of seconds the transport should wait for the server response.

60 project Optional[str]

Name of the project to use; overrides the gcp_credentials project if provided.

None

Returns:

Type Description str

The path of the deleted secret version.

Example
from prefect import flow\nfrom prefect_gcp import GcpCredentials\nfrom prefect_gcp.secret_manager import delete_secret_version\n\n@flow()\ndef example_cloud_storage_delete_secret_version_flow():\n    gcp_credentials = GcpCredentials(project=\"project\")\n    secret_value = delete_secret_version(\"secret_name\", 1, gcp_credentials)\n    return secret_value\n\nexample_cloud_storage_delete_secret_version_flow()\n
Source code in prefect_gcp/secret_manager.py
@task\nasync def delete_secret_version(\n    secret_name: str,\n    version_id: int,\n    gcp_credentials: \"GcpCredentials\",\n    timeout: float = 60,\n    project: Optional[str] = None,\n) -> str:\n    \"\"\"\n    Deletes a version of a given secret from Google Cloud Platform's Secret Manager.\n\n    Args:\n        secret_name: Name of the secret to retrieve.\n        version_id: Version number of the secret to use; \"latest\" can NOT be used.\n        gcp_credentials: Credentials to use for authentication with GCP.\n        timeout: The number of seconds the transport should wait\n            for the server response.\n        project: Name of the project to use; overrides the\n            gcp_credentials project if provided.\n\n    Returns:\n        The path of the deleted secret version.\n\n    Example:\n        ```python\n        from prefect import flow\n        from prefect_gcp import GcpCredentials\n        from prefect_gcp.secret_manager import delete_secret_version\n\n        @flow()\n        def example_cloud_storage_delete_secret_version_flow():\n            gcp_credentials = GcpCredentials(project=\"project\")\n            secret_value = delete_secret_version(\"secret_name\", 1, gcp_credentials)\n            return secret_value\n\n        example_cloud_storage_delete_secret_version_flow()\n        ```\n    \"\"\"\n    logger = get_run_logger()\n    logger.info(\"Reading %s version of %s secret\", version_id, secret_name)\n\n    client = gcp_credentials.get_secret_manager_client()\n    project = project or gcp_credentials.project\n\n    if version_id == \"latest\":\n        raise ValueError(\"The version_id cannot be 'latest'\")\n\n    name = f\"projects/{project}/secrets/{secret_name}/versions/{version_id}\"\n    partial_destroy = partial(client.destroy_secret_version, name=name, timeout=timeout)\n    await to_thread.run_sync(partial_destroy)\n    return name\n
"},{"location":"secret_manager/#prefect_gcp.secret_manager.read_secret","title":"read_secret async","text":"

Reads the value of a given secret from Google Cloud Platform's Secret Manager.

Parameters:

Name Type Description Default secret_name str

Name of the secret to retrieve.

required gcp_credentials GcpCredentials

Credentials to use for authentication with GCP.

required timeout float

The number of seconds the transport should wait for the server response.

60 project Optional[str]

Name of the project to use; overrides the gcp_credentials project if provided.

None

Returns:

Type Description str

Contents of the specified secret.

Example
from prefect import flow\nfrom prefect_gcp import GcpCredentials\nfrom prefect_gcp.secret_manager import read_secret\n\n@flow()\ndef example_cloud_storage_read_secret_flow():\n    gcp_credentials = GcpCredentials(project=\"project\")\n    secret_value = read_secret(\"secret_name\", gcp_credentials, version_id=1)\n    return secret_value\n\nexample_cloud_storage_read_secret_flow()\n
Source code in prefect_gcp/secret_manager.py
@task\nasync def read_secret(\n    secret_name: str,\n    gcp_credentials: \"GcpCredentials\",\n    version_id: Union[str, int] = \"latest\",\n    timeout: float = 60,\n    project: Optional[str] = None,\n) -> str:\n    \"\"\"\n    Reads the value of a given secret from Google Cloud Platform's Secret Manager.\n\n    Args:\n        secret_name: Name of the secret to retrieve.\n        gcp_credentials: Credentials to use for authentication with GCP.\n        timeout: The number of seconds the transport should wait\n            for the server response.\n        project: Name of the project to use; overrides the\n            gcp_credentials project if provided.\n\n    Returns:\n        Contents of the specified secret.\n\n    Example:\n        ```python\n        from prefect import flow\n        from prefect_gcp import GcpCredentials\n        from prefect_gcp.secret_manager import read_secret\n\n        @flow()\n        def example_cloud_storage_read_secret_flow():\n            gcp_credentials = GcpCredentials(project=\"project\")\n            secret_value = read_secret(\"secret_name\", gcp_credentials, version_id=1)\n            return secret_value\n\n        example_cloud_storage_read_secret_flow()\n        ```\n    \"\"\"\n    logger = get_run_logger()\n    logger.info(\"Reading %s version of %s secret\", version_id, secret_name)\n\n    client = gcp_credentials.get_secret_manager_client()\n    project = project or gcp_credentials.project\n\n    name = f\"projects/{project}/secrets/{secret_name}/versions/{version_id}\"\n    partial_access = partial(client.access_secret_version, name=name, timeout=timeout)\n    response = await to_thread.run_sync(partial_access)\n    secret = response.payload.data.decode(\"UTF-8\")\n    return secret\n
"},{"location":"secret_manager/#prefect_gcp.secret_manager.update_secret","title":"update_secret async","text":"

Updates a secret in Google Cloud Platform's Secret Manager.

Parameters:

Name Type Description Default secret_name str

Name of the secret to retrieve.

required secret_value Union[str, bytes]

Desired value of the secret. Can be either str or bytes.

required gcp_credentials GcpCredentials

Credentials to use for authentication with GCP.

required timeout float

The number of seconds the transport should wait for the server response.

60 project Optional[str]

Name of the project to use; overrides the gcp_credentials project if provided.

None

Returns:

Type Description str

The path of the updated secret.

Example
from prefect import flow\nfrom prefect_gcp import GcpCredentials\nfrom prefect_gcp.secret_manager import update_secret\n\n@flow()\ndef example_cloud_storage_update_secret_flow():\n    gcp_credentials = GcpCredentials(project=\"project\")\n    secret_path = update_secret(\"secret_name\", \"secret_value\", gcp_credentials)\n    return secret_path\n\nexample_cloud_storage_update_secret_flow()\n
Source code in prefect_gcp/secret_manager.py
@task\nasync def update_secret(\n    secret_name: str,\n    secret_value: Union[str, bytes],\n    gcp_credentials: \"GcpCredentials\",\n    timeout: float = 60,\n    project: Optional[str] = None,\n) -> str:\n    \"\"\"\n    Updates a secret in Google Cloud Platform's Secret Manager.\n\n    Args:\n        secret_name: Name of the secret to retrieve.\n        secret_value: Desired value of the secret. Can be either `str` or `bytes`.\n        gcp_credentials: Credentials to use for authentication with GCP.\n        timeout: The number of seconds the transport should wait\n            for the server response.\n        project: Name of the project to use; overrides the\n            gcp_credentials project if provided.\n\n    Returns:\n        The path of the updated secret.\n\n    Example:\n        ```python\n        from prefect import flow\n        from prefect_gcp import GcpCredentials\n        from prefect_gcp.secret_manager import update_secret\n\n        @flow()\n        def example_cloud_storage_update_secret_flow():\n            gcp_credentials = GcpCredentials(project=\"project\")\n            secret_path = update_secret(\"secret_name\", \"secret_value\", gcp_credentials)\n            return secret_path\n\n        example_cloud_storage_update_secret_flow()\n        ```\n    \"\"\"\n    logger = get_run_logger()\n    logger.info(\"Updating the %s secret\", secret_name)\n\n    client = gcp_credentials.get_secret_manager_client()\n    project = project or gcp_credentials.project\n\n    parent = f\"projects/{project}/secrets/{secret_name}\"\n    if isinstance(secret_value, str):\n        secret_value = secret_value.encode(\"UTF-8\")\n    partial_add = partial(\n        client.add_secret_version,\n        parent=parent,\n        payload={\"data\": secret_value},\n        timeout=timeout,\n    )\n    response = await to_thread.run_sync(partial_add)\n    return response.name\n
"},{"location":"vertex_worker/","title":"Vertex AI","text":""},{"location":"vertex_worker/#prefect_gcp.workers.vertex","title":"prefect_gcp.workers.vertex","text":"

Module containing the custom worker used for executing flow runs as Vertex AI Custom Jobs.

Get started by creating a Vertex AI work pool:

prefect work-pool create 'my-vertex-pool' --type vertex-ai\n

Then start a Vertex AI worker with the following command:

prefect worker start --pool 'my-vertex-pool'\n
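
Once the worker is running, you can point a deployment at the pool. Here's a rough sketch, assuming a recent Prefect version that supports flow.deploy and a container registry you can push to; the deployment name and image path are placeholders:

from prefect import flow\n\n@flow\ndef my_flow():\n    print(\"Hello from Vertex AI!\")\n\nif __name__ == \"__main__\":\n    # deploy to the Vertex AI work pool created above;\n    # the image path is a placeholder for your own registry\n    my_flow.deploy(\n        name=\"my-vertex-deployment\",\n        work_pool_name=\"my-vertex-pool\",\n        image=\"us-docker.pkg.dev/<PROJECT-ID>/<REPOSITORY-NAME>/my-image:latest\",\n    )\n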
"},{"location":"vertex_worker/#prefect_gcp.workers.vertex--configuration","title":"Configuration","text":"

Read more about configuring work pools here.

"},{"location":"vertex_worker/#prefect_gcp.workers.vertex-classes","title":"Classes","text":""},{"location":"vertex_worker/#prefect_gcp.workers.vertex.VertexAIWorker","title":"VertexAIWorker","text":"

Bases: BaseWorker

Prefect worker that executes flow runs within Vertex AI Jobs.

Source code in prefect_gcp/workers/vertex.py
class VertexAIWorker(BaseWorker):\n    \"\"\"Prefect worker that executes flow runs within Vertex AI Jobs.\"\"\"\n\n    type = \"vertex-ai\"\n    job_configuration = VertexAIWorkerJobConfiguration\n    job_configuration_variables = VertexAIWorkerVariables\n    _description = (\n        \"Execute flow runs within containers on Google Vertex AI. Requires \"\n        \"a Google Cloud Platform account.\"\n    )\n    _display_name = \"Google Vertex AI\"\n    _documentation_url = \"https://prefecthq.github.io/prefect-gcp/vertex_worker/\"\n    _logo_url = \"https://cdn.sanity.io/images/3ugk85nk/production/10424e311932e31c477ac2b9ef3d53cefbaad708-250x250.png\"  # noqa\n\n    async def run(\n        self,\n        flow_run: \"FlowRun\",\n        configuration: VertexAIWorkerJobConfiguration,\n        task_status: Optional[anyio.abc.TaskStatus] = None,\n    ) -> VertexAIWorkerResult:\n        \"\"\"\n        Executes a flow run within a Vertex AI Job and waits for the flow run\n        to complete.\n\n        Args:\n            flow_run: The flow run to execute\n            configuration: The configuration to use when executing the flow run.\n            task_status: The task status object for the current flow run. If provided,\n                the task will be marked as started.\n\n        Returns:\n            VertexAIWorkerResult: A result object containing information about the\n                final state of the flow run\n        \"\"\"\n        logger = self.get_flow_run_logger(flow_run)\n\n        client_options = ClientOptions(\n            api_endpoint=f\"{configuration.region}-aiplatform.googleapis.com\"\n        )\n\n        job_name = configuration.job_name\n\n        job_spec = self._build_job_spec(configuration)\n        with configuration.credentials.get_job_service_client(\n            client_options=client_options\n        ) as job_service_client:\n            job_run = await self._create_and_begin_job(\n                job_name, job_spec, job_service_client, configuration, logger\n            )\n\n            if task_status:\n                task_status.started(job_run.name)\n\n            final_job_run = await self._watch_job_run(\n                job_name=job_name,\n                full_job_name=job_run.name,\n                job_service_client=job_service_client,\n                current_state=job_run.state,\n                until_states=(\n                    JobState.JOB_STATE_SUCCEEDED,\n                    JobState.JOB_STATE_FAILED,\n                    JobState.JOB_STATE_CANCELLED,\n                    JobState.JOB_STATE_EXPIRED,\n                ),\n                configuration=configuration,\n                logger=logger,\n                timeout=int(\n                    datetime.timedelta(\n                        hours=configuration.job_spec[\"maximum_run_time_hours\"]\n                    ).total_seconds()\n                ),\n            )\n\n        error_msg = final_job_run.error.message\n\n        # Vertex will include an error message upon valid\n        # flow cancellations, so we'll avoid raising an error in that case\n        if error_msg and \"CANCELED\" not in error_msg:\n            raise RuntimeError(error_msg)\n\n        status_code = 0 if final_job_run.state == JobState.JOB_STATE_SUCCEEDED else 1\n\n        return VertexAIWorkerResult(\n            identifier=final_job_run.display_name, status_code=status_code\n        )\n\n    def _build_job_spec(\n        self, configuration: VertexAIWorkerJobConfiguration\n    ) -> \"CustomJobSpec\":\n        
\"\"\"\n        Builds a job spec by gathering details.\n        \"\"\"\n        # here, we extract the `worker_pool_specs` out of the job_spec\n        worker_pool_specs = [\n            WorkerPoolSpec(\n                container_spec=ContainerSpec(**spec[\"container_spec\"]),\n                machine_spec=MachineSpec(**spec[\"machine_spec\"]),\n                replica_count=spec[\"replica_count\"],\n                disk_spec=DiskSpec(**spec[\"disk_spec\"]),\n            )\n            for spec in configuration.job_spec.pop(\"worker_pool_specs\", [])\n        ]\n\n        timeout = Duration().FromTimedelta(\n            td=datetime.timedelta(\n                hours=configuration.job_spec[\"maximum_run_time_hours\"]\n            )\n        )\n        scheduling = Scheduling(timeout=timeout)\n\n        # construct the final job spec that we will provide to Vertex AI\n        job_spec = CustomJobSpec(\n            worker_pool_specs=worker_pool_specs,\n            scheduling=scheduling,\n            ignore_unknown_fields=True,\n            **configuration.job_spec,\n        )\n        return job_spec\n\n    async def _create_and_begin_job(\n        self,\n        job_name: str,\n        job_spec: \"CustomJobSpec\",\n        job_service_client: \"JobServiceClient\",\n        configuration: VertexAIWorkerJobConfiguration,\n        logger: PrefectLogAdapter,\n    ) -> \"CustomJob\":\n        \"\"\"\n        Builds a custom job and begins running it.\n        \"\"\"\n        # create custom job\n        custom_job = CustomJob(\n            display_name=job_name,\n            job_spec=job_spec,\n            labels=self._get_compatible_labels(configuration=configuration),\n        )\n\n        # run job\n        logger.info(f\"Job {job_name!r} starting to run \")\n\n        project = configuration.project\n        resource_name = f\"projects/{project}/locations/{configuration.region}\"\n\n        retry_policy = retry(\n            stop=stop_after_attempt(3), wait=wait_fixed(1) + wait_random(0, 3)\n        )\n\n        custom_job_run = await run_sync_in_worker_thread(\n            retry_policy(job_service_client.create_custom_job),\n            parent=resource_name,\n            custom_job=custom_job,\n        )\n\n        logger.info(\n            f\"Job {job_name!r} has successfully started; \"\n            f\"the full job name is {custom_job_run.name!r}\"\n        )\n\n        return custom_job_run\n\n    async def _watch_job_run(\n        self,\n        job_name: str,\n        full_job_name: str,  # different from job_name\n        job_service_client: \"JobServiceClient\",\n        current_state: \"JobState\",\n        until_states: Tuple[\"JobState\"],\n        configuration: VertexAIWorkerJobConfiguration,\n        logger: PrefectLogAdapter,\n        timeout: int = None,\n    ) -> \"CustomJob\":\n        \"\"\"\n        Polls job run to see if status changed.\n        \"\"\"\n        state = JobState.JOB_STATE_UNSPECIFIED\n        last_state = current_state\n        t0 = time.time()\n\n        while state not in until_states:\n            job_run = await run_sync_in_worker_thread(\n                job_service_client.get_custom_job,\n                name=full_job_name,\n            )\n            state = job_run.state\n            if state != last_state:\n                state_label = (\n                    state.name.replace(\"_\", \" \")\n                    .lower()\n                    .replace(\"state\", \"state is now:\")\n                )\n                # results in \"New job state is 
now: succeeded\"\n                logger.info(f\"{job_name} has new {state_label}\")\n                last_state = state\n            else:\n                # Intermittently, the job will not be described. We want to respect the\n                # watch timeout though.\n                logger.debug(f\"Job {job_name} not found.\")\n\n            elapsed_time = time.time() - t0\n            if timeout is not None and elapsed_time > timeout:\n                raise RuntimeError(\n                    f\"Timed out after {elapsed_time}s while watching job for states \"\n                    \"{until_states!r}\"\n                )\n            time.sleep(configuration.job_watch_poll_interval)\n\n        return job_run\n\n    def _get_compatible_labels(\n        self, configuration: VertexAIWorkerJobConfiguration\n    ) -> Dict[str, str]:\n        \"\"\"\n        Ensures labels are compatible with GCP label requirements.\n        https://cloud.google.com/resource-manager/docs/creating-managing-labels\n\n        Ex: the Prefect provided key of prefect.io/flow-name -> prefect-io_flow-name\n        \"\"\"\n        compatible_labels = {}\n        for key, val in configuration.labels.items():\n            new_key = slugify(\n                key,\n                lowercase=True,\n                replacements=[(\"/\", \"_\"), (\".\", \"-\")],\n                max_length=63,\n                regex_pattern=_DISALLOWED_GCP_LABEL_CHARACTERS,\n            )\n            compatible_labels[new_key] = slugify(\n                val,\n                lowercase=True,\n                replacements=[(\"/\", \"_\"), (\".\", \"-\")],\n                max_length=63,\n                regex_pattern=_DISALLOWED_GCP_LABEL_CHARACTERS,\n            )\n        return compatible_labels\n\n    async def kill_infrastructure(\n        self,\n        infrastructure_pid: str,\n        configuration: VertexAIWorkerJobConfiguration,\n        grace_seconds: int = 30,\n    ):\n        \"\"\"\n        Stops a job running in Vertex AI upon flow cancellation,\n        based on the provided infrastructure PID + run configuration.\n        \"\"\"\n        if grace_seconds != 30:\n            self._logger.warning(\n                f\"Kill grace period of {grace_seconds}s requested, but GCP does not \"\n                \"support dynamic grace period configuration. 
See here for more info: \"\n                \"https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.customJobs/cancel\"  # noqa\n            )\n\n        client_options = ClientOptions(\n            api_endpoint=f\"{configuration.region}-aiplatform.googleapis.com\"\n        )\n        with configuration.credentials.get_job_service_client(\n            client_options=client_options\n        ) as job_service_client:\n            await run_sync_in_worker_thread(\n                self._stop_job,\n                client=job_service_client,\n                vertex_job_name=infrastructure_pid,\n            )\n\n    def _stop_job(self, client: \"JobServiceClient\", vertex_job_name: str):\n        \"\"\"\n        Calls the `cancel_custom_job` method on the Vertex AI Job Service Client.\n        \"\"\"\n        cancel_custom_job_request = CancelCustomJobRequest(name=vertex_job_name)\n        try:\n            client.cancel_custom_job(\n                request=cancel_custom_job_request,\n            )\n        except Exception as exc:\n            if \"does not exist\" in str(exc):\n                raise InfrastructureNotFound(\n                    f\"Cannot stop Vertex AI job; the job name {vertex_job_name!r} \"\n                    \"could not be found.\"\n                ) from exc\n            raise\n
"},{"location":"vertex_worker/#prefect_gcp.workers.vertex.VertexAIWorker-functions","title":"Functions","text":""},{"location":"vertex_worker/#prefect_gcp.workers.vertex.VertexAIWorker.kill_infrastructure","title":"kill_infrastructure async","text":"

Stops a job running in Vertex AI upon flow cancellation, based on the provided infrastructure PID + run configuration.

Source code in prefect_gcp/workers/vertex.py
async def kill_infrastructure(\n    self,\n    infrastructure_pid: str,\n    configuration: VertexAIWorkerJobConfiguration,\n    grace_seconds: int = 30,\n):\n    \"\"\"\n    Stops a job running in Vertex AI upon flow cancellation,\n    based on the provided infrastructure PID + run configuration.\n    \"\"\"\n    if grace_seconds != 30:\n        self._logger.warning(\n            f\"Kill grace period of {grace_seconds}s requested, but GCP does not \"\n            \"support dynamic grace period configuration. See here for more info: \"\n            \"https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.customJobs/cancel\"  # noqa\n        )\n\n    client_options = ClientOptions(\n        api_endpoint=f\"{configuration.region}-aiplatform.googleapis.com\"\n    )\n    with configuration.credentials.get_job_service_client(\n        client_options=client_options\n    ) as job_service_client:\n        await run_sync_in_worker_thread(\n            self._stop_job,\n            client=job_service_client,\n            vertex_job_name=infrastructure_pid,\n        )\n
"},{"location":"vertex_worker/#prefect_gcp.workers.vertex.VertexAIWorker.run","title":"run async","text":"

Executes a flow run within a Vertex AI Job and waits for the flow run to complete.

Parameters:

Name Type Description Default flow_run FlowRun

The flow run to execute

required configuration VertexAIWorkerJobConfiguration

The configuration to use when executing the flow run.

required task_status Optional[TaskStatus]

The task status object for the current flow run. If provided, the task will be marked as started.

None

Returns:

Name Type Description VertexAIWorkerResult VertexAIWorkerResult

A result object containing information about the final state of the flow run

Source code in prefect_gcp/workers/vertex.py
async def run(\n    self,\n    flow_run: \"FlowRun\",\n    configuration: VertexAIWorkerJobConfiguration,\n    task_status: Optional[anyio.abc.TaskStatus] = None,\n) -> VertexAIWorkerResult:\n    \"\"\"\n    Executes a flow run within a Vertex AI Job and waits for the flow run\n    to complete.\n\n    Args:\n        flow_run: The flow run to execute\n        configuration: The configuration to use when executing the flow run.\n        task_status: The task status object for the current flow run. If provided,\n            the task will be marked as started.\n\n    Returns:\n        VertexAIWorkerResult: A result object containing information about the\n            final state of the flow run\n    \"\"\"\n    logger = self.get_flow_run_logger(flow_run)\n\n    client_options = ClientOptions(\n        api_endpoint=f\"{configuration.region}-aiplatform.googleapis.com\"\n    )\n\n    job_name = configuration.job_name\n\n    job_spec = self._build_job_spec(configuration)\n    with configuration.credentials.get_job_service_client(\n        client_options=client_options\n    ) as job_service_client:\n        job_run = await self._create_and_begin_job(\n            job_name, job_spec, job_service_client, configuration, logger\n        )\n\n        if task_status:\n            task_status.started(job_run.name)\n\n        final_job_run = await self._watch_job_run(\n            job_name=job_name,\n            full_job_name=job_run.name,\n            job_service_client=job_service_client,\n            current_state=job_run.state,\n            until_states=(\n                JobState.JOB_STATE_SUCCEEDED,\n                JobState.JOB_STATE_FAILED,\n                JobState.JOB_STATE_CANCELLED,\n                JobState.JOB_STATE_EXPIRED,\n            ),\n            configuration=configuration,\n            logger=logger,\n            timeout=int(\n                datetime.timedelta(\n                    hours=configuration.job_spec[\"maximum_run_time_hours\"]\n                ).total_seconds()\n            ),\n        )\n\n    error_msg = final_job_run.error.message\n\n    # Vertex will include an error message upon valid\n    # flow cancellations, so we'll avoid raising an error in that case\n    if error_msg and \"CANCELED\" not in error_msg:\n        raise RuntimeError(error_msg)\n\n    status_code = 0 if final_job_run.state == JobState.JOB_STATE_SUCCEEDED else 1\n\n    return VertexAIWorkerResult(\n        identifier=final_job_run.display_name, status_code=status_code\n    )\n
"},{"location":"vertex_worker/#prefect_gcp.workers.vertex.VertexAIWorkerJobConfiguration","title":"VertexAIWorkerJobConfiguration","text":"

Bases: BaseJobConfiguration

Configuration class used by the Vertex AI Worker to create a Job.

An instance of this class is passed to the Vertex AI Worker's run method for each flow run. It contains all information necessary to execute the flow run as a Vertex AI Job.

Attributes:

Name Type Description region str

The region where the Vertex AI Job resides.

credentials Optional[GcpCredentials]

The GCP Credentials used to connect to Vertex AI.

job_spec Dict[str, Any]

The Vertex AI Job spec used to create the Job.

job_watch_poll_interval float

The interval between GCP API calls to check Job state.

Source code in prefect_gcp/workers/vertex.py
class VertexAIWorkerJobConfiguration(BaseJobConfiguration):\n    \"\"\"\n    Configuration class used by the Vertex AI Worker to create a Job.\n\n    An instance of this class is passed to the Vertex AI Worker's `run` method\n    for each flow run. It contains all information necessary to execute\n    the flow run as a Vertex AI Job.\n\n    Attributes:\n        region: The region where the Vertex AI Job resides.\n        credentials: The GCP Credentials used to connect to Vertex AI.\n        job_spec: The Vertex AI Job spec used to create the Job.\n        job_watch_poll_interval: The interval between GCP API calls to check Job state.\n    \"\"\"\n\n    region: str = Field(\n        description=\"The region where the Vertex AI Job resides.\",\n        example=\"us-central1\",\n    )\n    credentials: Optional[GcpCredentials] = Field(\n        title=\"GCP Credentials\",\n        default_factory=GcpCredentials,\n        description=\"The GCP Credentials used to initiate the \"\n        \"Vertex AI Job. If not provided credentials will be \"\n        \"inferred from the local environment.\",\n    )\n\n    job_spec: Dict[str, Any] = Field(\n        template={\n            \"service_account_name\": \"{{ service_account_name }}\",\n            \"network\": \"{{ network }}\",\n            \"reserved_ip_ranges\": \"{{ reserved_ip_ranges }}\",\n            \"maximum_run_time_hours\": \"{{ maximum_run_time_hours }}\",\n            \"worker_pool_specs\": [\n                {\n                    \"replica_count\": 1,\n                    \"container_spec\": {\n                        \"image_uri\": \"{{ image }}\",\n                        \"command\": \"{{ command }}\",\n                        \"args\": [],\n                    },\n                    \"machine_spec\": {\n                        \"machine_type\": \"{{ machine_type }}\",\n                        \"accelerator_type\": \"{{ accelerator_type }}\",\n                        \"accelerator_count\": \"{{ accelerator_count }}\",\n                    },\n                    \"disk_spec\": {\n                        \"boot_disk_type\": \"{{ boot_disk_type }}\",\n                        \"boot_disk_size_gb\": \"{{ boot_disk_size_gb }}\",\n                    },\n                }\n            ],\n        }\n    )\n    job_watch_poll_interval: float = Field(\n        default=5.0,\n        title=\"Poll Interval (Seconds)\",\n        description=(\n            \"The amount of time to wait between GCP API calls while monitoring the \"\n            \"state of a Vertex AI Job.\"\n        ),\n    )\n\n    @property\n    def project(self) -> str:\n        \"\"\"property for accessing the project from the credentials.\"\"\"\n        return self.credentials.project\n\n    @property\n    def job_name(self) -> str:\n        \"\"\"\n        The name can be up to 128 characters long and can be consist of any UTF-8 characters. 
Reference:\n        https://cloud.google.com/python/docs/reference/aiplatform/latest/google.cloud.aiplatform.CustomJob#google_cloud_aiplatform_CustomJob_display_name\n        \"\"\"  # noqa\n        unique_suffix = uuid4().hex\n        job_name = f\"{self.name}-{unique_suffix}\"\n        return job_name\n\n    def prepare_for_flow_run(\n        self,\n        flow_run: \"FlowRun\",\n        deployment: Optional[\"DeploymentResponse\"] = None,\n        flow: Optional[\"Flow\"] = None,\n    ):\n        super().prepare_for_flow_run(flow_run, deployment, flow)\n\n        self._inject_formatted_env_vars()\n        self._inject_formatted_command()\n        self._ensure_existence_of_service_account()\n\n    def _inject_formatted_env_vars(self):\n        \"\"\"Inject environment variables in the Vertex job_spec configuration,\n        in the correct format, which is sourced from the BaseJobConfiguration.\n        This method is invoked by `prepare_for_flow_run()`.\"\"\"\n        worker_pool_specs = self.job_spec[\"worker_pool_specs\"]\n        formatted_env_vars = [\n            {\"name\": key, \"value\": value} for key, value in self.env.items()\n        ]\n        worker_pool_specs[0][\"container_spec\"][\"env\"] = formatted_env_vars\n\n    def _inject_formatted_command(self):\n        \"\"\"Inject shell commands in the Vertex job_spec configuration,\n        in the correct format, which is sourced from the BaseJobConfiguration.\n        Here, we'll ensure that the default string format\n        is converted to a list of strings.\"\"\"\n        worker_pool_specs = self.job_spec[\"worker_pool_specs\"]\n\n        existing_command = worker_pool_specs[0][\"container_spec\"].get(\"command\")\n        if existing_command is None:\n            worker_pool_specs[0][\"container_spec\"][\"command\"] = shlex.split(\n                self._base_flow_run_command()\n            )\n        elif isinstance(existing_command, str):\n            worker_pool_specs[0][\"container_spec\"][\"command\"] = shlex.split(\n                existing_command\n            )\n\n    def _ensure_existence_of_service_account(self):\n        \"\"\"Verify that a service account was provided, either in the credentials\n        or as a standalone service account name override.\"\"\"\n\n        provided_service_account_name = self.job_spec.get(\"service_account_name\")\n        credential_service_account = self.credentials._service_account_email\n\n        service_account_to_use = (\n            provided_service_account_name or credential_service_account\n        )\n\n        if service_account_to_use is None:\n            raise ValueError(\n                \"A service account is required for the Vertex job. \"\n                \"A service account could not be detected in the attached credentials \"\n                \"or in the service_account_name input. 
\"\n                \"Please pass in valid GCP credentials or a valid service_account_name\"\n            )\n\n        self.job_spec[\"service_account_name\"] = service_account_to_use\n\n    @validator(\"job_spec\")\n    def _ensure_job_spec_includes_required_attributes(cls, value: Dict[str, Any]):\n        \"\"\"\n        Ensures that the job spec includes all required components.\n        \"\"\"\n        patch = JsonPatch.from_diff(value, _get_base_job_spec())\n        missing_paths = sorted([op[\"path\"] for op in patch if op[\"op\"] == \"add\"])\n        if missing_paths:\n            raise ValueError(\n                \"Job is missing required attributes at the following paths: \"\n                f\"{', '.join(missing_paths)}\"\n            )\n        return value\n
"},{"location":"vertex_worker/#prefect_gcp.workers.vertex.VertexAIWorkerJobConfiguration-attributes","title":"Attributes","text":""},{"location":"vertex_worker/#prefect_gcp.workers.vertex.VertexAIWorkerJobConfiguration.job_name","title":"job_name: str property","text":"

The name can be up to 128 characters long and can consist of any UTF-8 characters. Reference: https://cloud.google.com/python/docs/reference/aiplatform/latest/google.cloud.aiplatform.CustomJob#google_cloud_aiplatform_CustomJob_display_name

"},{"location":"vertex_worker/#prefect_gcp.workers.vertex.VertexAIWorkerJobConfiguration.project","title":"project: str property","text":"

property for accessing the project from the credentials.

"},{"location":"vertex_worker/#prefect_gcp.workers.vertex.VertexAIWorkerResult","title":"VertexAIWorkerResult","text":"

Bases: BaseWorkerResult

Contains information about the final state of a completed process

Source code in prefect_gcp/workers/vertex.py
class VertexAIWorkerResult(BaseWorkerResult):\n    \"\"\"Contains information about the final state of a completed process\"\"\"\n
"},{"location":"vertex_worker/#prefect_gcp.workers.vertex.VertexAIWorkerVariables","title":"VertexAIWorkerVariables","text":"

Bases: BaseVariables

Default variables for the Vertex AI worker.

The schema for this class is used to populate the variables section of the default base job template.

Source code in prefect_gcp/workers/vertex.py
class VertexAIWorkerVariables(BaseVariables):\n    \"\"\"\n    Default variables for the Vertex AI worker.\n\n    The schema for this class is used to populate the `variables` section of the default\n    base job template.\n    \"\"\"\n\n    region: str = Field(\n        description=\"The region where the Vertex AI Job resides.\",\n        example=\"us-central1\",\n    )\n    image: str = Field(\n        title=\"Image Name\",\n        description=(\n            \"The URI of a container image in the Container or Artifact Registry, \"\n            \"used to run your Vertex AI Job. Note that Vertex AI will need access\"\n            \"to the project and region where the container image is stored. See \"\n            \"https://cloud.google.com/vertex-ai/docs/training/create-custom-container\"\n        ),\n        example=\"gcr.io/your-project/your-repo:latest\",\n    )\n    credentials: Optional[GcpCredentials] = Field(\n        title=\"GCP Credentials\",\n        default_factory=GcpCredentials,\n        description=\"The GCP Credentials used to initiate the \"\n        \"Vertex AI Job. If not provided credentials will be \"\n        \"inferred from the local environment.\",\n    )\n    machine_type: str = Field(\n        title=\"Machine Type\",\n        description=(\n            \"The machine type to use for the run, which controls \"\n            \"the available CPU and memory. \"\n            \"See https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec\"\n        ),\n        default=\"n1-standard-4\",\n    )\n    accelerator_type: Optional[str] = Field(\n        title=\"Accelerator Type\",\n        description=(\n            \"The type of accelerator to attach to the machine. \"\n            \"See https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec\"\n        ),\n        example=\"NVIDIA_TESLA_K80\",\n        default=None,\n    )\n    accelerator_count: Optional[int] = Field(\n        title=\"Accelerator Count\",\n        description=(\n            \"The number of accelerators to attach to the machine. \"\n            \"See https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec\"\n        ),\n        example=1,\n        default=None,\n    )\n    boot_disk_type: str = Field(\n        title=\"Boot Disk Type\",\n        description=\"The type of boot disk to attach to the machine.\",\n        default=\"pd-ssd\",\n    )\n    boot_disk_size_gb: int = Field(\n        title=\"Boot Disk Size (GB)\",\n        description=\"The size of the boot disk to attach to the machine, in gigabytes.\",\n        default=100,\n    )\n    maximum_run_time_hours: int = Field(\n        default=1,\n        title=\"Maximum Run Time (Hours)\",\n        description=\"The maximum job running time, in hours\",\n    )\n    network: Optional[str] = Field(\n        default=None,\n        title=\"Network\",\n        description=\"The full name of the Compute Engine network\"\n        \"to which the Job should be peered. Private services access must \"\n        \"already be configured for the network. If left unspecified, the job \"\n        \"is not peered with any network. \"\n        \"For example: projects/12345/global/networks/myVPC\",\n    )\n    reserved_ip_ranges: Optional[List[str]] = Field(\n        default=None,\n        title=\"Reserved IP Ranges\",\n        description=\"A list of names for the reserved ip ranges under the VPC \"\n        \"network that can be used for this job. If set, we will deploy the job \"\n        \"within the provided ip ranges. 
Otherwise, the job will be deployed to \"\n        \"any ip ranges under the provided VPC network.\",\n    )\n    service_account_name: Optional[str] = Field(\n        default=None,\n        title=\"Service Account Name\",\n        description=(\n            \"Specifies the service account to use \"\n            \"as the run-as account in Vertex AI. The worker submitting jobs must have \"\n            \"act-as permission on this run-as account. If unspecified, the AI \"\n            \"Platform Custom Code Service Agent for the CustomJob's project is \"\n            \"used. Takes precedence over the service account found in GCP credentials, \"\n            \"and required if a service account cannot be detected in GCP credentials.\"\n        ),\n    )\n    job_watch_poll_interval: float = Field(\n        default=5.0,\n        title=\"Poll Interval (Seconds)\",\n        description=(\n            \"The amount of time to wait between GCP API calls while monitoring the \"\n            \"state of a Vertex AI Job.\"\n        ),\n    )\n
"},{"location":"deployments/steps/","title":"Deployment Steps","text":""},{"location":"deployments/steps/#prefect_gcp.deployments.steps","title":"prefect_gcp.deployments.steps","text":"

Prefect deployment steps for code storage in and retrieval from Google Cloud Storage.

"},{"location":"deployments/steps/#prefect_gcp.deployments.steps-classes","title":"Classes","text":""},{"location":"deployments/steps/#prefect_gcp.deployments.steps.PullFromGcsOutput","title":"PullFromGcsOutput","text":"

Bases: TypedDict

The output of the pull_from_gcs step.

Source code in prefect_gcp/deployments/steps.py
class PullFromGcsOutput(TypedDict):\n    \"\"\"\n    The output of the `pull_from_gcs` step.\n    \"\"\"\n\n    bucket: str\n    folder: str\n    directory: str\n
"},{"location":"deployments/steps/#prefect_gcp.deployments.steps.PullProjectFromGcsOutput","title":"PullProjectFromGcsOutput","text":"

Bases: PullFromGcsOutput

Deprecated. Use PullFromGcsOutput instead.

Source code in prefect_gcp/deployments/steps.py
@deprecated_callable(start_date=\"Jun 2023\", help=\"Use `PullFromGcsOutput` instead.\")\nclass PullProjectFromGcsOutput(PullFromGcsOutput):\n    \"\"\"Deprecated. Use `PullFromGcsOutput` instead.\"\"\"\n
"},{"location":"deployments/steps/#prefect_gcp.deployments.steps.PushProjectToGcsOutput","title":"PushProjectToGcsOutput","text":"

Bases: PushToGcsOutput

Deprecated. Use PushToGcsOutput instead.

Source code in prefect_gcp/deployments/steps.py
@deprecated_callable(start_date=\"Jun 2023\", help=\"Use `PushToGcsOutput` instead.\")\nclass PushProjectToGcsOutput(PushToGcsOutput):\n    \"\"\"Deprecated. Use `PushToGcsOutput` instead.\"\"\"\n
"},{"location":"deployments/steps/#prefect_gcp.deployments.steps.PushToGcsOutput","title":"PushToGcsOutput","text":"

Bases: TypedDict

The output of the push_to_gcs step.

Source code in prefect_gcp/deployments/steps.py
class PushToGcsOutput(TypedDict):\n    \"\"\"\n    The output of the `push_to_gcs` step.\n    \"\"\"\n\n    bucket: str\n    folder: str\n
"},{"location":"deployments/steps/#prefect_gcp.deployments.steps-functions","title":"Functions","text":""},{"location":"deployments/steps/#prefect_gcp.deployments.steps.pull_from_gcs","title":"pull_from_gcs","text":"

Pulls the contents of a project from a GCS bucket to the current working directory.

Parameters:

Name Type Description Default bucket str

The name of the GCS bucket where files are stored.

required folder str

The folder in the GCS bucket where files are stored.

required project Optional[str]

The GCP project the bucket belongs to. If not provided, the project will be inferred from the credentials or the local environment.

None credentials Optional[Dict]

A dictionary containing the service account information and project used for authentication. If not provided, the application default credentials will be used.

None

Returns:

Type Description PullProjectFromGcsOutput

A dictionary containing the bucket, folder, and local directory where files were downloaded.

Examples:

Pull from GCS using the default environment credentials:

build:\n    - prefect_gcp.deployments.steps.pull_from_gcs:\n        requires: prefect-gcp\n        bucket: my-bucket\n        folder: my-folder\n

Pull from GCS using credentials stored in a block:

build:\n    - prefect_gcp.deployments.steps.pull_from_gcs:\n        requires: prefect-gcp\n        bucket: my-bucket\n        folder: my-folder\n        credentials: \"{{ prefect.blocks.gcp-credentials.dev-credentials }}\"\n

Pull from a GCS bucket using credentials stored in a service account file:

build:\n    - prefect_gcp.deployments.steps.pull_from_gcs:\n        requires: prefect-gcp\n        bucket: my-bucket\n        folder: my-folder\n        credentials:\n            project: my-project\n            service_account_file: /path/to/service_account.json\n

Source code in prefect_gcp/deployments/steps.py
def pull_from_gcs(\n    bucket: str,\n    folder: str,\n    project: Optional[str] = None,\n    credentials: Optional[Dict] = None,\n) -> PullProjectFromGcsOutput:\n    \"\"\"\n    Pulls the contents of a project from an GCS bucket to the current working directory.\n\n    Args:\n        bucket: The name of the GCS bucket where files are stored.\n        folder: The folder in the GCS bucket where files are stored.\n        project: The GCP project the bucket belongs to. If not provided, the project will be\n            inferred from the credentials or the local environment.\n        credentials: A dictionary containing the service account information and project\n            used for authentication. If not provided, the application default\n            credentials will be used.\n\n    Returns:\n        A dictionary containing the bucket, folder, and local directory where files were downloaded.\n\n    Examples:\n        Pull from GCS using the default environment credentials:\n        ```yaml\n        build:\n            - prefect_gcp.deployments.steps.pull_from_gcs:\n                requires: prefect-gcp\n                bucket: my-bucket\n                folder: my-folder\n        ```\n\n        Pull from GCS using credentials stored in a block:\n        ```yaml\n        build:\n            - prefect_gcp.deployments.steps.pull_from_gcs:\n                requires: prefect-gcp\n                bucket: my-bucket\n                folder: my-folder\n                credentials: \"{{ prefect.blocks.gcp-credentials.dev-credentials }}\"\n        ```\n\n        Pull from to an GCS bucket using credentials stored in a service account file:\n        ```yaml\n        build:\n            - prefect_gcp.deployments.steps.pull_from_gcs:\n                requires: prefect-gcp\n                bucket: my-bucket\n                folder: my-folder\n                credentials:\n                    project: my-project\n                    service_account_file: /path/to/service_account.json\n        ```\n\n    \"\"\"  # noqa\n    local_path = Path.cwd()\n    project = credentials.get(\"project\") if credentials else None\n\n    gcp_creds = None\n    if credentials is not None:\n        if credentials.get(\"service_account_info\") is not None:\n            gcp_creds = Credentials.from_service_account_info(\n                credentials.get(\"service_account_info\"),\n                scopes=[\"https://www.googleapis.com/auth/cloud-platform\"],\n            )\n        elif credentials.get(\"service_account_file\") is not None:\n            gcp_creds = Credentials.from_service_account_file(\n                credentials.get(\"service_account_file\"),\n                scopes=[\"https://www.googleapis.com/auth/cloud-platform\"],\n            )\n\n    gcp_creds = gcp_creds or google.auth.default()[0]\n\n    storage_client = StorageClient(credentials=gcp_creds, project=project)\n\n    blobs = storage_client.list_blobs(bucket, prefix=folder)\n\n    for blob in blobs:\n        if blob.name.endswith(\"/\"):\n            # object is a folder and will be created if it contains any objects\n            continue\n        local_blob_download_path = PurePosixPath(\n            local_path\n            / relative_path_to_current_platform(blob.name).relative_to(folder)\n        )\n        Path.mkdir(Path(local_blob_download_path.parent), parents=True, exist_ok=True)\n\n        blob.download_to_filename(local_blob_download_path)\n\n    return {\n        \"bucket\": bucket,\n        \"folder\": folder,\n        \"directory\": 
str(local_path),\n    }\n
"},{"location":"deployments/steps/#prefect_gcp.deployments.steps.pull_project_from_gcs","title":"pull_project_from_gcs","text":"

Deprecated. Use pull_from_gcs instead.

Source code in prefect_gcp/deployments/steps.py
@deprecated_callable(start_date=\"Jun 2023\", help=\"Use `pull_from_gcs` instead.\")\ndef pull_project_from_gcs(*args, **kwargs) -> PullProjectFromGcsOutput:\n    \"\"\"\n    Deprecated. Use `pull_from_gcs` instead.\n    \"\"\"\n    return pull_from_gcs(*args, **kwargs)\n
"},{"location":"deployments/steps/#prefect_gcp.deployments.steps.push_project_to_gcs","title":"push_project_to_gcs","text":"

Deprecated. Use push_to_gcs instead.

Source code in prefect_gcp/deployments/steps.py
@deprecated_callable(start_date=\"Jun 2023\", help=\"Use `push_to_gcs` instead.\")\ndef push_project_to_gcs(*args, **kwargs) -> PushToGcsOutput:\n    \"\"\"\n    Deprecated. Use `push_to_gcs` instead.\n    \"\"\"\n    return push_to_gcs(*args, **kwargs)\n
"},{"location":"deployments/steps/#prefect_gcp.deployments.steps.push_to_gcs","title":"push_to_gcs","text":"

Pushes the contents of the current working directory to a GCS bucket, excluding files and folders specified in the ignore_file.

Parameters:

Name Type Description Default bucket str

The name of the GCS bucket where files will be uploaded.

required folder str

The folder in the GCS bucket where files will be uploaded.

required project Optional[str]

The GCP project the bucket belongs to. If not provided, the project will be inferred from the credentials or the local environment.

None credentials Optional[Dict]

A dictionary containing the service account information and project used for authentication. If not provided, the application default credentials will be used.

None ignore_file

The name of the file containing ignore patterns.

'.prefectignore'

Returns:

Type Description PushToGcsOutput

A dictionary containing the bucket and folder where files were uploaded.

Examples:

Push to a GCS bucket:

build:\n    - prefect_gcp.deployments.steps.push_to_gcs:\n        requires: prefect-gcp\n        bucket: my-bucket\n        folder: my-project\n

Push to a GCS bucket using credentials stored in a block:

build:\n    - prefect_gcp.deployments.steps.push_to_gcs:\n        requires: prefect-gcp\n        bucket: my-bucket\n        folder: my-folder\n        credentials: \"{{ prefect.blocks.gcp-credentials.dev-credentials }}\"\n

Push to a GCS bucket using credentials stored in a service account file:

build:\n    - prefect_gcp.deployments.steps.push_to_gcs:\n        requires: prefect-gcp\n        bucket: my-bucket\n        folder: my-folder\n        credentials:\n            project: my-project\n            service_account_file: /path/to/service_account.json\n

Source code in prefect_gcp/deployments/steps.py
def push_to_gcs(\n    bucket: str,\n    folder: str,\n    project: Optional[str] = None,\n    credentials: Optional[Dict] = None,\n    ignore_file=\".prefectignore\",\n) -> PushToGcsOutput:\n    \"\"\"\n    Pushes the contents of the current working directory to a GCS bucket,\n    excluding files and folders specified in the ignore_file.\n\n    Args:\n        bucket: The name of the GCS bucket where files will be uploaded.\n        folder: The folder in the GCS bucket where files will be uploaded.\n        project: The GCP project the bucket belongs to. If not provided, the project\n            will be inferred from the credentials or the local environment.\n        credentials: A dictionary containing the service account information and project\n            used for authentication. If not provided, the application default\n            credentials will be used.\n        ignore_file: The name of the file containing ignore patterns.\n\n    Returns:\n        A dictionary containing the bucket and folder where files were uploaded.\n\n    Examples:\n        Push to a GCS bucket:\n        ```yaml\n        build:\n            - prefect_gcp.deployments.steps.push_to_gcs:\n                requires: prefect-gcp\n                bucket: my-bucket\n                folder: my-project\n        ```\n\n        Push  to a GCS bucket using credentials stored in a block:\n        ```yaml\n        build:\n            - prefect_gcp.deployments.steps.push_to_gcs:\n                requires: prefect-gcp\n                bucket: my-bucket\n                folder: my-folder\n                credentials: \"{{ prefect.blocks.gcp-credentials.dev-credentials }}\"\n        ```\n\n        Push to a GCS bucket using credentials stored in a service account\n        file:\n        ```yaml\n        build:\n            - prefect_gcp.deployments.steps.push_to_gcs:\n                requires: prefect-gcp\n                bucket: my-bucket\n                folder: my-folder\n                credentials:\n                    project: my-project\n                    service_account_file: /path/to/service_account.json\n        ```\n\n    \"\"\"\n    project = credentials.get(\"project\") if credentials else None\n\n    gcp_creds = None\n    if credentials is not None:\n        if credentials.get(\"service_account_info\") is not None:\n            gcp_creds = Credentials.from_service_account_info(\n                credentials.get(\"service_account_info\"),\n                scopes=[\"https://www.googleapis.com/auth/cloud-platform\"],\n            )\n        elif credentials.get(\"service_account_file\") is not None:\n            gcp_creds = Credentials.from_service_account_file(\n                credentials.get(\"service_account_file\"),\n                scopes=[\"https://www.googleapis.com/auth/cloud-platform\"],\n            )\n\n    gcp_creds = gcp_creds or google.auth.default()[0]\n\n    storage_client = StorageClient(credentials=gcp_creds, project=project)\n    bucket_resource = storage_client.bucket(bucket)\n\n    local_path = Path.cwd()\n\n    included_files = None\n    if ignore_file and Path(ignore_file).exists():\n        with open(ignore_file, \"r\") as f:\n            ignore_patterns = f.readlines()\n        included_files = filter_files(str(local_path), ignore_patterns)\n\n    for local_file_path in local_path.expanduser().rglob(\"*\"):\n        relative_local_file_path = local_file_path.relative_to(local_path)\n        if (\n            included_files is not None\n            and str(relative_local_file_path) not in 
included_files\n        ):\n            continue\n        elif not local_file_path.is_dir():\n            remote_file_path = (folder / relative_local_file_path).as_posix()\n\n            blob_resource = bucket_resource.blob(remote_file_path)\n            blob_resource.upload_from_filename(local_file_path)\n\n    return {\n        \"bucket\": bucket,\n        \"folder\": folder,\n    }\n
"}]} \ No newline at end of file +{"config":{"lang":["en"],"separator":"[\\s\\-]+","pipeline":["stopWordFilter"]},"docs":[{"location":"","title":"prefect-gcp","text":"

prefect-gcp makes it easy to leverage the capabilities of Google Cloud Platform (GCP) in your flows, featuring support for Vertex AI, Cloud Run, BigQuery, Cloud Storage, and Secret Manager.

"},{"location":"#getting-started","title":"Getting Started","text":""},{"location":"#saving-credentials-to-a-block","title":"Saving credentials to a block","text":"

To use prefect-gcp, you will first need to install it and authenticate with a service account.

prefect-gcp can safely save and load your service account credentials, so they can be reused across the collection! Simply follow the steps below.

  1. Refer to the GCP service account documentation on how to create and download a service account key file.
  2. Copy the JSON contents.
  3. Create a short script, replacing the placeholders with your information.
from prefect_gcp import GcpCredentials\n\n# replace this PLACEHOLDER dict with your own service account info\nservice_account_info = {\n  \"type\": \"service_account\",\n  \"project_id\": \"PROJECT_ID\",\n  \"private_key_id\": \"KEY_ID\",\n  \"private_key\": \"-----BEGIN PRIVATE KEY-----\\nPRIVATE_KEY\\n-----END PRIVATE KEY-----\\n\",\n  \"client_email\": \"SERVICE_ACCOUNT_EMAIL\",\n  \"client_id\": \"CLIENT_ID\",\n  \"auth_uri\": \"https://accounts.google.com/o/oauth2/auth\",\n  \"token_uri\": \"https://accounts.google.com/o/oauth2/token\",\n  \"auth_provider_x509_cert_url\": \"https://www.googleapis.com/oauth2/v1/certs\",\n  \"client_x509_cert_url\": \"https://www.googleapis.com/robot/v1/metadata/x509/SERVICE_ACCOUNT_EMAIL\"\n}\n\nGcpCredentials(\n    service_account_info=service_account_info\n).save(\"BLOCK-NAME-PLACEHOLDER\")\n

service_account_info vs service_account_file

The advantage of using service_account_info, instead of service_account_file, is that it is accessible across containers.

If service_account_file is used, the provided file path must be available in the container executing the flow.

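For example, here is a minimal sketch of saving a block with service_account_file instead; the path below is a hypothetical placeholder and the key file must be available at that path wherever the flow executes.

from prefect_gcp import GcpCredentials\n\n# NOTE: hypothetical placeholder path; the key file must exist at this\n# path in the container or machine executing the flow\nGcpCredentials(\n    service_account_file=\"/path/to/service_account.json\"\n).save(\"BLOCK-NAME-PLACEHOLDER\", overwrite=True)\n
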
Congrats! You can now easily load the saved block, which holds your credentials:

from prefect_gcp import GcpCredentials\nGcpCredentials.load(\"BLOCK-NAME-PLACEHOLDER\")\n

Registering blocks

Register blocks in this module to view and edit them on Prefect Cloud:

prefect block register -m prefect_gcp\n
"},{"location":"#using-prefect-with-google-cloud-run","title":"Using Prefect with Google Cloud Run","text":"

Is your local computer or server running out of memory or taking too long to complete a job?

prefect_gcp offers a solution by enabling you to execute your Prefect flows remotely, on demand, through Google Cloud Run.

The following code snippets demonstrate how prefect_gcp can be used to run a job on Cloud Run, either as part of a Prefect deployment's infrastructure or within a flow.

"},{"location":"#as-infrastructure","title":"As Infrastructure","text":"

Below is a simple walkthrough for how to use Google Cloud Run as infrastructure for a deployment.

"},{"location":"#set-variables","title":"Set variables","text":"

To expedite copy/paste without needing to update placeholders manually, update and execute the following.

export CREDENTIALS_BLOCK_NAME=\"BLOCK-NAME-PLACEHOLDER\"\nexport CLOUD_RUN_JOB_BLOCK_NAME=\"cloud-run-job-example\"\nexport CLOUD_RUN_JOB_REGION=\"us-central1\"\nexport GCS_BUCKET_BLOCK_NAME=\"cloud-run-job-bucket-example\"\nexport GCP_PROJECT_ID=$(gcloud config get-value project)\n
"},{"location":"#build-an-image","title":"Build an image","text":"

First, find an existing image within the Google Artifact Registry. Ensure it has Python and prefect-gcp[cloud_storage] installed, or follow the instructions below to set one up.

Create a Dockerfile.

FROM prefecthq/prefect:2-python3.11\nRUN pip install \"prefect-gcp[cloud_storage]\"\n

Then push to the Google Artifact Registry.

gcloud artifacts repositories create test-example-repository --repository-format=docker --location=us\ngcloud auth configure-docker us-docker.pkg.dev\ndocker build -t us-docker.pkg.dev/${GCP_PROJECT_ID}/test-example-repository/prefect-gcp:2-python3.11 .\ndocker push us-docker.pkg.dev/${GCP_PROJECT_ID}/test-example-repository/prefect-gcp:2-python3.11\n
"},{"location":"#save-an-infrastructure-and-storage-block","title":"Save an infrastructure and storage block","text":"

Save a custom infrastructure and storage block by executing the following snippet.

import os\nfrom prefect_gcp import GcpCredentials, CloudRunJob, GcsBucket\n\ngcp_credentials = GcpCredentials.load(os.environ[\"CREDENTIALS_BLOCK_NAME\"])\n\n# must be from GCR and have Python + Prefect\nimage = f\"us-docker.pkg.dev/{os.environ['GCP_PROJECT_ID']}/test-example-repository/prefect-gcp:2-python3.11\"  # noqa\n\ncloud_run_job = CloudRunJob(\n    image=image,\n    credentials=gcp_credentials,\n    region=os.environ[\"CLOUD_RUN_JOB_REGION\"],\n)\ncloud_run_job.save(os.environ[\"CLOUD_RUN_JOB_BLOCK_NAME\"], overwrite=True)\n\nbucket_name = \"cloud-run-job-bucket\"\ncloud_storage_client = gcp_credentials.get_cloud_storage_client()\ncloud_storage_client.create_bucket(bucket_name)\ngcs_bucket = GcsBucket(\n    bucket=bucket_name,\n    gcp_credentials=gcp_credentials,\n)\ngcs_bucket.save(os.environ[\"GCS_BUCKET_BLOCK_NAME\"], overwrite=True)\n
"},{"location":"#write-a-flow","title":"Write a flow","text":"

Then, create a deployment from an existing flow, or use the flow below if you don't have one handy.

from prefect import flow\n\n@flow(log_prints=True)\ndef cloud_run_job_flow():\n    print(\"Hello, Prefect!\")\n\nif __name__ == \"__main__\":\n    cloud_run_job_flow()\n
"},{"location":"#create-a-deployment","title":"Create a deployment","text":"

If the script was named \"cloud_run_job_script.py\", build a deployment manifest with the following command.

prefect deployment build cloud_run_job_script.py:cloud_run_job_flow \\\n    -n cloud-run-deployment \\\n    -ib cloud-run-job/${CLOUD_RUN_JOB_BLOCK_NAME} \\\n    -sb gcs-bucket/${GCS_BUCKET_BLOCK_NAME}\n

Now apply the deployment!

prefect deployment apply cloud_run_job_flow-deployment.yaml\n
"},{"location":"#test-the-deployment","title":"Test the deployment","text":"

Start up an agent in a separate terminal. The agent will poll the Prefect API for scheduled flow runs that are ready to run.

prefect agent start -q 'default'\n

Run the deployment once to test.

prefect deployment run cloud-run-job-flow/cloud-run-deployment\n

Once the flow run has completed, you will see Hello, Prefect! logged in the Prefect UI.

No class found for dispatch key

If you encounter an error message like KeyError: \"No class found for dispatch key 'cloud-run-job' in registry for type 'Block'.\", ensure prefect-gcp is installed in the environment that your agent is running!

"},{"location":"#within-flow","title":"Within Flow","text":"

You can execute commands through a Cloud Run Job directly within a Prefect flow.

from prefect import flow\nfrom prefect_gcp import GcpCredentials\nfrom prefect_gcp.cloud_run import CloudRunJob\n\n@flow\ndef cloud_run_job_flow():\n    cloud_run_job = CloudRunJob(\n        image=\"us-docker.pkg.dev/cloudrun/container/job:latest\",\n        credentials=GcpCredentials.load(\"BLOCK-NAME-PLACEHOLDER\"),\n        region=\"us-central1\",\n        command=[\"echo\", \"Hello, Prefect!\"],\n    )\n    return cloud_run_job.run()\n
"},{"location":"#using-prefect-with-google-vertex-ai","title":"Using Prefect with Google Vertex AI","text":"

prefect_gcp also enables you to execute your Prefect flows remotely, on demand, using Google Vertex AI!

Be sure to additionally install the AI Platform extra!

Setting up a Vertex AI job is extremely similar to setting up a Cloud Run Job, but replace CloudRunJob with the following snippet.

from prefect_gcp import GcpCredentials, VertexAICustomTrainingJob, GcsBucket\n\ngcp_credentials = GcpCredentials.load(\"BLOCK-NAME-PLACEHOLDER\")\n\nvertex_ai_job = VertexAICustomTrainingJob(\n    image=\"IMAGE-NAME-PLACEHOLDER\",  # must be from GCR and have Python + Prefect\n    gcp_credentials=gcp_credentials,  # field is named gcp_credentials on this block\n    region=\"us-central1\",\n)\nvertex_ai_job.save(\"test-example\")\n

Cloud Run Job vs Vertex AI

With Vertex AI, you can allocate computational resources on-the-fly for your executions, much like Cloud Run.

However, unlike Cloud Run, you have the flexibility to provision instances with higher CPU, GPU, TPU, and RAM capacities.

Additionally, jobs can run for up to 7 days, which is significantly longer than the maximum duration allowed on Cloud Run.

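As a rough sketch of taking advantage of that longer limit, the block's maximum_run_time field (a datetime.timedelta that defaults to 7 days, per the block source shown later in this document) can be set explicitly; the names below reuse the placeholders from the snippet above.

import datetime\n\nfrom prefect_gcp import GcpCredentials, VertexAICustomTrainingJob\n\nvertex_ai_job = VertexAICustomTrainingJob(\n    image=\"IMAGE-NAME-PLACEHOLDER\",  # must be from GCR and have Python + Prefect\n    gcp_credentials=GcpCredentials.load(\"BLOCK-NAME-PLACEHOLDER\"),\n    region=\"us-central1\",\n    maximum_run_time=datetime.timedelta(hours=2),  # defaults to 7 days\n)\n
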
"},{"location":"#using-prefect-with-google-bigquery","title":"Using Prefect with Google BigQuery","text":"

Got big data in BigQuery? prefect_gcp allows you to steadily stream data from and write to Google BigQuery within your Prefect flows!

Be sure to install prefect-gcp with the BigQuery extra!

The provided code snippet shows how you can use prefect_gcp to create a new dataset in BigQuery, define a table, insert rows, and fetch data from the table.

from prefect import flow\nfrom prefect_gcp.bigquery import GcpCredentials, BigQueryWarehouse\n\n@flow\ndef bigquery_flow():\n    all_rows = []\n    gcp_credentials = GcpCredentials.load(\"BLOCK-NAME-PLACEHOLDER\")\n\n    client = gcp_credentials.get_bigquery_client()\n    client.create_dataset(\"test_example\", exists_ok=True)\n\n    with BigQueryWarehouse(gcp_credentials=gcp_credentials) as warehouse:\n        warehouse.execute(\n            \"CREATE TABLE IF NOT EXISTS test_example.customers (name STRING, address STRING);\"\n        )\n        warehouse.execute_many(\n            \"INSERT INTO test_example.customers (name, address) VALUES (%(name)s, %(address)s);\",\n            seq_of_parameters=[\n                {\"name\": \"Marvin\", \"address\": \"Highway 42\"},\n                {\"name\": \"Ford\", \"address\": \"Highway 42\"},\n                {\"name\": \"Unknown\", \"address\": \"Highway 42\"},\n            ],\n        )\n        while True:\n            # Repeated fetch* calls using the same operation will\n            # skip re-executing and instead return the next set of results\n            new_rows = warehouse.fetch_many(\"SELECT * FROM test_example.customers\", size=2)\n            if len(new_rows) == 0:\n                break\n            all_rows.extend(new_rows)\n    return all_rows\n\nbigquery_flow()\n
"},{"location":"#using-prefect-with-google-cloud-storage","title":"Using Prefect with Google Cloud Storage","text":"

With prefect_gcp, you can have peace of mind that your Prefect flows not only seamlessly upload and download objects to and from Google Cloud Storage, but also log these actions.

Be sure to additionally install prefect-gcp with the Cloud Storage extra!

The provided code snippet shows how you can use prefect_gcp to upload a file to a Google Cloud Storage bucket and download the same file under a different file name.

from pathlib import Path\nfrom prefect import flow\nfrom prefect_gcp import GcpCredentials, GcsBucket\n\n\n@flow\ndef cloud_storage_flow():\n    # create a dummy file to upload\n    file_path = Path(\"test-example.txt\")\n    file_path.write_text(\"Hello, Prefect!\")\n\n    gcp_credentials = GcpCredentials.load(\"BLOCK-NAME-PLACEHOLDER\")\n    gcs_bucket = GcsBucket(\n        bucket=\"BUCKET-NAME-PLACEHOLDER\",\n        gcp_credentials=gcp_credentials\n    )\n\n    gcs_bucket_path = gcs_bucket.upload_from_path(file_path)\n    downloaded_file_path = gcs_bucket.download_object_to_path(\n        gcs_bucket_path, \"downloaded-test-example.txt\"\n    )\n    return downloaded_file_path.read_text()\n\n\ncloud_storage_flow()\n

Upload and download directories

GcsBucket supports uploading and downloading entire directories. To view examples, check out the Examples Catalog!

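As a quick sketch of what that looks like (folder names below are placeholders), GcsBucket exposes upload_from_folder and download_folder_to_path for whole-directory transfers:

from prefect_gcp import GcpCredentials, GcsBucket\n\ngcs_bucket = GcsBucket(\n    bucket=\"BUCKET-NAME-PLACEHOLDER\",\n    gcp_credentials=GcpCredentials.load(\"BLOCK-NAME-PLACEHOLDER\"),\n)\n\n# upload a local directory, then download it back to a different local path\ngcs_bucket.upload_from_folder(\"local-folder\", to_folder=\"remote-folder\")\ngcs_bucket.download_folder_to_path(\"remote-folder\", \"downloaded-folder\")\n
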
"},{"location":"#using-prefect-with-google-secret-manager","title":"Using Prefect with Google Secret Manager","text":"

Do you already have secrets available on Google Secret Manager? There's no need to migrate them!

prefect_gcp allows you to read and write secrets with Google Secret Manager within your Prefect flows.

Be sure to install prefect-gcp with the Secret Manager extra!

The provided code snippet shows how you can use prefect_gcp to write a secret to the Secret Manager, read the secret data, delete the secret, and finally return the secret data.

from prefect import flow\nfrom prefect_gcp import GcpCredentials, GcpSecret\n\n\n@flow\ndef secret_manager_flow():\n    gcp_credentials = GcpCredentials.load(\"BLOCK-NAME-PLACEHOLDER\")\n    gcp_secret = GcpSecret(secret_name=\"test-example\", gcp_credentials=gcp_credentials)\n    gcp_secret.write_secret(secret_data=b\"Hello, Prefect!\")\n    secret_data = gcp_secret.read_secret()\n    gcp_secret.delete_secret()\n    return secret_data\n\nsecret_manager_flow()\n
"},{"location":"#accessing-google-credentials-or-clients-from-gcpcredentials","title":"Accessing Google credentials or clients from GcpCredentials","text":"

In the case that prefect-gcp is missing a feature, feel free to submit an issue.

In the meantime, you may want to access the underlying Google Cloud credentials or clients, which prefect-gcp exposes via the GcpCredentials block.

The provided code snippet shows how you can use prefect_gcp to instantiate a Google Cloud client, like bigquery.Client.

Note that a GcpCredentials object is NOT a valid input to the underlying BigQuery client; use the get_credentials_from_service_account method to access and pass an actual google.auth.Credentials object.

from google.cloud import bigquery\nfrom prefect import flow\nfrom prefect_gcp import GcpCredentials\n\n@flow\ndef create_bigquery_client():\n    gcp_credentials = GcpCredentials.load(\"BLOCK-NAME-PLACEHOLDER\")\n    google_auth_credentials = gcp_credentials.get_credentials_from_service_account()\n    bigquery_client = bigquery.Client(credentials=google_auth_credentials)\n

If you simply want to access the underlying client, prefect-gcp exposes a get_client method from GcpCredentials.

from prefect import flow\nfrom prefect_gcp import GcpCredentials\n\n@flow\ndef create_bigquery_client():\n    gcp_credentials = GcpCredentials.load(\"BLOCK-NAME-PLACEHOLDER\")\n    bigquery_client = gcp_credentials.get_client(\"bigquery\")\n
"},{"location":"#resources","title":"Resources","text":"

For more tips on how to use tasks and flows in a Collection, check out Using Collections!

"},{"location":"#installation","title":"Installation","text":"

To use prefect-gcp and Cloud Run:

pip install prefect-gcp\n

To use Cloud Storage:

pip install \"prefect-gcp[cloud_storage]\"\n

To use BigQuery:

pip install \"prefect-gcp[bigquery]\"\n

To use Secret Manager:

pip install \"prefect-gcp[secret_manager]\"\n

To use Vertex AI:

pip install \"prefect-gcp[aiplatform]\"\n

A list of available blocks in prefect-gcp and their setup instructions can be found here.

Requires an installation of Python 3.7+.

We recommend using a Python virtual environment manager such as pipenv, conda or virtualenv.

These tasks are designed to work with Prefect 2. For more information about how to use Prefect, please refer to the Prefect documentation.

"},{"location":"#feedback","title":"Feedback","text":"

If you encounter any bugs while using prefect-gcp, feel free to open an issue in the prefect-gcp repository.

If you have any questions or issues while using prefect-gcp, you can find help in either the Prefect Discourse forum or the Prefect Slack community.

Feel free to star or watch prefect-gcp for updates too!

"},{"location":"aiplatform/","title":"AI Platform","text":""},{"location":"aiplatform/#prefect_gcp.aiplatform","title":"prefect_gcp.aiplatform","text":"

DEPRECATION WARNING:

This module is deprecated as of March 2024 and will not be available after September 2024. It has been replaced by the Vertex AI worker, which offers enhanced functionality and better performance.

For upgrade instructions, see https://docs.prefect.io/latest/guides/upgrade-guide-agents-to-workers/.

Integrations with Google AI Platform.

Note this module is experimental. The interfaces within may change without notice.

Examples:

Run a job using Vertex AI Custom Training:\n```python\nfrom prefect_gcp.credentials import GcpCredentials\nfrom prefect_gcp.aiplatform import VertexAICustomTrainingJob\n\ngcp_credentials = GcpCredentials.load(\"BLOCK_NAME\")\njob = VertexAICustomTrainingJob(\n    region=\"us-east1\",\n    image=\"us-docker.pkg.dev/cloudrun/container/job:latest\",\n    gcp_credentials=gcp_credentials,\n)\njob.run()\n```\n\nRun a job that runs the command `echo hello world` using Google Cloud Run Jobs:\n```python\nfrom prefect_gcp.credentials import GcpCredentials\nfrom prefect_gcp.aiplatform import VertexAICustomTrainingJob\n\ngcp_credentials = GcpCredentials.load(\"BLOCK_NAME\")\njob = VertexAICustomTrainingJob(\n    command=[\"echo\", \"hello world\"],\n    region=\"us-east1\",\n    image=\"us-docker.pkg.dev/cloudrun/container/job:latest\",\n    gcp_credentials=gcp_credentials,\n)\njob.run()\n```\n\nPreview job specs:\n```python\nfrom prefect_gcp.credentials import GcpCredentials\nfrom prefect_gcp.aiplatform import VertexAICustomTrainingJob\n\ngcp_credentials = GcpCredentials.load(\"BLOCK_NAME\")\njob = VertexAICustomTrainingJob(\n    command=[\"echo\", \"hello world\"],\n    region=\"us-east1\",\n    image=\"us-docker.pkg.dev/cloudrun/container/job:latest\",\n    gcp_credentials=gcp_credentials,\n)\njob.preview()\n```\n
"},{"location":"aiplatform/#prefect_gcp.aiplatform-classes","title":"Classes","text":""},{"location":"aiplatform/#prefect_gcp.aiplatform.VertexAICustomTrainingJob","title":"VertexAICustomTrainingJob","text":"

Bases: Infrastructure

Infrastructure block used to run Vertex AI custom training jobs.

Source code in prefect_gcp/aiplatform.py
@deprecated_class(\n    start_date=\"Mar 2024\",\n    help=(\n        \"Use the Vertex AI worker instead.\"\n        \" Refer to the upgrade guide for more information:\"\n        \" https://docs.prefect.io/latest/guides/upgrade-guide-agents-to-workers/.\"\n    ),\n)\nclass VertexAICustomTrainingJob(Infrastructure):\n    \"\"\"\n    Infrastructure block used to run Vertex AI custom training jobs.\n    \"\"\"\n\n    _block_type_name = \"Vertex AI Custom Training Job\"\n    _block_type_slug = \"vertex-ai-custom-training-job\"\n    _logo_url = \"https://cdn.sanity.io/images/3ugk85nk/production/10424e311932e31c477ac2b9ef3d53cefbaad708-250x250.png\"  # noqa\n    _documentation_url = \"https://prefecthq.github.io/prefect-gcp/aiplatform/#prefect_gcp.aiplatform.VertexAICustomTrainingJob\"  # noqa: E501\n\n    type: Literal[\"vertex-ai-custom-training-job\"] = Field(\n        \"vertex-ai-custom-training-job\", description=\"The slug for this task type.\"\n    )\n\n    gcp_credentials: GcpCredentials = Field(\n        default_factory=GcpCredentials,\n        description=(\n            \"GCP credentials to use when running the configured Vertex AI custom \"\n            \"training job. If not provided, credentials will be inferred from the \"\n            \"environment. See `GcpCredentials` for details.\"\n        ),\n    )\n    region: str = Field(\n        default=...,\n        description=\"The region where the Vertex AI custom training job resides.\",\n    )\n    image: str = Field(\n        default=...,\n        title=\"Image Name\",\n        description=(\n            \"The image to use for a new Vertex AI custom training job. This value must \"\n            \"refer to an image within either Google Container Registry \"\n            \"or Google Artifact Registry, like `gcr.io/<project_name>/<repo>/`.\"\n        ),\n    )\n    env: Dict[str, str] = Field(\n        default_factory=dict,\n        title=\"Environment Variables\",\n        description=\"Environment variables to be passed to your Cloud Run Job.\",\n    )\n    machine_type: str = Field(\n        default=\"n1-standard-4\",\n        description=\"The machine type to use for the run, which controls the available \"\n        \"CPU and memory.\",\n    )\n    accelerator_type: Optional[str] = Field(\n        default=None, description=\"The type of accelerator to attach to the machine.\"\n    )\n    accelerator_count: Optional[int] = Field(\n        default=None, description=\"The number of accelerators to attach to the machine.\"\n    )\n    boot_disk_type: str = Field(\n        default=\"pd-ssd\",\n        title=\"Boot Disk Type\",\n        description=\"The type of boot disk to attach to the machine.\",\n    )\n    boot_disk_size_gb: int = Field(\n        default=100,\n        title=\"Boot Disk Size\",\n        description=\"The size of the boot disk to attach to the machine, in gigabytes.\",\n    )\n    maximum_run_time: datetime.timedelta = Field(\n        default=datetime.timedelta(days=7), description=\"The maximum job running time.\"\n    )\n    network: Optional[str] = Field(\n        default=None,\n        description=\"The full name of the Compute Engine network\"\n        \"to which the Job should be peered. Private services access must \"\n        \"already be configured for the network. 
If left unspecified, the job \"\n        \"is not peered with any network.\",\n    )\n    reserved_ip_ranges: Optional[List[str]] = Field(\n        default=None,\n        description=\"A list of names for the reserved ip ranges under the VPC \"\n        \"network that can be used for this job. If set, we will deploy the job \"\n        \"within the provided ip ranges. Otherwise, the job will be deployed to \"\n        \"any ip ranges under the provided VPC network.\",\n    )\n    service_account: Optional[str] = Field(\n        default=None,\n        description=(\n            \"Specifies the service account to use \"\n            \"as the run-as account in Vertex AI. The agent submitting jobs must have \"\n            \"act-as permission on this run-as account. If unspecified, the AI \"\n            \"Platform Custom Code Service Agent for the CustomJob's project is \"\n            \"used. Takes precedence over the service account found in gcp_credentials, \"\n            \"and required if a service account cannot be detected in gcp_credentials.\"\n        ),\n    )\n    job_watch_poll_interval: float = Field(\n        default=5.0,\n        description=(\n            \"The amount of time to wait between GCP API calls while monitoring the \"\n            \"state of a Vertex AI Job.\"\n        ),\n    )\n\n    @property\n    def job_name(self):\n        \"\"\"\n        The name can be up to 128 characters long and can be consist of any UTF-8 characters. Reference:\n        https://cloud.google.com/python/docs/reference/aiplatform/latest/google.cloud.aiplatform.CustomJob#google_cloud_aiplatform_CustomJob_display_name\n        \"\"\"  # noqa\n        try:\n            base_name = self.name or self.image.split(\"/\")[2]\n            return f\"{base_name}-{uuid4().hex}\"\n        except IndexError:\n            raise ValueError(\n                \"The provided image must be from either Google Container Registry \"\n                \"or Google Artifact Registry\"\n            )\n\n    def _get_compatible_labels(self) -> Dict[str, str]:\n        \"\"\"\n        Ensures labels are compatible with GCP label requirements.\n        https://cloud.google.com/resource-manager/docs/creating-managing-labels\n\n        Ex: the Prefect provided key of prefect.io/flow-name -> prefect-io_flow-name\n        \"\"\"\n        compatible_labels = {}\n        for key, val in self.labels.items():\n            new_key = slugify(\n                key,\n                lowercase=True,\n                replacements=[(\"/\", \"_\"), (\".\", \"-\")],\n                max_length=63,\n                regex_pattern=_DISALLOWED_GCP_LABEL_CHARACTERS,\n            )\n            compatible_labels[new_key] = slugify(\n                val,\n                lowercase=True,\n                replacements=[(\"/\", \"_\"), (\".\", \"-\")],\n                max_length=63,\n                regex_pattern=_DISALLOWED_GCP_LABEL_CHARACTERS,\n            )\n        return compatible_labels\n\n    def preview(self) -> str:\n        \"\"\"Generate a preview of the job definition that will be sent to GCP.\"\"\"\n        job_spec = self._build_job_spec()\n        custom_job = CustomJob(\n            display_name=self.job_name,\n            job_spec=job_spec,\n            labels=self._get_compatible_labels(),\n        )\n        return str(custom_job)  # outputs a json string\n\n    def get_corresponding_worker_type(self) -> str:\n        \"\"\"Return the corresponding worker type for this infrastructure block.\"\"\"\n        return 
\"vertex-ai\"\n\n    async def generate_work_pool_base_job_template(self) -> dict:\n        \"\"\"\n        Generate a base job template for a `Vertex AI` work pool with the same\n        configuration as this block.\n        Returns:\n            - dict: a base job template for a `Vertex AI` work pool\n        \"\"\"\n        base_job_template = await get_default_base_job_template_for_infrastructure_type(\n            self.get_corresponding_worker_type(),\n        )\n        assert (\n            base_job_template is not None\n        ), \"Failed to generate default base job template for Cloud Run worker.\"\n        for key, value in self.dict(exclude_unset=True, exclude_defaults=True).items():\n            if key == \"command\":\n                base_job_template[\"variables\"][\"properties\"][\"command\"][\n                    \"default\"\n                ] = shlex.join(value)\n            elif key in [\n                \"type\",\n                \"block_type_slug\",\n                \"_block_document_id\",\n                \"_block_document_name\",\n                \"_is_anonymous\",\n            ]:\n                continue\n            elif key == \"gcp_credentials\":\n                if not self.gcp_credentials._block_document_id:\n                    raise BlockNotSavedError(\n                        \"It looks like you are trying to use a block that\"\n                        \" has not been saved. Please call `.save` on your block\"\n                        \" before publishing it as a work pool.\"\n                    )\n                base_job_template[\"variables\"][\"properties\"][\"credentials\"][\n                    \"default\"\n                ] = {\n                    \"$ref\": {\n                        \"block_document_id\": str(\n                            self.gcp_credentials._block_document_id\n                        )\n                    }\n                }\n            elif key == \"maximum_run_time\":\n                base_job_template[\"variables\"][\"properties\"][\"maximum_run_time_hours\"][\n                    \"default\"\n                ] = round(value.total_seconds() / 3600)\n            elif key == \"service_account\":\n                base_job_template[\"variables\"][\"properties\"][\"service_account_name\"][\n                    \"default\"\n                ] = value\n            elif key in base_job_template[\"variables\"][\"properties\"]:\n                base_job_template[\"variables\"][\"properties\"][key][\"default\"] = value\n            else:\n                self.logger.warning(\n                    f\"Variable {key!r} is not supported by `Vertex AI` work pools.\"\n                    \" Skipping.\"\n                )\n\n        return base_job_template\n\n    def _build_job_spec(self) -> \"CustomJobSpec\":\n        \"\"\"\n        Builds a job spec by gathering details.\n        \"\"\"\n        # gather worker pool spec\n        env_list = [\n            {\"name\": name, \"value\": value}\n            for name, value in {\n                **self._base_environment(),\n                **self.env,\n            }.items()\n        ]\n        container_spec = ContainerSpec(\n            image_uri=self.image, command=self.command, args=[], env=env_list\n        )\n        machine_spec = MachineSpec(\n            machine_type=self.machine_type,\n            accelerator_type=self.accelerator_type,\n            accelerator_count=self.accelerator_count,\n        )\n        worker_pool_spec = WorkerPoolSpec(\n            
container_spec=container_spec,\n            machine_spec=machine_spec,\n            replica_count=1,\n            disk_spec=DiskSpec(\n                boot_disk_type=self.boot_disk_type,\n                boot_disk_size_gb=self.boot_disk_size_gb,\n            ),\n        )\n        # look for service account\n        service_account = (\n            self.service_account or self.gcp_credentials._service_account_email\n        )\n        if service_account is None:\n            raise ValueError(\n                \"A service account is required for the Vertex job. \"\n                \"A service account could not be detected in the attached credentials; \"\n                \"please set a service account explicitly, e.g. \"\n                '`VertexAICustomTrainingJob(service_acount=\"...\")`'\n            )\n\n        # build custom job specs\n        timeout = Duration().FromTimedelta(td=self.maximum_run_time)\n        scheduling = Scheduling(timeout=timeout)\n        job_spec = CustomJobSpec(\n            worker_pool_specs=[worker_pool_spec],\n            service_account=service_account,\n            scheduling=scheduling,\n            network=self.network,\n            reserved_ip_ranges=self.reserved_ip_ranges,\n        )\n        return job_spec\n\n    async def _create_and_begin_job(\n        self, job_spec: \"CustomJobSpec\", job_service_client: \"JobServiceClient\"\n    ) -> \"CustomJob\":\n        \"\"\"\n        Builds a custom job and begins running it.\n        \"\"\"\n        # create custom job\n        custom_job = CustomJob(\n            display_name=self.job_name,\n            job_spec=job_spec,\n            labels=self._get_compatible_labels(),\n        )\n\n        # run job\n        self.logger.info(\n            f\"{self._log_prefix}: Job {self.job_name!r} starting to run \"\n            f\"the command {' '.join(self.command)!r} in region \"\n            f\"{self.region!r} using image {self.image!r}\"\n        )\n\n        project = self.gcp_credentials.project\n        resource_name = f\"projects/{project}/locations/{self.region}\"\n\n        retry_policy = retry(\n            stop=stop_after_attempt(3), wait=wait_fixed(1) + wait_random(0, 3)\n        )\n\n        custom_job_run = await run_sync_in_worker_thread(\n            retry_policy(job_service_client.create_custom_job),\n            parent=resource_name,\n            custom_job=custom_job,\n        )\n\n        self.logger.info(\n            f\"{self._log_prefix}: Job {self.job_name!r} has successfully started; \"\n            f\"the full job name is {custom_job_run.name!r}\"\n        )\n\n        return custom_job_run\n\n    async def _watch_job_run(\n        self,\n        full_job_name: str,  # different from self.job_name\n        job_service_client: \"JobServiceClient\",\n        current_state: \"JobState\",\n        until_states: Tuple[\"JobState\"],\n        timeout: int = None,\n    ) -> \"CustomJob\":\n        \"\"\"\n        Polls job run to see if status changed.\n        \"\"\"\n        state = JobState.JOB_STATE_UNSPECIFIED\n        last_state = current_state\n        t0 = time.time()\n\n        while state not in until_states:\n            job_run = await run_sync_in_worker_thread(\n                job_service_client.get_custom_job,\n                name=full_job_name,\n            )\n            state = job_run.state\n            if state != last_state:\n                state_label = (\n                    state.name.replace(\"_\", \" \")\n                    .lower()\n                    
.replace(\"state\", \"state is now:\")\n                )\n                # results in \"New job state is now: succeeded\"\n                self.logger.info(\n                    f\"{self._log_prefix}: {self.job_name} has new {state_label}\"\n                )\n                last_state = state\n            else:\n                # Intermittently, the job will not be described. We want to respect the\n                # watch timeout though.\n                self.logger.debug(f\"{self._log_prefix}: Job not found.\")\n\n            elapsed_time = time.time() - t0\n            if timeout is not None and elapsed_time > timeout:\n                raise RuntimeError(\n                    f\"Timed out after {elapsed_time}s while watching job for states \"\n                    \"{until_states!r}\"\n                )\n            time.sleep(self.job_watch_poll_interval)\n\n        return job_run\n\n    @sync_compatible\n    async def run(\n        self, task_status: Optional[\"TaskStatus\"] = None\n    ) -> VertexAICustomTrainingJobResult:\n        \"\"\"\n        Run the configured task on VertexAI.\n\n        Args:\n            task_status: An optional `TaskStatus` to update when the container starts.\n\n        Returns:\n            The `VertexAICustomTrainingJobResult`.\n        \"\"\"\n        client_options = ClientOptions(\n            api_endpoint=f\"{self.region}-aiplatform.googleapis.com\"\n        )\n\n        job_spec = self._build_job_spec()\n        with self.gcp_credentials.get_job_service_client(\n            client_options=client_options\n        ) as job_service_client:\n            job_run = await self._create_and_begin_job(job_spec, job_service_client)\n\n            if task_status:\n                task_status.started(self.job_name)\n\n            final_job_run = await self._watch_job_run(\n                full_job_name=job_run.name,\n                job_service_client=job_service_client,\n                current_state=job_run.state,\n                until_states=(\n                    JobState.JOB_STATE_SUCCEEDED,\n                    JobState.JOB_STATE_FAILED,\n                    JobState.JOB_STATE_CANCELLED,\n                    JobState.JOB_STATE_EXPIRED,\n                ),\n                timeout=self.maximum_run_time.total_seconds(),\n            )\n\n        error_msg = final_job_run.error.message\n        if error_msg:\n            raise RuntimeError(f\"{self._log_prefix}: {error_msg}\")\n\n        status_code = 0 if final_job_run.state == JobState.JOB_STATE_SUCCEEDED else 1\n\n        return VertexAICustomTrainingJobResult(\n            identifier=final_job_run.display_name, status_code=status_code\n        )\n\n    @sync_compatible\n    async def kill(self, identifier: str, grace_seconds: int = 30) -> None:\n        \"\"\"\n        Kill a job running Cloud Run.\n\n        Args:\n            identifier: The Vertex AI full job name, formatted like\n                \"projects/{project}/locations/{location}/customJobs/{custom_job}\".\n\n        Returns:\n            The `VertexAICustomTrainingJobResult`.\n        \"\"\"\n        client_options = ClientOptions(\n            api_endpoint=f\"{self.region}-aiplatform.googleapis.com\"\n        )\n        with self.gcp_credentials.get_job_service_client(\n            client_options=client_options\n        ) as job_service_client:\n            await run_sync_in_worker_thread(\n                self._kill_job,\n                job_service_client=job_service_client,\n                full_job_name=identifier,\n            )\n    
        self.logger.info(f\"Requested to cancel {identifier}...\")\n\n    def _kill_job(\n        self, job_service_client: \"JobServiceClient\", full_job_name: str\n    ) -> None:\n        \"\"\"\n        Thin wrapper around Job.delete, wrapping a try/except since\n        Job is an independent class that doesn't have knowledge of\n        CloudRunJob and its associated logic.\n        \"\"\"\n        cancel_custom_job_request = CancelCustomJobRequest(name=full_job_name)\n        try:\n            job_service_client.cancel_custom_job(\n                request=cancel_custom_job_request,\n            )\n        except Exception as exc:\n            if \"does not exist\" in str(exc):\n                raise InfrastructureNotFound(\n                    f\"Cannot stop Vertex AI job; the job name {full_job_name!r} \"\n                    \"could not be found.\"\n                ) from exc\n            raise\n\n    @property\n    def _log_prefix(self) -> str:\n        \"\"\"\n        Internal property for generating a prefix for logs where `name` may be null\n        \"\"\"\n        if self.name is not None:\n            return f\"VertexAICustomTrainingJob {self.name!r}\"\n        else:\n            return \"VertexAICustomTrainingJob\"\n
"},{"location":"aiplatform/#prefect_gcp.aiplatform.VertexAICustomTrainingJob-attributes","title":"Attributes","text":""},{"location":"aiplatform/#prefect_gcp.aiplatform.VertexAICustomTrainingJob.job_name","title":"job_name property","text":"

The name can be up to 128 characters long and can consist of any UTF-8 characters. Reference: https://cloud.google.com/python/docs/reference/aiplatform/latest/google.cloud.aiplatform.CustomJob#google_cloud_aiplatform_CustomJob_display_name

"},{"location":"aiplatform/#prefect_gcp.aiplatform.VertexAICustomTrainingJob-functions","title":"Functions","text":""},{"location":"aiplatform/#prefect_gcp.aiplatform.VertexAICustomTrainingJob.generate_work_pool_base_job_template","title":"generate_work_pool_base_job_template async","text":"

Generate a base job template for a Vertex AI work pool with the same configuration as this block. Returns a dict: a base job template for a Vertex AI work pool.

Source code in prefect_gcp/aiplatform.py
async def generate_work_pool_base_job_template(self) -> dict:\n    \"\"\"\n    Generate a base job template for a `Vertex AI` work pool with the same\n    configuration as this block.\n    Returns:\n        - dict: a base job template for a `Vertex AI` work pool\n    \"\"\"\n    base_job_template = await get_default_base_job_template_for_infrastructure_type(\n        self.get_corresponding_worker_type(),\n    )\n    assert (\n        base_job_template is not None\n    ), \"Failed to generate default base job template for Cloud Run worker.\"\n    for key, value in self.dict(exclude_unset=True, exclude_defaults=True).items():\n        if key == \"command\":\n            base_job_template[\"variables\"][\"properties\"][\"command\"][\n                \"default\"\n            ] = shlex.join(value)\n        elif key in [\n            \"type\",\n            \"block_type_slug\",\n            \"_block_document_id\",\n            \"_block_document_name\",\n            \"_is_anonymous\",\n        ]:\n            continue\n        elif key == \"gcp_credentials\":\n            if not self.gcp_credentials._block_document_id:\n                raise BlockNotSavedError(\n                    \"It looks like you are trying to use a block that\"\n                    \" has not been saved. Please call `.save` on your block\"\n                    \" before publishing it as a work pool.\"\n                )\n            base_job_template[\"variables\"][\"properties\"][\"credentials\"][\n                \"default\"\n            ] = {\n                \"$ref\": {\n                    \"block_document_id\": str(\n                        self.gcp_credentials._block_document_id\n                    )\n                }\n            }\n        elif key == \"maximum_run_time\":\n            base_job_template[\"variables\"][\"properties\"][\"maximum_run_time_hours\"][\n                \"default\"\n            ] = round(value.total_seconds() / 3600)\n        elif key == \"service_account\":\n            base_job_template[\"variables\"][\"properties\"][\"service_account_name\"][\n                \"default\"\n            ] = value\n        elif key in base_job_template[\"variables\"][\"properties\"]:\n            base_job_template[\"variables\"][\"properties\"][key][\"default\"] = value\n        else:\n            self.logger.warning(\n                f\"Variable {key!r} is not supported by `Vertex AI` work pools.\"\n                \" Skipping.\"\n            )\n\n    return base_job_template\n
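
As a rough sketch of generating a template from a configured block, assuming a previously saved GcpCredentials block named my-gcp-credentials and hypothetical project, image, and region values:

import asyncio\n\nfrom prefect_gcp import GcpCredentials\nfrom prefect_gcp.aiplatform import VertexAICustomTrainingJob\n\n# Hypothetical, previously saved credentials block; it must be saved so the\n# generated template can reference its block document id.\ngcp_credentials = GcpCredentials.load(\"my-gcp-credentials\")\n\nvertex_job = VertexAICustomTrainingJob(\n    region=\"us-central1\",\n    image=\"us-docker.pkg.dev/my-project/my-repo/my-image:latest\",\n    gcp_credentials=gcp_credentials,\n)\n\n# The method is a coroutine; run it on the event loop from synchronous code.\nbase_job_template = asyncio.run(vertex_job.generate_work_pool_base_job_template())\nprint(base_job_template[\"variables\"][\"properties\"][\"region\"][\"default\"])\n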
"},{"location":"aiplatform/#prefect_gcp.aiplatform.VertexAICustomTrainingJob.get_corresponding_worker_type","title":"get_corresponding_worker_type","text":"

Return the corresponding worker type for this infrastructure block.

Source code in prefect_gcp/aiplatform.py
def get_corresponding_worker_type(self) -> str:\n    \"\"\"Return the corresponding worker type for this infrastructure block.\"\"\"\n    return \"vertex-ai\"\n
"},{"location":"aiplatform/#prefect_gcp.aiplatform.VertexAICustomTrainingJob.kill","title":"kill async","text":"

Kill a running Vertex AI custom training job.

Parameters:

Name Type Description Default identifier str

The Vertex AI full job name, formatted like \"projects/{project}/locations/{location}/customJobs/{custom_job}\".

required

Returns:

Type Description None

None. A cancellation request is submitted for the identified Vertex AI job.

Source code in prefect_gcp/aiplatform.py
@sync_compatible\nasync def kill(self, identifier: str, grace_seconds: int = 30) -> None:\n    \"\"\"\n    Kill a job running Cloud Run.\n\n    Args:\n        identifier: The Vertex AI full job name, formatted like\n            \"projects/{project}/locations/{location}/customJobs/{custom_job}\".\n\n    Returns:\n        The `VertexAICustomTrainingJobResult`.\n    \"\"\"\n    client_options = ClientOptions(\n        api_endpoint=f\"{self.region}-aiplatform.googleapis.com\"\n    )\n    with self.gcp_credentials.get_job_service_client(\n        client_options=client_options\n    ) as job_service_client:\n        await run_sync_in_worker_thread(\n            self._kill_job,\n            job_service_client=job_service_client,\n            full_job_name=identifier,\n        )\n        self.logger.info(f\"Requested to cancel {identifier}...\")\n
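
As a minimal sketch, cancelling a job by its full resource name; the block name and job identifier below are hypothetical:

from prefect_gcp.aiplatform import VertexAICustomTrainingJob\n\n# Hypothetical, previously saved block.\nvertex_job = VertexAICustomTrainingJob.load(\"my-vertex-job\")\n\n# The identifier is the full job name reported when the custom job was created.\nvertex_job.kill(\n    identifier=\"projects/my-project/locations/us-central1/customJobs/1234567890\"\n)\n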
"},{"location":"aiplatform/#prefect_gcp.aiplatform.VertexAICustomTrainingJob.preview","title":"preview","text":"

Generate a preview of the job definition that will be sent to GCP.

Source code in prefect_gcp/aiplatform.py
def preview(self) -> str:\n    \"\"\"Generate a preview of the job definition that will be sent to GCP.\"\"\"\n    job_spec = self._build_job_spec()\n    custom_job = CustomJob(\n        display_name=self.job_name,\n        job_spec=job_spec,\n        labels=self._get_compatible_labels(),\n    )\n    return str(custom_job)  # outputs a json string\n
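
A small sketch of previewing the job definition before submitting anything; the image, region, and service account values are hypothetical:

from prefect_gcp import GcpCredentials\nfrom prefect_gcp.aiplatform import VertexAICustomTrainingJob\n\nvertex_job = VertexAICustomTrainingJob(\n    region=\"us-central1\",\n    image=\"us-docker.pkg.dev/my-project/my-repo/my-image:latest\",\n    gcp_credentials=GcpCredentials.load(\"my-gcp-credentials\"),\n    # Set explicitly in case the credentials do not carry a service account email.\n    service_account=\"my-sa@my-project.iam.gserviceaccount.com\",\n)\n\n# Prints the CustomJob definition as a JSON string; nothing is submitted to GCP.\nprint(vertex_job.preview())\n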
"},{"location":"aiplatform/#prefect_gcp.aiplatform.VertexAICustomTrainingJob.run","title":"run async","text":"

Run the configured task on VertexAI.

Parameters:

Name Type Description Default task_status Optional[TaskStatus]

An optional TaskStatus to update when the container starts.

None

Returns:

Type Description VertexAICustomTrainingJobResult

The VertexAICustomTrainingJobResult.

Source code in prefect_gcp/aiplatform.py
@sync_compatible\nasync def run(\n    self, task_status: Optional[\"TaskStatus\"] = None\n) -> VertexAICustomTrainingJobResult:\n    \"\"\"\n    Run the configured task on VertexAI.\n\n    Args:\n        task_status: An optional `TaskStatus` to update when the container starts.\n\n    Returns:\n        The `VertexAICustomTrainingJobResult`.\n    \"\"\"\n    client_options = ClientOptions(\n        api_endpoint=f\"{self.region}-aiplatform.googleapis.com\"\n    )\n\n    job_spec = self._build_job_spec()\n    with self.gcp_credentials.get_job_service_client(\n        client_options=client_options\n    ) as job_service_client:\n        job_run = await self._create_and_begin_job(job_spec, job_service_client)\n\n        if task_status:\n            task_status.started(self.job_name)\n\n        final_job_run = await self._watch_job_run(\n            full_job_name=job_run.name,\n            job_service_client=job_service_client,\n            current_state=job_run.state,\n            until_states=(\n                JobState.JOB_STATE_SUCCEEDED,\n                JobState.JOB_STATE_FAILED,\n                JobState.JOB_STATE_CANCELLED,\n                JobState.JOB_STATE_EXPIRED,\n            ),\n            timeout=self.maximum_run_time.total_seconds(),\n        )\n\n    error_msg = final_job_run.error.message\n    if error_msg:\n        raise RuntimeError(f\"{self._log_prefix}: {error_msg}\")\n\n    status_code = 0 if final_job_run.state == JobState.JOB_STATE_SUCCEEDED else 1\n\n    return VertexAICustomTrainingJobResult(\n        identifier=final_job_run.display_name, status_code=status_code\n    )\n
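
For orientation, a minimal sketch of running a custom training job directly; the command, image, machine type, and credentials block name below are hypothetical:

from prefect_gcp import GcpCredentials\nfrom prefect_gcp.aiplatform import VertexAICustomTrainingJob\n\nvertex_job = VertexAICustomTrainingJob(\n    command=[\"python\", \"-m\", \"my_training_script\"],\n    region=\"us-central1\",\n    image=\"us-docker.pkg.dev/my-project/my-repo/my-image:latest\",\n    machine_type=\"n1-standard-4\",\n    gcp_credentials=GcpCredentials.load(\"my-gcp-credentials\"),\n)\n\n# run is sync-compatible and blocks until the job reaches a terminal state.\nresult = vertex_job.run()\nprint(result.identifier, result.status_code)\n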
"},{"location":"aiplatform/#prefect_gcp.aiplatform.VertexAICustomTrainingJobResult","title":"VertexAICustomTrainingJobResult","text":"

Bases: InfrastructureResult

Result from a Vertex AI custom training job.

Source code in prefect_gcp/aiplatform.py
class VertexAICustomTrainingJobResult(InfrastructureResult):\n    \"\"\"Result from a Vertex AI custom training job.\"\"\"\n
"},{"location":"bigquery/","title":"BigQuery","text":""},{"location":"bigquery/#prefect_gcp.bigquery","title":"prefect_gcp.bigquery","text":"

Tasks for interacting with GCP BigQuery

"},{"location":"bigquery/#prefect_gcp.bigquery-classes","title":"Classes","text":""},{"location":"bigquery/#prefect_gcp.bigquery.BigQueryWarehouse","title":"BigQueryWarehouse","text":"

Bases: DatabaseBlock

A block for querying a database with BigQuery.

Upon instantiating, a connection to BigQuery is established and maintained for the life of the object until the close method is called.

It is recommended to use this block as a context manager, which will automatically close the connection and its cursors when the context is exited.

It is also recommended that this block is loaded and consumed within a single task or flow because if the block is passed across separate tasks and flows, the state of the block's connection and cursor could be lost.

Attributes:

Name Type Description gcp_credentials GcpCredentials

The credentials to use to authenticate.

fetch_size int

The number of rows to fetch at a time when calling fetch_many. Note that this limit is applied on the client side and is not passed to the database. To limit on the server side, add the LIMIT clause, or the dialect's equivalent clause, like TOP, to the query.

Source code in prefect_gcp/bigquery.py
class BigQueryWarehouse(DatabaseBlock):\n    \"\"\"\n    A block for querying a database with BigQuery.\n\n    Upon instantiating, a connection to BigQuery is established\n    and maintained for the life of the object until the close method is called.\n\n    It is recommended to use this block as a context manager, which will automatically\n    close the connection and its cursors when the context is exited.\n\n    It is also recommended that this block is loaded and consumed within a single task\n    or flow because if the block is passed across separate tasks and flows,\n    the state of the block's connection and cursor could be lost.\n\n    Attributes:\n        gcp_credentials: The credentials to use to authenticate.\n        fetch_size: The number of rows to fetch at a time when calling fetch_many.\n            Note, this parameter is executed on the client side and is not\n            passed to the database. To limit on the server side, add the `LIMIT`\n            clause, or the dialect's equivalent clause, like `TOP`, to the query.\n    \"\"\"  # noqa\n\n    _block_type_name = \"BigQuery Warehouse\"\n    _logo_url = \"https://cdn.sanity.io/images/3ugk85nk/production/10424e311932e31c477ac2b9ef3d53cefbaad708-250x250.png\"  # noqa\n    _documentation_url = \"https://prefecthq.github.io/prefect-gcp/bigquery/#prefect_gcp.bigquery.BigQueryWarehouse\"  # noqa: E501\n\n    gcp_credentials: GcpCredentials\n    fetch_size: int = Field(\n        default=1, description=\"The number of rows to fetch at a time.\"\n    )\n\n    _connection: Optional[\"Connection\"] = None\n    _unique_cursors: Dict[str, \"Cursor\"] = None\n\n    def _start_connection(self):\n        \"\"\"\n        Starts a connection.\n        \"\"\"\n        with self.gcp_credentials.get_bigquery_client() as client:\n            self._connection = Connection(client=client)\n\n    def block_initialization(self) -> None:\n        super().block_initialization()\n        if self._connection is None:\n            self._start_connection()\n\n        if self._unique_cursors is None:\n            self._unique_cursors = {}\n\n    def get_connection(self) -> \"Connection\":\n        \"\"\"\n        Get the opened connection to BigQuery.\n        \"\"\"\n        return self._connection\n\n    def _get_cursor(self, inputs: Dict[str, Any]) -> Tuple[bool, \"Cursor\"]:\n        \"\"\"\n        Get a BigQuery cursor.\n\n        Args:\n            inputs: The inputs to generate a unique hash, used to decide\n                whether a new cursor should be used.\n\n        Returns:\n            Whether a cursor is new and a BigQuery cursor.\n        \"\"\"\n        input_hash = hash_objects(inputs)\n        assert input_hash is not None, (\n            \"We were not able to hash your inputs, \"\n            \"which resulted in an unexpected data return; \"\n            \"please open an issue with a reproducible example.\"\n        )\n        if input_hash not in self._unique_cursors.keys():\n            new_cursor = self._connection.cursor()\n            self._unique_cursors[input_hash] = new_cursor\n            return True, new_cursor\n        else:\n            existing_cursor = self._unique_cursors[input_hash]\n            return False, existing_cursor\n\n    def reset_cursors(self) -> None:\n        \"\"\"\n        Tries to close all opened cursors.\n        \"\"\"\n        input_hashes = tuple(self._unique_cursors.keys())\n        for input_hash in input_hashes:\n            cursor = self._unique_cursors.pop(input_hash)\n            try:\n   
             cursor.close()\n            except Exception as exc:\n                self.logger.warning(\n                    f\"Failed to close cursor for input hash {input_hash!r}: {exc}\"\n                )\n\n    @sync_compatible\n    async def fetch_one(\n        self,\n        operation: str,\n        parameters: Optional[Dict[str, Any]] = None,\n        **execution_options: Dict[str, Any],\n    ) -> \"Row\":\n        \"\"\"\n        Fetch a single result from the database.\n\n        Repeated calls using the same inputs to *any* of the fetch methods of this\n        block will skip executing the operation again, and instead,\n        return the next set of results from the previous execution,\n        until the reset_cursors method is called.\n\n        Args:\n            operation: The SQL query or other operation to be executed.\n            parameters: The parameters for the operation.\n            **execution_options: Additional options to pass to `connection.execute`.\n\n        Returns:\n            A tuple containing the data returned by the database,\n                where each row is a tuple and each column is a value in the tuple.\n\n        Examples:\n            Execute operation with parameters, fetching one new row at a time:\n            ```python\n            from prefect_gcp.bigquery import BigQueryWarehouse\n\n            with BigQueryWarehouse.load(\"BLOCK_NAME\") as warehouse:\n                operation = '''\n                    SELECT word, word_count\n                    FROM `bigquery-public-data.samples.shakespeare`\n                    WHERE corpus = %(corpus)s\n                    AND word_count >= %(min_word_count)s\n                    ORDER BY word_count DESC\n                    LIMIT 3;\n                '''\n                parameters = {\n                    \"corpus\": \"romeoandjuliet\",\n                    \"min_word_count\": 250,\n                }\n                for _ in range(0, 3):\n                    result = warehouse.fetch_one(operation, parameters=parameters)\n                    print(result)\n            ```\n        \"\"\"\n        inputs = dict(\n            operation=operation,\n            parameters=parameters,\n            **execution_options,\n        )\n        new, cursor = self._get_cursor(inputs)\n        if new:\n            await run_sync_in_worker_thread(cursor.execute, **inputs)\n\n        result = await run_sync_in_worker_thread(cursor.fetchone)\n        return result\n\n    @sync_compatible\n    async def fetch_many(\n        self,\n        operation: str,\n        parameters: Optional[Dict[str, Any]] = None,\n        size: Optional[int] = None,\n        **execution_options: Dict[str, Any],\n    ) -> List[\"Row\"]:\n        \"\"\"\n        Fetch a limited number of results from the database.\n\n        Repeated calls using the same inputs to *any* of the fetch methods of this\n        block will skip executing the operation again, and instead,\n        return the next set of results from the previous execution,\n        until the reset_cursors method is called.\n\n        Args:\n            operation: The SQL query or other operation to be executed.\n            parameters: The parameters for the operation.\n            size: The number of results to return; if None or 0, uses the value of\n                `fetch_size` configured on the block.\n            **execution_options: Additional options to pass to `connection.execute`.\n\n        Returns:\n            A list of tuples containing the data returned by the 
database,\n                where each row is a tuple and each column is a value in the tuple.\n\n        Examples:\n            Execute operation with parameters, fetching two new rows at a time:\n            ```python\n            from prefect_gcp.bigquery import BigQueryWarehouse\n\n            with BigQueryWarehouse.load(\"BLOCK_NAME\") as warehouse:\n                operation = '''\n                    SELECT word, word_count\n                    FROM `bigquery-public-data.samples.shakespeare`\n                    WHERE corpus = %(corpus)s\n                    AND word_count >= %(min_word_count)s\n                    ORDER BY word_count DESC\n                    LIMIT 6;\n                '''\n                parameters = {\n                    \"corpus\": \"romeoandjuliet\",\n                    \"min_word_count\": 250,\n                }\n                for _ in range(0, 3):\n                    result = warehouse.fetch_many(\n                        operation,\n                        parameters=parameters,\n                        size=2\n                    )\n                    print(result)\n            ```\n        \"\"\"\n        inputs = dict(\n            operation=operation,\n            parameters=parameters,\n            **execution_options,\n        )\n        new, cursor = self._get_cursor(inputs)\n        if new:\n            await run_sync_in_worker_thread(cursor.execute, **inputs)\n\n        size = size or self.fetch_size\n        result = await run_sync_in_worker_thread(cursor.fetchmany, size=size)\n        return result\n\n    @sync_compatible\n    async def fetch_all(\n        self,\n        operation: str,\n        parameters: Optional[Dict[str, Any]] = None,\n        **execution_options: Dict[str, Any],\n    ) -> List[\"Row\"]:\n        \"\"\"\n        Fetch all results from the database.\n\n        Repeated calls using the same inputs to *any* of the fetch methods of this\n        block will skip executing the operation again, and instead,\n        return the next set of results from the previous execution,\n        until the reset_cursors method is called.\n\n        Args:\n            operation: The SQL query or other operation to be executed.\n            parameters: The parameters for the operation.\n            **execution_options: Additional options to pass to `connection.execute`.\n\n        Returns:\n            A list of tuples containing the data returned by the database,\n                where each row is a tuple and each column is a value in the tuple.\n\n        Examples:\n            Execute operation with parameters, fetching all rows:\n            ```python\n            from prefect_gcp.bigquery import BigQueryWarehouse\n\n            with BigQueryWarehouse.load(\"BLOCK_NAME\") as warehouse:\n                operation = '''\n                    SELECT word, word_count\n                    FROM `bigquery-public-data.samples.shakespeare`\n                    WHERE corpus = %(corpus)s\n                    AND word_count >= %(min_word_count)s\n                    ORDER BY word_count DESC\n                    LIMIT 3;\n                '''\n                parameters = {\n                    \"corpus\": \"romeoandjuliet\",\n                    \"min_word_count\": 250,\n                }\n                result = warehouse.fetch_all(operation, parameters=parameters)\n            ```\n        \"\"\"\n        inputs = dict(\n            operation=operation,\n            parameters=parameters,\n            **execution_options,\n        )\n        new, 
cursor = self._get_cursor(inputs)\n        if new:\n            await run_sync_in_worker_thread(cursor.execute, **inputs)\n\n        result = await run_sync_in_worker_thread(cursor.fetchall)\n        return result\n\n    @sync_compatible\n    async def execute(\n        self,\n        operation: str,\n        parameters: Optional[Dict[str, Any]] = None,\n        **execution_options: Dict[str, Any],\n    ) -> None:\n        \"\"\"\n        Executes an operation on the database. This method is intended to be used\n        for operations that do not return data, such as INSERT, UPDATE, or DELETE.\n\n        Unlike the fetch methods, this method will always execute the operation\n        upon calling.\n\n        Args:\n            operation: The SQL query or other operation to be executed.\n            parameters: The parameters for the operation.\n            **execution_options: Additional options to pass to `connection.execute`.\n\n        Examples:\n            Execute operation with parameters:\n            ```python\n            from prefect_gcp.bigquery import BigQueryWarehouse\n\n            with BigQueryWarehouse.load(\"BLOCK_NAME\") as warehouse:\n                operation = '''\n                    CREATE TABLE mydataset.trips AS (\n                    SELECT\n                        bikeid,\n                        start_time,\n                        duration_minutes\n                    FROM\n                        bigquery-public-data.austin_bikeshare.bikeshare_trips\n                    LIMIT %(limit)s\n                    );\n                '''\n                warehouse.execute(operation, parameters={\"limit\": 5})\n            ```\n        \"\"\"\n        inputs = dict(\n            operation=operation,\n            parameters=parameters,\n            **execution_options,\n        )\n        cursor = self._get_cursor(inputs)[1]\n        await run_sync_in_worker_thread(cursor.execute, **inputs)\n\n    @sync_compatible\n    async def execute_many(\n        self,\n        operation: str,\n        seq_of_parameters: List[Dict[str, Any]],\n    ) -> None:\n        \"\"\"\n        Executes many operations on the database. 
This method is intended to be used\n        for operations that do not return data, such as INSERT, UPDATE, or DELETE.\n\n        Unlike the fetch methods, this method will always execute the operations\n        upon calling.\n\n        Args:\n            operation: The SQL query or other operation to be executed.\n            seq_of_parameters: The sequence of parameters for the operation.\n\n        Examples:\n            Create mytable in mydataset and insert two rows into it:\n            ```python\n            from prefect_gcp.bigquery import BigQueryWarehouse\n\n            with BigQueryWarehouse.load(\"bigquery\") as warehouse:\n                create_operation = '''\n                CREATE TABLE IF NOT EXISTS mydataset.mytable (\n                    col1 STRING,\n                    col2 INTEGER,\n                    col3 BOOLEAN\n                )\n                '''\n                warehouse.execute(create_operation)\n                insert_operation = '''\n                INSERT INTO mydataset.mytable (col1, col2, col3) VALUES (%s, %s, %s)\n                '''\n                seq_of_parameters = [\n                    (\"a\", 1, True),\n                    (\"b\", 2, False),\n                ]\n                warehouse.execute_many(\n                    insert_operation,\n                    seq_of_parameters=seq_of_parameters\n                )\n            ```\n        \"\"\"\n        inputs = dict(\n            operation=operation,\n            seq_of_parameters=seq_of_parameters,\n        )\n        cursor = self._get_cursor(inputs)[1]\n        await run_sync_in_worker_thread(cursor.executemany, **inputs)\n\n    def close(self):\n        \"\"\"\n        Closes connection and its cursors.\n        \"\"\"\n        try:\n            self.reset_cursors()\n        finally:\n            if self._connection is not None:\n                self._connection.close()\n                self._connection = None\n\n    def __enter__(self):\n        \"\"\"\n        Start a connection upon entry.\n        \"\"\"\n        return self\n\n    def __exit__(self, *args):\n        \"\"\"\n        Closes connection and its cursors upon exit.\n        \"\"\"\n        self.close()\n\n    def __getstate__(self):\n        \"\"\" \"\"\"\n        data = self.__dict__.copy()\n        data.update({k: None for k in {\"_connection\", \"_unique_cursors\"}})\n        return data\n\n    def __setstate__(self, data: dict):\n        \"\"\" \"\"\"\n        self.__dict__.update(data)\n        self._unique_cursors = {}\n        self._start_connection()\n
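
To illustrate the recommended context manager usage, a short sketch against a hypothetical saved block named my-warehouse:

from prefect_gcp.bigquery import BigQueryWarehouse\n\n# Hypothetical, previously saved block.\nwith BigQueryWarehouse.load(\"my-warehouse\") as warehouse:\n    row = warehouse.fetch_one(\n        \"SELECT word, word_count \"\n        \"FROM `bigquery-public-data.samples.shakespeare` \"\n        \"ORDER BY word_count DESC LIMIT 3\"\n    )\n    print(row)\n# On exit, the connection and any opened cursors are closed automatically.\n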
"},{"location":"bigquery/#prefect_gcp.bigquery.BigQueryWarehouse-functions","title":"Functions","text":""},{"location":"bigquery/#prefect_gcp.bigquery.BigQueryWarehouse.close","title":"close","text":"

Closes connection and its cursors.

Source code in prefect_gcp/bigquery.py
def close(self):\n    \"\"\"\n    Closes connection and its cursors.\n    \"\"\"\n    try:\n        self.reset_cursors()\n    finally:\n        if self._connection is not None:\n            self._connection.close()\n            self._connection = None\n
"},{"location":"bigquery/#prefect_gcp.bigquery.BigQueryWarehouse.execute","title":"execute async","text":"

Executes an operation on the database. This method is intended to be used for operations that do not return data, such as INSERT, UPDATE, or DELETE.

Unlike the fetch methods, this method will always execute the operation upon calling.

Parameters:

Name Type Description Default operation str

The SQL query or other operation to be executed.

required parameters Optional[Dict[str, Any]]

The parameters for the operation.

None **execution_options Dict[str, Any]

Additional options to pass to connection.execute.

{}

Examples:

Execute operation with parameters:

from prefect_gcp.bigquery import BigQueryWarehouse\n\nwith BigQueryWarehouse.load(\"BLOCK_NAME\") as warehouse:\n    operation = '''\n        CREATE TABLE mydataset.trips AS (\n        SELECT\n            bikeid,\n            start_time,\n            duration_minutes\n        FROM\n            bigquery-public-data.austin_bikeshare.bikeshare_trips\n        LIMIT %(limit)s\n        );\n    '''\n    warehouse.execute(operation, parameters={\"limit\": 5})\n

Source code in prefect_gcp/bigquery.py
@sync_compatible\nasync def execute(\n    self,\n    operation: str,\n    parameters: Optional[Dict[str, Any]] = None,\n    **execution_options: Dict[str, Any],\n) -> None:\n    \"\"\"\n    Executes an operation on the database. This method is intended to be used\n    for operations that do not return data, such as INSERT, UPDATE, or DELETE.\n\n    Unlike the fetch methods, this method will always execute the operation\n    upon calling.\n\n    Args:\n        operation: The SQL query or other operation to be executed.\n        parameters: The parameters for the operation.\n        **execution_options: Additional options to pass to `connection.execute`.\n\n    Examples:\n        Execute operation with parameters:\n        ```python\n        from prefect_gcp.bigquery import BigQueryWarehouse\n\n        with BigQueryWarehouse.load(\"BLOCK_NAME\") as warehouse:\n            operation = '''\n                CREATE TABLE mydataset.trips AS (\n                SELECT\n                    bikeid,\n                    start_time,\n                    duration_minutes\n                FROM\n                    bigquery-public-data.austin_bikeshare.bikeshare_trips\n                LIMIT %(limit)s\n                );\n            '''\n            warehouse.execute(operation, parameters={\"limit\": 5})\n        ```\n    \"\"\"\n    inputs = dict(\n        operation=operation,\n        parameters=parameters,\n        **execution_options,\n    )\n    cursor = self._get_cursor(inputs)[1]\n    await run_sync_in_worker_thread(cursor.execute, **inputs)\n
"},{"location":"bigquery/#prefect_gcp.bigquery.BigQueryWarehouse.execute_many","title":"execute_many async","text":"

Executes many operations on the database. This method is intended to be used for operations that do not return data, such as INSERT, UPDATE, or DELETE.

Unlike the fetch methods, this method will always execute the operations upon calling.

Parameters:

Name Type Description Default operation str

The SQL query or other operation to be executed.

required seq_of_parameters List[Dict[str, Any]]

The sequence of parameters for the operation.

required

Examples:

Create mytable in mydataset and insert two rows into it:

from prefect_gcp.bigquery import BigQueryWarehouse\n\nwith BigQueryWarehouse.load(\"bigquery\") as warehouse:\n    create_operation = '''\n    CREATE TABLE IF NOT EXISTS mydataset.mytable (\n        col1 STRING,\n        col2 INTEGER,\n        col3 BOOLEAN\n    )\n    '''\n    warehouse.execute(create_operation)\n    insert_operation = '''\n    INSERT INTO mydataset.mytable (col1, col2, col3) VALUES (%s, %s, %s)\n    '''\n    seq_of_parameters = [\n        (\"a\", 1, True),\n        (\"b\", 2, False),\n    ]\n    warehouse.execute_many(\n        insert_operation,\n        seq_of_parameters=seq_of_parameters\n    )\n

Source code in prefect_gcp/bigquery.py
@sync_compatible\nasync def execute_many(\n    self,\n    operation: str,\n    seq_of_parameters: List[Dict[str, Any]],\n) -> None:\n    \"\"\"\n    Executes many operations on the database. This method is intended to be used\n    for operations that do not return data, such as INSERT, UPDATE, or DELETE.\n\n    Unlike the fetch methods, this method will always execute the operations\n    upon calling.\n\n    Args:\n        operation: The SQL query or other operation to be executed.\n        seq_of_parameters: The sequence of parameters for the operation.\n\n    Examples:\n        Create mytable in mydataset and insert two rows into it:\n        ```python\n        from prefect_gcp.bigquery import BigQueryWarehouse\n\n        with BigQueryWarehouse.load(\"bigquery\") as warehouse:\n            create_operation = '''\n            CREATE TABLE IF NOT EXISTS mydataset.mytable (\n                col1 STRING,\n                col2 INTEGER,\n                col3 BOOLEAN\n            )\n            '''\n            warehouse.execute(create_operation)\n            insert_operation = '''\n            INSERT INTO mydataset.mytable (col1, col2, col3) VALUES (%s, %s, %s)\n            '''\n            seq_of_parameters = [\n                (\"a\", 1, True),\n                (\"b\", 2, False),\n            ]\n            warehouse.execute_many(\n                insert_operation,\n                seq_of_parameters=seq_of_parameters\n            )\n        ```\n    \"\"\"\n    inputs = dict(\n        operation=operation,\n        seq_of_parameters=seq_of_parameters,\n    )\n    cursor = self._get_cursor(inputs)[1]\n    await run_sync_in_worker_thread(cursor.executemany, **inputs)\n
"},{"location":"bigquery/#prefect_gcp.bigquery.BigQueryWarehouse.fetch_all","title":"fetch_all async","text":"

Fetch all results from the database.

Repeated calls using the same inputs to any of the fetch methods of this block will skip executing the operation again, and instead, return the next set of results from the previous execution, until the reset_cursors method is called.

Parameters:

Name Type Description Default operation str

The SQL query or other operation to be executed.

required parameters Optional[Dict[str, Any]]

The parameters for the operation.

None **execution_options Dict[str, Any]

Additional options to pass to connection.execute.

{}

Returns:

Type Description List[Row]

A list of tuples containing the data returned by the database, where each row is a tuple and each column is a value in the tuple.

Examples:

Execute operation with parameters, fetching all rows:

from prefect_gcp.bigquery import BigQueryWarehouse\n\nwith BigQueryWarehouse.load(\"BLOCK_NAME\") as warehouse:\n    operation = '''\n        SELECT word, word_count\n        FROM `bigquery-public-data.samples.shakespeare`\n        WHERE corpus = %(corpus)s\n        AND word_count >= %(min_word_count)s\n        ORDER BY word_count DESC\n        LIMIT 3;\n    '''\n    parameters = {\n        \"corpus\": \"romeoandjuliet\",\n        \"min_word_count\": 250,\n    }\n    result = warehouse.fetch_all(operation, parameters=parameters)\n

Source code in prefect_gcp/bigquery.py
@sync_compatible\nasync def fetch_all(\n    self,\n    operation: str,\n    parameters: Optional[Dict[str, Any]] = None,\n    **execution_options: Dict[str, Any],\n) -> List[\"Row\"]:\n    \"\"\"\n    Fetch all results from the database.\n\n    Repeated calls using the same inputs to *any* of the fetch methods of this\n    block will skip executing the operation again, and instead,\n    return the next set of results from the previous execution,\n    until the reset_cursors method is called.\n\n    Args:\n        operation: The SQL query or other operation to be executed.\n        parameters: The parameters for the operation.\n        **execution_options: Additional options to pass to `connection.execute`.\n\n    Returns:\n        A list of tuples containing the data returned by the database,\n            where each row is a tuple and each column is a value in the tuple.\n\n    Examples:\n        Execute operation with parameters, fetching all rows:\n        ```python\n        from prefect_gcp.bigquery import BigQueryWarehouse\n\n        with BigQueryWarehouse.load(\"BLOCK_NAME\") as warehouse:\n            operation = '''\n                SELECT word, word_count\n                FROM `bigquery-public-data.samples.shakespeare`\n                WHERE corpus = %(corpus)s\n                AND word_count >= %(min_word_count)s\n                ORDER BY word_count DESC\n                LIMIT 3;\n            '''\n            parameters = {\n                \"corpus\": \"romeoandjuliet\",\n                \"min_word_count\": 250,\n            }\n            result = warehouse.fetch_all(operation, parameters=parameters)\n        ```\n    \"\"\"\n    inputs = dict(\n        operation=operation,\n        parameters=parameters,\n        **execution_options,\n    )\n    new, cursor = self._get_cursor(inputs)\n    if new:\n        await run_sync_in_worker_thread(cursor.execute, **inputs)\n\n    result = await run_sync_in_worker_thread(cursor.fetchall)\n    return result\n
"},{"location":"bigquery/#prefect_gcp.bigquery.BigQueryWarehouse.fetch_many","title":"fetch_many async","text":"

Fetch a limited number of results from the database.

Repeated calls using the same inputs to any of the fetch methods of this block will skip executing the operation again, and instead, return the next set of results from the previous execution, until the reset_cursors method is called.

Parameters:

Name Type Description Default operation str

The SQL query or other operation to be executed.

required parameters Optional[Dict[str, Any]]

The parameters for the operation.

None size Optional[int]

The number of results to return; if None or 0, uses the value of fetch_size configured on the block.

None **execution_options Dict[str, Any]

Additional options to pass to connection.execute.

{}

Returns:

Type Description List[Row]

A list of tuples containing the data returned by the database, where each row is a tuple and each column is a value in the tuple.

Examples:

Execute operation with parameters, fetching two new rows at a time:

from prefect_gcp.bigquery import BigQueryWarehouse\n\nwith BigQueryWarehouse.load(\"BLOCK_NAME\") as warehouse:\n    operation = '''\n        SELECT word, word_count\n        FROM `bigquery-public-data.samples.shakespeare`\n        WHERE corpus = %(corpus)s\n        AND word_count >= %(min_word_count)s\n        ORDER BY word_count DESC\n        LIMIT 6;\n    '''\n    parameters = {\n        \"corpus\": \"romeoandjuliet\",\n        \"min_word_count\": 250,\n    }\n    for _ in range(0, 3):\n        result = warehouse.fetch_many(\n            operation,\n            parameters=parameters,\n            size=2\n        )\n        print(result)\n

Source code in prefect_gcp/bigquery.py
@sync_compatible\nasync def fetch_many(\n    self,\n    operation: str,\n    parameters: Optional[Dict[str, Any]] = None,\n    size: Optional[int] = None,\n    **execution_options: Dict[str, Any],\n) -> List[\"Row\"]:\n    \"\"\"\n    Fetch a limited number of results from the database.\n\n    Repeated calls using the same inputs to *any* of the fetch methods of this\n    block will skip executing the operation again, and instead,\n    return the next set of results from the previous execution,\n    until the reset_cursors method is called.\n\n    Args:\n        operation: The SQL query or other operation to be executed.\n        parameters: The parameters for the operation.\n        size: The number of results to return; if None or 0, uses the value of\n            `fetch_size` configured on the block.\n        **execution_options: Additional options to pass to `connection.execute`.\n\n    Returns:\n        A list of tuples containing the data returned by the database,\n            where each row is a tuple and each column is a value in the tuple.\n\n    Examples:\n        Execute operation with parameters, fetching two new rows at a time:\n        ```python\n        from prefect_gcp.bigquery import BigQueryWarehouse\n\n        with BigQueryWarehouse.load(\"BLOCK_NAME\") as warehouse:\n            operation = '''\n                SELECT word, word_count\n                FROM `bigquery-public-data.samples.shakespeare`\n                WHERE corpus = %(corpus)s\n                AND word_count >= %(min_word_count)s\n                ORDER BY word_count DESC\n                LIMIT 6;\n            '''\n            parameters = {\n                \"corpus\": \"romeoandjuliet\",\n                \"min_word_count\": 250,\n            }\n            for _ in range(0, 3):\n                result = warehouse.fetch_many(\n                    operation,\n                    parameters=parameters,\n                    size=2\n                )\n                print(result)\n        ```\n    \"\"\"\n    inputs = dict(\n        operation=operation,\n        parameters=parameters,\n        **execution_options,\n    )\n    new, cursor = self._get_cursor(inputs)\n    if new:\n        await run_sync_in_worker_thread(cursor.execute, **inputs)\n\n    size = size or self.fetch_size\n    result = await run_sync_in_worker_thread(cursor.fetchmany, size=size)\n    return result\n
"},{"location":"bigquery/#prefect_gcp.bigquery.BigQueryWarehouse.fetch_one","title":"fetch_one async","text":"

Fetch a single result from the database.

Repeated calls using the same inputs to any of the fetch methods of this block will skip executing the operation again, and instead, return the next set of results from the previous execution, until the reset_cursors method is called.

Parameters:

Name Type Description Default operation str

The SQL query or other operation to be executed.

required parameters Optional[Dict[str, Any]]

The parameters for the operation.

None **execution_options Dict[str, Any]

Additional options to pass to connection.execute.

{}

Returns:

Type Description Row

A tuple containing the data returned by the database, where each row is a tuple and each column is a value in the tuple.

Examples:

Execute operation with parameters, fetching one new row at a time:

from prefect_gcp.bigquery import BigQueryWarehouse\n\nwith BigQueryWarehouse.load(\"BLOCK_NAME\") as warehouse:\n    operation = '''\n        SELECT word, word_count\n        FROM `bigquery-public-data.samples.shakespeare`\n        WHERE corpus = %(corpus)s\n        AND word_count >= %(min_word_count)s\n        ORDER BY word_count DESC\n        LIMIT 3;\n    '''\n    parameters = {\n        \"corpus\": \"romeoandjuliet\",\n        \"min_word_count\": 250,\n    }\n    for _ in range(0, 3):\n        result = warehouse.fetch_one(operation, parameters=parameters)\n        print(result)\n

Source code in prefect_gcp/bigquery.py
@sync_compatible\nasync def fetch_one(\n    self,\n    operation: str,\n    parameters: Optional[Dict[str, Any]] = None,\n    **execution_options: Dict[str, Any],\n) -> \"Row\":\n    \"\"\"\n    Fetch a single result from the database.\n\n    Repeated calls using the same inputs to *any* of the fetch methods of this\n    block will skip executing the operation again, and instead,\n    return the next set of results from the previous execution,\n    until the reset_cursors method is called.\n\n    Args:\n        operation: The SQL query or other operation to be executed.\n        parameters: The parameters for the operation.\n        **execution_options: Additional options to pass to `connection.execute`.\n\n    Returns:\n        A tuple containing the data returned by the database,\n            where each row is a tuple and each column is a value in the tuple.\n\n    Examples:\n        Execute operation with parameters, fetching one new row at a time:\n        ```python\n        from prefect_gcp.bigquery import BigQueryWarehouse\n\n        with BigQueryWarehouse.load(\"BLOCK_NAME\") as warehouse:\n            operation = '''\n                SELECT word, word_count\n                FROM `bigquery-public-data.samples.shakespeare`\n                WHERE corpus = %(corpus)s\n                AND word_count >= %(min_word_count)s\n                ORDER BY word_count DESC\n                LIMIT 3;\n            '''\n            parameters = {\n                \"corpus\": \"romeoandjuliet\",\n                \"min_word_count\": 250,\n            }\n            for _ in range(0, 3):\n                result = warehouse.fetch_one(operation, parameters=parameters)\n                print(result)\n        ```\n    \"\"\"\n    inputs = dict(\n        operation=operation,\n        parameters=parameters,\n        **execution_options,\n    )\n    new, cursor = self._get_cursor(inputs)\n    if new:\n        await run_sync_in_worker_thread(cursor.execute, **inputs)\n\n    result = await run_sync_in_worker_thread(cursor.fetchone)\n    return result\n
"},{"location":"bigquery/#prefect_gcp.bigquery.BigQueryWarehouse.get_connection","title":"get_connection","text":"

Get the opened connection to BigQuery.

Source code in prefect_gcp/bigquery.py
def get_connection(self) -> \"Connection\":\n    \"\"\"\n    Get the opened connection to BigQuery.\n    \"\"\"\n    return self._connection\n
"},{"location":"bigquery/#prefect_gcp.bigquery.BigQueryWarehouse.reset_cursors","title":"reset_cursors","text":"

Tries to close all opened cursors.

Source code in prefect_gcp/bigquery.py
def reset_cursors(self) -> None:\n    \"\"\"\n    Tries to close all opened cursors.\n    \"\"\"\n    input_hashes = tuple(self._unique_cursors.keys())\n    for input_hash in input_hashes:\n        cursor = self._unique_cursors.pop(input_hash)\n        try:\n            cursor.close()\n        except Exception as exc:\n            self.logger.warning(\n                f\"Failed to close cursor for input hash {input_hash!r}: {exc}\"\n            )\n
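
A brief sketch of how reset_cursors lets the same inputs execute again from the beginning; the block name is hypothetical:

from prefect_gcp.bigquery import BigQueryWarehouse\n\nwith BigQueryWarehouse.load(\"my-warehouse\") as warehouse:  # hypothetical block name\n    operation = \"SELECT 1 AS n\"\n    first = warehouse.fetch_one(operation)\n\n    # Close the cached cursor so the next identical call re-executes the query.\n    warehouse.reset_cursors()\n    second = warehouse.fetch_one(operation)\n    print(first, second)\n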
"},{"location":"bigquery/#prefect_gcp.bigquery-functions","title":"Functions","text":""},{"location":"bigquery/#prefect_gcp.bigquery.bigquery_create_table","title":"bigquery_create_table async","text":"

Creates a table in BigQuery. Args: dataset: Name of the dataset in which the table will be created. table: Name of a table to create. schema: Schema to use when creating the table. gcp_credentials: Credentials to use for authentication with GCP. clustering_fields: List of fields to cluster the table by. time_partitioning: bigquery.TimePartitioning object specifying a partitioning of the newly created table. project: Project to initialize the BigQuery Client with; if not provided, will default to the one inferred from your credentials. location: The location of the dataset that will be written to. external_config: The external data source. Returns: Table name. Example:

from prefect import flow\nfrom prefect_gcp import GcpCredentials\nfrom prefect_gcp.bigquery import bigquery_create_table\nfrom google.cloud.bigquery import SchemaField\n@flow\ndef example_bigquery_create_table_flow():\n    gcp_credentials = GcpCredentials(project=\"project\")\n    schema = [\n        SchemaField(\"number\", field_type=\"INTEGER\", mode=\"REQUIRED\"),\n        SchemaField(\"text\", field_type=\"STRING\", mode=\"REQUIRED\"),\n        SchemaField(\"bool\", field_type=\"BOOLEAN\")\n    ]\n    result = bigquery_create_table(\n        dataset=\"dataset\",\n        table=\"test_table\",\n        schema=schema,\n        gcp_credentials=gcp_credentials\n    )\n    return result\nexample_bigquery_create_table_flow()\n

Source code in prefect_gcp/bigquery.py
@task\nasync def bigquery_create_table(\n    dataset: str,\n    table: str,\n    gcp_credentials: GcpCredentials,\n    schema: Optional[List[\"SchemaField\"]] = None,\n    clustering_fields: List[str] = None,\n    time_partitioning: \"TimePartitioning\" = None,\n    project: Optional[str] = None,\n    location: str = \"US\",\n    external_config: Optional[\"ExternalConfig\"] = None,\n) -> str:\n    \"\"\"\n    Creates table in BigQuery.\n    Args:\n        dataset: Name of a dataset in that the table will be created.\n        table: Name of a table to create.\n        schema: Schema to use when creating the table.\n        gcp_credentials: Credentials to use for authentication with GCP.\n        clustering_fields: List of fields to cluster the table by.\n        time_partitioning: `bigquery.TimePartitioning` object specifying a partitioning\n            of the newly created table\n        project: Project to initialize the BigQuery Client with; if\n            not provided, will default to the one inferred from your credentials.\n        location: The location of the dataset that will be written to.\n        external_config: The [external data source](https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/bigquery_table#nested_external_data_configuration).  # noqa\n    Returns:\n        Table name.\n    Example:\n        ```python\n        from prefect import flow\n        from prefect_gcp import GcpCredentials\n        from prefect_gcp.bigquery import bigquery_create_table\n        from google.cloud.bigquery import SchemaField\n        @flow\n        def example_bigquery_create_table_flow():\n            gcp_credentials = GcpCredentials(project=\"project\")\n            schema = [\n                SchemaField(\"number\", field_type=\"INTEGER\", mode=\"REQUIRED\"),\n                SchemaField(\"text\", field_type=\"STRING\", mode=\"REQUIRED\"),\n                SchemaField(\"bool\", field_type=\"BOOLEAN\")\n            ]\n            result = bigquery_create_table(\n                dataset=\"dataset\",\n                table=\"test_table\",\n                schema=schema,\n                gcp_credentials=gcp_credentials\n            )\n            return result\n        example_bigquery_create_table_flow()\n        ```\n    \"\"\"\n    logger = get_run_logger()\n    logger.info(\"Creating %s.%s\", dataset, table)\n\n    if not external_config and not schema:\n        raise ValueError(\"Either a schema or an external config must be provided.\")\n\n    client = gcp_credentials.get_bigquery_client(project=project, location=location)\n    try:\n        partial_get_dataset = partial(client.get_dataset, dataset)\n        dataset_ref = await to_thread.run_sync(partial_get_dataset)\n    except NotFound:\n        logger.debug(\"Dataset %s not found, creating\", dataset)\n        partial_create_dataset = partial(client.create_dataset, dataset)\n        dataset_ref = await to_thread.run_sync(partial_create_dataset)\n\n    table_ref = dataset_ref.table(table)\n    try:\n        partial_get_table = partial(client.get_table, table_ref)\n        await to_thread.run_sync(partial_get_table)\n        logger.info(\"%s.%s already exists\", dataset, table)\n    except NotFound:\n        logger.debug(\"Table %s not found, creating\", table)\n        table_obj = Table(table_ref, schema=schema)\n\n        # external data configuration\n        if external_config:\n            table_obj.external_data_configuration = external_config\n\n        # cluster for optimal data sorting/access\n     
   if clustering_fields:\n            table_obj.clustering_fields = clustering_fields\n\n        # partitioning\n        if time_partitioning:\n            table_obj.time_partitioning = time_partitioning\n\n        partial_create_table = partial(client.create_table, table_obj)\n        await to_thread.run_sync(partial_create_table)\n\n    return table\n
"},{"location":"bigquery/#prefect_gcp.bigquery.bigquery_insert_stream","title":"bigquery_insert_stream async","text":"

Insert records in a Google BigQuery table via the streaming API.

Parameters:

Name Type Description Default dataset str

Name of a dataset where the records will be written to.

required table str

Name of a table to write to.

required records List[dict]

The list of records to insert as rows into the BigQuery table; each item in the list should be a dictionary whose keys correspond to columns in the table.

required gcp_credentials GcpCredentials

Credentials to use for authentication with GCP.

required project Optional[str]

The project to initialize the BigQuery Client with; if not provided, will default to the one inferred from your credentials.

None location str

Location of the dataset that will be written to.

'US'

Returns:

Type Description List

List of inserted rows.

Example
from prefect import flow\nfrom prefect_gcp import GcpCredentials\nfrom prefect_gcp.bigquery import bigquery_insert_stream\nfrom google.cloud.bigquery import SchemaField\n\n@flow\ndef example_bigquery_insert_stream_flow():\n    gcp_credentials = GcpCredentials(project=\"project\")\n    records = [\n        {\"number\": 1, \"text\": \"abc\", \"bool\": True},\n        {\"number\": 2, \"text\": \"def\", \"bool\": False},\n    ]\n    result = bigquery_insert_stream(\n        dataset=\"integrations\",\n        table=\"test_table\",\n        records=records,\n        gcp_credentials=gcp_credentials\n    )\n    return result\n\nexample_bigquery_insert_stream_flow()\n
Source code in prefect_gcp/bigquery.py
@task\nasync def bigquery_insert_stream(\n    dataset: str,\n    table: str,\n    records: List[dict],\n    gcp_credentials: GcpCredentials,\n    project: Optional[str] = None,\n    location: str = \"US\",\n) -> List:\n    \"\"\"\n    Insert records in a Google BigQuery table via the [streaming\n    API](https://cloud.google.com/bigquery/streaming-data-into-bigquery).\n\n    Args:\n        dataset: Name of a dataset where the records will be written to.\n        table: Name of a table to write to.\n        records: The list of records to insert as rows into the BigQuery table;\n            each item in the list should be a dictionary whose keys correspond to\n            columns in the table.\n        gcp_credentials: Credentials to use for authentication with GCP.\n        project: The project to initialize the BigQuery Client with; if\n            not provided, will default to the one inferred from your credentials.\n        location: Location of the dataset that will be written to.\n\n    Returns:\n        List of inserted rows.\n\n    Example:\n        ```python\n        from prefect import flow\n        from prefect_gcp import GcpCredentials\n        from prefect_gcp.bigquery import bigquery_insert_stream\n        from google.cloud.bigquery import SchemaField\n\n        @flow\n        def example_bigquery_insert_stream_flow():\n            gcp_credentials = GcpCredentials(project=\"project\")\n            records = [\n                {\"number\": 1, \"text\": \"abc\", \"bool\": True},\n                {\"number\": 2, \"text\": \"def\", \"bool\": False},\n            ]\n            result = bigquery_insert_stream(\n                dataset=\"integrations\",\n                table=\"test_table\",\n                records=records,\n                gcp_credentials=gcp_credentials\n            )\n            return result\n\n        example_bigquery_insert_stream_flow()\n        ```\n    \"\"\"\n    logger = get_run_logger()\n    logger.info(\"Inserting into %s.%s as a stream\", dataset, table)\n\n    client = gcp_credentials.get_bigquery_client(project=project, location=location)\n    table_ref = client.dataset(dataset).table(table)\n    partial_insert = partial(\n        client.insert_rows_json, table=table_ref, json_rows=records\n    )\n    response = await to_thread.run_sync(partial_insert)\n\n    errors = []\n    output = []\n    for row in response:\n        output.append(row)\n        if \"errors\" in row:\n            errors.append(row[\"errors\"])\n\n    if errors:\n        raise ValueError(errors)\n\n    return output\n
"},{"location":"bigquery/#prefect_gcp.bigquery.bigquery_load_cloud_storage","title":"bigquery_load_cloud_storage async","text":"

Run method for this Task. Invoked by calling this Task within a Flow context, after initialization.

Parameters:

uri: GCS path to load data from.

dataset: The id of a destination dataset to write the records to.

table: The name of a destination table to write the records to.

gcp_credentials: Credentials to use for authentication with GCP.

schema: The schema to use when creating the table.

job_config: Dictionary of job configuration parameters; note that the parameters provided here must be pickleable (e.g., dataset references will be rejected).

project: The project to initialize the BigQuery Client with; if not provided, will default to the one inferred from your credentials.

location: Location of the dataset that will be written to.

Returns:

LoadJob: The response from load_table_from_uri.

Example
from prefect import flow\nfrom prefect_gcp import GcpCredentials\nfrom prefect_gcp.bigquery import bigquery_load_cloud_storage\n\n@flow\ndef example_bigquery_load_cloud_storage_flow():\n    gcp_credentials = GcpCredentials(project=\"project\")\n    result = bigquery_load_cloud_storage(\n        dataset=\"dataset\",\n        table=\"test_table\",\n        uri=\"uri\",\n        gcp_credentials=gcp_credentials\n    )\n    return result\n\nexample_bigquery_load_cloud_storage_flow()\n
Source code in prefect_gcp/bigquery.py
@task\nasync def bigquery_load_cloud_storage(\n    dataset: str,\n    table: str,\n    uri: str,\n    gcp_credentials: GcpCredentials,\n    schema: Optional[List[\"SchemaField\"]] = None,\n    job_config: Optional[dict] = None,\n    project: Optional[str] = None,\n    location: str = \"US\",\n) -> \"LoadJob\":\n    \"\"\"\n    Run method for this Task.  Invoked by _calling_ this\n    Task within a Flow context, after initialization.\n    Args:\n        uri: GCS path to load data from.\n        dataset: The id of a destination dataset to write the records to.\n        table: The name of a destination table to write the records to.\n        gcp_credentials: Credentials to use for authentication with GCP.\n        schema: The schema to use when creating the table.\n        job_config: Dictionary of job configuration parameters;\n            note that the parameters provided here must be pickleable\n            (e.g., dataset references will be rejected).\n        project: The project to initialize the BigQuery Client with; if\n            not provided, will default to the one inferred from your credentials.\n        location: Location of the dataset that will be written to.\n\n    Returns:\n        The response from `load_table_from_uri`.\n\n    Example:\n        ```python\n        from prefect import flow\n        from prefect_gcp import GcpCredentials\n        from prefect_gcp.bigquery import bigquery_load_cloud_storage\n\n        @flow\n        def example_bigquery_load_cloud_storage_flow():\n            gcp_credentials = GcpCredentials(project=\"project\")\n            result = bigquery_load_cloud_storage(\n                dataset=\"dataset\",\n                table=\"test_table\",\n                uri=\"uri\",\n                gcp_credentials=gcp_credentials\n            )\n            return result\n\n        example_bigquery_load_cloud_storage_flow()\n        ```\n    \"\"\"\n    logger = get_run_logger()\n    logger.info(\"Loading into %s.%s from cloud storage\", dataset, table)\n\n    client = gcp_credentials.get_bigquery_client(project=project, location=location)\n    table_ref = client.dataset(dataset).table(table)\n\n    job_config = job_config or {}\n    if \"autodetect\" not in job_config:\n        job_config[\"autodetect\"] = True\n    job_config = LoadJobConfig(**job_config)\n    if schema:\n        job_config.schema = schema\n\n    result = None\n    try:\n        partial_load = partial(\n            _result_sync,\n            client.load_table_from_uri,\n            uri,\n            table_ref,\n            job_config=job_config,\n        )\n        result = await to_thread.run_sync(partial_load)\n    except Exception as exception:\n        logger.exception(exception)\n        if result is not None and result.errors is not None:\n            for error in result.errors:\n                logger.exception(error)\n        raise\n\n    if result is not None:\n        # remove unpickleable attributes\n        result._client = None\n        result._completion_lock = None\n\n    return result\n
"},{"location":"bigquery/#prefect_gcp.bigquery.bigquery_load_file","title":"bigquery_load_file async","text":"

Loads file into BigQuery.

Parameters:

dataset (str, required): ID of a destination dataset to write the records to; if not provided here, will default to the one provided at initialization.

table (str, required): Name of a destination table to write the records to; if not provided here, will default to the one provided at initialization.

path (Union[str, Path], required): A string or path-like object of the file to be loaded.

gcp_credentials (GcpCredentials, required): Credentials to use for authentication with GCP.

schema (Optional[List[SchemaField]], default None): Schema to use when creating the table.

job_config (Optional[dict], default None): An optional dictionary of job configuration parameters; note that the parameters provided here must be pickleable (e.g., dataset references will be rejected).

rewind (bool, default False): If True, seek to the beginning of the file handle before reading the file.

size (Optional[int], default None): Number of bytes to read from the file handle. If size is None or large, resumable upload will be used. Otherwise, multipart upload will be used.

project (Optional[str], default None): Project to initialize the BigQuery Client with; if not provided, will default to the one inferred from your credentials.

location (str, default 'US'): Location of the dataset that will be written to.

Returns:

LoadJob: The response from load_table_from_file.

Example
from prefect import flow\nfrom prefect_gcp import GcpCredentials\nfrom prefect_gcp.bigquery import bigquery_load_file\nfrom google.cloud.bigquery import SchemaField\n\n@flow\ndef example_bigquery_load_file_flow():\n    gcp_credentials = GcpCredentials(project=\"project\")\n    result = bigquery_load_file(\n        dataset=\"dataset\",\n        table=\"test_table\",\n        path=\"path\",\n        gcp_credentials=gcp_credentials\n    )\n    return result\n\nexample_bigquery_load_file_flow()\n
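A variant of the example above that passes an explicit schema, since SchemaField is imported but unused there (a minimal sketch; the dataset, table, path, and field names are placeholders):

from prefect import flow\nfrom prefect_gcp import GcpCredentials\nfrom prefect_gcp.bigquery import bigquery_load_file\nfrom google.cloud.bigquery import SchemaField\n\n@flow\ndef example_bigquery_load_file_with_schema_flow():\n    gcp_credentials = GcpCredentials(project=\"project\")\n    # an explicit schema instead of relying on autodetect\n    schema = [\n        SchemaField(\"number\", field_type=\"INTEGER\", mode=\"REQUIRED\"),\n        SchemaField(\"text\", field_type=\"STRING\", mode=\"REQUIRED\"),\n        SchemaField(\"bool\", field_type=\"BOOLEAN\"),\n    ]\n    result = bigquery_load_file(\n        dataset=\"dataset\",\n        table=\"test_table\",\n        path=\"path\",\n        schema=schema,\n        gcp_credentials=gcp_credentials\n    )\n    return result\n\nexample_bigquery_load_file_with_schema_flow()\n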
Source code in prefect_gcp/bigquery.py
@task\nasync def bigquery_load_file(\n    dataset: str,\n    table: str,\n    path: Union[str, Path],\n    gcp_credentials: GcpCredentials,\n    schema: Optional[List[\"SchemaField\"]] = None,\n    job_config: Optional[dict] = None,\n    rewind: bool = False,\n    size: Optional[int] = None,\n    project: Optional[str] = None,\n    location: str = \"US\",\n) -> \"LoadJob\":\n    \"\"\"\n    Loads file into BigQuery.\n\n    Args:\n        dataset: ID of a destination dataset to write the records to;\n            if not provided here, will default to the one provided at initialization.\n        table: Name of a destination table to write the records to;\n            if not provided here, will default to the one provided at initialization.\n        path: A string or path-like object of the file to be loaded.\n        gcp_credentials: Credentials to use for authentication with GCP.\n        schema: Schema to use when creating the table.\n        job_config: An optional dictionary of job configuration parameters;\n            note that the parameters provided here must be pickleable\n            (e.g., dataset references will be rejected).\n        rewind: if True, seek to the beginning of the file handle\n            before reading the file.\n        size: Number of bytes to read from the file handle. If size is None or large,\n            resumable upload will be used. Otherwise, multipart upload will be used.\n        project: Project to initialize the BigQuery Client with; if\n            not provided, will default to the one inferred from your credentials.\n        location: location of the dataset that will be written to.\n\n    Returns:\n        The response from `load_table_from_file`.\n\n    Example:\n        ```python\n        from prefect import flow\n        from prefect_gcp import GcpCredentials\n        from prefect_gcp.bigquery import bigquery_load_file\n        from google.cloud.bigquery import SchemaField\n\n        @flow\n        def example_bigquery_load_file_flow():\n            gcp_credentials = GcpCredentials(project=\"project\")\n            result = bigquery_load_file(\n                dataset=\"dataset\",\n                table=\"test_table\",\n                path=\"path\",\n                gcp_credentials=gcp_credentials\n            )\n            return result\n\n        example_bigquery_load_file_flow()\n        ```\n    \"\"\"\n    logger = get_run_logger()\n    logger.info(\"Loading into %s.%s from file\", dataset, table)\n\n    if not os.path.exists(path):\n        raise ValueError(f\"{path} does not exist\")\n    elif not os.path.isfile(path):\n        raise ValueError(f\"{path} is not a file\")\n\n    client = gcp_credentials.get_bigquery_client(project=project)\n    table_ref = client.dataset(dataset).table(table)\n\n    job_config = job_config or {}\n    if \"autodetect\" not in job_config:\n        job_config[\"autodetect\"] = True\n        # TODO: test if autodetect is needed when schema is passed\n    job_config = LoadJobConfig(**job_config)\n    if schema:\n        # TODO: test if schema can be passed directly in job_config\n        job_config.schema = schema\n\n    try:\n        with open(path, \"rb\") as file_obj:\n            partial_load = partial(\n                _result_sync,\n                client.load_table_from_file,\n                file_obj,\n                table_ref,\n                rewind=rewind,\n                size=size,\n                location=location,\n                job_config=job_config,\n            )\n            result = 
await to_thread.run_sync(partial_load)\n    except IOError:\n        logger.exception(f\"Could not open and read from {path}\")\n        raise\n\n    if result is not None:\n        # remove unpickleable attributes\n        result._client = None\n        result._completion_lock = None\n\n    return result\n
"},{"location":"bigquery/#prefect_gcp.bigquery.bigquery_query","title":"bigquery_query async","text":"

Runs a BigQuery query.

Parameters:

query (str, required): String of the query to execute.

gcp_credentials (GcpCredentials, required): Credentials to use for authentication with GCP.

query_params (Optional[List[tuple]], default None): List of 3-tuples specifying BigQuery query parameters; currently only scalar query parameters are supported. See the Google documentation for more details on how both the query and the query parameters should be formatted.

dry_run_max_bytes (Optional[int], default None): If provided, the maximum number of bytes the query is allowed to process; this will be determined by executing a dry run and raising an error if the maximum is exceeded.

dataset (Optional[str], default None): Name of a destination dataset to write the query results to, if you don't want them returned; if provided, table must also be provided.

table (Optional[str], default None): Name of a destination table to write the query results to, if you don't want them returned; if provided, dataset must also be provided.

to_dataframe (bool, default False): If True, returns the results of the query as a pandas dataframe instead of a list of bigquery.table.Row objects.

job_config (Optional[dict], default None): Dictionary of job configuration parameters; note that the parameters provided here must be pickleable (e.g., dataset references will be rejected).

project (Optional[str], default None): The project to initialize the BigQuery Client with; if not provided, will default to the one inferred from your credentials.

result_transformer (Optional[Callable[[List[Row]], Any]], default None): Function that can be passed to transform the result of a query before returning. The function will be passed the list of rows returned by BigQuery for the given query.

location (str, default 'US'): Location of the dataset that will be queried.

Returns:

Any: A list of rows, or a pandas DataFrame if to_dataframe is True, matching the query criteria.

Example

Queries the public Shakespeare samples dataset for the most frequent words in Romeo and Juliet.

from prefect import flow\nfrom prefect_gcp import GcpCredentials\nfrom prefect_gcp.bigquery import bigquery_query\n\n@flow\ndef example_bigquery_query_flow():\n    gcp_credentials = GcpCredentials(\n        service_account_file=\"/path/to/service/account/keyfile.json\",\n        project=\"project\"\n    )\n    query = '''\n        SELECT word, word_count\n        FROM `bigquery-public-data.samples.shakespeare`\n        WHERE corpus = @corpus\n        AND word_count >= @min_word_count\n        ORDER BY word_count DESC;\n    '''\n    query_params = [\n        (\"corpus\", \"STRING\", \"romeoandjuliet\"),\n        (\"min_word_count\", \"INT64\", 250)\n    ]\n    result = bigquery_query(\n        query, gcp_credentials, query_params=query_params\n    )\n    return result\n\nexample_bigquery_query_flow()\n
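A further sketch (the query and the byte limit are illustrative) showing dry_run_max_bytes to cap the bytes a query may process and to_dataframe to get a pandas DataFrame back:

from prefect import flow\nfrom prefect_gcp import GcpCredentials\nfrom prefect_gcp.bigquery import bigquery_query\n\n@flow\ndef example_bigquery_query_dataframe_flow():\n    gcp_credentials = GcpCredentials(project=\"project\")\n    query = '''\n        SELECT word, word_count\n        FROM `bigquery-public-data.samples.shakespeare`\n        WHERE corpus = 'romeoandjuliet'\n        ORDER BY word_count DESC;\n    '''\n    result = bigquery_query(\n        query,\n        gcp_credentials,\n        dry_run_max_bytes=10_000_000,  # abort if the dry run exceeds ~10 MB\n        to_dataframe=True,  # return a pandas DataFrame instead of Row objects\n    )\n    return result\n\nexample_bigquery_query_dataframe_flow()\n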

Source code in prefect_gcp/bigquery.py
@task\nasync def bigquery_query(\n    query: str,\n    gcp_credentials: GcpCredentials,\n    query_params: Optional[List[tuple]] = None,  # 3-tuples\n    dry_run_max_bytes: Optional[int] = None,\n    dataset: Optional[str] = None,\n    table: Optional[str] = None,\n    to_dataframe: bool = False,\n    job_config: Optional[dict] = None,\n    project: Optional[str] = None,\n    result_transformer: Optional[Callable[[List[\"Row\"]], Any]] = None,\n    location: str = \"US\",\n) -> Any:\n    \"\"\"\n    Runs a BigQuery query.\n\n    Args:\n        query: String of the query to execute.\n        gcp_credentials: Credentials to use for authentication with GCP.\n        query_params: List of 3-tuples specifying BigQuery query parameters; currently\n            only scalar query parameters are supported.  See the\n            [Google documentation](https://cloud.google.com/bigquery/docs/parameterized-queries#bigquery-query-params-python)\n            for more details on how both the query and the query parameters should be formatted.\n        dry_run_max_bytes: If provided, the maximum number of bytes the query\n            is allowed to process; this will be determined by executing a dry run\n            and raising a `ValueError` if the maximum is exceeded.\n        dataset: Name of a destination dataset to write the query results to,\n            if you don't want them returned; if provided, `table` must also be provided.\n        table: Name of a destination table to write the query results to,\n            if you don't want them returned; if provided, `dataset` must also be provided.\n        to_dataframe: If provided, returns the results of the query as a pandas\n            dataframe instead of a list of `bigquery.table.Row` objects.\n        job_config: Dictionary of job configuration parameters;\n            note that the parameters provided here must be pickleable\n            (e.g., dataset references will be rejected).\n        project: The project to initialize the BigQuery Client with; if not\n            provided, will default to the one inferred from your credentials.\n        result_transformer: Function that can be passed to transform the result of a query before returning. 
The function will be passed the list of rows returned by BigQuery for the given query.\n        location: Location of the dataset that will be queried.\n\n    Returns:\n        A list of rows, or pandas DataFrame if to_dataframe,\n        matching the query criteria.\n\n    Example:\n        Queries the public names database, returning 10 results.\n        ```python\n        from prefect import flow\n        from prefect_gcp import GcpCredentials\n        from prefect_gcp.bigquery import bigquery_query\n\n        @flow\n        def example_bigquery_query_flow():\n            gcp_credentials = GcpCredentials(\n                service_account_file=\"/path/to/service/account/keyfile.json\",\n                project=\"project\"\n            )\n            query = '''\n                SELECT word, word_count\n                FROM `bigquery-public-data.samples.shakespeare`\n                WHERE corpus = @corpus\n                AND word_count >= @min_word_count\n                ORDER BY word_count DESC;\n            '''\n            query_params = [\n                (\"corpus\", \"STRING\", \"romeoandjuliet\"),\n                (\"min_word_count\", \"INT64\", 250)\n            ]\n            result = bigquery_query(\n                query, gcp_credentials, query_params=query_params\n            )\n            return result\n\n        example_bigquery_query_flow()\n        ```\n    \"\"\"  # noqa\n    logger = get_run_logger()\n    logger.info(\"Running BigQuery query\")\n\n    client = gcp_credentials.get_bigquery_client(project=project, location=location)\n\n    # setup job config\n    job_config = QueryJobConfig(**job_config or {})\n    if query_params is not None:\n        job_config.query_parameters = [ScalarQueryParameter(*qp) for qp in query_params]\n\n    # perform dry_run if requested\n    if dry_run_max_bytes is not None:\n        saved_info = dict(\n            dry_run=job_config.dry_run, use_query_cache=job_config.use_query_cache\n        )\n        job_config.dry_run = True\n        job_config.use_query_cache = False\n        partial_query = partial(client.query, query, job_config=job_config)\n        response = await to_thread.run_sync(partial_query)\n        total_bytes_processed = response.total_bytes_processed\n        if total_bytes_processed > dry_run_max_bytes:\n            raise RuntimeError(\n                f\"Query will process {total_bytes_processed} bytes which is above \"\n                f\"the set maximum of {dry_run_max_bytes} for this task.\"\n            )\n        job_config.dry_run = saved_info[\"dry_run\"]\n        job_config.use_query_cache = saved_info[\"use_query_cache\"]\n\n    # if writing to a destination table\n    if dataset is not None:\n        table_ref = client.dataset(dataset).table(table)\n        job_config.destination = table_ref\n\n    partial_query = partial(\n        _result_sync,\n        client.query,\n        query,\n        job_config=job_config,\n    )\n    result = await to_thread.run_sync(partial_query)\n\n    if to_dataframe:\n        return result.to_dataframe()\n    else:\n        if result_transformer:\n            return result_transformer(result)\n        else:\n            return list(result)\n
"},{"location":"blocks_catalog/","title":"Blocks Catalog","text":"

Below is a list of Blocks available for registration in prefect-gcp.

To register blocks in this module to view and edit them on Prefect Cloud, first install the required packages, then

prefect block register -m prefect_gcp\n
Note, to use the load method on Blocks, you must already have a block document saved through code or saved through the UI.
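For example, a minimal sketch of saving a block document through code so that load can find it later (the project value and MY_BLOCK_NAME are placeholders):

from prefect_gcp import GcpCredentials\n\n# save a block document that later calls to .load(\"MY_BLOCK_NAME\") can resolve\nGcpCredentials(project=\"my-project\").save(\"MY_BLOCK_NAME\")\n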

"},{"location":"blocks_catalog/#credentials-module","title":"Credentials Module","text":"

GcpCredentials

Block used to manage authentication with GCP. Google authentication is handled via the google.oauth2 module or through the CLI. Specify either service_account_file or service_account_info; if neither is specified, the client will try to detect the credentials following Google's Application Default Credentials. See Google's Authentication documentation for details on inference and recommended authentication patterns.

To load the GcpCredentials:

from prefect import flow\nfrom prefect_gcp.credentials import GcpCredentials\n\n@flow\ndef my_flow():\n    my_block = GcpCredentials.load(\"MY_BLOCK_NAME\")\n\nmy_flow()\n
For additional examples, check out the Credentials Module under Examples Catalog.

"},{"location":"blocks_catalog/#aiplatform-module","title":"Aiplatform Module","text":"

VertexAICustomTrainingJob

Infrastructure block used to run Vertex AI custom training jobs.

To load the VertexAICustomTrainingJob:

from prefect import flow\nfrom prefect_gcp.aiplatform import VertexAICustomTrainingJob\n\n@flow\ndef my_flow():\n    my_block = VertexAICustomTrainingJob.load(\"MY_BLOCK_NAME\")\n\nmy_flow()\n
For additional examples, check out the Aiplatform Module under Examples Catalog.

"},{"location":"blocks_catalog/#bigquery-module","title":"Bigquery Module","text":"

BigQueryWarehouse

A block for querying a database with BigQuery.

Upon instantiating, a connection to BigQuery is established and maintained for the life of the object until the close method is called.

It is recommended to use this block as a context manager, which will automatically close the connection and its cursors when the context is exited.

It is also recommended that this block is loaded and consumed within a single task or flow because if the block is passed across separate tasks and flows, the state of the block's connection and cursor could be lost.

To load the BigQueryWarehouse:

from prefect import flow\nfrom prefect_gcp.bigquery import BigQueryWarehouse\n\n@flow\ndef my_flow():\n    my_block = BigQueryWarehouse.load(\"MY_BLOCK_NAME\")\n\nmy_flow()\n
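Building on the load example above, a minimal sketch of the recommended context-manager usage (assuming a saved block document named MY_BLOCK_NAME and a fetch_one method that accepts an operation and parameters, as shown in the Examples Catalog):

from prefect import flow\nfrom prefect_gcp.bigquery import BigQueryWarehouse\n\n@flow\ndef my_flow():\n    # the context manager closes the connection and its cursors on exit\n    with BigQueryWarehouse.load(\"MY_BLOCK_NAME\") as warehouse:\n        operation = '''\n            SELECT word, word_count\n            FROM `bigquery-public-data.samples.shakespeare`\n            WHERE corpus = %(corpus)s\n            ORDER BY word_count DESC\n            LIMIT 3;\n        '''\n        return warehouse.fetch_one(operation, parameters={\"corpus\": \"romeoandjuliet\"})\n\nmy_flow()\n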
For additional examples, check out the Bigquery Module under Examples Catalog.

"},{"location":"blocks_catalog/#cloud-run-module","title":"Cloud Run Module","text":"

CloudRunJob

Infrastructure block used to run GCP Cloud Run Jobs. Note this block is experimental. The interface may change without notice.

To load the CloudRunJob:

from prefect import flow\nfrom prefect_gcp.cloud_run import CloudRunJob\n\n@flow\ndef my_flow():\n    my_block = CloudRunJob.load(\"MY_BLOCK_NAME\")\n\nmy_flow()\n
For additional examples, check out the Cloud Run Module under Examples Catalog.

"},{"location":"blocks_catalog/#cloud-storage-module","title":"Cloud Storage Module","text":"

GcsBucket

Block used to store data using GCP Cloud Storage Buckets.

Note! GcsBucket in prefect-gcp is a unique block, separate from GCS in core Prefect. GcsBucket does not use gcsfs under the hood, instead using the google-cloud-storage package, and offers more configuration and functionality.

To load the GcsBucket:

from prefect import flow\nfrom prefect_gcp.cloud_storage import GcsBucket\n\n@flow\ndef my_flow():\n    my_block = GcsBucket.load(\"MY_BLOCK_NAME\")\n\nmy_flow()\n
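Beyond loading the block, a minimal sketch of moving files to and from the bucket (assuming a saved block document named MY_BLOCK_NAME and the upload_from_path and download_object_to_path methods; the file paths are placeholders):

from prefect import flow\nfrom prefect_gcp.cloud_storage import GcsBucket\n\n@flow\ndef my_flow():\n    gcs_bucket = GcsBucket.load(\"MY_BLOCK_NAME\")\n    # upload a local file, then download it to a new local path\n    gcs_bucket.upload_from_path(\"local_file.txt\", \"folder/remote_file.txt\")\n    gcs_bucket.download_object_to_path(\"folder/remote_file.txt\", \"downloaded_file.txt\")\n\nmy_flow()\n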
For additional examples, check out the Cloud Storage Module under Examples Catalog.

"},{"location":"blocks_catalog/#secret-manager-module","title":"Secret Manager Module","text":"

GcpSecret

Manages a secret in Google Cloud Platform's Secret Manager.

To load the GcpSecret:

from prefect import flow\nfrom prefect_gcp.secret_manager import GcpSecret\n\n@flow\ndef my_flow():\n    my_block = GcpSecret.load(\"MY_BLOCK_NAME\")\n\nmy_flow()\n
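A minimal sketch of writing and then reading a secret value with the loaded block (assuming a saved block document named MY_BLOCK_NAME and the write_secret and read_secret methods):

from prefect import flow\nfrom prefect_gcp.secret_manager import GcpSecret\n\n@flow\ndef my_flow():\n    gcp_secret = GcpSecret.load(\"MY_BLOCK_NAME\")\n    # write a new secret version, then read it back as bytes\n    gcp_secret.write_secret(b\"my-secret-value\")\n    return gcp_secret.read_secret()\n\nmy_flow()\n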
For additional examples, check out the Secret Manager Module under Examples Catalog.

"},{"location":"cloud_run/","title":"Cloud Run","text":""},{"location":"cloud_run/#prefect_gcp.cloud_run","title":"prefect_gcp.cloud_run","text":"

DEPRECATION WARNING:

This module is deprecated as of March 2024 and will not be available after September 2024. It has been replaced by the Cloud Run and Cloud Run V2 workers, which offer enhanced functionality and better performance.

For upgrade instructions, see https://docs.prefect.io/latest/guides/upgrade-guide-agents-to-workers/.
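As a hedged illustration of the replacement path (the pool name is a placeholder; the upgrade guide above is the authoritative reference), a Cloud Run work pool can be created with the Prefect CLI and used in place of this block:

prefect work-pool create my-cloud-run-pool --type cloud-run\n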

Integrations with Google Cloud Run Job.

Note this module is experimental. The interfaces within may change without notice.

Examples:

Run a job using Google Cloud Run Jobs:\n```python\nCloudRunJob(\n    image=\"gcr.io/my-project/my-image\",\n    region=\"us-east1\",\n    credentials=my_gcp_credentials\n).run()\n```\n\nRun a job that runs the command `echo hello world` using Google Cloud Run Jobs:\n```python\nCloudRunJob(\n    image=\"gcr.io/my-project/my-image\",\n    region=\"us-east1\",\n    credentials=my_gcp_credentials\n    command=[\"echo\", \"hello world\"]\n).run()\n```\n
"},{"location":"cloud_run/#prefect_gcp.cloud_run-classes","title":"Classes","text":""},{"location":"cloud_run/#prefect_gcp.cloud_run.CloudRunJob","title":"CloudRunJob","text":"

Bases: Infrastructure

Infrastructure block used to run GCP Cloud Run Jobs.

Project name information is provided by the Credentials object, and should always be correct as long as the Credentials object is for the correct project.

Note this block is experimental. The interface may change without notice.

Source code in prefect_gcp/cloud_run.py
@deprecated_class(\n    start_date=\"Mar 2024\",\n    help=(\n        \"Use the Cloud Run or Cloud Run v2 worker instead.\"\n        \" Refer to the upgrade guide for more information:\"\n        \" https://docs.prefect.io/latest/guides/upgrade-guide-agents-to-workers/.\"\n    ),\n)\nclass CloudRunJob(Infrastructure):\n    \"\"\"\n    <span class=\"badge-api experimental\"/>\n\n    Infrastructure block used to run GCP Cloud Run Jobs.\n\n    Project name information is provided by the Credentials object, and should always\n    be correct as long as the Credentials object is for the correct project.\n\n    Note this block is experimental. The interface may change without notice.\n    \"\"\"\n\n    _block_type_slug = \"cloud-run-job\"\n    _block_type_name = \"GCP Cloud Run Job\"\n    _description = \"Infrastructure block used to run GCP Cloud Run Jobs. Note this block is experimental. The interface may change without notice.\"  # noqa\n    _logo_url = \"https://cdn.sanity.io/images/3ugk85nk/production/10424e311932e31c477ac2b9ef3d53cefbaad708-250x250.png\"  # noqa\n    _documentation_url = \"https://prefecthq.github.io/prefect-gcp/cloud_run/#prefect_gcp.cloud_run.CloudRunJob\"  # noqa: E501\n\n    type: Literal[\"cloud-run-job\"] = Field(\n        \"cloud-run-job\", description=\"The slug for this task type.\"\n    )\n    image: str = Field(\n        ...,\n        title=\"Image Name\",\n        description=(\n            \"The image to use for a new Cloud Run Job. This value must \"\n            \"refer to an image within either Google Container Registry \"\n            \"or Google Artifact Registry, like `gcr.io/<project_name>/<repo>/`.\"\n        ),\n    )\n    region: str = Field(..., description=\"The region where the Cloud Run Job resides.\")\n    credentials: GcpCredentials  # cannot be Field; else it shows as Json\n\n    # Job settings\n    cpu: Optional[int] = Field(\n        default=None,\n        title=\"CPU\",\n        description=(\n            \"The amount of compute allocated to the Cloud Run Job. \"\n            \"The int must be valid based on the rules specified at \"\n            \"https://cloud.google.com/run/docs/configuring/cpu#setting-jobs .\"\n        ),\n    )\n    memory: Optional[int] = Field(\n        default=None,\n        title=\"Memory\",\n        description=\"The amount of memory allocated to the Cloud Run Job.\",\n    )\n    memory_unit: Optional[Literal[\"G\", \"Gi\", \"M\", \"Mi\"]] = Field(\n        default=None,\n        title=\"Memory Units\",\n        description=(\n            \"The unit of memory. 
See \"\n            \"https://cloud.google.com/run/docs/configuring/memory-limits#setting \"\n            \"for additional details.\"\n        ),\n    )\n    vpc_connector_name: Optional[str] = Field(\n        default=None,\n        title=\"VPC Connector Name\",\n        description=\"The name of the VPC connector to use for the Cloud Run Job.\",\n    )\n    args: Optional[List[str]] = Field(\n        default=None,\n        description=(\n            \"Arguments to be passed to your Cloud Run Job's entrypoint command.\"\n        ),\n    )\n    env: Dict[str, str] = Field(\n        default_factory=dict,\n        description=\"Environment variables to be passed to your Cloud Run Job.\",\n    )\n\n    # Cleanup behavior\n    keep_job: Optional[bool] = Field(\n        default=False,\n        title=\"Keep Job After Completion\",\n        description=\"Keep the completed Cloud Run Job on Google Cloud Platform.\",\n    )\n    timeout: Optional[int] = Field(\n        default=600,\n        gt=0,\n        le=3600,\n        title=\"Job Timeout\",\n        description=(\n            \"The length of time that Prefect will wait for a Cloud Run Job to complete \"\n            \"before raising an exception.\"\n        ),\n    )\n    max_retries: Optional[int] = Field(\n        default=3,\n        ge=0,\n        le=10,\n        title=\"Max Retries\",\n        description=(\n            \"The maximum retries setting specifies the number of times a task is \"\n            \"allowed to restart in case of failure before being failed permanently.\"\n        ),\n    )\n    # For private use\n    _job_name: str = None\n    _execution: Optional[Execution] = None\n\n    @property\n    def job_name(self):\n        \"\"\"Create a unique and valid job name.\"\"\"\n\n        if self._job_name is None:\n            # get `repo` from `gcr.io/<project_name>/repo/other`\n            components = self.image.split(\"/\")\n            image_name = components[2]\n            # only alphanumeric and '-' allowed for a job name\n            modified_image_name = image_name.replace(\":\", \"-\").replace(\".\", \"-\")\n            # make 50 char limit for final job name, which will be '<name>-<uuid>'\n            if len(modified_image_name) > 17:\n                modified_image_name = modified_image_name[:17]\n            name = f\"{modified_image_name}-{uuid4().hex}\"\n            self._job_name = name\n\n        return self._job_name\n\n    @property\n    def memory_string(self):\n        \"\"\"Returns the string expected for memory resources argument.\"\"\"\n        if self.memory and self.memory_unit:\n            return str(self.memory) + self.memory_unit\n        return None\n\n    @validator(\"image\")\n    def _remove_image_spaces(cls, value):\n        \"\"\"Deal with spaces in image names.\"\"\"\n        if value is not None:\n            return value.strip()\n\n    @root_validator\n    def _check_valid_memory(cls, values):\n        \"\"\"Make sure memory conforms to expected values for API.\n        See: https://cloud.google.com/run/docs/configuring/memory-limits#setting\n        \"\"\"  # noqa\n        if (values.get(\"memory\") is not None and values.get(\"memory_unit\") is None) or (\n            values.get(\"memory_unit\") is not None and values.get(\"memory\") is None\n        ):\n            raise ValueError(\n                \"A memory value and unit must both be supplied to specify a memory\"\n                \" value other than the default memory value.\"\n            )\n        return values\n\n    def 
get_corresponding_worker_type(self) -> str:\n        \"\"\"Return the corresponding worker type for this infrastructure block.\"\"\"\n        return \"cloud-run\"\n\n    async def generate_work_pool_base_job_template(self) -> dict:\n        \"\"\"\n        Generate a base job template for a cloud-run work pool with the same\n        configuration as this block.\n\n        Returns:\n            - dict: a base job template for a cloud-run work pool\n        \"\"\"\n        base_job_template = await get_default_base_job_template_for_infrastructure_type(\n            self.get_corresponding_worker_type(),\n        )\n        assert (\n            base_job_template is not None\n        ), \"Failed to generate default base job template for Cloud Run worker.\"\n        for key, value in self.dict(exclude_unset=True, exclude_defaults=True).items():\n            if key == \"command\":\n                base_job_template[\"variables\"][\"properties\"][\"command\"][\n                    \"default\"\n                ] = shlex.join(value)\n            elif key in [\n                \"type\",\n                \"block_type_slug\",\n                \"_block_document_id\",\n                \"_block_document_name\",\n                \"_is_anonymous\",\n                \"memory_unit\",\n            ]:\n                continue\n            elif key == \"credentials\":\n                if not self.credentials._block_document_id:\n                    raise BlockNotSavedError(\n                        \"It looks like you are trying to use a block that\"\n                        \" has not been saved. Please call `.save` on your block\"\n                        \" before publishing it as a work pool.\"\n                    )\n                base_job_template[\"variables\"][\"properties\"][\"credentials\"][\n                    \"default\"\n                ] = {\n                    \"$ref\": {\n                        \"block_document_id\": str(self.credentials._block_document_id)\n                    }\n                }\n            elif key == \"memory\" and self.memory_string:\n                base_job_template[\"variables\"][\"properties\"][\"memory\"][\n                    \"default\"\n                ] = self.memory_string\n            elif key == \"cpu\" and self.cpu is not None:\n                base_job_template[\"variables\"][\"properties\"][\"cpu\"][\n                    \"default\"\n                ] = f\"{self.cpu * 1000}m\"\n            elif key == \"args\":\n                # Not a default variable, but we can add it to the template\n                base_job_template[\"variables\"][\"properties\"][\"args\"] = {\n                    \"title\": \"Arguments\",\n                    \"type\": \"string\",\n                    \"description\": \"Arguments to be passed to your Cloud Run Job's entrypoint command.\",  # noqa\n                    \"default\": value,\n                }\n                base_job_template[\"job_configuration\"][\"job_body\"][\"spec\"][\"template\"][\n                    \"spec\"\n                ][\"template\"][\"spec\"][\"containers\"][0][\"args\"] = \"{{ args }}\"\n            elif key in base_job_template[\"variables\"][\"properties\"]:\n                base_job_template[\"variables\"][\"properties\"][key][\"default\"] = value\n            else:\n                self.logger.warning(\n                    f\"Variable {key!r} is not supported by Cloud Run work pools.\"\n                    \" Skipping.\"\n                )\n\n        return base_job_template\n\n    def 
_create_job_error(self, exc):\n        \"\"\"Provides a nicer error for 404s when trying to create a Cloud Run Job.\"\"\"\n        # TODO consider lookup table instead of the if/else,\n        # also check for documented errors\n        if exc.status_code == 404:\n            raise RuntimeError(\n                f\"Failed to find resources at {exc.uri}. Confirm that region\"\n                f\" '{self.region}' is the correct region for your Cloud Run Job and\"\n                f\" that {self.credentials.project} is the correct GCP project. If\"\n                f\" your project ID is not correct, you are using a Credentials block\"\n                f\" with permissions for the wrong project.\"\n            ) from exc\n        raise exc\n\n    def _job_run_submission_error(self, exc):\n        \"\"\"Provides a nicer error for 404s when submitting job runs.\"\"\"\n        if exc.status_code == 404:\n            pat1 = r\"The requested URL [^ ]+ was not found on this server\"\n            # pat2 = (\n            #     r\"Resource '[^ ]+' of kind 'JOB' in region '[\\w\\-0-9]+' \"\n            #     r\"in project '[\\w\\-0-9]+' does not exist\"\n            # )\n            if re.findall(pat1, str(exc)):\n                raise RuntimeError(\n                    f\"Failed to find resources at {exc.uri}. \"\n                    f\"Confirm that region '{self.region}' is \"\n                    f\"the correct region for your Cloud Run Job \"\n                    f\"and that '{self.credentials.project}' is the \"\n                    f\"correct GCP project. If your project ID is not \"\n                    f\"correct, you are using a Credentials \"\n                    f\"block with permissions for the wrong project.\"\n                ) from exc\n            else:\n                raise exc\n\n        raise exc\n\n    def _cpu_as_k8s_quantity(self) -> str:\n        \"\"\"Return the CPU integer in the format expected by GCP Cloud Run Jobs API.\n        See: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/\n        See also: https://cloud.google.com/run/docs/configuring/cpu#setting-jobs\n        \"\"\"  # noqa\n        return str(self.cpu * 1000) + \"m\"\n\n    @sync_compatible\n    async def run(self, task_status: Optional[TaskStatus] = None):\n        \"\"\"Run the configured job on a Google Cloud Run Job.\"\"\"\n        with self._get_client() as client:\n            await run_sync_in_worker_thread(\n                self._create_job_and_wait_for_registration, client\n            )\n            job_execution = await run_sync_in_worker_thread(\n                self._begin_job_execution, client\n            )\n\n            if task_status:\n                task_status.started(self.job_name)\n\n            result = await run_sync_in_worker_thread(\n                self._watch_job_execution_and_get_result,\n                client,\n                job_execution,\n                5,\n            )\n            return result\n\n    @sync_compatible\n    async def kill(self, identifier: str, grace_seconds: int = 30) -> None:\n        \"\"\"\n        Kill a task running Cloud Run.\n\n        Args:\n            identifier: The Cloud Run Job name. This should match a\n                value yielded by CloudRunJob.run.\n        \"\"\"\n        if grace_seconds != 30:\n            self.logger.warning(\n                f\"Kill grace period of {grace_seconds}s requested, but GCP does not \"\n                \"support dynamic grace period configuration. 
See here for more info: \"\n                \"https://cloud.google.com/run/docs/reference/rest/v1/namespaces.jobs/delete\"  # noqa\n            )\n\n        with self._get_client() as client:\n            await run_sync_in_worker_thread(\n                self._kill_job,\n                client=client,\n                namespace=self.credentials.project,\n                job_name=identifier,\n            )\n\n    def _kill_job(self, client: Resource, namespace: str, job_name: str) -> None:\n        \"\"\"\n        Thin wrapper around Job.delete, wrapping a try/except since\n        Job is an independent class that doesn't have knowledge of\n        CloudRunJob and its associated logic.\n        \"\"\"\n        try:\n            Job.delete(client=client, namespace=namespace, job_name=job_name)\n        except Exception as exc:\n            if \"does not exist\" in str(exc):\n                raise InfrastructureNotFound(\n                    f\"Cannot stop Cloud Run Job; the job name {job_name!r} \"\n                    \"could not be found.\"\n                ) from exc\n            raise\n\n    def _create_job_and_wait_for_registration(self, client: Resource) -> None:\n        \"\"\"Create a new job wait for it to finish registering.\"\"\"\n        try:\n            self.logger.info(f\"Creating Cloud Run Job {self.job_name}\")\n            Job.create(\n                client=client,\n                namespace=self.credentials.project,\n                body=self._jobs_body(),\n            )\n        except googleapiclient.errors.HttpError as exc:\n            self._create_job_error(exc)\n\n        try:\n            self._wait_for_job_creation(client=client, timeout=self.timeout)\n        except Exception:\n            self.logger.exception(\n                \"Encountered an exception while waiting for job run creation\"\n            )\n            if not self.keep_job:\n                self.logger.info(\n                    f\"Deleting Cloud Run Job {self.job_name} from Google Cloud Run.\"\n                )\n                try:\n                    Job.delete(\n                        client=client,\n                        namespace=self.credentials.project,\n                        job_name=self.job_name,\n                    )\n                except Exception:\n                    self.logger.exception(\n                        \"Received an unexpected exception while attempting to delete\"\n                        f\" Cloud Run Job {self.job_name!r}\"\n                    )\n            raise\n\n    def _begin_job_execution(self, client: Resource) -> Execution:\n        \"\"\"Submit a job run for execution and return the execution object.\"\"\"\n        try:\n            self.logger.info(\n                f\"Submitting Cloud Run Job {self.job_name!r} for execution.\"\n            )\n            submission = Job.run(\n                client=client,\n                namespace=self.credentials.project,\n                job_name=self.job_name,\n            )\n\n            job_execution = Execution.get(\n                client=client,\n                namespace=submission[\"metadata\"][\"namespace\"],\n                execution_name=submission[\"metadata\"][\"name\"],\n            )\n\n            command = (\n                \" \".join(self.command) if self.command else \"default container command\"\n            )\n\n            self.logger.info(\n                f\"Cloud Run Job {self.job_name!r}: Running command {command!r}\"\n            )\n        except Exception as exc:\n           
 self._job_run_submission_error(exc)\n\n        return job_execution\n\n    def _watch_job_execution_and_get_result(\n        self, client: Resource, execution: Execution, poll_interval: int\n    ) -> CloudRunJobResult:\n        \"\"\"Wait for execution to complete and then return result.\"\"\"\n        try:\n            job_execution = self._watch_job_execution(\n                client=client,\n                job_execution=execution,\n                timeout=self.timeout,\n                poll_interval=poll_interval,\n            )\n        except Exception:\n            self.logger.exception(\n                \"Received an unexpected exception while monitoring Cloud Run Job \"\n                f\"{self.job_name!r}\"\n            )\n            raise\n\n        if job_execution.succeeded():\n            status_code = 0\n            self.logger.info(f\"Job Run {self.job_name} completed successfully\")\n        else:\n            status_code = 1\n            error_msg = job_execution.condition_after_completion()[\"message\"]\n            self.logger.error(\n                f\"Job Run {self.job_name} did not complete successfully. {error_msg}\"\n            )\n\n        self.logger.info(\n            f\"Job Run logs can be found on GCP at: {job_execution.log_uri}\"\n        )\n\n        if not self.keep_job:\n            self.logger.info(\n                f\"Deleting completed Cloud Run Job {self.job_name!r} from Google Cloud\"\n                \" Run...\"\n            )\n            try:\n                Job.delete(\n                    client=client,\n                    namespace=self.credentials.project,\n                    job_name=self.job_name,\n                )\n            except Exception:\n                self.logger.exception(\n                    \"Received an unexpected exception while attempting to delete Cloud\"\n                    f\" Run Job {self.job_name}\"\n                )\n\n        return CloudRunJobResult(identifier=self.job_name, status_code=status_code)\n\n    def _jobs_body(self) -> dict:\n        \"\"\"Create properly formatted body used for a Job CREATE request.\n        See: https://cloud.google.com/run/docs/reference/rest/v1/namespaces.jobs\n        \"\"\"\n        jobs_metadata = {\"name\": self.job_name}\n\n        annotations = {\n            # See: https://cloud.google.com/run/docs/troubleshooting#launch-stage-validation  # noqa\n            \"run.googleapis.com/launch-stage\": \"BETA\",\n        }\n        # add vpc connector if specified\n        if self.vpc_connector_name:\n            annotations[\n                \"run.googleapis.com/vpc-access-connector\"\n            ] = self.vpc_connector_name\n\n        # env and command here\n        containers = [self._add_container_settings({\"image\": self.image})]\n\n        # apply this timeout to each task\n        timeout_seconds = str(self.timeout)\n\n        body = {\n            \"apiVersion\": \"run.googleapis.com/v1\",\n            \"kind\": \"Job\",\n            \"metadata\": jobs_metadata,\n            \"spec\": {  # JobSpec\n                \"template\": {  # ExecutionTemplateSpec\n                    \"metadata\": {\"annotations\": annotations},\n                    \"spec\": {  # ExecutionSpec\n                        \"template\": {  # TaskTemplateSpec\n                            \"spec\": {\n                                \"containers\": containers,\n                                \"timeoutSeconds\": timeout_seconds,\n                                \"maxRetries\": self.max_retries,\n   
                         }  # TaskSpec\n                        }\n                    },\n                }\n            },\n        }\n        return body\n\n    def preview(self) -> str:\n        \"\"\"Generate a preview of the job definition that will be sent to GCP.\"\"\"\n        body = self._jobs_body()\n        container_settings = body[\"spec\"][\"template\"][\"spec\"][\"template\"][\"spec\"][\n            \"containers\"\n        ][0][\"env\"]\n        body[\"spec\"][\"template\"][\"spec\"][\"template\"][\"spec\"][\"containers\"][0][\"env\"] = [\n            container_setting\n            for container_setting in container_settings\n            if container_setting[\"name\"] != \"PREFECT_API_KEY\"\n        ]\n        return json.dumps(body, indent=2)\n\n    def _watch_job_execution(\n        self, client, job_execution: Execution, timeout: int, poll_interval: int = 5\n    ):\n        \"\"\"\n        Update job_execution status until it is no longer running or timeout is reached.\n        \"\"\"\n        t0 = time.time()\n        while job_execution.is_running():\n            job_execution = Execution.get(\n                client=client,\n                namespace=job_execution.namespace,\n                execution_name=job_execution.name,\n            )\n\n            elapsed_time = time.time() - t0\n            if timeout is not None and elapsed_time > timeout:\n                raise RuntimeError(\n                    f\"Timed out after {elapsed_time}s while waiting for Cloud Run Job \"\n                    \"execution to complete. Your job may still be running on GCP.\"\n                )\n\n            time.sleep(poll_interval)\n\n        return job_execution\n\n    def _wait_for_job_creation(\n        self, client: Resource, timeout: int, poll_interval: int = 5\n    ):\n        \"\"\"Give created job time to register.\"\"\"\n        job = Job.get(\n            client=client, namespace=self.credentials.project, job_name=self.job_name\n        )\n\n        t0 = time.time()\n        while not job.is_ready():\n            ready_condition = (\n                job.ready_condition\n                if job.ready_condition\n                else \"waiting for condition update\"\n            )\n            self.logger.info(\n                f\"Job is not yet ready... Current condition: {ready_condition}\"\n            )\n            job = Job.get(\n                client=client,\n                namespace=self.credentials.project,\n                job_name=self.job_name,\n            )\n\n            elapsed_time = time.time() - t0\n            if timeout is not None and elapsed_time > timeout:\n                raise RuntimeError(\n                    f\"Timed out after {elapsed_time}s while waiting for Cloud Run Job \"\n                    \"execution to complete. 
Your job may still be running on GCP.\"\n                )\n\n            time.sleep(poll_interval)\n\n    def _get_client(self) -> Resource:\n        \"\"\"Get the base client needed for interacting with GCP APIs.\"\"\"\n        # region needed for 'v1' API\n        api_endpoint = f\"https://{self.region}-run.googleapis.com\"\n        gcp_creds = self.credentials.get_credentials_from_service_account()\n        options = ClientOptions(api_endpoint=api_endpoint)\n\n        return discovery.build(\n            \"run\", \"v1\", client_options=options, credentials=gcp_creds\n        ).namespaces()\n\n    # CONTAINER SETTINGS\n    def _add_container_settings(self, base_settings: Dict[str, Any]) -> Dict[str, Any]:\n        \"\"\"\n        Add settings related to containers for Cloud Run Jobs to a dictionary.\n        Includes environment variables, entrypoint command, entrypoint arguments,\n        and cpu and memory limits.\n        See: https://cloud.google.com/run/docs/reference/rest/v1/Container\n        and https://cloud.google.com/run/docs/reference/rest/v1/Container#ResourceRequirements\n        \"\"\"  # noqa\n        container_settings = base_settings.copy()\n        container_settings.update(self._add_env())\n        container_settings.update(self._add_resources())\n        container_settings.update(self._add_command())\n        container_settings.update(self._add_args())\n        return container_settings\n\n    def _add_args(self) -> dict:\n        \"\"\"Set the arguments that will be passed to the entrypoint for a Cloud Run Job.\n        See: https://cloud.google.com/run/docs/reference/rest/v1/Container\n        \"\"\"  # noqa\n        return {\"args\": self.args} if self.args else {}\n\n    def _add_command(self) -> dict:\n        \"\"\"Set the command that a container will run for a Cloud Run Job.\n        See: https://cloud.google.com/run/docs/reference/rest/v1/Container\n        \"\"\"  # noqa\n        return {\"command\": self.command}\n\n    def _add_resources(self) -> dict:\n        \"\"\"Set specified resources limits for a Cloud Run Job.\n        See: https://cloud.google.com/run/docs/reference/rest/v1/Container#ResourceRequirements\n        See also: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/\n        \"\"\"  # noqa\n        resources = {\"limits\": {}, \"requests\": {}}\n\n        if self.cpu is not None:\n            cpu = self._cpu_as_k8s_quantity()\n            resources[\"limits\"][\"cpu\"] = cpu\n            resources[\"requests\"][\"cpu\"] = cpu\n        if self.memory_string is not None:\n            resources[\"limits\"][\"memory\"] = self.memory_string\n            resources[\"requests\"][\"memory\"] = self.memory_string\n\n        return {\"resources\": resources} if resources[\"requests\"] else {}\n\n    def _add_env(self) -> dict:\n        \"\"\"Add environment variables for a Cloud Run Job.\n\n        Method `self._base_environment()` gets necessary Prefect environment variables\n        from the config.\n\n        See: https://cloud.google.com/run/docs/reference/rest/v1/Container#envvar for\n        how environment variables are specified for Cloud Run Jobs.\n        \"\"\"  # noqa\n        env = {**self._base_environment(), **self.env}\n        cloud_run_env = [{\"name\": k, \"value\": v} for k, v in env.items()]\n        return {\"env\": cloud_run_env}\n
"},{"location":"cloud_run/#prefect_gcp.cloud_run.CloudRunJob-attributes","title":"Attributes","text":""},{"location":"cloud_run/#prefect_gcp.cloud_run.CloudRunJob.job_name","title":"job_name property","text":"

Create a unique and valid job name.

"},{"location":"cloud_run/#prefect_gcp.cloud_run.CloudRunJob.memory_string","title":"memory_string property","text":"

Returns the string expected for memory resources argument.

"},{"location":"cloud_run/#prefect_gcp.cloud_run.CloudRunJob-functions","title":"Functions","text":""},{"location":"cloud_run/#prefect_gcp.cloud_run.CloudRunJob.generate_work_pool_base_job_template","title":"generate_work_pool_base_job_template async","text":"

Generate a base job template for a cloud-run work pool with the same configuration as this block.

Returns:

dict: a base job template for a cloud-run work pool
Source code in prefect_gcp/cloud_run.py
async def generate_work_pool_base_job_template(self) -> dict:\n    \"\"\"\n    Generate a base job template for a cloud-run work pool with the same\n    configuration as this block.\n\n    Returns:\n        - dict: a base job template for a cloud-run work pool\n    \"\"\"\n    base_job_template = await get_default_base_job_template_for_infrastructure_type(\n        self.get_corresponding_worker_type(),\n    )\n    assert (\n        base_job_template is not None\n    ), \"Failed to generate default base job template for Cloud Run worker.\"\n    for key, value in self.dict(exclude_unset=True, exclude_defaults=True).items():\n        if key == \"command\":\n            base_job_template[\"variables\"][\"properties\"][\"command\"][\n                \"default\"\n            ] = shlex.join(value)\n        elif key in [\n            \"type\",\n            \"block_type_slug\",\n            \"_block_document_id\",\n            \"_block_document_name\",\n            \"_is_anonymous\",\n            \"memory_unit\",\n        ]:\n            continue\n        elif key == \"credentials\":\n            if not self.credentials._block_document_id:\n                raise BlockNotSavedError(\n                    \"It looks like you are trying to use a block that\"\n                    \" has not been saved. Please call `.save` on your block\"\n                    \" before publishing it as a work pool.\"\n                )\n            base_job_template[\"variables\"][\"properties\"][\"credentials\"][\n                \"default\"\n            ] = {\n                \"$ref\": {\n                    \"block_document_id\": str(self.credentials._block_document_id)\n                }\n            }\n        elif key == \"memory\" and self.memory_string:\n            base_job_template[\"variables\"][\"properties\"][\"memory\"][\n                \"default\"\n            ] = self.memory_string\n        elif key == \"cpu\" and self.cpu is not None:\n            base_job_template[\"variables\"][\"properties\"][\"cpu\"][\n                \"default\"\n            ] = f\"{self.cpu * 1000}m\"\n        elif key == \"args\":\n            # Not a default variable, but we can add it to the template\n            base_job_template[\"variables\"][\"properties\"][\"args\"] = {\n                \"title\": \"Arguments\",\n                \"type\": \"string\",\n                \"description\": \"Arguments to be passed to your Cloud Run Job's entrypoint command.\",  # noqa\n                \"default\": value,\n            }\n            base_job_template[\"job_configuration\"][\"job_body\"][\"spec\"][\"template\"][\n                \"spec\"\n            ][\"template\"][\"spec\"][\"containers\"][0][\"args\"] = \"{{ args }}\"\n        elif key in base_job_template[\"variables\"][\"properties\"]:\n            base_job_template[\"variables\"][\"properties\"][key][\"default\"] = value\n        else:\n            self.logger.warning(\n                f\"Variable {key!r} is not supported by Cloud Run work pools.\"\n                \" Skipping.\"\n            )\n\n    return base_job_template\n
"},{"location":"cloud_run/#prefect_gcp.cloud_run.CloudRunJob.get_corresponding_worker_type","title":"get_corresponding_worker_type","text":"

Return the corresponding worker type for this infrastructure block.

Source code in prefect_gcp/cloud_run.py
def get_corresponding_worker_type(self) -> str:\n    \"\"\"Return the corresponding worker type for this infrastructure block.\"\"\"\n    return \"cloud-run\"\n
"},{"location":"cloud_run/#prefect_gcp.cloud_run.CloudRunJob.kill","title":"kill async","text":"

Kill a task running Cloud Run.

Parameters:

identifier (str, required): The Cloud Run Job name. This should match a value yielded by CloudRunJob.run.

Source code in prefect_gcp/cloud_run.py
@sync_compatible\nasync def kill(self, identifier: str, grace_seconds: int = 30) -> None:\n    \"\"\"\n    Kill a task running Cloud Run.\n\n    Args:\n        identifier: The Cloud Run Job name. This should match a\n            value yielded by CloudRunJob.run.\n    \"\"\"\n    if grace_seconds != 30:\n        self.logger.warning(\n            f\"Kill grace period of {grace_seconds}s requested, but GCP does not \"\n            \"support dynamic grace period configuration. See here for more info: \"\n            \"https://cloud.google.com/run/docs/reference/rest/v1/namespaces.jobs/delete\"  # noqa\n        )\n\n    with self._get_client() as client:\n        await run_sync_in_worker_thread(\n            self._kill_job,\n            client=client,\n            namespace=self.credentials.project,\n            job_name=identifier,\n        )\n
"},{"location":"cloud_run/#prefect_gcp.cloud_run.CloudRunJob.preview","title":"preview","text":"

Generate a preview of the job definition that will be sent to GCP.

Source code in prefect_gcp/cloud_run.py
def preview(self) -> str:\n    \"\"\"Generate a preview of the job definition that will be sent to GCP.\"\"\"\n    body = self._jobs_body()\n    container_settings = body[\"spec\"][\"template\"][\"spec\"][\"template\"][\"spec\"][\n        \"containers\"\n    ][0][\"env\"]\n    body[\"spec\"][\"template\"][\"spec\"][\"template\"][\"spec\"][\"containers\"][0][\"env\"] = [\n        container_setting\n        for container_setting in container_settings\n        if container_setting[\"name\"] != \"PREFECT_API_KEY\"\n    ]\n    return json.dumps(body, indent=2)\n
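As a hedged illustration (not taken from the source), a CloudRunJob block can be configured locally and its job body inspected before anything is created on GCP; the image, region, and service account path below are placeholders.

from prefect_gcp import GcpCredentials
from prefect_gcp.cloud_run import CloudRunJob

cloud_run_job = CloudRunJob(
    image="us-docker.pkg.dev/my-project/my-repo/my-image:latest",  # placeholder image
    region="us-central1",
    # Placeholder path; must point to a real service account key file.
    credentials=GcpCredentials(service_account_file="service-account.json"),
)

# Render the job body that would be sent to GCP (with PREFECT_API_KEY stripped)
# without creating anything.
print(cloud_run_job.preview())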
"},{"location":"cloud_run/#prefect_gcp.cloud_run.CloudRunJob.run","title":"run async","text":"

Run the configured job on a Google Cloud Run Job.

Source code in prefect_gcp/cloud_run.py
@sync_compatible\nasync def run(self, task_status: Optional[TaskStatus] = None):\n    \"\"\"Run the configured job on a Google Cloud Run Job.\"\"\"\n    with self._get_client() as client:\n        await run_sync_in_worker_thread(\n            self._create_job_and_wait_for_registration, client\n        )\n        job_execution = await run_sync_in_worker_thread(\n            self._begin_job_execution, client\n        )\n\n        if task_status:\n            task_status.started(self.job_name)\n\n        result = await run_sync_in_worker_thread(\n            self._watch_job_execution_and_get_result,\n            client,\n            job_execution,\n            5,\n        )\n        return result\n
"},{"location":"cloud_run/#prefect_gcp.cloud_run.CloudRunJobResult","title":"CloudRunJobResult","text":"

Bases: InfrastructureResult

Result from a Cloud Run Job.

Source code in prefect_gcp/cloud_run.py
class CloudRunJobResult(InfrastructureResult):\n    \"\"\"Result from a Cloud Run Job.\"\"\"\n
"},{"location":"cloud_run/#prefect_gcp.cloud_run.Execution","title":"Execution","text":"

Bases: BaseModel

Utility class to call GCP executions API and interact with the returned objects.

Source code in prefect_gcp/cloud_run.py
class Execution(BaseModel):\n    \"\"\"\n    Utility class to call GCP `executions` API and\n    interact with the returned objects.\n    \"\"\"\n\n    name: str\n    namespace: str\n    metadata: dict\n    spec: dict\n    status: dict\n    log_uri: str\n\n    def is_running(self) -> bool:\n        \"\"\"Returns True if Execution is not completed.\"\"\"\n        return self.status.get(\"completionTime\") is None\n\n    def condition_after_completion(self):\n        \"\"\"Returns Execution condition if Execution has completed.\"\"\"\n        for condition in self.status[\"conditions\"]:\n            if condition[\"type\"] == \"Completed\":\n                return condition\n\n    def succeeded(self):\n        \"\"\"Whether or not the Execution completed is a successful state.\"\"\"\n        completed_condition = self.condition_after_completion()\n        if completed_condition and completed_condition[\"status\"] == \"True\":\n            return True\n\n        return False\n\n    @classmethod\n    def get(cls, client: Resource, namespace: str, execution_name: str):\n        \"\"\"\n        Make a get request to the GCP executions API\n        and return an Execution instance.\n        \"\"\"\n        request = client.executions().get(\n            name=f\"namespaces/{namespace}/executions/{execution_name}\"\n        )\n        response = request.execute()\n\n        return cls(\n            name=response[\"metadata\"][\"name\"],\n            namespace=response[\"metadata\"][\"namespace\"],\n            metadata=response[\"metadata\"],\n            spec=response[\"spec\"],\n            status=response[\"status\"],\n            log_uri=response[\"status\"][\"logUri\"],\n        )\n
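The following is a rough sketch (not from the source) of polling an execution directly with this class; the project, region, execution name, and key file path are placeholders, and the client is built the same way as in the Cloud Run worker's _get_client method.

import time

from google.api_core.client_options import ClientOptions
from googleapiclient import discovery

from prefect_gcp import GcpCredentials
from prefect_gcp.cloud_run import Execution

project = "my-gcp-project"          # placeholder project ID
region = "us-central1"              # placeholder region
execution_name = "my-job-abc123"    # placeholder execution name

creds = GcpCredentials(service_account_file="service-account.json")
client = discovery.build(
    "run",
    "v1",
    client_options=ClientOptions(api_endpoint=f"https://{region}-run.googleapis.com"),
    credentials=creds.get_credentials_from_service_account(),
).namespaces()

execution = Execution.get(client, namespace=project, execution_name=execution_name)
while execution.is_running():
    time.sleep(5)  # poll every few seconds, as the worker does
    execution = Execution.get(client, namespace=project, execution_name=execution_name)

print("succeeded:", execution.succeeded())
print("logs:", execution.log_uri)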
"},{"location":"cloud_run/#prefect_gcp.cloud_run.Execution-functions","title":"Functions","text":""},{"location":"cloud_run/#prefect_gcp.cloud_run.Execution.condition_after_completion","title":"condition_after_completion","text":"

Returns Execution condition if Execution has completed.

Source code in prefect_gcp/cloud_run.py
def condition_after_completion(self):\n    \"\"\"Returns Execution condition if Execution has completed.\"\"\"\n    for condition in self.status[\"conditions\"]:\n        if condition[\"type\"] == \"Completed\":\n            return condition\n
"},{"location":"cloud_run/#prefect_gcp.cloud_run.Execution.get","title":"get classmethod","text":"

Make a get request to the GCP executions API and return an Execution instance.

Source code in prefect_gcp/cloud_run.py
@classmethod\ndef get(cls, client: Resource, namespace: str, execution_name: str):\n    \"\"\"\n    Make a get request to the GCP executions API\n    and return an Execution instance.\n    \"\"\"\n    request = client.executions().get(\n        name=f\"namespaces/{namespace}/executions/{execution_name}\"\n    )\n    response = request.execute()\n\n    return cls(\n        name=response[\"metadata\"][\"name\"],\n        namespace=response[\"metadata\"][\"namespace\"],\n        metadata=response[\"metadata\"],\n        spec=response[\"spec\"],\n        status=response[\"status\"],\n        log_uri=response[\"status\"][\"logUri\"],\n    )\n
"},{"location":"cloud_run/#prefect_gcp.cloud_run.Execution.is_running","title":"is_running","text":"

Returns True if Execution is not completed.

Source code in prefect_gcp/cloud_run.py
def is_running(self) -> bool:\n    \"\"\"Returns True if Execution is not completed.\"\"\"\n    return self.status.get(\"completionTime\") is None\n
"},{"location":"cloud_run/#prefect_gcp.cloud_run.Execution.succeeded","title":"succeeded","text":"

Whether or not the Execution completed in a successful state.

Source code in prefect_gcp/cloud_run.py
def succeeded(self):\n    \"\"\"Whether or not the Execution completed is a successful state.\"\"\"\n    completed_condition = self.condition_after_completion()\n    if completed_condition and completed_condition[\"status\"] == \"True\":\n        return True\n\n    return False\n
"},{"location":"cloud_run/#prefect_gcp.cloud_run.Job","title":"Job","text":"

Bases: BaseModel

Utility class to call GCP jobs API and interact with the returned objects.

Source code in prefect_gcp/cloud_run.py
class Job(BaseModel):\n    \"\"\"\n    Utility class to call GCP `jobs` API and\n    interact with the returned objects.\n    \"\"\"\n\n    metadata: dict\n    spec: dict\n    status: dict\n    name: str\n    ready_condition: dict\n    execution_status: dict\n\n    def _is_missing_container(self):\n        \"\"\"\n        Check if Job status is not ready because\n        the specified container cannot be found.\n        \"\"\"\n        if (\n            self.ready_condition.get(\"status\") == \"False\"\n            and self.ready_condition.get(\"reason\") == \"ContainerMissing\"\n        ):\n            return True\n        return False\n\n    def is_ready(self) -> bool:\n        \"\"\"Whether a job is finished registering and ready to be executed\"\"\"\n        if self._is_missing_container():\n            raise Exception(f\"{self.ready_condition['message']}\")\n        return self.ready_condition.get(\"status\") == \"True\"\n\n    def has_execution_in_progress(self) -> bool:\n        \"\"\"See if job has a run in progress.\"\"\"\n        return (\n            self.execution_status == {}\n            or self.execution_status.get(\"completionTimestamp\") is None\n        )\n\n    @staticmethod\n    def _get_ready_condition(job: dict) -> dict:\n        \"\"\"Utility to access JSON field containing ready condition.\"\"\"\n        if job[\"status\"].get(\"conditions\"):\n            for condition in job[\"status\"][\"conditions\"]:\n                if condition[\"type\"] == \"Ready\":\n                    return condition\n\n        return {}\n\n    @staticmethod\n    def _get_execution_status(job: dict):\n        \"\"\"Utility to access JSON field containing execution status.\"\"\"\n        if job[\"status\"].get(\"latestCreatedExecution\"):\n            return job[\"status\"][\"latestCreatedExecution\"]\n\n        return {}\n\n    @classmethod\n    def get(cls, client: Resource, namespace: str, job_name: str):\n        \"\"\"Make a get request to the GCP jobs API and return a Job instance.\"\"\"\n        request = client.jobs().get(name=f\"namespaces/{namespace}/jobs/{job_name}\")\n        response = request.execute()\n\n        return cls(\n            metadata=response[\"metadata\"],\n            spec=response[\"spec\"],\n            status=response[\"status\"],\n            name=response[\"metadata\"][\"name\"],\n            ready_condition=cls._get_ready_condition(response),\n            execution_status=cls._get_execution_status(response),\n        )\n\n    @staticmethod\n    def create(client: Resource, namespace: str, body: dict):\n        \"\"\"Make a create request to the GCP jobs API.\"\"\"\n        request = client.jobs().create(parent=f\"namespaces/{namespace}\", body=body)\n        response = request.execute()\n        return response\n\n    @staticmethod\n    def delete(client: Resource, namespace: str, job_name: str):\n        \"\"\"Make a delete request to the GCP jobs API.\"\"\"\n        request = client.jobs().delete(name=f\"namespaces/{namespace}/jobs/{job_name}\")\n        response = request.execute()\n        return response\n\n    @staticmethod\n    def run(client: Resource, namespace: str, job_name: str):\n        \"\"\"Make a run request to the GCP jobs API.\"\"\"\n        request = client.jobs().run(name=f\"namespaces/{namespace}/jobs/{job_name}\")\n        response = request.execute()\n        return response\n
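As another hedged sketch (not from the source), the class and static methods above can be called against an existing job using the same v1 namespaces client; the project, region, job name, and key file path are placeholders.

from google.api_core.client_options import ClientOptions
from googleapiclient import discovery

from prefect_gcp import GcpCredentials
from prefect_gcp.cloud_run import Job

project = "my-gcp-project"       # placeholder project ID
region = "us-central1"           # placeholder region
job_name = "my-existing-job"     # placeholder job already registered on Cloud Run

creds = GcpCredentials(service_account_file="service-account.json")
client = discovery.build(
    "run",
    "v1",
    client_options=ClientOptions(api_endpoint=f"https://{region}-run.googleapis.com"),
    credentials=creds.get_credentials_from_service_account(),
).namespaces()

job = Job.get(client, namespace=project, job_name=job_name)
if job.is_ready() and not job.has_execution_in_progress():
    Job.run(client, namespace=project, job_name=job_name)    # trigger a new execution
# Job.delete(client, namespace=project, job_name=job_name)   # clean up when finished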
"},{"location":"cloud_run/#prefect_gcp.cloud_run.Job-functions","title":"Functions","text":""},{"location":"cloud_run/#prefect_gcp.cloud_run.Job.create","title":"create staticmethod","text":"

Make a create request to the GCP jobs API.

Source code in prefect_gcp/cloud_run.py
@staticmethod\ndef create(client: Resource, namespace: str, body: dict):\n    \"\"\"Make a create request to the GCP jobs API.\"\"\"\n    request = client.jobs().create(parent=f\"namespaces/{namespace}\", body=body)\n    response = request.execute()\n    return response\n
"},{"location":"cloud_run/#prefect_gcp.cloud_run.Job.delete","title":"delete staticmethod","text":"

Make a delete request to the GCP jobs API.

Source code in prefect_gcp/cloud_run.py
@staticmethod\ndef delete(client: Resource, namespace: str, job_name: str):\n    \"\"\"Make a delete request to the GCP jobs API.\"\"\"\n    request = client.jobs().delete(name=f\"namespaces/{namespace}/jobs/{job_name}\")\n    response = request.execute()\n    return response\n
"},{"location":"cloud_run/#prefect_gcp.cloud_run.Job.get","title":"get classmethod","text":"

Make a get request to the GCP jobs API and return a Job instance.

Source code in prefect_gcp/cloud_run.py
@classmethod\ndef get(cls, client: Resource, namespace: str, job_name: str):\n    \"\"\"Make a get request to the GCP jobs API and return a Job instance.\"\"\"\n    request = client.jobs().get(name=f\"namespaces/{namespace}/jobs/{job_name}\")\n    response = request.execute()\n\n    return cls(\n        metadata=response[\"metadata\"],\n        spec=response[\"spec\"],\n        status=response[\"status\"],\n        name=response[\"metadata\"][\"name\"],\n        ready_condition=cls._get_ready_condition(response),\n        execution_status=cls._get_execution_status(response),\n    )\n
"},{"location":"cloud_run/#prefect_gcp.cloud_run.Job.has_execution_in_progress","title":"has_execution_in_progress","text":"

See if job has a run in progress.

Source code in prefect_gcp/cloud_run.py
def has_execution_in_progress(self) -> bool:\n    \"\"\"See if job has a run in progress.\"\"\"\n    return (\n        self.execution_status == {}\n        or self.execution_status.get(\"completionTimestamp\") is None\n    )\n
"},{"location":"cloud_run/#prefect_gcp.cloud_run.Job.is_ready","title":"is_ready","text":"

Whether a job is finished registering and ready to be executed

Source code in prefect_gcp/cloud_run.py
def is_ready(self) -> bool:\n    \"\"\"Whether a job is finished registering and ready to be executed\"\"\"\n    if self._is_missing_container():\n        raise Exception(f\"{self.ready_condition['message']}\")\n    return self.ready_condition.get(\"status\") == \"True\"\n
"},{"location":"cloud_run/#prefect_gcp.cloud_run.Job.run","title":"run staticmethod","text":"

Make a run request to the GCP jobs API.

Source code in prefect_gcp/cloud_run.py
@staticmethod\ndef run(client: Resource, namespace: str, job_name: str):\n    \"\"\"Make a run request to the GCP jobs API.\"\"\"\n    request = client.jobs().run(name=f\"namespaces/{namespace}/jobs/{job_name}\")\n    response = request.execute()\n    return response\n
"},{"location":"cloud_run_worker/","title":"Cloud Run","text":""},{"location":"cloud_run_worker/#prefect_gcp.workers.cloud_run","title":"prefect_gcp.workers.cloud_run","text":"

Module containing the Cloud Run worker used for executing flow runs as Cloud Run jobs.

Get started by creating a Cloud Run work pool:

prefect work-pool create 'my-cloud-run-pool' --type cloud-run\n

Then start a Cloud Run worker with the following command:

prefect worker start --pool 'my-cloud-run-pool'\n
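Once the pool and worker exist, a deployment can target the pool. The following is a minimal, hedged sketch using Prefect's flow.deploy API; the flow, image, and names below are placeholders rather than anything defined by this module.

from prefect import flow


@flow(log_prints=True)
def my_flow():
    print("Hello from Cloud Run!")


if __name__ == "__main__":
    # Build and push a placeholder image, then register a deployment
    # against the Cloud Run work pool created above.
    my_flow.deploy(
        name="my-cloud-run-deployment",
        work_pool_name="my-cloud-run-pool",
        image="us-docker.pkg.dev/my-project/my-repo/my-flow:latest",
    )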
"},{"location":"cloud_run_worker/#prefect_gcp.workers.cloud_run--configuration","title":"Configuration","text":"

Read more about configuring work pools here.

"},{"location":"cloud_run_worker/#prefect_gcp.workers.cloud_run--advanced-configuration","title":"Advanced Configuration","text":"

Using a custom Cloud Run job template

Below is the default job body template used by the Cloud Run Worker:

{\n    \"apiVersion\": \"run.googleapis.com/v1\",\n    \"kind\": \"Job\",\n    \"metadata\":\n        {\n            \"name\": \"{{ name }}\",\n            \"annotations\":\n            {\n                \"run.googleapis.com/launch-stage\": \"BETA\",\n            }\n        },\n        \"spec\":\n        {\n            \"template\":\n            {\n                \"spec\":\n                {\n                    \"template\":\n                    {\n                        \"spec\":\n                        {\n                            \"containers\":\n                            [\n                                {\n                                    \"image\": \"{{ image }}\",\n                                    \"args\": \"{{ args }}\",\n                                    \"resources\":\n                                    {\n                                        \"limits\":\n                                        {\n                                            \"cpu\": \"{{ cpu }}\",\n                                            \"memory\": \"{{ memory }}\"\n                                        },\n                                        \"requests\":\n                                        {\n                                            \"cpu\": \"{{ cpu }}\",\n                                            \"memory\": \"{{ memory }}\"\n                                        }\n                                    }\n                                }\n                            ],\n                            \"timeoutSeconds\": \"{{ timeout }}\",\n                            \"serviceAccountName\": \"{{ service_account_name }}\"\n                        }\n                    }\n                }\n                }\n            },\n            \"metadata\":\n            {\n                \"annotations\":\n                {\n                    \"run.googleapis.com/vpc-access-connector\": \"{{ vpc_connector_name }}\"\n                }\n            }\n        },\n    },\n    \"timeout\": \"{{ timeout }}\",\n    \"keep_job\": \"{{ keep_job }}\"\n}\n
Each value enclosed in {{ }} is a placeholder that will be replaced with a value at runtime on a per-deployment basis. The values that can be used as placeholders are defined by the variables schema defined in the base job template.

The default job body template and available variables can be customized on a per-work-pool basis. By editing the default job body template, you can:

  • Add additional placeholders to the default job template
  • Remove placeholders from the default job template
  • Pass values to Cloud Run that are not defined in the variables schema
"},{"location":"cloud_run_worker/#prefect_gcp.workers.cloud_run--adding-additional-placeholders","title":"Adding additional placeholders","text":"

For example, to allow a new annotation that is not described in the default job template to be customized per deployment, you can add the following:

{\n    \"apiVersion\": \"run.googleapis.com/v1\",\n    \"kind\": \"Job\",\n    \"metadata\":\n    {\n        \"name\": \"{{ name }}\",\n        \"annotations\":\n        {\n            \"run.googleapis.com/my-custom-annotation\": \"{{ my_custom_annotation }}\",\n            \"run.googleapis.com/launch-stage\": \"BETA\",\n        },\n      ...\n    },\n  ...\n}\n
my_custom_annotation can now be used as a placeholder in the job template and set on a per-deployment basis.

# deployment.yaml\n...\ninfra_overrides: {\"my_custom_annotation\": \"my-custom-value\"}\n

Additionally, fields can be pinned to prevent configuration at the deployment level. For example, to fix the vpc_connector_name field, the placeholder can be removed and replaced with an actual value. All deployments that point to this work pool will then use the same vpc_connector_name value.

{\n    \"apiVersion\": \"run.googleapis.com/v1\",\n    \"kind\": \"Job\",\n    \"spec\":\n    {\n        \"template\":\n        {\n            \"metadata\":\n            {\n                \"annotations\":\n                {\n                    \"run.googleapis.com/vpc-access-connector\": \"my-vpc-connector\"\n                }\n            },\n            ...\n        },\n        ...\n    }\n}\n
"},{"location":"cloud_run_worker/#prefect_gcp.workers.cloud_run-classes","title":"Classes","text":""},{"location":"cloud_run_worker/#prefect_gcp.workers.cloud_run.CloudRunWorker","title":"CloudRunWorker","text":"

Bases: BaseWorker

Prefect worker that executes flow runs within Cloud Run Jobs.

Source code in prefect_gcp/workers/cloud_run.py
class CloudRunWorker(BaseWorker):\n    \"\"\"Prefect worker that executes flow runs within Cloud Run Jobs.\"\"\"\n\n    type = \"cloud-run\"\n    job_configuration = CloudRunWorkerJobConfiguration\n    job_configuration_variables = CloudRunWorkerVariables\n    _description = (\n        \"Execute flow runs within containers on Google Cloud Run. Requires \"\n        \"a Google Cloud Platform account.\"\n    )\n    _display_name = \"Google Cloud Run\"\n    _documentation_url = \"https://prefecthq.github.io/prefect-gcp/cloud_run_worker/\"\n    _logo_url = \"https://cdn.sanity.io/images/3ugk85nk/production/10424e311932e31c477ac2b9ef3d53cefbaad708-250x250.png\"  # noqa\n\n    def _create_job_error(self, exc, configuration):\n        \"\"\"Provides a nicer error for 404s when trying to create a Cloud Run Job.\"\"\"\n        # TODO consider lookup table instead of the if/else,\n        # also check for documented errors\n        if exc.status_code == 404:\n            raise RuntimeError(\n                f\"Failed to find resources at {exc.uri}. Confirm that region\"\n                f\" '{self.region}' is the correct region for your Cloud Run Job and\"\n                f\" that {configuration.project} is the correct GCP project. If\"\n                f\" your project ID is not correct, you are using a Credentials block\"\n                f\" with permissions for the wrong project.\"\n            ) from exc\n        raise exc\n\n    def _job_run_submission_error(self, exc, configuration):\n        \"\"\"Provides a nicer error for 404s when submitting job runs.\"\"\"\n        if exc.status_code == 404:\n            pat1 = r\"The requested URL [^ ]+ was not found on this server\"\n            # pat2 = (\n            #     r\"Resource '[^ ]+' of kind 'JOB' in region '[\\w\\-0-9]+' \"\n            #     r\"in project '[\\w\\-0-9]+' does not exist\"\n            # )\n            if re.findall(pat1, str(exc)):\n                raise RuntimeError(\n                    f\"Failed to find resources at {exc.uri}. \"\n                    f\"Confirm that region '{self.region}' is \"\n                    f\"the correct region for your Cloud Run Job \"\n                    f\"and that '{configuration.project}' is the \"\n                    f\"correct GCP project. If your project ID is not \"\n                    f\"correct, you are using a Credentials \"\n                    f\"block with permissions for the wrong project.\"\n                ) from exc\n            else:\n                raise exc\n\n        raise exc\n\n    async def run(\n        self,\n        flow_run: \"FlowRun\",\n        configuration: CloudRunWorkerJobConfiguration,\n        task_status: Optional[anyio.abc.TaskStatus] = None,\n    ) -> CloudRunWorkerResult:\n        \"\"\"\n        Executes a flow run within a Cloud Run Job and waits for the flow run\n        to complete.\n\n        Args:\n            flow_run: The flow run to execute\n            configuration: The configuration to use when executing the flow run.\n            task_status: The task status object for the current flow run. 
If provided,\n                the task will be marked as started.\n\n        Returns:\n            CloudRunWorkerResult: A result object containing information about the\n                final state of the flow run\n        \"\"\"\n\n        logger = self.get_flow_run_logger(flow_run)\n\n        with self._get_client(configuration) as client:\n            await run_sync_in_worker_thread(\n                self._create_job_and_wait_for_registration,\n                configuration,\n                client,\n                logger,\n            )\n            job_execution = await run_sync_in_worker_thread(\n                self._begin_job_execution, configuration, client, logger\n            )\n\n            if task_status:\n                task_status.started(configuration.job_name)\n\n            result = await run_sync_in_worker_thread(\n                self._watch_job_execution_and_get_result,\n                configuration,\n                client,\n                job_execution,\n                logger,\n            )\n            return result\n\n    def _get_client(self, configuration: CloudRunWorkerJobConfiguration) -> Resource:\n        \"\"\"Get the base client needed for interacting with GCP APIs.\"\"\"\n        # region needed for 'v1' API\n        api_endpoint = f\"https://{configuration.region}-run.googleapis.com\"\n        gcp_creds = configuration.credentials.get_credentials_from_service_account()\n        options = ClientOptions(api_endpoint=api_endpoint)\n\n        return discovery.build(\n            \"run\", \"v1\", client_options=options, credentials=gcp_creds\n        ).namespaces()\n\n    def _create_job_and_wait_for_registration(\n        self,\n        configuration: CloudRunWorkerJobConfiguration,\n        client: Resource,\n        logger: PrefectLogAdapter,\n    ) -> None:\n        \"\"\"Create a new job wait for it to finish registering.\"\"\"\n        try:\n            logger.info(f\"Creating Cloud Run Job {configuration.job_name}\")\n\n            Job.create(\n                client=client,\n                namespace=configuration.credentials.project,\n                body=configuration.job_body,\n            )\n        except googleapiclient.errors.HttpError as exc:\n            self._create_job_error(exc, configuration)\n\n        try:\n            self._wait_for_job_creation(\n                client=client, configuration=configuration, logger=logger\n            )\n        except Exception:\n            logger.exception(\n                \"Encountered an exception while waiting for job run creation\"\n            )\n            if not configuration.keep_job:\n                logger.info(\n                    f\"Deleting Cloud Run Job {configuration.job_name} from \"\n                    \"Google Cloud Run.\"\n                )\n                try:\n                    Job.delete(\n                        client=client,\n                        namespace=configuration.credentials.project,\n                        job_name=configuration.job_name,\n                    )\n                except Exception:\n                    logger.exception(\n                        \"Received an unexpected exception while attempting to delete\"\n                        f\" Cloud Run Job {configuration.job_name!r}\"\n                    )\n            raise\n\n    def _begin_job_execution(\n        self,\n        configuration: CloudRunWorkerJobConfiguration,\n        client: Resource,\n        logger: PrefectLogAdapter,\n    ) -> Execution:\n        \"\"\"Submit a job run 
for execution and return the execution object.\"\"\"\n        try:\n            logger.info(\n                f\"Submitting Cloud Run Job {configuration.job_name!r} for execution.\"\n            )\n            submission = Job.run(\n                client=client,\n                namespace=configuration.project,\n                job_name=configuration.job_name,\n            )\n\n            job_execution = Execution.get(\n                client=client,\n                namespace=submission[\"metadata\"][\"namespace\"],\n                execution_name=submission[\"metadata\"][\"name\"],\n            )\n        except Exception as exc:\n            self._job_run_submission_error(exc, configuration)\n\n        return job_execution\n\n    def _watch_job_execution_and_get_result(\n        self,\n        configuration: CloudRunWorkerJobConfiguration,\n        client: Resource,\n        execution: Execution,\n        logger: PrefectLogAdapter,\n        poll_interval: int = 5,\n    ) -> CloudRunWorkerResult:\n        \"\"\"Wait for execution to complete and then return result.\"\"\"\n        try:\n            job_execution = self._watch_job_execution(\n                client=client,\n                job_execution=execution,\n                timeout=configuration.timeout,\n                poll_interval=poll_interval,\n            )\n        except Exception:\n            logger.exception(\n                \"Received an unexpected exception while monitoring Cloud Run Job \"\n                f\"{configuration.job_name!r}\"\n            )\n            raise\n\n        if job_execution.succeeded():\n            status_code = 0\n            logger.info(f\"Job Run {configuration.job_name} completed successfully\")\n        else:\n            status_code = 1\n            error_msg = job_execution.condition_after_completion()[\"message\"]\n            logger.error(\n                \"Job Run {configuration.job_name} did not complete successfully. 
\"\n                f\"{error_msg}\"\n            )\n\n        logger.info(f\"Job Run logs can be found on GCP at: {job_execution.log_uri}\")\n\n        if not configuration.keep_job:\n            logger.info(\n                f\"Deleting completed Cloud Run Job {configuration.job_name!r} \"\n                \"from Google Cloud Run...\"\n            )\n            try:\n                Job.delete(\n                    client=client,\n                    namespace=configuration.project,\n                    job_name=configuration.job_name,\n                )\n            except Exception:\n                logger.exception(\n                    \"Received an unexpected exception while attempting to delete Cloud\"\n                    f\" Run Job {configuration.job_name}\"\n                )\n\n        return CloudRunWorkerResult(\n            identifier=configuration.job_name, status_code=status_code\n        )\n\n    def _watch_job_execution(\n        self, client, job_execution: Execution, timeout: int, poll_interval: int = 5\n    ):\n        \"\"\"\n        Update job_execution status until it is no longer running or timeout is reached.\n        \"\"\"\n        t0 = time.time()\n        while job_execution.is_running():\n            job_execution = Execution.get(\n                client=client,\n                namespace=job_execution.namespace,\n                execution_name=job_execution.name,\n            )\n\n            elapsed_time = time.time() - t0\n            if timeout is not None and elapsed_time > timeout:\n                raise RuntimeError(\n                    f\"Timed out after {elapsed_time}s while waiting for Cloud Run Job \"\n                    \"execution to complete. Your job may still be running on GCP.\"\n                )\n\n            time.sleep(poll_interval)\n\n        return job_execution\n\n    def _wait_for_job_creation(\n        self,\n        client: Resource,\n        configuration: CloudRunWorkerJobConfiguration,\n        logger: PrefectLogAdapter,\n        poll_interval: int = 5,\n    ):\n        \"\"\"Give created job time to register.\"\"\"\n        job = Job.get(\n            client=client,\n            namespace=configuration.project,\n            job_name=configuration.job_name,\n        )\n\n        t0 = time.time()\n        while not job.is_ready():\n            ready_condition = (\n                job.ready_condition\n                if job.ready_condition\n                else \"waiting for condition update\"\n            )\n            logger.info(f\"Job is not yet ready... Current condition: {ready_condition}\")\n            job = Job.get(\n                client=client,\n                namespace=configuration.project,\n                job_name=configuration.job_name,\n            )\n\n            elapsed_time = time.time() - t0\n            if (\n                configuration.timeout is not None\n                and elapsed_time > configuration.timeout\n            ):\n                raise RuntimeError(\n                    f\"Timed out after {elapsed_time}s while waiting for Cloud Run Job \"\n                    \"execution to complete. 
Your job may still be running on GCP.\"\n                )\n\n            time.sleep(poll_interval)\n\n    async def kill_infrastructure(\n        self,\n        infrastructure_pid: str,\n        configuration: CloudRunWorkerJobConfiguration,\n        grace_seconds: int = 30,\n    ):\n        \"\"\"\n        Stops a job for a cancelled flow run based on the provided infrastructure PID\n        and run configuration.\n        \"\"\"\n        if grace_seconds != 30:\n            self._logger.warning(\n                f\"Kill grace period of {grace_seconds}s requested, but GCP does not \"\n                \"support dynamic grace period configuration. See here for more info: \"\n                \"https://cloud.google.com/run/docs/reference/rest/v1/namespaces.jobs/delete\"  # noqa\n            )\n\n        with self._get_client(configuration) as client:\n            await run_sync_in_worker_thread(\n                self._stop_job,\n                client=client,\n                namespace=configuration.project,\n                job_name=infrastructure_pid,\n            )\n\n    def _stop_job(self, client: Resource, namespace: str, job_name: str):\n        try:\n            Job.delete(client=client, namespace=namespace, job_name=job_name)\n        except Exception as exc:\n            if \"does not exist\" in str(exc):\n                raise InfrastructureNotFound(\n                    f\"Cannot stop Cloud Run Job; the job name {job_name!r} \"\n                    \"could not be found.\"\n                ) from exc\n            raise\n
"},{"location":"cloud_run_worker/#prefect_gcp.workers.cloud_run.CloudRunWorker-functions","title":"Functions","text":""},{"location":"cloud_run_worker/#prefect_gcp.workers.cloud_run.CloudRunWorker.kill_infrastructure","title":"kill_infrastructure async","text":"

Stops a job for a cancelled flow run based on the provided infrastructure PID and run configuration.

Source code in prefect_gcp/workers/cloud_run.py
async def kill_infrastructure(\n    self,\n    infrastructure_pid: str,\n    configuration: CloudRunWorkerJobConfiguration,\n    grace_seconds: int = 30,\n):\n    \"\"\"\n    Stops a job for a cancelled flow run based on the provided infrastructure PID\n    and run configuration.\n    \"\"\"\n    if grace_seconds != 30:\n        self._logger.warning(\n            f\"Kill grace period of {grace_seconds}s requested, but GCP does not \"\n            \"support dynamic grace period configuration. See here for more info: \"\n            \"https://cloud.google.com/run/docs/reference/rest/v1/namespaces.jobs/delete\"  # noqa\n        )\n\n    with self._get_client(configuration) as client:\n        await run_sync_in_worker_thread(\n            self._stop_job,\n            client=client,\n            namespace=configuration.project,\n            job_name=infrastructure_pid,\n        )\n
"},{"location":"cloud_run_worker/#prefect_gcp.workers.cloud_run.CloudRunWorker.run","title":"run async","text":"

Executes a flow run within a Cloud Run Job and waits for the flow run to complete.

Parameters:

Name Type Description Default flow_run FlowRun

The flow run to execute

required configuration CloudRunWorkerJobConfiguration

The configuration to use when executing the flow run.

required task_status Optional[TaskStatus]

The task status object for the current flow run. If provided, the task will be marked as started.

None

Returns:

Name Type Description CloudRunWorkerResult CloudRunWorkerResult

A result object containing information about the final state of the flow run

Source code in prefect_gcp/workers/cloud_run.py
async def run(\n    self,\n    flow_run: \"FlowRun\",\n    configuration: CloudRunWorkerJobConfiguration,\n    task_status: Optional[anyio.abc.TaskStatus] = None,\n) -> CloudRunWorkerResult:\n    \"\"\"\n    Executes a flow run within a Cloud Run Job and waits for the flow run\n    to complete.\n\n    Args:\n        flow_run: The flow run to execute\n        configuration: The configuration to use when executing the flow run.\n        task_status: The task status object for the current flow run. If provided,\n            the task will be marked as started.\n\n    Returns:\n        CloudRunWorkerResult: A result object containing information about the\n            final state of the flow run\n    \"\"\"\n\n    logger = self.get_flow_run_logger(flow_run)\n\n    with self._get_client(configuration) as client:\n        await run_sync_in_worker_thread(\n            self._create_job_and_wait_for_registration,\n            configuration,\n            client,\n            logger,\n        )\n        job_execution = await run_sync_in_worker_thread(\n            self._begin_job_execution, configuration, client, logger\n        )\n\n        if task_status:\n            task_status.started(configuration.job_name)\n\n        result = await run_sync_in_worker_thread(\n            self._watch_job_execution_and_get_result,\n            configuration,\n            client,\n            job_execution,\n            logger,\n        )\n        return result\n
"},{"location":"cloud_run_worker/#prefect_gcp.workers.cloud_run.CloudRunWorkerJobConfiguration","title":"CloudRunWorkerJobConfiguration","text":"

Bases: BaseJobConfiguration

Configuration class used by the Cloud Run Worker to create a Cloud Run Job.

An instance of this class is passed to the Cloud Run worker's run method for each flow run. It contains all information necessary to execute the flow run as a Cloud Run Job.

Attributes:

Name Type Description region str

The region where the Cloud Run Job resides.

credentials Optional[GcpCredentials]

The GCP Credentials used to connect to Cloud Run.

job_body Dict[str, Any]

The job body used to create the Cloud Run Job.

timeout Optional[int]

The length of time that Prefect will wait for a Cloud Run Job.

keep_job Optional[bool]

Whether to delete the Cloud Run Job after it completes.

Source code in prefect_gcp/workers/cloud_run.py
class CloudRunWorkerJobConfiguration(BaseJobConfiguration):\n    \"\"\"\n    Configuration class used by the Cloud Run Worker to create a Cloud Run Job.\n\n    An instance of this class is passed to the Cloud Run worker's `run` method\n    for each flow run. It contains all information necessary to execute\n    the flow run as a Cloud Run Job.\n\n    Attributes:\n        region: The region where the Cloud Run Job resides.\n        credentials: The GCP Credentials used to connect to Cloud Run.\n        job_body: The job body used to create the Cloud Run Job.\n        timeout: The length of time that Prefect will wait for a Cloud Run Job.\n        keep_job: Whether to delete the Cloud Run Job after it completes.\n    \"\"\"\n\n    region: str = Field(\n        default=\"us-central1\", description=\"The region where the Cloud Run Job resides.\"\n    )\n    credentials: Optional[GcpCredentials] = Field(\n        title=\"GCP Credentials\",\n        default_factory=GcpCredentials,\n        description=\"The GCP Credentials used to connect to Cloud Run. \"\n        \"If not provided credentials will be inferred from \"\n        \"the local environment.\",\n    )\n    job_body: Dict[str, Any] = Field(template=_get_default_job_body_template())\n    timeout: Optional[int] = Field(\n        default=600,\n        gt=0,\n        le=3600,\n        title=\"Job Timeout\",\n        description=(\n            \"The length of time that Prefect will wait for a Cloud Run Job to complete \"\n            \"before raising an exception.\"\n        ),\n    )\n    keep_job: Optional[bool] = Field(\n        default=False,\n        title=\"Keep Job After Completion\",\n        description=\"Keep the completed Cloud Run Job on Google Cloud Platform.\",\n    )\n\n    @property\n    def project(self) -> str:\n        \"\"\"property for accessing the project from the credentials.\"\"\"\n        return self.credentials.project\n\n    @property\n    def job_name(self) -> str:\n        \"\"\"property for accessing the name from the job metadata.\"\"\"\n        return self.job_body[\"metadata\"][\"name\"]\n\n    def prepare_for_flow_run(\n        self,\n        flow_run: \"FlowRun\",\n        deployment: Optional[\"DeploymentResponse\"] = None,\n        flow: Optional[\"Flow\"] = None,\n    ):\n        \"\"\"\n        Prepares the job configuration for a flow run.\n\n        Ensures that necessary values are present in the job body and that the\n        job body is valid.\n\n        Args:\n            flow_run: The flow run to prepare the job configuration for\n            deployment: The deployment associated with the flow run used for\n                preparation.\n            flow: The flow associated with the flow run used for preparation.\n        \"\"\"\n        super().prepare_for_flow_run(flow_run, deployment, flow)\n\n        self._populate_envs()\n        self._populate_or_format_command()\n        self._format_args_if_present()\n        self._populate_image_if_not_present()\n        self._populate_name_if_not_present()\n\n    def _populate_envs(self):\n        \"\"\"Populate environment variables. BaseWorker.prepare_for_flow_run handles\n        putting the environment variables in the `env` attribute. 
This method\n        moves them into the jobs body\"\"\"\n        envs = [{\"name\": k, \"value\": v} for k, v in self.env.items()]\n        self.job_body[\"spec\"][\"template\"][\"spec\"][\"template\"][\"spec\"][\"containers\"][0][\n            \"env\"\n        ] = envs\n\n    def _populate_name_if_not_present(self):\n        \"\"\"Adds the flow run name to the job if one is not already provided.\"\"\"\n        try:\n            if \"name\" not in self.job_body[\"metadata\"]:\n                base_job_name = slugify_name(self.name)\n                job_name = f\"{base_job_name}-{uuid4().hex}\"\n                self.job_body[\"metadata\"][\"name\"] = job_name\n        except KeyError:\n            raise ValueError(\"Unable to verify name due to invalid job body template.\")\n\n    def _populate_image_if_not_present(self):\n        \"\"\"Adds the latest prefect image to the job if one is not already provided.\"\"\"\n        try:\n            if (\n                \"image\"\n                not in self.job_body[\"spec\"][\"template\"][\"spec\"][\"template\"][\"spec\"][\n                    \"containers\"\n                ][0]\n            ):\n                self.job_body[\"spec\"][\"template\"][\"spec\"][\"template\"][\"spec\"][\n                    \"containers\"\n                ][0][\"image\"] = f\"docker.io/{get_prefect_image_name()}\"\n        except KeyError:\n            raise ValueError(\"Unable to verify image due to invalid job body template.\")\n\n    def _populate_or_format_command(self):\n        \"\"\"\n        Ensures that the command is present in the job manifest. Populates the command\n        with the `prefect -m prefect.engine` if a command is not present.\n        \"\"\"\n        try:\n            command = self.job_body[\"spec\"][\"template\"][\"spec\"][\"template\"][\"spec\"][\n                \"containers\"\n            ][0].get(\"command\")\n            if command is None:\n                self.job_body[\"spec\"][\"template\"][\"spec\"][\"template\"][\"spec\"][\n                    \"containers\"\n                ][0][\"command\"] = shlex.split(self._base_flow_run_command())\n            elif isinstance(command, str):\n                self.job_body[\"spec\"][\"template\"][\"spec\"][\"template\"][\"spec\"][\n                    \"containers\"\n                ][0][\"command\"] = shlex.split(command)\n        except KeyError:\n            raise ValueError(\n                \"Unable to verify command due to invalid job body template.\"\n            )\n\n    def _format_args_if_present(self):\n        try:\n            args = self.job_body[\"spec\"][\"template\"][\"spec\"][\"template\"][\"spec\"][\n                \"containers\"\n            ][0].get(\"args\")\n            if args is not None and isinstance(args, str):\n                self.job_body[\"spec\"][\"template\"][\"spec\"][\"template\"][\"spec\"][\n                    \"containers\"\n                ][0][\"args\"] = shlex.split(args)\n        except KeyError:\n            raise ValueError(\"Unable to verify args due to invalid job body template.\")\n\n    @validator(\"job_body\")\n    def _ensure_job_includes_all_required_components(cls, value: Dict[str, Any]):\n        \"\"\"\n        Ensures that the job body includes all required components.\n        \"\"\"\n        patch = JsonPatch.from_diff(value, _get_base_job_body())\n        missing_paths = sorted([op[\"path\"] for op in patch if op[\"op\"] == \"add\"])\n        if missing_paths:\n            raise ValueError(\n                \"Job is missing 
required attributes at the following paths: \"\n                f\"{', '.join(missing_paths)}\"\n            )\n        return value\n\n    @validator(\"job_body\")\n    def _ensure_job_has_compatible_values(cls, value: Dict[str, Any]):\n        \"\"\"Ensure that the job body has compatible values.\"\"\"\n        patch = JsonPatch.from_diff(value, _get_base_job_body())\n        incompatible = sorted(\n            [\n                f\"{op['path']} must have value {op['value']!r}\"\n                for op in patch\n                if op[\"op\"] == \"replace\"\n            ]\n        )\n        if incompatible:\n            raise ValueError(\n                \"Job has incompatible values for the following attributes: \"\n                f\"{', '.join(incompatible)}\"\n            )\n        return value\n
"},{"location":"cloud_run_worker/#prefect_gcp.workers.cloud_run.CloudRunWorkerJobConfiguration-attributes","title":"Attributes","text":""},{"location":"cloud_run_worker/#prefect_gcp.workers.cloud_run.CloudRunWorkerJobConfiguration.job_name","title":"job_name: str property","text":"

property for accessing the name from the job metadata.

"},{"location":"cloud_run_worker/#prefect_gcp.workers.cloud_run.CloudRunWorkerJobConfiguration.project","title":"project: str property","text":"

property for accessing the project from the credentials.

"},{"location":"cloud_run_worker/#prefect_gcp.workers.cloud_run.CloudRunWorkerJobConfiguration-functions","title":"Functions","text":""},{"location":"cloud_run_worker/#prefect_gcp.workers.cloud_run.CloudRunWorkerJobConfiguration.prepare_for_flow_run","title":"prepare_for_flow_run","text":"

Prepares the job configuration for a flow run.

Ensures that necessary values are present in the job body and that the job body is valid.

Parameters:

Name Type Description Default flow_run FlowRun

The flow run to prepare the job configuration for

required deployment Optional[DeploymentResponse]

The deployment associated with the flow run used for preparation.

None flow Optional[Flow]

The flow associated with the flow run used for preparation.

None Source code in prefect_gcp/workers/cloud_run.py
def prepare_for_flow_run(\n    self,\n    flow_run: \"FlowRun\",\n    deployment: Optional[\"DeploymentResponse\"] = None,\n    flow: Optional[\"Flow\"] = None,\n):\n    \"\"\"\n    Prepares the job configuration for a flow run.\n\n    Ensures that necessary values are present in the job body and that the\n    job body is valid.\n\n    Args:\n        flow_run: The flow run to prepare the job configuration for\n        deployment: The deployment associated with the flow run used for\n            preparation.\n        flow: The flow associated with the flow run used for preparation.\n    \"\"\"\n    super().prepare_for_flow_run(flow_run, deployment, flow)\n\n    self._populate_envs()\n    self._populate_or_format_command()\n    self._format_args_if_present()\n    self._populate_image_if_not_present()\n    self._populate_name_if_not_present()\n
"},{"location":"cloud_run_worker/#prefect_gcp.workers.cloud_run.CloudRunWorkerResult","title":"CloudRunWorkerResult","text":"

Bases: BaseWorkerResult

Contains information about the final state of a completed process

Source code in prefect_gcp/workers/cloud_run.py
class CloudRunWorkerResult(BaseWorkerResult):\n    \"\"\"Contains information about the final state of a completed process\"\"\"\n
"},{"location":"cloud_run_worker/#prefect_gcp.workers.cloud_run.CloudRunWorkerVariables","title":"CloudRunWorkerVariables","text":"

Bases: BaseVariables

Default variables for the Cloud Run worker.

The schema for this class is used to populate the variables section of the default base job template.

Source code in prefect_gcp/workers/cloud_run.py
class CloudRunWorkerVariables(BaseVariables):\n    \"\"\"\n    Default variables for the Cloud Run worker.\n\n    The schema for this class is used to populate the `variables` section of the default\n    base job template.\n    \"\"\"\n\n    region: str = Field(\n        default=\"us-central1\",\n        description=\"The region where the Cloud Run Job resides.\",\n        example=\"us-central1\",\n    )\n    credentials: Optional[GcpCredentials] = Field(\n        title=\"GCP Credentials\",\n        default_factory=GcpCredentials,\n        description=\"The GCP Credentials used to initiate the \"\n        \"Cloud Run Job. If not provided credentials will be \"\n        \"inferred from the local environment.\",\n    )\n    image: Optional[str] = Field(\n        default=None,\n        title=\"Image Name\",\n        description=(\n            \"The image to use for a new Cloud Run Job. \"\n            \"If not set, the latest Prefect image will be used. \"\n            \"See https://cloud.google.com/run/docs/deploying#images.\"\n        ),\n        example=\"docker.io/prefecthq/prefect:2-latest\",\n    )\n    cpu: Optional[str] = Field(\n        default=None,\n        title=\"CPU\",\n        description=(\n            \"The amount of compute allocated to the Cloud Run Job. \"\n            \"(1000m = 1 CPU). See \"\n            \"https://cloud.google.com/run/docs/configuring/cpu#setting-jobs.\"\n        ),\n        example=\"1000m\",\n        regex=r\"^(\\d*000)m$\",\n    )\n    memory: Optional[str] = Field(\n        default=None,\n        title=\"Memory\",\n        description=(\n            \"The amount of memory allocated to the Cloud Run Job. \"\n            \"Must be specified in units of 'G', 'Gi', 'M', or 'Mi'. \"\n            \"See https://cloud.google.com/run/docs/configuring/memory-limits#setting.\"\n        ),\n        example=\"512Mi\",\n        regex=r\"^\\d+(?:G|Gi|M|Mi)$\",\n    )\n    vpc_connector_name: Optional[str] = Field(\n        default=None,\n        title=\"VPC Connector Name\",\n        description=\"The name of the VPC connector to use for the Cloud Run Job.\",\n    )\n    service_account_name: Optional[str] = Field(\n        default=None,\n        title=\"Service Account Name\",\n        description=\"The name of the service account to use for the task execution \"\n        \"of Cloud Run Job. By default Cloud Run jobs run as the default \"\n        \"Compute Engine Service Account. \",\n        example=\"service-account@example.iam.gserviceaccount.com\",\n    )\n    keep_job: Optional[bool] = Field(\n        default=False,\n        title=\"Keep Job After Completion\",\n        description=\"Keep the completed Cloud Run Job after it has run.\",\n    )\n    timeout: Optional[int] = Field(\n        default=600,\n        gt=0,\n        le=3600,\n        title=\"Job Timeout\",\n        description=(\n            \"The length of time that Prefect will wait for Cloud Run Job state changes.\"\n        ),\n    )\n
"},{"location":"cloud_run_worker/#prefect_gcp.workers.cloud_run-functions","title":"Functions","text":""},{"location":"cloud_run_worker_v2/","title":"Cloud Run V2","text":""},{"location":"cloud_run_worker_v2/#prefect_gcp.workers.cloud_run_v2","title":"prefect_gcp.workers.cloud_run_v2","text":""},{"location":"cloud_run_worker_v2/#prefect_gcp.workers.cloud_run_v2-classes","title":"Classes","text":""},{"location":"cloud_run_worker_v2/#prefect_gcp.workers.cloud_run_v2.CloudRunWorkerJobV2Configuration","title":"CloudRunWorkerJobV2Configuration","text":"

Bases: BaseJobConfiguration

The configuration for the Cloud Run worker V2.

The schema for this class is used to populate the job_body section of the default base job template.

Source code in prefect_gcp/workers/cloud_run_v2.py
class CloudRunWorkerJobV2Configuration(BaseJobConfiguration):\n    \"\"\"\n    The configuration for the Cloud Run worker V2.\n\n    The schema for this class is used to populate the `job_body` section of the\n    default base job template.\n    \"\"\"\n\n    credentials: GcpCredentials = Field(\n        title=\"GCP Credentials\",\n        default_factory=GcpCredentials,\n        description=(\n            \"The GCP Credentials used to connect to Cloud Run. \"\n            \"If not provided credentials will be inferred from \"\n            \"the local environment.\"\n        ),\n    )\n    job_body: Dict[str, Any] = Field(\n        template=_get_default_job_body_template(),\n    )\n    keep_job: bool = Field(\n        default=False,\n        title=\"Keep Job After Completion\",\n        description=\"Keep the completed Cloud run job on Google Cloud Platform.\",\n    )\n    region: str = Field(\n        default=\"us-central1\",\n        description=\"The region in which to run the Cloud Run job\",\n    )\n    timeout: int = Field(\n        default=600,\n        gt=0,\n        le=86400,\n        description=(\n            \"The length of time that Prefect will wait for a Cloud Run Job to \"\n            \"complete before raising an exception.\"\n        ),\n    )\n    _job_name: str = PrivateAttr(default=None)\n\n    @property\n    def project(self) -> str:\n        \"\"\"\n        Returns the GCP project associated with the credentials.\n\n        Returns:\n            str: The GCP project associated with the credentials.\n        \"\"\"\n        return self.credentials.project\n\n    @property\n    def job_name(self) -> str:\n        \"\"\"\n        Returns the name of the job.\n\n        Returns:\n            str: The name of the job.\n        \"\"\"\n        if self._job_name is None:\n            base_job_name = slugify_name(self.name)\n            job_name = f\"{base_job_name}-{uuid4().hex}\"\n            self._job_name = job_name\n\n        return self._job_name\n\n    def prepare_for_flow_run(\n        self,\n        flow_run: \"FlowRun\",\n        deployment: Optional[\"DeploymentResponse\"] = None,\n        flow: Optional[\"Flow\"] = None,\n    ):\n        \"\"\"\n        Prepares the job configuration for a flow run.\n\n        Ensures that necessary values are present in the job body and that the\n        job body is valid.\n\n        Args:\n            flow_run: The flow run to prepare the job configuration for\n            deployment: The deployment associated with the flow run used for\n                preparation.\n            flow: The flow associated with the flow run used for preparation.\n        \"\"\"\n        super().prepare_for_flow_run(\n            flow_run=flow_run,\n            deployment=deployment,\n            flow=flow,\n        )\n\n        self._populate_env()\n        self._populate_or_format_command()\n        self._format_args_if_present()\n        self._populate_image_if_not_present()\n        self._populate_timeout()\n        self._populate_vpc_if_present()\n\n    def _populate_timeout(self):\n        \"\"\"\n        Populates the job body with the timeout.\n        \"\"\"\n        self.job_body[\"template\"][\"template\"][\"timeout\"] = f\"{self.timeout}s\"\n\n    def _populate_env(self):\n        \"\"\"\n        Populates the job body with environment variables.\n        \"\"\"\n        envs = [{\"name\": k, \"value\": v} for k, v in self.env.items()]\n\n        self.job_body[\"template\"][\"template\"][\"containers\"][0][\"env\"] = envs\n\n    def 
_populate_image_if_not_present(self):\n        \"\"\"\n        Populates the job body with the image if not present.\n        \"\"\"\n        if \"image\" not in self.job_body[\"template\"][\"template\"][\"containers\"][0]:\n            self.job_body[\"template\"][\"template\"][\"containers\"][0][\n                \"image\"\n            ] = f\"docker.io/{get_prefect_image_name()}\"\n\n    def _populate_or_format_command(self):\n        \"\"\"\n        Populates the job body with the command if not present.\n        \"\"\"\n        command = self.job_body[\"template\"][\"template\"][\"containers\"][0].get(\"command\")\n\n        if command is None:\n            self.job_body[\"template\"][\"template\"][\"containers\"][0][\n                \"command\"\n            ] = shlex.split(self._base_flow_run_command())\n        elif isinstance(command, str):\n            self.job_body[\"template\"][\"template\"][\"containers\"][0][\n                \"command\"\n            ] = shlex.split(command)\n\n    def _format_args_if_present(self):\n        \"\"\"\n        Formats the job body args if present.\n        \"\"\"\n        args = self.job_body[\"template\"][\"template\"][\"containers\"][0].get(\"args\")\n\n        if args is not None and isinstance(args, str):\n            self.job_body[\"template\"][\"template\"][\"containers\"][0][\n                \"args\"\n            ] = shlex.split(args)\n\n    def _populate_vpc_if_present(self):\n        \"\"\"\n        Populates the job body with the VPC connector if present.\n        \"\"\"\n        if self.job_body[\"template\"][\"template\"].get(\"vpcAccess\") is not None:\n            self.job_body[\"template\"][\"template\"][\"vpcAccess\"] = {\n                \"connector\": self.job_body[\"template\"][\"template\"][\"vpcAccess\"],\n            }\n\n    # noinspection PyMethodParameters\n    @validator(\"job_body\")\n    def _ensure_job_includes_all_required_components(cls, value: Dict[str, Any]):\n        \"\"\"\n        Ensures that the job body includes all required components.\n\n        Args:\n            value: The job body to validate.\n        Returns:\n            The validated job body.\n        \"\"\"\n        patch = JsonPatch.from_diff(value, _get_base_job_body())\n\n        missing_paths = sorted([op[\"path\"] for op in patch if op[\"op\"] == \"add\"])\n\n        if missing_paths:\n            raise ValueError(\n                f\"Job body is missing required components: {', '.join(missing_paths)}\"\n            )\n\n        return value\n\n    # noinspection PyMethodParameters\n    @validator(\"job_body\")\n    def _ensure_job_has_compatible_values(cls, value: Dict[str, Any]):\n        \"\"\"Ensure that the job body has compatible values.\"\"\"\n        patch = JsonPatch.from_diff(value, _get_base_job_body())\n        incompatible = sorted(\n            [\n                f\"{op['path']} must have value {op['value']!r}\"\n                for op in patch\n                if op[\"op\"] == \"replace\"\n            ]\n        )\n        if incompatible:\n            raise ValueError(\n                \"Job has incompatible values for the following attributes: \"\n                f\"{', '.join(incompatible)}\"\n            )\n        return value\n
"},{"location":"cloud_run_worker_v2/#prefect_gcp.workers.cloud_run_v2.CloudRunWorkerJobV2Configuration-attributes","title":"Attributes","text":""},{"location":"cloud_run_worker_v2/#prefect_gcp.workers.cloud_run_v2.CloudRunWorkerJobV2Configuration.job_name","title":"job_name: str property","text":"

Returns the name of the job.

Returns:

str: The name of the job.

"},{"location":"cloud_run_worker_v2/#prefect_gcp.workers.cloud_run_v2.CloudRunWorkerJobV2Configuration.project","title":"project: str property","text":"

Returns the GCP project associated with the credentials.

Returns:

str: The GCP project associated with the credentials.

"},{"location":"cloud_run_worker_v2/#prefect_gcp.workers.cloud_run_v2.CloudRunWorkerJobV2Configuration-functions","title":"Functions","text":""},{"location":"cloud_run_worker_v2/#prefect_gcp.workers.cloud_run_v2.CloudRunWorkerJobV2Configuration.prepare_for_flow_run","title":"prepare_for_flow_run","text":"

Prepares the job configuration for a flow run.

Ensures that necessary values are present in the job body and that the job body is valid.

Parameters:

flow_run (FlowRun, required): The flow run to prepare the job configuration for.
deployment (Optional[DeploymentResponse], default None): The deployment associated with the flow run used for preparation.
flow (Optional[Flow], default None): The flow associated with the flow run used for preparation.

Source code in prefect_gcp/workers/cloud_run_v2.py
def prepare_for_flow_run(\n    self,\n    flow_run: \"FlowRun\",\n    deployment: Optional[\"DeploymentResponse\"] = None,\n    flow: Optional[\"Flow\"] = None,\n):\n    \"\"\"\n    Prepares the job configuration for a flow run.\n\n    Ensures that necessary values are present in the job body and that the\n    job body is valid.\n\n    Args:\n        flow_run: The flow run to prepare the job configuration for\n        deployment: The deployment associated with the flow run used for\n            preparation.\n        flow: The flow associated with the flow run used for preparation.\n    \"\"\"\n    super().prepare_for_flow_run(\n        flow_run=flow_run,\n        deployment=deployment,\n        flow=flow,\n    )\n\n    self._populate_env()\n    self._populate_or_format_command()\n    self._format_args_if_present()\n    self._populate_image_if_not_present()\n    self._populate_timeout()\n    self._populate_vpc_if_present()\n
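To make the effect of these population steps concrete, here is a minimal sketch that mirrors what `_populate_env`, `_populate_or_format_command`, `_populate_timeout`, and `_populate_vpc_if_present` write into the job body, using a plain dict in place of the real template (the env var, command, and connector values are illustrative assumptions):

```python
import shlex

# Stripped-down stand-in for the Cloud Run V2 job body template.
job_body = {"template": {"template": {"containers": [{}]}}}
container = job_body["template"]["template"]["containers"][0]

# _populate_env: one {"name", "value"} mapping per environment variable.
env = {"PREFECT_API_URL": "https://api.prefect.cloud/api"}
container["env"] = [{"name": k, "value": v} for k, v in env.items()]

# _populate_or_format_command: a string command is split into a list.
container["command"] = shlex.split("python -m prefect.engine")

# _populate_timeout: the integer timeout becomes a string with an "s" suffix.
job_body["template"]["template"]["timeout"] = "600s"

# _populate_vpc_if_present: a bare connector name is wrapped in a mapping.
job_body["template"]["template"]["vpcAccess"] = {
    "connector": "projects/my-project/locations/us-central1/connectors/my-connector"
}
```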
"},{"location":"cloud_run_worker_v2/#prefect_gcp.workers.cloud_run_v2.CloudRunWorkerV2","title":"CloudRunWorkerV2","text":"

Bases: BaseWorker

The Cloud Run worker V2.

Source code in prefect_gcp/workers/cloud_run_v2.py
class CloudRunWorkerV2(BaseWorker):\n    \"\"\"\n    The Cloud Run worker V2.\n    \"\"\"\n\n    type = \"cloud-run-v2\"\n    job_configuration = CloudRunWorkerJobV2Configuration\n    job_configuration_variables = CloudRunWorkerV2Variables\n    _description = \"Execute flow runs within containers on Google Cloud Run (V2 API). Requires a Google Cloud Platform account.\"  # noqa\n    _display_name = \"Google Cloud Run V2\"\n    _documentation_url = \"https://prefecthq.github.io/prefect-gcp/worker_v2/\"\n    _logo_url = \"https://images.ctfassets.net/gm98wzqotmnx/4SpnOBvMYkHp6z939MDKP6/549a91bc1ce9afd4fb12c68db7b68106/social-icon-google-cloud-1200-630.png?h=250\"  # noqa\n\n    async def run(\n        self,\n        flow_run: \"FlowRun\",\n        configuration: CloudRunWorkerJobV2Configuration,\n        task_status: Optional[TaskStatus] = None,\n    ) -> CloudRunJobV2Result:\n        \"\"\"\n        Runs the flow run on Cloud Run and waits for it to complete.\n\n        Args:\n            flow_run: The flow run to run.\n            configuration: The configuration for the job.\n            task_status: The task status to update.\n\n        Returns:\n            The result of the job.\n        \"\"\"\n        logger = self.get_flow_run_logger(flow_run)\n\n        with self._get_client(configuration=configuration) as cr_client:\n            await run_sync_in_worker_thread(\n                self._create_job_and_wait_for_registration,\n                configuration=configuration,\n                cr_client=cr_client,\n                logger=logger,\n            )\n\n            execution = await run_sync_in_worker_thread(\n                self._begin_job_execution,\n                configuration=configuration,\n                cr_client=cr_client,\n                logger=logger,\n            )\n\n            if task_status:\n                task_status.started(configuration.job_name)\n\n            result = await run_sync_in_worker_thread(\n                self._watch_job_execution_and_get_result,\n                configuration=configuration,\n                cr_client=cr_client,\n                execution=execution,\n                logger=logger,\n            )\n\n            return result\n\n    async def kill_infrastructure(\n        self,\n        infrastructure_pid: str,\n        configuration: CloudRunWorkerJobV2Configuration,\n        grace_seconds: int = 30,\n    ):\n        \"\"\"\n        Stops the Cloud Run job.\n\n        Args:\n            infrastructure_pid: The ID of the infrastructure to stop.\n            configuration: The configuration for the job.\n            grace_seconds: The number of seconds to wait before stopping the job.\n        \"\"\"\n        if grace_seconds != 30:\n            self._logger.warning(\n                f\"Kill grace period of {grace_seconds}s requested, but GCP does not \"\n                \"support dynamic grace period configuration. 
See here for more info: \"\n                \"https://cloud.google.com/run/docs/reference/rest/v1/namespaces.jobs/delete\"  # noqa\n            )\n\n        with self._get_client(configuration=configuration) as cr_client:\n            await run_sync_in_worker_thread(\n                self._stop_job,\n                cr_client=cr_client,\n                configuration=configuration,\n                job_name=infrastructure_pid,\n            )\n\n    @staticmethod\n    def _get_client(\n        configuration: CloudRunWorkerJobV2Configuration,\n    ) -> ResourceWarning:\n        \"\"\"\n        Get the base client needed for interacting with GCP Cloud Run V2 API.\n\n        Returns:\n            Resource: The base client needed for interacting with GCP Cloud Run V2 API.\n        \"\"\"\n        api_endpoint = \"https://run.googleapis.com\"\n        gcp_creds = configuration.credentials.get_credentials_from_service_account()\n\n        options = ClientOptions(api_endpoint=api_endpoint)\n\n        return (\n            discovery.build(\n                \"run\",\n                \"v2\",\n                client_options=options,\n                credentials=gcp_creds,\n                num_retries=3,  # Set to 3 in case of intermittent/connection issues\n            )\n            .projects()\n            .locations()\n        )\n\n    def _create_job_and_wait_for_registration(\n        self,\n        configuration: CloudRunWorkerJobV2Configuration,\n        cr_client: Resource,\n        logger: PrefectLogAdapter,\n    ):\n        \"\"\"\n        Creates the Cloud Run job and waits for it to register.\n\n        Args:\n            configuration: The configuration for the job.\n            cr_client: The Cloud Run client.\n            logger: The logger to use.\n        \"\"\"\n        try:\n            logger.info(f\"Creating Cloud Run JobV2 {configuration.job_name}\")\n\n            JobV2.create(\n                cr_client=cr_client,\n                project=configuration.project,\n                location=configuration.region,\n                job_id=configuration.job_name,\n                body=configuration.job_body,\n            )\n        except HttpError as exc:\n            self._create_job_error(\n                exc=exc,\n                configuration=configuration,\n            )\n\n        try:\n            self._wait_for_job_creation(\n                cr_client=cr_client,\n                configuration=configuration,\n                logger=logger,\n            )\n        except Exception as exc:\n            logger.critical(\n                f\"Failed to create Cloud Run JobV2 {configuration.job_name}.\\n{exc}\"\n            )\n\n            if not configuration.keep_job:\n                try:\n                    JobV2.delete(\n                        cr_client=cr_client,\n                        project=configuration.project,\n                        location=configuration.region,\n                        job_name=configuration.job_name,\n                    )\n                except Exception as exc2:\n                    logger.critical(\n                        f\"Failed to delete Cloud Run JobV2 {configuration.job_name}.\"\n                        f\"\\n{exc2}\"\n                    )\n\n            raise\n\n    @staticmethod\n    def _wait_for_job_creation(\n        cr_client: Resource,\n        configuration: CloudRunWorkerJobV2Configuration,\n        logger: PrefectLogAdapter,\n        poll_interval: int = 5,\n    ):\n        \"\"\"\n        Waits for the Cloud Run job to be 
created.\n\n        Args:\n            cr_client: The Cloud Run client.\n            configuration: The configuration for the job.\n            logger: The logger to use.\n            poll_interval: The interval to poll the Cloud Run job, defaults to 5\n                seconds.\n        \"\"\"\n        job = JobV2.get(\n            cr_client=cr_client,\n            project=configuration.project,\n            location=configuration.region,\n            job_name=configuration.job_name,\n        )\n\n        t0 = time.time()\n\n        while not job.is_ready():\n            if not (ready_condition := job.get_ready_condition()):\n                ready_condition = \"waiting for condition update\"\n\n            logger.info(f\"Current Job Condition: {ready_condition}\")\n\n            job = JobV2.get(\n                cr_client=cr_client,\n                project=configuration.project,\n                location=configuration.region,\n                job_name=configuration.job_name,\n            )\n\n            elapsed_time = time.time() - t0\n\n            if elapsed_time > configuration.timeout:\n                raise RuntimeError(\n                    f\"Timeout of {configuration.timeout} seconds reached while \"\n                    f\"waiting for Cloud Run Job V2 {configuration.job_name} to be \"\n                    \"created.\"\n                )\n\n            time.sleep(poll_interval)\n\n    @staticmethod\n    def _create_job_error(\n        exc: HttpError,\n        configuration: CloudRunWorkerJobV2Configuration,\n    ):\n        \"\"\"\n        Creates a formatted error message for the Cloud Run V2 API errors\n        \"\"\"\n        # noinspection PyUnresolvedReferences\n        if exc.status_code == 404:\n            raise RuntimeError(\n                f\"Failed to find resources at {exc.uri}. Confirm that region\"\n                f\" '{configuration.region}' is the correct region for your Cloud\"\n                f\" Run Job and that {configuration.project} is the correct GCP \"\n                f\" project. 
If your project ID is not correct, you are using a \"\n                f\"Credentials block with permissions for the wrong project.\"\n            ) from exc\n\n        raise exc\n\n    def _begin_job_execution(\n        self,\n        cr_client: Resource,\n        configuration: CloudRunWorkerJobV2Configuration,\n        logger: PrefectLogAdapter,\n    ) -> ExecutionV2:\n        \"\"\"\n        Begins the Cloud Run job execution.\n\n        Args:\n            cr_client: The Cloud Run client.\n            configuration: The configuration for the job.\n            logger: The logger to use.\n\n        Returns:\n            The Cloud Run job execution.\n        \"\"\"\n        try:\n            logger.info(\n                f\"Submitting Cloud Run Job V2 {configuration.job_name} for execution...\"\n            )\n\n            submission = JobV2.run(\n                cr_client=cr_client,\n                project=configuration.project,\n                location=configuration.region,\n                job_name=configuration.job_name,\n            )\n\n            job_execution = ExecutionV2.get(\n                cr_client=cr_client,\n                execution_id=submission[\"metadata\"][\"name\"],\n            )\n\n            command = (\n                \" \".join(configuration.command)\n                if configuration.command\n                else \"default container command\"\n            )\n\n            logger.info(\n                f\"Cloud Run Job V2 {configuration.job_name} submitted for execution \"\n                f\"with command: {command}\"\n            )\n\n            return job_execution\n        except Exception as exc:\n            self._job_run_submission_error(\n                exc=exc,\n                configuration=configuration,\n            )\n            raise\n\n    def _watch_job_execution_and_get_result(\n        self,\n        cr_client: Resource,\n        configuration: CloudRunWorkerJobV2Configuration,\n        execution: ExecutionV2,\n        logger: PrefectLogAdapter,\n        poll_interval: int = 5,\n    ) -> CloudRunJobV2Result:\n        \"\"\"\n        Watch the job execution and get the result.\n\n        Args:\n            cr_client (Resource): The base client needed for interacting with GCP\n                Cloud Run V2 API.\n            configuration (CloudRunWorkerJobV2Configuration): The configuration for\n                the job.\n            execution (ExecutionV2): The execution to watch.\n            logger (PrefectLogAdapter): The logger to use.\n            poll_interval (int): The number of seconds to wait between polls.\n                Defaults to 5 seconds.\n\n        Returns:\n            The result of the job.\n        \"\"\"\n        try:\n            execution = self._watch_job_execution(\n                cr_client=cr_client,\n                configuration=configuration,\n                execution=execution,\n                poll_interval=poll_interval,\n            )\n        except Exception as exc:\n            logger.critical(\n                f\"Encountered an exception while waiting for job run completion - \"\n                f\"{exc}\"\n            )\n            raise\n\n        if execution.succeeded():\n            status_code = 0\n            logger.info(f\"Cloud Run Job V2 {configuration.job_name} succeeded\")\n        else:\n            status_code = 1\n            error_mg = execution.condition_after_completion().get(\"message\")\n            logger.error(\n                f\"Cloud Run Job V2 {configuration.job_name} 
failed - {error_mg}\"\n            )\n\n        logger.info(f\"Job run logs can be found on GCP at: {execution.logUri}\")\n\n        if not configuration.keep_job:\n            logger.info(\n                f\"Deleting completed Cloud Run Job {configuration.job_name!r} from \"\n                \"Google Cloud Run...\"\n            )\n\n            try:\n                JobV2.delete(\n                    cr_client=cr_client,\n                    project=configuration.project,\n                    location=configuration.region,\n                    job_name=configuration.job_name,\n                )\n            except Exception as exc:\n                logger.critical(\n                    \"Received an exception while deleting the Cloud Run Job V2 \"\n                    f\"- {configuration.job_name} - {exc}\"\n                )\n\n        return CloudRunJobV2Result(\n            identifier=configuration.job_name,\n            status_code=status_code,\n        )\n\n    # noinspection DuplicatedCode\n    @staticmethod\n    def _watch_job_execution(\n        cr_client: Resource,\n        configuration: CloudRunWorkerJobV2Configuration,\n        execution: ExecutionV2,\n        poll_interval: int,\n    ) -> ExecutionV2:\n        \"\"\"\n        Update execution status until it is no longer running or timeout is reached.\n\n        Args:\n            cr_client (Resource): The base client needed for interacting with GCP\n                Cloud Run V2 API.\n            configuration (CloudRunWorkerJobV2Configuration): The configuration for\n                the job.\n            execution (ExecutionV2): The execution to watch.\n            poll_interval (int): The number of seconds to wait between polls.\n\n        Returns:\n            The execution.\n        \"\"\"\n        t0 = time.time()\n\n        while execution.is_running():\n            execution = ExecutionV2.get(\n                cr_client=cr_client,\n                execution_id=execution.name,\n            )\n\n            elapsed_time = time.time() - t0\n\n            if elapsed_time > configuration.timeout:\n                raise RuntimeError(\n                    f\"Timeout of {configuration.timeout} seconds reached while \"\n                    f\"waiting for Cloud Run Job V2 {configuration.job_name} to \"\n                    \"complete.\"\n                )\n\n            time.sleep(poll_interval)\n\n        return execution\n\n    @staticmethod\n    def _job_run_submission_error(\n        exc: Exception,\n        configuration: CloudRunWorkerJobV2Configuration,\n    ):\n        \"\"\"\n        Creates a formatted error message for the Cloud Run V2 API errors\n\n        Args:\n            exc: The exception to format.\n            configuration: The configuration for the job.\n        \"\"\"\n        # noinspection PyUnresolvedReferences\n        if exc.status_code == 404:\n            pat1 = r\"The requested URL [^ ]+ was not found on this server\"\n\n            if re.findall(pat1, str(exc)):\n                # noinspection PyUnresolvedReferences\n                raise RuntimeError(\n                    f\"Failed to find resources at {exc.uri}. \"\n                    f\"Confirm that region '{configuration.region}' is \"\n                    f\"the correct region for your Cloud Run Job \"\n                    f\"and that '{configuration.project}' is the \"\n                    f\"correct GCP project. 
If your project ID is not \"\n                    f\"correct, you are using a Credentials \"\n                    f\"block with permissions for the wrong project.\"\n                ) from exc\n            else:\n                raise exc\n\n    @staticmethod\n    def _stop_job(\n        cr_client: Resource,\n        configuration: CloudRunWorkerJobV2Configuration,\n        job_name: str,\n    ):\n        \"\"\"\n        Stops/deletes the Cloud Run job.\n\n        Args:\n            cr_client: The Cloud Run client.\n            configuration: The configuration for the job.\n            job_name: The name of the job to stop.\n        \"\"\"\n        try:\n            JobV2.delete(\n                cr_client=cr_client,\n                project=configuration.project,\n                location=configuration.region,\n                job_name=job_name,\n            )\n        except Exception as exc:\n            if \"does not exist\" in str(exc):\n                raise InfrastructureNotFound(\n                    f\"Cannot stop Cloud Run Job; the job name {job_name!r} \"\n                    \"could not be found.\"\n                ) from exc\n            raise\n
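For orientation, a hedged sketch of deploying a flow to a work pool served by this worker type; the pool name, image, and registry path are assumptions, and `flow.deploy` is the general Prefect 2 deployment API rather than anything defined in this module:

```python
from prefect import flow


@flow(log_prints=True)
def hello_cloud_run():
    print("Hello from Cloud Run V2!")


if __name__ == "__main__":
    # Assumes a work pool of type "cloud-run-v2" named "my-cloud-run-v2-pool"
    # already exists and that the image is pushed to a registry the worker's
    # service account can pull from.
    hello_cloud_run.deploy(
        name="cloud-run-v2-example",
        work_pool_name="my-cloud-run-v2-pool",
        image="us-docker.pkg.dev/my-project/my-repo/prefect-flows:latest",
    )
```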
"},{"location":"cloud_run_worker_v2/#prefect_gcp.workers.cloud_run_v2.CloudRunWorkerV2-functions","title":"Functions","text":""},{"location":"cloud_run_worker_v2/#prefect_gcp.workers.cloud_run_v2.CloudRunWorkerV2.kill_infrastructure","title":"kill_infrastructure async","text":"

Stops the Cloud Run job.

Parameters:

infrastructure_pid (str, required): The ID of the infrastructure to stop.
configuration (CloudRunWorkerJobV2Configuration, required): The configuration for the job.
grace_seconds (int, default 30): The number of seconds to wait before stopping the job.

Source code in prefect_gcp/workers/cloud_run_v2.py
async def kill_infrastructure(\n    self,\n    infrastructure_pid: str,\n    configuration: CloudRunWorkerJobV2Configuration,\n    grace_seconds: int = 30,\n):\n    \"\"\"\n    Stops the Cloud Run job.\n\n    Args:\n        infrastructure_pid: The ID of the infrastructure to stop.\n        configuration: The configuration for the job.\n        grace_seconds: The number of seconds to wait before stopping the job.\n    \"\"\"\n    if grace_seconds != 30:\n        self._logger.warning(\n            f\"Kill grace period of {grace_seconds}s requested, but GCP does not \"\n            \"support dynamic grace period configuration. See here for more info: \"\n            \"https://cloud.google.com/run/docs/reference/rest/v1/namespaces.jobs/delete\"  # noqa\n        )\n\n    with self._get_client(configuration=configuration) as cr_client:\n        await run_sync_in_worker_thread(\n            self._stop_job,\n            cr_client=cr_client,\n            configuration=configuration,\n            job_name=infrastructure_pid,\n        )\n
"},{"location":"cloud_run_worker_v2/#prefect_gcp.workers.cloud_run_v2.CloudRunWorkerV2.run","title":"run async","text":"

Runs the flow run on Cloud Run and waits for it to complete.

Parameters:

flow_run (FlowRun, required): The flow run to run.
configuration (CloudRunWorkerJobV2Configuration, required): The configuration for the job.
task_status (Optional[TaskStatus], default None): The task status to update.

Returns:

CloudRunJobV2Result: The result of the job.

Source code in prefect_gcp/workers/cloud_run_v2.py
async def run(\n    self,\n    flow_run: \"FlowRun\",\n    configuration: CloudRunWorkerJobV2Configuration,\n    task_status: Optional[TaskStatus] = None,\n) -> CloudRunJobV2Result:\n    \"\"\"\n    Runs the flow run on Cloud Run and waits for it to complete.\n\n    Args:\n        flow_run: The flow run to run.\n        configuration: The configuration for the job.\n        task_status: The task status to update.\n\n    Returns:\n        The result of the job.\n    \"\"\"\n    logger = self.get_flow_run_logger(flow_run)\n\n    with self._get_client(configuration=configuration) as cr_client:\n        await run_sync_in_worker_thread(\n            self._create_job_and_wait_for_registration,\n            configuration=configuration,\n            cr_client=cr_client,\n            logger=logger,\n        )\n\n        execution = await run_sync_in_worker_thread(\n            self._begin_job_execution,\n            configuration=configuration,\n            cr_client=cr_client,\n            logger=logger,\n        )\n\n        if task_status:\n            task_status.started(configuration.job_name)\n\n        result = await run_sync_in_worker_thread(\n            self._watch_job_execution_and_get_result,\n            configuration=configuration,\n            cr_client=cr_client,\n            execution=execution,\n            logger=logger,\n        )\n\n        return result\n
"},{"location":"cloud_run_worker_v2/#prefect_gcp.workers.cloud_run_v2.CloudRunWorkerV2Result","title":"CloudRunWorkerV2Result","text":"

Bases: BaseWorkerResult

The result of a Cloud Run worker V2 job.

Source code in prefect_gcp/workers/cloud_run_v2.py
class CloudRunWorkerV2Result(BaseWorkerResult):\n    \"\"\"\n    The result of a Cloud Run worker V2 job.\n    \"\"\"\n
"},{"location":"cloud_run_worker_v2/#prefect_gcp.workers.cloud_run_v2.CloudRunWorkerV2Variables","title":"CloudRunWorkerV2Variables","text":"

Bases: BaseVariables

Default variables for the Cloud Run worker V2.

The schema for this class is used to populate the variables section of the default base job template.

Source code in prefect_gcp/workers/cloud_run_v2.py
class CloudRunWorkerV2Variables(BaseVariables):\n    \"\"\"\n    Default variables for the Cloud Run worker V2.\n\n    The schema for this class is used to populate the `variables` section of the\n    default base job template.\n    \"\"\"\n\n    credentials: GcpCredentials = Field(\n        title=\"GCP Credentials\",\n        default_factory=GcpCredentials,\n        description=(\n            \"The GCP Credentials used to connect to Cloud Run. \"\n            \"If not provided credentials will be inferred from \"\n            \"the local environment.\"\n        ),\n    )\n    region: str = Field(\n        default=\"us-central1\",\n        description=\"The region in which to run the Cloud Run job\",\n    )\n    image: Optional[str] = Field(\n        default=\"prefecthq/prefect:2-latest\",\n        title=\"Image Name\",\n        description=(\n            \"The image to use for the Cloud Run job. \"\n            \"If not provided the default Prefect image will be used.\"\n        ),\n    )\n    args: List[str] = Field(\n        default_factory=list,\n        description=(\n            \"The arguments to pass to the Cloud Run Job V2's entrypoint command.\"\n        ),\n    )\n    keep_job: bool = Field(\n        default=False,\n        title=\"Keep Job After Completion\",\n        description=\"Keep the completed Cloud run job on Google Cloud Platform.\",\n    )\n    launch_stage: Literal[\n        \"ALPHA\",\n        \"BETA\",\n        \"GA\",\n        \"DEPRECATED\",\n        \"EARLY_ACCESS\",\n        \"PRELAUNCH\",\n        \"UNIMPLEMENTED\",\n        \"LAUNCH_TAG_UNSPECIFIED\",\n    ] = Field(\n        \"BETA\",\n        description=(\n            \"The launch stage of the Cloud Run Job V2. \"\n            \"See https://cloud.google.com/run/docs/about-features-categories \"\n            \"for additional details.\"\n        ),\n    )\n    max_retries: int = Field(\n        default=0,\n        title=\"Max Retries\",\n        description=\"The number of times to retry the Cloud Run job.\",\n    )\n    cpu: str = Field(\n        default=\"1000m\",\n        title=\"CPU\",\n        description=\"The CPU to allocate to the Cloud Run job.\",\n    )\n    memory: str = Field(\n        default=\"512Mi\",\n        title=\"Memory\",\n        description=(\n            \"The memory to allocate to the Cloud Run job along with the units, which\"\n            \"could be: G, Gi, M, Mi.\"\n        ),\n        example=\"512Mi\",\n        pattern=r\"^\\d+(?:G|Gi|M|Mi)$\",\n    )\n    timeout: int = Field(\n        default=600,\n        gt=0,\n        le=86400,\n        title=\"Job Timeout\",\n        description=(\n            \"The length of time that Prefect will wait for a Cloud Run Job to \"\n            \"complete before raising an exception (maximum of 86400 seconds, 1 day).\"\n        ),\n    )\n    vpc_connector_name: Optional[str] = Field(\n        default=None,\n        title=\"VPC Connector Name\",\n        description=\"The name of the VPC connector to use for the Cloud Run job.\",\n    )\n    service_account_name: Optional[str] = Field(\n        default=None,\n        title=\"Service Account Name\",\n        description=(\n            \"The name of the service account to use for the task execution \"\n            \"of Cloud Run Job. By default Cloud Run jobs run as the default \"\n            \"Compute Engine Service Account.\"\n        ),\n        example=\"service-account@example.iam.gserviceaccount.com\",\n    )\n
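As a quick illustration, the variables model can be instantiated directly to validate a set of overrides before applying them to a work pool; a minimal sketch (the region, image, and resource values are assumptions, and unset fields keep the defaults documented above):

```python
from prefect_gcp.workers.cloud_run_v2 import CloudRunWorkerV2Variables

variables = CloudRunWorkerV2Variables(
    region="europe-west1",
    image="us-docker.pkg.dev/my-project/my-repo/prefect-flows:latest",
    cpu="2000m",
    memory="1Gi",   # must match the documented units: G, Gi, M, Mi
    timeout=1800,   # seconds; capped at 86400 (1 day)
    keep_job=True,
)

# Only the overridden fields; everything else falls back to the defaults.
print(variables.dict(exclude_unset=True))
```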
"},{"location":"cloud_run_worker_v2/#prefect_gcp.workers.cloud_run_v2-functions","title":"Functions","text":""},{"location":"cloud_storage/","title":"Cloud Storage","text":""},{"location":"cloud_storage/#prefect_gcp.cloud_storage","title":"prefect_gcp.cloud_storage","text":"

Tasks for interacting with GCP Cloud Storage.

"},{"location":"cloud_storage/#prefect_gcp.cloud_storage-classes","title":"Classes","text":""},{"location":"cloud_storage/#prefect_gcp.cloud_storage.DataFrameSerializationFormat","title":"DataFrameSerializationFormat","text":"

Bases: Enum

An enumeration class to represent different file formats and compression options for upload_from_dataframe.

Attributes:

CSV: Representation for 'csv' file format with no compression and its related content type and suffix.
CSV_GZIP: Representation for 'csv' file format with 'gzip' compression and its related content type and suffix.
PARQUET: Representation for 'parquet' file format with no compression and its related content type and suffix.
PARQUET_SNAPPY: Representation for 'parquet' file format with 'snappy' compression and its related content type and suffix.
PARQUET_GZIP: Representation for 'parquet' file format with 'gzip' compression and its related content type and suffix.

Source code in prefect_gcp/cloud_storage.py
class DataFrameSerializationFormat(Enum):\n    \"\"\"\n    An enumeration class to represent different file formats,\n    compression options for upload_from_dataframe\n\n    Attributes:\n        CSV: Representation for 'csv' file format with no compression\n            and its related content type and suffix.\n\n        CSV_GZIP: Representation for 'csv' file format with 'gzip' compression\n            and its related content type and suffix.\n\n        PARQUET: Representation for 'parquet' file format with no compression\n            and its related content type and suffix.\n\n        PARQUET_SNAPPY: Representation for 'parquet' file format\n            with 'snappy' compression and its related content type and suffix.\n\n        PARQUET_GZIP: Representation for 'parquet' file format\n            with 'gzip' compression and its related content type and suffix.\n    \"\"\"\n\n    CSV = (\"csv\", None, \"text/csv\", \".csv\")\n    CSV_GZIP = (\"csv\", \"gzip\", \"application/x-gzip\", \".csv.gz\")\n    PARQUET = (\"parquet\", None, \"application/octet-stream\", \".parquet\")\n    PARQUET_SNAPPY = (\n        \"parquet\",\n        \"snappy\",\n        \"application/octet-stream\",\n        \".snappy.parquet\",\n    )\n    PARQUET_GZIP = (\"parquet\", \"gzip\", \"application/octet-stream\", \".gz.parquet\")\n\n    @property\n    def format(self) -> str:\n        \"\"\"The file format of the current instance.\"\"\"\n        return self.value[0]\n\n    @property\n    def compression(self) -> Union[str, None]:\n        \"\"\"The compression type of the current instance.\"\"\"\n        return self.value[1]\n\n    @property\n    def content_type(self) -> str:\n        \"\"\"The content type of the current instance.\"\"\"\n        return self.value[2]\n\n    @property\n    def suffix(self) -> str:\n        \"\"\"The suffix of the file format of the current instance.\"\"\"\n        return self.value[3]\n\n    def fix_extension_with(self, gcs_blob_path: str) -> str:\n        \"\"\"Fix the extension of a GCS blob.\n\n        Args:\n            gcs_blob_path: The path to the GCS blob to be modified.\n\n        Returns:\n            The modified path to the GCS blob with the new extension.\n        \"\"\"\n        gcs_blob_path = PurePosixPath(gcs_blob_path)\n        folder = gcs_blob_path.parent\n        filename = PurePosixPath(gcs_blob_path.stem).with_suffix(self.suffix)\n        return str(folder.joinpath(filename))\n
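Each member bundles a file format, an optional compression, a content type, and a filename suffix; a short sketch of reading those properties (values taken from the enum definition above):

```python
from prefect_gcp.cloud_storage import DataFrameSerializationFormat

fmt = DataFrameSerializationFormat.CSV_GZIP
print(fmt.format)        # csv
print(fmt.compression)   # gzip
print(fmt.content_type)  # application/x-gzip
print(fmt.suffix)        # .csv.gz

# upload_from_dataframe accepts the member name as a string and upper-cases it:
print(DataFrameSerializationFormat["PARQUET_SNAPPY"].suffix)  # .snappy.parquet
```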
"},{"location":"cloud_storage/#prefect_gcp.cloud_storage.DataFrameSerializationFormat-attributes","title":"Attributes","text":""},{"location":"cloud_storage/#prefect_gcp.cloud_storage.DataFrameSerializationFormat.compression","title":"compression: Union[str, None] property","text":"

The compression type of the current instance.

"},{"location":"cloud_storage/#prefect_gcp.cloud_storage.DataFrameSerializationFormat.content_type","title":"content_type: str property","text":"

The content type of the current instance.

"},{"location":"cloud_storage/#prefect_gcp.cloud_storage.DataFrameSerializationFormat.format","title":"format: str property","text":"

The file format of the current instance.

"},{"location":"cloud_storage/#prefect_gcp.cloud_storage.DataFrameSerializationFormat.suffix","title":"suffix: str property","text":"

The suffix of the file format of the current instance.

"},{"location":"cloud_storage/#prefect_gcp.cloud_storage.DataFrameSerializationFormat-functions","title":"Functions","text":""},{"location":"cloud_storage/#prefect_gcp.cloud_storage.DataFrameSerializationFormat.fix_extension_with","title":"fix_extension_with","text":"

Fix the extension of a GCS blob.

Parameters:

gcs_blob_path (str, required): The path to the GCS blob to be modified.

Returns:

str: The modified path to the GCS blob with the new extension.

Source code in prefect_gcp/cloud_storage.py
def fix_extension_with(self, gcs_blob_path: str) -> str:\n    \"\"\"Fix the extension of a GCS blob.\n\n    Args:\n        gcs_blob_path: The path to the GCS blob to be modified.\n\n    Returns:\n        The modified path to the GCS blob with the new extension.\n    \"\"\"\n    gcs_blob_path = PurePosixPath(gcs_blob_path)\n    folder = gcs_blob_path.parent\n    filename = PurePosixPath(gcs_blob_path.stem).with_suffix(self.suffix)\n    return str(folder.joinpath(filename))\n
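A short sketch of how fix_extension_with rewrites a blob path (the paths are illustrative): the stem of the final path component is kept and the member's suffix is applied:

```python
from prefect_gcp.cloud_storage import DataFrameSerializationFormat

print(DataFrameSerializationFormat.CSV_GZIP.fix_extension_with("reports/daily.txt"))
# reports/daily.csv.gz

print(DataFrameSerializationFormat.PARQUET_SNAPPY.fix_extension_with("reports/daily"))
# reports/daily.snappy.parquet
```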
"},{"location":"cloud_storage/#prefect_gcp.cloud_storage.GcsBucket","title":"GcsBucket","text":"

Bases: WritableDeploymentStorage, WritableFileSystem, ObjectStorageBlock

Block used to store data using GCP Cloud Storage Buckets.

Note! GcsBucket in prefect-gcp is a unique block, separate from GCS in core Prefect. GcsBucket does not use gcsfs under the hood, instead using the google-cloud-storage package, and offers more configuration and functionality.

Attributes:

bucket (str): Name of the bucket.
gcp_credentials (GcpCredentials): The credentials to authenticate with GCP.
bucket_folder (str): A default path to a folder within the GCS bucket to use for reading and writing objects.

Example

Load stored GCP Cloud Storage Bucket:

from prefect_gcp.cloud_storage import GcsBucket\ngcp_cloud_storage_bucket_block = GcsBucket.load(\"BLOCK_NAME\")\n
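
A hedged end-to-end sketch combining the block with upload_from_dataframe (the block name, destination folder, and data are assumptions; the default CSV_GZIP format rewrites the extension):

```python
import pandas as pd

from prefect_gcp.cloud_storage import GcsBucket

gcs_bucket = GcsBucket.load("my-bucket-block")

df = pd.DataFrame({"id": [1, 2, 3], "value": ["a", "b", "c"]})

# Defaults to DataFrameSerializationFormat.CSV_GZIP, so the path gains .csv.gz.
path = gcs_bucket.upload_from_dataframe(df, to_path="exports/values")
print(path)  # e.g. exports/values.csv.gz (prefixed with bucket_folder, if set)
```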

Source code in prefect_gcp/cloud_storage.py
class GcsBucket(WritableDeploymentStorage, WritableFileSystem, ObjectStorageBlock):\n    \"\"\"\n    Block used to store data using GCP Cloud Storage Buckets.\n\n    Note! `GcsBucket` in `prefect-gcp` is a unique block, separate from `GCS`\n    in core Prefect. `GcsBucket` does not use `gcsfs` under the hood,\n    instead using the `google-cloud-storage` package, and offers more configuration\n    and functionality.\n\n    Attributes:\n        bucket: Name of the bucket.\n        gcp_credentials: The credentials to authenticate with GCP.\n        bucket_folder: A default path to a folder within the GCS bucket to use\n            for reading and writing objects.\n\n    Example:\n        Load stored GCP Cloud Storage Bucket:\n        ```python\n        from prefect_gcp.cloud_storage import GcsBucket\n        gcp_cloud_storage_bucket_block = GcsBucket.load(\"BLOCK_NAME\")\n        ```\n    \"\"\"\n\n    _logo_url = \"https://cdn.sanity.io/images/3ugk85nk/production/10424e311932e31c477ac2b9ef3d53cefbaad708-250x250.png\"  # noqa\n    _block_type_name = \"GCS Bucket\"\n    _documentation_url = \"https://prefecthq.github.io/prefect-gcp/cloud_storage/#prefect_gcp.cloud_storage.GcsBucket\"  # noqa: E501\n\n    bucket: str = Field(..., description=\"Name of the bucket.\")\n    gcp_credentials: GcpCredentials = Field(\n        default_factory=GcpCredentials,\n        description=\"The credentials to authenticate with GCP.\",\n    )\n    bucket_folder: str = Field(\n        default=\"\",\n        description=(\n            \"A default path to a folder within the GCS bucket to use \"\n            \"for reading and writing objects.\"\n        ),\n    )\n\n    @property\n    def basepath(self) -> str:\n        \"\"\"\n        Read-only property that mirrors the bucket folder.\n\n        Used for deployment.\n        \"\"\"\n        return self.bucket_folder\n\n    @validator(\"bucket_folder\", pre=True, always=True)\n    def _bucket_folder_suffix(cls, value):\n        \"\"\"\n        Ensures that the bucket folder is suffixed with a forward slash.\n        \"\"\"\n        if value != \"\" and not value.endswith(\"/\"):\n            value = f\"{value}/\"\n        return value\n\n    def _resolve_path(self, path: str) -> str:\n        \"\"\"\n        A helper function used in write_path to join `self.bucket_folder` and `path`.\n\n        Args:\n            path: Name of the key, e.g. \"file1\". Each object in your\n                bucket has a unique key (or key name).\n\n        Returns:\n            The joined path.\n        \"\"\"\n        # If bucket_folder provided, it means we won't write to the root dir of\n        # the bucket. So we need to add it on the front of the path.\n        path = (\n            str(PurePosixPath(self.bucket_folder, path)) if self.bucket_folder else path\n        )\n        if path in [\"\", \".\", \"/\"]:\n            # client.bucket.list_blobs(prefix=None) is the proper way\n            # of specifying the root folder of the bucket\n            path = None\n        return path\n\n    @sync_compatible\n    async def get_directory(\n        self, from_path: Optional[str] = None, local_path: Optional[str] = None\n    ) -> List[Union[str, Path]]:\n        \"\"\"\n        Copies a folder from the configured GCS bucket to a local directory.\n        Defaults to copying the entire contents of the block's bucket_folder\n        to the current working directory.\n\n        Args:\n            from_path: Path in GCS bucket to download from. 
Defaults to the block's\n                configured bucket_folder.\n            local_path: Local path to download GCS bucket contents to.\n                Defaults to the current working directory.\n\n        Returns:\n            A list of downloaded file paths.\n        \"\"\"\n        from_path = (\n            self.bucket_folder if from_path is None else self._resolve_path(from_path)\n        )\n\n        if local_path is None:\n            local_path = os.path.abspath(\".\")\n        else:\n            local_path = os.path.abspath(os.path.expanduser(local_path))\n\n        project = self.gcp_credentials.project\n        client = self.gcp_credentials.get_cloud_storage_client(project=project)\n\n        blobs = await run_sync_in_worker_thread(\n            client.list_blobs, self.bucket, prefix=from_path\n        )\n\n        file_paths = []\n        for blob in blobs:\n            blob_path = blob.name\n            if blob_path[-1] == \"/\":\n                # object is a folder and will be created if it contains any objects\n                continue\n            local_file_path = os.path.join(local_path, blob_path)\n            os.makedirs(os.path.dirname(local_file_path), exist_ok=True)\n\n            with disable_run_logger():\n                file_path = await cloud_storage_download_blob_to_file.fn(\n                    bucket=self.bucket,\n                    blob=blob_path,\n                    path=local_file_path,\n                    gcp_credentials=self.gcp_credentials,\n                )\n                file_paths.append(file_path)\n        return file_paths\n\n    @sync_compatible\n    async def put_directory(\n        self,\n        local_path: Optional[str] = None,\n        to_path: Optional[str] = None,\n        ignore_file: Optional[str] = None,\n    ) -> int:\n        \"\"\"\n        Uploads a directory from a given local path to the configured GCS bucket in a\n        given folder.\n\n        Defaults to uploading the entire contents the current working directory to the\n        block's bucket_folder.\n\n        Args:\n            local_path: Path to local directory to upload from.\n            to_path: Path in GCS bucket to upload to. 
Defaults to block's configured\n                bucket_folder.\n            ignore_file: Path to file containing gitignore style expressions for\n                filepaths to ignore.\n\n        Returns:\n            The number of files uploaded.\n        \"\"\"\n        if local_path is None:\n            local_path = os.path.abspath(\".\")\n        else:\n            local_path = os.path.expanduser(local_path)\n\n        to_path = self.bucket_folder if to_path is None else self._resolve_path(to_path)\n\n        included_files = None\n        if ignore_file:\n            with open(ignore_file, \"r\") as f:\n                ignore_patterns = f.readlines()\n            included_files = filter_files(local_path, ignore_patterns)\n\n        uploaded_file_count = 0\n        for local_file_path in Path(local_path).rglob(\"*\"):\n            if (\n                included_files is not None\n                and local_file_path.name not in included_files\n            ):\n                continue\n            elif not local_file_path.is_dir():\n                remote_file_path = str(\n                    PurePosixPath(to_path, local_file_path.relative_to(local_path))\n                )\n                local_file_content = local_file_path.read_bytes()\n                await self.write_path(remote_file_path, content=local_file_content)\n                uploaded_file_count += 1\n\n        return uploaded_file_count\n\n    @sync_compatible\n    async def read_path(self, path: str) -> bytes:\n        \"\"\"\n        Read specified path from GCS and return contents. Provide the entire\n        path to the key in GCS.\n\n        Args:\n            path: Entire path to (and including) the key.\n\n        Returns:\n            A bytes or string representation of the blob object.\n        \"\"\"\n        path = self._resolve_path(path)\n        with disable_run_logger():\n            contents = await cloud_storage_download_blob_as_bytes.fn(\n                bucket=self.bucket, blob=path, gcp_credentials=self.gcp_credentials\n            )\n        return contents\n\n    @sync_compatible\n    async def write_path(self, path: str, content: bytes) -> str:\n        \"\"\"\n        Writes to an GCS bucket.\n\n        Args:\n            path: The key name. 
Each object in your bucket has a unique\n                key (or key name).\n            content: What you are uploading to GCS Bucket.\n\n        Returns:\n            The path that the contents were written to.\n        \"\"\"\n        path = self._resolve_path(path)\n        with disable_run_logger():\n            await cloud_storage_upload_blob_from_string.fn(\n                data=content,\n                bucket=self.bucket,\n                blob=path,\n                gcp_credentials=self.gcp_credentials,\n            )\n        return path\n\n    # NEW BLOCK INTERFACE METHODS BELOW\n    def _join_bucket_folder(self, bucket_path: str = \"\") -> str:\n        \"\"\"\n        Joins the base bucket folder to the bucket path.\n\n        NOTE: If a method reuses another method in this class, be careful to not\n        call this  twice because it'll join the bucket folder twice.\n        See https://github.com/PrefectHQ/prefect-aws/issues/141 for a past issue.\n        \"\"\"\n        bucket_path = str(bucket_path)\n        if self.bucket_folder != \"\" and bucket_path.startswith(self.bucket_folder):\n            self.logger.info(\n                f\"Bucket path {bucket_path!r} is already prefixed with \"\n                f\"bucket folder {self.bucket_folder!r}; is this intentional?\"\n            )\n\n        bucket_path = str(PurePosixPath(self.bucket_folder) / bucket_path)\n        if bucket_path in [\"\", \".\", \"/\"]:\n            # client.bucket.list_blobs(prefix=None) is the proper way\n            # of specifying the root folder of the bucket\n            bucket_path = None\n        return bucket_path\n\n    @sync_compatible\n    async def create_bucket(\n        self, location: Optional[str] = None, **create_kwargs\n    ) -> \"Bucket\":\n        \"\"\"\n        Creates a bucket.\n\n        Args:\n            location: The location of the bucket.\n            **create_kwargs: Additional keyword arguments to pass to the\n                `create_bucket` method.\n\n        Returns:\n            The bucket object.\n\n        Examples:\n            Create a bucket.\n            ```python\n            from prefect_gcp.cloud_storage import GcsBucket\n\n            gcs_bucket = GcsBucket(bucket=\"my-bucket\")\n            gcs_bucket.create_bucket()\n            ```\n        \"\"\"\n        self.logger.info(f\"Creating bucket {self.bucket!r}.\")\n        client = self.gcp_credentials.get_cloud_storage_client()\n        bucket = await run_sync_in_worker_thread(\n            client.create_bucket, self.bucket, location=location, **create_kwargs\n        )\n        return bucket\n\n    @sync_compatible\n    async def get_bucket(self) -> \"Bucket\":\n        \"\"\"\n        Returns the bucket object.\n\n        Returns:\n            The bucket object.\n\n        Examples:\n            Get the bucket object.\n            ```python\n            from prefect_gcp.cloud_storage import GcsBucket\n\n            gcs_bucket = GcsBucket.load(\"my-bucket\")\n            gcs_bucket.get_bucket()\n            ```\n        \"\"\"\n        self.logger.info(f\"Getting bucket {self.bucket!r}.\")\n        client = self.gcp_credentials.get_cloud_storage_client()\n        bucket = await run_sync_in_worker_thread(client.get_bucket, self.bucket)\n        return bucket\n\n    @sync_compatible\n    async def list_blobs(self, folder: str = \"\") -> List[\"Blob\"]:\n        \"\"\"\n        Lists all blobs in the bucket that are in a folder.\n        Folders are not included in the output.\n\n        Args:\n      
      folder: The folder to list blobs from.\n\n        Returns:\n            A list of Blob objects.\n\n        Examples:\n            Get all blobs from a folder named \"prefect\".\n            ```python\n            from prefect_gcp.cloud_storage import GcsBucket\n\n            gcs_bucket = GcsBucket.load(\"my-bucket\")\n            gcs_bucket.list_blobs(\"prefect\")\n            ```\n        \"\"\"\n        client = self.gcp_credentials.get_cloud_storage_client()\n\n        bucket_path = self._join_bucket_folder(folder)\n        if bucket_path is None:\n            self.logger.info(f\"Listing blobs in bucket {self.bucket!r}.\")\n        else:\n            self.logger.info(\n                f\"Listing blobs in folder {bucket_path!r} in bucket {self.bucket!r}.\"\n            )\n        blobs = await run_sync_in_worker_thread(\n            client.list_blobs, self.bucket, prefix=bucket_path\n        )\n\n        # Ignore folders\n        return [blob for blob in blobs if not blob.name.endswith(\"/\")]\n\n    @sync_compatible\n    async def list_folders(self, folder: str = \"\") -> List[str]:\n        \"\"\"\n        Lists all folders and subfolders in the bucket.\n\n        Args:\n            folder: List all folders and subfolders inside given folder.\n\n        Returns:\n            A list of folders.\n\n        Examples:\n            Get all folders from a bucket named \"my-bucket\".\n            ```python\n            from prefect_gcp.cloud_storage import GcsBucket\n\n            gcs_bucket = GcsBucket.load(\"my-bucket\")\n            gcs_bucket.list_folders()\n            ```\n\n            Get all folders from a folder called years\n            ```python\n            from prefect_gcp.cloud_storage import GcsBucket\n\n            gcs_bucket = GcsBucket.load(\"my-bucket\")\n            gcs_bucket.list_folders(\"years\")\n            ```\n        \"\"\"\n\n        # Beware of calling _join_bucket_folder twice, see note in method.\n        # However, we just want to use it to check if we are listing the root folder\n        bucket_path = self._join_bucket_folder(folder)\n        if bucket_path is None:\n            self.logger.info(f\"Listing folders in bucket {self.bucket!r}.\")\n        else:\n            self.logger.info(\n                f\"Listing folders in {bucket_path!r} in bucket {self.bucket!r}.\"\n            )\n\n        blobs = await self.list_blobs(folder)\n        # gets all folders with full path\n        folders = {str(PurePosixPath(blob.name).parent) for blob in blobs}\n\n        return [folder for folder in folders if folder != \".\"]\n\n    @sync_compatible\n    async def download_object_to_path(\n        self,\n        from_path: str,\n        to_path: Optional[Union[str, Path]] = None,\n        **download_kwargs: Dict[str, Any],\n    ) -> Path:\n        \"\"\"\n        Downloads an object from the object storage service to a path.\n\n        Args:\n            from_path: The path to the blob to download; this gets prefixed\n                with the bucket_folder.\n            to_path: The path to download the blob to. 
If not provided, the\n                blob's name will be used.\n            **download_kwargs: Additional keyword arguments to pass to\n                `Blob.download_to_filename`.\n\n        Returns:\n            The absolute path that the object was downloaded to.\n\n        Examples:\n            Download my_folder/notes.txt object to notes.txt.\n            ```python\n            from prefect_gcp.cloud_storage import GcsBucket\n\n            gcs_bucket = GcsBucket.load(\"my-bucket\")\n            gcs_bucket.download_object_to_path(\"my_folder/notes.txt\", \"notes.txt\")\n            ```\n        \"\"\"\n        if to_path is None:\n            to_path = Path(from_path).name\n\n        # making path absolute, but converting back to str here\n        # since !r looks nicer that way and filename arg expects str\n        to_path = str(Path(to_path).absolute())\n\n        bucket = await self.get_bucket()\n        bucket_path = self._join_bucket_folder(from_path)\n        blob = bucket.blob(bucket_path)\n        self.logger.info(\n            f\"Downloading blob from bucket {self.bucket!r} path {bucket_path!r}\"\n            f\"to {to_path!r}.\"\n        )\n\n        await run_sync_in_worker_thread(\n            blob.download_to_filename, filename=to_path, **download_kwargs\n        )\n        return Path(to_path)\n\n    @sync_compatible\n    async def download_object_to_file_object(\n        self,\n        from_path: str,\n        to_file_object: BinaryIO,\n        **download_kwargs: Dict[str, Any],\n    ) -> BinaryIO:\n        \"\"\"\n        Downloads an object from the object storage service to a file-like object,\n        which can be a BytesIO object or a BufferedWriter.\n\n        Args:\n            from_path: The path to the blob to download from; this gets prefixed\n                with the bucket_folder.\n            to_file_object: The file-like object to download the blob to.\n            **download_kwargs: Additional keyword arguments to pass to\n                `Blob.download_to_file`.\n\n        Returns:\n            The file-like object that the object was downloaded to.\n\n        Examples:\n            Download my_folder/notes.txt object to a BytesIO object.\n            ```python\n            from io import BytesIO\n            from prefect_gcp.cloud_storage import GcsBucket\n\n            gcs_bucket = GcsBucket.load(\"my-bucket\")\n            with BytesIO() as buf:\n                gcs_bucket.download_object_to_file_object(\"my_folder/notes.txt\", buf)\n            ```\n\n            Download my_folder/notes.txt object to a BufferedWriter.\n            ```python\n                from prefect_gcp.cloud_storage import GcsBucket\n\n                gcs_bucket = GcsBucket.load(\"my-bucket\")\n                with open(\"notes.txt\", \"wb\") as f:\n                    gcs_bucket.download_object_to_file_object(\"my_folder/notes.txt\", f)\n            ```\n        \"\"\"\n        bucket = await self.get_bucket()\n\n        bucket_path = self._join_bucket_folder(from_path)\n        blob = bucket.blob(bucket_path)\n        self.logger.info(\n            f\"Downloading blob from bucket {self.bucket!r} path {bucket_path!r}\"\n            f\"to file object.\"\n        )\n\n        await run_sync_in_worker_thread(\n            blob.download_to_file, file_obj=to_file_object, **download_kwargs\n        )\n        return to_file_object\n\n    @sync_compatible\n    async def download_folder_to_path(\n        self,\n        from_folder: str,\n        to_folder: Optional[Union[str, Path]] 
= None,\n        **download_kwargs: Dict[str, Any],\n    ) -> Path:\n        \"\"\"\n        Downloads objects *within* a folder (excluding the folder itself)\n        from the object storage service to a folder.\n\n        Args:\n            from_folder: The path to the folder to download from; this gets prefixed\n                with the bucket_folder.\n            to_folder: The path to download the folder to. If not provided, will default\n                to the current directory.\n            **download_kwargs: Additional keyword arguments to pass to\n                `Blob.download_to_filename`.\n\n        Returns:\n            The absolute path that the folder was downloaded to.\n\n        Examples:\n            Download my_folder to a local folder named my_folder.\n            ```python\n            from prefect_gcp.cloud_storage import GcsBucket\n\n            gcs_bucket = GcsBucket.load(\"my-bucket\")\n            gcs_bucket.download_folder_to_path(\"my_folder\", \"my_folder\")\n            ```\n        \"\"\"\n        if to_folder is None:\n            to_folder = \"\"\n        to_folder = Path(to_folder).absolute()\n\n        blobs = await self.list_blobs(folder=from_folder)\n        if len(blobs) == 0:\n            self.logger.warning(\n                f\"No blobs were downloaded from \"\n                f\"bucket {self.bucket!r} path {from_folder!r}.\"\n            )\n            return to_folder\n\n        # do not call self._join_bucket_folder for list_blobs\n        # because it's built-in to that method already!\n        # however, we still need to do it because we're using relative_to\n        bucket_folder = self._join_bucket_folder(from_folder)\n\n        async_coros = []\n        for blob in blobs:\n            bucket_path = PurePosixPath(blob.name).relative_to(bucket_folder)\n            if str(bucket_path).endswith(\"/\"):\n                continue\n            to_path = to_folder / bucket_path\n            to_path.parent.mkdir(parents=True, exist_ok=True)\n            self.logger.info(\n                f\"Downloading blob from bucket {self.bucket!r} path \"\n                f\"{str(bucket_path)!r} to {to_path}.\"\n            )\n            async_coros.append(\n                run_sync_in_worker_thread(\n                    blob.download_to_filename, filename=str(to_path), **download_kwargs\n                )\n            )\n        await asyncio.gather(*async_coros)\n\n        return to_folder\n\n    @sync_compatible\n    async def upload_from_path(\n        self,\n        from_path: Union[str, Path],\n        to_path: Optional[str] = None,\n        **upload_kwargs: Dict[str, Any],\n    ) -> str:\n        \"\"\"\n        Uploads an object from a path to the object storage service.\n\n        Args:\n            from_path: The path to the file to upload from.\n            to_path: The path to upload the file to. 
If not provided, will use\n                the file name of from_path; this gets prefixed\n                with the bucket_folder.\n            **upload_kwargs: Additional keyword arguments to pass to\n                `Blob.upload_from_filename`.\n\n        Returns:\n            The path that the object was uploaded to.\n\n        Examples:\n            Upload notes.txt to my_folder/notes.txt.\n            ```python\n            from prefect_gcp.cloud_storage import GcsBucket\n\n            gcs_bucket = GcsBucket.load(\"my-bucket\")\n            gcs_bucket.upload_from_path(\"notes.txt\", \"my_folder/notes.txt\")\n            ```\n        \"\"\"\n        if to_path is None:\n            to_path = Path(from_path).name\n\n        bucket_path = self._join_bucket_folder(to_path)\n        bucket = await self.get_bucket()\n        blob = bucket.blob(bucket_path)\n        self.logger.info(\n            f\"Uploading from {from_path!r} to the bucket \"\n            f\"{self.bucket!r} path {bucket_path!r}.\"\n        )\n\n        await run_sync_in_worker_thread(\n            blob.upload_from_filename, filename=from_path, **upload_kwargs\n        )\n        return bucket_path\n\n    @sync_compatible\n    async def upload_from_file_object(\n        self, from_file_object: BinaryIO, to_path: str, **upload_kwargs\n    ) -> str:\n        \"\"\"\n        Uploads an object to the object storage service from a file-like object,\n        which can be a BytesIO object or a BufferedReader.\n\n        Args:\n            from_file_object: The file-like object to upload from.\n            to_path: The path to upload the object to; this gets prefixed\n                with the bucket_folder.\n            **upload_kwargs: Additional keyword arguments to pass to\n                `Blob.upload_from_file`.\n\n        Returns:\n            The path that the object was uploaded to.\n\n        Examples:\n            Upload my_folder/notes.txt object to a BytesIO object.\n            ```python\n            from io import BytesIO\n            from prefect_gcp.cloud_storage import GcsBucket\n\n            gcs_bucket = GcsBucket.load(\"my-bucket\")\n            with open(\"notes.txt\", \"rb\") as f:\n                gcs_bucket.upload_from_file_object(f, \"my_folder/notes.txt\")\n            ```\n\n            Upload BufferedReader object to my_folder/notes.txt.\n            ```python\n            from io import BufferedReader\n            from prefect_gcp.cloud_storage import GcsBucket\n\n            gcs_bucket = GcsBucket.load(\"my-bucket\")\n            with open(\"notes.txt\", \"rb\") as f:\n                gcs_bucket.upload_from_file_object(\n                    BufferedReader(f), \"my_folder/notes.txt\"\n                )\n            ```\n        \"\"\"\n        bucket = await self.get_bucket()\n\n        bucket_path = self._join_bucket_folder(to_path)\n        blob = bucket.blob(bucket_path)\n        self.logger.info(\n            f\"Uploading from file object to the bucket \"\n            f\"{self.bucket!r} path {bucket_path!r}.\"\n        )\n\n        await run_sync_in_worker_thread(\n            blob.upload_from_file, from_file_object, **upload_kwargs\n        )\n        return bucket_path\n\n    @sync_compatible\n    async def upload_from_folder(\n        self,\n        from_folder: Union[str, Path],\n        to_folder: Optional[str] = None,\n        **upload_kwargs: Dict[str, Any],\n    ) -> str:\n        \"\"\"\n        Uploads files *within* a folder (excluding the folder itself)\n        to the object storage 
service folder.\n\n        Args:\n            from_folder: The path to the folder to upload from.\n            to_folder: The path to upload the folder to. If not provided, will default\n                to bucket_folder or the base directory of the bucket.\n            **upload_kwargs: Additional keyword arguments to pass to\n                `Blob.upload_from_filename`.\n\n        Returns:\n            The path that the folder was uploaded to.\n\n        Examples:\n            Upload local folder my_folder to the bucket's folder my_folder.\n            ```python\n            from prefect_gcp.cloud_storage import GcsBucket\n\n            gcs_bucket = GcsBucket.load(\"my-bucket\")\n            gcs_bucket.upload_from_folder(\"my_folder\")\n            ```\n        \"\"\"\n        from_folder = Path(from_folder)\n        # join bucket folder expects string for the first input\n        # when it returns None, we need to convert it back to empty string\n        # so relative_to works\n        bucket_folder = self._join_bucket_folder(to_folder or \"\") or \"\"\n\n        num_uploaded = 0\n        bucket = await self.get_bucket()\n\n        async_coros = []\n        for from_path in from_folder.rglob(\"**/*\"):\n            if from_path.is_dir():\n                continue\n            bucket_path = str(Path(bucket_folder) / from_path.relative_to(from_folder))\n            self.logger.info(\n                f\"Uploading from {str(from_path)!r} to the bucket \"\n                f\"{self.bucket!r} path {bucket_path!r}.\"\n            )\n            blob = bucket.blob(bucket_path)\n            async_coros.append(\n                run_sync_in_worker_thread(\n                    blob.upload_from_filename, filename=from_path, **upload_kwargs\n                )\n            )\n            num_uploaded += 1\n        await asyncio.gather(*async_coros)\n        if num_uploaded == 0:\n            self.logger.warning(f\"No files were uploaded from {from_folder}.\")\n        return bucket_folder\n\n    @sync_compatible\n    async def upload_from_dataframe(\n        self,\n        df: \"DataFrame\",\n        to_path: str,\n        serialization_format: Union[\n            str, DataFrameSerializationFormat\n        ] = DataFrameSerializationFormat.CSV_GZIP,\n        **upload_kwargs: Dict[str, Any],\n    ) -> str:\n        \"\"\"Upload a Pandas DataFrame to Google Cloud Storage in various formats.\n\n        This function uploads the data in a Pandas DataFrame to Google Cloud Storage\n        in a specified format, such as .csv, .csv.gz, .parquet,\n        .parquet.snappy, and .parquet.gz.\n\n        Args:\n            df: The Pandas DataFrame to be uploaded.\n            to_path: The destination path for the uploaded DataFrame.\n            serialization_format: The format to serialize the DataFrame into.\n                When passed as a `str`, the valid options are:\n                'csv', 'csv_gzip',  'parquet', 'parquet_snappy', 'parquet_gzip'.\n                Defaults to `DataFrameSerializationFormat.CSV_GZIP`.\n            **upload_kwargs: Additional keyword arguments to pass to the underlying\n            `Blob.upload_from_dataframe` method.\n\n        Returns:\n            The path that the object was uploaded to.\n        \"\"\"\n        if isinstance(serialization_format, str):\n            serialization_format = DataFrameSerializationFormat[\n                serialization_format.upper()\n            ]\n\n        with BytesIO() as bytes_buffer:\n            if serialization_format.format == 
\"parquet\":\n                df.to_parquet(\n                    path=bytes_buffer,\n                    compression=serialization_format.compression,\n                    index=False,\n                )\n            elif serialization_format.format == \"csv\":\n                df.to_csv(\n                    path_or_buf=bytes_buffer,\n                    compression=serialization_format.compression,\n                    index=False,\n                )\n\n            bytes_buffer.seek(0)\n            to_path = serialization_format.fix_extension_with(gcs_blob_path=to_path)\n\n            return await self.upload_from_file_object(\n                from_file_object=bytes_buffer,\n                to_path=to_path,\n                **{\"content_type\": serialization_format.content_type, **upload_kwargs},\n            )\n
"},{"location":"cloud_storage/#prefect_gcp.cloud_storage.GcsBucket-attributes","title":"Attributes","text":""},{"location":"cloud_storage/#prefect_gcp.cloud_storage.GcsBucket.basepath","title":"basepath: str property","text":"

Read-only property that mirrors the bucket folder.

Used for deployment.

"},{"location":"cloud_storage/#prefect_gcp.cloud_storage.GcsBucket-functions","title":"Functions","text":""},{"location":"cloud_storage/#prefect_gcp.cloud_storage.GcsBucket.create_bucket","title":"create_bucket async","text":"

Creates a bucket.

Parameters:

location (Optional[str], default None): The location of the bucket.
**create_kwargs (default {}): Additional keyword arguments to pass to the create_bucket method.

Returns:

Bucket: The bucket object.

Examples:

Create a bucket.

from prefect_gcp.cloud_storage import GcsBucket\n\ngcs_bucket = GcsBucket(bucket=\"my-bucket\")\ngcs_bucket.create_bucket()\n

Source code in prefect_gcp/cloud_storage.py
@sync_compatible\nasync def create_bucket(\n    self, location: Optional[str] = None, **create_kwargs\n) -> \"Bucket\":\n    \"\"\"\n    Creates a bucket.\n\n    Args:\n        location: The location of the bucket.\n        **create_kwargs: Additional keyword arguments to pass to the\n            `create_bucket` method.\n\n    Returns:\n        The bucket object.\n\n    Examples:\n        Create a bucket.\n        ```python\n        from prefect_gcp.cloud_storage import GcsBucket\n\n        gcs_bucket = GcsBucket(bucket=\"my-bucket\")\n        gcs_bucket.create_bucket()\n        ```\n    \"\"\"\n    self.logger.info(f\"Creating bucket {self.bucket!r}.\")\n    client = self.gcp_credentials.get_cloud_storage_client()\n    bucket = await run_sync_in_worker_thread(\n        client.create_bucket, self.bucket, location=location, **create_kwargs\n    )\n    return bucket\n
"},{"location":"cloud_storage/#prefect_gcp.cloud_storage.GcsBucket.download_folder_to_path","title":"download_folder_to_path async","text":"

Downloads objects within a folder (excluding the folder itself) from the object storage service to a folder.

Parameters:

from_folder (str, required): The path to the folder to download from; this gets prefixed with the bucket_folder.
to_folder (Optional[Union[str, Path]], default None): The path to download the folder to. If not provided, will default to the current directory.
**download_kwargs (Dict[str, Any], default {}): Additional keyword arguments to pass to Blob.download_to_filename.

Returns:

Path: The absolute path that the folder was downloaded to.

Examples:

Download my_folder to a local folder named my_folder.

from prefect_gcp.cloud_storage import GcsBucket\n\ngcs_bucket = GcsBucket.load(\"my-bucket\")\ngcs_bucket.download_folder_to_path(\"my_folder\", \"my_folder\")\n

Source code in prefect_gcp/cloud_storage.py
@sync_compatible\nasync def download_folder_to_path(\n    self,\n    from_folder: str,\n    to_folder: Optional[Union[str, Path]] = None,\n    **download_kwargs: Dict[str, Any],\n) -> Path:\n    \"\"\"\n    Downloads objects *within* a folder (excluding the folder itself)\n    from the object storage service to a folder.\n\n    Args:\n        from_folder: The path to the folder to download from; this gets prefixed\n            with the bucket_folder.\n        to_folder: The path to download the folder to. If not provided, will default\n            to the current directory.\n        **download_kwargs: Additional keyword arguments to pass to\n            `Blob.download_to_filename`.\n\n    Returns:\n        The absolute path that the folder was downloaded to.\n\n    Examples:\n        Download my_folder to a local folder named my_folder.\n        ```python\n        from prefect_gcp.cloud_storage import GcsBucket\n\n        gcs_bucket = GcsBucket.load(\"my-bucket\")\n        gcs_bucket.download_folder_to_path(\"my_folder\", \"my_folder\")\n        ```\n    \"\"\"\n    if to_folder is None:\n        to_folder = \"\"\n    to_folder = Path(to_folder).absolute()\n\n    blobs = await self.list_blobs(folder=from_folder)\n    if len(blobs) == 0:\n        self.logger.warning(\n            f\"No blobs were downloaded from \"\n            f\"bucket {self.bucket!r} path {from_folder!r}.\"\n        )\n        return to_folder\n\n    # do not call self._join_bucket_folder for list_blobs\n    # because it's built-in to that method already!\n    # however, we still need to do it because we're using relative_to\n    bucket_folder = self._join_bucket_folder(from_folder)\n\n    async_coros = []\n    for blob in blobs:\n        bucket_path = PurePosixPath(blob.name).relative_to(bucket_folder)\n        if str(bucket_path).endswith(\"/\"):\n            continue\n        to_path = to_folder / bucket_path\n        to_path.parent.mkdir(parents=True, exist_ok=True)\n        self.logger.info(\n            f\"Downloading blob from bucket {self.bucket!r} path \"\n            f\"{str(bucket_path)!r} to {to_path}.\"\n        )\n        async_coros.append(\n            run_sync_in_worker_thread(\n                blob.download_to_filename, filename=str(to_path), **download_kwargs\n            )\n        )\n    await asyncio.gather(*async_coros)\n\n    return to_folder\n
"},{"location":"cloud_storage/#prefect_gcp.cloud_storage.GcsBucket.download_object_to_file_object","title":"download_object_to_file_object async","text":"

Downloads an object from the object storage service to a file-like object, which can be a BytesIO object or a BufferedWriter.

Parameters:

from_path (str, required): The path to the blob to download from; this gets prefixed with the bucket_folder.
to_file_object (BinaryIO, required): The file-like object to download the blob to.
**download_kwargs (Dict[str, Any], default {}): Additional keyword arguments to pass to Blob.download_to_file.

Returns:

BinaryIO: The file-like object that the object was downloaded to.

Examples:

Download my_folder/notes.txt object to a BytesIO object.

from io import BytesIO\nfrom prefect_gcp.cloud_storage import GcsBucket\n\ngcs_bucket = GcsBucket.load(\"my-bucket\")\nwith BytesIO() as buf:\n    gcs_bucket.download_object_to_file_object(\"my_folder/notes.txt\", buf)\n

Download my_folder/notes.txt object to a BufferedWriter.

from prefect_gcp.cloud_storage import GcsBucket\n\ngcs_bucket = GcsBucket.load(\"my-bucket\")\nwith open(\"notes.txt\", \"wb\") as f:\n    gcs_bucket.download_object_to_file_object(\"my_folder/notes.txt\", f)\n

Source code in prefect_gcp/cloud_storage.py
@sync_compatible\nasync def download_object_to_file_object(\n    self,\n    from_path: str,\n    to_file_object: BinaryIO,\n    **download_kwargs: Dict[str, Any],\n) -> BinaryIO:\n    \"\"\"\n    Downloads an object from the object storage service to a file-like object,\n    which can be a BytesIO object or a BufferedWriter.\n\n    Args:\n        from_path: The path to the blob to download from; this gets prefixed\n            with the bucket_folder.\n        to_file_object: The file-like object to download the blob to.\n        **download_kwargs: Additional keyword arguments to pass to\n            `Blob.download_to_file`.\n\n    Returns:\n        The file-like object that the object was downloaded to.\n\n    Examples:\n        Download my_folder/notes.txt object to a BytesIO object.\n        ```python\n        from io import BytesIO\n        from prefect_gcp.cloud_storage import GcsBucket\n\n        gcs_bucket = GcsBucket.load(\"my-bucket\")\n        with BytesIO() as buf:\n            gcs_bucket.download_object_to_file_object(\"my_folder/notes.txt\", buf)\n        ```\n\n        Download my_folder/notes.txt object to a BufferedWriter.\n        ```python\n            from prefect_gcp.cloud_storage import GcsBucket\n\n            gcs_bucket = GcsBucket.load(\"my-bucket\")\n            with open(\"notes.txt\", \"wb\") as f:\n                gcs_bucket.download_object_to_file_object(\"my_folder/notes.txt\", f)\n        ```\n    \"\"\"\n    bucket = await self.get_bucket()\n\n    bucket_path = self._join_bucket_folder(from_path)\n    blob = bucket.blob(bucket_path)\n    self.logger.info(\n        f\"Downloading blob from bucket {self.bucket!r} path {bucket_path!r}\"\n        f\"to file object.\"\n    )\n\n    await run_sync_in_worker_thread(\n        blob.download_to_file, file_obj=to_file_object, **download_kwargs\n    )\n    return to_file_object\n
"},{"location":"cloud_storage/#prefect_gcp.cloud_storage.GcsBucket.download_object_to_path","title":"download_object_to_path async","text":"

Downloads an object from the object storage service to a path.

Parameters:

from_path (str, required): The path to the blob to download; this gets prefixed with the bucket_folder.
to_path (Optional[Union[str, Path]], default None): The path to download the blob to. If not provided, the blob's name will be used.
**download_kwargs (Dict[str, Any], default {}): Additional keyword arguments to pass to Blob.download_to_filename.

Returns:

Path: The absolute path that the object was downloaded to.

Examples:

Download my_folder/notes.txt object to notes.txt.

from prefect_gcp.cloud_storage import GcsBucket\n\ngcs_bucket = GcsBucket.load(\"my-bucket\")\ngcs_bucket.download_object_to_path(\"my_folder/notes.txt\", \"notes.txt\")\n

Source code in prefect_gcp/cloud_storage.py
@sync_compatible\nasync def download_object_to_path(\n    self,\n    from_path: str,\n    to_path: Optional[Union[str, Path]] = None,\n    **download_kwargs: Dict[str, Any],\n) -> Path:\n    \"\"\"\n    Downloads an object from the object storage service to a path.\n\n    Args:\n        from_path: The path to the blob to download; this gets prefixed\n            with the bucket_folder.\n        to_path: The path to download the blob to. If not provided, the\n            blob's name will be used.\n        **download_kwargs: Additional keyword arguments to pass to\n            `Blob.download_to_filename`.\n\n    Returns:\n        The absolute path that the object was downloaded to.\n\n    Examples:\n        Download my_folder/notes.txt object to notes.txt.\n        ```python\n        from prefect_gcp.cloud_storage import GcsBucket\n\n        gcs_bucket = GcsBucket.load(\"my-bucket\")\n        gcs_bucket.download_object_to_path(\"my_folder/notes.txt\", \"notes.txt\")\n        ```\n    \"\"\"\n    if to_path is None:\n        to_path = Path(from_path).name\n\n    # making path absolute, but converting back to str here\n    # since !r looks nicer that way and filename arg expects str\n    to_path = str(Path(to_path).absolute())\n\n    bucket = await self.get_bucket()\n    bucket_path = self._join_bucket_folder(from_path)\n    blob = bucket.blob(bucket_path)\n    self.logger.info(\n        f\"Downloading blob from bucket {self.bucket!r} path {bucket_path!r}\"\n        f\"to {to_path!r}.\"\n    )\n\n    await run_sync_in_worker_thread(\n        blob.download_to_filename, filename=to_path, **download_kwargs\n    )\n    return Path(to_path)\n
"},{"location":"cloud_storage/#prefect_gcp.cloud_storage.GcsBucket.get_bucket","title":"get_bucket async","text":"

Returns the bucket object.

Returns:

Bucket: The bucket object.

Examples:

Get the bucket object.

from prefect_gcp.cloud_storage import GcsBucket\n\ngcs_bucket = GcsBucket.load(\"my-bucket\")\ngcs_bucket.get_bucket()\n

Source code in prefect_gcp/cloud_storage.py
@sync_compatible\nasync def get_bucket(self) -> \"Bucket\":\n    \"\"\"\n    Returns the bucket object.\n\n    Returns:\n        The bucket object.\n\n    Examples:\n        Get the bucket object.\n        ```python\n        from prefect_gcp.cloud_storage import GcsBucket\n\n        gcs_bucket = GcsBucket.load(\"my-bucket\")\n        gcs_bucket.get_bucket()\n        ```\n    \"\"\"\n    self.logger.info(f\"Getting bucket {self.bucket!r}.\")\n    client = self.gcp_credentials.get_cloud_storage_client()\n    bucket = await run_sync_in_worker_thread(client.get_bucket, self.bucket)\n    return bucket\n
"},{"location":"cloud_storage/#prefect_gcp.cloud_storage.GcsBucket.get_directory","title":"get_directory async","text":"

Copies a folder from the configured GCS bucket to a local directory. Defaults to copying the entire contents of the block's bucket_folder to the current working directory.

Parameters:

from_path (Optional[str], default None): Path in GCS bucket to download from. Defaults to the block's configured bucket_folder.
local_path (Optional[str], default None): Local path to download GCS bucket contents to. Defaults to the current working directory.

Returns:

List[Union[str, Path]]: A list of downloaded file paths.
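
A minimal usage sketch (not part of the source docstring; the block name \"my-bucket\" and the local folder name are assumptions):

from prefect_gcp.cloud_storage import GcsBucket\n\n# Load a previously saved GcsBucket block (assumed to exist)\ngcs_bucket = GcsBucket.load(\"my-bucket\")\n\n# Copy the configured bucket_folder's contents into ./downloaded_files\ngcs_bucket.get_directory(local_path=\"downloaded_files\")\n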

Source code in prefect_gcp/cloud_storage.py
@sync_compatible\nasync def get_directory(\n    self, from_path: Optional[str] = None, local_path: Optional[str] = None\n) -> List[Union[str, Path]]:\n    \"\"\"\n    Copies a folder from the configured GCS bucket to a local directory.\n    Defaults to copying the entire contents of the block's bucket_folder\n    to the current working directory.\n\n    Args:\n        from_path: Path in GCS bucket to download from. Defaults to the block's\n            configured bucket_folder.\n        local_path: Local path to download GCS bucket contents to.\n            Defaults to the current working directory.\n\n    Returns:\n        A list of downloaded file paths.\n    \"\"\"\n    from_path = (\n        self.bucket_folder if from_path is None else self._resolve_path(from_path)\n    )\n\n    if local_path is None:\n        local_path = os.path.abspath(\".\")\n    else:\n        local_path = os.path.abspath(os.path.expanduser(local_path))\n\n    project = self.gcp_credentials.project\n    client = self.gcp_credentials.get_cloud_storage_client(project=project)\n\n    blobs = await run_sync_in_worker_thread(\n        client.list_blobs, self.bucket, prefix=from_path\n    )\n\n    file_paths = []\n    for blob in blobs:\n        blob_path = blob.name\n        if blob_path[-1] == \"/\":\n            # object is a folder and will be created if it contains any objects\n            continue\n        local_file_path = os.path.join(local_path, blob_path)\n        os.makedirs(os.path.dirname(local_file_path), exist_ok=True)\n\n        with disable_run_logger():\n            file_path = await cloud_storage_download_blob_to_file.fn(\n                bucket=self.bucket,\n                blob=blob_path,\n                path=local_file_path,\n                gcp_credentials=self.gcp_credentials,\n            )\n            file_paths.append(file_path)\n    return file_paths\n
"},{"location":"cloud_storage/#prefect_gcp.cloud_storage.GcsBucket.list_blobs","title":"list_blobs async","text":"

Lists all blobs in the bucket that are in a folder. Folders are not included in the output.

Parameters:

folder (str, default ''): The folder to list blobs from.

Returns:

List[Blob]: A list of Blob objects.

Examples:

Get all blobs from a folder named \"prefect\".

from prefect_gcp.cloud_storage import GcsBucket\n\ngcs_bucket = GcsBucket.load(\"my-bucket\")\ngcs_bucket.list_blobs(\"prefect\")\n

Source code in prefect_gcp/cloud_storage.py
@sync_compatible\nasync def list_blobs(self, folder: str = \"\") -> List[\"Blob\"]:\n    \"\"\"\n    Lists all blobs in the bucket that are in a folder.\n    Folders are not included in the output.\n\n    Args:\n        folder: The folder to list blobs from.\n\n    Returns:\n        A list of Blob objects.\n\n    Examples:\n        Get all blobs from a folder named \"prefect\".\n        ```python\n        from prefect_gcp.cloud_storage import GcsBucket\n\n        gcs_bucket = GcsBucket.load(\"my-bucket\")\n        gcs_bucket.list_blobs(\"prefect\")\n        ```\n    \"\"\"\n    client = self.gcp_credentials.get_cloud_storage_client()\n\n    bucket_path = self._join_bucket_folder(folder)\n    if bucket_path is None:\n        self.logger.info(f\"Listing blobs in bucket {self.bucket!r}.\")\n    else:\n        self.logger.info(\n            f\"Listing blobs in folder {bucket_path!r} in bucket {self.bucket!r}.\"\n        )\n    blobs = await run_sync_in_worker_thread(\n        client.list_blobs, self.bucket, prefix=bucket_path\n    )\n\n    # Ignore folders\n    return [blob for blob in blobs if not blob.name.endswith(\"/\")]\n
"},{"location":"cloud_storage/#prefect_gcp.cloud_storage.GcsBucket.list_folders","title":"list_folders async","text":"

Lists all folders and subfolders in the bucket.

Parameters:

folder (str, default ''): List all folders and subfolders inside given folder.

Returns:

List[str]: A list of folders.

Examples:

Get all folders from a bucket named \"my-bucket\".

from prefect_gcp.cloud_storage import GcsBucket\n\ngcs_bucket = GcsBucket.load(\"my-bucket\")\ngcs_bucket.list_folders()\n

Get all folders from a folder called \"years\".

from prefect_gcp.cloud_storage import GcsBucket\n\ngcs_bucket = GcsBucket.load(\"my-bucket\")\ngcs_bucket.list_folders(\"years\")\n

Source code in prefect_gcp/cloud_storage.py
@sync_compatible\nasync def list_folders(self, folder: str = \"\") -> List[str]:\n    \"\"\"\n    Lists all folders and subfolders in the bucket.\n\n    Args:\n        folder: List all folders and subfolders inside given folder.\n\n    Returns:\n        A list of folders.\n\n    Examples:\n        Get all folders from a bucket named \"my-bucket\".\n        ```python\n        from prefect_gcp.cloud_storage import GcsBucket\n\n        gcs_bucket = GcsBucket.load(\"my-bucket\")\n        gcs_bucket.list_folders()\n        ```\n\n        Get all folders from a folder called years\n        ```python\n        from prefect_gcp.cloud_storage import GcsBucket\n\n        gcs_bucket = GcsBucket.load(\"my-bucket\")\n        gcs_bucket.list_folders(\"years\")\n        ```\n    \"\"\"\n\n    # Beware of calling _join_bucket_folder twice, see note in method.\n    # However, we just want to use it to check if we are listing the root folder\n    bucket_path = self._join_bucket_folder(folder)\n    if bucket_path is None:\n        self.logger.info(f\"Listing folders in bucket {self.bucket!r}.\")\n    else:\n        self.logger.info(\n            f\"Listing folders in {bucket_path!r} in bucket {self.bucket!r}.\"\n        )\n\n    blobs = await self.list_blobs(folder)\n    # gets all folders with full path\n    folders = {str(PurePosixPath(blob.name).parent) for blob in blobs}\n\n    return [folder for folder in folders if folder != \".\"]\n
"},{"location":"cloud_storage/#prefect_gcp.cloud_storage.GcsBucket.put_directory","title":"put_directory async","text":"

Uploads a directory from a given local path to the configured GCS bucket in a given folder.

Defaults to uploading the entire contents of the current working directory to the block's bucket_folder.

Parameters:

local_path (Optional[str], default None): Path to local directory to upload from.
to_path (Optional[str], default None): Path in GCS bucket to upload to. Defaults to block's configured bucket_folder.
ignore_file (Optional[str], default None): Path to file containing gitignore style expressions for filepaths to ignore.

Returns:

int: The number of files uploaded.
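
A minimal usage sketch (not part of the source docstring; the block name \"my-bucket\" and the ignore file name are assumptions):

from prefect_gcp.cloud_storage import GcsBucket\n\ngcs_bucket = GcsBucket.load(\"my-bucket\")\n\n# Upload the current working directory to the block's bucket_folder,\n# skipping paths matched by patterns in the (assumed) .gcsignore file\nuploaded = gcs_bucket.put_directory(local_path=\".\", ignore_file=\".gcsignore\")\nprint(f\"Uploaded {uploaded} files\")\n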

Source code in prefect_gcp/cloud_storage.py
@sync_compatible\nasync def put_directory(\n    self,\n    local_path: Optional[str] = None,\n    to_path: Optional[str] = None,\n    ignore_file: Optional[str] = None,\n) -> int:\n    \"\"\"\n    Uploads a directory from a given local path to the configured GCS bucket in a\n    given folder.\n\n    Defaults to uploading the entire contents the current working directory to the\n    block's bucket_folder.\n\n    Args:\n        local_path: Path to local directory to upload from.\n        to_path: Path in GCS bucket to upload to. Defaults to block's configured\n            bucket_folder.\n        ignore_file: Path to file containing gitignore style expressions for\n            filepaths to ignore.\n\n    Returns:\n        The number of files uploaded.\n    \"\"\"\n    if local_path is None:\n        local_path = os.path.abspath(\".\")\n    else:\n        local_path = os.path.expanduser(local_path)\n\n    to_path = self.bucket_folder if to_path is None else self._resolve_path(to_path)\n\n    included_files = None\n    if ignore_file:\n        with open(ignore_file, \"r\") as f:\n            ignore_patterns = f.readlines()\n        included_files = filter_files(local_path, ignore_patterns)\n\n    uploaded_file_count = 0\n    for local_file_path in Path(local_path).rglob(\"*\"):\n        if (\n            included_files is not None\n            and local_file_path.name not in included_files\n        ):\n            continue\n        elif not local_file_path.is_dir():\n            remote_file_path = str(\n                PurePosixPath(to_path, local_file_path.relative_to(local_path))\n            )\n            local_file_content = local_file_path.read_bytes()\n            await self.write_path(remote_file_path, content=local_file_content)\n            uploaded_file_count += 1\n\n    return uploaded_file_count\n
"},{"location":"cloud_storage/#prefect_gcp.cloud_storage.GcsBucket.read_path","title":"read_path async","text":"

Read specified path from GCS and return contents. Provide the entire path to the key in GCS.

Parameters:

path (str, required): Entire path to (and including) the key.

Returns:

bytes: A bytes or string representation of the blob object.
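
A minimal usage sketch (not part of the source docstring; the block name \"my-bucket\" and the blob path are assumptions):

from prefect_gcp.cloud_storage import GcsBucket\n\ngcs_bucket = GcsBucket.load(\"my-bucket\")\n\n# Read the blob at my_folder/notes.txt; the path is resolved against bucket_folder\ncontents = gcs_bucket.read_path(\"my_folder/notes.txt\")\nprint(contents.decode(\"utf-8\"))\n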

Source code in prefect_gcp/cloud_storage.py
@sync_compatible\nasync def read_path(self, path: str) -> bytes:\n    \"\"\"\n    Read specified path from GCS and return contents. Provide the entire\n    path to the key in GCS.\n\n    Args:\n        path: Entire path to (and including) the key.\n\n    Returns:\n        A bytes or string representation of the blob object.\n    \"\"\"\n    path = self._resolve_path(path)\n    with disable_run_logger():\n        contents = await cloud_storage_download_blob_as_bytes.fn(\n            bucket=self.bucket, blob=path, gcp_credentials=self.gcp_credentials\n        )\n    return contents\n
"},{"location":"cloud_storage/#prefect_gcp.cloud_storage.GcsBucket.upload_from_dataframe","title":"upload_from_dataframe async","text":"

Upload a Pandas DataFrame to Google Cloud Storage in various formats.

This function uploads the data in a Pandas DataFrame to Google Cloud Storage in a specified format, such as .csv, .csv.gz, .parquet, .parquet.snappy, and .parquet.gz.

Parameters:

df (DataFrame, required): The Pandas DataFrame to be uploaded.
to_path (str, required): The destination path for the uploaded DataFrame.
serialization_format (Union[str, DataFrameSerializationFormat], default CSV_GZIP): The format to serialize the DataFrame into. When passed as a str, the valid options are: 'csv', 'csv_gzip', 'parquet', 'parquet_snappy', 'parquet_gzip'. Defaults to DataFrameSerializationFormat.CSV_GZIP.
**upload_kwargs (Dict[str, Any], default {}): Additional keyword arguments to pass to the underlying Blob.upload_from_dataframe method.

Returns:

str: The path that the object was uploaded to.
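
A minimal usage sketch (not part of the source docstring; the block name \"my-bucket\", the destination path, and the sample DataFrame are assumptions):

import pandas as pd\nfrom prefect_gcp.cloud_storage import GcsBucket\n\ngcs_bucket = GcsBucket.load(\"my-bucket\")\ndf = pd.DataFrame({\"a\": [1, 2], \"b\": [3, 4]})\n\n# Serialize as gzip-compressed parquet; the file extension is adjusted to match the format\ngcs_bucket.upload_from_dataframe(\n    df, to_path=\"my_folder/data\", serialization_format=\"parquet_gzip\"\n)\n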

Source code in prefect_gcp/cloud_storage.py
@sync_compatible\nasync def upload_from_dataframe(\n    self,\n    df: \"DataFrame\",\n    to_path: str,\n    serialization_format: Union[\n        str, DataFrameSerializationFormat\n    ] = DataFrameSerializationFormat.CSV_GZIP,\n    **upload_kwargs: Dict[str, Any],\n) -> str:\n    \"\"\"Upload a Pandas DataFrame to Google Cloud Storage in various formats.\n\n    This function uploads the data in a Pandas DataFrame to Google Cloud Storage\n    in a specified format, such as .csv, .csv.gz, .parquet,\n    .parquet.snappy, and .parquet.gz.\n\n    Args:\n        df: The Pandas DataFrame to be uploaded.\n        to_path: The destination path for the uploaded DataFrame.\n        serialization_format: The format to serialize the DataFrame into.\n            When passed as a `str`, the valid options are:\n            'csv', 'csv_gzip',  'parquet', 'parquet_snappy', 'parquet_gzip'.\n            Defaults to `DataFrameSerializationFormat.CSV_GZIP`.\n        **upload_kwargs: Additional keyword arguments to pass to the underlying\n        `Blob.upload_from_dataframe` method.\n\n    Returns:\n        The path that the object was uploaded to.\n    \"\"\"\n    if isinstance(serialization_format, str):\n        serialization_format = DataFrameSerializationFormat[\n            serialization_format.upper()\n        ]\n\n    with BytesIO() as bytes_buffer:\n        if serialization_format.format == \"parquet\":\n            df.to_parquet(\n                path=bytes_buffer,\n                compression=serialization_format.compression,\n                index=False,\n            )\n        elif serialization_format.format == \"csv\":\n            df.to_csv(\n                path_or_buf=bytes_buffer,\n                compression=serialization_format.compression,\n                index=False,\n            )\n\n        bytes_buffer.seek(0)\n        to_path = serialization_format.fix_extension_with(gcs_blob_path=to_path)\n\n        return await self.upload_from_file_object(\n            from_file_object=bytes_buffer,\n            to_path=to_path,\n            **{\"content_type\": serialization_format.content_type, **upload_kwargs},\n        )\n
"},{"location":"cloud_storage/#prefect_gcp.cloud_storage.GcsBucket.upload_from_file_object","title":"upload_from_file_object async","text":"

Uploads an object to the object storage service from a file-like object, which can be a BytesIO object or a BufferedReader.

Parameters:

from_file_object (BinaryIO, required): The file-like object to upload from.
to_path (str, required): The path to upload the object to; this gets prefixed with the bucket_folder.
**upload_kwargs (default {}): Additional keyword arguments to pass to Blob.upload_from_file.

Returns:

str: The path that the object was uploaded to.

Examples:

Upload my_folder/notes.txt object to a BytesIO object.

from io import BytesIO\nfrom prefect_gcp.cloud_storage import GcsBucket\n\ngcs_bucket = GcsBucket.load(\"my-bucket\")\nwith open(\"notes.txt\", \"rb\") as f:\n    gcs_bucket.upload_from_file_object(f, \"my_folder/notes.txt\")\n

Upload BufferedReader object to my_folder/notes.txt.

from io import BufferedReader\nfrom prefect_gcp.cloud_storage import GcsBucket\n\ngcs_bucket = GcsBucket.load(\"my-bucket\")\nwith open(\"notes.txt\", \"rb\") as f:\n    gcs_bucket.upload_from_file_object(\n        BufferedReader(f), \"my_folder/notes.txt\"\n    )\n

Source code in prefect_gcp/cloud_storage.py
@sync_compatible\nasync def upload_from_file_object(\n    self, from_file_object: BinaryIO, to_path: str, **upload_kwargs\n) -> str:\n    \"\"\"\n    Uploads an object to the object storage service from a file-like object,\n    which can be a BytesIO object or a BufferedReader.\n\n    Args:\n        from_file_object: The file-like object to upload from.\n        to_path: The path to upload the object to; this gets prefixed\n            with the bucket_folder.\n        **upload_kwargs: Additional keyword arguments to pass to\n            `Blob.upload_from_file`.\n\n    Returns:\n        The path that the object was uploaded to.\n\n    Examples:\n        Upload my_folder/notes.txt object to a BytesIO object.\n        ```python\n        from io import BytesIO\n        from prefect_gcp.cloud_storage import GcsBucket\n\n        gcs_bucket = GcsBucket.load(\"my-bucket\")\n        with open(\"notes.txt\", \"rb\") as f:\n            gcs_bucket.upload_from_file_object(f, \"my_folder/notes.txt\")\n        ```\n\n        Upload BufferedReader object to my_folder/notes.txt.\n        ```python\n        from io import BufferedReader\n        from prefect_gcp.cloud_storage import GcsBucket\n\n        gcs_bucket = GcsBucket.load(\"my-bucket\")\n        with open(\"notes.txt\", \"rb\") as f:\n            gcs_bucket.upload_from_file_object(\n                BufferedReader(f), \"my_folder/notes.txt\"\n            )\n        ```\n    \"\"\"\n    bucket = await self.get_bucket()\n\n    bucket_path = self._join_bucket_folder(to_path)\n    blob = bucket.blob(bucket_path)\n    self.logger.info(\n        f\"Uploading from file object to the bucket \"\n        f\"{self.bucket!r} path {bucket_path!r}.\"\n    )\n\n    await run_sync_in_worker_thread(\n        blob.upload_from_file, from_file_object, **upload_kwargs\n    )\n    return bucket_path\n
"},{"location":"cloud_storage/#prefect_gcp.cloud_storage.GcsBucket.upload_from_folder","title":"upload_from_folder async","text":"

Uploads files within a folder (excluding the folder itself) to the object storage service folder.

Parameters:

from_folder (Union[str, Path], required): The path to the folder to upload from.
to_folder (Optional[str], default None): The path to upload the folder to. If not provided, will default to bucket_folder or the base directory of the bucket.
**upload_kwargs (Dict[str, Any], default {}): Additional keyword arguments to pass to Blob.upload_from_filename.

Returns:

str: The path that the folder was uploaded to.

Examples:

Upload local folder my_folder to the bucket's folder my_folder.

from prefect_gcp.cloud_storage import GcsBucket\n\ngcs_bucket = GcsBucket.load(\"my-bucket\")\ngcs_bucket.upload_from_folder(\"my_folder\")\n

Source code in prefect_gcp/cloud_storage.py
@sync_compatible\nasync def upload_from_folder(\n    self,\n    from_folder: Union[str, Path],\n    to_folder: Optional[str] = None,\n    **upload_kwargs: Dict[str, Any],\n) -> str:\n    \"\"\"\n    Uploads files *within* a folder (excluding the folder itself)\n    to the object storage service folder.\n\n    Args:\n        from_folder: The path to the folder to upload from.\n        to_folder: The path to upload the folder to. If not provided, will default\n            to bucket_folder or the base directory of the bucket.\n        **upload_kwargs: Additional keyword arguments to pass to\n            `Blob.upload_from_filename`.\n\n    Returns:\n        The path that the folder was uploaded to.\n\n    Examples:\n        Upload local folder my_folder to the bucket's folder my_folder.\n        ```python\n        from prefect_gcp.cloud_storage import GcsBucket\n\n        gcs_bucket = GcsBucket.load(\"my-bucket\")\n        gcs_bucket.upload_from_folder(\"my_folder\")\n        ```\n    \"\"\"\n    from_folder = Path(from_folder)\n    # join bucket folder expects string for the first input\n    # when it returns None, we need to convert it back to empty string\n    # so relative_to works\n    bucket_folder = self._join_bucket_folder(to_folder or \"\") or \"\"\n\n    num_uploaded = 0\n    bucket = await self.get_bucket()\n\n    async_coros = []\n    for from_path in from_folder.rglob(\"**/*\"):\n        if from_path.is_dir():\n            continue\n        bucket_path = str(Path(bucket_folder) / from_path.relative_to(from_folder))\n        self.logger.info(\n            f\"Uploading from {str(from_path)!r} to the bucket \"\n            f\"{self.bucket!r} path {bucket_path!r}.\"\n        )\n        blob = bucket.blob(bucket_path)\n        async_coros.append(\n            run_sync_in_worker_thread(\n                blob.upload_from_filename, filename=from_path, **upload_kwargs\n            )\n        )\n        num_uploaded += 1\n    await asyncio.gather(*async_coros)\n    if num_uploaded == 0:\n        self.logger.warning(f\"No files were uploaded from {from_folder}.\")\n    return bucket_folder\n
"},{"location":"cloud_storage/#prefect_gcp.cloud_storage.GcsBucket.upload_from_path","title":"upload_from_path async","text":"

Uploads an object from a path to the object storage service.

Parameters:

from_path (Union[str, Path], required): The path to the file to upload from.
to_path (Optional[str], default None): The path to upload the file to. If not provided, will use the file name of from_path; this gets prefixed with the bucket_folder.
**upload_kwargs (Dict[str, Any], default {}): Additional keyword arguments to pass to Blob.upload_from_filename.

Returns:

str: The path that the object was uploaded to.

Examples:

Upload notes.txt to my_folder/notes.txt.

from prefect_gcp.cloud_storage import GcsBucket\n\ngcs_bucket = GcsBucket.load(\"my-bucket\")\ngcs_bucket.upload_from_path(\"notes.txt\", \"my_folder/notes.txt\")\n

Source code in prefect_gcp/cloud_storage.py
@sync_compatible\nasync def upload_from_path(\n    self,\n    from_path: Union[str, Path],\n    to_path: Optional[str] = None,\n    **upload_kwargs: Dict[str, Any],\n) -> str:\n    \"\"\"\n    Uploads an object from a path to the object storage service.\n\n    Args:\n        from_path: The path to the file to upload from.\n        to_path: The path to upload the file to. If not provided, will use\n            the file name of from_path; this gets prefixed\n            with the bucket_folder.\n        **upload_kwargs: Additional keyword arguments to pass to\n            `Blob.upload_from_filename`.\n\n    Returns:\n        The path that the object was uploaded to.\n\n    Examples:\n        Upload notes.txt to my_folder/notes.txt.\n        ```python\n        from prefect_gcp.cloud_storage import GcsBucket\n\n        gcs_bucket = GcsBucket.load(\"my-bucket\")\n        gcs_bucket.upload_from_path(\"notes.txt\", \"my_folder/notes.txt\")\n        ```\n    \"\"\"\n    if to_path is None:\n        to_path = Path(from_path).name\n\n    bucket_path = self._join_bucket_folder(to_path)\n    bucket = await self.get_bucket()\n    blob = bucket.blob(bucket_path)\n    self.logger.info(\n        f\"Uploading from {from_path!r} to the bucket \"\n        f\"{self.bucket!r} path {bucket_path!r}.\"\n    )\n\n    await run_sync_in_worker_thread(\n        blob.upload_from_filename, filename=from_path, **upload_kwargs\n    )\n    return bucket_path\n
"},{"location":"cloud_storage/#prefect_gcp.cloud_storage.GcsBucket.write_path","title":"write_path async","text":"

Writes to a GCS bucket.

Parameters:

path (str, required): The key name. Each object in your bucket has a unique key (or key name).
content (bytes, required): What you are uploading to GCS Bucket.

Returns:

str: The path that the contents were written to.
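
A minimal usage sketch (not part of the source docstring; the block name \"my-bucket\" and the key name are assumptions):

from prefect_gcp.cloud_storage import GcsBucket\n\ngcs_bucket = GcsBucket.load(\"my-bucket\")\n\n# Write bytes to my_folder/notes.txt; the path is resolved against bucket_folder\npath = gcs_bucket.write_path(\"my_folder/notes.txt\", content=b\"Hello, GCS!\")\n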

Source code in prefect_gcp/cloud_storage.py
@sync_compatible\nasync def write_path(self, path: str, content: bytes) -> str:\n    \"\"\"\n    Writes to an GCS bucket.\n\n    Args:\n        path: The key name. Each object in your bucket has a unique\n            key (or key name).\n        content: What you are uploading to GCS Bucket.\n\n    Returns:\n        The path that the contents were written to.\n    \"\"\"\n    path = self._resolve_path(path)\n    with disable_run_logger():\n        await cloud_storage_upload_blob_from_string.fn(\n            data=content,\n            bucket=self.bucket,\n            blob=path,\n            gcp_credentials=self.gcp_credentials,\n        )\n    return path\n
"},{"location":"cloud_storage/#prefect_gcp.cloud_storage-functions","title":"Functions","text":""},{"location":"cloud_storage/#prefect_gcp.cloud_storage.cloud_storage_copy_blob","title":"cloud_storage_copy_blob async","text":"

Copies data from one Google Cloud Storage bucket to another, without downloading it locally.

Parameters:

source_bucket (str, required): Source bucket name.
dest_bucket (str, required): Destination bucket name.
source_blob (str, required): Source blob name.
gcp_credentials (GcpCredentials, required): Credentials to use for authentication with GCP.
dest_blob (Optional[str], default None): Destination blob name; if not provided, defaults to source_blob.
timeout (Union[float, Tuple[float, float]], default 60): The number of seconds the transport should wait for the server response. Can also be passed as a tuple (connect_timeout, read_timeout).
project (Optional[str], default None): Name of the project to use; overrides the gcp_credentials project if provided.
**copy_kwargs (Dict[str, Any], default {}): Additional keyword arguments to pass to Bucket.copy_blob.

Returns:

str: Destination blob name.

Example

Copies blob from one bucket to another.

from prefect import flow\nfrom prefect_gcp import GcpCredentials\nfrom prefect_gcp.cloud_storage import cloud_storage_copy_blob\n\n@flow()\ndef example_cloud_storage_copy_blob_flow():\n    gcp_credentials = GcpCredentials(\n        service_account_file=\"/path/to/service/account/keyfile.json\")\n    blob = cloud_storage_copy_blob(\n        \"source_bucket\",\n        \"dest_bucket\",\n        \"source_blob\",\n        gcp_credentials\n    )\n    return blob\n\nexample_cloud_storage_copy_blob_flow()\n

Source code in prefect_gcp/cloud_storage.py
@task\nasync def cloud_storage_copy_blob(\n    source_bucket: str,\n    dest_bucket: str,\n    source_blob: str,\n    gcp_credentials: GcpCredentials,\n    dest_blob: Optional[str] = None,\n    timeout: Union[float, Tuple[float, float]] = 60,\n    project: Optional[str] = None,\n    **copy_kwargs: Dict[str, Any],\n) -> str:\n    \"\"\"\n    Copies data from one Google Cloud Storage bucket to another,\n    without downloading it locally.\n\n    Args:\n        source_bucket: Source bucket name.\n        dest_bucket: Destination bucket name.\n        source_blob: Source blob name.\n        gcp_credentials: Credentials to use for authentication with GCP.\n        dest_blob: Destination blob name; if not provided, defaults to source_blob.\n        timeout: The number of seconds the transport should wait\n            for the server response. Can also be passed as a tuple\n            (connect_timeout, read_timeout).\n        project: Name of the project to use; overrides the\n            gcp_credentials project if provided.\n        **copy_kwargs: Additional keyword arguments to pass to\n            `Bucket.copy_blob`.\n\n    Returns:\n        Destination blob name.\n\n    Example:\n        Copies blob from one bucket to another.\n        ```python\n        from prefect import flow\n        from prefect_gcp import GcpCredentials\n        from prefect_gcp.cloud_storage import cloud_storage_copy_blob\n\n        @flow()\n        def example_cloud_storage_copy_blob_flow():\n            gcp_credentials = GcpCredentials(\n                service_account_file=\"/path/to/service/account/keyfile.json\")\n            blob = cloud_storage_copy_blob(\n                \"source_bucket\",\n                \"dest_bucket\",\n                \"source_blob\",\n                gcp_credentials\n            )\n            return blob\n\n        example_cloud_storage_copy_blob_flow()\n        ```\n    \"\"\"\n    logger = get_run_logger()\n    logger.info(\n        \"Copying blob named %s from the %s bucket to the %s bucket\",\n        source_blob,\n        source_bucket,\n        dest_bucket,\n    )\n\n    source_bucket_obj = await _get_bucket(\n        source_bucket, gcp_credentials, project=project\n    )\n\n    dest_bucket_obj = await _get_bucket(dest_bucket, gcp_credentials, project=project)\n    if dest_blob is None:\n        dest_blob = source_blob\n\n    source_blob_obj = source_bucket_obj.blob(source_blob)\n    await run_sync_in_worker_thread(\n        source_bucket_obj.copy_blob,\n        blob=source_blob_obj,\n        destination_bucket=dest_bucket_obj,\n        new_name=dest_blob,\n        timeout=timeout,\n        **copy_kwargs,\n    )\n\n    return dest_blob\n
"},{"location":"cloud_storage/#prefect_gcp.cloud_storage.cloud_storage_create_bucket","title":"cloud_storage_create_bucket async","text":"

Creates a bucket.

Parameters:

bucket (str, required): Name of the bucket.
gcp_credentials (GcpCredentials, required): Credentials to use for authentication with GCP.
project (Optional[str], default None): Name of the project to use; overrides the gcp_credentials project if provided.
location (Optional[str], default None): Location of the bucket.
**create_kwargs (Dict[str, Any], default {}): Additional keyword arguments to pass to client.create_bucket.

Returns:

str: The bucket name.

Example

Creates a bucket named \"prefect\".

from prefect import flow\nfrom prefect_gcp import GcpCredentials\nfrom prefect_gcp.cloud_storage import cloud_storage_create_bucket\n\n@flow()\ndef example_cloud_storage_create_bucket_flow():\n    gcp_credentials = GcpCredentials(\n        service_account_file=\"/path/to/service/account/keyfile.json\")\n    bucket = cloud_storage_create_bucket(\"prefect\", gcp_credentials)\n\nexample_cloud_storage_create_bucket_flow()\n

Source code in prefect_gcp/cloud_storage.py
@task\nasync def cloud_storage_create_bucket(\n    bucket: str,\n    gcp_credentials: GcpCredentials,\n    project: Optional[str] = None,\n    location: Optional[str] = None,\n    **create_kwargs: Dict[str, Any],\n) -> str:\n    \"\"\"\n    Creates a bucket.\n\n    Args:\n        bucket: Name of the bucket.\n        gcp_credentials: Credentials to use for authentication with GCP.\n        project: Name of the project to use; overrides the\n            gcp_credentials project if provided.\n        location: Location of the bucket.\n        **create_kwargs: Additional keyword arguments to pass to `client.create_bucket`.\n\n    Returns:\n        The bucket name.\n\n    Example:\n        Creates a bucket named \"prefect\".\n        ```python\n        from prefect import flow\n        from prefect_gcp import GcpCredentials\n        from prefect_gcp.cloud_storage import cloud_storage_create_bucket\n\n        @flow()\n        def example_cloud_storage_create_bucket_flow():\n            gcp_credentials = GcpCredentials(\n                service_account_file=\"/path/to/service/account/keyfile.json\")\n            bucket = cloud_storage_create_bucket(\"prefect\", gcp_credentials)\n\n        example_cloud_storage_create_bucket_flow()\n        ```\n    \"\"\"\n    logger = get_run_logger()\n    logger.info(\"Creating %s bucket\", bucket)\n\n    client = gcp_credentials.get_cloud_storage_client(project=project)\n    await run_sync_in_worker_thread(\n        client.create_bucket, bucket, location=location, **create_kwargs\n    )\n    return bucket\n
"},{"location":"cloud_storage/#prefect_gcp.cloud_storage.cloud_storage_download_blob_as_bytes","title":"cloud_storage_download_blob_as_bytes async","text":"

Downloads a blob as bytes.

Parameters:

bucket (str, required): Name of the bucket.
blob (str, required): Name of the Cloud Storage blob.
gcp_credentials (GcpCredentials, required): Credentials to use for authentication with GCP.
chunk_size (int, default None): The size of a chunk of data whenever iterating (in bytes). This must be a multiple of 256 KB per the API specification.
encryption_key (Optional[str], default None): An encryption key.
timeout (Union[float, Tuple[float, float]], default 60): The number of seconds the transport should wait for the server response. Can also be passed as a tuple (connect_timeout, read_timeout).
project (Optional[str], default None): Name of the project to use; overrides the gcp_credentials project if provided.
**download_kwargs (Dict[str, Any], default {}): Additional keyword arguments to pass to Blob.download_as_bytes.

Returns:

bytes: A bytes or string representation of the blob object.

Example

Downloads blob from bucket.

from prefect import flow\nfrom prefect_gcp import GcpCredentials\nfrom prefect_gcp.cloud_storage import cloud_storage_download_blob_as_bytes\n\n@flow()\ndef example_cloud_storage_download_blob_flow():\n    gcp_credentials = GcpCredentials(\n        service_account_file=\"/path/to/service/account/keyfile.json\")\n    contents = cloud_storage_download_blob_as_bytes(\n        \"bucket\", \"blob\", gcp_credentials)\n    return contents\n\nexample_cloud_storage_download_blob_flow()\n

Source code in prefect_gcp/cloud_storage.py
@task\nasync def cloud_storage_download_blob_as_bytes(\n    bucket: str,\n    blob: str,\n    gcp_credentials: GcpCredentials,\n    chunk_size: Optional[int] = None,\n    encryption_key: Optional[str] = None,\n    timeout: Union[float, Tuple[float, float]] = 60,\n    project: Optional[str] = None,\n    **download_kwargs: Dict[str, Any],\n) -> bytes:\n    \"\"\"\n    Downloads a blob as bytes.\n\n    Args:\n        bucket: Name of the bucket.\n        blob: Name of the Cloud Storage blob.\n        gcp_credentials: Credentials to use for authentication with GCP.\n        chunk_size (int, optional): The size of a chunk of data whenever\n            iterating (in bytes). This must be a multiple of 256 KB\n            per the API specification.\n        encryption_key: An encryption key.\n        timeout: The number of seconds the transport should wait\n            for the server response. Can also be passed as a tuple\n            (connect_timeout, read_timeout).\n        project: Name of the project to use; overrides the\n            gcp_credentials project if provided.\n        **download_kwargs: Additional keyword arguments to pass to\n            `Blob.download_as_bytes`.\n\n    Returns:\n        A bytes or string representation of the blob object.\n\n    Example:\n        Downloads blob from bucket.\n        ```python\n        from prefect import flow\n        from prefect_gcp import GcpCredentials\n        from prefect_gcp.cloud_storage import cloud_storage_download_blob_as_bytes\n\n        @flow()\n        def example_cloud_storage_download_blob_flow():\n            gcp_credentials = GcpCredentials(\n                service_account_file=\"/path/to/service/account/keyfile.json\")\n            contents = cloud_storage_download_blob_as_bytes(\n                \"bucket\", \"blob\", gcp_credentials)\n            return contents\n\n        example_cloud_storage_download_blob_flow()\n        ```\n    \"\"\"\n    logger = get_run_logger()\n    logger.info(\"Downloading blob named %s from the %s bucket\", blob, bucket)\n\n    bucket_obj = await _get_bucket(bucket, gcp_credentials, project=project)\n    blob_obj = bucket_obj.blob(\n        blob, chunk_size=chunk_size, encryption_key=encryption_key\n    )\n\n    contents = await run_sync_in_worker_thread(\n        blob_obj.download_as_bytes, timeout=timeout, **download_kwargs\n    )\n    return contents\n
"},{"location":"cloud_storage/#prefect_gcp.cloud_storage.cloud_storage_download_blob_to_file","title":"cloud_storage_download_blob_to_file async","text":"

Downloads a blob to a file path.

Parameters:

bucket (str, required): Name of the bucket.
blob (str, required): Name of the Cloud Storage blob.
path (Union[str, Path], required): Downloads the contents to the provided file path; if the path is a directory, automatically joins the blob name.
gcp_credentials (GcpCredentials, required): Credentials to use for authentication with GCP.
chunk_size (int, default None): The size of a chunk of data whenever iterating (in bytes). This must be a multiple of 256 KB per the API specification.
encryption_key (Optional[str], default None): An encryption key.
timeout (Union[float, Tuple[float, float]], default 60): The number of seconds the transport should wait for the server response. Can also be passed as a tuple (connect_timeout, read_timeout).
project (Optional[str], default None): Name of the project to use; overrides the gcp_credentials project if provided.
**download_kwargs (Dict[str, Any], default {}): Additional keyword arguments to pass to Blob.download_to_filename.

Returns:

Union[str, Path]: The path to the blob object.

Example

Downloads blob from bucket.

from prefect import flow\nfrom prefect_gcp import GcpCredentials\nfrom prefect_gcp.cloud_storage import cloud_storage_download_blob_to_file\n\n@flow()\ndef example_cloud_storage_download_blob_flow():\n    gcp_credentials = GcpCredentials(\n        service_account_file=\"/path/to/service/account/keyfile.json\")\n    path = cloud_storage_download_blob_to_file(\n        \"bucket\", \"blob\", \"file_path\", gcp_credentials)\n    return path\n\nexample_cloud_storage_download_blob_flow()\n

Source code in prefect_gcp/cloud_storage.py
@task\nasync def cloud_storage_download_blob_to_file(\n    bucket: str,\n    blob: str,\n    path: Union[str, Path],\n    gcp_credentials: GcpCredentials,\n    chunk_size: Optional[int] = None,\n    encryption_key: Optional[str] = None,\n    timeout: Union[float, Tuple[float, float]] = 60,\n    project: Optional[str] = None,\n    **download_kwargs: Dict[str, Any],\n) -> Union[str, Path]:\n    \"\"\"\n    Downloads a blob to a file path.\n\n    Args:\n        bucket: Name of the bucket.\n        blob: Name of the Cloud Storage blob.\n        path: Downloads the contents to the provided file path;\n            if the path is a directory, automatically joins the blob name.\n        gcp_credentials: Credentials to use for authentication with GCP.\n        chunk_size (int, optional): The size of a chunk of data whenever\n            iterating (in bytes). This must be a multiple of 256 KB\n            per the API specification.\n        encryption_key: An encryption key.\n        timeout: The number of seconds the transport should wait\n            for the server response. Can also be passed as a tuple\n            (connect_timeout, read_timeout).\n        project: Name of the project to use; overrides the\n            gcp_credentials project if provided.\n        **download_kwargs: Additional keyword arguments to pass to\n            `Blob.download_to_filename`.\n\n    Returns:\n        The path to the blob object.\n\n    Example:\n        Downloads blob from bucket.\n        ```python\n        from prefect import flow\n        from prefect_gcp import GcpCredentials\n        from prefect_gcp.cloud_storage import cloud_storage_download_blob_to_file\n\n        @flow()\n        def example_cloud_storage_download_blob_flow():\n            gcp_credentials = GcpCredentials(\n                service_account_file=\"/path/to/service/account/keyfile.json\")\n            path = cloud_storage_download_blob_to_file(\n                \"bucket\", \"blob\", \"file_path\", gcp_credentials)\n            return path\n\n        example_cloud_storage_download_blob_flow()\n        ```\n    \"\"\"\n    logger = get_run_logger()\n    logger.info(\n        \"Downloading blob named %s from the %s bucket to %s\", blob, bucket, path\n    )\n\n    bucket_obj = await _get_bucket(bucket, gcp_credentials, project=project)\n    blob_obj = bucket_obj.blob(\n        blob, chunk_size=chunk_size, encryption_key=encryption_key\n    )\n\n    if os.path.isdir(path):\n        if isinstance(path, Path):\n            path = path.joinpath(blob)  # keep as Path if Path is passed\n        else:\n            path = os.path.join(path, blob)  # keep as str if a str is passed\n\n    await run_sync_in_worker_thread(\n        blob_obj.download_to_filename, path, timeout=timeout, **download_kwargs\n    )\n    return path\n
"},{"location":"cloud_storage/#prefect_gcp.cloud_storage.cloud_storage_upload_blob_from_file","title":"cloud_storage_upload_blob_from_file async","text":"

Uploads a blob from a file path or file-like object. Passing a file-like object is useful when the data was downloaded from the web, since it can bypass writing to disk and upload directly to Cloud Storage.

Parameters:

file (Union[str, Path, BytesIO], required): Path to data or file-like object to upload.
bucket (str, required): Name of the bucket.
blob (str, required): Name of the Cloud Storage blob.
gcp_credentials (GcpCredentials, required): Credentials to use for authentication with GCP.
content_type (Optional[str], default None): Type of content being uploaded.
chunk_size (Optional[int], default None): The size of a chunk of data whenever iterating (in bytes). This must be a multiple of 256 KB per the API specification.
encryption_key (Optional[str], default None): An encryption key.
timeout (Union[float, Tuple[float, float]], default 60): The number of seconds the transport should wait for the server response. Can also be passed as a tuple (connect_timeout, read_timeout).
project (Optional[str], default None): Name of the project to use; overrides the gcp_credentials project if provided.
**upload_kwargs (Dict[str, Any], default {}): Additional keyword arguments to pass to Blob.upload_from_file or Blob.upload_from_filename.

Returns:

str: The blob name.

Example

Uploads blob to bucket.

from prefect import flow\nfrom prefect_gcp import GcpCredentials\nfrom prefect_gcp.cloud_storage import cloud_storage_upload_blob_from_file\n\n@flow()\ndef example_cloud_storage_upload_blob_from_file_flow():\n    gcp_credentials = GcpCredentials(\n        service_account_file=\"/path/to/service/account/keyfile.json\")\n    blob = cloud_storage_upload_blob_from_file(\n        \"/path/somewhere\", \"bucket\", \"blob\", gcp_credentials)\n    return blob\n\nexample_cloud_storage_upload_blob_from_file_flow()\n

Source code in prefect_gcp/cloud_storage.py
@task\nasync def cloud_storage_upload_blob_from_file(\n    file: Union[str, Path, BytesIO],\n    bucket: str,\n    blob: str,\n    gcp_credentials: GcpCredentials,\n    content_type: Optional[str] = None,\n    chunk_size: Optional[int] = None,\n    encryption_key: Optional[str] = None,\n    timeout: Union[float, Tuple[float, float]] = 60,\n    project: Optional[str] = None,\n    **upload_kwargs: Dict[str, Any],\n) -> str:\n    \"\"\"\n    Uploads a blob from file path or file-like object. Usage for passing in\n    file-like object is if the data was downloaded from the web;\n    can bypass writing to disk and directly upload to Cloud Storage.\n\n    Args:\n        file: Path to data or file like object to upload.\n        bucket: Name of the bucket.\n        blob: Name of the Cloud Storage blob.\n        gcp_credentials: Credentials to use for authentication with GCP.\n        content_type: Type of content being uploaded.\n        chunk_size: The size of a chunk of data whenever\n            iterating (in bytes). This must be a multiple of 256 KB\n            per the API specification.\n        encryption_key: An encryption key.\n        timeout: The number of seconds the transport should wait\n            for the server response. Can also be passed as a tuple\n            (connect_timeout, read_timeout).\n        project: Name of the project to use; overrides the\n            gcp_credentials project if provided.\n        **upload_kwargs: Additional keyword arguments to pass to\n            `Blob.upload_from_file` or `Blob.upload_from_filename`.\n\n    Returns:\n        The blob name.\n\n    Example:\n        Uploads blob to bucket.\n        ```python\n        from prefect import flow\n        from prefect_gcp import GcpCredentials\n        from prefect_gcp.cloud_storage import cloud_storage_upload_blob_from_file\n\n        @flow()\n        def example_cloud_storage_upload_blob_from_file_flow():\n            gcp_credentials = GcpCredentials(\n                service_account_file=\"/path/to/service/account/keyfile.json\")\n            blob = cloud_storage_upload_blob_from_file(\n                \"/path/somewhere\", \"bucket\", \"blob\", gcp_credentials)\n            return blob\n\n        example_cloud_storage_upload_blob_from_file_flow()\n        ```\n    \"\"\"\n    logger = get_run_logger()\n    logger.info(\"Uploading blob named %s to the %s bucket\", blob, bucket)\n\n    bucket_obj = await _get_bucket(bucket, gcp_credentials, project=project)\n    blob_obj = bucket_obj.blob(\n        blob, chunk_size=chunk_size, encryption_key=encryption_key\n    )\n\n    if isinstance(file, BytesIO):\n        await run_sync_in_worker_thread(\n            blob_obj.upload_from_file,\n            file,\n            content_type=content_type,\n            timeout=timeout,\n            **upload_kwargs,\n        )\n    else:\n        await run_sync_in_worker_thread(\n            blob_obj.upload_from_filename,\n            file,\n            content_type=content_type,\n            timeout=timeout,\n            **upload_kwargs,\n        )\n    return blob\n
"},{"location":"cloud_storage/#prefect_gcp.cloud_storage.cloud_storage_upload_blob_from_string","title":"cloud_storage_upload_blob_from_string async","text":"

Uploads a blob from a string or bytes representation of data.

Parameters:

Name Type Description Default data Union[str, bytes]

String or bytes representation of data to upload.

required bucket str

Name of the bucket.

required blob str

Name of the Cloud Storage blob.

required gcp_credentials GcpCredentials

Credentials to use for authentication with GCP.

required content_type Optional[str]

Type of content being uploaded.

None chunk_size Optional[int]

The size of a chunk of data whenever iterating (in bytes). This must be a multiple of 256 KB per the API specification.

None encryption_key Optional[str]

An encryption key.

None timeout Union[float, Tuple[float, float]]

The number of seconds the transport should wait for the server response. Can also be passed as a tuple (connect_timeout, read_timeout).

60 project Optional[str]

Name of the project to use; overrides the gcp_credentials project if provided.

None **upload_kwargs Dict[str, Any]

Additional keyword arguments to pass to Blob.upload_from_string.

{}

Returns:

Type Description str

The blob name.

Example

Uploads blob to bucket.

from prefect import flow\nfrom prefect_gcp import GcpCredentials\nfrom prefect_gcp.cloud_storage import cloud_storage_upload_blob_from_string\n\n@flow()\ndef example_cloud_storage_upload_blob_from_string_flow():\n    gcp_credentials = GcpCredentials(\n        service_account_file=\"/path/to/service/account/keyfile.json\")\n    blob = cloud_storage_upload_blob_from_string(\n        \"data\", \"bucket\", \"blob\", gcp_credentials)\n    return blob\n\nexample_cloud_storage_upload_blob_from_string_flow()\n

Source code in prefect_gcp/cloud_storage.py
@task\nasync def cloud_storage_upload_blob_from_string(\n    data: Union[str, bytes],\n    bucket: str,\n    blob: str,\n    gcp_credentials: GcpCredentials,\n    content_type: Optional[str] = None,\n    chunk_size: Optional[int] = None,\n    encryption_key: Optional[str] = None,\n    timeout: Union[float, Tuple[float, float]] = 60,\n    project: Optional[str] = None,\n    **upload_kwargs: Dict[str, Any],\n) -> str:\n    \"\"\"\n    Uploads a blob from a string or bytes representation of data.\n\n    Args:\n        data: String or bytes representation of data to upload.\n        bucket: Name of the bucket.\n        blob: Name of the Cloud Storage blob.\n        gcp_credentials: Credentials to use for authentication with GCP.\n        content_type: Type of content being uploaded.\n        chunk_size: The size of a chunk of data whenever\n            iterating (in bytes). This must be a multiple of 256 KB\n            per the API specification.\n        encryption_key: An encryption key.\n        timeout: The number of seconds the transport should wait\n            for the server response. Can also be passed as a tuple\n            (connect_timeout, read_timeout).\n        project: Name of the project to use; overrides the\n            gcp_credentials project if provided.\n        **upload_kwargs: Additional keyword arguments to pass to\n            `Blob.upload_from_string`.\n\n    Returns:\n        The blob name.\n\n    Example:\n        Uploads blob to bucket.\n        ```python\n        from prefect import flow\n        from prefect_gcp import GcpCredentials\n        from prefect_gcp.cloud_storage import cloud_storage_upload_blob_from_string\n\n        @flow()\n        def example_cloud_storage_upload_blob_from_string_flow():\n            gcp_credentials = GcpCredentials(\n                service_account_file=\"/path/to/service/account/keyfile.json\")\n            blob = cloud_storage_upload_blob_from_string(\n                \"data\", \"bucket\", \"blob\", gcp_credentials)\n            return blob\n\n        example_cloud_storage_upload_blob_from_string_flow()\n        ```\n    \"\"\"\n    logger = get_run_logger()\n    logger.info(\"Uploading blob named %s to the %s bucket\", blob, bucket)\n\n    bucket_obj = await _get_bucket(bucket, gcp_credentials, project=project)\n    blob_obj = bucket_obj.blob(\n        blob, chunk_size=chunk_size, encryption_key=encryption_key\n    )\n\n    await run_sync_in_worker_thread(\n        blob_obj.upload_from_string,\n        data,\n        content_type=content_type,\n        timeout=timeout,\n        **upload_kwargs,\n    )\n    return blob\n
"},{"location":"contributing/","title":"Contributing","text":"

If you'd like to contribute a fix for an issue or add a feature to prefect-gcp, please propose your changes through a pull request from a fork of the repository.

Here are the steps:

  1. Fork the repository
  2. Clone the forked repository
  3. Install the repository and its dependencies:
    pip install -e \".[dev]\"\n
  4. Make desired changes
  5. Add tests
  6. Add an entry to CHANGELOG.md
  7. Install pre-commit to perform quality checks prior to commit:
    pre-commit install\n
  8. git commit, git push, and create a pull request
"},{"location":"credentials/","title":"Credentials","text":""},{"location":"credentials/#prefect_gcp.credentials","title":"prefect_gcp.credentials","text":"

Module handling GCP credentials.

"},{"location":"credentials/#prefect_gcp.credentials-classes","title":"Classes","text":""},{"location":"credentials/#prefect_gcp.credentials.GcpCredentials","title":"GcpCredentials","text":"

Bases: CredentialsBlock

Block used to manage authentication with GCP. Google authentication is handled via the google.oauth2 module or through the CLI. Specify either service_account_file or service_account_info; if neither is specified, the client will try to detect credentials following Google's Application Default Credentials. See Google's Authentication documentation for details on inference and recommended authentication patterns.

Attributes:

Name Type Description service_account_file Optional[Path]

Path to the service account JSON keyfile.

service_account_info Optional[SecretDict]

The contents of the keyfile as a dict.

Example

Load GCP credentials stored in a GCP Credentials Block:

from prefect_gcp import GcpCredentials\ngcp_credentials_block = GcpCredentials.load(\"BLOCK_NAME\")\n
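
Alternatively, a minimal sketch (keyfile path is a placeholder) of building the block directly, either from a service account keyfile or with no arguments so that Application Default Credentials are used:

from prefect_gcp import GcpCredentials\n\n# explicit service account keyfile\nkeyfile_credentials = GcpCredentials(\n    service_account_file=\"/path/to/service/account/keyfile.json\")\n\n# no arguments: fall back to Application Default Credentials\nadc_credentials = GcpCredentials()\n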

Source code in prefect_gcp/credentials.py
class GcpCredentials(CredentialsBlock):\n    \"\"\"\n    Block used to manage authentication with GCP. Google authentication is\n    handled via the `google.oauth2` module or through the CLI.\n    Specify either one of service `account_file` or `service_account_info`; if both\n    are not specified, the client will try to detect the credentials following Google's\n    [Application Default Credentials](https://cloud.google.com/docs/authentication/application-default-credentials).\n    See Google's [Authentication documentation](https://cloud.google.com/docs/authentication#service-accounts)\n    for details on inference and recommended authentication patterns.\n\n    Attributes:\n        service_account_file: Path to the service account JSON keyfile.\n        service_account_info: The contents of the keyfile as a dict.\n\n    Example:\n        Load GCP credentials stored in a `GCP Credentials` Block:\n        ```python\n        from prefect_gcp import GcpCredentials\n        gcp_credentials_block = GcpCredentials.load(\"BLOCK_NAME\")\n        ```\n    \"\"\"  # noqa\n\n    _logo_url = \"https://cdn.sanity.io/images/3ugk85nk/production/10424e311932e31c477ac2b9ef3d53cefbaad708-250x250.png\"  # noqa\n    _block_type_name = \"GCP Credentials\"\n    _documentation_url = \"https://prefecthq.github.io/prefect-gcp/credentials/#prefect_gcp.credentials.GcpCredentials\"  # noqa: E501\n\n    service_account_file: Optional[Path] = Field(\n        default=None, description=\"Path to the service account JSON keyfile.\"\n    )\n    service_account_info: Optional[SecretDict] = Field(\n        default=None, description=\"The contents of the keyfile as a dict.\"\n    )\n    project: Optional[str] = Field(\n        default=None, description=\"The GCP project to use for the client.\"\n    )\n\n    _service_account_email: Optional[str] = None\n\n    @root_validator\n    def _provide_one_service_account_source(cls, values):\n        \"\"\"\n        Ensure that only a service account file or service account info ias provided.\n        \"\"\"\n        both_service_account = (\n            values.get(\"service_account_info\") is not None\n            and values.get(\"service_account_file\") is not None\n        )\n        if both_service_account:\n            raise ValueError(\n                \"Only one of service_account_info or service_account_file \"\n                \"can be specified at once\"\n            )\n        return values\n\n    @validator(\"service_account_file\")\n    def _check_service_account_file(cls, file):\n        \"\"\"Get full path of provided file and make sure that it exists.\"\"\"\n        if not file:\n            return file\n\n        service_account_file = Path(file).expanduser()\n        if not service_account_file.exists():\n            raise ValueError(\"The provided path to the service account is invalid\")\n        return service_account_file\n\n    @validator(\"service_account_info\", pre=True)\n    def _convert_json_string_json_service_account_info(cls, value):\n        \"\"\"\n        Converts service account info provided as a json formatted string\n        to a dictionary\n        \"\"\"\n        if isinstance(value, str):\n            try:\n                service_account_info = json.loads(value)\n                return service_account_info\n            except Exception:\n                raise ValueError(\"Unable to decode service_account_info\")\n        else:\n            return value\n\n    def block_initialization(self):\n        credentials = 
self.get_credentials_from_service_account()\n        if self.project is None:\n            if self.service_account_info or self.service_account_file:\n                credentials_project = credentials.project_id\n            # google.auth.default using gcloud auth application-default login\n            elif credentials.quota_project_id:\n                credentials_project = credentials.quota_project_id\n            # compute-assigned service account via GCP metadata server\n            else:\n                _, credentials_project = google.auth.default()\n            self.project = credentials_project\n\n        if hasattr(credentials, \"service_account_email\"):\n            self._service_account_email = credentials.service_account_email\n\n    def get_credentials_from_service_account(self) -> Credentials:\n        \"\"\"\n        Helper method to serialize credentials by using either\n        service_account_file or service_account_info.\n        \"\"\"\n        if self.service_account_info:\n            credentials = Credentials.from_service_account_info(\n                self.service_account_info.get_secret_value(),\n                scopes=[\"https://www.googleapis.com/auth/cloud-platform\"],\n            )\n        elif self.service_account_file:\n            credentials = Credentials.from_service_account_file(\n                self.service_account_file,\n                scopes=[\"https://www.googleapis.com/auth/cloud-platform\"],\n            )\n        else:\n            credentials, _ = google.auth.default()\n        return credentials\n\n    @sync_compatible\n    async def get_access_token(self):\n        \"\"\"\n        See: https://stackoverflow.com/a/69107745\n        Also: https://www.jhanley.com/google-cloud-creating-oauth-access-tokens-for-rest-api-calls/\n        \"\"\"  # noqa\n        request = google.auth.transport.requests.Request()\n        credentials = self.get_credentials_from_service_account()\n        await run_sync_in_worker_thread(credentials.refresh, request)\n        return credentials.token\n\n    def get_client(\n        self,\n        client_type: Union[str, ClientType],\n        **get_client_kwargs: Dict[str, Any],\n    ) -> Any:\n        \"\"\"\n        Helper method to dynamically get a client type.\n\n        Args:\n            client_type: The name of the client to get.\n            **get_client_kwargs: Additional keyword arguments to pass to the\n                `get_*_client` method.\n\n        Returns:\n            An authenticated client.\n\n        Raises:\n            ValueError: if the client is not supported.\n        \"\"\"\n        if isinstance(client_type, str):\n            client_type = ClientType(client_type)\n        client_type = client_type.value\n        get_client_method = getattr(self, f\"get_{client_type}_client\")\n        return get_client_method(**get_client_kwargs)\n\n    @_raise_help_msg(\"cloud_storage\")\n    def get_cloud_storage_client(\n        self, project: Optional[str] = None\n    ) -> \"StorageClient\":\n        \"\"\"\n        Gets an authenticated Cloud Storage client.\n\n        Args:\n            project: Name of the project to use; overrides the base\n                class's project if provided.\n\n        Returns:\n            An authenticated Cloud Storage client.\n\n        Examples:\n            Gets a GCP Cloud Storage client from a path.\n            ```python\n            from prefect import flow\n            from prefect_gcp.credentials import GcpCredentials\n\n            @flow()\n            def 
example_get_client_flow():\n                service_account_file = \"~/.secrets/prefect-service-account.json\"\n                client = GcpCredentials(\n                    service_account_file=service_account_file\n                ).get_cloud_storage_client()\n            example_get_client_flow()\n            ```\n\n            Gets a GCP Cloud Storage client from a dictionary.\n            ```python\n            from prefect import flow\n            from prefect_gcp.credentials import GcpCredentials\n\n            @flow()\n            def example_get_client_flow():\n                service_account_info = {\n                    \"type\": \"service_account\",\n                    \"project_id\": \"project_id\",\n                    \"private_key_id\": \"private_key_id\",\n                    \"private_key\": \"private_key\",\n                    \"client_email\": \"client_email\",\n                    \"client_id\": \"client_id\",\n                    \"auth_uri\": \"auth_uri\",\n                    \"token_uri\": \"token_uri\",\n                    \"auth_provider_x509_cert_url\": \"auth_provider_x509_cert_url\",\n                    \"client_x509_cert_url\": \"client_x509_cert_url\"\n                }\n                client = GcpCredentials(\n                    service_account_info=service_account_info\n                ).get_cloud_storage_client()\n            example_get_client_flow()\n            ```\n        \"\"\"\n        credentials = self.get_credentials_from_service_account()\n\n        # override class project if method project is provided\n        project = project or self.project\n        storage_client = StorageClient(credentials=credentials, project=project)\n        return storage_client\n\n    @_raise_help_msg(\"bigquery\")\n    def get_bigquery_client(\n        self, project: str = None, location: str = None\n    ) -> \"BigQueryClient\":\n        \"\"\"\n        Gets an authenticated BigQuery client.\n\n        Args:\n            project: Name of the project to use; overrides the base\n                class's project if provided.\n            location: Location to use.\n\n        Returns:\n            An authenticated BigQuery client.\n\n        Examples:\n            Gets a GCP BigQuery client from a path.\n            ```python\n            from prefect import flow\n            from prefect_gcp.credentials import GcpCredentials\n\n            @flow()\n            def example_get_client_flow():\n                service_account_file = \"~/.secrets/prefect-service-account.json\"\n                client = GcpCredentials(\n                    service_account_file=service_account_file\n                ).get_bigquery_client()\n            example_get_client_flow()\n            ```\n\n            Gets a GCP BigQuery client from a dictionary.\n            ```python\n            from prefect import flow\n            from prefect_gcp.credentials import GcpCredentials\n\n            @flow()\n            def example_get_client_flow():\n                service_account_info = {\n                    \"type\": \"service_account\",\n                    \"project_id\": \"project_id\",\n                    \"private_key_id\": \"private_key_id\",\n                    \"private_key\": \"private_key\",\n                    \"client_email\": \"client_email\",\n                    \"client_id\": \"client_id\",\n                    \"auth_uri\": \"auth_uri\",\n                    \"token_uri\": \"token_uri\",\n                    \"auth_provider_x509_cert_url\": 
\"auth_provider_x509_cert_url\",\n                    \"client_x509_cert_url\": \"client_x509_cert_url\"\n                }\n                client = GcpCredentials(\n                    service_account_info=service_account_info\n                ).get_bigquery_client()\n\n            example_get_client_flow()\n            ```\n        \"\"\"\n        credentials = self.get_credentials_from_service_account()\n\n        # override class project if method project is provided\n        project = project or self.project\n        big_query_client = BigQueryClient(\n            credentials=credentials, project=project, location=location\n        )\n        return big_query_client\n\n    @_raise_help_msg(\"secret_manager\")\n    def get_secret_manager_client(self) -> \"SecretManagerServiceClient\":\n        \"\"\"\n        Gets an authenticated Secret Manager Service client.\n\n        Returns:\n            An authenticated Secret Manager Service client.\n\n        Examples:\n            Gets a GCP Secret Manager client from a path.\n            ```python\n            from prefect import flow\n            from prefect_gcp.credentials import GcpCredentials\n\n            @flow()\n            def example_get_client_flow():\n                service_account_file = \"~/.secrets/prefect-service-account.json\"\n                client = GcpCredentials(\n                    service_account_file=service_account_file\n                ).get_secret_manager_client()\n            example_get_client_flow()\n            ```\n\n            Gets a GCP Cloud Storage client from a dictionary.\n            ```python\n            from prefect import flow\n            from prefect_gcp.credentials import GcpCredentials\n\n            @flow()\n            def example_get_client_flow():\n                service_account_info = {\n                    \"type\": \"service_account\",\n                    \"project_id\": \"project_id\",\n                    \"private_key_id\": \"private_key_id\",\n                    \"private_key\": \"private_key\",\n                    \"client_email\": \"client_email\",\n                    \"client_id\": \"client_id\",\n                    \"auth_uri\": \"auth_uri\",\n                    \"token_uri\": \"token_uri\",\n                    \"auth_provider_x509_cert_url\": \"auth_provider_x509_cert_url\",\n                    \"client_x509_cert_url\": \"client_x509_cert_url\"\n                }\n                client = GcpCredentials(\n                    service_account_info=service_account_info\n                ).get_secret_manager_client()\n            example_get_client_flow()\n            ```\n        \"\"\"\n        credentials = self.get_credentials_from_service_account()\n\n        # doesn't accept project; must pass in project in tasks\n        secret_manager_client = SecretManagerServiceClient(credentials=credentials)\n        return secret_manager_client\n\n    @_raise_help_msg(\"aiplatform\")\n    def get_job_service_client(\n        self, client_options: Dict[str, Any] = None\n    ) -> \"JobServiceClient\":\n        \"\"\"\n        Gets an authenticated Job Service client for Vertex AI.\n\n        Returns:\n            An authenticated Job Service client.\n\n        Examples:\n            Gets a GCP Job Service client from a path.\n            ```python\n            from prefect import flow\n            from prefect_gcp.credentials import GcpCredentials\n\n            @flow()\n            def example_get_client_flow():\n                service_account_file = 
\"~/.secrets/prefect-service-account.json\"\n                client = GcpCredentials(\n                    service_account_file=service_account_file\n                ).get_job_service_client()\n\n            example_get_client_flow()\n            ```\n\n            Gets a GCP Cloud Storage client from a dictionary.\n            ```python\n            from prefect import flow\n            from prefect_gcp.credentials import GcpCredentials\n\n            @flow()\n            def example_get_client_flow():\n                service_account_info = {\n                    \"type\": \"service_account\",\n                    \"project_id\": \"project_id\",\n                    \"private_key_id\": \"private_key_id\",\n                    \"private_key\": \"private_key\",\n                    \"client_email\": \"client_email\",\n                    \"client_id\": \"client_id\",\n                    \"auth_uri\": \"auth_uri\",\n                    \"token_uri\": \"token_uri\",\n                    \"auth_provider_x509_cert_url\": \"auth_provider_x509_cert_url\",\n                    \"client_x509_cert_url\": \"client_x509_cert_url\"\n                }\n                client = GcpCredentials(\n                    service_account_info=service_account_info\n                ).get_job_service_client()\n\n            example_get_client_flow()\n            ```\n        \"\"\"\n        credentials = self.get_credentials_from_service_account()\n        job_service_client = JobServiceClient(\n            credentials=credentials, client_options=client_options\n        )\n        return job_service_client\n
"},{"location":"credentials/#prefect_gcp.credentials.GcpCredentials-functions","title":"Functions","text":""},{"location":"credentials/#prefect_gcp.credentials.GcpCredentials.get_access_token","title":"get_access_token async","text":"Source code in prefect_gcp/credentials.py
@sync_compatible\nasync def get_access_token(self):\n    \"\"\"\n    See: https://stackoverflow.com/a/69107745\n    Also: https://www.jhanley.com/google-cloud-creating-oauth-access-tokens-for-rest-api-calls/\n    \"\"\"  # noqa\n    request = google.auth.transport.requests.Request()\n    credentials = self.get_credentials_from_service_account()\n    await run_sync_in_worker_thread(credentials.refresh, request)\n    return credentials.token\n
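
A minimal usage sketch (the block name is a placeholder); because the method is `sync_compatible`, it can be called directly from synchronous code or awaited inside a flow:

from prefect_gcp import GcpCredentials\n\ngcp_credentials = GcpCredentials.load(\"BLOCK_NAME\")\n\n# refreshes the credentials and returns a short-lived OAuth access token\ntoken = gcp_credentials.get_access_token()\n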
"},{"location":"credentials/#prefect_gcp.credentials.GcpCredentials.get_bigquery_client","title":"get_bigquery_client","text":"

Gets an authenticated BigQuery client.

Parameters:

Name Type Description Default project str

Name of the project to use; overrides the base class's project if provided.

None location str

Location to use.

None

Returns:

Type Description Client

An authenticated BigQuery client.

Examples:

Gets a GCP BigQuery client from a path.

from prefect import flow\nfrom prefect_gcp.credentials import GcpCredentials\n\n@flow()\ndef example_get_client_flow():\n    service_account_file = \"~/.secrets/prefect-service-account.json\"\n    client = GcpCredentials(\n        service_account_file=service_account_file\n    ).get_bigquery_client()\nexample_get_client_flow()\n

Gets a GCP BigQuery client from a dictionary.

from prefect import flow\nfrom prefect_gcp.credentials import GcpCredentials\n\n@flow()\ndef example_get_client_flow():\n    service_account_info = {\n        \"type\": \"service_account\",\n        \"project_id\": \"project_id\",\n        \"private_key_id\": \"private_key_id\",\n        \"private_key\": \"private_key\",\n        \"client_email\": \"client_email\",\n        \"client_id\": \"client_id\",\n        \"auth_uri\": \"auth_uri\",\n        \"token_uri\": \"token_uri\",\n        \"auth_provider_x509_cert_url\": \"auth_provider_x509_cert_url\",\n        \"client_x509_cert_url\": \"client_x509_cert_url\"\n    }\n    client = GcpCredentials(\n        service_account_info=service_account_info\n    ).get_bigquery_client()\n\nexample_get_client_flow()\n

Source code in prefect_gcp/credentials.py
@_raise_help_msg(\"bigquery\")\ndef get_bigquery_client(\n    self, project: str = None, location: str = None\n) -> \"BigQueryClient\":\n    \"\"\"\n    Gets an authenticated BigQuery client.\n\n    Args:\n        project: Name of the project to use; overrides the base\n            class's project if provided.\n        location: Location to use.\n\n    Returns:\n        An authenticated BigQuery client.\n\n    Examples:\n        Gets a GCP BigQuery client from a path.\n        ```python\n        from prefect import flow\n        from prefect_gcp.credentials import GcpCredentials\n\n        @flow()\n        def example_get_client_flow():\n            service_account_file = \"~/.secrets/prefect-service-account.json\"\n            client = GcpCredentials(\n                service_account_file=service_account_file\n            ).get_bigquery_client()\n        example_get_client_flow()\n        ```\n\n        Gets a GCP BigQuery client from a dictionary.\n        ```python\n        from prefect import flow\n        from prefect_gcp.credentials import GcpCredentials\n\n        @flow()\n        def example_get_client_flow():\n            service_account_info = {\n                \"type\": \"service_account\",\n                \"project_id\": \"project_id\",\n                \"private_key_id\": \"private_key_id\",\n                \"private_key\": \"private_key\",\n                \"client_email\": \"client_email\",\n                \"client_id\": \"client_id\",\n                \"auth_uri\": \"auth_uri\",\n                \"token_uri\": \"token_uri\",\n                \"auth_provider_x509_cert_url\": \"auth_provider_x509_cert_url\",\n                \"client_x509_cert_url\": \"client_x509_cert_url\"\n            }\n            client = GcpCredentials(\n                service_account_info=service_account_info\n            ).get_bigquery_client()\n\n        example_get_client_flow()\n        ```\n    \"\"\"\n    credentials = self.get_credentials_from_service_account()\n\n    # override class project if method project is provided\n    project = project or self.project\n    big_query_client = BigQueryClient(\n        credentials=credentials, project=project, location=location\n    )\n    return big_query_client\n
"},{"location":"credentials/#prefect_gcp.credentials.GcpCredentials.get_client","title":"get_client","text":"

Helper method to dynamically get a client type.

Parameters:

Name Type Description Default client_type Union[str, ClientType]

The name of the client to get.

required **get_client_kwargs Dict[str, Any]

Additional keyword arguments to pass to the get_*_client method.

{}

Returns:

Type Description Any

An authenticated client.

Raises:

Type Description ValueError

if the client is not supported.
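
A minimal sketch (keyfile path is a placeholder; assumes \"cloud_storage\" and \"bigquery\" are valid `ClientType` values, mirroring the `get_*_client` methods):

from prefect_gcp import GcpCredentials\n\ngcp_credentials = GcpCredentials(\n    service_account_file=\"/path/to/service/account/keyfile.json\")\n\n# dispatches to get_cloud_storage_client\nstorage_client = gcp_credentials.get_client(\"cloud_storage\")\n\n# keyword arguments are forwarded to the matching get_*_client method\nbigquery_client = gcp_credentials.get_client(\"bigquery\", location=\"US\")\n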

Source code in prefect_gcp/credentials.py
def get_client(\n    self,\n    client_type: Union[str, ClientType],\n    **get_client_kwargs: Dict[str, Any],\n) -> Any:\n    \"\"\"\n    Helper method to dynamically get a client type.\n\n    Args:\n        client_type: The name of the client to get.\n        **get_client_kwargs: Additional keyword arguments to pass to the\n            `get_*_client` method.\n\n    Returns:\n        An authenticated client.\n\n    Raises:\n        ValueError: if the client is not supported.\n    \"\"\"\n    if isinstance(client_type, str):\n        client_type = ClientType(client_type)\n    client_type = client_type.value\n    get_client_method = getattr(self, f\"get_{client_type}_client\")\n    return get_client_method(**get_client_kwargs)\n
"},{"location":"credentials/#prefect_gcp.credentials.GcpCredentials.get_cloud_storage_client","title":"get_cloud_storage_client","text":"

Gets an authenticated Cloud Storage client.

Parameters:

Name Type Description Default project Optional[str]

Name of the project to use; overrides the base class's project if provided.

None

Returns:

Type Description Client

An authenticated Cloud Storage client.

Examples:

Gets a GCP Cloud Storage client from a path.

from prefect import flow\nfrom prefect_gcp.credentials import GcpCredentials\n\n@flow()\ndef example_get_client_flow():\n    service_account_file = \"~/.secrets/prefect-service-account.json\"\n    client = GcpCredentials(\n        service_account_file=service_account_file\n    ).get_cloud_storage_client()\nexample_get_client_flow()\n

Gets a GCP Cloud Storage client from a dictionary.

from prefect import flow\nfrom prefect_gcp.credentials import GcpCredentials\n\n@flow()\ndef example_get_client_flow():\n    service_account_info = {\n        \"type\": \"service_account\",\n        \"project_id\": \"project_id\",\n        \"private_key_id\": \"private_key_id\",\n        \"private_key\": \"private_key\",\n        \"client_email\": \"client_email\",\n        \"client_id\": \"client_id\",\n        \"auth_uri\": \"auth_uri\",\n        \"token_uri\": \"token_uri\",\n        \"auth_provider_x509_cert_url\": \"auth_provider_x509_cert_url\",\n        \"client_x509_cert_url\": \"client_x509_cert_url\"\n    }\n    client = GcpCredentials(\n        service_account_info=service_account_info\n    ).get_cloud_storage_client()\nexample_get_client_flow()\n

Source code in prefect_gcp/credentials.py
@_raise_help_msg(\"cloud_storage\")\ndef get_cloud_storage_client(\n    self, project: Optional[str] = None\n) -> \"StorageClient\":\n    \"\"\"\n    Gets an authenticated Cloud Storage client.\n\n    Args:\n        project: Name of the project to use; overrides the base\n            class's project if provided.\n\n    Returns:\n        An authenticated Cloud Storage client.\n\n    Examples:\n        Gets a GCP Cloud Storage client from a path.\n        ```python\n        from prefect import flow\n        from prefect_gcp.credentials import GcpCredentials\n\n        @flow()\n        def example_get_client_flow():\n            service_account_file = \"~/.secrets/prefect-service-account.json\"\n            client = GcpCredentials(\n                service_account_file=service_account_file\n            ).get_cloud_storage_client()\n        example_get_client_flow()\n        ```\n\n        Gets a GCP Cloud Storage client from a dictionary.\n        ```python\n        from prefect import flow\n        from prefect_gcp.credentials import GcpCredentials\n\n        @flow()\n        def example_get_client_flow():\n            service_account_info = {\n                \"type\": \"service_account\",\n                \"project_id\": \"project_id\",\n                \"private_key_id\": \"private_key_id\",\n                \"private_key\": \"private_key\",\n                \"client_email\": \"client_email\",\n                \"client_id\": \"client_id\",\n                \"auth_uri\": \"auth_uri\",\n                \"token_uri\": \"token_uri\",\n                \"auth_provider_x509_cert_url\": \"auth_provider_x509_cert_url\",\n                \"client_x509_cert_url\": \"client_x509_cert_url\"\n            }\n            client = GcpCredentials(\n                service_account_info=service_account_info\n            ).get_cloud_storage_client()\n        example_get_client_flow()\n        ```\n    \"\"\"\n    credentials = self.get_credentials_from_service_account()\n\n    # override class project if method project is provided\n    project = project or self.project\n    storage_client = StorageClient(credentials=credentials, project=project)\n    return storage_client\n
"},{"location":"credentials/#prefect_gcp.credentials.GcpCredentials.get_credentials_from_service_account","title":"get_credentials_from_service_account","text":"

Helper method to construct credentials using either service_account_file or service_account_info.
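
A minimal sketch (keyfile path is a placeholder) of retrieving the raw credentials object and passing it to a Google client directly:

from google.cloud.storage import Client as StorageClient\nfrom prefect_gcp import GcpCredentials\n\ngcp_credentials = GcpCredentials(\n    service_account_file=\"/path/to/service/account/keyfile.json\")\n\n# returns a google.auth credentials object usable with any GCP client\ncredentials = gcp_credentials.get_credentials_from_service_account()\nstorage_client = StorageClient(credentials=credentials)\n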

Source code in prefect_gcp/credentials.py
def get_credentials_from_service_account(self) -> Credentials:\n    \"\"\"\n    Helper method to serialize credentials by using either\n    service_account_file or service_account_info.\n    \"\"\"\n    if self.service_account_info:\n        credentials = Credentials.from_service_account_info(\n            self.service_account_info.get_secret_value(),\n            scopes=[\"https://www.googleapis.com/auth/cloud-platform\"],\n        )\n    elif self.service_account_file:\n        credentials = Credentials.from_service_account_file(\n            self.service_account_file,\n            scopes=[\"https://www.googleapis.com/auth/cloud-platform\"],\n        )\n    else:\n        credentials, _ = google.auth.default()\n    return credentials\n
"},{"location":"credentials/#prefect_gcp.credentials.GcpCredentials.get_job_service_client","title":"get_job_service_client","text":"

Gets an authenticated Job Service client for Vertex AI.

Returns:

Type Description JobServiceClient

An authenticated Job Service client.

Examples:

Gets a GCP Job Service client from a path.

from prefect import flow\nfrom prefect_gcp.credentials import GcpCredentials\n\n@flow()\ndef example_get_client_flow():\n    service_account_file = \"~/.secrets/prefect-service-account.json\"\n    client = GcpCredentials(\n        service_account_file=service_account_file\n    ).get_job_service_client()\n\nexample_get_client_flow()\n

Gets a GCP Job Service client from a dictionary.

from prefect import flow\nfrom prefect_gcp.credentials import GcpCredentials\n\n@flow()\ndef example_get_client_flow():\n    service_account_info = {\n        \"type\": \"service_account\",\n        \"project_id\": \"project_id\",\n        \"private_key_id\": \"private_key_id\",\n        \"private_key\": \"private_key\",\n        \"client_email\": \"client_email\",\n        \"client_id\": \"client_id\",\n        \"auth_uri\": \"auth_uri\",\n        \"token_uri\": \"token_uri\",\n        \"auth_provider_x509_cert_url\": \"auth_provider_x509_cert_url\",\n        \"client_x509_cert_url\": \"client_x509_cert_url\"\n    }\n    client = GcpCredentials(\n        service_account_info=service_account_info\n    ).get_job_service_client()\n\nexample_get_client_flow()\n

Source code in prefect_gcp/credentials.py
@_raise_help_msg(\"aiplatform\")\ndef get_job_service_client(\n    self, client_options: Dict[str, Any] = None\n) -> \"JobServiceClient\":\n    \"\"\"\n    Gets an authenticated Job Service client for Vertex AI.\n\n    Returns:\n        An authenticated Job Service client.\n\n    Examples:\n        Gets a GCP Job Service client from a path.\n        ```python\n        from prefect import flow\n        from prefect_gcp.credentials import GcpCredentials\n\n        @flow()\n        def example_get_client_flow():\n            service_account_file = \"~/.secrets/prefect-service-account.json\"\n            client = GcpCredentials(\n                service_account_file=service_account_file\n            ).get_job_service_client()\n\n        example_get_client_flow()\n        ```\n\n        Gets a GCP Cloud Storage client from a dictionary.\n        ```python\n        from prefect import flow\n        from prefect_gcp.credentials import GcpCredentials\n\n        @flow()\n        def example_get_client_flow():\n            service_account_info = {\n                \"type\": \"service_account\",\n                \"project_id\": \"project_id\",\n                \"private_key_id\": \"private_key_id\",\n                \"private_key\": \"private_key\",\n                \"client_email\": \"client_email\",\n                \"client_id\": \"client_id\",\n                \"auth_uri\": \"auth_uri\",\n                \"token_uri\": \"token_uri\",\n                \"auth_provider_x509_cert_url\": \"auth_provider_x509_cert_url\",\n                \"client_x509_cert_url\": \"client_x509_cert_url\"\n            }\n            client = GcpCredentials(\n                service_account_info=service_account_info\n            ).get_job_service_client()\n\n        example_get_client_flow()\n        ```\n    \"\"\"\n    credentials = self.get_credentials_from_service_account()\n    job_service_client = JobServiceClient(\n        credentials=credentials, client_options=client_options\n    )\n    return job_service_client\n
"},{"location":"credentials/#prefect_gcp.credentials.GcpCredentials.get_secret_manager_client","title":"get_secret_manager_client","text":"

Gets an authenticated Secret Manager Service client.

Returns:

Type Description SecretManagerServiceClient

An authenticated Secret Manager Service client.

Examples:

Gets a GCP Secret Manager client from a path.

from prefect import flow\nfrom prefect_gcp.credentials import GcpCredentials\n\n@flow()\ndef example_get_client_flow():\n    service_account_file = \"~/.secrets/prefect-service-account.json\"\n    client = GcpCredentials(\n        service_account_file=service_account_file\n    ).get_secret_manager_client()\nexample_get_client_flow()\n

Gets a GCP Secret Manager client from a dictionary.

from prefect import flow\nfrom prefect_gcp.credentials import GcpCredentials\n\n@flow()\ndef example_get_client_flow():\n    service_account_info = {\n        \"type\": \"service_account\",\n        \"project_id\": \"project_id\",\n        \"private_key_id\": \"private_key_id\",\n        \"private_key\": \"private_key\",\n        \"client_email\": \"client_email\",\n        \"client_id\": \"client_id\",\n        \"auth_uri\": \"auth_uri\",\n        \"token_uri\": \"token_uri\",\n        \"auth_provider_x509_cert_url\": \"auth_provider_x509_cert_url\",\n        \"client_x509_cert_url\": \"client_x509_cert_url\"\n    }\n    client = GcpCredentials(\n        service_account_info=service_account_info\n    ).get_secret_manager_client()\nexample_get_client_flow()\n

Source code in prefect_gcp/credentials.py
@_raise_help_msg(\"secret_manager\")\ndef get_secret_manager_client(self) -> \"SecretManagerServiceClient\":\n    \"\"\"\n    Gets an authenticated Secret Manager Service client.\n\n    Returns:\n        An authenticated Secret Manager Service client.\n\n    Examples:\n        Gets a GCP Secret Manager client from a path.\n        ```python\n        from prefect import flow\n        from prefect_gcp.credentials import GcpCredentials\n\n        @flow()\n        def example_get_client_flow():\n            service_account_file = \"~/.secrets/prefect-service-account.json\"\n            client = GcpCredentials(\n                service_account_file=service_account_file\n            ).get_secret_manager_client()\n        example_get_client_flow()\n        ```\n\n        Gets a GCP Cloud Storage client from a dictionary.\n        ```python\n        from prefect import flow\n        from prefect_gcp.credentials import GcpCredentials\n\n        @flow()\n        def example_get_client_flow():\n            service_account_info = {\n                \"type\": \"service_account\",\n                \"project_id\": \"project_id\",\n                \"private_key_id\": \"private_key_id\",\n                \"private_key\": \"private_key\",\n                \"client_email\": \"client_email\",\n                \"client_id\": \"client_id\",\n                \"auth_uri\": \"auth_uri\",\n                \"token_uri\": \"token_uri\",\n                \"auth_provider_x509_cert_url\": \"auth_provider_x509_cert_url\",\n                \"client_x509_cert_url\": \"client_x509_cert_url\"\n            }\n            client = GcpCredentials(\n                service_account_info=service_account_info\n            ).get_secret_manager_client()\n        example_get_client_flow()\n        ```\n    \"\"\"\n    credentials = self.get_credentials_from_service_account()\n\n    # doesn't accept project; must pass in project in tasks\n    secret_manager_client = SecretManagerServiceClient(credentials=credentials)\n    return secret_manager_client\n
"},{"location":"examples_catalog/","title":"Examples Catalog","text":"

Below is a list of examples for prefect-gcp.

"},{"location":"examples_catalog/#bigquery-module","title":"Bigquery Module","text":"

Execute operation with parameters:

from prefect_gcp.bigquery import BigQueryWarehouse\n\nwith BigQueryWarehouse.load(\"BLOCK_NAME\") as warehouse:\n    operation = '''\n        CREATE TABLE mydataset.trips AS (\n        SELECT\n            bikeid,\n            start_time,\n            duration_minutes\n        FROM\n            bigquery-public-data.austin_bikeshare.bikeshare_trips\n        LIMIT %(limit)s\n        );\n    '''\n    warehouse.execute(operation, parameters={\"limit\": 5})\n
from prefect import flow\nfrom prefect_gcp import GcpCredentials\nfrom prefect_gcp.bigquery import bigquery_insert_stream\nfrom google.cloud.bigquery import SchemaField\n\n@flow\ndef example_bigquery_insert_stream_flow():\n    gcp_credentials = GcpCredentials(project=\"project\")\n    records = [\n        {\"number\": 1, \"text\": \"abc\", \"bool\": True},\n        {\"number\": 2, \"text\": \"def\", \"bool\": False},\n    ]\n    result = bigquery_insert_stream(\n        dataset=\"integrations\",\n        table=\"test_table\",\n        records=records,\n        gcp_credentials=gcp_credentials\n    )\n    return result\n\nexample_bigquery_insert_stream_flow()\n
Queries the public names database, returning 10 results.
from prefect import flow\nfrom prefect_gcp import GcpCredentials\nfrom prefect_gcp.bigquery import bigquery_query\n\n@flow\ndef example_bigquery_query_flow():\n    gcp_credentials = GcpCredentials(\n        service_account_file=\"/path/to/service/account/keyfile.json\",\n        project=\"project\"\n    )\n    query = '''\n        SELECT word, word_count\n        FROM `bigquery-public-data.samples.shakespeare`\n        WHERE corpus = @corpus\n        AND word_count >= @min_word_count\n        ORDER BY word_count DESC;\n    '''\n    query_params = [\n        (\"corpus\", \"STRING\", \"romeoandjuliet\"),\n        (\"min_word_count\", \"INT64\", 250)\n    ]\n    result = bigquery_query(\n        query, gcp_credentials, query_params=query_params\n    )\n    return result\n\nexample_bigquery_query_flow()\n
from prefect import flow\nfrom prefect_gcp import GcpCredentials\nfrom prefect_gcp.bigquery import bigquery_load_cloud_storage\n\n@flow\ndef example_bigquery_load_cloud_storage_flow():\n    gcp_credentials = GcpCredentials(project=\"project\")\n    result = bigquery_load_cloud_storage(\n        dataset=\"dataset\",\n        table=\"test_table\",\n        uri=\"uri\",\n        gcp_credentials=gcp_credentials\n    )\n    return result\n\nexample_bigquery_load_cloud_storage_flow()\n
from prefect import flow\nfrom prefect_gcp import GcpCredentials\nfrom prefect_gcp.bigquery import bigquery_load_file\nfrom google.cloud.bigquery import SchemaField\n\n@flow\ndef example_bigquery_load_file_flow():\n    gcp_credentials = GcpCredentials(project=\"project\")\n    result = bigquery_load_file(\n        dataset=\"dataset\",\n        table=\"test_table\",\n        path=\"path\",\n        gcp_credentials=gcp_credentials\n    )\n    return result\n\nexample_bigquery_load_file_flow()\n
Create mytable in mydataset and insert two rows into it:
from prefect_gcp.bigquery import BigQueryWarehouse\n\nwith BigQueryWarehouse.load(\"bigquery\") as warehouse:\n    create_operation = '''\n    CREATE TABLE IF NOT EXISTS mydataset.mytable (\n        col1 STRING,\n        col2 INTEGER,\n        col3 BOOLEAN\n    )\n    '''\n    warehouse.execute(create_operation)\n    insert_operation = '''\n    INSERT INTO mydataset.mytable (col1, col2, col3) VALUES (%s, %s, %s)\n    '''\n    seq_of_parameters = [\n        (\"a\", 1, True),\n        (\"b\", 2, False),\n    ]\n    warehouse.execute_many(\n        insert_operation,\n        seq_of_parameters=seq_of_parameters\n    )\n
Execute operation with parameters, fetching one new row at a time:
from prefect_gcp.bigquery import BigQueryWarehouse\n\nwith BigQueryWarehouse.load(\"BLOCK_NAME\") as warehouse:\n    operation = '''\n        SELECT word, word_count\n        FROM `bigquery-public-data.samples.shakespeare`\n        WHERE corpus = %(corpus)s\n        AND word_count >= %(min_word_count)s\n        ORDER BY word_count DESC\n        LIMIT 3;\n    '''\n    parameters = {\n        \"corpus\": \"romeoandjuliet\",\n        \"min_word_count\": 250,\n    }\n    for _ in range(0, 3):\n        result = warehouse.fetch_one(operation, parameters=parameters)\n        print(result)\n
Execute operation with parameters, fetching all rows:
from prefect_gcp.bigquery import BigQueryWarehouse\n\nwith BigQueryWarehouse.load(\"BLOCK_NAME\") as warehouse:\n    operation = '''\n        SELECT word, word_count\n        FROM `bigquery-public-data.samples.shakespeare`\n        WHERE corpus = %(corpus)s\n        AND word_count >= %(min_word_count)s\n        ORDER BY word_count DESC\n        LIMIT 3;\n    '''\n    parameters = {\n        \"corpus\": \"romeoandjuliet\",\n        \"min_word_count\": 250,\n    }\n    result = warehouse.fetch_all(operation, parameters=parameters)\n
Execute operation with parameters, fetching two new rows at a time:
from prefect_gcp.bigquery import BigQueryWarehouse\n\nwith BigQueryWarehouse.load(\"BLOCK_NAME\") as warehouse:\n    operation = '''\n        SELECT word, word_count\n        FROM `bigquery-public-data.samples.shakespeare`\n        WHERE corpus = %(corpus)s\n        AND word_count >= %(min_word_count)s\n        ORDER BY word_count DESC\n        LIMIT 6;\n    '''\n    parameters = {\n        \"corpus\": \"romeoandjuliet\",\n        \"min_word_count\": 250,\n    }\n    for _ in range(0, 3):\n        result = warehouse.fetch_many(\n            operation,\n            parameters=parameters,\n            size=2\n        )\n        print(result)\n

"},{"location":"examples_catalog/#cloud-storage-module","title":"Cloud Storage Module","text":"

Creates a bucket named \"prefect\".

from prefect import flow\nfrom prefect_gcp import GcpCredentials\nfrom prefect_gcp.cloud_storage import cloud_storage_create_bucket\n\n@flow()\ndef example_cloud_storage_create_bucket_flow():\n    gcp_credentials = GcpCredentials(\n        service_account_file=\"/path/to/service/account/keyfile.json\")\n    bucket = cloud_storage_create_bucket(\"prefect\", gcp_credentials)\n\nexample_cloud_storage_create_bucket_flow()\n
Upload local folder my_folder to the bucket's folder my_folder.
from prefect_gcp.cloud_storage import GcsBucket\n\ngcs_bucket = GcsBucket.load(\"my-bucket\")\ngcs_bucket.upload_from_folder(\"my_folder\")\n
Downloads blob from bucket.
from prefect import flow\nfrom prefect_gcp import GcpCredentials\nfrom prefect_gcp.cloud_storage import cloud_storage_download_blob_as_bytes\n\n@flow()\ndef example_cloud_storage_download_blob_flow():\n    gcp_credentials = GcpCredentials(\n        service_account_file=\"/path/to/service/account/keyfile.json\")\n    contents = cloud_storage_download_blob_as_bytes(\n        \"bucket\", \"blob\", gcp_credentials)\n    return contents\n\nexample_cloud_storage_download_blob_flow()\n
Get all folders from a bucket named \"my-bucket\".
from prefect_gcp.cloud_storage import GcsBucket\n\ngcs_bucket = GcsBucket.load(\"my-bucket\")\ngcs_bucket.list_folders()\n

Get all folders from a folder called years

from prefect_gcp.cloud_storage import GcsBucket\n\ngcs_bucket = GcsBucket.load(\"my-bucket\")\ngcs_bucket.list_folders(\"years\")\n
Download my_folder to a local folder named my_folder.
from prefect_gcp.cloud_storage import GcsBucket\n\ngcs_bucket = GcsBucket.load(\"my-bucket\")\ngcs_bucket.download_folder_to_path(\"my_folder\", \"my_folder\")\n
Uploads blob to bucket.
from prefect import flow\nfrom prefect_gcp import GcpCredentials\nfrom prefect_gcp.cloud_storage import cloud_storage_upload_blob_from_file\n\n@flow()\ndef example_cloud_storage_upload_blob_from_file_flow():\n    gcp_credentials = GcpCredentials(\n        service_account_file=\"/path/to/service/account/keyfile.json\")\n    blob = cloud_storage_upload_blob_from_file(\n        \"/path/somewhere\", \"bucket\", \"blob\", gcp_credentials)\n    return blob\n\nexample_cloud_storage_upload_blob_from_file_flow()\n
Uploads blob to bucket.
from prefect import flow\nfrom prefect_gcp import GcpCredentials\nfrom prefect_gcp.cloud_storage import cloud_storage_upload_blob_from_string\n\n@flow()\ndef example_cloud_storage_upload_blob_from_string_flow():\n    gcp_credentials = GcpCredentials(\n        service_account_file=\"/path/to/service/account/keyfile.json\")\n    blob = cloud_storage_upload_blob_from_string(\n        \"data\", \"bucket\", \"blob\", gcp_credentials)\n    return blob\n\nexample_cloud_storage_upload_blob_from_string_flow()\n
Create a bucket.
from prefect_gcp.cloud_storage import GcsBucket\n\ngcs_bucket = GcsBucket(bucket=\"my-bucket\")\ngcs_bucket.create_bucket()\n
Download my_folder/notes.txt object to notes.txt.
from prefect_gcp.cloud_storage import GcsBucket\n\ngcs_bucket = GcsBucket.load(\"my-bucket\")\ngcs_bucket.download_object_to_path(\"my_folder/notes.txt\", \"notes.txt\")\n
Get the bucket object.
from prefect_gcp.cloud_storage import GcsBucket\n\ngcs_bucket = GcsBucket.load(\"my-bucket\")\ngcs_bucket.get_bucket()\n
Get all blobs from a folder named \"prefect\".
from prefect_gcp.cloud_storage import GcsBucket\n\ngcs_bucket = GcsBucket.load(\"my-bucket\")\ngcs_bucket.list_blobs(\"prefect\")\n
Upload my_folder/notes.txt object to a BytesIO object.
from io import BytesIO\nfrom prefect_gcp.cloud_storage import GcsBucket\n\ngcs_bucket = GcsBucket.load(\"my-bucket\")\nwith open(\"notes.txt\", \"rb\") as f:\n    gcs_bucket.upload_from_file_object(f, \"my_folder/notes.txt\")\n

Upload BufferedReader object to my_folder/notes.txt.

from io import BufferedReader\nfrom prefect_gcp.cloud_storage import GcsBucket\n\ngcs_bucket = GcsBucket.load(\"my-bucket\")\nwith open(\"notes.txt\", \"rb\") as f:\n    gcs_bucket.upload_from_file_object(\n        BufferedReader(f), \"my_folder/notes.txt\"\n    )\n
Download my_folder/notes.txt object to a BytesIO object.
from io import BytesIO\nfrom prefect_gcp.cloud_storage import GcsBucket\n\ngcs_bucket = GcsBucket.load(\"my-bucket\")\nwith BytesIO() as buf:\n    gcs_bucket.download_object_to_file_object(\"my_folder/notes.txt\", buf)\n

Download my_folder/notes.txt object to a BufferedWriter.

    from prefect_gcp.cloud_storage import GcsBucket\n\n    gcs_bucket = GcsBucket.load(\"my-bucket\")\n    with open(\"notes.txt\", \"wb\") as f:\n        gcs_bucket.download_object_to_file_object(\"my_folder/notes.txt\", f)\n
Copies blob from one bucket to another.
from prefect import flow\nfrom prefect_gcp import GcpCredentials\nfrom prefect_gcp.cloud_storage import cloud_storage_copy_blob\n\n@flow()\ndef example_cloud_storage_copy_blob_flow():\n    gcp_credentials = GcpCredentials(\n        service_account_file=\"/path/to/service/account/keyfile.json\")\n    blob = cloud_storage_copy_blob(\n        \"source_bucket\",\n        \"dest_bucket\",\n        \"source_blob\",\n        gcp_credentials\n    )\n    return blob\n\nexample_cloud_storage_copy_blob_flow()\n
Downloads blob from bucket.
from prefect import flow\nfrom prefect_gcp import GcpCredentials\nfrom prefect_gcp.cloud_storage import cloud_storage_download_blob_to_file\n\n@flow()\ndef example_cloud_storage_download_blob_flow():\n    gcp_credentials = GcpCredentials(\n        service_account_file=\"/path/to/service/account/keyfile.json\")\n    path = cloud_storage_download_blob_to_file(\n        \"bucket\", \"blob\", \"file_path\", gcp_credentials)\n    return path\n\nexample_cloud_storage_download_blob_flow()\n
Upload notes.txt to my_folder/notes.txt.
from prefect_gcp.cloud_storage import GcsBucket\n\ngcs_bucket = GcsBucket.load(\"my-bucket\")\ngcs_bucket.upload_from_path(\"notes.txt\", \"my_folder/notes.txt\")\n

"},{"location":"examples_catalog/#credentials-module","title":"Credentials Module","text":"

Gets a GCP Cloud Storage client from a path.

from prefect import flow\nfrom prefect_gcp.credentials import GcpCredentials\n\n@flow()\ndef example_get_client_flow():\n    service_account_file = \"~/.secrets/prefect-service-account.json\"\n    client = GcpCredentials(\n        service_account_file=service_account_file\n    ).get_cloud_storage_client()\nexample_get_client_flow()\n

Gets a GCP Cloud Storage client from a dictionary.

from prefect import flow\nfrom prefect_gcp.credentials import GcpCredentials\n\n@flow()\ndef example_get_client_flow():\n    service_account_info = {\n        \"type\": \"service_account\",\n        \"project_id\": \"project_id\",\n        \"private_key_id\": \"private_key_id\",\n        \"private_key\": \"private_key\",\n        \"client_email\": \"client_email\",\n        \"client_id\": \"client_id\",\n        \"auth_uri\": \"auth_uri\",\n        \"token_uri\": \"token_uri\",\n        \"auth_provider_x509_cert_url\": \"auth_provider_x509_cert_url\",\n        \"client_x509_cert_url\": \"client_x509_cert_url\"\n    }\n    client = GcpCredentials(\n        service_account_info=service_account_info\n    ).get_cloud_storage_client()\nexample_get_client_flow()\n
Gets a GCP Job Service client from a path.
from prefect import flow\nfrom prefect_gcp.credentials import GcpCredentials\n\n@flow()\ndef example_get_client_flow():\n    service_account_file = \"~/.secrets/prefect-service-account.json\"\n    client = GcpCredentials(\n        service_account_file=service_account_file\n    ).get_job_service_client()\n\nexample_get_client_flow()\n

Gets a GCP Job Service client from a dictionary.

from prefect import flow\nfrom prefect_gcp.credentials import GcpCredentials\n\n@flow()\ndef example_get_client_flow():\n    service_account_info = {\n        \"type\": \"service_account\",\n        \"project_id\": \"project_id\",\n        \"private_key_id\": \"private_key_id\",\n        \"private_key\": \"private_key\",\n        \"client_email\": \"client_email\",\n        \"client_id\": \"client_id\",\n        \"auth_uri\": \"auth_uri\",\n        \"token_uri\": \"token_uri\",\n        \"auth_provider_x509_cert_url\": \"auth_provider_x509_cert_url\",\n        \"client_x509_cert_url\": \"client_x509_cert_url\"\n    }\n    client = GcpCredentials(\n        service_account_info=service_account_info\n    ).get_job_service_client()\n\nexample_get_client_flow()\n
Gets a GCP Secret Manager client from a path.
from prefect import flow\nfrom prefect_gcp.credentials import GcpCredentials\n\n@flow()\ndef example_get_client_flow():\n    service_account_file = \"~/.secrets/prefect-service-account.json\"\n    client = GcpCredentials(\n        service_account_file=service_account_file\n    ).get_secret_manager_client()\nexample_get_client_flow()\n

Gets a GCP Secret Manager client from a dictionary.

from prefect import flow\nfrom prefect_gcp.credentials import GcpCredentials\n\n@flow()\ndef example_get_client_flow():\n    service_account_info = {\n        \"type\": \"service_account\",\n        \"project_id\": \"project_id\",\n        \"private_key_id\": \"private_key_id\",\n        \"private_key\": \"private_key\",\n        \"client_email\": \"client_email\",\n        \"client_id\": \"client_id\",\n        \"auth_uri\": \"auth_uri\",\n        \"token_uri\": \"token_uri\",\n        \"auth_provider_x509_cert_url\": \"auth_provider_x509_cert_url\",\n        \"client_x509_cert_url\": \"client_x509_cert_url\"\n    }\n    client = GcpCredentials(\n        service_account_info=service_account_info\n    ).get_secret_manager_client()\nexample_get_client_flow()\n
Gets a GCP BigQuery client from a path.
from prefect import flow\nfrom prefect_gcp.credentials import GcpCredentials\n\n@flow()\ndef example_get_client_flow():\n    service_account_file = \"~/.secrets/prefect-service-account.json\"\n    client = GcpCredentials(\n        service_account_file=service_account_file\n    ).get_bigquery_client()\nexample_get_client_flow()\n

Gets a GCP BigQuery client from a dictionary.

from prefect import flow\nfrom prefect_gcp.credentials import GcpCredentials\n\n@flow()\ndef example_get_client_flow():\n    service_account_info = {\n        \"type\": \"service_account\",\n        \"project_id\": \"project_id\",\n        \"private_key_id\": \"private_key_id\",\n        \"private_key\": \"private_key\",\n        \"client_email\": \"client_email\",\n        \"client_id\": \"client_id\",\n        \"auth_uri\": \"auth_uri\",\n        \"token_uri\": \"token_uri\",\n        \"auth_provider_x509_cert_url\": \"auth_provider_x509_cert_url\",\n        \"client_x509_cert_url\": \"client_x509_cert_url\"\n    }\n    client = GcpCredentials(\n        service_account_info=service_account_info\n    ).get_bigquery_client()\n\nexample_get_client_flow()\n

"},{"location":"gcp-worker-guide/","title":"Google Cloud Run Worker Guide","text":""},{"location":"gcp-worker-guide/#why-use-google-cloud-run-for-flow-run-execution","title":"Why use Google Cloud Run for flow run execution?","text":"

Google Cloud Run is a fully managed compute platform that automatically scales your containerized applications.

  1. Serverless architecture: Cloud Run follows a serverless architecture, which means you don't need to manage any underlying infrastructure. Google Cloud Run automatically handles the scaling and availability of your flow run infrastructure, allowing you to focus on developing and deploying your code.

  2. Scalability: Cloud Run can automatically scale your pipeline to handle varying workloads and traffic. It can quickly respond to increased demand and scale back down during low activity periods, ensuring efficient resource utilization.

  3. Integration with Google Cloud services: Google Cloud Run easily integrates with other Google Cloud services, such as Google Cloud Storage, Google Cloud Pub/Sub, and Google Cloud Build. This interoperability enables you to build end-to-end data pipelines that use a variety of services.

  4. Portability: Since Cloud Run uses container images, you can develop your pipelines locally using Docker and then deploy them on Google Cloud Run without significant modifications. This portability allows you to run the same pipeline in different environments.

"},{"location":"gcp-worker-guide/#google-cloud-run-guide","title":"Google Cloud Run guide","text":"

After completing this guide, you will have:

  1. Created a Google Cloud Service Account
  2. Created a Prefect Work Pool
  3. Deployed a Prefect Worker as a Cloud Run Service
  4. Deployed a Flow
  5. Executed the Flow as a Google Cloud Run Job

If you're looking for a general introduction to workers, work pools, and deployments, check out the workers and work pools tutorial.

"},{"location":"gcp-worker-guide/#prerequisites","title":"Prerequisites","text":"

Before starting this guide, make sure you have:

  • A Google Cloud Platform (GCP) account.
  • A project on your GCP account where you have the necessary permissions to create Cloud Run Services and Service Accounts.
  • The gcloud CLI installed on your local machine. You can follow Google Cloud's installation guide. If you're on macOS (or Linux), you can also use Homebrew for installation.
  • Docker installed on your local machine.
  • A Prefect server instance. You can sign up for a forever free Prefect Cloud Account or, alternatively, self-host a Prefect server.
"},{"location":"gcp-worker-guide/#step-1-create-a-google-cloud-service-account","title":"Step 1. Create a Google Cloud service account","text":"

First, open a terminal or command prompt on your local machine where gcloud is installed. If you haven't already authenticated with gcloud, run the following command and follow the instructions to log in to your GCP account.

gcloud auth login\n

Next, you'll set the project where you'd like to create the service account. Use the following command, replacing <PROJECT-ID> with your GCP project's ID.

gcloud config set project <PROJECT-ID>\n

For example, if your project's ID is prefect-project, the command will look like this:

gcloud config set project prefect-project\n

Now you're ready to make the service account. To do so, you'll need to run this command:

gcloud iam service-accounts create <SERVICE-ACCOUNT-NAME> --display-name=\"<DISPLAY-NAME>\"\n

Here's an example of the command above that already has the service account name and display name provided. An additional option describing the service account has also been added:

gcloud iam service-accounts create prefect-service-account \\\n    --description=\"service account to use for the prefect worker\" \\\n    --display-name=\"prefect-service-account\"\n

The last step of this process is to make sure the service account has the proper permissions to execute flow runs as Cloud Run jobs. Run the following commands to grant the necessary permissions:

gcloud projects add-iam-policy-binding <PROJECT-ID> \\\n    --member=\"serviceAccount:<SERVICE-ACCOUNT-NAME>@<PROJECT-ID>.iam.gserviceaccount.com\" \\\n    --role=\"roles/iam.serviceAccountUser\"\n
gcloud projects add-iam-policy-binding <PROJECT-ID> \\\n    --member=\"serviceAccount:<SERVICE-ACCOUNT-NAME>@<PROJECT-ID>.iam.gserviceaccount.com\" \\\n    --role=\"roles/run.admin\"\n

"},{"location":"gcp-worker-guide/#step-2-create-a-cloud-run-work-pool","title":"Step 2. Create a Cloud Run work pool","text":"

Let's walk through the process of creating a Cloud Run work pool.

"},{"location":"gcp-worker-guide/#fill-out-the-work-pool-base-job-template","title":"Fill out the work pool base job template","text":"

You can create a new work pool using the Prefect UI or CLI. The following command creates a work pool of type cloud-run via the CLI (you'll want to replace <WORK-POOL-NAME> with the name of your work pool):

prefect work-pool create --type cloud-run <WORK-POOL-NAME>\n

Once the work pool is created, find the work pool in the UI and edit it.

There are many ways to customize the base job template for the work pool. Modifying the template influences the infrastructure configuration that the worker provisions for flow runs submitted to the work pool. For this guide we are going to modify just a few of the available fields.

Specify the region for the Cloud Run job.

Save the name of the service account created in the first step of this guide.

Your work pool is now ready to receive scheduled flow runs!

"},{"location":"gcp-worker-guide/#step-3-deploy-a-cloud-run-worker","title":"Step 3. Deploy a Cloud Run worker","text":"

Now you can launch a Cloud Run service to host the Cloud Run worker. This worker will poll the work pool that you created in the previous step.

Navigate back to your terminal and run the following commands to set your Prefect API key and URL as environment variables. Be sure to replace <ACCOUNT-ID> and <WORKSPACE-ID> with your Prefect account and workspace IDs (both will be available in the URL of the UI when previewing the workspace dashboard). You'll want to replace <YOUR-API-KEY> with an active API key as well.

export PREFECT_API_URL='https://api.prefect.cloud/api/accounts/<ACCOUNT-ID>/workspaces/<WORKSPACE-ID>'\nexport PREFECT_API_KEY='<YOUR-API-KEY>'\n

Once those variables are set, run the following shell command to deploy your worker as a service. Don't forget to replace <YOUR-SERVICE-ACCOUNT-NAME> with the name of the service account you created in the first step of this guide, and replace <WORK-POOL-NAME> with the name of the work pool you created in the second step.

gcloud run deploy prefect-worker --image=prefecthq/prefect:2-latest \\\n--set-env-vars PREFECT_API_URL=$PREFECT_API_URL,PREFECT_API_KEY=$PREFECT_API_KEY \\\n--service-account <YOUR-SERVICE-ACCOUNT-NAME> \\\n--no-cpu-throttling \\\n--min-instances 1 \\\n--args \"prefect\",\"worker\",\"start\",\"--install-policy\",\"always\",\"--with-healthcheck\",\"-p\",\"<WORK-POOL-NAME>\",\"-t\",\"cloud-run\"\n

After running this command, you'll be prompted to specify a region. Choose the same region that you selected when creating the Cloud Run work pool in the second step of this guide. The next prompt will ask if you'd like to allow unauthenticated invocations to your worker. For this guide, you can select \"No\".

After a few seconds, you'll be able to see your new prefect-worker service by navigating to the Cloud Run page of your Google Cloud console. Additionally, you should be able to see a record of this worker in the Prefect UI on the work pool's page by navigating to the Worker tab. Let's not leave our worker hanging; it's time to give it a job.

"},{"location":"gcp-worker-guide/#step-4-deploy-a-flow","title":"Step 4. Deploy a flow","text":"

Let's prepare a flow to run as a Cloud Run job. In this section of the guide, we'll \"bake\" our code into a Docker image, and push that image to Google Artifact Registry.

"},{"location":"gcp-worker-guide/#create-a-registry","title":"Create a registry","text":"

Let's create a Docker repository in your Google Artifact Registry to host your custom image. If you already have a registry and are authenticated to it, skip ahead to the Write a flow section.

The following command creates a repository using the gcloud CLI. You'll want to replace <REPOSITORY-NAME> with your own value:

gcloud artifacts repositories create <REPOSITORY-NAME> \\\n--repository-format=docker --location=us\n

Now you can authenticate to Artifact Registry:

gcloud auth configure-docker us-docker.pkg.dev\n

"},{"location":"gcp-worker-guide/#write-a-flow","title":"Write a flow","text":"

First, create a new directory. This will serve as the root of your project's repository. Within the directory, create a subdirectory called flows. Navigate to the flows subdirectory and create a new file for your flow. Feel free to write your own flow, but here's a ready-made one for your convenience:

import httpx\nfrom prefect import flow, task\nfrom prefect.artifacts import create_markdown_artifact\n\n@task\ndef mark_it_down(temp):\n    markdown_report = f\"\"\"# Weather Report\n## Recent weather\n\n| Time        | Temperature |\n|:--------------|-------:|\n| Now | {temp} |\n| In 1 hour       | {temp + 2} |\n\"\"\"\n    create_markdown_artifact(\n        key=\"weather-report\",\n        markdown=markdown_report,\n        description=\"Very scientific weather report\",\n    )\n\n\n@flow\ndef fetch_weather(lat: float, lon: float):\n    base_url = \"https://api.open-meteo.com/v1/forecast/\"\n    weather = httpx.get(\n        base_url,\n        params=dict(latitude=lat, longitude=lon, hourly=\"temperature_2m\"),\n    )\n    most_recent_temp = float(weather.json()[\"hourly\"][\"temperature_2m\"][0])\n    mark_it_down(most_recent_temp)\n\n\nif __name__ == \"__main__\":\n    fetch_weather(38.9, -77.0)\n

In the remainder of this guide, this script will be referred to as weather_flow.py, but you can name yours whatever you'd like.

"},{"location":"gcp-worker-guide/#creating-a-prefectyaml-file","title":"Creating a prefect.yaml file","text":"

Now we're ready to make a prefect.yaml file, which will be responsible for managing the deployments of this repository. Navigate back to the root of your directory, and run the following command to create a prefect.yaml file using Prefect's docker deployment recipe.

prefect init --recipe docker\n

You'll receive a prompt to put in values for the image name and tag. Since we will be pushing the image to Google Artifact Registry, the name of your image should be prefixed with the path to the Docker repository you created within the registry. For example: us-docker.pkg.dev/<PROJECT-ID>/<REPOSITORY-NAME>/. You'll want to replace <PROJECT-ID> with the ID of your project in GCP. This should match the ID of the project you used in the first step of this guide. Here is an example of what this could look like:

image_name: us-docker.pkg.dev/prefect-project/my-artifact-registry/gcp-weather-image\ntag: latest\n

At this point, there will be a new prefect.yaml file available at the root of your project. The contents will look similar to the example below; however, I've added a combination of YAML templating options and Prefect deployment actions to build out a simple CI/CD process. Feel free to copy the contents and paste them into your prefect.yaml:

# Welcome to your prefect.yaml file! You can you this file for storing and managing\n# configuration for deploying your flows. We recommend committing this file to source\n# control along with your flow code.\n\n# Generic metadata about this project\nname: <WORKING-DIRECTORY>\nprefect-version: 2.13.4\n\n# build section allows you to manage and build docker image\nbuild:\n- prefect_docker.deployments.steps.build_docker_image:\n    id: build_image\n    requires: prefect-docker>=0.3.1\n    image_name: <PATH-TO-ARTIFACT-REGISTRY>/gcp-weather-image\n    tag: latest\n    dockerfile: auto\n    platform: linux/amd64\n\n# push section allows you to manage if and how this project is uploaded to remote locations\npush:\n- prefect_docker.deployments.steps.push_docker_image:\n    requires: prefect-docker>=0.3.1\n    image_name: '{{ build_image.image_name }}'\n    tag: '{{ build_image.tag }}'\n\n# pull section allows you to provide instructions for cloning this project in remote locations\npull:\n- prefect.deployments.steps.set_working_directory:\n    directory: /opt/prefect/<WORKING-DIRECTORY>\n\n# the deployments section allows you to provide configuration for deploying flows\ndeployments:\n- name: gcp-weather-deploy\n  version: null\n  tags: []\n  description: null\n  schedule: {}\n  flow_name: null\n  entrypoint: flows/weather_flow.py:fetch_weather\n  parameters:\n    lat: 14.5994\n    lon: 28.6731\n  work_pool:\n    name: my-cloud-run-pool\n    work_queue_name: default\n    job_variables:\n      image: '{{ build_image.image }}'\n

Tip

After copying the example above, don't forget to replace <WORKING-DIRECTORY> with the name of the directory where your flow folder and prefect.yaml live. You'll also need to replace <PATH-TO-ARTIFACT-REGISTRY> with the path to the Docker repository in your Google Artifact Registry.

To get a better understanding of the different components of the prefect.yaml file above and what they do, feel free to read this next section. Otherwise, you can skip ahead to Flow Deployment.

In the build section of the prefect.yaml the following step is executed at deployment build time:

  1. prefect_docker.deployments.steps.build_docker_image: builds a Docker image automatically, using the name and tag chosen previously.

Warning

If you are using an ARM-based chip (such as an M1 or M2 Mac), you'll want to add platform: linux/amd64 to your build_docker_image step to ensure that your Docker image uses an AMD64 architecture. For example:

- prefect_docker.deployments.steps.build_docker_image:\nid: build_image\nrequires: prefect-docker>=0.3.1\nimage_name: us-docker.pkg.dev/prefect-project/my-docker-repository/gcp-weather-image\ntag: latest\ndockerfile: auto\nplatform: linux/amd64\n

The push section sends the Docker image to the Docker repository in your Google Artifact Registry, so that it can be easily accessed by the worker for flow run execution.

The pull section sets the working directory for the process prior to importing your flow.

In the deployments section of the prefect.yaml file above, you'll see that there is a deployment declaration named gcp-weather-deploy. Within the declaration, the entrypoint for the flow is specified along with some default parameters which will be passed to the flow at runtime. Last but not least, the name of the work pool that we created in step 2 of this guide is specified.

"},{"location":"gcp-worker-guide/#flow-deployment","title":"Flow deployment","text":"

Once you're happy with the specifications in the prefect.yaml file, run the following command in the terminal to deploy your flow:

prefect deploy --name gcp-weather-deploy\n

Once the flow is deployed to Prefect Cloud or your local Prefect Server, it's time to queue up a flow run!

"},{"location":"gcp-worker-guide/#step-5-flow-execution","title":"Step 5. Flow execution","text":"

Find your deployment in the UI, and hit the Quick Run button. You have now successfully submitted a flow run to your Cloud Run worker! If you used the flow script provided in this guide, check the Artifacts tab for the flow run once it completes. You'll have a nice little weather report waiting for you there. Hope your day is a sunny one!
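
If you'd rather trigger the run from code instead of the UI, here's a minimal, hedged sketch using Prefect's run_deployment helper (the flow and deployment names below assume the ones used in this guide):

from prefect.deployments import run_deployment\n\n# Submit a run of the deployment created earlier in this guide and wait for it to finish\nflow_run = run_deployment(name=\"fetch-weather/gcp-weather-deploy\")\nprint(flow_run.state)\n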

"},{"location":"gcp-worker-guide/#recap-and-next-steps","title":"Recap and next steps","text":"

Congratulations on completing this guide! Looking back on our journey, you have:

  1. Created a Google Cloud service account
  2. Created a Cloud Run work pool
  3. Deployed a Cloud Run worker
  4. Deployed a flow
  5. Executed a flow

For next steps, you could:

  • Take a look at some of the other work pools Prefect has to offer
  • Do a deep dive on Prefect concepts
  • Try out another guide to explore new deployment patterns and recipes

The world is your oyster \ud83e\uddaa\u2728.

"},{"location":"secret_manager/","title":"Secret Manager","text":""},{"location":"secret_manager/#prefect_gcp.secret_manager","title":"prefect_gcp.secret_manager","text":""},{"location":"secret_manager/#prefect_gcp.secret_manager-classes","title":"Classes","text":""},{"location":"secret_manager/#prefect_gcp.secret_manager.GcpSecret","title":"GcpSecret","text":"

Bases: SecretBlock

Manages a secret in Google Cloud Platform's Secret Manager.

Attributes:

Name Type Description gcp_credentials GcpCredentials

Credentials to use for authentication with GCP.

secret_name str

Name of the secret to manage.

secret_version str

Version number of the secret to use, or \"latest\".
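
Example, as a minimal sketch; the credentials block name, secret name, and secret value below are illustrative:

from prefect_gcp import GcpCredentials\nfrom prefect_gcp.secret_manager import GcpSecret\n\n# Point the block at a secret, using a previously saved credentials block (names are examples)\ngcp_secret = GcpSecret(\n    gcp_credentials=GcpCredentials.load(\"my-gcp-creds\"),\n    secret_name=\"my-secret\",\n)\n\n# Write a new secret version, then read the latest version back\ngcp_secret.write_secret(b\"my-secret-value\")\nprint(gcp_secret.read_secret())\n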

Source code in prefect_gcp/secret_manager.py
class GcpSecret(SecretBlock):\n    \"\"\"\n    Manages a secret in Google Cloud Platform's Secret Manager.\n\n    Attributes:\n        gcp_credentials: Credentials to use for authentication with GCP.\n        secret_name: Name of the secret to manage.\n        secret_version: Version number of the secret to use, or \"latest\".\n    \"\"\"\n\n    _logo_url = \"https://cdn.sanity.io/images/3ugk85nk/production/10424e311932e31c477ac2b9ef3d53cefbaad708-250x250.png\"  # noqa\n    _documentation_url = \"https://prefecthq.github.io/prefect-gcp/secret_manager/#prefect_gcp.secret_manager.GcpSecret\"  # noqa: E501\n\n    gcp_credentials: GcpCredentials\n    secret_name: str = Field(default=..., description=\"Name of the secret to manage.\")\n    secret_version: str = Field(\n        default=\"latest\", description=\"Version number of the secret to use.\"\n    )\n\n    @sync_compatible\n    async def read_secret(self) -> bytes:\n        \"\"\"\n        Reads the secret data from the secret storage service.\n\n        Returns:\n            The secret data as bytes.\n        \"\"\"\n        client = self.gcp_credentials.get_secret_manager_client()\n        project = self.gcp_credentials.project\n        name = f\"projects/{project}/secrets/{self.secret_name}/versions/{self.secret_version}\"  # noqa\n        request = AccessSecretVersionRequest(name=name)\n\n        self.logger.debug(f\"Preparing to read secret data from {name!r}.\")\n        response = await run_sync_in_worker_thread(\n            client.access_secret_version, request=request\n        )\n        secret = response.payload.data\n        self.logger.info(f\"The secret {name!r} data was successfully read.\")\n        return secret\n\n    @sync_compatible\n    async def write_secret(self, secret_data: bytes) -> str:\n        \"\"\"\n        Writes the secret data to the secret storage service; if it doesn't exist\n        it will be created.\n\n        Args:\n            secret_data: The secret to write.\n\n        Returns:\n            The path that the secret was written to.\n        \"\"\"\n        client = self.gcp_credentials.get_secret_manager_client()\n        project = self.gcp_credentials.project\n        parent = f\"projects/{project}/secrets/{self.secret_name}\"\n        payload = SecretPayload(data=secret_data)\n        add_request = AddSecretVersionRequest(parent=parent, payload=payload)\n\n        self.logger.debug(f\"Preparing to write secret data to {parent!r}.\")\n        try:\n            response = await run_sync_in_worker_thread(\n                client.add_secret_version, request=add_request\n            )\n        except NotFound:\n            self.logger.info(\n                f\"The secret {parent!r} does not exist yet, creating it now.\"\n            )\n            create_parent = f\"projects/{project}\"\n            secret_id = self.secret_name\n            secret = Secret(replication=Replication(automatic=Replication.Automatic()))\n            create_request = CreateSecretRequest(\n                parent=create_parent, secret_id=secret_id, secret=secret\n            )\n            await run_sync_in_worker_thread(\n                client.create_secret, request=create_request\n            )\n\n            self.logger.debug(f\"Preparing to write secret data to {parent!r} again.\")\n            response = await run_sync_in_worker_thread(\n                client.add_secret_version, request=add_request\n            )\n\n        self.logger.info(f\"The secret data was written successfully to {parent!r}.\")\n        return 
response.name\n\n    @sync_compatible\n    async def delete_secret(self) -> str:\n        \"\"\"\n        Deletes the secret from the secret storage service.\n\n        Returns:\n            The path that the secret was deleted from.\n        \"\"\"\n        client = self.gcp_credentials.get_secret_manager_client()\n        project = self.gcp_credentials.project\n\n        name = f\"projects/{project}/secrets/{self.secret_name}\"\n        request = DeleteSecretRequest(name=name)\n\n        self.logger.debug(f\"Preparing to delete the secret {name!r}.\")\n        await run_sync_in_worker_thread(client.delete_secret, request=request)\n        self.logger.info(f\"The secret {name!r} was successfully deleted.\")\n        return name\n
"},{"location":"secret_manager/#prefect_gcp.secret_manager.GcpSecret-functions","title":"Functions","text":""},{"location":"secret_manager/#prefect_gcp.secret_manager.GcpSecret.delete_secret","title":"delete_secret async","text":"

Deletes the secret from the secret storage service.

Returns:

Type Description str

The path that the secret was deleted from.

Source code in prefect_gcp/secret_manager.py
@sync_compatible\nasync def delete_secret(self) -> str:\n    \"\"\"\n    Deletes the secret from the secret storage service.\n\n    Returns:\n        The path that the secret was deleted from.\n    \"\"\"\n    client = self.gcp_credentials.get_secret_manager_client()\n    project = self.gcp_credentials.project\n\n    name = f\"projects/{project}/secrets/{self.secret_name}\"\n    request = DeleteSecretRequest(name=name)\n\n    self.logger.debug(f\"Preparing to delete the secret {name!r}.\")\n    await run_sync_in_worker_thread(client.delete_secret, request=request)\n    self.logger.info(f\"The secret {name!r} was successfully deleted.\")\n    return name\n
"},{"location":"secret_manager/#prefect_gcp.secret_manager.GcpSecret.read_secret","title":"read_secret async","text":"

Reads the secret data from the secret storage service.

Returns:

Type Description bytes

The secret data as bytes.

Source code in prefect_gcp/secret_manager.py
@sync_compatible\nasync def read_secret(self) -> bytes:\n    \"\"\"\n    Reads the secret data from the secret storage service.\n\n    Returns:\n        The secret data as bytes.\n    \"\"\"\n    client = self.gcp_credentials.get_secret_manager_client()\n    project = self.gcp_credentials.project\n    name = f\"projects/{project}/secrets/{self.secret_name}/versions/{self.secret_version}\"  # noqa\n    request = AccessSecretVersionRequest(name=name)\n\n    self.logger.debug(f\"Preparing to read secret data from {name!r}.\")\n    response = await run_sync_in_worker_thread(\n        client.access_secret_version, request=request\n    )\n    secret = response.payload.data\n    self.logger.info(f\"The secret {name!r} data was successfully read.\")\n    return secret\n
"},{"location":"secret_manager/#prefect_gcp.secret_manager.GcpSecret.write_secret","title":"write_secret async","text":"

Writes the secret data to the secret storage service; if it doesn't exist it will be created.

Parameters:

Name Type Description Default secret_data bytes

The secret to write.

required

Returns:

Type Description str

The path that the secret was written to.

Source code in prefect_gcp/secret_manager.py
@sync_compatible\nasync def write_secret(self, secret_data: bytes) -> str:\n    \"\"\"\n    Writes the secret data to the secret storage service; if it doesn't exist\n    it will be created.\n\n    Args:\n        secret_data: The secret to write.\n\n    Returns:\n        The path that the secret was written to.\n    \"\"\"\n    client = self.gcp_credentials.get_secret_manager_client()\n    project = self.gcp_credentials.project\n    parent = f\"projects/{project}/secrets/{self.secret_name}\"\n    payload = SecretPayload(data=secret_data)\n    add_request = AddSecretVersionRequest(parent=parent, payload=payload)\n\n    self.logger.debug(f\"Preparing to write secret data to {parent!r}.\")\n    try:\n        response = await run_sync_in_worker_thread(\n            client.add_secret_version, request=add_request\n        )\n    except NotFound:\n        self.logger.info(\n            f\"The secret {parent!r} does not exist yet, creating it now.\"\n        )\n        create_parent = f\"projects/{project}\"\n        secret_id = self.secret_name\n        secret = Secret(replication=Replication(automatic=Replication.Automatic()))\n        create_request = CreateSecretRequest(\n            parent=create_parent, secret_id=secret_id, secret=secret\n        )\n        await run_sync_in_worker_thread(\n            client.create_secret, request=create_request\n        )\n\n        self.logger.debug(f\"Preparing to write secret data to {parent!r} again.\")\n        response = await run_sync_in_worker_thread(\n            client.add_secret_version, request=add_request\n        )\n\n    self.logger.info(f\"The secret data was written successfully to {parent!r}.\")\n    return response.name\n
"},{"location":"secret_manager/#prefect_gcp.secret_manager-functions","title":"Functions","text":""},{"location":"secret_manager/#prefect_gcp.secret_manager.create_secret","title":"create_secret async","text":"

Creates a secret in Google Cloud Platform's Secret Manager.

Parameters:

Name Type Description Default secret_name str

Name of the secret to retrieve.

required gcp_credentials GcpCredentials

Credentials to use for authentication with GCP.

required timeout float

The number of seconds the transport should wait for the server response.

60 project Optional[str]

Name of the project to use; overrides the gcp_credentials project if provided.

None

Returns:

Type Description str

The path of the created secret.

Example
from prefect import flow\nfrom prefect_gcp import GcpCredentials\nfrom prefect_gcp.secret_manager import create_secret\n\n@flow()\ndef example_cloud_storage_create_secret_flow():\n    gcp_credentials = GcpCredentials(project=\"project\")\n    secret_path = create_secret(\"secret_name\", gcp_credentials)\n    return secret_path\n\nexample_cloud_storage_create_secret_flow()\n
Source code in prefect_gcp/secret_manager.py
@task\nasync def create_secret(\n    secret_name: str,\n    gcp_credentials: \"GcpCredentials\",\n    timeout: float = 60,\n    project: Optional[str] = None,\n) -> str:\n    \"\"\"\n    Creates a secret in Google Cloud Platform's Secret Manager.\n\n    Args:\n        secret_name: Name of the secret to retrieve.\n        gcp_credentials: Credentials to use for authentication with GCP.\n        timeout: The number of seconds the transport should wait\n            for the server response.\n        project: Name of the project to use; overrides the\n            gcp_credentials project if provided.\n\n    Returns:\n        The path of the created secret.\n\n    Example:\n        ```python\n        from prefect import flow\n        from prefect_gcp import GcpCredentials\n        from prefect_gcp.secret_manager import create_secret\n\n        @flow()\n        def example_cloud_storage_create_secret_flow():\n            gcp_credentials = GcpCredentials(project=\"project\")\n            secret_path = create_secret(\"secret_name\", gcp_credentials)\n            return secret_path\n\n        example_cloud_storage_create_secret_flow()\n        ```\n    \"\"\"\n    logger = get_run_logger()\n    logger.info(\"Creating the %s secret\", secret_name)\n\n    client = gcp_credentials.get_secret_manager_client()\n    project = project or gcp_credentials.project\n\n    parent = f\"projects/{project}\"\n    secret_settings = {\"replication\": {\"automatic\": {}}}\n\n    partial_create = partial(\n        client.create_secret,\n        parent=parent,\n        secret_id=secret_name,\n        secret=secret_settings,\n        timeout=timeout,\n    )\n    response = await to_thread.run_sync(partial_create)\n    return response.name\n
"},{"location":"secret_manager/#prefect_gcp.secret_manager.delete_secret","title":"delete_secret async","text":"

Deletes the specified secret from Google Cloud Platform's Secret Manager.

Parameters:

Name Type Description Default secret_name str

Name of the secret to delete.

required gcp_credentials GcpCredentials

Credentials to use for authentication with GCP.

required timeout float

The number of seconds the transport should wait for the server response.

60 project Optional[str]

Name of the project to use; overrides the gcp_credentials project if provided.

None

Returns:

Type Description str

The path of the deleted secret.

Example
from prefect import flow\nfrom prefect_gcp import GcpCredentials\nfrom prefect_gcp.secret_manager import delete_secret\n\n@flow()\ndef example_cloud_storage_delete_secret_flow():\n    gcp_credentials = GcpCredentials(project=\"project\")\n    secret_path = delete_secret(\"secret_name\", gcp_credentials)\n    return secret_path\n\nexample_cloud_storage_delete_secret_flow()\n
Source code in prefect_gcp/secret_manager.py
@task\nasync def delete_secret(\n    secret_name: str,\n    gcp_credentials: \"GcpCredentials\",\n    timeout: float = 60,\n    project: Optional[str] = None,\n) -> str:\n    \"\"\"\n    Deletes the specified secret from Google Cloud Platform's Secret Manager.\n\n    Args:\n        secret_name: Name of the secret to delete.\n        gcp_credentials: Credentials to use for authentication with GCP.\n        timeout: The number of seconds the transport should wait\n            for the server response.\n        project: Name of the project to use; overrides the\n            gcp_credentials project if provided.\n\n    Returns:\n        The path of the deleted secret.\n\n    Example:\n        ```python\n        from prefect import flow\n        from prefect_gcp import GcpCredentials\n        from prefect_gcp.secret_manager import delete_secret\n\n        @flow()\n        def example_cloud_storage_delete_secret_flow():\n            gcp_credentials = GcpCredentials(project=\"project\")\n            secret_path = delete_secret(\"secret_name\", gcp_credentials)\n            return secret_path\n\n        example_cloud_storage_delete_secret_flow()\n        ```\n    \"\"\"\n    logger = get_run_logger()\n    logger.info(\"Deleting %s secret\", secret_name)\n\n    client = gcp_credentials.get_secret_manager_client()\n    project = project or gcp_credentials.project\n\n    name = f\"projects/{project}/secrets/{secret_name}/\"\n    partial_delete = partial(client.delete_secret, name=name, timeout=timeout)\n    await to_thread.run_sync(partial_delete)\n    return name\n
"},{"location":"secret_manager/#prefect_gcp.secret_manager.delete_secret_version","title":"delete_secret_version async","text":"

Deletes a version of a given secret from Google Cloud Platform's Secret Manager.

Parameters:

Name Type Description Default secret_name str

Name of the secret to retrieve.

required version_id int

Version number of the secret to use; \"latest\" can NOT be used.

required gcp_credentials GcpCredentials

Credentials to use for authentication with GCP.

required timeout float

The number of seconds the transport should wait for the server response.

60 project Optional[str]

Name of the project to use; overrides the gcp_credentials project if provided.

None

Returns:

Type Description str

The path of the deleted secret version.

Example
from prefect import flow\nfrom prefect_gcp import GcpCredentials\nfrom prefect_gcp.secret_manager import delete_secret_version\n\n@flow()\ndef example_cloud_storage_delete_secret_version_flow():\n    gcp_credentials = GcpCredentials(project=\"project\")\n    secret_value = delete_secret_version(\"secret_name\", 1, gcp_credentials)\n    return secret_value\n\nexample_cloud_storage_delete_secret_version_flow()\n
Source code in prefect_gcp/secret_manager.py
@task\nasync def delete_secret_version(\n    secret_name: str,\n    version_id: int,\n    gcp_credentials: \"GcpCredentials\",\n    timeout: float = 60,\n    project: Optional[str] = None,\n) -> str:\n    \"\"\"\n    Deletes a version of a given secret from Google Cloud Platform's Secret Manager.\n\n    Args:\n        secret_name: Name of the secret to retrieve.\n        version_id: Version number of the secret to use; \"latest\" can NOT be used.\n        gcp_credentials: Credentials to use for authentication with GCP.\n        timeout: The number of seconds the transport should wait\n            for the server response.\n        project: Name of the project to use; overrides the\n            gcp_credentials project if provided.\n\n    Returns:\n        The path of the deleted secret version.\n\n    Example:\n        ```python\n        from prefect import flow\n        from prefect_gcp import GcpCredentials\n        from prefect_gcp.secret_manager import delete_secret_version\n\n        @flow()\n        def example_cloud_storage_delete_secret_version_flow():\n            gcp_credentials = GcpCredentials(project=\"project\")\n            secret_value = delete_secret_version(\"secret_name\", 1, gcp_credentials)\n            return secret_value\n\n        example_cloud_storage_delete_secret_version_flow()\n        ```\n    \"\"\"\n    logger = get_run_logger()\n    logger.info(\"Reading %s version of %s secret\", version_id, secret_name)\n\n    client = gcp_credentials.get_secret_manager_client()\n    project = project or gcp_credentials.project\n\n    if version_id == \"latest\":\n        raise ValueError(\"The version_id cannot be 'latest'\")\n\n    name = f\"projects/{project}/secrets/{secret_name}/versions/{version_id}\"\n    partial_destroy = partial(client.destroy_secret_version, name=name, timeout=timeout)\n    await to_thread.run_sync(partial_destroy)\n    return name\n
"},{"location":"secret_manager/#prefect_gcp.secret_manager.read_secret","title":"read_secret async","text":"

Reads the value of a given secret from Google Cloud Platform's Secret Manager.

Parameters:

Name Type Description Default secret_name str

Name of the secret to retrieve.

required gcp_credentials GcpCredentials

Credentials to use for authentication with GCP.

required timeout float

The number of seconds the transport should wait for the server response.

60 project Optional[str]

Name of the project to use; overrides the gcp_credentials project if provided.

None

Returns:

Type Description str

Contents of the specified secret.

Example
from prefect import flow\nfrom prefect_gcp import GcpCredentials\nfrom prefect_gcp.secret_manager import read_secret\n\n@flow()\ndef example_cloud_storage_read_secret_flow():\n    gcp_credentials = GcpCredentials(project=\"project\")\n    secret_value = read_secret(\"secret_name\", gcp_credentials, version_id=1)\n    return secret_value\n\nexample_cloud_storage_read_secret_flow()\n
Source code in prefect_gcp/secret_manager.py
@task\nasync def read_secret(\n    secret_name: str,\n    gcp_credentials: \"GcpCredentials\",\n    version_id: Union[str, int] = \"latest\",\n    timeout: float = 60,\n    project: Optional[str] = None,\n) -> str:\n    \"\"\"\n    Reads the value of a given secret from Google Cloud Platform's Secret Manager.\n\n    Args:\n        secret_name: Name of the secret to retrieve.\n        gcp_credentials: Credentials to use for authentication with GCP.\n        timeout: The number of seconds the transport should wait\n            for the server response.\n        project: Name of the project to use; overrides the\n            gcp_credentials project if provided.\n\n    Returns:\n        Contents of the specified secret.\n\n    Example:\n        ```python\n        from prefect import flow\n        from prefect_gcp import GcpCredentials\n        from prefect_gcp.secret_manager import read_secret\n\n        @flow()\n        def example_cloud_storage_read_secret_flow():\n            gcp_credentials = GcpCredentials(project=\"project\")\n            secret_value = read_secret(\"secret_name\", gcp_credentials, version_id=1)\n            return secret_value\n\n        example_cloud_storage_read_secret_flow()\n        ```\n    \"\"\"\n    logger = get_run_logger()\n    logger.info(\"Reading %s version of %s secret\", version_id, secret_name)\n\n    client = gcp_credentials.get_secret_manager_client()\n    project = project or gcp_credentials.project\n\n    name = f\"projects/{project}/secrets/{secret_name}/versions/{version_id}\"\n    partial_access = partial(client.access_secret_version, name=name, timeout=timeout)\n    response = await to_thread.run_sync(partial_access)\n    secret = response.payload.data.decode(\"UTF-8\")\n    return secret\n
"},{"location":"secret_manager/#prefect_gcp.secret_manager.update_secret","title":"update_secret async","text":"

Updates a secret in Google Cloud Platform's Secret Manager.

Parameters:

Name Type Description Default secret_name str

Name of the secret to retrieve.

required secret_value Union[str, bytes]

Desired value of the secret. Can be either str or bytes.

required gcp_credentials GcpCredentials

Credentials to use for authentication with GCP.

required timeout float

The number of seconds the transport should wait for the server response.

60 project Optional[str]

Name of the project to use; overrides the gcp_credentials project if provided.

None

Returns:

Type Description str

The path of the updated secret.

Example
from prefect import flow\nfrom prefect_gcp import GcpCredentials\nfrom prefect_gcp.secret_manager import update_secret\n\n@flow()\ndef example_cloud_storage_update_secret_flow():\n    gcp_credentials = GcpCredentials(project=\"project\")\n    secret_path = update_secret(\"secret_name\", \"secret_value\", gcp_credentials)\n    return secret_path\n\nexample_cloud_storage_update_secret_flow()\n
Source code in prefect_gcp/secret_manager.py
@task\nasync def update_secret(\n    secret_name: str,\n    secret_value: Union[str, bytes],\n    gcp_credentials: \"GcpCredentials\",\n    timeout: float = 60,\n    project: Optional[str] = None,\n) -> str:\n    \"\"\"\n    Updates a secret in Google Cloud Platform's Secret Manager.\n\n    Args:\n        secret_name: Name of the secret to retrieve.\n        secret_value: Desired value of the secret. Can be either `str` or `bytes`.\n        gcp_credentials: Credentials to use for authentication with GCP.\n        timeout: The number of seconds the transport should wait\n            for the server response.\n        project: Name of the project to use; overrides the\n            gcp_credentials project if provided.\n\n    Returns:\n        The path of the updated secret.\n\n    Example:\n        ```python\n        from prefect import flow\n        from prefect_gcp import GcpCredentials\n        from prefect_gcp.secret_manager import update_secret\n\n        @flow()\n        def example_cloud_storage_update_secret_flow():\n            gcp_credentials = GcpCredentials(project=\"project\")\n            secret_path = update_secret(\"secret_name\", \"secret_value\", gcp_credentials)\n            return secret_path\n\n        example_cloud_storage_update_secret_flow()\n        ```\n    \"\"\"\n    logger = get_run_logger()\n    logger.info(\"Updating the %s secret\", secret_name)\n\n    client = gcp_credentials.get_secret_manager_client()\n    project = project or gcp_credentials.project\n\n    parent = f\"projects/{project}/secrets/{secret_name}\"\n    if isinstance(secret_value, str):\n        secret_value = secret_value.encode(\"UTF-8\")\n    partial_add = partial(\n        client.add_secret_version,\n        parent=parent,\n        payload={\"data\": secret_value},\n        timeout=timeout,\n    )\n    response = await to_thread.run_sync(partial_add)\n    return response.name\n
"},{"location":"vertex_worker/","title":"Vertex AI","text":""},{"location":"vertex_worker/#prefect_gcp.workers.vertex","title":"prefect_gcp.workers.vertex","text":"

Module containing the custom worker used for executing flow runs as Vertex AI Custom Jobs.

Get started by creating a Vertex AI work pool:

prefect work-pool create 'my-vertex-pool' --type vertex-ai\n

Then start a Vertex AI worker with the following command:

prefect worker start --pool 'my-vertex-pool'\n
"},{"location":"vertex_worker/#prefect_gcp.workers.vertex--configuration","title":"Configuration","text":"

Read more about configuring work pools here.

"},{"location":"vertex_worker/#prefect_gcp.workers.vertex-classes","title":"Classes","text":""},{"location":"vertex_worker/#prefect_gcp.workers.vertex.VertexAIWorker","title":"VertexAIWorker","text":"

Bases: BaseWorker

Prefect worker that executes flow runs within Vertex AI Jobs.

Source code in prefect_gcp/workers/vertex.py
class VertexAIWorker(BaseWorker):\n    \"\"\"Prefect worker that executes flow runs within Vertex AI Jobs.\"\"\"\n\n    type = \"vertex-ai\"\n    job_configuration = VertexAIWorkerJobConfiguration\n    job_configuration_variables = VertexAIWorkerVariables\n    _description = (\n        \"Execute flow runs within containers on Google Vertex AI. Requires \"\n        \"a Google Cloud Platform account.\"\n    )\n    _display_name = \"Google Vertex AI\"\n    _documentation_url = \"https://prefecthq.github.io/prefect-gcp/vertex_worker/\"\n    _logo_url = \"https://cdn.sanity.io/images/3ugk85nk/production/10424e311932e31c477ac2b9ef3d53cefbaad708-250x250.png\"  # noqa\n\n    async def run(\n        self,\n        flow_run: \"FlowRun\",\n        configuration: VertexAIWorkerJobConfiguration,\n        task_status: Optional[anyio.abc.TaskStatus] = None,\n    ) -> VertexAIWorkerResult:\n        \"\"\"\n        Executes a flow run within a Vertex AI Job and waits for the flow run\n        to complete.\n\n        Args:\n            flow_run: The flow run to execute\n            configuration: The configuration to use when executing the flow run.\n            task_status: The task status object for the current flow run. If provided,\n                the task will be marked as started.\n\n        Returns:\n            VertexAIWorkerResult: A result object containing information about the\n                final state of the flow run\n        \"\"\"\n        logger = self.get_flow_run_logger(flow_run)\n\n        client_options = ClientOptions(\n            api_endpoint=f\"{configuration.region}-aiplatform.googleapis.com\"\n        )\n\n        job_name = configuration.job_name\n\n        job_spec = self._build_job_spec(configuration)\n        with configuration.credentials.get_job_service_client(\n            client_options=client_options\n        ) as job_service_client:\n            job_run = await self._create_and_begin_job(\n                job_name, job_spec, job_service_client, configuration, logger\n            )\n\n            if task_status:\n                task_status.started(job_run.name)\n\n            final_job_run = await self._watch_job_run(\n                job_name=job_name,\n                full_job_name=job_run.name,\n                job_service_client=job_service_client,\n                current_state=job_run.state,\n                until_states=(\n                    JobState.JOB_STATE_SUCCEEDED,\n                    JobState.JOB_STATE_FAILED,\n                    JobState.JOB_STATE_CANCELLED,\n                    JobState.JOB_STATE_EXPIRED,\n                ),\n                configuration=configuration,\n                logger=logger,\n                timeout=int(\n                    datetime.timedelta(\n                        hours=configuration.job_spec[\"maximum_run_time_hours\"]\n                    ).total_seconds()\n                ),\n            )\n\n        error_msg = final_job_run.error.message\n\n        # Vertex will include an error message upon valid\n        # flow cancellations, so we'll avoid raising an error in that case\n        if error_msg and \"CANCELED\" not in error_msg:\n            raise RuntimeError(error_msg)\n\n        status_code = 0 if final_job_run.state == JobState.JOB_STATE_SUCCEEDED else 1\n\n        return VertexAIWorkerResult(\n            identifier=final_job_run.display_name, status_code=status_code\n        )\n\n    def _build_job_spec(\n        self, configuration: VertexAIWorkerJobConfiguration\n    ) -> \"CustomJobSpec\":\n        
\"\"\"\n        Builds a job spec by gathering details.\n        \"\"\"\n        # here, we extract the `worker_pool_specs` out of the job_spec\n        worker_pool_specs = [\n            WorkerPoolSpec(\n                container_spec=ContainerSpec(**spec[\"container_spec\"]),\n                machine_spec=MachineSpec(**spec[\"machine_spec\"]),\n                replica_count=spec[\"replica_count\"],\n                disk_spec=DiskSpec(**spec[\"disk_spec\"]),\n            )\n            for spec in configuration.job_spec.pop(\"worker_pool_specs\", [])\n        ]\n\n        timeout = Duration().FromTimedelta(\n            td=datetime.timedelta(\n                hours=configuration.job_spec[\"maximum_run_time_hours\"]\n            )\n        )\n        scheduling = Scheduling(timeout=timeout)\n\n        # construct the final job spec that we will provide to Vertex AI\n        job_spec = CustomJobSpec(\n            worker_pool_specs=worker_pool_specs,\n            scheduling=scheduling,\n            ignore_unknown_fields=True,\n            **configuration.job_spec,\n        )\n        return job_spec\n\n    async def _create_and_begin_job(\n        self,\n        job_name: str,\n        job_spec: \"CustomJobSpec\",\n        job_service_client: \"JobServiceClient\",\n        configuration: VertexAIWorkerJobConfiguration,\n        logger: PrefectLogAdapter,\n    ) -> \"CustomJob\":\n        \"\"\"\n        Builds a custom job and begins running it.\n        \"\"\"\n        # create custom job\n        custom_job = CustomJob(\n            display_name=job_name,\n            job_spec=job_spec,\n            labels=self._get_compatible_labels(configuration=configuration),\n        )\n\n        # run job\n        logger.info(f\"Job {job_name!r} starting to run \")\n\n        project = configuration.project\n        resource_name = f\"projects/{project}/locations/{configuration.region}\"\n\n        retry_policy = retry(\n            stop=stop_after_attempt(3), wait=wait_fixed(1) + wait_random(0, 3)\n        )\n\n        custom_job_run = await run_sync_in_worker_thread(\n            retry_policy(job_service_client.create_custom_job),\n            parent=resource_name,\n            custom_job=custom_job,\n        )\n\n        logger.info(\n            f\"Job {job_name!r} has successfully started; \"\n            f\"the full job name is {custom_job_run.name!r}\"\n        )\n\n        return custom_job_run\n\n    async def _watch_job_run(\n        self,\n        job_name: str,\n        full_job_name: str,  # different from job_name\n        job_service_client: \"JobServiceClient\",\n        current_state: \"JobState\",\n        until_states: Tuple[\"JobState\"],\n        configuration: VertexAIWorkerJobConfiguration,\n        logger: PrefectLogAdapter,\n        timeout: int = None,\n    ) -> \"CustomJob\":\n        \"\"\"\n        Polls job run to see if status changed.\n        \"\"\"\n        state = JobState.JOB_STATE_UNSPECIFIED\n        last_state = current_state\n        t0 = time.time()\n\n        while state not in until_states:\n            job_run = await run_sync_in_worker_thread(\n                job_service_client.get_custom_job,\n                name=full_job_name,\n            )\n            state = job_run.state\n            if state != last_state:\n                state_label = (\n                    state.name.replace(\"_\", \" \")\n                    .lower()\n                    .replace(\"state\", \"state is now:\")\n                )\n                # results in \"New job state is 
now: succeeded\"\n                logger.info(f\"{job_name} has new {state_label}\")\n                last_state = state\n            else:\n                # Intermittently, the job will not be described. We want to respect the\n                # watch timeout though.\n                logger.debug(f\"Job {job_name} not found.\")\n\n            elapsed_time = time.time() - t0\n            if timeout is not None and elapsed_time > timeout:\n                raise RuntimeError(\n                    f\"Timed out after {elapsed_time}s while watching job for states \"\n                    \"{until_states!r}\"\n                )\n            time.sleep(configuration.job_watch_poll_interval)\n\n        return job_run\n\n    def _get_compatible_labels(\n        self, configuration: VertexAIWorkerJobConfiguration\n    ) -> Dict[str, str]:\n        \"\"\"\n        Ensures labels are compatible with GCP label requirements.\n        https://cloud.google.com/resource-manager/docs/creating-managing-labels\n\n        Ex: the Prefect provided key of prefect.io/flow-name -> prefect-io_flow-name\n        \"\"\"\n        compatible_labels = {}\n        for key, val in configuration.labels.items():\n            new_key = slugify(\n                key,\n                lowercase=True,\n                replacements=[(\"/\", \"_\"), (\".\", \"-\")],\n                max_length=63,\n                regex_pattern=_DISALLOWED_GCP_LABEL_CHARACTERS,\n            )\n            compatible_labels[new_key] = slugify(\n                val,\n                lowercase=True,\n                replacements=[(\"/\", \"_\"), (\".\", \"-\")],\n                max_length=63,\n                regex_pattern=_DISALLOWED_GCP_LABEL_CHARACTERS,\n            )\n        return compatible_labels\n\n    async def kill_infrastructure(\n        self,\n        infrastructure_pid: str,\n        configuration: VertexAIWorkerJobConfiguration,\n        grace_seconds: int = 30,\n    ):\n        \"\"\"\n        Stops a job running in Vertex AI upon flow cancellation,\n        based on the provided infrastructure PID + run configuration.\n        \"\"\"\n        if grace_seconds != 30:\n            self._logger.warning(\n                f\"Kill grace period of {grace_seconds}s requested, but GCP does not \"\n                \"support dynamic grace period configuration. 
See here for more info: \"\n                \"https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.customJobs/cancel\"  # noqa\n            )\n\n        client_options = ClientOptions(\n            api_endpoint=f\"{configuration.region}-aiplatform.googleapis.com\"\n        )\n        with configuration.credentials.get_job_service_client(\n            client_options=client_options\n        ) as job_service_client:\n            await run_sync_in_worker_thread(\n                self._stop_job,\n                client=job_service_client,\n                vertex_job_name=infrastructure_pid,\n            )\n\n    def _stop_job(self, client: \"JobServiceClient\", vertex_job_name: str):\n        \"\"\"\n        Calls the `cancel_custom_job` method on the Vertex AI Job Service Client.\n        \"\"\"\n        cancel_custom_job_request = CancelCustomJobRequest(name=vertex_job_name)\n        try:\n            client.cancel_custom_job(\n                request=cancel_custom_job_request,\n            )\n        except Exception as exc:\n            if \"does not exist\" in str(exc):\n                raise InfrastructureNotFound(\n                    f\"Cannot stop Vertex AI job; the job name {vertex_job_name!r} \"\n                    \"could not be found.\"\n                ) from exc\n            raise\n
"},{"location":"vertex_worker/#prefect_gcp.workers.vertex.VertexAIWorker-functions","title":"Functions","text":""},{"location":"vertex_worker/#prefect_gcp.workers.vertex.VertexAIWorker.kill_infrastructure","title":"kill_infrastructure async","text":"

Stops a job running in Vertex AI upon flow cancellation, based on the provided infrastructure PID + run configuration.

Source code in prefect_gcp/workers/vertex.py
async def kill_infrastructure(\n    self,\n    infrastructure_pid: str,\n    configuration: VertexAIWorkerJobConfiguration,\n    grace_seconds: int = 30,\n):\n    \"\"\"\n    Stops a job running in Vertex AI upon flow cancellation,\n    based on the provided infrastructure PID + run configuration.\n    \"\"\"\n    if grace_seconds != 30:\n        self._logger.warning(\n            f\"Kill grace period of {grace_seconds}s requested, but GCP does not \"\n            \"support dynamic grace period configuration. See here for more info: \"\n            \"https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.customJobs/cancel\"  # noqa\n        )\n\n    client_options = ClientOptions(\n        api_endpoint=f\"{configuration.region}-aiplatform.googleapis.com\"\n    )\n    with configuration.credentials.get_job_service_client(\n        client_options=client_options\n    ) as job_service_client:\n        await run_sync_in_worker_thread(\n            self._stop_job,\n            client=job_service_client,\n            vertex_job_name=infrastructure_pid,\n        )\n
"},{"location":"vertex_worker/#prefect_gcp.workers.vertex.VertexAIWorker.run","title":"run async","text":"

Executes a flow run within a Vertex AI Job and waits for the flow run to complete.

Parameters:

Name Type Description Default flow_run FlowRun

The flow run to execute

required configuration VertexAIWorkerJobConfiguration

The configuration to use when executing the flow run.

required task_status Optional[TaskStatus]

The task status object for the current flow run. If provided, the task will be marked as started.

None

Returns:

Name Type Description VertexAIWorkerResult VertexAIWorkerResult

A result object containing information about the final state of the flow run

Source code in prefect_gcp/workers/vertex.py
async def run(\n    self,\n    flow_run: \"FlowRun\",\n    configuration: VertexAIWorkerJobConfiguration,\n    task_status: Optional[anyio.abc.TaskStatus] = None,\n) -> VertexAIWorkerResult:\n    \"\"\"\n    Executes a flow run within a Vertex AI Job and waits for the flow run\n    to complete.\n\n    Args:\n        flow_run: The flow run to execute\n        configuration: The configuration to use when executing the flow run.\n        task_status: The task status object for the current flow run. If provided,\n            the task will be marked as started.\n\n    Returns:\n        VertexAIWorkerResult: A result object containing information about the\n            final state of the flow run\n    \"\"\"\n    logger = self.get_flow_run_logger(flow_run)\n\n    client_options = ClientOptions(\n        api_endpoint=f\"{configuration.region}-aiplatform.googleapis.com\"\n    )\n\n    job_name = configuration.job_name\n\n    job_spec = self._build_job_spec(configuration)\n    with configuration.credentials.get_job_service_client(\n        client_options=client_options\n    ) as job_service_client:\n        job_run = await self._create_and_begin_job(\n            job_name, job_spec, job_service_client, configuration, logger\n        )\n\n        if task_status:\n            task_status.started(job_run.name)\n\n        final_job_run = await self._watch_job_run(\n            job_name=job_name,\n            full_job_name=job_run.name,\n            job_service_client=job_service_client,\n            current_state=job_run.state,\n            until_states=(\n                JobState.JOB_STATE_SUCCEEDED,\n                JobState.JOB_STATE_FAILED,\n                JobState.JOB_STATE_CANCELLED,\n                JobState.JOB_STATE_EXPIRED,\n            ),\n            configuration=configuration,\n            logger=logger,\n            timeout=int(\n                datetime.timedelta(\n                    hours=configuration.job_spec[\"maximum_run_time_hours\"]\n                ).total_seconds()\n            ),\n        )\n\n    error_msg = final_job_run.error.message\n\n    # Vertex will include an error message upon valid\n    # flow cancellations, so we'll avoid raising an error in that case\n    if error_msg and \"CANCELED\" not in error_msg:\n        raise RuntimeError(error_msg)\n\n    status_code = 0 if final_job_run.state == JobState.JOB_STATE_SUCCEEDED else 1\n\n    return VertexAIWorkerResult(\n        identifier=final_job_run.display_name, status_code=status_code\n    )\n
"},{"location":"vertex_worker/#prefect_gcp.workers.vertex.VertexAIWorkerJobConfiguration","title":"VertexAIWorkerJobConfiguration","text":"

Bases: BaseJobConfiguration

Configuration class used by the Vertex AI Worker to create a Job.

An instance of this class is passed to the Vertex AI Worker's run method for each flow run. It contains all information necessary to execute the flow run as a Vertex AI Job.

Attributes:

region (str): The region where the Vertex AI Job resides.

credentials (Optional[GcpCredentials]): The GCP Credentials used to connect to Vertex AI.

job_spec (Dict[str, Any]): The Vertex AI Job spec used to create the Job.

job_watch_poll_interval (float): The interval between GCP API calls to check Job state.
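For reference, a fully resolved `job_spec` following the template shown in the source code below might look like this sketch; every concrete value (service account, image, command, machine shape) is a hypothetical placeholder, not a default.

```python
# Hypothetical, fully rendered job_spec mirroring the template in the source code below.
job_spec = {
    "service_account_name": "runner@my-project.iam.gserviceaccount.com",
    "network": None,
    "reserved_ip_ranges": None,
    "maximum_run_time_hours": 1,
    "worker_pool_specs": [
        {
            "replica_count": 1,
            "container_spec": {
                "image_uri": "gcr.io/my-project/my-flow-image:latest",
                "command": ["prefect", "flow-run", "execute"],  # placeholder; injected by prepare_for_flow_run
                "args": [],
            },
            "machine_spec": {
                "machine_type": "n1-standard-4",
                "accelerator_type": None,
                "accelerator_count": None,
            },
            "disk_spec": {
                "boot_disk_type": "pd-ssd",
                "boot_disk_size_gb": 100,
            },
        }
    ],
}
```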

Source code in prefect_gcp/workers/vertex.py
class VertexAIWorkerJobConfiguration(BaseJobConfiguration):\n    \"\"\"\n    Configuration class used by the Vertex AI Worker to create a Job.\n\n    An instance of this class is passed to the Vertex AI Worker's `run` method\n    for each flow run. It contains all information necessary to execute\n    the flow run as a Vertex AI Job.\n\n    Attributes:\n        region: The region where the Vertex AI Job resides.\n        credentials: The GCP Credentials used to connect to Vertex AI.\n        job_spec: The Vertex AI Job spec used to create the Job.\n        job_watch_poll_interval: The interval between GCP API calls to check Job state.\n    \"\"\"\n\n    region: str = Field(\n        description=\"The region where the Vertex AI Job resides.\",\n        example=\"us-central1\",\n    )\n    credentials: Optional[GcpCredentials] = Field(\n        title=\"GCP Credentials\",\n        default_factory=GcpCredentials,\n        description=\"The GCP Credentials used to initiate the \"\n        \"Vertex AI Job. If not provided credentials will be \"\n        \"inferred from the local environment.\",\n    )\n\n    job_spec: Dict[str, Any] = Field(\n        template={\n            \"service_account_name\": \"{{ service_account_name }}\",\n            \"network\": \"{{ network }}\",\n            \"reserved_ip_ranges\": \"{{ reserved_ip_ranges }}\",\n            \"maximum_run_time_hours\": \"{{ maximum_run_time_hours }}\",\n            \"worker_pool_specs\": [\n                {\n                    \"replica_count\": 1,\n                    \"container_spec\": {\n                        \"image_uri\": \"{{ image }}\",\n                        \"command\": \"{{ command }}\",\n                        \"args\": [],\n                    },\n                    \"machine_spec\": {\n                        \"machine_type\": \"{{ machine_type }}\",\n                        \"accelerator_type\": \"{{ accelerator_type }}\",\n                        \"accelerator_count\": \"{{ accelerator_count }}\",\n                    },\n                    \"disk_spec\": {\n                        \"boot_disk_type\": \"{{ boot_disk_type }}\",\n                        \"boot_disk_size_gb\": \"{{ boot_disk_size_gb }}\",\n                    },\n                }\n            ],\n        }\n    )\n    job_watch_poll_interval: float = Field(\n        default=5.0,\n        title=\"Poll Interval (Seconds)\",\n        description=(\n            \"The amount of time to wait between GCP API calls while monitoring the \"\n            \"state of a Vertex AI Job.\"\n        ),\n    )\n\n    @property\n    def project(self) -> str:\n        \"\"\"property for accessing the project from the credentials.\"\"\"\n        return self.credentials.project\n\n    @property\n    def job_name(self) -> str:\n        \"\"\"\n        The name can be up to 128 characters long and can be consist of any UTF-8 characters. 
Reference:\n        https://cloud.google.com/python/docs/reference/aiplatform/latest/google.cloud.aiplatform.CustomJob#google_cloud_aiplatform_CustomJob_display_name\n        \"\"\"  # noqa\n        unique_suffix = uuid4().hex\n        job_name = f\"{self.name}-{unique_suffix}\"\n        return job_name\n\n    def prepare_for_flow_run(\n        self,\n        flow_run: \"FlowRun\",\n        deployment: Optional[\"DeploymentResponse\"] = None,\n        flow: Optional[\"Flow\"] = None,\n    ):\n        super().prepare_for_flow_run(flow_run, deployment, flow)\n\n        self._inject_formatted_env_vars()\n        self._inject_formatted_command()\n        self._ensure_existence_of_service_account()\n\n    def _inject_formatted_env_vars(self):\n        \"\"\"Inject environment variables in the Vertex job_spec configuration,\n        in the correct format, which is sourced from the BaseJobConfiguration.\n        This method is invoked by `prepare_for_flow_run()`.\"\"\"\n        worker_pool_specs = self.job_spec[\"worker_pool_specs\"]\n        formatted_env_vars = [\n            {\"name\": key, \"value\": value} for key, value in self.env.items()\n        ]\n        worker_pool_specs[0][\"container_spec\"][\"env\"] = formatted_env_vars\n\n    def _inject_formatted_command(self):\n        \"\"\"Inject shell commands in the Vertex job_spec configuration,\n        in the correct format, which is sourced from the BaseJobConfiguration.\n        Here, we'll ensure that the default string format\n        is converted to a list of strings.\"\"\"\n        worker_pool_specs = self.job_spec[\"worker_pool_specs\"]\n\n        existing_command = worker_pool_specs[0][\"container_spec\"].get(\"command\")\n        if existing_command is None:\n            worker_pool_specs[0][\"container_spec\"][\"command\"] = shlex.split(\n                self._base_flow_run_command()\n            )\n        elif isinstance(existing_command, str):\n            worker_pool_specs[0][\"container_spec\"][\"command\"] = shlex.split(\n                existing_command\n            )\n\n    def _ensure_existence_of_service_account(self):\n        \"\"\"Verify that a service account was provided, either in the credentials\n        or as a standalone service account name override.\"\"\"\n\n        provided_service_account_name = self.job_spec.get(\"service_account_name\")\n        credential_service_account = self.credentials._service_account_email\n\n        service_account_to_use = (\n            provided_service_account_name or credential_service_account\n        )\n\n        if service_account_to_use is None:\n            raise ValueError(\n                \"A service account is required for the Vertex job. \"\n                \"A service account could not be detected in the attached credentials \"\n                \"or in the service_account_name input. 
\"\n                \"Please pass in valid GCP credentials or a valid service_account_name\"\n            )\n\n        self.job_spec[\"service_account_name\"] = service_account_to_use\n\n    @validator(\"job_spec\")\n    def _ensure_job_spec_includes_required_attributes(cls, value: Dict[str, Any]):\n        \"\"\"\n        Ensures that the job spec includes all required components.\n        \"\"\"\n        patch = JsonPatch.from_diff(value, _get_base_job_spec())\n        missing_paths = sorted([op[\"path\"] for op in patch if op[\"op\"] == \"add\"])\n        if missing_paths:\n            raise ValueError(\n                \"Job is missing required attributes at the following paths: \"\n                f\"{', '.join(missing_paths)}\"\n            )\n        return value\n
"},{"location":"vertex_worker/#prefect_gcp.workers.vertex.VertexAIWorkerJobConfiguration-attributes","title":"Attributes","text":""},{"location":"vertex_worker/#prefect_gcp.workers.vertex.VertexAIWorkerJobConfiguration.job_name","title":"job_name: str property","text":"

The name can be up to 128 characters long and can consist of any UTF-8 characters. Reference: https://cloud.google.com/python/docs/reference/aiplatform/latest/google.cloud.aiplatform.CustomJob#google_cloud_aiplatform_CustomJob_display_name
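A minimal sketch (with a hypothetical base name) of how the display name is derived:

```python
from uuid import uuid4

name = "my-deployment-flow-run"      # base name from the job configuration (hypothetical)
job_name = f"{name}-{uuid4().hex}"   # e.g. "my-deployment-flow-run-3f9c..." (well under 128 characters)
```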

"},{"location":"vertex_worker/#prefect_gcp.workers.vertex.VertexAIWorkerJobConfiguration.project","title":"project: str property","text":"

Property for accessing the project from the credentials.

"},{"location":"vertex_worker/#prefect_gcp.workers.vertex.VertexAIWorkerResult","title":"VertexAIWorkerResult","text":"

Bases: BaseWorkerResult

Contains information about the final state of a completed process.

Source code in prefect_gcp/workers/vertex.py
class VertexAIWorkerResult(BaseWorkerResult):\n    \"\"\"Contains information about the final state of a completed process\"\"\"\n
"},{"location":"vertex_worker/#prefect_gcp.workers.vertex.VertexAIWorkerVariables","title":"VertexAIWorkerVariables","text":"

Bases: BaseVariables

Default variables for the Vertex AI worker.

The schema for this class is used to populate the variables section of the default base job template.
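The documented defaults for this class are summarized below as a plain dictionary; this is a convenience sketch rather than the literal base job template, and `region` and `image` are omitted because they have no default and must be supplied.

```python
# Documented defaults for the Vertex AI worker variables.
default_variables = {
    "machine_type": "n1-standard-4",
    "accelerator_type": None,
    "accelerator_count": None,
    "boot_disk_type": "pd-ssd",
    "boot_disk_size_gb": 100,
    "maximum_run_time_hours": 1,
    "network": None,
    "reserved_ip_ranges": None,
    "service_account_name": None,
    "job_watch_poll_interval": 5.0,
}
```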

Source code in prefect_gcp/workers/vertex.py
class VertexAIWorkerVariables(BaseVariables):\n    \"\"\"\n    Default variables for the Vertex AI worker.\n\n    The schema for this class is used to populate the `variables` section of the default\n    base job template.\n    \"\"\"\n\n    region: str = Field(\n        description=\"The region where the Vertex AI Job resides.\",\n        example=\"us-central1\",\n    )\n    image: str = Field(\n        title=\"Image Name\",\n        description=(\n            \"The URI of a container image in the Container or Artifact Registry, \"\n            \"used to run your Vertex AI Job. Note that Vertex AI will need access\"\n            \"to the project and region where the container image is stored. See \"\n            \"https://cloud.google.com/vertex-ai/docs/training/create-custom-container\"\n        ),\n        example=\"gcr.io/your-project/your-repo:latest\",\n    )\n    credentials: Optional[GcpCredentials] = Field(\n        title=\"GCP Credentials\",\n        default_factory=GcpCredentials,\n        description=\"The GCP Credentials used to initiate the \"\n        \"Vertex AI Job. If not provided credentials will be \"\n        \"inferred from the local environment.\",\n    )\n    machine_type: str = Field(\n        title=\"Machine Type\",\n        description=(\n            \"The machine type to use for the run, which controls \"\n            \"the available CPU and memory. \"\n            \"See https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec\"\n        ),\n        default=\"n1-standard-4\",\n    )\n    accelerator_type: Optional[str] = Field(\n        title=\"Accelerator Type\",\n        description=(\n            \"The type of accelerator to attach to the machine. \"\n            \"See https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec\"\n        ),\n        example=\"NVIDIA_TESLA_K80\",\n        default=None,\n    )\n    accelerator_count: Optional[int] = Field(\n        title=\"Accelerator Count\",\n        description=(\n            \"The number of accelerators to attach to the machine. \"\n            \"See https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec\"\n        ),\n        example=1,\n        default=None,\n    )\n    boot_disk_type: str = Field(\n        title=\"Boot Disk Type\",\n        description=\"The type of boot disk to attach to the machine.\",\n        default=\"pd-ssd\",\n    )\n    boot_disk_size_gb: int = Field(\n        title=\"Boot Disk Size (GB)\",\n        description=\"The size of the boot disk to attach to the machine, in gigabytes.\",\n        default=100,\n    )\n    maximum_run_time_hours: int = Field(\n        default=1,\n        title=\"Maximum Run Time (Hours)\",\n        description=\"The maximum job running time, in hours\",\n    )\n    network: Optional[str] = Field(\n        default=None,\n        title=\"Network\",\n        description=\"The full name of the Compute Engine network\"\n        \"to which the Job should be peered. Private services access must \"\n        \"already be configured for the network. If left unspecified, the job \"\n        \"is not peered with any network. \"\n        \"For example: projects/12345/global/networks/myVPC\",\n    )\n    reserved_ip_ranges: Optional[List[str]] = Field(\n        default=None,\n        title=\"Reserved IP Ranges\",\n        description=\"A list of names for the reserved ip ranges under the VPC \"\n        \"network that can be used for this job. If set, we will deploy the job \"\n        \"within the provided ip ranges. 
Otherwise, the job will be deployed to \"\n        \"any ip ranges under the provided VPC network.\",\n    )\n    service_account_name: Optional[str] = Field(\n        default=None,\n        title=\"Service Account Name\",\n        description=(\n            \"Specifies the service account to use \"\n            \"as the run-as account in Vertex AI. The worker submitting jobs must have \"\n            \"act-as permission on this run-as account. If unspecified, the AI \"\n            \"Platform Custom Code Service Agent for the CustomJob's project is \"\n            \"used. Takes precedence over the service account found in GCP credentials, \"\n            \"and required if a service account cannot be detected in GCP credentials.\"\n        ),\n    )\n    job_watch_poll_interval: float = Field(\n        default=5.0,\n        title=\"Poll Interval (Seconds)\",\n        description=(\n            \"The amount of time to wait between GCP API calls while monitoring the \"\n            \"state of a Vertex AI Job.\"\n        ),\n    )\n
"},{"location":"deployments/steps/","title":"Deployment Steps","text":""},{"location":"deployments/steps/#prefect_gcp.deployments.steps","title":"prefect_gcp.deployments.steps","text":"

Prefect deployment steps for storing code in, and retrieving it from, Google Cloud Storage.

"},{"location":"deployments/steps/#prefect_gcp.deployments.steps-classes","title":"Classes","text":""},{"location":"deployments/steps/#prefect_gcp.deployments.steps.PullFromGcsOutput","title":"PullFromGcsOutput","text":"

Bases: TypedDict

The output of the pull_from_gcs step.

Source code in prefect_gcp/deployments/steps.py
class PullFromGcsOutput(TypedDict):\n    \"\"\"\n    The output of the `pull_from_gcs` step.\n    \"\"\"\n\n    bucket: str\n    folder: str\n    directory: str\n
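A returned value has the shape sketched below (all values hypothetical):

```python
from prefect_gcp.deployments.steps import PullFromGcsOutput

# Hypothetical values illustrating the shape returned by the pull_from_gcs step.
output: PullFromGcsOutput = {
    "bucket": "my-bucket",
    "folder": "my-folder",
    "directory": "/home/user/project",
}
```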
"},{"location":"deployments/steps/#prefect_gcp.deployments.steps.PullProjectFromGcsOutput","title":"PullProjectFromGcsOutput","text":"

Bases: PullFromGcsOutput

Deprecated. Use PullFromGcsOutput instead.

Source code in prefect_gcp/deployments/steps.py
@deprecated_callable(start_date=\"Jun 2023\", help=\"Use `PullFromGcsOutput` instead.\")\nclass PullProjectFromGcsOutput(PullFromGcsOutput):\n    \"\"\"Deprecated. Use `PullFromGcsOutput` instead.\"\"\"\n
"},{"location":"deployments/steps/#prefect_gcp.deployments.steps.PushProjectToGcsOutput","title":"PushProjectToGcsOutput","text":"

Bases: PushToGcsOutput

Deprecated. Use PushToGcsOutput instead.

Source code in prefect_gcp/deployments/steps.py
@deprecated_callable(start_date=\"Jun 2023\", help=\"Use `PushToGcsOutput` instead.\")\nclass PushProjectToGcsOutput(PushToGcsOutput):\n    \"\"\"Deprecated. Use `PushToGcsOutput` instead.\"\"\"\n
"},{"location":"deployments/steps/#prefect_gcp.deployments.steps.PushToGcsOutput","title":"PushToGcsOutput","text":"

Bases: TypedDict

The output of the push_to_gcs step.

Source code in prefect_gcp/deployments/steps.py
class PushToGcsOutput(TypedDict):\n    \"\"\"\n    The output of the `push_to_gcs` step.\n    \"\"\"\n\n    bucket: str\n    folder: str\n
"},{"location":"deployments/steps/#prefect_gcp.deployments.steps-functions","title":"Functions","text":""},{"location":"deployments/steps/#prefect_gcp.deployments.steps.pull_from_gcs","title":"pull_from_gcs","text":"

Pulls the contents of a project from a GCS bucket to the current working directory.

Parameters:

bucket (str): The name of the GCS bucket where files are stored. Required.

folder (str): The folder in the GCS bucket where files are stored. Required.

project (Optional[str]): The GCP project the bucket belongs to. If not provided, the project will be inferred from the credentials or the local environment. Default: None.

credentials (Optional[Dict]): A dictionary containing the service account information and project used for authentication. If not provided, the application default credentials will be used. Default: None.

Returns:

PullProjectFromGcsOutput: A dictionary containing the bucket, folder, and local directory where files were downloaded.

Examples:

Pull from GCS using the default environment credentials:

build:\n    - prefect_gcp.deployments.steps.pull_from_gcs:\n        requires: prefect-gcp\n        bucket: my-bucket\n        folder: my-folder\n

Pull from GCS using credentials stored in a block:

build:\n    - prefect_gcp.deployments.steps.pull_from_gcs:\n        requires: prefect-gcp\n        bucket: my-bucket\n        folder: my-folder\n        credentials: \"{{ prefect.blocks.gcp-credentials.dev-credentials }}\"\n

Pull from a GCS bucket using credentials stored in a service account file:

build:\n    - prefect_gcp.deployments.steps.pull_from_gcs:\n        requires: prefect-gcp\n        bucket: my-bucket\n        folder: my-folder\n        credentials:\n            project: my-project\n            service_account_file: /path/to/service_account.json\n
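The step can also be invoked as a plain Python function; the sketch below uses a hypothetical service account file path.

```python
from prefect_gcp.deployments.steps import pull_from_gcs

# Downloads everything under gs://my-bucket/my-folder into the current working directory.
result = pull_from_gcs(
    bucket="my-bucket",
    folder="my-folder",
    credentials={
        "project": "my-project",
        "service_account_file": "/path/to/service_account.json",  # hypothetical path
    },
)
print(result["directory"])  # the local directory the files were written to
```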

Source code in prefect_gcp/deployments/steps.py
def pull_from_gcs(\n    bucket: str,\n    folder: str,\n    project: Optional[str] = None,\n    credentials: Optional[Dict] = None,\n) -> PullProjectFromGcsOutput:\n    \"\"\"\n    Pulls the contents of a project from an GCS bucket to the current working directory.\n\n    Args:\n        bucket: The name of the GCS bucket where files are stored.\n        folder: The folder in the GCS bucket where files are stored.\n        project: The GCP project the bucket belongs to. If not provided, the project will be\n            inferred from the credentials or the local environment.\n        credentials: A dictionary containing the service account information and project\n            used for authentication. If not provided, the application default\n            credentials will be used.\n\n    Returns:\n        A dictionary containing the bucket, folder, and local directory where files were downloaded.\n\n    Examples:\n        Pull from GCS using the default environment credentials:\n        ```yaml\n        build:\n            - prefect_gcp.deployments.steps.pull_from_gcs:\n                requires: prefect-gcp\n                bucket: my-bucket\n                folder: my-folder\n        ```\n\n        Pull from GCS using credentials stored in a block:\n        ```yaml\n        build:\n            - prefect_gcp.deployments.steps.pull_from_gcs:\n                requires: prefect-gcp\n                bucket: my-bucket\n                folder: my-folder\n                credentials: \"{{ prefect.blocks.gcp-credentials.dev-credentials }}\"\n        ```\n\n        Pull from to an GCS bucket using credentials stored in a service account file:\n        ```yaml\n        build:\n            - prefect_gcp.deployments.steps.pull_from_gcs:\n                requires: prefect-gcp\n                bucket: my-bucket\n                folder: my-folder\n                credentials:\n                    project: my-project\n                    service_account_file: /path/to/service_account.json\n        ```\n\n    \"\"\"  # noqa\n    local_path = Path.cwd()\n    project = credentials.get(\"project\") if credentials else None\n\n    gcp_creds = None\n    if credentials is not None:\n        if credentials.get(\"service_account_info\") is not None:\n            gcp_creds = Credentials.from_service_account_info(\n                credentials.get(\"service_account_info\"),\n                scopes=[\"https://www.googleapis.com/auth/cloud-platform\"],\n            )\n        elif credentials.get(\"service_account_file\") is not None:\n            gcp_creds = Credentials.from_service_account_file(\n                credentials.get(\"service_account_file\"),\n                scopes=[\"https://www.googleapis.com/auth/cloud-platform\"],\n            )\n\n    gcp_creds = gcp_creds or google.auth.default()[0]\n\n    storage_client = StorageClient(credentials=gcp_creds, project=project)\n\n    blobs = storage_client.list_blobs(bucket, prefix=folder)\n\n    for blob in blobs:\n        if blob.name.endswith(\"/\"):\n            # object is a folder and will be created if it contains any objects\n            continue\n        local_blob_download_path = PurePosixPath(\n            local_path\n            / relative_path_to_current_platform(blob.name).relative_to(folder)\n        )\n        Path.mkdir(Path(local_blob_download_path.parent), parents=True, exist_ok=True)\n\n        blob.download_to_filename(local_blob_download_path)\n\n    return {\n        \"bucket\": bucket,\n        \"folder\": folder,\n        \"directory\": 
str(local_path),\n    }\n
"},{"location":"deployments/steps/#prefect_gcp.deployments.steps.pull_project_from_gcs","title":"pull_project_from_gcs","text":"

Deprecated. Use pull_from_gcs instead.

Source code in prefect_gcp/deployments/steps.py
@deprecated_callable(start_date=\"Jun 2023\", help=\"Use `pull_from_gcs` instead.\")\ndef pull_project_from_gcs(*args, **kwargs) -> PullProjectFromGcsOutput:\n    \"\"\"\n    Deprecated. Use `pull_from_gcs` instead.\n    \"\"\"\n    return pull_from_gcs(*args, **kwargs)\n
"},{"location":"deployments/steps/#prefect_gcp.deployments.steps.push_project_to_gcs","title":"push_project_to_gcs","text":"

Deprecated. Use push_to_gcs instead.

Source code in prefect_gcp/deployments/steps.py
@deprecated_callable(start_date=\"Jun 2023\", help=\"Use `push_to_gcs` instead.\")\ndef push_project_to_gcs(*args, **kwargs) -> PushToGcsOutput:\n    \"\"\"\n    Deprecated. Use `push_to_gcs` instead.\n    \"\"\"\n    return push_to_gcs(*args, **kwargs)\n
"},{"location":"deployments/steps/#prefect_gcp.deployments.steps.push_to_gcs","title":"push_to_gcs","text":"

Pushes the contents of the current working directory to a GCS bucket, excluding files and folders specified in the ignore_file.

Parameters:

bucket (str): The name of the GCS bucket where files will be uploaded. Required.

folder (str): The folder in the GCS bucket where files will be uploaded. Required.

project (Optional[str]): The GCP project the bucket belongs to. If not provided, the project will be inferred from the credentials or the local environment. Default: None.

credentials (Optional[Dict]): A dictionary containing the service account information and project used for authentication. If not provided, the application default credentials will be used. Default: None.

ignore_file: The name of the file containing ignore patterns. Default: '.prefectignore'.

Returns:

PushToGcsOutput: A dictionary containing the bucket and folder where files were uploaded.

Examples:

Push to a GCS bucket:

build:\n    - prefect_gcp.deployments.steps.push_to_gcs:\n        requires: prefect-gcp\n        bucket: my-bucket\n        folder: my-project\n

Push to a GCS bucket using credentials stored in a block:

build:\n    - prefect_gcp.deployments.steps.push_to_gcs:\n        requires: prefect-gcp\n        bucket: my-bucket\n        folder: my-folder\n        credentials: \"{{ prefect.blocks.gcp-credentials.dev-credentials }}\"\n

Push to a GCS bucket using credentials stored in a service account file:

build:\n    - prefect_gcp.deployments.steps.push_to_gcs:\n        requires: prefect-gcp\n        bucket: my-bucket\n        folder: my-folder\n        credentials:\n            project: my-project\n            service_account_file: /path/to/service_account.json\n
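Likewise, the push step can be called directly from Python; the sketch below assumes a hypothetical service account file.

```python
from prefect_gcp.deployments.steps import push_to_gcs

# Uploads the current working directory, minus .prefectignore matches,
# to gs://my-bucket/my-folder.
result = push_to_gcs(
    bucket="my-bucket",
    folder="my-folder",
    credentials={
        "project": "my-project",
        "service_account_file": "/path/to/service_account.json",  # hypothetical path
    },
)
print(result["bucket"], result["folder"])
```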

Source code in prefect_gcp/deployments/steps.py
def push_to_gcs(\n    bucket: str,\n    folder: str,\n    project: Optional[str] = None,\n    credentials: Optional[Dict] = None,\n    ignore_file=\".prefectignore\",\n) -> PushToGcsOutput:\n    \"\"\"\n    Pushes the contents of the current working directory to a GCS bucket,\n    excluding files and folders specified in the ignore_file.\n\n    Args:\n        bucket: The name of the GCS bucket where files will be uploaded.\n        folder: The folder in the GCS bucket where files will be uploaded.\n        project: The GCP project the bucket belongs to. If not provided, the project\n            will be inferred from the credentials or the local environment.\n        credentials: A dictionary containing the service account information and project\n            used for authentication. If not provided, the application default\n            credentials will be used.\n        ignore_file: The name of the file containing ignore patterns.\n\n    Returns:\n        A dictionary containing the bucket and folder where files were uploaded.\n\n    Examples:\n        Push to a GCS bucket:\n        ```yaml\n        build:\n            - prefect_gcp.deployments.steps.push_to_gcs:\n                requires: prefect-gcp\n                bucket: my-bucket\n                folder: my-project\n        ```\n\n        Push  to a GCS bucket using credentials stored in a block:\n        ```yaml\n        build:\n            - prefect_gcp.deployments.steps.push_to_gcs:\n                requires: prefect-gcp\n                bucket: my-bucket\n                folder: my-folder\n                credentials: \"{{ prefect.blocks.gcp-credentials.dev-credentials }}\"\n        ```\n\n        Push to a GCS bucket using credentials stored in a service account\n        file:\n        ```yaml\n        build:\n            - prefect_gcp.deployments.steps.push_to_gcs:\n                requires: prefect-gcp\n                bucket: my-bucket\n                folder: my-folder\n                credentials:\n                    project: my-project\n                    service_account_file: /path/to/service_account.json\n        ```\n\n    \"\"\"\n    project = credentials.get(\"project\") if credentials else None\n\n    gcp_creds = None\n    if credentials is not None:\n        if credentials.get(\"service_account_info\") is not None:\n            gcp_creds = Credentials.from_service_account_info(\n                credentials.get(\"service_account_info\"),\n                scopes=[\"https://www.googleapis.com/auth/cloud-platform\"],\n            )\n        elif credentials.get(\"service_account_file\") is not None:\n            gcp_creds = Credentials.from_service_account_file(\n                credentials.get(\"service_account_file\"),\n                scopes=[\"https://www.googleapis.com/auth/cloud-platform\"],\n            )\n\n    gcp_creds = gcp_creds or google.auth.default()[0]\n\n    storage_client = StorageClient(credentials=gcp_creds, project=project)\n    bucket_resource = storage_client.bucket(bucket)\n\n    local_path = Path.cwd()\n\n    included_files = None\n    if ignore_file and Path(ignore_file).exists():\n        with open(ignore_file, \"r\") as f:\n            ignore_patterns = f.readlines()\n        included_files = filter_files(str(local_path), ignore_patterns)\n\n    for local_file_path in local_path.expanduser().rglob(\"*\"):\n        relative_local_file_path = local_file_path.relative_to(local_path)\n        if (\n            included_files is not None\n            and str(relative_local_file_path) not in 
included_files\n        ):\n            continue\n        elif not local_file_path.is_dir():\n            remote_file_path = (folder / relative_local_file_path).as_posix()\n\n            blob_resource = bucket_resource.blob(remote_file_path)\n            blob_resource.upload_from_filename(local_file_path)\n\n    return {\n        \"bucket\": bucket,\n        \"folder\": folder,\n    }\n
"}]} \ No newline at end of file diff --git a/secret_manager/index.html b/secret_manager/index.html index 35954a5..4bc4fb9 100644 --- a/secret_manager/index.html +++ b/secret_manager/index.html @@ -18,7 +18,7 @@ - + diff --git a/sitemap.xml b/sitemap.xml index c6f62c8..8e8b147 100644 --- a/sitemap.xml +++ b/sitemap.xml @@ -2,77 +2,77 @@ https://PrefectHQ.github.io/prefect-gcp/ - 2024-03-05 + 2024-03-15 daily https://PrefectHQ.github.io/prefect-gcp/aiplatform/ - 2024-03-05 + 2024-03-15 daily https://PrefectHQ.github.io/prefect-gcp/bigquery/ - 2024-03-05 + 2024-03-15 daily https://PrefectHQ.github.io/prefect-gcp/blocks_catalog/ - 2024-03-05 + 2024-03-15 daily https://PrefectHQ.github.io/prefect-gcp/cloud_run/ - 2024-03-05 + 2024-03-15 daily https://PrefectHQ.github.io/prefect-gcp/cloud_run_worker/ - 2024-03-05 + 2024-03-15 daily https://PrefectHQ.github.io/prefect-gcp/cloud_run_worker_v2/ - 2024-03-05 + 2024-03-15 daily https://PrefectHQ.github.io/prefect-gcp/cloud_storage/ - 2024-03-05 + 2024-03-15 daily https://PrefectHQ.github.io/prefect-gcp/contributing/ - 2024-03-05 + 2024-03-15 daily https://PrefectHQ.github.io/prefect-gcp/credentials/ - 2024-03-05 + 2024-03-15 daily https://PrefectHQ.github.io/prefect-gcp/examples_catalog/ - 2024-03-05 + 2024-03-15 daily https://PrefectHQ.github.io/prefect-gcp/gcp-worker-guide/ - 2024-03-05 + 2024-03-15 daily https://PrefectHQ.github.io/prefect-gcp/secret_manager/ - 2024-03-05 + 2024-03-15 daily https://PrefectHQ.github.io/prefect-gcp/vertex_worker/ - 2024-03-05 + 2024-03-15 daily https://PrefectHQ.github.io/prefect-gcp/deployments/steps/ - 2024-03-05 + 2024-03-15 daily \ No newline at end of file diff --git a/sitemap.xml.gz b/sitemap.xml.gz index 2964938a32a3f23f265bc511a336a6c71884bb4e..040aed4d9e92b6c28fab54ea0c1fad8e624c69c9 100644 GIT binary patch literal 352 zcmV-m0iXUKiwFo&QuJj4|8r?{Wo=<_E_iKh0M(XFYr`-MhVT6qA@{^-x4|~j^fq?f zU^@rbRvc4*)s~y~-_LEz=zTD1gRw1Mi{~J{sNEd3Dh}X-v}Ro{>!vKgFw3P8>+<{S zt$Hpu-J*?N1;8SSO<0#BqI)G|nx;BP1WjG2trsM)F+Bpg21!%Dl-;68>zrV|T~@0s z>&{t1O7r1i0EX|MwU9W*Z7nTvbF31~k(Qj*x1VW1ZF#q9R*zNlP%WR5`sZ^(y+(c$7@{9EsKq{T0I9RXmpLHw6K7v1GK$^t6&@+luS#kfoOxehLA5BK6 z{bcS-V!6|#vspZmqhv5F9>n=wcJG&poF%u7_3IG6ZMX8?v literal 352 zcmV-m0iXUKiwFoCLFZ)x|8r?{Wo=<_E_iKh0M(XFYr`-MhVT0oA@{^tx4|~j^fq?f zU^@rbRvlA+)mEDJ-%oAI=zTD1gRw1Mi{~J{s9hbbDfSSP_HJ3;*G*Y~6JBbkmgV=? zTlHM7y1RCaCIJ>Cc4=7-h~bs7X`1S!5iAd>_EE7U26`lR39_bsDZ9IZ)+xb!JFnJw z(Vem+O7jt60|MVaYo&1**R}R6%&|&|z*=@vUw`Hat(V=RSv*$FL)AQy zn+Us3a&3++yNb+I>=)}VfLsT|apR-Ceb%*ByAk3s15y%iQ!hAj