From c87e68c4eae17755814aff21cdbc0a66f1005168 Mon Sep 17 00:00:00 2001 From: Diego Orellana Date: Tue, 10 Dec 2024 15:59:17 -0800 Subject: [PATCH] Add support for Dataproc Serverless Native Query Execution engine. PiperOrigin-RevId: 704876717 --- perfkitbenchmarker/configs/benchmark_config_spec.py | 7 +++++++ perfkitbenchmarker/providers/gcp/gcp_dpb_dataproc.py | 3 +++ 2 files changed, 10 insertions(+) diff --git a/perfkitbenchmarker/configs/benchmark_config_spec.py b/perfkitbenchmarker/configs/benchmark_config_spec.py index 457a2f6ebc..8d0e6397f7 100644 --- a/perfkitbenchmarker/configs/benchmark_config_spec.py +++ b/perfkitbenchmarker/configs/benchmark_config_spec.py @@ -213,6 +213,13 @@ def _GetOptionDecoderConstructions(cls): option_decoders.IntDecoder, {'default': None, 'none_ok': True}, ), + 'dataproc_serverless_runtime_engine': ( + option_decoders.EnumDecoder, + { + 'valid_values': ('spark', 'native'), + 'default': 'spark', + }, + ), 'dataproc_serverless_memory_overhead': ( option_decoders.IntDecoder, {'default': None, 'none_ok': True}, diff --git a/perfkitbenchmarker/providers/gcp/gcp_dpb_dataproc.py b/perfkitbenchmarker/providers/gcp/gcp_dpb_dataproc.py index f2aa1ce1ae..d2f61b74fd 100644 --- a/perfkitbenchmarker/providers/gcp/gcp_dpb_dataproc.py +++ b/perfkitbenchmarker/providers/gcp/gcp_dpb_dataproc.py @@ -699,6 +699,8 @@ def GetJobProperties(self) -> Dict[str, str]: result['spark.executor.memoryOverhead'] = ( f'{self.spec.dataproc_serverless_memory_overhead}m' ) + if self.spec.dataproc_serverless_runtime_engine == 'native': + result['spark.dataproc.runtimeEngine'] = 'native' result.update(super().GetJobProperties()) return result @@ -751,6 +753,7 @@ def _FillMetadata(self) -> None: 'dpb_disk_size': self.metadata['dpb_disk_size'], 'dpb_service_zone': self.metadata['dpb_service_zone'], 'dpb_job_properties': self.metadata['dpb_job_properties'], + 'dpb_runtime_engine': self.spec.dataproc_serverless_runtime_engine, } def CalculateLastJobCosts(self) -> dpb_service.JobCosts: