diff --git a/perfkitbenchmarker/configs/benchmark_config_spec.py b/perfkitbenchmarker/configs/benchmark_config_spec.py index 457a2f6eb..8d0e6397f 100644 --- a/perfkitbenchmarker/configs/benchmark_config_spec.py +++ b/perfkitbenchmarker/configs/benchmark_config_spec.py @@ -213,6 +213,13 @@ def _GetOptionDecoderConstructions(cls): option_decoders.IntDecoder, {'default': None, 'none_ok': True}, ), + 'dataproc_serverless_runtime_engine': ( + option_decoders.EnumDecoder, + { + 'valid_values': ('spark', 'native'), + 'default': 'spark', + }, + ), 'dataproc_serverless_memory_overhead': ( option_decoders.IntDecoder, {'default': None, 'none_ok': True}, diff --git a/perfkitbenchmarker/providers/gcp/gcp_dpb_dataproc.py b/perfkitbenchmarker/providers/gcp/gcp_dpb_dataproc.py index f2aa1ce1a..d2f61b74f 100644 --- a/perfkitbenchmarker/providers/gcp/gcp_dpb_dataproc.py +++ b/perfkitbenchmarker/providers/gcp/gcp_dpb_dataproc.py @@ -699,6 +699,8 @@ def GetJobProperties(self) -> Dict[str, str]: result['spark.executor.memoryOverhead'] = ( f'{self.spec.dataproc_serverless_memory_overhead}m' ) + if self.spec.dataproc_serverless_runtime_engine == 'native': + result['spark.dataproc.runtimeEngine'] = 'native' result.update(super().GetJobProperties()) return result @@ -751,6 +753,7 @@ def _FillMetadata(self) -> None: 'dpb_disk_size': self.metadata['dpb_disk_size'], 'dpb_service_zone': self.metadata['dpb_service_zone'], 'dpb_job_properties': self.metadata['dpb_job_properties'], + 'dpb_runtime_engine': self.spec.dataproc_serverless_runtime_engine, } def CalculateLastJobCosts(self) -> dpb_service.JobCosts: