Skip to content

Commit

Permalink
[k8s] Add validation for pod_config #4206 (#4466)
Browse files Browse the repository at this point in the history
* [k8s] Add validation for pod_config #4206

Check pod_config when run 'sky check k8s' by using k8s api

* update: check pod_config when launch

check merged pod_config during launch using k8s api

* fix test

* ignore check failed when test with dryrun

if there is no kube config in env, ignore ValueError when launch
with dryrun. For now, we don't support check schema offline.

* use deserialize api to check pod_config schema

* test

* create another api_client with no kubeconfig

* test

* update error message

* update test

* test

* test

* Update sky/backends/backend_utils.py

---------

Co-authored-by: Romil Bhardwaj <[email protected]>
  • Loading branch information
chesterli29 and romilbhardwaj authored Jan 3, 2025
1 parent 2ccbbff commit 061d4bd
Show file tree
Hide file tree
Showing 3 changed files with 99 additions and 0 deletions.
7 changes: 7 additions & 0 deletions sky/backends/backend_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -926,6 +926,13 @@ def write_cluster_config(
tmp_yaml_path,
cluster_config_overrides=to_provision.cluster_config_overrides)
kubernetes_utils.combine_metadata_fields(tmp_yaml_path)
yaml_obj = common_utils.read_yaml(tmp_yaml_path)
pod_config = yaml_obj['available_node_types']['ray_head_default'][
'node_config']
valid, message = kubernetes_utils.check_pod_config(pod_config)
if not valid:
raise exceptions.InvalidCloudConfigs(
f'Invalid pod_config. Details: {message}')

if dryrun:
# If dryrun, return the unfinished tmp yaml path.
Expand Down
46 changes: 46 additions & 0 deletions sky/provision/kubernetes/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -893,6 +893,52 @@ def check_credentials(context: Optional[str],
return True, None


def check_pod_config(pod_config: dict) \
-> Tuple[bool, Optional[str]]:
"""Check if the pod_config is a valid pod config
Using deserialize api to check the pod_config is valid or not.
Returns:
bool: True if pod_config is valid.
str: Error message about why the pod_config is invalid, None otherwise.
"""
errors = []
# This api_client won't be used to send any requests, so there is no need to
# load kubeconfig
api_client = kubernetes.kubernetes.client.ApiClient()

# Used for kubernetes api_client deserialize function, the function will use
# data attr, the detail ref:
# https://github.com/kubernetes-client/python/blob/master/kubernetes/client/api_client.py#L244
class InnerResponse():

def __init__(self, data: dict):
self.data = json.dumps(data)

try:
# Validate metadata if present
if 'metadata' in pod_config:
try:
value = InnerResponse(pod_config['metadata'])
api_client.deserialize(
value, kubernetes.kubernetes.client.V1ObjectMeta)
except ValueError as e:
errors.append(f'Invalid metadata: {str(e)}')
# Validate spec if present
if 'spec' in pod_config:
try:
value = InnerResponse(pod_config['spec'])
api_client.deserialize(value,
kubernetes.kubernetes.client.V1PodSpec)
except ValueError as e:
errors.append(f'Invalid spec: {str(e)}')
return len(errors) == 0, '.'.join(errors)
except Exception as e: # pylint: disable=broad-except
errors.append(f'Validation error: {str(e)}')
return False, '.'.join(errors)


def is_kubeconfig_exec_auth(
context: Optional[str] = None) -> Tuple[bool, Optional[str]]:
"""Checks if the kubeconfig file uses exec-based authentication
Expand Down
46 changes: 46 additions & 0 deletions tests/test_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

import sky
from sky import skypilot_config
import sky.exceptions
from sky.skylet import constants
from sky.utils import common_utils
from sky.utils import kubernetes_enums
Expand Down Expand Up @@ -99,6 +100,29 @@ def _create_task_yaml_file(task_file_path: pathlib.Path) -> None:
"""))


def _create_invalid_config_yaml_file(task_file_path: pathlib.Path) -> None:
task_file_path.write_text(
textwrap.dedent("""\
experimental:
config_overrides:
kubernetes:
pod_config:
metadata:
labels:
test-key: test-value
annotations:
abc: def
spec:
containers:
- name:
imagePullSecrets:
- name: my-secret-2
setup: echo 'Setting up...'
run: echo 'Running...'
"""))


def test_nested_config(monkeypatch) -> None:
"""Test that the nested config works."""
config = skypilot_config.Config()
Expand Down Expand Up @@ -335,6 +359,28 @@ def test_k8s_config_with_override(monkeypatch, tmp_path,
assert cluster_pod_config['spec']['runtimeClassName'] == 'nvidia'


def test_k8s_config_with_invalid_config(monkeypatch, tmp_path,
enable_all_clouds) -> None:
config_path = tmp_path / 'config.yaml'
_create_config_file(config_path)
monkeypatch.setattr(skypilot_config, 'CONFIG_PATH', config_path)

_reload_config()
task_path = tmp_path / 'task.yaml'
_create_invalid_config_yaml_file(task_path)
task = sky.Task.from_yaml(task_path)

# Test Kubernetes pod_config invalid
cluster_name = 'test_k8s_config_with_invalid_config'
task.set_resources_override({'cloud': sky.Kubernetes()})
exception_occurred = False
try:
sky.launch(task, cluster_name=cluster_name, dryrun=True)
except sky.exceptions.ResourcesUnavailableError:
exception_occurred = True
assert exception_occurred


def test_gcp_config_with_override(monkeypatch, tmp_path,
enable_all_clouds) -> None:
config_path = tmp_path / 'config.yaml'
Expand Down

0 comments on commit 061d4bd

Please sign in to comment.