Skip to content

Commit

Permalink
Split core.py into smaller files
Browse files (browse the repository at this point in the history)
  • Loading branch information
IrvingMg committed Feb 5, 2025
1 parent dd721f5 commit 783700d
Show file tree
Hide file tree
Showing 37 changed files with 3,231 additions and 2,976 deletions.
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ dev = [
]

[tool.setuptools.dynamic]
version = {attr = "xpk.core.core.__version__"}
version = {attr = "xpk.core.config.__version__"}

[tool.setuptools]
packages = ["xpk", "xpk.parser", "xpk.core", "xpk.commands", "xpk.utils", "xpk.core.blueprint", "xpk.core.workload_decorators"]
Expand Down
4 changes: 0 additions & 4 deletions pytype-conf.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,8 @@ exclude =
src/xpk/commands
src/xpk/core/tests
src/xpk/core/__init__.py
src/xpk/core/app_profile.py
src/xpk/core/blueprint.py
src/xpk/core/cluster_private.py
src/xpk/core/commands.py
src/xpk/core/core.py
src/xpk/core/job_template.py
src/xpk/core/kjob.py
src/xpk/core/kueue.py
src/xpk/core/nap.py
Expand Down
6 changes: 3 additions & 3 deletions src/xpk/commands/batch.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,12 @@

from argparse import Namespace

from ..core.commands import run_command_for_value
from ..core.gcloud_context import add_zone_and_project
from ..core.kjob import AppProfileDefaults
from ..core.kueue import LOCAL_QUEUE_NAME
from ..utils.console import xpk_exit, xpk_print
from .common import set_cluster_command
from ..core.core import add_zone_and_project
from ..core.kjob import AppProfileDefaults
from ..core.commands import run_command_for_value
from .kind import set_local_cluster_command


Expand Down
46 changes: 22 additions & 24 deletions src/xpk/commands/cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,54 +14,52 @@
limitations under the License.
"""

from ..core.commands import run_command_for_value, run_command_with_updates
from ..core.core import (
VERTEX_TENSORBOARD_FEATURE_FLAG,
add_zone_and_project,
create_cluster_configmaps,
create_cluster_network_config,
create_vertex_tensorboard,
delete_cluster_subnets,
from tabulate import tabulate

from ..core.capacity import H100_DEVICE_TYPE
from ..core.cluster import (
get_all_clusters_programmatic,
get_gke_control_plane_version,
get_gke_node_pool_version,
get_gke_server_config,
h100_device_type,
install_nccl_on_cluster,
run_gke_node_pool_create_command,
set_jobset_on_cluster,
set_up_cluster_network_for_gpu,
zone_to_region,
get_user_input,
)
from ..core.cluster_private import authorize_private_cluster_access_if_necessary
from ..core.kjob import (
verify_kjob_installed,
prepare_kjob,
apply_kjob_crds,
from ..core.commands import run_command_for_value, run_command_with_updates
from ..core.config import VERTEX_TENSORBOARD_FEATURE_FLAG
from ..core.gcloud_context import (
add_zone_and_project,
get_gke_control_plane_version,
get_gke_server_config,
zone_to_region,
)
from ..core.kjob import apply_kjob_crds, prepare_kjob, verify_kjob_installed
from ..core.kueue import (
cluster_preheat_yml,
install_kueue_crs,
install_kueue_on_cluster,
wait_for_kueue_available,
)
from ..core.nap import enable_autoprovisioning_on_cluster
from ..core.network import (
create_cluster_network_config,
delete_cluster_subnets,
set_up_cluster_network_for_gpu,
)
from ..core.nodepool import get_gke_node_pool_version, run_gke_node_pool_create_command
from ..core.ray import install_ray_cluster
from ..core.resources import create_cluster_configmaps
from ..core.system_characteristics import (
AcceleratorType,
AcceleratorTypeToAcceleratorCharacteristics,
SystemCharacteristics,
get_system_characteristics,
)
from ..core.vertex import create_vertex_tensorboard
from ..core.workload import get_workload_list
from ..utils.console import get_user_input, xpk_exit, xpk_print
from ..utils.file import write_tmp_file
from ..utils.console import xpk_exit, xpk_print
from . import cluster_gcluster
from .common import set_cluster_command

from tabulate import tabulate


def cluster_create(args) -> None:
"""Function around cluster creation.
Expand Down Expand Up @@ -131,7 +129,7 @@ def cluster_create(args) -> None:
if set_up_cluster_network_code != 0:
xpk_exit(set_up_cluster_network_code)

if system.device_type == h100_device_type:
if system.device_type == H100_DEVICE_TYPE:
xpk_print('Creating Network Config for cluster')
create_cluster_network_config_code = create_cluster_network_config(args)
if create_cluster_network_config_code != 0:
Expand Down
16 changes: 12 additions & 4 deletions src/xpk/commands/cluster_gcluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,16 +14,24 @@
limitations under the License.
"""

from ..core.blueprint.blueprint_generator import BlueprintGenerator, BlueprintGeneratorOutput, supported_device_types, a3mega_device_type, a3ultra_device_type
import os

from ..core.blueprint.blueprint_generator import (
BlueprintGenerator,
BlueprintGeneratorOutput,
a3mega_device_type,
a3ultra_device_type,
supported_device_types,
)
from ..core.capacity import get_capacity_type
from ..core.docker_manager import DockerManager
from ..core.gcloud_context import zone_to_region
from ..core.gcluster_manager import GclusterManager
from ..core.core import zone_to_region, get_capacity_type
from ..utils.console import xpk_exit, xpk_print
from ..utils.network import all_IPs_cidr
from ..utils.file import ensure_directory_exists
from ..utils.network import all_IPs_cidr
from ..utils.objects import hash_string
from .common import set_cluster_command
import os

# Module-level directory paths under 'xpkclusters/'. os.path.abspath resolves
# each relative path against the current working directory at the moment this
# module is imported, so the resulting absolute paths depend on where the
# process was started.
# NOTE(review): presumably where generated blueprints and gcluster output are
# stored — confirm against the code that uses these names.
blueprints_path = os.path.abspath('xpkclusters/blueprints')
gcluster_working_dir = os.path.abspath('xpkclusters/gcluster-out')
Expand Down
2 changes: 1 addition & 1 deletion src/xpk/commands/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
"""

from ..core.commands import run_command_with_updates_retry
from ..core.core import zone_to_region
from ..core.gcloud_context import zone_to_region
from ..utils.console import xpk_print


Expand Down
18 changes: 8 additions & 10 deletions src/xpk/commands/info.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,19 +14,17 @@
limitations under the License.
"""

from ..utils.console import xpk_exit, xpk_print
from ..core.kueue import verify_kueuectl
from .common import set_cluster_command
from ..core.commands import (
run_command_for_value,
)
from ..core.core import (
add_zone_and_project,
)
import json
from tabulate import tabulate
from argparse import Namespace

from tabulate import tabulate

from ..core.commands import run_command_for_value
from ..core.gcloud_context import add_zone_and_project
from ..core.kueue import verify_kueuectl
from ..utils.console import xpk_exit, xpk_print
from .common import set_cluster_command

# Module-level table format name.
# NOTE(review): presumably passed to tabulate() as its `tablefmt` argument
# ('plain' is one of tabulate's built-in formats) — confirm against the callers.
table_fmt = 'plain'


Expand Down
10 changes: 3 additions & 7 deletions src/xpk/commands/inspector.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,15 +15,11 @@
"""

from ..core.commands import run_command_for_value
from ..core.core import (
CLUSTER_METADATA_CONFIGMAP,
CLUSTER_RESOURCES_CONFIGMAP,
add_zone_and_project,
zone_to_region,
)
from ..core.gcloud_context import add_zone_and_project, zone_to_region
from ..core.kueue import CLUSTER_QUEUE_NAME, LOCAL_QUEUE_NAME
from ..utils.file import append_tmp_file, write_tmp_file
from ..core.resources import CLUSTER_METADATA_CONFIGMAP, CLUSTER_RESOURCES_CONFIGMAP
from ..utils.console import xpk_exit, xpk_print
from ..utils.file import append_tmp_file, write_tmp_file
from .common import set_cluster_command
from .workload import get_workload_list

Expand Down
16 changes: 9 additions & 7 deletions src/xpk/commands/job.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,16 +14,18 @@
limitations under the License.
"""

from .common import set_cluster_command
from .kind import set_local_cluster_command
from ..core.commands import run_command_for_value, run_command_with_updates
from ..utils.console import xpk_exit, xpk_print
from ..core.kjob import AppProfileDefaults
from ..core.core import add_zone_and_project
from ruamel.yaml import YAML
import re
import sys

from ruamel.yaml import YAML

from ..core.commands import run_command_for_value, run_command_with_updates
from ..core.gcloud_context import add_zone_and_project
from ..core.kjob import AppProfileDefaults
from ..utils.console import xpk_exit, xpk_print
from .common import set_cluster_command
from .kind import set_local_cluster_command


def job_info(args):
"""Run commands obtaining information about a job given by name.
Expand Down
2 changes: 1 addition & 1 deletion src/xpk/commands/kind.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
run_command_for_value,
run_command_with_updates,
)
from ..core.core import (
from ..core.cluster import (
set_jobset_on_cluster,
)
from ..core.kjob import (
Expand Down
6 changes: 3 additions & 3 deletions src/xpk/commands/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,12 @@

from argparse import Namespace

from ..core.commands import run_command_with_full_controls
from ..core.gcloud_context import add_zone_and_project
from ..core.kjob import AppProfileDefaults
from ..core.kueue import LOCAL_QUEUE_NAME
from ..utils.console import xpk_exit, xpk_print
from .cluster import set_cluster_command
from ..core.core import add_zone_and_project
from ..core.kjob import AppProfileDefaults
from ..core.commands import run_command_with_full_controls
from .kind import set_local_cluster_command


Expand Down
Loading

0 comments on commit 783700d

Please sign in to comment.