Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[OCI] Support OCI Object Storage #4501

Merged
merged 20 commits into from
Dec 29, 2024
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 4 additions & 10 deletions examples/oci/dataset-mount.yaml
HysunHe marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
@@ -1,21 +1,17 @@
name: cpu-task1

resources:
# Optional; if left out, automatically pick the cheapest cloud.
cloud: oci
region: us-sanjose-1
#zone: AP-SEOUL-1-AD-1
#instance_type: VM.Standard.E4.Flex$_2_8
cpus: 2
#image_id: skypilot:cpu-ubuntu-2004
disk_size: 256
disk_tier: medium
use_spot: False

file_mounts:
# Mount an existing OCI bucket
/datasets-storage:
source: oci://skybucket5
source: oci://skybucket
mode: MOUNT # Either MOUNT or COPY. Optional.

# Working directory (optional) containing the project codebase.
Expand All @@ -34,8 +30,6 @@ setup: |
run: |
echo "*** Running the task on OCI ***"
timestamp=$(date +%s)
for i in {1..10}; do
echo "$timestamp $i"
sleep 1
done
echo "The task is completed."
ls -lthr /datasets-storage
echo "hi" >> /datasets-storage/foo.txt
ls -lthr /datasets-storage
17 changes: 5 additions & 12 deletions examples/oci/dataset-upload-and-mount.yaml
Original file line number Diff line number Diff line change
@@ -1,21 +1,17 @@
name: cpu-task1

resources:
# Optional; if left out, automatically pick the cheapest cloud.
cloud: oci
region: us-sanjose-1
#zone: AP-SEOUL-1-AD-1
#instance_type: VM.Standard.E4.Flex$_2_8
cpus: 2
#image_id: skypilot:cpu-ubuntu-2004
disk_size: 256
disk_tier: medium
use_spot: False

file_mounts:
/datasets-storage:
name: skybucket5 # Name of storage, optional when source is bucket URI
source: ['~/Hysun/dataset'] # Source path, can be local or bucket URL. Optional, do not specify to create an empty bucket.
name: skybucket # Name of storage, optional when source is bucket URI
source: ['./examples/oci'] # Source path, can be local or bucket URL. Optional, do not specify to create an empty bucket.
store: oci # E.g., 'oci', 's3', 'gcs', ...; default: None. Optional.
persistent: True # Defaults to True; can be set to false. Optional.
mode: MOUNT # Either MOUNT or COPY. Optional.
Expand All @@ -35,9 +31,6 @@ setup: |
# Invoked under the workdir (i.e., can use its files).
run: |
echo "*** Running the task on OCI ***"
timestamp=$(date +%s)
for i in {1..10}; do
echo "$timestamp $i"
sleep 1
done
echo "The task is completed."
ls -lthr /datasets-storage
echo "hi" >> /datasets-storage/foo.txt
ls -lthr /datasets-storage
35 changes: 18 additions & 17 deletions sky/adaptors/oci.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
"""Oracle OCI cloud adaptor"""

import functools
import logging
import os
from typing import List

from sky.adaptors import common
from sky.clouds.utils import oci_utils
Expand Down Expand Up @@ -67,23 +67,24 @@ def get_object_storage_client(region=None, profile='DEFAULT'):
get_oci_config(region, profile))


def goto_oci_cli_venv() -> List:
    """Return the shell commands that enter the dedicated oci-cli conda env.

    The commands, in order: create the `sky-oci-cli-env` conda environment
    if `conda info --envs` does not list it, source conda's shell hook
    (best effort), activate the environment, install `oci-cli` with pip,
    and export the variable that suppresses the OCI CLI file-permissions
    warning.

    Returns:
        A list of shell command strings, one per step.
    """
    # oci-cli gets a specific venv of its own because of its dependency
    # conflict with runpod (on the 'click' version).
    # pylint: disable=line-too-long
    ensure_env_exists = 'conda info --envs | grep "sky-oci-cli-env" || conda create -n sky-oci-cli-env python=3.10 -y'
    load_conda_hook = '. $(conda info --base 2> /dev/null)/etc/profile.d/conda.sh > /dev/null 2>&1 || true'
    activate_env = 'conda activate sky-oci-cli-env'
    install_cli = 'pip install oci-cli'
    silence_permission_warning = 'export OCI_CLI_SUPPRESS_FILE_PERMISSIONS_WARNING=True'
    return [
        ensure_env_exists,
        load_conda_hook,
        activate_env,
        install_cli,
        silence_permission_warning,
    ]
def service_exception():
    """Return the `ServiceError` class from `oci.exceptions`.

    The attribute is looked up at call time rather than at import time.
    NOTE(review): `oci` is a module-level name defined outside this view --
    presumably a lazily imported OCI SDK; confirm.
    """
    oci_exceptions = oci.exceptions
    return oci_exceptions.ServiceError


def leave_oci_cli_venv() -> str:
    """Return the shell command that leaves the active conda environment."""
    deactivate_cmd = 'conda deactivate'
    return deactivate_cmd
def with_oci_env(f):
    """Decorate a command builder so its command runs inside the oci-cli env.

    `f` must return a single shell command string. The decorated function
    returns one ' && '-joined command line that creates the
    `sky-oci-cli-env` conda environment if it is not listed, sources
    conda's shell hook (best effort), activates the environment, installs
    `oci-cli`, suppresses the OCI CLI file-permissions warning, runs the
    command produced by `f`, and finally deactivates the environment.

    Args:
        f: Callable returning the shell command to wrap.

    Returns:
        A wrapper with the same call signature as `f` that returns the
        full command line as a string.
    """

    @functools.wraps(f)
    def wrapper(*args, **kwargs):
        # pylint: disable=line-too-long
        setup_steps = [
            'conda info --envs | grep "sky-oci-cli-env" || conda create -n sky-oci-cli-env python=3.10 -y',
            '. $(conda info --base 2> /dev/null)/etc/profile.d/conda.sh > /dev/null 2>&1 || true',
            'conda activate sky-oci-cli-env',
            'pip install oci-cli',
            'export OCI_CLI_SUPPRESS_FILE_PERMISSIONS_WARNING=True',
        ]
        wrapped_cmd = f(*args, **kwargs)
        pipeline = [*setup_steps, wrapped_cmd, 'conda deactivate']
        return ' && '.join(pipeline)

    return wrapper
18 changes: 5 additions & 13 deletions sky/cloud_stores.py
Original file line number Diff line number Diff line change
Expand Up @@ -506,6 +506,7 @@ def is_directory(self, url: str) -> bool:
# A directory with only 1 item
return True

@oci.with_oci_env
def make_sync_dir_command(self, source: str, destination: str) -> str:
"""Downloads using OCI CLI."""
bucket_name, path = data_utils.split_oci_path(source)
Expand All @@ -514,28 +515,19 @@ def make_sync_dir_command(self, source: str, destination: str) -> str:
f'--bucket-name {bucket_name} '
f'--prefix "{path}" --dest-dir "{destination}"')

all_commands = oci.goto_oci_cli_venv()
all_commands.append(download_via_ocicli)
all_commands.append(oci.leave_oci_cli_venv())
return ' && '.join(all_commands)
return download_via_ocicli

@oci.with_oci_env
def make_sync_file_command(self, source: str, destination: str) -> str:
    """Build the OCI CLI command that downloads a single object.

    Args:
        source: OCI object URL; it is split into bucket name and object
            path by `data_utils.split_oci_path`.
        destination: Directory to download into. The object's base name
            is appended to it exactly once.

    Returns:
        The bare `oci os object get` command. Entering and leaving the
        oci-cli conda environment is handled by the `oci.with_oci_env`
        decorator, so it must not be added here as well.
    """
    bucket_name, path = data_utils.split_oci_path(source)
    filename = os.path.basename(path)
    # os.path.join inserts the separator only when `destination` does not
    # already end with one, replacing the manual endswith('/') branching.
    destination = os.path.join(destination, filename)

    download_via_ocicli = (f'oci os object get --bucket-name {bucket_name} '
                           f'--name "{path}" --file "{destination}"')
    return download_via_ocicli


def get_storage_from_path(url: str) -> CloudStorage:
Expand Down
37 changes: 0 additions & 37 deletions sky/data/data_transfer.py
Original file line number Diff line number Diff line change
Expand Up @@ -237,40 +237,3 @@ def r2_to_oci(r2_bucket_name: str, oci_bucket_name: str) -> None:
'Moving data directly from Cloudflare R2 to OCI '
'bucket is currently not supported. Please specify '
'a local source for the storage object.')


def oci_to_gcs(oci_bucket_name: str, gs_bucket_name: str) -> None:
    """Placeholder for a one-time transfer from OCI Object Storage to GCS.

    Not implemented: every call raises.

    Args:
        oci_bucket_name: str; Name of the OCI Bucket
        gs_bucket_name: str; Name of the Google Cloud Storage Bucket

    Raises:
        NotImplementedError: Always.
    """
    # TODO(HysunHe): Implement sync with other clouds (s3, gs)
    unsupported_msg = ('Moving data directly from OCI to GCS bucket '
                       'is currently not supported. Please specify '
                       'a local source for the storage object.')
    raise NotImplementedError(unsupported_msg)


def oci_to_s3(oci_bucket_name: str, gs_bucket_name: str) -> None:
    """Placeholder for a one-time transfer from OCI Object Storage to S3.

    Not implemented: every call raises.

    Args:
        oci_bucket_name: str; Name of the OCI Bucket
        gs_bucket_name: str; Name of the Amazon S3 Bucket (note that the
            parameter itself is named `gs_bucket_name`)

    Raises:
        NotImplementedError: Always.
    """
    # TODO(HysunHe): Implement sync with other clouds (s3, gs)
    unsupported_msg = ('Moving data directly from OCI to S3 bucket '
                       'is currently not supported. Please specify '
                       'a local source for the storage object.')
    raise NotImplementedError(unsupported_msg)


def oci_to_r2(oci_bucket_name: str, r2_bucket_name: str) -> None:
    """Placeholder for a one-time transfer from OCI Object Storage to R2.

    Not implemented: every call raises.

    Args:
        oci_bucket_name: str; Name of the OCI Bucket
        r2_bucket_name: str; Name of the Cloudflare R2 Bucket

    Raises:
        NotImplementedError: Always.
    """
    unsupported_msg = ('Moving data directly from OCI to Cloudflare '
                       'R2 bucket is currently not supported. Please '
                       'specify a local source for the storage object.')
    raise NotImplementedError(unsupported_msg)
14 changes: 0 additions & 14 deletions sky/data/data_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -741,17 +741,3 @@ def split_oci_path(oci_path: str) -> Tuple[str, str]:
bucket = path_parts.pop(0)
key = '/'.join(path_parts)
return bucket, key


def verify_oci_bucket(name: str) -> bool:
    """Report whether the OCI bucket exists (currently always True).

    This helper is mainly used by other cloud stores to check an OCI
    bucket that is specified as a source. No actual lookup is made,
    because moving data directly between other cloud buckets and OCI
    buckets is currently not supported; the name is only logged at debug
    level.

    Args:
        name: str; Name of OCI Bucket (without oci:// prefix)

    Returns:
        True, unconditionally.
    """
    debug_line = f'verify_oci_bucket: {name}'
    logger.debug(debug_line)
    return True
43 changes: 43 additions & 0 deletions sky/data/mounting_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
_BLOBFUSE_CACHE_ROOT_DIR = '~/.sky/blobfuse2_cache'
_BLOBFUSE_CACHE_DIR = ('~/.sky/blobfuse2_cache/'
'{storage_account_name}_{container_name}')
RCLONE_VERSION = 'v1.68.2'


def get_s3_mount_install_cmd() -> str:
Expand Down Expand Up @@ -158,6 +159,48 @@ def get_cos_mount_cmd(rclone_config_data: str, rclone_config_path: str,
return mount_cmd


def get_rclone_install_cmd() -> str:
    """Build the shell command that installs the pinned rclone release.

    The command covers both package families:
      * When `dpkg` is on the PATH, rclone is installed from the .deb
        package unless `which rclone` already finds it.
      * Otherwise (or if the dpkg branch fails), rclone is installed from
        the .rpm package via yum, again only if it is not already found.

    In both cases the linux-amd64 package for `RCLONE_VERSION` is
    downloaded into the home directory with curl and removed after
    installation.

    Returns:
        The installation command as a single shell string.
    """
    # pylint: disable=line-too-long
    package = f'rclone-{RCLONE_VERSION}-linux-amd64'
    download_url = f'https://downloads.rclone.org/{RCLONE_VERSION}/{package}'
    already_installed = 'which rclone > /dev/null'

    install_deb = (f'(cd ~ > /dev/null'
                   f' && curl -O {download_url}.deb'
                   f' && sudo dpkg -i {package}.deb'
                   f' && rm -f {package}.deb)')
    install_rpm = (f'(cd ~ > /dev/null'
                   f' && curl -O {download_url}.rpm'
                   f' && sudo yum --nogpgcheck install {package}.rpm -y'
                   f' && rm -f {package}.rpm)')

    dpkg_branch = f'(which dpkg > /dev/null 2>&1 && ({already_installed} || {install_deb}))'
    yum_branch = f'({already_installed} || {install_rpm})'
    return f'{dpkg_branch} || {yum_branch}'


def get_oci_mount_cmd(mount_path: str, store_name: str, region: str,
namespace: str, compartment: str, config_file: str,
config_profile: str) -> str:
""" OCI specific RClone mount command for oci object storage. """
# pylint: disable=line-too-long
mount_cmd = (
f'sudo chown -R `whoami` {mount_path}'
f' && rclone config create oos_{store_name} oracleobjectstorage'
f' provider user_principal_auth namespace {namespace}'
f' compartment {compartment} region {region}'
f' oci-config-file {config_file}'
f' oci-config-profile {config_profile}'
f' && sed -i "s/oci-config-file/config_file/g;'
f' s/oci-config-profile/config_profile/g" ~/.config/rclone/rclone.conf'
f' && ([ ! -f /bin/fusermount3 ] && sudo ln -s /bin/fusermount /bin/fusermount3 || true)'
f' && (grep -q {mount_path} /proc/mounts || rclone mount oos_{store_name}:{store_name} {mount_path} --daemon --allow-non-empty)'
)
return mount_cmd


def get_rclone_version_check_cmd() -> str:
    """Build the shell command that checks for the pinned rclone version.

    The command pipes `rclone --version` into grep and succeeds only if
    the output contains `RCLONE_VERSION`.

    Returns:
        The version-check command as a single shell string.
    """
    # -F matches the version as a fixed string; without it the dots in
    # e.g. 'v1.68.2' are regex wildcards and could match other text.
    return f'rclone --version | grep -qF {RCLONE_VERSION}'


def _get_mount_binary(mount_cmd: str) -> str:
"""Returns mounting binary in string given as the mount command.

Expand Down
Loading
Loading