Merge pull request #2088 from FedML-AI/alexleung/dev_v070_for_refactor
Alexleung/dev v070 for refactor
fedml-alex authored May 12, 2024
2 parents a35d433 + c81073e commit dec0f51
Showing 70 changed files with 3,378 additions and 1,210 deletions.
2 changes: 2 additions & 0 deletions devops/dockerfile/device-image/Dockerfile-Dev
@@ -19,6 +19,8 @@ RUN pip3 install -e ./
#RUN pip3 install -e '.[jax]'
#RUN pip3 install -e '.[mxnet]'
RUN pip3 install MNN==1.1.6
RUN pip3 install -U pydantic
RUN pip3 install -U fastapi

WORKDIR /fedml

2 changes: 2 additions & 0 deletions devops/dockerfile/device-image/Dockerfile-Release
@@ -19,6 +19,8 @@ RUN pip3 install -e ./
#RUN pip3 install -e '.[jax]'
#RUN pip3 install -e '.[mxnet]'
RUN pip3 install MNN==1.1.6
RUN pip3 install -U pydantic
RUN pip3 install -U fastapi

WORKDIR /fedml

2 changes: 2 additions & 0 deletions devops/dockerfile/device-image/Dockerfile-Test
@@ -19,6 +19,8 @@ RUN pip3 install -e ./
#RUN pip3 install -e '.[jax]'
#RUN pip3 install -e '.[mxnet]'
RUN pip3 install MNN==1.1.6
RUN pip3 install -U pydantic
RUN pip3 install -U fastapi

WORKDIR /fedml

2 changes: 2 additions & 0 deletions devops/dockerfile/server-agent/Dockerfile-Dev
@@ -15,6 +15,8 @@ RUN pip3 install -r ./fedml/requirements.txt
COPY ./python ./fedml/fedml-pip
WORKDIR ./fedml/fedml-pip
RUN pip3 install -e ./
RUN pip3 install -U pydantic
RUN pip3 install -U fastapi
#RUN pip3 install -e '.[tensorflow]'
#RUN pip3 install -e '.[jax]'
#RUN pip3 install -e '.[mxnet]'
2 changes: 2 additions & 0 deletions devops/dockerfile/server-agent/Dockerfile-Release
@@ -15,6 +15,8 @@ RUN pip3 install -r ./fedml/requirements.txt
COPY ./python ./fedml/fedml-pip
WORKDIR ./fedml/fedml-pip
RUN pip3 install -e ./
RUN pip3 install -U pydantic
RUN pip3 install -U fastapi
#RUN pip3 install -e '.[tensorflow]'
#RUN pip3 install -e '.[jax]'
#RUN pip3 install -e '.[mxnet]'
2 changes: 2 additions & 0 deletions devops/dockerfile/server-agent/Dockerfile-Test
@@ -15,6 +15,8 @@ RUN pip3 install -r ./fedml/requirements.txt
COPY ./python ./fedml/fedml-pip
WORKDIR ./fedml/fedml-pip
RUN pip3 install -e ./
RUN pip3 install -U pydantic
RUN pip3 install -U fastapi
#RUN pip3 install -e '.[tensorflow]'
#RUN pip3 install -e '.[jax]'
#RUN pip3 install -e '.[mxnet]'
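A note on the six Dockerfile changes above: "pip3 install -U pydantic" and "pip3 install -U fastapi" upgrade to whatever releases are latest at build time, so the pydantic and fastapi versions baked into the images can drift between builds. The sketch below is an editorial suggestion only, not part of the commit; it simply prints the versions that pip actually resolved inside the image.

# Hypothetical sanity check (not part of this commit): print the package
# versions that "pip3 install -U ..." resolved inside the built image.
from importlib.metadata import version

for pkg in ("pydantic", "fastapi"):
    print(f"{pkg}=={version(pkg)}")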
@@ -0,0 +1,16 @@
workspace: "./src"

inference_image_name: "raphaeljin/fedml"
enable_custom_image: true

bootstrap: |
  echo "Bootstrap start..."
  cat serve_main.py
  echo "Bootstrap finished"
# Simulate a successful deployment
job: |
  python3 serve_main.py
auto_detect_public_ip: true
use_gpu: true
@@ -0,0 +1,39 @@
import os

from fedml.serving import FedMLPredictor
from fedml.serving import FedMLInferenceRunner
import uuid
import torch

# Calculate the number of elements
num_elements = 1_073_741_824 // 4 # using integer division for whole elements


class DummyPredictor(FedMLPredictor):
    def __init__(self):
        super().__init__()
        # Create a tensor with these many elements
        tensor = torch.empty(num_elements, dtype=torch.float32)

        # Move the tensor to GPU
        tensor_gpu = tensor.cuda()

        # for debug
        with open("/tmp/dummy_gpu_occupier.txt", "w") as f:
            f.write("GPU is occupied")

        if os.getenv("FEDML_REPLICA_RANK") == "2":
            # Simulate a failure
            raise Exception("Simulated failure")
            exit(1)

        self.worker_id = uuid.uuid4()

    def predict(self, request):
        return {f"AlohaV0From{self.worker_id}": request}


if __name__ == "__main__":
    predictor = DummyPredictor()
    fedml_inference_runner = FedMLInferenceRunner(predictor)
    fedml_inference_runner.run()
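For orientation (not part of the diff), a minimal sketch of how a client might smoke-test the deployed dummy predictor; the endpoint URL, port, and payload are placeholders, since the actual address depends on how FedML exposes the endpoint for a given deployment.

# Hypothetical smoke test against the deployed DummyPredictor.
# ENDPOINT_URL is a placeholder; replace it with the address FedML reports.
import requests

ENDPOINT_URL = "http://127.0.0.1:2345/predict"

resp = requests.post(ENDPOINT_URL, json={"text": "hello"}, timeout=10)
resp.raise_for_status()
# Expected shape: {"AlohaV0From<worker-uuid>": {"text": "hello"}}
print(resp.json())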
@@ -13,7 +13,6 @@ model_args:

train_args:
  federated_optimizer: "FedOpt"
  client_optimizer:
  server_optimizer: "FedOpt"
  client_id_list:
  client_num_in_total: 1000
@@ -1 +1 @@
fedml-a6000-node-1
Dimitris-FedML.local
@@ -0,0 +1,36 @@
#!/usr/bin/env bash

set -e
cd "$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"/

# The name of the current run.
RUN_ID=$1
if [ -z "${RUN_ID}" ]; then
  echo "Need to provide the id of the run as a string."
  exit 1
fi

# The number of workers.
WORKER_NUM=$2
if [ -z "${WORKER_NUM}" ]; then
  echo "Need to provide the number of workers you want to run the experiment for."
  exit 1
fi

# Spawn server process.
echo "Starting server"
python3 torch_server.py --cf config/fedml_config.yaml --rank 0 --role server --run_id $RUN_ID &
sleep 3 # Sleep for 3s to give the server enough time to start

# Spawn client(s) process.
# Change the number next to seq for spawning more than 1 clients.
for i in `seq $WORKER_NUM`; do
  echo "Starting client $i"
  python3 torch_client.py --cf config/fedml_config.yaml --rank $i --role client --run_id $RUN_ID &
done

# Enable CTRL+C to stop all background processes
trap "trap - SIGTERM && kill -- -$$" SIGINT SIGTERM

# Wait for all background processes to complete
wait
Binary file added python/examples/launch/dump.rdb
2 changes: 1 addition & 1 deletion python/fedml/__init__.py
@@ -34,7 +34,7 @@
_global_training_type = None
_global_comm_backend = None

__version__ = "0.8.29.dev10"
__version__ = "0.8.30"


# This is the deployment environment used for different roles (RD/PM/BD/Public Developers). Potential VALUE: local, dev, test, release
7 changes: 3 additions & 4 deletions python/fedml/api/__init__.py
@@ -179,13 +179,12 @@ def cluster_killall(api_key=None) -> bool:
    return cluster.kill(cluster_names=(), api_key=api_key)


def upload(data_path, api_key=None, service="R2", name=None, description=None, metadata=None, show_progress=False,
def upload(data_path, api_key=None, tag_list=[], service="R2", name=None, description=None, metadata=None, show_progress=False,
           out_progress_to_err=True, progress_desc=None) -> FedMLResponse:
    return storage.upload(data_path=data_path, api_key=api_key, name=name, description=description,
    return storage.upload(data_path=data_path, api_key=api_key, name=name, description=description, tag_list =tag_list,
                          service=service, progress_desc=progress_desc, show_progress=show_progress,
                          out_progress_to_err=out_progress_to_err, metadata=metadata)


def get_storage_user_defined_metadata(data_name, api_key=None) -> FedMLResponse:
    return storage.get_user_metadata(data_name=data_name, api_key=api_key)

@@ -194,7 +193,7 @@ def get_storage_metadata(data_name, api_key=None) -> FedMLResponse:
    return storage.get_metadata(api_key=api_key, data_name=data_name)


def list_storage_obects(api_key=None) -> FedMLResponse:
def list_storage_objects(api_key=None) -> FedMLResponse:
    return storage.list_objects(api_key=api_key)

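To illustrate the API change above (not part of the commit), a short sketch of uploading data with the new tag_list argument and listing objects through the renamed list_storage_objects; the API key and data path are placeholders.

# Hypothetical usage of the updated storage API; all values are placeholders.
import fedml
import fedml.api

fedml.set_env_version("release")

response = fedml.api.upload(
    data_path="./my_dataset",         # placeholder local path
    api_key="YOUR_API_KEY",           # placeholder
    name="my_dataset",
    description="toy dataset",
    tag_list=["cv", "segmentation"],  # new in this commit
)
print(response.code, response.data)

listing = fedml.api.list_storage_objects(api_key="YOUR_API_KEY")
for obj in listing.data or []:
    print(obj.dataName, obj.size, obj.tag_list)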
38 changes: 32 additions & 6 deletions python/fedml/api/modules/storage.py
@@ -10,28 +10,31 @@
from fedml.api.fedml_response import FedMLResponse, ResponseCode


# Todo (alaydshah): Add file size
class StorageMetadata(object):
    def __init__(self, data: dict):
        self.dataName = data.get("datasetName", None)
        self.description = data.get("description", None)
        self.tags = data.get("description", None)
        self.createdAt = data.get("createTime", None)
        self.updatedAt = data.get("updateTime", None)
        self.size = _get_size(data.get("fileSize",None))
        self.tag_list = data.get("tags", None)


# Todo (alaydshah): Add file size while creating objects. Store service name in metadata
# Todo (alaydshah): Store service name in metadata
# Todo (alaydshah): If data already exists, don't upload again. Instead suggest to use update command


def upload(data_path, api_key, name, description, service, show_progress, out_progress_to_err, progress_desc,
def upload(data_path, api_key, name, description, tag_list, service, show_progress, out_progress_to_err, progress_desc,
           metadata) -> FedMLResponse:
    api_key = authenticate(api_key)

    user_id, message = _get_user_id_from_api_key(api_key)

    if user_id is None:
        return FedMLResponse(code=ResponseCode.FAILURE, message=message)

    if(not _check_data_path(data_path)):
        return FedMLResponse(code=ResponseCode.FAILURE,message="Invalid data path")

    archive_path, message = _archive_data(data_path)
    if not archive_path:
@@ -41,6 +44,7 @@ def upload(data_path, api_key, name, description, service, show_progress, out_pr
    name = os.path.splitext(os.path.basename(archive_path))[0] if name is None else name
    file_name = name + ".zip"
    dest_path = os.path.join(user_id, file_name)
    file_size = os.path.getsize(archive_path)

    file_uploaded_url = store.upload_file_with_progress(src_local_path=archive_path, dest_s3_path=dest_path,
                                                        show_progress=show_progress,
@@ -53,9 +57,9 @@ def upload(data_path, api_key, name, description, service, show_progress, out_pr
    json_data = {
        "datasetName": name,
        "description": description,
        "fileSize": "",
        "fileSize": file_size,
        "fileUrl": file_uploaded_url,
        "tagNameList": [],
        "tagNameList": tag_list,
    }

    try:
@@ -228,6 +232,11 @@ def _get_storage_service(service):
    else:
        raise NotImplementedError(f"Service {service} not implemented")

def _check_data_path(data_path):
    if os.path.isdir(data_path) or os.path.isfile(data_path):
        return True
    return False


def _archive_data(data_path: str) -> (str, str):
    src_local_path = os.path.abspath(data_path)
@@ -350,3 +359,20 @@ def _get_data_from_response(message: str, response: requests.Response) -> (Respo
        return ResponseCode.FAILURE, message, None

    return ResponseCode.SUCCESS, "Successfully parsed data from response", data

def _get_size(size_in_bytes:str)->str:
    size_str = ""
    if(size_in_bytes):
        size = int(size_in_bytes)
        size_in_gb = size / (1024 * 1024 * 1024)
        size_in_mb = size / (1024 * 1024)
        size_in_kb = size / 1024
        if(size_in_gb >= 1):
            size_str = f"{size_in_gb:.2f} GB"
        elif(size_in_mb >= 1):
            size_str = f"{size_in_mb:.2f} MB"
        elif(size_in_kb >= 1):
            size_str = f"{size_in_kb:.2f} KB"
        else:
            size_str = f"{size} B"
    return size_str
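For quick reference (not part of the diff), the formatting the new _get_size helper produces for a few arbitrary byte counts:

# Illustrative expectations for _get_size; the sample inputs are arbitrary.
assert _get_size("512") == "512 B"
assert _get_size("1536") == "1.50 KB"             # 1.5 * 1024 bytes
assert _get_size(str(5 * 1024 * 1024)) == "5.00 MB"
assert _get_size(str(2 * 1024 ** 3)) == "2.00 GB"
assert _get_size(None) == ""                      # falsy input -> empty string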
27 changes: 18 additions & 9 deletions python/fedml/cli/modules/storage.py
@@ -50,8 +50,10 @@ def validate_argument(ctx, param, value):
              "the data file or directory name.")
@click.option("--description", "-d", type=str, help="Add description to your data to store. If not provided, "
              "the description will be empty.")
@click.option("--user_metadata", "-um", type=str, help="User-defined metadata in the form of a name-value (key-value) "
              "pair. Defaults to None.")
@click.option("--user_metadata", "-um", type=str, help="User-defined metadata in the form of a dictionary, for instance, "
              " {'name':'value'} within double quotes. "" "
              "Defaults to None.")
@click.option("--tags", "-t", type=str, help="Add tags to your data to store. Give tags in comma separated form like 'cv,unet,segmentation' If not provided, the tags will be empty.")
@click.option('--service', "-s", type=click.Choice(['R2']), default="R2", help="Storage service for object storage. "
              "Only R2 is supported as of now")
@click.option(
@@ -64,10 +66,11 @@ def validate_argument(ctx, param, value):
    default="release",
    help=version_help,
)
def upload(data_path: str, name: str, user_metadata: str, description: str, version: str, api_key: str, service):
def upload(data_path: str, name: str, user_metadata: str, description: str, version: str, api_key: str, tags:str, service):
    metadata = _parse_metadata(user_metadata)
    tag_list = _parse_tags(tags)
    fedml.set_env_version(version)
    response = fedml.api.upload(data_path=data_path, api_key=api_key, name=name, service=service, show_progress=True,
    response = fedml.api.upload(data_path=data_path, api_key=api_key, name=name, tag_list = tag_list, service=service, show_progress=True,
                                description=description, metadata=metadata)
    if response.code == ResponseCode.SUCCESS:
        click.echo(f"Data uploaded successfully. | url: {response.data}")
@@ -89,16 +92,16 @@ def upload(data_path: str, name: str, user_metadata: str, description: str, vers
)
def list_data(version, api_key):
    fedml.set_env_version(version)
    response = fedml.api.list_storage_obects(api_key=api_key)
    response = fedml.api.list_storage_objects(api_key=api_key)
    if response.code == ResponseCode.SUCCESS:
        click.echo(f"Successfully fetched list of stored objects:")
        if not response.data:
            click.echo(f"No stored objects found for account linked with apikey: {api_key}")
            return
        object_list_table = PrettyTable(["Data Name", "Description", "Created At", "Updated At"])
        object_list_table = PrettyTable(["Data Name", "Data Size", "Description", "Data Tags","Created At", "Updated At"])
        for stored_object in response.data:
            object_list_table.add_row(
                [stored_object.dataName, stored_object.description, stored_object.createdAt, stored_object.updatedAt])
                [stored_object.dataName, stored_object.size, stored_object.description, stored_object.tag_list,stored_object.createdAt, stored_object.updatedAt])
        click.echo(object_list_table)
    else:
        click.echo(f"Failed to list stored objects for account linked with apikey {api_key}. "
@@ -156,8 +159,8 @@ def get_metadata(data_name, version, api_key):
            return
        click.echo(f"Successfully fetched metadata for object {data_name}:")
        # Todo (alaydshah): Add file size and tags
        metadata_table = PrettyTable(["Data Name", "Description", "Created At", "Updated At"])
        metadata_table.add_row([metadata.dataName, metadata.description, metadata.createdAt, metadata.updatedAt])
        metadata_table = PrettyTable(["Data Name","Data Size","Description","Data Tags","Created At", "Updated At"])
        metadata_table.add_row([metadata.dataName,metadata.size,metadata.description,metadata.tag_list,metadata.createdAt, metadata.updatedAt])
        click.echo(metadata_table)
        click.echo("")
    else:
@@ -237,3 +240,9 @@ def _parse_metadata(metadata: str):
        click.echo(
            f"Input metadata cannot be evaluated. Please make sure metadata is in the correct format. Error: {e}.")
        exit()

def _parse_tags(tags:str):
    if not tags:
        return []
    tag_list = tags.split(",")
    return tag_list
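For reference (not part of the diff), what the new tag parsing yields; on the command line this list is populated from the new --tags/-t option above and forwarded to fedml.api.upload as tag_list. The sample inputs below are arbitrary.

# Illustrative behaviour of _parse_tags; sample inputs are arbitrary.
assert _parse_tags(None) == []
assert _parse_tags("") == []
assert _parse_tags("cv,unet,segmentation") == ["cv", "unet", "segmentation"]
# Note: whitespace around commas is preserved, e.g. "cv, unet" -> ["cv", " unet"].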