-
-
Notifications
You must be signed in to change notification settings - Fork 743
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #1887 from FedML-AI/test/v0.7.0
Test/v0.7.0
- Loading branch information
Showing
15 changed files
with
411 additions
and
9 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
*.pyc |
45 changes: 45 additions & 0 deletions
45
python/fedml/workflow/driver_example/hello_world/hello_world.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
import os | ||
import time | ||
|
||
import fedml | ||
|
||
def _log_requirements_artifact(label, artifact_type, run_id, edge_id):
    """Upload ``./requirements.txt`` and ``./config`` as a single artifact.

    The artifact is named ``<label>@<run_id>-<edge_id>`` and logged through
    ``fedml.mlops.log_artifact``.
    """
    bundle = fedml.mlops.Artifact(name=f"{label}@{run_id}-{edge_id}", type=artifact_type)
    bundle.add_file("./requirements.txt")
    bundle.add_dir("./config")
    fedml.mlops.log_artifact(bundle)


if __name__ == "__main__":
    print("Hi everyone, I am an launch job.")

    print(f"current config is {fedml.get_env_version()}")

    # Run and edge identifiers are read from the environment; 0 is used when unset.
    run_id = os.getenv('FEDML_CURRENT_RUN_ID', 0)
    edge_id = os.getenv('FEDML_CURRENT_EDGE_ID', 0)

    # Log the same payload once per artifact type, with the model upload
    # sitting between the "general" and "log" artifacts.
    _log_requirements_artifact("general-file", fedml.mlops.ARTIFACT_TYPE_NAME_GENERAL, run_id, edge_id)

    fedml.mlops.log_model(f"model-file@{run_id}-{edge_id}", "requirements.txt")

    _log_requirements_artifact("log-file", fedml.mlops.ARTIFACT_TYPE_NAME_LOG, run_id, edge_id)
    _log_requirements_artifact("source-file", fedml.mlops.ARTIFACT_TYPE_NAME_SOURCE, run_id, edge_id)
    _log_requirements_artifact("dataset-file", fedml.mlops.ARTIFACT_TYPE_NAME_DATASET, run_id, edge_id)

    # Report ten synthetic metric points, two seconds apart.
    acc = 0.1
    loss = 2.0
    for _step in range(10):
        acc += 0.01
        loss -= 0.02
        fedml.mlops.log_metric({"acc": acc, "loss": loss})
        time.sleep(2)

    # Final pause before the process exits.
    time.sleep(10)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
# Local directory where your source code resides. | ||
# It should be the relative path to this job yaml file or the absolute path. | ||
# If your job doesn't contain any source code, it can be empty. | ||
workspace: hello_world | ||
|
||
# Docker image name. | ||
# It should be the full name of the image with tag. | ||
# If you want to use the default image, it can be empty. | ||
docker: | ||
image: fedml/fedml-default-launch:cu12.1-u22.04 | ||
|
||
# Running entry commands which will be executed as the job entry point. | ||
# Supports multiple lines; must not be empty. | ||
job: | | ||
echo "Hello, Here is the Falcon platform." | ||
echo "Current directory is as follows." | ||
pwd | ||
python3 hello_world.py | ||
# Bootstrap shell commands which will be executed before running entry commands. | ||
# Supports multiple lines; may be empty. | ||
bootstrap: | | ||
pip install -r requirements.txt | ||
echo "Bootstrap finished." | ||
computing: | ||
resource_type: H100-80GB-HBM3 # e.g., A100-80G, please check the resource type list by "fedml show-resource-type" or visiting URL: https://open.fedml.ai/accelerator_resource_type | ||
minimum_num_gpus: 1 # minimum # of GPUs to provision | ||
maximum_cost_per_hour: $0.5 # max cost per hour of all machines for your job |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,56 @@ | ||
import logging | ||
|
||
import fedml | ||
import os | ||
from fedml.workflow.jobs import Job, JobStatus | ||
from fedml.workflow.workflow import Workflow | ||
|
||
|
||
class HelloWorldJob(Job):
    """Workflow job that launches ``hello_world_job.yaml`` through ``fedml.api``.

    The API key is never embedded in the source. It is read from the
    environment variable named by ``API_KEY_ENV_VAR`` each time a call is made,
    and it is deliberately not stored on the instance because ``Job.__repr__``
    prints every instance attribute.
    """

    # Name of the environment variable that supplies the API key.
    API_KEY_ENV_VAR = "FEDML_API_KEY"

    def __init__(self, name):
        """Create the job; ``run_id`` stays ``None`` until a launch succeeds."""
        super().__init__(name)
        self.run_id = None

    @staticmethod
    def _api_key():
        """Return the API key from the environment, or ``None`` when unset.

        NOTE(review): presumably ``fedml.api`` falls back to the locally saved
        login credentials when ``api_key`` is ``None`` -- confirm.
        """
        return os.getenv(HelloWorldJob.API_KEY_ENV_VAR)

    def run(self):
        """Launch the job described by the YAML file next to this module.

        On success the returned run id is stored in ``self.run_id``; otherwise
        ``self.run_id`` is left unchanged and an error is logged.
        """
        fedml.set_env_version("test")
        working_directory = os.path.dirname(os.path.abspath(__file__))
        absolute_path = os.path.join(working_directory, "hello_world_job.yaml")
        result = fedml.api.launch_job(yaml_file=absolute_path, api_key=self._api_key())
        if result.run_id and int(result.run_id) > 0:
            self.run_id = result.run_id
        else:
            logging.error(f"Failed to launch job {self.name}: no valid run id was returned")

    def status(self):
        """Return the ``JobStatus`` of the launched run.

        ``JobStatus.UNDETERMINED`` is returned when nothing has been launched
        yet or when the status query raises.
        """
        if self.run_id:
            try:
                _, run_status = fedml.api.run_status(run_id=self.run_id, api_key=self._api_key())
                return JobStatus.get_job_status_from_run_status(run_status)
            except Exception as e:
                # Best effort: a failed status query must not crash the caller.
                logging.error(f"Error while getting status of run {self.run_id}: {e}")
        return JobStatus.UNDETERMINED

    def kill(self):
        """Stop the launched run, if any.

        Returns whatever ``fedml.api.run_stop`` returns, or ``None`` when there
        is no run to stop or the stop request raises.
        """
        if self.run_id:
            try:
                return fedml.api.run_stop(run_id=self.run_id, api_key=self._api_key())
            except Exception as e:
                logging.error(f"Error while stopping run {self.run_id}: {e}")
        return None
|
||
|
||
if __name__ == "__main__":
    # Two-step workflow: job_2 is declared dependent on job_1.
    job_1 = HelloWorldJob(name="hello_world")
    job_2 = HelloWorldJob(name="hello_world_dependent_on_job_1")

    workflow = Workflow(name="hello_world_workflow", loop=False)
    workflow.add_job(job_1)
    workflow.add_job(job_2, dependencies=[job_1])

    # Build and run the workflow exactly once so each remote job is launched
    # a single time.
    workflow.run()

    # Dump the resolved workflow structure for inspection.
    print("graph", workflow.metadata.graph)
    print("nodes", workflow.metadata.nodes)
    print("topological_order", workflow.metadata.topological_order)
    print("loop", workflow.loop)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,75 @@ | ||
from enum import Enum | ||
from abc import ABC, abstractmethod | ||
from fedml.api.constants import RunStatus | ||
|
||
|
||
# Define an enum for job status | ||
class JobStatus(Enum):
    """Job states used by the workflow layer, derived from ``RunStatus`` values."""

    PROVISIONING = "PROVISIONING"
    RUNNING = "RUNNING"
    FINISHED = "FINISHED"
    FAILED = "FAILED"
    UNDETERMINED = "UNDETERMINED"

    @classmethod
    def _create_run_status_to_job_status_mapping(cls):
        """Build the job-status -> set-of-run-statuses table and cache it on the class."""
        provisioning_runs = {
            RunStatus.NOT_STARTED,
            RunStatus.QUEUED,
            RunStatus.CLUSTER_QUEUE,
            RunStatus.PRE_QUEUE,
            RunStatus.PROVISIONING,
        }
        running_runs = {RunStatus.STARTING, RunStatus.RUNNING, RunStatus.LAUNCHED}
        finished_runs = {RunStatus.FINISHED}
        failed_runs = {
            RunStatus.STOPPING,
            RunStatus.KILLED,
            RunStatus.FAILED,
            RunStatus.ABANDONED,
            RunStatus.ERROR,
            RunStatus.BLOCKED,
            RunStatus.INVALID,
        }
        undetermined_runs = {RunStatus.UNDETERMINED}

        cls._run_status_to_job_status_mapping = {
            JobStatus.PROVISIONING: provisioning_runs,
            JobStatus.RUNNING: running_runs,
            JobStatus.FINISHED: finished_runs,
            JobStatus.FAILED: failed_runs,
            JobStatus.UNDETERMINED: undetermined_runs,
        }

    @classmethod
    def get_job_status_from_run_status(cls, run_status: RunStatus):
        """Translate a ``RunStatus`` into the ``JobStatus`` whose set contains it.

        The lookup table is created on first use. A run status that appears in
        none of the sets yields ``JobStatus.UNDETERMINED``.
        """
        if not hasattr(cls, "_run_status_to_job_status_mapping"):
            cls._create_run_status_to_job_status_mapping()

        candidates = (
            job_state
            for job_state, members in cls._run_status_to_job_status_mapping.items()
            if run_status in members
        )
        return next(candidates, JobStatus.UNDETERMINED)
|
||
|
||
class Job(ABC):
    """Abstract base for a unit of work that a workflow can run, poll and kill."""

    def __init__(self, name):
        """
        Set up the job.
        Parameters:
        - name (str): Identifier of the job inside a workflow; it should be unique there.
        """
        self.name = name

    def __repr__(self):
        # Class name, the low 24 bits of id(self) in hex, then every instance
        # attribute rendered as key=repr(value).
        rendered_attrs = " ".join(f"{key}={value!r}" for key, value in self.__dict__.items())
        short_id = id(self) & 0xFFFFFF
        return f"<{self.__class__.__name__} @{short_id:x} {rendered_attrs}>"

    @abstractmethod
    def run(self):
        """
        Execute the job. Subclasses put the job's execution logic here.
        """

    @abstractmethod
    def status(self) -> JobStatus:
        """
        Report the current state of the job as a JobStatus member.
        """

    @abstractmethod
    def kill(self):
        """
        Kill the job if it is running on a remote server.
        """
Oops, something went wrong.