diff --git a/python/fedml/__init__.py b/python/fedml/__init__.py
index 2e95fedeeb..81e864b2ea 100644
--- a/python/fedml/__init__.py
+++ b/python/fedml/__init__.py
@@ -34,7 +34,7 @@
 _global_training_type = None
 _global_comm_backend = None
 
-__version__ = "0.8.18a11"
+__version__ = "0.8.18a13"
 
 
 # This is the deployment environment used for different roles (RD/PM/BD/Public Developers). Potential VALUE: local, dev, test, release
@@ -537,7 +537,7 @@ def _get_local_s3_like_service_url():
 
 from fedml import api
 from fedml import mlops
-from fedml.mlops import log, Artifact, log_artifact, log_model, log_metric, log_llm_record
+from fedml.mlops import log, Artifact, log_artifact, log_model, log_metric, log_llm_record, log_endpoint
 
 __all__ = [
     "FedMLRunner",
diff --git a/python/fedml/computing/scheduler/comm_utils/job_monitor.py b/python/fedml/computing/scheduler/comm_utils/job_monitor.py
index 178cd42828..2077b2509f 100644
--- a/python/fedml/computing/scheduler/comm_utils/job_monitor.py
+++ b/python/fedml/computing/scheduler/comm_utils/job_monitor.py
@@ -84,7 +84,8 @@ def monitor_slave_run_process_status(self):
                     if not self.released_runs.get(str(job.job_id), False):
                         self.released_runs[str(job.job_id)] = True
                         # Release the gpu ids
-                        print(f"[run/device][{job.job_id}/{job.edge_id}] Release gpu resource when run processes has exited on monioring slave runs periodically.")
+                        print(
+                            f"[run/device][{job.job_id}/{job.edge_id}] Release gpu resource when run processes has exited on monioring slave runs periodically.")
                         JobRunnerUtils.get_instance().release_gpu_ids(job.job_id, job.edge_id)
 
                 # Get the timeout threshold
@@ -137,7 +138,8 @@ def monitor_slave_run_process_status(self):
                             self.released_endpoints[str(job.job_id)] = True
 
                             # Release the gpu ids
-                            print(f"[endpoint/device][{job.job_id}/{job.edge_id}] Release gpu resource when monioring worker endpoint periodically.")
+                            print(
+                                f"[endpoint/device][{job.job_id}/{job.edge_id}] Release gpu resource when monioring worker endpoint periodically.")
                             JobRunnerUtils.get_instance().release_gpu_ids(job.job_id, job.edge_id)
 
         except Exception as e:
@@ -235,7 +237,8 @@ def monitor_slave_endpoint_status(self):
                 device_client_data_interface.FedMLClientDataInterface.get_instance().create_job_table()
             except Exception as e:
                 pass
-            FedMLModelDatabase.get_instance().set_database_base_dir(device_client_constants.ClientConstants.get_database_dir())
+            FedMLModelDatabase.get_instance().set_database_base_dir(
+                device_client_constants.ClientConstants.get_database_dir())
             job_list = device_client_data_interface.FedMLClientDataInterface.get_instance().get_jobs_from_db()
             agent_config = dict()
             agent_config["mqtt_config"] = self.mqtt_config
@@ -252,7 +255,8 @@ def monitor_slave_endpoint_status(self):
                         self.released_endpoints[str(job.job_id)] = True
 
                         # Release the gpu ids
-                        print(f"[endpoint/device][{job.job_id}/{job.edge_id}] Release gpu resource when woker endpoint failed on monitoring periodically.")
+                        print(
+                            f"[endpoint/device][{job.job_id}/{job.edge_id}] Release gpu resource when woker endpoint failed on monitoring periodically.")
                         JobRunnerUtils.get_instance().release_gpu_ids(job.job_id, job.edge_id)
 
                 elif job.status == device_client_constants.ClientConstants.MSG_MLOPS_CLIENT_STATUS_FINISHED:
@@ -295,7 +299,7 @@ def monitor_slave_endpoint_status(self):
                                 device_ids[0], job.edge_id, job.job_id, endpoint_name, model_name,
                                 model_id, model_version, inference_port=None, disable=True)
-                            
+
                             # [Critical]
                             # 1. After restart,
                             # the "running" status of container does NOT mean the endpoint is ready.
@@ -304,18 +308,22 @@ def monitor_slave_endpoint_status(self):
                             status = result_json.get("model_status", None)
 
                             if status != device_server_constants.ServerConstants.MSG_MODELOPS_DEPLOYMENT_STATUS_UPDATING:
-                                started, inference_port = ContainerUtils.get_instance().restart_container(endpoint_container_name)
-                                deployment_result["model_status"] = device_server_constants.ServerConstants.MSG_MODELOPS_DEPLOYMENT_STATUS_UPDATING
-                                
+                                started, inference_port = ContainerUtils.get_instance().restart_container(
+                                    endpoint_container_name)
+                                deployment_result[
+                                    "model_status"] = device_server_constants.ServerConstants.MSG_MODELOPS_DEPLOYMENT_STATUS_UPDATING
+
                                 # Change the local port for next ready check
                                 endpoint_sync_protocol.set_local_deployment_status_result(
                                     job.job_id, endpoint_name, model_name, model_version, job.edge_id,
                                     inference_port, status_result, deployment_result)
                             else:
-                                started, inference_port = ContainerUtils.get_instance().start_container(endpoint_container_name)
-                                
+                                started, inference_port = ContainerUtils.get_instance().start_container(
+                                    endpoint_container_name)
+
                                 if is_endpoint_ready:
-                                    deployment_result["model_status"] = device_server_constants.ServerConstants.MSG_MODELOPS_DEPLOYMENT_STATUS_DEPLOYED
+                                    deployment_result[
+                                        "model_status"] = device_server_constants.ServerConstants.MSG_MODELOPS_DEPLOYMENT_STATUS_DEPLOYED
 
                                     # Send the inference info to the master agent
                                     # TODO: Consistency control
@@ -327,7 +335,7 @@ def monitor_slave_endpoint_status(self):
                                     endpoint_sync_protocol.set_local_deployment_status_result(
                                         job.job_id, endpoint_name, model_name, model_version, job.edge_id,
                                         inference_port, status_result, deployment_result)
-                    
+
                 elif job.status == device_client_constants.ClientConstants.MSG_MLOPS_CLIENT_STATUS_OFFLINE:
                     endpoint_json = json.loads(job.running_json)
                     model_config = endpoint_json.get("model_config", {})
@@ -357,7 +365,8 @@ def monitor_slave_endpoint_status(self):
 
                     for i in range(num_containers):
                         endpoint_container_name = endpoint_container_name_prefix + f"__{i}"
-                        started, inference_port = ContainerUtils.get_instance().start_container(endpoint_container_name)
+                        started, inference_port = ContainerUtils.get_instance().start_container(
+                            endpoint_container_name)
                         if started and inference_port != 0:
                             endpoint_sync_protocol.send_sync_inference_info(
                                 device_ids[0], job.edge_id, job.job_id, endpoint_name, model_name,
@@ -383,7 +392,8 @@ def monitor_slave_endpoint_status(self):
                               f"{device_client_constants.ClientConstants.MSG_MLOPS_CLIENT_STATUS_FAILED}.")
 
                         mlops.log_training_status(
-                            device_client_constants.ClientConstants.MSG_MLOPS_CLIENT_STATUS_FAILED, run_id=job.job_id,
+                            device_client_constants.ClientConstants.MSG_MLOPS_CLIENT_STATUS_FAILED,
+                            run_id=job.job_id,
                             edge_id=job.edge_id, is_from_model=True, enable_broadcast=True
                         )
 
@@ -391,7 +401,8 @@ def monitor_slave_endpoint_status(self):
                             self.released_endpoints[str(job.job_id)] = True
 
                             # Release the gpu ids
-                            print(f"[endpoint/device][{job.job_id}/{job.edge_id}] Release gpu resource when the worker endpoint runs timeout on monioring periodically.")
+                            print(
+                                f"[endpoint/device][{job.job_id}/{job.edge_id}] Release gpu resource when the worker endpoint runs timeout on monioring periodically.")
                             JobRunnerUtils.get_instance().release_gpu_ids(job.job_id, job.edge_id)
 
                         # Get endpoint container name prefix
@@ -408,7 +419,8 @@ def monitor_slave_endpoint_status(self):
 
                             print(f"[Worker][{job.job_id}:{job.edge_id}] Release gpu ids.")
                 except Exception as e:
-                    print(f"[Worker][{job.job_id}:{job.edge_id}] Exception when syncing endpoint process on the slave agent. {traceback.format_exc()}")
+                    print(
+                        f"[Worker][{job.job_id}:{job.edge_id}] Exception when syncing endpoint process on the slave agent. {traceback.format_exc()}")
         except Exception as e:
             print(f"[Worker] Exception when syncing endpoint process on the slave agent. {traceback.format_exc()}")
             pass
@@ -458,7 +470,8 @@ def _check_and_reset_endpoint_status(
                     self.released_endpoints[str(endpoint_id)] = True
 
                     # Release the gpu ids
-                    print(f"[endpoint/device][{endpoint_id}/{device_id}] Release gpu resource when the worker endpoint is not ready on monitoring periodically.")
+                    print(
+                        f"[endpoint/device][{endpoint_id}/{device_id}] Release gpu resource when the worker endpoint is not ready on monitoring periodically.")
                    JobRunnerUtils.get_instance().release_gpu_ids(endpoint_id, device_id)
 
                    return False
@@ -471,11 +484,11 @@ def is_inference_ready(self, inference_url, timeout=None, device_id=None, endpoi
         if response_ok:
             print("Use http health check.")
             return response_ok
-        
+
         if response_ok is None:
             # Internal server can response, but reply is not ready
             return False
-        
+
         # Cannot reach the server, will try other protocols
         print("Use http health check failed.")
 
@@ -549,7 +562,7 @@ def inference(
 
         return False, None
 
-    def _check_all_slave_endpoint_status(self, endpoint_id, endpoint_name, model_name, 
+    def _check_all_slave_endpoint_status(self, endpoint_id, endpoint_name, model_name,
                                          server_internal_port=ServerConstants.MODEL_INFERENCE_DEFAULT_PORT):
         # Get model deployment result
         is_endpoint_offline = True
@@ -610,7 +623,8 @@ def monitor_master_endpoint_status(self):
 
                    if endpoint_status == device_server_constants.ServerConstants.MSG_MODELOPS_DEPLOYMENT_STATUS_FAILED:
                        # Release the gpu ids
-                        print(f"[endpoint/device][{job.job_id}/{job.edge_id}] Release gpu resource when the master endpoint failed on monitoring periodically.")
+                        print(
+                            f"[endpoint/device][{job.job_id}/{job.edge_id}] Release gpu resource when the master endpoint failed on monitoring periodically.")
                        JobRunnerUtils.get_instance().release_gpu_ids(job.job_id, job.edge_id)
                    elif endpoint_status == device_server_constants.ServerConstants.MSG_MODELOPS_DEPLOYMENT_STATUS_DEPLOYED:
                        if model_name is None:
@@ -618,8 +632,8 @@ def monitor_master_endpoint_status(self):
                         try:
                             # If the endpoint is offline, then report offline status to the MLOps.
                             model_config_parameters = model_config.get("parameters", {})
-                            server_internal_port = model_config_parameters.get("server_internal_port", 
-                                                                               ServerConstants.MODEL_INFERENCE_DEFAULT_PORT)
+                            server_internal_port = model_config_parameters.get("server_internal_port",
+                                                                               ServerConstants.MODEL_INFERENCE_DEFAULT_PORT)
                             is_endpoint_online = self._check_all_slave_endpoint_status(job.job_id, endpoint_name, model_name,
                                                                                        server_internal_port)
                             if not is_endpoint_online:
@@ -664,8 +678,8 @@ def monitor_master_endpoint_status(self):
                     elif endpoint_status == device_server_constants.ServerConstants.MSG_MLOPS_SERVER_STATUS_OFFLINE:
                         # If the endpoint is offline, then report offline status to the MLOps.
                         model_config_parameters = model_config.get("parameters", {})
-                        server_internal_port = model_config_parameters.get("server_internal_port", 
-                                                                           ServerConstants.MODEL_INFERENCE_DEFAULT_PORT)
+                        server_internal_port = model_config_parameters.get("server_internal_port",
+                                                                           ServerConstants.MODEL_INFERENCE_DEFAULT_PORT)
                         is_endpoint_online = self._check_all_slave_endpoint_status(
                             job.job_id, endpoint_name, model_name, server_internal_port)
                         if is_endpoint_online:
@@ -691,14 +705,23 @@ def monitor_endpoint_logs(self):
                 device_client_data_interface.FedMLClientDataInterface.get_instance().create_job_table()
             except Exception as e:
                 pass
+            number_of_finished_jobs = 0
             job_list = device_client_data_interface.FedMLClientDataInterface.get_instance().get_jobs_from_db()
 
             for job in job_list.job_list:
                 count += 1
                 if count >= 1000:
                     break
+                if job.status == device_client_constants.ClientConstants.MSG_MLOPS_CLIENT_STATUS_FAILED or \
+                        job.status == device_client_constants.ClientConstants.MSG_MLOPS_CLIENT_STATUS_KILLED:
+                    MLOpsRuntimeLogDaemon.get_instance(fedml_args).stop_log_processor(job.job_id, job.edge_id)
+                    continue
+
                 if job.status != device_client_constants.ClientConstants.MSG_MLOPS_CLIENT_STATUS_FINISHED:
                     continue
+                number_of_finished_jobs += 1
+                if number_of_finished_jobs >= 100:
+                    break
 
                 endpoint_json = json.loads(job.running_json) if job.running_json is not None else {}
                 model_config = endpoint_json.get("model_config", {})
@@ -711,14 +734,6 @@ def monitor_endpoint_logs(self):
                     job.job_id, job.edge_id, device_server_constants.ServerConstants.get_log_file_dir(),
                     is_server=True, log_file_prefix=JobMonitor.ENDPOINT_CONTAINER_LOG_PREFIX,
                 )
-                if not MLOpsRuntimeLogDaemon.get_instance(fedml_args).is_log_processor_running(job.job_id, job.edge_id):
-                    setattr(fedml_args, "log_file_dir", os.path.dirname(log_file_path))
-                    MLOpsRuntimeLogDaemon.get_instance(fedml_args).log_file_dir = os.path.dirname(log_file_path)
-                    MLOpsRuntimeLogDaemon.get_instance(fedml_args).start_log_processor(
-                        job.job_id, job.edge_id,
-                        log_source=device_client_constants.ClientConstants.FEDML_LOG_SOURCE_TYPE_MODEL_END_POINT,
-                        log_file_prefix=JobMonitor.ENDPOINT_CONTAINER_LOG_PREFIX
-                    )
 
                 # Get endpoint container name
                 endpoint_container_name_prefix = device_client_constants.ClientConstants.get_endpoint_container_name(
@@ -728,12 +743,14 @@ def monitor_endpoint_logs(self):
                 num_containers = ContainerUtils.get_instance().get_container_rank_same_model(
                     endpoint_container_name_prefix)
 
+                is_job_container_running = False
                 for i in range(num_containers):
                     endpoint_container_name = endpoint_container_name_prefix + f"__{i}"
 
                     endpoint_logs = ContainerUtils.get_instance().get_container_logs(endpoint_container_name)
                     if endpoint_logs is None:
                         continue
+                    is_job_container_running = True
 
                     # Write container logs to the log file
                     if i == 0:
@@ -743,5 +760,15 @@ def monitor_endpoint_logs(self):
                         with open(log_file_path, "a") as f:
                             f.write(endpoint_logs)
 
+                if is_job_container_running and not MLOpsRuntimeLogDaemon.get_instance(fedml_args). \
+                        is_log_processor_running(job.job_id, job.edge_id):
+                    setattr(fedml_args, "log_file_dir", os.path.dirname(log_file_path))
+                    MLOpsRuntimeLogDaemon.get_instance(fedml_args).log_file_dir = os.path.dirname(log_file_path)
+                    MLOpsRuntimeLogDaemon.get_instance(fedml_args).start_log_processor(
+                        job.job_id, job.edge_id,
+                        log_source=device_client_constants.ClientConstants.FEDML_LOG_SOURCE_TYPE_MODEL_END_POINT,
+                        log_file_prefix=JobMonitor.ENDPOINT_CONTAINER_LOG_PREFIX
+                    )
+
         except Exception as e:
             print(f"Exception when syncing endpoint log to MLOps {traceback.format_exc()}.")
diff --git a/python/fedml/computing/scheduler/comm_utils/run_process_utils.py b/python/fedml/computing/scheduler/comm_utils/run_process_utils.py
index 33d5202589..7358c5bfb8 100644
--- a/python/fedml/computing/scheduler/comm_utils/run_process_utils.py
+++ b/python/fedml/computing/scheduler/comm_utils/run_process_utils.py
@@ -186,3 +186,17 @@ def get_pid_from_cmd_line(cmd_line, break_on_first=True):
             pass
 
         return ret_pids
+
+    @staticmethod
+    def is_process_running(process_id):
+        is_running = False
+        try:
+            process = psutil.Process(process_id)
+            if process.status() == psutil.STATUS_RUNNING or \
+                    process.status() == psutil.STATUS_SLEEPING or \
+                    process.status() == psutil.STATUS_IDLE:
+                is_running = True
+        except Exception as e:
+            pass
+
+        return is_running
diff --git a/python/fedml/computing/scheduler/comm_utils/sys_utils.py b/python/fedml/computing/scheduler/comm_utils/sys_utils.py
index f238956ba2..8bdc6f8141 100644
--- a/python/fedml/computing/scheduler/comm_utils/sys_utils.py
+++ b/python/fedml/computing/scheduler/comm_utils/sys_utils.py
@@ -35,6 +35,7 @@
 "143": "Command terminated with signal 15 (SIGTERM) (kill command)."}
 
 enable_simulation_gpu = False
+simulation_gpu_count = 1
 
 
 def get_sys_runner_info():
@@ -112,7 +113,7 @@ def get_sys_runner_info():
         pass
 
     if enable_simulation_gpu:
-        gpu_count = 8
+        gpu_count = simulation_gpu_count
         gpu_total_mem = "80G"
         gpu_available_mem = "80G"
         gpu_vendor = "NVIDIA"
@@ -156,7 +157,7 @@ def get_gpu_list():
                         'memoryUsed': 7.0, 'memoryFree': 81042.0, 'driver': '535.54.03',
                         'gpu_name': 'NVIDIA A100-SXM4-80GB', 'serial': '1320723000504',
                         'display_mode': 'Enabled', 'display_active': 'Disabled', 'temperature': 33.0}]
-        return ret_gpu_list
+        return ret_gpu_list[0:simulation_gpu_count]
 
     gpu_list = GPUtil.getGPUs()
     ret_gpu_list = list()
diff --git a/python/fedml/computing/scheduler/model_scheduler/device_client_runner.py b/python/fedml/computing/scheduler/model_scheduler/device_client_runner.py
index 90e8c79be0..e64222313f 100755
--- a/python/fedml/computing/scheduler/model_scheduler/device_client_runner.py
+++ b/python/fedml/computing/scheduler/model_scheduler/device_client_runner.py
@@ -279,12 +279,12 @@ def run(self, process_event, completed_event):
                 run_id, self.edge_id, ClientConstants.MSG_MLOPS_CLIENT_STATUS_FAILED)
             logging.info(f"[endpoint/device][{run_id}/{self.edge_id}] Release gpu resource when the worker deployment occurred exceptions.")
             self.release_gpu_ids(run_id)
-            MLOpsRuntimeLogDaemon.get_instance(self.args).stop_log_processor(self.run_id, self.edge_id)
+            MLOpsRuntimeLogDaemon.get_instance(self.args).stop_log_processor(run_id, self.edge_id)
             time.sleep(2)
             sys.exit(1)
         finally:
             logging.info("Release resources.")
-            MLOpsRuntimeLogDaemon.get_instance(self.args).stop_log_processor(self.run_id, self.edge_id)
+            MLOpsRuntimeLogDaemon.get_instance(self.args).stop_log_processor(run_id, self.edge_id)
             if self.mlops_metrics is not None:
                 self.mlops_metrics.stop_sys_perf()
             time.sleep(3)
diff --git a/python/fedml/computing/scheduler/model_scheduler/device_server_runner.py b/python/fedml/computing/scheduler/model_scheduler/device_server_runner.py
index 783ffab0c6..ff1ce82b4a 100755
--- a/python/fedml/computing/scheduler/model_scheduler/device_server_runner.py
+++ b/python/fedml/computing/scheduler/model_scheduler/device_server_runner.py
@@ -210,6 +210,8 @@ def run(self, process_event, completed_event):
         self.run_process_event = process_event
         self.run_process_completed_event = completed_event
 
+        run_id = self.request_json.get("end_point_id")
+
         try:
             MLOpsUtils.set_ntp_offset(self.ntp_offset)
 
@@ -230,14 +232,14 @@ def run(self, process_event, completed_event):
             self.mlops_metrics.report_server_training_status(
                 self.run_id, ServerConstants.MSG_MLOPS_SERVER_STATUS_FAILED,
                 is_from_model=True, edge_id=self.edge_id)
-            MLOpsRuntimeLogDaemon.get_instance(self.args).stop_log_processor(self.run_id, self.edge_id)
+            MLOpsRuntimeLogDaemon.get_instance(self.args).stop_log_processor(run_id, self.edge_id)
             if self.mlops_metrics is not None:
                 self.mlops_metrics.stop_sys_perf()
             time.sleep(3)
             sys.exit(1)
         finally:
             logging.info("Release resources.")
-            MLOpsRuntimeLogDaemon.get_instance(self.args).stop_log_processor(self.run_id, self.edge_id)
+            MLOpsRuntimeLogDaemon.get_instance(self.args).stop_log_processor(run_id, self.edge_id)
             if self.mlops_metrics is not None:
                 self.mlops_metrics.stop_sys_perf()
             time.sleep(3)
diff --git a/python/fedml/computing/scheduler/slave/client_runner.py b/python/fedml/computing/scheduler/slave/client_runner.py
index 3821c6874a..80d0f86949 100755
--- a/python/fedml/computing/scheduler/slave/client_runner.py
+++ b/python/fedml/computing/scheduler/slave/client_runner.py
@@ -1648,6 +1648,8 @@ def setup_agent_mqtt_connection(self, service_config):
 
         JobCleanup.get_instance().sync_data_on_startup(self.edge_id)
 
+        os.environ["FEDML_DEPLOY_MASTER_ID"] = str(self.model_device_server.get_edge_id())
+        os.environ["FEDML_DEPLOY_WORKER_IDS"] = str([client.get_edge_id() for client in self.model_device_client_list])
         self.mlops_metrics.stop_device_realtime_perf()
         self.mlops_metrics.report_device_realtime_perf(self.args, service_config["mqtt_config"])
 
diff --git a/python/fedml/core/mlops/mlops_device_perfs.py b/python/fedml/core/mlops/mlops_device_perfs.py
index 7d4f58bf87..e02203ff6f 100644
--- a/python/fedml/core/mlops/mlops_device_perfs.py
+++ b/python/fedml/core/mlops/mlops_device_perfs.py
@@ -168,6 +168,8 @@ def report_gpu_device_info(edge_id, mqtt_mgr=None):
     topic_name = "ml_client/mlops/gpu_device_info"
     device_info_json = {
         "edgeId": edge_id,
+        "deploy_master_id": os.environ.get("FEDML_DEPLOY_MASTER_ID", ""),
+        "deploy_worker_ids": os.environ.get("FEDML_DEPLOY_WORKER_IDS", "[]"),
         "memoryTotal": round(total_mem * MLOpsUtils.BYTES_TO_GB, 2),
         "memoryAvailable": round(free_mem * MLOpsUtils.BYTES_TO_GB, 2),
         "diskSpaceTotal": round(total_disk_size * MLOpsUtils.BYTES_TO_GB, 2),
diff --git a/python/fedml/core/mlops/mlops_runtime_log_daemon.py b/python/fedml/core/mlops/mlops_runtime_log_daemon.py
index b131ecef16..3fecdb834b 100644
--- a/python/fedml/core/mlops/mlops_runtime_log_daemon.py
+++ b/python/fedml/core/mlops/mlops_runtime_log_daemon.py
@@ -10,6 +10,7 @@
 import requests
 import yaml
 
+from fedml.computing.scheduler.comm_utils.run_process_utils import RunProcessUtils
 from ...core.mlops.mlops_configs import MLOpsConfigs
 import fedml
 
@@ -478,7 +479,8 @@ def stop_all_log_processor(self):
 
     def is_log_processor_running(self, in_run_id, in_device_id):
         for (log_child_process, log_run_id, log_device_id) in self.log_child_process_list:
-            if str(in_run_id) == str(log_run_id) and str(in_device_id) == str(log_device_id):
+            if str(in_run_id) == str(log_run_id) and str(in_device_id) == str(log_device_id) and \
+                    log_child_process is not None and RunProcessUtils.is_process_running(log_child_process.pid):
                 return True
         return False
 
diff --git a/python/fedml/workflow/driver_example/hello_world_job.yaml b/python/fedml/workflow/driver_example/hello_world_job.yaml
index ba9a2d0384..046fb2b0e2 100755
--- a/python/fedml/workflow/driver_example/hello_world_job.yaml
+++ b/python/fedml/workflow/driver_example/hello_world_job.yaml
@@ -24,6 +24,6 @@ bootstrap: |
   echo "Bootstrap finished."
 
 computing:
-  resource_type: H100-80GB-HBM3 # e.g., A100-80G, please check the resource type list by "fedml show-resource-type" or visiting URL: https://open.fedml.ai/accelerator_resource_type
+  resource_type: A100-80GB-SXM # e.g., A100-80G, please check the resource type list by "fedml show-resource-type" or visiting URL: https://open.fedml.ai/accelerator_resource_type
   minimum_num_gpus: 1 # minimum # of GPUs to provision
-  maximum_cost_per_hour: $0.5 # max cost per hour of all machines for your job
+  maximum_cost_per_hour: $10 # max cost per hour of all machines for your job
diff --git a/python/fedml/workflow/driver_example/main.py b/python/fedml/workflow/driver_example/main.py
index d432345558..eafcb61117 100644
--- a/python/fedml/workflow/driver_example/main.py
+++ b/python/fedml/workflow/driver_example/main.py
@@ -5,6 +5,13 @@
 from fedml.workflow.jobs import Job, JobStatus
 from fedml.workflow.workflow import Workflow
 
+# CURRENT_CONFIG_VERSION = "release"
+# MY_API_KEY = "10e87dd6d6574311a80200455e4d9b30"
+CURRENT_CONFIG_VERSION = "local"
+CURRENT_ON_PREM_LOCAL_HOST = "localhost"
+CURRENT_ON_PREM_LOCAL_PORT = 18080
+MY_API_KEY = "1316b93c82da40ce90113a2ed12f0b14"
+
 
 class HelloWorldJob(Job):
     def __init__(self, name):
@@ -12,17 +19,20 @@ def __init__(self, name):
         self.run_id = None
 
     def run(self):
-        fedml.set_env_version("test")
+        fedml.set_env_version(CURRENT_CONFIG_VERSION)
+        fedml.set_local_on_premise_platform_host(CURRENT_ON_PREM_LOCAL_HOST)
+        fedml.set_local_on_premise_platform_port(CURRENT_ON_PREM_LOCAL_PORT)
+
         working_directory = os.path.dirname(os.path.abspath(__file__))
         absolute_path = os.path.join(working_directory, "hello_world_job.yaml")
-        result = fedml.api.launch_job(yaml_file=absolute_path, api_key="30d1bbcae9ec48ffa314caa8e944d187")
+        result = fedml.api.launch_job(yaml_file=absolute_path, api_key=MY_API_KEY)
         if result.run_id and int(result.run_id) > 0:
             self.run_id = result.run_id
 
     def status(self):
         if self.run_id:
             try:
-                _, run_status = fedml.api.run_status(run_id=self.run_id, api_key="30d1bbcae9ec48ffa314caa8e944d187")
+                _, run_status = fedml.api.run_status(run_id=self.run_id, api_key=MY_API_KEY)
                 return JobStatus.get_job_status_from_run_status(run_status)
             except Exception as e:
                 logging.error(f"Error while getting status of run {self.run_id}: {e}")
@@ -31,7 +41,7 @@ def status(self):
     def kill(self):
         if self.run_id:
             try:
-                return fedml.api.run_stop(run_id=self.run_id, api_key="30d1bbcae9ec48ffa314caa8e944d187")
+                return fedml.api.run_stop(run_id=self.run_id, api_key=MY_API_KEY)
             except Exception as e:
                 logging.error(f"Error while stopping run {self.run_id}: {e}")
 
diff --git a/python/setup.py b/python/setup.py
index 578c6e6067..05a0ba4c50 100644
--- a/python/setup.py
+++ b/python/setup.py
@@ -114,7 +114,7 @@ def finalize_options(self):
 
 setup(
     name="fedml",
-    version="0.8.18a11",
+    version="0.8.18a13",
     author="FedML Team",
     author_email="ch@fedml.ai",
     description="A research and production integrated edge-cloud library for "