[Feature] Add new system model (#336)

alibaba · Aug 20, 2022 · 34dba76 · 34dba76
1 parent 4a43191
commit 34dba76
Show file tree

Hide file tree

Showing 8 changed files with 117 additions and 43 deletions.
diff --git a/benchmark/FedHPOB/README.md b/benchmark/FedHPOB/README.md
@@ -12,8 +12,8 @@ We highly recommend running FedHPO-B with conda.
 
   ```bash
   git clone https://github.com/alibaba/FederatedScope.git
-  git checkout a653102c6b8d5421d2874077594df0b2e29401a1
   cd FederatedScope
+  git checkout a653102c6b8d5421d2874077594df0b2e29401a1
   pip install -e .
   ```
 

diff --git a/benchmark/FedHPOB/example.py b/benchmark/FedHPOB/example.py
@@ -0,0 +1,17 @@
+from fedhpob.config import fhb_cfg
+from fedhpob.benchmarks import TabularBenchmark
+
+benchmark = TabularBenchmark('cnn', 'femnist', 'avg')
+
+# get hyperparameters space
+config_space = benchmark.get_configuration_space(CS=True)
+
+# get fidelity space
+fidelity_space = benchmark.get_fidelity_space(CS=True)
+
+# get results
+res = benchmark(config_space.sample_configuration(),
+                fidelity_space.sample_configuration(),
+                fhb_cfg=fhb_cfg,
+                seed=12345)
+print(res)
diff --git a/benchmark/FedHPOB/fedhpob/benchmarks/base_benchmark.py b/benchmark/FedHPOB/fedhpob/benchmarks/base_benchmark.py
@@ -1,14 +1,17 @@
 import abc
 import os
+import pickle
+import datetime
 import numpy as np
 from federatedscope.core.configs.config import global_cfg
 from federatedscope.core.auxiliaries.data_builder import get_data
+from fedhpob.utils.tabular_dataloader import load_data
 from fedhpob.utils.util import disable_fs_logger
 from fedhpob.utils.cost_model import get_cost_model
 
 
 class BaseBenchmark(abc.ABC):
-    def __init__(self, model, dname, algo, rng=None, **kwargs):
+    def __init__(self, model, dname, algo, cost_mode, rng=None, **kwargs):
         """
 
         :param rng:
@@ -20,7 +23,8 @@ def __init__(self, model, dname, algo, rng=None, **kwargs):
             self.rng = np.random.RandomState()
         self.configuration_space = self.get_configuration_space()
         self.fidelity_space = self.get_fidelity_space()
-
+        self.model, self.dname, self.algo, self.cost_mode = model, dname, \
+            algo, cost_mode
         # Load data and modify cfg of FS.
         self.cfg = global_cfg.clone()
         self.cfg.set_new_allowed(True)
@@ -29,6 +33,12 @@ def __init__(self, model, dname, algo, rng=None, **kwargs):
         self.cfg.data.type = dname
         self.data, modified_cfg = get_data(config=self.cfg.clone())
         self.cfg.merge_from_other_cfg(modified_cfg)
+        # Try load time data
+        try:
+            datadir = os.path.join('data', 'tabular_data')
+            self.table, _ = load_data(datadir, model, dname, algo)
+        except:
+            self.table = None
         disable_fs_logger(self.cfg, True)
 
     def __call__(self, configuration, fidelity, seed=1, **kwargs):
@@ -40,7 +50,44 @@ def __call__(self, configuration, fidelity, seed=1, **kwargs):
     def _check(self, configuration, fidelity):
         pass
 
+    def _search(self, configuration, fidelity):
+        # For configuration
+        mask = np.array([True] * self.table.shape[0])
+        for col in configuration.keys():
+            mask *= (self.table[col].values == configuration[col])
+        idx = np.where(mask)
+        result = self.table.iloc[idx]
+
+        # For fidelity
+        mask = np.array([True] * result.shape[0])
+        for col in fidelity.keys():
+            if col == 'round':
+                continue
+            mask *= (result[col].values == fidelity[col])
+        idx = np.where(mask)
+        result = result.iloc[idx]["result"]
+        return result
+
+    def get_lamba_from_df(self, configuration, fidelity):
+        if self.table is not None:
+            client_num = self.cfg.federate.client_num * \
+                       self.cfg.federate.sample_client_rate
+            result = self._search({'seed': 0, **configuration}, fidelity)
+            index = list(result.keys())
+            filterd_result = eval(result[index[0]])
+            c = np.mean(filterd_result['train_time']) + np.mean(
+                filterd_result['eval_time'])
+            return c.total_seconds() / float(client_num)
+        else:
+            from fedhpob.config import fhb_cfg
+            return fhb_cfg.cost.c
+
     def _cost(self, configuration, fidelity, **kwargs):
+        try:
+            kwargs['const'] = self.get_lamba_from_df(configuration, fidelity)
+        except:
+            from fedhpob.config import fhb_cfg
+            kwargs['const'] = fhb_cfg.cost.c
         cost_model = get_cost_model(mode=self.cost_mode)
         t = cost_model(self.cfg, configuration, fidelity, self.data, **kwargs)
         return t

diff --git a/benchmark/FedHPOB/fedhpob/benchmarks/raw_benchmark.py b/benchmark/FedHPOB/fedhpob/benchmarks/raw_benchmark.py
@@ -18,10 +18,9 @@ def __init__(self,
                  rng=None,
                  cost_mode='estimated',
                  **kwargs):
-        self.model, self.dname, self.algo, self.cost_mode = model, dname, \
-                                                            algo, cost_mode
+        super(RawBenchmark, self).__init__(model, dname, algo, cost_mode, rng,
+                                           **kwargs)
         self.device = kwargs['device']
-        super(RawBenchmark, self).__init__(model, dname, algo, rng, **kwargs)
 
     def _run_fl(self, configuration, fidelity, key='val_avg_loss', seed=1):
         init_cfg = self.cfg.clone()

diff --git a/benchmark/FedHPOB/fedhpob/benchmarks/surrogate_benchmark.py b/benchmark/FedHPOB/fedhpob/benchmarks/surrogate_benchmark.py
@@ -25,8 +25,8 @@ def __init__(self,
         else:
             self.surrogate_models, self.meta_info, self.X, self.Y = \
                 load_surrogate_model(modeldir, model, dname, algo)
-        super(SurrogateBenchmark, self).__init__(model, dname, algo, rng,
-                                                 **kwargs)
+        super(SurrogateBenchmark, self).__init__(model, dname, algo, cost_mode,
+                                                 rng, **kwargs)
 
     def _check(self, configuration, fidelity):
         for key in configuration:

diff --git a/benchmark/FedHPOB/fedhpob/benchmarks/tabular_benchmark.py b/benchmark/FedHPOB/fedhpob/benchmarks/tabular_benchmark.py
@@ -15,12 +15,10 @@ def __init__(self,
                  rng=None,
                  cost_mode='estimated',
                  **kwargs):
-        self.model, self.dname, self.algo, self.cost_mode = model, dname, \
-                                                            algo, cost_mode
         self.table, self.meta_info = load_data(datadir, model, dname, algo)
         self.eval_freq = self.meta_info['eval_freq']
-        super(TabularBenchmark, self).__init__(model, dname, algo, rng,
-                                               **kwargs)
+        super(TabularBenchmark, self).__init__(model, dname, algo, cost_mode,
+                                               rng, **kwargs)
 
     def _check(self, configuration, fidelity):
         for key, value in configuration.items():
@@ -31,24 +29,6 @@ def _check(self, configuration, fidelity):
             assert value in self.fidelity_space[
                 key], 'fidelity invalid, check `fidelity_space` for help.'
 
-    def _search(self, configuration, fidelity):
-        # For configuration
-        mask = np.array([True] * self.table.shape[0])
-        for col in configuration.keys():
-            mask *= (self.table[col].values == configuration[col])
-        idx = np.where(mask)
-        result = self.table.iloc[idx]
-
-        # For fidelity
-        mask = np.array([True] * result.shape[0])
-        for col in fidelity.keys():
-            if col == 'round':
-                continue
-            mask *= (result[col].values == fidelity[col])
-        idx = np.where(mask)
-        result = result.iloc[idx]["result"]
-        return result
-
     def objective_function(self,
                            configuration,
                            fidelity,

diff --git a/benchmark/FedHPOB/fedhpob/config.py b/benchmark/FedHPOB/fedhpob/config.py
@@ -360,6 +360,7 @@ def initial_cfg(cfg):
     cfg.cost.c = 1  # lambda for exponential distribution, time consumed in
     # client
     cfg.cost.time_server = 0  # time consumed in server, `0` for real time
+    cfg.cost.lag_const = 65535  # Max port number
 
     # bandwidth for estimated cost
     cfg.cost.bandwidth = CN()

diff --git a/benchmark/FedHPOB/fedhpob/utils/cost_model.py b/benchmark/FedHPOB/fedhpob/utils/cost_model.py
@@ -1,3 +1,4 @@
+import math
 from federatedscope.core.auxiliaries.model_builder import get_model
 
 
@@ -40,12 +41,13 @@ def get_cost_model(mode='estimated'):
     """
     cost_dict = {
         'raw': raw_cost,
-        'estimated': estimated_cost,
+        'estimated': cs_cost,
+        'cross_deivce': cd_cost
     }
     return cost_dict[mode]
 
 
-def communication_cost(cfg, model_size, fhb_cfg):
+def communication_csilo_cost(cfg, model_size, fhb_cfg):
     t_up = model_size / fhb_cfg.cost.bandwidth.client_up
     t_down = max(
         cfg.federate.client_num * cfg.federate.sample_client_rate *
@@ -54,7 +56,13 @@ def communication_cost(cfg, model_size, fhb_cfg):
     return t_up + t_down
 
 
-def computation_cost(cfg, fhb_cfg):
+def communication_cdevice_cost(cfg, model_size, fhb_cfg):
+    t_up = model_size / fhb_cfg.cost.bandwidth.client_up
+    t_down = model_size / fhb_cfg.cost.bandwidth.client_down
+    return t_up + t_down
+
+
+def computation_cost(cfg, fhb_cfg, const):
     """
     Assume the time is exponential distribution with c,
     return the expected maximum of M iid random variables plus server time.
@@ -63,26 +71,48 @@ def computation_cost(cfg, fhb_cfg):
         1.0 / i for i in range(
             1,
             int(cfg.federate.client_num * cfg.federate.sample_client_rate) + 1)
-    ]) * fhb_cfg.cost.c
+    ]) * const
     return t_client + fhb_cfg.cost.time_server
 
 
 def raw_cost(**kwargs):
     return None
 
 
-def estimated_cost(cfg, configuration, fidelity, data, **kwargs):
+def get_info(cfg, configuration, fidelity, data):
+    cfg = merge_cfg(cfg, configuration, fidelity)
+    model = get_model(cfg.model, list(data.values())[0])
+    model_size = sum([param.nelement() for param in model.parameters()])
+    return cfg, model_size
+
+
+def cs_cost(cfg, configuration, fidelity, data, **kwargs):
     """
-    Works on raw, tabular and surrogate mode.
+        Works on raw, tabular and surrogate mode, cross-silo
     """
-    def get_info(cfg, configuration, fidelity, data):
-        cfg = merge_cfg(cfg, configuration, fidelity)
-        model = get_model(cfg.model, list(data.values())[0])
-        model_size = sum([param.nelement() for param in model.parameters()])
-        return cfg, model_size
+    cfg, num_param = get_info(cfg, configuration, fidelity, data)
+    t_comm = communication_csilo_cost(cfg, num_param, kwargs['fhb_cfg'])
+    t_comp = computation_cost(cfg, kwargs['fhb_cfg'], kwargs['const'])
+    t_round = t_comm + t_comp
+    return t_round * cfg.federate.total_round_num
+
 
+def cd_cost(cfg, configuration, fidelity, data, **kwargs):
+    """
+        Works on raw, tabular and surrogate mode, cross-device
+    """
     cfg, num_param = get_info(cfg, configuration, fidelity, data)
-    t_comm = communication_cost(cfg, num_param, kwargs['fhb_cfg'])
-    t_comp = computation_cost(cfg, kwargs['fhb_cfg'])
+    t_comm = communication_cdevice_cost(cfg, num_param, kwargs['fhb_cfg']) +\
+        latency(cfg, kwargs['fhb_cfg'])
+    t_comp = computation_cost(cfg, kwargs['fhb_cfg'], kwargs['const'])
     t_round = t_comm + t_comp
     return t_round * cfg.federate.total_round_num
+
+
+def latency(fs_cfg, fhb_cfg):
+    """
+        A fixed latency for every communication.
+        From: Wang et al. “A field guide to federated optimization”.
+    """
+    act = fs_cfg.federate.client_num * fs_cfg.federate.sample_client_rate
+    return math.exp(act - fhb_cfg.cost.lag_const)