From e9b3a8c45fc232aebbb7c076cdde8e236c87a96a Mon Sep 17 00:00:00 2001 From: Weirui Kuang <39145382+rayrayraykk@users.noreply.github.com> Date: Tue, 6 Sep 2022 19:56:45 +0800 Subject: [PATCH 01/39] Refactor data-related interfaces --- ...xtra_dependencies_torch1.10-application.sh | 3 + .../attack/trainer/benign_trainer.py | 7 - federatedscope/contrib/data/example.py | 7 +- .../core/auxiliaries/data_builder.py | 137 ++++------------- federatedscope/core/auxiliaries/utils.py | 57 +++++++ federatedscope/core/fed_runner.py | 45 ++---- federatedscope/core/interface/__init__.py | 0 federatedscope/core/interface/base_data.py | 141 ++++++++++++++++++ federatedscope/core/monitors/monitor.py | 3 +- federatedscope/core/trainers/context.py | 4 +- federatedscope/cv/dataloader/dataloader.py | 40 +++-- .../gfl/dataloader/dataloader_graph.py | 47 +++--- .../gfl/dataloader/dataloader_link.py | 17 ++- .../gfl/dataloader/dataloader_node.py | 17 ++- federatedscope/gfl/dataset/cikm_cup.py | 21 ++- federatedscope/hpo.py | 2 +- federatedscope/main.py | 9 +- federatedscope/mf/dataloader/dataloader.py | 26 ++-- federatedscope/nlp/dataloader/dataloader.py | 40 +++-- .../tabular/dataloader/quadratic.py | 20 ++- federatedscope/translator/__init__.py | 3 + federatedscope/translator/data_translator.py | 83 +++++++++++ .../vertical_fl/dataloader/dataloader.py | 4 +- 23 files changed, 470 insertions(+), 263 deletions(-) create mode 100644 federatedscope/core/interface/__init__.py create mode 100644 federatedscope/core/interface/base_data.py create mode 100644 federatedscope/translator/__init__.py create mode 100644 federatedscope/translator/data_translator.py diff --git a/environment/extra_dependencies_torch1.10-application.sh b/environment/extra_dependencies_torch1.10-application.sh index eaffc5844..4657afe7e 100644 --- a/environment/extra_dependencies_torch1.10-application.sh +++ b/environment/extra_dependencies_torch1.10-application.sh @@ -9,3 +9,6 @@ conda install -y nltk conda install -y sentencepiece textgrid typeguard -c conda-forge conda install -y transformers==4.16.2 tokenizers==0.10.3 datasets -c huggingface -c conda-forge conda install -y torchtext -c pytorch + +# Tabular +conda install -y openml==0.12.2 diff --git a/federatedscope/attack/trainer/benign_trainer.py b/federatedscope/attack/trainer/benign_trainer.py index 800c93ed2..e6bd30c24 100644 --- a/federatedscope/attack/trainer/benign_trainer.py +++ b/federatedscope/attack/trainer/benign_trainer.py @@ -1,15 +1,8 @@ -from calendar import c import logging from typing import Type -import torch import numpy as np from federatedscope.core.trainers import GeneralTorchTrainer -from federatedscope.core.auxiliaries.transform_builder import get_transform -from federatedscope.attack.auxiliary.backdoor_utils import normalize -from federatedscope.core.auxiliaries.dataloader_builder import WrapDataset -from federatedscope.core.auxiliaries.dataloader_builder import get_dataloader -from federatedscope.core.auxiliaries.ReIterator import ReIterator logger = logging.getLogger(__name__) diff --git a/federatedscope/contrib/data/example.py b/federatedscope/contrib/data/example.py index ef800f584..da3e9c1cd 100644 --- a/federatedscope/contrib/data/example.py +++ b/federatedscope/contrib/data/example.py @@ -1,7 +1,7 @@ from federatedscope.register import register_data -def MyData(config): +def MyData(config, client_cfgs): r""" Returns: data: @@ -17,12 +17,13 @@ def MyData(config): """ data = None config = config + client_cfgs = client_cfgs return data, config -def 
call_my_data(config): +def call_my_data(config, client_cfgs): if config.data.type == "mydata": - data, modified_config = MyData(config) + data, modified_config = MyData(config, client_cfgs) return data, modified_config diff --git a/federatedscope/core/auxiliaries/data_builder.py b/federatedscope/core/auxiliaries/data_builder.py index 6f41f1452..00f57f6da 100644 --- a/federatedscope/core/auxiliaries/data_builder.py +++ b/federatedscope/core/auxiliaries/data_builder.py @@ -4,9 +4,10 @@ from random import shuffle import numpy as np -from collections import defaultdict from federatedscope.core.auxiliaries.utils import setup_seed +from federatedscope.core.interface.base_data import StandaloneDataDict +from federatedscope.translator import BaseDataTranslator import federatedscope.register as register @@ -20,8 +21,7 @@ f'available.') -def load_toy_data(config=None): - +def load_toy_data(config=None, client_cfgs=None): generate = config.federate.mode.lower() == 'standalone' def _generate_data(client_num=5, @@ -131,10 +131,10 @@ def _generate_data(client_num=5, for k, v in data[key].items() } if data[key] is not None else None - return data, config + return StandaloneDataDict(data, config), config -def load_external_data(config=None): +def load_external_data(config=None, client_cfgs=None): r""" Based on the configuration file, this function imports external datasets and applies train/valid/test splits and split by some specific `splitter` into the standard FederatedScope input data format. @@ -161,7 +161,6 @@ def load_external_data(config=None): import inspect from importlib import import_module from torch.utils.data import DataLoader - from federatedscope.core.auxiliaries.splitter_builder import get_splitter from federatedscope.core.auxiliaries.transform_builder import get_transform def get_func_args(func): @@ -401,13 +400,11 @@ def load_torchtext_data(name, splits=None, config=None): return data_dict def load_torchaudio_data(name, splits=None, config=None): - import torchaudio # dataset_func = getattr(import_module('torchaudio.datasets'), name) raise NotImplementedError def load_torch_geometric_data(name, splits=None, config=None): - import torch_geometric # dataset_func = getattr(import_module('torch_geometric.datasets'), # name) @@ -566,44 +563,16 @@ def load_openml_data(tid, splits=None, config=None): name, package = modified_config.data.type.split('@') dataset = DATA_LOAD_FUNCS[package.lower()](name, splits, modified_config) - splitter = get_splitter(modified_config) - - data_local_dict = { - x: {} - for x in range(1, modified_config.federate.client_num + 1) - } - - # Build dict of Dataloader - train_label_distribution = None - for split in dataset: - if dataset[split] is None: - continue - train_labels = list() - for i, ds in enumerate( - splitter(dataset[split], prior=train_label_distribution)): - labels = [x[1] for x in ds] - if split == 'train': - train_labels.append(labels) - data_local_dict[i + 1][split] = DataLoader( - ds, - batch_size=modified_config.data.batch_size, - shuffle=True, - num_workers=modified_config.data.num_workers) - else: - data_local_dict[i + 1][split] = DataLoader( - ds, - batch_size=modified_config.data.batch_size, - shuffle=False, - num_workers=modified_config.data.num_workers) + dataset = (dataset.get('train'), dataset.get('val'), dataset.get('test')) - if modified_config.data.consistent_label_distribution and len( - train_labels) > 0: - train_label_distribution = train_labels + # Translate dataset to `StandaloneDataDict` + datadict = BaseDataTranslator(dataset, 
modified_config, DataLoader, + client_cfgs) - return data_local_dict, modified_config + return datadict, modified_config -def get_data(config): +def get_data(config, client_cfgs=None): """Instantiate the dataset and update the configuration accordingly if necessary. Arguments: @@ -616,22 +585,22 @@ def get_data(config): # will restore the user-specified on after the generation setup_seed(12345) for func in register.data_dict.values(): - data_and_config = func(config) + data_and_config = func(config, client_cfgs) if data_and_config is not None: return data_and_config if config.data.type.lower() == 'toy': - data, modified_config = load_toy_data(config) + data, modified_config = load_toy_data(config, client_cfgs) elif config.data.type.lower() == 'quadratic': from federatedscope.tabular.dataloader import load_quadratic_dataset - data, modified_config = load_quadratic_dataset(config) + data, modified_config = load_quadratic_dataset(config, client_cfgs) elif config.data.type.lower() in ['femnist', 'celeba']: from federatedscope.cv.dataloader import load_cv_dataset - data, modified_config = load_cv_dataset(config) + data, modified_config = load_cv_dataset(config, client_cfgs) elif config.data.type.lower() in [ 'shakespeare', 'twitter', 'subreddit', 'synthetic' ]: from federatedscope.nlp.dataloader import load_nlp_dataset - data, modified_config = load_nlp_dataset(config) + data, modified_config = load_nlp_dataset(config, client_cfgs) elif config.data.type.lower() in [ 'cora', 'citeseer', @@ -640,28 +609,28 @@ def get_data(config): 'dblp_org', ] or config.data.type.lower().startswith('csbm'): from federatedscope.gfl.dataloader import load_nodelevel_dataset - data, modified_config = load_nodelevel_dataset(config) + data, modified_config = load_nodelevel_dataset(config, client_cfgs) elif config.data.type.lower() in ['ciao', 'epinions', 'fb15k-237', 'wn18']: from federatedscope.gfl.dataloader import load_linklevel_dataset - data, modified_config = load_linklevel_dataset(config) + data, modified_config = load_linklevel_dataset(config, client_cfgs) elif config.data.type.lower() in [ 'hiv', 'proteins', 'imdb-binary', 'bbbp', 'tox21', 'bace', 'sider', 'clintox', 'esol', 'freesolv', 'lipo' ] or config.data.type.startswith('graph_multi_domain'): from federatedscope.gfl.dataloader import load_graphlevel_dataset - data, modified_config = load_graphlevel_dataset(config) + data, modified_config = load_graphlevel_dataset(config, client_cfgs) elif config.data.type.lower() == 'vertical_fl_data': from federatedscope.vertical_fl.dataloader import load_vertical_data data, modified_config = load_vertical_data(config, generate=True) elif 'movielens' in config.data.type.lower( ) or 'netflix' in config.data.type.lower(): from federatedscope.mf.dataloader import load_mf_dataset - data, modified_config = load_mf_dataset(config) + data, modified_config = load_mf_dataset(config, client_cfgs) elif '@' in config.data.type.lower(): - data, modified_config = load_external_data(config) + data, modified_config = load_external_data(config, client_cfgs) elif 'cikmcup' in config.data.type.lower(): from federatedscope.gfl.dataset.cikm_cup import load_cikmcup_data - data, modified_config = load_cikmcup_data(config) + data, modified_config = load_cikmcup_data(config, client_cfgs) elif config.data.type is None or config.data.type == "": # The participant (only for server in this version) does not own data data = None @@ -673,7 +642,7 @@ def get_data(config): config.attack.trigger_type: import os import torch - from 
federatedscope.attack.auxiliary import\ + from federatedscope.attack.auxiliary import \ create_ardis_poisoned_dataset, create_ardis_test_dataset if not os.path.exists(config.attack.edge_path): os.makedirs(config.attack.edge_path) @@ -690,8 +659,8 @@ def get_data(config): "wb") as saved_data_file: torch.save(poisoned_edgeset, saved_data_file) - with open(config.attack.edge_path+"ardis_test_dataset.pt", "wb") \ - as ardis_data_file: + with open(config.attack.edge_path + "ardis_test_dataset.pt", + "wb") as ardis_data_file: torch.save(ardis_test_dataset, ardis_data_file) logger.warning('please notice: downloading the poisoned dataset \ on cifar-10 from \ @@ -720,59 +689,3 @@ def get_data(config): return data[data_idx], config setup_seed(config.seed) - - -def merge_data(all_data, merged_max_data_id, specified_dataset_name=None): - if specified_dataset_name is None: - dataset_names = list(all_data[1].keys()) # e.g., train, test, val - else: - if not isinstance(specified_dataset_name, list): - specified_dataset_name = [specified_dataset_name] - dataset_names = specified_dataset_name - - import torch.utils.data - assert len(dataset_names) >= 1, \ - "At least one sub-dataset is required in client 1" - data_name = "test" if "test" in dataset_names else dataset_names[0] - id_has_key = 1 - while "test" not in all_data[id_has_key]: - id_has_key += 1 - if len(all_data) <= id_has_key: - raise KeyError(f'All data do not key {data_name}.') - if isinstance(all_data[id_has_key][data_name], dict): - data_elem_names = list( - all_data[id_has_key][data_name].keys()) # e.g., x, y - merged_data = {name: defaultdict(list) for name in dataset_names} - for data_id in range(1, merged_max_data_id): - for d_name in dataset_names: - if d_name not in all_data[data_id]: - continue - for elem_name in data_elem_names: - merged_data[d_name][elem_name].append( - all_data[data_id][d_name][elem_name]) - for d_name in dataset_names: - for elem_name in data_elem_names: - merged_data[d_name][elem_name] = np.concatenate( - merged_data[d_name][elem_name]) - elif issubclass(type(all_data[id_has_key][data_name]), - torch.utils.data.DataLoader): - merged_data = { - name: all_data[id_has_key][name] - for name in dataset_names - } - for data_id in range(1, merged_max_data_id): - if data_id == id_has_key: - continue - for d_name in dataset_names: - if d_name not in all_data[data_id]: - continue - merged_data[d_name].dataset.extend( - all_data[data_id][d_name].dataset) - else: - raise NotImplementedError( - "Un-supported type when merging data across different clients." - f"Your data type is {type(all_data[id_has_key][data_name])}. 
" - f"Currently we only support the following forms: " - " 1): {data_id: {train: {x:ndarray, y:ndarray}} }" - " 2): {data_id: {train: DataLoader }") - return merged_data diff --git a/federatedscope/core/auxiliaries/utils.py b/federatedscope/core/auxiliaries/utils.py index 190a96102..4b68e1d14 100644 --- a/federatedscope/core/auxiliaries/utils.py +++ b/federatedscope/core/auxiliaries/utils.py @@ -7,12 +7,15 @@ import signal import ssl import urllib.request +from collections import defaultdict from os import path as osp import pickle import numpy as np # Blind torch +import torch.utils + try: import torch import torchvision @@ -303,3 +306,57 @@ def merge_param_dict(raw_param, filtered_param): for key in filtered_param.keys(): raw_param[key] = filtered_param[key] return raw_param + + +def merge_data(all_data, merged_max_data_id, specified_dataset_name=None): + if specified_dataset_name is None: + dataset_names = list(all_data[1].keys()) # e.g., train, test, val + else: + if not isinstance(specified_dataset_name, list): + specified_dataset_name = [specified_dataset_name] + dataset_names = specified_dataset_name + + import torch.utils.data + from federatedscope.core.interface.base_data import ClientData + assert len(dataset_names) >= 1, \ + "At least one sub-dataset is required in client 1" + data_name = "test" if "test" in dataset_names else dataset_names[0] + id_has_key = 1 + while "test" not in all_data[id_has_key]: + id_has_key += 1 + if len(all_data) <= id_has_key: + raise KeyError(f'All data do not key {data_name}.') + if isinstance(all_data[id_has_key][data_name], dict): + data_elem_names = list( + all_data[id_has_key][data_name].keys()) # e.g., x, y + merged_data = {name: defaultdict(list) for name in dataset_names} + for data_id in range(1, merged_max_data_id): + for d_name in dataset_names: + if d_name not in all_data[data_id]: + continue + for elem_name in data_elem_names: + merged_data[d_name][elem_name].append( + all_data[data_id][d_name][elem_name]) + for d_name in dataset_names: + for elem_name in data_elem_names: + merged_data[d_name][elem_name] = np.concatenate( + merged_data[d_name][elem_name]) + elif issubclass(type(all_data[id_has_key][data_name]), + torch.utils.data.DataLoader): + merged_data = all_data[id_has_key] + for data_id in range(1, merged_max_data_id): + if data_id == id_has_key: + continue + for d_name in dataset_names: + if d_name not in all_data[data_id]: + continue + merged_data[d_name].dataset.extend( + all_data[data_id][d_name].dataset) + else: + raise NotImplementedError( + "Un-supported type when merging data across different clients." + f"Your data type is {type(all_data[id_has_key][data_name])}. 
" + f"Currently we only support the following forms: " + " 1): {data_id: {train: {x:ndarray, y:ndarray}} }" + " 2): {data_id: {train: DataLoader }") + return merged_data diff --git a/federatedscope/core/fed_runner.py b/federatedscope/core/fed_runner.py index 795cf0d32..52cb7db72 100644 --- a/federatedscope/core/fed_runner.py +++ b/federatedscope/core/fed_runner.py @@ -8,8 +8,7 @@ from federatedscope.core.workers import Server, Client from federatedscope.core.gpu_manager import GPUManager from federatedscope.core.auxiliaries.model_builder import get_model -from federatedscope.core.auxiliaries.data_builder import merge_data -from federatedscope.core.auxiliaries.utils import get_resource_info +from federatedscope.core.auxiliaries.utils import get_resource_info, merge_data logger = logging.getLogger(__name__) @@ -28,14 +27,14 @@ class FedRunner(object): client_class: The client class is used for instantiating a ( customized) client. config: The configurations of the FL course. - client_config: The clients' configurations. + client_configs: The clients' configurations. """ def __init__(self, data, server_class=Server, client_class=Client, config=None, - client_config=None): + client_configs=None): self.data = data self.server_class = server_class self.client_class = client_class @@ -44,7 +43,7 @@ def __init__(self, if not config.is_ready_for_run: config.ready_for_run() self.cfg = config - self.client_cfg = client_config + self.client_cfgs = client_configs self.mode = self.cfg.federate.mode.lower() self.gpu_manager = GPUManager(gpu_available=self.cfg.use_gpu, @@ -89,24 +88,10 @@ def _setup_for_standalone(self): "specify a non-zero value for client_num" if self.cfg.federate.method == "global": - if self.cfg.federate.client_num != 1: - if self.cfg.data.server_holds_all: - assert self.data[0] is not None \ - and len(self.data[0]) != 0, \ - "You specified cfg.data.server_holds_all=True " \ - "but data[0] is None. 
Please check whether you " \
-                        "pre-process the data[0] correctly"
-                    self.data[1] = self.data[0]
-                else:
-                    logger.info(f"Will merge data from clients whose ids in "
-                                f"[1, {self.cfg.federate.client_num}]")
-                    self.data[1] = merge_data(
-                        all_data=self.data,
-                        merged_max_data_id=self.cfg.federate.client_num)
-                self.cfg.defrost()
-                self.cfg.federate.client_num = 1
-                self.cfg.federate.sample_client_num = 1
-                self.cfg.freeze()
+            self.cfg.defrost()
+            self.cfg.federate.client_num = 1
+            self.cfg.federate.sample_client_num = 1
+            self.cfg.freeze()
 
         # sample resource information
         if self.resource_info is not None:
@@ -286,15 +271,7 @@ def _setup_server(self, resource_info=None, client_resource_info=None):
         """
         self.server_id = 0
         if self.mode == 'standalone':
-            if self.cfg.federate.merge_test_data:
-                server_data = merge_data(
-                    all_data=self.data,
-                    merged_max_data_id=self.cfg.federate.client_num,
-                    specified_dataset_name=['test'])
-                model = get_model(self.cfg.model,
-                                  server_data,
-                                  backend=self.cfg.backend)
-            elif self.server_id in self.data:
+            if self.server_id in self.data:
                 server_data = self.data[self.server_id]
                 model = get_model(self.cfg.model,
                                   server_data,
@@ -375,10 +352,10 @@ def _setup_client(self,
         if self.client_class:
             client_specific_config = self.cfg.clone()
-            if self.client_cfg:
+            if self.client_cfgs:
                 client_specific_config.defrost()
                 client_specific_config.merge_from_other_cfg(
-                    self.client_cfg.get('client_{}'.format(client_id)))
+                    self.client_cfgs.get('client_{}'.format(client_id)))
                 client_specific_config.freeze()
             client_device = self._server_device if \
                 self.cfg.federate.share_local_model else \
diff --git a/federatedscope/core/interface/__init__.py b/federatedscope/core/interface/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/federatedscope/core/interface/base_data.py b/federatedscope/core/interface/base_data.py
new file mode 100644
index 000000000..2ff6dc6be
--- /dev/null
+++ b/federatedscope/core/interface/base_data.py
@@ -0,0 +1,141 @@
+import logging
+from federatedscope.core.auxiliaries.utils import merge_data
+
+logger = logging.getLogger(__name__)
+
+
+class StandaloneDataDict(dict):
+    """
+    `StandaloneDataDict` maintains several `ClientData`.
+    """
+    client_cfgs = None
+
+    def __init__(self, datadict, global_cfg):
+        """
+
+        Args:
+            datadict: `Dict` with `client_id` as key, `ClientData` as value.
+            global_cfg: global CfgNode
+        """
+        self.cfg = global_cfg
+        datadict = self.preprocess(datadict)
+        super(StandaloneDataDict, self).__init__(datadict)
+
+    def resetup(self, global_cfg, client_cfgs=None):
+        """
+        Resetup new configs for `ClientData`, which might be used in HPO.
+
+        Args:
+            global_cfg: enable new config for `ClientData`
+            client_cfgs: enable new client-specific configs for `ClientData`
+        """
+        self.cfg, self.client_cfgs = global_cfg, client_cfgs
+        for client_id, client_data in self.items():
+            if isinstance(client_data, ClientData):
+                if client_cfgs is not None:
+                    client_cfg = global_cfg.clone()
+                    client_cfg.merge_from_other_cfg(
+                        client_cfgs.get(f'client_{client_id}'))
+                else:
+                    client_cfg = global_cfg
+                client_data.setup(client_cfg)
+            else:
+                logger.warning('`client_data` is not a subclass of '
+                               '`ClientData`, and cannot re-setup '
+                               'DataLoader with new configs.')
+
+    def preprocess(self, datadict):
+        """
+        Preprocess for StandaloneDataDict.
+
+        Args:
+            datadict: dict with `client_id` as key, `ClientData` as value.
+        """
+        if self.cfg.federate.merge_test_data:
+            server_data = merge_data(
+                all_data=datadict,
+                merged_max_data_id=self.cfg.federate.client_num,
+                specified_dataset_name=['test'])
+            # `0` indicates Server
+            datadict[0] = server_data
+
+        if self.cfg.federate.method == "global":
+            if self.cfg.federate.client_num != 1:
+                if self.cfg.data.server_holds_all:
+                    assert datadict[0] is not None \
+                        and len(datadict[0]) != 0, \
+                        "You specified cfg.data.server_holds_all=True " \
+                        "but data[0] is None. Please check whether you " \
+                        "pre-process the data[0] correctly"
+                    datadict[1] = datadict[0]
+                else:
+                    logger.info(f"Will merge data from clients whose ids in "
+                                f"[1, {self.cfg.federate.client_num}]")
+                    datadict[1] = merge_data(
+                        all_data=datadict,
+                        merged_max_data_id=self.cfg.federate.client_num)
+        return datadict
+
+
+class ClientData(dict):
+    """
+    `ClientData` converts datasets to loaders.
+    """
+    client_cfg = None
+
+    def __init__(self, loader, client_cfg, train=None, val=None, test=None):
+        """
+
+        Args:
+            loader: DataLoader class or data dict which has been built
+            client_cfg: client-specific CfgNode
+            train: train dataset
+            val: valid dataset
+            test: test dataset
+        """
+        self.train = train
+        self.val = val
+        self.test = test
+        self.loader = loader
+        self.setup(client_cfg)
+        super(ClientData, self).__init__()
+
+    def setup(self, new_client_cfg=None):
+        """
+
+        Args:
+            new_client_cfg: new client-specific CfgNode
+
+        Returns:
+            Status: indicates whether the client_cfg is updated
+        """
+        # if `batch_size` or `shuffle` change, reinstantiate DataLoader
+        if self.client_cfg is not None:
+            if self.client_cfg.data.batch_size == \
+                    new_client_cfg.data.batch_size and \
+                    self.client_cfg.data.shuffle == \
+                    new_client_cfg.data.shuffle:
+                return False
+
+        self.client_cfg = new_client_cfg
+        if self.train is not None:
+            self['train'] = self.loader(
+                self.train,
+                batch_size=new_client_cfg.data.batch_size,
+                shuffle=new_client_cfg.data.shuffle,
+                num_workers=new_client_cfg.data.num_workers)
+
+        if self.val is not None:
+            self['val'] = self.loader(
+                self.val,
+                batch_size=new_client_cfg.data.batch_size,
+                shuffle=False,
+                num_workers=new_client_cfg.data.num_workers)
+
+        if self.test is not None:
+            self['test'] = self.loader(
+                self.test,
+                batch_size=new_client_cfg.data.batch_size,
+                shuffle=False,
+                num_workers=new_client_cfg.data.num_workers)
+        return True
diff --git a/federatedscope/core/monitors/monitor.py b/federatedscope/core/monitors/monitor.py
index 4c85ea804..8862b855a 100644
--- a/federatedscope/core/monitors/monitor.py
+++ b/federatedscope/core/monitors/monitor.py
@@ -250,7 +250,8 @@ def finish_fed_runner(self, fl_mode=None):
                 "cfg.wandb.use=True but not install the wandb package")
             exit()
 
-        from federatedscope.core.auxiliaries.logging import logfile_2_wandb_dict
+        from federatedscope.core.auxiliaries.logging import \
+            logfile_2_wandb_dict
         with open(os.path.join(self.outdir, "eval_results.log"),
                   "r") as exp_log_f:
             # track the prediction related performance
diff --git a/federatedscope/core/trainers/context.py b/federatedscope/core/trainers/context.py
index 319044b93..956170687 100644
--- a/federatedscope/core/trainers/context.py
+++ b/federatedscope/core/trainers/context.py
@@ -1,4 +1,3 @@
-import math
 import logging
 import collections
 
@@ -8,6 +7,7 @@
 from federatedscope.core.auxiliaries.regularizer_builder import get_regularizer
 from federatedscope.core.auxiliaries.enums import MODE
 from federatedscope.core.auxiliaries.utils import calculate_batch_epoch_num
+from 
federatedscope.core.interface.base_data import ClientData logger = logging.getLogger(__name__) @@ -128,6 +128,8 @@ def setup_vars(self): self.device) self.regularizer = get_regularizer(self.cfg.regularizer.type) self.grad_clip = self.cfg.grad.grad_clip + if isinstance(self.data, ClientData): + self.data.setup(self.cfg) elif self.cfg.backend == 'tensorflow': self.trainable_para_names = self.model.trainable_variables() self.criterion = None diff --git a/federatedscope/cv/dataloader/dataloader.py b/federatedscope/cv/dataloader/dataloader.py index 6ca535f43..d665dce2d 100644 --- a/federatedscope/cv/dataloader/dataloader.py +++ b/federatedscope/cv/dataloader/dataloader.py @@ -2,9 +2,11 @@ from federatedscope.cv.dataset.leaf_cv import LEAF_CV from federatedscope.core.auxiliaries.transform_builder import get_transform +from federatedscope.core.interface.base_data import ClientData, \ + StandaloneDataDict -def load_cv_dataset(config=None): +def load_cv_dataset(config=None, client_cfgs=None): r""" return { 'client_id': { @@ -18,7 +20,6 @@ def load_cv_dataset(config=None): path = config.data.root name = config.data.type.lower() - batch_size = config.data.batch_size transforms_funcs = get_transform(config, 'torchvision') if name in ['femnist', 'celeba']: @@ -38,23 +39,18 @@ def load_cv_dataset(config=None): # get local dataset data_local_dict = dict() - for client_idx in range(client_num): - dataloader = { - 'train': DataLoader(dataset[client_idx]['train'], - batch_size, - shuffle=config.data.shuffle, - num_workers=config.data.num_workers), - 'test': DataLoader(dataset[client_idx]['test'], - batch_size, - shuffle=False, - num_workers=config.data.num_workers) - } - if 'val' in dataset[client_idx]: - dataloader['val'] = DataLoader(dataset[client_idx]['val'], - batch_size, - shuffle=False, - num_workers=config.data.num_workers) - - data_local_dict[client_idx + 1] = dataloader - - return data_local_dict, config + for client_idx in range(1, client_num + 1): + if client_cfgs is not None: + client_cfg = config.clone() + client_cfg.merge_from_other_cfg( + client_cfgs.get(f'client_{client_idx}')) + else: + client_cfg = config + client_data = ClientData(DataLoader, + client_cfg, + train=dataset[client_idx - 1].get('train'), + val=dataset[client_idx - 1].get('val'), + test=dataset[client_idx - 1].get('test')) + data_local_dict[client_idx] = client_data + + return StandaloneDataDict(data_local_dict, config), config diff --git a/federatedscope/gfl/dataloader/dataloader_graph.py b/federatedscope/gfl/dataloader/dataloader_graph.py index 709a40e38..c4ad27a8a 100644 --- a/federatedscope/gfl/dataloader/dataloader_graph.py +++ b/federatedscope/gfl/dataloader/dataloader_graph.py @@ -6,6 +6,8 @@ from federatedscope.core.auxiliaries.splitter_builder import get_splitter from federatedscope.core.auxiliaries.transform_builder import get_transform +from federatedscope.core.interface.base_data import ClientData, \ + StandaloneDataDict def get_numGraphLabels(dataset): @@ -15,7 +17,7 @@ def get_numGraphLabels(dataset): return len(s) -def load_graphlevel_dataset(config=None): +def load_graphlevel_dataset(config=None, client_cfgs=None): r"""Convert dataset to Dataloader. 
:returns: data_local_dict @@ -117,35 +119,34 @@ def load_graphlevel_dataset(config=None): raw_valid = [] raw_test = [] for client_idx, gs in enumerate(dataset): + if client_cfgs is not None: + client_cfg = config.clone() + client_cfg.merge_from_other_cfg( + client_cfgs.get(f'client_{client_idx+1}')) + else: + client_cfg = config + index = np.random.permutation(np.arange(len(gs))) train_idx = index[:int(len(gs) * splits[0])] valid_idx = index[int(len(gs) * splits[0]):int(len(gs) * sum(splits[:2]))] test_idx = index[int(len(gs) * sum(splits[:2])):] - dataloader = { - 'num_label': get_numGraphLabels(gs), - 'train': DataLoader([gs[idx] for idx in train_idx], - batch_size, - shuffle=True, - num_workers=config.data.num_workers), - 'val': DataLoader([gs[idx] for idx in valid_idx], - batch_size, - shuffle=False, - num_workers=config.data.num_workers), - 'test': DataLoader([gs[idx] for idx in test_idx], - batch_size, - shuffle=False, - num_workers=config.data.num_workers), - } - data_local_dict[client_idx + 1] = dataloader + client_data = ClientData(DataLoader, + client_cfg, + train=[gs[idx] for idx in train_idx], + val=[gs[idx] for idx in valid_idx], + test=[gs[idx] for idx in test_idx]) + client_data['num_label'] = get_numGraphLabels(gs) + + data_local_dict[client_idx + 1] = client_data raw_train = raw_train + [gs[idx] for idx in train_idx] raw_valid = raw_valid + [gs[idx] for idx in valid_idx] raw_test = raw_test + [gs[idx] for idx in test_idx] if not name.startswith('graph_multi_domain'.upper()): - data_local_dict[0] = { - 'train': DataLoader(raw_train, batch_size, shuffle=True), - 'val': DataLoader(raw_valid, batch_size, shuffle=False), - 'test': DataLoader(raw_test, batch_size, shuffle=False), - } + data_local_dict[0] = ClientData(DataLoader, + config, + train=raw_train, + val=raw_valid, + test=raw_test) - return data_local_dict, config + return StandaloneDataDict(data_local_dict, config), config diff --git a/federatedscope/gfl/dataloader/dataloader_link.py b/federatedscope/gfl/dataloader/dataloader_link.py index 168b32517..755b8af5d 100644 --- a/federatedscope/gfl/dataloader/dataloader_link.py +++ b/federatedscope/gfl/dataloader/dataloader_link.py @@ -5,6 +5,7 @@ from federatedscope.core.auxiliaries.splitter_builder import get_splitter from federatedscope.core.auxiliaries.transform_builder import get_transform +from federatedscope.core.interface.base_data import StandaloneDataDict def raw2loader(raw_data, config=None): @@ -43,7 +44,7 @@ def raw2loader(raw_data, config=None): return sampler -def load_linklevel_dataset(config=None): +def load_linklevel_dataset(config=None, client_cfgs=None): r""" :returns: data_local_dict @@ -87,9 +88,15 @@ def load_linklevel_dataset(config=None): # get local dataset data_local_dict = dict() - for client_idx in range(len(dataset)): - local_data = raw2loader(dataset[client_idx], config) - data_local_dict[client_idx + 1] = local_data + for client_idx in range(1, len(dataset) + 1): + if client_cfgs is not None: + client_cfg = config.clone() + client_cfg.merge_from_other_cfg( + client_cfgs.get(f'client_{client_idx}')) + else: + client_cfg = config + local_data = raw2loader(dataset[client_idx - 1], client_cfg) + data_local_dict[client_idx] = local_data if global_dataset is not None: # Recode train & valid & test mask for global data @@ -127,4 +134,4 @@ def load_linklevel_dataset(config=None): global_graph.edge_type = global_edge_type data_local_dict[0] = raw2loader(global_graph, config) - return data_local_dict, config + return StandaloneDataDict(data_local_dict, 
config), config diff --git a/federatedscope/gfl/dataloader/dataloader_node.py b/federatedscope/gfl/dataloader/dataloader_node.py index e79a4e964..89796297d 100644 --- a/federatedscope/gfl/dataloader/dataloader_node.py +++ b/federatedscope/gfl/dataloader/dataloader_node.py @@ -9,6 +9,7 @@ from federatedscope.core.auxiliaries.splitter_builder import get_splitter from federatedscope.core.auxiliaries.transform_builder import get_transform +from federatedscope.core.interface.base_data import StandaloneDataDict INF = np.iinfo(np.int64).max @@ -73,7 +74,7 @@ def raw2loader(raw_data, config=None): return sampler -def load_nodelevel_dataset(config=None): +def load_nodelevel_dataset(config=None, client_cfgs=None): r""" :returns: data_local_dict @@ -156,9 +157,15 @@ def load_nodelevel_dataset(config=None): # get local dataset data_local_dict = dict() - for client_idx in range(len(dataset)): - local_data = raw2loader(dataset[client_idx], config) - data_local_dict[client_idx + 1] = local_data + for client_idx in range(1, len(dataset) + 1): + if client_cfgs is not None: + client_cfg = config.clone() + client_cfg.merge_from_other_cfg( + client_cfgs.get(f'client_{client_idx}')) + else: + client_cfg = config + local_data = raw2loader(dataset[client_idx - 1], client_cfg) + data_local_dict[client_idx] = local_data if global_dataset is not None: global_graph = global_dataset[0] @@ -183,4 +190,4 @@ def load_nodelevel_dataset(config=None): data_local_dict[0] = raw2loader(global_graph, config) - return data_local_dict, config + return StandaloneDataDict(data_local_dict, config), config diff --git a/federatedscope/gfl/dataset/cikm_cup.py b/federatedscope/gfl/dataset/cikm_cup.py index 7ae47a3ad..60678b85c 100644 --- a/federatedscope/gfl/dataset/cikm_cup.py +++ b/federatedscope/gfl/dataset/cikm_cup.py @@ -47,7 +47,7 @@ def __getitem__(self, idx): return data -def load_cikmcup_data(config): +def load_cikmcup_data(config, client_cfgs=None): from torch_geometric.loader import DataLoader # Build data @@ -60,19 +60,28 @@ def load_cikmcup_data(config): logger.info(f'Loading CIKMCUP data for Client #{client_idx}.') dataloader_dict = {} tmp_dataset = [] + + if client_cfgs is not None: + client_cfg = config.clone() + client_cfg.merge_from_other_cfg( + client_cfgs.get(f'client_{client_idx}')) + else: + client_cfg = config + if 'train' in dataset[client_idx]: - dataloader_dict['train'] = DataLoader(dataset[client_idx]['train'], - config.data.batch_size, - shuffle=config.data.shuffle) + dataloader_dict['train'] = DataLoader( + dataset[client_idx]['train'], + client_cfg.data.batch_size, + shuffle=client_cfg.data.shuffle) tmp_dataset += dataset[client_idx]['train'] if 'val' in dataset[client_idx]: dataloader_dict['val'] = DataLoader(dataset[client_idx]['val'], - config.data.batch_size, + client_cfg.data.batch_size, shuffle=False) tmp_dataset += dataset[client_idx]['val'] if 'test' in dataset[client_idx]: dataloader_dict['test'] = DataLoader(dataset[client_idx]['test'], - config.data.batch_size, + client_cfg.data.batch_size, shuffle=False) tmp_dataset += dataset[client_idx]['test'] if tmp_dataset: diff --git a/federatedscope/hpo.py b/federatedscope/hpo.py index 472bd4e15..c3b16cf9f 100644 --- a/federatedscope/hpo.py +++ b/federatedscope/hpo.py @@ -26,7 +26,7 @@ init_cfg.merge_from_file(args.cfg_file) init_cfg.merge_from_list(args.opts) - update_logger(init_cfg) + update_logger(init_cfg, clear_before_add=True) setup_seed(init_cfg.seed) assert not args.client_cfg_file, 'No support for client-wise config in ' \ diff --git 
a/federatedscope/main.py b/federatedscope/main.py index 40d8a0e94..0eae9c8c3 100644 --- a/federatedscope/main.py +++ b/federatedscope/main.py @@ -32,13 +32,14 @@ setup_seed(init_cfg.seed) # load clients' cfg file - client_cfg = CfgNode.load_cfg(open(args.client_cfg_file, - 'r')) if args.client_cfg_file else None + client_cfgs = CfgNode.load_cfg(open(args.client_cfg_file, + 'r')) if args.client_cfg_file else None # federated dataset might change the number of clients # thus, we allow the creation procedure of dataset to modify the global # cfg object - data, modified_cfg = get_data(config=init_cfg.clone()) + data, modified_cfg = get_data(config=init_cfg.clone(), + client_cfgs=client_cfgs) init_cfg.merge_from_other_cfg(modified_cfg) init_cfg.freeze() @@ -47,5 +48,5 @@ server_class=get_server_cls(init_cfg), client_class=get_client_cls(init_cfg), config=init_cfg.clone(), - client_config=client_cfg) + client_configs=client_cfgs) _ = runner.run() diff --git a/federatedscope/mf/dataloader/dataloader.py b/federatedscope/mf/dataloader/dataloader.py index c65f18603..c8a21e5c5 100644 --- a/federatedscope/mf/dataloader/dataloader.py +++ b/federatedscope/mf/dataloader/dataloader.py @@ -7,6 +7,8 @@ import collections import importlib +from federatedscope.core.interface.base_data import StandaloneDataDict + MFDATA_CLASS_DICT = { "vflmovielens1m": "VFLMovieLens1M", "vflmovielens10m": "VFLMovieLens10M", @@ -17,7 +19,7 @@ } -def load_mf_dataset(config=None): +def load_mf_dataset(config=None, client_cfgs=None): """Return the dataset of matrix factorization Format: @@ -48,24 +50,30 @@ def load_mf_dataset(config=None): data_local_dict = collections.defaultdict(dict) for id_client, data in dataset.data.items(): + if client_cfgs is not None: + client_cfg = config.clone() + client_cfg.merge_from_other_cfg( + client_cfgs.get(f'client_{id_client}')) + else: + client_cfg = config data_local_dict[id_client]["train"] = MFDataLoader( data["train"], - shuffle=config.data.shuffle, - batch_size=config.data.batch_size, - drop_last=config.data.drop_last, - theta=config.sgdmf.theta) + shuffle=client_cfg.data.shuffle, + batch_size=client_cfg.data.batch_size, + drop_last=client_cfg.data.drop_last, + theta=client_cfg.sgdmf.theta) data_local_dict[id_client]["test"] = MFDataLoader( data["test"], shuffle=False, - batch_size=config.data.batch_size, - drop_last=config.data.drop_last, - theta=config.sgdmf.theta) + batch_size=client_cfg.data.batch_size, + drop_last=client_cfg.data.drop_last, + theta=client_cfg.sgdmf.theta) # Modify config config.merge_from_list(['model.num_user', dataset.n_user]) config.merge_from_list(['model.num_item', dataset.n_item]) - return data_local_dict, config + return StandaloneDataDict(data_local_dict, config), config class MFDataLoader(object): diff --git a/federatedscope/nlp/dataloader/dataloader.py b/federatedscope/nlp/dataloader/dataloader.py index c33a75ec7..835b897a5 100644 --- a/federatedscope/nlp/dataloader/dataloader.py +++ b/federatedscope/nlp/dataloader/dataloader.py @@ -4,9 +4,11 @@ from federatedscope.nlp.dataset.leaf_twitter import LEAF_TWITTER from federatedscope.nlp.dataset.leaf_synthetic import LEAF_SYNTHETIC from federatedscope.core.auxiliaries.transform_builder import get_transform +from federatedscope.core.interface.base_data import ClientData, \ + StandaloneDataDict -def load_nlp_dataset(config=None): +def load_nlp_dataset(config=None, client_cfgs=None): r""" return { 'client_id': { @@ -20,7 +22,6 @@ def load_nlp_dataset(config=None): path = config.data.root name = 
config.data.type.lower() - batch_size = config.data.batch_size transforms_funcs = get_transform(config, 'torchtext') if name in ['shakespeare', 'subreddit']: @@ -50,25 +51,18 @@ def load_nlp_dataset(config=None): # get local dataset data_local_dict = dict() - for client_idx in range(client_num): - dataloader = { - 'train': DataLoader(dataset[client_idx]['train'], - batch_size, - shuffle=config.data.shuffle, - num_workers=config.data.num_workers) - } - if 'test' in dataset[client_idx]: - dataloader['test'] = DataLoader( - dataset[client_idx]['test'], - batch_size, - shuffle=False, - num_workers=config.data.num_workers) - if 'val' in dataset[client_idx]: - dataloader['val'] = DataLoader(dataset[client_idx]['val'], - batch_size, - shuffle=False, - num_workers=config.data.num_workers) + for client_idx in range(1, client_num + 1): + if client_cfgs is not None: + client_cfg = config.clone() + client_cfg.merge_from_other_cfg( + client_cfgs.get(f'client_{client_idx}')) + else: + client_cfg = config + client_data = ClientData(DataLoader, + client_cfg, + train=dataset[client_idx - 1].get('train'), + val=dataset[client_idx - 1].get('val'), + test=dataset[client_idx - 1].get('test')) + data_local_dict[client_idx] = client_data - data_local_dict[client_idx + 1] = dataloader - - return data_local_dict, config + return StandaloneDataDict(data_local_dict, config), config diff --git a/federatedscope/tabular/dataloader/quadratic.py b/federatedscope/tabular/dataloader/quadratic.py index 37b73829d..0c6330773 100644 --- a/federatedscope/tabular/dataloader/quadratic.py +++ b/federatedscope/tabular/dataloader/quadratic.py @@ -1,9 +1,11 @@ import numpy as np from torch.utils.data import DataLoader +from federatedscope.core.interface.base_data import ClientData, \ + StandaloneDataDict -def load_quadratic_dataset(config): +def load_quadratic_dataset(config, client_cfgs=None): dataset = dict() d = config.data.quadratic.dim base = np.exp( @@ -13,9 +15,15 @@ def load_quadratic_dataset(config): # TODO: enable sphere a = 0.02 * base**(i - 1) * np.identity(d) # TODO: enable non-zero minimizer, i.e., provide a shift - client_data = dict() - client_data['train'] = DataLoader([(a.astype(np.float32), .0)]) - client_data['val'] = DataLoader([(a.astype(np.float32), .0)]) - client_data['test'] = DataLoader([(a.astype(np.float32), .0)]) + if client_cfgs is not None: + client_cfg = config.clone() + client_cfg.merge_from_other_cfg(client_cfgs.get(f'client_{i}')) + else: + client_cfg = config + client_data = ClientData(DataLoader, + client_cfg, + train=[(a.astype(np.float32), .0)], + val=[(a.astype(np.float32), .0)], + test=[(a.astype(np.float32), .0)]) dataset[i] = client_data - return dataset, config + return StandaloneDataDict(dataset, config), config diff --git a/federatedscope/translator/__init__.py b/federatedscope/translator/__init__.py new file mode 100644 index 000000000..336603fc0 --- /dev/null +++ b/federatedscope/translator/__init__.py @@ -0,0 +1,3 @@ +from federatedscope.translator.data_translator import BaseDataTranslator + +__all__ = ["BaseDataTranslator"] diff --git a/federatedscope/translator/data_translator.py b/federatedscope/translator/data_translator.py new file mode 100644 index 000000000..e3e2038c5 --- /dev/null +++ b/federatedscope/translator/data_translator.py @@ -0,0 +1,83 @@ +from federatedscope.core.auxiliaries.splitter_builder import get_splitter +from federatedscope.core.interface.base_data import ClientData, \ + StandaloneDataDict + + +class BaseDataTranslator(StandaloneDataDict): + def __init__(self, 
dataset, global_cfg, loader, client_cfgs=None): + """ + + Args: + dataset: `torch.utils.data.Dataset`, `List` of (feature, label) + or split dataset tuple of (train, val, test) + global_cfg: global CfgNode + loader: `torch.utils.data.DataLoader` or subclass of it + client_cfgs: client cfg `Dict` + """ + self.dataset = dataset + self.loader = loader + self.global_cfg = global_cfg.clone() + self.client_cfgs = client_cfgs + self.splitter = get_splitter(global_cfg) + + train, val, test = self.split_train_val_test(dataset) + datadict = self.split_to_client(train, val, test) + super(BaseDataTranslator, self).__init__(datadict, global_cfg) + + def split_train_val_test(self): + """ + Split dataset to train, val, test if not provided. + + Returns: + split_data (List): List of split dataset, [train, val, test] + + """ + dataset, splits = self.dataset, self.global_cfg.data.splits + if isinstance(dataset, tuple): + return [dataset[0], dataset[1], dataset[2]] + + from torch.utils.data.dataset import random_split + train_size = int(splits[0] * len(dataset)) + val_size = int(splits[1] * len(dataset)) + test_size = len(dataset) - train_size - val_size + split_data = random_split(dataset, [train_size, val_size, test_size]) + return split_data + + def split_to_client(self, train, val, test): + """ + Split dataset to clients. + + Returns: + datadict (dict): dict of `ClientData` with client_idx as key. + + """ + + # Initialization + client_num = self.global_cfg.federate.client_num + split_train, split_val, split_test = [[None] * client_num] * 3 + train_label_distribution = None + + # Split train/val/test to client + if len(train) > 0: + split_train = self.splitter(train) + train_label_distribution = [[j[1] for j in x] for x in split_train] + if len(val) > 0: + split_val = self.splitter(val, prior=train_label_distribution) + if len(test) > 0: + split_test = self.splitter(test, prior=train_label_distribution) + + # Build data dict with `ClientData` + datadict = {} + for client_id in range(1, client_num + 1): + if self.client_cfgs is not None: + client_cfg = self.global_cfg.clone() + client_cfg.merge_from_other_cfg( + self.client_cfgs.get(f'client_{client_id}')) + else: + client_cfg = self.global_cfg + datadict[client_id] = ClientData(self.loader, + client_cfg, + train=split_train[client_id - 1], + val=split_val[client_id - 1], + test=split_test[client_id - 1]) + return datadict diff --git a/federatedscope/vertical_fl/dataloader/dataloader.py b/federatedscope/vertical_fl/dataloader/dataloader.py index e58560c12..c07d4b5f4 100644 --- a/federatedscope/vertical_fl/dataloader/dataloader.py +++ b/federatedscope/vertical_fl/dataloader/dataloader.py @@ -1,5 +1,7 @@ import numpy as np +from federatedscope.core.interface.base_data import StandaloneDataDict + def load_vertical_data(config=None, generate=False): """ @@ -51,6 +53,6 @@ def load_vertical_data(config=None, generate=False): data[2]['val'] = None data[2]['test'] = test_data - return data, config + return StandaloneDataDict(data, config), config else: raise ValueError('You must provide the data file') From 7aa3387cc52da67e4d7e6d9b10f2e60238ad4f06 Mon Sep 17 00:00:00 2001 From: rayrayraykk <18007356109@163.com> Date: Wed, 7 Sep 2022 10:59:54 +0800 Subject: [PATCH 02/39] fix minor bugs --- federatedscope/translator/data_translator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/federatedscope/translator/data_translator.py b/federatedscope/translator/data_translator.py index e3e2038c5..4cd15291d 100644 --- 
a/federatedscope/translator/data_translator.py +++ b/federatedscope/translator/data_translator.py @@ -20,7 +20,7 @@ def __init__(self, dataset, global_cfg, loader, client_cfgs=None): self.client_cfgs = client_cfgs self.splitter = get_splitter(global_cfg) - train, val, test = self.split_train_val_test(dataset) + train, val, test = self.split_train_val_test() datadict = self.split_to_client(train, val, test) super(BaseDataTranslator, self).__init__(datadict, global_cfg) From 30f81d6d1c67b8c5db69c111cde1fb3217440c15 Mon Sep 17 00:00:00 2001 From: rayrayraykk <18007356109@163.com> Date: Wed, 7 Sep 2022 11:19:34 +0800 Subject: [PATCH 03/39] move interface --- federatedscope/core/auxiliaries/data_builder.py | 2 +- federatedscope/core/auxiliaries/utils.py | 1 - federatedscope/core/data/__init__.py | 3 +++ federatedscope/core/{interface => data}/base_data.py | 0 federatedscope/core/interface/__init__.py | 0 federatedscope/core/trainers/context.py | 2 +- federatedscope/cv/dataloader/dataloader.py | 3 +-- federatedscope/gfl/dataloader/dataloader_graph.py | 3 +-- federatedscope/gfl/dataloader/dataloader_link.py | 2 +- federatedscope/gfl/dataloader/dataloader_node.py | 2 +- federatedscope/mf/dataloader/dataloader.py | 2 +- federatedscope/nlp/dataloader/dataloader.py | 3 +-- federatedscope/tabular/dataloader/quadratic.py | 3 +-- federatedscope/translator/data_translator.py | 3 +-- federatedscope/vertical_fl/dataloader/dataloader.py | 2 +- 15 files changed, 14 insertions(+), 17 deletions(-) create mode 100644 federatedscope/core/data/__init__.py rename federatedscope/core/{interface => data}/base_data.py (100%) delete mode 100644 federatedscope/core/interface/__init__.py diff --git a/federatedscope/core/auxiliaries/data_builder.py b/federatedscope/core/auxiliaries/data_builder.py index 00f57f6da..fd7e89960 100644 --- a/federatedscope/core/auxiliaries/data_builder.py +++ b/federatedscope/core/auxiliaries/data_builder.py @@ -6,7 +6,7 @@ import numpy as np from federatedscope.core.auxiliaries.utils import setup_seed -from federatedscope.core.interface.base_data import StandaloneDataDict +from federatedscope.core.data import StandaloneDataDict from federatedscope.translator import BaseDataTranslator import federatedscope.register as register diff --git a/federatedscope/core/auxiliaries/utils.py b/federatedscope/core/auxiliaries/utils.py index 4b68e1d14..1290f68a6 100644 --- a/federatedscope/core/auxiliaries/utils.py +++ b/federatedscope/core/auxiliaries/utils.py @@ -317,7 +317,6 @@ def merge_data(all_data, merged_max_data_id, specified_dataset_name=None): dataset_names = specified_dataset_name import torch.utils.data - from federatedscope.core.interface.base_data import ClientData assert len(dataset_names) >= 1, \ "At least one sub-dataset is required in client 1" data_name = "test" if "test" in dataset_names else dataset_names[0] diff --git a/federatedscope/core/data/__init__.py b/federatedscope/core/data/__init__.py new file mode 100644 index 000000000..79d9f9d81 --- /dev/null +++ b/federatedscope/core/data/__init__.py @@ -0,0 +1,3 @@ +from federatedscope.core.data.base_data import StandaloneDataDict, ClientData + +__all__ = ['StandaloneDataDict', 'ClientData'] diff --git a/federatedscope/core/interface/base_data.py b/federatedscope/core/data/base_data.py similarity index 100% rename from federatedscope/core/interface/base_data.py rename to federatedscope/core/data/base_data.py diff --git a/federatedscope/core/interface/__init__.py b/federatedscope/core/interface/__init__.py deleted file mode 100644 index 
e69de29bb..000000000 diff --git a/federatedscope/core/trainers/context.py b/federatedscope/core/trainers/context.py index 956170687..cc174843d 100644 --- a/federatedscope/core/trainers/context.py +++ b/federatedscope/core/trainers/context.py @@ -7,7 +7,7 @@ from federatedscope.core.auxiliaries.regularizer_builder import get_regularizer from federatedscope.core.auxiliaries.enums import MODE from federatedscope.core.auxiliaries.utils import calculate_batch_epoch_num -from federatedscope.core.interface.base_data import ClientData +from federatedscope.core.data import ClientData logger = logging.getLogger(__name__) diff --git a/federatedscope/cv/dataloader/dataloader.py b/federatedscope/cv/dataloader/dataloader.py index d665dce2d..81b473aa0 100644 --- a/federatedscope/cv/dataloader/dataloader.py +++ b/federatedscope/cv/dataloader/dataloader.py @@ -2,8 +2,7 @@ from federatedscope.cv.dataset.leaf_cv import LEAF_CV from federatedscope.core.auxiliaries.transform_builder import get_transform -from federatedscope.core.interface.base_data import ClientData, \ - StandaloneDataDict +from federatedscope.core.data import ClientData, StandaloneDataDict def load_cv_dataset(config=None, client_cfgs=None): diff --git a/federatedscope/gfl/dataloader/dataloader_graph.py b/federatedscope/gfl/dataloader/dataloader_graph.py index c4ad27a8a..b300097c2 100644 --- a/federatedscope/gfl/dataloader/dataloader_graph.py +++ b/federatedscope/gfl/dataloader/dataloader_graph.py @@ -6,8 +6,7 @@ from federatedscope.core.auxiliaries.splitter_builder import get_splitter from federatedscope.core.auxiliaries.transform_builder import get_transform -from federatedscope.core.interface.base_data import ClientData, \ - StandaloneDataDict +from federatedscope.core.data import ClientData, StandaloneDataDict def get_numGraphLabels(dataset): diff --git a/federatedscope/gfl/dataloader/dataloader_link.py b/federatedscope/gfl/dataloader/dataloader_link.py index 755b8af5d..ae983b078 100644 --- a/federatedscope/gfl/dataloader/dataloader_link.py +++ b/federatedscope/gfl/dataloader/dataloader_link.py @@ -5,7 +5,7 @@ from federatedscope.core.auxiliaries.splitter_builder import get_splitter from federatedscope.core.auxiliaries.transform_builder import get_transform -from federatedscope.core.interface.base_data import StandaloneDataDict +from federatedscope.core.data import StandaloneDataDict def raw2loader(raw_data, config=None): diff --git a/federatedscope/gfl/dataloader/dataloader_node.py b/federatedscope/gfl/dataloader/dataloader_node.py index 89796297d..ce04ee35b 100644 --- a/federatedscope/gfl/dataloader/dataloader_node.py +++ b/federatedscope/gfl/dataloader/dataloader_node.py @@ -9,7 +9,7 @@ from federatedscope.core.auxiliaries.splitter_builder import get_splitter from federatedscope.core.auxiliaries.transform_builder import get_transform -from federatedscope.core.interface.base_data import StandaloneDataDict +from federatedscope.core.data import StandaloneDataDict INF = np.iinfo(np.int64).max diff --git a/federatedscope/mf/dataloader/dataloader.py b/federatedscope/mf/dataloader/dataloader.py index c8a21e5c5..062a614d3 100644 --- a/federatedscope/mf/dataloader/dataloader.py +++ b/federatedscope/mf/dataloader/dataloader.py @@ -7,7 +7,7 @@ import collections import importlib -from federatedscope.core.interface.base_data import StandaloneDataDict +from federatedscope.core.data import StandaloneDataDict MFDATA_CLASS_DICT = { "vflmovielens1m": "VFLMovieLens1M", diff --git a/federatedscope/nlp/dataloader/dataloader.py 
b/federatedscope/nlp/dataloader/dataloader.py index 835b897a5..eb21e1cf5 100644 --- a/federatedscope/nlp/dataloader/dataloader.py +++ b/federatedscope/nlp/dataloader/dataloader.py @@ -4,8 +4,7 @@ from federatedscope.nlp.dataset.leaf_twitter import LEAF_TWITTER from federatedscope.nlp.dataset.leaf_synthetic import LEAF_SYNTHETIC from federatedscope.core.auxiliaries.transform_builder import get_transform -from federatedscope.core.interface.base_data import ClientData, \ - StandaloneDataDict +from federatedscope.core.data import ClientData, StandaloneDataDict def load_nlp_dataset(config=None, client_cfgs=None): diff --git a/federatedscope/tabular/dataloader/quadratic.py b/federatedscope/tabular/dataloader/quadratic.py index 0c6330773..67a6686d7 100644 --- a/federatedscope/tabular/dataloader/quadratic.py +++ b/federatedscope/tabular/dataloader/quadratic.py @@ -1,8 +1,7 @@ import numpy as np from torch.utils.data import DataLoader -from federatedscope.core.interface.base_data import ClientData, \ - StandaloneDataDict +from federatedscope.core.data import ClientData, StandaloneDataDict def load_quadratic_dataset(config, client_cfgs=None): diff --git a/federatedscope/translator/data_translator.py b/federatedscope/translator/data_translator.py index 4cd15291d..8c4783ab7 100644 --- a/federatedscope/translator/data_translator.py +++ b/federatedscope/translator/data_translator.py @@ -1,6 +1,5 @@ from federatedscope.core.auxiliaries.splitter_builder import get_splitter -from federatedscope.core.interface.base_data import ClientData, \ - StandaloneDataDict +from federatedscope.core.data import ClientData, StandaloneDataDict class BaseDataTranslator(StandaloneDataDict): diff --git a/federatedscope/vertical_fl/dataloader/dataloader.py b/federatedscope/vertical_fl/dataloader/dataloader.py index c07d4b5f4..bd7460574 100644 --- a/federatedscope/vertical_fl/dataloader/dataloader.py +++ b/federatedscope/vertical_fl/dataloader/dataloader.py @@ -1,6 +1,6 @@ import numpy as np -from federatedscope.core.interface.base_data import StandaloneDataDict +from federatedscope.core.data import StandaloneDataDict def load_vertical_data(config=None, generate=False): From 0e85138a7200dc525e88e69c26c77e9d68d52fff Mon Sep 17 00:00:00 2001 From: rayrayraykk <18007356109@163.com> Date: Wed, 7 Sep 2022 11:28:42 +0800 Subject: [PATCH 04/39] move data translator --- federatedscope/core/auxiliaries/data_builder.py | 3 +-- federatedscope/core/data/__init__.py | 3 ++- federatedscope/{translator => core/data}/data_translator.py | 0 federatedscope/translator/__init__.py | 3 --- 4 files changed, 3 insertions(+), 6 deletions(-) rename federatedscope/{translator => core/data}/data_translator.py (100%) delete mode 100644 federatedscope/translator/__init__.py diff --git a/federatedscope/core/auxiliaries/data_builder.py b/federatedscope/core/auxiliaries/data_builder.py index fd7e89960..edb0dd1f0 100644 --- a/federatedscope/core/auxiliaries/data_builder.py +++ b/federatedscope/core/auxiliaries/data_builder.py @@ -6,8 +6,7 @@ import numpy as np from federatedscope.core.auxiliaries.utils import setup_seed -from federatedscope.core.data import StandaloneDataDict -from federatedscope.translator import BaseDataTranslator +from federatedscope.core.data import StandaloneDataDict, BaseDataTranslator import federatedscope.register as register diff --git a/federatedscope/core/data/__init__.py b/federatedscope/core/data/__init__.py index 79d9f9d81..8b58d26d5 100644 --- a/federatedscope/core/data/__init__.py +++ b/federatedscope/core/data/__init__.py @@ 
-1,3 +1,4 @@
 from federatedscope.core.data.base_data import StandaloneDataDict, ClientData
+from federatedscope.core.data.data_translator import BaseDataTranslator
 
-__all__ = ['StandaloneDataDict', 'ClientData']
+__all__ = ['StandaloneDataDict', 'ClientData', 'BaseDataTranslator']
diff --git a/federatedscope/translator/data_translator.py b/federatedscope/core/data/data_translator.py
similarity index 100%
rename from federatedscope/translator/data_translator.py
rename to federatedscope/core/data/data_translator.py
diff --git a/federatedscope/translator/__init__.py b/federatedscope/translator/__init__.py
deleted file mode 100644
index 336603fc0..000000000
--- a/federatedscope/translator/__init__.py
+++ /dev/null
@@ -1,3 +0,0 @@
-from federatedscope.translator.data_translator import BaseDataTranslator
-
-__all__ = ["BaseDataTranslator"]

From 0c7cd412c0b41df3a0e0b15adc738c028efe0466 Mon Sep 17 00:00:00 2001
From: rayrayraykk <18007356109@163.com>
Date: Wed, 7 Sep 2022 11:31:15 +0800
Subject: [PATCH 05/39] rename file

---
 federatedscope/core/data/__init__.py | 2 +-
 .../core/data/{data_translator.py => base_translator.py} | 0
 2 files changed, 1 insertion(+), 1 deletion(-)
 rename federatedscope/core/data/{data_translator.py => base_translator.py} (100%)

diff --git a/federatedscope/core/data/__init__.py b/federatedscope/core/data/__init__.py
index 8b58d26d5..c051085b6 100644
--- a/federatedscope/core/data/__init__.py
+++ b/federatedscope/core/data/__init__.py
@@ -1,4 +1,4 @@
 from federatedscope.core.data.base_data import StandaloneDataDict, ClientData
-from federatedscope.core.data.data_translator import BaseDataTranslator
+from federatedscope.core.data.base_translator import BaseDataTranslator
 
 __all__ = ['StandaloneDataDict', 'ClientData', 'BaseDataTranslator']
diff --git a/federatedscope/core/data/data_translator.py b/federatedscope/core/data/base_translator.py
similarity index 100%
rename from federatedscope/core/data/data_translator.py
rename to federatedscope/core/data/base_translator.py

From 1477c613692add82054ce546e36fd24790e041a2 Mon Sep 17 00:00:00 2001
From: rayrayraykk <18007356109@163.com>
Date: Tue, 13 Sep 2022 20:12:41 +0800
Subject: [PATCH 06/39] add README for data protocol

---
 federatedscope/core/data/README.md | 154 ++++++++++++++++++++
 federatedscope/core/data/base_translator.py | 7 +-
 2 files changed, 160 insertions(+), 1 deletion(-)
 create mode 100644 federatedscope/core/data/README.md

diff --git a/federatedscope/core/data/README.md b/federatedscope/core/data/README.md
new file mode 100644
index 000000000..c451a7ffd
--- /dev/null
+++ b/federatedscope/core/data/README.md
@@ -0,0 +1,154 @@
+# DataZoo
+
+FederatedScope provides a rich collection of federated datasets for researchers, including images, texts, graphs, recommendation systems, and speeches, as well as the utility class `BaseDataTranslator` for building your own FS datasets.
+
+## Built-in FS data
+
+All datasets can be accessed from [`federatedscope.core.auxiliaries.data_builder.get_data`](https://github.com/alibaba/FederatedScope/blob/master/federatedscope/core/auxiliaries/data_builder.py), which are built into [`federatedscope.core.data.StandaloneDataDict`](https://github.com/alibaba/FederatedScope/tree/master/federatedscope/core/data/base_data.py) (for more details, see [[DataZoo advanced]](#advanced)). By setting `cfg.data.type = DATASET_NAME`, FS will download and pre-process a specific dataset to be passed to `FedRunner`. 
For example: + +```python +# Source: federatedscope/main.py + +data, cfg = get_data(cfg) +runner = FedRunner(data=data, + server_class=get_server_cls(cfg), + client_class=get_client_cls(cfg), + config=cfg.clone()) +``` + +We provide a **look-up table** for you to get started with our DataZoo: + +| `cfg.data.type` | Domain | +| ------------------------------------------------------------ | ------------------- | +| [FEMNIST](https://github.com/alibaba/FederatedScope/blob/master/federatedscope/cv/dataset/leaf_cv.py) | CV | +| [Celeba](https://github.com/alibaba/FederatedScope/blob/master/federatedscope/cv/dataset/leaf_cv.py) | CV | +| [{DNAME}@torchvision](https://github.com/alibaba/FederatedScope/blob/master/federatedscope/core/auxiliaries/data_builder.py) | CV | +| [Shakespeare](https://github.com/alibaba/FederatedScope/blob/master/federatedscope/nlp/dataset/leaf_nlp.py) | NLP | +| [SubReddit](https://github.com/alibaba/FederatedScope/blob/master/federatedscope/nlp/dataset/leaf_nlp.py) | NLP | +| [Twitter (Sentiment140)](https://github.com/alibaba/FederatedScope/blob/master/federatedscope/nlp/dataset/leaf_twitter.py) | NLP | +| [{DNAME}@torchtext](https://github.com/alibaba/FederatedScope/blob/master/federatedscope/core/auxiliaries/data_builder.py) | NLP | +| [{DNAME}@huggingface_datasets](https://github.com/alibaba/FederatedScope/blob/master/federatedscope/core/auxiliaries/data_builder.py) | NLP | +| [Cora](https://github.com/alibaba/FederatedScope/blob/master/federatedscope/gfl/dataloader/dataloader_node.py) | Graph (node-level) | +| [CiteSeer](https://github.com/alibaba/FederatedScope/blob/master/federatedscope/gfl/dataloader/dataloader_node.py) | Graph (node-level) | +| [PubMed](https://github.com/alibaba/FederatedScope/blob/master/federatedscope/gfl/dataloader/dataloader_node.py) | Graph (node-level) | +| [DBLP_conf](https://github.com/alibaba/FederatedScope/blob/master/federatedscope/gfl/dataset/dblp_new.py) | Graph (node-level) | +| [DBLP_org](https://github.com/alibaba/FederatedScope/blob/master/federatedscope/gfl/dataset/dblp_new.py) | Graph (node-level) | +| [csbm](https://github.com/alibaba/FederatedScope/blob/master/federatedscope/gfl/dataset/cSBM_dataset.py) | Graph (node-level) | +| [Epinions](https://github.com/alibaba/FederatedScope/blob/master/federatedscope/gfl/dataset/recsys.py) | Graph (link-level) | +| [Ciao](https://github.com/alibaba/FederatedScope/blob/master/federatedscope/gfl/dataset/recsys.py) | Graph (link-level) | +| [FB15k](https://github.com/alibaba/FederatedScope/blob/master/federatedscope/gfl/dataloader/dataloader_link.py) | Graph (link-level) | +| [FB15k-237](https://github.com/alibaba/FederatedScope/blob/master/federatedscope/gfl/dataloader/dataloader_link.py) | Graph (link-level) | +| [WN18](https://github.com/alibaba/FederatedScope/blob/master/federatedscope/gfl/dataloader/dataloader_link.py) | Graph (link-level) | +| [MUTAG](https://github.com/alibaba/FederatedScope/blob/master/federatedscope/gfl/dataloader/dataloader_graph.py) | Graph (graph-level) | +| [BZR](https://github.com/alibaba/FederatedScope/blob/master/federatedscope/gfl/dataloader/dataloader_graph.py) | Graph (graph-level) | +| [COX2](https://github.com/alibaba/FederatedScope/blob/master/federatedscope/gfl/dataloader/dataloader_graph.py) | Graph (graph-level) | +| 
[DHFR](https://github.com/alibaba/FederatedScope/blob/master/federatedscope/gfl/dataloader/dataloader_graph.py) | Graph (graph-level) | +| [PTC_MR](https://github.com/alibaba/FederatedScope/blob/master/federatedscope/gfl/dataloader/dataloader_graph.py) | Graph (graph-level) | +| [AIDS](https://github.com/alibaba/FederatedScope/blob/master/federatedscope/gfl/dataloader/dataloader_graph.py) | Graph (graph-level) | +| [NCI1](https://github.com/alibaba/FederatedScope/blob/master/federatedscope/gfl/dataloader/dataloader_graph.py) | Graph (graph-level) | +| [ENZYMES](https://github.com/alibaba/FederatedScope/blob/master/federatedscope/gfl/dataloader/dataloader_graph.py) | Graph (graph-level) | +| [DD](https://github.com/alibaba/FederatedScope/blob/master/federatedscope/gfl/dataloader/dataloader_graph.py) | Graph (graph-level) | +| [PROTEINS](https://github.com/alibaba/FederatedScope/blob/master/federatedscope/gfl/dataloader/dataloader_graph.py) | Graph (graph-level) | +| [COLLAB](https://github.com/alibaba/FederatedScope/blob/master/federatedscope/gfl/dataloader/dataloader_graph.py) | Graph (graph-level) | +| [IMDB-BINARY](https://github.com/alibaba/FederatedScope/blob/master/federatedscope/gfl/dataloader/dataloader_graph.py) | Graph (graph-level) | +| [IMDB-MULTI](https://github.com/alibaba/FederatedScope/blob/master/federatedscope/gfl/dataloader/dataloader_graph.py) | Graph (graph-level) | +| [REDDIT-BINARY](https://github.com/alibaba/FederatedScope/blob/master/federatedscope/gfl/dataloader/dataloader_graph.py) | Graph (graph-level) | +| [HIV](https://github.com/alibaba/FederatedScope/blob/master/federatedscope/gfl/dataloader/dataloader_graph.py) | Graph (graph-level) | +| [ESOL](https://github.com/alibaba/FederatedScope/blob/master/federatedscope/gfl/dataloader/dataloader_graph.py) | Graph (graph-level) | +| [FREESOLV](https://github.com/alibaba/FederatedScope/blob/master/federatedscope/gfl/dataloader/dataloader_graph.py) | Graph (graph-level) | +| [LIPO](https://github.com/alibaba/FederatedScope/blob/master/federatedscope/gfl/dataloader/dataloader_graph.py) | Graph (graph-level) | +| [PCBA](https://github.com/alibaba/FederatedScope/blob/master/federatedscope/gfl/dataloader/dataloader_graph.py) | Graph (graph-level) | +| [MUV](https://github.com/alibaba/FederatedScope/blob/master/federatedscope/gfl/dataloader/dataloader_graph.py) | Graph (graph-level) | +| [BACE](https://github.com/alibaba/FederatedScope/blob/master/federatedscope/gfl/dataloader/dataloader_graph.py) | Graph (graph-level) | +| [BBBP](https://github.com/alibaba/FederatedScope/blob/master/federatedscope/gfl/dataloader/dataloader_graph.py) | Graph (graph-level) | +| [TOX21](https://github.com/alibaba/FederatedScope/blob/master/federatedscope/gfl/dataloader/dataloader_graph.py) | Graph (graph-level) | +| [TOXCAST](https://github.com/alibaba/FederatedScope/blob/master/federatedscope/gfl/dataloader/dataloader_graph.py) | Graph (graph-level) | +| [SIDER](https://github.com/alibaba/FederatedScope/blob/master/federatedscope/gfl/dataloader/dataloader_graph.py) | Graph (graph-level) | +| [CLINTOX](https://github.com/alibaba/FederatedScope/blob/master/federatedscope/gfl/dataloader/dataloader_graph.py) | Graph (graph-level) | +| 
[graph_multi_domain_mol](https://github.com/alibaba/FederatedScope/blob/master/federatedscope/gfl/dataloader/dataloader_graph.py) | Graph (graph-level) |
+| [graph_multi_domain_small](https://github.com/alibaba/FederatedScope/blob/master/federatedscope/gfl/dataloader/dataloader_graph.py) | Graph (graph-level) |
+| [graph_multi_domain_biochem](https://github.com/alibaba/FederatedScope/blob/master/federatedscope/gfl/dataloader/dataloader_graph.py) | Graph (graph-level) |
+| [cikmcup](https://github.com/alibaba/FederatedScope/blob/master/federatedscope/gfl/dataset/cikm_cup.py) | Graph (graph-level) |
+| [toy](https://github.com/alibaba/FederatedScope/blob/master/federatedscope/core/auxiliaries/data_builder.py) | Tabular |
+| [synthetic](https://github.com/alibaba/FederatedScope/blob/master/federatedscope/nlp/dataset/leaf_synthetic.py) | Tabular |
+| [quadratic](https://github.com/alibaba/FederatedScope/blob/master/federatedscope/tabular/dataloader/quadratic.py) | Tabular |
+| [{DNAME}@openml](https://github.com/alibaba/FederatedScope/blob/master/federatedscope/core/auxiliaries/data_builder.py) | Tabular |
+| [vertical_fl_data](https://github.com/alibaba/FederatedScope/blob/master/federatedscope/vertical_fl/dataloader/dataloader.py) | Tabular(vertical) |
+| [VFLMovieLens1M](https://github.com/alibaba/FederatedScope/blob/master/federatedscope/mf/dataset/movielens.py) | Recommendation |
+| [VFLMovieLens10M](https://github.com/alibaba/FederatedScope/blob/master/federatedscope/mf/dataset/movielens.py) | Recommendation |
+| [HFLMovieLens1M](https://github.com/alibaba/FederatedScope/blob/master/federatedscope/mf/dataset/movielens.py) | Recommendation |
+| [HFLMovieLens10M](https://github.com/alibaba/FederatedScope/blob/master/federatedscope/mf/dataset/movielens.py) | Recommendation |
+| [VFLNetflix](https://github.com/alibaba/FederatedScope/blob/master/federatedscope/mf/dataset/netflix.py) | Recommendation |
+| [HFLNetflix](https://github.com/alibaba/FederatedScope/blob/master/federatedscope/mf/dataset/netflix.py) | Recommendation |
+
+## DataZoo Advanced
+
+In this section, we will introduce key concepts and tools to help you understand how FS data works and how to use it to build your own data in FS.
+
+Concepts:
+
+* [`federatedscope.core.data.ClientData`](https://github.com/alibaba/FederatedScope/blob/master/federatedscope/core/data/base_data.py)
+
+  * `ClientData` is a subclass of `dict`. In federated learning, each client (and the server) owns a `ClientData` for training, validating, or testing. Thus, each `ClientData` has one or more of `train`, `val`, and `test` as keys, each mapped to a corresponding `DataLoader`.
+
+  * The `DataLoader` of each key is created by the `setup()` method, which reads the `DataLoader` arguments, such as `batch_size` and `shuffle`, from `cfg`.
+
+  Example:
+
+  ```python
+  # Instantiate client_data for each Client
+  client_data = ClientData(DataLoader,
+                           cfg,
+                           train=train_data,
+                           val=None,
+                           test=test_data)
+  # other_cfg with different batch size
+  client_data.setup(other_cfg)
+  print(client_data)
+
+  >> {'train': DataLoader(train_data), 'test': DataLoader(test_data)}
+  ```
+
+* [`federatedscope.core.data.StandaloneDataDict`](https://github.com/alibaba/FederatedScope/blob/master/federatedscope/core/data/base_data.py)
+  * `StandaloneDataDict` is a subclass of `dict`.
As the name implies, `StandaloneDataDict` consists of all `ClientData` with client index as key (`0`, `1`, `2`, ...) in standalone mode. The key `0` is the data of the server for global evaluation or other uses.
+  * The method `preprocess()` in `StandaloneDataDict` adjusts the inner `ClientData` when `cfg` changes. For example, in global mode (`cfg.federate.method == "global"`), `StandaloneDataDict` merges all `ClientData` into a single client to perform global training.
+
+Tools
+
+* [`federatedscope.core.data.BaseDataTranslator`](https://github.com/alibaba/FederatedScope/blob/master/federatedscope/core/data/base_translator.py)
+
+  * `BaseDataTranslator` is a subclass of `StandaloneDataDict`, which convert [`torch.utils.data.Dataset`](https://pytorch.org/docs/stable/data.html#torch.utils.data.Dataset) or `dict` of data split to `StandaloneDataDict` according to `cfg`. After translating, it can be directly passed to `FedRunner` to launch a FL course.
+
+  * `BaseDataTranslator` will split data to `train`, `val,` and `test` by `cfg.data.splits`. And using `Splitter` to split each data split to each client. In order to use `BaseDataTranslator`, `cfg.data.splitter`, `cfg.federate.client_num,` and other arguments of `Splitter` must be specified.
+
+  Example:
+
+  ```python
+  cfg.data.splitter = 'lda'
+  cfg.federate.client_num = 5
+  cfg.data.splitter_args = [{'alpha': 0.2}]
+
+  raw_data = CIFAR10()
+  fs_data = BaseDataTranslator(raw_data, global_cfg, DataLoader)
+
+  runner = FedRunner(data=fs_data,
+                     server_class=get_server_cls(cfg),
+                     client_class=get_client_cls(cfg),
+                     config=cfg.clone())
+  ```
+
+* [`federatedscope.core.splitters`](federatedscope.core.splitters)
+
+  * To generate simulated federation datasets, we provide `Splitter`s, which are responsible for dispersing a given standalone dataset into multiple clients, with configurable statistical heterogeneity among them.
+
+  We provide a **look-up table** for you to get started with our `Splitter`:
+
+  | `cfg.data.splitter` | Domain | Arguments |
+  | :------------------ | ------------------- | :----------------------------------------------- |
+  | LDA | Generic | `alpha` |
+  | Louvain | Graph (node-level) | `delta` |
+  | Random | Graph (node-level) | `sampling_rate`, `overlapping_rate`, `drop_edge` |
+  | rel_type | Graph (link-level) | `alpha` |
+  | Scaffold | Molecular | - |
+  | Scaffold_lda | Molecular | `alpha` |
+  | Rand_chunk | Graph (graph-level) | - |
diff --git a/federatedscope/core/data/base_translator.py b/federatedscope/core/data/base_translator.py
index 8c4783ab7..cc992ba9c 100644
--- a/federatedscope/core/data/base_translator.py
+++ b/federatedscope/core/data/base_translator.py
@@ -8,7 +8,8 @@ def __init__(self, dataset, global_cfg, loader, client_cfgs=None):
         """
         Args:
             dataset: `torch.utils.data.Dataset`, `List` of (feature, label)
-                or split dataset tuple of (train, val, test)
+                or split dataset tuple of (train, val, test) or Tuple of
+                split dataset with [train, val, test]
             global_cfg: global CfgNode
             loader: `torch.utils.data.DataLoader` or subclass of it
             client_cfgs: client cfg `Dict`
@@ -33,6 +34,10 @@ def split_train_val_test(self):
         """
         dataset, splits = self.dataset, self.global_cfg.data.splits
         if isinstance(dataset, tuple):
+            # No need to split train/val/test for tuple dataset.
+            error_msg = 'If dataset is tuple, it must contains ' \
+                        'train, valid and test split.'
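+            # The tuple is assumed to be ordered as (train, val, test).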
+            assert len(dataset) == len(['train', 'val', 'test']), error_msg
             return [dataset[0], dataset[1], dataset[2]]

         from torch.utils.data.dataset import random_split

From d9b1dc07e9c168a966a5ee799a6f7b1be2041a94 Mon Sep 17 00:00:00 2001
From: rayrayraykk <18007356109@163.com>
Date: Wed, 14 Sep 2022 15:05:49 +0800
Subject: [PATCH 07/39] update interface of data translator

---
 federatedscope/core/data/README.md          |  9 ++++----
 federatedscope/core/data/base_translator.py | 23 +++++++++++++--------
 2 files changed, 19 insertions(+), 13 deletions(-)

diff --git a/federatedscope/core/data/README.md b/federatedscope/core/data/README.md
index c451a7ffd..e63e062df 100644
--- a/federatedscope/core/data/README.md
+++ b/federatedscope/core/data/README.md
@@ -117,9 +117,9 @@ Tools

 * [`federatedscope.core.data.BaseDataTranslator`](https://github.com/alibaba/FederatedScope/blob/master/federatedscope/core/data/base_translator.py)

-  * `BaseDataTranslator` is a subclass of `StandaloneDataDict`, which convert [`torch.utils.data.Dataset`](https://pytorch.org/docs/stable/data.html#torch.utils.data.Dataset) or `dict` of data split to `StandaloneDataDict` according to `cfg`. After translating, it can be directly passed to `FedRunner` to launch a FL course.
+  * `BaseDataTranslator` converts [`torch.utils.data.Dataset`](https://pytorch.org/docs/stable/data.html#torch.utils.data.Dataset) or `dict` of data splits to `StandaloneDataDict` according to `cfg`. After translating, it can be directly passed to `FedRunner` to launch an FL course.

-  * `BaseDataTranslator` will split data to `train`, `val,` and `test` by `cfg.data.splits`. And using `Splitter` to split each data split to each client. In order to use `BaseDataTranslator`, `cfg.data.splitter`, `cfg.federate.client_num,` and other arguments of `Splitter` must be specified.
+  * `BaseDataTranslator` will split data into `train`, `val`, and `test` by `cfg.data.splits` (**ML split**), and then use a `Splitter` to dispatch each split to the clients (**FL split**). In order to use `BaseDataTranslator`, `cfg.data.splitter`, `cfg.federate.client_num`, and other arguments of the `Splitter` must be specified.

@@ -128,8 +128,9 @@ Tools
     cfg.data.splitter = 'lda'
     cfg.federate.client_num = 5
     cfg.data.splitter_args = [{'alpha': 0.2}]

+    translator = BaseDataTranslator(global_cfg, DataLoader)
     raw_data = CIFAR10()
-    fs_data = BaseDataTranslator(raw_data, global_cfg, DataLoader)
+    fs_data = translator(raw_data)

     runner = FedRunner(data=fs_data,
                        server_class=get_server_cls(cfg),
@@ -151,4 +152,4 @@ Tools
   | rel_type | Graph (link-level) | `alpha` |
   | Scaffold | Molecular | - |
   | Scaffold_lda | Molecular | `alpha` |
-  | Rand_chunk | Graph (graph-level) | - |
+  | Rand_chunk | Graph (graph-level) | - |
\ No newline at end of file
diff --git a/federatedscope/core/data/base_translator.py b/federatedscope/core/data/base_translator.py
index cc992ba9c..a48cd6202 100644
--- a/federatedscope/core/data/base_translator.py
+++ b/federatedscope/core/data/base_translator.py
@@ -2,29 +2,34 @@ from federatedscope.core.data import ClientData, StandaloneDataDict


-class BaseDataTranslator(StandaloneDataDict):
-    def __init__(self, dataset, global_cfg, loader, client_cfgs=None):
+class BaseDataTranslator:
+    def __init__(self, global_cfg, loader, client_cfgs=None):
         """
+        Convert data to `StandaloneDataDict`.
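+
+        Since the dataset is supplied to `__call__` rather than
+        `__init__`, a single translator instance can be reused for
+        several datasets.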
Args: - dataset: `torch.utils.data.Dataset`, `List` of (feature, label) - or split dataset tuple of (train, val, test) or Tuple of - split dataset with [train, val, test] global_cfg: global CfgNode loader: `torch.utils.data.DataLoader` or subclass of it client_cfgs: client cfg `Dict` """ - self.dataset = dataset self.loader = loader self.global_cfg = global_cfg.clone() self.client_cfgs = client_cfgs self.splitter = get_splitter(global_cfg) + def __call__(self, dataset): + """ + + Args: + dataset: `torch.utils.data.Dataset`, `List` of (feature, label) + or split dataset tuple of (train, val, test) or Tuple of + split dataset with [train, val, test] + """ train, val, test = self.split_train_val_test() datadict = self.split_to_client(train, val, test) - super(BaseDataTranslator, self).__init__(datadict, global_cfg) + return StandaloneDataDict(datadict, self.global_cfg) - def split_train_val_test(self): + def split_train_val_test(self, dataset): """ Split dataset to train, val, test if not provided. @@ -32,7 +37,7 @@ def split_train_val_test(self): split_data (List): List of split dataset, [train, val, test] """ - dataset, splits = self.dataset, self.global_cfg.data.splits + splits = self.global_cfg.data.splits if isinstance(dataset, tuple): # No need to split train/val/test for tuple dataset. error_msg = 'If dataset is tuple, it must contains ' \ From 2dd4bca5812f864903a20666e6a1fec5a3a55e88 Mon Sep 17 00:00:00 2001 From: rayrayraykk <18007356109@163.com> Date: Wed, 14 Sep 2022 15:41:37 +0800 Subject: [PATCH 08/39] move toy to tabular folder --- .../core/auxiliaries/data_builder.py | 118 +----------------- federatedscope/core/data/__init__.py | 8 +- federatedscope/core/data/base_translator.py | 10 ++ federatedscope/tabular/dataloader/toy.py | 118 ++++++++++++++++++ 4 files changed, 138 insertions(+), 116 deletions(-) create mode 100644 federatedscope/tabular/dataloader/toy.py diff --git a/federatedscope/core/auxiliaries/data_builder.py b/federatedscope/core/auxiliaries/data_builder.py index edb0dd1f0..b0781a047 100644 --- a/federatedscope/core/auxiliaries/data_builder.py +++ b/federatedscope/core/auxiliaries/data_builder.py @@ -1,12 +1,11 @@ import os -import pickle import logging from random import shuffle import numpy as np from federatedscope.core.auxiliaries.utils import setup_seed -from federatedscope.core.data import StandaloneDataDict, BaseDataTranslator +from federatedscope.core.data import BaseDataTranslator import federatedscope.register as register @@ -19,118 +18,8 @@ f'{error} in `federatedscope.contrib.data`, some modules are not ' f'available.') - -def load_toy_data(config=None, client_cfgs=None): - generate = config.federate.mode.lower() == 'standalone' - - def _generate_data(client_num=5, - instance_num=1000, - feature_num=5, - save_data=False): - """ - Generate data in FedRunner format - Args: - client_num: - instance_num: - feature_num: - save_data: - - Returns: - { - '{client_id}': { - 'train': { - 'x': ..., - 'y': ... - }, - 'test': { - 'x': ..., - 'y': ... - }, - 'val': { - 'x': ..., - 'y': ... 
- } - } - } - - """ - weights = np.random.normal(loc=0.0, scale=1.0, size=feature_num) - bias = np.random.normal(loc=0.0, scale=1.0) - data = dict() - for each_client in range(1, client_num + 1): - data[each_client] = dict() - client_x = np.random.normal(loc=0.0, - scale=0.5 * each_client, - size=(instance_num, feature_num)) - client_y = np.sum(client_x * weights, axis=-1) + bias - client_y = np.expand_dims(client_y, -1) - client_data = {'x': client_x, 'y': client_y} - data[each_client]['train'] = client_data - - # test data - test_x = np.random.normal(loc=0.0, - scale=1.0, - size=(instance_num, feature_num)) - test_y = np.sum(test_x * weights, axis=-1) + bias - test_y = np.expand_dims(test_y, -1) - test_data = {'x': test_x, 'y': test_y} - for each_client in range(1, client_num + 1): - data[each_client]['test'] = test_data - - # val data - val_x = np.random.normal(loc=0.0, - scale=1.0, - size=(instance_num, feature_num)) - val_y = np.sum(val_x * weights, axis=-1) + bias - val_y = np.expand_dims(val_y, -1) - val_data = {'x': val_x, 'y': val_y} - for each_client in range(1, client_num + 1): - data[each_client]['val'] = val_data - - # server_data - data[0] = dict() - data[0]['train'] = None - data[0]['val'] = val_data - data[0]['test'] = test_data - - if save_data: - # server_data = dict() - save_client_data = dict() - - for client_idx in range(0, client_num + 1): - if client_idx == 0: - filename = 'data/server_data' - else: - filename = 'data/client_{:d}_data'.format(client_idx) - with open(filename, 'wb') as f: - save_client_data['train'] = { - k: v.tolist() - for k, v in data[client_idx]['train'].items() - } - save_client_data['val'] = { - k: v.tolist() - for k, v in data[client_idx]['val'].items() - } - save_client_data['test'] = { - k: v.tolist() - for k, v in data[client_idx]['test'].items() - } - pickle.dump(save_client_data, f) - - return data - - if generate: - data = _generate_data(client_num=config.federate.client_num, - save_data=config.data.save_data) - else: - with open(config.distribute.data_file, 'rb') as f: - data = pickle.load(f) - for key in data.keys(): - data[key] = {k: np.asarray(v) - for k, v in data[key].items() - } if data[key] is not None else None - - return StandaloneDataDict(data, config), config +# TODO: Add more translator +DATATYPE_TO_TRANSLATOR = {} def load_external_data(config=None, client_cfgs=None): @@ -588,6 +477,7 @@ def get_data(config, client_cfgs=None): if data_and_config is not None: return data_and_config if config.data.type.lower() == 'toy': + from federatedscope.tabular.dataloader.toy import load_toy_data data, modified_config = load_toy_data(config, client_cfgs) elif config.data.type.lower() == 'quadratic': from federatedscope.tabular.dataloader import load_quadratic_dataset diff --git a/federatedscope/core/data/__init__.py b/federatedscope/core/data/__init__.py index c051085b6..acaeb2598 100644 --- a/federatedscope/core/data/__init__.py +++ b/federatedscope/core/data/__init__.py @@ -1,4 +1,8 @@ from federatedscope.core.data.base_data import StandaloneDataDict, ClientData -from federatedscope.core.data.base_translator import BaseDataTranslator +from federatedscope.core.data.base_translator import BaseDataTranslator, \ + DummyDataTranslator -__all__ = ['StandaloneDataDict', 'ClientData', 'BaseDataTranslator'] +__all__ = [ + 'StandaloneDataDict', 'ClientData', 'BaseDataTranslator', + 'DummyDataTranslator' +] diff --git a/federatedscope/core/data/base_translator.py b/federatedscope/core/data/base_translator.py index a48cd6202..8d1cf2e0e 100644 --- 
a/federatedscope/core/data/base_translator.py +++ b/federatedscope/core/data/base_translator.py @@ -90,3 +90,13 @@ def split_to_client(self, train, val, test): val=split_val[client_id - 1], test=split_test[client_id - 1]) return datadict + + +class DummyDataTranslator(BaseDataTranslator): + """ + Translator split data_dict to `StandaloneDataDict`. + """ + def __call__(self, datadict): + if not isinstance(datadict, StandaloneDataDict): + datadict = StandaloneDataDict(datadict, self.global_cfg) + return datadict diff --git a/federatedscope/tabular/dataloader/toy.py b/federatedscope/tabular/dataloader/toy.py new file mode 100644 index 000000000..b8fb4ad9a --- /dev/null +++ b/federatedscope/tabular/dataloader/toy.py @@ -0,0 +1,118 @@ +import pickle + +import numpy as np + +from federatedscope.core.data import StandaloneDataDict + + +def load_toy_data(config=None, client_cfgs=None): + generate = config.federate.mode.lower() == 'standalone' + + def _generate_data(client_num=5, + instance_num=1000, + feature_num=5, + save_data=False): + """ + Generate data in FedRunner format + Args: + client_num: + instance_num: + feature_num: + save_data: + + Returns: + { + '{client_id}': { + 'train': { + 'x': ..., + 'y': ... + }, + 'test': { + 'x': ..., + 'y': ... + }, + 'val': { + 'x': ..., + 'y': ... + } + } + } + + """ + weights = np.random.normal(loc=0.0, scale=1.0, size=feature_num) + bias = np.random.normal(loc=0.0, scale=1.0) + data = dict() + for each_client in range(1, client_num + 1): + data[each_client] = dict() + client_x = np.random.normal(loc=0.0, + scale=0.5 * each_client, + size=(instance_num, feature_num)) + client_y = np.sum(client_x * weights, axis=-1) + bias + client_y = np.expand_dims(client_y, -1) + client_data = {'x': client_x, 'y': client_y} + data[each_client]['train'] = client_data + + # test data + test_x = np.random.normal(loc=0.0, + scale=1.0, + size=(instance_num, feature_num)) + test_y = np.sum(test_x * weights, axis=-1) + bias + test_y = np.expand_dims(test_y, -1) + test_data = {'x': test_x, 'y': test_y} + for each_client in range(1, client_num + 1): + data[each_client]['test'] = test_data + + # val data + val_x = np.random.normal(loc=0.0, + scale=1.0, + size=(instance_num, feature_num)) + val_y = np.sum(val_x * weights, axis=-1) + bias + val_y = np.expand_dims(val_y, -1) + val_data = {'x': val_x, 'y': val_y} + for each_client in range(1, client_num + 1): + data[each_client]['val'] = val_data + + # server_data + data[0] = dict() + data[0]['train'] = None + data[0]['val'] = val_data + data[0]['test'] = test_data + + if save_data: + # server_data = dict() + save_client_data = dict() + + for client_idx in range(0, client_num + 1): + if client_idx == 0: + filename = 'data/server_data' + else: + filename = 'data/client_{:d}_data'.format(client_idx) + with open(filename, 'wb') as f: + save_client_data['train'] = { + k: v.tolist() + for k, v in data[client_idx]['train'].items() + } + save_client_data['val'] = { + k: v.tolist() + for k, v in data[client_idx]['val'].items() + } + save_client_data['test'] = { + k: v.tolist() + for k, v in data[client_idx]['test'].items() + } + pickle.dump(save_client_data, f) + + return data + + if generate: + data = _generate_data(client_num=config.federate.client_num, + save_data=config.data.save_data) + else: + with open(config.distribute.data_file, 'rb') as f: + data = pickle.load(f) + for key in data.keys(): + data[key] = {k: np.asarray(v) + for k, v in data[key].items() + } if data[key] is not None else None + + return StandaloneDataDict(data, 
config), config From c7ed56628afed8d550464a09e7b9f7619111e871 Mon Sep 17 00:00:00 2001 From: rayrayraykk <18007356109@163.com> Date: Thu, 15 Sep 2022 10:44:09 +0800 Subject: [PATCH 09/39] WIP --- federatedscope/core/auxiliaries/data_builder.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/federatedscope/core/auxiliaries/data_builder.py b/federatedscope/core/auxiliaries/data_builder.py index b0781a047..de6ca0229 100644 --- a/federatedscope/core/auxiliaries/data_builder.py +++ b/federatedscope/core/auxiliaries/data_builder.py @@ -454,8 +454,9 @@ def load_openml_data(tid, splits=None, config=None): dataset = (dataset.get('train'), dataset.get('val'), dataset.get('test')) # Translate dataset to `StandaloneDataDict` - datadict = BaseDataTranslator(dataset, modified_config, DataLoader, - client_cfgs) + data_translator = BaseDataTranslator(modified_config, DataLoader, + client_cfgs) + datadict = data_translator(dataset) return datadict, modified_config From bb2f044e1cb3ca176b29bece6e9010447bab03c1 Mon Sep 17 00:00:00 2001 From: rayrayraykk <18007356109@163.com> Date: Thu, 15 Sep 2022 18:00:00 +0800 Subject: [PATCH 10/39] [WIP] refactor clientdata, TODO: update yaml, apply translator --- .../attack/auxiliary/poisoning_data.py | 21 +- .../core/auxiliaries/data_builder.py | 581 ++--------------- .../core/auxiliaries/dataloader_builder.py | 43 +- federatedscope/core/auxiliaries/utils.py | 54 -- federatedscope/core/configs/README.md | 70 +-- federatedscope/core/configs/cfg_data.py | 32 +- .../core/configs/cfg_differential_privacy.py | 5 +- federatedscope/core/data/__init__.py | 4 +- federatedscope/core/data/base_data.py | 71 ++- federatedscope/core/data/base_translator.py | 48 +- federatedscope/core/data/dummy_translator.py | 27 + federatedscope/core/data/utils.py | 589 ++++++++++++++++++ federatedscope/core/fed_runner.py | 3 +- federatedscope/core/trainers/context.py | 9 +- federatedscope/core/trainers/torch_trainer.py | 2 +- federatedscope/core/trainers/trainer_Ditto.py | 4 +- federatedscope/core/workers/client.py | 2 +- .../gfl/dataloader/dataloader_graph.py | 2 +- .../gfl/dataloader/dataloader_link.py | 17 +- .../gfl/dataloader/dataloader_node.py | 36 +- federatedscope/gfl/dataset/cikm_cup.py | 18 +- federatedscope/gfl/fedsageplus/worker.py | 18 +- federatedscope/gfl/trainer/linktrainer.py | 17 +- federatedscope/gfl/trainer/nodetrainer.py | 2 +- federatedscope/mf/dataloader/dataloader.py | 14 +- federatedscope/mf/trainer/trainer_sgdmf.py | 4 +- federatedscope/tabular/dataloader/toy.py | 12 +- .../vertical_fl/dataloader/dataloader.py | 16 +- .../vertical_fl/worker/vertical_client.py | 2 +- scripts/example_configs/femnist/avg/ss.yaml | 2 +- .../femnist/hpo_ss_fedex_arm.yaml | 2 +- .../femnist/hpo_ss_fedex_grid.yaml | 2 +- .../example_configs/femnist/hpo_ss_sha.yaml | 2 +- .../run_movielens1m_hfl_standalone.sh | 2 +- .../run_movielens1m_hflsgdmf_standalone.sh | 2 +- .../run_movielens1m_vfl_standalone.sh | 2 +- .../run_movielens1m_vflsgdmf_standalone.sh | 2 +- .../run_femnist_ditto.sh | 2 +- .../run_femnist_fedavg.sh | 2 +- .../run_femnist_fedbn.sh | 2 +- .../run_femnist_fedem.sh | 2 +- .../run_femnist_pfedme.sh | 2 +- .../run_shakespeare_ditto.sh | 2 +- .../run_shakespeare_fedavg.sh | 2 +- .../run_shakespeare_fedem.sh | 2 +- .../run_shakespeare_pfedme.sh | 2 +- .../run_synthetic_ditto.sh | 2 +- .../run_synthetic_fedavg.sh | 2 +- .../run_synthetic_fedem.sh | 2 +- .../run_synthetic_pfedme.sh | 2 +- tests/test_CRA_gan_attack.py | 2 +- tests/test_MIA_gradient_ascent.py | 2 +- 
tests/test_asyn_cifar10.py | 12 +- tests/test_backdoor_attack.py | 2 +- tests/test_ditto.py | 2 +- tests/test_external_dataset.py | 4 +- tests/test_fedem.py | 2 +- tests/test_fedopt.py | 2 +- tests/test_fedprox.py | 2 +- tests/test_fedsageplus.py | 2 +- tests/test_femnist.py | 2 +- tests/test_graph_node_trainer.py | 2 +- tests/test_mf.py | 2 +- tests/test_nbafl.py | 2 +- tests/test_optimizer.py | 2 +- tests/test_pfedme.py | 2 +- tests/test_rec_IG_opt_attack.py | 2 +- tests/test_rec_opt_attack.py | 2 +- 68 files changed, 981 insertions(+), 832 deletions(-) create mode 100644 federatedscope/core/data/dummy_translator.py create mode 100644 federatedscope/core/data/utils.py diff --git a/federatedscope/attack/auxiliary/poisoning_data.py b/federatedscope/attack/auxiliary/poisoning_data.py index ca3949ae5..0d0a9581e 100644 --- a/federatedscope/attack/auxiliary/poisoning_data.py +++ b/federatedscope/attack/auxiliary/poisoning_data.py @@ -50,9 +50,9 @@ def load_poisoned_dataset_edgeset(data, ctx, mode): poison_testset.append((transforms_funcs(sample), label)) data['poison_' + mode] = DataLoader( poison_testset, - batch_size=ctx.data.batch_size, + batch_size=ctx.dataloader.batch_size, shuffle=False, - num_workers=ctx.data.num_workers) + num_workers=ctx.dataloader.num_workers) elif "CIFAR10" in ctx.data.type: target_label = int(ctx.attack.target_label_ind) @@ -91,9 +91,9 @@ def load_poisoned_dataset_edgeset(data, ctx, mode): poison_testset.append((transforms_funcs(sample), label)) data['poison_' + mode] = DataLoader( poison_testset, - batch_size=ctx.data.batch_size, + batch_size=ctx.dataloader.batch_size, shuffle=False, - num_workers=ctx.data.num_workers) + num_workers=ctx.dataloader.num_workers) else: raise RuntimeError( @@ -213,9 +213,9 @@ def load_poisoned_dataset_pixel(data, ctx, mode): poisoned_dataset[iii] = (transforms_funcs(sample), label) data[mode] = DataLoader(poisoned_dataset, - batch_size=ctx.data.batch_size, + batch_size=ctx.dataloader.batch_size, shuffle=True, - num_workers=ctx.data.num_workers) + num_workers=ctx.dataloader.num_workers) if mode == MODE.TEST or mode == MODE.VAL: poisoned_dataset = addTrigger(data[mode].dataset, @@ -234,10 +234,11 @@ def load_poisoned_dataset_pixel(data, ctx, mode): # (channel, height, width) = sample.shape #(c,h,w) poisoned_dataset[iii] = (transforms_funcs(sample), label) - data['poison_' + mode] = DataLoader(poisoned_dataset, - batch_size=ctx.data.batch_size, - shuffle=False, - num_workers=ctx.data.num_workers) + data['poison_' + mode] = DataLoader( + poisoned_dataset, + batch_size=ctx.dataloader.batch_size, + shuffle=False, + num_workers=ctx.dataloader.num_workers) return data diff --git a/federatedscope/core/auxiliaries/data_builder.py b/federatedscope/core/auxiliaries/data_builder.py index de6ca0229..23f8b43b7 100644 --- a/federatedscope/core/auxiliaries/data_builder.py +++ b/federatedscope/core/auxiliaries/data_builder.py @@ -1,11 +1,9 @@ -import os import logging -from random import shuffle - -import numpy as np +from importlib import import_module +from federatedscope.core.data.utils import RegexInverseMap, load_dataset, \ + convert_data_mode from federatedscope.core.auxiliaries.utils import setup_seed -from federatedscope.core.data import BaseDataTranslator import federatedscope.register as register @@ -18,564 +16,55 @@ f'{error} in `federatedscope.contrib.data`, some modules are not ' f'available.') -# TODO: Add more translator -DATATYPE_TO_TRANSLATOR = {} - - -def load_external_data(config=None, client_cfgs=None): - r""" Based on the 
configuration file, this function imports external - datasets and applies train/valid/test splits and split by some specific - `splitter` into the standard FederatedScope input data format. - - Args: - config: `CN` from `federatedscope/core/configs/config.py` - - Returns: - data_local_dict: dict of split dataloader. - Format: - { - 'client_id': { - 'train': DataLoader(), - 'test': DataLoader(), - 'val': DataLoader() - } - } - modified_config: `CN` from `federatedscope/core/configs/config.py`, - which might be modified in the function. - - """ - - import torch - import inspect - from importlib import import_module - from torch.utils.data import DataLoader - from federatedscope.core.auxiliaries.transform_builder import get_transform - - def get_func_args(func): - sign = inspect.signature(func).parameters.values() - sign = set([val.name for val in sign]) - return sign - - def filter_dict(func, kwarg): - sign = get_func_args(func) - common_args = sign.intersection(kwarg.keys()) - filtered_dict = {key: kwarg[key] for key in common_args} - return filtered_dict - - def load_torchvision_data(name, splits=None, config=None): - dataset_func = getattr(import_module('torchvision.datasets'), name) - transform_funcs = get_transform(config, 'torchvision') - if config.data.args: - raw_args = config.data.args[0] - else: - raw_args = {} - if 'download' not in raw_args.keys(): - raw_args.update({'download': True}) - filtered_args = filter_dict(dataset_func.__init__, raw_args) - func_args = get_func_args(dataset_func.__init__) - - # Perform split on different dataset - if 'train' in func_args: - # Split train to (train, val) - dataset_train = dataset_func(root=config.data.root, - train=True, - **filtered_args, - **transform_funcs) - dataset_val = None - dataset_test = dataset_func(root=config.data.root, - train=False, - **filtered_args, - **transform_funcs) - if splits: - train_size = int(splits[0] * len(dataset_train)) - val_size = len(dataset_train) - train_size - lengths = [train_size, val_size] - dataset_train, dataset_val = \ - torch.utils.data.dataset.random_split(dataset_train, - lengths) - - elif 'split' in func_args: - # Use raw split - dataset_train = dataset_func(root=config.data.root, - split='train', - **filtered_args, - **transform_funcs) - dataset_val = dataset_func(root=config.data.root, - split='valid', - **filtered_args, - **transform_funcs) - dataset_test = dataset_func(root=config.data.root, - split='test', - **filtered_args, - **transform_funcs) - elif 'classes' in func_args: - # Use raw split - dataset_train = dataset_func(root=config.data.root, - classes='train', - **filtered_args, - **transform_funcs) - dataset_val = dataset_func(root=config.data.root, - classes='valid', - **filtered_args, - **transform_funcs) - dataset_test = dataset_func(root=config.data.root, - classes='test', - **filtered_args, - **transform_funcs) - else: - # Use config.data.splits - dataset = dataset_func(root=config.data.root, - **filtered_args, - **transform_funcs) - train_size = int(splits[0] * len(dataset)) - val_size = int(splits[1] * len(dataset)) - test_size = len(dataset) - train_size - val_size - lengths = [train_size, val_size, test_size] - dataset_train, dataset_val, dataset_test = \ - torch.utils.data.dataset.random_split(dataset, lengths) - - data_dict = { - 'train': dataset_train, - 'val': dataset_val, - 'test': dataset_test - } - - return data_dict - - def load_torchtext_data(name, splits=None, config=None): - from torch.nn.utils.rnn import pad_sequence - from federatedscope.nlp.dataset.utils 
import label_to_index - - dataset_func = getattr(import_module('torchtext.datasets'), name) - if config.data.args: - raw_args = config.data.args[0] - else: - raw_args = {} - assert 'max_len' in raw_args, "Miss key 'max_len' in " \ - "`config.data.args`." - filtered_args = filter_dict(dataset_func.__init__, raw_args) - dataset = dataset_func(root=config.data.root, **filtered_args) - - # torchtext.transforms requires >= 0.12.0 and torch = 1.11.0, - # so we do not use `get_transform` in torchtext. - - # Merge all data and tokenize - x_list = [] - y_list = [] - for data_iter in dataset: - data, targets = [], [] - for i, item in enumerate(data_iter): - data.append(item[1]) - targets.append(item[0]) - x_list.append(data) - y_list.append(targets) - - x_all, y_all = [], [] - for i in range(len(x_list)): - x_all += x_list[i] - y_all += y_list[i] - - if config.model.type.endswith('transformers'): - from transformers import AutoTokenizer - cache_path = os.path.join(os.getcwd(), "huggingface") - try: - tokenizer = AutoTokenizer.from_pretrained( - config.model.type.split('@')[0], - local_files_only=True, - cache_dir=cache_path) - except Exception as e: - logging.error(f"When loading cached file form " - f"{cache_path}, we faced the exception: \n " - f"{str(e)}") - - x_all = tokenizer(x_all, - return_tensors='pt', - padding=True, - truncation=True, - max_length=raw_args['max_len']) - data = [{key: value[i] - for key, value in x_all.items()} - for i in range(len(next(iter(x_all.values()))))] - if 'classification' in config.model.task.lower(): - targets = label_to_index(y_all) - else: - y_all = tokenizer(y_all, - return_tensors='pt', - padding=True, - truncation=True, - max_length=raw_args['max_len']) - targets = [{key: value[i] - for key, value in y_all.items()} - for i in range(len(next(iter(y_all.values()))))] - else: - from torchtext.data import get_tokenizer - tokenizer = get_tokenizer("basic_english") - if len(config.data.transform) == 0: - raise ValueError( - "`transform` must be one pretrained Word Embeddings from \ - ['GloVe', 'FastText', 'CharNGram']") - if len(config.data.transform) == 1: - config.data.transform.append({}) - vocab = getattr(import_module('torchtext.vocab'), - config.data.transform[0])( - dim=config.model.in_channels, - **config.data.transform[1]) - - if 'classification' in config.model.task.lower(): - data = [ - vocab.get_vecs_by_tokens(tokenizer(x), - lower_case_backup=True) - for x in x_all - ] - targets = label_to_index(y_all) - else: - data = [ - vocab.get_vecs_by_tokens(tokenizer(x), - lower_case_backup=True) - for x in x_all - ] - targets = [ - vocab.get_vecs_by_tokens(tokenizer(y), - lower_case_backup=True) - for y in y_all - ] - targets = pad_sequence(targets).transpose( - 0, 1)[:, :raw_args['max_len'], :] - data = pad_sequence(data).transpose(0, - 1)[:, :raw_args['max_len'], :] - # Split data to raw - num_items = [len(ds) for ds in x_list] - data_list, cnt = [], 0 - for num in num_items: - data_list.append([ - (x, y) - for x, y in zip(data[cnt:cnt + num], targets[cnt:cnt + num]) - ]) - cnt += num - - if len(data_list) == 3: - # Use raw splits - data_dict = { - 'train': data_list[0], - 'val': data_list[1], - 'test': data_list[2] - } - elif len(data_list) == 2: - # Split train to (train, val) - data_dict = { - 'train': data_list[0], - 'val': None, - 'test': data_list[1] - } - if splits: - train_size = int(splits[0] * len(data_dict['train'])) - val_size = len(data_dict['train']) - train_size - lengths = [train_size, val_size] - data_dict['train'], data_dict[ - 'val'] = 
torch.utils.data.dataset.random_split( - data_dict['train'], lengths) - else: - # Use config.data.splits - data_dict = {} - train_size = int(splits[0] * len(data_list[0])) - val_size = int(splits[1] * len(data_list[0])) - test_size = len(data_list[0]) - train_size - val_size - lengths = [train_size, val_size, test_size] - data_dict['train'], data_dict['val'], data_dict[ - 'test'] = torch.utils.data.dataset.random_split( - data_list[0], lengths) - - return data_dict - - def load_torchaudio_data(name, splits=None, config=None): - - # dataset_func = getattr(import_module('torchaudio.datasets'), name) - raise NotImplementedError - - def load_torch_geometric_data(name, splits=None, config=None): - - # dataset_func = getattr(import_module('torch_geometric.datasets'), - # name) - raise NotImplementedError - - def load_huggingface_datasets_data(name, splits=None, config=None): - from datasets import load_dataset, load_from_disk - - if config.data.args: - raw_args = config.data.args[0] - else: - raw_args = {} - assert 'max_len' in raw_args, "Miss key 'max_len' in " \ - "`config.data.args`." - filtered_args = filter_dict(load_dataset, raw_args) - logger.info("Begin to load huggingface dataset") - if "hg_cache_dir" in raw_args: - hugging_face_path = raw_args["hg_cache_dir"] - else: - hugging_face_path = os.getcwd() - - if "load_disk_dir" in raw_args: - load_path = raw_args["load_disk_dir"] - try: - dataset = load_from_disk(load_path) - except Exception as e: - logging.error(f"When loading cached dataset form " - f"{load_path}, we faced the exception: \n " - f"{str(e)}") - else: - dataset = load_dataset(path=config.data.root, - name=name, - **filtered_args) - if config.model.type.endswith('transformers'): - os.environ["TOKENIZERS_PARALLELISM"] = "false" - from transformers import AutoTokenizer - logger.info("To load huggingface tokenizer") - tokenizer = AutoTokenizer.from_pretrained( - config.model.type.split('@')[0], - local_files_only=True, - cache_dir=os.path.join(hugging_face_path, "transformers")) - - for split in dataset: - x_all = [i['sentence'] for i in dataset[split]] - targets = [i['label'] for i in dataset[split]] - - if split == "train" and "used_train_ratio" in raw_args and \ - 1 > raw_args['used_train_ratio'] > 0: - selected_idx = [i for i in range(len(dataset[split]))] - shuffle(selected_idx) - selected_idx = selected_idx[:int( - len(selected_idx) * raw_args['used_train_ratio'])] - x_all = [ - element for i, element in enumerate(x_all) - if i in selected_idx - ] - targets = [ - element for i, element in enumerate(targets) - if i in selected_idx - ] - - x_all = tokenizer(x_all, - return_tensors='pt', - padding=True, - truncation=True, - max_length=raw_args['max_len']) - data = [{key: value[i] - for key, value in x_all.items()} - for i in range(len(next(iter(x_all.values()))))] - dataset[split] = (data, targets) - data_dict = { - 'train': [(x, y) - for x, y in zip(dataset['train'][0], dataset['train'][1]) - ], - 'val': [(x, y) for x, y in zip(dataset['validation'][0], - dataset['validation'][1])], - 'test': [ - (x, y) for x, y in zip(dataset['test'][0], dataset['test'][1]) - ] if (set(dataset['test'][1]) - set([-1])) else None, - } - original_train_size = len(data_dict["train"]) - - if "half_val_dummy_test" in raw_args and raw_args[ - "half_val_dummy_test"]: - # since the "test" set from GLUE dataset may be masked, we need to - # submit to get the ground-truth, for fast FL experiments, - # we split the validation set into two parts with the same size as - # new test/val data - 
original_val = [(x, y) for x, y in zip(dataset['validation'][0], - dataset['validation'][1])] - data_dict["val"], data_dict[ - "test"] = original_val[:len(original_val) // - 2], original_val[len(original_val) // - 2:] - if "val_as_dummy_test" in raw_args and raw_args["val_as_dummy_test"]: - # use the validation set as tmp test set, - # and partial training set as validation set - data_dict["test"] = data_dict["val"] - data_dict["val"] = [] - if "part_train_dummy_val" in raw_args and 1 > raw_args[ - "part_train_dummy_val"] > 0: - new_val_part = int(original_train_size * - raw_args["part_train_dummy_val"]) - data_dict["val"].extend(data_dict["train"][:new_val_part]) - data_dict["train"] = data_dict["train"][new_val_part:] - if "part_train_dummy_test" in raw_args and 1 > raw_args[ - "part_train_dummy_test"] > 0: - new_test_part = int(original_train_size * - raw_args["part_train_dummy_test"]) - data_dict["test"] = data_dict["val"] - if data_dict["test"] is not None: - data_dict["test"].extend(data_dict["train"][:new_test_part]) - else: - data_dict["test"] = (data_dict["train"][:new_test_part]) - data_dict["train"] = data_dict["train"][new_test_part:] - - return data_dict - - def load_openml_data(tid, splits=None, config=None): - import openml - from sklearn.model_selection import train_test_split - - task = openml.tasks.get_task(int(tid)) - did = task.dataset_id - dataset = openml.datasets.get_dataset(did) - data, targets, _, _ = dataset.get_data( - dataset_format="array", target=dataset.default_target_attribute) - - train_data, test_data, train_targets, test_targets = train_test_split( - data, targets, train_size=splits[0], random_state=config.seed) - val_data, test_data, val_targets, test_targets = train_test_split( - test_data, - test_targets, - train_size=splits[1] / (1. - splits[0]), - random_state=config.seed) - data_dict = { - 'train': [(x, y) for x, y in zip(train_data, train_targets)], - 'val': [(x, y) for x, y in zip(val_data, val_targets)], - 'test': [(x, y) for x, y in zip(test_data, test_targets)] - } - return data_dict - - DATA_LOAD_FUNCS = { - 'torchvision': load_torchvision_data, - 'torchtext': load_torchtext_data, - 'torchaudio': load_torchaudio_data, - 'torch_geometric': load_torch_geometric_data, - 'huggingface_datasets': load_huggingface_datasets_data, - 'openml': load_openml_data - } - - modified_config = config.clone() - - # Load dataset - splits = modified_config.data.splits - name, package = modified_config.data.type.split('@') - - dataset = DATA_LOAD_FUNCS[package.lower()](name, splits, modified_config) - dataset = (dataset.get('train'), dataset.get('val'), dataset.get('test')) - - # Translate dataset to `StandaloneDataDict` - data_translator = BaseDataTranslator(modified_config, DataLoader, - client_cfgs) - datadict = data_translator(dataset) - - return datadict, modified_config +TRANS_DATA_MAP = { + 'BaseDataTranslator': [ + '.*?@.*?', 'hiv', 'proteins', 'imdb-binary', 'bbbp', 'tox21', 'bace', + 'sider', 'clintox', 'esol', 'freesolv', 'lipo' + ], + 'PyGNodeDataTranslator': [ + 'cora', 'citeseer', 'pubmed', 'dblp_conf', 'dblp_org', 'csbm.*?' + ], + 'PyGLinkDataTranslator': ['fb15k-237', 'wn18'], + 'DummyDataTranslator': [ + 'toy', 'quadratic', 'femnist', 'celeba', 'shakespeare', 'twitter', + 'subreddit', 'synthetic', 'ciao', 'epinions', '.*?vertical_fl_data.*?', + '.*?movielens.*?', '.*?cikmcup.*?', 'graph_multi_domain.*?' 
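+        # The loaders for these datasets already return per-client data
+        # dicts, so `DummyDataTranslator` skips the ML/FL splitting.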
+ ], # Dummy for FL dataset +} +DATA_TRANS_MAP = RegexInverseMap(TRANS_DATA_MAP, None) def get_data(config, client_cfgs=None): - """Instantiate the dataset and update the configuration accordingly if + """Instantiate the data and update the configuration accordingly if necessary. Arguments: - config (obj): a cfg node object. + config: a cfg node object. + client_cfgs: dict of client-specific cfg node object. Returns: obj: The dataset object. cfg.node: The updated configuration. """ - # fix the seed for data generation, - # will restore the user-specified on after the generation + # Fix the seed for data generation setup_seed(12345) + for func in register.data_dict.values(): data_and_config = func(config, client_cfgs) if data_and_config is not None: return data_and_config - if config.data.type.lower() == 'toy': - from federatedscope.tabular.dataloader.toy import load_toy_data - data, modified_config = load_toy_data(config, client_cfgs) - elif config.data.type.lower() == 'quadratic': - from federatedscope.tabular.dataloader import load_quadratic_dataset - data, modified_config = load_quadratic_dataset(config, client_cfgs) - elif config.data.type.lower() in ['femnist', 'celeba']: - from federatedscope.cv.dataloader import load_cv_dataset - data, modified_config = load_cv_dataset(config, client_cfgs) - elif config.data.type.lower() in [ - 'shakespeare', 'twitter', 'subreddit', 'synthetic' - ]: - from federatedscope.nlp.dataloader import load_nlp_dataset - data, modified_config = load_nlp_dataset(config, client_cfgs) - elif config.data.type.lower() in [ - 'cora', - 'citeseer', - 'pubmed', - 'dblp_conf', - 'dblp_org', - ] or config.data.type.lower().startswith('csbm'): - from federatedscope.gfl.dataloader import load_nodelevel_dataset - data, modified_config = load_nodelevel_dataset(config, client_cfgs) - elif config.data.type.lower() in ['ciao', 'epinions', 'fb15k-237', 'wn18']: - from federatedscope.gfl.dataloader import load_linklevel_dataset - data, modified_config = load_linklevel_dataset(config, client_cfgs) - elif config.data.type.lower() in [ - 'hiv', 'proteins', 'imdb-binary', 'bbbp', 'tox21', 'bace', 'sider', - 'clintox', 'esol', 'freesolv', 'lipo' - ] or config.data.type.startswith('graph_multi_domain'): - from federatedscope.gfl.dataloader import load_graphlevel_dataset - data, modified_config = load_graphlevel_dataset(config, client_cfgs) - elif config.data.type.lower() == 'vertical_fl_data': - from federatedscope.vertical_fl.dataloader import load_vertical_data - data, modified_config = load_vertical_data(config, generate=True) - elif 'movielens' in config.data.type.lower( - ) or 'netflix' in config.data.type.lower(): - from federatedscope.mf.dataloader import load_mf_dataset - data, modified_config = load_mf_dataset(config, client_cfgs) - elif '@' in config.data.type.lower(): - data, modified_config = load_external_data(config, client_cfgs) - elif 'cikmcup' in config.data.type.lower(): - from federatedscope.gfl.dataset.cikm_cup import load_cikmcup_data - data, modified_config = load_cikmcup_data(config, client_cfgs) - elif config.data.type is None or config.data.type == "": - # The participant (only for server in this version) does not own data - data = None - modified_config = config - else: - raise ValueError('Data {} not found.'.format(config.data.type)) - - if 'backdoor' in config.attack.attack_method and 'edge' in \ - config.attack.trigger_type: - import os - import torch - from federatedscope.attack.auxiliary import \ - create_ardis_poisoned_dataset, 
create_ardis_test_dataset
-        if not os.path.exists(config.attack.edge_path):
-            os.makedirs(config.attack.edge_path)
-        poisoned_edgeset = create_ardis_poisoned_dataset(
-            data_path=config.attack.edge_path)
-
-        ardis_test_dataset = create_ardis_test_dataset(
-            config.attack.edge_path)
-        logger.info("Writing poison_data to: {}".format(
-            config.attack.edge_path))
+    # Load dataset from source files
+    dataset, modified_config = load_dataset(config, client_cfgs)

-        with open(config.attack.edge_path + "poisoned_edgeset_training",
-                  "wb") as saved_data_file:
-            torch.save(poisoned_edgeset, saved_data_file)
+    # Apply the matched translator to the non-FL dataset
+    translator = getattr(import_module('federatedscope.core.data'),
+                         DATA_TRANS_MAP[config.data.type.lower()])(
+                             modified_config, client_cfgs)
+    data = translator(dataset)

-        with open(config.attack.edge_path + "ardis_test_dataset.pt",
-                  "wb") as ardis_data_file:
-            torch.save(ardis_test_dataset, ardis_data_file)
-        logger.warning('please notice: downloading the poisoned dataset \
-            on cifar-10 from \
-            https://github.com/ksreenivasan/OOD_Federated_Learning')
+    # Convert `StandaloneDataDict` to `ClientData` when in distribute mode
+    data = convert_data_mode(data, modified_config)

-    if 'backdoor' in config.attack.attack_method:
-        from federatedscope.attack.auxiliary import poisoning
-        poisoning(data, modified_config)
+    # Restore the user-specified seed after the data generation
     setup_seed(config.seed)

-    if config.federate.mode.lower() == 'standalone':
-        return data, modified_config
-    else:
-        # Invalid data_idx
-        if config.distribute.data_idx == -1:
-            return data, config
-        elif config.distribute.data_idx not in data.keys():
-            data_idx = np.random.choice(list(data.keys()))
-            logger.warning(
-                f"The provided data_idx={config.distribute.data_idx} is "
-                f"invalid, so that we randomly sample a data_idx as {data_idx}"
-            )
-        else:
-            data_idx = config.distribute.data_idx
-        return data[data_idx], config
-
-    setup_seed(config.seed)
+    return data, modified_config
diff --git a/federatedscope/core/auxiliaries/dataloader_builder.py b/federatedscope/core/auxiliaries/dataloader_builder.py
index 858ba1a03..4524a47b7 100644
--- a/federatedscope/core/auxiliaries/dataloader_builder.py
+++ b/federatedscope/core/auxiliaries/dataloader_builder.py
@@ -1,3 +1,5 @@
+from federatedscope.core.data.utils import get_func_args, filter_dict
+
 try:
     import torch
     from torch.utils.data import Dataset
@@ -6,15 +8,40 @@
     Dataset = object


-def get_dataloader(dataset, config):
+def get_dataloader(dataset, config, split='train'):
     if config.backend == 'torch':
-        from torch.utils.data import DataLoader
-        dataloader = DataLoader(dataset,
-                                batch_size=config.data.batch_size,
-                                shuffle=config.data.shuffle,
-                                num_workers=config.data.num_workers,
-                                pin_memory=True)
-        return dataloader
+        if config.data.loader.type == 'base':
+            from torch.utils.data import DataLoader
+            loader_cls = DataLoader
+        elif config.data.loader.type == 'raw':
+            loader_cls = None
+        elif config.data.loader.type == 'graphsaint':
+            if split == 'train':
+                from torch_geometric.loader import GraphSAINTRandomWalkSampler
+                loader_cls = GraphSAINTRandomWalkSampler
+            else:
+                from torch_geometric.loader import NeighborSampler
+                loader_cls = NeighborSampler
+        elif config.data.loader.type == 'neighbor':
+            from torch_geometric.loader import NeighborSampler
+            loader_cls = NeighborSampler
+        elif config.data.loader.type == 'mf':
+            from federatedscope.mf.dataloader import MFDataLoader
+            loader_cls = MFDataLoader
+        else:
+            raise
ValueError(f'data.loader.type {config.data.loader.type} '
+                             f'not found!')
+        if loader_cls is not None:
+            raw_args = dict(config.dataloader)
+            if split != 'train':
+                raw_args['shuffle'] = False
+                raw_args['sizes'] = [-1]
+                raw_args['batch_size'] = 4096
+            filtered_args = filter_dict(loader_cls.__init__, raw_args)
+            dataloader = loader_cls(dataset=dataset, **filtered_args)
+            return dataloader
+        else:
+            return dataset
     else:
         return None
diff --git a/federatedscope/core/auxiliaries/utils.py b/federatedscope/core/auxiliaries/utils.py
index 1290f68a6..f2f488b66 100644
--- a/federatedscope/core/auxiliaries/utils.py
+++ b/federatedscope/core/auxiliaries/utils.py
@@ -7,7 +7,6 @@ import signal
 import ssl
 import urllib.request
-from collections import defaultdict
 from os import path as osp

 import pickle
@@ -306,56 +305,3 @@ def merge_param_dict(raw_param, filtered_param):
     for key in filtered_param.keys():
         raw_param[key] = filtered_param[key]
     return raw_param
-
-
-def merge_data(all_data, merged_max_data_id, specified_dataset_name=None):
-    if specified_dataset_name is None:
-        dataset_names = list(all_data[1].keys())  # e.g., train, test, val
-    else:
-        if not isinstance(specified_dataset_name, list):
-            specified_dataset_name = [specified_dataset_name]
-        dataset_names = specified_dataset_name
-
-    import torch.utils.data
-    assert len(dataset_names) >= 1, \
-        "At least one sub-dataset is required in client 1"
-    data_name = "test" if "test" in dataset_names else dataset_names[0]
-    id_has_key = 1
-    while "test" not in all_data[id_has_key]:
-        id_has_key += 1
-        if len(all_data) <= id_has_key:
-            raise KeyError(f'All data do not key {data_name}.')
-    if isinstance(all_data[id_has_key][data_name], dict):
-        data_elem_names = list(
-            all_data[id_has_key][data_name].keys())  # e.g., x, y
-        merged_data = {name: defaultdict(list) for name in dataset_names}
-        for data_id in range(1, merged_max_data_id):
-            for d_name in dataset_names:
-                if d_name not in all_data[data_id]:
-                    continue
-                for elem_name in data_elem_names:
-                    merged_data[d_name][elem_name].append(
-                        all_data[data_id][d_name][elem_name])
-        for d_name in dataset_names:
-            for elem_name in data_elem_names:
-                merged_data[d_name][elem_name] = np.concatenate(
-                    merged_data[d_name][elem_name])
-    elif issubclass(type(all_data[id_has_key][data_name]),
-                    torch.utils.data.DataLoader):
-        merged_data = all_data[id_has_key]
-        for data_id in range(1, merged_max_data_id):
-            if data_id == id_has_key:
-                continue
-            for d_name in dataset_names:
-                if d_name not in all_data[data_id]:
-                    continue
-                merged_data[d_name].dataset.extend(
-                    all_data[data_id][d_name].dataset)
-    else:
-        raise NotImplementedError(
-            "Un-supported type when merging data across different clients."
-            f"Your data type is {type(all_data[id_has_key][data_name])}. "
-            f"Currently we only support the following forms: "
-            " 1): {data_id: {train: {x:ndarray, y:ndarray}} }"
-            " 2): {data_id: {train: DataLoader }")
-    return merged_data
diff --git a/federatedscope/core/configs/README.md b/federatedscope/core/configs/README.md
index 984010c02..815048234 100644
--- a/federatedscope/core/configs/README.md
+++ b/federatedscope/core/configs/README.md
@@ -14,33 +14,33 @@ We summarize all the customizable configurations:

 ### Data

 The configurations related to the data/dataset are defined in `cfg_data.py`.

-| Name | (Type) Default Value | Description | Note |
-|:----:|:-----:|:---------- |:---- |
-| `data.root` | (string) 'data' | The folder where the data file located.
diff --git a/federatedscope/core/configs/README.md b/federatedscope/core/configs/README.md
index 984010c02..815048234 100644
--- a/federatedscope/core/configs/README.md
+++ b/federatedscope/core/configs/README.md
@@ -14,33 +14,33 @@ We summarize all the customizable configurations:
 ### Data
 The configurations related to the data/dataset are defined in `cfg_data.py`.
 
-| Name | (Type) Default Value | Description | Note |
-|:----:|:-----:|:---------- |:---- |
-| `data.root` | (string) 'data' | The folder where the data file located. `data.root` would be used together with `data.type` to load the dataset. | - |
-| `data.type` | (string) 'toy' | Dataset name | CV: 'femnist', 'celeba' ; NLP: 'shakespeare', 'subreddit', 'twitter'; Graph: 'cora', 'citeseer', 'pubmed', 'dblp_conf', 'dblp_org', 'csbm', 'epinions', 'ciao', 'fb15k-237', 'wn18', 'fb15k' , 'MUTAG', 'BZR', 'COX2', 'DHFR', 'PTC_MR', 'AIDS', 'NCI1', 'ENZYMES', 'DD', 'PROTEINS', 'COLLAB', 'IMDB-BINARY', 'IMDB-MULTI', 'REDDIT-BINARY', 'IMDB-BINARY', 'IMDB-MULTI', 'HIV', 'ESOL', 'FREESOLV', 'LIPO', 'PCBA', 'MUV', 'BACE', 'BBBP', 'TOX21', 'TOXCAST', 'SIDER', 'CLINTOX', 'graph_multi_domain_mol', 'graph_multi_domain_small', 'graph_multi_domain_mix', 'graph_multi_domain_biochem'; MF: 'vflmovielens1m', 'vflmovielens10m', 'hflmovielens1m', 'hflmovielens10m', 'vflnetflix', 'hflnetflix'; Tabular: 'toy', 'synthetic'; External dataset: 'DNAME@torchvision', 'DNAME@torchtext', 'DNAME@huggingface_datasets', 'DNAME@openml'. |
-| `data.args` | (list) [] | Args for the external dataset | Used for external dataset, eg. `[{'download': False}]` |
-| `data.save_data` | (bool) False | Whether to save the generated toy data | - |
-| `data.splitter` | (string) '' | Splitter name for standalone dataset | Generic splitter: 'lda'; Graph splitter: 'louvain', 'random', 'rel_type', 'graph_type', 'scaffold', 'scaffold_lda', 'rand_chunk' |
-| `data.splitter_args` | (list) [] | Args for splitter. | Used for splitter, eg. `[{'alpha': 0.5}]` |
-| `data.transform` | (list) [] | Transform for x of data | Used in `get_item` in torch.dataset, eg. `[['ToTensor'], ['Normalize', {'mean': [0.1307], 'std': [0.3081]}]]` |
-| `data.target_transform` | (list) [] | Transform for y of data | Use as `data.transform` |
-| `data.pre_transform` | (list) [] | Pre_transform for `torch_geometric` dataset | Use as `data.transform` |
-| `data.batch_size` | (int) 64 | batch_size for DataLoader | - |
-| `data.drop_last` | (bool) False | Whether drop last batch (if the number of last batch is smaller than batch_size) in DataLoader | - |
-| `data.sizes` | (list) [10, 5] | Sample size for graph DataLoader | The length of `data.sizes` must meet the layer of GNN models. |
-| `data.shuffle` | (bool) True | Shuffle train DataLoader | - |
-| `data.server_holds_all` | (bool) False | Only use in global mode, whether the server (workers with idx 0) holds all data, useful in global training/evaluation case | - |
-| `data.subsample` | (float) 1.0 |  Only used in LEAF datasets, subsample clients from all clients | - |
-| `data.splits` | (list) [0.8, 0.1, 0.1] | Train, valid, test splits | - |
-| `data.consistent_label_distribution` | (bool) False | Make label distribution of train/val/test set over clients keep consistent during splitting | - |
-| `data.cSBM_phi` | (list) [0.5, 0.5, 0.5] | Phi for cSBM graph dataset | - |
-| `data.loader` | (string) '' | Graph sample name, used in minibatch trainer | 'graphsaint-rw': use `GraphSAINTRandomWalkSampler` as DataLoader; 'neighbor': use `NeighborSampler` as DataLoader. |
-| `data.num_workers` | (int) 0 | num_workers in DataLoader | - |
-| `data.graphsaint.walk_length` | (int) 2 | The length of each random walk in graphsaint. | - |
-| `data.graphsaint.num_steps` | (int) 30 | The number of iterations per epoch in graphsaint. | - |
-| `data.quadratic.dim` | (int) 1 | Dim of synthetic quadratic  dataset | - |
-| `data.quadratic.min_curv` | (float) 0.02 | Min_curve of synthetic quadratic dataset | - |
-| `data.quadratic.max_curv` | (float) 12.5 | Max_cur of synthetic quadratic dataset | - |
+| Name | (Type) Default Value | Description | Note |
+|:----:|:--------------------:|:------------|:-----|
+| `data.root` | (string) 'data' | The folder where the data file is located. `data.root` would be used together with `data.type` to load the dataset. | - |
+| `data.type` | (string) 'toy' | Dataset name | CV: 'femnist', 'celeba' ; NLP: 'shakespeare', 'subreddit', 'twitter'; Graph: 'cora', 'citeseer', 'pubmed', 'dblp_conf', 'dblp_org', 'csbm', 'epinions', 'ciao', 'fb15k-237', 'wn18', 'fb15k', 'MUTAG', 'BZR', 'COX2', 'DHFR', 'PTC_MR', 'AIDS', 'NCI1', 'ENZYMES', 'DD', 'PROTEINS', 'COLLAB', 'IMDB-BINARY', 'IMDB-MULTI', 'REDDIT-BINARY', 'HIV', 'ESOL', 'FREESOLV', 'LIPO', 'PCBA', 'MUV', 'BACE', 'BBBP', 'TOX21', 'TOXCAST', 'SIDER', 'CLINTOX', 'graph_multi_domain_mol', 'graph_multi_domain_small', 'graph_multi_domain_mix', 'graph_multi_domain_biochem'; MF: 'vflmovielens1m', 'vflmovielens10m', 'hflmovielens1m', 'hflmovielens10m', 'vflnetflix', 'hflnetflix'; Tabular: 'toy', 'synthetic'; External dataset: 'DNAME@torchvision', 'DNAME@torchtext', 'DNAME@huggingface_datasets', 'DNAME@openml'. |
+| `data.args` | (list) [] | Args for the external dataset | Used for external dataset, eg. `[{'download': False}]` |
+| `data.save_data` | (bool) False | Whether to save the generated toy data | - |
+| `data.splitter` | (string) '' | Splitter name for standalone dataset | Generic splitter: 'lda'; Graph splitter: 'louvain', 'random', 'rel_type', 'graph_type', 'scaffold', 'scaffold_lda', 'rand_chunk' |
+| `data.splitter_args` | (list) [] | Args for splitter. | Used for splitter, eg. `[{'alpha': 0.5}]` |
+| `data.transform` | (list) [] | Transform for x of data | Used in `get_item` in torch.dataset, eg. `[['ToTensor'], ['Normalize', {'mean': [0.1307], 'std': [0.3081]}]]` |
+| `data.target_transform` | (list) [] | Transform for y of data | Use as `data.transform` |
+| `data.pre_transform` | (list) [] | Pre_transform for `torch_geometric` dataset | Use as `data.transform` |
+| `dataloader.batch_size` | (int) 64 | batch_size for DataLoader | - |
+| `dataloader.drop_last` | (bool) False | Whether drop last batch (if the number of last batch is smaller than batch_size) in DataLoader | - |
+| `dataloader.sizes` | (list) [10, 5] | Sample size for graph DataLoader | The length of `dataloader.sizes` must meet the layer of GNN models. |
+| `dataloader.shuffle` | (bool) True | Shuffle train DataLoader | - |
+| `data.server_holds_all` | (bool) False | Only use in global mode, whether the server (workers with idx 0) holds all data, useful in global training/evaluation case | - |
+| `data.subsample` | (float) 1.0 | Only used in LEAF datasets, subsample clients from all clients | - |
+| `data.splits` | (list) [0.8, 0.1, 0.1] | Train, valid, test splits | - |
+| `data.consistent_label_distribution` | (bool) False | Make label distribution of train/val/test set over clients keep consistent during splitting | - |
+| `data.cSBM_phi` | (list) [0.5, 0.5, 0.5] | Phi for cSBM graph dataset | - |
+| `dataloader.type` | (string) 'base' | The type of DataLoader used in the minibatch trainer | 'base': use `torch.utils.data.DataLoader`; 'raw': return the raw dataset without a DataLoader; 'graphsaint-rw': use `GraphSAINTRandomWalkSampler` as DataLoader; 'neighbor': use `NeighborSampler` as DataLoader; 'mf': use `MFDataLoader`. |
+| `dataloader.num_workers` | (int) 0 | num_workers in DataLoader | - |
+| `dataloader.walk_length` | (int) 2 | The length of each random walk in graphsaint. | - |
+| `dataloader.num_steps` | (int) 30 | The number of iterations per epoch in graphsaint. | - |
+| `data.quadratic.dim` | (int) 1 | Dim of the synthetic quadratic dataset | - |
+| `data.quadratic.min_curv` | (float) 0.02 | Min curvature of the synthetic quadratic dataset | - |
+| `data.quadratic.max_curv` | (float) 12.5 | Max curvature of the synthetic quadratic dataset | - |
 
 
 ### Model
 
@@ -292,14 +292,14 @@ The configurations related to NbAFL method.
 #### SGDMF
 The configurations related to SGDMF method (only used in matrix factorization tasks).
-| Name | (Type) Default Value | Description | Note |
-|:---------------:|:--------------------:|:-----------------------------------|:--------------------------------------------------------|
-| `sgdmf.use` | (bool) False | The indicator of the SGDMF method. |  |
-| `sgdmf.R` | (float) 5. | The upper bound of rating. | - |
-| `sgdmf.epsilon` | (float) 4. | The $\epsilon$ used in DP. | - |
-| `sgdmf.delta` | (float) 0.5 | The $\delta$ used in DP. | - |
-| `sgdmf.constant` | (float) 1. | The constant in SGDMF | - |
-| `sgdmf.theta` | (int) -1 | - | -1 means per-rating privacy, otherwise per-user privacy |
+| Name | (Type) Default Value | Description | Note |
+|:------------------:|:--------------------:|:-----------------------------------|:--------------------------------------------------------|
+| `sgdmf.use` | (bool) False | The indicator of the SGDMF method. |  |
+| `sgdmf.R` | (float) 5. | The upper bound of rating. | - |
+| `sgdmf.epsilon` | (float) 4. | The $\epsilon$ used in DP. | - |
+| `sgdmf.delta` | (float) 0.5 | The $\delta$ used in DP. | - |
+| `sgdmf.constant` | (float) 1. | The constant in SGDMF | - |
+| `dataloader.theta` | (int) -1 | - | -1 means per-rating privacy, otherwise per-user privacy |
 
 ### Auto-tuning Components
 
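The updated table reflects the central change of this patch: everything that parameterizes batch construction moves from `data.*` into a dedicated `dataloader.*` namespace, while dataset selection and splitting stay under `data.*`. A hedged sketch of overriding the regrouped keys from Python, assuming a standard yacs-based FederatedScope config (the values are arbitrary):

```python
from federatedscope.core.configs.config import global_cfg

cfg = global_cfg.clone()
# Batch construction now lives under `dataloader.*` ...
cfg.merge_from_list([
    'dataloader.type', 'base',
    'dataloader.batch_size', 32,
    'dataloader.shuffle', True,
    'dataloader.num_workers', 2,
])
# ... while dataset choice and the ML split ratios stay under `data.*`.
cfg.merge_from_list(['data.type', 'toy', 'data.splits', [0.8, 0.1, 0.1]])
```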
diff --git a/federatedscope/core/configs/cfg_data.py b/federatedscope/core/configs/cfg_data.py
index dea7c2091..97ef5be21 100644
--- a/federatedscope/core/configs/cfg_data.py
+++ b/federatedscope/core/configs/cfg_data.py
@@ -20,10 +20,6 @@ def extend_data_cfg(cfg):
     cfg.data.target_transform = []  # target_transform for y, use as above
     cfg.data.pre_transform = [
     ]  # pre_transform for `torch_geometric` dataset, use as above
-    cfg.data.batch_size = 64
-    cfg.data.drop_last = False
-    cfg.data.sizes = [10, 5]
-    cfg.data.shuffle = True
     cfg.data.server_holds_all = False  # whether the server (workers with
     # idx 0) holds all data, useful in global training/evaluation case
     cfg.data.subsample = 1.0
@@ -32,11 +28,20 @@ def extend_data_cfg(cfg):
     # distributions of train/val/test set over clients will be kept
     # consistent during splitting
     cfg.data.cSBM_phi = [0.5, 0.5, 0.5]
-    cfg.data.loader = ''
-    cfg.data.num_workers = 0
-    cfg.data.graphsaint = CN()
-    cfg.data.graphsaint.walk_length = 2
-    cfg.data.graphsaint.num_steps = 30
+
+    # DataLoader related args
+    cfg.dataloader = CN()
+    cfg.dataloader.type = 'base'
+    cfg.dataloader.batch_size = 64
+    cfg.dataloader.shuffle = True
+    cfg.dataloader.num_workers = 0
+    cfg.dataloader.drop_last = False
+    cfg.dataloader.pin_memory = True
+    # GFL: graphsaint DataLoader
+    cfg.dataloader.walk_length = 2
+    cfg.dataloader.num_steps = 30
+    # GFL: neighbor sampler DataLoader
+    cfg.dataloader.sizes = [10, 5]
 
     # quadratic
     cfg.data.quadratic = CN()
@@ -49,12 +54,13 @@ def extend_data_cfg(cfg):
 
 
 def assert_data_cfg(cfg):
-    if cfg.data.loader == 'graphsaint-rw':
-        assert cfg.model.layer == cfg.data.graphsaint.walk_length, 'Sample ' \
+    if cfg.dataloader.type == 'graphsaint-rw':
+        assert cfg.model.layer == cfg.dataloader.walk_length, 'Sample ' \
                                                                'size ' \
                                                                'mismatch'
-    if cfg.data.loader == 'neighbor':
-        assert cfg.model.layer == len(cfg.data.sizes), 'Sample size mismatch'
+    if cfg.dataloader.type == 'neighbor':
+        assert cfg.model.layer == len(
+            cfg.dataloader.sizes), 'Sample size mismatch'
     if '@' in cfg.data.type:
         assert cfg.federate.client_num > 0, '`federate.client_num` should ' \
                                             'be greater than 0 when using ' \
diff --git a/federatedscope/core/configs/cfg_differential_privacy.py b/federatedscope/core/configs/cfg_differential_privacy.py
index 9f90ad1de..25da93eda 100644
--- a/federatedscope/core/configs/cfg_differential_privacy.py
+++ b/federatedscope/core/configs/cfg_differential_privacy.py
@@ -25,8 +25,9 @@ def extend_dp_cfg(cfg):
     cfg.sgdmf.epsilon = 4.  # \epsilon in dp
     cfg.sgdmf.delta = 0.5  # \delta in dp
     cfg.sgdmf.constant = 1. 
# constant - cfg.sgdmf.theta = -1 # -1 means per-rating privacy, otherwise per-user - # privacy + + cfg.dataloader.theta = -1 # -1 means per-rating privacy, + # otherwise per-user privacy # --------------- register corresponding check function ---------- cfg.register_cfg_check_fun(assert_dp_cfg) diff --git a/federatedscope/core/data/__init__.py b/federatedscope/core/data/__init__.py index acaeb2598..be0c01451 100644 --- a/federatedscope/core/data/__init__.py +++ b/federatedscope/core/data/__init__.py @@ -1,6 +1,6 @@ from federatedscope.core.data.base_data import StandaloneDataDict, ClientData -from federatedscope.core.data.base_translator import BaseDataTranslator, \ - DummyDataTranslator +from federatedscope.core.data.base_translator import BaseDataTranslator +from federatedscope.core.data.dummy_translator import DummyDataTranslator __all__ = [ 'StandaloneDataDict', 'ClientData', 'BaseDataTranslator', diff --git a/federatedscope/core/data/base_data.py b/federatedscope/core/data/base_data.py index 2ff6dc6be..c1449c0cf 100644 --- a/federatedscope/core/data/base_data.py +++ b/federatedscope/core/data/base_data.py @@ -1,5 +1,6 @@ import logging -from federatedscope.core.auxiliaries.utils import merge_data +from federatedscope.core.data.utils import merge_data +from federatedscope.core.auxiliaries.dataloader_builder import get_dataloader logger = logging.getLogger(__name__) @@ -74,6 +75,49 @@ def preprocess(self, datadict): datadict[1] = merge_data( all_data=datadict, merged_max_data_id=self.cfg.federate.client_num) + datadict = self.attack(datadict) + return datadict + + def attack(self, datadict): + """ + Apply attack to `StandaloneDataDict`. + + """ + if 'backdoor' in self.global_cfg.attack.attack_method and 'edge' in \ + self.global_cfg.attack.trigger_type: + import os + import torch + from federatedscope.attack.auxiliary import \ + create_ardis_poisoned_dataset, create_ardis_test_dataset + if not os.path.exists(self.global_cfg.attack.edge_path): + os.makedirs(self.global_cfg.attack.edge_path) + poisoned_edgeset = create_ardis_poisoned_dataset( + data_path=self.global_cfg.attack.edge_path) + + ardis_test_dataset = create_ardis_test_dataset( + self.global_cfg.attack.edge_path) + + logger.info("Writing poison_data to: {}".format( + self.global_cfg.attack.edge_path)) + + with open( + self.global_cfg.attack.edge_path + + "poisoned_edgeset_training", "wb") as saved_data_file: + torch.save(poisoned_edgeset, saved_data_file) + + with open( + self.global_cfg.attack.edge_path + + "ardis_test_dataset.pt", "wb") as ardis_data_file: + torch.save(ardis_test_dataset, ardis_data_file) + logger.warning( + 'please notice: downloading the poisoned dataset \ + on cifar-10 from \ + https://github.com/ksreenivasan/OOD_Federated_Learning' + ) + + if 'backdoor' in self.global_cfg.attack.attack_method: + from federatedscope.attack.auxiliary import poisoning + poisoning(datadict, self.global_cfg) return datadict @@ -111,31 +155,18 @@ def setup(self, new_client_cfg=None): """ # if `batch_size` or `shuffle` change, reinstantiate DataLoader if self.client_cfg is not None: - if self.client_cfg.data.batch_size == \ - new_client_cfg.data.batch_size or \ - self.client_cfg.data.shuffle == \ - new_client_cfg.data.shuffle: + if dict(self.client_cfg.dataloader) == dict( + new_client_cfg.dataloader): return False self.client_cfg = new_client_cfg if self.train is not None: - self['train'] = self.loader( - self.train, - batch_size=new_client_cfg.data.batch_size, - shuffle=new_client_cfg.data.shuffle, - 
num_workers=new_client_cfg.data.num_workers)
+            self['train'] = get_dataloader(self.train, self.client_cfg,
+                                           'train')
         if self.val is not None:
-            self['val'] = self.loader(
-                self.val,
-                batch_size=new_client_cfg.data.batch_size,
-                shuffle=False,
-                num_workers=new_client_cfg.data.num_workers)
+            self['val'] = get_dataloader(self.val, self.client_cfg, 'val')
         if self.test is not None:
-            self['test'] = self.loader(
-                self.test,
-                batch_size=new_client_cfg.data.batch_size,
-                shuffle=False,
-                num_workers=new_client_cfg.data.num_workers)
+            self['test'] = get_dataloader(self.test, self.client_cfg, 'test')
         return True
 
diff --git a/federatedscope/core/data/base_translator.py b/federatedscope/core/data/base_translator.py
index 8d1cf2e0e..8cef0510a 100644
--- a/federatedscope/core/data/base_translator.py
+++ b/federatedscope/core/data/base_translator.py
@@ -1,8 +1,17 @@
+import logging
+
 from federatedscope.core.auxiliaries.splitter_builder import get_splitter
 from federatedscope.core.data import ClientData, StandaloneDataDict
 
+logger = logging.getLogger(__name__)
+
 
 class BaseDataTranslator:
+    """
+    Perform the process:
+        Dataset -> ML split -> FL split -> Data (passed to FedRunner)
+
+    """
     def __init__(self, global_cfg, loader, client_cfgs=None):
         """
         Convert data to `StandaloneDataDict`.
@@ -25,9 +34,22 @@ def __call__(self, dataset):
             or split dataset tuple of (train, val, test) or Tuple of
             split dataset with [train, val, test]
         """
-        train, val, test = self.split_train_val_test()
+        datadict = self.split(dataset)
+        datadict = StandaloneDataDict(datadict, self.global_cfg)
+
+        return datadict
+
+    def split(self, dataset):
+        """
+        Perform the ML split and the FL split.
+
+        Returns:
+            dict of `ClientData` with client_idx as key.
+
+        """
+        train, val, test = self.split_train_val_test(dataset)
         datadict = self.split_to_client(train, val, test)
-        return StandaloneDataDict(datadict, self.global_cfg)
+        return datadict
 
     def split_train_val_test(self, dataset):
         """
@@ -54,7 +76,7 @@ def split_train_val_test(self, dataset):
 
     def split_to_client(self, train, val, test):
         """
-        Split dataset to clients.
+        Split dataset to clients and build DataLoader.
 
         Returns:
             datadict (dict): dict of `ClientData` with client_idx as key.
@@ -75,8 +97,14 @@ def split_to_client(self, train, val, test):
         if len(test) > 0:
             split_test = self.splitter(test, prior=train_label_distribution)
 
-        # Build data dict with `ClientData`
-        datadict = {}
+        # Build data dict with `ClientData`, key `0` for server.
+        datadict = {
+            0: ClientData(self.loader,
+                          self.global_cfg,
+                          train=train,
+                          val=val,
+                          test=test)
+        }
         for client_id in range(1, client_num + 1):
             if self.client_cfgs is not None:
                 client_cfg = self.global_cfg.clone()
@@ -90,13 +118,3 @@ def split_to_client(self, train, val, test):
                 val=split_val[client_id - 1],
                 test=split_test[client_id - 1])
         return datadict
-
-
-class DummyDataTranslator(BaseDataTranslator):
-    """
-    Translator split data_dict to `StandaloneDataDict`.
-    """
-    def __call__(self, datadict):
-        if not isinstance(datadict, StandaloneDataDict):
-            datadict = StandaloneDataDict(datadict, self.global_cfg)
-        return datadict
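Because `__call__` now delegates to an overridable `split`, custom pipelines only need to override the stage they change. A hedged sketch of a custom translator that keeps the ML split but replicates it to every client; the class name is hypothetical and only the attributes visible in the diff above are assumed:

```python
from federatedscope.core.data import ClientData
from federatedscope.core.data.base_translator import BaseDataTranslator


class ReplicatedDataTranslator(BaseDataTranslator):
    """Hypothetical translator: every client receives identical splits."""
    def split_to_client(self, train, val, test):
        client_num = self.global_cfg.federate.client_num
        # Key 0 is the server's copy, mirroring BaseDataTranslator.
        return {
            idx: ClientData(self.loader,
                            self.global_cfg,
                            train=train,
                            val=val,
                            test=test)
            for idx in range(0, client_num + 1)
        }
```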
diff --git a/federatedscope/core/data/dummy_translator.py b/federatedscope/core/data/dummy_translator.py
new file mode 100644
index 000000000..55c2906ea
--- /dev/null
+++ b/federatedscope/core/data/dummy_translator.py
@@ -0,0 +1,27 @@
+from federatedscope.core.data.base_translator import BaseDataTranslator
+from federatedscope.core.data.base_data import ClientData
+
+
+class DummyDataTranslator(BaseDataTranslator):
+    """
+    DummyDataTranslator converts a pre-split FL dataset to DataLoaders.
+    It does not perform the ML split or the FL split.
+    """
+    def split(self, dataset):
+        if not isinstance(dataset, dict):
+            raise TypeError(f'Unsupported data type {type(dataset)}')
+        datadict = {}
+        for client_id in dataset.keys():
+            if self.client_cfgs is not None:
+                client_cfg = self.global_cfg.clone()
+                client_cfg.merge_from_other_cfg(
+                    self.client_cfgs.get(f'client_{client_id}'))
+            else:
+                client_cfg = self.global_cfg
+            datadict[client_id] = ClientData(
+                self.loader,
+                client_cfg,
+                train=dataset[client_id].get('train'),
+                val=dataset[client_id].get('val'),
+                test=dataset[client_id].get('test'))
+        return datadict
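For datasets that already arrive partitioned by client id (such as the toy data), the dummy translator above only wraps each client's splits. A hedged usage sketch, assuming this patch is applied; the two-client dict is illustrative:

```python
from torch.utils.data import DataLoader

from federatedscope.core.configs.config import global_cfg
from federatedscope.core.data import DummyDataTranslator

cfg = global_cfg.clone()
cfg.federate.client_num = 2

# Data already partitioned by client id; no ML/FL split is needed.
fl_data = {
    1: {'train': [(0.0, 0.0)], 'val': None, 'test': [(0.5, 1.0)]},
    2: {'train': [(1.0, 2.0)], 'val': None, 'test': [(1.5, 3.0)]},
}
translator = DummyDataTranslator(cfg, DataLoader)
data = translator(fl_data)  # StandaloneDataDict of ClientData
```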
diff --git a/federatedscope/core/data/utils.py b/federatedscope/core/data/utils.py
new file mode 100644
index 000000000..0f67f7783
--- /dev/null
+++ b/federatedscope/core/data/utils.py
@@ -0,0 +1,589 @@
+import inspect
+import logging
+import os
+import re
+from collections import defaultdict
+
+import numpy as np
+from random import shuffle
+
+import torch.utils
+
+logger = logging.getLogger(__name__)
+
+
+class RegexInverseMap:
+    def __init__(self, n_dic, val):
+        self._items = {}
+        for key, values in n_dic.items():
+            for value in values:
+                self._items[value] = key
+        self.__val = val
+
+    def __getitem__(self, key):
+        for regex in self._items.keys():
+            if re.compile(regex).match(key):
+                return self._items[regex]
+        return self.__val
+
+    def __repr__(self):
+        return str(self._items.items())
+
+
+def load_dataset(config, client_cfgs):
+    if config.data.type.lower() == 'toy':
+        from federatedscope.tabular.dataloader.toy import load_toy_data
+        dataset, modified_config = load_toy_data(config, client_cfgs)
+    elif config.data.type.lower() == 'quadratic':
+        from federatedscope.tabular.dataloader import load_quadratic_dataset
+        dataset, modified_config = load_quadratic_dataset(config, client_cfgs)
+    elif config.data.type.lower() in ['femnist', 'celeba']:
+        from federatedscope.cv.dataloader import load_cv_dataset
+        dataset, modified_config = load_cv_dataset(config, client_cfgs)
+    elif config.data.type.lower() in [
+            'shakespeare', 'twitter', 'subreddit', 'synthetic'
+    ]:
+        from federatedscope.nlp.dataloader import load_nlp_dataset
+        dataset, modified_config = load_nlp_dataset(config, client_cfgs)
+    elif config.data.type.lower() in [
+            'cora',
+            'citeseer',
+            'pubmed',
+            'dblp_conf',
+            'dblp_org',
+    ] or config.data.type.lower().startswith('csbm'):
+        from federatedscope.gfl.dataloader import load_nodelevel_dataset
+        dataset, modified_config = load_nodelevel_dataset(config, client_cfgs)
+    elif config.data.type.lower() in ['ciao', 'epinions', 'fb15k-237', 'wn18']:
+        from federatedscope.gfl.dataloader import load_linklevel_dataset
+        dataset, modified_config = load_linklevel_dataset(config, client_cfgs)
+    elif config.data.type.lower() in [
+            'hiv', 'proteins', 'imdb-binary', 'bbbp', 'tox21', 'bace', 'sider',
+            'clintox', 'esol', 'freesolv', 'lipo'
+    ] or config.data.type.startswith('graph_multi_domain'):
+        from federatedscope.gfl.dataloader import load_graphlevel_dataset
+        dataset, modified_config = load_graphlevel_dataset(config, client_cfgs)
+    elif config.data.type.lower() == 'vertical_fl_data':
+        from federatedscope.vertical_fl.dataloader import load_vertical_data
+        dataset, modified_config = load_vertical_data(config, generate=True)
+    elif 'movielens' in config.data.type.lower(
+    ) or 'netflix' in config.data.type.lower():
+        from federatedscope.mf.dataloader import load_mf_dataset
+        dataset, modified_config = load_mf_dataset(config, client_cfgs)
+    elif '@' in config.data.type.lower():
+        dataset, modified_config = load_external_data(config, client_cfgs)
+    elif 'cikmcup' in config.data.type.lower():
+        from federatedscope.gfl.dataset.cikm_cup import load_cikmcup_data
+        dataset, modified_config = load_cikmcup_data(config, client_cfgs)
+    elif config.data.type is None or config.data.type == "":
+        # The participant (only for server in this version) does not own data
+        dataset = None
+        modified_config = config
+    else:
+        raise ValueError('Dataset {} not found.'.format(config.data.type))
+    return dataset, modified_config
+
+
+def load_external_data(config=None, client_cfgs=None):
+    r""" Based on the configuration file, this function imports external
+    datasets, applies train/valid/test splits, and converts them into the
+    standard FederatedScope input data format.
+
+    Args:
+        config: `CN` from `federatedscope/core/configs/config.py`
+
+    Returns:
+        data_split_tuple: tuple of datasets after the train/val/test split.
+            Format:
+                (
+                    dataset_train,
+                    dataset_val,
+                    dataset_test
+                )
+            The tuple will later be wrapped into DataLoaders by the
+            corresponding data translator.
+        modified_config: `CN` from `federatedscope/core/configs/config.py`,
+            which might be modified in the function.
+ + """ + + import torch + from importlib import import_module + from torch.utils.data import DataLoader + from federatedscope.core.auxiliaries.transform_builder import get_transform + + def load_torchvision_data(name, splits=None, config=None): + dataset_func = getattr(import_module('torchvision.datasets'), name) + transform_funcs = get_transform(config, 'torchvision') + if config.data.args: + raw_args = config.data.args[0] + else: + raw_args = {} + if 'download' not in raw_args.keys(): + raw_args.update({'download': True}) + filtered_args = filter_dict(dataset_func.__init__, raw_args) + func_args = get_func_args(dataset_func.__init__) + + # Perform split on different dataset + if 'train' in func_args: + # Split train to (train, val) + dataset_train = dataset_func(root=config.data.root, + train=True, + **filtered_args, + **transform_funcs) + dataset_val = None + dataset_test = dataset_func(root=config.data.root, + train=False, + **filtered_args, + **transform_funcs) + if splits: + train_size = int(splits[0] * len(dataset_train)) + val_size = len(dataset_train) - train_size + lengths = [train_size, val_size] + dataset_train, dataset_val = \ + torch.utils.data.dataset.random_split(dataset_train, + lengths) + + elif 'split' in func_args: + # Use raw split + dataset_train = dataset_func(root=config.data.root, + split='train', + **filtered_args, + **transform_funcs) + dataset_val = dataset_func(root=config.data.root, + split='valid', + **filtered_args, + **transform_funcs) + dataset_test = dataset_func(root=config.data.root, + split='test', + **filtered_args, + **transform_funcs) + elif 'classes' in func_args: + # Use raw split + dataset_train = dataset_func(root=config.data.root, + classes='train', + **filtered_args, + **transform_funcs) + dataset_val = dataset_func(root=config.data.root, + classes='valid', + **filtered_args, + **transform_funcs) + dataset_test = dataset_func(root=config.data.root, + classes='test', + **filtered_args, + **transform_funcs) + else: + # Use config.data.splits + dataset = dataset_func(root=config.data.root, + **filtered_args, + **transform_funcs) + train_size = int(splits[0] * len(dataset)) + val_size = int(splits[1] * len(dataset)) + test_size = len(dataset) - train_size - val_size + lengths = [train_size, val_size, test_size] + dataset_train, dataset_val, dataset_test = \ + torch.utils.data.dataset.random_split(dataset, lengths) + + data_split_dict = { + 'train': dataset_train, + 'val': dataset_val, + 'test': dataset_test + } + + return data_split_dict + + def load_torchtext_data(name, splits=None, config=None): + from torch.nn.utils.rnn import pad_sequence + from federatedscope.nlp.dataset.utils import label_to_index + + dataset_func = getattr(import_module('torchtext.datasets'), name) + if config.data.args: + raw_args = config.data.args[0] + else: + raw_args = {} + assert 'max_len' in raw_args, "Miss key 'max_len' in " \ + "`config.data.args`." + filtered_args = filter_dict(dataset_func.__init__, raw_args) + dataset = dataset_func(root=config.data.root, **filtered_args) + + # torchtext.transforms requires >= 0.12.0 and torch = 1.11.0, + # so we do not use `get_transform` in torchtext. 
+ + # Merge all data and tokenize + x_list = [] + y_list = [] + for data_iter in dataset: + data, targets = [], [] + for i, item in enumerate(data_iter): + data.append(item[1]) + targets.append(item[0]) + x_list.append(data) + y_list.append(targets) + + x_all, y_all = [], [] + for i in range(len(x_list)): + x_all += x_list[i] + y_all += y_list[i] + + if config.model.type.endswith('transformers'): + from transformers import AutoTokenizer + cache_path = os.path.join(os.getcwd(), "huggingface") + try: + tokenizer = AutoTokenizer.from_pretrained( + config.model.type.split('@')[0], + local_files_only=True, + cache_dir=cache_path) + except Exception as e: + logging.error(f"When loading cached file form " + f"{cache_path}, we faced the exception: \n " + f"{str(e)}") + + x_all = tokenizer(x_all, + return_tensors='pt', + padding=True, + truncation=True, + max_length=raw_args['max_len']) + data = [{key: value[i] + for key, value in x_all.items()} + for i in range(len(next(iter(x_all.values()))))] + if 'classification' in config.model.task.lower(): + targets = label_to_index(y_all) + else: + y_all = tokenizer(y_all, + return_tensors='pt', + padding=True, + truncation=True, + max_length=raw_args['max_len']) + targets = [{key: value[i] + for key, value in y_all.items()} + for i in range(len(next(iter(y_all.values()))))] + else: + from torchtext.data import get_tokenizer + tokenizer = get_tokenizer("basic_english") + if len(config.data.transform) == 0: + raise ValueError( + "`transform` must be one pretrained Word Embeddings from \ + ['GloVe', 'FastText', 'CharNGram']") + if len(config.data.transform) == 1: + config.data.transform.append({}) + vocab = getattr(import_module('torchtext.vocab'), + config.data.transform[0])( + dim=config.model.in_channels, + **config.data.transform[1]) + + if 'classification' in config.model.task.lower(): + data = [ + vocab.get_vecs_by_tokens(tokenizer(x), + lower_case_backup=True) + for x in x_all + ] + targets = label_to_index(y_all) + else: + data = [ + vocab.get_vecs_by_tokens(tokenizer(x), + lower_case_backup=True) + for x in x_all + ] + targets = [ + vocab.get_vecs_by_tokens(tokenizer(y), + lower_case_backup=True) + for y in y_all + ] + targets = pad_sequence(targets).transpose( + 0, 1)[:, :raw_args['max_len'], :] + data = pad_sequence(data).transpose(0, + 1)[:, :raw_args['max_len'], :] + # Split data to raw + num_items = [len(ds) for ds in x_list] + data_list, cnt = [], 0 + for num in num_items: + data_list.append([ + (x, y) + for x, y in zip(data[cnt:cnt + num], targets[cnt:cnt + num]) + ]) + cnt += num + + if len(data_list) == 3: + # Use raw splits + data_split_dict = { + 'train': data_list[0], + 'val': data_list[1], + 'test': data_list[2] + } + elif len(data_list) == 2: + # Split train to (train, val) + data_split_dict = { + 'train': data_list[0], + 'val': None, + 'test': data_list[1] + } + if splits: + train_size = int(splits[0] * len(data_split_dict['train'])) + val_size = len(data_split_dict['train']) - train_size + lengths = [train_size, val_size] + data_split_dict['train'], data_split_dict[ + 'val'] = torch.utils.data.dataset.random_split( + data_split_dict['train'], lengths) + else: + # Use config.data.splits + data_split_dict = {} + train_size = int(splits[0] * len(data_list[0])) + val_size = int(splits[1] * len(data_list[0])) + test_size = len(data_list[0]) - train_size - val_size + lengths = [train_size, val_size, test_size] + data_split_dict['train'], data_split_dict['val'], data_split_dict[ + 'test'] = torch.utils.data.dataset.random_split( + 
data_list[0], lengths) + + return data_split_dict + + def load_torchaudio_data(name, splits=None, config=None): + + # dataset_func = getattr(import_module('torchaudio.datasets'), name) + raise NotImplementedError + + def load_huggingface_datasets_data(name, splits=None, config=None): + from datasets import load_dataset, load_from_disk + + if config.data.args: + raw_args = config.data.args[0] + else: + raw_args = {} + assert 'max_len' in raw_args, "Miss key 'max_len' in " \ + "`config.data.args`." + filtered_args = filter_dict(load_dataset, raw_args) + logger.info("Begin to load huggingface dataset") + if "hg_cache_dir" in raw_args: + hugging_face_path = raw_args["hg_cache_dir"] + else: + hugging_face_path = os.getcwd() + + if "load_disk_dir" in raw_args: + load_path = raw_args["load_disk_dir"] + try: + dataset = load_from_disk(load_path) + except Exception as e: + logging.error(f"When loading cached dataset form " + f"{load_path}, we faced the exception: \n " + f"{str(e)}") + else: + dataset = load_dataset(path=config.data.root, + name=name, + **filtered_args) + if config.model.type.endswith('transformers'): + os.environ["TOKENIZERS_PARALLELISM"] = "false" + from transformers import AutoTokenizer + logger.info("To load huggingface tokenizer") + tokenizer = AutoTokenizer.from_pretrained( + config.model.type.split('@')[0], + local_files_only=True, + cache_dir=os.path.join(hugging_face_path, "transformers")) + + for split in dataset: + x_all = [i['sentence'] for i in dataset[split]] + targets = [i['label'] for i in dataset[split]] + + if split == "train" and "used_train_ratio" in raw_args and \ + 1 > raw_args['used_train_ratio'] > 0: + selected_idx = [i for i in range(len(dataset[split]))] + shuffle(selected_idx) + selected_idx = selected_idx[:int( + len(selected_idx) * raw_args['used_train_ratio'])] + x_all = [ + element for i, element in enumerate(x_all) + if i in selected_idx + ] + targets = [ + element for i, element in enumerate(targets) + if i in selected_idx + ] + + x_all = tokenizer(x_all, + return_tensors='pt', + padding=True, + truncation=True, + max_length=raw_args['max_len']) + data = [{key: value[i] + for key, value in x_all.items()} + for i in range(len(next(iter(x_all.values()))))] + dataset[split] = (data, targets) + data_split_dict = { + 'train': [(x, y) + for x, y in zip(dataset['train'][0], dataset['train'][1]) + ], + 'val': [(x, y) for x, y in zip(dataset['validation'][0], + dataset['validation'][1])], + 'test': [ + (x, y) for x, y in zip(dataset['test'][0], dataset['test'][1]) + ] if (set(dataset['test'][1]) - set([-1])) else None, + } + original_train_size = len(data_split_dict["train"]) + + if "half_val_dummy_test" in raw_args and raw_args[ + "half_val_dummy_test"]: + # since the "test" set from GLUE dataset may be masked, we need to + # submit to get the ground-truth, for fast FL experiments, + # we split the validation set into two parts with the same size as + # new test/val data + original_val = [(x, y) for x, y in zip(dataset['validation'][0], + dataset['validation'][1])] + data_split_dict["val"], data_split_dict[ + "test"] = original_val[:len(original_val) // + 2], original_val[len(original_val) // + 2:] + if "val_as_dummy_test" in raw_args and raw_args["val_as_dummy_test"]: + # use the validation set as tmp test set, + # and partial training set as validation set + data_split_dict["test"] = data_split_dict["val"] + data_split_dict["val"] = [] + if "part_train_dummy_val" in raw_args and 1 > raw_args[ + "part_train_dummy_val"] > 0: + new_val_part = 
int(original_train_size *
+                               raw_args["part_train_dummy_val"])
+            data_split_dict["val"].extend(
+                data_split_dict["train"][:new_val_part])
+            data_split_dict["train"] = data_split_dict["train"][new_val_part:]
+        if "part_train_dummy_test" in raw_args and 1 > raw_args[
+                "part_train_dummy_test"] > 0:
+            new_test_part = int(original_train_size *
+                                raw_args["part_train_dummy_test"])
+            data_split_dict["test"] = data_split_dict["val"]
+            if data_split_dict["test"] is not None:
+                data_split_dict["test"].extend(
+                    data_split_dict["train"][:new_test_part])
+            else:
+                data_split_dict["test"] = (
+                    data_split_dict["train"][:new_test_part])
+            data_split_dict["train"] = data_split_dict["train"][new_test_part:]
+
+        return data_split_dict
+
+    def load_openml_data(tid, splits=None, config=None):
+        import openml
+        from sklearn.model_selection import train_test_split
+
+        task = openml.tasks.get_task(int(tid))
+        did = task.dataset_id
+        dataset = openml.datasets.get_dataset(did)
+        data, targets, _, _ = dataset.get_data(
+            dataset_format="array", target=dataset.default_target_attribute)
+
+        train_data, test_data, train_targets, test_targets = train_test_split(
+            data, targets, train_size=splits[0], random_state=config.seed)
+        val_data, test_data, val_targets, test_targets = train_test_split(
+            test_data,
+            test_targets,
+            train_size=splits[1] / (1. - splits[0]),
+            random_state=config.seed)
+        data_split_dict = {
+            'train': [(x, y) for x, y in zip(train_data, train_targets)],
+            'val': [(x, y) for x, y in zip(val_data, val_targets)],
+            'test': [(x, y) for x, y in zip(test_data, test_targets)]
+        }
+        return data_split_dict
+
+    DATA_LOAD_FUNCS = {
+        'torchvision': load_torchvision_data,
+        'torchtext': load_torchtext_data,
+        'torchaudio': load_torchaudio_data,
+        'huggingface_datasets': load_huggingface_datasets_data,
+        'openml': load_openml_data
+    }
+
+    modified_config = config.clone()
+
+    # Load dataset
+    splits = modified_config.data.splits
+    name, package = modified_config.data.type.split('@')
+
+    # Comply with the original train/val/test
+    dataset = DATA_LOAD_FUNCS[package.lower()](name, splits, modified_config)
+    data_split_tuple = (dataset.get('train'), dataset.get('val'),
+                        dataset.get('test'))
+
+    return data_split_tuple, modified_config
+
+
+def convert_data_mode(data, config):
+    if config.federate.mode.lower() == 'standalone':
+        return data
+    else:
+        # `data_idx=-1` means the whole data dict is returned
+        if config.distribute.data_idx == -1:
+            return data
+        elif config.distribute.data_idx not in data.keys():
+            data_idx = np.random.choice(list(data.keys()))
+            logger.warning(
+                f"The provided data_idx={config.distribute.data_idx} is "
+                f"invalid, so that we randomly sample a data_idx as {data_idx}"
+            )
+        else:
+            data_idx = config.distribute.data_idx
+        return data[data_idx]
+
+
+def get_func_args(func):
+    sign = inspect.signature(func).parameters.values()
+    sign = set([val.name for val in sign])
+    return sign
+
+
+def filter_dict(func, kwarg):
+    sign = get_func_args(func)
+    common_args = sign.intersection(kwarg.keys())
+    filtered_dict = {key: kwarg[key] for key in common_args}
+    return filtered_dict
+
+
+def merge_data(all_data, merged_max_data_id, specified_dataset_name=None):
+    if specified_dataset_name is None:
+        dataset_names = list(all_data[1].keys())  # e.g., train, test, val
+    else:
+        if not isinstance(specified_dataset_name, list):
+            specified_dataset_name = [specified_dataset_name]
+        dataset_names = specified_dataset_name
+
+    import torch.utils.data
+    assert len(dataset_names) >= 1, \
+        "At least one sub-dataset is required in client 1"
+    data_name = "test" if "test" in dataset_names else dataset_names[0]
+    id_has_key = 1
+    while data_name not in all_data[id_has_key]:
+        id_has_key += 1
+        if len(all_data) <= id_has_key:
+            raise KeyError(f'No client data contains the key {data_name}.')
+    if isinstance(all_data[id_has_key][data_name], dict):
+        data_elem_names = list(
+            all_data[id_has_key][data_name].keys())  # e.g., x, y
+        merged_data = {name: defaultdict(list) for name in dataset_names}
+        for data_id in range(1, merged_max_data_id):
+            for d_name in dataset_names:
+                if d_name not in all_data[data_id]:
+                    continue
+                for elem_name in data_elem_names:
+                    merged_data[d_name][elem_name].append(
+                        all_data[data_id][d_name][elem_name])
+        for d_name in dataset_names:
+            for elem_name in data_elem_names:
+                merged_data[d_name][elem_name] = np.concatenate(
+                    merged_data[d_name][elem_name])
+    elif issubclass(type(all_data[id_has_key][data_name]),
+                    torch.utils.data.DataLoader):
+        merged_data = all_data[id_has_key]
+        for data_id in range(1, merged_max_data_id):
+            if data_id == id_has_key:
+                continue
+            for d_name in dataset_names:
+                if d_name not in all_data[data_id]:
+                    continue
+                merged_data[d_name].dataset.extend(
+                    all_data[data_id][d_name].dataset)
+    else:
+        raise NotImplementedError(
+            "Un-supported type when merging data across different clients. "
+            f"Your data type is {type(all_data[id_has_key][data_name])}. "
+            f"Currently we only support the following forms: "
+            " 1): {data_id: {train: {x:ndarray, y:ndarray}} }"
+            " 2): {data_id: {train: DataLoader }")
+    return merged_data
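`merge_data` concatenates the per-client splits into a single view (stored under client 1) for global training/evaluation modes. A small usage sketch of its ndarray branch, assuming this patch is applied; the shapes are illustrative:

```python
import numpy as np

from federatedscope.core.data.utils import merge_data

all_data = {
    1: {'train': {'x': np.ones((2, 3)), 'y': np.zeros(2)},
        'test': {'x': np.ones((1, 3)), 'y': np.zeros(1)}},
    2: {'train': {'x': np.ones((4, 3)), 'y': np.zeros(4)},
        'test': {'x': np.ones((2, 3)), 'y': np.zeros(2)}},
}
# Merge clients 1..2 (merged_max_data_id is an exclusive upper bound).
merged = merge_data(all_data, merged_max_data_id=3)
assert merged['train']['x'].shape == (6, 3)
assert merged['test']['x'].shape == (3, 3)
```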
"test" if "test" in dataset_names else dataset_names[0] + id_has_key = 1 + while "test" not in all_data[id_has_key]: + id_has_key += 1 + if len(all_data) <= id_has_key: + raise KeyError(f'All data do not key {data_name}.') + if isinstance(all_data[id_has_key][data_name], dict): + data_elem_names = list( + all_data[id_has_key][data_name].keys()) # e.g., x, y + merged_data = {name: defaultdict(list) for name in dataset_names} + for data_id in range(1, merged_max_data_id): + for d_name in dataset_names: + if d_name not in all_data[data_id]: + continue + for elem_name in data_elem_names: + merged_data[d_name][elem_name].append( + all_data[data_id][d_name][elem_name]) + for d_name in dataset_names: + for elem_name in data_elem_names: + merged_data[d_name][elem_name] = np.concatenate( + merged_data[d_name][elem_name]) + elif issubclass(type(all_data[id_has_key][data_name]), + torch.utils.data.DataLoader): + merged_data = all_data[id_has_key] + for data_id in range(1, merged_max_data_id): + if data_id == id_has_key: + continue + for d_name in dataset_names: + if d_name not in all_data[data_id]: + continue + merged_data[d_name].dataset.extend( + all_data[data_id][d_name].dataset) + else: + raise NotImplementedError( + "Un-supported type when merging data across different clients." + f"Your data type is {type(all_data[id_has_key][data_name])}. " + f"Currently we only support the following forms: " + " 1): {data_id: {train: {x:ndarray, y:ndarray}} }" + " 2): {data_id: {train: DataLoader }") + return merged_data diff --git a/federatedscope/core/fed_runner.py b/federatedscope/core/fed_runner.py index 52cb7db72..dac9e6649 100644 --- a/federatedscope/core/fed_runner.py +++ b/federatedscope/core/fed_runner.py @@ -8,7 +8,8 @@ from federatedscope.core.workers import Server, Client from federatedscope.core.gpu_manager import GPUManager from federatedscope.core.auxiliaries.model_builder import get_model -from federatedscope.core.auxiliaries.utils import get_resource_info, merge_data +from federatedscope.core.auxiliaries.utils import get_resource_info +from federatedscope.core.data.utils import merge_data logger = logging.getLogger(__name__) diff --git a/federatedscope/core/trainers/context.py b/federatedscope/core/trainers/context.py index cc174843d..e612339df 100644 --- a/federatedscope/core/trainers/context.py +++ b/federatedscope/core/trainers/context.py @@ -147,7 +147,8 @@ def setup_vars(self): calculate_batch_epoch_num( self.cfg.train.local_update_steps, self.cfg.train.batch_or_epoch, self.num_train_data, - self.cfg.data.batch_size, self.cfg.data.drop_last) + self.cfg.dataloader.batch_size, + self.cfg.dataloader.drop_last) # Process evaluation data for mode in ["val", "test"]: @@ -157,10 +158,10 @@ def setup_vars(self): setattr( self, "num_{}_batch".format(mode), getattr(self, "num_{}_data".format(mode)) // - self.cfg.data.batch_size + - int(not self.cfg.data.drop_last and bool( + self.cfg.dataloader.batch_size + + int(not self.cfg.dataloader.drop_last and bool( getattr(self, "num_{}_data".format(mode)) % - self.cfg.data.batch_size))) + self.cfg.dataloader.batch_size))) def track_mode(self, mode): self.mode_stack.append(mode) diff --git a/federatedscope/core/trainers/torch_trainer.py b/federatedscope/core/trainers/torch_trainer.py index 8ffa4ea14..1cfc22155 100644 --- a/federatedscope/core/trainers/torch_trainer.py +++ b/federatedscope/core/trainers/torch_trainer.py @@ -173,7 +173,7 @@ def _hook_on_epoch_start(self, ctx): if ctx.get("{}_loader".format(ctx.cur_split)) is None: loader = get_dataloader( 
WrapDataset(ctx.get("{}_data".format(ctx.cur_split))), - self.cfg) + self.cfg, ctx.cur_split) setattr(ctx, "{}_loader".format(ctx.cur_split), ReIterator(loader)) elif not isinstance(ctx.get("{}_loader".format(ctx.cur_split)), ReIterator): diff --git a/federatedscope/core/trainers/trainer_Ditto.py b/federatedscope/core/trainers/trainer_Ditto.py index e5ae8f4ef..e4e4a0200 100644 --- a/federatedscope/core/trainers/trainer_Ditto.py +++ b/federatedscope/core/trainers/trainer_Ditto.py @@ -99,8 +99,8 @@ def init_Ditto_ctx(base_trainer): calculate_batch_epoch_num(cfg_p_local_update_steps, cfg.train.batch_or_epoch, ctx.num_train_data, - cfg.data.batch_size, - cfg.data.drop_last) + cfg.dataloader.batch_size, + cfg.dataloader.drop_last) # In the first # 1. `num_train_batch` and `num_train_batch_last_epoch` diff --git a/federatedscope/core/workers/client.py b/federatedscope/core/workers/client.py index 2dbc4bdd7..ad72d89dc 100644 --- a/federatedscope/core/workers/client.py +++ b/federatedscope/core/workers/client.py @@ -407,7 +407,7 @@ def callback_funcs_for_join_in_info(self, message: Message): if requirement.lower() == 'num_sample': if self._cfg.train.batch_or_epoch == 'batch': num_sample = self._cfg.train.local_update_steps * \ - self._cfg.data.batch_size + self._cfg.dataloader.batch_size else: num_sample = self._cfg.train.local_update_steps * \ self.trainer.ctx.num_train_batch diff --git a/federatedscope/gfl/dataloader/dataloader_graph.py b/federatedscope/gfl/dataloader/dataloader_graph.py index b300097c2..25faba9e3 100644 --- a/federatedscope/gfl/dataloader/dataloader_graph.py +++ b/federatedscope/gfl/dataloader/dataloader_graph.py @@ -32,7 +32,7 @@ def load_graphlevel_dataset(config=None, client_cfgs=None): path = config.data.root name = config.data.type.upper() client_num = config.federate.client_num - batch_size = config.data.batch_size + batch_size = config.dataloader.batch_size # Splitter splitter = get_splitter(config) diff --git a/federatedscope/gfl/dataloader/dataloader_link.py b/federatedscope/gfl/dataloader/dataloader_link.py index ae983b078..5628927aa 100644 --- a/federatedscope/gfl/dataloader/dataloader_link.py +++ b/federatedscope/gfl/dataloader/dataloader_link.py @@ -24,15 +24,16 @@ def raw2loader(raw_data, config=None): elif config.data.loader == 'graphsaint-rw': loader = GraphSAINTRandomWalkSampler( raw_data, - batch_size=config.data.batch_size, - walk_length=config.data.graphsaint.walk_length, - num_steps=config.data.graphsaint.num_steps, + batch_size=config.dataloader.batch_size, + walk_length=config.dataloader.walk_length, + num_steps=config.dataloader.num_steps, sample_coverage=0) - subgraph_sampler = NeighborSampler(raw_data.edge_index, - sizes=[-1], - batch_size=4096, - shuffle=False, - num_workers=config.data.num_workers) + subgraph_sampler = NeighborSampler( + raw_data.edge_index, + sizes=[-1], + batch_size=4096, + shuffle=False, + num_workers=config.dataloader.num_workers) sampler = dict(data=raw_data, train=loader, val=subgraph_sampler, diff --git a/federatedscope/gfl/dataloader/dataloader_node.py b/federatedscope/gfl/dataloader/dataloader_node.py index ce04ee35b..cc49c153e 100644 --- a/federatedscope/gfl/dataloader/dataloader_node.py +++ b/federatedscope/gfl/dataloader/dataloader_node.py @@ -36,15 +36,16 @@ def raw2loader(raw_data, config=None): num_nodes=raw_data.x.shape[0])[0] loader = GraphSAINTRandomWalkSampler( raw_data, - batch_size=config.data.batch_size, - walk_length=config.data.graphsaint.walk_length, - num_steps=config.data.graphsaint.num_steps, + 
batch_size=config.dataloader.batch_size, + walk_length=config.dataloader.walk_length, + num_steps=config.dataloader.num_steps, sample_coverage=0) - subgraph_sampler = NeighborSampler(raw_data.edge_index, - sizes=[-1], - batch_size=4096, - shuffle=False, - num_workers=config.data.num_workers) + subgraph_sampler = NeighborSampler( + raw_data.edge_index, + sizes=[-1], + batch_size=4096, + shuffle=False, + num_workers=config.dataloader.num_workers) sampler = dict(data=raw_data, train=loader, val=subgraph_sampler, @@ -57,15 +58,16 @@ def raw2loader(raw_data, config=None): train_idx = raw_data.train_mask.nonzero(as_tuple=True)[0] loader = NeighborSampler(raw_data.edge_index, node_idx=train_idx, - sizes=config.data.sizes, - batch_size=config.data.batch_size, - shuffle=config.data.shuffle, - num_workers=config.data.num_workers) - subgraph_sampler = NeighborSampler(raw_data.edge_index, - sizes=[-1], - batch_size=4096, - shuffle=False, - num_workers=config.data.num_workers) + sizes=config.dataloader.sizes, + batch_size=config.dataloader.batch_size, + shuffle=config.dataloader.shuffle, + num_workers=config.dataloader.num_workers) + subgraph_sampler = NeighborSampler( + raw_data.edge_index, + sizes=[-1], + batch_size=4096, + shuffle=False, + num_workers=config.dataloader.num_workers) sampler = dict(data=raw_data, train=loader, val=subgraph_sampler, diff --git a/federatedscope/gfl/dataset/cikm_cup.py b/federatedscope/gfl/dataset/cikm_cup.py index 60678b85c..c25cb4f60 100644 --- a/federatedscope/gfl/dataset/cikm_cup.py +++ b/federatedscope/gfl/dataset/cikm_cup.py @@ -71,18 +71,20 @@ def load_cikmcup_data(config, client_cfgs=None): if 'train' in dataset[client_idx]: dataloader_dict['train'] = DataLoader( dataset[client_idx]['train'], - client_cfg.data.batch_size, - shuffle=client_cfg.data.shuffle) + client_cfg.dataloader.batch_size, + shuffle=client_cfg.dataloader.shuffle) tmp_dataset += dataset[client_idx]['train'] if 'val' in dataset[client_idx]: - dataloader_dict['val'] = DataLoader(dataset[client_idx]['val'], - client_cfg.data.batch_size, - shuffle=False) + dataloader_dict['val'] = DataLoader( + dataset[client_idx]['val'], + client_cfg.dataloader.batch_size, + shuffle=False) tmp_dataset += dataset[client_idx]['val'] if 'test' in dataset[client_idx]: - dataloader_dict['test'] = DataLoader(dataset[client_idx]['test'], - client_cfg.data.batch_size, - shuffle=False) + dataloader_dict['test'] = DataLoader( + dataset[client_idx]['test'], + client_cfg.dataloader.batch_size, + shuffle=False) tmp_dataset += dataset[client_idx]['test'] if tmp_dataset: dataloader_dict['num_label'] = 0 diff --git a/federatedscope/gfl/fedsageplus/worker.py b/federatedscope/gfl/fedsageplus/worker.py index f1812598d..a1a5a94e1 100644 --- a/federatedscope/gfl/fedsageplus/worker.py +++ b/federatedscope/gfl/fedsageplus/worker.py @@ -348,19 +348,21 @@ def callback_funcs_for_setup_fedsage(self, message: Message): sizes=[-1], batch_size=4096, shuffle=False, - num_workers=self._cfg.data.num_workers) + num_workers=self._cfg.dataloader.num_workers) fill_dataloader = { 'data': self.filled_data, - 'train': NeighborSampler(self.filled_data.edge_index, - node_idx=self.filled_data.train_idx, - sizes=self._cfg.data.sizes, - batch_size=self.sage_batch_size, - shuffle=self._cfg.data.shuffle, - num_workers=self._cfg.data.num_workers), + 'train': NeighborSampler( + self.filled_data.edge_index, + node_idx=self.filled_data.train_idx, + sizes=self._cfg.dataloader.sizes, + batch_size=self.sage_batch_size, + shuffle=self._cfg.dataloader.shuffle, + 
num_workers=self._cfg.dataloader.num_workers), 'val': subgraph_sampler, 'test': subgraph_sampler } - self._cfg.merge_from_list(['data.batch_size', self.sage_batch_size]) + self._cfg.merge_from_list( + ['dataloader.batch_size', self.sage_batch_size]) self.trainer_clf = NodeMiniBatchTrainer(self.clf, fill_dataloader, self.device, diff --git a/federatedscope/gfl/trainer/linktrainer.py b/federatedscope/gfl/trainer/linktrainer.py index 72ab9b9a3..fb86717c8 100644 --- a/federatedscope/gfl/trainer/linktrainer.py +++ b/federatedscope/gfl/trainer/linktrainer.py @@ -46,12 +46,13 @@ def parse_data(self, data): for mode in ["train", "val", "test"]: edges = data.edge_index.T[data[MODE2MASK[mode]]] # Use an index loader - index_loader = DataLoader(range(edges.size(0)), - self.cfg.data.batch_size, - shuffle=self.cfg.data.shuffle - if mode == 'train' else False, - drop_last=self.cfg.data.drop_last - if mode == 'train' else False) + index_loader = DataLoader( + range(edges.size(0)), + self.cfg.dataloader.batch_size, + shuffle=self.cfg.dataloader.shuffle + if mode == 'train' else False, + drop_last=self.cfg.dataloader.drop_last + if mode == 'train' else False) init_dict["{}_loader".format(mode)] = index_loader init_dict["num_{}_data".format(mode)] = edges.size(0) init_dict["{}_data".format(mode)] = None @@ -159,7 +160,7 @@ def parse_data(self, data): data.get(mode) ] init_dict["num_{}_data".format( - mode)] = self.cfg.data.batch_size + mode)] = self.cfg.dataloader.batch_size else: raise TypeError("Type {} is not supported.".format( type(data.get(mode)))) @@ -187,7 +188,7 @@ def _hook_on_batch_forward(self, ctx): pred = [] for perm in DataLoader(range(edges.size(0)), - self.cfg.data.batch_size): + self.cfg.dataloader.batch_size): edge = edges[perm].T pred += [ctx.model.link_predictor(h, edge).squeeze()] pred = torch.cat(pred, dim=0) diff --git a/federatedscope/gfl/trainer/nodetrainer.py b/federatedscope/gfl/trainer/nodetrainer.py index e22341d07..88ad5cc9f 100644 --- a/federatedscope/gfl/trainer/nodetrainer.py +++ b/federatedscope/gfl/trainer/nodetrainer.py @@ -119,7 +119,7 @@ def parse_data(self, data): data.get(mode) ] init_dict["num_{}_data".format( - mode)] = self.cfg.data.batch_size + mode)] = self.cfg.dataloader.batch_size else: raise TypeError("Type {} is not supported.".format( type(data.get(mode)))) diff --git a/federatedscope/mf/dataloader/dataloader.py b/federatedscope/mf/dataloader/dataloader.py index 062a614d3..1dbecd37c 100644 --- a/federatedscope/mf/dataloader/dataloader.py +++ b/federatedscope/mf/dataloader/dataloader.py @@ -58,16 +58,16 @@ def load_mf_dataset(config=None, client_cfgs=None): client_cfg = config data_local_dict[id_client]["train"] = MFDataLoader( data["train"], - shuffle=client_cfg.data.shuffle, - batch_size=client_cfg.data.batch_size, - drop_last=client_cfg.data.drop_last, - theta=client_cfg.sgdmf.theta) + shuffle=client_cfg.dataloader.shuffle, + batch_size=client_cfg.dataloader.batch_size, + drop_last=client_cfg.dataloader.drop_last, + theta=client_cfg.dataloader.theta) data_local_dict[id_client]["test"] = MFDataLoader( data["test"], shuffle=False, - batch_size=client_cfg.data.batch_size, - drop_last=client_cfg.data.drop_last, - theta=client_cfg.sgdmf.theta) + batch_size=client_cfg.dataloader.batch_size, + drop_last=client_cfg.dataloader.drop_last, + theta=client_cfg.dataloader.theta) # Modify config config.merge_from_list(['model.num_user', dataset.n_user]) diff --git a/federatedscope/mf/trainer/trainer_sgdmf.py b/federatedscope/mf/trainer/trainer_sgdmf.py index 
7e5dddacc..653eeb555 100644 --- a/federatedscope/mf/trainer/trainer_sgdmf.py +++ b/federatedscope/mf/trainer/trainer_sgdmf.py @@ -37,13 +37,13 @@ def init_sgdmf_ctx(base_trainer): ctx = base_trainer.ctx cfg = base_trainer.cfg - sample_ratio = float(cfg.data.batch_size) / cfg.model.num_user + sample_ratio = float(cfg.dataloader.batch_size) / cfg.model.num_user # Noise multiplier tmp = cfg.sgdmf.constant * np.power(sample_ratio, 2) * ( cfg.federate.total_round_num * ctx.num_total_train_batch) * np.log( 1. / cfg.sgdmf.delta) noise_multipler = np.sqrt(tmp / np.power(cfg.sgdmf.epsilon, 2)) - ctx.scale = max(cfg.sgdmf.theta, 1.) * noise_multipler * np.power( + ctx.scale = max(cfg.dataloader.theta, 1.) * noise_multipler * np.power( cfg.sgdmf.R, 1.5) logger.info("Inject noise: (loc=0, scale={})".format(ctx.scale)) ctx.sgdmf_R = cfg.sgdmf.R diff --git a/federatedscope/tabular/dataloader/toy.py b/federatedscope/tabular/dataloader/toy.py index b8fb4ad9a..5faa20626 100644 --- a/federatedscope/tabular/dataloader/toy.py +++ b/federatedscope/tabular/dataloader/toy.py @@ -3,6 +3,7 @@ import numpy as np from federatedscope.core.data import StandaloneDataDict +from federatedscope.core.auxiliaries.dataloader_builder import WrapDataset def load_toy_data(config=None, client_cfgs=None): @@ -110,9 +111,10 @@ def _generate_data(client_num=5, else: with open(config.distribute.data_file, 'rb') as f: data = pickle.load(f) - for key in data.keys(): - data[key] = {k: np.asarray(v) - for k, v in data[key].items() - } if data[key] is not None else None + for client_id in data.keys(): + data[client_id] = { + k: WrapDataset(np.asarray(v)) + for k, v in data[client_id].items() + } if data[client_id] is not None else None - return StandaloneDataDict(data, config), config + return data, config diff --git a/federatedscope/vertical_fl/dataloader/dataloader.py b/federatedscope/vertical_fl/dataloader/dataloader.py index bd7460574..4e0b5e7d3 100644 --- a/federatedscope/vertical_fl/dataloader/dataloader.py +++ b/federatedscope/vertical_fl/dataloader/dataloader.py @@ -1,6 +1,7 @@ import numpy as np from federatedscope.core.data import StandaloneDataDict +from federatedscope.core.auxiliaries.dataloader_builder import WrapDataset def load_vertical_data(config=None, generate=False): @@ -36,23 +37,24 @@ def load_vertical_data(config=None, generate=False): data[0] = dict() data[0]['train'] = None data[0]['val'] = None - data[0]['test'] = test_data + data[0]['test'] = WrapDataset(test_data) # For Client #1 data[1] = dict() - data[1]['train'] = { + data[1]['train'] = WrapDataset({ 'x': x[:train_num, :config.vertical.dims[0]], 'y': y[:train_num] - } + }) data[1]['val'] = None - data[1]['test'] = test_data + data[1]['test'] = WrapDataset(test_data) # For Client #2 data[2] = dict() - data[2]['train'] = {'x': x[:train_num, config.vertical.dims[0]:]} + data[2]['train'] = WrapDataset( + {'x': x[:train_num, config.vertical.dims[0]:]}) data[2]['val'] = None - data[2]['test'] = test_data + data[2]['test'] = WrapDataset(test_data) - return StandaloneDataDict(data, config), config + return data, config else: raise ValueError('You must provide the data file') diff --git a/federatedscope/vertical_fl/worker/vertical_client.py b/federatedscope/vertical_fl/worker/vertical_client.py index b463ea82e..09fcb9c18 100644 --- a/federatedscope/vertical_fl/worker/vertical_client.py +++ b/federatedscope/vertical_fl/worker/vertical_client.py @@ -37,7 +37,7 @@ def __init__(self, self.batch_index = None self.own_label = ('y' in self.data['train']) self.dataloader = 
batch_iter(self.data['train'], - self._cfg.data.batch_size, + self._cfg.dataloader.batch_size, shuffled=True) self.register_handlers('public_keys', diff --git a/scripts/example_configs/femnist/avg/ss.yaml b/scripts/example_configs/femnist/avg/ss.yaml index 199a1c1d2..e7d57b0ae 100644 --- a/scripts/example_configs/femnist/avg/ss.yaml +++ b/scripts/example_configs/femnist/avg/ss.yaml @@ -14,6 +14,6 @@ train.local_update_steps: type: int lower: 1 upper: 4 -data.batch_size: +dataloader.batch_size: type: cate choices: [16, 32, 64] diff --git a/scripts/example_configs/femnist/hpo_ss_fedex_arm.yaml b/scripts/example_configs/femnist/hpo_ss_fedex_arm.yaml index 6f1e7ef33..ea438a1ad 100644 --- a/scripts/example_configs/femnist/hpo_ss_fedex_arm.yaml +++ b/scripts/example_configs/femnist/hpo_ss_fedex_arm.yaml @@ -10,6 +10,6 @@ model.dropout: train.local_update_steps: type: cate choices: [1, 2, 3, 4] -data.batch_size: +dataloader.batch_size: type: cate choices: [16, 32, 64] \ No newline at end of file diff --git a/scripts/example_configs/femnist/hpo_ss_fedex_grid.yaml b/scripts/example_configs/femnist/hpo_ss_fedex_grid.yaml index c7e9138b0..774265c55 100644 --- a/scripts/example_configs/femnist/hpo_ss_fedex_grid.yaml +++ b/scripts/example_configs/femnist/hpo_ss_fedex_grid.yaml @@ -2,4 +2,4 @@ train.optimizer.lr: [0.01, 0.01668, 0.02783, 0.04642, 0.07743, 0.12915, 0.21544, train.optimizer.weight_decay: [0.0, 0.001, 0.01, 0.1] model.dropout: [0.0, 0.5] train.local_update_steps: [1, 2, 3, 4] -data.batch_size: [16, 32, 64] \ No newline at end of file +dataloader.batch_size: [16, 32, 64] \ No newline at end of file diff --git a/scripts/example_configs/femnist/hpo_ss_sha.yaml b/scripts/example_configs/femnist/hpo_ss_sha.yaml index 200f8624d..4a65f2cef 100644 --- a/scripts/example_configs/femnist/hpo_ss_sha.yaml +++ b/scripts/example_configs/femnist/hpo_ss_sha.yaml @@ -12,6 +12,6 @@ model.dropout: train.local_update_steps: type: cate choices: [1, 2, 3, 4] -data.batch_size: +dataloader.batch_size: type: cate choices: [16, 32, 64] \ No newline at end of file diff --git a/scripts/mf_exp_scripts/run_movielens1m_hfl_standalone.sh b/scripts/mf_exp_scripts/run_movielens1m_hfl_standalone.sh index 3afd0c519..e68245dc8 100644 --- a/scripts/mf_exp_scripts/run_movielens1m_hfl_standalone.sh +++ b/scripts/mf_exp_scripts/run_movielens1m_hfl_standalone.sh @@ -9,4 +9,4 @@ python federatedscope/main.py --cfg federatedscope/mf/baseline/hfl_fedavg_standa train.optimizer.lr 0.8 \ train.local_update_steps 20 \ federate.total_round_num 50 \ - data.batch_size 32 + dataloader.batch_size 32 diff --git a/scripts/mf_exp_scripts/run_movielens1m_hflsgdmf_standalone.sh b/scripts/mf_exp_scripts/run_movielens1m_hflsgdmf_standalone.sh index f063beb66..ef24de8f1 100644 --- a/scripts/mf_exp_scripts/run_movielens1m_hflsgdmf_standalone.sh +++ b/scripts/mf_exp_scripts/run_movielens1m_hflsgdmf_standalone.sh @@ -11,4 +11,4 @@ python federatedscope/main.py --cfg federatedscope/mf/baseline/hfl-sgdmf_fedavg_ train.optimizer.lr 0.1 \ train.local_update_steps 20 \ federate.total_round_num 50 \ - data.batch_size 64 + dataloader.batch_size 64 diff --git a/scripts/mf_exp_scripts/run_movielens1m_vfl_standalone.sh b/scripts/mf_exp_scripts/run_movielens1m_vfl_standalone.sh index 35f253438..6951676ae 100644 --- a/scripts/mf_exp_scripts/run_movielens1m_vfl_standalone.sh +++ b/scripts/mf_exp_scripts/run_movielens1m_vfl_standalone.sh @@ -9,4 +9,4 @@ python federatedscope/main.py --cfg federatedscope/mf/baseline/vfl_fedavg_standa train.optimizer.lr 0.8 \ 
train.local_update_steps 20 \ federate.total_round_num 50 \ - data.batch_size 32 \ No newline at end of file + dataloader.batch_size 32 \ No newline at end of file diff --git a/scripts/mf_exp_scripts/run_movielens1m_vflsgdmf_standalone.sh b/scripts/mf_exp_scripts/run_movielens1m_vflsgdmf_standalone.sh index 0000a0260..98628a467 100644 --- a/scripts/mf_exp_scripts/run_movielens1m_vflsgdmf_standalone.sh +++ b/scripts/mf_exp_scripts/run_movielens1m_vflsgdmf_standalone.sh @@ -11,4 +11,4 @@ python federatedscope/main.py --cfg federatedscope/mf/baseline/vfl-sgdmf_fedavg_ train.optimizer.lr 0.1 \ train.local_update_steps 20 \ federate.total_round_num 50 \ - data.batch_size 64 + dataloader.batch_size 64 diff --git a/scripts/personalization_exp_scripts/run_femnist_ditto.sh b/scripts/personalization_exp_scripts/run_femnist_ditto.sh index 7cc61cc1f..dd26cfcf7 100755 --- a/scripts/personalization_exp_scripts/run_femnist_ditto.sh +++ b/scripts/personalization_exp_scripts/run_femnist_ditto.sh @@ -23,7 +23,7 @@ do do for k in {1..3} do - python federatedscope/main.py --cfg federatedscope/cv/baseline/fedavg_convnet2_on_femnist.yaml federate.method ${method} personalization.regular_weight ${personalization_regular_weight} data.batch_size ${bs} device ${cudaid} train.optimizer.lr ${lrs[$i]} train.local_update_steps ${local_updates[$j]} model.type ${models[$g]} seed $k outdir ${outdir}/${models[$g]}_${lrs[$i]}_${local_updates[$j]}_bs${bs}_on_${dataset} + python federatedscope/main.py --cfg federatedscope/cv/baseline/fedavg_convnet2_on_femnist.yaml federate.method ${method} personalization.regular_weight ${personalization_regular_weight} dataloader.batch_size ${bs} device ${cudaid} train.optimizer.lr ${lrs[$i]} train.local_update_steps ${local_updates[$j]} model.type ${models[$g]} seed $k outdir ${outdir}/${models[$g]}_${lrs[$i]}_${local_updates[$j]}_bs${bs}_on_${dataset} done done done diff --git a/scripts/personalization_exp_scripts/run_femnist_fedavg.sh b/scripts/personalization_exp_scripts/run_femnist_fedavg.sh index f920032df..17238cc65 100755 --- a/scripts/personalization_exp_scripts/run_femnist_fedavg.sh +++ b/scripts/personalization_exp_scripts/run_femnist_fedavg.sh @@ -22,7 +22,7 @@ do do for k in {1..3} do - python federatedscope/main.py --cfg federatedscope/cv/baseline/fedavg_convnet2_on_femnist.yaml federate.method ${method} data.batch_size ${bs} device ${cudaid} train.optimizer.lr ${lrs[$i]} train.local_update_steps ${local_updates[$j]} model.type ${models[$g]} seed $k outdir ${outdir}/${models[$g]}_${lrs[$i]}_${local_updates[$j]}_bs${bs}_on_${dataset} + python federatedscope/main.py --cfg federatedscope/cv/baseline/fedavg_convnet2_on_femnist.yaml federate.method ${method} dataloader.batch_size ${bs} device ${cudaid} train.optimizer.lr ${lrs[$i]} train.local_update_steps ${local_updates[$j]} model.type ${models[$g]} seed $k outdir ${outdir}/${models[$g]}_${lrs[$i]}_${local_updates[$j]}_bs${bs}_on_${dataset} done done done diff --git a/scripts/personalization_exp_scripts/run_femnist_fedbn.sh b/scripts/personalization_exp_scripts/run_femnist_fedbn.sh index 8d3cb3cae..0d808a78f 100755 --- a/scripts/personalization_exp_scripts/run_femnist_fedbn.sh +++ b/scripts/personalization_exp_scripts/run_femnist_fedbn.sh @@ -22,7 +22,7 @@ do do for k in {1..3} do - python federatedscope/main.py --cfg federatedscope/cv/baseline/fedbn_convnet2_on_femnist.yaml data.batch_size ${bs} device ${cudaid} train.optimizer.lr ${lrs[$i]} train.local_update_steps ${local_updates[$j]} model.type ${models[$g]} seed $k outdir 
${outdir}/${models[$g]}_${lrs[$i]}_${local_updates[$j]}_bs${bs}_on_${dataset} + python federatedscope/main.py --cfg federatedscope/cv/baseline/fedbn_convnet2_on_femnist.yaml dataloader.batch_size ${bs} device ${cudaid} train.optimizer.lr ${lrs[$i]} train.local_update_steps ${local_updates[$j]} model.type ${models[$g]} seed $k outdir ${outdir}/${models[$g]}_${lrs[$i]}_${local_updates[$j]}_bs${bs}_on_${dataset} done done done diff --git a/scripts/personalization_exp_scripts/run_femnist_fedem.sh b/scripts/personalization_exp_scripts/run_femnist_fedem.sh index 1c067d2fd..0dba67f77 100755 --- a/scripts/personalization_exp_scripts/run_femnist_fedem.sh +++ b/scripts/personalization_exp_scripts/run_femnist_fedem.sh @@ -23,7 +23,7 @@ do do for (( g=0; g<${#models[@]}; g++ )) do - python federatedscope/main.py --cfg federatedscope/cv/baseline/fedavg_convnet2_on_femnist.yaml federate.method ${method} model.model_num_per_trainer ${model_num_per_trainer} data.batch_size ${bs} device ${cudaid} train.optimizer.lr ${lrs[$i]} train.local_update_steps ${local_updates[$j]} model.type ${models[$g]} seed $k outdir ${outdir}/${models[$g]}_${lrs[$i]}_${local_updates[$j]}_bs${bs}_on_${dataset} + python federatedscope/main.py --cfg federatedscope/cv/baseline/fedavg_convnet2_on_femnist.yaml federate.method ${method} model.model_num_per_trainer ${model_num_per_trainer} dataloader.batch_size ${bs} device ${cudaid} train.optimizer.lr ${lrs[$i]} train.local_update_steps ${local_updates[$j]} model.type ${models[$g]} seed $k outdir ${outdir}/${models[$g]}_${lrs[$i]}_${local_updates[$j]}_bs${bs}_on_${dataset} done done done diff --git a/scripts/personalization_exp_scripts/run_femnist_pfedme.sh b/scripts/personalization_exp_scripts/run_femnist_pfedme.sh index 77f55126e..e23ee0d7d 100755 --- a/scripts/personalization_exp_scripts/run_femnist_pfedme.sh +++ b/scripts/personalization_exp_scripts/run_femnist_pfedme.sh @@ -25,7 +25,7 @@ do do for k in {1..3} do - python federatedscope/main.py --cfg federatedscope/cv/baseline/fedavg_convnet2_on_femnist.yaml federate.method ${method} data.batch_size ${bs} personalization.K ${personalization_K} personalization.lr ${personalization_lr} personalization.regular_weight ${personalization_beta} device ${cudaid} optimizer.lr ${lrs[$i]} train.local_update_steps ${local_updates[$j]} model.type ${models[$g]} seed $k outdir ${outdir}/${models[$g]}_${lrs[$i]}_${local_updates[$j]}_bs${bs}_on_${dataset} + python federatedscope/main.py --cfg federatedscope/cv/baseline/fedavg_convnet2_on_femnist.yaml federate.method ${method} dataloader.batch_size ${bs} personalization.K ${personalization_K} personalization.lr ${personalization_lr} personalization.regular_weight ${personalization_beta} device ${cudaid} optimizer.lr ${lrs[$i]} train.local_update_steps ${local_updates[$j]} model.type ${models[$g]} seed $k outdir ${outdir}/${models[$g]}_${lrs[$i]}_${local_updates[$j]}_bs${bs}_on_${dataset} done done done diff --git a/scripts/personalization_exp_scripts/run_shakespeare_ditto.sh b/scripts/personalization_exp_scripts/run_shakespeare_ditto.sh index 0a0f38edf..ce53722cf 100755 --- a/scripts/personalization_exp_scripts/run_shakespeare_ditto.sh +++ b/scripts/personalization_exp_scripts/run_shakespeare_ditto.sh @@ -23,7 +23,7 @@ do do for (( g=0; g<${#models[@]}; g++ )) do - python federatedscope/main.py --cfg federatedscope/nlp/baseline/fedavg_lstm_on_shakespeare.yaml federate.method ${method} data.batch_size ${bs} personalization.regular_weight ${personalization_regular_weight} device ${cudaid} 
train.optimizer.lr ${lrs[$i]} train.local_update_steps ${local_updates[$j]} model.type ${models[$g]} seed $k outdir ${outdir}/${models[$g]}_${lrs[$i]}_${local_updates[$j]}_bs${bs}_on_${dataset} + python federatedscope/main.py --cfg federatedscope/nlp/baseline/fedavg_lstm_on_shakespeare.yaml federate.method ${method} dataloader.batch_size ${bs} personalization.regular_weight ${personalization_regular_weight} device ${cudaid} train.optimizer.lr ${lrs[$i]} train.local_update_steps ${local_updates[$j]} model.type ${models[$g]} seed $k outdir ${outdir}/${models[$g]}_${lrs[$i]}_${local_updates[$j]}_bs${bs}_on_${dataset} done done done diff --git a/scripts/personalization_exp_scripts/run_shakespeare_fedavg.sh b/scripts/personalization_exp_scripts/run_shakespeare_fedavg.sh index cc6d988c3..ecb7c57e1 100755 --- a/scripts/personalization_exp_scripts/run_shakespeare_fedavg.sh +++ b/scripts/personalization_exp_scripts/run_shakespeare_fedavg.sh @@ -22,7 +22,7 @@ do do for (( g=0; g<${#models[@]}; g++ )) do - python federatedscope/main.py --cfg federatedscope/nlp/baseline/fedavg_lstm_on_shakespeare.yaml federate.method ${method} data.batch_size ${bs} device ${cudaid} train.optimizer.lr ${lrs[$i]} train.local_update_steps ${local_updates[$j]} model.type ${models[$g]} seed $k outdir ${outdir}/${models[$g]}_${lrs[$i]}_${local_updates[$j]}_bs${bs}_on_${dataset} + python federatedscope/main.py --cfg federatedscope/nlp/baseline/fedavg_lstm_on_shakespeare.yaml federate.method ${method} dataloader.batch_size ${bs} device ${cudaid} train.optimizer.lr ${lrs[$i]} train.local_update_steps ${local_updates[$j]} model.type ${models[$g]} seed $k outdir ${outdir}/${models[$g]}_${lrs[$i]}_${local_updates[$j]}_bs${bs}_on_${dataset} done done done diff --git a/scripts/personalization_exp_scripts/run_shakespeare_fedem.sh b/scripts/personalization_exp_scripts/run_shakespeare_fedem.sh index fe4de1fdd..83160d69b 100755 --- a/scripts/personalization_exp_scripts/run_shakespeare_fedem.sh +++ b/scripts/personalization_exp_scripts/run_shakespeare_fedem.sh @@ -28,7 +28,7 @@ do do for (( g=0; g<${#models[@]}; g++ )) do - python federatedscope/main.py --cfg federatedscope/nlp/baseline/fedavg_lstm_on_shakespeare.yaml federate.method ${method} data.batch_size ${bs} model.model_num_per_trainer ${model_num_per_trainer} device ${cudaid} train.optimizer.lr ${lrs[$i]} train.local_update_steps ${local_updates[$j]} model.type ${models[$g]} seed $k outdir ${outdir}/${models[$g]}_${lrs[$i]}_${local_updates[$j]}_bs${bs}_on_${dataset} + python federatedscope/main.py --cfg federatedscope/nlp/baseline/fedavg_lstm_on_shakespeare.yaml federate.method ${method} dataloader.batch_size ${bs} model.model_num_per_trainer ${model_num_per_trainer} device ${cudaid} train.optimizer.lr ${lrs[$i]} train.local_update_steps ${local_updates[$j]} model.type ${models[$g]} seed $k outdir ${outdir}/${models[$g]}_${lrs[$i]}_${local_updates[$j]}_bs${bs}_on_${dataset} done done done diff --git a/scripts/personalization_exp_scripts/run_shakespeare_pfedme.sh b/scripts/personalization_exp_scripts/run_shakespeare_pfedme.sh index 40757595e..45d821721 100755 --- a/scripts/personalization_exp_scripts/run_shakespeare_pfedme.sh +++ b/scripts/personalization_exp_scripts/run_shakespeare_pfedme.sh @@ -25,7 +25,7 @@ do do for (( g=0; g<${#models[@]}; g++ )) do - python federatedscope/main.py --cfg federatedscope/nlp/baseline/fedavg_lstm_on_shakespeare.yaml federate.method ${method} data.batch_size ${bs} personalization.K ${personalization_K} personalization.lr ${personalization_lr} 
personalization.regular_weight ${personalization_beta} device ${cudaid} train.optimizer.lr ${lrs[$i]} train.local_update_steps ${local_updates[$j]} model.type ${models[$g]} seed $k outdir ${outdir}/${models[$g]}_${lrs[$i]}_${local_updates[$j]}_bs${bs}_on_${dataset} + python federatedscope/main.py --cfg federatedscope/nlp/baseline/fedavg_lstm_on_shakespeare.yaml federate.method ${method} dataloader.batch_size ${bs} personalization.K ${personalization_K} personalization.lr ${personalization_lr} personalization.regular_weight ${personalization_beta} device ${cudaid} train.optimizer.lr ${lrs[$i]} train.local_update_steps ${local_updates[$j]} model.type ${models[$g]} seed $k outdir ${outdir}/${models[$g]}_${lrs[$i]}_${local_updates[$j]}_bs${bs}_on_${dataset} done done done diff --git a/scripts/personalization_exp_scripts/run_synthetic_ditto.sh b/scripts/personalization_exp_scripts/run_synthetic_ditto.sh index 58d126261..d802d8b2a 100755 --- a/scripts/personalization_exp_scripts/run_synthetic_ditto.sh +++ b/scripts/personalization_exp_scripts/run_synthetic_ditto.sh @@ -23,7 +23,7 @@ do do for k in {1..3} do - python federatedscope/main.py --cfg federatedscope/nlp/baseline/fedavg_lr_on_synthetic.yaml federate.method ${method} data.batch_size ${bs} personalization.regular_weight ${personalization_regular_weight} device ${cudaid} train.optimizer.lr ${lrs[$i]} train.local_update_steps ${local_updates[$j]} model.type ${models[$g]} seed $k outdir ${outdir}/${models[$g]}_${lrs[$i]}_${local_updates[$j]}_bs${bs}_on_${dataset} + python federatedscope/main.py --cfg federatedscope/nlp/baseline/fedavg_lr_on_synthetic.yaml federate.method ${method} dataloader.batch_size ${bs} personalization.regular_weight ${personalization_regular_weight} device ${cudaid} train.optimizer.lr ${lrs[$i]} train.local_update_steps ${local_updates[$j]} model.type ${models[$g]} seed $k outdir ${outdir}/${models[$g]}_${lrs[$i]}_${local_updates[$j]}_bs${bs}_on_${dataset} done done done diff --git a/scripts/personalization_exp_scripts/run_synthetic_fedavg.sh b/scripts/personalization_exp_scripts/run_synthetic_fedavg.sh index 74e928a1a..6d6e28780 100755 --- a/scripts/personalization_exp_scripts/run_synthetic_fedavg.sh +++ b/scripts/personalization_exp_scripts/run_synthetic_fedavg.sh @@ -22,7 +22,7 @@ do do for k in {1..3} do - python federatedscope/main.py --cfg federatedscope/nlp/baseline/fedavg_lr_on_synthetic.yaml federate.method ${method} data.batch_size ${bs} device ${cudaid} train.optimizer.lr ${lrs[$i]} train.local_update_steps ${local_updates[$j]} model.type ${models[$g]} seed $k outdir ${outdir}/${models[$g]}_${lrs[$i]}_${local_updates[$j]}_bs${bs}_on_${dataset} + python federatedscope/main.py --cfg federatedscope/nlp/baseline/fedavg_lr_on_synthetic.yaml federate.method ${method} dataloader.batch_size ${bs} device ${cudaid} train.optimizer.lr ${lrs[$i]} train.local_update_steps ${local_updates[$j]} model.type ${models[$g]} seed $k outdir ${outdir}/${models[$g]}_${lrs[$i]}_${local_updates[$j]}_bs${bs}_on_${dataset} done done done diff --git a/scripts/personalization_exp_scripts/run_synthetic_fedem.sh b/scripts/personalization_exp_scripts/run_synthetic_fedem.sh index 67a0b3bb2..b73934851 100755 --- a/scripts/personalization_exp_scripts/run_synthetic_fedem.sh +++ b/scripts/personalization_exp_scripts/run_synthetic_fedem.sh @@ -28,7 +28,7 @@ do do for k in {1..3} do - python federatedscope/main.py --cfg federatedscope/nlp/baseline/fedavg_lr_on_synthetic.yaml federate.method ${method} data.batch_size ${bs} 
model.model_num_per_trainer ${model_num_per_trainer} device ${cudaid} train.optimizer.lr ${lrs[$i]} train.local_update_steps ${local_updates[$j]} model.type ${models[$g]} seed $k outdir ${outdir}/${models[$g]}_${lrs[$i]}_${local_updates[$j]}_bs${bs}_on_${dataset} + python federatedscope/main.py --cfg federatedscope/nlp/baseline/fedavg_lr_on_synthetic.yaml federate.method ${method} dataloader.batch_size ${bs} model.model_num_per_trainer ${model_num_per_trainer} device ${cudaid} train.optimizer.lr ${lrs[$i]} train.local_update_steps ${local_updates[$j]} model.type ${models[$g]} seed $k outdir ${outdir}/${models[$g]}_${lrs[$i]}_${local_updates[$j]}_bs${bs}_on_${dataset} done done done diff --git a/scripts/personalization_exp_scripts/run_synthetic_pfedme.sh b/scripts/personalization_exp_scripts/run_synthetic_pfedme.sh index 2679f24f1..0a34d2c54 100755 --- a/scripts/personalization_exp_scripts/run_synthetic_pfedme.sh +++ b/scripts/personalization_exp_scripts/run_synthetic_pfedme.sh @@ -25,7 +25,7 @@ do do for k in {1..3} do - python federatedscope/main.py --cfg federatedscope/nlp/baseline/fedavg_lr_on_synthetic.yaml federate.method ${method} data.batch_size ${bs} personalization.K ${personalization_K} personalization.lr ${personalization_lr} personalization.regular_weight ${personalization_beta} device ${cudaid} train.optimizer.lr ${lrs[$i]} train.local_update_steps ${local_updates[$j]} model.type ${models[$g]} seed $k outdir ${outdir}/${models[$g]}_${lrs[$i]}_${local_updates[$j]}_bs${bs}_on_${dataset} + python federatedscope/main.py --cfg federatedscope/nlp/baseline/fedavg_lr_on_synthetic.yaml federate.method ${method} dataloader.batch_size ${bs} personalization.K ${personalization_K} personalization.lr ${personalization_lr} personalization.regular_weight ${personalization_beta} device ${cudaid} train.optimizer.lr ${lrs[$i]} train.local_update_steps ${local_updates[$j]} model.type ${models[$g]} seed $k outdir ${outdir}/${models[$g]}_${lrs[$i]}_${local_updates[$j]}_bs${bs}_on_${dataset} done done done diff --git a/tests/test_CRA_gan_attack.py b/tests/test_CRA_gan_attack.py index 6fad02276..22be63d96 100644 --- a/tests/test_CRA_gan_attack.py +++ b/tests/test_CRA_gan_attack.py @@ -31,7 +31,7 @@ def set_config_femnist(self, cfg): cfg.data.root = 'test_data/' cfg.data.type = 'femnist' cfg.data.splits = [0.6, 0.2, 0.2] - cfg.data.batch_size = 10 + cfg.dataloader.batch_size = 10 cfg.data.subsample = 0.01 cfg.data.transform = [['ToTensor'], [ diff --git a/tests/test_MIA_gradient_ascent.py b/tests/test_MIA_gradient_ascent.py index feb4248cf..5d2283c82 100644 --- a/tests/test_MIA_gradient_ascent.py +++ b/tests/test_MIA_gradient_ascent.py @@ -31,7 +31,7 @@ def set_config_femnist(self, cfg): cfg.data.root = 'test_data/' cfg.data.type = 'femnist' cfg.data.splits = [0.6, 0.2, 0.2] - cfg.data.batch_size = 10 + cfg.dataloader.batch_size = 10 cfg.data.subsample = 0.01 cfg.data.transform = [['ToTensor'], [ diff --git a/tests/test_asyn_cifar10.py b/tests/test_asyn_cifar10.py index 59a87cad0..0a099e066 100644 --- a/tests/test_asyn_cifar10.py +++ b/tests/test_asyn_cifar10.py @@ -35,9 +35,9 @@ def set_config_cifar10_goalAchieved_afterReceiving(self, cfg): cfg.data.type = 'CIFAR10@torchvision' cfg.data.args = [{'download': False}] cfg.data.splits = [0.8, 0.2, 0.2] - cfg.data.batch_size = 10 + cfg.dataloader.batch_size = 10 cfg.data.subsample = 0.2 - cfg.data.num_workers = 0 + cfg.dataloader.num_workers = 0 cfg.data.transform = [['ToTensor'], [ 'Normalize', { @@ -94,9 +94,9 @@ def 
set_config_cifar10_timeUp_afterAggregating(self, cfg): cfg.data.type = 'CIFAR10@torchvision' cfg.data.args = [{'download': False}] cfg.data.splits = [0.8, 0.2, 0.2] - cfg.data.batch_size = 10 + cfg.dataloader.batch_size = 10 cfg.data.subsample = 0.2 - cfg.data.num_workers = 0 + cfg.dataloader.num_workers = 0 cfg.data.transform = [['ToTensor'], [ 'Normalize', { @@ -154,9 +154,9 @@ def set_config_cifar10_overselection(self, cfg): cfg.data.type = 'CIFAR10@torchvision' cfg.data.args = [{'download': False}] cfg.data.splits = [0.8, 0.2, 0.2] - cfg.data.batch_size = 10 + cfg.dataloader.batch_size = 10 cfg.data.subsample = 0.2 - cfg.data.num_workers = 0 + cfg.dataloader.num_workers = 0 cfg.data.transform = [['ToTensor'], [ 'Normalize', { diff --git a/tests/test_backdoor_attack.py b/tests/test_backdoor_attack.py index 1d7a97b43..c1f39729b 100644 --- a/tests/test_backdoor_attack.py +++ b/tests/test_backdoor_attack.py @@ -33,7 +33,7 @@ def set_config_femnist(self, cfg): cfg.data.root = 'test_data/' cfg.data.type = 'femnist' cfg.data.splits = [0.6, 0.2, 0.2] - cfg.data.batch_size = 32 + cfg.dataloader.batch_size = 32 cfg.data.subsample = 0.05 cfg.data.transform = [['ToTensor']] diff --git a/tests/test_ditto.py b/tests/test_ditto.py index 07cd8710a..ff6c87ac9 100644 --- a/tests/test_ditto.py +++ b/tests/test_ditto.py @@ -38,7 +38,7 @@ def set_config_femnist(self, cfg): cfg.data.root = 'test_data/' cfg.data.type = 'femnist' cfg.data.splits = [0.6, 0.2, 0.2] - cfg.data.batch_size = 10 + cfg.dataloader.batch_size = 10 cfg.data.subsample = 0.05 cfg.data.transform = [['ToTensor'], [ diff --git a/tests/test_external_dataset.py b/tests/test_external_dataset.py index 7d243c70d..8c94995ca 100644 --- a/tests/test_external_dataset.py +++ b/tests/test_external_dataset.py @@ -32,7 +32,7 @@ def set_config_torchvision_dataset(self, cfg): cfg.data.type = 'MNIST@torchvision' cfg.data.args = [{'download': True}] cfg.data.splits = [0.6, 0.2, 0.2] - cfg.data.batch_size = 10 + cfg.dataloader.batch_size = 10 cfg.data.transform = [['ToTensor'], [ 'Normalize', { @@ -76,7 +76,7 @@ def set_config_torchtext_dataset(self, cfg): cfg.data.args = [{'max_len': 100}] cfg.data.type = 'IMDB@torchtext' cfg.data.splits = [0.6, 0.2, 0.2] - cfg.data.batch_size = 10 + cfg.dataloader.batch_size = 10 cfg.data.transform = ['GloVe', {'cache': 'test_data/', 'name': '6B'}] cfg.data.splitter = 'lda' cfg.data.splitter_args = [{'alpha': 0.5}] diff --git a/tests/test_fedem.py b/tests/test_fedem.py index d3de07e18..6cbbf74ad 100644 --- a/tests/test_fedem.py +++ b/tests/test_fedem.py @@ -33,7 +33,7 @@ def set_config_femnist(self, cfg): cfg.data.root = 'test_data/' cfg.data.type = 'femnist' cfg.data.splits = [0.6, 0.2, 0.2] - cfg.data.batch_size = 10 + cfg.dataloader.batch_size = 10 cfg.data.subsample = 0.05 cfg.data.transform = [['ToTensor'], [ diff --git a/tests/test_fedopt.py b/tests/test_fedopt.py index 0d598e2c3..9a96a9656 100644 --- a/tests/test_fedopt.py +++ b/tests/test_fedopt.py @@ -31,7 +31,7 @@ def set_config_fedopt(self, cfg): cfg.data.root = 'test_data/' cfg.data.type = 'femnist' cfg.data.splits = [0.6, 0.2, 0.2] - cfg.data.batch_size = 10 + cfg.dataloader.batch_size = 10 cfg.data.subsample = 0.01 cfg.data.transform = [['ToTensor'], [ diff --git a/tests/test_fedprox.py b/tests/test_fedprox.py index 10ce8b583..5d8c8c551 100644 --- a/tests/test_fedprox.py +++ b/tests/test_fedprox.py @@ -30,7 +30,7 @@ def set_config_fedprox(self, cfg): cfg.data.root = 'test_data/' cfg.data.type = 'femnist' cfg.data.splits = [0.6, 0.2, 0.2] - cfg.data.batch_size 
= 10 + cfg.dataloader.batch_size = 10 cfg.data.subsample = 0.01 cfg.data.transform = [['ToTensor'], [ diff --git a/tests/test_fedsageplus.py b/tests/test_fedsageplus.py index 44fa2c680..223ced6f5 100644 --- a/tests/test_fedsageplus.py +++ b/tests/test_fedsageplus.py @@ -29,7 +29,7 @@ def set_config_fedsageplus(self, cfg): cfg.data.root = 'test_data/' cfg.data.type = 'cora' cfg.data.splitter = 'louvain' - cfg.data.batch_size = 1 + cfg.dataloader.batch_size = 1 cfg.model.type = 'sage' cfg.model.hidden = 64 diff --git a/tests/test_femnist.py b/tests/test_femnist.py index ad99f693e..b78328ff0 100644 --- a/tests/test_femnist.py +++ b/tests/test_femnist.py @@ -31,7 +31,7 @@ def set_config_femnist(self, cfg): cfg.data.root = 'test_data/' cfg.data.type = 'femnist' cfg.data.splits = [0.6, 0.2, 0.2] - cfg.data.batch_size = 10 + cfg.dataloader.batch_size = 10 cfg.data.subsample = 0.05 cfg.data.transform = [['ToTensor'], [ diff --git a/tests/test_graph_node_trainer.py b/tests/test_graph_node_trainer.py index 8c1618c03..e8fae48c7 100644 --- a/tests/test_graph_node_trainer.py +++ b/tests/test_graph_node_trainer.py @@ -27,7 +27,7 @@ def set_config_node(self, cfg): cfg.data.root = 'test_data/' cfg.data.type = 'cora' - cfg.data.batch_size = 1 # full batch train + cfg.dataloader.batch_size = 1 # full batch train cfg.data.splitter = 'louvain' cfg.model.type = 'gcn' diff --git a/tests/test_mf.py b/tests/test_mf.py index 1714d65f4..6d242dc17 100644 --- a/tests/test_mf.py +++ b/tests/test_mf.py @@ -30,7 +30,7 @@ def set_config_movielens1m(self, cfg): cfg.data.root = 'test_data/' cfg.data.type = 'vflmovielens1m' - cfg.data.batch_size = 32 + cfg.dataloader.batch_size = 32 cfg.model.type = 'VMFNet' cfg.model.hidden = 20 diff --git a/tests/test_nbafl.py b/tests/test_nbafl.py index b328905ba..3037825fe 100644 --- a/tests/test_nbafl.py +++ b/tests/test_nbafl.py @@ -29,7 +29,7 @@ def set_config_femnist(self, cfg): cfg.data.root = 'test_data/' cfg.data.type = 'femnist' cfg.data.splits = [0.6, 0.2, 0.2] - cfg.data.batch_size = 10 + cfg.dataloader.batch_size = 10 cfg.data.subsample = 0.01 cfg.data.transform = [['ToTensor'], [ diff --git a/tests/test_optimizer.py b/tests/test_optimizer.py index 377a6a66f..fdb0cdde4 100644 --- a/tests/test_optimizer.py +++ b/tests/test_optimizer.py @@ -30,7 +30,7 @@ def set_config_femnist(self, cfg): cfg.data.root = 'test_data/' cfg.data.type = 'femnist' cfg.data.splits = [0.6, 0.2, 0.2] - cfg.data.batch_size = 10 + cfg.dataloader.batch_size = 10 cfg.data.subsample = 0.05 cfg.data.transform = [['ToTensor'], [ diff --git a/tests/test_pfedme.py b/tests/test_pfedme.py index bb69bc107..62a055fd2 100644 --- a/tests/test_pfedme.py +++ b/tests/test_pfedme.py @@ -35,7 +35,7 @@ def set_config_femnist(self, cfg): cfg.data.root = 'test_data/' cfg.data.type = 'femnist' cfg.data.splits = [0.6, 0.2, 0.2] - cfg.data.batch_size = 10 + cfg.dataloader.batch_size = 10 cfg.data.subsample = 0.05 cfg.data.transform = [['ToTensor'], [ diff --git a/tests/test_rec_IG_opt_attack.py b/tests/test_rec_IG_opt_attack.py index 63e029351..3aad2981d 100644 --- a/tests/test_rec_IG_opt_attack.py +++ b/tests/test_rec_IG_opt_attack.py @@ -30,7 +30,7 @@ def set_config_femnist(self, cfg): cfg.data.root = 'test_data/' cfg.data.type = 'femnist' cfg.data.splits = [0.6, 0.2, 0.2] - cfg.data.batch_size = 1 + cfg.dataloader.batch_size = 1 cfg.data.subsample = 0.01 cfg.data.transform = [['ToTensor'], [ diff --git a/tests/test_rec_opt_attack.py b/tests/test_rec_opt_attack.py index f28942457..1a22fb292 100644 --- 
a/tests/test_rec_opt_attack.py +++ b/tests/test_rec_opt_attack.py @@ -30,7 +30,7 @@ def set_config_femnist(self, cfg): cfg.data.root = 'test_data/' cfg.data.type = 'femnist' cfg.data.splits = [0.6, 0.2, 0.2] - cfg.data.batch_size = 1 + cfg.dataloader.batch_size = 1 cfg.data.subsample = 0.01 cfg.data.transform = [['ToTensor'], [ From fc97a5242e420540d4c2ad0da316f8f07c1be5d5 Mon Sep 17 00:00:00 2001 From: rayrayraykk <18007356109@163.com> Date: Fri, 16 Sep 2022 11:47:13 +0800 Subject: [PATCH 11/39] update yaml --- README.md | 4 ++-- federatedscope/core/auxiliaries/dataloader_builder.py | 2 +- federatedscope/cv/baseline/fedavg_convnet2_on_celeba.yaml | 4 ++-- federatedscope/cv/baseline/fedavg_convnet2_on_femnist.yaml | 4 ++-- federatedscope/cv/baseline/fedbn_convnet2_on_femnist.yaml | 4 ++-- federatedscope/gfl/baseline/example.yaml | 1 + .../gfl/baseline/fedavg_gcn_fullbatch_on_dblpnew.yaml | 3 ++- .../gfl/baseline/fedavg_gnn_node_fullbatch_citation.yaml | 1 + federatedscope/gfl/baseline/fedavg_on_cSBM.yaml | 1 + .../gfl/baseline/fedavg_sage_minibatch_on_dblpnew.yaml | 5 +++-- .../gfl/baseline/local_gnn_node_fullbatch_citation.yaml | 2 ++ .../hfl-sgdmf_fedavg_standalone_on_movielens1m.yaml | 5 ++--- .../mf/baseline/hfl_fedavg_standalone_on_movielens1m.yaml | 2 -- .../mf/baseline/hfl_fedavg_standalone_on_netflix.yaml | 2 +- .../vfl-sgdmf_fedavg_standalone_on_movielens1m.yaml | 4 ++-- .../mf/baseline/vfl_fedavg_standalone_on_movielens1m.yaml | 2 -- federatedscope/nlp/baseline/fedavg_bert_on_sst2.yaml | 4 ++-- federatedscope/nlp/baseline/fedavg_lr_on_synthetic.yaml | 1 - federatedscope/nlp/baseline/fedavg_lr_on_twitter.yaml | 4 ++-- federatedscope/nlp/baseline/fedavg_lstm_on_shakespeare.yaml | 2 -- federatedscope/nlp/baseline/fedavg_lstm_on_subreddit.yaml | 3 ++- federatedscope/nlp/baseline/fedavg_transformer_on_cola.yaml | 2 -- federatedscope/nlp/baseline/fedavg_transformer_on_imdb.yaml | 4 ++-- .../backdoor_badnet_fedavg_convnet2_on_femnist.yaml | 6 +++--- .../privacy_attack/CRA_fedavg_convnet2_on_femnist.yaml | 4 ++-- scripts/attack_exp_scripts/privacy_attack/PIA_toy.yaml | 4 ++-- .../privacy_attack/gradient_ascent_MIA_on_femnist.yaml | 4 ++-- .../gradient_ascent_MIA_on_femnist_simu_in.yaml | 4 ++-- .../reconstruct_IG_fedavg_opt_on_femnist.yaml | 4 ++-- .../privacy_attack/reconstruct_fedavg_opt_on_femnist.yaml | 4 ++-- .../distributed_configs/distributed_femnist_client_1.yaml | 4 ++-- .../distributed_configs/distributed_femnist_client_2.yaml | 4 ++-- .../distributed_configs/distributed_femnist_client_3.yaml | 4 ++-- .../distributed_configs/distributed_femnist_server.yaml | 4 ++-- scripts/example_configs/asyn_cifar10.yaml | 4 ++-- scripts/example_configs/cora/sha.yaml | 1 + scripts/example_configs/cora/sha_wrap_fedex.yaml | 1 + scripts/example_configs/cora/sha_wrap_fedex_arm.yaml | 1 + scripts/example_configs/fed_node_cls.yaml | 1 + scripts/example_configs/femnist.yaml | 4 ++-- scripts/example_configs/femnist/avg/bo_gp.yaml | 4 ++-- scripts/example_configs/femnist/avg/bo_gp_wrap.yaml | 4 ++-- scripts/example_configs/femnist/avg/bo_kde.yaml | 4 ++-- scripts/example_configs/femnist/avg/bo_kde_wrap.yaml | 4 ++-- scripts/example_configs/femnist/avg/bo_rf.yaml | 4 ++-- scripts/example_configs/femnist/avg/bo_rf_wrap.yaml | 4 ++-- scripts/example_configs/femnist/avg/bohb.yaml | 4 ++-- scripts/example_configs/femnist/avg/bohb_wrap.yaml | 4 ++-- scripts/example_configs/femnist/avg/hb.yaml | 4 ++-- scripts/example_configs/femnist/avg/hb_wrap.yaml | 4 ++-- scripts/example_configs/femnist/avg/rs.yaml | 4 
++-- scripts/example_configs/femnist/avg/rs_wrap.yaml | 4 ++-- scripts/example_configs/femnist/avg/sha.yaml | 4 ++-- scripts/example_configs/femnist/avg/sha_wrap.yaml | 4 ++-- scripts/example_configs/femnist/sha.yaml | 4 ++-- scripts/example_configs/femnist/sha_wrap_fedex.yaml | 4 ++-- scripts/example_configs/femnist_global_train.yaml | 4 ++-- .../ditto/ditto_convnet2_on_femnist.yaml | 2 -- .../ditto/ditto_lr_on_synthetic.yaml | 1 - .../ditto/ditto_lstm_on_shakespeare.yaml | 2 -- .../fedbn/fedbn_convnet2_on_femnist.yaml | 2 -- .../fedem/fedem_convnet2_on_femnist.yaml | 2 -- .../fedem/fedem_lr_on_synthetic.yaml | 1 - .../fedem/fedem_lstm_on_shakespeare.yaml | 2 -- 64 files changed, 97 insertions(+), 107 deletions(-) diff --git a/README.md b/README.md index 74f7aa5a8..20f7c8f12 100644 --- a/README.md +++ b/README.md @@ -162,13 +162,13 @@ Note that FederatedScope provides a unified interface for both standalone mode a The standalone mode in FederatedScope means to simulate multiple participants (servers and clients) in a single device, while participants' data are isolated from each other and their models might be shared via message passing. -Here we demonstrate how to run a standard FL task with FederatedScope, with setting `cfg.data.type = 'FEMNIST'`and `cfg.model.type = 'ConvNet2'` to run vanilla FedAvg for an image classification task. Users can customize training configurations, such as `cfg.federated.total_round_num`, `cfg.data.batch_size`, and `cfg.train.optimizer.lr`, in the configuration (a .yaml file), and run a standard FL task as: +Here we demonstrate how to run a standard FL task with FederatedScope, by setting `cfg.data.type = 'FEMNIST'` and `cfg.model.type = 'ConvNet2'` to run vanilla FedAvg for an image classification task. Users can customize training configurations, such as `cfg.federate.total_round_num`, `cfg.dataloader.batch_size`, and `cfg.train.optimizer.lr`, in the configuration (a .yaml file), and run the task as: ```bash # Run with default configurations python federatedscope/main.py --cfg scripts/example_configs/femnist.yaml # Or with custom configurations -python federatedscope/main.py --cfg scripts/example_configs/femnist.yaml federate.total_round_num 50 data.batch_size 128 +python federatedscope/main.py --cfg scripts/example_configs/femnist.yaml federate.total_round_num 50 dataloader.batch_size 128 ``` Then you can observe some monitored metrics during the training process as: diff --git a/federatedscope/core/auxiliaries/dataloader_builder.py b/federatedscope/core/auxiliaries/dataloader_builder.py index 4524a47b7..6403edba4 100644 --- a/federatedscope/core/auxiliaries/dataloader_builder.py +++ b/federatedscope/core/auxiliaries/dataloader_builder.py @@ -38,7 +38,7 @@ def get_dataloader(dataset, config, split='train'): raw_args['sizes'] = [-1] raw_args['batch_size'] = [4096] filtered_args = filter_dict(loader_cls.__init__, raw_args) - dataloader = DataLoader(dataset=dataset, **filtered_args) + dataloader = loader_cls(dataset=dataset, **filtered_args) return dataloader else: return dataset diff --git a/federatedscope/cv/baseline/fedavg_convnet2_on_celeba.yaml b/federatedscope/cv/baseline/fedavg_convnet2_on_celeba.yaml index a84531c6a..500d94f9d 100644 --- a/federatedscope/cv/baseline/fedavg_convnet2_on_celeba.yaml +++ b/federatedscope/cv/baseline/fedavg_convnet2_on_celeba.yaml @@ -10,10 +10,10 @@ data: root: data/ type: celeba splits: [0.6,0.2,0.2] - batch_size: 5 subsample: 0.1 - num_workers: 0 transform: [['ToTensor'], ['Normalize', {'mean': [0.1307], 
'std': [0.3081]}]] +dataloader: + batch_size: 5 model: type: convnet2 hidden: 2048 diff --git a/federatedscope/cv/baseline/fedavg_convnet2_on_femnist.yaml b/federatedscope/cv/baseline/fedavg_convnet2_on_femnist.yaml index 3a500e979..9a29857ce 100644 --- a/federatedscope/cv/baseline/fedavg_convnet2_on_femnist.yaml +++ b/federatedscope/cv/baseline/fedavg_convnet2_on_femnist.yaml @@ -11,10 +11,10 @@ data: root: data/ type: femnist splits: [0.6,0.2,0.2] - batch_size: 10 subsample: 0.05 - num_workers: 0 transform: [['ToTensor'], ['Normalize', {'mean': [0.1307], 'std': [0.3081]}]] +dataloader: + batch_size: 10 model: type: convnet2 hidden: 2048 diff --git a/federatedscope/cv/baseline/fedbn_convnet2_on_femnist.yaml b/federatedscope/cv/baseline/fedbn_convnet2_on_femnist.yaml index 67f49dc41..d925a1b49 100644 --- a/federatedscope/cv/baseline/fedbn_convnet2_on_femnist.yaml +++ b/federatedscope/cv/baseline/fedbn_convnet2_on_femnist.yaml @@ -11,10 +11,10 @@ data: root: data/ type: femnist splits: [0.6,0.2,0.2] - batch_size: 10 subsample: 0.05 - num_workers: 0 transform: [['ToTensor'], ['Normalize', {'mean': [0.1307], 'std': [0.3081]}]] +dataloader: + batch_size: 10 model: type: convnet2 hidden: 2048 diff --git a/federatedscope/gfl/baseline/example.yaml b/federatedscope/gfl/baseline/example.yaml index a8f7a9f02..97411fca6 100644 --- a/federatedscope/gfl/baseline/example.yaml +++ b/federatedscope/gfl/baseline/example.yaml @@ -23,6 +23,7 @@ data: type: cora # Use Louvain algorithm to split `Cora` splitter: 'louvain' +dataloader: # Use fullbatch training, batch_size should be `1` batch_size: 1 diff --git a/federatedscope/gfl/baseline/fedavg_gcn_fullbatch_on_dblpnew.yaml b/federatedscope/gfl/baseline/fedavg_gcn_fullbatch_on_dblpnew.yaml index 09285bf90..922f38f80 100644 --- a/federatedscope/gfl/baseline/fedavg_gcn_fullbatch_on_dblpnew.yaml +++ b/federatedscope/gfl/baseline/fedavg_gcn_fullbatch_on_dblpnew.yaml @@ -10,8 +10,9 @@ federate: data: root: data/ type: dblp_conf - batch_size: 1 splits: [0.5, 0.2, 0.3] +dataloader: + batch_size: 1 model: type: gcn hidden: 1024 diff --git a/federatedscope/gfl/baseline/fedavg_gnn_node_fullbatch_citation.yaml b/federatedscope/gfl/baseline/fedavg_gnn_node_fullbatch_citation.yaml index d5bc2124c..2e5357ee5 100644 --- a/federatedscope/gfl/baseline/fedavg_gnn_node_fullbatch_citation.yaml +++ b/federatedscope/gfl/baseline/fedavg_gnn_node_fullbatch_citation.yaml @@ -12,6 +12,7 @@ data: root: data/ type: cora splitter: 'louvain' +dataloader: batch_size: 1 model: type: gcn diff --git a/federatedscope/gfl/baseline/fedavg_on_cSBM.yaml b/federatedscope/gfl/baseline/fedavg_on_cSBM.yaml index 052b8f006..39dbda340 100644 --- a/federatedscope/gfl/baseline/fedavg_on_cSBM.yaml +++ b/federatedscope/gfl/baseline/fedavg_on_cSBM.yaml @@ -12,6 +12,7 @@ data: type: 'csbm' #type: 'csbm_data_feb_07_2022-00:19' cSBM_phi: [0.1, 0.5, 0.9] +dataloader: batch_size: 1 model: type: gpr diff --git a/federatedscope/gfl/baseline/fedavg_sage_minibatch_on_dblpnew.yaml b/federatedscope/gfl/baseline/fedavg_sage_minibatch_on_dblpnew.yaml index 840196e98..29838bb21 100644 --- a/federatedscope/gfl/baseline/fedavg_sage_minibatch_on_dblpnew.yaml +++ b/federatedscope/gfl/baseline/fedavg_sage_minibatch_on_dblpnew.yaml @@ -9,9 +9,10 @@ federate: total_round_num: 400 data: root: data/ - loader: graphsaint-rw - batch_size: 256 type: dblp_conf +dataloader: + type: graphsaint-rw + batch_size: 256 model: type: sage hidden: 1024 diff --git a/federatedscope/gfl/baseline/local_gnn_node_fullbatch_citation.yaml 
b/federatedscope/gfl/baseline/local_gnn_node_fullbatch_citation.yaml index 6c960bd89..897467c8e 100644 --- a/federatedscope/gfl/baseline/local_gnn_node_fullbatch_citation.yaml +++ b/federatedscope/gfl/baseline/local_gnn_node_fullbatch_citation.yaml @@ -12,6 +12,8 @@ data: root: data/ type: cora splitter: 'louvain' +dataloader: + batch_size: 1 model: type: gcn hidden: 64 diff --git a/federatedscope/mf/baseline/hfl-sgdmf_fedavg_standalone_on_movielens1m.yaml b/federatedscope/mf/baseline/hfl-sgdmf_fedavg_standalone_on_movielens1m.yaml index a860c92d1..a0089e91d 100644 --- a/federatedscope/mf/baseline/hfl-sgdmf_fedavg_standalone_on_movielens1m.yaml +++ b/federatedscope/mf/baseline/hfl-sgdmf_fedavg_standalone_on_movielens1m.yaml @@ -9,8 +9,8 @@ federate: data: root: data/ type: HFLMovieLens1M - batch_size: 64 - num_workers: 0 +dataloader: + theta: -1 model: type: HMFNet hidden: 20 @@ -30,4 +30,3 @@ sgdmf: epsilon: 2. delta: 0.5 R: 5. - theta: -1 diff --git a/federatedscope/mf/baseline/hfl_fedavg_standalone_on_movielens1m.yaml b/federatedscope/mf/baseline/hfl_fedavg_standalone_on_movielens1m.yaml index 5b528eb9a..b75e35955 100644 --- a/federatedscope/mf/baseline/hfl_fedavg_standalone_on_movielens1m.yaml +++ b/federatedscope/mf/baseline/hfl_fedavg_standalone_on_movielens1m.yaml @@ -9,8 +9,6 @@ federate: data: root: data/ type: HFLMovieLens1M - batch_size: 64 - num_workers: 0 model: type: HMFNet hidden: 20 diff --git a/federatedscope/mf/baseline/hfl_fedavg_standalone_on_netflix.yaml b/federatedscope/mf/baseline/hfl_fedavg_standalone_on_netflix.yaml index 37d645380..e481e4c78 100644 --- a/federatedscope/mf/baseline/hfl_fedavg_standalone_on_netflix.yaml +++ b/federatedscope/mf/baseline/hfl_fedavg_standalone_on_netflix.yaml @@ -11,8 +11,8 @@ federate: data: root: data/ type: HFLNetflix +dataloader: batch_size: 32 - num_workers: 0 model: type: HMFNet hidden: 10 diff --git a/federatedscope/mf/baseline/vfl-sgdmf_fedavg_standalone_on_movielens1m.yaml b/federatedscope/mf/baseline/vfl-sgdmf_fedavg_standalone_on_movielens1m.yaml index 20b40ec53..d2f188c19 100644 --- a/federatedscope/mf/baseline/vfl-sgdmf_fedavg_standalone_on_movielens1m.yaml +++ b/federatedscope/mf/baseline/vfl-sgdmf_fedavg_standalone_on_movielens1m.yaml @@ -9,8 +9,9 @@ federate: data: root: data/ type: VFLMovieLens1M +dataloader: + theta: -1 batch_size: 8 - num_workers: 0 model: type: VMFNet hidden: 20 @@ -30,4 +31,3 @@ sgdmf: epsilon: 4. delta: 0.75 R: 5. 
- theta: -1 diff --git a/federatedscope/mf/baseline/vfl_fedavg_standalone_on_movielens1m.yaml b/federatedscope/mf/baseline/vfl_fedavg_standalone_on_movielens1m.yaml index 7b21dbf22..a22e7d6dd 100644 --- a/federatedscope/mf/baseline/vfl_fedavg_standalone_on_movielens1m.yaml +++ b/federatedscope/mf/baseline/vfl_fedavg_standalone_on_movielens1m.yaml @@ -9,8 +9,6 @@ federate: data: root: data/ type: VFLMovieLens1M - batch_size: 64 - num_workers: 0 model: type: VMFNet hidden: 20 diff --git a/federatedscope/nlp/baseline/fedavg_bert_on_sst2.yaml b/federatedscope/nlp/baseline/fedavg_bert_on_sst2.yaml index b85d0febf..898ecd514 100644 --- a/federatedscope/nlp/baseline/fedavg_bert_on_sst2.yaml +++ b/federatedscope/nlp/baseline/fedavg_bert_on_sst2.yaml @@ -11,10 +11,10 @@ data: root: 'glue' type: 'sst2@huggingface_datasets' args: [{'max_len': 512}] - batch_size: 128 splitter: 'lda' splitter_args: [{'alpha': 0.5}] - num_workers: 0 +dataloader: + batch_size: 128 model: type: 'google/bert_uncased_L-2_H-128_A-2@transformers' task: 'SequenceClassification' diff --git a/federatedscope/nlp/baseline/fedavg_lr_on_synthetic.yaml b/federatedscope/nlp/baseline/fedavg_lr_on_synthetic.yaml index 8aa9b8dc2..0aeb0a318 100644 --- a/federatedscope/nlp/baseline/fedavg_lr_on_synthetic.yaml +++ b/federatedscope/nlp/baseline/fedavg_lr_on_synthetic.yaml @@ -10,7 +10,6 @@ federate: data: root: data/ type: synthetic - batch_size: 64 subsample: 1.0 model: type: lr diff --git a/federatedscope/nlp/baseline/fedavg_lr_on_twitter.yaml b/federatedscope/nlp/baseline/fedavg_lr_on_twitter.yaml index b510ed79f..eef8963de 100644 --- a/federatedscope/nlp/baseline/fedavg_lr_on_twitter.yaml +++ b/federatedscope/nlp/baseline/fedavg_lr_on_twitter.yaml @@ -11,9 +11,9 @@ federate: data: root: data/ type: twitter - batch_size: 5 subsample: 0.005 - num_workers: 0 +dataloader: + batch_size: 5 model: type: lr out_channels: 2 diff --git a/federatedscope/nlp/baseline/fedavg_lstm_on_shakespeare.yaml b/federatedscope/nlp/baseline/fedavg_lstm_on_shakespeare.yaml index 86d5aca27..3603d3de9 100644 --- a/federatedscope/nlp/baseline/fedavg_lstm_on_shakespeare.yaml +++ b/federatedscope/nlp/baseline/fedavg_lstm_on_shakespeare.yaml @@ -9,9 +9,7 @@ federate: data: root: data/ type: shakespeare - batch_size: 64 subsample: 0.2 - num_workers: 0 splits: [0.6,0.2,0.2] model: type: lstm diff --git a/federatedscope/nlp/baseline/fedavg_lstm_on_subreddit.yaml b/federatedscope/nlp/baseline/fedavg_lstm_on_subreddit.yaml index 1080bb591..6ee95a43c 100644 --- a/federatedscope/nlp/baseline/fedavg_lstm_on_subreddit.yaml +++ b/federatedscope/nlp/baseline/fedavg_lstm_on_subreddit.yaml @@ -9,8 +9,9 @@ federate: data: root: data/ type: subreddit - batch_size: 5 subsample: 1.0 +dataloader: + batch_size: 5 model: type: lstm in_channels: 10000 diff --git a/federatedscope/nlp/baseline/fedavg_transformer_on_cola.yaml b/federatedscope/nlp/baseline/fedavg_transformer_on_cola.yaml index 7eb686495..9dd6e243c 100644 --- a/federatedscope/nlp/baseline/fedavg_transformer_on_cola.yaml +++ b/federatedscope/nlp/baseline/fedavg_transformer_on_cola.yaml @@ -20,10 +20,8 @@ data: args: [{'load_disk_dir': 'huggingface/datasets/glue/cola', 'hg_cache_dir': 'huggingface', 'max_len': 128, 'val_as_dummy_test': True, 'part_train_dummy_val': 0.2} ] - batch_size: 64 splitter: 'lda' splitter_args: [ { 'alpha': 0.4, 'min_size': 1} ] - num_workers: 0 model: type: 'google/bert_uncased_L-2_H-128_A-2@transformers' task: 'SequenceClassification' diff --git 
a/federatedscope/nlp/baseline/fedavg_transformer_on_imdb.yaml b/federatedscope/nlp/baseline/fedavg_transformer_on_imdb.yaml index a9e818aa1..0d2924cb4 100644 --- a/federatedscope/nlp/baseline/fedavg_transformer_on_imdb.yaml +++ b/federatedscope/nlp/baseline/fedavg_transformer_on_imdb.yaml @@ -12,10 +12,10 @@ data: type: 'IMDB@torchtext' args: [{'max_len': 512}] splits: [0.8, 0.2, 0.0] # test is fixed - batch_size: 128 splitter: 'lda' splitter_args: [{'alpha': 0.5}] - num_workers: 0 +dataloader: + batch_size: 128 model: type: 'google/bert_uncased_L-2_H-128_A-2@transformers' task: 'SequenceClassification' diff --git a/scripts/attack_exp_scripts/backdoor_attack/backdoor_badnet_fedavg_convnet2_on_femnist.yaml b/scripts/attack_exp_scripts/backdoor_attack/backdoor_badnet_fedavg_convnet2_on_femnist.yaml index 8b0436c3c..2d78ba93e 100644 --- a/scripts/attack_exp_scripts/backdoor_attack/backdoor_badnet_fedavg_convnet2_on_femnist.yaml +++ b/scripts/attack_exp_scripts/backdoor_attack/backdoor_badnet_fedavg_convnet2_on_femnist.yaml @@ -16,11 +16,11 @@ data: type: femnist # form: dataloader splits: [0.6,0.2,0.2] - batch_size: 32 subsample: 0.05 - num_workers: 0 # transform: [['ToTensor'], ['Normalize', {'mean': [0.1307], 'std': [0.3081]}]] - transform: [['ToTensor']] + transform: [ [ 'ToTensor' ] ] +dataloader: + batch_size: 32 model: type: convnet2 hidden: 2048 diff --git a/scripts/attack_exp_scripts/privacy_attack/CRA_fedavg_convnet2_on_femnist.yaml b/scripts/attack_exp_scripts/privacy_attack/CRA_fedavg_convnet2_on_femnist.yaml index c7659b192..43d1120dc 100644 --- a/scripts/attack_exp_scripts/privacy_attack/CRA_fedavg_convnet2_on_femnist.yaml +++ b/scripts/attack_exp_scripts/privacy_attack/CRA_fedavg_convnet2_on_femnist.yaml @@ -12,10 +12,10 @@ data: root: data/ type: femnist splits: [0.6,0.2,0.2] - batch_size: 10 subsample: 0.0001 - num_workers: 0 transform: [['ToTensor'], ['Normalize', {'mean': [0.1307], 'std': [0.3081]}]] +dataloader: + batch_size: 10 model: type: convnet2 hidden: 2048 diff --git a/scripts/attack_exp_scripts/privacy_attack/PIA_toy.yaml b/scripts/attack_exp_scripts/privacy_attack/PIA_toy.yaml index fb36f4b63..19a816d68 100644 --- a/scripts/attack_exp_scripts/privacy_attack/PIA_toy.yaml +++ b/scripts/attack_exp_scripts/privacy_attack/PIA_toy.yaml @@ -10,9 +10,9 @@ data: root: data/ type: toy splits: [0.6,0.2,0.2] - batch_size: 1 subsample: 0.0001 - num_workers: 0 +dataloader: + batch_size: 1 model: type: lr hidden: 2048 diff --git a/scripts/attack_exp_scripts/privacy_attack/gradient_ascent_MIA_on_femnist.yaml b/scripts/attack_exp_scripts/privacy_attack/gradient_ascent_MIA_on_femnist.yaml index f60e64dff..619e2b972 100644 --- a/scripts/attack_exp_scripts/privacy_attack/gradient_ascent_MIA_on_femnist.yaml +++ b/scripts/attack_exp_scripts/privacy_attack/gradient_ascent_MIA_on_femnist.yaml @@ -12,10 +12,10 @@ data: root: data/ type: femnist splits: [0.6,0.2,0.2] - batch_size: 1 subsample: 0.0001 - num_workers: 0 transform: [['ToTensor'], ['Normalize', {'mean': [0.1307], 'std': [0.3081]}]] +dataloader: + batch_size: 1 model: type: convnet2 hidden: 2048 diff --git a/scripts/attack_exp_scripts/privacy_attack/gradient_ascent_MIA_on_femnist_simu_in.yaml b/scripts/attack_exp_scripts/privacy_attack/gradient_ascent_MIA_on_femnist_simu_in.yaml index b99423d1b..c752fdd08 100644 --- a/scripts/attack_exp_scripts/privacy_attack/gradient_ascent_MIA_on_femnist_simu_in.yaml +++ b/scripts/attack_exp_scripts/privacy_attack/gradient_ascent_MIA_on_femnist_simu_in.yaml @@ -12,10 +12,10 @@ data: root: data/ 
type: femnist splits: [0.6,0.2,0.2] - batch_size: 1 subsample: 0.0001 - num_workers: 0 transform: [['ToTensor'], ['Normalize', {'mean': [0.1307], 'std': [0.3081]}]] +dataloader: + batch_size: 1 model: type: convnet2 hidden: 2048 diff --git a/scripts/attack_exp_scripts/privacy_attack/reconstruct_IG_fedavg_opt_on_femnist.yaml b/scripts/attack_exp_scripts/privacy_attack/reconstruct_IG_fedavg_opt_on_femnist.yaml index a8a6da4a4..2a0a75be9 100644 --- a/scripts/attack_exp_scripts/privacy_attack/reconstruct_IG_fedavg_opt_on_femnist.yaml +++ b/scripts/attack_exp_scripts/privacy_attack/reconstruct_IG_fedavg_opt_on_femnist.yaml @@ -11,10 +11,10 @@ data: root: data/ type: femnist splits: [0.6,0.2,0.2] - batch_size: 1 subsample: 0.0001 - num_workers: 0 transform: [['ToTensor'], ['Normalize', {'mean': [0.1307], 'std': [0.3081]}]] +dataloader: + batch_size: 1 model: type: convnet2 hidden: 2048 diff --git a/scripts/attack_exp_scripts/privacy_attack/reconstruct_fedavg_opt_on_femnist.yaml b/scripts/attack_exp_scripts/privacy_attack/reconstruct_fedavg_opt_on_femnist.yaml index 582e44159..40094a34b 100644 --- a/scripts/attack_exp_scripts/privacy_attack/reconstruct_fedavg_opt_on_femnist.yaml +++ b/scripts/attack_exp_scripts/privacy_attack/reconstruct_fedavg_opt_on_femnist.yaml @@ -12,10 +12,10 @@ data: root: data/ type: femnist splits: [0.6,0.2,0.2] - batch_size: 1 subsample: 0.0001 - num_workers: 0 transform: [['ToTensor'], ['Normalize', {'mean': [0.1307], 'std': [0.3081]}]] +dataloader: + batch_size: 1 model: type: convnet2 hidden: 2048 diff --git a/scripts/distributed_scripts/distributed_configs/distributed_femnist_client_1.yaml b/scripts/distributed_scripts/distributed_configs/distributed_femnist_client_1.yaml index 7ea86b85d..eb3d54c34 100644 --- a/scripts/distributed_scripts/distributed_configs/distributed_femnist_client_1.yaml +++ b/scripts/distributed_scripts/distributed_configs/distributed_femnist_client_1.yaml @@ -21,10 +21,10 @@ data: root: data/ type: femnist splits: [0.6,0.2,0.2] - batch_size: 10 subsample: 0.05 - num_workers: 0 transform: [['ToTensor'], ['Normalize', {'mean': [0.1307], 'std': [0.3081]}]] +dataloader: + batch_size: 10 model: type: convnet2 hidden: 128 diff --git a/scripts/distributed_scripts/distributed_configs/distributed_femnist_client_2.yaml b/scripts/distributed_scripts/distributed_configs/distributed_femnist_client_2.yaml index 6d5374c8c..4fa8738dc 100644 --- a/scripts/distributed_scripts/distributed_configs/distributed_femnist_client_2.yaml +++ b/scripts/distributed_scripts/distributed_configs/distributed_femnist_client_2.yaml @@ -21,10 +21,10 @@ data: root: data/ type: femnist splits: [0.6,0.2,0.2] - batch_size: 10 subsample: 0.05 - num_workers: 0 transform: [['ToTensor'], ['Normalize', {'mean': [0.1307], 'std': [0.3081]}]] +dataloader: + batch_size: 10 model: type: convnet2 hidden: 128 diff --git a/scripts/distributed_scripts/distributed_configs/distributed_femnist_client_3.yaml b/scripts/distributed_scripts/distributed_configs/distributed_femnist_client_3.yaml index 83abc02b0..b08e14b68 100644 --- a/scripts/distributed_scripts/distributed_configs/distributed_femnist_client_3.yaml +++ b/scripts/distributed_scripts/distributed_configs/distributed_femnist_client_3.yaml @@ -21,10 +21,10 @@ data: root: data/ type: femnist splits: [0.6,0.2,0.2] - batch_size: 10 subsample: 0.05 - num_workers: 0 transform: [['ToTensor'], ['Normalize', {'mean': [0.1307], 'std': [0.3081]}]] +dataloader: + batch_size: 10 model: type: convnet2 hidden: 128 diff --git 
a/scripts/distributed_scripts/distributed_configs/distributed_femnist_server.yaml b/scripts/distributed_scripts/distributed_configs/distributed_femnist_server.yaml index 05b4fb124..599a08a69 100644 --- a/scripts/distributed_scripts/distributed_configs/distributed_femnist_server.yaml +++ b/scripts/distributed_scripts/distributed_configs/distributed_femnist_server.yaml @@ -19,10 +19,10 @@ data: root: data/ type: femnist splits: [0.6,0.2,0.2] - batch_size: 10 subsample: 0.05 - num_workers: 0 transform: [['ToTensor'], ['Normalize', {'mean': [0.1307], 'std': [0.3081]}]] +dataloader: + batch_size: 10 model: type: convnet2 hidden: 128 diff --git a/scripts/example_configs/asyn_cifar10.yaml b/scripts/example_configs/asyn_cifar10.yaml index 01d5150de..ffbf7030b 100644 --- a/scripts/example_configs/asyn_cifar10.yaml +++ b/scripts/example_configs/asyn_cifar10.yaml @@ -17,12 +17,12 @@ data: type: CIFAR10@torchvision args: [{'download': True}] splits: [0.8,0.2,0.2] - batch_size: 10 subsample: 0.2 - num_workers: 0 transform: [['ToTensor'], ['Normalize', {'mean': [0.4914, 0.4822, 0.4465], 'std': [0.247, 0.243, 0.261]}]] splitter: 'lda' splitter_args: [{'alpha': 0.2}] +dataloader: + batch_size: 10 model: type: convnet2 hidden: 512 diff --git a/scripts/example_configs/cora/sha.yaml b/scripts/example_configs/cora/sha.yaml index b2eff0477..00444abb6 100644 --- a/scripts/example_configs/cora/sha.yaml +++ b/scripts/example_configs/cora/sha.yaml @@ -15,6 +15,7 @@ data: root: data/ type: cora splitter: 'louvain' +dataloader: batch_size: 1 model: type: gcn diff --git a/scripts/example_configs/cora/sha_wrap_fedex.yaml b/scripts/example_configs/cora/sha_wrap_fedex.yaml index 822bbc9da..a5c729c14 100644 --- a/scripts/example_configs/cora/sha_wrap_fedex.yaml +++ b/scripts/example_configs/cora/sha_wrap_fedex.yaml @@ -15,6 +15,7 @@ data: root: data/ type: cora splitter: 'louvain' +dataloader: batch_size: 1 model: type: gcn diff --git a/scripts/example_configs/cora/sha_wrap_fedex_arm.yaml b/scripts/example_configs/cora/sha_wrap_fedex_arm.yaml index 2e9b726ec..6566791d5 100644 --- a/scripts/example_configs/cora/sha_wrap_fedex_arm.yaml +++ b/scripts/example_configs/cora/sha_wrap_fedex_arm.yaml @@ -15,6 +15,7 @@ data: root: data/ type: cora splitter: 'louvain' +dataloader: batch_size: 1 model: type: gcn diff --git a/scripts/example_configs/fed_node_cls.yaml b/scripts/example_configs/fed_node_cls.yaml index adb65211e..1f84b165d 100644 --- a/scripts/example_configs/fed_node_cls.yaml +++ b/scripts/example_configs/fed_node_cls.yaml @@ -12,6 +12,7 @@ data: root: data/ type: cora splitter: 'louvain' +dataloader: batch_size: 1 model: type: gcn diff --git a/scripts/example_configs/femnist.yaml b/scripts/example_configs/femnist.yaml index 5e9f39ce1..766c3e4b0 100644 --- a/scripts/example_configs/femnist.yaml +++ b/scripts/example_configs/femnist.yaml @@ -11,10 +11,10 @@ data: root: data/ type: femnist splits: [0.6,0.2,0.2] - batch_size: 10 subsample: 0.05 - num_workers: 0 transform: [['ToTensor'], ['Normalize', {'mean': [0.1307], 'std': [0.3081]}]] +dataloader: + batch_size: 10 model: type: convnet2 hidden: 2048 diff --git a/scripts/example_configs/femnist/avg/bo_gp.yaml b/scripts/example_configs/femnist/avg/bo_gp.yaml index a9730acab..35915c31f 100644 --- a/scripts/example_configs/femnist/avg/bo_gp.yaml +++ b/scripts/example_configs/femnist/avg/bo_gp.yaml @@ -14,10 +14,10 @@ data: root: data/ type: femnist splits: [0.6,0.2,0.2] - batch_size: 16 subsample: 0.05 transform: [['ToTensor'], ['Normalize', {'mean': [0.1307], 'std': 
[0.3081]}]] - num_workers: 0 +dataloader: + batch_size: 16 model: type: convnet2 hidden: 2048 diff --git a/scripts/example_configs/femnist/avg/bo_gp_wrap.yaml b/scripts/example_configs/femnist/avg/bo_gp_wrap.yaml index 89cdd15aa..815325682 100644 --- a/scripts/example_configs/femnist/avg/bo_gp_wrap.yaml +++ b/scripts/example_configs/femnist/avg/bo_gp_wrap.yaml @@ -15,10 +15,10 @@ data: root: data/ type: femnist splits: [0.6,0.2,0.2] - batch_size: 16 subsample: 0.05 transform: [['ToTensor'], ['Normalize', {'mean': [0.1307], 'std': [0.3081]}]] - num_workers: 0 +dataloader: + batch_size: 16 model: type: convnet2 hidden: 2048 diff --git a/scripts/example_configs/femnist/avg/bo_kde.yaml b/scripts/example_configs/femnist/avg/bo_kde.yaml index 9d7550f23..e8096feeb 100644 --- a/scripts/example_configs/femnist/avg/bo_kde.yaml +++ b/scripts/example_configs/femnist/avg/bo_kde.yaml @@ -14,10 +14,10 @@ data: root: data/ type: femnist splits: [0.6,0.2,0.2] - batch_size: 16 subsample: 0.05 transform: [['ToTensor'], ['Normalize', {'mean': [0.1307], 'std': [0.3081]}]] - num_workers: 0 +dataloader: + batch_size: 16 model: type: convnet2 hidden: 2048 diff --git a/scripts/example_configs/femnist/avg/bo_kde_wrap.yaml b/scripts/example_configs/femnist/avg/bo_kde_wrap.yaml index e19f71b7e..1ad4f0003 100644 --- a/scripts/example_configs/femnist/avg/bo_kde_wrap.yaml +++ b/scripts/example_configs/femnist/avg/bo_kde_wrap.yaml @@ -15,10 +15,10 @@ data: root: data/ type: femnist splits: [0.6,0.2,0.2] - batch_size: 16 subsample: 0.05 transform: [['ToTensor'], ['Normalize', {'mean': [0.1307], 'std': [0.3081]}]] - num_workers: 0 +dataloader: + batch_size: 16 model: type: convnet2 hidden: 2048 diff --git a/scripts/example_configs/femnist/avg/bo_rf.yaml b/scripts/example_configs/femnist/avg/bo_rf.yaml index 7eead0983..1e8bca28e 100644 --- a/scripts/example_configs/femnist/avg/bo_rf.yaml +++ b/scripts/example_configs/femnist/avg/bo_rf.yaml @@ -14,10 +14,10 @@ data: root: data/ type: femnist splits: [0.6,0.2,0.2] - batch_size: 16 subsample: 0.05 transform: [['ToTensor'], ['Normalize', {'mean': [0.1307], 'std': [0.3081]}]] - num_workers: 0 +dataloader: + batch_size: 16 model: type: convnet2 hidden: 2048 diff --git a/scripts/example_configs/femnist/avg/bo_rf_wrap.yaml b/scripts/example_configs/femnist/avg/bo_rf_wrap.yaml index 36c52e4e8..f2977a2eb 100644 --- a/scripts/example_configs/femnist/avg/bo_rf_wrap.yaml +++ b/scripts/example_configs/femnist/avg/bo_rf_wrap.yaml @@ -15,10 +15,10 @@ data: root: data/ type: femnist splits: [0.6,0.2,0.2] - batch_size: 16 subsample: 0.05 transform: [['ToTensor'], ['Normalize', {'mean': [0.1307], 'std': [0.3081]}]] - num_workers: 0 +dataloader: + batch_size: 16 model: type: convnet2 hidden: 2048 diff --git a/scripts/example_configs/femnist/avg/bohb.yaml b/scripts/example_configs/femnist/avg/bohb.yaml index 5273252a0..b60970464 100644 --- a/scripts/example_configs/femnist/avg/bohb.yaml +++ b/scripts/example_configs/femnist/avg/bohb.yaml @@ -14,10 +14,10 @@ data: root: data/ type: femnist splits: [0.6,0.2,0.2] - batch_size: 16 subsample: 0.05 transform: [['ToTensor'], ['Normalize', {'mean': [0.1307], 'std': [0.3081]}]] - num_workers: 0 +dataloader: + batch_size: 16 model: type: convnet2 hidden: 2048 diff --git a/scripts/example_configs/femnist/avg/bohb_wrap.yaml b/scripts/example_configs/femnist/avg/bohb_wrap.yaml index 77ed33363..e3ca1f733 100644 --- a/scripts/example_configs/femnist/avg/bohb_wrap.yaml +++ b/scripts/example_configs/femnist/avg/bohb_wrap.yaml @@ -15,10 +15,10 @@ data: root: 
data/ type: femnist splits: [0.6,0.2,0.2] - batch_size: 16 subsample: 0.05 transform: [['ToTensor'], ['Normalize', {'mean': [0.1307], 'std': [0.3081]}]] - num_workers: 0 +dataloader: + batch_size: 16 model: type: convnet2 hidden: 2048 diff --git a/scripts/example_configs/femnist/avg/hb.yaml b/scripts/example_configs/femnist/avg/hb.yaml index 7e2c3dcca..f48d9de93 100644 --- a/scripts/example_configs/femnist/avg/hb.yaml +++ b/scripts/example_configs/femnist/avg/hb.yaml @@ -14,10 +14,10 @@ data: root: data/ type: femnist splits: [0.6,0.2,0.2] - batch_size: 16 subsample: 0.05 transform: [['ToTensor'], ['Normalize', {'mean': [0.1307], 'std': [0.3081]}]] - num_workers: 0 +dataloader: + batch_size: 16 model: type: convnet2 hidden: 2048 diff --git a/scripts/example_configs/femnist/avg/hb_wrap.yaml b/scripts/example_configs/femnist/avg/hb_wrap.yaml index c9e8b6c84..1cfdeeca6 100644 --- a/scripts/example_configs/femnist/avg/hb_wrap.yaml +++ b/scripts/example_configs/femnist/avg/hb_wrap.yaml @@ -15,10 +15,10 @@ data: root: data/ type: femnist splits: [0.6,0.2,0.2] - batch_size: 16 subsample: 0.05 transform: [['ToTensor'], ['Normalize', {'mean': [0.1307], 'std': [0.3081]}]] - num_workers: 0 +dataloader: + batch_size: 16 model: type: convnet2 hidden: 2048 diff --git a/scripts/example_configs/femnist/avg/rs.yaml b/scripts/example_configs/femnist/avg/rs.yaml index 41eed9bb7..d6dd868a0 100644 --- a/scripts/example_configs/femnist/avg/rs.yaml +++ b/scripts/example_configs/femnist/avg/rs.yaml @@ -14,10 +14,10 @@ data: root: data/ type: femnist splits: [0.6,0.2,0.2] - batch_size: 16 subsample: 0.05 transform: [['ToTensor'], ['Normalize', {'mean': [0.1307], 'std': [0.3081]}]] - num_workers: 0 +dataloader: + batch_size: 16 model: type: convnet2 hidden: 2048 diff --git a/scripts/example_configs/femnist/avg/rs_wrap.yaml b/scripts/example_configs/femnist/avg/rs_wrap.yaml index 40acdb5cc..9d4680524 100644 --- a/scripts/example_configs/femnist/avg/rs_wrap.yaml +++ b/scripts/example_configs/femnist/avg/rs_wrap.yaml @@ -15,10 +15,10 @@ data: root: data/ type: femnist splits: [0.6,0.2,0.2] - batch_size: 16 subsample: 0.05 transform: [['ToTensor'], ['Normalize', {'mean': [0.1307], 'std': [0.3081]}]] - num_workers: 0 +dataloader: + batch_size: 16 model: type: convnet2 hidden: 2048 diff --git a/scripts/example_configs/femnist/avg/sha.yaml b/scripts/example_configs/femnist/avg/sha.yaml index c07af3842..0c9350fe0 100644 --- a/scripts/example_configs/femnist/avg/sha.yaml +++ b/scripts/example_configs/femnist/avg/sha.yaml @@ -14,10 +14,10 @@ data: root: data/ type: femnist splits: [0.6,0.2,0.2] - batch_size: 16 subsample: 0.05 transform: [['ToTensor'], ['Normalize', {'mean': [0.1307], 'std': [0.3081]}]] - num_workers: 0 +dataloader: + batch_size: 16 model: type: convnet2 hidden: 2048 diff --git a/scripts/example_configs/femnist/avg/sha_wrap.yaml b/scripts/example_configs/femnist/avg/sha_wrap.yaml index 481c1d5a6..cf476e100 100644 --- a/scripts/example_configs/femnist/avg/sha_wrap.yaml +++ b/scripts/example_configs/femnist/avg/sha_wrap.yaml @@ -15,10 +15,10 @@ data: root: data/ type: femnist splits: [0.6,0.2,0.2] - batch_size: 16 subsample: 0.05 transform: [['ToTensor'], ['Normalize', {'mean': [0.1307], 'std': [0.3081]}]] - num_workers: 0 +dataloader: + batch_size: 16 model: type: convnet2 hidden: 2048 diff --git a/scripts/example_configs/femnist/sha.yaml b/scripts/example_configs/femnist/sha.yaml index 2207777a9..2343d43d6 100644 --- a/scripts/example_configs/femnist/sha.yaml +++ b/scripts/example_configs/femnist/sha.yaml @@ 
-14,10 +14,10 @@ data: root: data/ type: femnist splits: [0.6,0.2,0.2] - batch_size: 16 subsample: 0.05 transform: [['ToTensor'], ['Normalize', {'mean': [0.1307], 'std': [0.3081]}]] - num_workers: 0 +dataloader: + batch_size: 16 model: type: convnet2 hidden: 2048 diff --git a/scripts/example_configs/femnist/sha_wrap_fedex.yaml b/scripts/example_configs/femnist/sha_wrap_fedex.yaml index 56d3602a3..58309fe31 100644 --- a/scripts/example_configs/femnist/sha_wrap_fedex.yaml +++ b/scripts/example_configs/femnist/sha_wrap_fedex.yaml @@ -14,10 +14,10 @@ data: root: data/ type: femnist splits: [0.6,0.2,0.2] - batch_size: 16 subsample: 0.05 transform: [['ToTensor'], ['Normalize', {'mean': [0.1307], 'std': [0.3081]}]] - num_workers: 0 +dataloader: + batch_size: 16 model: type: convnet2 hidden: 2048 diff --git a/scripts/example_configs/femnist_global_train.yaml b/scripts/example_configs/femnist_global_train.yaml index 54d776c12..15871047a 100644 --- a/scripts/example_configs/femnist_global_train.yaml +++ b/scripts/example_configs/femnist_global_train.yaml @@ -14,10 +14,10 @@ data: root: data/ type: femnist splits: [0.6,0.2,0.2] - batch_size: 10 subsample: 0.05 - num_workers: 0 transform: [['ToTensor'], ['Normalize', {'mean': [0.1307], 'std': [0.3081]}]] +dataloader: + batch_size: 10 model: type: convnet2 hidden: 2048 diff --git a/scripts/personalization_exp_scripts/ditto/ditto_convnet2_on_femnist.yaml b/scripts/personalization_exp_scripts/ditto/ditto_convnet2_on_femnist.yaml index 51ade8fcb..b9b0a4268 100644 --- a/scripts/personalization_exp_scripts/ditto/ditto_convnet2_on_femnist.yaml +++ b/scripts/personalization_exp_scripts/ditto/ditto_convnet2_on_femnist.yaml @@ -11,9 +11,7 @@ data: root: data/ type: femnist splits: [0.6,0.2,0.2] - batch_size: 64 subsample: 0.05 - num_workers: 0 transform: [['ToTensor'], ['Normalize', {'mean': [0.1307], 'std': [0.3081]}]] model: type: convnet2 diff --git a/scripts/personalization_exp_scripts/ditto/ditto_lr_on_synthetic.yaml b/scripts/personalization_exp_scripts/ditto/ditto_lr_on_synthetic.yaml index 4b3e52a9f..824fedcd4 100644 --- a/scripts/personalization_exp_scripts/ditto/ditto_lr_on_synthetic.yaml +++ b/scripts/personalization_exp_scripts/ditto/ditto_lr_on_synthetic.yaml @@ -11,7 +11,6 @@ federate: data: root: data/ type: synthetic - batch_size: 64 subsample: 1.0 personalization: local_update_steps: 30 diff --git a/scripts/personalization_exp_scripts/ditto/ditto_lstm_on_shakespeare.yaml b/scripts/personalization_exp_scripts/ditto/ditto_lstm_on_shakespeare.yaml index fab240443..077ca0548 100644 --- a/scripts/personalization_exp_scripts/ditto/ditto_lstm_on_shakespeare.yaml +++ b/scripts/personalization_exp_scripts/ditto/ditto_lstm_on_shakespeare.yaml @@ -10,9 +10,7 @@ federate: data: root: data/ type: shakespeare - batch_size: 64 subsample: 0.2 - num_workers: 0 splits: [0.6,0.2,0.2] model: type: lstm diff --git a/scripts/personalization_exp_scripts/fedbn/fedbn_convnet2_on_femnist.yaml b/scripts/personalization_exp_scripts/fedbn/fedbn_convnet2_on_femnist.yaml index 3dda571cb..0ff8b2464 100644 --- a/scripts/personalization_exp_scripts/fedbn/fedbn_convnet2_on_femnist.yaml +++ b/scripts/personalization_exp_scripts/fedbn/fedbn_convnet2_on_femnist.yaml @@ -11,9 +11,7 @@ data: root: data/ type: femnist splits: [0.6,0.2,0.2] - batch_size: 64 subsample: 0.05 - num_workers: 0 transform: [['ToTensor'], ['Normalize', {'mean': [0.1307], 'std': [0.3081]}]] model: type: convnet2 diff --git a/scripts/personalization_exp_scripts/fedem/fedem_convnet2_on_femnist.yaml 
b/scripts/personalization_exp_scripts/fedem/fedem_convnet2_on_femnist.yaml index 4b6237b04..ad066ad4c 100644 --- a/scripts/personalization_exp_scripts/fedem/fedem_convnet2_on_femnist.yaml +++ b/scripts/personalization_exp_scripts/fedem/fedem_convnet2_on_femnist.yaml @@ -11,9 +11,7 @@ data: root: data/ type: femnist splits: [0.6,0.2,0.2] - batch_size: 64 subsample: 0.05 - num_workers: 0 transform: [['ToTensor'], ['Normalize', {'mean': [0.1307], 'std': [0.3081]}]] model: model_num_per_trainer: 3 diff --git a/scripts/personalization_exp_scripts/fedem/fedem_lr_on_synthetic.yaml b/scripts/personalization_exp_scripts/fedem/fedem_lr_on_synthetic.yaml index c50068373..69d6c0161 100644 --- a/scripts/personalization_exp_scripts/fedem/fedem_lr_on_synthetic.yaml +++ b/scripts/personalization_exp_scripts/fedem/fedem_lr_on_synthetic.yaml @@ -11,7 +11,6 @@ federate: data: root: data/ type: synthetic - batch_size: 64 subsample: 1.0 personalization: local_update_steps: 30 diff --git a/scripts/personalization_exp_scripts/fedem/fedem_lstm_on_shakespeare.yaml b/scripts/personalization_exp_scripts/fedem/fedem_lstm_on_shakespeare.yaml index 6bce97cb4..52ecc5234 100644 --- a/scripts/personalization_exp_scripts/fedem/fedem_lstm_on_shakespeare.yaml +++ b/scripts/personalization_exp_scripts/fedem/fedem_lstm_on_shakespeare.yaml @@ -8,9 +8,7 @@ federate: data: root: data/ type: shakespeare - batch_size: 64 subsample: 0.2 - num_workers: 0 splits: [0.6,0.2,0.2] model: model_num_per_trainer: 3 From 7e60fdc6d162b374ba1763bf08e64ce2a90ace9d Mon Sep 17 00:00:00 2001 From: rayrayraykk <18007356109@163.com> Date: Fri, 16 Sep 2022 14:39:40 +0800 Subject: [PATCH 12/39] remove torch in dataset --- .../core/auxiliaries/data_builder.py | 2 +- .../core/auxiliaries/dataloader_builder.py | 16 +++--- federatedscope/core/configs/cfg_data.py | 53 +++++++++++++++++++ .../core/configs/cfg_differential_privacy.py | 3 -- federatedscope/core/data/base_data.py | 28 +++++----- federatedscope/core/data/base_translator.py | 27 ++++------ federatedscope/core/data/dummy_translator.py | 1 - federatedscope/core/data/utils.py | 28 +++++----- federatedscope/cv/dataloader/dataloader.py | 24 ++------- ...gdmf_fedavg_standalone_on_movielens1m.yaml | 1 + .../hfl_fedavg_standalone_on_movielens1m.yaml | 2 + .../hfl_fedavg_standalone_on_netflix.yaml | 1 + ...gdmf_fedavg_standalone_on_movielens1m.yaml | 1 + .../vfl_fedavg_standalone_on_movielens1m.yaml | 2 + federatedscope/mf/dataloader/dataloader.py | 27 ++-------- federatedscope/nlp/dataloader/dataloader.py | 22 ++------ .../tabular/dataloader/quadratic.py | 25 +++------ federatedscope/tabular/dataloader/toy.py | 3 +- 18 files changed, 128 insertions(+), 138 deletions(-) diff --git a/federatedscope/core/auxiliaries/data_builder.py b/federatedscope/core/auxiliaries/data_builder.py index 23f8b43b7..f04d4a2cb 100644 --- a/federatedscope/core/auxiliaries/data_builder.py +++ b/federatedscope/core/auxiliaries/data_builder.py @@ -53,7 +53,7 @@ def get_data(config, client_cfgs=None): return data_and_config # Load dataset from source files - dataset, modified_config = load_dataset(config, client_cfgs) + dataset, modified_config = load_dataset(config) # Perform translator to non-FL dataset translator = getattr(import_module('federatedscope.core.data'), diff --git a/federatedscope/core/auxiliaries/dataloader_builder.py b/federatedscope/core/auxiliaries/dataloader_builder.py index 6403edba4..7eb0a5383 100644 --- a/federatedscope/core/auxiliaries/dataloader_builder.py +++ 
b/federatedscope/core/auxiliaries/dataloader_builder.py @@ -10,22 +10,22 @@ def get_dataloader(dataset, config, split='train'): if config.backend == 'torch': - if config.data.loader.type == 'base': + if config.dataloader.type == 'base': from torch.utils.data import DataLoader loader_cls = DataLoader - elif config.data.loader.type == 'raw': + elif config.dataloader.type == 'raw': loader_cls = None - elif config.data.loader.type == 'graphsaint': + elif config.dataloader.type == 'graphsaint': if 'split' == 'train': from torch_geometric.loader import GraphSAINTRandomWalkSampler loader_cls = GraphSAINTRandomWalkSampler else: from torch_geometric.loader import NeighborSampler loader_cls = NeighborSampler - elif config.data.loader.type == 'neighbor': + elif config.dataloader.type == 'neighbor': from torch_geometric.loader import NeighborSampler loader_cls = NeighborSampler - elif config.data.loader.type == 'mf': + elif config.dataloader.type == 'mf': from federatedscope.mf.dataloader import MFDataLoader loader_cls = MFDataLoader else: @@ -35,8 +35,10 @@ def get_dataloader(dataset, config, split='train'): raw_args = dict(config.dataloader) if split != 'train': raw_args['shuffle'] = False - raw_args['sizes'] = [-1] - raw_args['batch_size'] = [4096] + raw_args['sizes'] = -1 + # For evaluation in GFL + if config.dataloader.type in ['graphsaint', 'neighbor']: + raw_args['batch_size'] = 4096 filtered_args = filter_dict(loader_cls.__init__, raw_args) dataloader = loader_cls(dataset=dataset, **filtered_args) return dataloader diff --git a/federatedscope/core/configs/cfg_data.py b/federatedscope/core/configs/cfg_data.py index 97ef5be21..b9eb0d48c 100644 --- a/federatedscope/core/configs/cfg_data.py +++ b/federatedscope/core/configs/cfg_data.py @@ -1,6 +1,10 @@ +import logging + from federatedscope.core.configs.config import CN from federatedscope.register import register_config +logger = logging.getLogger(__name__) + def extend_data_cfg(cfg): # ---------------------------------------------------------------------- # @@ -42,6 +46,8 @@ def extend_data_cfg(cfg): cfg.dataloader.num_steps = 30 # GFL: neighbor sampler DataLoader cfg.dataloader.sizes = [10, 5] + # DP: -1 means per-rating privacy, otherwise per-user privacy + cfg.dataloader.theta = -1 # quadratic cfg.data.quadratic = CN() @@ -49,6 +55,17 @@ def extend_data_cfg(cfg): cfg.data.quadratic.min_curv = 0.02 cfg.data.quadratic.max_curv = 12.5 + # --------------- outdated configs --------------- + # TODO: delete this code block + cfg.data.loader = '' + cfg.data.batch_size = 64 + cfg.data.shuffle = True + cfg.data.num_workers = 0 + cfg.data.drop_last = False + cfg.data.walk_length = 2 + cfg.data.num_steps = 30 + cfg.data.sizes = [10, 5] + # --------------- register corresponding check function ---------- cfg.register_cfg_check_fun(assert_data_cfg) @@ -67,6 +84,42 @@ def assert_data_cfg(cfg): 'external data' assert cfg.data.splitter, '`data.splitter` should not be empty when ' \ 'using external data' + # -------------------------------------------------------------------- + # For compatibility with older versions of FS + # TODO: delete this code block + if cfg.data.loader != '': + logger.warning('config `cfg.data.loader` will be removed in the ' + 'future, use `cfg.dataloader.type` instead.') + cfg.dataloader.type = cfg.data.loader + if cfg.data.batch_size != 64: + logger.warning('config `cfg.data.batch_size` will be removed in the ' + 'future, use `cfg.dataloader.batch_size` instead.') + cfg.dataloader.batch_size = cfg.data.batch_size + if not
cfg.data.shuffle: + logger.warning('config `cfg.data.shuffle` will be removed in the ' + 'future, use `cfg.dataloader.shuffle` instead.') + cfg.dataloader.shuffle = cfg.data.shuffle + if cfg.data.num_workers != 0: + logger.warning('config `cfg.data.num_workers` will be removed in the ' + 'future, use `cfg.dataloader.num_workers` instead.') + cfg.dataloader.num_workers = cfg.data.num_workers + if cfg.data.drop_last: + logger.warning('config `cfg.data.drop_last` will be removed in the ' + 'future, use `cfg.dataloader.drop_last` instead.') + cfg.dataloader.drop_last = cfg.data.drop_last + if cfg.data.walk_length != 2: + logger.warning('config `cfg.data.walk_length` will be removed in the ' + 'future, use `cfg.dataloader.walk_length` instead.') + cfg.dataloader.walk_length = cfg.data.walk_length + if cfg.data.num_steps != 30: + logger.warning('config `cfg.data.num_steps` will be removed in the ' + 'future, use `cfg.dataloader.num_steps` instead.') + cfg.dataloader.num_steps = cfg.data.num_steps + if cfg.data.sizes != [10, 5]: + logger.warning('config `cfg.data.sizes` will be removed in the ' + 'future, use `cfg.dataloader.sizes` instead.') + cfg.dataloader.sizes = cfg.data.sizes + # -------------------------------------------------------------------- register_config("data", extend_data_cfg) diff --git a/federatedscope/core/configs/cfg_differential_privacy.py b/federatedscope/core/configs/cfg_differential_privacy.py index 25da93eda..7a6ae3e41 100644 --- a/federatedscope/core/configs/cfg_differential_privacy.py +++ b/federatedscope/core/configs/cfg_differential_privacy.py @@ -26,9 +26,6 @@ def extend_dp_cfg(cfg): cfg.sgdmf.delta = 0.5 # \delta in dp cfg.sgdmf.constant = 1. # constant - cfg.dataloader.theta = -1 # -1 means per-rating privacy, - # otherwise per-user privacy - # --------------- register corresponding check function ---------- cfg.register_cfg_check_fun(assert_dp_cfg) diff --git a/federatedscope/core/data/base_data.py b/federatedscope/core/data/base_data.py index c1449c0cf..158d70328 100644 --- a/federatedscope/core/data/base_data.py +++ b/federatedscope/core/data/base_data.py @@ -83,31 +83,30 @@ def attack(self, datadict): Apply attack to `StandaloneDataDict`.
""" - if 'backdoor' in self.global_cfg.attack.attack_method and 'edge' in \ - self.global_cfg.attack.trigger_type: + if 'backdoor' in self.cfg.attack.attack_method and 'edge' in \ + self.cfg.attack.trigger_type: import os import torch from federatedscope.attack.auxiliary import \ create_ardis_poisoned_dataset, create_ardis_test_dataset - if not os.path.exists(self.global_cfg.attack.edge_path): - os.makedirs(self.global_cfg.attack.edge_path) + if not os.path.exists(self.cfg.attack.edge_path): + os.makedirs(self.cfg.attack.edge_path) poisoned_edgeset = create_ardis_poisoned_dataset( - data_path=self.global_cfg.attack.edge_path) + data_path=self.cfg.attack.edge_path) ardis_test_dataset = create_ardis_test_dataset( - self.global_cfg.attack.edge_path) + self.cfg.attack.edge_path) logger.info("Writing poison_data to: {}".format( - self.global_cfg.attack.edge_path)) + self.cfg.attack.edge_path)) with open( - self.global_cfg.attack.edge_path + + self.cfg.attack.edge_path + "poisoned_edgeset_training", "wb") as saved_data_file: torch.save(poisoned_edgeset, saved_data_file) - with open( - self.global_cfg.attack.edge_path + - "ardis_test_dataset.pt", "wb") as ardis_data_file: + with open(self.cfg.attack.edge_path + "ardis_test_dataset.pt", + "wb") as ardis_data_file: torch.save(ardis_test_dataset, ardis_data_file) logger.warning( 'please notice: downloading the poisoned dataset \ @@ -115,9 +114,9 @@ def attack(self, datadict): https://github.com/ksreenivasan/OOD_Federated_Learning' ) - if 'backdoor' in self.global_cfg.attack.attack_method: + if 'backdoor' in self.cfg.attack.attack_method: from federatedscope.attack.auxiliary import poisoning - poisoning(datadict, self.global_cfg) + poisoning(datadict, self.cfg) return datadict @@ -127,7 +126,7 @@ class ClientData(dict): """ client_cfg = None - def __init__(self, loader, client_cfg, train=None, val=None, test=None): + def __init__(self, client_cfg, train=None, val=None, test=None): """ Args: @@ -140,7 +139,6 @@ def __init__(self, loader, client_cfg, train=None, val=None, test=None): self.train = train self.val = val self.test = test - self.loader = loader self.setup(client_cfg) super(ClientData, self).__init__() diff --git a/federatedscope/core/data/base_translator.py b/federatedscope/core/data/base_translator.py index 8cef0510a..db0f0b39f 100644 --- a/federatedscope/core/data/base_translator.py +++ b/federatedscope/core/data/base_translator.py @@ -12,16 +12,14 @@ class BaseDataTranslator: Dataset -> ML split -> FL split -> Data (passed to FedRunner) """ - def __init__(self, global_cfg, loader, client_cfgs=None): + def __init__(self, global_cfg, client_cfgs=None): """ Convert data to `StandaloneDataDict`. Args: global_cfg: global CfgNode - loader: `torch.utils.data.DataLoader` or subclass of it client_cfgs: client cfg `Dict` """ - self.loader = loader self.global_cfg = global_cfg.clone() self.client_cfgs = client_cfgs self.splitter = get_splitter(global_cfg) @@ -76,10 +74,10 @@ def split_train_val_test(self, dataset): def split_to_client(self, train, val, test): """ - Split dataset to clients and build DataLoader. + Split dataset to clients and build `ClientData`. Returns: - datadict (dict): dict of `ClientData` with client_idx as key. + data_dict (dict): dict of `ClientData` with client_idx as key. """ @@ -98,12 +96,8 @@ def split_to_client(self, train, val, test): split_test = self.splitter(test, prior=train_label_distribution) # Build data dict with `ClientData`, key `0` for server. 
- datadict = { - 0: ClientData(self.loader, - self.global_cfg, - train=train, - val=val, - test=test) + data_dict = { + 0: ClientData(self.global_cfg, train=train, val=val, test=test) } for client_id in range(1, client_num + 1): if self.client_cfgs is not None: @@ -112,9 +106,8 @@ def split_to_client(self, train, val, test): self.client_cfgs.get(f'client_{client_id}')) else: client_cfg = self.global_cfg - datadict[client_id] = ClientData(self.loader, - client_cfg, - train=split_train[client_id - 1], - val=split_val[client_id - 1], - test=split_test[client_id - 1]) - return datadict + data_dict[client_id] = ClientData(client_cfg, + train=split_train[client_id - 1], + val=split_val[client_id - 1], + test=split_test[client_id - 1]) + return data_dict diff --git a/federatedscope/core/data/dummy_translator.py b/federatedscope/core/data/dummy_translator.py index 55c2906ea..625cc4308 100644 --- a/federatedscope/core/data/dummy_translator.py +++ b/federatedscope/core/data/dummy_translator.py @@ -19,7 +19,6 @@ def split(self, dataset): else: client_cfg = self.global_cfg datadict[client_id] = ClientData( - self.loader, client_cfg, train=dataset[client_id].get('train'), val=dataset[client_id].get('val'), diff --git a/federatedscope/core/data/utils.py b/federatedscope/core/data/utils.py index 0f67f7783..2f12e5154 100644 --- a/federatedscope/core/data/utils.py +++ b/federatedscope/core/data/utils.py @@ -7,9 +7,7 @@ import numpy as np from random import shuffle -import torch.utils - -from federatedscope.core.auxiliaries.data_builder import logger +logger = logging.getLogger(__name__) class RegexInverseMap: @@ -30,21 +28,21 @@ def __repr__(self): return str(self._items.items()) -def load_dataset(config, client_cfgs): +def load_dataset(config): if config.data.type.lower() == 'toy': from federatedscope.tabular.dataloader.toy import load_toy_data - dataset, modified_config = load_toy_data(config, client_cfgs) + dataset, modified_config = load_toy_data(config) elif config.data.type.lower() == 'quadratic': from federatedscope.tabular.dataloader import load_quadratic_dataset - dataset, modified_config = load_quadratic_dataset(config, client_cfgs) + dataset, modified_config = load_quadratic_dataset(config) elif config.data.type.lower() in ['femnist', 'celeba']: from federatedscope.cv.dataloader import load_cv_dataset - dataset, modified_config = load_cv_dataset(config, client_cfgs) + dataset, modified_config = load_cv_dataset(config) elif config.data.type.lower() in [ 'shakespeare', 'twitter', 'subreddit', 'synthetic' ]: from federatedscope.nlp.dataloader import load_nlp_dataset - dataset, modified_config = load_nlp_dataset(config, client_cfgs) + dataset, modified_config = load_nlp_dataset(config) elif config.data.type.lower() in [ 'cora', 'citeseer', @@ -53,29 +51,29 @@ def load_dataset(config, client_cfgs): 'dblp_org', ] or config.data.type.lower().startswith('csbm'): from federatedscope.gfl.dataloader import load_nodelevel_dataset - dataset, modified_config = load_nodelevel_dataset(config, client_cfgs) + dataset, modified_config = load_nodelevel_dataset(config) elif config.data.type.lower() in ['ciao', 'epinions', 'fb15k-237', 'wn18']: from federatedscope.gfl.dataloader import load_linklevel_dataset - dataset, modified_config = load_linklevel_dataset(config, client_cfgs) + dataset, modified_config = load_linklevel_dataset(config) elif config.data.type.lower() in [ 'hiv', 'proteins', 'imdb-binary', 'bbbp', 'tox21', 'bace', 'sider', 'clintox', 'esol', 'freesolv', 'lipo' ] or 
config.data.type.startswith('graph_multi_domain'): from federatedscope.gfl.dataloader import load_graphlevel_dataset - dataset, modified_config = load_graphlevel_dataset(config, client_cfgs) + dataset, modified_config = load_graphlevel_dataset(config) elif config.data.type.lower() == 'vertical_fl_data': from federatedscope.vertical_fl.dataloader import load_vertical_data dataset, modified_config = load_vertical_data(config, generate=True) elif 'movielens' in config.data.type.lower( ) or 'netflix' in config.data.type.lower(): from federatedscope.mf.dataloader import load_mf_dataset - dataset, modified_config = load_mf_dataset(config, client_cfgs) + dataset, modified_config = load_mf_dataset(config) elif '@' in config.data.type.lower(): from federatedscope.core.data.utils import load_external_data - dataset, modified_config = load_external_data(config, client_cfgs) + dataset, modified_config = load_external_data(config) elif 'cikmcup' in config.data.type.lower(): from federatedscope.gfl.dataset.cikm_cup import load_cikmcup_data - dataset, modified_config = load_cikmcup_data(config, client_cfgs) + dataset, modified_config = load_cikmcup_data(config) elif config.data.type is None or config.data.type == "": # The participant (only for server in this version) does not own data dataset = None @@ -85,7 +83,7 @@ def load_dataset(config, client_cfgs): return dataset, modified_config -def load_external_data(config=None, client_cfgs=None): +def load_external_data(config=None): r""" Based on the configuration file, this function imports external datasets and applies train/valid/test splits and split by some specific `splitter` into the standard FederatedScope input data format. diff --git a/federatedscope/cv/dataloader/dataloader.py b/federatedscope/cv/dataloader/dataloader.py index 81b473aa0..9ea27e492 100644 --- a/federatedscope/cv/dataloader/dataloader.py +++ b/federatedscope/cv/dataloader/dataloader.py @@ -1,11 +1,8 @@ -from torch.utils.data import DataLoader - from federatedscope.cv.dataset.leaf_cv import LEAF_CV from federatedscope.core.auxiliaries.transform_builder import get_transform -from federatedscope.core.data import ClientData, StandaloneDataDict -def load_cv_dataset(config=None, client_cfgs=None): +def load_cv_dataset(config=None): r""" return { 'client_id': { @@ -36,20 +33,9 @@ def load_cv_dataset(config=None, client_cfgs=None): ) if config.federate.client_num > 0 else len(dataset) config.merge_from_list(['federate.client_num', client_num]) - # get local dataset - data_local_dict = dict() + # Convert list to dict + data_dict = dict() for client_idx in range(1, client_num + 1): - if client_cfgs is not None: - client_cfg = config.clone() - client_cfg.merge_from_other_cfg( - client_cfgs.get(f'client_{client_idx}')) - else: - client_cfg = config - client_data = ClientData(DataLoader, - client_cfg, - train=dataset[client_idx - 1].get('train'), - val=dataset[client_idx - 1].get('val'), - test=dataset[client_idx - 1].get('test')) - data_local_dict[client_idx] = client_data + data_dict[client_idx] = dataset[client_idx - 1] - return StandaloneDataDict(data_local_dict, config), config + return data_dict, config diff --git a/federatedscope/mf/baseline/hfl-sgdmf_fedavg_standalone_on_movielens1m.yaml b/federatedscope/mf/baseline/hfl-sgdmf_fedavg_standalone_on_movielens1m.yaml index a0089e91d..d94668156 100644 --- a/federatedscope/mf/baseline/hfl-sgdmf_fedavg_standalone_on_movielens1m.yaml +++ b/federatedscope/mf/baseline/hfl-sgdmf_fedavg_standalone_on_movielens1m.yaml @@ -10,6 +10,7 @@ data: root: 
data/ type: HFLMovieLens1M dataloader: + type: mf theta: -1 model: type: HMFNet diff --git a/federatedscope/mf/baseline/hfl_fedavg_standalone_on_movielens1m.yaml b/federatedscope/mf/baseline/hfl_fedavg_standalone_on_movielens1m.yaml index b75e35955..f6f2637f8 100644 --- a/federatedscope/mf/baseline/hfl_fedavg_standalone_on_movielens1m.yaml +++ b/federatedscope/mf/baseline/hfl_fedavg_standalone_on_movielens1m.yaml @@ -9,6 +9,8 @@ federate: data: root: data/ type: HFLMovieLens1M +dataloader: + type: mf model: type: HMFNet hidden: 20 diff --git a/federatedscope/mf/baseline/hfl_fedavg_standalone_on_netflix.yaml b/federatedscope/mf/baseline/hfl_fedavg_standalone_on_netflix.yaml index e481e4c78..e764eba44 100644 --- a/federatedscope/mf/baseline/hfl_fedavg_standalone_on_netflix.yaml +++ b/federatedscope/mf/baseline/hfl_fedavg_standalone_on_netflix.yaml @@ -13,6 +13,7 @@ data: type: HFLNetflix dataloader: batch_size: 32 + type: mf model: type: HMFNet hidden: 10 diff --git a/federatedscope/mf/baseline/vfl-sgdmf_fedavg_standalone_on_movielens1m.yaml b/federatedscope/mf/baseline/vfl-sgdmf_fedavg_standalone_on_movielens1m.yaml index d2f188c19..e01d598e4 100644 --- a/federatedscope/mf/baseline/vfl-sgdmf_fedavg_standalone_on_movielens1m.yaml +++ b/federatedscope/mf/baseline/vfl-sgdmf_fedavg_standalone_on_movielens1m.yaml @@ -10,6 +10,7 @@ data: root: data/ type: VFLMovieLens1M dataloader: + type: mf theta: -1 batch_size: 8 model: diff --git a/federatedscope/mf/baseline/vfl_fedavg_standalone_on_movielens1m.yaml b/federatedscope/mf/baseline/vfl_fedavg_standalone_on_movielens1m.yaml index a22e7d6dd..3f2acaf64 100644 --- a/federatedscope/mf/baseline/vfl_fedavg_standalone_on_movielens1m.yaml +++ b/federatedscope/mf/baseline/vfl_fedavg_standalone_on_movielens1m.yaml @@ -9,6 +9,8 @@ federate: data: root: data/ type: VFLMovieLens1M +dataloader: + type: mf model: type: VMFNet hidden: 20 diff --git a/federatedscope/mf/dataloader/dataloader.py b/federatedscope/mf/dataloader/dataloader.py index 1dbecd37c..ebd4df252 100644 --- a/federatedscope/mf/dataloader/dataloader.py +++ b/federatedscope/mf/dataloader/dataloader.py @@ -7,8 +7,6 @@ import collections import importlib -from federatedscope.core.data import StandaloneDataDict - MFDATA_CLASS_DICT = { "vflmovielens1m": "VFLMovieLens1M", "vflmovielens10m": "VFLMovieLens10M", @@ -48,32 +46,15 @@ def load_mf_dataset(config=None, client_cfgs=None): raise NotImplementedError("Dataset {} is not implemented.".format( config.data.type)) - data_local_dict = collections.defaultdict(dict) - for id_client, data in dataset.data.items(): - if client_cfgs is not None: - client_cfg = config.clone() - client_cfg.merge_from_other_cfg( - client_cfgs.get(f'client_{id_client}')) - else: - client_cfg = config - data_local_dict[id_client]["train"] = MFDataLoader( - data["train"], - shuffle=client_cfg.dataloader.shuffle, - batch_size=client_cfg.dataloader.batch_size, - drop_last=client_cfg.dataloader.drop_last, - theta=client_cfg.dataloader.theta) - data_local_dict[id_client]["test"] = MFDataLoader( - data["test"], - shuffle=False, - batch_size=client_cfg.dataloader.batch_size, - drop_last=client_cfg.dataloader.drop_last, - theta=client_cfg.dataloader.theta) + data_dict = collections.defaultdict(dict) + for client_idx, data in dataset.data.items(): + data_dict[client_idx] = data # Modify config config.merge_from_list(['model.num_user', dataset.n_user]) config.merge_from_list(['model.num_item', dataset.n_item]) - return StandaloneDataDict(data_local_dict, config), config + return data_dict, 
config class MFDataLoader(object): diff --git a/federatedscope/nlp/dataloader/dataloader.py b/federatedscope/nlp/dataloader/dataloader.py index eb21e1cf5..d950a2a63 100644 --- a/federatedscope/nlp/dataloader/dataloader.py +++ b/federatedscope/nlp/dataloader/dataloader.py @@ -1,13 +1,10 @@ -from torch.utils.data import DataLoader - from federatedscope.nlp.dataset.leaf_nlp import LEAF_NLP from federatedscope.nlp.dataset.leaf_twitter import LEAF_TWITTER from federatedscope.nlp.dataset.leaf_synthetic import LEAF_SYNTHETIC from federatedscope.core.auxiliaries.transform_builder import get_transform -from federatedscope.core.data import ClientData, StandaloneDataDict -def load_nlp_dataset(config=None, client_cfgs=None): +def load_nlp_dataset(config=None): r""" return { 'client_id': { @@ -49,19 +46,8 @@ def load_nlp_dataset(config=None, client_cfgs=None): config.merge_from_list(['federate.client_num', client_num]) # get local dataset - data_local_dict = dict() + data_dict = dict() for client_idx in range(1, client_num + 1): - if client_cfgs is not None: - client_cfg = config.clone() - client_cfg.merge_from_other_cfg( - client_cfgs.get(f'client_{client_idx}')) - else: - client_cfg = config - client_data = ClientData(DataLoader, - client_cfg, - train=dataset[client_idx - 1].get('train'), - val=dataset[client_idx - 1].get('val'), - test=dataset[client_idx - 1].get('test')) - data_local_dict[client_idx] = client_data + data_dict[client_idx] = dataset[client_idx - 1] - return StandaloneDataDict(data_local_dict, config), config + return data_dict, config diff --git a/federatedscope/tabular/dataloader/quadratic.py b/federatedscope/tabular/dataloader/quadratic.py index 67a6686d7..335984672 100644 --- a/federatedscope/tabular/dataloader/quadratic.py +++ b/federatedscope/tabular/dataloader/quadratic.py @@ -1,11 +1,8 @@ import numpy as np -from torch.utils.data import DataLoader -from federatedscope.core.data import ClientData, StandaloneDataDict - -def load_quadratic_dataset(config, client_cfgs=None): - dataset = dict() +def load_quadratic_dataset(config): + data_dict = dict() d = config.data.quadratic.dim base = np.exp( np.log(config.data.quadratic.max_curv / config.data.quadratic.min_curv) @@ -14,15 +11,9 @@ def load_quadratic_dataset(config, client_cfgs=None): # TODO: enable sphere a = 0.02 * base**(i - 1) * np.identity(d) # TODO: enable non-zero minimizer, i.e., provide a shift - if client_cfgs is not None: - client_cfg = config.clone() - client_cfg.merge_from_other_cfg(client_cfgs.get(f'client_{i}')) - else: - client_cfg = config - client_data = ClientData(DataLoader, - client_cfg, - train=[(a.astype(np.float32), .0)], - val=[(a.astype(np.float32), .0)], - test=[(a.astype(np.float32), .0)]) - dataset[i] = client_data - return StandaloneDataDict(dataset, config), config + data_dict[i] = { + 'train': [(a.astype(np.float32), .0)], + 'val': [(a.astype(np.float32), .0)], + 'test': [(a.astype(np.float32), .0)] + } + return data_dict, config diff --git a/federatedscope/tabular/dataloader/toy.py b/federatedscope/tabular/dataloader/toy.py index 5faa20626..cb4e035fc 100644 --- a/federatedscope/tabular/dataloader/toy.py +++ b/federatedscope/tabular/dataloader/toy.py @@ -2,11 +2,10 @@ import numpy as np -from federatedscope.core.data import StandaloneDataDict from federatedscope.core.auxiliaries.dataloader_builder import WrapDataset -def load_toy_data(config=None, client_cfgs=None): +def load_toy_data(config=None): generate = config.federate.mode.lower() == 'standalone' def _generate_data(client_num=5, From 
c87c4282a09ec4f3b97657289a6fb610c4c2dc41 Mon Sep 17 00:00:00 2001 From: rayrayraykk <18007356109@163.com> Date: Fri, 16 Sep 2022 15:52:53 +0800 Subject: [PATCH 13/39] fix node trainer --- .../core/auxiliaries/data_builder.py | 6 +- .../core/auxiliaries/dataloader_builder.py | 21 ++- .../core/auxiliaries/model_builder.py | 6 +- federatedscope/core/data/base_data.py | 17 +- federatedscope/core/data/dummy_translator.py | 6 +- federatedscope/gfl/baseline/example.yaml | 2 + .../fedavg_gcn_fullbatch_on_dblpnew.yaml | 1 + .../baseline/fedavg_gcn_fullbatch_on_kg.yaml | 2 + .../fedavg_gnn_node_fullbatch_citation.yaml | 1 + .../gfl/baseline/fedavg_on_cSBM.yaml | 1 + .../gfl/baseline/fedavg_wpsn_on_cSBM.yaml | 2 + .../local_gnn_node_fullbatch_citation.yaml | 1 + .../gfl/dataloader/dataloader_node.py | 159 +++++++++--------- federatedscope/gfl/model/mpnn.py | 3 +- federatedscope/gfl/trainer/nodetrainer.py | 15 +- 15 files changed, 133 insertions(+), 110 deletions(-) diff --git a/federatedscope/core/auxiliaries/data_builder.py b/federatedscope/core/auxiliaries/data_builder.py index f04d4a2cb..5355617be 100644 --- a/federatedscope/core/auxiliaries/data_builder.py +++ b/federatedscope/core/auxiliaries/data_builder.py @@ -21,14 +21,12 @@ '.*?@.*?', 'hiv', 'proteins', 'imdb-binary', 'bbbp', 'tox21', 'bace', 'sider', 'clintox', 'esol', 'freesolv', 'lipo' ], - 'PyGNodeDataTranslator': [ - 'cora', 'citeseer', 'pubmed', 'dblp_conf', 'dblp_org', 'csbm.*?' - ], 'PyGLinkDataTranslator': ['fb15k-237', 'wn18'], 'DummyDataTranslator': [ 'toy', 'quadratic', 'femnist', 'celeba', 'shakespeare', 'twitter', 'subreddit', 'synthetic', 'ciao', 'epinions', '.*?vertical_fl_data.*?', - '.*?movielens.*?', '.*?cikmcup.*?', 'graph_multi_domain.*?' + '.*?movielens.*?', '.*?cikmcup.*?', 'graph_multi_domain.*?', 'cora', + 'citeseer', 'pubmed', 'dblp_conf', 'dblp_org', 'csbm.*?' 
], # Dummy for FL dataset } DATA_TRANS_MAP = RegexInverseMap(TRANS_DATA_MAP, None) diff --git a/federatedscope/core/auxiliaries/dataloader_builder.py b/federatedscope/core/auxiliaries/dataloader_builder.py index 7eb0a5383..de0326222 100644 --- a/federatedscope/core/auxiliaries/dataloader_builder.py +++ b/federatedscope/core/auxiliaries/dataloader_builder.py @@ -15,8 +15,11 @@ def get_dataloader(dataset, config, split='train'): loader_cls = DataLoader elif config.dataloader.type == 'raw': loader_cls = None - elif config.dataloader.type == 'graphsaint': - if 'split' == 'train': + elif config.dataloader.type == 'pyg': + from torch_geometric.loader import DataLoader as PyGDataLoader + loader_cls = PyGDataLoader + elif config.dataloader.type == 'graphsaint-rw': + if split == 'train': from torch_geometric.loader import GraphSAINTRandomWalkSampler loader_cls = GraphSAINTRandomWalkSampler else: @@ -35,12 +38,20 @@ def get_dataloader(dataset, config, split='train'): raw_args = dict(config.dataloader) if split != 'train': raw_args['shuffle'] = False - raw_args['sizes'] = -1 + raw_args['sizes'] = [-1] # For evaluation in GFL - if config.dataloader.type in ['graphsaint', 'neighbor']: + if config.dataloader.type in ['graphsaint-rw', 'neighbor']: raw_args['batch_size'] = 4096 + dataset = dataset[0].edge_index + else: + if config.dataloader.type in ['graphsaint-rw']: + # Raw graph + dataset = dataset[0] + elif config.dataloader.type in ['neighbor']: + # edge_index of raw graph + dataset = dataset[0].edge_index filtered_args = filter_dict(loader_cls.__init__, raw_args) - dataloader = loader_cls(dataset=dataset, **filtered_args) + dataloader = loader_cls(dataset, **filtered_args) return dataloader else: return dataset diff --git a/federatedscope/core/auxiliaries/model_builder.py b/federatedscope/core/auxiliaries/model_builder.py index 0d80e015d..7b2214986 100644 --- a/federatedscope/core/auxiliaries/model_builder.py +++ b/federatedscope/core/auxiliaries/model_builder.py @@ -16,7 +16,7 @@ def get_shape_from_data(data, model_config, backend='torch'): Extract the input shape from the given data, which can be used to build the data. Users can also use `data.input_shape` to specify the shape Arguments: - data (object): the data used for local training or evaluation + data (`ClientData`): the data used for local training or evaluation The expected data format: 1): {train/val/test: {x:ndarray, y:ndarray}}} 2): {train/val/test: DataLoader} @@ -31,7 +31,7 @@ def get_shape_from_data(data, model_config, backend='torch'): 'gcn', 'sage', 'gpr', 'gat', 'gin', 'mpnn' ] or model_config.type.startswith('gnn_'): num_label = data['num_label'] if 'num_label' in data else None - num_edge_features = data[ + num_edge_features = data['data'][ 'num_edge_features'] if model_config.type == 'mpnn' else None if model_config.task.startswith('graph'): # graph-level task @@ -39,7 +39,7 @@ def get_shape_from_data(data, model_config, backend='torch'): return (data_representative.x.shape, num_label, num_edge_features) else: # node/link-level task - return (data.x.shape, num_label, num_edge_features) + return (data['data'].x.shape, num_label, num_edge_features) if isinstance(data, dict): keys = list(data.keys()) diff --git a/federatedscope/core/data/base_data.py b/federatedscope/core/data/base_data.py index 158d70328..ca18ce419 100644 --- a/federatedscope/core/data/base_data.py +++ b/federatedscope/core/data/base_data.py @@ -122,24 +122,29 @@ def attack(self, datadict): class ClientData(dict): """ - `ClientData` converts dataset to loader. 
+ `ClientData` converts dataset to train/val/test DataLoader. + Key `data` in `ClientData` is the raw dataset. """ client_cfg = None - def __init__(self, client_cfg, train=None, val=None, test=None): + def __init__(self, client_cfg, train=None, val=None, test=None, **kwargs): """ Args: - loader: Dataloader class or data dict which have been built client_cfg: client-specific CfgNode - train: train dataset - val: valid dataset - test: test dataset + data: raw dataset, which will stay raw + train: train dataset, which will be converted to DataLoader + val: valid dataset, which will be converted to DataLoader + test: test dataset, which will be converted to DataLoader """ self.train = train self.val = val self.test = test self.setup(client_cfg) + if kwargs is not None: + for key in kwargs: + self[key] = kwargs[key] super(ClientData, self).__init__() def setup(self, new_client_cfg=None): @@ -161,10 +166,8 @@ def setup(self, new_client_cfg=None): if self.train is not None: self['train'] = get_dataloader(self.train, self.client_cfg, 'train') - if self.val is not None: self['val'] = get_dataloader(self.val, self.client_cfg, 'val') - if self.test is not None: self['test'] = get_dataloader(self.test, self.client_cfg, 'test') return True diff --git a/federatedscope/core/data/dummy_translator.py b/federatedscope/core/data/dummy_translator.py index 625cc4308..023f2d3f3 100644 --- a/federatedscope/core/data/dummy_translator.py +++ b/federatedscope/core/data/dummy_translator.py @@ -18,9 +18,5 @@ def split(self, dataset): self.client_cfgs.get(f'client_{client_id}')) else: client_cfg = self.global_cfg - datadict[client_id] = ClientData( - client_cfg, - train=dataset[client_id].get('train'), - val=dataset[client_id].get('val'), - test=dataset[client_id].get('test')) + datadict[client_id] = ClientData(client_cfg, **dataset[client_id]) return datadict diff --git a/federatedscope/gfl/baseline/example.yaml b/federatedscope/gfl/baseline/example.yaml index 97411fca6..063cbdf58 100644 --- a/federatedscope/gfl/baseline/example.yaml +++ b/federatedscope/gfl/baseline/example.yaml @@ -24,6 +24,8 @@ data: # Use Louvain algorithm to split `Cora` splitter: 'louvain' dataloader: + # Type of sampler + type: pyg # Use fullbatch training, batch_size should be `1` batch_size: 1 diff --git a/federatedscope/gfl/baseline/fedavg_gcn_fullbatch_on_dblpnew.yaml b/federatedscope/gfl/baseline/fedavg_gcn_fullbatch_on_dblpnew.yaml index 922f38f80..2878b70e1 100644 --- a/federatedscope/gfl/baseline/fedavg_gcn_fullbatch_on_dblpnew.yaml +++ b/federatedscope/gfl/baseline/fedavg_gcn_fullbatch_on_dblpnew.yaml @@ -12,6 +12,7 @@ data: type: dblp_conf splits: [0.5, 0.2, 0.3] dataloader: + type: pyg batch_size: 1 model: type: gcn diff --git a/federatedscope/gfl/baseline/fedavg_gcn_fullbatch_on_kg.yaml b/federatedscope/gfl/baseline/fedavg_gcn_fullbatch_on_kg.yaml index b300f7d6f..efdce0813 100644 --- a/federatedscope/gfl/baseline/fedavg_gcn_fullbatch_on_kg.yaml +++ b/federatedscope/gfl/baseline/fedavg_gcn_fullbatch_on_kg.yaml @@ -13,6 +13,8 @@ data: type: wn18 splitter: rel_type pre_transform: ['Constant', {'value':1.0, 'cat':False}] +dataloader: + type: pyg model: type: gcn hidden: 64 diff --git a/federatedscope/gfl/baseline/fedavg_gnn_node_fullbatch_citation.yaml b/federatedscope/gfl/baseline/fedavg_gnn_node_fullbatch_citation.yaml index 2e5357ee5..965269e18 100644 --- a/federatedscope/gfl/baseline/fedavg_gnn_node_fullbatch_citation.yaml +++ b/federatedscope/gfl/baseline/fedavg_gnn_node_fullbatch_citation.yaml @@ -13,6 +13,7 @@ data: type: cora
splitter: 'louvain' dataloader: + type: pyg batch_size: 1 model: type: gcn diff --git a/federatedscope/gfl/baseline/fedavg_on_cSBM.yaml b/federatedscope/gfl/baseline/fedavg_on_cSBM.yaml index 39dbda340..d0de6f9ba 100644 --- a/federatedscope/gfl/baseline/fedavg_on_cSBM.yaml +++ b/federatedscope/gfl/baseline/fedavg_on_cSBM.yaml @@ -13,6 +13,7 @@ data: #type: 'csbm_data_feb_07_2022-00:19' cSBM_phi: [0.1, 0.5, 0.9] dataloader: + type: pyg batch_size: 1 model: type: gpr diff --git a/federatedscope/gfl/baseline/fedavg_wpsn_on_cSBM.yaml b/federatedscope/gfl/baseline/fedavg_wpsn_on_cSBM.yaml index 1672c5c98..ef77a592c 100644 --- a/federatedscope/gfl/baseline/fedavg_wpsn_on_cSBM.yaml +++ b/federatedscope/gfl/baseline/fedavg_wpsn_on_cSBM.yaml @@ -12,6 +12,8 @@ data: type: 'csbm' #type: 'csbm_data_feb_05_2022-19:23' cSBM_phi: [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8] +dataloader: + type: pyg model: type: gpr hidden: 256 diff --git a/federatedscope/gfl/baseline/local_gnn_node_fullbatch_citation.yaml b/federatedscope/gfl/baseline/local_gnn_node_fullbatch_citation.yaml index 897467c8e..c8a298f52 100644 --- a/federatedscope/gfl/baseline/local_gnn_node_fullbatch_citation.yaml +++ b/federatedscope/gfl/baseline/local_gnn_node_fullbatch_citation.yaml @@ -13,6 +13,7 @@ data: type: cora splitter: 'louvain' dataloader: + type: pyg batch_size: 1 model: type: gcn diff --git a/federatedscope/gfl/dataloader/dataloader_node.py b/federatedscope/gfl/dataloader/dataloader_node.py index cc49c153e..659d75a33 100644 --- a/federatedscope/gfl/dataloader/dataloader_node.py +++ b/federatedscope/gfl/dataloader/dataloader_node.py @@ -5,7 +5,8 @@ from torch_geometric.utils import add_self_loops, remove_self_loops, \ to_undirected from torch_geometric.data import Data -from torch_geometric.loader import GraphSAINTRandomWalkSampler, NeighborSampler +# from torch_geometric.loader import GraphSAINTRandomWalkSampler, +# NeighborSampler from federatedscope.core.auxiliaries.splitter_builder import get_splitter from federatedscope.core.auxiliaries.transform_builder import get_transform @@ -13,73 +14,72 @@ INF = np.iinfo(np.int64).max - -def raw2loader(raw_data, config=None): - """Transform a graph into either dataloader for graph-sampling-based - mini-batch training - or still a graph for full-batch training. - Arguments: - raw_data (PyG.Data): a raw graph. - :returns: - sampler (object): a Dict containing loader and subgraph_sampler or - still a PyG.Data object. - """ - # change directed graph to undirected - raw_data.edge_index = to_undirected( - remove_self_loops(raw_data.edge_index)[0]) - - if config.data.loader == '': - sampler = raw_data - elif config.data.loader == 'graphsaint-rw': - # Sampler would crash if there was isolated node. - raw_data.edge_index = add_self_loops(raw_data.edge_index, - num_nodes=raw_data.x.shape[0])[0] - loader = GraphSAINTRandomWalkSampler( - raw_data, - batch_size=config.dataloader.batch_size, - walk_length=config.dataloader.walk_length, - num_steps=config.dataloader.num_steps, - sample_coverage=0) - subgraph_sampler = NeighborSampler( - raw_data.edge_index, - sizes=[-1], - batch_size=4096, - shuffle=False, - num_workers=config.dataloader.num_workers) - sampler = dict(data=raw_data, - train=loader, - val=subgraph_sampler, - test=subgraph_sampler) - elif config.data.loader == 'neighbor': - # Sampler would crash if there was isolated node. 
- raw_data.edge_index = add_self_loops(raw_data.edge_index, - num_nodes=raw_data.x.shape[0])[0] - - train_idx = raw_data.train_mask.nonzero(as_tuple=True)[0] - loader = NeighborSampler(raw_data.edge_index, - node_idx=train_idx, - sizes=config.dataloader.sizes, - batch_size=config.dataloader.batch_size, - shuffle=config.dataloader.shuffle, - num_workers=config.dataloader.num_workers) - subgraph_sampler = NeighborSampler( - raw_data.edge_index, - sizes=[-1], - batch_size=4096, - shuffle=False, - num_workers=config.dataloader.num_workers) - sampler = dict(data=raw_data, - train=loader, - val=subgraph_sampler, - test=subgraph_sampler) - - return sampler +# def raw2loader(raw_data, config=None): +# """Transform a graph into either dataloader for graph-sampling-based +# mini-batch training +# or still a graph for full-batch training. +# Arguments: +# raw_data (PyG.Data): a raw graph. +# :returns: +# sampler (object): a Dict containing loader and subgraph_sampler or +# still a PyG.Data object. +# """ +# # change directed graph to undirected +# raw_data.edge_index = to_undirected( +# remove_self_loops(raw_data.edge_index)[0]) +# +# if config.data.loader == '': +# sampler = raw_data +# elif config.data.loader == 'graphsaint-rw': +# # Sampler would crash if there was isolated node. +# raw_data.edge_index = add_self_loops(raw_data.edge_index, +# num_nodes=raw_data.x.shape[0])[0] +# loader = GraphSAINTRandomWalkSampler( +# raw_data, +# batch_size=config.dataloader.batch_size, +# walk_length=config.dataloader.walk_length, +# num_steps=config.dataloader.num_steps, +# sample_coverage=0) +# subgraph_sampler = NeighborSampler( +# raw_data.edge_index, +# sizes=[-1], +# batch_size=4096, +# shuffle=False, +# num_workers=config.dataloader.num_workers) +# sampler = dict(data=raw_data, +# train=loader, +# val=subgraph_sampler, +# test=subgraph_sampler) +# elif config.data.loader == 'neighbor': +# # Sampler would crash if there was isolated node. 
+# raw_data.edge_index = add_self_loops(raw_data.edge_index, +# num_nodes=raw_data.x.shape[0])[0] +# +# train_idx = raw_data.train_mask.nonzero(as_tuple=True)[0] +# loader = NeighborSampler(raw_data.edge_index, +# node_idx=train_idx, +# sizes=config.dataloader.sizes, +# batch_size=config.dataloader.batch_size, +# shuffle=config.dataloader.shuffle, +# num_workers=config.dataloader.num_workers) +# subgraph_sampler = NeighborSampler( +# raw_data.edge_index, +# sizes=[-1], +# batch_size=4096, +# shuffle=False, +# num_workers=config.dataloader.num_workers) +# sampler = dict(data=raw_data, +# train=loader, +# val=subgraph_sampler, +# test=subgraph_sampler) +# +# return sampler def load_nodelevel_dataset(config=None, client_cfgs=None): r""" :returns: - data_local_dict + data_dict :rtype: Dict: dict{'client_id': Data()} """ @@ -157,25 +157,29 @@ def load_nodelevel_dataset(config=None, client_cfgs=None): config.merge_from_list(['federate.client_num', client_num]) # get local dataset - data_local_dict = dict() + data_dict = dict() for client_idx in range(1, len(dataset) + 1): - if client_cfgs is not None: - client_cfg = config.clone() - client_cfg.merge_from_other_cfg( - client_cfgs.get(f'client_{client_idx}')) - else: - client_cfg = config - local_data = raw2loader(dataset[client_idx - 1], client_cfg) - data_local_dict[client_idx] = local_data + local_data = dataset[client_idx - 1] + # To undirected and add self-loop + local_data.edge_index = add_self_loops( + to_undirected(remove_self_loops(local_data.edge_index)[0]), + num_nodes=local_data.x.shape[0])[0] + data_dict[client_idx] = { + 'data': local_data, + 'train': [local_data], + 'val': [local_data], + 'test': [local_data] + } + # Keep ML split consistent with local graphs if global_dataset is not None: global_graph = global_dataset[0] train_mask = torch.zeros_like(global_graph.train_mask) val_mask = torch.zeros_like(global_graph.val_mask) test_mask = torch.zeros_like(global_graph.test_mask) - for client_sampler in data_local_dict.values(): + for client_sampler in data_dict.values(): if isinstance(client_sampler, Data): client_subgraph = client_sampler else: @@ -190,6 +194,11 @@ def load_nodelevel_dataset(config=None, client_cfgs=None): global_graph.val_mask = val_mask global_graph.test_mask = test_mask - data_local_dict[0] = raw2loader(global_graph, config) + data_dict[0] = { + 'data': global_graph, + 'train': [global_graph], + 'val': [global_graph], + 'test': [global_graph] + } - return StandaloneDataDict(data_local_dict, config), config + return data_dict, config diff --git a/federatedscope/gfl/model/mpnn.py b/federatedscope/gfl/model/mpnn.py index dd1117331..e804eab2a 100644 --- a/federatedscope/gfl/model/mpnn.py +++ b/federatedscope/gfl/model/mpnn.py @@ -40,8 +40,7 @@ def forward(self, data): x, edge_index, edge_attr, batch = data.x, data.edge_index, \ data.edge_attr, data.batch elif isinstance(data, tuple): - x, edge_index, edge_attr, batch = data.x, data.edge_index, \ - data.edge_attr, data.batch + x, edge_index, edge_attr, batch = data else: raise TypeError('Unsupported data type!') diff --git a/federatedscope/gfl/trainer/nodetrainer.py b/federatedscope/gfl/trainer/nodetrainer.py index 88ad5cc9f..5083164ff 100644 --- a/federatedscope/gfl/trainer/nodetrainer.py +++ b/federatedscope/gfl/trainer/nodetrainer.py @@ -1,7 +1,4 @@ import torch - -from torch_geometric.loader import DataLoader as PyGDataLoader -from torch_geometric.data import Data from torch_geometric.loader import GraphSAINTRandomWalkSampler, NeighborSampler from 
federatedscope.core.auxiliaries.enums import LIFECYCLE @@ -22,15 +19,15 @@ def parse_data(self, data): """ init_dict = dict() - if isinstance(data, Data): + if isinstance(data, dict): for mode in ["train", "val", "test"]: - init_dict["{}_loader".format(mode)] = PyGDataLoader([data]) + init_dict["{}_loader".format(mode)] = data.get(mode) init_dict["{}_data".format(mode)] = None # For node-level task dataloader contains one graph init_dict["num_{}_data".format(mode)] = 1 else: - raise TypeError("Type of data should be PyG data.") + raise TypeError("Type of data should be dict.") return init_dict def _hook_on_batch_forward(self, ctx): @@ -154,9 +151,9 @@ def _hook_on_batch_forward(self, ctx): else: # For GraphSAINTRandomWalkSampler or PyGDataLoader batch = ctx.data_batch.to(ctx.device) - pred = ctx.model(batch.x, - batch.edge_index)[batch['{}_mask'.format( - ctx.cur_split)]] + pred = ctx.model( + (batch.x, + batch.edge_index))[batch['{}_mask'.format(ctx.cur_split)]] label = batch.y[batch['{}_mask'.format(ctx.cur_split)]] ctx.batch_size = torch.sum(ctx.data_batch['train_mask']).item() else: From 4e9709e73d37542481115d8928cb6a2858720237 Mon Sep 17 00:00:00 2001 From: rayrayraykk <18007356109@163.com> Date: Fri, 16 Sep 2022 16:39:14 +0800 Subject: [PATCH 14/39] update graph-level interface --- .../core/auxiliaries/data_builder.py | 6 +- federatedscope/core/data/dummy_translator.py | 20 ++++- federatedscope/core/data/utils.py | 5 +- .../gfl/dataloader/dataloader_graph.py | 60 ++------------- .../gfl/dataloader/dataloader_node.py | 73 +------------------ federatedscope/gfl/dataset/cikm_cup.py | 47 ------------ 6 files changed, 33 insertions(+), 178 deletions(-) diff --git a/federatedscope/core/auxiliaries/data_builder.py b/federatedscope/core/auxiliaries/data_builder.py index 5355617be..617d9689e 100644 --- a/federatedscope/core/auxiliaries/data_builder.py +++ b/federatedscope/core/auxiliaries/data_builder.py @@ -16,17 +16,19 @@ f'{error} in `federatedscope.contrib.data`, some modules are not ' f'available.') +# TODO: Add PyGNodeDataTranslator and PyGLinkDataTranslator +# TODO: move splitter to PyGNodeDataTranslator and PyGLinkDataTranslator TRANS_DATA_MAP = { 'BaseDataTranslator': [ '.*?@.*?', 'hiv', 'proteins', 'imdb-binary', 'bbbp', 'tox21', 'bace', 'sider', 'clintox', 'esol', 'freesolv', 'lipo' ], - 'PyGLinkDataTranslator': ['fb15k-237', 'wn18'], 'DummyDataTranslator': [ 'toy', 'quadratic', 'femnist', 'celeba', 'shakespeare', 'twitter', 'subreddit', 'synthetic', 'ciao', 'epinions', '.*?vertical_fl_data.*?', '.*?movielens.*?', '.*?cikmcup.*?', 'graph_multi_domain.*?', 'cora', - 'citeseer', 'pubmed', 'dblp_conf', 'dblp_org', 'csbm.*?' + 'citeseer', 'pubmed', 'dblp_conf', 'dblp_org', 'csbm.*?', 'fb15k-237', + 'wn18' ], # Dummy for FL dataset } DATA_TRANS_MAP = RegexInverseMap(TRANS_DATA_MAP, None) diff --git a/federatedscope/core/data/dummy_translator.py b/federatedscope/core/data/dummy_translator.py index 023f2d3f3..fb03056f9 100644 --- a/federatedscope/core/data/dummy_translator.py +++ b/federatedscope/core/data/dummy_translator.py @@ -5,7 +5,7 @@ class DummyDataTranslator(BaseDataTranslator): """ DummyDataTranslator convert FL dataset to DataLoader. - Do not perform ML split and FL split. + Do not perform FL split. 
""" def split(self, dataset): if not isinstance(dataset, dict): @@ -18,5 +18,21 @@ def split(self, dataset): self.client_cfgs.get(f'client_{client_id}')) else: client_cfg = self.global_cfg - datadict[client_id] = ClientData(client_cfg, **dataset[client_id]) + + if isinstance(dataset[client_id], dict): + datadict[client_id] = ClientData(client_cfg, + **dataset[client_id]) + else: + # Do not have train/val/test + train, val, test = self.split_train_val_test( + datadict[client_id]) + tmp_dict = dict(train=train, val=val, test=test) + # Only for graph-level task, get number of graph labels + if client_cfg.task.startswith('graph') and \ + client_cfg.out_channels == 0: + s = set() + for g in datadict[client_id]: + s.add(g.y.item()) + tmp_dict['num_label'] = len(s) + datadict[client_id] = ClientData(client_cfg, **tmp_dict) return datadict diff --git a/federatedscope/core/data/utils.py b/federatedscope/core/data/utils.py index 2f12e5154..e7d623275 100644 --- a/federatedscope/core/data/utils.py +++ b/federatedscope/core/data/utils.py @@ -57,7 +57,7 @@ def load_dataset(config): dataset, modified_config = load_linklevel_dataset(config) elif config.data.type.lower() in [ 'hiv', 'proteins', 'imdb-binary', 'bbbp', 'tox21', 'bace', 'sider', - 'clintox', 'esol', 'freesolv', 'lipo' + 'clintox', 'esol', 'freesolv', 'lipo', 'cikmcup' ] or config.data.type.startswith('graph_multi_domain'): from federatedscope.gfl.dataloader import load_graphlevel_dataset dataset, modified_config = load_graphlevel_dataset(config) @@ -71,9 +71,6 @@ def load_dataset(config): elif '@' in config.data.type.lower(): from federatedscope.core.data.utils import load_external_data dataset, modified_config = load_external_data(config) - elif 'cikmcup' in config.data.type.lower(): - from federatedscope.gfl.dataset.cikm_cup import load_cikmcup_data - dataset, modified_config = load_cikmcup_data(config) elif config.data.type is None or config.data.type == "": # The participant (only for server in this version) does not own data dataset = None diff --git a/federatedscope/gfl/dataloader/dataloader_graph.py b/federatedscope/gfl/dataloader/dataloader_graph.py index 25faba9e3..6fc54a44a 100644 --- a/federatedscope/gfl/dataloader/dataloader_graph.py +++ b/federatedscope/gfl/dataloader/dataloader_graph.py @@ -1,22 +1,12 @@ -import numpy as np - from torch_geometric import transforms -from torch_geometric.loader import DataLoader from torch_geometric.datasets import TUDataset, MoleculeNet from federatedscope.core.auxiliaries.splitter_builder import get_splitter from federatedscope.core.auxiliaries.transform_builder import get_transform -from federatedscope.core.data import ClientData, StandaloneDataDict - +from federatedscope.gfl.dataset.cikm_cup import CIKMCUPDataset -def get_numGraphLabels(dataset): - s = set() - for g in dataset: - s.add(g.y.item()) - return len(s) - -def load_graphlevel_dataset(config=None, client_cfgs=None): +def load_graphlevel_dataset(config=None): r"""Convert dataset to Dataloader. 
:returns: data_local_dict @@ -31,8 +21,6 @@ def load_graphlevel_dataset(config=None, client_cfgs=None): splits = config.data.splits path = config.data.root name = config.data.type.upper() - client_num = config.federate.client_num - batch_size = config.dataloader.batch_size # Splitter splitter = get_splitter(config) @@ -103,6 +91,8 @@ def load_graphlevel_dataset(config=None, client_cfgs=None): transform=transforms_funcs['transform'] if 'transform' in transforms_funcs else None) dataset.append(tmp_dataset) + elif name == 'CIKM': + dataset = CIKMCUPDataset(config.data.root) else: raise ValueError(f'No dataset named: {name}!') @@ -111,41 +101,7 @@ def load_graphlevel_dataset(config=None, client_cfgs=None): config.merge_from_list(['federate.client_num', client_num]) # get local dataset - data_local_dict = dict() - - # Build train/valid/test dataloader - raw_train = [] - raw_valid = [] - raw_test = [] - for client_idx, gs in enumerate(dataset): - if client_cfgs is not None: - client_cfg = config.clone() - client_cfg.merge_from_other_cfg( - client_cfgs.get(f'client_{client_idx+1}')) - else: - client_cfg = config - - index = np.random.permutation(np.arange(len(gs))) - train_idx = index[:int(len(gs) * splits[0])] - valid_idx = index[int(len(gs) * - splits[0]):int(len(gs) * sum(splits[:2]))] - test_idx = index[int(len(gs) * sum(splits[:2])):] - client_data = ClientData(DataLoader, - client_cfg, - train=[gs[idx] for idx in train_idx], - val=[gs[idx] for idx in valid_idx], - test=[gs[idx] for idx in test_idx]) - client_data['num_label'] = get_numGraphLabels(gs) - - data_local_dict[client_idx + 1] = client_data - raw_train = raw_train + [gs[idx] for idx in train_idx] - raw_valid = raw_valid + [gs[idx] for idx in valid_idx] - raw_test = raw_test + [gs[idx] for idx in test_idx] - if not name.startswith('graph_multi_domain'.upper()): - data_local_dict[0] = ClientData(DataLoader, - config, - train=raw_train, - val=raw_valid, - test=raw_test) - - return StandaloneDataDict(data_local_dict, config), config + data_dict = dict() + for client_idx in range(1, len(dataset) + 1): + data_dict[client_idx] = dataset[client_idx] + return data_dict, config diff --git a/federatedscope/gfl/dataloader/dataloader_node.py b/federatedscope/gfl/dataloader/dataloader_node.py index 659d75a33..5528b855a 100644 --- a/federatedscope/gfl/dataloader/dataloader_node.py +++ b/federatedscope/gfl/dataloader/dataloader_node.py @@ -5,78 +5,14 @@ from torch_geometric.utils import add_self_loops, remove_self_loops, \ to_undirected from torch_geometric.data import Data -# from torch_geometric.loader import GraphSAINTRandomWalkSampler, -# NeighborSampler from federatedscope.core.auxiliaries.splitter_builder import get_splitter from federatedscope.core.auxiliaries.transform_builder import get_transform -from federatedscope.core.data import StandaloneDataDict INF = np.iinfo(np.int64).max -# def raw2loader(raw_data, config=None): -# """Transform a graph into either dataloader for graph-sampling-based -# mini-batch training -# or still a graph for full-batch training. -# Arguments: -# raw_data (PyG.Data): a raw graph. -# :returns: -# sampler (object): a Dict containing loader and subgraph_sampler or -# still a PyG.Data object. -# """ -# # change directed graph to undirected -# raw_data.edge_index = to_undirected( -# remove_self_loops(raw_data.edge_index)[0]) -# -# if config.data.loader == '': -# sampler = raw_data -# elif config.data.loader == 'graphsaint-rw': -# # Sampler would crash if there was isolated node. 
-# raw_data.edge_index = add_self_loops(raw_data.edge_index, -# num_nodes=raw_data.x.shape[0])[0] -# loader = GraphSAINTRandomWalkSampler( -# raw_data, -# batch_size=config.dataloader.batch_size, -# walk_length=config.dataloader.walk_length, -# num_steps=config.dataloader.num_steps, -# sample_coverage=0) -# subgraph_sampler = NeighborSampler( -# raw_data.edge_index, -# sizes=[-1], -# batch_size=4096, -# shuffle=False, -# num_workers=config.dataloader.num_workers) -# sampler = dict(data=raw_data, -# train=loader, -# val=subgraph_sampler, -# test=subgraph_sampler) -# elif config.data.loader == 'neighbor': -# # Sampler would crash if there was isolated node. -# raw_data.edge_index = add_self_loops(raw_data.edge_index, -# num_nodes=raw_data.x.shape[0])[0] -# -# train_idx = raw_data.train_mask.nonzero(as_tuple=True)[0] -# loader = NeighborSampler(raw_data.edge_index, -# node_idx=train_idx, -# sizes=config.dataloader.sizes, -# batch_size=config.dataloader.batch_size, -# shuffle=config.dataloader.shuffle, -# num_workers=config.dataloader.num_workers) -# subgraph_sampler = NeighborSampler( -# raw_data.edge_index, -# sizes=[-1], -# batch_size=4096, -# shuffle=False, -# num_workers=config.dataloader.num_workers) -# sampler = dict(data=raw_data, -# train=loader, -# val=subgraph_sampler, -# test=subgraph_sampler) -# -# return sampler - -def load_nodelevel_dataset(config=None, client_cfgs=None): +def load_nodelevel_dataset(config=None): r""" :returns: data_dict @@ -86,12 +22,11 @@ def load_nodelevel_dataset(config=None, client_cfgs=None): path = config.data.root name = config.data.type.lower() + # TODO: move standalone dataset # Splitter splitter = get_splitter(config) - # Transforms transforms_funcs = get_transform(config, 'torch_geometric') - # Dataset if name in ["cora", "citeseer", "pubmed"]: num_split = { @@ -99,7 +34,6 @@ def load_nodelevel_dataset(config=None, client_cfgs=None): 'citeseer': [332, 665, INF], 'pubmed': [3943, 3943, INF], } - dataset = Planetoid(path, name, split='random', @@ -158,7 +92,6 @@ def load_nodelevel_dataset(config=None, client_cfgs=None): # get local dataset data_dict = dict() - for client_idx in range(1, len(dataset) + 1): local_data = dataset[client_idx - 1] # To undirected and add self-loop @@ -171,7 +104,6 @@ def load_nodelevel_dataset(config=None, client_cfgs=None): 'val': [local_data], 'test': [local_data] } - # Keep ML split consistent with local graphs if global_dataset is not None: global_graph = global_dataset[0] @@ -200,5 +132,4 @@ def load_nodelevel_dataset(config=None, client_cfgs=None): 'val': [global_graph], 'test': [global_graph] } - return data_dict, config diff --git a/federatedscope/gfl/dataset/cikm_cup.py b/federatedscope/gfl/dataset/cikm_cup.py index c25cb4f60..c3c152e2b 100644 --- a/federatedscope/gfl/dataset/cikm_cup.py +++ b/federatedscope/gfl/dataset/cikm_cup.py @@ -45,50 +45,3 @@ def __getitem__(self, idx): if split_data: data[split] = split_data return data - - -def load_cikmcup_data(config, client_cfgs=None): - from torch_geometric.loader import DataLoader - - # Build data - dataset = CIKMCUPDataset(config.data.root) - config.merge_from_list(['federate.client_num', len(dataset)]) - - data_dict = {} - # Build DataLoader dict - for client_idx in range(1, config.federate.client_num + 1): - logger.info(f'Loading CIKMCUP data for Client #{client_idx}.') - dataloader_dict = {} - tmp_dataset = [] - - if client_cfgs is not None: - client_cfg = config.clone() - client_cfg.merge_from_other_cfg( - client_cfgs.get(f'client_{client_idx}')) - else: - 
client_cfg = config - - if 'train' in dataset[client_idx]: - dataloader_dict['train'] = DataLoader( - dataset[client_idx]['train'], - client_cfg.dataloader.batch_size, - shuffle=client_cfg.dataloader.shuffle) - tmp_dataset += dataset[client_idx]['train'] - if 'val' in dataset[client_idx]: - dataloader_dict['val'] = DataLoader( - dataset[client_idx]['val'], - client_cfg.dataloader.batch_size, - shuffle=False) - tmp_dataset += dataset[client_idx]['val'] - if 'test' in dataset[client_idx]: - dataloader_dict['test'] = DataLoader( - dataset[client_idx]['test'], - client_cfg.dataloader.batch_size, - shuffle=False) - tmp_dataset += dataset[client_idx]['test'] - if tmp_dataset: - dataloader_dict['num_label'] = 0 - - data_dict[client_idx] = dataloader_dict - - return data_dict, config From e9e12a6c8a5559756f583852ed1311bb956aa77f Mon Sep 17 00:00:00 2001 From: rayrayraykk <18007356109@163.com> Date: Fri, 16 Sep 2022 18:06:52 +0800 Subject: [PATCH 15/39] fix minor bugs --- federatedscope/core/data/base_translator.py | 20 ++++++++++++++----- federatedscope/core/data/dummy_translator.py | 7 ++++--- .../baseline/fedavg_gcn_minibatch_on_hiv.yaml | 2 ++ .../fedavg_gin_minibatch_on_cikmcup.yaml | 2 ++ .../fedavg_gnn_minibatch_on_multi_task.yaml | 2 ++ ...atch_on_multi_task_total_samples_aggr.yaml | 2 ++ .../fedbn_gnn_minibatch_on_multi_task.yaml | 2 ++ .../isolated_gin_minibatch_on_cikmcup.yaml | 2 ++ .../gfl/dataloader/dataloader_graph.py | 13 ++---------- 9 files changed, 33 insertions(+), 19 deletions(-) diff --git a/federatedscope/core/data/base_translator.py b/federatedscope/core/data/base_translator.py index db0f0b39f..5be1ee5f9 100644 --- a/federatedscope/core/data/base_translator.py +++ b/federatedscope/core/data/base_translator.py @@ -1,4 +1,5 @@ import logging +import numpy as np from federatedscope.core.auxiliaries.splitter_builder import get_splitter from federatedscope.core.data import ClientData, StandaloneDataDict @@ -65,12 +66,16 @@ def split_train_val_test(self, dataset): assert len(dataset) == len(['train', 'val', 'test']), error_msg return [dataset[0], dataset[1], dataset[2]] - from torch.utils.data.dataset import random_split + index = np.random.permutation(np.arange(len(dataset))) train_size = int(splits[0] * len(dataset)) val_size = int(splits[1] * len(dataset)) - test_size = len(dataset) - train_size - val_size - split_data = random_split(dataset, [train_size, val_size, test_size]) - return split_data + + train_dataset = [dataset[x] for x in index[:train_size]] + val_dataset = [ + dataset[x] for x in index[train_size:train_size + val_size] + ] + test_dataset = [dataset[x] for x in index[train_size + val_size:]] + return train_dataset, val_dataset, test_dataset def split_to_client(self, train, val, test): """ @@ -89,7 +94,12 @@ def split_to_client(self, train, val, test): # Split train/val/test to client if len(train) > 0: split_train = self.splitter(train) - train_label_distribution = [[j[1] for j in x] for x in split_train] + try: + train_label_distribution = [[j[1] for j in x] + for x in split_train] + except: + logger.warning('Cannot access train label distribution.') + # TODO: fix kwargs in splitter. 
if len(val) > 0: split_val = self.splitter(val, prior=train_label_distribution) if len(test) > 0: diff --git a/federatedscope/core/data/dummy_translator.py b/federatedscope/core/data/dummy_translator.py index fb03056f9..76858289b 100644 --- a/federatedscope/core/data/dummy_translator.py +++ b/federatedscope/core/data/dummy_translator.py @@ -23,13 +23,14 @@ def split(self, dataset): datadict[client_id] = ClientData(client_cfg, **dataset[client_id]) else: + print(type(datadict)) # Do not have train/val/test train, val, test = self.split_train_val_test( - datadict[client_id]) + dataset[client_id]) tmp_dict = dict(train=train, val=val, test=test) # Only for graph-level task, get number of graph labels - if client_cfg.task.startswith('graph') and \ - client_cfg.out_channels == 0: + if client_cfg.model.task.startswith('graph') and \ + client_cfg.model.out_channels == 0: s = set() for g in datadict[client_id]: s.add(g.y.item()) diff --git a/federatedscope/gfl/baseline/fedavg_gcn_minibatch_on_hiv.yaml b/federatedscope/gfl/baseline/fedavg_gcn_minibatch_on_hiv.yaml index bbe98ec8d..b0b6c4927 100644 --- a/federatedscope/gfl/baseline/fedavg_gcn_minibatch_on_hiv.yaml +++ b/federatedscope/gfl/baseline/fedavg_gcn_minibatch_on_hiv.yaml @@ -12,6 +12,8 @@ data: root: data/ type: hiv splitter: scaffold +dataloader: + type: pyg model: type: gcn hidden: 64 diff --git a/federatedscope/gfl/baseline/fedavg_gin_minibatch_on_cikmcup.yaml b/federatedscope/gfl/baseline/fedavg_gin_minibatch_on_cikmcup.yaml index 957684f3d..c7865d4c8 100644 --- a/federatedscope/gfl/baseline/fedavg_gin_minibatch_on_cikmcup.yaml +++ b/federatedscope/gfl/baseline/fedavg_gin_minibatch_on_cikmcup.yaml @@ -12,6 +12,8 @@ federate: data: root: data/ type: cikmcup +dataloader: + type: pyg model: type: gin hidden: 64 diff --git a/federatedscope/gfl/baseline/fedavg_gnn_minibatch_on_multi_task.yaml b/federatedscope/gfl/baseline/fedavg_gnn_minibatch_on_multi_task.yaml index 7b15bdb67..a97aade78 100644 --- a/federatedscope/gfl/baseline/fedavg_gnn_minibatch_on_multi_task.yaml +++ b/federatedscope/gfl/baseline/fedavg_gnn_minibatch_on_multi_task.yaml @@ -12,6 +12,8 @@ data: root: data/ type: graph_multi_domain_mol pre_transform: ['Constant', {'value':1.0, 'cat':False}] +dataloader: + type: pyg model: type: gin hidden: 64 diff --git a/federatedscope/gfl/baseline/fedavg_gnn_minibatch_on_multi_task_total_samples_aggr.yaml b/federatedscope/gfl/baseline/fedavg_gnn_minibatch_on_multi_task_total_samples_aggr.yaml index ad99acca9..e8933c11f 100644 --- a/federatedscope/gfl/baseline/fedavg_gnn_minibatch_on_multi_task_total_samples_aggr.yaml +++ b/federatedscope/gfl/baseline/fedavg_gnn_minibatch_on_multi_task_total_samples_aggr.yaml @@ -13,6 +13,8 @@ data: root: data/ type: graph_multi_domain_mix pre_transform: ['Constant', {'value':1.0, 'cat':False}] +dataloader: + type: pyg model: type: gin hidden: 64 diff --git a/federatedscope/gfl/baseline/fedbn_gnn_minibatch_on_multi_task.yaml b/federatedscope/gfl/baseline/fedbn_gnn_minibatch_on_multi_task.yaml index 277575af4..9487f37fb 100644 --- a/federatedscope/gfl/baseline/fedbn_gnn_minibatch_on_multi_task.yaml +++ b/federatedscope/gfl/baseline/fedbn_gnn_minibatch_on_multi_task.yaml @@ -12,6 +12,8 @@ data: root: data/ type: graph_multi_domain_mix pre_transform: ['Constant', {'value':1.0, 'cat':False}] +dataloader: + type: pyg model: type: gin hidden: 64 diff --git a/federatedscope/gfl/baseline/isolated_gin_minibatch_on_cikmcup.yaml b/federatedscope/gfl/baseline/isolated_gin_minibatch_on_cikmcup.yaml index 
7880f4f3b..3153c315d 100644 --- a/federatedscope/gfl/baseline/isolated_gin_minibatch_on_cikmcup.yaml +++ b/federatedscope/gfl/baseline/isolated_gin_minibatch_on_cikmcup.yaml @@ -14,6 +14,8 @@ data: batch_size: 64 root: data/ type: cikmcup +dataloader: + type: pyg model: type: gin hidden: 64 diff --git a/federatedscope/gfl/dataloader/dataloader_graph.py b/federatedscope/gfl/dataloader/dataloader_graph.py index 6fc54a44a..5be531918 100644 --- a/federatedscope/gfl/dataloader/dataloader_graph.py +++ b/federatedscope/gfl/dataloader/dataloader_graph.py @@ -1,7 +1,6 @@ from torch_geometric import transforms from torch_geometric.datasets import TUDataset, MoleculeNet -from federatedscope.core.auxiliaries.splitter_builder import get_splitter from federatedscope.core.auxiliaries.transform_builder import get_transform from federatedscope.gfl.dataset.cikm_cup import CIKMCUPDataset @@ -22,9 +21,6 @@ def load_graphlevel_dataset(config=None): path = config.data.root name = config.data.type.upper() - # Splitter - splitter = get_splitter(config) - # Transforms transforms_funcs = get_transform(config, 'torch_geometric') @@ -39,18 +35,13 @@ def load_graphlevel_dataset(config=None): transforms_funcs['pre_transform'] = transforms.Constant(value=1.0, cat=False) dataset = TUDataset(path, name, **transforms_funcs) - if splitter is None: - raise ValueError('Please set the graph.') - dataset = splitter(dataset) elif name in [ 'HIV', 'ESOL', 'FREESOLV', 'LIPO', 'PCBA', 'MUV', 'BACE', 'BBBP', 'TOX21', 'TOXCAST', 'SIDER', 'CLINTOX' ]: dataset = MoleculeNet(path, name, **transforms_funcs) - if splitter is None: - raise ValueError('Please set the graph.') - dataset = splitter(dataset) + return dataset, config elif name.startswith('graph_multi_domain'.upper()): """ The `graph_multi_domain` datasets follows GCFL @@ -103,5 +94,5 @@ def load_graphlevel_dataset(config=None): # get local dataset data_dict = dict() for client_idx in range(1, len(dataset) + 1): - data_dict[client_idx] = dataset[client_idx] + data_dict[client_idx] = dataset[client_idx - 1] return data_dict, config From fb2b15eed0b92b08422b0f9786c0e5063264ae49 Mon Sep 17 00:00:00 2001 From: rayrayraykk <18007356109@163.com> Date: Fri, 16 Sep 2022 18:12:23 +0800 Subject: [PATCH 16/39] fix link graph --- .../gfl/dataloader/dataloader_link.py | 86 ++++++------------- .../gfl/dataloader/dataloader_node.py | 2 +- 2 files changed, 28 insertions(+), 60 deletions(-) diff --git a/federatedscope/gfl/dataloader/dataloader_link.py b/federatedscope/gfl/dataloader/dataloader_link.py index 5628927aa..01e9eb4b5 100644 --- a/federatedscope/gfl/dataloader/dataloader_link.py +++ b/federatedscope/gfl/dataloader/dataloader_link.py @@ -1,60 +1,23 @@ import torch -from torch_geometric.data import Data -from torch_geometric.loader import GraphSAINTRandomWalkSampler, NeighborSampler +from torch_geometric.utils import add_self_loops, remove_self_loops, \ + to_undirected from federatedscope.core.auxiliaries.splitter_builder import get_splitter from federatedscope.core.auxiliaries.transform_builder import get_transform -from federatedscope.core.data import StandaloneDataDict -def raw2loader(raw_data, config=None): - """Transform a graph into either dataloader for graph-sampling-based - mini-batch training - or still a graph for full-batch training. - Arguments: - raw_data (PyG.Data): a raw graph. - :returns: - sampler (object): a Dict containing loader and subgraph_sampler or - still a PyG.Data object. 
- """ - - if config.data.loader == '': - sampler = raw_data - elif config.data.loader == 'graphsaint-rw': - loader = GraphSAINTRandomWalkSampler( - raw_data, - batch_size=config.dataloader.batch_size, - walk_length=config.dataloader.walk_length, - num_steps=config.dataloader.num_steps, - sample_coverage=0) - subgraph_sampler = NeighborSampler( - raw_data.edge_index, - sizes=[-1], - batch_size=4096, - shuffle=False, - num_workers=config.dataloader.num_workers) - sampler = dict(data=raw_data, - train=loader, - val=subgraph_sampler, - test=subgraph_sampler) - else: - raise TypeError('Unsupported DataLoader Type {}'.format( - config.data.loader)) - - return sampler - - -def load_linklevel_dataset(config=None, client_cfgs=None): +def load_linklevel_dataset(config=None): r""" :returns: - data_local_dict + data_dict :rtype: (Dict): dict{'client_id': Data()} """ path = config.data.root name = config.data.type.lower() + # TODO: remove splitter # Splitter splitter = get_splitter(config) @@ -87,17 +50,21 @@ def load_linklevel_dataset(config=None, client_cfgs=None): config.merge_from_list(['federate.client_num', client_num]) # get local dataset - data_local_dict = dict() + data_dict = dict() for client_idx in range(1, len(dataset) + 1): - if client_cfgs is not None: - client_cfg = config.clone() - client_cfg.merge_from_other_cfg( - client_cfgs.get(f'client_{client_idx}')) - else: - client_cfg = config - local_data = raw2loader(dataset[client_idx - 1], client_cfg) - data_local_dict[client_idx] = local_data + local_data = dataset[client_idx - 1] + data_dict[client_idx] = local_data + # To undirected and add self-loop + local_data.edge_index = add_self_loops( + to_undirected(remove_self_loops(local_data.edge_index)[0]), + num_nodes=local_data.x.shape[0])[0] + data_dict[client_idx] = { + 'data': local_data, + 'train': [local_data], + 'val': [local_data], + 'test': [local_data] + } if global_dataset is not None: # Recode train & valid & test mask for global data @@ -108,11 +75,8 @@ def load_linklevel_dataset(config=None, client_cfgs=None): global_edge_index = torch.LongTensor([[], []]) global_edge_type = torch.LongTensor([]) - for client_sampler in data_local_dict.values(): - if isinstance(client_sampler, Data): - client_subgraph = client_sampler - else: - client_subgraph = client_sampler['data'] + for client_data in data_dict.values(): + client_subgraph = client_data['data'] orig_index = torch.zeros_like(client_subgraph.edge_index) orig_index[0] = client_subgraph.index_orig[ client_subgraph.edge_index[0]] @@ -133,6 +97,10 @@ def load_linklevel_dataset(config=None, client_cfgs=None): global_graph.test_edge_mask = test_edge_mask global_graph.edge_index = global_edge_index global_graph.edge_type = global_edge_type - data_local_dict[0] = raw2loader(global_graph, config) - - return StandaloneDataDict(data_local_dict, config), config + data_dict[0] = data_dict[0] = { + 'data': global_graph, + 'train': [global_graph], + 'val': [global_graph], + 'test': [global_graph] + } + return data_dict, config diff --git a/federatedscope/gfl/dataloader/dataloader_node.py b/federatedscope/gfl/dataloader/dataloader_node.py index 5528b855a..feef8d5d5 100644 --- a/federatedscope/gfl/dataloader/dataloader_node.py +++ b/federatedscope/gfl/dataloader/dataloader_node.py @@ -22,7 +22,7 @@ def load_nodelevel_dataset(config=None): path = config.data.root name = config.data.type.lower() - # TODO: move standalone dataset + # TODO: remove splitter # Splitter splitter = get_splitter(config) # Transforms From 
94755dae94957746f288d6e8b910d17062224995 Mon Sep 17 00:00:00 2001 From: rayrayraykk <18007356109@163.com> Date: Fri, 16 Sep 2022 18:17:06 +0800 Subject: [PATCH 17/39] fix link trainer bugs --- federatedscope/gfl/trainer/linktrainer.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/federatedscope/gfl/trainer/linktrainer.py b/federatedscope/gfl/trainer/linktrainer.py index fb86717c8..c21010b06 100644 --- a/federatedscope/gfl/trainer/linktrainer.py +++ b/federatedscope/gfl/trainer/linktrainer.py @@ -42,9 +42,9 @@ def parse_data(self, data): """ init_dict = dict() - if isinstance(data, Data): + if isinstance(data, dict): for mode in ["train", "val", "test"]: - edges = data.edge_index.T[data[MODE2MASK[mode]]] + edges = data['data'].edge_index.T[data[MODE2MASK[mode]]] # Use an index loader index_loader = DataLoader( range(edges.size(0)), @@ -57,7 +57,7 @@ def parse_data(self, data): init_dict["num_{}_data".format(mode)] = edges.size(0) init_dict["{}_data".format(mode)] = None else: - raise TypeError("Type of data should be PyG data.") + raise TypeError("Type of data should be dict.") return init_dict def _hook_on_epoch_start_data2device(self, ctx): From a032337063d5945f6f245ef0f3b5137276c8b0e2 Mon Sep 17 00:00:00 2001 From: rayrayraykk <18007356109@163.com> Date: Fri, 16 Sep 2022 18:33:13 +0800 Subject: [PATCH 18/39] fix linktrainer --- federatedscope/gfl/dataloader/dataloader_link.py | 3 --- federatedscope/gfl/trainer/linktrainer.py | 6 ++++-- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/federatedscope/gfl/dataloader/dataloader_link.py b/federatedscope/gfl/dataloader/dataloader_link.py index 01e9eb4b5..989dbbe35 100644 --- a/federatedscope/gfl/dataloader/dataloader_link.py +++ b/federatedscope/gfl/dataloader/dataloader_link.py @@ -56,9 +56,6 @@ def load_linklevel_dataset(config=None): local_data = dataset[client_idx - 1] data_dict[client_idx] = local_data # To undirected and add self-loop - local_data.edge_index = add_self_loops( - to_undirected(remove_self_loops(local_data.edge_index)[0]), - num_nodes=local_data.x.shape[0])[0] data_dict[client_idx] = { 'data': local_data, 'train': [local_data], diff --git a/federatedscope/gfl/trainer/linktrainer.py b/federatedscope/gfl/trainer/linktrainer.py index c21010b06..e2885b1a4 100644 --- a/federatedscope/gfl/trainer/linktrainer.py +++ b/federatedscope/gfl/trainer/linktrainer.py @@ -1,7 +1,6 @@ import torch from torch.utils.data import DataLoader -from torch_geometric.data import Data from torch_geometric.loader import GraphSAINTRandomWalkSampler, NeighborSampler from federatedscope.core.auxiliaries.enums import LIFECYCLE @@ -44,7 +43,8 @@ def parse_data(self, data): init_dict = dict() if isinstance(data, dict): for mode in ["train", "val", "test"]: - edges = data['data'].edge_index.T[data[MODE2MASK[mode]]] + graph_data = data['data'] + edges = graph_data.edge_index.T[graph_data[MODE2MASK[mode]]] # Use an index loader index_loader = DataLoader( range(edges.size(0)), @@ -61,6 +61,8 @@ def parse_data(self, data): return init_dict def _hook_on_epoch_start_data2device(self, ctx): + if isinstance(ctx.data, dict): + ctx.data = ctx.data['data'] ctx.data = ctx.data.to(ctx.device) # For handling different dict key if "input_edge_index" in ctx.data: From 577f167d948ccb605f78c8c2bea9f1a8152dc432 Mon Sep 17 00:00:00 2001 From: rayrayraykk <18007356109@163.com> Date: Fri, 16 Sep 2022 18:54:22 +0800 Subject: [PATCH 19/39] add kwargs for splitter --- federatedscope/core/configs/README.md | 1 - 
federatedscope/core/splitters/generic/lda_splitter.py | 2 +- federatedscope/core/splitters/graph/louvain_splitter.py | 2 +- federatedscope/core/splitters/graph/randchunk_splitter.py | 7 ++++--- federatedscope/core/splitters/graph/random_splitter.py | 2 +- federatedscope/core/splitters/graph/reltype_splitter.py | 2 +- .../core/splitters/graph/scaffold_lda_splitter.py | 2 +- federatedscope/core/splitters/graph/scaffold_splitter.py | 2 +- 8 files changed, 10 insertions(+), 10 deletions(-) diff --git a/federatedscope/core/configs/README.md b/federatedscope/core/configs/README.md index 156923f2a..815048234 100644 --- a/federatedscope/core/configs/README.md +++ b/federatedscope/core/configs/README.md @@ -14,7 +14,6 @@ We summarize all the customizable configurations: ### Data The configurations related to the data/dataset are defined in `cfg_data.py`. -<<<<<<< HEAD | Name | (Type) Default Value | Description | Note | |:--------------------------------------------:|:-----:|:---------- |:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| | `data.root` | (string) 'data' | The folder where the data file located. `data.root` would be used together with `data.type` to load the dataset. 
| - | diff --git a/federatedscope/core/splitters/generic/lda_splitter.py b/federatedscope/core/splitters/generic/lda_splitter.py index c8d4789c7..b32d5e07f 100644 --- a/federatedscope/core/splitters/generic/lda_splitter.py +++ b/federatedscope/core/splitters/generic/lda_splitter.py @@ -9,7 +9,7 @@ def __init__(self, client_num, alpha=0.5): self.alpha = alpha super(LDASplitter, self).__init__(client_num) - def __call__(self, dataset, prior=None): + def __call__(self, dataset, prior=None, **kwargs): dataset = [ds for ds in dataset] label = np.array([y for x, y in dataset]) idx_slice = dirichlet_distribution_noniid_slice(label, diff --git a/federatedscope/core/splitters/graph/louvain_splitter.py b/federatedscope/core/splitters/graph/louvain_splitter.py index 9ed0e5f4c..908ae9a43 100644 --- a/federatedscope/core/splitters/graph/louvain_splitter.py +++ b/federatedscope/core/splitters/graph/louvain_splitter.py @@ -22,7 +22,7 @@ def __init__(self, client_num, delta=20): self.delta = delta BaseSplitter.__init__(self, client_num) - def __call__(self, data): + def __call__(self, data, **kwargs): data.index_orig = torch.arange(data.num_nodes) G = to_networkx( data, diff --git a/federatedscope/core/splitters/graph/randchunk_splitter.py b/federatedscope/core/splitters/graph/randchunk_splitter.py index 6a8f5ac66..07e2e93cb 100644 --- a/federatedscope/core/splitters/graph/randchunk_splitter.py +++ b/federatedscope/core/splitters/graph/randchunk_splitter.py @@ -1,13 +1,14 @@ import numpy as np from torch_geometric.transforms import BaseTransform +from federatedscope.core.splitters import BaseSplitter -class RandChunkSplitter(BaseTransform): +class RandChunkSplitter(BaseTransform, BaseSplitter): def __init__(self, client_num): - super(RandChunkSplitter, self).__init__(client_num) + BaseSplitter.__init__(self, client_num) - def __call__(self, dataset): + def __call__(self, dataset, **kwargs): r"""Split dataset via random chunk. 
Arguments: diff --git a/federatedscope/core/splitters/graph/random_splitter.py b/federatedscope/core/splitters/graph/random_splitter.py index 10faebcb8..a3c12be1e 100644 --- a/federatedscope/core/splitters/graph/random_splitter.py +++ b/federatedscope/core/splitters/graph/random_splitter.py @@ -52,7 +52,7 @@ def __init__(self, self.drop_edge = drop_edge - def __call__(self, data): + def __call__(self, data, **kwargs): data.index_orig = torch.arange(data.num_nodes) G = to_networkx( data, diff --git a/federatedscope/core/splitters/graph/reltype_splitter.py b/federatedscope/core/splitters/graph/reltype_splitter.py index abf0e011c..2452addbd 100644 --- a/federatedscope/core/splitters/graph/reltype_splitter.py +++ b/federatedscope/core/splitters/graph/reltype_splitter.py @@ -24,7 +24,7 @@ def __init__(self, client_num, alpha=0.5, realloc_mask=False): self.alpha = alpha self.realloc_mask = realloc_mask - def __call__(self, data): + def __call__(self, data, **kwargs): data_list = [] label = data.edge_type.numpy() idx_slice = dirichlet_distribution_noniid_slice( diff --git a/federatedscope/core/splitters/graph/scaffold_lda_splitter.py b/federatedscope/core/splitters/graph/scaffold_lda_splitter.py index 08c520d31..87b119a3b 100644 --- a/federatedscope/core/splitters/graph/scaffold_lda_splitter.py +++ b/federatedscope/core/splitters/graph/scaffold_lda_splitter.py @@ -53,7 +53,7 @@ def __init__(self): Chem.rdchem.BondStereo.STEREOTRANS, ] - def __call__(self, data): + def __call__(self, data, **kwargs): mol = Chem.MolFromSmiles(data.smiles) xs = [] diff --git a/federatedscope/core/splitters/graph/scaffold_splitter.py b/federatedscope/core/splitters/graph/scaffold_splitter.py index 77dfc60df..db41779f6 100644 --- a/federatedscope/core/splitters/graph/scaffold_splitter.py +++ b/federatedscope/core/splitters/graph/scaffold_splitter.py @@ -53,7 +53,7 @@ class ScaffoldSplitter(BaseSplitter): def __init__(self, client_num): super(ScaffoldSplitter, self).__init__(client_num) - def __call__(self, dataset): + def __call__(self, dataset, **kwargs): r"""Split dataset with smiles string into scaffold split Arguments: From c2831082fe29758ea18296cfca4a6b698cdd0558 Mon Sep 17 00:00:00 2001 From: rayrayraykk <18007356109@163.com> Date: Fri, 16 Sep 2022 19:01:22 +0800 Subject: [PATCH 20/39] fix graph-level bugs --- federatedscope/core/auxiliaries/splitter_builder.py | 3 ++- federatedscope/core/data/base_translator.py | 4 ++-- federatedscope/core/data/dummy_translator.py | 3 +-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/federatedscope/core/auxiliaries/splitter_builder.py b/federatedscope/core/auxiliaries/splitter_builder.py index 31ad607ba..85c73e35c 100644 --- a/federatedscope/core/auxiliaries/splitter_builder.py +++ b/federatedscope/core/auxiliaries/splitter_builder.py @@ -40,6 +40,7 @@ def get_splitter(config): from federatedscope.core.splitters.graph import RandChunkSplitter splitter = RandChunkSplitter(client_num, **kwargs) else: - logger.warning(f'Splitter {config.data.splitter} not found.') + logger.warning(f'Splitter {config.data.splitter} not found or not ' + f'used.') splitter = None return splitter diff --git a/federatedscope/core/data/base_translator.py b/federatedscope/core/data/base_translator.py index 5be1ee5f9..ade4c16d1 100644 --- a/federatedscope/core/data/base_translator.py +++ b/federatedscope/core/data/base_translator.py @@ -98,8 +98,8 @@ def split_to_client(self, train, val, test): train_label_distribution = [[j[1] for j in x] for x in split_train] except: - 
logger.warning('Cannot access train label distribution.') - # TODO: fix kwargs in splitter. + logger.warning('Cannot access train label distribution for ' + 'splitter.') if len(val) > 0: split_val = self.splitter(val, prior=train_label_distribution) if len(test) > 0: diff --git a/federatedscope/core/data/dummy_translator.py b/federatedscope/core/data/dummy_translator.py index 76858289b..640a80ec3 100644 --- a/federatedscope/core/data/dummy_translator.py +++ b/federatedscope/core/data/dummy_translator.py @@ -23,7 +23,6 @@ def split(self, dataset): datadict[client_id] = ClientData(client_cfg, **dataset[client_id]) else: - print(type(datadict)) # Do not have train/val/test train, val, test = self.split_train_val_test( dataset[client_id]) @@ -32,7 +31,7 @@ def split(self, dataset): if client_cfg.model.task.startswith('graph') and \ client_cfg.model.out_channels == 0: s = set() - for g in datadict[client_id]: + for g in dataset[client_id]: s.add(g.y.item()) tmp_dict['num_label'] = len(s) datadict[client_id] = ClientData(client_cfg, **tmp_dict) From 85ae82de1085e6e98a8208b4107a6a7f20c24d30 Mon Sep 17 00:00:00 2001 From: rayrayraykk <18007356109@163.com> Date: Tue, 20 Sep 2022 12:06:47 +0800 Subject: [PATCH 21/39] fix minor bugs --- .../core/auxiliaries/model_builder.py | 2 + federatedscope/core/data/base_data.py | 4 +- federatedscope/gfl/fedsageplus/worker.py | 3 +- federatedscope/tabular/dataloader/toy.py | 12 +- .../vertical_fl/dataloader/dataloader.py | 16 +- scripts/example_configs/quadratic.yaml | 5 +- tests/test_CRA_gan_attack.py | 85 ------ tests/test_MIA_gradient_ascent.py | 93 ------- tests/test_PIA_toy.py | 61 ---- tests/test_asyn_cifar10.py | 260 ------------------ tests/test_backdoor_attack.py | 90 ------ tests/test_ditto.py | 90 ------ tests/test_efficient_simulation.py | 48 ---- tests/test_external_dataset.py | 146 ---------- tests/test_fedem.py | 84 ------ tests/test_fedopt.py | 83 ------ tests/test_fedprox.py | 82 ------ tests/test_fedsageplus.py | 77 ------ tests/test_femnist.py | 84 ------ tests/test_finetune_lr.py | 59 ---- tests/test_global_train_lr.py | 56 ---- tests/test_graph_node_trainer.py | 71 ----- tests/test_local_train_lr.py | 57 ---- tests/test_mf.py | 71 ----- tests/test_nbafl.py | 89 ------ tests/test_optimizer.py | 83 ------ tests/test_pfedme.py | 86 ------ tests/test_rec_IG_opt_attack.py | 86 ------ tests/test_rec_opt_attack.py | 92 ------- tests/test_toy_lr.py | 80 ------ tests/test_unseen_clients_lr.py | 57 ---- tests/test_vertical_fl.py | 1 + tests/test_yaml.py | 61 ---- 33 files changed, 22 insertions(+), 2252 deletions(-) delete mode 100644 tests/test_CRA_gan_attack.py delete mode 100644 tests/test_MIA_gradient_ascent.py delete mode 100644 tests/test_PIA_toy.py delete mode 100644 tests/test_asyn_cifar10.py delete mode 100644 tests/test_backdoor_attack.py delete mode 100644 tests/test_ditto.py delete mode 100644 tests/test_efficient_simulation.py delete mode 100644 tests/test_external_dataset.py delete mode 100644 tests/test_fedem.py delete mode 100644 tests/test_fedopt.py delete mode 100644 tests/test_fedprox.py delete mode 100644 tests/test_fedsageplus.py delete mode 100644 tests/test_femnist.py delete mode 100644 tests/test_finetune_lr.py delete mode 100644 tests/test_global_train_lr.py delete mode 100644 tests/test_graph_node_trainer.py delete mode 100644 tests/test_local_train_lr.py delete mode 100644 tests/test_mf.py delete mode 100644 tests/test_nbafl.py delete mode 100644 tests/test_optimizer.py delete mode 100644 tests/test_pfedme.py delete mode 
100644 tests/test_rec_IG_opt_attack.py delete mode 100644 tests/test_rec_opt_attack.py delete mode 100644 tests/test_toy_lr.py delete mode 100644 tests/test_unseen_clients_lr.py delete mode 100644 tests/test_yaml.py diff --git a/federatedscope/core/auxiliaries/model_builder.py b/federatedscope/core/auxiliaries/model_builder.py index 7b2214986..c77e2b13b 100644 --- a/federatedscope/core/auxiliaries/model_builder.py +++ b/federatedscope/core/auxiliaries/model_builder.py @@ -45,6 +45,8 @@ def get_shape_from_data(data, model_config, backend='torch'): keys = list(data.keys()) if 'test' in keys: key_representative = 'test' + elif 'val' in keys: + key_representative = 'val' elif 'train' in keys: key_representative = 'train' elif 'data' in keys: diff --git a/federatedscope/core/data/base_data.py b/federatedscope/core/data/base_data.py index ca18ce419..262173fd9 100644 --- a/federatedscope/core/data/base_data.py +++ b/federatedscope/core/data/base_data.py @@ -167,7 +167,7 @@ def setup(self, new_client_cfg=None): self['train'] = get_dataloader(self.train, self.client_cfg, 'train') if self.val is not None: - self['val'] = get_dataloader(self.train, self.client_cfg, 'val') + self['val'] = get_dataloader(self.val, self.client_cfg, 'val') if self.test is not None: - self['test'] = get_dataloader(self.train, self.client_cfg, 'test') + self['test'] = get_dataloader(self.test, self.client_cfg, 'test') return True diff --git a/federatedscope/gfl/fedsageplus/worker.py b/federatedscope/gfl/fedsageplus/worker.py index a1a5a94e1..256d91e87 100644 --- a/federatedscope/gfl/fedsageplus/worker.py +++ b/federatedscope/gfl/fedsageplus/worker.py @@ -255,7 +255,8 @@ def __init__(self, self).__init__(ID, server_id, state, config, data, model, device, strategy, *args, **kwargs) self.data = data - self.hide_data = HideGraph(self._cfg.fedsageplus.hide_portion)(data) + self.hide_data = HideGraph(self._cfg.fedsageplus.hide_portion)( + data['data']) self.device = device self.sage_batch_size = 64 self.gen = LocalSage_Plus(data.x.shape[-1], diff --git a/federatedscope/tabular/dataloader/toy.py b/federatedscope/tabular/dataloader/toy.py index cb4e035fc..75ed49420 100644 --- a/federatedscope/tabular/dataloader/toy.py +++ b/federatedscope/tabular/dataloader/toy.py @@ -74,7 +74,7 @@ def _generate_data(client_num=5, # server_data data[0] = dict() - data[0]['train'] = None + # data[0]['train'] = None data[0]['val'] = val_data data[0]['test'] = test_data @@ -110,10 +110,10 @@ def _generate_data(client_num=5, else: with open(config.distribute.data_file, 'rb') as f: data = pickle.load(f) - for client_id in data.keys(): - data[client_id] = { - k: WrapDataset(np.asarray(v)) - for k, v in data[client_id].items() - } if data[client_id] is not None else None + for client_id in data.keys(): + data[client_id] = { + k: WrapDataset(v) + for k, v in data[client_id].items() + } if data[client_id] is not None else None return data, config diff --git a/federatedscope/vertical_fl/dataloader/dataloader.py b/federatedscope/vertical_fl/dataloader/dataloader.py index 4e0b5e7d3..e58560c12 100644 --- a/federatedscope/vertical_fl/dataloader/dataloader.py +++ b/federatedscope/vertical_fl/dataloader/dataloader.py @@ -1,8 +1,5 @@ import numpy as np -from federatedscope.core.data import StandaloneDataDict -from federatedscope.core.auxiliaries.dataloader_builder import WrapDataset - def load_vertical_data(config=None, generate=False): """ @@ -37,23 +34,22 @@ def load_vertical_data(config=None, generate=False): data[0] = dict() data[0]['train'] = None 
data[0]['val'] = None - data[0]['test'] = WrapDataset(test_data) + data[0]['test'] = test_data # For Client #1 data[1] = dict() - data[1]['train'] = WrapDataset({ + data[1]['train'] = { 'x': x[:train_num, :config.vertical.dims[0]], 'y': y[:train_num] - }) + } data[1]['val'] = None - data[1]['test'] = WrapDataset(test_data) + data[1]['test'] = test_data # For Client #2 data[2] = dict() - data[2]['train'] = WrapDataset( - {'x': x[:train_num, config.vertical.dims[0]:]}) + data[2]['train'] = {'x': x[:train_num, config.vertical.dims[0]:]} data[2]['val'] = None - data[2]['test'] = WrapDataset(test_data) + data[2]['test'] = test_data return data, config else: diff --git a/scripts/example_configs/quadratic.yaml b/scripts/example_configs/quadratic.yaml index 21e6b645b..99ad58096 100644 --- a/scripts/example_configs/quadratic.yaml +++ b/scripts/example_configs/quadratic.yaml @@ -16,5 +16,6 @@ model: type: 'quadratic' criterion: type: 'L1Loss' -optimizer: - lr: 0.01 +train: + optimizer: + lr: 0.01 diff --git a/tests/test_CRA_gan_attack.py b/tests/test_CRA_gan_attack.py deleted file mode 100644 index 22be63d96..000000000 --- a/tests/test_CRA_gan_attack.py +++ /dev/null @@ -1,85 +0,0 @@ -# Copyright (c) Alibaba, Inc. and its affiliates. -import unittest - -from federatedscope.core.auxiliaries.data_builder import get_data -from federatedscope.core.auxiliaries.utils import setup_seed -from federatedscope.core.auxiliaries.logging import update_logger -from federatedscope.core.configs.config import global_cfg -from federatedscope.core.fed_runner import FedRunner -from federatedscope.core.auxiliaries.worker_builder import get_server_cls, get_client_cls - - -class CRATest(unittest.TestCase): - def setUp(self): - print(('Testing %s.%s' % (type(self).__name__, self._testMethodName))) - - def set_config_femnist(self, cfg): - backup_cfg = cfg.clone() - - import torch - cfg.use_gpu = torch.cuda.is_available() - cfg.device = 0 - cfg.eval.freq = 10 - cfg.eval.metrics = ['acc', 'loss_regular'] - - cfg.federate.mode = 'standalone' - cfg.train.local_update_steps = 5 - cfg.federate.total_round_num = 20 - cfg.federate.sample_client_num = 5 - cfg.federate.client_num = 10 - - cfg.data.root = 'test_data/' - cfg.data.type = 'femnist' - cfg.data.splits = [0.6, 0.2, 0.2] - cfg.dataloader.batch_size = 10 - cfg.data.subsample = 0.01 - cfg.data.transform = [['ToTensor'], - [ - 'Normalize', { - 'mean': [0.1307], - 'std': [0.3081] - } - ]] - - cfg.model.type = 'convnet2' - cfg.model.hidden = 2048 - cfg.model.out_channels = 62 - - cfg.train.optimizer.lr = 0.001 - cfg.train.optimizer.weight_decay = 0.0 - - cfg.criterion.type = 'CrossEntropyLoss' - cfg.trainer.type = 'cvtrainer' - cfg.seed = 123 - - cfg.attack.attack_method = 'gan_attack' - cfg.attack.attack_id = 5 - cfg.attack.target_label_ind = 3 - - return backup_cfg - - def test_CRA_GAN_femnist_standalone(self): - init_cfg = global_cfg.clone() - backup_cfg = self.set_config_femnist(init_cfg) - setup_seed(init_cfg.seed) - update_logger(init_cfg, True) - - data, modified_cfg = get_data(init_cfg.clone()) - init_cfg.merge_from_other_cfg(modified_cfg) - self.assertIsNotNone(data) - - Fed_runner = FedRunner(data=data, - server_class=get_server_cls(init_cfg), - client_class=get_client_cls(init_cfg), - config=init_cfg.clone()) - self.assertIsNotNone(Fed_runner) - test_best_results = Fed_runner.run() - print(test_best_results) - init_cfg.merge_from_other_cfg(backup_cfg) - self.assertLess( - test_best_results["client_summarized_weighted_avg"]['test_loss'], - 600) - - -if __name__ == 
'__main__': - unittest.main() diff --git a/tests/test_MIA_gradient_ascent.py b/tests/test_MIA_gradient_ascent.py deleted file mode 100644 index 5d2283c82..000000000 --- a/tests/test_MIA_gradient_ascent.py +++ /dev/null @@ -1,93 +0,0 @@ -# Copyright (c) Alibaba, Inc. and its affiliates. -import unittest - -from federatedscope.core.auxiliaries.data_builder import get_data -from federatedscope.core.auxiliaries.utils import setup_seed -from federatedscope.core.auxiliaries.logging import update_logger -from federatedscope.core.configs.config import global_cfg -from federatedscope.core.fed_runner import FedRunner -from federatedscope.core.auxiliaries.worker_builder import get_server_cls, get_client_cls - - -class GradAscentTest(unittest.TestCase): - def setUp(self): - print(('Testing %s.%s' % (type(self).__name__, self._testMethodName))) - - def set_config_femnist(self, cfg): - backup_cfg = cfg.clone() - - import torch - cfg.use_gpu = torch.cuda.is_available() - cfg.device = 0 - cfg.eval.freq = 10 - cfg.eval.metrics = ['acc', 'loss_regular'] - - cfg.federate.mode = 'standalone' - cfg.train.local_update_steps = 5 - cfg.federate.total_round_num = 20 - cfg.federate.sample_client_num = 5 - cfg.federate.client_num = 10 - - cfg.data.root = 'test_data/' - cfg.data.type = 'femnist' - cfg.data.splits = [0.6, 0.2, 0.2] - cfg.dataloader.batch_size = 10 - cfg.data.subsample = 0.01 - cfg.data.transform = [['ToTensor'], - [ - 'Normalize', { - 'mean': [0.1307], - 'std': [0.3081] - } - ]] - - cfg.model.type = 'convnet2' - cfg.model.hidden = 2048 - cfg.model.out_channels = 62 - - cfg.train.optimizer.lr = 0.001 - cfg.train.optimizer.weight_decay = 0.0 - - cfg.criterion.type = 'CrossEntropyLoss' - cfg.trainer.type = 'cvtrainer' - cfg.seed = 123 - - cfg.attack.attack_method = 'GradAscent' - cfg.attack.attacker_id = 5 - cfg.attack.inject_round = 0 - - return backup_cfg - - def test_GradAscent_femnist_standalone(self): - init_cfg = global_cfg.clone() - backup_cfg = self.set_config_femnist(init_cfg) - setup_seed(init_cfg.seed) - update_logger(init_cfg, True) - - data, modified_cfg = get_data(init_cfg.clone()) - init_cfg.merge_from_other_cfg(modified_cfg) - self.assertIsNotNone(data) - - Fed_runner = FedRunner(data=data, - server_class=get_server_cls(init_cfg), - client_class=get_client_cls(init_cfg), - config=init_cfg.clone()) - self.assertIsNotNone(Fed_runner) - test_best_results = Fed_runner.run() - print(test_best_results) - - # TODO: use a resonable metric - self.assertLess( - test_best_results["client_summarized_weighted_avg"]['test_loss'], - 600) - # print(Fed_runner.client.keys()) - target_data_loss = Fed_runner.client[ - init_cfg.attack.attacker_id].trainer.ctx.target_data_loss - self.assertIsNotNone(target_data_loss) - self.assertIn(init_cfg.attack.attacker_id, Fed_runner.client.keys()) - - init_cfg.merge_from_other_cfg(backup_cfg) - - -if __name__ == '__main__': - unittest.main() diff --git a/tests/test_PIA_toy.py b/tests/test_PIA_toy.py deleted file mode 100644 index 24753ea28..000000000 --- a/tests/test_PIA_toy.py +++ /dev/null @@ -1,61 +0,0 @@ -# Copyright (c) Alibaba, Inc. and its affiliates. 
-import unittest - -from federatedscope.core.auxiliaries.data_builder import get_data -from federatedscope.core.auxiliaries.utils import setup_seed -from federatedscope.core.auxiliaries.logging import update_logger -from federatedscope.core.configs.config import global_cfg -from federatedscope.core.fed_runner import FedRunner -from federatedscope.core.auxiliaries.worker_builder import get_server_cls, get_client_cls - - -class PIA_ToyLRTest(unittest.TestCase): - def setUp(self): - print(('Testing %s.%s' % (type(self).__name__, self._testMethodName))) - - def set_config_standalone(self, cfg): - backup_cfg = cfg.clone() - - cfg.use_gpu = False - cfg.federate.mode = 'standalone' - cfg.federate.total_round_num = 20 - cfg.federate.client_num = 5 - cfg.eval.freq = 10 - cfg.data.type = 'toy' - cfg.trainer.type = 'general' - cfg.model.type = 'lr' - - cfg.attack.attack_method = 'PassivePIA' - cfg.attack.classifier_PIA = 'svm' - - return backup_cfg - - def test_PIA_toy_standalone(self): - init_cfg = global_cfg.clone() - backup_cfg = self.set_config_standalone(init_cfg) - setup_seed(init_cfg.seed) - update_logger(init_cfg, True) - - data, modified_config = get_data(init_cfg.clone()) - init_cfg.merge_from_other_cfg(modified_config) - - self.assertIsNotNone(data) - - Fed_runner = FedRunner(data=data, - server_class=get_server_cls(init_cfg), - client_class=get_client_cls(init_cfg), - config=init_cfg.clone()) - self.assertIsNotNone(Fed_runner) - test_best_results = Fed_runner.run() - print(test_best_results) - - self.assertLess( - test_best_results["client_summarized_weighted_avg"]['test_loss'], - 0.3) - self.assertIsNotNone(Fed_runner.server.pia_results) - - init_cfg.merge_from_other_cfg(backup_cfg) - - -if __name__ == '__main__': - unittest.main() diff --git a/tests/test_asyn_cifar10.py b/tests/test_asyn_cifar10.py deleted file mode 100644 index 0a099e066..000000000 --- a/tests/test_asyn_cifar10.py +++ /dev/null @@ -1,260 +0,0 @@ -# Copyright (c) Alibaba, Inc. and its affiliates. 
-import unittest - -from federatedscope.core.auxiliaries.data_builder import get_data -from federatedscope.core.auxiliaries.utils import setup_seed -from federatedscope.core.auxiliaries.logging import update_logger -from federatedscope.core.configs.config import global_cfg -from federatedscope.core.fed_runner import FedRunner -from federatedscope.core.auxiliaries.worker_builder import get_server_cls, get_client_cls - - -class AsynCIFAR10Test(unittest.TestCase): - def setUp(self): - print(('Testing %s.%s' % (type(self).__name__, self._testMethodName))) - - def set_config_cifar10_goalAchieved_afterReceiving(self, cfg): - backup_cfg = cfg.clone() - - import torch - cfg.use_gpu = torch.cuda.is_available() - cfg.eval.freq = 5 - cfg.eval.metrics = ['acc', 'correct'] - cfg.eval.best_res_update_round_wise_key = 'test_acc' - - cfg.federate.mode = 'standalone' - cfg.federate.total_round_num = 40 - cfg.federate.sample_client_num = 13 - cfg.federate.merge_test_data = True - cfg.federate.share_local_model = False - cfg.federate.client_num = 200 - cfg.federate.sampler = 'group' - cfg.federate.resource_info_file = 'test_data/client_device_capacity' - - cfg.data.root = 'test_data/' - cfg.data.type = 'CIFAR10@torchvision' - cfg.data.args = [{'download': False}] - cfg.data.splits = [0.8, 0.2, 0.2] - cfg.dataloader.batch_size = 10 - cfg.data.subsample = 0.2 - cfg.dataloader.num_workers = 0 - cfg.data.transform = [['ToTensor'], - [ - 'Normalize', { - 'mean': [0.4914, 0.4822, 0.4465], - 'std': [0.247, 0.243, 0.261] - } - ]] - cfg.data.splitter = 'lda' - cfg.data.splitter_args = [{'alpha': 0.2}] - - cfg.model.type = 'convnet2' - cfg.model.hidden = 512 - cfg.model.out_channels = 10 - - cfg.train.local_update_steps = 2 - cfg.train.batch_or_epoch = 'batch' - cfg.train.optimizer.lr = 0.1 - cfg.train.optimizer.weight_decay = 0.0 - cfg.grad.grad_clip = 5.0 - - cfg.criterion.type = 'CrossEntropyLoss' - cfg.trainer.type = 'cvtrainer' - cfg.seed = 123 - - cfg.asyn.use = True - cfg.asyn.overselection = False - cfg.asyn.staleness_discount_factor = 0.2 - cfg.asyn.aggregator = 'goal_achieved' - cfg.asyn.broadcast_manner = 'after_receiving' - cfg.asyn.min_received_num = 10 - cfg.asyn.staleness_toleration = 5 - - return backup_cfg - - def set_config_cifar10_timeUp_afterAggregating(self, cfg): - backup_cfg = cfg.clone() - - import torch - cfg.use_gpu = torch.cuda.is_available() - cfg.eval.freq = 5 - cfg.eval.metrics = ['acc', 'correct'] - cfg.eval.best_res_update_round_wise_key = 'test_acc' - - cfg.federate.mode = 'standalone' - cfg.federate.total_round_num = 40 - cfg.federate.sample_client_num = 13 - cfg.federate.merge_test_data = True - cfg.federate.share_local_model = False - cfg.federate.client_num = 200 - cfg.federate.sampler = 'uniform' - cfg.federate.resource_info_file = 'test_data/client_device_capacity' - - cfg.data.root = 'test_data/' - cfg.data.type = 'CIFAR10@torchvision' - cfg.data.args = [{'download': False}] - cfg.data.splits = [0.8, 0.2, 0.2] - cfg.dataloader.batch_size = 10 - cfg.data.subsample = 0.2 - cfg.dataloader.num_workers = 0 - cfg.data.transform = [['ToTensor'], - [ - 'Normalize', { - 'mean': [0.4914, 0.4822, 0.4465], - 'std': [0.247, 0.243, 0.261] - } - ]] - cfg.data.splitter = 'lda' - cfg.data.splitter_args = [{'alpha': 0.2}] - - cfg.model.type = 'convnet2' - cfg.model.hidden = 512 - cfg.model.out_channels = 10 - - cfg.train.local_update_steps = 2 - cfg.train.batch_or_epoch = 'batch' - cfg.train.optimizer.lr = 0.1 - cfg.train.optimizer.weight_decay = 0.0 - cfg.grad.grad_clip = 5.0 - - 
cfg.criterion.type = 'CrossEntropyLoss' - cfg.trainer.type = 'cvtrainer' - cfg.seed = 123 - - cfg.asyn.use = True - cfg.asyn.overselection = False - cfg.asyn.staleness_discount_factor = 0.2 - cfg.asyn.aggregator = 'time_up' - cfg.asyn.time_budget = 10 - cfg.asyn.broadcast_manner = 'after_aggregating' - cfg.asyn.min_received_num = 10 - cfg.asyn.staleness_toleration = 5 - - return backup_cfg - - def set_config_cifar10_overselection(self, cfg): - backup_cfg = cfg.clone() - - import torch - cfg.use_gpu = torch.cuda.is_available() - cfg.eval.freq = 5 - cfg.eval.metrics = ['acc', 'correct'] - cfg.eval.best_res_update_round_wise_key = 'test_acc' - - cfg.federate.mode = 'standalone' - cfg.federate.total_round_num = 40 - cfg.federate.sample_client_num = 13 - cfg.federate.merge_test_data = True - cfg.federate.share_local_model = False - cfg.federate.client_num = 200 - cfg.federate.sampler = 'uniform' - cfg.federate.resource_info_file = 'test_data/client_device_capacity' - - cfg.data.root = 'test_data/' - cfg.data.type = 'CIFAR10@torchvision' - cfg.data.args = [{'download': False}] - cfg.data.splits = [0.8, 0.2, 0.2] - cfg.dataloader.batch_size = 10 - cfg.data.subsample = 0.2 - cfg.dataloader.num_workers = 0 - cfg.data.transform = [['ToTensor'], - [ - 'Normalize', { - 'mean': [0.4914, 0.4822, 0.4465], - 'std': [0.247, 0.243, 0.261] - } - ]] - cfg.data.splitter = 'lda' - cfg.data.splitter_args = [{'alpha': 0.2}] - - cfg.model.type = 'convnet2' - cfg.model.hidden = 512 - cfg.model.out_channels = 10 - - cfg.train.local_update_steps = 2 - cfg.train.batch_or_epoch = 'batch' - cfg.train.optimizer.lr = 0.1 - cfg.train.optimizer.weight_decay = 0.0 - cfg.grad.grad_clip = 5.0 - - cfg.criterion.type = 'CrossEntropyLoss' - cfg.trainer.type = 'cvtrainer' - cfg.seed = 123 - - cfg.asyn.use = True - cfg.asyn.overselection = True - cfg.asyn.staleness_discount_factor = 0.2 - cfg.asyn.aggregator = 'goal_achieved' - cfg.asyn.broadcast_manner = 'after_aggregating' - cfg.asyn.min_received_num = 10 - cfg.asyn.staleness_toleration = 0 - - return backup_cfg - - def test_asyn_cifar10_goalAchieved_afterReceiving(self): - init_cfg = global_cfg.clone() - backup_cfg = self.set_config_cifar10_goalAchieved_afterReceiving( - init_cfg) - setup_seed(init_cfg.seed) - update_logger(init_cfg) - - data, modified_cfg = get_data(init_cfg.clone()) - init_cfg.merge_from_other_cfg(modified_cfg) - self.assertIsNotNone(data) - - Fed_runner = FedRunner(data=data, - server_class=get_server_cls(init_cfg), - client_class=get_client_cls(init_cfg), - config=init_cfg.clone()) - self.assertIsNotNone(Fed_runner) - test_best_results = Fed_runner.run() - print(test_best_results) - init_cfg.merge_from_other_cfg(backup_cfg) - self.assertGreater(test_best_results['server_global_eval']['test_acc'], - 0.15) - - def test_asyn_cifar10_timeUp_afterAggregating(self): - init_cfg = global_cfg.clone() - backup_cfg = self.set_config_cifar10_timeUp_afterAggregating(init_cfg) - setup_seed(init_cfg.seed) - update_logger(init_cfg) - - data, modified_cfg = get_data(init_cfg.clone()) - init_cfg.merge_from_other_cfg(modified_cfg) - self.assertIsNotNone(data) - - Fed_runner = FedRunner(data=data, - server_class=get_server_cls(init_cfg), - client_class=get_client_cls(init_cfg), - config=init_cfg.clone()) - self.assertIsNotNone(Fed_runner) - test_best_results = Fed_runner.run() - print(test_best_results) - init_cfg.merge_from_other_cfg(backup_cfg) - self.assertGreater(test_best_results['server_global_eval']['test_acc'], - 0.15) - - def test_asyn_cifar10_overselection(self): - 
init_cfg = global_cfg.clone() - backup_cfg = self.set_config_cifar10_overselection(init_cfg) - setup_seed(init_cfg.seed) - update_logger(init_cfg) - - data, modified_cfg = get_data(init_cfg.clone()) - init_cfg.merge_from_other_cfg(modified_cfg) - self.assertIsNotNone(data) - - Fed_runner = FedRunner(data=data, - server_class=get_server_cls(init_cfg), - client_class=get_client_cls(init_cfg), - config=init_cfg.clone()) - self.assertIsNotNone(Fed_runner) - test_best_results = Fed_runner.run() - print(test_best_results) - init_cfg.merge_from_other_cfg(backup_cfg) - self.assertGreater(test_best_results['server_global_eval']['test_acc'], - 0.15) - - -if __name__ == '__main__': - unittest.main() diff --git a/tests/test_backdoor_attack.py b/tests/test_backdoor_attack.py deleted file mode 100644 index c1f39729b..000000000 --- a/tests/test_backdoor_attack.py +++ /dev/null @@ -1,90 +0,0 @@ -# Copyright (c) Alibaba, Inc. and its affiliates. -import unittest - -from federatedscope.core.auxiliaries.data_builder import get_data -from federatedscope.core.auxiliaries.utils import setup_seed -from federatedscope.core.auxiliaries.logging import update_logger -from federatedscope.core.configs.config import global_cfg -from federatedscope.core.fed_runner import FedRunner -from federatedscope.core.auxiliaries.worker_builder import get_server_cls, get_client_cls - - -class Backdoor_Attack(unittest.TestCase): - def setUp(self): - print(('Testing %s.%s' % (type(self).__name__, self._testMethodName))) - - def set_config_femnist(self, cfg): - backup_cfg = cfg.clone() - - import torch - cfg.use_gpu = torch.cuda.is_available() - cfg.device = 0 - cfg.eval.freq = 1 - cfg.eval.metrics = ['acc', 'correct', 'poison_attack_acc'] - - cfg.early_stop.patience = 0 - cfg.federate.mode = 'standalone' - cfg.train.batch_or_epoch = 'epoch' - cfg.train.local_update_steps = 2 - cfg.federate.total_round_num = 10 - cfg.federate.sample_client_num = 20 - cfg.federate.client_num = 200 - - cfg.data.root = 'test_data/' - cfg.data.type = 'femnist' - cfg.data.splits = [0.6, 0.2, 0.2] - cfg.dataloader.batch_size = 32 - cfg.data.subsample = 0.05 - cfg.data.transform = [['ToTensor']] - - cfg.model.type = 'convnet2' - cfg.model.hidden = 2048 - cfg.model.out_channels = 62 - - cfg.train.optimizer.lr = 0.1 - cfg.train.optimizer.weight_decay = 0.0 - - cfg.criterion.type = 'CrossEntropyLoss' - cfg.trainer.type = 'cvtrainer' - cfg.seed = 123 - - cfg.attack.attack_method = 'backdoor' - cfg.attack.attacker_id = -1 - cfg.attack.inject_round = 0 - cfg.attack.setting = 'fix' - cfg.attack.freq = 10 - cfg.attack.label_type = 'dirty' - cfg.attack.trigger_type = 'gridTrigger' - cfg.attack.target_label_ind = 1 - cfg.attack.mean = [0.1307] - cfg.attack.std = [0.3081] - - return backup_cfg - - def test_backdoor_edge_femnist_standalone(self): - init_cfg = global_cfg.clone() - backup_cfg = self.set_config_femnist(init_cfg) - setup_seed(init_cfg.seed) - update_logger(init_cfg) - - data, modified_cfg = get_data(init_cfg.clone()) - init_cfg.merge_from_other_cfg(modified_cfg) - self.assertIsNotNone(data) - - Fed_runner = FedRunner(data=data, - server_class=get_server_cls(init_cfg), - client_class=get_client_cls(init_cfg), - config=init_cfg.clone()) - self.assertIsNotNone(Fed_runner) - test_best_results = Fed_runner.run() - print(test_best_results) - - # TODO: use a resonable metric - self.assertGreater( - test_best_results["client_summarized_weighted_avg"]['test_acc'], - 0.1) - init_cfg.merge_from_other_cfg(backup_cfg) - - -if __name__ == '__main__': - unittest.main() 
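All of the test modules deleted in this commit instantiated the same standalone harness and differed only in the config fields they set and the metric they asserted on. For reference, a condensed sketch of that shared skeleton, reconstructed from the deleted files (the class name and the elided config body are placeholders, not code from this patch set):

    import unittest

    from federatedscope.core.auxiliaries.data_builder import get_data
    from federatedscope.core.auxiliaries.utils import setup_seed
    from federatedscope.core.auxiliaries.logging import update_logger
    from federatedscope.core.configs.config import global_cfg
    from federatedscope.core.fed_runner import FedRunner
    from federatedscope.core.auxiliaries.worker_builder import \
        get_server_cls, get_client_cls


    class StandaloneHarnessTest(unittest.TestCase):
        def test_standalone(self):
            init_cfg = global_cfg.clone()
            # ... task-specific cfg fields were set here ...
            setup_seed(init_cfg.seed)
            update_logger(init_cfg)

            # The data builder may modify the config (e.g. client_num),
            # so the returned copy is merged back before running.
            data, modified_cfg = get_data(init_cfg.clone())
            init_cfg.merge_from_other_cfg(modified_cfg)
            self.assertIsNotNone(data)

            # Build and run standalone FL, then assert on the
            # summarized evaluation results.
            fed_runner = FedRunner(data=data,
                                   server_class=get_server_cls(init_cfg),
                                   client_class=get_client_cls(init_cfg),
                                   config=init_cfg.clone())
            test_best_results = fed_runner.run()
            self.assertIsNotNone(test_best_results)


    if __name__ == '__main__':
        unittest.main()

Presumably this duplication is why the modules can be dropped wholesale here: any task-specific coverage can be recreated by parameterizing one such harness rather than keeping dozens of near-identical files.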
diff --git a/tests/test_ditto.py b/tests/test_ditto.py deleted file mode 100644 index ff6c87ac9..000000000 --- a/tests/test_ditto.py +++ /dev/null @@ -1,90 +0,0 @@ -# Copyright (c) Alibaba, Inc. and its affiliates. -import unittest - -from federatedscope.core.auxiliaries.data_builder import get_data -from federatedscope.core.auxiliaries.utils import setup_seed -from federatedscope.core.auxiliaries.logging import update_logger -from federatedscope.core.configs.config import global_cfg -from federatedscope.core.fed_runner import FedRunner -from federatedscope.core.auxiliaries.worker_builder import get_server_cls, get_client_cls - - -class FEMNISTTest(unittest.TestCase): - def setUp(self): - print(('Testing %s.%s' % (type(self).__name__, self._testMethodName))) - - def set_config_femnist(self, cfg): - backup_cfg = cfg.clone() - - import torch - cfg.use_gpu = torch.cuda.is_available() - cfg.eval.freq = 10 - cfg.eval.metrics = ['acc', 'loss_regular'] - - cfg.federate.mode = 'standalone' - cfg.train.local_update_steps = 5 - cfg.federate.total_round_num = 20 - cfg.federate.sample_client_num = 5 - cfg.federate.client_num = 10 - - cfg.federate.method = "Ditto" - cfg.personalization.regular_weight = 0.1 - - # test in epoch mode - # cfg.train.local_update_steps = 1 - # cfg.train.batch_or_epoch = 'epoch' - # cfg.personalization.local_update_steps = 1 - - cfg.data.root = 'test_data/' - cfg.data.type = 'femnist' - cfg.data.splits = [0.6, 0.2, 0.2] - cfg.dataloader.batch_size = 10 - cfg.data.subsample = 0.05 - cfg.data.transform = [['ToTensor'], - [ - 'Normalize', { - 'mean': [0.1307], - 'std': [0.3081] - } - ]] - - cfg.model.type = 'convnet2' - cfg.model.hidden = 2048 - cfg.model.out_channels = 62 - - cfg.train.optimizer.lr = 0.001 - cfg.train.optimizer.weight_decay = 0.0 - cfg.grad.grad_clip = 5.0 - - cfg.criterion.type = 'CrossEntropyLoss' - cfg.trainer.type = 'cvtrainer' - cfg.seed = 123 - - return backup_cfg - - def test_femnist_standalone(self): - init_cfg = global_cfg.clone() - - backup_cfg = self.set_config_femnist(init_cfg) - setup_seed(init_cfg.seed) - update_logger(init_cfg, True) - - data, modified_cfg = get_data(init_cfg.clone()) - init_cfg.merge_from_other_cfg(modified_cfg) - self.assertIsNotNone(data) - - Fed_runner = FedRunner(data=data, - server_class=get_server_cls(init_cfg), - client_class=get_client_cls(init_cfg), - config=init_cfg.clone()) - self.assertIsNotNone(Fed_runner) - test_best_results = Fed_runner.run() - print(test_best_results) - init_cfg.merge_from_other_cfg(backup_cfg) - self.assertLess( - test_best_results["client_summarized_weighted_avg"] - ['test_avg_loss'], 10) - - -if __name__ == '__main__': - unittest.main() diff --git a/tests/test_efficient_simulation.py b/tests/test_efficient_simulation.py deleted file mode 100644 index 8ec927b51..000000000 --- a/tests/test_efficient_simulation.py +++ /dev/null @@ -1,48 +0,0 @@ -# Copyright (c) Alibaba, Inc. and its affiliates. 
-import unittest - -from federatedscope.core.auxiliaries.data_builder import get_data -from federatedscope.core.auxiliaries.utils import setup_seed -from federatedscope.core.auxiliaries.logging import update_logger -from federatedscope.core.configs.config import global_cfg -from federatedscope.core.fed_runner import FedRunner -from federatedscope.core.auxiliaries.worker_builder import get_server_cls, get_client_cls - - -class EfficientSimulationTest(unittest.TestCase): - def setUp(self): - print(('Testing %s.%s' % (type(self).__name__, self._testMethodName))) - - def test_toy_example_standalone_cmp_sim_impl(self): - case_cfg = global_cfg.clone() - case_cfg.merge_from_file('scripts/example_configs/single_process.yaml') - - setup_seed(case_cfg.seed) - update_logger(case_cfg) - - data, _ = get_data(case_cfg.clone()) - Fed_runner = FedRunner(data=data, - server_class=get_server_cls(case_cfg), - client_class=get_client_cls(case_cfg), - config=case_cfg.clone()) - efficient_test_results = Fed_runner.run() - - setup_seed(case_cfg.seed) - case_cfg.merge_from_list([ - 'federate.share_local_model', 'False', 'federate.online_aggr', - 'False' - ]) - data, _ = get_data(case_cfg.clone()) - Fed_runner = FedRunner(data=data, - server_class=get_server_cls(case_cfg), - client_class=get_client_cls(case_cfg), - config=case_cfg.clone()) - ordinary_test_results = Fed_runner.run() - gap = efficient_test_results["client_summarized_weighted_avg"][ - 'test_loss'] - ordinary_test_results[ - "client_summarized_weighted_avg"]['test_loss'] - self.assertLess(gap, 0.1) - - -if __name__ == '__main__': - unittest.main() diff --git a/tests/test_external_dataset.py b/tests/test_external_dataset.py deleted file mode 100644 index 8c94995ca..000000000 --- a/tests/test_external_dataset.py +++ /dev/null @@ -1,146 +0,0 @@ -# Copyright (c) Alibaba, Inc. and its affiliates. 
-import unittest - -from federatedscope.core.auxiliaries.data_builder import get_data -from federatedscope.core.auxiliaries.utils import setup_seed -from federatedscope.core.auxiliaries.logging import update_logger -from federatedscope.core.configs.config import global_cfg -from federatedscope.core.fed_runner import FedRunner -from federatedscope.core.auxiliaries.worker_builder import get_server_cls, get_client_cls - - -class ExternalDatasetTest(unittest.TestCase): - def setUp(self): - print(('Testing %s.%s' % (type(self).__name__, self._testMethodName))) - - def set_config_torchvision_dataset(self, cfg): - backup_cfg = cfg.clone() - - import torch - cfg.use_gpu = torch.cuda.is_available() - cfg.eval.freq = 10 - cfg.eval.metrics = ['acc'] - - cfg.federate.mode = 'standalone' - cfg.train.local_update_steps = 1 - cfg.federate.total_round_num = 20 - cfg.train.batch_or_epoch = 'epoch' - cfg.federate.client_num = 5 - cfg.federate.sample_client_rate = 0.2 - - cfg.data.root = 'test_data/' - cfg.data.type = 'MNIST@torchvision' - cfg.data.args = [{'download': True}] - cfg.data.splits = [0.6, 0.2, 0.2] - cfg.dataloader.batch_size = 10 - cfg.data.transform = [['ToTensor'], - [ - 'Normalize', { - 'mean': [0.1307], - 'std': [0.3081] - } - ]] - cfg.data.splitter = 'lda' - cfg.data.splitter_args = [{'alpha': 0.5}] - - cfg.model.type = 'convnet2' - cfg.model.hidden = 2048 - cfg.model.out_channels = 10 - - cfg.train.optimizer.lr = 0.01 - cfg.train.optimizer.weight_decay = 0.0 - cfg.grad.grad_clip = 5.0 - - cfg.criterion.type = 'CrossEntropyLoss' - cfg.trainer.type = 'cvtrainer' - cfg.seed = 12345 - - return backup_cfg - - def set_config_torchtext_dataset(self, cfg): - backup_cfg = cfg.clone() - - import torch - cfg.use_gpu = torch.cuda.is_available() - cfg.eval.freq = 10 - cfg.eval.metrics = ['acc'] - - cfg.federate.mode = 'standalone' - cfg.train.local_update_steps = 1 - cfg.federate.total_round_num = 20 - cfg.train.batch_or_epoch = 'epoch' - cfg.federate.client_num = 5 - cfg.federate.sample_client_rate = 0.2 - - cfg.data.root = 'test_data/' - cfg.data.args = [{'max_len': 100}] - cfg.data.type = 'IMDB@torchtext' - cfg.data.splits = [0.6, 0.2, 0.2] - cfg.dataloader.batch_size = 10 - cfg.data.transform = ['GloVe', {'cache': 'test_data/', 'name': '6B'}] - cfg.data.splitter = 'lda' - cfg.data.splitter_args = [{'alpha': 0.5}] - - cfg.model.type = 'lstm' - cfg.model.task = 'SequenceClassification' - cfg.model.hidden = 256 - cfg.model.in_channels = 300 - cfg.model.embed_size = 0 - cfg.model.out_channels = 2 - - cfg.train.optimizer.lr = 0.8 - cfg.train.optimizer.weight_decay = 0.0 - - cfg.criterion.type = 'CrossEntropyLoss' - cfg.trainer.type = 'nlptrainer' - cfg.seed = 12345 - - return backup_cfg - - def test_torchvision_dataset_standalone(self): - init_cfg = global_cfg.clone() - backup_cfg = self.set_config_torchvision_dataset(init_cfg) - setup_seed(init_cfg.seed) - update_logger(init_cfg, True) - - data, modified_cfg = get_data(init_cfg.clone()) - init_cfg.merge_from_other_cfg(modified_cfg) - self.assertIsNotNone(data) - - Fed_runner = FedRunner(data=data, - server_class=get_server_cls(init_cfg), - client_class=get_client_cls(init_cfg), - config=init_cfg.clone()) - self.assertIsNotNone(Fed_runner) - test_best_results = Fed_runner.run() - print(test_best_results) - init_cfg.merge_from_other_cfg(backup_cfg) - self.assertGreater( - test_best_results["client_summarized_weighted_avg"]['test_acc'], - 0.9) - - def test_torchtext_dataset_standalone(self): - init_cfg = global_cfg.clone() - backup_cfg = 
self.set_config_torchtext_dataset(init_cfg) - setup_seed(init_cfg.seed) - update_logger(init_cfg, True) - - data, modified_cfg = get_data(init_cfg.clone()) - init_cfg.merge_from_other_cfg(modified_cfg) - self.assertIsNotNone(data) - - Fed_runner = FedRunner(data=data, - server_class=get_server_cls(init_cfg), - client_class=get_client_cls(init_cfg), - config=init_cfg.clone()) - self.assertIsNotNone(Fed_runner) - test_best_results = Fed_runner.run() - print(test_best_results) - init_cfg.merge_from_other_cfg(backup_cfg) - self.assertGreater( - test_best_results["client_summarized_weighted_avg"]['test_acc'], - 0.65) - - -if __name__ == '__main__': - unittest.main() diff --git a/tests/test_fedem.py b/tests/test_fedem.py deleted file mode 100644 index 6cbbf74ad..000000000 --- a/tests/test_fedem.py +++ /dev/null @@ -1,84 +0,0 @@ -# Copyright (c) Alibaba, Inc. and its affiliates. -import unittest - -from federatedscope.core.auxiliaries.data_builder import get_data -from federatedscope.core.auxiliaries.utils import setup_seed -from federatedscope.core.auxiliaries.logging import update_logger -from federatedscope.core.configs.config import global_cfg -from federatedscope.core.fed_runner import FedRunner -from federatedscope.core.auxiliaries.worker_builder import get_server_cls, get_client_cls - - -class FEMNISTTest(unittest.TestCase): - def setUp(self): - print(('Testing %s.%s' % (type(self).__name__, self._testMethodName))) - - def set_config_femnist(self, cfg): - backup_cfg = cfg.clone() - - import torch - cfg.use_gpu = torch.cuda.is_available() - cfg.eval.freq = 10 - cfg.eval.metrics = ['acc', 'loss_regular'] - - cfg.federate.mode = 'standalone' - cfg.train.local_update_steps = 5 - cfg.federate.total_round_num = 20 - cfg.federate.sample_client_num = 5 - cfg.federate.client_num = 10 - - cfg.federate.method = "FedEM" - cfg.model.model_num_per_trainer = 3 - - cfg.data.root = 'test_data/' - cfg.data.type = 'femnist' - cfg.data.splits = [0.6, 0.2, 0.2] - cfg.dataloader.batch_size = 10 - cfg.data.subsample = 0.05 - cfg.data.transform = [['ToTensor'], - [ - 'Normalize', { - 'mean': [0.1307], - 'std': [0.3081] - } - ]] - - cfg.model.type = 'convnet2' - cfg.model.hidden = 2048 - cfg.model.out_channels = 62 - - cfg.train.optimizer.lr = 0.001 - cfg.train.optimizer.weight_decay = 0.0 - cfg.grad.grad_clip = 5.0 - - cfg.criterion.type = 'CrossEntropyLoss' - cfg.trainer.type = 'cvtrainer' - cfg.seed = 123 - - return backup_cfg - - def test_femnist_standalone(self): - init_cfg = global_cfg.clone() - backup_cfg = self.set_config_femnist(init_cfg) - setup_seed(init_cfg.seed) - update_logger(init_cfg, True) - - data, modified_cfg = get_data(init_cfg.clone()) - init_cfg.merge_from_other_cfg(modified_cfg) - self.assertIsNotNone(data) - - Fed_runner = FedRunner(data=data, - server_class=get_server_cls(init_cfg), - client_class=get_client_cls(init_cfg), - config=init_cfg.clone()) - self.assertIsNotNone(Fed_runner) - test_best_results = Fed_runner.run() - print(test_best_results) - init_cfg.merge_from_other_cfg(backup_cfg) - self.assertLess( - test_best_results["client_summarized_weighted_avg"]['test_loss'], - 600) - - -if __name__ == '__main__': - unittest.main() diff --git a/tests/test_fedopt.py b/tests/test_fedopt.py deleted file mode 100644 index 9a96a9656..000000000 --- a/tests/test_fedopt.py +++ /dev/null @@ -1,83 +0,0 @@ -# Copyright (c) Alibaba, Inc. and its affiliates. 
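The MNIST@torchvision and IMDB@torchtext tests above rely on the NAME@PACKAGE convention for cfg.data.type: the string is split on '@', the package half selects the external loader and the name half selects the dataset inside it, after which a splitter partitions the centralized dataset across clients. A sketch of the torchvision variant, using only values that appear in the test:

from federatedscope.core.configs.config import global_cfg

cfg = global_cfg.clone()
cfg.data.root = 'test_data/'
cfg.data.type = 'MNIST@torchvision'    # NAME@PACKAGE routing
cfg.data.args = [{'download': True}]   # forwarded to the dataset constructor
cfg.data.splitter = 'lda'              # Dirichlet (LDA) partition over clients
cfg.data.splitter_args = [{'alpha': 0.5}]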
-import unittest - -from federatedscope.core.auxiliaries.data_builder import get_data -from federatedscope.core.auxiliaries.utils import setup_seed -from federatedscope.core.auxiliaries.logging import update_logger -from federatedscope.core.configs.config import global_cfg -from federatedscope.core.fed_runner import FedRunner -from federatedscope.core.auxiliaries.worker_builder import get_server_cls, get_client_cls - - -class FedOptTest(unittest.TestCase): - def setUp(self): - print(('Testing %s.%s' % (type(self).__name__, self._testMethodName))) - - def set_config_fedopt(self, cfg): - backup_cfg = cfg.clone() - - import torch - cfg.use_gpu = torch.cuda.is_available() - cfg.eval.freq = 10 - cfg.eval.metrics = ['acc'] - - cfg.federate.mode = 'standalone' - cfg.train.local_update_steps = 5 - cfg.federate.total_round_num = 20 - cfg.federate.sample_client_num = 5 - cfg.federate.client_num = 10 - cfg.federate.method = 'FedOpt' - - cfg.data.root = 'test_data/' - cfg.data.type = 'femnist' - cfg.data.splits = [0.6, 0.2, 0.2] - cfg.dataloader.batch_size = 10 - cfg.data.subsample = 0.01 - cfg.data.transform = [['ToTensor'], - [ - 'Normalize', { - 'mean': [0.1307], - 'std': [0.3081] - } - ]] - - cfg.model.type = 'convnet2' - cfg.model.hidden = 2048 - cfg.model.out_channels = 62 - - cfg.train.optimizer.lr = 0.001 - cfg.train.optimizer.weight_decay = 0.0 - - cfg.criterion.type = 'CrossEntropyLoss' - cfg.trainer.type = 'cvtrainer' - cfg.seed = 123 - - cfg.fedopt.use = True - cfg.fedopt.optimizer.lr = 1. - - return backup_cfg - - def test_fedopt_standalone(self): - init_cfg = global_cfg.clone() - backup_cfg = self.set_config_fedopt(init_cfg) - setup_seed(init_cfg.seed) - update_logger(init_cfg, True) - - data, modified_cfg = get_data(init_cfg.clone()) - init_cfg.merge_from_other_cfg(modified_cfg) - self.assertIsNotNone(data) - - Fed_runner = FedRunner(data=data, - server_class=get_server_cls(init_cfg), - client_class=get_client_cls(init_cfg), - config=init_cfg.clone()) - self.assertIsNotNone(Fed_runner) - test_results = Fed_runner.run() - init_cfg.merge_from_other_cfg(backup_cfg) - - self.assertLess( - test_results['client_summarized_weighted_avg']['test_loss'], 600) - - -if __name__ == '__main__': - unittest.main() diff --git a/tests/test_fedprox.py b/tests/test_fedprox.py deleted file mode 100644 index 5d8c8c551..000000000 --- a/tests/test_fedprox.py +++ /dev/null @@ -1,82 +0,0 @@ -# Copyright (c) Alibaba, Inc. and its affiliates. 
-import unittest - -from federatedscope.core.auxiliaries.data_builder import get_data -from federatedscope.core.auxiliaries.utils import setup_seed -from federatedscope.core.auxiliaries.logging import update_logger -from federatedscope.core.configs.config import global_cfg -from federatedscope.core.fed_runner import FedRunner -from federatedscope.core.auxiliaries.worker_builder import get_server_cls, get_client_cls - - -class FedProxTest(unittest.TestCase): - def setUp(self): - print(('Testing %s.%s' % (type(self).__name__, self._testMethodName))) - - def set_config_fedprox(self, cfg): - backup_cfg = cfg.clone() - - import torch - cfg.use_gpu = torch.cuda.is_available() - cfg.eval.freq = 10 - cfg.eval.metrics = ['acc', 'loss_regular'] - - cfg.federate.mode = 'standalone' - cfg.train.local_update_steps = 5 - cfg.federate.total_round_num = 20 - cfg.federate.sample_client_num = 5 - cfg.federate.client_num = 10 - - cfg.data.root = 'test_data/' - cfg.data.type = 'femnist' - cfg.data.splits = [0.6, 0.2, 0.2] - cfg.dataloader.batch_size = 10 - cfg.data.subsample = 0.01 - cfg.data.transform = [['ToTensor'], - [ - 'Normalize', { - 'mean': [0.1307], - 'std': [0.3081] - } - ]] - - cfg.model.type = 'convnet2' - cfg.model.hidden = 2048 - cfg.model.out_channels = 62 - - cfg.train.optimizer.lr = 0.001 - cfg.train.optimizer.weight_decay = 0.0 - - cfg.criterion.type = 'CrossEntropyLoss' - cfg.trainer.type = 'cvtrainer' - cfg.seed = 123 - - cfg.fedprox.use = True - cfg.fedprox.mu = 0.1 - - return backup_cfg - - def test_fedprox_standalone(self): - init_cfg = global_cfg.clone() - backup_cfg = self.set_config_fedprox(init_cfg) - setup_seed(init_cfg.seed) - update_logger(init_cfg, True) - - data, modified_cfg = get_data(init_cfg.clone()) - init_cfg.merge_from_other_cfg(modified_cfg) - self.assertIsNotNone(data) - - Fed_runner = FedRunner(data=data, - server_class=get_server_cls(init_cfg), - client_class=get_client_cls(init_cfg), - config=init_cfg.clone()) - self.assertIsNotNone(Fed_runner) - test_results = Fed_runner.run() - init_cfg.merge_from_other_cfg(backup_cfg) - - self.assertLess( - test_results['client_summarized_weighted_avg']['test_loss'], 600) - - -if __name__ == '__main__': - unittest.main() diff --git a/tests/test_fedsageplus.py b/tests/test_fedsageplus.py deleted file mode 100644 index 223ced6f5..000000000 --- a/tests/test_fedsageplus.py +++ /dev/null @@ -1,77 +0,0 @@ -# Copyright (c) Alibaba, Inc. and its affiliates. 
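FedOpt and FedProx, exercised by the two tests above, differ from the plain FedAvg setup only in a handful of switches; a side-by-side sketch of just those deltas (values verbatim from the tests, one-line glosses added; the tests enable one method at a time, shown together here only for brevity):

from federatedscope.core.configs.config import global_cfg

cfg = global_cfg.clone()

# FedOpt: a server-side optimizer is applied to the aggregated update
cfg.federate.method = 'FedOpt'
cfg.fedopt.use = True
cfg.fedopt.optimizer.lr = 1.

# FedProx: a proximal term with weight mu is added to each local objective
cfg.fedprox.use = True
cfg.fedprox.mu = 0.1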
-import unittest - -from federatedscope.core.auxiliaries.data_builder import get_data -from federatedscope.core.auxiliaries.utils import setup_seed -from federatedscope.core.auxiliaries.logging import update_logger -from federatedscope.core.configs.config import global_cfg -from federatedscope.core.fed_runner import FedRunner -from federatedscope.core.auxiliaries.worker_builder import get_server_cls, get_client_cls - - -class FedSagePlusTest(unittest.TestCase): - def setUp(self): - print(('Testing %s.%s' % (type(self).__name__, self._testMethodName))) - - def set_config_fedsageplus(self, cfg): - backup_cfg = cfg.clone() - - import torch - cfg.use_gpu = torch.cuda.is_available() - - cfg.federate.mode = 'standalone' - cfg.federate.make_global_eval = True - cfg.federate.client_num = 3 - cfg.federate.total_round_num = 10 - cfg.federate.method = 'fedsageplus' - cfg.train.batch_or_epoch = 'epoch' - - cfg.data.root = 'test_data/' - cfg.data.type = 'cora' - cfg.data.splitter = 'louvain' - cfg.dataloader.batch_size = 1 - - cfg.model.type = 'sage' - cfg.model.hidden = 64 - cfg.model.dropout = 0.5 - cfg.model.out_channels = 7 - - cfg.fedsageplus.num_pred = 5 - cfg.fedsageplus.gen_hidden = 64 - cfg.fedsageplus.hide_portion = 0.5 - cfg.fedsageplus.fedgen_epoch = 2 - cfg.fedsageplus.loc_epoch = 1 - cfg.fedsageplus.a = 1.0 - cfg.fedsageplus.b = 1.0 - cfg.fedsageplus.c = 1.0 - - cfg.criterion.type = 'CrossEntropyLoss' - cfg.trainer.type = 'nodefullbatch_trainer' - cfg.eval.metrics = ['acc', 'correct'] - - return backup_cfg - - def test_fedsageplus_standalone(self): - init_cfg = global_cfg.clone() - backup_cfg = self.set_config_fedsageplus(init_cfg) - setup_seed(init_cfg.seed) - update_logger(init_cfg, True) - - data, modified_cfg = get_data(init_cfg.clone()) - init_cfg.merge_from_other_cfg(modified_cfg) - - self.assertIsNotNone(data) - - Fed_runner = FedRunner(data=data, - server_class=get_server_cls(init_cfg), - client_class=get_client_cls(init_cfg), - config=init_cfg.clone()) - self.assertIsNotNone(Fed_runner) - test_best_results = Fed_runner.run() - init_cfg.merge_from_other_cfg(backup_cfg) - self.assertGreater(test_best_results["server_global_eval"]['test_acc'], - 0.7) - - -if __name__ == '__main__': - unittest.main() diff --git a/tests/test_femnist.py b/tests/test_femnist.py deleted file mode 100644 index b78328ff0..000000000 --- a/tests/test_femnist.py +++ /dev/null @@ -1,84 +0,0 @@ -# Copyright (c) Alibaba, Inc. and its affiliates. 
-import unittest - -from federatedscope.core.auxiliaries.data_builder import get_data -from federatedscope.core.auxiliaries.utils import setup_seed -from federatedscope.core.auxiliaries.logging import update_logger -from federatedscope.core.configs.config import global_cfg -from federatedscope.core.fed_runner import FedRunner -from federatedscope.core.auxiliaries.worker_builder import get_server_cls, get_client_cls - -SAMPLE_CLIENT_NUM = 5 - - -class FEMNISTTest(unittest.TestCase): - def setUp(self): - print(('Testing %s.%s' % (type(self).__name__, self._testMethodName))) - - def set_config_femnist(self, cfg): - backup_cfg = cfg.clone() - - import torch - cfg.use_gpu = torch.cuda.is_available() - cfg.eval.freq = 10 - cfg.eval.metrics = ['acc', 'loss_regular'] - - cfg.federate.mode = 'standalone' - cfg.train.local_update_steps = 5 - cfg.federate.total_round_num = 20 - cfg.federate.sample_client_num = SAMPLE_CLIENT_NUM - - cfg.data.root = 'test_data/' - cfg.data.type = 'femnist' - cfg.data.splits = [0.6, 0.2, 0.2] - cfg.dataloader.batch_size = 10 - cfg.data.subsample = 0.05 - cfg.data.transform = [['ToTensor'], - [ - 'Normalize', { - 'mean': [0.1307], - 'std': [0.3081] - } - ]] - - cfg.model.type = 'convnet2' - cfg.model.hidden = 2048 - cfg.model.out_channels = 62 - - cfg.train.optimizer.lr = 0.001 - cfg.train.optimizer.weight_decay = 0.0 - cfg.grad.grad_clip = 5.0 - - cfg.criterion.type = 'CrossEntropyLoss' - cfg.trainer.type = 'cvtrainer' - cfg.seed = 123 - - return backup_cfg - - def test_femnist_standalone(self): - init_cfg = global_cfg.clone() - backup_cfg = self.set_config_femnist(init_cfg) - setup_seed(init_cfg.seed) - update_logger(init_cfg, True) - - data, modified_cfg = get_data(init_cfg.clone()) - init_cfg.merge_from_other_cfg(modified_cfg) - self.assertIsNotNone(data) - self.assertEqual(init_cfg.federate.sample_client_num, - SAMPLE_CLIENT_NUM) - - Fed_runner = FedRunner(data=data, - server_class=get_server_cls(init_cfg), - client_class=get_client_cls(init_cfg), - config=init_cfg.clone()) - self.assertIsNotNone(Fed_runner) - test_best_results = Fed_runner.run() - print(test_best_results) - init_cfg.merge_from_other_cfg(backup_cfg) - self.assertLess( - test_best_results["client_summarized_weighted_avg"]['test_loss'], - 600) - - -if __name__ == '__main__': - unittest.main() diff --git a/tests/test_finetune_lr.py b/tests/test_finetune_lr.py deleted file mode 100644 index 8f6254705..000000000 --- a/tests/test_finetune_lr.py +++ /dev/null @@ -1,59 +0,0 @@ -# Copyright (c) Alibaba, Inc. and its affiliates. 
-import unittest - -from federatedscope.core.auxiliaries.data_builder import get_data -from federatedscope.core.auxiliaries.utils import setup_seed -from federatedscope.core.auxiliaries.logging import update_logger -from federatedscope.core.configs.config import global_cfg -from federatedscope.core.fed_runner import FedRunner -from federatedscope.core.auxiliaries.worker_builder import get_server_cls, get_client_cls - - -class ToyLRTest(unittest.TestCase): - def setUp(self): - print(('Testing %s.%s' % (type(self).__name__, self._testMethodName))) - - def set_config_standalone(self, cfg, make_global_eval=False): - backup_cfg = cfg.clone() - - import torch - cfg.use_gpu = torch.cuda.is_available() - cfg.federate.mode = 'standalone' - cfg.federate.total_round_num = 20 - cfg.federate.make_global_eval = make_global_eval - cfg.federate.client_num = 5 - cfg.eval.freq = 10 - cfg.data.type = 'toy' - cfg.trainer.type = 'general' - cfg.model.type = 'lr' - cfg.finetune.before_eval = True - cfg.finetune.local_update_steps = 5 - - return backup_cfg - - def test_toy_example_standalone(self): - init_cfg = global_cfg.clone() - backup_cfg = self.set_config_standalone(init_cfg) - setup_seed(init_cfg.seed) - update_logger(init_cfg, True) - - data, modified_config = get_data(init_cfg.clone()) - init_cfg.merge_from_other_cfg(modified_config) - - self.assertIsNotNone(data) - - Fed_runner = FedRunner(data=data, - server_class=get_server_cls(init_cfg), - client_class=get_client_cls(init_cfg), - config=init_cfg.clone()) - self.assertIsNotNone(Fed_runner) - test_best_results = Fed_runner.run() - print(test_best_results) - init_cfg.merge_from_other_cfg(backup_cfg) - self.assertLess( - test_best_results["client_summarized_weighted_avg"]['test_loss'], - 0.3) - - -if __name__ == '__main__': - unittest.main() diff --git a/tests/test_global_train_lr.py b/tests/test_global_train_lr.py deleted file mode 100644 index 41167e57a..000000000 --- a/tests/test_global_train_lr.py +++ /dev/null @@ -1,56 +0,0 @@ -# Copyright (c) Alibaba, Inc. and its affiliates. 
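The finetune test above flips a pair of knobs that are easy to miss: with finetune.before_eval set, each client appears to run a few extra local steps on its own data before evaluation, so the reported metrics reflect lightly personalized models rather than the raw global one (this gloss of the flag is inferred from its name and the test, not stated in the patch):

from federatedscope.core.configs.config import global_cfg

cfg = global_cfg.clone()
cfg.finetune.before_eval = True       # finetune locally prior to evaluation
cfg.finetune.local_update_steps = 5   # finetuning steps per client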
-import unittest - -from federatedscope.core.auxiliaries.data_builder import get_data -from federatedscope.core.auxiliaries.utils import setup_seed -from federatedscope.core.auxiliaries.logging import update_logger -from federatedscope.core.configs.config import global_cfg -from federatedscope.core.fed_runner import FedRunner -from federatedscope.core.auxiliaries.worker_builder import get_server_cls, get_client_cls - - -class ToyLRTest(unittest.TestCase): - def setUp(self): - print(('Testing %s.%s' % (type(self).__name__, self._testMethodName))) - - def set_config_standalone(self, cfg, make_global_eval=False): - import torch - cfg.use_gpu = torch.cuda.is_available() - cfg.federate.mode = 'standalone' - cfg.federate.total_round_num = 30 - cfg.federate.make_global_eval = make_global_eval - cfg.federate.client_num = 5 - cfg.eval.freq = 10 - cfg.data.type = 'toy' - cfg.trainer.type = 'general' - cfg.model.type = 'lr' - - cfg.early_stop.patience = 5 - cfg.federate.method = "global" - - def test_toy_example_standalone(self): - init_cfg = global_cfg.clone() - self.set_config_standalone(init_cfg) - - setup_seed(init_cfg.seed) - update_logger(init_cfg, True) - - data, modified_config = get_data(init_cfg.clone()) - init_cfg.merge_from_other_cfg(modified_config) - - self.assertIsNotNone(data) - - Fed_runner = FedRunner(data=data, - server_class=get_server_cls(init_cfg), - client_class=get_client_cls(init_cfg), - config=init_cfg.clone()) - self.assertIsNotNone(Fed_runner) - test_best_results = Fed_runner.run() - print(test_best_results) - self.assertLess( - test_best_results["client_summarized_weighted_avg"]['test_loss'], - 0.3) - - -if __name__ == '__main__': - unittest.main() diff --git a/tests/test_graph_node_trainer.py b/tests/test_graph_node_trainer.py deleted file mode 100644 index e8fae48c7..000000000 --- a/tests/test_graph_node_trainer.py +++ /dev/null @@ -1,71 +0,0 @@ -# Copyright (c) Alibaba, Inc. and its affiliates. 
-import unittest - -from federatedscope.core.auxiliaries.data_builder import get_data -from federatedscope.core.auxiliaries.utils import setup_seed -from federatedscope.core.auxiliaries.logging import update_logger -from federatedscope.core.configs.config import global_cfg -from federatedscope.core.fed_runner import FedRunner -from federatedscope.core.auxiliaries.worker_builder import get_server_cls, get_client_cls - - -class NodeTrainerTest(unittest.TestCase): - def setUp(self): - print(('Testing %s.%s' % (type(self).__name__, self._testMethodName))) - - def set_config_node(self, cfg): - backup_cfg = cfg.clone() - - import torch - cfg.use_gpu = torch.cuda.is_available() - cfg.eval.freq = 10 - cfg.eval.metrics = ['acc', 'correct'] - - cfg.federate.mode = 'standalone' - cfg.federate.total_round_num = 50 - cfg.federate.client_num = 5 - - cfg.data.root = 'test_data/' - cfg.data.type = 'cora' - cfg.dataloader.batch_size = 1 # full batch train - cfg.data.splitter = 'louvain' - - cfg.model.type = 'gcn' - cfg.model.hidden = 64 - cfg.model.dropout = 0.5 - cfg.model.out_channels = 7 - - cfg.train.optimizer.lr = 0.25 - cfg.train.optimizer.weight_decay = 0.0005 - cfg.train.optimizer.type = 'SGD' - - cfg.criterion.type = 'CrossEntropyLoss' - cfg.trainer.type = 'nodefullbatch_trainer' - - return backup_cfg - - def test_node_standalone(self): - init_cfg = global_cfg.clone() - backup_cfg = self.set_config_node(init_cfg) - setup_seed(init_cfg.seed) - update_logger(init_cfg, True) - - data, modified_cfg = get_data(init_cfg.clone()) - init_cfg.merge_from_other_cfg(modified_cfg) - - self.assertIsNotNone(data) - - Fed_runner = FedRunner(data=data, - server_class=get_server_cls(init_cfg), - client_class=get_client_cls(init_cfg), - config=init_cfg.clone()) - self.assertIsNotNone(Fed_runner) - test_best_results = Fed_runner.run() - init_cfg.merge_from_other_cfg(backup_cfg) - self.assertGreater( - test_best_results["client_summarized_weighted_avg"]['test_acc'], - 0.7) - - -if __name__ == '__main__': - unittest.main() diff --git a/tests/test_local_train_lr.py b/tests/test_local_train_lr.py deleted file mode 100644 index f87a96cfa..000000000 --- a/tests/test_local_train_lr.py +++ /dev/null @@ -1,57 +0,0 @@ -# Copyright (c) Alibaba, Inc. and its affiliates. 
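The graph test above illustrates the node-level pattern: a single citation graph (cora) is cut into per-client subgraphs by a community-based splitter, and batch_size = 1 combined with the full-batch trainer means each local step consumes the client's entire subgraph. The graph-specific lines, gathered into one sketch:

from federatedscope.core.configs.config import global_cfg

cfg = global_cfg.clone()
cfg.data.type = 'cora'
cfg.data.splitter = 'louvain'               # community detection -> client subgraphs
cfg.dataloader.batch_size = 1               # one 'batch' is the full local graph
cfg.model.type = 'gcn'
cfg.trainer.type = 'nodefullbatch_trainer'  # full-batch node classification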
-import unittest - -from federatedscope.core.auxiliaries.data_builder import get_data -from federatedscope.core.auxiliaries.utils import setup_seed -from federatedscope.core.auxiliaries.logging import update_logger -from federatedscope.core.configs.config import global_cfg -from federatedscope.core.fed_runner import FedRunner -from federatedscope.core.auxiliaries.worker_builder import get_server_cls, \ - get_client_cls - - -class ToyLRTest(unittest.TestCase): - def setUp(self): - print(('Testing %s.%s' % (type(self).__name__, self._testMethodName))) - - def set_config_standalone(self, cfg, make_global_eval=False): - import torch - cfg.use_gpu = torch.cuda.is_available() - cfg.federate.mode = 'standalone' - cfg.federate.total_round_num = 100 - cfg.federate.make_global_eval = make_global_eval - cfg.federate.client_num = 5 - cfg.eval.freq = 10 - cfg.data.type = 'toy' - cfg.trainer.type = 'general' - cfg.model.type = 'lr' - - cfg.early_stop.patience = 5 - cfg.federate.method = "local" - - def test_toy_example_standalone(self): - init_cfg = global_cfg.clone() - self.set_config_standalone(init_cfg) - - setup_seed(init_cfg.seed) - update_logger(init_cfg, True) - - data, modified_config = get_data(init_cfg.clone()) - init_cfg.merge_from_other_cfg(modified_config) - - self.assertIsNotNone(data) - - Fed_runner = FedRunner(data=data, - server_class=get_server_cls(init_cfg), - client_class=get_client_cls(init_cfg), - config=init_cfg.clone()) - self.assertIsNotNone(Fed_runner) - test_best_results = Fed_runner.run() - print(test_best_results) - self.assertLess( - test_best_results["client_summarized_weighted_avg"]['test_loss'], - 0.3) - - -if __name__ == '__main__': - unittest.main() diff --git a/tests/test_mf.py b/tests/test_mf.py deleted file mode 100644 index 6d242dc17..000000000 --- a/tests/test_mf.py +++ /dev/null @@ -1,71 +0,0 @@ -# Copyright (c) Alibaba, Inc. and its affiliates. -import unittest - -from federatedscope.core.auxiliaries.data_builder import get_data -from federatedscope.core.auxiliaries.utils import setup_seed -from federatedscope.core.auxiliaries.logging import update_logger -from federatedscope.core.configs.config import global_cfg -from federatedscope.core.fed_runner import FedRunner -from federatedscope.core.auxiliaries.worker_builder import get_server_cls, get_client_cls - - -class MFTest(unittest.TestCase): - def setUp(self): - print(('Testing %s.%s' % (type(self).__name__, self._testMethodName))) - - def set_config_movielens1m(self, cfg): - backup_cfg = cfg.clone() - - import torch - cfg.use_gpu = torch.cuda.is_available() - cfg.early_stop_patience = 100 - cfg.eval.best_res_update_round_wise_key = "test_avg_loss" - cfg.eval.freq = 5 - cfg.eval.metrics = [] - - cfg.federate.mode = 'standalone' - cfg.train.local_update_steps = 20 - cfg.federate.total_round_num = 50 - cfg.federate.client_num = 5 - - cfg.data.root = 'test_data/' - cfg.data.type = 'vflmovielens1m' - cfg.dataloader.batch_size = 32 - - cfg.model.type = 'VMFNet' - cfg.model.hidden = 20 - - cfg.train.optimizer.lr = 1. 
- cfg.train.optimizer.weight_decay = 0.0 - - cfg.criterion.type = 'MSELoss' - cfg.trainer.type = 'mftrainer' - cfg.seed = 123 - - return backup_cfg - - def test_mf_standalone(self): - init_cfg = global_cfg.clone() - backup_cfg = self.set_config_movielens1m(init_cfg) - setup_seed(init_cfg.seed) - update_logger(init_cfg, True) - - data, modified_cfg = get_data(init_cfg.clone()) - init_cfg.merge_from_other_cfg(modified_cfg) - self.assertIsNotNone(data) - - Fed_runner = FedRunner(data=data, - server_class=get_server_cls(init_cfg), - client_class=get_client_cls(init_cfg), - config=init_cfg.clone()) - self.assertIsNotNone(Fed_runner) - test_results = Fed_runner.run() - init_cfg.merge_from_other_cfg(backup_cfg) - - self.assertLess( - test_results["client_summarized_weighted_avg"]["test_avg_loss"], - 50) - - -if __name__ == '__main__': - unittest.main() diff --git a/tests/test_nbafl.py b/tests/test_nbafl.py deleted file mode 100644 index 3037825fe..000000000 --- a/tests/test_nbafl.py +++ /dev/null @@ -1,89 +0,0 @@ -# Copyright (c) Alibaba, Inc. and its affiliates. -import unittest - -from federatedscope.core.auxiliaries.data_builder import get_data -from federatedscope.core.auxiliaries.utils import setup_seed -from federatedscope.core.auxiliaries.logging import update_logger -from federatedscope.core.configs.config import global_cfg -from federatedscope.core.fed_runner import FedRunner -from federatedscope.core.auxiliaries.worker_builder import get_server_cls, get_client_cls - - -class NbAFLTest(unittest.TestCase): - def setUp(self): - print(('Testing %s.%s' % (type(self).__name__, self._testMethodName))) - - def set_config_femnist(self, cfg): - backup_cfg = cfg.clone() - - import torch - cfg.use_gpu = torch.cuda.is_available() - cfg.eval.freq = 10 - cfg.eval.metrics = ['acc', 'loss_regular'] - - cfg.federate.mode = 'standalone' - cfg.train.local_update_steps = 5 - cfg.federate.total_round_num = 20 - cfg.federate.sample_client_num = 5 - - cfg.data.root = 'test_data/' - cfg.data.type = 'femnist' - cfg.data.splits = [0.6, 0.2, 0.2] - cfg.dataloader.batch_size = 10 - cfg.data.subsample = 0.01 - cfg.data.transform = [['ToTensor'], - [ - 'Normalize', { - 'mean': [0.1307], - 'std': [0.3081] - } - ]] - - cfg.model.type = 'convnet2' - cfg.model.hidden = 2048 - cfg.model.out_channels = 62 - - cfg.train.optimizer.lr = 0.001 - cfg.train.optimizer.weight_decay = 0.0 - cfg.grad.grad_clip = 5.0 - - cfg.criterion.type = 'CrossEntropyLoss' - cfg.trainer.type = 'cvtrainer' - cfg.seed = 123 - - cfg.nbafl.use = True - cfg.nbafl.w_clip = 0.1 - cfg.nbafl.mu = 0.1 - cfg.nbafl.constant = 2. - cfg.nbafl.epsilon = 50. 
- - return backup_cfg - - def test_nbafl_standalone(self): - # TODO, no need to backup the cfg in all test_xxx.py, as we are now using the init_cfg style - init_cfg = global_cfg.clone() - backup_cfg = self.set_config_femnist(init_cfg) - setup_seed(init_cfg.seed) - update_logger(init_cfg, True) - - data, modified_cfg = get_data(init_cfg.clone()) - init_cfg.merge_from_other_cfg(modified_cfg) - self.assertIsNotNone(data) - # Run on first 10 clients - init_cfg.merge_from_list(['federate.client_num', 10]) - - Fed_runner = FedRunner(data=data, - server_class=get_server_cls(init_cfg), - client_class=get_client_cls(init_cfg), - config=init_cfg.clone()) - self.assertIsNotNone(Fed_runner) - test_best_results = Fed_runner.run() - print(test_best_results) - init_cfg.merge_from_other_cfg(backup_cfg) - self.assertLess( - test_best_results["client_summarized_weighted_avg"]['test_loss'], - 500) - - -if __name__ == '__main__': - unittest.main() diff --git a/tests/test_optimizer.py b/tests/test_optimizer.py deleted file mode 100644 index fdb0cdde4..000000000 --- a/tests/test_optimizer.py +++ /dev/null @@ -1,83 +0,0 @@ -# Copyright (c) Alibaba, Inc. and its affiliates. -import unittest - -from federatedscope.core.auxiliaries.data_builder import get_data -from federatedscope.core.auxiliaries.utils import setup_seed -from federatedscope.core.auxiliaries.logging import update_logger -from federatedscope.core.configs.config import global_cfg -from federatedscope.core.fed_runner import FedRunner -from federatedscope.core.auxiliaries.worker_builder import get_server_cls, get_client_cls - - -class FEMNISTTest(unittest.TestCase): - def setUp(self): - print(('Testing %s.%s' % (type(self).__name__, self._testMethodName))) - - def set_config_femnist(self, cfg): - backup_cfg = cfg.clone() - - import torch - cfg.use_gpu = torch.cuda.is_available() - cfg.eval.freq = 10 - cfg.eval.metrics = ['acc', 'loss_regular'] - - cfg.federate.mode = 'standalone' - cfg.train.local_update_steps = 5 - cfg.federate.total_round_num = 20 - cfg.federate.sample_client_num = 5 - cfg.federate.client_num = 10 - - cfg.data.root = 'test_data/' - cfg.data.type = 'femnist' - cfg.data.splits = [0.6, 0.2, 0.2] - cfg.dataloader.batch_size = 10 - cfg.data.subsample = 0.05 - cfg.data.transform = [['ToTensor'], - [ - 'Normalize', { - 'mean': [0.1307], - 'std': [0.3081] - } - ]] - - cfg.model.type = 'convnet2' - cfg.model.hidden = 2048 - cfg.model.out_channels = 62 - - cfg.train.optimizer.type = "Adam" - cfg.train.optimizer.lr = 0.001 - cfg.train.optimizer.betas = [0.9, 0.999] - cfg.train.optimizer.weight_decay = 0.001 - cfg.grad.grad_clip = 5.0 - - cfg.criterion.type = 'CrossEntropyLoss' - cfg.trainer.type = 'cvtrainer' - cfg.seed = 123 - - return backup_cfg - - def test_femnist_standalone(self): - init_cfg = global_cfg.clone() - backup_cfg = self.set_config_femnist(init_cfg) - setup_seed(init_cfg.seed) - update_logger(init_cfg, True) - - data, modified_cfg = get_data(init_cfg.clone()) - init_cfg.merge_from_other_cfg(modified_cfg) - self.assertIsNotNone(data) - - Fed_runner = FedRunner(data=data, - server_class=get_server_cls(init_cfg), - client_class=get_client_cls(init_cfg), - config=init_cfg.clone()) - self.assertIsNotNone(Fed_runner) - test_best_results = Fed_runner.run() - print(test_best_results) - init_cfg.merge_from_other_cfg(backup_cfg) - self.assertLess( - test_best_results["client_summarized_weighted_avg"]['test_loss'], - 600) - - -if __name__ == '__main__': - unittest.main() diff --git a/tests/test_pfedme.py b/tests/test_pfedme.py deleted file 
mode 100644 index 62a055fd2..000000000 --- a/tests/test_pfedme.py +++ /dev/null @@ -1,86 +0,0 @@ -# Copyright (c) Alibaba, Inc. and its affiliates. -import unittest - -from federatedscope.core.auxiliaries.data_builder import get_data -from federatedscope.core.auxiliaries.utils import setup_seed -from federatedscope.core.auxiliaries.logging import update_logger -from federatedscope.core.configs.config import global_cfg -from federatedscope.core.fed_runner import FedRunner -from federatedscope.core.auxiliaries.worker_builder import get_server_cls, get_client_cls - - -class FEMNISTTest(unittest.TestCase): - def setUp(self): - print(('Testing %s.%s' % (type(self).__name__, self._testMethodName))) - - def set_config_femnist(self, cfg): - backup_cfg = cfg.clone() - - import torch - cfg.use_gpu = torch.cuda.is_available() - cfg.eval.freq = 10 - cfg.eval.metrics = ['acc', 'loss_regular'] - - cfg.federate.mode = 'standalone' - cfg.train.local_update_steps = 5 - cfg.federate.total_round_num = 20 - cfg.federate.sample_client_num = 5 - cfg.federate.client_num = 10 - - cfg.federate.method = "pFedMe" - cfg.personalization.personalization_lr = -1.0 - cfg.personalization_beta = 1.0 - cfg.personalization_K = 3 - - cfg.data.root = 'test_data/' - cfg.data.type = 'femnist' - cfg.data.splits = [0.6, 0.2, 0.2] - cfg.dataloader.batch_size = 10 - cfg.data.subsample = 0.05 - cfg.data.transform = [['ToTensor'], - [ - 'Normalize', { - 'mean': [0.1307], - 'std': [0.3081] - } - ]] - - cfg.model.type = 'convnet2' - cfg.model.hidden = 2048 - cfg.model.out_channels = 62 - - cfg.train.optimizer.lr = 0.001 - cfg.train.optimizer.weight_decay = 0.0 - cfg.grad.grad_clip = 5.0 - - cfg.criterion.type = 'CrossEntropyLoss' - cfg.trainer.type = 'cvtrainer' - cfg.seed = 123 - - return backup_cfg - - def test_femnist_standalone(self): - init_cfg = global_cfg.clone() - backup_cfg = self.set_config_femnist(init_cfg) - setup_seed(init_cfg.seed) - update_logger(init_cfg, True) - - data, modified_cfg = get_data(init_cfg.clone()) - init_cfg.merge_from_other_cfg(modified_cfg) - self.assertIsNotNone(data) - - Fed_runner = FedRunner(data=data, - server_class=get_server_cls(init_cfg), - client_class=get_client_cls(init_cfg), - config=init_cfg.clone()) - self.assertIsNotNone(Fed_runner) - test_best_results = Fed_runner.run() - print(test_best_results) - init_cfg.merge_from_other_cfg(backup_cfg) - self.assertLess( - test_best_results["client_summarized_weighted_avg"] - ['test_avg_loss'], 10) - - -if __name__ == '__main__': - unittest.main() diff --git a/tests/test_rec_IG_opt_attack.py b/tests/test_rec_IG_opt_attack.py deleted file mode 100644 index 3aad2981d..000000000 --- a/tests/test_rec_IG_opt_attack.py +++ /dev/null @@ -1,86 +0,0 @@ -# Copyright (c) Alibaba, Inc. and its affiliates. 
-import unittest - -from federatedscope.core.auxiliaries.data_builder import get_data -from federatedscope.core.auxiliaries.utils import setup_seed -from federatedscope.core.auxiliaries.logging import update_logger -from federatedscope.core.configs.config import global_cfg -from federatedscope.core.fed_runner import FedRunner -from federatedscope.core.auxiliaries.worker_builder import get_server_cls, get_client_cls - - -class RECTest(unittest.TestCase): - def setUp(self): - print(('Testing %s.%s' % (type(self).__name__, self._testMethodName))) - - def set_config_femnist(self, cfg): - backup_cfg = cfg.clone() - - import torch - cfg.use_gpu = torch.cuda.is_available() - cfg.eval.freq = 10 - cfg.eval.metrics = ['acc', 'loss_regular'] - - cfg.federate.mode = 'standalone' - cfg.train.local_update_steps = 1 - cfg.federate.total_round_num = 20 - cfg.federate.sample_client_num = 5 - cfg.federate.client_num = 10 - - cfg.data.root = 'test_data/' - cfg.data.type = 'femnist' - cfg.data.splits = [0.6, 0.2, 0.2] - cfg.dataloader.batch_size = 1 - cfg.data.subsample = 0.01 - cfg.data.transform = [['ToTensor'], - [ - 'Normalize', { - 'mean': [0.1307], - 'std': [0.3081] - } - ]] - - cfg.model.type = 'convnet2' - cfg.model.hidden = 2048 - cfg.model.out_channels = 62 - - cfg.train.optimizer.lr = 0.001 - cfg.train.optimizer.weight_decay = 0.0 - - cfg.criterion.type = 'CrossEntropyLoss' - cfg.trainer.type = 'cvtrainer' - cfg.seed = 123 - - cfg.attack.attack_method = 'IG' - cfg.attack.reconstruct_lr = 0.1 - cfg.attack.reconstruct_optim = 'Adam' - cfg.attack.info_diff_type = 'l2' - cfg.attack.max_ite = 40 - - return backup_cfg - - def test_IG_rec_femnist_standalone(self): - init_cfg = global_cfg.clone() - backup_cfg = self.set_config_femnist(init_cfg) - setup_seed(init_cfg.seed) - update_logger(init_cfg, True) - - data, modified_cfg = get_data(init_cfg.clone()) - init_cfg.merge_from_other_cfg(modified_cfg) - self.assertIsNotNone(data) - - Fed_runner = FedRunner(data=data, - server_class=get_server_cls(init_cfg), - client_class=get_client_cls(init_cfg), - config=init_cfg.clone()) - self.assertIsNotNone(Fed_runner) - test_best_results = Fed_runner.run() - print(test_best_results) - init_cfg.merge_from_other_cfg(backup_cfg) - self.assertLess( - test_best_results["client_summarized_weighted_avg"]['test_loss'], - 600) - - -if __name__ == '__main__': - unittest.main() diff --git a/tests/test_rec_opt_attack.py b/tests/test_rec_opt_attack.py deleted file mode 100644 index 1a22fb292..000000000 --- a/tests/test_rec_opt_attack.py +++ /dev/null @@ -1,92 +0,0 @@ -# Copyright (c) Alibaba, Inc. and its affiliates. 
-import unittest - -from federatedscope.core.auxiliaries.data_builder import get_data -from federatedscope.core.auxiliaries.utils import setup_seed -from federatedscope.core.auxiliaries.logging import update_logger -from federatedscope.core.configs.config import global_cfg -from federatedscope.core.fed_runner import FedRunner -from federatedscope.core.auxiliaries.worker_builder import get_server_cls, get_client_cls - - -class RECTest(unittest.TestCase): - def setUp(self): - print(('Testing %s.%s' % (type(self).__name__, self._testMethodName))) - - def set_config_femnist(self, cfg): - backup_cfg = cfg.clone() - - import torch - cfg.use_gpu = torch.cuda.is_available() - cfg.eval.freq = 10 - cfg.eval.metrics = ['acc', 'loss_regular'] - - cfg.federate.mode = 'standalone' - cfg.train.local_update_steps = 1 - cfg.federate.total_round_num = 20 - cfg.federate.sample_client_num = 5 - cfg.federate.client_num = 10 - - cfg.data.root = 'test_data/' - cfg.data.type = 'femnist' - cfg.data.splits = [0.6, 0.2, 0.2] - cfg.dataloader.batch_size = 1 - cfg.data.subsample = 0.01 - cfg.data.transform = [['ToTensor'], - [ - 'Normalize', { - 'mean': [0.1307], - 'std': [0.3081] - } - ]] - - cfg.model.type = 'convnet2' - cfg.model.hidden = 2048 - cfg.model.out_channels = 62 - - cfg.train.optimizer.lr = 0.001 - cfg.train.optimizer.weight_decay = 0.0 - - cfg.criterion.type = 'CrossEntropyLoss' - cfg.trainer.type = 'cvtrainer' - cfg.seed = 123 - - cfg.attack.attack_method = 'dlg' - cfg.attack.reconstruct_lr = 0.1 - cfg.attack.reconstruct_optim = 'Adam' - cfg.attack.info_diff_type = 'l2' - cfg.attack.max_ite = 40 - - return backup_cfg - - def test_rec_femnist_standalone(self): - init_cfg = global_cfg.clone() - backup_cfg = self.set_config_femnist(init_cfg) - setup_seed(init_cfg.seed) - update_logger(init_cfg, True) - - data, modified_cfg = get_data(init_cfg.clone()) - init_cfg.merge_from_other_cfg(modified_cfg) - self.assertIsNotNone(data) - - # if cfg.attack.attack_method.lower() == 'dlg': - # from federatedscope.attack.worker_as_attacker.server_attacker import PassiveServer - # server_class = PassiveServer - # else: - # server_class = Server - - Fed_runner = FedRunner(data=data, - server_class=get_server_cls(init_cfg), - client_class=get_client_cls(init_cfg), - config=init_cfg.clone()) - self.assertIsNotNone(Fed_runner) - test_best_results = Fed_runner.run() - print(test_best_results) - init_cfg.merge_from_other_cfg(backup_cfg) - self.assertLess( - test_best_results["client_summarized_weighted_avg"]['test_loss'], - 600) - - -if __name__ == '__main__': - unittest.main() diff --git a/tests/test_toy_lr.py b/tests/test_toy_lr.py deleted file mode 100644 index 276997de2..000000000 --- a/tests/test_toy_lr.py +++ /dev/null @@ -1,80 +0,0 @@ -# Copyright (c) Alibaba, Inc. and its affiliates. 
-import unittest - -from federatedscope.core.auxiliaries.data_builder import get_data -from federatedscope.core.auxiliaries.utils import setup_seed -from federatedscope.core.auxiliaries.logging import update_logger -from federatedscope.core.configs.config import global_cfg -from federatedscope.core.fed_runner import FedRunner -from federatedscope.core.auxiliaries.worker_builder import get_server_cls, get_client_cls - - -class ToyLRTest(unittest.TestCase): - def setUp(self): - print(('Testing %s.%s' % (type(self).__name__, self._testMethodName))) - - def set_config_standalone(self, cfg, make_global_eval=False): - backup_cfg = cfg.clone() - - import torch - cfg.use_gpu = torch.cuda.is_available() - cfg.federate.mode = 'standalone' - cfg.federate.total_round_num = 20 - cfg.federate.make_global_eval = make_global_eval - cfg.federate.client_num = 5 - cfg.eval.freq = 10 - cfg.data.type = 'toy' - cfg.trainer.type = 'general' - cfg.model.type = 'lr' - - return backup_cfg - - def test_toy_example_standalone(self): - init_cfg = global_cfg.clone() - backup_cfg = self.set_config_standalone(init_cfg) - setup_seed(init_cfg.seed) - update_logger(init_cfg, True) - - data, modified_config = get_data(init_cfg.clone()) - init_cfg.merge_from_other_cfg(modified_config) - - self.assertIsNotNone(data) - - Fed_runner = FedRunner(data=data, - server_class=get_server_cls(init_cfg), - client_class=get_client_cls(init_cfg), - config=init_cfg.clone()) - self.assertIsNotNone(Fed_runner) - test_best_results = Fed_runner.run() - print(test_best_results) - init_cfg.merge_from_other_cfg(backup_cfg) - self.assertLess( - test_best_results["client_summarized_weighted_avg"]['test_loss'], - 0.3) - - def test_toy_example_standalone_global_eval(self): - init_cfg = global_cfg.clone() - backup_cfg = self.set_config_standalone(init_cfg, - make_global_eval=True) - setup_seed(init_cfg.seed) - update_logger(init_cfg, True) - - data, modified_config = get_data(init_cfg.clone()) - init_cfg.merge_from_other_cfg(modified_config) - - self.assertIsNotNone(data) - - Fed_runner = FedRunner(data=data, - server_class=get_server_cls(init_cfg), - client_class=get_client_cls(init_cfg), - config=init_cfg.clone()) - self.assertIsNotNone(Fed_runner) - test_best_results = Fed_runner.run() - print(test_best_results) - init_cfg.merge_from_other_cfg(backup_cfg) - self.assertLess(test_best_results["server_global_eval"]['test_loss'], - 0.3) - - -if __name__ == '__main__': - unittest.main() diff --git a/tests/test_unseen_clients_lr.py b/tests/test_unseen_clients_lr.py deleted file mode 100644 index db87af090..000000000 --- a/tests/test_unseen_clients_lr.py +++ /dev/null @@ -1,57 +0,0 @@ -# Copyright (c) Alibaba, Inc. and its affiliates. 
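The two toy-LR tests above make the result-key convention explicit: with federate.make_global_eval enabled the server evaluates the global model and results land under 'server_global_eval'; otherwise client-side evaluations are summarized under 'client_summarized_weighted_avg'. A sketch of reading results either way (key names verbatim from the tests; the runner and cfg objects are assumed from the usual harness):

# assumes `runner` (a FedRunner) and `cfg` from the harness pattern above
results = runner.run()
key = ('server_global_eval' if cfg.federate.make_global_eval
       else 'client_summarized_weighted_avg')
print(results[key]['test_loss'])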
-import unittest - -from federatedscope.core.auxiliaries.data_builder import get_data -from federatedscope.core.auxiliaries.utils import setup_seed -from federatedscope.core.auxiliaries.logging import update_logger -from federatedscope.core.configs.config import global_cfg -from federatedscope.core.fed_runner import FedRunner -from federatedscope.core.auxiliaries.worker_builder import get_server_cls, get_client_cls - - -class ToyLRTest(unittest.TestCase): - def setUp(self): - print(('Testing %s.%s' % (type(self).__name__, self._testMethodName))) - - def set_config_standalone(self, cfg, make_global_eval=False): - import torch - cfg.use_gpu = torch.cuda.is_available() - cfg.federate.mode = 'standalone' - cfg.federate.total_round_num = 20 - cfg.federate.make_global_eval = make_global_eval - cfg.federate.client_num = 5 - cfg.federate.unseen_clients_rate = 0.2 # 20% unseen clients - cfg.eval.freq = 10 - cfg.data.type = 'toy' - cfg.trainer.type = 'general' - cfg.model.type = 'lr' - - def test_toy_example_standalone(self): - init_cfg = global_cfg.clone() - self.set_config_standalone(init_cfg) - - setup_seed(init_cfg.seed) - update_logger(init_cfg) - - data, modified_config = get_data(init_cfg.clone()) - init_cfg.merge_from_other_cfg(modified_config) - - self.assertIsNotNone(data) - - Fed_runner = FedRunner(data=data, - server_class=get_server_cls(init_cfg), - client_class=get_client_cls(init_cfg), - config=init_cfg.clone()) - self.assertIsNotNone(Fed_runner) - test_best_results = Fed_runner.run() - print(test_best_results) - self.assertLess( - test_best_results["client_summarized_weighted_avg"]['test_loss'], - 0.3) - self.assertLess( - test_best_results["unseen_client_summarized_weighted_avg"] - ['test_loss'], 0.3) - - -if __name__ == '__main__': - unittest.main() diff --git a/tests/test_vertical_fl.py b/tests/test_vertical_fl.py index dad09038c..24a6fffd4 100644 --- a/tests/test_vertical_fl.py +++ b/tests/test_vertical_fl.py @@ -30,6 +30,7 @@ def set_config(self, cfg): cfg.data.type = 'vertical_fl_data' cfg.data.size = 50 + cfg.dataloader.type = 'raw' cfg.vertical.use = True cfg.vertical.key_size = 256 diff --git a/tests/test_yaml.py b/tests/test_yaml.py deleted file mode 100644 index ff0570377..000000000 --- a/tests/test_yaml.py +++ /dev/null @@ -1,61 +0,0 @@ -# Copyright (c) Alibaba, Inc. and its affiliates. 
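Besides the test deletions, this patch carries one functional tweak: tests/test_vertical_fl.py gains cfg.dataloader.type = 'raw', pointing at the dataloader namespace introduced by the data refactor; 'raw' presumably hands the vertical-FL workers unbatched data instead of wrapping it in a torch DataLoader (that reading is an assumption). The relevant lines of the updated test, together:

from federatedscope.core.configs.config import global_cfg

cfg = global_cfg.clone()
cfg.data.type = 'vertical_fl_data'
cfg.data.size = 50
cfg.dataloader.type = 'raw'   # loader options now live under cfg.dataloader
cfg.vertical.use = True
cfg.vertical.key_size = 256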
-import os -import logging -import unittest - -from federatedscope.core.configs.config import global_cfg - -logger = logging.getLogger(__name__) - - -class YAMLTest(unittest.TestCase): - def setUp(self): - self.exclude_all = ['benchmark', 'scripts'] - self.exclude_file = [ - '.pre-commit-config.yaml', 'meta.yaml', - 'federatedscope/gfl/baseline/isolated_gin_minibatch_on_cikmcup_per_client.yaml', - 'federatedscope/gfl/baseline/fedavg_gin_minibatch_on_cikmcup_per_client.yaml' - ] - self.root = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) - self.exclude_all = [ - os.path.join(self.root, f) for f in self.exclude_all - ] - self.exclude_file = [ - os.path.join(self.root, f) for f in self.exclude_file - ] - print(('Testing %s.%s' % (type(self).__name__, self._testMethodName))) - - def test_yaml(self): - init_cfg = global_cfg.clone() - sign, cont = False, False - for dirpath, _, filenames in os.walk(self.root): - for prefix in self.exclude_all: - if dirpath.startswith(prefix): - cont = True - break - if cont: - cont = False - continue - filenames = [f for f in filenames if f.endswith('.yaml')] - for f in filenames: - yaml_file = os.path.join(dirpath, f) - if yaml_file in self.exclude_file: - continue - try: - init_cfg.merge_from_file(yaml_file) - except KeyError as error: - logger.error( - f"KeyError: {error} in file: {yaml_file.removeprefix(self.root)}" - ) - sign = True - except ValueError as error: - logger.error( - f"ValueError: {error} in file: {yaml_file.removeprefix(self.root)}" - ) - sign = True - init_cfg = global_cfg.clone() - self.assertIs(sign, False, "Yaml check failed.") - - -if __name__ == '__main__': - unittest.main() From a7b2568b89df28454efeb72bdea4b5d3af0128da Mon Sep 17 00:00:00 2001 From: rayrayraykk <18007356109@163.com> Date: Tue, 20 Sep 2022 12:08:15 +0800 Subject: [PATCH 22/39] roll back --- tests/test_CRA_gan_attack.py | 85 ++++++++++ tests/test_MIA_gradient_ascent.py | 93 +++++++++++ tests/test_PIA_toy.py | 61 +++++++ tests/test_asyn_cifar10.py | 260 +++++++++++++++++++++++++++++ tests/test_backdoor_attack.py | 90 ++++++++++ tests/test_ditto.py | 90 ++++++++++ tests/test_efficient_simulation.py | 48 ++++++ tests/test_external_dataset.py | 146 ++++++++++++++++ tests/test_fedem.py | 84 ++++++++++ tests/test_fedopt.py | 83 +++++++++ tests/test_fedprox.py | 82 +++++++++ tests/test_fedsageplus.py | 77 +++++++++ tests/test_femnist.py | 84 ++++++++++ tests/test_finetune_lr.py | 59 +++++++ tests/test_global_train_lr.py | 56 +++++++ tests/test_graph_node_trainer.py | 71 ++++++++ tests/test_local_train_lr.py | 57 +++++++ tests/test_mf.py | 71 ++++++++ tests/test_nbafl.py | 89 ++++++++++ tests/test_optimizer.py | 83 +++++++++ tests/test_pfedme.py | 86 ++++++++++ tests/test_rec_IG_opt_attack.py | 86 ++++++++++ tests/test_rec_opt_attack.py | 92 ++++++++++ tests/test_toy_lr.py | 80 +++++++++ tests/test_unseen_clients_lr.py | 57 +++++++ tests/test_vertical_fl.py | 1 - tests/test_yaml.py | 61 +++++++ 27 files changed, 2231 insertions(+), 1 deletion(-) create mode 100644 tests/test_CRA_gan_attack.py create mode 100644 tests/test_MIA_gradient_ascent.py create mode 100644 tests/test_PIA_toy.py create mode 100644 tests/test_asyn_cifar10.py create mode 100644 tests/test_backdoor_attack.py create mode 100644 tests/test_ditto.py create mode 100644 tests/test_efficient_simulation.py create mode 100644 tests/test_external_dataset.py create mode 100644 tests/test_fedem.py create mode 100644 tests/test_fedopt.py create mode 100644 tests/test_fedprox.py create mode 100644 
tests/test_fedsageplus.py create mode 100644 tests/test_femnist.py create mode 100644 tests/test_finetune_lr.py create mode 100644 tests/test_global_train_lr.py create mode 100644 tests/test_graph_node_trainer.py create mode 100644 tests/test_local_train_lr.py create mode 100644 tests/test_mf.py create mode 100644 tests/test_nbafl.py create mode 100644 tests/test_optimizer.py create mode 100644 tests/test_pfedme.py create mode 100644 tests/test_rec_IG_opt_attack.py create mode 100644 tests/test_rec_opt_attack.py create mode 100644 tests/test_toy_lr.py create mode 100644 tests/test_unseen_clients_lr.py create mode 100644 tests/test_yaml.py diff --git a/tests/test_CRA_gan_attack.py b/tests/test_CRA_gan_attack.py new file mode 100644 index 000000000..6fad02276 --- /dev/null +++ b/tests/test_CRA_gan_attack.py @@ -0,0 +1,85 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +import unittest + +from federatedscope.core.auxiliaries.data_builder import get_data +from federatedscope.core.auxiliaries.utils import setup_seed +from federatedscope.core.auxiliaries.logging import update_logger +from federatedscope.core.configs.config import global_cfg +from federatedscope.core.fed_runner import FedRunner +from federatedscope.core.auxiliaries.worker_builder import get_server_cls, get_client_cls + + +class CRATest(unittest.TestCase): + def setUp(self): + print(('Testing %s.%s' % (type(self).__name__, self._testMethodName))) + + def set_config_femnist(self, cfg): + backup_cfg = cfg.clone() + + import torch + cfg.use_gpu = torch.cuda.is_available() + cfg.device = 0 + cfg.eval.freq = 10 + cfg.eval.metrics = ['acc', 'loss_regular'] + + cfg.federate.mode = 'standalone' + cfg.train.local_update_steps = 5 + cfg.federate.total_round_num = 20 + cfg.federate.sample_client_num = 5 + cfg.federate.client_num = 10 + + cfg.data.root = 'test_data/' + cfg.data.type = 'femnist' + cfg.data.splits = [0.6, 0.2, 0.2] + cfg.data.batch_size = 10 + cfg.data.subsample = 0.01 + cfg.data.transform = [['ToTensor'], + [ + 'Normalize', { + 'mean': [0.1307], + 'std': [0.3081] + } + ]] + + cfg.model.type = 'convnet2' + cfg.model.hidden = 2048 + cfg.model.out_channels = 62 + + cfg.train.optimizer.lr = 0.001 + cfg.train.optimizer.weight_decay = 0.0 + + cfg.criterion.type = 'CrossEntropyLoss' + cfg.trainer.type = 'cvtrainer' + cfg.seed = 123 + + cfg.attack.attack_method = 'gan_attack' + cfg.attack.attack_id = 5 + cfg.attack.target_label_ind = 3 + + return backup_cfg + + def test_CRA_GAN_femnist_standalone(self): + init_cfg = global_cfg.clone() + backup_cfg = self.set_config_femnist(init_cfg) + setup_seed(init_cfg.seed) + update_logger(init_cfg, True) + + data, modified_cfg = get_data(init_cfg.clone()) + init_cfg.merge_from_other_cfg(modified_cfg) + self.assertIsNotNone(data) + + Fed_runner = FedRunner(data=data, + server_class=get_server_cls(init_cfg), + client_class=get_client_cls(init_cfg), + config=init_cfg.clone()) + self.assertIsNotNone(Fed_runner) + test_best_results = Fed_runner.run() + print(test_best_results) + init_cfg.merge_from_other_cfg(backup_cfg) + self.assertLess( + test_best_results["client_summarized_weighted_avg"]['test_loss'], + 600) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/test_MIA_gradient_ascent.py b/tests/test_MIA_gradient_ascent.py new file mode 100644 index 000000000..feb4248cf --- /dev/null +++ b/tests/test_MIA_gradient_ascent.py @@ -0,0 +1,93 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
+import unittest + +from federatedscope.core.auxiliaries.data_builder import get_data +from federatedscope.core.auxiliaries.utils import setup_seed +from federatedscope.core.auxiliaries.logging import update_logger +from federatedscope.core.configs.config import global_cfg +from federatedscope.core.fed_runner import FedRunner +from federatedscope.core.auxiliaries.worker_builder import get_server_cls, get_client_cls + + +class GradAscentTest(unittest.TestCase): + def setUp(self): + print(('Testing %s.%s' % (type(self).__name__, self._testMethodName))) + + def set_config_femnist(self, cfg): + backup_cfg = cfg.clone() + + import torch + cfg.use_gpu = torch.cuda.is_available() + cfg.device = 0 + cfg.eval.freq = 10 + cfg.eval.metrics = ['acc', 'loss_regular'] + + cfg.federate.mode = 'standalone' + cfg.train.local_update_steps = 5 + cfg.federate.total_round_num = 20 + cfg.federate.sample_client_num = 5 + cfg.federate.client_num = 10 + + cfg.data.root = 'test_data/' + cfg.data.type = 'femnist' + cfg.data.splits = [0.6, 0.2, 0.2] + cfg.data.batch_size = 10 + cfg.data.subsample = 0.01 + cfg.data.transform = [['ToTensor'], + [ + 'Normalize', { + 'mean': [0.1307], + 'std': [0.3081] + } + ]] + + cfg.model.type = 'convnet2' + cfg.model.hidden = 2048 + cfg.model.out_channels = 62 + + cfg.train.optimizer.lr = 0.001 + cfg.train.optimizer.weight_decay = 0.0 + + cfg.criterion.type = 'CrossEntropyLoss' + cfg.trainer.type = 'cvtrainer' + cfg.seed = 123 + + cfg.attack.attack_method = 'GradAscent' + cfg.attack.attacker_id = 5 + cfg.attack.inject_round = 0 + + return backup_cfg + + def test_GradAscent_femnist_standalone(self): + init_cfg = global_cfg.clone() + backup_cfg = self.set_config_femnist(init_cfg) + setup_seed(init_cfg.seed) + update_logger(init_cfg, True) + + data, modified_cfg = get_data(init_cfg.clone()) + init_cfg.merge_from_other_cfg(modified_cfg) + self.assertIsNotNone(data) + + Fed_runner = FedRunner(data=data, + server_class=get_server_cls(init_cfg), + client_class=get_client_cls(init_cfg), + config=init_cfg.clone()) + self.assertIsNotNone(Fed_runner) + test_best_results = Fed_runner.run() + print(test_best_results) + + # TODO: use a reasonable metric + self.assertLess( + test_best_results["client_summarized_weighted_avg"]['test_loss'], + 600) + # print(Fed_runner.client.keys()) + target_data_loss = Fed_runner.client[ + init_cfg.attack.attacker_id].trainer.ctx.target_data_loss + self.assertIsNotNone(target_data_loss) + self.assertIn(init_cfg.attack.attacker_id, Fed_runner.client.keys()) + + init_cfg.merge_from_other_cfg(backup_cfg) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/test_PIA_toy.py new file mode 100644 index 000000000..24753ea28 --- /dev/null +++ b/tests/test_PIA_toy.py @@ -0,0 +1,61 @@ +# Copyright (c) Alibaba, Inc. and its affiliates.
+import unittest + +from federatedscope.core.auxiliaries.data_builder import get_data +from federatedscope.core.auxiliaries.utils import setup_seed +from federatedscope.core.auxiliaries.logging import update_logger +from federatedscope.core.configs.config import global_cfg +from federatedscope.core.fed_runner import FedRunner +from federatedscope.core.auxiliaries.worker_builder import get_server_cls, get_client_cls + + +class PIA_ToyLRTest(unittest.TestCase): + def setUp(self): + print(('Testing %s.%s' % (type(self).__name__, self._testMethodName))) + + def set_config_standalone(self, cfg): + backup_cfg = cfg.clone() + + cfg.use_gpu = False + cfg.federate.mode = 'standalone' + cfg.federate.total_round_num = 20 + cfg.federate.client_num = 5 + cfg.eval.freq = 10 + cfg.data.type = 'toy' + cfg.trainer.type = 'general' + cfg.model.type = 'lr' + + cfg.attack.attack_method = 'PassivePIA' + cfg.attack.classifier_PIA = 'svm' + + return backup_cfg + + def test_PIA_toy_standalone(self): + init_cfg = global_cfg.clone() + backup_cfg = self.set_config_standalone(init_cfg) + setup_seed(init_cfg.seed) + update_logger(init_cfg, True) + + data, modified_config = get_data(init_cfg.clone()) + init_cfg.merge_from_other_cfg(modified_config) + + self.assertIsNotNone(data) + + Fed_runner = FedRunner(data=data, + server_class=get_server_cls(init_cfg), + client_class=get_client_cls(init_cfg), + config=init_cfg.clone()) + self.assertIsNotNone(Fed_runner) + test_best_results = Fed_runner.run() + print(test_best_results) + + self.assertLess( + test_best_results["client_summarized_weighted_avg"]['test_loss'], + 0.3) + self.assertIsNotNone(Fed_runner.server.pia_results) + + init_cfg.merge_from_other_cfg(backup_cfg) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/test_asyn_cifar10.py b/tests/test_asyn_cifar10.py new file mode 100644 index 000000000..59a87cad0 --- /dev/null +++ b/tests/test_asyn_cifar10.py @@ -0,0 +1,260 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
+import unittest + +from federatedscope.core.auxiliaries.data_builder import get_data +from federatedscope.core.auxiliaries.utils import setup_seed +from federatedscope.core.auxiliaries.logging import update_logger +from federatedscope.core.configs.config import global_cfg +from federatedscope.core.fed_runner import FedRunner +from federatedscope.core.auxiliaries.worker_builder import get_server_cls, get_client_cls + + +class AsynCIFAR10Test(unittest.TestCase): + def setUp(self): + print(('Testing %s.%s' % (type(self).__name__, self._testMethodName))) + + def set_config_cifar10_goalAchieved_afterReceiving(self, cfg): + backup_cfg = cfg.clone() + + import torch + cfg.use_gpu = torch.cuda.is_available() + cfg.eval.freq = 5 + cfg.eval.metrics = ['acc', 'correct'] + cfg.eval.best_res_update_round_wise_key = 'test_acc' + + cfg.federate.mode = 'standalone' + cfg.federate.total_round_num = 40 + cfg.federate.sample_client_num = 13 + cfg.federate.merge_test_data = True + cfg.federate.share_local_model = False + cfg.federate.client_num = 200 + cfg.federate.sampler = 'group' + cfg.federate.resource_info_file = 'test_data/client_device_capacity' + + cfg.data.root = 'test_data/' + cfg.data.type = 'CIFAR10@torchvision' + cfg.data.args = [{'download': False}] + cfg.data.splits = [0.8, 0.2, 0.2] + cfg.data.batch_size = 10 + cfg.data.subsample = 0.2 + cfg.data.num_workers = 0 + cfg.data.transform = [['ToTensor'], + [ + 'Normalize', { + 'mean': [0.4914, 0.4822, 0.4465], + 'std': [0.247, 0.243, 0.261] + } + ]] + cfg.data.splitter = 'lda' + cfg.data.splitter_args = [{'alpha': 0.2}] + + cfg.model.type = 'convnet2' + cfg.model.hidden = 512 + cfg.model.out_channels = 10 + + cfg.train.local_update_steps = 2 + cfg.train.batch_or_epoch = 'batch' + cfg.train.optimizer.lr = 0.1 + cfg.train.optimizer.weight_decay = 0.0 + cfg.grad.grad_clip = 5.0 + + cfg.criterion.type = 'CrossEntropyLoss' + cfg.trainer.type = 'cvtrainer' + cfg.seed = 123 + + cfg.asyn.use = True + cfg.asyn.overselection = False + cfg.asyn.staleness_discount_factor = 0.2 + cfg.asyn.aggregator = 'goal_achieved' + cfg.asyn.broadcast_manner = 'after_receiving' + cfg.asyn.min_received_num = 10 + cfg.asyn.staleness_toleration = 5 + + return backup_cfg + + def set_config_cifar10_timeUp_afterAggregating(self, cfg): + backup_cfg = cfg.clone() + + import torch + cfg.use_gpu = torch.cuda.is_available() + cfg.eval.freq = 5 + cfg.eval.metrics = ['acc', 'correct'] + cfg.eval.best_res_update_round_wise_key = 'test_acc' + + cfg.federate.mode = 'standalone' + cfg.federate.total_round_num = 40 + cfg.federate.sample_client_num = 13 + cfg.federate.merge_test_data = True + cfg.federate.share_local_model = False + cfg.federate.client_num = 200 + cfg.federate.sampler = 'uniform' + cfg.federate.resource_info_file = 'test_data/client_device_capacity' + + cfg.data.root = 'test_data/' + cfg.data.type = 'CIFAR10@torchvision' + cfg.data.args = [{'download': False}] + cfg.data.splits = [0.8, 0.2, 0.2] + cfg.data.batch_size = 10 + cfg.data.subsample = 0.2 + cfg.data.num_workers = 0 + cfg.data.transform = [['ToTensor'], + [ + 'Normalize', { + 'mean': [0.4914, 0.4822, 0.4465], + 'std': [0.247, 0.243, 0.261] + } + ]] + cfg.data.splitter = 'lda' + cfg.data.splitter_args = [{'alpha': 0.2}] + + cfg.model.type = 'convnet2' + cfg.model.hidden = 512 + cfg.model.out_channels = 10 + + cfg.train.local_update_steps = 2 + cfg.train.batch_or_epoch = 'batch' + cfg.train.optimizer.lr = 0.1 + cfg.train.optimizer.weight_decay = 0.0 + cfg.grad.grad_clip = 5.0 + + cfg.criterion.type = 'CrossEntropyLoss' 
+ cfg.trainer.type = 'cvtrainer' + cfg.seed = 123 + + cfg.asyn.use = True + cfg.asyn.overselection = False + cfg.asyn.staleness_discount_factor = 0.2 + cfg.asyn.aggregator = 'time_up' + cfg.asyn.time_budget = 10 + cfg.asyn.broadcast_manner = 'after_aggregating' + cfg.asyn.min_received_num = 10 + cfg.asyn.staleness_toleration = 5 + + return backup_cfg + + def set_config_cifar10_overselection(self, cfg): + backup_cfg = cfg.clone() + + import torch + cfg.use_gpu = torch.cuda.is_available() + cfg.eval.freq = 5 + cfg.eval.metrics = ['acc', 'correct'] + cfg.eval.best_res_update_round_wise_key = 'test_acc' + + cfg.federate.mode = 'standalone' + cfg.federate.total_round_num = 40 + cfg.federate.sample_client_num = 13 + cfg.federate.merge_test_data = True + cfg.federate.share_local_model = False + cfg.federate.client_num = 200 + cfg.federate.sampler = 'uniform' + cfg.federate.resource_info_file = 'test_data/client_device_capacity' + + cfg.data.root = 'test_data/' + cfg.data.type = 'CIFAR10@torchvision' + cfg.data.args = [{'download': False}] + cfg.data.splits = [0.8, 0.2, 0.2] + cfg.data.batch_size = 10 + cfg.data.subsample = 0.2 + cfg.data.num_workers = 0 + cfg.data.transform = [['ToTensor'], + [ + 'Normalize', { + 'mean': [0.4914, 0.4822, 0.4465], + 'std': [0.247, 0.243, 0.261] + } + ]] + cfg.data.splitter = 'lda' + cfg.data.splitter_args = [{'alpha': 0.2}] + + cfg.model.type = 'convnet2' + cfg.model.hidden = 512 + cfg.model.out_channels = 10 + + cfg.train.local_update_steps = 2 + cfg.train.batch_or_epoch = 'batch' + cfg.train.optimizer.lr = 0.1 + cfg.train.optimizer.weight_decay = 0.0 + cfg.grad.grad_clip = 5.0 + + cfg.criterion.type = 'CrossEntropyLoss' + cfg.trainer.type = 'cvtrainer' + cfg.seed = 123 + + cfg.asyn.use = True + cfg.asyn.overselection = True + cfg.asyn.staleness_discount_factor = 0.2 + cfg.asyn.aggregator = 'goal_achieved' + cfg.asyn.broadcast_manner = 'after_aggregating' + cfg.asyn.min_received_num = 10 + cfg.asyn.staleness_toleration = 0 + + return backup_cfg + + def test_asyn_cifar10_goalAchieved_afterReceiving(self): + init_cfg = global_cfg.clone() + backup_cfg = self.set_config_cifar10_goalAchieved_afterReceiving( + init_cfg) + setup_seed(init_cfg.seed) + update_logger(init_cfg) + + data, modified_cfg = get_data(init_cfg.clone()) + init_cfg.merge_from_other_cfg(modified_cfg) + self.assertIsNotNone(data) + + Fed_runner = FedRunner(data=data, + server_class=get_server_cls(init_cfg), + client_class=get_client_cls(init_cfg), + config=init_cfg.clone()) + self.assertIsNotNone(Fed_runner) + test_best_results = Fed_runner.run() + print(test_best_results) + init_cfg.merge_from_other_cfg(backup_cfg) + self.assertGreater(test_best_results['server_global_eval']['test_acc'], + 0.15) + + def test_asyn_cifar10_timeUp_afterAggregating(self): + init_cfg = global_cfg.clone() + backup_cfg = self.set_config_cifar10_timeUp_afterAggregating(init_cfg) + setup_seed(init_cfg.seed) + update_logger(init_cfg) + + data, modified_cfg = get_data(init_cfg.clone()) + init_cfg.merge_from_other_cfg(modified_cfg) + self.assertIsNotNone(data) + + Fed_runner = FedRunner(data=data, + server_class=get_server_cls(init_cfg), + client_class=get_client_cls(init_cfg), + config=init_cfg.clone()) + self.assertIsNotNone(Fed_runner) + test_best_results = Fed_runner.run() + print(test_best_results) + init_cfg.merge_from_other_cfg(backup_cfg) + self.assertGreater(test_best_results['server_global_eval']['test_acc'], + 0.15) + + def test_asyn_cifar10_overselection(self): + init_cfg = global_cfg.clone() + backup_cfg = 
self.set_config_cifar10_overselection(init_cfg)
+        setup_seed(init_cfg.seed)
+        update_logger(init_cfg)
+
+        data, modified_cfg = get_data(init_cfg.clone())
+        init_cfg.merge_from_other_cfg(modified_cfg)
+        self.assertIsNotNone(data)
+
+        Fed_runner = FedRunner(data=data,
+                               server_class=get_server_cls(init_cfg),
+                               client_class=get_client_cls(init_cfg),
+                               config=init_cfg.clone())
+        self.assertIsNotNone(Fed_runner)
+        test_best_results = Fed_runner.run()
+        print(test_best_results)
+        init_cfg.merge_from_other_cfg(backup_cfg)
+        self.assertGreater(test_best_results['server_global_eval']['test_acc'],
+                           0.15)
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/tests/test_backdoor_attack.py b/tests/test_backdoor_attack.py
new file mode 100644
index 000000000..1d7a97b43
--- /dev/null
+++ b/tests/test_backdoor_attack.py
@@ -0,0 +1,90 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
+import unittest
+
+from federatedscope.core.auxiliaries.data_builder import get_data
+from federatedscope.core.auxiliaries.utils import setup_seed
+from federatedscope.core.auxiliaries.logging import update_logger
+from federatedscope.core.configs.config import global_cfg
+from federatedscope.core.fed_runner import FedRunner
+from federatedscope.core.auxiliaries.worker_builder import get_server_cls, get_client_cls
+
+
+class Backdoor_Attack(unittest.TestCase):
+    def setUp(self):
+        print(('Testing %s.%s' % (type(self).__name__, self._testMethodName)))
+
+    def set_config_femnist(self, cfg):
+        backup_cfg = cfg.clone()
+
+        import torch
+        cfg.use_gpu = torch.cuda.is_available()
+        cfg.device = 0
+        cfg.eval.freq = 1
+        cfg.eval.metrics = ['acc', 'correct', 'poison_attack_acc']
+
+        cfg.early_stop.patience = 0
+        cfg.federate.mode = 'standalone'
+        cfg.train.batch_or_epoch = 'epoch'
+        cfg.train.local_update_steps = 2
+        cfg.federate.total_round_num = 10
+        cfg.federate.sample_client_num = 20
+        cfg.federate.client_num = 200
+
+        cfg.data.root = 'test_data/'
+        cfg.data.type = 'femnist'
+        cfg.data.splits = [0.6, 0.2, 0.2]
+        cfg.data.batch_size = 32
+        cfg.data.subsample = 0.05
+        cfg.data.transform = [['ToTensor']]
+
+        cfg.model.type = 'convnet2'
+        cfg.model.hidden = 2048
+        cfg.model.out_channels = 62
+
+        cfg.train.optimizer.lr = 0.1
+        cfg.train.optimizer.weight_decay = 0.0
+
+        cfg.criterion.type = 'CrossEntropyLoss'
+        cfg.trainer.type = 'cvtrainer'
+        cfg.seed = 123
+
+        cfg.attack.attack_method = 'backdoor'
+        cfg.attack.attacker_id = -1
+        cfg.attack.inject_round = 0
+        cfg.attack.setting = 'fix'
+        cfg.attack.freq = 10
+        cfg.attack.label_type = 'dirty'
+        cfg.attack.trigger_type = 'gridTrigger'
+        cfg.attack.target_label_ind = 1
+        cfg.attack.mean = [0.1307]
+        cfg.attack.std = [0.3081]
+
+        return backup_cfg
+
+    def test_backdoor_edge_femnist_standalone(self):
+        init_cfg = global_cfg.clone()
+        backup_cfg = self.set_config_femnist(init_cfg)
+        setup_seed(init_cfg.seed)
+        update_logger(init_cfg)
+
+        data, modified_cfg = get_data(init_cfg.clone())
+        init_cfg.merge_from_other_cfg(modified_cfg)
+        self.assertIsNotNone(data)
+
+        Fed_runner = FedRunner(data=data,
+                               server_class=get_server_cls(init_cfg),
+                               client_class=get_client_cls(init_cfg),
+                               config=init_cfg.clone())
+        self.assertIsNotNone(Fed_runner)
+        test_best_results = Fed_runner.run()
+        print(test_best_results)
+
+        # TODO: use a reasonable metric
+        self.assertGreater(
+            test_best_results["client_summarized_weighted_avg"]['test_acc'],
+            0.1)
+        init_cfg.merge_from_other_cfg(backup_cfg)
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/tests/test_ditto.py b/tests/test_ditto.py
new file mode 100644 index 000000000..07cd8710a --- /dev/null +++ b/tests/test_ditto.py @@ -0,0 +1,90 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +import unittest + +from federatedscope.core.auxiliaries.data_builder import get_data +from federatedscope.core.auxiliaries.utils import setup_seed +from federatedscope.core.auxiliaries.logging import update_logger +from federatedscope.core.configs.config import global_cfg +from federatedscope.core.fed_runner import FedRunner +from federatedscope.core.auxiliaries.worker_builder import get_server_cls, get_client_cls + + +class FEMNISTTest(unittest.TestCase): + def setUp(self): + print(('Testing %s.%s' % (type(self).__name__, self._testMethodName))) + + def set_config_femnist(self, cfg): + backup_cfg = cfg.clone() + + import torch + cfg.use_gpu = torch.cuda.is_available() + cfg.eval.freq = 10 + cfg.eval.metrics = ['acc', 'loss_regular'] + + cfg.federate.mode = 'standalone' + cfg.train.local_update_steps = 5 + cfg.federate.total_round_num = 20 + cfg.federate.sample_client_num = 5 + cfg.federate.client_num = 10 + + cfg.federate.method = "Ditto" + cfg.personalization.regular_weight = 0.1 + + # test in epoch mode + # cfg.train.local_update_steps = 1 + # cfg.train.batch_or_epoch = 'epoch' + # cfg.personalization.local_update_steps = 1 + + cfg.data.root = 'test_data/' + cfg.data.type = 'femnist' + cfg.data.splits = [0.6, 0.2, 0.2] + cfg.data.batch_size = 10 + cfg.data.subsample = 0.05 + cfg.data.transform = [['ToTensor'], + [ + 'Normalize', { + 'mean': [0.1307], + 'std': [0.3081] + } + ]] + + cfg.model.type = 'convnet2' + cfg.model.hidden = 2048 + cfg.model.out_channels = 62 + + cfg.train.optimizer.lr = 0.001 + cfg.train.optimizer.weight_decay = 0.0 + cfg.grad.grad_clip = 5.0 + + cfg.criterion.type = 'CrossEntropyLoss' + cfg.trainer.type = 'cvtrainer' + cfg.seed = 123 + + return backup_cfg + + def test_femnist_standalone(self): + init_cfg = global_cfg.clone() + + backup_cfg = self.set_config_femnist(init_cfg) + setup_seed(init_cfg.seed) + update_logger(init_cfg, True) + + data, modified_cfg = get_data(init_cfg.clone()) + init_cfg.merge_from_other_cfg(modified_cfg) + self.assertIsNotNone(data) + + Fed_runner = FedRunner(data=data, + server_class=get_server_cls(init_cfg), + client_class=get_client_cls(init_cfg), + config=init_cfg.clone()) + self.assertIsNotNone(Fed_runner) + test_best_results = Fed_runner.run() + print(test_best_results) + init_cfg.merge_from_other_cfg(backup_cfg) + self.assertLess( + test_best_results["client_summarized_weighted_avg"] + ['test_avg_loss'], 10) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/test_efficient_simulation.py b/tests/test_efficient_simulation.py new file mode 100644 index 000000000..8ec927b51 --- /dev/null +++ b/tests/test_efficient_simulation.py @@ -0,0 +1,48 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
+import unittest + +from federatedscope.core.auxiliaries.data_builder import get_data +from federatedscope.core.auxiliaries.utils import setup_seed +from federatedscope.core.auxiliaries.logging import update_logger +from federatedscope.core.configs.config import global_cfg +from federatedscope.core.fed_runner import FedRunner +from federatedscope.core.auxiliaries.worker_builder import get_server_cls, get_client_cls + + +class EfficientSimulationTest(unittest.TestCase): + def setUp(self): + print(('Testing %s.%s' % (type(self).__name__, self._testMethodName))) + + def test_toy_example_standalone_cmp_sim_impl(self): + case_cfg = global_cfg.clone() + case_cfg.merge_from_file('scripts/example_configs/single_process.yaml') + + setup_seed(case_cfg.seed) + update_logger(case_cfg) + + data, _ = get_data(case_cfg.clone()) + Fed_runner = FedRunner(data=data, + server_class=get_server_cls(case_cfg), + client_class=get_client_cls(case_cfg), + config=case_cfg.clone()) + efficient_test_results = Fed_runner.run() + + setup_seed(case_cfg.seed) + case_cfg.merge_from_list([ + 'federate.share_local_model', 'False', 'federate.online_aggr', + 'False' + ]) + data, _ = get_data(case_cfg.clone()) + Fed_runner = FedRunner(data=data, + server_class=get_server_cls(case_cfg), + client_class=get_client_cls(case_cfg), + config=case_cfg.clone()) + ordinary_test_results = Fed_runner.run() + gap = efficient_test_results["client_summarized_weighted_avg"][ + 'test_loss'] - ordinary_test_results[ + "client_summarized_weighted_avg"]['test_loss'] + self.assertLess(gap, 0.1) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/test_external_dataset.py b/tests/test_external_dataset.py new file mode 100644 index 000000000..7d243c70d --- /dev/null +++ b/tests/test_external_dataset.py @@ -0,0 +1,146 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
+import unittest + +from federatedscope.core.auxiliaries.data_builder import get_data +from federatedscope.core.auxiliaries.utils import setup_seed +from federatedscope.core.auxiliaries.logging import update_logger +from federatedscope.core.configs.config import global_cfg +from federatedscope.core.fed_runner import FedRunner +from federatedscope.core.auxiliaries.worker_builder import get_server_cls, get_client_cls + + +class ExternalDatasetTest(unittest.TestCase): + def setUp(self): + print(('Testing %s.%s' % (type(self).__name__, self._testMethodName))) + + def set_config_torchvision_dataset(self, cfg): + backup_cfg = cfg.clone() + + import torch + cfg.use_gpu = torch.cuda.is_available() + cfg.eval.freq = 10 + cfg.eval.metrics = ['acc'] + + cfg.federate.mode = 'standalone' + cfg.train.local_update_steps = 1 + cfg.federate.total_round_num = 20 + cfg.train.batch_or_epoch = 'epoch' + cfg.federate.client_num = 5 + cfg.federate.sample_client_rate = 0.2 + + cfg.data.root = 'test_data/' + cfg.data.type = 'MNIST@torchvision' + cfg.data.args = [{'download': True}] + cfg.data.splits = [0.6, 0.2, 0.2] + cfg.data.batch_size = 10 + cfg.data.transform = [['ToTensor'], + [ + 'Normalize', { + 'mean': [0.1307], + 'std': [0.3081] + } + ]] + cfg.data.splitter = 'lda' + cfg.data.splitter_args = [{'alpha': 0.5}] + + cfg.model.type = 'convnet2' + cfg.model.hidden = 2048 + cfg.model.out_channels = 10 + + cfg.train.optimizer.lr = 0.01 + cfg.train.optimizer.weight_decay = 0.0 + cfg.grad.grad_clip = 5.0 + + cfg.criterion.type = 'CrossEntropyLoss' + cfg.trainer.type = 'cvtrainer' + cfg.seed = 12345 + + return backup_cfg + + def set_config_torchtext_dataset(self, cfg): + backup_cfg = cfg.clone() + + import torch + cfg.use_gpu = torch.cuda.is_available() + cfg.eval.freq = 10 + cfg.eval.metrics = ['acc'] + + cfg.federate.mode = 'standalone' + cfg.train.local_update_steps = 1 + cfg.federate.total_round_num = 20 + cfg.train.batch_or_epoch = 'epoch' + cfg.federate.client_num = 5 + cfg.federate.sample_client_rate = 0.2 + + cfg.data.root = 'test_data/' + cfg.data.args = [{'max_len': 100}] + cfg.data.type = 'IMDB@torchtext' + cfg.data.splits = [0.6, 0.2, 0.2] + cfg.data.batch_size = 10 + cfg.data.transform = ['GloVe', {'cache': 'test_data/', 'name': '6B'}] + cfg.data.splitter = 'lda' + cfg.data.splitter_args = [{'alpha': 0.5}] + + cfg.model.type = 'lstm' + cfg.model.task = 'SequenceClassification' + cfg.model.hidden = 256 + cfg.model.in_channels = 300 + cfg.model.embed_size = 0 + cfg.model.out_channels = 2 + + cfg.train.optimizer.lr = 0.8 + cfg.train.optimizer.weight_decay = 0.0 + + cfg.criterion.type = 'CrossEntropyLoss' + cfg.trainer.type = 'nlptrainer' + cfg.seed = 12345 + + return backup_cfg + + def test_torchvision_dataset_standalone(self): + init_cfg = global_cfg.clone() + backup_cfg = self.set_config_torchvision_dataset(init_cfg) + setup_seed(init_cfg.seed) + update_logger(init_cfg, True) + + data, modified_cfg = get_data(init_cfg.clone()) + init_cfg.merge_from_other_cfg(modified_cfg) + self.assertIsNotNone(data) + + Fed_runner = FedRunner(data=data, + server_class=get_server_cls(init_cfg), + client_class=get_client_cls(init_cfg), + config=init_cfg.clone()) + self.assertIsNotNone(Fed_runner) + test_best_results = Fed_runner.run() + print(test_best_results) + init_cfg.merge_from_other_cfg(backup_cfg) + self.assertGreater( + test_best_results["client_summarized_weighted_avg"]['test_acc'], + 0.9) + + def test_torchtext_dataset_standalone(self): + init_cfg = global_cfg.clone() + backup_cfg = 
self.set_config_torchtext_dataset(init_cfg) + setup_seed(init_cfg.seed) + update_logger(init_cfg, True) + + data, modified_cfg = get_data(init_cfg.clone()) + init_cfg.merge_from_other_cfg(modified_cfg) + self.assertIsNotNone(data) + + Fed_runner = FedRunner(data=data, + server_class=get_server_cls(init_cfg), + client_class=get_client_cls(init_cfg), + config=init_cfg.clone()) + self.assertIsNotNone(Fed_runner) + test_best_results = Fed_runner.run() + print(test_best_results) + init_cfg.merge_from_other_cfg(backup_cfg) + self.assertGreater( + test_best_results["client_summarized_weighted_avg"]['test_acc'], + 0.65) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/test_fedem.py b/tests/test_fedem.py new file mode 100644 index 000000000..d3de07e18 --- /dev/null +++ b/tests/test_fedem.py @@ -0,0 +1,84 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +import unittest + +from federatedscope.core.auxiliaries.data_builder import get_data +from federatedscope.core.auxiliaries.utils import setup_seed +from federatedscope.core.auxiliaries.logging import update_logger +from federatedscope.core.configs.config import global_cfg +from federatedscope.core.fed_runner import FedRunner +from federatedscope.core.auxiliaries.worker_builder import get_server_cls, get_client_cls + + +class FEMNISTTest(unittest.TestCase): + def setUp(self): + print(('Testing %s.%s' % (type(self).__name__, self._testMethodName))) + + def set_config_femnist(self, cfg): + backup_cfg = cfg.clone() + + import torch + cfg.use_gpu = torch.cuda.is_available() + cfg.eval.freq = 10 + cfg.eval.metrics = ['acc', 'loss_regular'] + + cfg.federate.mode = 'standalone' + cfg.train.local_update_steps = 5 + cfg.federate.total_round_num = 20 + cfg.federate.sample_client_num = 5 + cfg.federate.client_num = 10 + + cfg.federate.method = "FedEM" + cfg.model.model_num_per_trainer = 3 + + cfg.data.root = 'test_data/' + cfg.data.type = 'femnist' + cfg.data.splits = [0.6, 0.2, 0.2] + cfg.data.batch_size = 10 + cfg.data.subsample = 0.05 + cfg.data.transform = [['ToTensor'], + [ + 'Normalize', { + 'mean': [0.1307], + 'std': [0.3081] + } + ]] + + cfg.model.type = 'convnet2' + cfg.model.hidden = 2048 + cfg.model.out_channels = 62 + + cfg.train.optimizer.lr = 0.001 + cfg.train.optimizer.weight_decay = 0.0 + cfg.grad.grad_clip = 5.0 + + cfg.criterion.type = 'CrossEntropyLoss' + cfg.trainer.type = 'cvtrainer' + cfg.seed = 123 + + return backup_cfg + + def test_femnist_standalone(self): + init_cfg = global_cfg.clone() + backup_cfg = self.set_config_femnist(init_cfg) + setup_seed(init_cfg.seed) + update_logger(init_cfg, True) + + data, modified_cfg = get_data(init_cfg.clone()) + init_cfg.merge_from_other_cfg(modified_cfg) + self.assertIsNotNone(data) + + Fed_runner = FedRunner(data=data, + server_class=get_server_cls(init_cfg), + client_class=get_client_cls(init_cfg), + config=init_cfg.clone()) + self.assertIsNotNone(Fed_runner) + test_best_results = Fed_runner.run() + print(test_best_results) + init_cfg.merge_from_other_cfg(backup_cfg) + self.assertLess( + test_best_results["client_summarized_weighted_avg"]['test_loss'], + 600) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/test_fedopt.py b/tests/test_fedopt.py new file mode 100644 index 000000000..0d598e2c3 --- /dev/null +++ b/tests/test_fedopt.py @@ -0,0 +1,83 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
+import unittest + +from federatedscope.core.auxiliaries.data_builder import get_data +from federatedscope.core.auxiliaries.utils import setup_seed +from federatedscope.core.auxiliaries.logging import update_logger +from federatedscope.core.configs.config import global_cfg +from federatedscope.core.fed_runner import FedRunner +from federatedscope.core.auxiliaries.worker_builder import get_server_cls, get_client_cls + + +class FedOptTest(unittest.TestCase): + def setUp(self): + print(('Testing %s.%s' % (type(self).__name__, self._testMethodName))) + + def set_config_fedopt(self, cfg): + backup_cfg = cfg.clone() + + import torch + cfg.use_gpu = torch.cuda.is_available() + cfg.eval.freq = 10 + cfg.eval.metrics = ['acc'] + + cfg.federate.mode = 'standalone' + cfg.train.local_update_steps = 5 + cfg.federate.total_round_num = 20 + cfg.federate.sample_client_num = 5 + cfg.federate.client_num = 10 + cfg.federate.method = 'FedOpt' + + cfg.data.root = 'test_data/' + cfg.data.type = 'femnist' + cfg.data.splits = [0.6, 0.2, 0.2] + cfg.data.batch_size = 10 + cfg.data.subsample = 0.01 + cfg.data.transform = [['ToTensor'], + [ + 'Normalize', { + 'mean': [0.1307], + 'std': [0.3081] + } + ]] + + cfg.model.type = 'convnet2' + cfg.model.hidden = 2048 + cfg.model.out_channels = 62 + + cfg.train.optimizer.lr = 0.001 + cfg.train.optimizer.weight_decay = 0.0 + + cfg.criterion.type = 'CrossEntropyLoss' + cfg.trainer.type = 'cvtrainer' + cfg.seed = 123 + + cfg.fedopt.use = True + cfg.fedopt.optimizer.lr = 1. + + return backup_cfg + + def test_fedopt_standalone(self): + init_cfg = global_cfg.clone() + backup_cfg = self.set_config_fedopt(init_cfg) + setup_seed(init_cfg.seed) + update_logger(init_cfg, True) + + data, modified_cfg = get_data(init_cfg.clone()) + init_cfg.merge_from_other_cfg(modified_cfg) + self.assertIsNotNone(data) + + Fed_runner = FedRunner(data=data, + server_class=get_server_cls(init_cfg), + client_class=get_client_cls(init_cfg), + config=init_cfg.clone()) + self.assertIsNotNone(Fed_runner) + test_results = Fed_runner.run() + init_cfg.merge_from_other_cfg(backup_cfg) + + self.assertLess( + test_results['client_summarized_weighted_avg']['test_loss'], 600) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/test_fedprox.py b/tests/test_fedprox.py new file mode 100644 index 000000000..10ce8b583 --- /dev/null +++ b/tests/test_fedprox.py @@ -0,0 +1,82 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
+import unittest + +from federatedscope.core.auxiliaries.data_builder import get_data +from federatedscope.core.auxiliaries.utils import setup_seed +from federatedscope.core.auxiliaries.logging import update_logger +from federatedscope.core.configs.config import global_cfg +from federatedscope.core.fed_runner import FedRunner +from federatedscope.core.auxiliaries.worker_builder import get_server_cls, get_client_cls + + +class FedProxTest(unittest.TestCase): + def setUp(self): + print(('Testing %s.%s' % (type(self).__name__, self._testMethodName))) + + def set_config_fedprox(self, cfg): + backup_cfg = cfg.clone() + + import torch + cfg.use_gpu = torch.cuda.is_available() + cfg.eval.freq = 10 + cfg.eval.metrics = ['acc', 'loss_regular'] + + cfg.federate.mode = 'standalone' + cfg.train.local_update_steps = 5 + cfg.federate.total_round_num = 20 + cfg.federate.sample_client_num = 5 + cfg.federate.client_num = 10 + + cfg.data.root = 'test_data/' + cfg.data.type = 'femnist' + cfg.data.splits = [0.6, 0.2, 0.2] + cfg.data.batch_size = 10 + cfg.data.subsample = 0.01 + cfg.data.transform = [['ToTensor'], + [ + 'Normalize', { + 'mean': [0.1307], + 'std': [0.3081] + } + ]] + + cfg.model.type = 'convnet2' + cfg.model.hidden = 2048 + cfg.model.out_channels = 62 + + cfg.train.optimizer.lr = 0.001 + cfg.train.optimizer.weight_decay = 0.0 + + cfg.criterion.type = 'CrossEntropyLoss' + cfg.trainer.type = 'cvtrainer' + cfg.seed = 123 + + cfg.fedprox.use = True + cfg.fedprox.mu = 0.1 + + return backup_cfg + + def test_fedprox_standalone(self): + init_cfg = global_cfg.clone() + backup_cfg = self.set_config_fedprox(init_cfg) + setup_seed(init_cfg.seed) + update_logger(init_cfg, True) + + data, modified_cfg = get_data(init_cfg.clone()) + init_cfg.merge_from_other_cfg(modified_cfg) + self.assertIsNotNone(data) + + Fed_runner = FedRunner(data=data, + server_class=get_server_cls(init_cfg), + client_class=get_client_cls(init_cfg), + config=init_cfg.clone()) + self.assertIsNotNone(Fed_runner) + test_results = Fed_runner.run() + init_cfg.merge_from_other_cfg(backup_cfg) + + self.assertLess( + test_results['client_summarized_weighted_avg']['test_loss'], 600) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/test_fedsageplus.py b/tests/test_fedsageplus.py new file mode 100644 index 000000000..44fa2c680 --- /dev/null +++ b/tests/test_fedsageplus.py @@ -0,0 +1,77 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
+import unittest + +from federatedscope.core.auxiliaries.data_builder import get_data +from federatedscope.core.auxiliaries.utils import setup_seed +from federatedscope.core.auxiliaries.logging import update_logger +from federatedscope.core.configs.config import global_cfg +from federatedscope.core.fed_runner import FedRunner +from federatedscope.core.auxiliaries.worker_builder import get_server_cls, get_client_cls + + +class FedSagePlusTest(unittest.TestCase): + def setUp(self): + print(('Testing %s.%s' % (type(self).__name__, self._testMethodName))) + + def set_config_fedsageplus(self, cfg): + backup_cfg = cfg.clone() + + import torch + cfg.use_gpu = torch.cuda.is_available() + + cfg.federate.mode = 'standalone' + cfg.federate.make_global_eval = True + cfg.federate.client_num = 3 + cfg.federate.total_round_num = 10 + cfg.federate.method = 'fedsageplus' + cfg.train.batch_or_epoch = 'epoch' + + cfg.data.root = 'test_data/' + cfg.data.type = 'cora' + cfg.data.splitter = 'louvain' + cfg.data.batch_size = 1 + + cfg.model.type = 'sage' + cfg.model.hidden = 64 + cfg.model.dropout = 0.5 + cfg.model.out_channels = 7 + + cfg.fedsageplus.num_pred = 5 + cfg.fedsageplus.gen_hidden = 64 + cfg.fedsageplus.hide_portion = 0.5 + cfg.fedsageplus.fedgen_epoch = 2 + cfg.fedsageplus.loc_epoch = 1 + cfg.fedsageplus.a = 1.0 + cfg.fedsageplus.b = 1.0 + cfg.fedsageplus.c = 1.0 + + cfg.criterion.type = 'CrossEntropyLoss' + cfg.trainer.type = 'nodefullbatch_trainer' + cfg.eval.metrics = ['acc', 'correct'] + + return backup_cfg + + def test_fedsageplus_standalone(self): + init_cfg = global_cfg.clone() + backup_cfg = self.set_config_fedsageplus(init_cfg) + setup_seed(init_cfg.seed) + update_logger(init_cfg, True) + + data, modified_cfg = get_data(init_cfg.clone()) + init_cfg.merge_from_other_cfg(modified_cfg) + + self.assertIsNotNone(data) + + Fed_runner = FedRunner(data=data, + server_class=get_server_cls(init_cfg), + client_class=get_client_cls(init_cfg), + config=init_cfg.clone()) + self.assertIsNotNone(Fed_runner) + test_best_results = Fed_runner.run() + init_cfg.merge_from_other_cfg(backup_cfg) + self.assertGreater(test_best_results["server_global_eval"]['test_acc'], + 0.7) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/test_femnist.py b/tests/test_femnist.py new file mode 100644 index 000000000..ad99f693e --- /dev/null +++ b/tests/test_femnist.py @@ -0,0 +1,84 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
+import unittest + +from federatedscope.core.auxiliaries.data_builder import get_data +from federatedscope.core.auxiliaries.utils import setup_seed +from federatedscope.core.auxiliaries.logging import update_logger +from federatedscope.core.configs.config import global_cfg +from federatedscope.core.fed_runner import FedRunner +from federatedscope.core.auxiliaries.worker_builder import get_server_cls, get_client_cls + +SAMPLE_CLIENT_NUM = 5 + + +class FEMNISTTest(unittest.TestCase): + def setUp(self): + print(('Testing %s.%s' % (type(self).__name__, self._testMethodName))) + + def set_config_femnist(self, cfg): + backup_cfg = cfg.clone() + + import torch + cfg.use_gpu = torch.cuda.is_available() + cfg.eval.freq = 10 + cfg.eval.metrics = ['acc', 'loss_regular'] + + cfg.federate.mode = 'standalone' + cfg.train.local_update_steps = 5 + cfg.federate.total_round_num = 20 + cfg.federate.sample_client_num = SAMPLE_CLIENT_NUM + + cfg.data.root = 'test_data/' + cfg.data.type = 'femnist' + cfg.data.splits = [0.6, 0.2, 0.2] + cfg.data.batch_size = 10 + cfg.data.subsample = 0.05 + cfg.data.transform = [['ToTensor'], + [ + 'Normalize', { + 'mean': [0.1307], + 'std': [0.3081] + } + ]] + + cfg.model.type = 'convnet2' + cfg.model.hidden = 2048 + cfg.model.out_channels = 62 + + cfg.train.optimizer.lr = 0.001 + cfg.train.optimizer.weight_decay = 0.0 + cfg.grad.grad_clip = 5.0 + + cfg.criterion.type = 'CrossEntropyLoss' + cfg.trainer.type = 'cvtrainer' + cfg.seed = 123 + + return backup_cfg + + def test_femnist_standalone(self): + init_cfg = global_cfg.clone() + backup_cfg = self.set_config_femnist(init_cfg) + setup_seed(init_cfg.seed) + update_logger(init_cfg, True) + + data, modified_cfg = get_data(init_cfg.clone()) + init_cfg.merge_from_other_cfg(modified_cfg) + self.assertIsNotNone(data) + self.assertEqual(init_cfg.federate.sample_client_num, + SAMPLE_CLIENT_NUM) + + Fed_runner = FedRunner(data=data, + server_class=get_server_cls(init_cfg), + client_class=get_client_cls(init_cfg), + config=init_cfg.clone()) + self.assertIsNotNone(Fed_runner) + test_best_results = Fed_runner.run() + print(test_best_results) + init_cfg.merge_from_other_cfg(backup_cfg) + self.assertLess( + test_best_results["client_summarized_weighted_avg"]['test_loss'], + 600) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/test_finetune_lr.py b/tests/test_finetune_lr.py new file mode 100644 index 000000000..8f6254705 --- /dev/null +++ b/tests/test_finetune_lr.py @@ -0,0 +1,59 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
+import unittest + +from federatedscope.core.auxiliaries.data_builder import get_data +from federatedscope.core.auxiliaries.utils import setup_seed +from federatedscope.core.auxiliaries.logging import update_logger +from federatedscope.core.configs.config import global_cfg +from federatedscope.core.fed_runner import FedRunner +from federatedscope.core.auxiliaries.worker_builder import get_server_cls, get_client_cls + + +class ToyLRTest(unittest.TestCase): + def setUp(self): + print(('Testing %s.%s' % (type(self).__name__, self._testMethodName))) + + def set_config_standalone(self, cfg, make_global_eval=False): + backup_cfg = cfg.clone() + + import torch + cfg.use_gpu = torch.cuda.is_available() + cfg.federate.mode = 'standalone' + cfg.federate.total_round_num = 20 + cfg.federate.make_global_eval = make_global_eval + cfg.federate.client_num = 5 + cfg.eval.freq = 10 + cfg.data.type = 'toy' + cfg.trainer.type = 'general' + cfg.model.type = 'lr' + cfg.finetune.before_eval = True + cfg.finetune.local_update_steps = 5 + + return backup_cfg + + def test_toy_example_standalone(self): + init_cfg = global_cfg.clone() + backup_cfg = self.set_config_standalone(init_cfg) + setup_seed(init_cfg.seed) + update_logger(init_cfg, True) + + data, modified_config = get_data(init_cfg.clone()) + init_cfg.merge_from_other_cfg(modified_config) + + self.assertIsNotNone(data) + + Fed_runner = FedRunner(data=data, + server_class=get_server_cls(init_cfg), + client_class=get_client_cls(init_cfg), + config=init_cfg.clone()) + self.assertIsNotNone(Fed_runner) + test_best_results = Fed_runner.run() + print(test_best_results) + init_cfg.merge_from_other_cfg(backup_cfg) + self.assertLess( + test_best_results["client_summarized_weighted_avg"]['test_loss'], + 0.3) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/test_global_train_lr.py b/tests/test_global_train_lr.py new file mode 100644 index 000000000..41167e57a --- /dev/null +++ b/tests/test_global_train_lr.py @@ -0,0 +1,56 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
+import unittest + +from federatedscope.core.auxiliaries.data_builder import get_data +from federatedscope.core.auxiliaries.utils import setup_seed +from federatedscope.core.auxiliaries.logging import update_logger +from federatedscope.core.configs.config import global_cfg +from federatedscope.core.fed_runner import FedRunner +from federatedscope.core.auxiliaries.worker_builder import get_server_cls, get_client_cls + + +class ToyLRTest(unittest.TestCase): + def setUp(self): + print(('Testing %s.%s' % (type(self).__name__, self._testMethodName))) + + def set_config_standalone(self, cfg, make_global_eval=False): + import torch + cfg.use_gpu = torch.cuda.is_available() + cfg.federate.mode = 'standalone' + cfg.federate.total_round_num = 30 + cfg.federate.make_global_eval = make_global_eval + cfg.federate.client_num = 5 + cfg.eval.freq = 10 + cfg.data.type = 'toy' + cfg.trainer.type = 'general' + cfg.model.type = 'lr' + + cfg.early_stop.patience = 5 + cfg.federate.method = "global" + + def test_toy_example_standalone(self): + init_cfg = global_cfg.clone() + self.set_config_standalone(init_cfg) + + setup_seed(init_cfg.seed) + update_logger(init_cfg, True) + + data, modified_config = get_data(init_cfg.clone()) + init_cfg.merge_from_other_cfg(modified_config) + + self.assertIsNotNone(data) + + Fed_runner = FedRunner(data=data, + server_class=get_server_cls(init_cfg), + client_class=get_client_cls(init_cfg), + config=init_cfg.clone()) + self.assertIsNotNone(Fed_runner) + test_best_results = Fed_runner.run() + print(test_best_results) + self.assertLess( + test_best_results["client_summarized_weighted_avg"]['test_loss'], + 0.3) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/test_graph_node_trainer.py b/tests/test_graph_node_trainer.py new file mode 100644 index 000000000..8c1618c03 --- /dev/null +++ b/tests/test_graph_node_trainer.py @@ -0,0 +1,71 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
+import unittest + +from federatedscope.core.auxiliaries.data_builder import get_data +from federatedscope.core.auxiliaries.utils import setup_seed +from federatedscope.core.auxiliaries.logging import update_logger +from federatedscope.core.configs.config import global_cfg +from federatedscope.core.fed_runner import FedRunner +from federatedscope.core.auxiliaries.worker_builder import get_server_cls, get_client_cls + + +class NodeTrainerTest(unittest.TestCase): + def setUp(self): + print(('Testing %s.%s' % (type(self).__name__, self._testMethodName))) + + def set_config_node(self, cfg): + backup_cfg = cfg.clone() + + import torch + cfg.use_gpu = torch.cuda.is_available() + cfg.eval.freq = 10 + cfg.eval.metrics = ['acc', 'correct'] + + cfg.federate.mode = 'standalone' + cfg.federate.total_round_num = 50 + cfg.federate.client_num = 5 + + cfg.data.root = 'test_data/' + cfg.data.type = 'cora' + cfg.data.batch_size = 1 # full batch train + cfg.data.splitter = 'louvain' + + cfg.model.type = 'gcn' + cfg.model.hidden = 64 + cfg.model.dropout = 0.5 + cfg.model.out_channels = 7 + + cfg.train.optimizer.lr = 0.25 + cfg.train.optimizer.weight_decay = 0.0005 + cfg.train.optimizer.type = 'SGD' + + cfg.criterion.type = 'CrossEntropyLoss' + cfg.trainer.type = 'nodefullbatch_trainer' + + return backup_cfg + + def test_node_standalone(self): + init_cfg = global_cfg.clone() + backup_cfg = self.set_config_node(init_cfg) + setup_seed(init_cfg.seed) + update_logger(init_cfg, True) + + data, modified_cfg = get_data(init_cfg.clone()) + init_cfg.merge_from_other_cfg(modified_cfg) + + self.assertIsNotNone(data) + + Fed_runner = FedRunner(data=data, + server_class=get_server_cls(init_cfg), + client_class=get_client_cls(init_cfg), + config=init_cfg.clone()) + self.assertIsNotNone(Fed_runner) + test_best_results = Fed_runner.run() + init_cfg.merge_from_other_cfg(backup_cfg) + self.assertGreater( + test_best_results["client_summarized_weighted_avg"]['test_acc'], + 0.7) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/test_local_train_lr.py b/tests/test_local_train_lr.py new file mode 100644 index 000000000..f87a96cfa --- /dev/null +++ b/tests/test_local_train_lr.py @@ -0,0 +1,57 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
+import unittest
+
+from federatedscope.core.auxiliaries.data_builder import get_data
+from federatedscope.core.auxiliaries.utils import setup_seed
+from federatedscope.core.auxiliaries.logging import update_logger
+from federatedscope.core.configs.config import global_cfg
+from federatedscope.core.fed_runner import FedRunner
+from federatedscope.core.auxiliaries.worker_builder import get_server_cls, \
+    get_client_cls
+
+
+class ToyLRTest(unittest.TestCase):
+    def setUp(self):
+        print(('Testing %s.%s' % (type(self).__name__, self._testMethodName)))
+
+    def set_config_standalone(self, cfg, make_global_eval=False):
+        import torch
+        cfg.use_gpu = torch.cuda.is_available()
+        cfg.federate.mode = 'standalone'
+        cfg.federate.total_round_num = 100
+        cfg.federate.make_global_eval = make_global_eval
+        cfg.federate.client_num = 5
+        cfg.eval.freq = 10
+        cfg.data.type = 'toy'
+        cfg.trainer.type = 'general'
+        cfg.model.type = 'lr'
+
+        cfg.early_stop.patience = 5
+        cfg.federate.method = "local"
+
+    def test_toy_example_standalone(self):
+        init_cfg = global_cfg.clone()
+        self.set_config_standalone(init_cfg)
+
+        setup_seed(init_cfg.seed)
+        update_logger(init_cfg, True)
+
+        data, modified_config = get_data(init_cfg.clone())
+        init_cfg.merge_from_other_cfg(modified_config)
+
+        self.assertIsNotNone(data)
+
+        Fed_runner = FedRunner(data=data,
+                               server_class=get_server_cls(init_cfg),
+                               client_class=get_client_cls(init_cfg),
+                               config=init_cfg.clone())
+        self.assertIsNotNone(Fed_runner)
+        test_best_results = Fed_runner.run()
+        print(test_best_results)
+        self.assertLess(
+            test_best_results["client_summarized_weighted_avg"]['test_loss'],
+            0.3)
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/tests/test_mf.py b/tests/test_mf.py
new file mode 100644
index 000000000..1714d65f4
--- /dev/null
+++ b/tests/test_mf.py
@@ -0,0 +1,71 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
+import unittest
+
+from federatedscope.core.auxiliaries.data_builder import get_data
+from federatedscope.core.auxiliaries.utils import setup_seed
+from federatedscope.core.auxiliaries.logging import update_logger
+from federatedscope.core.configs.config import global_cfg
+from federatedscope.core.fed_runner import FedRunner
+from federatedscope.core.auxiliaries.worker_builder import get_server_cls, get_client_cls
+
+
+class MFTest(unittest.TestCase):
+    def setUp(self):
+        print(('Testing %s.%s' % (type(self).__name__, self._testMethodName)))
+
+    def set_config_movielens1m(self, cfg):
+        backup_cfg = cfg.clone()
+
+        import torch
+        cfg.use_gpu = torch.cuda.is_available()
+        cfg.early_stop.patience = 100
+        cfg.eval.best_res_update_round_wise_key = "test_avg_loss"
+        cfg.eval.freq = 5
+        cfg.eval.metrics = []
+
+        cfg.federate.mode = 'standalone'
+        cfg.train.local_update_steps = 20
+        cfg.federate.total_round_num = 50
+        cfg.federate.client_num = 5
+
+        cfg.data.root = 'test_data/'
+        cfg.data.type = 'vflmovielens1m'
+        cfg.data.batch_size = 32
+
+        cfg.model.type = 'VMFNet'
+        cfg.model.hidden = 20
+
+        cfg.train.optimizer.lr = 1.
+ cfg.train.optimizer.weight_decay = 0.0 + + cfg.criterion.type = 'MSELoss' + cfg.trainer.type = 'mftrainer' + cfg.seed = 123 + + return backup_cfg + + def test_mf_standalone(self): + init_cfg = global_cfg.clone() + backup_cfg = self.set_config_movielens1m(init_cfg) + setup_seed(init_cfg.seed) + update_logger(init_cfg, True) + + data, modified_cfg = get_data(init_cfg.clone()) + init_cfg.merge_from_other_cfg(modified_cfg) + self.assertIsNotNone(data) + + Fed_runner = FedRunner(data=data, + server_class=get_server_cls(init_cfg), + client_class=get_client_cls(init_cfg), + config=init_cfg.clone()) + self.assertIsNotNone(Fed_runner) + test_results = Fed_runner.run() + init_cfg.merge_from_other_cfg(backup_cfg) + + self.assertLess( + test_results["client_summarized_weighted_avg"]["test_avg_loss"], + 50) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/test_nbafl.py b/tests/test_nbafl.py new file mode 100644 index 000000000..b328905ba --- /dev/null +++ b/tests/test_nbafl.py @@ -0,0 +1,89 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +import unittest + +from federatedscope.core.auxiliaries.data_builder import get_data +from federatedscope.core.auxiliaries.utils import setup_seed +from federatedscope.core.auxiliaries.logging import update_logger +from federatedscope.core.configs.config import global_cfg +from federatedscope.core.fed_runner import FedRunner +from federatedscope.core.auxiliaries.worker_builder import get_server_cls, get_client_cls + + +class NbAFLTest(unittest.TestCase): + def setUp(self): + print(('Testing %s.%s' % (type(self).__name__, self._testMethodName))) + + def set_config_femnist(self, cfg): + backup_cfg = cfg.clone() + + import torch + cfg.use_gpu = torch.cuda.is_available() + cfg.eval.freq = 10 + cfg.eval.metrics = ['acc', 'loss_regular'] + + cfg.federate.mode = 'standalone' + cfg.train.local_update_steps = 5 + cfg.federate.total_round_num = 20 + cfg.federate.sample_client_num = 5 + + cfg.data.root = 'test_data/' + cfg.data.type = 'femnist' + cfg.data.splits = [0.6, 0.2, 0.2] + cfg.data.batch_size = 10 + cfg.data.subsample = 0.01 + cfg.data.transform = [['ToTensor'], + [ + 'Normalize', { + 'mean': [0.1307], + 'std': [0.3081] + } + ]] + + cfg.model.type = 'convnet2' + cfg.model.hidden = 2048 + cfg.model.out_channels = 62 + + cfg.train.optimizer.lr = 0.001 + cfg.train.optimizer.weight_decay = 0.0 + cfg.grad.grad_clip = 5.0 + + cfg.criterion.type = 'CrossEntropyLoss' + cfg.trainer.type = 'cvtrainer' + cfg.seed = 123 + + cfg.nbafl.use = True + cfg.nbafl.w_clip = 0.1 + cfg.nbafl.mu = 0.1 + cfg.nbafl.constant = 2. + cfg.nbafl.epsilon = 50. 
+
+        return backup_cfg
+
+    def test_nbafl_standalone(self):
+        # TODO: no need to back up the cfg in every test_xxx.py, as we now use the init_cfg style
+        init_cfg = global_cfg.clone()
+        backup_cfg = self.set_config_femnist(init_cfg)
+        setup_seed(init_cfg.seed)
+        update_logger(init_cfg, True)
+
+        data, modified_cfg = get_data(init_cfg.clone())
+        init_cfg.merge_from_other_cfg(modified_cfg)
+        self.assertIsNotNone(data)
+        # Run on the first 10 clients
+        init_cfg.merge_from_list(['federate.client_num', 10])
+
+        Fed_runner = FedRunner(data=data,
+                               server_class=get_server_cls(init_cfg),
+                               client_class=get_client_cls(init_cfg),
+                               config=init_cfg.clone())
+        self.assertIsNotNone(Fed_runner)
+        test_best_results = Fed_runner.run()
+        print(test_best_results)
+        init_cfg.merge_from_other_cfg(backup_cfg)
+        self.assertLess(
+            test_best_results["client_summarized_weighted_avg"]['test_loss'],
+            500)
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/tests/test_optimizer.py b/tests/test_optimizer.py
new file mode 100644
index 000000000..377a6a66f
--- /dev/null
+++ b/tests/test_optimizer.py
@@ -0,0 +1,83 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
+import unittest
+
+from federatedscope.core.auxiliaries.data_builder import get_data
+from federatedscope.core.auxiliaries.utils import setup_seed
+from federatedscope.core.auxiliaries.logging import update_logger
+from federatedscope.core.configs.config import global_cfg
+from federatedscope.core.fed_runner import FedRunner
+from federatedscope.core.auxiliaries.worker_builder import get_server_cls, get_client_cls
+
+
+class FEMNISTTest(unittest.TestCase):
+    def setUp(self):
+        print(('Testing %s.%s' % (type(self).__name__, self._testMethodName)))
+
+    def set_config_femnist(self, cfg):
+        backup_cfg = cfg.clone()
+
+        import torch
+        cfg.use_gpu = torch.cuda.is_available()
+        cfg.eval.freq = 10
+        cfg.eval.metrics = ['acc', 'loss_regular']
+
+        cfg.federate.mode = 'standalone'
+        cfg.train.local_update_steps = 5
+        cfg.federate.total_round_num = 20
+        cfg.federate.sample_client_num = 5
+        cfg.federate.client_num = 10
+
+        cfg.data.root = 'test_data/'
+        cfg.data.type = 'femnist'
+        cfg.data.splits = [0.6, 0.2, 0.2]
+        cfg.data.batch_size = 10
+        cfg.data.subsample = 0.05
+        cfg.data.transform = [['ToTensor'],
+                              [
+                                  'Normalize', {
+                                      'mean': [0.1307],
+                                      'std': [0.3081]
+                                  }
+                              ]]
+
+        cfg.model.type = 'convnet2'
+        cfg.model.hidden = 2048
+        cfg.model.out_channels = 62
+
+        cfg.train.optimizer.type = "Adam"
+        cfg.train.optimizer.lr = 0.001
+        cfg.train.optimizer.betas = [0.9, 0.999]
+        cfg.train.optimizer.weight_decay = 0.001
+        cfg.grad.grad_clip = 5.0
+
+        cfg.criterion.type = 'CrossEntropyLoss'
+        cfg.trainer.type = 'cvtrainer'
+        cfg.seed = 123
+
+        return backup_cfg
+
+    def test_femnist_standalone(self):
+        init_cfg = global_cfg.clone()
+        backup_cfg = self.set_config_femnist(init_cfg)
+        setup_seed(init_cfg.seed)
+        update_logger(init_cfg, True)
+
+        data, modified_cfg = get_data(init_cfg.clone())
+        init_cfg.merge_from_other_cfg(modified_cfg)
+        self.assertIsNotNone(data)
+
+        Fed_runner = FedRunner(data=data,
+                               server_class=get_server_cls(init_cfg),
+                               client_class=get_client_cls(init_cfg),
+                               config=init_cfg.clone())
+        self.assertIsNotNone(Fed_runner)
+        test_best_results = Fed_runner.run()
+        print(test_best_results)
+        init_cfg.merge_from_other_cfg(backup_cfg)
+        self.assertLess(
+            test_best_results["client_summarized_weighted_avg"]['test_loss'],
+            600)
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/tests/test_pfedme.py b/tests/test_pfedme.py
new file mode 100644
index 000000000..bb69bc107
--- /dev/null
+++ b/tests/test_pfedme.py
@@ -0,0 +1,86 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
+import unittest
+
+from federatedscope.core.auxiliaries.data_builder import get_data
+from federatedscope.core.auxiliaries.utils import setup_seed
+from federatedscope.core.auxiliaries.logging import update_logger
+from federatedscope.core.configs.config import global_cfg
+from federatedscope.core.fed_runner import FedRunner
+from federatedscope.core.auxiliaries.worker_builder import get_server_cls, get_client_cls
+
+
+class FEMNISTTest(unittest.TestCase):
+    def setUp(self):
+        print(('Testing %s.%s' % (type(self).__name__, self._testMethodName)))
+
+    def set_config_femnist(self, cfg):
+        backup_cfg = cfg.clone()
+
+        import torch
+        cfg.use_gpu = torch.cuda.is_available()
+        cfg.eval.freq = 10
+        cfg.eval.metrics = ['acc', 'loss_regular']
+
+        cfg.federate.mode = 'standalone'
+        cfg.train.local_update_steps = 5
+        cfg.federate.total_round_num = 20
+        cfg.federate.sample_client_num = 5
+        cfg.federate.client_num = 10
+
+        cfg.federate.method = "pFedMe"
+        cfg.personalization.lr = -1.0
+        cfg.personalization.beta = 1.0
+        cfg.personalization.K = 3
+
+        cfg.data.root = 'test_data/'
+        cfg.data.type = 'femnist'
+        cfg.data.splits = [0.6, 0.2, 0.2]
+        cfg.data.batch_size = 10
+        cfg.data.subsample = 0.05
+        cfg.data.transform = [['ToTensor'],
+                              [
+                                  'Normalize', {
+                                      'mean': [0.1307],
+                                      'std': [0.3081]
+                                  }
+                              ]]
+
+        cfg.model.type = 'convnet2'
+        cfg.model.hidden = 2048
+        cfg.model.out_channels = 62
+
+        cfg.train.optimizer.lr = 0.001
+        cfg.train.optimizer.weight_decay = 0.0
+        cfg.grad.grad_clip = 5.0
+
+        cfg.criterion.type = 'CrossEntropyLoss'
+        cfg.trainer.type = 'cvtrainer'
+        cfg.seed = 123
+
+        return backup_cfg
+
+    def test_femnist_standalone(self):
+        init_cfg = global_cfg.clone()
+        backup_cfg = self.set_config_femnist(init_cfg)
+        setup_seed(init_cfg.seed)
+        update_logger(init_cfg, True)
+
+        data, modified_cfg = get_data(init_cfg.clone())
+        init_cfg.merge_from_other_cfg(modified_cfg)
+        self.assertIsNotNone(data)
+
+        Fed_runner = FedRunner(data=data,
+                               server_class=get_server_cls(init_cfg),
+                               client_class=get_client_cls(init_cfg),
+                               config=init_cfg.clone())
+        self.assertIsNotNone(Fed_runner)
+        test_best_results = Fed_runner.run()
+        print(test_best_results)
+        init_cfg.merge_from_other_cfg(backup_cfg)
+        self.assertLess(
+            test_best_results["client_summarized_weighted_avg"]
+            ['test_avg_loss'], 10)
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/tests/test_rec_IG_opt_attack.py b/tests/test_rec_IG_opt_attack.py
new file mode 100644
index 000000000..63e029351
--- /dev/null
+++ b/tests/test_rec_IG_opt_attack.py
@@ -0,0 +1,86 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
+import unittest + +from federatedscope.core.auxiliaries.data_builder import get_data +from federatedscope.core.auxiliaries.utils import setup_seed +from federatedscope.core.auxiliaries.logging import update_logger +from federatedscope.core.configs.config import global_cfg +from federatedscope.core.fed_runner import FedRunner +from federatedscope.core.auxiliaries.worker_builder import get_server_cls, get_client_cls + + +class RECTest(unittest.TestCase): + def setUp(self): + print(('Testing %s.%s' % (type(self).__name__, self._testMethodName))) + + def set_config_femnist(self, cfg): + backup_cfg = cfg.clone() + + import torch + cfg.use_gpu = torch.cuda.is_available() + cfg.eval.freq = 10 + cfg.eval.metrics = ['acc', 'loss_regular'] + + cfg.federate.mode = 'standalone' + cfg.train.local_update_steps = 1 + cfg.federate.total_round_num = 20 + cfg.federate.sample_client_num = 5 + cfg.federate.client_num = 10 + + cfg.data.root = 'test_data/' + cfg.data.type = 'femnist' + cfg.data.splits = [0.6, 0.2, 0.2] + cfg.data.batch_size = 1 + cfg.data.subsample = 0.01 + cfg.data.transform = [['ToTensor'], + [ + 'Normalize', { + 'mean': [0.1307], + 'std': [0.3081] + } + ]] + + cfg.model.type = 'convnet2' + cfg.model.hidden = 2048 + cfg.model.out_channels = 62 + + cfg.train.optimizer.lr = 0.001 + cfg.train.optimizer.weight_decay = 0.0 + + cfg.criterion.type = 'CrossEntropyLoss' + cfg.trainer.type = 'cvtrainer' + cfg.seed = 123 + + cfg.attack.attack_method = 'IG' + cfg.attack.reconstruct_lr = 0.1 + cfg.attack.reconstruct_optim = 'Adam' + cfg.attack.info_diff_type = 'l2' + cfg.attack.max_ite = 40 + + return backup_cfg + + def test_IG_rec_femnist_standalone(self): + init_cfg = global_cfg.clone() + backup_cfg = self.set_config_femnist(init_cfg) + setup_seed(init_cfg.seed) + update_logger(init_cfg, True) + + data, modified_cfg = get_data(init_cfg.clone()) + init_cfg.merge_from_other_cfg(modified_cfg) + self.assertIsNotNone(data) + + Fed_runner = FedRunner(data=data, + server_class=get_server_cls(init_cfg), + client_class=get_client_cls(init_cfg), + config=init_cfg.clone()) + self.assertIsNotNone(Fed_runner) + test_best_results = Fed_runner.run() + print(test_best_results) + init_cfg.merge_from_other_cfg(backup_cfg) + self.assertLess( + test_best_results["client_summarized_weighted_avg"]['test_loss'], + 600) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/test_rec_opt_attack.py b/tests/test_rec_opt_attack.py new file mode 100644 index 000000000..f28942457 --- /dev/null +++ b/tests/test_rec_opt_attack.py @@ -0,0 +1,92 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
+import unittest + +from federatedscope.core.auxiliaries.data_builder import get_data +from federatedscope.core.auxiliaries.utils import setup_seed +from federatedscope.core.auxiliaries.logging import update_logger +from federatedscope.core.configs.config import global_cfg +from federatedscope.core.fed_runner import FedRunner +from federatedscope.core.auxiliaries.worker_builder import get_server_cls, get_client_cls + + +class RECTest(unittest.TestCase): + def setUp(self): + print(('Testing %s.%s' % (type(self).__name__, self._testMethodName))) + + def set_config_femnist(self, cfg): + backup_cfg = cfg.clone() + + import torch + cfg.use_gpu = torch.cuda.is_available() + cfg.eval.freq = 10 + cfg.eval.metrics = ['acc', 'loss_regular'] + + cfg.federate.mode = 'standalone' + cfg.train.local_update_steps = 1 + cfg.federate.total_round_num = 20 + cfg.federate.sample_client_num = 5 + cfg.federate.client_num = 10 + + cfg.data.root = 'test_data/' + cfg.data.type = 'femnist' + cfg.data.splits = [0.6, 0.2, 0.2] + cfg.data.batch_size = 1 + cfg.data.subsample = 0.01 + cfg.data.transform = [['ToTensor'], + [ + 'Normalize', { + 'mean': [0.1307], + 'std': [0.3081] + } + ]] + + cfg.model.type = 'convnet2' + cfg.model.hidden = 2048 + cfg.model.out_channels = 62 + + cfg.train.optimizer.lr = 0.001 + cfg.train.optimizer.weight_decay = 0.0 + + cfg.criterion.type = 'CrossEntropyLoss' + cfg.trainer.type = 'cvtrainer' + cfg.seed = 123 + + cfg.attack.attack_method = 'dlg' + cfg.attack.reconstruct_lr = 0.1 + cfg.attack.reconstruct_optim = 'Adam' + cfg.attack.info_diff_type = 'l2' + cfg.attack.max_ite = 40 + + return backup_cfg + + def test_rec_femnist_standalone(self): + init_cfg = global_cfg.clone() + backup_cfg = self.set_config_femnist(init_cfg) + setup_seed(init_cfg.seed) + update_logger(init_cfg, True) + + data, modified_cfg = get_data(init_cfg.clone()) + init_cfg.merge_from_other_cfg(modified_cfg) + self.assertIsNotNone(data) + + # if cfg.attack.attack_method.lower() == 'dlg': + # from federatedscope.attack.worker_as_attacker.server_attacker import PassiveServer + # server_class = PassiveServer + # else: + # server_class = Server + + Fed_runner = FedRunner(data=data, + server_class=get_server_cls(init_cfg), + client_class=get_client_cls(init_cfg), + config=init_cfg.clone()) + self.assertIsNotNone(Fed_runner) + test_best_results = Fed_runner.run() + print(test_best_results) + init_cfg.merge_from_other_cfg(backup_cfg) + self.assertLess( + test_best_results["client_summarized_weighted_avg"]['test_loss'], + 600) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/test_toy_lr.py b/tests/test_toy_lr.py new file mode 100644 index 000000000..276997de2 --- /dev/null +++ b/tests/test_toy_lr.py @@ -0,0 +1,80 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
+import unittest + +from federatedscope.core.auxiliaries.data_builder import get_data +from federatedscope.core.auxiliaries.utils import setup_seed +from federatedscope.core.auxiliaries.logging import update_logger +from federatedscope.core.configs.config import global_cfg +from federatedscope.core.fed_runner import FedRunner +from federatedscope.core.auxiliaries.worker_builder import get_server_cls, get_client_cls + + +class ToyLRTest(unittest.TestCase): + def setUp(self): + print(('Testing %s.%s' % (type(self).__name__, self._testMethodName))) + + def set_config_standalone(self, cfg, make_global_eval=False): + backup_cfg = cfg.clone() + + import torch + cfg.use_gpu = torch.cuda.is_available() + cfg.federate.mode = 'standalone' + cfg.federate.total_round_num = 20 + cfg.federate.make_global_eval = make_global_eval + cfg.federate.client_num = 5 + cfg.eval.freq = 10 + cfg.data.type = 'toy' + cfg.trainer.type = 'general' + cfg.model.type = 'lr' + + return backup_cfg + + def test_toy_example_standalone(self): + init_cfg = global_cfg.clone() + backup_cfg = self.set_config_standalone(init_cfg) + setup_seed(init_cfg.seed) + update_logger(init_cfg, True) + + data, modified_config = get_data(init_cfg.clone()) + init_cfg.merge_from_other_cfg(modified_config) + + self.assertIsNotNone(data) + + Fed_runner = FedRunner(data=data, + server_class=get_server_cls(init_cfg), + client_class=get_client_cls(init_cfg), + config=init_cfg.clone()) + self.assertIsNotNone(Fed_runner) + test_best_results = Fed_runner.run() + print(test_best_results) + init_cfg.merge_from_other_cfg(backup_cfg) + self.assertLess( + test_best_results["client_summarized_weighted_avg"]['test_loss'], + 0.3) + + def test_toy_example_standalone_global_eval(self): + init_cfg = global_cfg.clone() + backup_cfg = self.set_config_standalone(init_cfg, + make_global_eval=True) + setup_seed(init_cfg.seed) + update_logger(init_cfg, True) + + data, modified_config = get_data(init_cfg.clone()) + init_cfg.merge_from_other_cfg(modified_config) + + self.assertIsNotNone(data) + + Fed_runner = FedRunner(data=data, + server_class=get_server_cls(init_cfg), + client_class=get_client_cls(init_cfg), + config=init_cfg.clone()) + self.assertIsNotNone(Fed_runner) + test_best_results = Fed_runner.run() + print(test_best_results) + init_cfg.merge_from_other_cfg(backup_cfg) + self.assertLess(test_best_results["server_global_eval"]['test_loss'], + 0.3) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/test_unseen_clients_lr.py b/tests/test_unseen_clients_lr.py new file mode 100644 index 000000000..db87af090 --- /dev/null +++ b/tests/test_unseen_clients_lr.py @@ -0,0 +1,57 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
+import unittest + +from federatedscope.core.auxiliaries.data_builder import get_data +from federatedscope.core.auxiliaries.utils import setup_seed +from federatedscope.core.auxiliaries.logging import update_logger +from federatedscope.core.configs.config import global_cfg +from federatedscope.core.fed_runner import FedRunner +from federatedscope.core.auxiliaries.worker_builder import get_server_cls, get_client_cls + + +class ToyLRTest(unittest.TestCase): + def setUp(self): + print(('Testing %s.%s' % (type(self).__name__, self._testMethodName))) + + def set_config_standalone(self, cfg, make_global_eval=False): + import torch + cfg.use_gpu = torch.cuda.is_available() + cfg.federate.mode = 'standalone' + cfg.federate.total_round_num = 20 + cfg.federate.make_global_eval = make_global_eval + cfg.federate.client_num = 5 + cfg.federate.unseen_clients_rate = 0.2 # 20% unseen clients + cfg.eval.freq = 10 + cfg.data.type = 'toy' + cfg.trainer.type = 'general' + cfg.model.type = 'lr' + + def test_toy_example_standalone(self): + init_cfg = global_cfg.clone() + self.set_config_standalone(init_cfg) + + setup_seed(init_cfg.seed) + update_logger(init_cfg) + + data, modified_config = get_data(init_cfg.clone()) + init_cfg.merge_from_other_cfg(modified_config) + + self.assertIsNotNone(data) + + Fed_runner = FedRunner(data=data, + server_class=get_server_cls(init_cfg), + client_class=get_client_cls(init_cfg), + config=init_cfg.clone()) + self.assertIsNotNone(Fed_runner) + test_best_results = Fed_runner.run() + print(test_best_results) + self.assertLess( + test_best_results["client_summarized_weighted_avg"]['test_loss'], + 0.3) + self.assertLess( + test_best_results["unseen_client_summarized_weighted_avg"] + ['test_loss'], 0.3) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/test_vertical_fl.py b/tests/test_vertical_fl.py index 24a6fffd4..dad09038c 100644 --- a/tests/test_vertical_fl.py +++ b/tests/test_vertical_fl.py @@ -30,7 +30,6 @@ def set_config(self, cfg): cfg.data.type = 'vertical_fl_data' cfg.data.size = 50 - cfg.dataloader.type = 'raw' cfg.vertical.use = True cfg.vertical.key_size = 256 diff --git a/tests/test_yaml.py b/tests/test_yaml.py new file mode 100644 index 000000000..ff0570377 --- /dev/null +++ b/tests/test_yaml.py @@ -0,0 +1,61 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
+import os +import logging +import unittest + +from federatedscope.core.configs.config import global_cfg + +logger = logging.getLogger(__name__) + + +class YAMLTest(unittest.TestCase): + def setUp(self): + self.exclude_all = ['benchmark', 'scripts'] + self.exclude_file = [ + '.pre-commit-config.yaml', 'meta.yaml', + 'federatedscope/gfl/baseline/isolated_gin_minibatch_on_cikmcup_per_client.yaml', + 'federatedscope/gfl/baseline/fedavg_gin_minibatch_on_cikmcup_per_client.yaml' + ] + self.root = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) + self.exclude_all = [ + os.path.join(self.root, f) for f in self.exclude_all + ] + self.exclude_file = [ + os.path.join(self.root, f) for f in self.exclude_file + ] + print(('Testing %s.%s' % (type(self).__name__, self._testMethodName))) + + def test_yaml(self): + init_cfg = global_cfg.clone() + sign, cont = False, False + for dirpath, _, filenames in os.walk(self.root): + for prefix in self.exclude_all: + if dirpath.startswith(prefix): + cont = True + break + if cont: + cont = False + continue + filenames = [f for f in filenames if f.endswith('.yaml')] + for f in filenames: + yaml_file = os.path.join(dirpath, f) + if yaml_file in self.exclude_file: + continue + try: + init_cfg.merge_from_file(yaml_file) + except KeyError as error: + logger.error( + f"KeyError: {error} in file: {yaml_file.removeprefix(self.root)}" + ) + sign = True + except ValueError as error: + logger.error( + f"ValueError: {error} in file: {yaml_file.removeprefix(self.root)}" + ) + sign = True + init_cfg = global_cfg.clone() + self.assertIs(sign, False, "Yaml check failed.") + + +if __name__ == '__main__': + unittest.main() From 482beb58e665a96b69c6432016d4c0b977e56d22 Mon Sep 17 00:00:00 2001 From: rayrayraykk <18007356109@163.com> Date: Tue, 20 Sep 2022 14:35:11 +0800 Subject: [PATCH 23/39] fix fedsageplus --- .../core/auxiliaries/data_builder.py | 4 +++ federatedscope/core/workers/client.py | 2 +- .../gfl/fedsageplus/fedsageplus_on_cora.yaml | 2 ++ federatedscope/gfl/fedsageplus/trainer.py | 2 +- federatedscope/gfl/fedsageplus/worker.py | 28 +++++++++++++------ federatedscope/gfl/flitplus/fedalgo_cls.yaml | 4 ++- federatedscope/gfl/flitplus/trainer.py | 2 +- .../gfl/gcflplus/gcflplus_on_multi_task.yaml | 2 ++ federatedscope/gfl/trainer/nodetrainer.py | 1 - tests/test_graph_node_trainer.py | 4 ++- tests/test_mf.py | 4 ++- tests/test_vertical_fl.py | 2 ++ 12 files changed, 42 insertions(+), 15 deletions(-) diff --git a/federatedscope/core/auxiliaries/data_builder.py b/federatedscope/core/auxiliaries/data_builder.py index 617d9689e..d1891894a 100644 --- a/federatedscope/core/auxiliaries/data_builder.py +++ b/federatedscope/core/auxiliaries/data_builder.py @@ -55,6 +55,10 @@ def get_data(config, client_cfgs=None): # Load dataset from source files dataset, modified_config = load_dataset(config) + if dataset is None: + logger.warning("Data is None!") + return None, modified_config + # Perform translator to non-FL dataset translator = getattr(import_module('federatedscope.core.data'), DATA_TRANS_MAP[config.data.type.lower()])( diff --git a/federatedscope/core/workers/client.py b/federatedscope/core/workers/client.py index ad72d89dc..7be858695 100644 --- a/federatedscope/core/workers/client.py +++ b/federatedscope/core/workers/client.py @@ -414,7 +414,7 @@ def callback_funcs_for_join_in_info(self, message: Message): join_in_info['num_sample'] = num_sample if self._cfg.trainer.type == 'nodefullbatch_trainer': join_in_info['num_sample'] = \ - self.trainer.ctx.data.x.shape[0] + 
self.trainer.ctx.data['data'].x.shape[0] elif requirement.lower() == 'client_resource': assert self.comm_bandwidth is not None and self.comp_speed \ is not None, "The requirement join_in_info " \ diff --git a/federatedscope/gfl/fedsageplus/fedsageplus_on_cora.yaml b/federatedscope/gfl/fedsageplus/fedsageplus_on_cora.yaml index 8d316ee7a..0413de06e 100644 --- a/federatedscope/gfl/fedsageplus/fedsageplus_on_cora.yaml +++ b/federatedscope/gfl/fedsageplus/fedsageplus_on_cora.yaml @@ -12,6 +12,8 @@ data: root: data/ type: 'cora' splitter: 'louvain' +dataloader: + type: pyg batch_size: 1 model: type: sage diff --git a/federatedscope/gfl/fedsageplus/trainer.py b/federatedscope/gfl/fedsageplus/trainer.py index 86d2334a8..b9f665fc7 100644 --- a/federatedscope/gfl/fedsageplus/trainer.py +++ b/federatedscope/gfl/fedsageplus/trainer.py @@ -144,5 +144,5 @@ def cal_grad(self, raw_data, model_para, embedding, true_missing): @torch.no_grad() def embedding(self): model = self.ctx.model.to(self.ctx.device) - data = self.ctx.data.to(self.ctx.device) + data = self.ctx.data['data'].to(self.ctx.device) return model.encoder_model(data).to('cpu') diff --git a/federatedscope/gfl/fedsageplus/worker.py b/federatedscope/gfl/fedsageplus/worker.py index 256d91e87..467b6d867 100644 --- a/federatedscope/gfl/fedsageplus/worker.py +++ b/federatedscope/gfl/fedsageplus/worker.py @@ -8,6 +8,7 @@ from federatedscope.core.workers.server import Server from federatedscope.core.workers.client import Client from federatedscope.core.auxiliaries.utils import merge_dict +from federatedscope.core.data import ClientData from federatedscope.gfl.trainer.nodetrainer import NodeMiniBatchTrainer from federatedscope.gfl.model.fedsageplus import LocalSage_Plus, FedSage_Plus @@ -257,9 +258,15 @@ def __init__(self, self.data = data self.hide_data = HideGraph(self._cfg.fedsageplus.hide_portion)( data['data']) + # Convert to `ClientData` + self.hide_data = ClientData(self._cfg, + train=[self.hide_data], + val=[self.hide_data], + test=[self.hide_data], + data=self.hide_data) self.device = device self.sage_batch_size = 64 - self.gen = LocalSage_Plus(data.x.shape[-1], + self.gen = LocalSage_Plus(data['data'].x.shape[-1], self._cfg.model.out_channels, hidden=self._cfg.model.hidden, gen_hidden=self._cfg.fedsageplus.gen_hidden, @@ -305,15 +312,17 @@ def callback_funcs_for_local_pre_train(self, message: Message): sender=self.ID, receiver=[sender], state=self.state, - content=[gen_para, embedding, self.hide_data.num_missing])) + content=[ + gen_para, embedding, self.hide_data['data'].num_missing + ])) logger.info(f'\tClient #{self.ID} send gen_para to Server #{sender}.') def callback_funcs_for_gen_para(self, message: Message): round, sender, content = message.state, message.sender, message.content gen_para, embedding, label, ID = content - gen_grad = self.trainer_fedgen.cal_grad(self.data, gen_para, embedding, - label) + gen_grad = self.trainer_fedgen.cal_grad(self.data['data'], gen_para, + embedding, label) self.state = round self.comm_manager.send( Message(msg_type='gradient', @@ -336,14 +345,17 @@ def callback_funcs_for_gradient(self, message): sender=self.ID, receiver=[sender], state=self.state, - content=[gen_para, embedding, self.hide_data.num_missing])) + content=[ + gen_para, embedding, self.hide_data['data'].num_missing + ])) logger.info(f'\tClient #{self.ID}: send gen_para to Server #{sender}.') def callback_funcs_for_setup_fedsage(self, message: Message): round, sender, _ = message.state, message.sender, message.content - self.filled_data = 
GraphMender(model=self.fedgen, - impaired_data=self.hide_data.cpu(), - original_data=self.data) + self.filled_data = GraphMender( + model=self.fedgen, + impaired_data=self.hide_data['data'].cpu(), + original_data=self.data['data']) subgraph_sampler = NeighborSampler( self.filled_data.edge_index, sizes=[-1], diff --git a/federatedscope/gfl/flitplus/fedalgo_cls.yaml b/federatedscope/gfl/flitplus/fedalgo_cls.yaml index d05ed6727..3ddefb7b7 100644 --- a/federatedscope/gfl/flitplus/fedalgo_cls.yaml +++ b/federatedscope/gfl/flitplus/fedalgo_cls.yaml @@ -9,9 +9,11 @@ federate: data: root: data/ splitter: scaffold_lda - batch_size: 64 transform: ['AddSelfLoops'] splitter_args: [{'alpha': 0.1}] +dataloader: + type: pyg + batch_size: 64 model: type: mpnn hidden: 64 diff --git a/federatedscope/gfl/flitplus/trainer.py b/federatedscope/gfl/flitplus/trainer.py index 27325345a..bc22ccb39 100644 --- a/federatedscope/gfl/flitplus/trainer.py +++ b/federatedscope/gfl/flitplus/trainer.py @@ -4,7 +4,7 @@ from federatedscope.core.auxiliaries.enums import LIFECYCLE from federatedscope.core.trainers.context import CtxVar from federatedscope.gfl.loss.vat import VATLoss -from federatedscope.core.trainers.trainer import GeneralTorchTrainer +from federatedscope.core.trainers import GeneralTorchTrainer class FLITTrainer(GeneralTorchTrainer): diff --git a/federatedscope/gfl/gcflplus/gcflplus_on_multi_task.yaml b/federatedscope/gfl/gcflplus/gcflplus_on_multi_task.yaml index 57b48d291..56c70144d 100644 --- a/federatedscope/gfl/gcflplus/gcflplus_on_multi_task.yaml +++ b/federatedscope/gfl/gcflplus/gcflplus_on_multi_task.yaml @@ -8,6 +8,8 @@ data: root: data/ type: graph_multi_domain_mix pre_transform: ['Constant', {'value':1.0, 'cat':False}] +dataloader: + type: pyg model: type: gin hidden: 64 diff --git a/federatedscope/gfl/trainer/nodetrainer.py b/federatedscope/gfl/trainer/nodetrainer.py index 5083164ff..7871bdb2c 100644 --- a/federatedscope/gfl/trainer/nodetrainer.py +++ b/federatedscope/gfl/trainer/nodetrainer.py @@ -25,7 +25,6 @@ def parse_data(self, data): init_dict["{}_data".format(mode)] = None # For node-level task dataloader contains one graph init_dict["num_{}_data".format(mode)] = 1 - else: raise TypeError("Type of data should be dict.") return init_dict diff --git a/tests/test_graph_node_trainer.py b/tests/test_graph_node_trainer.py index 8c1618c03..a6e441bd5 100644 --- a/tests/test_graph_node_trainer.py +++ b/tests/test_graph_node_trainer.py @@ -27,9 +27,11 @@ def set_config_node(self, cfg): cfg.data.root = 'test_data/' cfg.data.type = 'cora' - cfg.data.batch_size = 1 # full batch train cfg.data.splitter = 'louvain' + cfg.dataloader.type = 'pyg' + cfg.dataloader.batch_size = 1 # full batch train + cfg.model.type = 'gcn' cfg.model.hidden = 64 cfg.model.dropout = 0.5 diff --git a/tests/test_mf.py b/tests/test_mf.py index 1714d65f4..c602d8621 100644 --- a/tests/test_mf.py +++ b/tests/test_mf.py @@ -30,7 +30,9 @@ def set_config_movielens1m(self, cfg): cfg.data.root = 'test_data/' cfg.data.type = 'vflmovielens1m' - cfg.data.batch_size = 32 + + cfg.dataloader.type = 'mf' + cfg.dataloader.batch_size = 32 cfg.model.type = 'VMFNet' cfg.model.hidden = 20 diff --git a/tests/test_vertical_fl.py b/tests/test_vertical_fl.py index dad09038c..95613c466 100644 --- a/tests/test_vertical_fl.py +++ b/tests/test_vertical_fl.py @@ -31,6 +31,8 @@ def set_config(self, cfg): cfg.data.type = 'vertical_fl_data' cfg.data.size = 50 + cfg.dataloader.type = 'raw' + cfg.vertical.use = True cfg.vertical.key_size = 256 From 
26716d7e7cf8f5a6c190e3332f44a111daad5be8 Mon Sep 17 00:00:00 2001 From: rayrayraykk <18007356109@163.com> Date: Tue, 20 Sep 2022 17:36:44 +0800 Subject: [PATCH 24/39] fix minor bugs --- .../trainer/MIA_invert_gradient_trainer.py | 2 +- .../core/auxiliaries/data_builder.py | 4 -- .../core/auxiliaries/dataloader_builder.py | 24 +-------- .../core/auxiliaries/model_builder.py | 3 ++ federatedscope/core/data/utils.py | 52 +++++++++++-------- federatedscope/core/data/wrap_dataset.py | 31 +++++++++++ federatedscope/core/trainers/torch_trainer.py | 2 +- federatedscope/tabular/dataloader/toy.py | 3 +- tests/test_fedsageplus.py | 4 +- 9 files changed, 71 insertions(+), 54 deletions(-) create mode 100644 federatedscope/core/data/wrap_dataset.py diff --git a/federatedscope/attack/trainer/MIA_invert_gradient_trainer.py b/federatedscope/attack/trainer/MIA_invert_gradient_trainer.py index 3a7e96d5e..981d4c875 100644 --- a/federatedscope/attack/trainer/MIA_invert_gradient_trainer.py +++ b/federatedscope/attack/trainer/MIA_invert_gradient_trainer.py @@ -4,7 +4,7 @@ import torch from federatedscope.core.trainers import GeneralTorchTrainer -from federatedscope.core.auxiliaries.dataloader_builder import WrapDataset +from federatedscope.core.data.wrap_dataset import WrapDataset from federatedscope.attack.auxiliary.MIA_get_target_data import get_target_data logger = logging.getLogger(__name__) diff --git a/federatedscope/core/auxiliaries/data_builder.py b/federatedscope/core/auxiliaries/data_builder.py index d1891894a..617d9689e 100644 --- a/federatedscope/core/auxiliaries/data_builder.py +++ b/federatedscope/core/auxiliaries/data_builder.py @@ -55,10 +55,6 @@ def get_data(config, client_cfgs=None): # Load dataset from source files dataset, modified_config = load_dataset(config) - if dataset is None: - logger.warning("Data is None!") - return None, modified_config - # Perform translator to non-FL dataset translator = getattr(import_module('federatedscope.core.data'), DATA_TRANS_MAP[config.data.type.lower()])( diff --git a/federatedscope/core/auxiliaries/dataloader_builder.py b/federatedscope/core/auxiliaries/dataloader_builder.py index de0326222..2b6f934db 100644 --- a/federatedscope/core/auxiliaries/dataloader_builder.py +++ b/federatedscope/core/auxiliaries/dataloader_builder.py @@ -1,4 +1,4 @@ -from federatedscope.core.data.utils import get_func_args, filter_dict +from federatedscope.core.data.utils import filter_dict try: import torch @@ -57,25 +57,3 @@ def get_dataloader(dataset, config, split='train'): return dataset else: return None - - -class WrapDataset(Dataset): - """Wrap raw data into pytorch Dataset - - Arguments: - data (dict): raw data dictionary contains "x" and "y" - - """ - def __init__(self, data): - super(WrapDataset, self).__init__() - self.data = data - - def __getitem__(self, idx): - if not isinstance(self.data["x"][idx], torch.Tensor): - return torch.from_numpy( - self.data["x"][idx]).float(), torch.from_numpy( - self.data["y"][idx]).float() - return self.data["x"][idx], self.data["y"][idx] - - def __len__(self): - return len(self.data["y"]) diff --git a/federatedscope/core/auxiliaries/model_builder.py b/federatedscope/core/auxiliaries/model_builder.py index c77e2b13b..522159153 100644 --- a/federatedscope/core/auxiliaries/model_builder.py +++ b/federatedscope/core/auxiliaries/model_builder.py @@ -1,4 +1,7 @@ import logging + +import numpy as np + import federatedscope.register as register logger = logging.getLogger(__name__) diff --git a/federatedscope/core/data/utils.py 
b/federatedscope/core/data/utils.py index e7d623275..cc3835765 100644 --- a/federatedscope/core/data/utils.py +++ b/federatedscope/core/data/utils.py @@ -532,6 +532,7 @@ def filter_dict(func, kwarg): def merge_data(all_data, merged_max_data_id, specified_dataset_name=None): + from federatedscope.core.data.wrap_dataset import WrapDataset if specified_dataset_name is None: dataset_names = list(all_data[1].keys()) # e.g., train, test, val else: @@ -548,32 +549,37 @@ def merge_data(all_data, merged_max_data_id, specified_dataset_name=None): id_has_key += 1 if len(all_data) <= id_has_key: raise KeyError(f'All data do not key {data_name}.') - if isinstance(all_data[id_has_key][data_name], dict): - data_elem_names = list( - all_data[id_has_key][data_name].keys()) # e.g., x, y - merged_data = {name: defaultdict(list) for name in dataset_names} - for data_id in range(1, merged_max_data_id): + if issubclass(type(all_data[id_has_key][data_name]), + torch.utils.data.DataLoader): + if isinstance(all_data[id_has_key][data_name].dataset, WrapDataset): + data_elem_names = list( + all_data[id_has_key][data_name].dataset.dataset.keys()) # + # e.g., x, y + merged_data = {name: defaultdict(list) for name in dataset_names} + for data_id in range(1, merged_max_data_id): + for d_name in dataset_names: + if d_name not in all_data[data_id]: + continue + for elem_name in data_elem_names: + merged_data[d_name][elem_name].append( + all_data[data_id] + [d_name].dataset.dataset[elem_name]) for d_name in dataset_names: - if d_name not in all_data[data_id]: - continue for elem_name in data_elem_names: - merged_data[d_name][elem_name].append( - all_data[data_id][d_name][elem_name]) - for d_name in dataset_names: - for elem_name in data_elem_names: - merged_data[d_name][elem_name] = np.concatenate( - merged_data[d_name][elem_name]) - elif issubclass(type(all_data[id_has_key][data_name]), - torch.utils.data.DataLoader): - merged_data = all_data[id_has_key] - for data_id in range(1, merged_max_data_id): - if data_id == id_has_key: - continue - for d_name in dataset_names: - if d_name not in all_data[data_id]: + merged_data[d_name][elem_name] = np.concatenate( + merged_data[d_name][elem_name]) + for name in all_data[id_has_key]: + all_data[id_has_key][name].dataset.dataset = merged_data[name] + else: + merged_data = all_data[id_has_key] + for data_id in range(1, merged_max_data_id): + if data_id == id_has_key: continue - merged_data[d_name].dataset.extend( - all_data[data_id][d_name].dataset) + for d_name in dataset_names: + if d_name not in all_data[data_id]: + continue + merged_data[d_name].dataset.extend( + all_data[data_id][d_name].dataset) else: raise NotImplementedError( "Un-supported type when merging data across different clients." 
diff --git a/federatedscope/core/data/wrap_dataset.py b/federatedscope/core/data/wrap_dataset.py new file mode 100644 index 000000000..72ed832fa --- /dev/null +++ b/federatedscope/core/data/wrap_dataset.py @@ -0,0 +1,31 @@ +import torch +import numpy as np +from torch.utils.data import Dataset + + +class WrapDataset(Dataset): + """Wrap raw data into pytorch Dataset + + Arguments: + dataset (dict): raw data dictionary contains "x" and "y" + + """ + def __init__(self, dataset): + super(WrapDataset, self).__init__() + self.dataset = dataset + + def __getitem__(self, idx): + if isinstance(self.dataset["x"][idx], torch.Tensor): + return self.dataset["x"][idx], self.dataset["y"][idx] + elif isinstance(self.dataset["x"][idx], np.ndarray): + return torch.from_numpy( + self.dataset["x"][idx]).float(), torch.from_numpy( + self.dataset["y"][idx]).float() + elif isinstance(self.dataset["x"][idx], list): + return torch.FloatTensor(self.dataset["x"][idx]), \ + torch.FloatTensor(self.dataset["y"][idx]) + else: + raise TypeError + + def __len__(self): + return len(self.dataset["y"]) diff --git a/federatedscope/core/trainers/torch_trainer.py b/federatedscope/core/trainers/torch_trainer.py index 1cfc22155..a5c2a098a 100644 --- a/federatedscope/core/trainers/torch_trainer.py +++ b/federatedscope/core/trainers/torch_trainer.py @@ -16,7 +16,7 @@ from federatedscope.core.auxiliaries.scheduler_builder import get_scheduler from federatedscope.core.trainers.trainer import Trainer from federatedscope.core.trainers.context import CtxVar -from federatedscope.core.auxiliaries.dataloader_builder import WrapDataset +from federatedscope.core.data.wrap_dataset import WrapDataset from federatedscope.core.auxiliaries.dataloader_builder import get_dataloader from federatedscope.core.auxiliaries.ReIterator import ReIterator from federatedscope.core.auxiliaries.utils import param2tensor, \ diff --git a/federatedscope/tabular/dataloader/toy.py b/federatedscope/tabular/dataloader/toy.py index 75ed49420..a5a4cb0aa 100644 --- a/federatedscope/tabular/dataloader/toy.py +++ b/federatedscope/tabular/dataloader/toy.py @@ -2,7 +2,7 @@ import numpy as np -from federatedscope.core.auxiliaries.dataloader_builder import WrapDataset +from federatedscope.core.data.wrap_dataset import WrapDataset def load_toy_data(config=None): @@ -110,6 +110,7 @@ def _generate_data(client_num=5, else: with open(config.distribute.data_file, 'rb') as f: data = pickle.load(f) + data = {config.distribute.data_idx: data} for client_id in data.keys(): data[client_id] = { k: WrapDataset(v) diff --git a/tests/test_fedsageplus.py b/tests/test_fedsageplus.py index 44fa2c680..ee9706219 100644 --- a/tests/test_fedsageplus.py +++ b/tests/test_fedsageplus.py @@ -29,7 +29,9 @@ def set_config_fedsageplus(self, cfg): cfg.data.root = 'test_data/' cfg.data.type = 'cora' cfg.data.splitter = 'louvain' - cfg.data.batch_size = 1 + + cfg.dataloader.type = 'pyg' + cfg.dataloader.batch_size = 1 cfg.model.type = 'sage' cfg.model.hidden = 64 From 8eee1c7adc1d8a2f9ef31c8d542624b0786d4b0e Mon Sep 17 00:00:00 2001 From: rayrayraykk <18007356109@163.com> Date: Tue, 20 Sep 2022 19:48:38 +0800 Subject: [PATCH 25/39] add centralized torch trainer --- .../contrib/trainer/torch_example.py | 97 +++++++++++++++++++ federatedscope/core/trainers/__init__.py | 4 +- federatedscope/core/trainers/base_trainer.py | 29 ++++++ federatedscope/core/trainers/trainer.py | 3 +- federatedscope/core/workers/client.py | 5 +- 5 files changed, 133 insertions(+), 5 deletions(-) create mode 100644 
federatedscope/contrib/trainer/torch_example.py create mode 100644 federatedscope/core/trainers/base_trainer.py diff --git a/federatedscope/contrib/trainer/torch_example.py b/federatedscope/contrib/trainer/torch_example.py new file mode 100644 index 000000000..4e426d4a5 --- /dev/null +++ b/federatedscope/contrib/trainer/torch_example.py @@ -0,0 +1,97 @@ +import inspect +from federatedscope.register import register_trainer + +# An example for converting torch training process to FS training process +# Try with FEMNIST: +# python federatedscope/main.py --cfg scripts/example_configs/femnist.yaml \ +# trainer.type mytorchtrainer federate.sample_client_rate 0.01 \ +# federate.total_round_num 5 eval.best_res_update_round_wise_key test_loss + + +class MyTorchTrainer(object): + def __init__(self, model, data, device, **kwargs): + import torch + # NN modules + self.model = model + # FS `ClientData` or your own data + self.data = data + # Device name + self.device = device + # kwargs + self.kwargs = kwargs + # Criterion & Optimizer + self.criterion = torch.nn.CrossEntropyLoss() + self.optimizer = torch.optim.SGD(self.model.parameters(), + lr=0.001, + momentum=0.9, + weight_decay=1e-4) + + def train(self): + # _hook_on_fit_start_init + self.model.to(self.device) + self.model.train() + self.optimizer.zero_grad() + + total_loss = num_samples = 0 + # _hook_on_batch_start_init + for x, y in self.data['train']: + # _hook_on_batch_forward + x, y = x.to(self.device), y.to(self.device) + outputs = self.model(x) + loss = self.criterion(outputs, y) + + # _hook_on_batch_backward + loss.backward() + self.optimizer.step() + + # _hook_on_batch_end + total_loss += loss.item() * y.shape[0] + num_samples += y.shape[0] + + # _hook_on_fit_end + return num_samples, self.model.cpu().state_dict(), \ + {'loss_total': total_loss} + + def evaluate(self, target_data_split_name='test'): + import torch + with torch.no_grad(): + self.model.to(self.device) + self.model.eval() + total_loss = num_samples = 0 + # _hook_on_batch_start_init + for x, y in self.data[target_data_split_name]: + # _hook_on_batch_forward + x, y = x.to(self.device), y.to(self.device) + pred = self.model(x) + loss = self.criterion(pred, y) + + # _hook_on_batch_end + total_loss += loss.item() * y.shape[0] + num_samples += y.shape[0] + + # _hook_on_fit_end + return { + f'{target_data_split_name}_loss': total_loss, + f'{target_data_split_name}_total': num_samples + } + + def update(self, model_parameters, strict=False): + self.model.load_state_dict(model_parameters, strict) + + def get_model_para(self): + return self.model.cpu().state_dict() + + def print_trainer_meta_info(self): + sign = inspect.signature(self.__init__).parameters.values() + meta_info = tuple([(val.name, getattr(self, val.name)) + for val in sign]) + return f'{self.__class__.__name__}{meta_info}' + + +def call_my_torch_trainer(trainer_type): + if trainer_type == 'mytorchtrainer': + trainer_builder = MyTorchTrainer + return trainer_builder + + +register_trainer('mytorchtrainer', call_my_torch_trainer) diff --git a/federatedscope/core/trainers/__init__.py b/federatedscope/core/trainers/__init__.py index a4645c251..de9221ee1 100644 --- a/federatedscope/core/trainers/__init__.py +++ b/federatedscope/core/trainers/__init__.py @@ -1,3 +1,4 @@ +from federatedscope.core.trainers.base_trainer import BaseTrainer from federatedscope.core.trainers.trainer import Trainer from federatedscope.core.trainers.torch_trainer import GeneralTorchTrainer from federatedscope.core.trainers.trainer_multi_model import \ 
@@ -13,5 +14,6 @@ __all__ = [ 'Trainer', 'Context', 'GeneralTorchTrainer', 'GeneralMultiModelTrainer', 'wrap_pFedMeTrainer', 'wrap_DittoTrainer', 'FedEMTrainer', - 'wrap_fedprox_trainer', 'wrap_nbafl_trainer', 'wrap_nbafl_server' + 'wrap_fedprox_trainer', 'wrap_nbafl_trainer', 'wrap_nbafl_server', + 'BaseTrainer' ] diff --git a/federatedscope/core/trainers/base_trainer.py b/federatedscope/core/trainers/base_trainer.py new file mode 100644 index 000000000..50bf57245 --- /dev/null +++ b/federatedscope/core/trainers/base_trainer.py @@ -0,0 +1,29 @@ +import abc + + +class BaseTrainer(abc.ABC): + def __init__(self, model, data, device, **kwargs): + self.model = model + self.data = data + self.device = device + self.kwargs = kwargs + + @abc.abstractmethod + def train(self): + raise NotImplementedError + + @abc.abstractmethod + def evaluate(self, target_data_split_name='test'): + raise NotImplementedError + + @abc.abstractmethod + def update(self, model_parameters, strict=False): + raise NotImplementedError + + @abc.abstractmethod + def get_model_para(self): + raise NotImplementedError + + @abc.abstractmethod + def print_trainer_meta_info(self): + raise NotImplementedError diff --git a/federatedscope/core/trainers/trainer.py b/federatedscope/core/trainers/trainer.py index d0f77c1a1..409b6631f 100644 --- a/federatedscope/core/trainers/trainer.py +++ b/federatedscope/core/trainers/trainer.py @@ -2,6 +2,7 @@ import copy import logging +from federatedscope.core.trainers.base_trainer import BaseTrainer from federatedscope.core.auxiliaries.enums import MODE from federatedscope.core.auxiliaries.enums import LIFECYCLE from federatedscope.core.auxiliaries.decorators import use_diff @@ -23,7 +24,7 @@ logger = logging.getLogger(__name__) -class Trainer(object): +class Trainer(BaseTrainer): """ Register, organize and run the train/test/val procedures """ diff --git a/federatedscope/core/workers/client.py b/federatedscope/core/workers/client.py index 7be858695..decb46e1c 100644 --- a/federatedscope/core/workers/client.py +++ b/federatedscope/core/workers/client.py @@ -410,11 +410,10 @@ def callback_funcs_for_join_in_info(self, message: Message): self._cfg.dataloader.batch_size else: num_sample = self._cfg.train.local_update_steps * \ - self.trainer.ctx.num_train_batch + len(self.data['train']) join_in_info['num_sample'] = num_sample if self._cfg.trainer.type == 'nodefullbatch_trainer': - join_in_info['num_sample'] = \ - self.trainer.ctx.data['data'].x.shape[0] + join_in_info['num_sample'] = self.data['data'].x.shape[0] elif requirement.lower() == 'client_resource': assert self.comm_bandwidth is not None and self.comp_speed \ is not None, "The requirement join_in_info " \ From dc913154cb54e9faba0cb7b3f9c015b6ec242fd5 Mon Sep 17 00:00:00 2001 From: rayrayraykk <18007356109@163.com> Date: Tue, 20 Sep 2022 19:50:52 +0800 Subject: [PATCH 26/39] update docs --- federatedscope/contrib/trainer/torch_example.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/federatedscope/contrib/trainer/torch_example.py b/federatedscope/contrib/trainer/torch_example.py index 4e426d4a5..336408a11 100644 --- a/federatedscope/contrib/trainer/torch_example.py +++ b/federatedscope/contrib/trainer/torch_example.py @@ -1,14 +1,18 @@ import inspect from federatedscope.register import register_trainer +from federatedscope.core.trainers import BaseTrainer # An example for converting torch training process to FS training process + +# Refer to `federatedscope.core.trainers.BaseTrainer` for interface. 
+ # Try with FEMNIST: # python federatedscope/main.py --cfg scripts/example_configs/femnist.yaml \ # trainer.type mytorchtrainer federate.sample_client_rate 0.01 \ # federate.total_round_num 5 eval.best_res_update_round_wise_key test_loss -class MyTorchTrainer(object): +class MyTorchTrainer(BaseTrainer): def __init__(self, model, data, device, **kwargs): import torch # NN modules From b42463c1031aa19290e6a397da281903f177f8d0 Mon Sep 17 00:00:00 2001 From: rayrayraykk <18007356109@163.com> Date: Tue, 20 Sep 2022 20:09:58 +0800 Subject: [PATCH 27/39] add abc for client and server --- federatedscope/core/workers/base_client.py | 121 +++++++++++++++++++++ federatedscope/core/workers/base_server.py | 74 +++++++++++++ federatedscope/core/workers/client.py | 29 +---- federatedscope/core/workers/server.py | 22 +--- 4 files changed, 199 insertions(+), 47 deletions(-) create mode 100644 federatedscope/core/workers/base_client.py create mode 100644 federatedscope/core/workers/base_server.py diff --git a/federatedscope/core/workers/base_client.py b/federatedscope/core/workers/base_client.py new file mode 100644 index 000000000..8ec2d3b83 --- /dev/null +++ b/federatedscope/core/workers/base_client.py @@ -0,0 +1,121 @@ +import abc +from federatedscope.core.workers.base_worker import Worker + + +class BaseClient(Worker): + def __init__(self, ID, state, config, model, strategy): + super(BaseClient, self).__init__(ID, state, config, model, strategy) + self.msg_handlers = dict() + + def register_handlers(self, msg_type, callback_func): + """ + To bind a message type with a handling function. + + Arguments: + msg_type (str): The defined message type + callback_func: The handling functions to handle the received + message + """ + self.msg_handlers[msg_type] = callback_func + + def _register_default_handlers(self): + self.register_handlers('assign_client_id', + self.callback_funcs_for_assign_id) + self.register_handlers('ask_for_join_in_info', + self.callback_funcs_for_join_in_info) + self.register_handlers('address', self.callback_funcs_for_address) + self.register_handlers('model_para', + self.callback_funcs_for_model_para) + self.register_handlers('ss_model_para', + self.callback_funcs_for_model_para) + self.register_handlers('evaluate', self.callback_funcs_for_evaluate) + self.register_handlers('finish', self.callback_funcs_for_finish) + self.register_handlers('converged', self.callback_funcs_for_converged) + + @abc.abstractmethod + def run(self): + """ + To listen to the message and handle them accordingly (used for + distributed mode) + """ + raise NotImplementedError + + @abc.abstractmethod + def callback_funcs_for_model_para(self, message): + """ + The handling function for receiving model parameters, + which triggers the local training process. + This handling function is widely used in various FL courses. + + Arguments: + message: The received message, which includes sender, receiver, + state, and content. + More detail can be found in federatedscope.core.message + """ + raise NotImplementedError + + @abc.abstractmethod + def callback_funcs_for_assign_id(self, message): + """ + The handling function for receiving the client_ID assigned by the + server (during the joining process), + which is used in the distributed mode. 
+
+        Arguments:
+            message: The received message
+        """
+        raise NotImplementedError
+
+    @abc.abstractmethod
+    def callback_funcs_for_join_in_info(self, message):
+        """
+        The handling function for receiving the request for join-in
+        information (such as batch_size, num_of_samples) during the
+        joining process.
+
+        Arguments:
+            message: The received message
+        """
+        raise NotImplementedError
+
+    @abc.abstractmethod
+    def callback_funcs_for_address(self, message):
+        """
+        The handling function for receiving other clients' IP addresses,
+        which is used for constructing a complex topology.
+
+        Arguments:
+            message: The received message
+        """
+        raise NotImplementedError
+
+    @abc.abstractmethod
+    def callback_funcs_for_evaluate(self, message):
+        """
+        The handling function for receiving the request for evaluation.
+
+        Arguments:
+            message: The received message
+        """
+        raise NotImplementedError
+
+    @abc.abstractmethod
+    def callback_funcs_for_finish(self, message):
+        """
+        The handling function for receiving the signal of finishing the FL
+        course.
+
+        Arguments:
+            message: The received message
+        """
+        raise NotImplementedError
+
+    @abc.abstractmethod
+    def callback_funcs_for_converged(self, message):
+        """
+        The handling function for receiving the signal that the FL course
+        has converged.
+
+        Arguments:
+            message: The received message
+        """
+        raise NotImplementedError
diff --git a/federatedscope/core/workers/base_server.py b/federatedscope/core/workers/base_server.py
new file mode 100644
index 000000000..10788bf0b
--- /dev/null
+++ b/federatedscope/core/workers/base_server.py
@@ -0,0 +1,74 @@
+import abc
+from federatedscope.core.workers import Worker
+
+
+class BaseServer(Worker):
+    def __init__(self, ID, state, config, model, strategy):
+        super(BaseServer, self).__init__(ID, state, config, model, strategy)
+        self.msg_handlers = dict()
+
+    def register_handlers(self, msg_type, callback_func):
+        """
+        To bind a message type with a handling function.
+
+        Arguments:
+            msg_type (str): The defined message type
+            callback_func: The handling functions to handle the received
+                message
+        """
+        self.msg_handlers[msg_type] = callback_func
+
+    def _register_default_handlers(self):
+        self.register_handlers('join_in', self.callback_funcs_for_join_in)
+        self.register_handlers('join_in_info', self.callback_funcs_for_join_in)
+        self.register_handlers('model_para', self.callback_funcs_model_para)
+        self.register_handlers('metrics', self.callback_funcs_for_metrics)
+
+    @abc.abstractmethod
+    def run(self):
+        """
+        To start the FL course, listen and handle messages (for distributed
+        mode).
+        """
+        raise NotImplementedError
+
+    @abc.abstractmethod
+    def callback_funcs_model_para(self, message):
+        """
+        The handling function for receiving model parameters, which triggers
+        check_and_move_on (perform aggregation when enough feedback has
+        been received).
+        This handling function is widely used in various FL courses.
+
+        Arguments:
+            message: The received message, which includes sender, receiver,
+                state, and content. More detail can be found in
+                federatedscope.core.message
+        """
+        raise NotImplementedError
+
+    @abc.abstractmethod
+    def callback_funcs_for_join_in(self, message):
+        """
+        The handling function for receiving the join in information. The
+        server might request some information (such as num_of_samples)
+        if necessary and assign IDs for the clients.
+        If all the clients have joined in, the training process will be
+        triggered.
+ + Arguments: + message: The received message + """ + raise NotImplementedError + + @abc.abstractmethod + def callback_funcs_for_metrics(self, message): + """ + The handling function for receiving the evaluation results, + which triggers check_and_move_on + (perform aggregation when enough feedback has been received). + + Arguments: + message: The received message + """ + raise NotImplementedError diff --git a/federatedscope/core/workers/client.py b/federatedscope/core/workers/client.py index decb46e1c..edbf9dc97 100644 --- a/federatedscope/core/workers/client.py +++ b/federatedscope/core/workers/client.py @@ -12,11 +12,12 @@ from federatedscope.core.secret_sharing import AdditiveSecretSharing from federatedscope.core.auxiliaries.utils import merge_dict, \ calculate_time_cost +from federatedscope.core.workers.base_client import BaseClient logger = logging.getLogger(__name__) -class Client(Worker): +class Client(BaseClient): """ The Client class, which describes the behaviors of client in an FL course. The behaviors are described by the handling functions (named as @@ -92,7 +93,6 @@ def __init__(self, self.msg_buffer = {'train': dict(), 'eval': dict()} # Register message handlers - self.msg_handlers = dict() self._register_default_handlers() # Communication and communication ability @@ -161,31 +161,6 @@ def _calculate_model_delta(self, init_model, updated_model): else: return model_deltas[0] - def register_handlers(self, msg_type, callback_func): - """ - To bind a message type with a handling function. - - Arguments: - msg_type (str): The defined message type - callback_func: The handling functions to handle the received - message - """ - self.msg_handlers[msg_type] = callback_func - - def _register_default_handlers(self): - self.register_handlers('assign_client_id', - self.callback_funcs_for_assign_id) - self.register_handlers('ask_for_join_in_info', - self.callback_funcs_for_join_in_info) - self.register_handlers('address', self.callback_funcs_for_address) - self.register_handlers('model_para', - self.callback_funcs_for_model_para) - self.register_handlers('ss_model_para', - self.callback_funcs_for_model_para) - self.register_handlers('evaluate', self.callback_funcs_for_evaluate) - self.register_handlers('finish', self.callback_funcs_for_finish) - self.register_handlers('converged', self.callback_funcs_for_converged) - def join_in(self): """ To send 'join_in' message to the server for joining in the FL course. diff --git a/federatedscope/core/workers/server.py b/federatedscope/core/workers/server.py index 38ceb6435..340f74ec4 100644 --- a/federatedscope/core/workers/server.py +++ b/federatedscope/core/workers/server.py @@ -10,18 +10,18 @@ from federatedscope.core.message import Message from federatedscope.core.communication import StandaloneCommManager, \ gRPCCommManager -from federatedscope.core.workers import Worker from federatedscope.core.auxiliaries.aggregator_builder import get_aggregator from federatedscope.core.auxiliaries.sampler_builder import get_sampler from federatedscope.core.auxiliaries.utils import merge_dict, Timeout, \ merge_param_dict from federatedscope.core.auxiliaries.trainer_builder import get_trainer from federatedscope.core.secret_sharing import AdditiveSecretSharing +from federatedscope.core.workers.base_server import BaseServer logger = logging.getLogger(__name__) -class Server(Worker): +class Server(BaseServer): """ The Server class, which describes the behaviors of server in an FL course. 
The behaviors are described by the handled functions (named as @@ -166,7 +166,6 @@ def __init__(self, if 'client_resource_info' in kwargs else None # Register message handlers - self.msg_handlers = dict() self._register_default_handlers() # Initialize communication manager and message buffer @@ -206,23 +205,6 @@ def total_round_num(self, value): def register_noise_injector(self, func): self._noise_injector = func - def register_handlers(self, msg_type, callback_func): - """ - To bind a message type with a handling function. - - Arguments: - msg_type (str): The defined message type - callback_func: The handling functions to handle the received - message - """ - self.msg_handlers[msg_type] = callback_func - - def _register_default_handlers(self): - self.register_handlers('join_in', self.callback_funcs_for_join_in) - self.register_handlers('join_in_info', self.callback_funcs_for_join_in) - self.register_handlers('model_para', self.callback_funcs_model_para) - self.register_handlers('metrics', self.callback_funcs_for_metrics) - def run(self): """ To start the FL course, listen and handle messages (for distributed From 786011574cca578a7cfccf1ba5cd484082e39f92 Mon Sep 17 00:00:00 2001 From: Weirui Kuang <39145382+rayrayraykk@users.noreply.github.com> Date: Tue, 27 Sep 2022 13:00:34 +0800 Subject: [PATCH 28/39] fix minor bug --- federatedscope/contrib/trainer/torch_example.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/federatedscope/contrib/trainer/torch_example.py b/federatedscope/contrib/trainer/torch_example.py index 336408a11..4ecaf18b4 100644 --- a/federatedscope/contrib/trainer/torch_example.py +++ b/federatedscope/contrib/trainer/torch_example.py @@ -34,7 +34,6 @@ def train(self): # _hook_on_fit_start_init self.model.to(self.device) self.model.train() - self.optimizer.zero_grad() total_loss = num_samples = 0 # _hook_on_batch_start_init @@ -45,6 +44,7 @@ def train(self): loss = self.criterion(outputs, y) # _hook_on_batch_backward + self.optimizer.zero_grad() loss.backward() self.optimizer.step() From 070b4327100bbeb458ab14630e4ddbb82e82706c Mon Sep 17 00:00:00 2001 From: rayrayraykk <18007356109@163.com> Date: Sat, 8 Oct 2022 16:33:15 +0800 Subject: [PATCH 29/39] minor changes --- .../core/auxiliaries/dataloader_builder.py | 103 ++++++++++-------- federatedscope/core/data/base_data.py | 9 +- federatedscope/core/data/base_translator.py | 4 + 3 files changed, 67 insertions(+), 49 deletions(-) diff --git a/federatedscope/core/auxiliaries/dataloader_builder.py b/federatedscope/core/auxiliaries/dataloader_builder.py index 2b6f934db..5e9cd5df3 100644 --- a/federatedscope/core/auxiliaries/dataloader_builder.py +++ b/federatedscope/core/auxiliaries/dataloader_builder.py @@ -9,51 +9,64 @@ def get_dataloader(dataset, config, split='train'): - if config.backend == 'torch': - if config.dataloader.type == 'base': - from torch.utils.data import DataLoader - loader_cls = DataLoader - elif config.dataloader.type == 'raw': - loader_cls = None - elif config.dataloader.type == 'pyg': - from torch_geometric.loader import DataLoader as PyGDataLoader - loader_cls = PyGDataLoader - elif config.dataloader.type == 'graphsaint-rw': - if split == 'train': - from torch_geometric.loader import GraphSAINTRandomWalkSampler - loader_cls = GraphSAINTRandomWalkSampler - else: - from torch_geometric.loader import NeighborSampler - loader_cls = NeighborSampler - elif config.dataloader.type == 'neighbor': + """ + Instantiate a DataLoader via config. 
+ + Args: + dataset: dataset from which to load the data. + config: configs containing batch_size, shuffle, etc. + split: current split (default: 'train'), if split is 'test', shuffle + will be `False`. And in PyG, 'test' split will use + `NeighborSampler` by default. + + Returns: + dataloader: Instance of specific DataLoader configured by config. + + """ + # DataLoader builder only support torch backend now. + if config.backend != 'torch': + return None + + if config.dataloader.type == 'base': + from torch.utils.data import DataLoader + loader_cls = DataLoader + elif config.dataloader.type == 'raw': + loader_cls = None + elif config.dataloader.type == 'pyg': + from torch_geometric.loader import DataLoader as PyGDataLoader + loader_cls = PyGDataLoader + elif config.dataloader.type == 'graphsaint-rw': + if split == 'train': + from torch_geometric.loader import GraphSAINTRandomWalkSampler + loader_cls = GraphSAINTRandomWalkSampler + else: from torch_geometric.loader import NeighborSampler loader_cls = NeighborSampler - elif config.dataloader.type == 'mf': - from federatedscope.mf.dataloader import MFDataLoader - loader_cls = MFDataLoader - else: - raise ValueError(f'data.loader.type {config.data.loader.type} ' - f'not found!') - if loader_cls is not None: - raw_args = dict(config.dataloader) - if split != 'train': - raw_args['shuffle'] = False - raw_args['sizes'] = [-1] - # For evaluation in GFL - if config.dataloader.type in ['graphsaint-rw', 'neighbor']: - raw_args['batch_size'] = 4096 - dataset = dataset[0].edge_index - else: - if config.dataloader.type in ['graphsaint-rw']: - # Raw graph - dataset = dataset[0] - elif config.dataloader.type in ['neighbor']: - # edge_index of raw graph - dataset = dataset[0].edge_index - filtered_args = filter_dict(loader_cls.__init__, raw_args) - dataloader = loader_cls(dataset, **filtered_args) - return dataloader - else: - return dataset + elif config.dataloader.type == 'neighbor': + from torch_geometric.loader import NeighborSampler + loader_cls = NeighborSampler + elif config.dataloader.type == 'mf': + from federatedscope.mf.dataloader import MFDataLoader + loader_cls = MFDataLoader else: - return None + raise ValueError(f'data.loader.type {config.data.loader.type} ' + f'not found!') + if loader_cls is not None: + raw_args = dict(config.dataloader) + if split != 'train': + raw_args['shuffle'] = False + raw_args['sizes'] = [-1] + # For evaluation in GFL + if config.dataloader.type in ['graphsaint-rw', 'neighbor']: + raw_args['batch_size'] = 4096 + dataset = dataset[0].edge_index + else: + if config.dataloader.type in ['graphsaint-rw']: + # Raw graph + dataset = dataset[0] + elif config.dataloader.type in ['neighbor']: + # edge_index of raw graph + dataset = dataset[0].edge_index + filtered_args = filter_dict(loader_cls.__init__, raw_args) + dataloader = loader_cls(dataset, **filtered_args) + return dataloader diff --git a/federatedscope/core/data/base_data.py b/federatedscope/core/data/base_data.py index 262173fd9..d71b0e6fb 100644 --- a/federatedscope/core/data/base_data.py +++ b/federatedscope/core/data/base_data.py @@ -9,8 +9,6 @@ class StandaloneDataDict(dict): """ `StandaloneDataDict` maintain several `ClientData`. 
""" - client_cfgs = None - def __init__(self, datadict, global_cfg): """ @@ -19,6 +17,7 @@ def __init__(self, datadict, global_cfg): global_cfg: global CfgNode """ self.cfg = global_cfg + self.client_cfgs = None datadict = self.preprocess(datadict) super(StandaloneDataDict, self).__init__(datadict) @@ -28,7 +27,7 @@ def resetup(self, global_cfg, client_cfgs=None): Args: global_cfg: enable new config for `ClientData` - client_cfg: enable new client-specific config for `ClientData` + client_cfgs: enable new client-specific config for `ClientData` """ self.cfg, self.client_cfgs = global_cfg, client_cfgs for client_id, client_data in self.items(): @@ -47,7 +46,9 @@ def resetup(self, global_cfg, client_cfgs=None): def preprocess(self, datadict): """ - Preprocess for StandaloneDataDict. + Preprocess for StandaloneDataDict for: + 1. Global evaluation (merge test data). + 2. Global mode (train with centralized setting, merge all data). Args: datadict: dict with `client_id` as key, `ClientData` as value. diff --git a/federatedscope/core/data/base_translator.py b/federatedscope/core/data/base_translator.py index ade4c16d1..2075f7dbe 100644 --- a/federatedscope/core/data/base_translator.py +++ b/federatedscope/core/data/base_translator.py @@ -32,6 +32,10 @@ def __call__(self, dataset): dataset: `torch.utils.data.Dataset`, `List` of (feature, label) or split dataset tuple of (train, val, test) or Tuple of split dataset with [train, val, test] + + Returns: + datadict: instance of `StandaloneDataDict`, which is a subclass of + `dict`. """ datadict = self.split(dataset) datadict = StandaloneDataDict(datadict, self.global_cfg) From e6fcf990c1e3cc934c3149d7c03a2dd601adf4ef Mon Sep 17 00:00:00 2001 From: rayrayraykk <18007356109@163.com> Date: Sat, 8 Oct 2022 17:14:31 +0800 Subject: [PATCH 30/39] fix bugs --- .../core/auxiliaries/dataloader_builder.py | 40 +++++++++---------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/federatedscope/core/auxiliaries/dataloader_builder.py b/federatedscope/core/auxiliaries/dataloader_builder.py index 5e9cd5df3..2a785155a 100644 --- a/federatedscope/core/auxiliaries/dataloader_builder.py +++ b/federatedscope/core/auxiliaries/dataloader_builder.py @@ -31,7 +31,7 @@ def get_dataloader(dataset, config, split='train'): from torch.utils.data import DataLoader loader_cls = DataLoader elif config.dataloader.type == 'raw': - loader_cls = None + return None elif config.dataloader.type == 'pyg': from torch_geometric.loader import DataLoader as PyGDataLoader loader_cls = PyGDataLoader @@ -51,22 +51,22 @@ def get_dataloader(dataset, config, split='train'): else: raise ValueError(f'data.loader.type {config.data.loader.type} ' f'not found!') - if loader_cls is not None: - raw_args = dict(config.dataloader) - if split != 'train': - raw_args['shuffle'] = False - raw_args['sizes'] = [-1] - # For evaluation in GFL - if config.dataloader.type in ['graphsaint-rw', 'neighbor']: - raw_args['batch_size'] = 4096 - dataset = dataset[0].edge_index - else: - if config.dataloader.type in ['graphsaint-rw']: - # Raw graph - dataset = dataset[0] - elif config.dataloader.type in ['neighbor']: - # edge_index of raw graph - dataset = dataset[0].edge_index - filtered_args = filter_dict(loader_cls.__init__, raw_args) - dataloader = loader_cls(dataset, **filtered_args) - return dataloader + + raw_args = dict(config.dataloader) + if split != 'train': + raw_args['shuffle'] = False + raw_args['sizes'] = [-1] + # For evaluation in GFL + if config.dataloader.type in ['graphsaint-rw', 'neighbor']: + 
raw_args['batch_size'] = 4096 + dataset = dataset[0].edge_index + else: + if config.dataloader.type in ['graphsaint-rw']: + # Raw graph + dataset = dataset[0] + elif config.dataloader.type in ['neighbor']: + # edge_index of raw graph + dataset = dataset[0].edge_index + filtered_args = filter_dict(loader_cls.__init__, raw_args) + dataloader = loader_cls(dataset, **filtered_args) + return dataloader From f81b17d5a03c6bf1a3bf031a5bd94f0d7d35a1b5 Mon Sep 17 00:00:00 2001 From: rayrayraykk <18007356109@163.com> Date: Sun, 9 Oct 2022 16:32:59 +0800 Subject: [PATCH 31/39] fix minor bug --- federatedscope/core/auxiliaries/dataloader_builder.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/federatedscope/core/auxiliaries/dataloader_builder.py b/federatedscope/core/auxiliaries/dataloader_builder.py index 2a785155a..2f8089622 100644 --- a/federatedscope/core/auxiliaries/dataloader_builder.py +++ b/federatedscope/core/auxiliaries/dataloader_builder.py @@ -31,7 +31,8 @@ def get_dataloader(dataset, config, split='train'): from torch.utils.data import DataLoader loader_cls = DataLoader elif config.dataloader.type == 'raw': - return None + # No DataLoader + return dataset elif config.dataloader.type == 'pyg': from torch_geometric.loader import DataLoader as PyGDataLoader loader_cls = PyGDataLoader From ba5df5482dd154f93290f3a8cbbb4c545b424f60 Mon Sep 17 00:00:00 2001 From: rayrayraykk <18007356109@163.com> Date: Sun, 9 Oct 2022 16:53:45 +0800 Subject: [PATCH 32/39] update merge_data according to #385 --- federatedscope/core/data/utils.py | 65 ++++++++++++++++++++++--------- 1 file changed, 46 insertions(+), 19 deletions(-) diff --git a/federatedscope/core/data/utils.py b/federatedscope/core/data/utils.py index cc3835765..1b46cac67 100644 --- a/federatedscope/core/data/utils.py +++ b/federatedscope/core/data/utils.py @@ -1,3 +1,4 @@ +import copy import inspect import logging import os @@ -531,32 +532,56 @@ def filter_dict(func, kwarg): return filtered_dict -def merge_data(all_data, merged_max_data_id, specified_dataset_name=None): +def merge_data(all_data, merged_max_data_id=None, specified_dataset_name=None): + """ + Merge data from client 1 to `merged_max_data_id` contained in given + `all_data`. 
+    :param all_data: dict of all clients' data with client ids as keys
+    :param merged_max_data_id: merge the data of clients in [1, this id]
+    :param specified_dataset_name: split name(s) to merge, e.g., 'test'
+    :return: the merged data
+    """
+    import torch.utils.data
     from federatedscope.core.data.wrap_dataset import WrapDataset
+
+    # Sanity checks
+    if merged_max_data_id is None:
+        merged_max_data_id = len(all_data) - 1
+    assert merged_max_data_id >= 1
     if specified_dataset_name is None:
         dataset_names = list(all_data[1].keys())  # e.g., train, test, val
     else:
         if not isinstance(specified_dataset_name, list):
             specified_dataset_name = [specified_dataset_name]
         dataset_names = specified_dataset_name
-
-    import torch.utils.data
     assert len(dataset_names) >= 1, \
         "At least one sub-dataset is required in client 1"
+
     data_name = "test" if "test" in dataset_names else dataset_names[0]
-    id_has_key = 1
-    while "test" not in all_data[id_has_key]:
-        id_has_key += 1
-        if len(all_data) <= id_has_key:
-            raise KeyError(f'All data do not key {data_name}.')
-    if issubclass(type(all_data[id_has_key][data_name]),
+    id_contain_all_dataset_key = -1
+    # check the existence of the data to be merged
+    for client_id in range(1, merged_max_data_id + 1):
+        contain_all_dataset_key = True
+        for dataset_name in dataset_names:
+            if dataset_name not in all_data[client_id]:
+                contain_all_dataset_key = False
+                logger.warning(f'Client {client_id} does not contain '
+                               f'dataset key {dataset_name}.')
+        if id_contain_all_dataset_key == -1 and contain_all_dataset_key:
+            id_contain_all_dataset_key = client_id
+    assert id_contain_all_dataset_key != -1, \
+        "At least one client within [1, merged_max_data_id] should contain " \
+        "all the keys for the expected dataset names."
+
+    if issubclass(type(all_data[id_contain_all_dataset_key][data_name]),
                   torch.utils.data.DataLoader):
-        if isinstance(all_data[id_has_key][data_name].dataset, WrapDataset):
-            data_elem_names = list(
-                all_data[id_has_key][data_name].dataset.dataset.keys())  #
+        if isinstance(all_data[id_contain_all_dataset_key][data_name].dataset,
+                      WrapDataset):
+            data_elem_names = list(all_data[id_contain_all_dataset_key]
+                                   [data_name].dataset.dataset.keys())  #
             # e.g., x, y
             merged_data = {name: defaultdict(list) for name in dataset_names}
-            for data_id in range(1, merged_max_data_id):
+            for data_id in range(1, merged_max_data_id + 1):
                 for d_name in dataset_names:
                     if d_name not in all_data[data_id]:
                         continue
                     for elem_name in data_elem_names:
                         merged_data[d_name][elem_name].append(
                             all_data[data_id]
                             [d_name].dataset.dataset[elem_name])
             for d_name in dataset_names:
                 for elem_name in data_elem_names:
                     merged_data[d_name][elem_name] = np.concatenate(
                         merged_data[d_name][elem_name])
-            for name in all_data[id_has_key]:
-                all_data[id_has_key][name].dataset.dataset = merged_data[name]
+            for name in all_data[id_contain_all_dataset_key]:
+                all_data[id_contain_all_dataset_key][
+                    name].dataset.dataset = merged_data[name]
         else:
-            merged_data = all_data[id_has_key]
-            for data_id in range(1, merged_max_data_id):
-                if data_id == id_has_key:
+            merged_data = copy.deepcopy(all_data[id_contain_all_dataset_key])
+            for data_id in range(1, merged_max_data_id + 1):
+                if data_id == id_contain_all_dataset_key:
                     continue
                 for d_name in dataset_names:
                     if d_name not in all_data[data_id]:
                         continue
                     merged_data[d_name].dataset.extend(
                         all_data[data_id][d_name].dataset)
     else:
         raise NotImplementedError(
             "Un-supported type when merging data across different clients."
-            f"Your data type is {type(all_data[id_has_key][data_name])}. "
+            f"Your data type is "
+            f"{type(all_data[id_contain_all_dataset_key][data_name])}. 
" f"Currently we only support the following forms: " " 1): {data_id: {train: {x:ndarray, y:ndarray}} }" " 2): {data_id: {train: DataLoader }") From edb4d4b4e3bc7b5257327534c1fdcf66c7eb7460 Mon Sep 17 00:00:00 2001 From: rayrayraykk <18007356109@163.com> Date: Mon, 10 Oct 2022 11:39:19 +0800 Subject: [PATCH 33/39] remove unnecessary clone of cfg --- federatedscope/core/data/base_data.py | 46 ++++++++++----------- federatedscope/core/data/base_translator.py | 2 +- 2 files changed, 24 insertions(+), 24 deletions(-) diff --git a/federatedscope/core/data/base_data.py b/federatedscope/core/data/base_data.py index d71b0e6fb..c219f5049 100644 --- a/federatedscope/core/data/base_data.py +++ b/federatedscope/core/data/base_data.py @@ -16,7 +16,7 @@ def __init__(self, datadict, global_cfg): datadict: `Dict` with `client_id` as key, `ClientData` as value. global_cfg: global CfgNode """ - self.cfg = global_cfg + self.global_cfg = global_cfg self.client_cfgs = None datadict = self.preprocess(datadict) super(StandaloneDataDict, self).__init__(datadict) @@ -29,7 +29,7 @@ def resetup(self, global_cfg, client_cfgs=None): global_cfg: enable new config for `ClientData` client_cfgs: enable new client-specific config for `ClientData` """ - self.cfg, self.client_cfgs = global_cfg, client_cfgs + self.global_cfg, self.client_cfgs = global_cfg, client_cfgs for client_id, client_data in self.items(): if isinstance(client_data, ClientData): if client_cfgs is not None: @@ -53,17 +53,17 @@ def preprocess(self, datadict): Args: datadict: dict with `client_id` as key, `ClientData` as value. """ - if self.cfg.federate.merge_test_data: + if self.global_cfg.federate.merge_test_data: server_data = merge_data( all_data=datadict, - merged_max_data_id=self.cfg.federate.client_num, + merged_max_data_id=self.global_cfg.federate.client_num, specified_dataset_name=['test']) # `0` indicate Server datadict[0] = server_data - if self.cfg.federate.method == "global": - if self.cfg.federate.client_num != 1: - if self.cfg.data.server_holds_all: + if self.global_cfg.federate.method == "global": + if self.global_cfg.federate.client_num != 1: + if self.global_cfg.data.server_holds_all: assert datadict[0] is not None \ and len(datadict[0]) != 0, \ "You specified cfg.data.server_holds_all=True " \ @@ -72,10 +72,10 @@ def preprocess(self, datadict): datadict[1] = datadict[0] else: logger.info(f"Will merge data from clients whose ids in " - f"[1, {self.cfg.federate.client_num}]") + f"[1, {self.global_cfg.federate.client_num}]") datadict[1] = merge_data( all_data=datadict, - merged_max_data_id=self.cfg.federate.client_num) + merged_max_data_id=self.global_cfg.federate.client_num) datadict = self.attack(datadict) return datadict @@ -84,30 +84,31 @@ def attack(self, datadict): Apply attack to `StandaloneDataDict`. 
""" - if 'backdoor' in self.cfg.attack.attack_method and 'edge' in \ - self.cfg.attack.trigger_type: + if 'backdoor' in self.global_cfg.attack.attack_method and 'edge' in \ + self.global_cfg.attack.trigger_type: import os import torch from federatedscope.attack.auxiliary import \ create_ardis_poisoned_dataset, create_ardis_test_dataset - if not os.path.exists(self.cfg.attack.edge_path): - os.makedirs(self.cfg.attack.edge_path) + if not os.path.exists(self.global_cfg.attack.edge_path): + os.makedirs(self.global_cfg.attack.edge_path) poisoned_edgeset = create_ardis_poisoned_dataset( - data_path=self.cfg.attack.edge_path) + data_path=self.global_cfg.attack.edge_path) ardis_test_dataset = create_ardis_test_dataset( - self.cfg.attack.edge_path) + self.global_cfg.attack.edge_path) logger.info("Writing poison_data to: {}".format( - self.cfg.attack.edge_path)) + self.global_cfg.attack.edge_path)) with open( - self.cfg.attack.edge_path + + self.global_cfg.attack.edge_path + "poisoned_edgeset_training", "wb") as saved_data_file: torch.save(poisoned_edgeset, saved_data_file) - with open(self.cfg.attack.edge_path + "ardis_test_dataset.pt", - "wb") as ardis_data_file: + with open( + self.global_cfg.attack.edge_path + + "ardis_test_dataset.pt", "wb") as ardis_data_file: torch.save(ardis_test_dataset, ardis_data_file) logger.warning( 'please notice: downloading the poisoned dataset \ @@ -115,9 +116,9 @@ def attack(self, datadict): https://github.com/ksreenivasan/OOD_Federated_Learning' ) - if 'backdoor' in self.cfg.attack.attack_method: + if 'backdoor' in self.global_cfg.attack.attack_method: from federatedscope.attack.auxiliary import poisoning - poisoning(datadict, self.cfg) + poisoning(datadict, self.global_cfg) return datadict @@ -126,8 +127,6 @@ class ClientData(dict): `ClientData` converts dataset to train/val/test DataLoader. Key `data` in `ClientData` is the raw dataset. 
""" - client_cfg = None - def __init__(self, client_cfg, train=None, val=None, test=None, **kwargs): """ @@ -139,6 +138,7 @@ def __init__(self, client_cfg, train=None, val=None, test=None, **kwargs): val: valid dataset, which will be converted to DataLoader test: test dataset, which will be converted to DataLoader """ + self.client_cfg = None self.train = train self.val = val self.test = test diff --git a/federatedscope/core/data/base_translator.py b/federatedscope/core/data/base_translator.py index 2075f7dbe..9d8b7e995 100644 --- a/federatedscope/core/data/base_translator.py +++ b/federatedscope/core/data/base_translator.py @@ -21,7 +21,7 @@ def __init__(self, global_cfg, client_cfgs=None): global_cfg: global CfgNode client_cfgs: client cfg `Dict` """ - self.global_cfg = global_cfg.clone() + self.global_cfg = global_cfg self.client_cfgs = client_cfgs self.splitter = get_splitter(global_cfg) From b99cc31985dbe7de43be8ecec4641e10a96588bb Mon Sep 17 00:00:00 2001 From: rayrayraykk <18007356109@163.com> Date: Mon, 10 Oct 2022 12:20:59 +0800 Subject: [PATCH 34/39] minor changes --- federatedscope/contrib/data/example.py | 2 +- .../contrib/trainer/torch_example.py | 7 +++++-- federatedscope/core/auxiliaries/utils.py | 2 -- federatedscope/core/data/base_translator.py | 18 ++++++++++-------- 4 files changed, 16 insertions(+), 13 deletions(-) diff --git a/federatedscope/contrib/data/example.py b/federatedscope/contrib/data/example.py index da3e9c1cd..b896bdf7f 100644 --- a/federatedscope/contrib/data/example.py +++ b/federatedscope/contrib/data/example.py @@ -1,7 +1,7 @@ from federatedscope.register import register_data -def MyData(config, client_cfgs): +def MyData(config, client_cfgs=None): r""" Returns: data: diff --git a/federatedscope/contrib/trainer/torch_example.py b/federatedscope/contrib/trainer/torch_example.py index 4ecaf18b4..18cd5d7a0 100644 --- a/federatedscope/contrib/trainer/torch_example.py +++ b/federatedscope/contrib/trainer/torch_example.py @@ -54,7 +54,8 @@ def train(self): # _hook_on_fit_end return num_samples, self.model.cpu().state_dict(), \ - {'loss_total': total_loss} + {'loss_total': total_loss, 'avg_loss': total_loss/float( + num_samples)} def evaluate(self, target_data_split_name='test'): import torch @@ -76,7 +77,9 @@ def evaluate(self, target_data_split_name='test'): # _hook_on_fit_end return { f'{target_data_split_name}_loss': total_loss, - f'{target_data_split_name}_total': num_samples + f'{target_data_split_name}_total': num_samples, + f'{target_data_split_name}_avg_loss': total_loss / + float(num_samples) } def update(self, model_parameters, strict=False): diff --git a/federatedscope/core/auxiliaries/utils.py b/federatedscope/core/auxiliaries/utils.py index f2f488b66..190a96102 100644 --- a/federatedscope/core/auxiliaries/utils.py +++ b/federatedscope/core/auxiliaries/utils.py @@ -13,8 +13,6 @@ import numpy as np # Blind torch -import torch.utils - try: import torch import torchvision diff --git a/federatedscope/core/data/base_translator.py b/federatedscope/core/data/base_translator.py index 9d8b7e995..18fe34e2a 100644 --- a/federatedscope/core/data/base_translator.py +++ b/federatedscope/core/data/base_translator.py @@ -96,14 +96,16 @@ def split_to_client(self, train, val, test): train_label_distribution = None # Split train/val/test to client - if len(train) > 0: - split_train = self.splitter(train) - try: - train_label_distribution = [[j[1] for j in x] - for x in split_train] - except: - logger.warning('Cannot access train label distribution for ' - 
'splitter.') + if self.global_cfg.data.consistent_label_distribution: + if len(train) > 0: + split_train = self.splitter(train) + try: + train_label_distribution = [[j[1] for j in x] + for x in split_train] + except: + logger.warning( + 'Cannot access train label distribution for ' + 'splitter.') if len(val) > 0: split_val = self.splitter(val, prior=train_label_distribution) if len(test) > 0: From 551fe3a1b5b132252fa623d2cc8c67d56bf7f6b3 Mon Sep 17 00:00:00 2001 From: rayrayraykk <18007356109@163.com> Date: Mon, 10 Oct 2022 14:18:58 +0800 Subject: [PATCH 35/39] fix minor bug --- federatedscope/core/data/base_translator.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/federatedscope/core/data/base_translator.py b/federatedscope/core/data/base_translator.py index 18fe34e2a..4fc82a0d3 100644 --- a/federatedscope/core/data/base_translator.py +++ b/federatedscope/core/data/base_translator.py @@ -96,9 +96,9 @@ def split_to_client(self, train, val, test): train_label_distribution = None # Split train/val/test to client - if self.global_cfg.data.consistent_label_distribution: - if len(train) > 0: - split_train = self.splitter(train) + if len(train) > 0: + split_train = self.splitter(train) + if self.global_cfg.data.consistent_label_distribution: try: train_label_distribution = [[j[1] for j in x] for x in split_train] From 05c6514b6ce377f9342467b743858b1f5fcaeb72 Mon Sep 17 00:00:00 2001 From: Weirui Kuang <39145382+rayrayraykk@users.noreply.github.com> Date: Thu, 13 Oct 2022 18:16:43 +0800 Subject: [PATCH 36/39] Update dataloader_builder.py --- federatedscope/core/auxiliaries/dataloader_builder.py | 1 + 1 file changed, 1 insertion(+) diff --git a/federatedscope/core/auxiliaries/dataloader_builder.py b/federatedscope/core/auxiliaries/dataloader_builder.py index 2f8089622..a412d76d2 100644 --- a/federatedscope/core/auxiliaries/dataloader_builder.py +++ b/federatedscope/core/auxiliaries/dataloader_builder.py @@ -57,6 +57,7 @@ def get_dataloader(dataset, config, split='train'): if split != 'train': raw_args['shuffle'] = False raw_args['sizes'] = [-1] + raw_args['drop_last'] = False # For evaluation in GFL if config.dataloader.type in ['graphsaint-rw', 'neighbor']: raw_args['batch_size'] = 4096 From 88754224432557526bfedc9d2ff57be373f78605 Mon Sep 17 00:00:00 2001 From: rayrayraykk <18007356109@163.com> Date: Mon, 17 Oct 2022 12:13:50 +0800 Subject: [PATCH 37/39] update version --- federatedscope/__init__.py | 2 +- federatedscope/gfl/__init__.py | 2 +- setup.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/federatedscope/__init__.py b/federatedscope/__init__.py index d6e7a36f4..f15846754 100644 --- a/federatedscope/__init__.py +++ b/federatedscope/__init__.py @@ -1,6 +1,6 @@ from __future__ import absolute_import, division, print_function -__version__ = '0.2.0' +__version__ = '0.2.1' def _setup_logger(): diff --git a/federatedscope/gfl/__init__.py b/federatedscope/gfl/__init__.py index 7fd229a32..fc79d63d5 100644 --- a/federatedscope/gfl/__init__.py +++ b/federatedscope/gfl/__init__.py @@ -1 +1 @@ -__version__ = '0.2.0' +__version__ = '0.2.1' diff --git a/setup.py b/setup.py index 4fb9a7407..8a402f766 100644 --- a/setup.py +++ b/setup.py @@ -3,7 +3,7 @@ import setuptools __name__ = 'federatedscope' -__version__ = '0.2.0' +__version__ = '0.2.1' URL = 'https://github.com/alibaba/FederatedScope' minimal_requires = [ From e25a4eb9d17df0ce5ce111373b4211bb03800f3f Mon Sep 17 00:00:00 2001 From: rayrayraykk <18007356109@163.com> Date: Mon, 17 Oct 2022 
14:02:45 +0800
Subject: [PATCH 38/39] retrigger UT

---
 federatedscope/gfl/__init__.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/federatedscope/gfl/__init__.py b/federatedscope/gfl/__init__.py
index fc79d63d5..e69de29bb 100644
--- a/federatedscope/gfl/__init__.py
+++ b/federatedscope/gfl/__init__.py
@@ -1 +0,0 @@
-__version__ = '0.2.1'

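The final commit below is a pure formatting pass over the per-client fairness
metrics in `federatedscope/core/monitors/monitor.py`; the computed values are
unchanged. As a reference for what those metrics measure, a standalone sketch
on a toy vector of sorted per-client accuracies (the values are illustrative,
not from any experiment):

    import numpy as np

    # Sorted per-client test accuracies (toy values).
    all_res = np.sort(np.array([0.62, 0.71, 0.75, 0.80, 0.83,
                                0.85, 0.88, 0.90, 0.93, 0.97]))

    res_min, res_max = all_res[0], all_res[-1]
    # Mean accuracy of the worst and best 10% of clients.
    bottom10 = np.mean(all_res[:all_res.size // 10])
    top10 = np.mean(all_res[all_res.size * 9 // 10:])
    # Cosine similarity between the result vector and the all-ones
    # vector; 1.0 means identical performance across all clients.
    cos1 = np.mean(all_res) / np.sqrt(np.mean(all_res**2))
    # Entropy of the normalized results (1e-9 guards against log(0)).
    p = (all_res + 1e-9) / np.sum(all_res + 1e-9)
    entropy = np.sum(-p * np.log(p))
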
From 47c89b1b1d16f06eb8e9c94a4486fa54dea50955 Mon Sep 17 00:00:00 2001
From: rayrayraykk <18007356109@163.com>
Date: Wed, 19 Oct 2022 16:58:08 +0800
Subject: [PATCH 39/39] fix format

---
 federatedscope/core/monitors/monitor.py | 38 ++++++++++++++++---------
 1 file changed, 24 insertions(+), 14 deletions(-)

diff --git a/federatedscope/core/monitors/monitor.py b/federatedscope/core/monitors/monitor.py
index e5f33a323..0f4832164 100644
--- a/federatedscope/core/monitors/monitor.py
+++ b/federatedscope/core/monitors/monitor.py
@@ -316,17 +316,21 @@ def format_eval_res(self,
                 3356, 'test_loss': 2892, 'test_total': 5000
             },
             'Results_fairness': {
-            'test_total': 33.99, 'test_correct': 27.185, 'test_avg_loss_std': 0.433551,
-            'test_avg_loss_bottom_decile': 0.356503, 'test_avg_loss_top_decile': 1.212492,
+            'test_total': 33.99, 'test_correct': 27.185,
+            'test_avg_loss_std': 0.433551,
+            'test_avg_loss_bottom_decile': 0.356503,
+            'test_avg_loss_top_decile': 1.212492,
             'test_avg_loss_min': 0.198317, 'test_avg_loss_max': 3.603567,
-            'test_avg_loss_bottom10%': 0.276681, 'test_avg_loss_top10%': 1.686649,
+            'test_avg_loss_bottom10%': 0.276681, 'test_avg_loss_top10%':
+            1.686649,
             'test_avg_loss_cos1': 0.867932, 'test_avg_loss_entropy': 5.164172,
             'test_loss_std': 13.686828, 'test_loss_bottom_decile': 11.822035,
             'test_loss_top_decile': 39.727236, 'test_loss_min': 7.337724,
             'test_loss_max': 100.899873, 'test_loss_bottom10%': 9.618685,
             'test_loss_top10%': 54.96769, 'test_loss_cos1': 0.880356,
             'test_loss_entropy': 5.175803, 'test_acc_std': 0.123823,
-            'test_acc_bottom_decile': 0.676471, 'test_acc_top_decile': 0.916667,
+            'test_acc_bottom_decile': 0.676471, 'test_acc_top_decile':
+            0.916667,
             'test_acc_min': 0.071429, 'test_acc_max': 0.972973,
             'test_acc_bottom10%': 0.527482, 'test_acc_top10%': 0.94486,
             'test_acc_cos1': 0.988134, 'test_acc_entropy': 5.283755
@@ -387,16 +391,22 @@ def format_eval_res(self,
                     # min and max
                     new_results[f"{key}_min"] = all_res[0]
                     new_results[f"{key}_max"] = all_res[-1]
-                    #bottom and top 10%
-                    new_results[f"{key}_bottom10%"]=np.mean(all_res[:
-                        all_res.size // 10])
-                    new_results[f"{key}_top10%"]=np.mean(all_res[
-                        all_res.size * 9 // 10:])
-                    #cosine similarity between the performance distribution and 1
-                    new_results[f"{key}_cos1"]=np.mean(all_res)/(np.sqrt(np.mean(all_res**2)))
-                    #entropy of performance distribution
-                    all_res_preprocessed=all_res+1e-9
-                    new_results[f"{key}_entropy"]=np.sum(-all_res_preprocessed/np.sum(all_res_preprocessed)*(np.log((all_res_preprocessed)/np.sum(all_res_preprocessed))))
+                    # bottom and top 10%
+                    new_results[f"{key}_bottom10%"] = np.mean(
+                        all_res[:all_res.size // 10])
+                    new_results[f"{key}_top10%"] = np.mean(
+                        all_res[all_res.size * 9 // 10:])
+                    # cosine similarity between the performance
+                    # distribution and 1
+                    new_results[f"{key}_cos1"] = np.mean(all_res) / (
+                        np.sqrt(np.mean(all_res**2)))
+                    # entropy of performance distribution
+                    all_res_preprocessed = all_res + 1e-9
+                    new_results[f"{key}_entropy"] = np.sum(
+                        -all_res_preprocessed /
+                        np.sum(all_res_preprocessed) * (np.log(
+                            (all_res_preprocessed) /
+                            np.sum(all_res_preprocessed))))
             round_formatted_results[f'Results_{form}'] = new_results
 
         with open(os.path.join(self.outdir, "eval_results.raw"),