From 57565efebbe083784907cf13025cc41b7240d4e7 Mon Sep 17 00:00:00 2001 From: "yuexiang.xyx" Date: Tue, 28 Mar 2023 21:30:51 +0800 Subject: [PATCH 1/3] debug: load data from files --- .github/workflows/test_distribute.yml | 16 ++++-- .../contrib/data/load_from_files.py | 54 +++++++++++++++++++ .../core/auxiliaries/data_builder.py | 15 +++--- .../core/auxiliaries/model_builder.py | 7 ++- federatedscope/core/configs/cfg_data.py | 3 ++ .../vertical_fl/dataloader/utils.py | 8 +-- .../distributed_client_1.yaml | 3 +- .../distributed_client_2.yaml | 5 +- .../distributed_client_3.yaml | 5 +- .../distributed_server.yaml | 3 +- scripts/distributed_scripts/gen_data.py | 3 ++ 11 files changed, 101 insertions(+), 21 deletions(-) create mode 100644 federatedscope/contrib/data/load_from_files.py diff --git a/.github/workflows/test_distribute.yml b/.github/workflows/test_distribute.yml index 67905345f..37111f0b5 100644 --- a/.github/workflows/test_distribute.yml +++ b/.github/workflows/test_distribute.yml @@ -32,14 +32,24 @@ jobs: - name: Install FS run: | pip install -e .[test] - - name: Test Distributed (LR on toy) + - name: Test Distributed (LR on toy with a unified files) run: | python scripts/distributed_scripts/gen_data.py - python federatedscope/main.py --cfg scripts/distributed_scripts/distributed_configs/distributed_server.yaml & + python federatedscope/main.py --cfg scripts/distributed_scripts/distributed_configs/distributed_server_no_data.yaml & sleep 2 python federatedscope/main.py --cfg scripts/distributed_scripts/distributed_configs/distributed_client_1.yaml & sleep 2 python federatedscope/main.py --cfg scripts/distributed_scripts/distributed_configs/distributed_client_2.yaml & sleep 2 python federatedscope/main.py --cfg scripts/distributed_scripts/distributed_configs/distributed_client_3.yaml - [ $? -eq 1 ] && exit 1 || echo "Passed" \ No newline at end of file + [ $? 
-eq 1 ] && exit 1 || echo "Passed" + - name: Test Distributed (LR on toy with multiple files) + run: | + python scripts/distributed_scripts/gen_data.py + python federatedscope/main.py --cfg scripts/distributed_scripts/distributed_configs/distributed_server.yaml data.file_path 'toy_data/server_data' distribute.data_idx -1 & + sleep 2 + python federatedscope/main.py --cfg scripts/distributed_scripts/distributed_configs/distributed_client_1.yaml data.file_path 'toy_data/client_1_data' distribute.data_idx -1 & + sleep 2 + python federatedscope/main.py --cfg scripts/distributed_scripts/distributed_configs/distributed_client_2.yaml data.file_path 'toy_data/client_2_data' distribute.data_idx -1 & + sleep 2 + python federatedscope/main.py --cfg scripts/distributed_scripts/distributed_configs/distributed_client_3.yaml data.file_path 'toy_data/client_3_data' distribute.data_idx -1 diff --git a/federatedscope/contrib/data/load_from_files.py b/federatedscope/contrib/data/load_from_files.py new file mode 100644 index 000000000..304ad8a45 --- /dev/null +++ b/federatedscope/contrib/data/load_from_files.py @@ -0,0 +1,54 @@ +import os +import pickle + +from federatedscope.register import register_data +from federatedscope.core.data.utils import convert_data_mode +from federatedscope.core.auxiliaries.utils import setup_seed + + +def load_data_from_file(config, client_cfgs=None): + from federatedscope.core.data import DummyDataTranslator + + file_path = config.data.file_path + + if not os.path.exists(file_path): + raise ValueError(f'The file {file_path} does not exist.') + + with open(file_path, 'br') as file: + data = pickle.load(file) + # The shape of data is expected to be: + # (1) the data consist of all participants' data: + # { + # 'client_id': { + # 'train/val/test': { + # 'x/y': np.ndarray + # } + # } + # } + # (2) isolated data + # { + # 'train/val/test': { + # 'x/y': np.ndarray + # } + # } + + # translator = DummyDataTranslator(config, client_cfgs) + # data = 
translator(data) + + # Convert `StandaloneDataDict` to `ClientData` when in distribute mode + data = convert_data_mode(data, config) + + # Restore the user-specified seed after the data generation + setup_seed(config.seed) + + return data, config + + +def call_file_data(config, client_cfgs): + if config.data.type == "file": + # All the data (clients and servers) are loaded from one unified file + data, modified_config = load_data_from_file(config, client_cfgs) + return data, modified_config + + +register_data("file", call_file_data) diff --git a/federatedscope/core/auxiliaries/data_builder.py b/federatedscope/core/auxiliaries/data_builder.py index 964096b9f..cddabef02 100644 --- a/federatedscope/core/auxiliaries/data_builder.py +++ b/federatedscope/core/auxiliaries/data_builder.py @@ -126,13 +126,16 @@ def get_data(config, client_cfgs=None): # Apply translator to non-FL dataset to transform it into its federated # counterpart - translator = getattr(import_module('federatedscope.core.data'), - DATA_TRANS_MAP[config.data.type.lower()])( - modified_config, client_cfgs) - data = translator(dataset) + if dataset is not None: + translator = getattr(import_module('federatedscope.core.data'), + DATA_TRANS_MAP[config.data.type.lower()])( + modified_config, client_cfgs) + data = translator(dataset) - # Convert `StandaloneDataDict` to `ClientData` when in distribute mode - data = convert_data_mode(data, modified_config) + # Convert `StandaloneDataDict` to `ClientData` when in distribute mode + data = convert_data_mode(data, modified_config) + else: + data = None # Restore the user-specified seed after the data generation setup_seed(config.seed) diff --git a/federatedscope/core/auxiliaries/model_builder.py b/federatedscope/core/auxiliaries/model_builder.py index 085017c7a..7700a72ed 100644 --- a/federatedscope/core/auxiliaries/model_builder.py +++ b/federatedscope/core/auxiliaries/model_builder.py @@ -1,4 +1,5 @@ import logging +import numpy as np import 
federatedscope.register as register logger = logging.getLogger(__name__) @@ -63,7 +64,7 @@ def get_shape_from_data(data, model_config, backend='torch'): if isinstance(data_representative, dict): if 'x' in data_representative: - shape = data_representative['x'].shape + shape = np.asarray(data_representative['x']).shape if len(shape) == 1: # (batch, ) = (batch, 1) return 1 else: @@ -121,7 +122,9 @@ def get_model(model_config, local_data=None, backend='torch'): ``mf.model.model_builder.get_mfnet()`` =================================== ============================== """ - if local_data is not None: + if model_config.type.lower() in ['xgb_tree', 'gbdt_tree', 'random_forest']: + input_shape = None + elif local_data is not None: input_shape = get_shape_from_data(local_data, model_config, backend) else: input_shape = model_config.input_shape diff --git a/federatedscope/core/configs/cfg_data.py b/federatedscope/core/configs/cfg_data.py index fe648e1aa..f825e937b 100644 --- a/federatedscope/core/configs/cfg_data.py +++ b/federatedscope/core/configs/cfg_data.py @@ -42,6 +42,9 @@ def extend_data_cfg(cfg): cfg.data.test_target_transform = [] cfg.data.test_pre_transform = [] + # data.file_path takes effect when data.type = 'file' + cfg.data.file_path = '' + # DataLoader related args cfg.dataloader = CN() cfg.dataloader.type = 'base' diff --git a/federatedscope/vertical_fl/dataloader/utils.py b/federatedscope/vertical_fl/dataloader/utils.py index 7717a4067..7942a5e39 100644 --- a/federatedscope/vertical_fl/dataloader/utils.py +++ b/federatedscope/vertical_fl/dataloader/utils.py @@ -15,8 +15,8 @@ def batch_iter(data, batch_size, shuffled=True): """ assert 'x' in data and 'y' in data - data_x = data['x'] - data_y = data['y'] + data_x = np.asarray(data['x']) + data_y = np.asarray(data['y']) data_size = len(data_y) num_batches_per_epoch = math.ceil(data_size / batch_size) @@ -44,8 +44,8 @@ def __init__(self, use_full_trainset=True, feature_frac=1.0): assert 'x' in data - self.data_x 
= data['x'] - self.data_y = data['y'] if 'y' in data else None + self.data_x = np.asarray(data['x']) + self.data_y = np.asarray(data['y']) if 'y' in data else None self.data_size = self.data_x.shape[0] self.feature_size = self.data_x.shape[1] self.replace = replace diff --git a/scripts/distributed_scripts/distributed_configs/distributed_client_1.yaml b/scripts/distributed_scripts/distributed_configs/distributed_client_1.yaml index e98e4236e..e490b90f0 100644 --- a/scripts/distributed_scripts/distributed_configs/distributed_client_1.yaml +++ b/scripts/distributed_scripts/distributed_configs/distributed_client_1.yaml @@ -18,6 +18,7 @@ trainer: eval: freq: 10 data: - type: 'toy' + type: 'file' + file_path: 'toy_data/all_data' model: type: 'lr' \ No newline at end of file diff --git a/scripts/distributed_scripts/distributed_configs/distributed_client_2.yaml b/scripts/distributed_scripts/distributed_configs/distributed_client_2.yaml index 0acd42a41..bb9b15383 100644 --- a/scripts/distributed_scripts/distributed_configs/distributed_client_2.yaml +++ b/scripts/distributed_scripts/distributed_configs/distributed_client_2.yaml @@ -12,12 +12,13 @@ distribute: client_host: '127.0.0.1' client_port: 50053 role: 'client' - data_idx: 1 + data_idx: 2 trainer: type: 'general' eval: freq: 10 data: - type: 'toy' + type: 'file' + file_path: 'toy_data/all_data' model: type: 'lr' \ No newline at end of file diff --git a/scripts/distributed_scripts/distributed_configs/distributed_client_3.yaml b/scripts/distributed_scripts/distributed_configs/distributed_client_3.yaml index 66e793493..9e695136c 100644 --- a/scripts/distributed_scripts/distributed_configs/distributed_client_3.yaml +++ b/scripts/distributed_scripts/distributed_configs/distributed_client_3.yaml @@ -12,12 +12,13 @@ distribute: client_host: '127.0.0.1' client_port: 50054 role: 'client' - data_idx: 1 + data_idx: 3 trainer: type: 'general' eval: freq: 10 data: - type: 'toy' + type: 'file' + file_path: 'toy_data/all_data' model: 
type: 'lr' \ No newline at end of file diff --git a/scripts/distributed_scripts/distributed_configs/distributed_server.yaml b/scripts/distributed_scripts/distributed_configs/distributed_server.yaml index 6ea218f31..32294cfa7 100644 --- a/scripts/distributed_scripts/distributed_configs/distributed_server.yaml +++ b/scripts/distributed_scripts/distributed_configs/distributed_server.yaml @@ -16,6 +16,7 @@ trainer: eval: freq: 10 data: - type: 'toy' + type: 'file' + file_path: 'toy_data/all_data' model: type: 'lr' \ No newline at end of file diff --git a/scripts/distributed_scripts/gen_data.py b/scripts/distributed_scripts/gen_data.py index 2784a8121..f92f0950c 100644 --- a/scripts/distributed_scripts/gen_data.py +++ b/scripts/distributed_scripts/gen_data.py @@ -100,6 +100,9 @@ def generate_data(client_num=3, } if data[client_idx]['test'] is not None else None pickle.dump(save_client_data, f) + with open('toy_data/all_data', 'wb') as f: + pickle.dump(data, f) + return data From bd00c2bf85a131146b669c6cced1533035192777 Mon Sep 17 00:00:00 2001 From: "yuexiang.xyx" Date: Tue, 28 Mar 2023 21:46:09 +0800 Subject: [PATCH 2/3] update docs --- .github/workflows/test_distribute.yml | 1 - README.md | 18 +++++++++--------- federatedscope/core/configs/README.md | 2 +- .../distributed_tf_client_3.yaml | 4 ++-- .../cross_backends/distributed_tf_server.yaml | 4 ++-- 5 files changed, 14 insertions(+), 15 deletions(-) diff --git a/.github/workflows/test_distribute.yml b/.github/workflows/test_distribute.yml index 37111f0b5..f3f0502f1 100644 --- a/.github/workflows/test_distribute.yml +++ b/.github/workflows/test_distribute.yml @@ -45,7 +45,6 @@ jobs: [ $? 
-eq 1 ] && exit 1 || echo "Passed" - name: Test Distributed (LR on toy with multiple files) run: | - python scripts/distributed_scripts/gen_data.py python federatedscope/main.py --cfg scripts/distributed_scripts/distributed_configs/distributed_server.yaml data.file_path 'toy_data/server_data' distribute.data_idx -1 & sleep 2 python federatedscope/main.py --cfg scripts/distributed_scripts/distributed_configs/distributed_client_1.yaml data.file_path 'toy_data/client_1_data' distribute.data_idx -1 & diff --git a/README.md b/README.md index 37c0ae435..2d0723bb0 100644 --- a/README.md +++ b/README.md @@ -202,7 +202,7 @@ The distributed mode in FederatedScope denotes running multiple procedures to bu To run with distributed mode, you only need to: -- Prepare isolated data file and set up `cfg.distribute.data_file = PATH/TO/DATA` for each participant; +- Prepare isolated data file and set up `cfg.data.file_path = PATH/TO/DATA` for each participant; - Change `cfg.federate.model = 'distributed'`, and specify the role of each participant by `cfg.distributed.role = 'server'/'client'`. - Set up a valid address by `cfg.distribute.server_host/client_host = x.x.x.x` and `cfg.distribute.server_port/client_port = xxxx`. 
(Note that for a server, you need to set up `server_host` and `server_port` for listening messages, while for a client, you need to set up `client_host` and `client_port` for listening as well as `server_host` and `server_port` for joining in an FL course) @@ -210,12 +210,12 @@ We prepare a synthetic example for running with distributed mode: ```bash # For server -python federatedscope/main.py --cfg scripts/distributed_scripts/distributed_configs/distributed_server.yaml distribute.data_file 'PATH/TO/DATA' distribute.server_host x.x.x.x distribute.server_port xxxx +python federatedscope/main.py --cfg scripts/distributed_scripts/distributed_configs/distributed_server.yaml data.file_path 'PATH/TO/DATA' distribute.server_host x.x.x.x distribute.server_port xxxx # For clients -python federatedscope/main.py --cfg scripts/distributed_scripts/distributed_configs/distributed_client_1.yaml distribute.data_file 'PATH/TO/DATA' distribute.server_host x.x.x.x distribute.server_port xxxx distribute.client_host x.x.x.x distribute.client_port xxxx -python federatedscope/main.py --cfg scripts/distributed_scripts/distributed_configs/distributed_client_2.yaml distribute.data_file 'PATH/TO/DATA' distribute.server_host x.x.x.x distribute.server_port xxxx distribute.client_host x.x.x.x distribute.client_port xxxx -python federatedscope/main.py --cfg scripts/distributed_scripts/distributed_configs/distributed_client_3.yaml distribute.data_file 'PATH/TO/DATA' distribute.server_host x.x.x.x distribute.server_port xxxx distribute.client_host x.x.x.x distribute.client_port xxxx +python federatedscope/main.py --cfg scripts/distributed_scripts/distributed_configs/distributed_client_1.yaml data.file_path 'PATH/TO/DATA' distribute.server_host x.x.x.x distribute.server_port xxxx distribute.client_host x.x.x.x distribute.client_port xxxx +python federatedscope/main.py --cfg scripts/distributed_scripts/distributed_configs/distributed_client_2.yaml data.file_path 'PATH/TO/DATA' distribute.server_host 
x.x.x.x distribute.server_port xxxx distribute.client_host x.x.x.x distribute.client_port xxxx +python federatedscope/main.py --cfg scripts/distributed_scripts/distributed_configs/distributed_client_3.yaml data.file_path 'PATH/TO/DATA' distribute.server_host x.x.x.x distribute.server_port xxxx distribute.client_host x.x.x.x distribute.client_port xxxx ``` An executable example with generated toy data can be run with (a script can be found in `scripts/run_distributed_lr.sh`): @@ -224,14 +224,14 @@ An executable example with generated toy data can be run with (a script can be f python scripts/distributed_scripts/gen_data.py # Firstly start the server that is waiting for clients to join in -python federatedscope/main.py --cfg scripts/distributed_scripts/distributed_configs/distributed_server.yaml distribute.data_file toy_data/server_data distribute.server_host 127.0.0.1 distribute.server_port 50051 +python federatedscope/main.py --cfg scripts/distributed_scripts/distributed_configs/distributed_server.yaml data.file_path toy_data/server_data distribute.server_host 127.0.0.1 distribute.server_port 50051 # Start the client #1 (with another process) -python federatedscope/main.py --cfg scripts/distributed_scripts/distributed_configs/distributed_client_1.yaml distribute.data_file toy_data/client_1_data distribute.server_host 127.0.0.1 distribute.server_port 50051 distribute.client_host 127.0.0.1 distribute.client_port 50052 +python federatedscope/main.py --cfg scripts/distributed_scripts/distributed_configs/distributed_client_1.yaml data.file_path toy_data/client_1_data distribute.server_host 127.0.0.1 distribute.server_port 50051 distribute.client_host 127.0.0.1 distribute.client_port 50052 # Start the client #2 (with another process) -python federatedscope/main.py --cfg scripts/distributed_scripts/distributed_configs/distributed_client_2.yaml distribute.data_file toy_data/client_2_data distribute.server_host 127.0.0.1 distribute.server_port 50051 distribute.client_host 
127.0.0.1 distribute.client_port 50053 +python federatedscope/main.py --cfg scripts/distributed_scripts/distributed_configs/distributed_client_2.yaml data.file_path toy_data/client_2_data distribute.server_host 127.0.0.1 distribute.server_port 50051 distribute.client_host 127.0.0.1 distribute.client_port 50053 # Start the client #3 (with another process) -python federatedscope/main.py --cfg scripts/distributed_scripts/distributed_configs/distributed_client_3.yaml distribute.data_file toy_data/client_3_data distribute.server_host 127.0.0.1 distribute.server_port 50051 distribute.client_host 127.0.0.1 distribute.client_port 50054 +python federatedscope/main.py --cfg scripts/distributed_scripts/distributed_configs/distributed_client_3.yaml data.file_path toy_data/client_3_data distribute.server_host 127.0.0.1 distribute.server_port 50051 distribute.client_host 127.0.0.1 distribute.client_port 50054 ``` And you can observe the results as (the IP addresses are anonymized with 'x.x.x.x'): diff --git a/federatedscope/core/configs/README.md b/federatedscope/core/configs/README.md index 307ff3308..1f79f7b5b 100644 --- a/federatedscope/core/configs/README.md +++ b/federatedscope/core/configs/README.md @@ -36,6 +36,7 @@ The configurations related to the data/dataset are defined in `cfg_data.py`. 
|:--------------------------------------------:|:-----:|:---------- |:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| | `data.root` | (string) 'data' | The folder where the data file located. `data.root` would be used together with `data.type` to load the dataset. | - | | `data.type` | (string) 'toy' | Dataset name | CV: 'femnist', 'celeba' ; NLP: 'shakespeare', 'subreddit', 'twitter'; Graph: 'cora', 'citeseer', 'pubmed', 'dblp_conf', 'dblp_org', 'csbm', 'epinions', 'ciao', 'fb15k-237', 'wn18', 'fb15k' , 'MUTAG', 'BZR', 'COX2', 'DHFR', 'PTC_MR', 'AIDS', 'NCI1', 'ENZYMES', 'DD', 'PROTEINS', 'COLLAB', 'IMDB-BINARY', 'IMDB-MULTI', 'REDDIT-BINARY', 'IMDB-BINARY', 'IMDB-MULTI', 'HIV', 'ESOL', 'FREESOLV', 'LIPO', 'PCBA', 'MUV', 'BACE', 'BBBP', 'TOX21', 'TOXCAST', 'SIDER', 'CLINTOX', 'graph_multi_domain_mol', 'graph_multi_domain_small', 'graph_multi_domain_mix', 'graph_multi_domain_biochem'; MF: 'vflmovielens1m', 'vflmovielens10m', 'hflmovielens1m', 'hflmovielens10m', 'vflnetflix', 'hflnetflix'; Tabular: 'toy', 'synthetic'; External dataset: 'DNAME@torchvision', 'DNAME@torchtext', 'DNAME@huggingface_datasets', 'DNAME@openml'. 
| +| `data.file_path` | (string) '' | The path to the data file, only takes effect when data.type = 'file' | - | | `data.args` | (list) [] | Args for the external dataset | Used for external dataset, eg. `[{'download': False}]` | | `data.save_data` | (bool) False | Whether to save the generated toy data | - | | `data.splitter` | (string) '' | Splitter name for standalone dataset | Generic splitter: 'lda'; Graph splitter: 'louvain', 'random', 'rel_type', 'graph_type', 'scaffold', 'scaffold_lda', 'rand_chunk' | @@ -238,7 +239,6 @@ The configurations related to FL settings are defined in `cfg_fl_setting.py`. | `distribute.client_host` | (string) '0.0.0.0' | The host of client's ip address for communication | - | | `distribute.client_port` | (string) 50050 | The port of client's ip address for communication | - | | `distribute.role` | (string) 'client'
Choices: {'server', 'client'} | The role of the worker | - | -| `distribute.data_file` | (string) 'data' | The path to the data dile | - | | `distribute.data_idx` | (int) -1 | It is used to specify the data index in distributed mode when adopting a centralized dataset for simulation (formatted as {data_idx: data/dataloader}). | `data_idx=-1` means that the entire dataset is owned by the participant. And we randomly sample the index in simulation for other invalid values excepted for -1. | `distribute.`
`grpc_max_send_message_length` | (int) 100 * 1024 * 1024 | The maximum length of sent messages | - | | `distribute.`
`grpc_max_receive_message_length` | (int) 100 * 1024 * 1024 | The maximum length of received messages | - | diff --git a/federatedscope/cross_backends/distributed_tf_client_3.yaml b/federatedscope/cross_backends/distributed_tf_client_3.yaml index 61792c2f5..42d16780c 100644 --- a/federatedscope/cross_backends/distributed_tf_client_3.yaml +++ b/federatedscope/cross_backends/distributed_tf_client_3.yaml @@ -13,12 +13,12 @@ distribute: client_host: '127.0.0.1' client_port: 50054 role: 'client' - data_file: 'toy_data/client_3_data' trainer: type: 'general' eval: freq: 10 data: - type: 'toy' + type: 'file' + file_path: 'toy_data/client_3_data' model: type: 'lr' \ No newline at end of file diff --git a/federatedscope/cross_backends/distributed_tf_server.yaml b/federatedscope/cross_backends/distributed_tf_server.yaml index cd1b23c43..24120381f 100644 --- a/federatedscope/cross_backends/distributed_tf_server.yaml +++ b/federatedscope/cross_backends/distributed_tf_server.yaml @@ -11,12 +11,12 @@ distribute: server_host: '127.0.0.1' server_port: 50051 role: 'server' - data_file: 'toy_data/server_data' trainer: type: 'general' eval: freq: 10 data: - type: 'toy' + type: 'file' + file_path: 'toy_data/server_data' model: type: 'lr' \ No newline at end of file From 63bef701cec3af8e6bbe6a612d8955109b1a985d Mon Sep 17 00:00:00 2001 From: "yuexiang.xyx" Date: Tue, 28 Mar 2023 21:46:35 +0800 Subject: [PATCH 3/3] update docs --- scripts/distributed_scripts/run_distributed_lr.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/scripts/distributed_scripts/run_distributed_lr.sh b/scripts/distributed_scripts/run_distributed_lr.sh index aa1840e25..483b7d0e3 100755 --- a/scripts/distributed_scripts/run_distributed_lr.sh +++ b/scripts/distributed_scripts/run_distributed_lr.sh @@ -4,6 +4,8 @@ cd .. echo "Test distributed mode with LR..." 
+python scripts/distributed_scripts/gen_data.py + ### server owns global test data python federatedscope/main.py --cfg scripts/distributed_scripts/distributed_configs/distributed_server.yaml & ### server doesn't own data