bugfix synthetic data url split data
RyanTrojans committed Aug 28, 2022
1 parent f9d9015 commit 48986e1
Showing 4 changed files with 136 additions and 139 deletions.
@@ -3,59 +3,60 @@
import fedml
from fedml import FedMLRunner
from fedml.data.MNIST.data_loader import download_mnist, load_partition_data_mnist
-from .trainer.classification_aggregator import ClassificationAggregator
-from .trainer.classification_trainer import ClassificationTrainer
+from trainer.classification_aggregator import ClassificationAggregator
+from trainer.classification_trainer import ClassificationTrainer
+from fedml.data.data_loader import load


-def load_data(args):
-    download_mnist(args.data_cache_dir)
-    fedml.logging.info("load_data. dataset_name = %s" % args.dataset)
-
-    """
-    Please read through the data loader at to see how to customize the dataset for FedML framework.
-    """
-    (
-        client_num,
-        train_data_num,
-        test_data_num,
-        train_data_global,
-        test_data_global,
-        train_data_local_num_dict,
-        train_data_local_dict,
-        test_data_local_dict,
-        class_num,
-    ) = load_partition_data_mnist(
-        args,
-        args.batch_size,
-        train_path=args.data_cache_dir + "/MNIST/train",
-        test_path=args.data_cache_dir + "/MNIST/test",
-    )
-    """
-    For shallow NN or linear models,
-    we uniformly sample a fraction of clients each round (as the original FedAvg paper)
-    """
-    args.client_num_in_total = client_num
-    dataset = [
-        train_data_num,
-        test_data_num,
-        train_data_global,
-        test_data_global,
-        train_data_local_num_dict,
-        train_data_local_dict,
-        test_data_local_dict,
-        class_num,
-    ]
-    return dataset, class_num
-
-
-class LogisticRegression(torch.nn.Module):
-    def __init__(self, input_dim, output_dim):
-        super(LogisticRegression, self).__init__()
-        self.linear = torch.nn.Linear(input_dim, output_dim)
-
-    def forward(self, x):
-        outputs = torch.sigmoid(self.linear(x))
-        return outputs
+# def load_data(args):
+#     download_mnist(args.data_cache_dir)
+#     fedml.logging.info("load_data. dataset_name = %s" % args.dataset)
+#
+#     """
+#     Please read through the data loader at to see how to customize the dataset for FedML framework.
+#     """
+#     (
+#         client_num,
+#         train_data_num,
+#         test_data_num,
+#         train_data_global,
+#         test_data_global,
+#         train_data_local_num_dict,
+#         train_data_local_dict,
+#         test_data_local_dict,
+#         class_num,
+#     ) = load_partition_data_mnist(
+#         args,
+#         args.batch_size,
+#         train_path=args.data_cache_dir + "/MNIST/train",
+#         test_path=args.data_cache_dir + "/MNIST/test",
+#     )
+#     """
+#     For shallow NN or linear models,
+#     we uniformly sample a fraction of clients each round (as the original FedAvg paper)
+#     """
+#     args.client_num_in_total = client_num
+#     dataset = [
+#         train_data_num,
+#         test_data_num,
+#         train_data_global,
+#         test_data_global,
+#         train_data_local_num_dict,
+#         train_data_local_dict,
+#         test_data_local_dict,
+#         class_num,
+#     ]
+#     return dataset, class_num
+#
+#
+# class LogisticRegression(torch.nn.Module):
+#     def __init__(self, input_dim, output_dim):
+#         super(LogisticRegression, self).__init__()
+#         self.linear = torch.nn.Linear(input_dim, output_dim)
+#
+#     def forward(self, x):
+#         outputs = torch.sigmoid(self.linear(x))
+#         return outputs


if __name__ == "__main__":
@@ -66,13 +67,13 @@ def forward(self, x):
    device = fedml.device.get_device(args)

    # load data
-    dataset, class_num = load_data(args)
+    dataset, class_num = load(args)

    # create model and trainer
    model = fedml.model.create(args, output_dim=class_num)
-    trainer = ClassificationTrainer(model=model, args=args)
-    aggregator = ClassificationAggregator(model=model, args=args)
+    # trainer = ClassificationTrainer(model=model, args=args)
+    # aggregator = ClassificationAggregator(model=model, args=args)

    # start training
-    fedml_runner = FedMLRunner(args, device, dataset, model, trainer, aggregator)
+    fedml_runner = FedMLRunner(args, device, dataset, model)
    fedml_runner.run()
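Taken together, the changes to this launch script route data loading through the generic `fedml.data.data_loader.load` dispatcher and drop the explicit trainer and aggregator, leaving `FedMLRunner` to construct its defaults. A minimal sketch of the resulting flow (assuming `load(args)` returns the same `(dataset, class_num)` pair the removed `load_data` produced, and that `FedMLRunner` falls back to built-in trainer/aggregator implementations when none are passed):

    import fedml
    from fedml import FedMLRunner
    from fedml.data.data_loader import load

    if __name__ == "__main__":
        # init FedML framework (parses the config / command-line args)
        args = fedml.init()

        # select the training device (CPU or GPU) for this process
        device = fedml.device.get_device(args)

        # generic loader dispatches on args.dataset and returns the
        # (dataset, class_num) pair the per-dataset loaders produce
        dataset, class_num = load(args)

        # model factory keyed off args.model
        model = fedml.model.create(args, output_dim=class_num)

        # no trainer/aggregator passed: FedMLRunner supplies its defaults
        fedml_runner = FedMLRunner(args, device, dataset, model)
        fedml_runner.run()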
@@ -2,50 +2,51 @@

import fedml
from fedml import FedMLRunner
-from fedml.data.MNIST.data_loader import download_mnist, load_partition_data_mnist
-from .trainer.classification_aggregator import ClassificationAggregator
-from .trainer.classification_trainer import ClassificationTrainer
+# from fedml.data.MNIST.data_loader import download_mnist, load_partition_data_mnist
+from trainer.classification_aggregator import ClassificationAggregator
+from trainer.classification_trainer import ClassificationTrainer
+from fedml.data.data_loader import load


-def load_data(args):
-    download_mnist(args.data_cache_dir)
-    fedml.logging.info("load_data. dataset_name = %s" % args.dataset)
-
-    """
-    Please read through the data loader at to see how to customize the dataset for FedML framework.
-    """
-    (
-        client_num,
-        train_data_num,
-        test_data_num,
-        train_data_global,
-        test_data_global,
-        train_data_local_num_dict,
-        train_data_local_dict,
-        test_data_local_dict,
-        class_num,
-    ) = load_partition_data_mnist(
-        args,
-        args.batch_size,
-        train_path=args.data_cache_dir + "/MNIST/train",
-        test_path=args.data_cache_dir + "/MNIST/test",
-    )
-    """
-    For shallow NN or linear models,
-    we uniformly sample a fraction of clients each round (as the original FedAvg paper)
-    """
-    args.client_num_in_total = client_num
-    dataset = [
-        train_data_num,
-        test_data_num,
-        train_data_global,
-        test_data_global,
-        train_data_local_num_dict,
-        train_data_local_dict,
-        test_data_local_dict,
-        class_num,
-    ]
-    return dataset, class_num
+# def load_data(args):
+#     download_mnist(args.data_cache_dir)
+#     fedml.logging.info("load_data. dataset_name = %s" % args.dataset)
+#
+#     """
+#     Please read through the data loader at to see how to customize the dataset for FedML framework.
+#     """
+#     (
+#         client_num,
+#         train_data_num,
+#         test_data_num,
+#         train_data_global,
+#         test_data_global,
+#         train_data_local_num_dict,
+#         train_data_local_dict,
+#         test_data_local_dict,
+#         class_num,
+#     ) = load_partition_data_mnist(
+#         args,
+#         args.batch_size,
+#         train_path=args.data_cache_dir + "/MNIST/train",
+#         test_path=args.data_cache_dir + "/MNIST/test",
+#     )
+#     """
+#     For shallow NN or linear models,
+#     we uniformly sample a fraction of clients each round (as the original FedAvg paper)
+#     """
+#     args.client_num_in_total = client_num
+#     dataset = [
+#         train_data_num,
+#         test_data_num,
+#         train_data_global,
+#         test_data_global,
+#         train_data_local_num_dict,
+#         train_data_local_dict,
+#         test_data_local_dict,
+#         class_num,
+#     ]
+#     return dataset, class_num


class LogisticRegression(torch.nn.Module):
@@ -66,7 +67,7 @@ def forward(self, x):
    device = fedml.device.get_device(args)

    # load data
-    dataset, class_num = load_data(args)
+    dataset, class_num = load(args)

    # create model and trainer
    model = fedml.model.create(args, output_dim=class_num)
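The same `load_data` → `load` substitution is applied in this second launch script, along with the import fix shared by both files: relative imports like `from .trainer.classification_trainer import ...` only resolve when the file is executed as part of a package (`python -m pkg.script`); when the script is launched directly, Python raises `ImportError: attempted relative import with no known parent package`. The absolute form resolves against the script's own directory instead. For illustration (script name hypothetical):

    # fails when the file is run directly as `python main.py`:
    # from .trainer.classification_trainer import ClassificationTrainer

    # resolves against the directory containing the script:
    from trainer.classification_trainer import ClassificationTrainer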
18 changes: 12 additions & 6 deletions python/fedml/data/cifar10/efficient_loader.py
@@ -94,26 +94,30 @@ def _data_transforms_cifar10():
    return train_transform, valid_transform


-def load_cifar10_data(datadir, resize=32, augmentation=True, data_efficient_load=False):
+def load_cifar10_data(datadir, process_id, synthetic_data_url, private_local_data, resize=32, augmentation=True, data_efficient_load=False):
    train_transform, test_transform = _data_transforms_cifar10()

+    is_download = True;
+    if process_id != 0:
+        is_download = False if (len(synthetic_data_url) != 0 or len(private_local_data) != 0) else True;
+
    if data_efficient_load:
        cifar10_train_ds = CIFAR10(datadir, train=True, download=True, transform=train_transform)
        cifar10_test_ds = CIFAR10(datadir, train=False, download=True, transform=test_transform)
    else:
-        cifar10_train_ds = CIFAR10_truncated(datadir, train=True, download=True, transform=train_transform)
-        cifar10_test_ds = CIFAR10_truncated(datadir, train=False, download=True, transform=test_transform)
+        cifar10_train_ds = CIFAR10_truncated(datadir, train=True, download=is_download, transform=train_transform)
+        cifar10_test_ds = CIFAR10_truncated(datadir, train=False, download=is_download, transform=test_transform)

    X_train, y_train = cifar10_train_ds.data, cifar10_train_ds.targets
    X_test, y_test = cifar10_test_ds.data, cifar10_test_ds.targets

    return (X_train, y_train, X_test, y_test, cifar10_train_ds, cifar10_test_ds)


-def partition_data(dataset, datadir, partition, n_nets, alpha):
+def partition_data(dataset, datadir, partition, n_nets, alpha, process_id, synthetic_data_url, private_local_data):
    np.random.seed(10)
    logging.info("*********partition data***************")
-    X_train, y_train, X_test, y_test, cifar10_train_ds, cifar10_test_ds = load_cifar10_data(datadir)
+    X_train, y_train, X_test, y_test, cifar10_train_ds, cifar10_test_ds = load_cifar10_data(datadir, process_id, synthetic_data_url, private_local_data)
    n_train = X_train.shape[0]
    # n_test = X_test.shape[0]

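The download gate above is the core of the bugfix: rank 0 keeps downloading CIFAR-10, while non-zero ranks skip the download whenever a synthetic data URL or a private local data path is configured, since those processes are expected to receive their data from that source instead. A behavior-equivalent, more idiomatic sketch of the added logic (helper name hypothetical):

    def should_download(process_id, synthetic_data_url, private_local_data):
        # rank 0 always downloads; other ranks download only when neither
        # a synthetic-data URL nor a private local dataset is configured
        if process_id == 0:
            return True
        return len(synthetic_data_url) == 0 and len(private_local_data) == 0

Note that the gate only reaches the `CIFAR10_truncated` branch; the `data_efficient_load` branch still passes `download=True` unconditionally.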
@@ -311,6 +315,8 @@ def efficient_load_partition_data_cifar10(
    client_number,
    batch_size,
    process_id,
+    synthetic_data_url,
+    private_local_data,
    n_proc_in_silo=0,
    data_efficient_load=True,
):
@@ -323,7 +329,7 @@
        traindata_cls_counts,
        cifar10_train_ds,
        cifar10_test_ds,
-    ) = partition_data(dataset, data_dir, partition_method, client_number, partition_alpha, process_id)
+    ) = partition_data(dataset, data_dir, partition_method, client_number, partition_alpha, process_id, synthetic_data_url, private_local_data)
    class_num = len(np.unique(y_train))
    logging.info("traindata_cls_counts = " + str(traindata_cls_counts))
    train_data_num = sum([len(net_dataidx_map[r]) for r in range(client_number)])
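With the parameters threaded all the way through, the old call chain is also repaired: the previous code already passed `process_id` into `partition_data`, whose five-parameter signature could not accept it, so the call would have raised a `TypeError` at runtime. A hedged usage sketch of the updated entry point (argument values illustrative, and the leading keyword names assumed to match the internal call shown above):

    dataset_tuple = efficient_load_partition_data_cifar10(
        dataset="cifar10",
        data_dir="./data/cifar10",
        partition_method="hetero",   # Dirichlet (non-IID) label partition
        partition_alpha=0.5,         # Dirichlet concentration parameter
        client_number=10,
        batch_size=32,
        process_id=1,                # non-zero rank: download may be skipped
        synthetic_data_url="",       # empty string -> no synthetic source
        private_local_data="",       # empty string -> no private local data
    )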
