From aff3f204e6e62ebf71d5fc4b2a937ce654b5b0df Mon Sep 17 00:00:00 2001 From: Brian Ko Date: Tue, 1 Dec 2020 04:20:53 +0000 Subject: [PATCH 1/3] Transcribed imagenet code --- examples/imagenet.py | 363 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 363 insertions(+) create mode 100644 examples/imagenet.py diff --git a/examples/imagenet.py b/examples/imagenet.py new file mode 100644 index 0000000..3aac07e --- /dev/null +++ b/examples/imagenet.py @@ -0,0 +1,363 @@ +import os +import warnings + +import hydra +import torch +import torch.nn as nn +import torch.backends.cudnn as cudnn +import torch.distributed as dist +import torchvision.models as models +import torchvision.datasets as datasets +import torchvision.transforms as transforms +from torch.optim import Adam + +best_acc1 = 0 + + +@hydra.main(config_name="imagenetconf") +def main(cfg): + if cfg.seed is not None: + random.seed(cfg.seed) + torch.manual_seed(cfg.seed) + cudnn.deterministic = True + warnings.warn( + "You have chosen to seed training. " + "This will turn on the CUDNN deterministic setting, " + "which can slow down your training considerably! " + "You may see unexpected behavior when restarting " + "from checkpoints." + ) + if cfg.gpu is not None: + warnings.warn( + "You have chosen a specific GPU. This will completely " + "disable data parallelism." + ) + if cfg.dist_url == "env://" and cfg.world_size == -1: + cfg.world_size = int(os.environ["WORLD_SIZE"]) + + global best_acc1 + + if cfg.gpu is not None: + print("Use GPU: {} for training".format(cfg.gpu)) + + if cfg.distributed: + if cfg.dist_url == "env://" and cfg.rank == -1: + cfg.rank = int(os.environ["RANK"]) + if cfg.multiprocessing_distributed: + cfg.rank = cfg.rank * cfg.ngpus_per_node + gpu + dist.init_process_group( + backend=cfg.dist_backend, + init_method=cfg.dist_url, + world_size=cfg.world_size, + rank=cfg.rank, + ) + if cfg.pretrained: + print("=> using pre-trained model '{}'".format(cfg.arch)) + model = models.__dict__[cfg.arch](pretrained=True) + else: + print("=> creating model '{}'".format(cfg.arch)) + model = models.__dict__[cfg.arch]() + + if not torch.cuda.is_available(): + print("using CPU, this wil be slow") + elif cfg.distributed: + if cfg.gpu is not None: + torch.cuda.set_device(cfg.gpu) + model.cuda(cfg.gpu) + cfg.batch_size = int(cfg.batch_size / cfg.ngpus_per_node) + model = torch.nn.parallel.DistributedDataParallel( + model, device_ids=[cfg.gpu] + ) + else: + model.cuda() + model = torch.nn.parallel.DistributedDataParallel(model) + elif cfg.gpu is not None: + torch.cuda.set_device(cfg.gpu) + model = model.cuda(cfg.gpu) + else: + if cfg.arch.startswith("alexnet") or cfg.arch.startswith("vgg"): + model.features = torch.nn.DataParallel(model.features) + model.cuda() + else: + model = torch.nn.DataParallel(model).cuda() + + criterion = nn.CrossEntropyLoss().cuda(cfg.gpu) + optimizer = Adam( + lr=cfg.adam.lr, + rho=cfg.adam.rho, + eps=cfg.adam.eps, + weight_decay=cfg.adam.weight_decay, + params=model.parameters(), + ) + + if cfg.resume: + if os.path.isfile(cfg.resume): + print("=> loading checkpoint '{}'".format(cfg.resume)) + if cfg.gpu is None: + checkpoint = torch.load(cfg.resume) + else: + loc = "cuda:{}".format(cfg.gpu) + checkpoint = torch.load(cfg.resume, map_location=loc) + cfg.start_epoch = checkpoint["epoch"] + best_acc1 = checkpoint["best_acc1"] + if cfg.gpu is not None: + best_acc1 = best_acc1.to(cfg.gpu) + model.load_state_dict(checkpoint["state_dict"]) + optimizer.load_state_dict(checkpoint["optimizer"]) + print( + "=> 
loaded checkpoint '{}' (epoch {})".format( + cfg.resume, checkpoint["epoch"] + ) + ) + else: + print("=> no checkpoint found at '{}'".format(cfg.resume)) + + cudnn.benchmark = True + + traindir = os.path.join(cfg.data, "train") + valdir = os.path.join(cfg.data, "val") + normalize = transforms.Normalize( + mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225] + ) + + train_dataset = datasets.ImageFolder( + train_dir, + transforms.Compose( + [ + transforms.RandomResizedCrop(224), + transforms.RandomHorizontalFlip(), + transforms.ToTensor(), + normalize, + ] + ), + ) + + if cfg.distributed: + train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset) + else: + train_sampler = None + + train_loader = torch.utils.data.DataLoader( + train_dataset, + batch_size=cfg.batch_size, + shuffle=(train_sampler is None), + num_workers=cfg.workers, + pin_memory=True, + sampler=train_sampler, + ) + + val_loader = torch.utils.data.DataLoader( + datasets.ImageFolder( + valdir, + transforms.Compose( + [ + transforms.Resize(256), + transforms.CenterCrop(224), + transforms.ToTensor(), + normalize, + ] + ), + ), + batch_size=args.batch_size, + shuffle=False, + num_workers=args.workers, + pin_memory=True, + ) + + if cfg.evaluate: + validate(val_loader, model, criterion, cfg) + return + + for epoch in range(cfg.start_epoch, cfg.epochs): + if cfg.distributed: + train_sampler.set_epoch(epoch) + adjust_learning_rate(optimizer, epoch, cfg) + + train(train_loader, model, criterion, optimizer, epoch, args) + acc1 = validate(val_loader, model, criterion, cfg) + + is_best = acc1 > best_acc1 + best_acc1 = max(acc1, best_acc1) + + if not cfg.multiprocessing_distributed or ( + cfg.multiprocessing_distributed and cfg.rank % cfg.ngpus_per_node == 0 + ): + save_checkpoint( + { + "epoch": epoch + 1, + "arch": cfg.arch, + "state_dict": model.state_dict(), + "best_acc1": best_acc1, + "optimizer": optimizer.state_dict(), + }, + is_best, + ) + + +def train(train_loader, model, criterion, optimizer, epoch, cfg): + batch_time = AverageMeter("Time", ":6.3f") + data_time = AverageMeter("Data", ":6.3f") + losses = AverageMeter("Loss", ":.4e") + top1 = AverageMeter("Acc@1", ":6.2f") + top5 = AverageMeter("Acc@5", ":6.2f") + progress = ProgressMeter( + len(train_loader), + [batch_time, data_time, losses, top1, top5], + prefix="Epoch: [{}]".format(epoch), + ) + + # switch to train mode + model.train() + + end = time.time() + for i, (images, target) in enumerate(train_loader): + # measure data loading time + data_time.update(time.time() - end) + + if args.gpu is not None: + images = images.cuda(cfg.gpu, non_blocking=True) + if torch.cuda.is_available(): + target = target.cuda(cfg.gpu, non_blocking=True) + + # compute output + output = model(images) + loss = criterion(output, target) + + # measure accuracy and record loss + acc1, acc5 = accuracy(output, target, topk=(1, 5)) + losses.update(loss.item(), images.size(0)) + top1.update(acc1[0], images.size(0)) + top5.update(acc5[0], images.size(0)) + + # compute gradient and do SGD step + optimizer.zero_grad() + loss.backward() + optimizer.step() + + # measure elapsed time + batch_time.update(time.time() - end) + end = time.time() + + if i % args.print_freq == 0: + progress.display(i) + + +def validate(val_loader, model, criterion, cfg): + batch_time = AverageMeter("Time", ":6.3f") + losses = AverageMeter("Loss", ":.4e") + top1 = AverageMeter("Acc@1", ":6.2f") + top5 = AverageMeter("Acc@5", ":6.2f") + progress = ProgressMeter( + len(val_loader), [batch_time, losses, top1, top5], 
prefix="Test: " + ) + + # switch to evaluate mode + model.eval() + + with torch.no_grad(): + end = time.time() + for i, (images, target) in enumerate(val_loader): + if cfg.gpu is not None: + images = images.cuda(cfg.gpu, non_blocking=True) + if torch.cuda.is_available(): + target = target.cuda(cfg.gpu, non_blocking=True) + + # compute output + output = model(images) + loss = criterion(output, target) + + # measure accuracy and record loss + acc1, acc5 = accuracy(output, target, topk=(1, 5)) + losses.update(loss.item(), images.size(0)) + top1.update(acc1[0], images.size(0)) + top5.update(acc5[0], images.size(0)) + + # measure elapsed time + batch_time.update(time.time() - end) + end = time.time() + + if i % args.print_freq == 0: + progress.display(i) + + # TODO: this should also be done with the ProgressMeter + print( + " * Acc@1 {top1.avg:.3f} Acc@5 {top5.avg:.3f}".format(top1=top1, top5=top5) + ) + + return top1.avg + + +def save_checkpoint(state, is_best, filename="checkpoint.pth.tar"): + torch.save(state, filename) + if is_best: + shutil.copyfile(filename, "model_best.pth.tar") + + +class AverageMeter(object): + """Computes and stores the average and current value""" + + def __init__(self, name, fmt=":f"): + self.name = name + self.fmt = fmt + self.reset() + + def reset(self): + self.val = 0 + self.avg = 0 + self.sum = 0 + self.count = 0 + + def update(self, val, n=1): + self.val = val + self.sum += val * n + self.count += n + self.avg = self.sum / self.count + + def __str__(self): + fmtstr = "{name} {val" + self.fmt + "} ({avg" + self.fmt + "})" + return fmtstr.format(**self.__dict__) + + +class ProgressMeter(object): + def __init__(self, num_batches, meters, prefix=""): + self.batch_fmtstr = self._get_batch_fmtstr(num_batches) + self.meters = meters + self.prefix = prefix + + def display(self, batch): + entries = [self.prefix + self.batch_fmtstr.format(batch)] + entries += [str(meter) for meter in self.meters] + print("\t".join(entries)) + + def _get_batch_fmtstr(self, num_batches): + num_digits = len(str(num_batches // 1)) + fmt = "{:" + str(num_digits) + "d}" + return "[" + fmt + "/" + fmt.format(num_batches) + "]" + + +def adjust_learning_rate(optimizer, epoch, cfg): + """Sets the learning rate to the initial LR decayed by 10 every 30 epochs""" + lr = cfg.lr * (0.1 ** (epoch // 30)) + for param_group in optimizer.param_groups: + param_group["lr"] = lr + + +def accuracy(output, target, topk=(1,)): + """Computes the accuracy over the k top predictions for the specified values of k""" + with torch.no_grad(): + maxk = max(topk) + batch_size = target.size(0) + + _, pred = output.topk(maxk, 1, True, True) + pred = pred.t() + correct = pred.eq(target.view(1, -1).expand_as(pred)) + + res = [] + for k in topk: + correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True) + res.append(correct_k.mul_(100.0 / batch_size)) + return res + + +if __name__ == "__main__": + main() From 54040e83d153d0023ae66464e1fd27301da1d377 Mon Sep 17 00:00:00 2001 From: Brian Ko Date: Thu, 3 Dec 2020 08:16:54 +0000 Subject: [PATCH 2/3] Pruned most non-DDP code --- examples/{imagenet.py => imagenet_00.py} | 170 ++++++----------------- examples/imagenetconf.yaml | 19 +++ 2 files changed, 62 insertions(+), 127 deletions(-) rename examples/{imagenet.py => imagenet_00.py} (61%) create mode 100644 examples/imagenetconf.yaml diff --git a/examples/imagenet.py b/examples/imagenet_00.py similarity index 61% rename from examples/imagenet.py rename to examples/imagenet_00.py index 3aac07e..29f0295 100644 --- 
a/examples/imagenet.py +++ b/examples/imagenet_00.py @@ -1,7 +1,12 @@ +import logging import os +import random +import shutil +import time import warnings import hydra +from omegaconf import DictConfig import torch import torch.nn as nn import torch.backends.cudnn as cudnn @@ -9,111 +14,49 @@ import torchvision.models as models import torchvision.datasets as datasets import torchvision.transforms as transforms -from torch.optim import Adam +from torch.optim import SGD -best_acc1 = 0 +logger = logging.getLogger("ImageNet") @hydra.main(config_name="imagenetconf") -def main(cfg): +def main(cfg: DictConfig): if cfg.seed is not None: random.seed(cfg.seed) torch.manual_seed(cfg.seed) + cudnn.benchmark = False cudnn.deterministic = True - warnings.warn( - "You have chosen to seed training. " - "This will turn on the CUDNN deterministic setting, " - "which can slow down your training considerably! " - "You may see unexpected behavior when restarting " - "from checkpoints." - ) - if cfg.gpu is not None: - warnings.warn( - "You have chosen a specific GPU. This will completely " - "disable data parallelism." - ) - if cfg.dist_url == "env://" and cfg.world_size == -1: - cfg.world_size = int(os.environ["WORLD_SIZE"]) - - global best_acc1 - if cfg.gpu is not None: - print("Use GPU: {} for training".format(cfg.gpu)) - - if cfg.distributed: - if cfg.dist_url == "env://" and cfg.rank == -1: - cfg.rank = int(os.environ["RANK"]) - if cfg.multiprocessing_distributed: - cfg.rank = cfg.rank * cfg.ngpus_per_node + gpu - dist.init_process_group( - backend=cfg.dist_backend, - init_method=cfg.dist_url, - world_size=cfg.world_size, - rank=cfg.rank, - ) - if cfg.pretrained: - print("=> using pre-trained model '{}'".format(cfg.arch)) - model = models.__dict__[cfg.arch](pretrained=True) - else: - print("=> creating model '{}'".format(cfg.arch)) - model = models.__dict__[cfg.arch]() + logger.info(f"Use GPU: {cfg.gpu} for training") + os.environ["MASTER_ADDR"] = "localhost" + os.environ["MASTER_PORT"] = "12345" + dist.init_process_group( + backend=cfg.dist_backend, + # init_method=cfg.dist_url, + world_size=cfg.world_size, + rank=cfg.gpu, + ) + return - if not torch.cuda.is_available(): - print("using CPU, this wil be slow") - elif cfg.distributed: - if cfg.gpu is not None: - torch.cuda.set_device(cfg.gpu) - model.cuda(cfg.gpu) - cfg.batch_size = int(cfg.batch_size / cfg.ngpus_per_node) - model = torch.nn.parallel.DistributedDataParallel( - model, device_ids=[cfg.gpu] - ) - else: - model.cuda() - model = torch.nn.parallel.DistributedDataParallel(model) - elif cfg.gpu is not None: - torch.cuda.set_device(cfg.gpu) - model = model.cuda(cfg.gpu) - else: - if cfg.arch.startswith("alexnet") or cfg.arch.startswith("vgg"): - model.features = torch.nn.DataParallel(model.features) - model.cuda() - else: - model = torch.nn.DataParallel(model).cuda() + model = models.__dict__[cfg.arch]() + torch.cuda.set_device(cfg.gpu) + model.cuda(cfg.gpu) + # When using a single GPU per process and per + # DistributedDataParallel, we need to divide the batch size + # ourselves based on the total number of GPUs we have + cfg.batch_size = int(cfg.batch_size / cfg.world_size) + model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[cfg.gpu]) criterion = nn.CrossEntropyLoss().cuda(cfg.gpu) - optimizer = Adam( - lr=cfg.adam.lr, - rho=cfg.adam.rho, - eps=cfg.adam.eps, - weight_decay=cfg.adam.weight_decay, + optimizer = SGD( params=model.parameters(), + lr=cfg.sgd.lr, + lambd=cfg.sgd.lambd, + alpha=cfg.sgd.alpha, + t0=cfg.sgd.t0, + 
weight_decay=cfg.sgd.weight_decay, ) - if cfg.resume: - if os.path.isfile(cfg.resume): - print("=> loading checkpoint '{}'".format(cfg.resume)) - if cfg.gpu is None: - checkpoint = torch.load(cfg.resume) - else: - loc = "cuda:{}".format(cfg.gpu) - checkpoint = torch.load(cfg.resume, map_location=loc) - cfg.start_epoch = checkpoint["epoch"] - best_acc1 = checkpoint["best_acc1"] - if cfg.gpu is not None: - best_acc1 = best_acc1.to(cfg.gpu) - model.load_state_dict(checkpoint["state_dict"]) - optimizer.load_state_dict(checkpoint["optimizer"]) - print( - "=> loaded checkpoint '{}' (epoch {})".format( - cfg.resume, checkpoint["epoch"] - ) - ) - else: - print("=> no checkpoint found at '{}'".format(cfg.resume)) - - cudnn.benchmark = True - traindir = os.path.join(cfg.data, "train") valdir = os.path.join(cfg.data, "val") normalize = transforms.Normalize( @@ -121,7 +64,7 @@ def main(cfg): ) train_dataset = datasets.ImageFolder( - train_dir, + traindir, transforms.Compose( [ transforms.RandomResizedCrop(224), @@ -158,9 +101,9 @@ def main(cfg): ] ), ), - batch_size=args.batch_size, + batch_size=cfg.batch_size, shuffle=False, - num_workers=args.workers, + num_workers=cfg.workers, pin_memory=True, ) @@ -173,25 +116,8 @@ def main(cfg): train_sampler.set_epoch(epoch) adjust_learning_rate(optimizer, epoch, cfg) - train(train_loader, model, criterion, optimizer, epoch, args) - acc1 = validate(val_loader, model, criterion, cfg) - - is_best = acc1 > best_acc1 - best_acc1 = max(acc1, best_acc1) - - if not cfg.multiprocessing_distributed or ( - cfg.multiprocessing_distributed and cfg.rank % cfg.ngpus_per_node == 0 - ): - save_checkpoint( - { - "epoch": epoch + 1, - "arch": cfg.arch, - "state_dict": model.state_dict(), - "best_acc1": best_acc1, - "optimizer": optimizer.state_dict(), - }, - is_best, - ) + train(train_loader, model, criterion, optimizer, epoch, cfg) + validate(val_loader, model, criterion, cfg) def train(train_loader, model, criterion, optimizer, epoch, cfg): @@ -214,7 +140,7 @@ def train(train_loader, model, criterion, optimizer, epoch, cfg): # measure data loading time data_time.update(time.time() - end) - if args.gpu is not None: + if cfg.gpu is not None: images = images.cuda(cfg.gpu, non_blocking=True) if torch.cuda.is_available(): target = target.cuda(cfg.gpu, non_blocking=True) @@ -238,7 +164,7 @@ def train(train_loader, model, criterion, optimizer, epoch, cfg): batch_time.update(time.time() - end) end = time.time() - if i % args.print_freq == 0: + if i % cfg.print_freq == 0: progress.display(i) @@ -276,22 +202,12 @@ def validate(val_loader, model, criterion, cfg): batch_time.update(time.time() - end) end = time.time() - if i % args.print_freq == 0: + if i % cfg.print_freq == 0: progress.display(i) - - # TODO: this should also be done with the ProgressMeter - print( + logger.info( " * Acc@1 {top1.avg:.3f} Acc@5 {top5.avg:.3f}".format(top1=top1, top5=top5) ) - return top1.avg - - -def save_checkpoint(state, is_best, filename="checkpoint.pth.tar"): - torch.save(state, filename) - if is_best: - shutil.copyfile(filename, "model_best.pth.tar") - class AverageMeter(object): """Computes and stores the average and current value""" @@ -327,7 +243,7 @@ def __init__(self, num_batches, meters, prefix=""): def display(self, batch): entries = [self.prefix + self.batch_fmtstr.format(batch)] entries += [str(meter) for meter in self.meters] - print("\t".join(entries)) + logger.info("\t".join(entries)) def _get_batch_fmtstr(self, num_batches): num_digits = len(str(num_batches // 1)) diff --git 
a/examples/imagenetconf.yaml b/examples/imagenetconf.yaml new file mode 100644 index 0000000..f2ba313 --- /dev/null +++ b/examples/imagenetconf.yaml @@ -0,0 +1,19 @@ +seed: ~ +gpu: ~ +world_size: 4 +dist_backend: nccl +dist_url: "localhost:9999" +batch_size: 256 +ngpus_per_node: 4 +sgd: + lr: 0.01 + lambd: 0.0001 + alpha: 0.75 + t0: 1000000.0 + weight_decay: 0 + +hydra.launcher.joblib.backend: multiprocessing + + +defaults: + - hydra/launcher: joblib From f5dff54fb2257ca89bc5ee60294b0628ac118ff5 Mon Sep 17 00:00:00 2001 From: Brian Ko Date: Thu, 3 Dec 2020 09:47:31 +0000 Subject: [PATCH 3/3] Fixing conf --- examples/imagenetconf.yaml | 2 -- 1 file changed, 2 deletions(-) diff --git a/examples/imagenetconf.yaml b/examples/imagenetconf.yaml index f2ba313..5fab571 100644 --- a/examples/imagenetconf.yaml +++ b/examples/imagenetconf.yaml @@ -12,8 +12,6 @@ sgd: t0: 1000000.0 weight_decay: 0 -hydra.launcher.joblib.backend: multiprocessing - defaults: - hydra/launcher: joblib
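
Taken together, the three patches converge on a launch model in which Hydra's joblib launcher starts one worker process per GPU and each worker uses its gpu override as its distributed rank. The sketch below isolates that initialization pattern. It is a minimal reconstruction rather than part of the patches: it assumes a single node with four GPUs, reuses the MASTER_ADDR/MASTER_PORT values hard-coded in patch 2 and the gpu, world_size, and dist_backend keys from imagenetconf.yaml, and omits the model, data pipeline, and training loop.

import logging
import os

import hydra
import torch
import torch.distributed as dist
from omegaconf import DictConfig

logger = logging.getLogger("ImageNet")


@hydra.main(config_name="imagenetconf")
def main(cfg: DictConfig):
    # One Hydra job per GPU: the per-job gpu override doubles as the rank.
    os.environ["MASTER_ADDR"] = "localhost"  # hard-coded in patch 2
    os.environ["MASTER_PORT"] = "12345"      # hard-coded in patch 2
    dist.init_process_group(
        backend=cfg.dist_backend,   # "nccl" in imagenetconf.yaml
        world_size=cfg.world_size,  # 4 in imagenetconf.yaml
        rank=cfg.gpu,
    )
    torch.cuda.set_device(cfg.gpu)
    logger.info("initialized rank %d of %d", dist.get_rank(), dist.get_world_size())


if __name__ == "__main__":
    main()

Because "hydra/launcher: joblib" sits in the defaults list, one plausible invocation is a multirun that sweeps the gpu override across the local devices, for example "python imagenet_00.py --multirun gpu=0,1,2,3". That command is an assumption about intended usage rather than something the patches document, and it further assumes that data and the other keys read by the script are supplied as overrides or added to the config.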
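
One detail worth flagging for readers of patch 2: the optimizer is imported as SGD from torch.optim, but the keyword set taken from cfg.sgd (lr, lambd, alpha, t0, weight_decay) matches the signature of torch.optim.ASGD, not torch.optim.SGD, which only accepts lr, momentum, dampening, weight_decay, and nesterov. The sketch below shows the construction those config keys line up with; treating ASGD as the intended optimizer is an assumption on the editor's part, since the patch does not say which one is meant.

from torch.optim import ASGD
from torchvision import models

# A stand-in model for illustration; patch 2 builds the architecture from cfg.arch instead.
model = models.resnet18()

# These values mirror the sgd: block added to imagenetconf.yaml in patch 2
# and coincide with ASGD's documented defaults.
optimizer = ASGD(
    model.parameters(),
    lr=0.01,
    lambd=0.0001,
    alpha=0.75,
    t0=1000000.0,
    weight_decay=0,
)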
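
For readers unfamiliar with the accuracy helper that both versions of the script use in train() and validate(), here is a small self-contained walk-through of the same top-k logic on a toy batch. The tensor values are made up for illustration; only the indexing pattern is taken from the patches.

import torch

# Toy logits for a batch of 2 samples over 4 classes (made-up numbers).
output = torch.tensor([[0.1, 0.6, 0.2, 0.1],
                       [0.3, 0.1, 0.4, 0.2]])
target = torch.tensor([1, 0])

maxk = 2  # equivalent to topk=(1, 2) for this toy case
_, pred = output.topk(maxk, 1, True, True)  # top-2 class indices per sample: [[1, 2], [2, 0]]
pred = pred.t()                             # shape (maxk, batch): row k holds the k-th guesses
correct = pred.eq(target.view(1, -1).expand_as(pred))

# Sample 0's top-1 guess (class 1) matches its target, sample 1's (class 2) does not -> 50.0
top1 = correct[:1].reshape(-1).float().sum(0) * 100.0 / target.size(0)
# Sample 1's target (class 0) appears among its top-2 guesses -> 100.0
top2 = correct[:2].reshape(-1).float().sum(0) * 100.0 / target.size(0)
print(top1.item(), top2.item())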