diff --git a/torchbenchmark/models/maml/LICENSE b/torchbenchmark/models/maml/LICENSE
deleted file mode 100644
index 6f1b377b33..0000000000
--- a/torchbenchmark/models/maml/LICENSE
+++ /dev/null
@@ -1,21 +0,0 @@
-MIT License
-
-Copyright (c) 2019 Jackie Loong
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
diff --git a/torchbenchmark/models/maml/README.md b/torchbenchmark/models/maml/README.md
deleted file mode 100644
index efa8d42459..0000000000
--- a/torchbenchmark/models/maml/README.md
+++ /dev/null
@@ -1,91 +0,0 @@
-# MAML-Pytorch
-PyTorch implementation of the supervised learning experiments from the paper
-[Model-Agnostic Meta-Learning (MAML)](https://arxiv.org/abs/1703.03400).
-
-> Version 1.0: Both the `MiniImagenet` and `Omniglot` datasets are supported! Have fun~
-
-> Version 2.0: Rewrote the meta learner and the basic learner and fixed some serious bugs from version 1.0.
-
-For the TensorFlow implementation, please visit the official repo [HERE](https://github.com/cbfinn/maml) and a simpler version [HERE](https://github.com/dragen1860/MAML-TensorFlow).
-
-For a first-order approximation implementation, namely Reptile, please visit [HERE](https://github.com/dragen1860/Reptile-Pytorch).
-
-![heart](res/heart.gif)
-
-# Platform
-- python: 3.x
-- Pytorch: 0.4+
-
-# MiniImagenet
-
-## Howto
-
-For the 5-way 1-shot experiment, it allocates nearly 6GB of GPU memory.
-
-1. Download the `MiniImagenet` dataset from [here](https://github.com/dragen1860/LearningToCompare-Pytorch/issues/4) and the `train/val/test.csv` splits from [here](https://github.com/twitter/meta-learning-lstm/tree/master/data/miniImagenet).
-2. Extract it like:
-```shell
-miniimagenet/
-├── images
-    ├── n0210891500001298.jpg
-    ├── n0287152500001298.jpg
-    ...
-├── test.csv
-├── val.csv
-└── train.csv
-```
-3. Modify the `path` in `miniimagenet_train.py`:
-```python
-    mini = MiniImagenet('miniimagenet/', mode='train', n_way=args.n_way, k_shot=args.k_spt,
-                        k_query=args.k_qry,
-                        batchsz=10000, resize=args.imgsz)
-    ...
-    mini_test = MiniImagenet('miniimagenet/', mode='test', n_way=args.n_way, k_shot=args.k_spt,
-                             k_query=args.k_qry,
-                             batchsz=100, resize=args.imgsz)
-```
-so that it points to your actual data path.
-
-4. Just run `python miniimagenet_train.py`; a running screenshot is shown below:
-![screenshot-miniimagenet](res/mini-screen.png)
-
-If your reproduced performance is not as good, try training for more epochs; MAML is notorious for being hard to train.
-This implementation therefore only provides a basic starting point for your research,
-and the performance below is real and was achieved on my machine.
-
-## Benchmark
-
-| Model                               | Fine Tune | 5-way Acc. |        | 20-way Acc.|        |
-|-------------------------------------|-----------|------------|--------|------------|--------|
-|                                     |           | 1-shot     | 5-shot | 1-shot     | 5-shot |
-| Matching Nets                       | N         | 43.56%     | 55.31% | 17.31%     | 22.69% |
-| Meta-LSTM                           |           | 43.44%     | 60.60% | 16.70%     | 26.06% |
-| MAML                                | Y         | 48.7%      | 63.11% | 16.49%     | 19.29% |
-| **Ours**                            | Y         | 46.2%      | 60.3%  | -          | -      |
-
-# Omniglot
-
-## Howto
-Run `python omniglot_train.py`; the program will download the `omniglot` dataset automatically.
-
-Decrease the value of `args.task_num` to fit your GPU memory capacity.
-
-For the 5-way 1-shot experiment, it allocates nearly 3GB of GPU memory.
-
-# Citing this Repo
-```
-@misc{MAML_Pytorch,
-  author = {Liangqu Long},
-  title = {MAML-Pytorch Implementation},
-  year = {2018},
-  publisher = {GitHub},
-  journal = {GitHub repository},
-  howpublished = {\url{https://github.com/dragen1860/MAML-Pytorch}},
-  commit = {master}
-}
-```
diff --git a/torchbenchmark/models/maml/__init__.py b/torchbenchmark/models/maml/__init__.py
deleted file mode 100644
index 9b0d2d7456..0000000000
--- a/torchbenchmark/models/maml/__init__.py
+++ /dev/null
@@ -1,90 +0,0 @@
-import numpy as np
-import random
-import time
-import torch
-from argparse import Namespace
-from .meta import Meta
-from pathlib import Path
-from typing import Tuple
-from ...util.model import BenchmarkModel
-from torchbenchmark.tasks import OTHER
-
-torch.backends.cudnn.deterministic = True
-torch.backends.cudnn.benchmark = False
-
-
-class Model(BenchmarkModel):
-    task = OTHER.OTHER_TASKS
-    DEFAULT_TRAIN_BSIZE = 1
-    DEFAULT_EVAL_BSIZE = 1
-    ALLOW_CUSTOMIZE_BSIZE = False
-    # Skip the correctness check, because maml runs backward and an optimizer step in eval(),
-    # which returns non-deterministic results.
-    SKIP_CORRECTNESS_CHECK = True
-
-    def __init__(self, test, device, jit, batch_size=None, extra_args=[]):
-        super().__init__(test=test, device=device, jit=jit, batch_size=batch_size, extra_args=extra_args)
-
-        # load from disk or synthesize data
-        use_data_file = False
-        debug_print = False
-        root = str(Path(__file__).parent)
-        args = Namespace(**{
-            'n_way': 5,
-            'k_spt': 1,
-            'k_qry': 15,
-            'imgsz': 28,
-            'imgc': 1,
-            'task_num': 32,
-            'meta_lr': 1e-3,
-            'update_lr': 0.4,
-            'update_step': 5,
-            'update_step_test': 10
-        })
-        config = [
-            ('conv2d', [64, args.imgc, 3, 3, 2, 0]),
-            ('relu', [True]),
-            ('bn', [64]),
-            ('conv2d', [64, 64, 3, 3, 2, 0]),
-            ('relu', [True]),
-            ('bn', [64]),
-            ('conv2d', [64, 64, 3, 3, 2, 0]),
-            ('relu', [True]),
-            ('bn', [64]),
-            ('conv2d', [64, 64, 2, 2, 1, 0]),
-            ('relu', [True]),
-            ('bn', [64]),
-            ('flatten', []),
-            ('linear', [args.n_way, 64])
-        ]
-
-        self.module = Meta(args, config).to(device)
-
-        if use_data_file:
-            self.example_inputs = torch.load(f'{root}/batch.pt')
-            self.example_inputs = tuple([torch.from_numpy(i).to(self.device) for i in self.example_inputs])
-        else:
-            # synthesize data parameterized by arg values
-            self.example_inputs = (
-                torch.randn(args.task_num, args.n_way, args.imgc, args.imgsz, args.imgsz).to(device),
-                torch.randint(0, args.n_way, [args.task_num, args.n_way], dtype=torch.long).to(device),
-                torch.randn(args.task_num, args.n_way * args.k_qry, args.imgc, args.imgsz, args.imgsz).to(device),
-                torch.randint(0, args.n_way, [args.task_num, args.n_way * args.k_qry], dtype=torch.long).to(device))
-
-        # print input shapes
-        if debug_print:
-            for i in range(len(self.example_inputs)):
-                print(self.example_inputs[i].shape)
-
-    def get_module(self):
-        return self.module, self.example_inputs
-
-    def eval(self) -> Tuple[torch.Tensor]:
-        out = self.module(*self.example_inputs)
-        return (out, )
-
-    def train(self):
-        raise NotImplementedError("MAML model doesn't support train.")
-
-    def eval_in_nograd(self):
-        return False
diff --git a/torchbenchmark/models/maml/batch.pt b/torchbenchmark/models/maml/batch.pt
deleted file mode 100644
index 5ac4490bc4..0000000000
--- a/torchbenchmark/models/maml/batch.pt
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:141141dcc7e260454405e7f15ec5550495bbeaed691c3ab19181b0dc00a9d91d
-size 10428868
diff --git a/torchbenchmark/models/maml/install.py b/torchbenchmark/models/maml/install.py
deleted file mode 100644
index e69de29bb2..0000000000
diff --git a/torchbenchmark/models/maml/learner.py b/torchbenchmark/models/maml/learner.py
deleted file mode 100644
index 974405026a..0000000000
--- a/torchbenchmark/models/maml/learner.py
+++ /dev/null
@@ -1,218 +0,0 @@
-import torch
-from torch import nn
-from torch.nn import functional as F
-import numpy as np
-from typing import List
-
-
-class Learner(nn.Module):
-    """
-    Functional CNN learner whose parameters can be supplied externally as fast weights.
-    """
-
-    def __init__(self, config, imgc, imgsz):
-        """
-        :param config: network config, type: list of (string, list)
-        :param imgc: 1 or 3
-        :param imgsz: 28 or 84
-        """
-        super(Learner, self).__init__()
-
-        self.config = config
-
-        # this list contains all tensors that need to be optimized
-        self.vars = nn.ParameterList()
-        # running_mean and running_var
-        self.vars_bn = nn.ParameterList()
-
-        for i, (name, param) in enumerate(self.config):
-            if name == 'conv2d':
-                # [ch_out, ch_in, kernelsz, kernelsz]
-                w = nn.Parameter(torch.ones(*param[:4]))
-                # gain=1 according to cbfinn's implementation
-                torch.nn.init.kaiming_normal_(w)
-                self.vars.append(w)
-                # [ch_out]
-                self.vars.append(nn.Parameter(torch.zeros(param[0])))
-
-            elif name == 'convt2d':
-                # [ch_in, ch_out, kernelsz, kernelsz, stride, padding]
-                w = nn.Parameter(torch.ones(*param[:4]))
-                # gain=1 according to cbfinn's implementation
-                torch.nn.init.kaiming_normal_(w)
-                self.vars.append(w)
-                # [ch_in, ch_out]
-                self.vars.append(nn.Parameter(torch.zeros(param[1])))
-
-            elif name == 'linear':
-                # [ch_out, ch_in]
-                w = nn.Parameter(torch.ones(*param))
-                # gain=1 according to cbfinn's implementation
-                torch.nn.init.kaiming_normal_(w)
-                self.vars.append(w)
-                # [ch_out]
-                self.vars.append(nn.Parameter(torch.zeros(param[0])))
-
-            elif name == 'bn':
-                # [ch_out]
-                w = nn.Parameter(torch.ones(param[0]))
-                self.vars.append(w)
-                # [ch_out]
-                self.vars.append(nn.Parameter(torch.zeros(param[0])))
-
-                # must set requires_grad=False
-                running_mean = nn.Parameter(torch.zeros(param[0]), requires_grad=False)
-                running_var = nn.Parameter(torch.ones(param[0]), requires_grad=False)
-                self.vars_bn.extend([running_mean, running_var])
-
-            elif name in ['tanh', 'relu', 'upsample', 'avg_pool2d', 'max_pool2d',
-                          'flatten', 'reshape', 'leakyrelu', 'sigmoid']:
-                continue
-            else:
-                raise NotImplementedError
-
-    def extra_repr(self):
-        info = ''
-
-        for name, param in self.config:
-            if name == 'conv2d':
-                tmp = 'conv2d:(ch_in:%d, ch_out:%d, k:%dx%d, stride:%d, padding:%d)'\
-                      %(param[1], param[0], param[2], param[3], param[4], param[5],)
-                info += tmp + '\n'
-
-            elif name == 'convt2d':
-                tmp = 'convTranspose2d:(ch_in:%d, ch_out:%d, k:%dx%d, stride:%d, padding:%d)'\
-                      %(param[0], param[1], param[2], param[3], param[4], param[5],)
-                info += tmp + '\n'
-
-            elif name == 'linear':
-                tmp = 'linear:(in:%d, out:%d)'%(param[1], param[0])
-                info += tmp + '\n'
-
-            elif name == 'leakyrelu':
-                tmp = 'leakyrelu:(slope:%f)'%(param[0])
-                info += tmp + '\n'
-
-            elif name == 'avg_pool2d':
-                tmp = 'avg_pool2d:(k:%d, stride:%d, padding:%d)'%(param[0], param[1], param[2])
-                info += tmp + '\n'
-            elif name == 'max_pool2d':
-                tmp = 'max_pool2d:(k:%d, stride:%d, padding:%d)'%(param[0], param[1], param[2])
-                info += tmp + '\n'
-            elif name in ['flatten', 'tanh', 'relu', 'upsample', 'reshape', 'sigmoid', 'use_logits', 'bn']:
-                tmp = name + ':' + str(tuple(param))
-                info += tmp + '\n'
-            else:
-                raise NotImplementedError
-
-        return info
-
-    def forward(self, x, vars=None, bn_training=True):
-        """
-        This function can also be called during fine-tuning; however, during fine-tuning we do not
-        wish to update running_mean/running_var. Although the bn weights/biases are updated, they
-        have been separated out as fast_weights. To avoid updating running_mean/running_var, pass
-        bn_training=False; the bn weight/bias will still be updated through fast_weights without
-        dirtying the initial theta parameters.
-        :param x: [b, 1, 28, 28]
-        :param vars: optional list of parameters to use instead of self.vars
-        :param bn_training: set False to not update the batch-norm running statistics
-        :return: the network output x
-        """
-
-        if vars is None:
-            vars = self.vars
-
-        idx = 0
-        bn_idx = 0
-
-        for name, param in self.config:
-            if name == 'conv2d':
-                w, b = vars[idx], vars[idx + 1]
-                # remember to keep forward_encoder and forward_decoder synchronized!
-                x = F.conv2d(x, w, b, stride=param[4], padding=param[5])
-                idx += 2
-                # print(name, param, '\tout:', x.shape)
-            elif name == 'convt2d':
-                w, b = vars[idx], vars[idx + 1]
-                # remember to keep forward_encoder and forward_decoder synchronized!
-                x = F.conv_transpose2d(x, w, b, stride=param[4], padding=param[5])
-                idx += 2
-                # print(name, param, '\tout:', x.shape)
-            elif name == 'linear':
-                w, b = vars[idx], vars[idx + 1]
-                x = F.linear(x, w, b)
-                idx += 2
-                # print('forward:', idx, x.norm().item())
-            elif name == 'bn':
-                w, b = vars[idx], vars[idx + 1]
-                running_mean, running_var = self.vars_bn[bn_idx], self.vars_bn[bn_idx+1]
-                x = F.batch_norm(x, running_mean, running_var, weight=w, bias=b, training=bn_training)
-                idx += 2
-                bn_idx += 2
-
-            elif name == 'flatten':
-                # print(x.shape)
-                x = x.view(x.size(0), -1)
-            elif name == 'reshape':
-                # [b, 8] => [b, 2, 2, 2]
-                x = x.view(x.size(0), *param)
-            elif name == 'relu':
-                x = F.relu(x, inplace=param[0])
-            elif name == 'leakyrelu':
-                x = F.leaky_relu(x, negative_slope=param[0], inplace=param[1])
-            elif name == 'tanh':
-                x = torch.tanh(x)
-            elif name == 'sigmoid':
-                x = torch.sigmoid(x)
-            elif name == 'upsample':
-                x = F.upsample_nearest(x, scale_factor=param[0])
-            elif name == 'max_pool2d':
-                x = F.max_pool2d(x, param[0], param[1], param[2])
-            elif name == 'avg_pool2d':
-                x = F.avg_pool2d(x, param[0], param[1], param[2])
-            else:
-                raise NotImplementedError
-
-        # make sure all variables were used properly
-        assert idx == len(vars)
-        assert bn_idx == len(self.vars_bn)
-
-        return x
-
-    def zero_grad(self, vars=None):
-        """
-        Zero the gradients of self.vars (or of the given vars).
-        :param vars: optional list of parameters whose gradients should be zeroed
-        :return:
-        """
-        with torch.no_grad():
-            if vars is None:
-                for p in self.vars:
-                    if p.grad is not None:
-                        p.grad.zero_()
-            else:
-                for p in vars:
-                    if p.grad is not None:
-                        p.grad.zero_()
-
-    def parameters(self):
-        """
-        Override this function since the default implementation returns a generator.
-        :return: self.vars
-        """
-        return self.vars
\ No newline at end of file
diff --git a/torchbenchmark/models/maml/meta.py b/torchbenchmark/models/maml/meta.py
deleted file mode 100644
index 9c53d79979..0000000000
--- a/torchbenchmark/models/maml/meta.py
+++ /dev/null
@@ -1,231 +0,0 @@
-import torch
-from torch import nn
-from torch import optim
-from torch.nn import functional as F
-from torch.utils.data import TensorDataset, DataLoader
-import numpy as np
-
-from .learner import Learner
-from copy import deepcopy
-
-
-class Meta(nn.Module):
-    """
-    Meta Learner
-    """
-    def __init__(self, args, config):
-        """
-        :param args: namespace of hyperparameters (n_way, k_spt, k_qry, learning rates, update steps, ...)
-        :param config: network config passed on to Learner
-        """
-        super(Meta, self).__init__()
-
-        self.update_lr = args.update_lr
-        self.meta_lr = args.meta_lr
-        self.n_way = args.n_way
-        self.k_spt = args.k_spt
-        self.k_qry = args.k_qry
-        self.task_num = args.task_num
-        self.update_step = args.update_step
-        self.update_step_test = args.update_step_test
-
-        self.net = Learner(config, args.imgc, args.imgsz)
-        self.meta_optim = optim.Adam(self.net.parameters(), lr=self.meta_lr)
-
-    def clip_grad_by_norm_(self, grad, max_norm):
-        """
-        In-place gradient clipping.
-        :param grad: list of gradients
-        :param max_norm: maximum allowable norm
-        :return: average norm of the gradients
-        """
-        total_norm = 0
-        counter = 0
-        for g in grad:
-            param_norm = g.data.norm(2)
-            total_norm += param_norm.item() ** 2
-            counter += 1
-        total_norm = total_norm ** (1. / 2)
-
-        clip_coef = max_norm / (total_norm + 1e-6)
-        if clip_coef < 1:
-            for g in grad:
-                g.data.mul_(clip_coef)
-
-        return total_norm/counter
-
-    def forward(self, x_spt, y_spt, x_qry, y_qry):
-        if self.training:
-            return self.forward_train(x_spt, y_spt, x_qry, y_qry)
-        else:
-            return self.finetunning(x_spt[0], y_spt[0], x_qry[0], y_qry[0])
-
-    def forward_train(self, x_spt, y_spt, x_qry, y_qry):
-        """
-        :param x_spt: [b, setsz, c_, h, w]
-        :param y_spt: [b, setsz]
-        :param x_qry: [b, querysz, c_, h, w]
-        :param y_qry: [b, querysz]
-        :return: accuracies on the query set after each update step
-        """
-        task_num, setsz, c_, h, w = x_spt.size()
-        querysz = x_qry.size(1)
-
-        losses_q = [0 for _ in range(self.update_step + 1)]  # losses_q[i] is the loss on step i
-        corrects = [0 for _ in range(self.update_step + 1)]
-
-        for i in range(task_num):
-
-            # 1. run the i-th task and compute the loss for k=0
-            logits = self.net(x_spt[i], vars=None, bn_training=True)
-            loss = F.cross_entropy(logits, y_spt[i])
-            grad = torch.autograd.grad(loss, self.net.parameters())
-            fast_weights = list([p[1] - self.update_lr * p[0] for p in zip(grad, self.net.parameters())])
-
-            # this is the loss and accuracy before the first update
-            with torch.no_grad():
-                # [setsz, nway]
-                logits_q = self.net(x_qry[i], self.net.parameters(), bn_training=True)
-                loss_q = F.cross_entropy(logits_q, y_qry[i])
-                losses_q[0] += loss_q
-
-                pred_q = F.softmax(logits_q, dim=1).argmax(dim=1)
-                correct = torch.eq(pred_q, y_qry[i]).sum().item()
-                corrects[0] = corrects[0] + correct
-
-            # this is the loss and accuracy after the first update
-            with torch.no_grad():
-                # [setsz, nway]
-                logits_q = self.net(x_qry[i], fast_weights, bn_training=True)
-                loss_q = F.cross_entropy(logits_q, y_qry[i])
-                losses_q[1] += loss_q
-                # [setsz]
-                pred_q = F.softmax(logits_q, dim=1).argmax(dim=1)
-                correct = torch.eq(pred_q, y_qry[i]).sum().item()
-                corrects[1] = corrects[1] + correct
-
-            for k in range(1, self.update_step):
-                # 1. run the i-th task and compute the loss for k=1~K-1
-                logits = self.net(x_spt[i], fast_weights, bn_training=True)
-                loss = F.cross_entropy(logits, y_spt[i])
-                # 2. compute grad on theta_pi
-                grad = torch.autograd.grad(loss, fast_weights)
-                # 3. theta_pi = theta_pi - train_lr * grad
-                fast_weights = [p[1] - self.update_lr * p[0] for p in zip(grad, fast_weights)]
-
-                logits_q = self.net(x_qry[i], fast_weights, bn_training=True)
-                # loss_q is overwritten each step; only the loss_q of the last update step is kept.
-                loss_q = F.cross_entropy(logits_q, y_qry[i])
-                losses_q[k + 1] += loss_q
-
-                with torch.no_grad():
-                    pred_q = F.softmax(logits_q, dim=1).argmax(dim=1)
-                    correct = torch.eq(pred_q, y_qry[i]).sum().item()  # scalar
-                    corrects[k + 1] = corrects[k + 1] + correct
-
-        # end of all tasks
-        # average the last-step query-set loss over all tasks
-        loss_q = losses_q[-1] / task_num
-
-        # optimize theta parameters
-        self.meta_optim.zero_grad()
-        loss_q.backward()
-        # print('meta update')
-        # for p in self.net.parameters()[:5]:
-        #     print(torch.norm(p).item())
-        self.meta_optim.step()
-
-        accs = torch.tensor(corrects) / (querysz * task_num)
-
-        return accs
-
-    def finetunning(self, x_spt, y_spt, x_qry, y_qry):
-        """
-        :param x_spt: [setsz, c_, h, w]
-        :param y_spt: [setsz]
-        :param x_qry: [querysz, c_, h, w]
-        :param y_qry: [querysz]
-        :return: accuracies on the query set after each update step
-        """
-        querysz = x_qry.size(0)
-
-        corrects = [0 for _ in range(self.update_step_test + 1)]
-
-        # in order not to ruin the state of running_mean/variance and bn_weight/bias,
-        # we fine-tune on a copy of the model instead of self.net
-        net = deepcopy(self.net)
-
-        # 1. run the task and compute the loss for k=0
-        logits = net(x_spt)
-        loss = F.cross_entropy(logits, y_spt)
-        grad = torch.autograd.grad(loss, net.parameters())
-        fast_weights = list(map(lambda p: p[1] - self.update_lr * p[0], zip(grad, net.parameters())))
-
-        # this is the loss and accuracy before the first update
-        with torch.no_grad():
-            # [setsz, nway]
-            logits_q = net(x_qry, net.parameters(), bn_training=True)
-            # [setsz]
-            pred_q = F.softmax(logits_q, dim=1).argmax(dim=1)
-            # scalar
-            correct = torch.eq(pred_q, y_qry).sum().item()
-            corrects[0] = corrects[0] + correct
-
-        # this is the loss and accuracy after the first update
-        with torch.no_grad():
-            # [setsz, nway]
-            logits_q = net(x_qry, fast_weights, bn_training=True)
-            # [setsz]
-            pred_q = F.softmax(logits_q, dim=1).argmax(dim=1)
-            # scalar
-            correct = torch.eq(pred_q, y_qry).sum().item()
-            corrects[1] = corrects[1] + correct
-
-        for k in range(1, self.update_step_test):
-            # 1. run the task and compute the loss for k=1~K-1
-            logits = net(x_spt, fast_weights, bn_training=True)
-            loss = F.cross_entropy(logits, y_spt)
-            # 2. compute grad on theta_pi
-            grad = torch.autograd.grad(loss, fast_weights)
-            # 3. theta_pi = theta_pi - train_lr * grad
-            fast_weights = list(map(lambda p: p[1] - self.update_lr * p[0], zip(grad, fast_weights)))
-
-            logits_q = net(x_qry, fast_weights, bn_training=True)
-            # loss_q is overwritten each step; only the loss_q of the last update step is kept.
-            loss_q = F.cross_entropy(logits_q, y_qry)
-
-            with torch.no_grad():
-                pred_q = F.softmax(logits_q, dim=1).argmax(dim=1)
-                correct = torch.eq(pred_q, y_qry).sum().item()  # scalar
-                corrects[k + 1] = corrects[k + 1] + correct
-
-        del net
-
-        accs = torch.tensor(corrects) / querysz
-
-        return accs
-
-
-def main():
-    pass
-
-
-if __name__ == '__main__':
-    main()
diff --git a/torchbenchmark/models/maml/metadata.yaml b/torchbenchmark/models/maml/metadata.yaml
deleted file mode 100644
index 06b9110c46..0000000000
--- a/torchbenchmark/models/maml/metadata.yaml
+++ /dev/null
@@ -1,7 +0,0 @@
-eval_benchmark: false
-eval_deterministic: true
-eval_nograd: true
-train_benchmark: false
-train_deterministic: true
-not_implemented:
-  - jit: true
\ No newline at end of file
diff --git a/torchbenchmark/models/maml/origin b/torchbenchmark/models/maml/origin
deleted file mode 100644
index c3f766d92f..0000000000
--- a/torchbenchmark/models/maml/origin
+++ /dev/null
@@ -1 +0,0 @@
-https://github.com/dragen1860/MAML-Pytorch
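Note for readers unfamiliar with the removed model: the heart of the deleted `meta.py` is the standard MAML double loop — an inner loop that builds per-task `fast_weights` with plain SGD steps on the support set, and an outer loop that backpropagates the query-set loss of the adapted weights into the original parameters. The sketch below is an illustrative toy reimplementation of that pattern, not part of this repository: the functional `net(x, params)`, `feat_dim`, and the reduced `task_num=4` are assumptions (the other hyperparameter values mirror the defaults in the deleted `__init__.py`), and it passes `create_graph=True` for the full second-order MAML, whereas the deleted code calls `torch.autograd.grad` without it.

```python
# Illustrative-only sketch of the MAML double loop (hypothetical toy setup, not the deleted benchmark).
import torch
import torch.nn.functional as F

torch.manual_seed(0)

n_way, k_spt, k_qry, task_num = 5, 1, 15, 4          # task_num reduced for the sketch
update_lr, meta_lr, update_step = 0.4, 1e-3, 5
feat_dim = 64                                         # assumed toy feature size

# A tiny functional "learner": logits = x @ W.T + b, with parameters passed in explicitly
# so that inner-loop fast weights never overwrite the meta-parameters.
def net(x, params):
    w, b = params
    return F.linear(x, w, b)

meta_params = [
    torch.zeros(n_way, feat_dim, requires_grad=True),
    torch.zeros(n_way, requires_grad=True),
]
meta_optim = torch.optim.Adam(meta_params, lr=meta_lr)

# Synthetic episodes, shaped like the benchmark's example_inputs (features instead of images).
x_spt = torch.randn(task_num, n_way * k_spt, feat_dim)
y_spt = torch.randint(0, n_way, (task_num, n_way * k_spt))
x_qry = torch.randn(task_num, n_way * k_qry, feat_dim)
y_qry = torch.randint(0, n_way, (task_num, n_way * k_qry))

meta_loss = 0.0
for i in range(task_num):
    # Inner loop: a few SGD steps on the support set, kept in the autograd graph
    # (create_graph=True) so the outer loss can differentiate through them.
    fast = meta_params
    for _ in range(update_step):
        loss = F.cross_entropy(net(x_spt[i], fast), y_spt[i])
        grads = torch.autograd.grad(loss, fast, create_graph=True)
        fast = [p - update_lr * g for p, g in zip(fast, grads)]
    # Outer objective: query-set loss of the adapted (fast) weights.
    meta_loss = meta_loss + F.cross_entropy(net(x_qry[i], fast), y_qry[i])

# Meta update on the original parameters, averaged over tasks.
meta_optim.zero_grad()
(meta_loss / task_num).backward()
meta_optim.step()
print('meta loss:', (meta_loss / task_num).item())
```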