Skip to content

Commit

Permalink
ci: Cache datasets (#315)
Browse files Browse the repository at this point in the history
* Add conftest.py

* Add dataset caching

* Use tmpdir to use cached dataset and remove unused tmpdir

* Use data_dir for datasets and tmpdir for logs and weights

* Use cached datasets in cli tests

* Keep line length <= 120

* Remove unnecessary import

* Add TODO

* Use DATA_DIR to specify path in pytest.mark.parametrize

* Fix typo

* Add notes

* Fix data_dir in LitMNIST

* data_dir

* clean names

* Path()

* City

Co-authored-by: Jirka Borovec <[email protected]>
  • Loading branch information
akihironitta and Borda authored Nov 6, 2020
1 parent ef34a17 commit bc01085
Show file tree
Hide file tree
Showing 24 changed files with 123 additions and 92 deletions.
11 changes: 5 additions & 6 deletions .github/workflows/ci_test-full.yml
Original file line number Diff line number Diff line change
Expand Up @@ -74,12 +74,11 @@ jobs:
pip list
shell: bash

#- name: Cache datasets
# uses: actions/cache@v2
# with:
# path: Datasets # This path is specific to Ubuntu
# # Look to see if there is a cache hit for the corresponding requirements file
# key: pl-datasets
- name: Cache datasets
uses: actions/cache@v2
with:
path: ./datasets
key: pl-datasets-${{ hashFiles('tests/conftest.py') }}

- name: Tests
run: |
Expand Down
3 changes: 1 addition & 2 deletions pl_bolts/models/mnist_module.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import os
from argparse import ArgumentParser
from warnings import warn

Expand Down Expand Up @@ -70,7 +69,7 @@ def val_dataloader(self):
return loader

def test_dataloader(self):
test_dataset = MNIST(os.getcwd(), train=False, download=True, transform=transforms.ToTensor())
test_dataset = MNIST(self.hparams.data_dir, train=False, download=True, transform=transforms.ToTensor())
loader = DataLoader(test_dataset, batch_size=self.hparams.batch_size, num_workers=self.hparams.num_workers)
return loader

Expand Down
3 changes: 2 additions & 1 deletion tests/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,9 @@

from pytorch_lightning import seed_everything

TEST_ROOT = os.path.dirname(__file__)
TEST_ROOT = os.path.realpath(os.path.dirname(__file__))
PACKAGE_ROOT = os.path.dirname(TEST_ROOT)
DATASETS_PATH = os.path.join(PACKAGE_ROOT, 'datasets')
# generate a list of random seeds for each test
ROOT_SEED = 1234

Expand Down
2 changes: 1 addition & 1 deletion tests/callbacks/test_info_callbacks.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from pl_bolts.callbacks import PrintTableMetricsCallback


def test_printtable_metrics_callback(tmpdir):
def test_printtable_metrics_callback():
callback = PrintTableMetricsCallback()

metrics_a = {'loss': 1.0, 'epoch': 0}
Expand Down
2 changes: 1 addition & 1 deletion tests/callbacks/test_param_update_callbacks.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from pl_bolts.callbacks.byol_updates import BYOLMAWeightUpdate


def test_byol_ma_weight_update_callback(tmpdir):
def test_byol_ma_weight_update_callback():
a = nn.Linear(100, 10)
b = deepcopy(a)
a_original = deepcopy(a)
Expand Down
2 changes: 1 addition & 1 deletion tests/callbacks/test_variational_callbacks.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from pl_bolts.models.gans import GAN


def test_latent_dim_interpolator(tmpdir):
def test_latent_dim_interpolator():

class FakeTrainer(object):
def __init__(self):
Expand Down
15 changes: 15 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
from pathlib import Path

import pytest


# GitHub Actions caches the datasets stored under this path.
# Use the `datadir` fixture where possible, and use `DATASETS_PATH` directly in
# `pytest.mark.parametrize()`, where the `datadir` fixture cannot be used.
# https://github.com/pytest-dev/pytest/issues/349
from tests import DATASETS_PATH


@pytest.fixture(scope="session")
def datadir():
    """Return the datasets directory (``DATASETS_PATH``) as a ``pathlib.Path``.

    The fixture is session-scoped, so the path object is created once per
    test session and handed to every test that requests ``datadir``.
    """
    datasets_root = Path(DATASETS_PATH)
    return datasets_root
4 changes: 2 additions & 2 deletions tests/datamodules/test_dataloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@
from pl_bolts.datasets.cifar10_dataset import CIFAR10


def test_async_dataloader(tmpdir):
ds = CIFAR10(tmpdir)
def test_async_dataloader(datadir):
ds = CIFAR10(data_dir=datadir)

if torch.cuda.device_count() > 0: # Can only run this test with a GPU
device = torch.device('cuda', 0)
Expand Down
11 changes: 6 additions & 5 deletions tests/datamodules/test_datamodules.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,9 @@
from pl_bolts.datamodules import CityscapesDataModule


def test_dev_datasets(tmpdir):
ds = CIFAR10(tmpdir)
def test_dev_datasets(datadir):

ds = CIFAR10(data_dir=datadir)
for b in ds:
pass

Expand Down Expand Up @@ -35,14 +36,14 @@ def _create_synth_Cityscapes_dataset(path_dir):
fine_labels_dir / split / city / semantic_target_name)


def test_cityscapes_datamodule(tmpdir):
def test_cityscapes_datamodule(datadir):

_create_synth_Cityscapes_dataset(tmpdir)
_create_synth_Cityscapes_dataset(datadir)

batch_size = 1
target_types = ['semantic', 'instance']
for target_type in target_types:
dm = CityscapesDataModule(tmpdir,
dm = CityscapesDataModule(datadir,
num_workers=0,
batch_size=batch_size,
target_type=target_type)
Expand Down
2 changes: 1 addition & 1 deletion tests/datamodules/test_sklearn_dataloaders.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
' install it with `pip install sklearn`.')


def test_dataloader(tmpdir):
def test_dataloader():
seed_everything()

X = np.random.rand(5, 2)
Expand Down
8 changes: 4 additions & 4 deletions tests/datasets/test_datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,31 +3,31 @@
from pl_bolts.datasets import DummyDataset, RandomDataset, RandomDictDataset, RandomDictStringDataset


def test_dummy_ds(tmpdir):
def test_dummy_ds():
ds = DummyDataset((1, 2), num_samples=100)
dl = DataLoader(ds)

for b in dl:
pass


def test_rand_ds(tmpdir):
def test_rand_ds():
ds = RandomDataset(32, num_samples=100)
dl = DataLoader(ds)

for b in dl:
pass


def test_rand_dict_ds(tmpdir):
def test_rand_dict_ds():
ds = RandomDictDataset(32, num_samples=100)
dl = DataLoader(ds)

for b in dl:
pass


def test_rand_str_dict_ds(tmpdir):
def test_rand_str_dict_ds():
ds = RandomDictStringDataset(32, num_samples=100)
dl = DataLoader(ds)

Expand Down
30 changes: 15 additions & 15 deletions tests/models/self_supervised/test_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,14 @@

# TODO: this test is hanging (runs for more than 10 min), so we need to use a GPU or optimize it...
@pytest.mark.skipif(not torch.cuda.is_available(), reason="test requires GPU machine")
def test_cpcv2(tmpdir):
def test_cpcv2(tmpdir, datadir):
seed_everything()

datamodule = CIFAR10DataModule(data_dir=tmpdir, num_workers=0, batch_size=2)
datamodule = CIFAR10DataModule(data_dir=datadir, num_workers=0, batch_size=2)
datamodule.train_transforms = CPCTrainTransformsCIFAR10()
datamodule.val_transforms = CPCEvalTransformsCIFAR10()

model = CPCV2(encoder='resnet18', data_dir=tmpdir, batch_size=2, online_ft=True, datamodule=datamodule)
model = CPCV2(encoder='resnet18', data_dir=datadir, batch_size=2, online_ft=True, datamodule=datamodule)
trainer = pl.Trainer(fast_dev_run=True, max_epochs=1, default_root_dir=tmpdir)
trainer.fit(model)
loss = trainer.progress_bar_dict['val_nce']
Expand All @@ -32,51 +32,51 @@ def test_cpcv2(tmpdir):

# TODO: this test is hanging (runs for more than 10 min), so we need to use a GPU or optimize it...
@pytest.mark.skipif(not torch.cuda.is_available(), reason="test requires GPU machine")
def test_byol(tmpdir):
def test_byol(tmpdir, datadir):
seed_everything()

datamodule = CIFAR10DataModule(data_dir=tmpdir, num_workers=0, batch_size=2)
datamodule = CIFAR10DataModule(data_dir=datadir, num_workers=0, batch_size=2)
datamodule.train_transforms = CPCTrainTransformsCIFAR10()
datamodule.val_transforms = CPCEvalTransformsCIFAR10()

model = BYOL(data_dir=tmpdir, num_classes=datamodule)
model = BYOL(data_dir=datadir, num_classes=datamodule)
trainer = pl.Trainer(fast_dev_run=True, max_epochs=1, default_root_dir=tmpdir, max_steps=2)
trainer.fit(model, datamodule)
loss = trainer.progress_bar_dict['loss']

assert float(loss) < 1.0


def test_amdim(tmpdir):
def test_amdim(tmpdir, datadir):
seed_everything()

model = AMDIM(data_dir=tmpdir, batch_size=2, online_ft=True, encoder='resnet18')
model = AMDIM(data_dir=datadir, batch_size=2, online_ft=True, encoder='resnet18')
trainer = pl.Trainer(fast_dev_run=True, max_epochs=1, default_root_dir=tmpdir)
trainer.fit(model)
loss = trainer.progress_bar_dict['loss']

assert float(loss) > 0


def test_moco(tmpdir):
def test_moco(tmpdir, datadir):
seed_everything()

datamodule = CIFAR10DataModule(tmpdir, num_workers=0, batch_size=2)
datamodule = CIFAR10DataModule(data_dir=datadir, num_workers=0, batch_size=2)
datamodule.train_transforms = Moco2TrainCIFAR10Transforms()
datamodule.val_transforms = Moco2EvalCIFAR10Transforms()

model = MocoV2(data_dir=tmpdir, batch_size=2, online_ft=True)
model = MocoV2(data_dir=datadir, batch_size=2, online_ft=True)
trainer = pl.Trainer(fast_dev_run=True, max_epochs=1, default_root_dir=tmpdir, callbacks=[MocoLRScheduler()])
trainer.fit(model, datamodule=datamodule)
loss = trainer.progress_bar_dict['loss']

assert float(loss) > 0


def test_simclr(tmpdir):
def test_simclr(tmpdir, datadir):
seed_everything()

datamodule = CIFAR10DataModule(tmpdir, num_workers=0, batch_size=2)
datamodule = CIFAR10DataModule(data_dir=datadir, num_workers=0, batch_size=2)
datamodule.train_transforms = SimCLRTrainDataTransform(32)
datamodule.val_transforms = SimCLREvalDataTransform(32)

Expand All @@ -88,14 +88,14 @@ def test_simclr(tmpdir):
assert float(loss) > 0


def test_swav(tmpdir):
def test_swav(tmpdir, datadir):
seed_everything()

batch_size = 2

# inputs, y = batch (doesn't receive y for some reason)
datamodule = CIFAR10DataModule(
data_dir=tmpdir,
data_dir=datadir,
batch_size=batch_size,
num_workers=0
)
Expand Down
6 changes: 3 additions & 3 deletions tests/models/self_supervised/test_resnets.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
)


def test_cpc_resnet(tmpdir):
def test_cpc_resnet():
x = torch.rand(3, 3, 64, 64)
model = cpc_resnet50(x)
model(x)
Expand All @@ -33,7 +33,7 @@ def test_cpc_resnet(tmpdir):
wide_resnet50_2,
wide_resnet101_2
])
def test_torchvision_resnets(tmpdir, model_class):
def test_torchvision_resnets(model_class):
x = torch.rand(3, 3, 64, 64)
model = model_class()
model(x)
Expand All @@ -44,7 +44,7 @@ def test_torchvision_resnets(tmpdir, model_class):
64,
128
])
def test_amdim_encoder(tmpdir, size):
def test_amdim_encoder(size):
dummy_batch = torch.zeros((2, 3, size, size))
model = AMDIMEncoder(dummy_batch, encoder_size=size)
model.init_weights()
Expand Down
26 changes: 19 additions & 7 deletions tests/models/self_supervised/test_scripts.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,12 @@
import pytest
import torch

from tests import DATASETS_PATH

@pytest.mark.parametrize('cli_args', ["--max_epochs 1 --max_steps 3 --fast_dev_run --batch_size 2"])

@pytest.mark.parametrize('cli_args', [
f"--data_dir {DATASETS_PATH} --max_epochs 1 --max_steps 3 --fast_dev_run --batch_size 2"
])
def test_cli_run_self_supervised_amdim(cli_args):
"""Test running CLI for an example with default params."""
from pl_bolts.models.self_supervised.amdim.amdim_module import cli_main
Expand All @@ -16,7 +20,9 @@ def test_cli_run_self_supervised_amdim(cli_args):

# TODO: this test is hanging (runs for more than 10 min), so we need to use a GPU or optimize it...
@pytest.mark.skipif(not torch.cuda.is_available(), reason="test requires GPU machine")
@pytest.mark.parametrize('cli_args', ['--max_epochs 1 --max_steps 3 --fast_dev_run --batch_size 2 --encoder resnet18'])
@pytest.mark.parametrize('cli_args', [
f'--data_dir {DATASETS_PATH} --max_epochs 1 --max_steps 3 --fast_dev_run --batch_size 2 --encoder resnet18'
])
def test_cli_run_self_supervised_cpc(cli_args):
"""Test running CLI for an example with default params."""
from pl_bolts.models.self_supervised.cpc.cpc_module import cli_main
Expand All @@ -26,7 +32,9 @@ def test_cli_run_self_supervised_cpc(cli_args):
cli_main()


@pytest.mark.parametrize('cli_args', ['--max_epochs 1 --max_steps 3 --fast_dev_run --batch_size 2'])
@pytest.mark.parametrize('cli_args', [
f'--data_dir {DATASETS_PATH} --max_epochs 1 --max_steps 3 --fast_dev_run --batch_size 2'
])
def test_cli_run_self_supervised_moco(cli_args):
"""Test running CLI for an example with default params."""
from pl_bolts.models.self_supervised.moco.moco2_module import cli_main
Expand All @@ -36,7 +44,9 @@ def test_cli_run_self_supervised_moco(cli_args):
cli_main()


@pytest.mark.parametrize('cli_args', ['--max_epochs 1 --max_steps 3 --fast_dev_run --batch_size 2 --online_ft'])
@pytest.mark.parametrize('cli_args', [
f'--data_dir {DATASETS_PATH} --max_epochs 1 --max_steps 3 --fast_dev_run --batch_size 2 --online_ft'
])
def test_cli_run_self_supervised_simclr(cli_args):
"""Test running CLI for an example with default params."""
from pl_bolts.models.self_supervised.simclr.simclr_module import cli_main
Expand All @@ -46,7 +56,9 @@ def test_cli_run_self_supervised_simclr(cli_args):
cli_main()


@pytest.mark.parametrize('cli_args', ['--max_epochs 1 --max_steps 3 --fast_dev_run --batch_size 2 --online_ft'])
@pytest.mark.parametrize('cli_args', [
f'--data_dir {DATASETS_PATH} --max_epochs 1 --max_steps 3 --fast_dev_run --batch_size 2 --online_ft'
])
def test_cli_run_self_supervised_byol(cli_args):
"""Test running CLI for an example with default params."""
from pl_bolts.models.self_supervised.byol.byol_module import cli_main
Expand All @@ -58,8 +70,8 @@ def test_cli_run_self_supervised_byol(cli_args):

@pytest.mark.parametrize(
'cli_args', [
'--max_epochs 1 --max_steps 3 --fast_dev_run --batch_size 2 --gpus 0 --arch resnet18'
' --hidden_mlp 512 --fp32 --sinkhorn_iterations 1 --nmb_prototypes 2 --dataset cifar10'
f'--dataset cifar10 --data_path {DATASETS_PATH} --max_epochs 1 --max_steps 3 --fast_dev_run --batch_size 2'
' --gpus 0 --arch resnet18 --hidden_mlp 512 --fp32 --sinkhorn_iterations 1 --nmb_prototypes 2'
]
)
def test_cli_run_self_supervised_swav(cli_args):
Expand Down
Loading

0 comments on commit bc01085

Please sign in to comment.