
Add SimCLR trainer #1252

Merged: 67 commits merged into main from trainers/simclr on May 11, 2023.
Showing changes from 43 of 67 commits.

Commits
0d5279d
add simclr and tests
isaaccorley Apr 15, 2023
490b863
add lightly to reqs
isaaccorley Apr 15, 2023
a7d6e71
Merge branch 'main' into trainers/simclr
adamjstewart Apr 16, 2023
1dabbcd
pyupgrade
adamjstewart Apr 16, 2023
42f3be0
Copy things from prior implementation
adamjstewart Apr 16, 2023
554aa92
Add SimCLR v2 projection head
adamjstewart Apr 16, 2023
900d378
Remove kwargs
adamjstewart Apr 16, 2023
b580f19
Call __init__ explicitly
adamjstewart Apr 16, 2023
5a4408f
Fix mypy and docs
adamjstewart Apr 16, 2023
cda57df
Can't test newer setuptools
adamjstewart Apr 16, 2023
5bec352
Default to output dim of model
adamjstewart Apr 16, 2023
b1a51f8
Add memory bank
adamjstewart Apr 16, 2023
4b1a15d
Ignore erroneous warning
adamjstewart Apr 16, 2023
845519d
Fix configs, test SSL4EO
adamjstewart Apr 16, 2023
776071e
Fix a few layer bugs
adamjstewart Apr 16, 2023
7b86686
mypy fixes
adamjstewart Apr 16, 2023
3fba197
kernel_size must be an integer
adamjstewart Apr 16, 2023
3b48ed5
Fix SeCo in_channels
adamjstewart Apr 16, 2023
19c5052
Get more coverage
adamjstewart Apr 16, 2023
395fe37
Bump min lightly
adamjstewart Apr 16, 2023
1edc96a
Default logging
adamjstewart Apr 16, 2023
eeb5af9
Test weights
adamjstewart Apr 16, 2023
45b3dc6
mypy fix
adamjstewart Apr 16, 2023
be13c5e
Grab max_epochs from the trainer
adamjstewart Apr 17, 2023
1c32651
max_epochs param removed
adamjstewart Apr 17, 2023
0a65af4
Use num_features
adamjstewart Apr 18, 2023
8a4a2c2
Remove classification head
adamjstewart Apr 18, 2023
077f0ed
SimCLR uses LARS, with Adam as a backup
adamjstewart Apr 18, 2023
8dc3463
Add warnings
adamjstewart Apr 18, 2023
4082ee3
Grab num features directly from model
adamjstewart Apr 18, 2023
9e2b54d
Check if identity
adamjstewart Apr 18, 2023
fb78924
Match timm model design
adamjstewart Apr 19, 2023
e07a2c9
Capture warnings
adamjstewart Apr 19, 2023
f100aed
Fix tests
adamjstewart Apr 19, 2023
0421728
Increase coverage
adamjstewart Apr 19, 2023
bf36410
Fix method name
adamjstewart Apr 19, 2023
335d9af
More typos
adamjstewart Apr 19, 2023
1e90095
Escape regex
adamjstewart Apr 19, 2023
6a04435
Newer setuptools now supported
adamjstewart Apr 20, 2023
c37c510
New batch norm for every layer
adamjstewart Apr 20, 2023
93325e5
Merge branch 'main' into trainers/simclr
adamjstewart Apr 20, 2023
2549d51
Merge branch 'main' into trainers/simclr
adamjstewart Apr 21, 2023
d874b17
Merge branch 'main' into trainers/simclr
isaaccorley Apr 22, 2023
dbaeb3f
Rename forward arg
adamjstewart Apr 23, 2023
ce2de2a
Clarify usage of weights parameter
adamjstewart Apr 23, 2023
ab677f3
Fix flake8
adamjstewart Apr 23, 2023
8491a19
Merge branch 'main' into trainers/simclr
adamjstewart Apr 23, 2023
41ba0ff
Check it
calebrob6 Apr 23, 2023
7da418e
Use hydra
adamjstewart Apr 24, 2023
e0040e0
Track average L2 normed stdev over features
calebrob6 Apr 24, 2023
a76fda5
Merge branch 'trainers/simclr' of github.com:isaaccorley/torchgeo int…
calebrob6 Apr 24, 2023
2f838da
SimCLR decays lr to 0
adamjstewart Apr 24, 2023
3e9fb28
Add lr warmup
adamjstewart Apr 24, 2023
ba44bc9
Merge branch 'main' into trainers/simclr
adamjstewart Apr 24, 2023
04520cb
Fix version access
adamjstewart Apr 25, 2023
f0e18c7
Fix LinearLR
adamjstewart Apr 25, 2023
594babb
isinstance supports tuples
adamjstewart Apr 25, 2023
8aa26fa
Comment capitalization
adamjstewart Apr 25, 2023
5bc9c6d
Require lightly 1.4.3+
adamjstewart Apr 25, 2023
e74f55e
Require lightly 1.4.3+
adamjstewart Apr 25, 2023
a93cc23
Bump lightly version
adamjstewart May 3, 2023
a42902d
Merge branch 'main' into trainers/simclr
adamjstewart May 3, 2023
27b823d
Add RandomGrayscale
adamjstewart May 3, 2023
eb7b912
Flake8 fixes
adamjstewart May 3, 2023
19f6935
Placate pydocstyle
adamjstewart May 3, 2023
3b68504
Clarify docs
adamjstewart May 3, 2023
c3349c1
Pass correct weights
adamjstewart May 3, 2023
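Several commits above describe the optimization recipe: LARS with Adam as a backup, a linear learning-rate warmup, and a decay of the learning rate to 0, with `max_epochs` taken from the trainer. Below is a minimal sketch of that schedule in plain PyTorch, assuming illustrative hyperparameters — the learning rate, weight decay, and warmup length are placeholders, not the values used by `SimCLRTask`:

```python
import torch
from torch.optim import Adam
from torch.optim.lr_scheduler import CosineAnnealingLR, LinearLR, SequentialLR

# Stand-in for the backbone + projection head; Adam is the "backup" optimizer
# named in the commits (the real trainer prefers LARS when available).
model = torch.nn.Linear(8, 8)
optimizer = Adam(model.parameters(), lr=0.6, weight_decay=1e-6)

max_epochs = 100     # the trainer reads this from self.trainer.max_epochs
warmup_epochs = 10   # hypothetical warmup length

scheduler = SequentialLR(
    optimizer,
    schedulers=[
        # Linear warmup from lr/10 up to the base lr
        LinearLR(optimizer, start_factor=1 / warmup_epochs, total_iters=warmup_epochs),
        # Cosine decay of the learning rate to 0 for the remaining epochs
        CosineAnnealingLR(optimizer, T_max=max_epochs - warmup_epochs),
    ],
    milestones=[warmup_epochs],
)
```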
1 change: 1 addition & 0 deletions docs/conf.py
@@ -62,6 +62,7 @@
    ("py:class", "segmentation_models_pytorch.base.model.SegmentationModel"),
    ("py:class", "timm.models.resnet.ResNet"),
    ("py:class", "timm.models.vision_transformer.VisionTransformer"),
+   ("py:class", "torch.optim.lr_scheduler.LRScheduler"),
    ("py:class", "torchvision.models._api.WeightsEnum"),
    ("py:class", "torchvision.models.resnet.ResNet"),
]
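The tuples above appear to be entries in Sphinx's `nitpick_ignore` list; adding `torch.optim.lr_scheduler.LRScheduler` presumably keeps the docs build from failing on the scheduler type referenced in the new trainer's type hints.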
1 change: 1 addition & 0 deletions environment.yml
@@ -25,6 +25,7 @@ dependencies:
  - isort[colors]>=5.8
  - kornia>=0.6.5
  - laspy>=2
+ - lightly>=1.2.4
  - lightning>=1.8
  - mypy>=0.900
  - nbmake>=1.3.3
2 changes: 2 additions & 0 deletions pyproject.toml
@@ -96,6 +96,8 @@ filterwarnings = [
    "ignore:ANTIALIAS is deprecated and will be removed in Pillow 10:DeprecationWarning:tensorboardX.summary",
    # https://github.com/Lightning-AI/lightning/issues/16756
    "ignore:Deprecated call to `pkg_resources.declare_namespace:DeprecationWarning",
+   # https://github.com/lightly-ai/lightly/issues/1152
+   "ignore:Using active learning via the lightly package is deprecated and will be removed soon.:DeprecationWarning:lightly.active_learning",

    # Expected warnings
    # Lightning warns us about using num_workers=0, but it's faster on macOS
1 change: 1 addition & 0 deletions requirements/min-reqs.old
@@ -5,6 +5,7 @@ setuptools==42.0.0
einops==0.3.0
fiona==1.8.19
kornia==0.6.5
+lightly==1.2.4
lightning==1.8.0
matplotlib==3.3.3
numpy==1.19.3
1 change: 1 addition & 0 deletions requirements/required.txt
@@ -6,6 +6,7 @@ einops==0.6.1
fiona==1.9.3
kornia==0.6.12
lightning==2.0.1.post0
+lightly==1.4.2
matplotlib==3.7.1
numpy==1.24.2
pillow==9.5.0
2 changes: 2 additions & 0 deletions setup.cfg
@@ -30,6 +30,8 @@ install_requires =
    fiona>=1.8.19,<2
    # kornia 0.6.5+ required due to change in kornia.augmentation API
    kornia>=0.6.5,<0.7
+   # lightly 1.2.4+ required for gather_distributed parameter of NTXentLoss
+   lightly>=1.2.4
    # lightning 1.8+ is first release
    lightning>=1.8,<3
    # matplotlib 3.3.3+ required for Python 3.9 wheels
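The comment above explains the new lower bound: lightly 1.2.4 introduced the `gather_distributed` parameter of `NTXentLoss`, the contrastive loss SimCLR trains with. A hedged sketch of how that loss is typically constructed — the temperature and projection dimensions here are illustrative, not the trainer's defaults:

```python
import torch
from lightly.loss import NTXentLoss

criterion = NTXentLoss(
    temperature=0.1,          # softmax temperature of the contrastive loss
    memory_bank_size=0,       # 0 disables the memory bank (SimCLR v1 style)
    gather_distributed=True,  # gathers negatives across GPUs; needs lightly>=1.2.4
)

z1 = torch.randn(4, 128)  # projections of the first augmented view
z2 = torch.randn(4, 128)  # projections of the second augmented view
loss = criterion(z1, z2)
```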
4 changes: 2 additions & 2 deletions tests/conf/chesapeake_cvpr_prior_byol.yaml
@@ -3,7 +3,7 @@ experiment:
  module:
    loss: "ce"
    model: "unet"
-   backbone: "resnet50"
+   backbone: "resnet18"
    learning_rate: 1e-3
    learning_rate_schedule_patience: 6
    in_channels: 4
@@ -13,7 +13,7 @@ experiment:
    weights: null
  datamodule:
    root: "tests/data/chesapeake/cvpr"
-   download: true
+   download: false
    train_splits:
      - "de-test"
    val_splits:
      - "de-test"
22 changes: 22 additions & 0 deletions tests/conf/chesapeake_cvpr_prior_simclr.yaml
@@ -0,0 +1,22 @@
experiment:
  task: "chesapeake_cvpr"
  module:
    model: "resnet18"
    in_channels: 4
    version: 1
    layers: 2
    memory_bank_size: 0
  datamodule:
    root: "tests/data/chesapeake/cvpr"
    download: false
    train_splits:
      - "de-test"
    val_splits:
      - "de-test"
    test_splits:
      - "de-test"
    batch_size: 2
    patch_size: 64
    num_workers: 0
    class_set: 5
    use_prior_labels: True
16 changes: 16 additions & 0 deletions tests/conf/seco_simclr_1.yaml
@@ -0,0 +1,16 @@
experiment:
  task: "seco"
  module:
    model: "resnet18"
    in_channels: 3
    version: 1
    layers: 2
    hidden_dim: 8
    output_dim: 8
    weight_decay: 1e-6
    memory_bank_size: 0
  datamodule:
    root: "tests/data/seco"
    seasons: 1
    batch_size: 2
    num_workers: 0
16 changes: 16 additions & 0 deletions tests/conf/seco_simclr_2.yaml
@@ -0,0 +1,16 @@
experiment:
  task: "seco"
  module:
    model: "resnet18"
    in_channels: 3
    version: 2
    layers: 4
    hidden_dim: 8
    output_dim: 8
    weight_decay: 1e-4
    memory_bank_size: 10
  datamodule:
    root: "tests/data/seco"
    seasons: 2
    batch_size: 2
    num_workers: 0
16 changes: 16 additions & 0 deletions tests/conf/ssl4eo_s12_simclr_1.yaml
@@ -0,0 +1,16 @@
experiment:
  task: "ssl4eo_s12"
  module:
    model: "resnet18"
    in_channels: 13
    version: 1
    layers: 2
    hidden_dim: 8
    output_dim: 8
    weight_decay: 1e-6
    memory_bank_size: 0
  datamodule:
    root: "tests/data/ssl4eo/s12"
    seasons: 1
    batch_size: 2
    num_workers: 0
16 changes: 16 additions & 0 deletions tests/conf/ssl4eo_s12_simclr_2.yaml
@@ -0,0 +1,16 @@
experiment:
  task: "ssl4eo_s12"
  module:
    model: "resnet18"
    in_channels: 13
    version: 2
    layers: 3
    hidden_dim: 8
    output_dim: 8
    weight_decay: 1e-4
    memory_bank_size: 10
  datamodule:
    root: "tests/data/ssl4eo/s12"
    seasons: 2
    batch_size: 2
    num_workers: 0
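Across these four configs, `version`, `layers`, `hidden_dim`, `output_dim`, and `memory_bank_size` select between the two SimCLR variants: v1 uses a 2-layer projection head and no memory bank, v2 a deeper (3+ layer) head with a memory bank and, per the commit history, a fresh batch norm for every layer. A sketch of what such a head might look like — this is an illustration of the knobs, not `SimCLRTask`'s exact module:

```python
import torch.nn as nn

def projection_head(in_dim: int, hidden_dim: int, output_dim: int, layers: int) -> nn.Sequential:
    """Build a SimCLR-style MLP projection head with a new BatchNorm per layer."""
    modules: list[nn.Module] = []
    dims = [in_dim] + [hidden_dim] * (layers - 1) + [output_dim]
    for i in range(layers):
        modules.append(nn.Linear(dims[i], dims[i + 1]))
        modules.append(nn.BatchNorm1d(dims[i + 1]))
        if i < layers - 1:  # no ReLU after the final projection
            modules.append(nn.ReLU(inplace=True))
    return nn.Sequential(*modules)

head_v1 = projection_head(512, 8, 8, layers=2)  # matches seco_simclr_1.yaml
head_v2 = projection_head(512, 8, 8, layers=3)  # matches ssl4eo_s12_simclr_2.yaml
```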
3 changes: 2 additions & 1 deletion tests/trainers/test_classification.py
@@ -38,7 +38,8 @@ def __init__(
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=in_chans, out_channels=1, kernel_size=1)
        self.pool = nn.AdaptiveAvgPool2d((1, 1))
-       self.fc = nn.Linear(1, num_classes)
+       self.fc = nn.Linear(1, num_classes) if num_classes else nn.Identity()
+       self.num_features = 1

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        x = self.conv1(x)
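This change lets the shared test model drop its classification head and report `num_features`, mirroring the timm convention that the trainer presumably relies on to extract raw features:

```python
import timm
import torch

# In timm, num_classes=0 swaps the classifier for an identity module, so the
# model returns pooled backbone features, and num_features reports their dim.
backbone = timm.create_model("resnet18", num_classes=0)
features = backbone(torch.randn(1, 3, 64, 64))
assert features.shape == (1, backbone.num_features)  # (1, 512) for resnet18
```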
166 changes: 166 additions & 0 deletions tests/trainers/test_simclr.py
@@ -0,0 +1,166 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.

import os
from pathlib import Path
from typing import Any, cast

import pytest
import timm
import torch
import torchvision
from _pytest.fixtures import SubRequest
from _pytest.monkeypatch import MonkeyPatch
from lightning.pytorch import LightningDataModule, Trainer
from omegaconf import OmegaConf
from torch.nn import Module
from torchvision.models._api import WeightsEnum

from torchgeo.datamodules import (
    ChesapeakeCVPRDataModule,
    SeasonalContrastS2DataModule,
    SSL4EOS12DataModule,
)
from torchgeo.datasets import SSL4EOS12, SeasonalContrastS2
from torchgeo.models import get_model_weights, list_models
from torchgeo.trainers import SimCLRTask

from .test_classification import ClassificationTestModel


def create_model(*args: Any, **kwargs: Any) -> Module:
    return ClassificationTestModel(**kwargs)


def load(url: str, *args: Any, **kwargs: Any) -> dict[str, Any]:
    state_dict: dict[str, Any] = torch.load(url)
    return state_dict


class TestSimCLRTask:
    @pytest.mark.parametrize(
        "name,classname",
        [
            ("chesapeake_cvpr_prior_simclr", ChesapeakeCVPRDataModule),
            ("seco_simclr_1", SeasonalContrastS2DataModule),
            ("seco_simclr_2", SeasonalContrastS2DataModule),
            ("ssl4eo_s12_simclr_1", SSL4EOS12DataModule),
            ("ssl4eo_s12_simclr_2", SSL4EOS12DataModule),
        ],
    )
    def test_trainer(
        self,
        monkeypatch: MonkeyPatch,
        name: str,
        classname: type[LightningDataModule],
        fast_dev_run: bool,
    ) -> None:
        conf = OmegaConf.load(os.path.join("tests", "conf", name + ".yaml"))
        conf_dict = OmegaConf.to_object(conf.experiment)
        conf_dict = cast(dict[str, dict[str, Any]], conf_dict)

        if name.startswith("seco"):
            monkeypatch.setattr(SeasonalContrastS2, "__len__", lambda self: 2)

        if name.startswith("ssl4eo_s12"):
            monkeypatch.setattr(SSL4EOS12, "__len__", lambda self: 2)

        # Instantiate datamodule
        datamodule_kwargs = conf_dict["datamodule"]
        datamodule = classname(**datamodule_kwargs)

        # Instantiate model
        monkeypatch.setattr(timm, "create_model", create_model)
        model_kwargs = conf_dict["module"]
        model = SimCLRTask(**model_kwargs)

        # Instantiate trainer
        trainer = Trainer(
            accelerator="cpu",
            fast_dev_run=fast_dev_run,
            log_every_n_steps=1,
            max_epochs=1,
        )
        trainer.fit(model=model, datamodule=datamodule)

    def test_version_warnings(self) -> None:
        with pytest.warns(UserWarning, match="SimCLR v1 only uses 2 layers"):
            SimCLRTask(version=1, layers=3)
        with pytest.warns(UserWarning, match="SimCLR v1 does not use a memory bank"):
            SimCLRTask(version=1, memory_bank_size=10)
        with pytest.warns(UserWarning, match=r"SimCLR v2 uses 3\+ layers"):
            SimCLRTask(version=2, layers=2)
        with pytest.warns(UserWarning, match="SimCLR v2 uses a memory bank"):
            SimCLRTask(version=2, memory_bank_size=0)

    @pytest.fixture(
        params=[
            weights for model in list_models() for weights in get_model_weights(model)
        ]
    )
    def weights(self, request: SubRequest) -> WeightsEnum:
        return request.param

    @pytest.fixture
    def mocked_weights(
        self, tmp_path: Path, monkeypatch: MonkeyPatch, weights: WeightsEnum
    ) -> WeightsEnum:
        path = tmp_path / f"{weights}.pth"
        model = timm.create_model(
            weights.meta["model"], in_chans=weights.meta["in_chans"]
        )
        torch.save(model.state_dict(), path)
        try:
            monkeypatch.setattr(weights.value, "url", str(path))
        except AttributeError:
            monkeypatch.setattr(weights, "url", str(path))
        monkeypatch.setattr(torchvision.models._api, "load_state_dict_from_url", load)
        return weights

    def test_weight_file(self, checkpoint: str) -> None:
        model_kwargs: dict[str, Any] = {"model": "resnet18", "weights": checkpoint}
        match = "num classes .* != num classes in pretrained model"
        with pytest.warns(UserWarning, match=match):
            SimCLRTask(**model_kwargs)

    def test_weight_enum(self, mocked_weights: WeightsEnum) -> None:
        model_kwargs: dict[str, Any] = {
            "model": mocked_weights.meta["model"],
            "weights": mocked_weights,
            "in_channels": mocked_weights.meta["in_chans"],
        }
        match = "num classes .* != num classes in pretrained model"
        with pytest.warns(UserWarning, match=match):
            SimCLRTask(**model_kwargs)

    def test_weight_str(self, mocked_weights: WeightsEnum) -> None:
        model_kwargs: dict[str, Any] = {
            "model": mocked_weights.meta["model"],
            "weights": str(mocked_weights),
            "in_channels": mocked_weights.meta["in_chans"],
        }
        match = "num classes .* != num classes in pretrained model"
        with pytest.warns(UserWarning, match=match):
            SimCLRTask(**model_kwargs)

    @pytest.mark.slow
    def test_weight_enum_download(self, weights: WeightsEnum) -> None:
        model_kwargs: dict[str, Any] = {
            "model": weights.meta["model"],
            "weights": weights,
            "in_channels": weights.meta["in_chans"],
        }
        match = "num classes .* != num classes in pretrained model"
        with pytest.warns(UserWarning, match=match):
            SimCLRTask(**model_kwargs)

    @pytest.mark.slow
    def test_weight_str_download(self, weights: WeightsEnum) -> None:
        model_kwargs: dict[str, Any] = {
            "model": weights.meta["model"],
            "weights": str(weights),
            "in_channels": weights.meta["in_chans"],
        }
        match = "num classes .* != num classes in pretrained model"
        with pytest.warns(UserWarning, match=match):
            SimCLRTask(**model_kwargs)
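These tests follow the pattern of the other trainer suites: `timm.create_model` is monkeypatched to a tiny stand-in model so `test_trainer` can drive every config through a single `fast_dev_run` step on CPU, while the real-download tests are gated behind the `slow` marker (deselectable with `pytest -m "not slow"`).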
2 changes: 2 additions & 0 deletions torchgeo/trainers/__init__.py
@@ -8,6 +8,7 @@
from .detection import ObjectDetectionTask
from .regression import RegressionTask
from .segmentation import SemanticSegmentationTask
+from .simclr import SimCLRTask

__all__ = (
    "BYOLTask",
@@ -16,4 +17,5 @@
    "ObjectDetectionTask",
    "RegressionTask",
    "SemanticSegmentationTask",
+   "SimCLRTask",
)
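With the export in place, the new task plugs into a standard Lightning loop. A minimal sketch mirroring `test_trainer` above, with kwargs borrowed from `tests/conf/ssl4eo_s12_simclr_1.yaml` (in practice the tiny test dataset root would be replaced by real data):

```python
from lightning.pytorch import Trainer

from torchgeo.datamodules import SSL4EOS12DataModule
from torchgeo.trainers import SimCLRTask

# SimCLR v1 setup: 2-layer projection head, no memory bank
task = SimCLRTask(model="resnet18", in_channels=13, version=1, layers=2)
datamodule = SSL4EOS12DataModule(
    root="tests/data/ssl4eo/s12", seasons=1, batch_size=2, num_workers=0
)
trainer = Trainer(accelerator="cpu", max_epochs=1, log_every_n_steps=1)
trainer.fit(model=task, datamodule=datamodule)
```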