From eef7ba4e76a423472e5d41a570ba21ed151925ce Mon Sep 17 00:00:00 2001
From: "g.bobrovskih" <g.bobrovskih@an01.zhores>
Date: Wed, 19 Aug 2020 21:00:35 +0300
Subject: [PATCH 01/12] segmentation and depth dataset

---
 0                                             |   0
 configs/meta_arch/depth-segmentator.yaml      |   6 +
 sharpf/modeling/meta_arch/depth_regressor.py  |   2 +
 .../modeling/meta_arch/depth_segmentator.py   | 130 ++++++++++++++++++
 sharpf/utils/abc_utils/hdf5/dataset.py        | 114 +++++++++++++++
 5 files changed, 252 insertions(+)
 create mode 100644 0
 create mode 100644 configs/meta_arch/depth-segmentator.yaml
 create mode 100644 sharpf/modeling/meta_arch/depth_segmentator.py

diff --git a/0 b/0
new file mode 100644
index 00000000..e69de29b
diff --git a/configs/meta_arch/depth-segmentator.yaml b/configs/meta_arch/depth-segmentator.yaml
new file mode 100644
index 00000000..9d3d16f9
--- /dev/null
+++ b/configs/meta_arch/depth-segmentator.yaml
@@ -0,0 +1,6 @@
+# @package _group_
+#
+# target: sharpf.modeling.DepthSegmentator
+# monitor: val_balanced_accuracy #TODO: add balanced accuracy metric class
+# loss:
+#   target: torch.nn.BCEWithLogitsLoss
diff --git a/sharpf/modeling/meta_arch/depth_regressor.py b/sharpf/modeling/meta_arch/depth_regressor.py
index 9db43875..80c1c644 100644
--- a/sharpf/modeling/meta_arch/depth_regressor.py
+++ b/sharpf/modeling/meta_arch/depth_regressor.py
@@ -30,6 +30,7 @@ def __init__(self, cfg):
         super().__init__()
         self.hparams = flatten_omegaconf(cfg)  # there should be better official way later
         self.cfg = cfg
+        self.task = 'regression'
         self.model = build_model(cfg.model)
         self.example_input_array = torch.rand(1, 1, 64, 64)
         self.data_dir = hydra.utils.to_absolute_path(self.cfg.data.data_dir)
@@ -102,6 +103,7 @@ def _get_dataset(self, partition):
             io=DepthMapIO,
             data_label=self.cfg.data.data_label,
             target_label=self.cfg.data.target_label,
+            task=self.task,
             partition=partition,
             transform=transform,
             max_loaded_files=self.cfg.data.max_loaded_files
diff --git a/sharpf/modeling/meta_arch/depth_segmentator.py b/sharpf/modeling/meta_arch/depth_segmentator.py
new file mode 100644
index 00000000..42a8d30e
--- /dev/null
+++ b/sharpf/modeling/meta_arch/depth_segmentator.py
@@ -0,0 +1,130 @@
+import logging
+
+import hydra
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from pytorch_lightning import TrainResult
+from pytorch_lightning.core.lightning import LightningModule
+from pytorch_lightning.metrics import tensor_metric
+from torch.utils.data import DataLoader
+
+from sharpf.utils.comm import get_batch_size
+from sharpf.utils.losses import balanced_accuracy
+from ..model.build import build_model
+from ...data import DepthMapIO
+from ...utils.abc_utils import LotsOfHdf5Files, DepthDataset
+from ...utils.abc_utils.torch import CompositeTransform
+from ...utils.config import flatten_omegaconf
+
+log = logging.getLogger(__name__)
+
+
+@tensor_metric()
+def gather_sum(x: torch.Tensor) -> torch.Tensor:
+    return x
+
+
+class DepthSegmentator(LightningModule):
+
+    def __init__(self, cfg):
+        super().__init__()
+        self.hparams = flatten_omegaconf(cfg)  # there should be better official way later
+        self.cfg = cfg
+        self.task = 'segmentation'
+        self.model = build_model(cfg.model)
+        self.example_input_array = torch.rand(1, 1, 64, 64)
+        self.data_dir = hydra.utils.to_absolute_path(self.cfg.data.data_dir)
+
+        dist_backend = self.cfg.trainer.distributed_backend
+        if (dist_backend is not None and 'ddp' in dist_backend) or (
+                dist_backend is None and self.cfg.trainer.gpus is not None and (
+                self.cfg.trainer.gpus > 1 or self.cfg.trainer.num_nodes > 1)):
+            log.info('Converting BatchNorm to SyncBatchNorm. Do not forget other batch-dimension dependent operations.')
+            self.model = nn.SyncBatchNorm.convert_sync_batchnorm(self.model)
+
+    def forward(self, x):
+        return self.model(x)
+
+    def training_step(self, batch, batch_idx):
+        points, distances = batch['image'], batch['distances']
+        points = points.unsqueeze(1) if points.dim() == 3 else points
+        preds = self.forward(points)
+        loss = hydra.utils.instantiate(self.cfg.meta_arch.loss, preds, distances)
+        result = TrainResult(minimize=loss)
+        result.log('train_loss', loss, prog_bar=True)
+        return result
+
+    def _shared_eval_step(self, batch, batch_idx, prefix):
+        points, distances = batch['image'], batch['distances']
+        points = points.unsqueeze(1) if points.dim() == 3 else points
+        preds = self.forward(points)
+
+        metric = balanced_accuracy(preds, distances)  # (batch)
+        # loss = hydra.utils.instantiate(self.cfg.meta_arch.loss, preds, distances)
+        # self.logger[0].experiment.add_scalars('losses', {f'{prefix}_loss': loss})
+        # TODO Consider pl.EvalResult, once there are good examples how to use it
+        return {'balanced_accuracy_sum': metric.sum(),
+                'batch_size': torch.tensor(points.size(0), device=self.device)}
+
+    def _shared_eval_epoch_end(self, outputs, prefix):
+        rmse_sum = 0
+        size = 0
+        for output in outputs:
+            rmse_sum += output['rmse_sum']
+            size += output['batch_size']
+        mean_rmse = gather_sum(rmse_sum) / gather_sum(size)
+        logs = {f'{prefix}_mean_rmse': mean_rmse}
+        return {f'{prefix}_mean_rmse': mean_rmse, 'log': logs}
+
+    def validation_step(self, batch, batch_idx):
+        return self._shared_eval_step(batch, batch_idx, prefix='val')
+
+    def test_step(self, batch, batch_idx):
+        return self._shared_eval_step(batch, batch_idx, prefix='test')
+
+    def validation_epoch_end(self, outputs):
+        return self._shared_eval_epoch_end(outputs, prefix='val')
+
+    def test_epoch_end(self, outputs):
+        return self._shared_eval_epoch_end(outputs, prefix='test')
+
+    def configure_optimizers(self):
+        optimizer = hydra.utils.instantiate(self.cfg.opt, params=self.parameters())
+        scheduler = hydra.utils.instantiate(self.cfg.scheduler, optimizer=optimizer)
+        return [optimizer], [scheduler]
+
+    def _get_dataset(self, partition):
+        if hasattr(self, f'{partition}_set') and getattr(self, f'{partition}_set') is not None:
+            return getattr(self, f'{partition}_set')
+        transform = CompositeTransform([hydra.utils.instantiate(tf) for tf in self.cfg.transforms[partition]])
+        return DepthDataset(
+            data_dir=self.data_dir,
+            io=DepthMapIO,
+            data_label=self.cfg.data.data_label,
+            target_label=self.cfg.data.target_label,
+            task=self.task,
+            partition=partition,
+            transform=transform,
+            max_loaded_files=self.cfg.data.max_loaded_files
+        )
+
+    def _get_dataloader(self, partition):
+        dataset = self._get_dataset(partition)
+        num_workers = self.cfg.data_loader[partition].num_workers
+        batch_size = get_batch_size(self.cfg.data_loader[partition].total_batch_size)
+        return DataLoader(dataset, batch_size=batch_size, num_workers=num_workers, pin_memory=True)
+
+    def setup(self, stage: str):
+        self.train_set = self._get_dataset('train') if stage == 'fit' else None
+        self.val_set = self._get_dataset('val') if stage == 'fit' else None
+        self.test_set = self._get_dataset('test') if stage == 'test' else None
+
+    def train_dataloader(self):
+        return self._get_dataloader('train')
+
+    def val_dataloader(self):
+        return self._get_dataloader('val')
+
+    def test_dataloader(self):
+        return self._get_dataloader('val')  # FIXME
diff --git a/sharpf/utils/abc_utils/hdf5/dataset.py b/sharpf/utils/abc_utils/hdf5/dataset.py
index b6569ef8..e09015d4 100644
--- a/sharpf/utils/abc_utils/hdf5/dataset.py
+++ b/sharpf/utils/abc_utils/hdf5/dataset.py
@@ -133,3 +133,117 @@ def __getitem__(self, index):
             file_index_to_unload = np.random.choice(loaded_file_indexes)
             self.files[file_index_to_unload].unload()
         return item
+
+class DepthDataset(LotsOfHdf5Files):
+
+    def __init__(self, io, data_dir, data_label, target_label, task, partition=None,
+                 transform=None, max_loaded_files=0):
+        super().__init__(data_dir=data_dir, io=io,
+                         data_label=data_label, target_label=target_label,
+                         labels=None,
+                         partition=partition,
+                         transform=transform,
+                         max_loaded_files=max_loaded_files)
+        self.task = task
+        self.quality = self._get_quantity()
+
+        if None is not partition:
+            data_dir = os.path.join(data_dir, partition)
+        filenames = glob.glob(os.path.join(data_dir, '*.hdf5'))
+        self.files = []
+
+        for i, filename in enumerate(filenames):
+            print('preload', True * bool(i > max_loaded_files))
+            self.files.append(Hdf5File(filename, data_label, target_label,
+                                       transform=transform, preload=bool(i < max_loaded_files)))
+
+        self.cum_num_items = np.cumsum([len(f) for f in self.files])
+        self.current_file_idx = 0
+        self.max_loaded_files = max_loaded_files
+        self.loaded_file_indexes = 0
+        self.previous_loaded_id = 0
+
+    def __len__(self):
+        if len(self.cum_num_items) > 0:
+            return self.cum_num_items[-1]
+        return 0
+
+    def _get_quantity(self):
+        data_dir_split = self.data_dir.split('_')
+        if 'high' in data_dir_split:
+            return 'high'
+        elif 'low' in data_dir_split:
+            return 'low'
+        elif 'med' in data_dir_split:
+            return 'med'
+
+    def quantile_normalize(self, data):
+        # mask -> min shift -> quantile
+
+        norm_data = np.copy(data)
+        mask_obj = np.where(norm_data != 0)
+        mask_back = np.where(norm_data == 0)
+        norm_data[mask_back] = norm_data.max() + 1.0  # new line
+        norm_data -= norm_data[mask_obj].min()
+
+        norm_data /= high_res_quantile
+
+        return norm_data
+
+    def standartize(self, data):
+        # zero mean, unit variance
+
+        standart_data = np.copy(data)
+        standart_data -= np.mean(standart_data)
+        std = np.linalg.norm(standart_data, axis=1).max()
+        if std > 0:
+            standart_data /= std
+
+        return standart_data
+
+    def _getdata(self, index):
+
+        file_index = np.searchsorted(self.cum_num_items, index, side='right')
+        relative_index = index - self.cum_num_items[file_index] if file_index > 0 else index
+
+        data, target = self.files[file_index][relative_index]
+        if file_index != self.previous_loaded_id:
+            self.loaded_file_indexes += 1
+
+        # loaded_file_indexes = [i for i, f in enumerate(self.files) if f.is_loaded()]
+        if self.loaded_file_indexes > self.max_loaded_files:
+            print('unloading {}'.format(self.previous_loaded_id))
+            self.files[self.previous_loaded_id].unload()
+            self.loaded_file_indexes -= 1
+        self.previous_loaded_id = file_index
+
+        return data, target
+
+    def __getitem__(self, index):
+
+        data, target = self._getdata(index)
+        mask_1 = (np.copy(data) != 0.0).astype(float)  # mask for object
+        mask_2 = np.where(data == 0)  # mask for background
+
+        data = self.quantile_normalize(data)
+        data = self.standartize(data)
+
+        dist_new = np.copy(target)
+        dist_mask = dist_new * mask_1  # select object points
+        dist_mask[mask_2] = 1.0  # background points has max distance to sharp features
+        close_to_sharp = np.array((dist_mask != np.nan) & (dist_mask < 1.)).astype(float)
+
+        if self.task == 'two-heads':
+            # regression + segmentation (or two-head network) has to targets:
+            # distance field and segmented close-to-sharp region of the object
+            target = torch.cat(
+                [torch.FloatTensor(dist_mask).unsqueeze(0), torch.FloatTensor(close_to_sharp).unsqueeze(0)], dim=0)
+        if self.task == 'segmentation':
+            target = torch.FloatTensor(close_to_sharp)
+        elif self.task == 'regression':
+            target = torch.FloatTensor(dist_mask)
+
+        data = torch.FloatTensor(data).unsqueeze(0)
+        data = torch.cat([data, data, data], dim=0)
+
+        return data, target

From bec69a891e9ce639c74c1a44ded31060a8c2f1b3 Mon Sep 17 00:00:00 2001
From: "g.bobrovskih" <g.bobrovskih@an01.zhores>
Date: Thu, 20 Aug 2020 14:51:40 +0300
Subject: [PATCH 02/12] corrected depth dataset inheritance, metrics in segmentation
 model

---
 configs/model/unet-segmentator.yaml           | 19 ++++++++++++++
 configs/transforms/depth-norm.yaml            | 15 +++++++++++
 .../modeling/meta_arch/depth_segmentator.py   | 25 +++++++++++++------
 3 files changed, 51 insertions(+), 8 deletions(-)
 create mode 100644 configs/model/unet-segmentator.yaml
 create mode 100644 configs/transforms/depth-norm.yaml

diff --git a/configs/model/unet-segmentator.yaml b/configs/model/unet-segmentator.yaml
new file mode 100644
index 00000000..be4c3666
--- /dev/null
+++ b/configs/model/unet-segmentator.yaml
@@ -0,0 +1,19 @@
+# @package _group_
+model_name: PixelSegmentator
+params:
+  feature_extractor:
+    target: sharpf.modeling.Unet
+    params:
+      encoder_name: resnet50
+      decoder_use_batchnorm: true
+      decoder_channels: [256, 128, 64, 32, 16]
+      decoder_attention_type: null
+      in_channels: 1
+  segmentation_head:
+    - target: torch.nn.Conv2d
+      params:
+        in_channels: 16
+        out_channels: 1
+        kernel_size: 3
+        padding: 1
+    - target: torch.nn.Sigmoid
\ No newline at end of file
diff --git a/configs/transforms/depth-norm.yaml b/configs/transforms/depth-norm.yaml
new file mode 100644
index 00000000..7b5b6165
--- /dev/null
+++ b/configs/transforms/depth-norm.yaml
@@ -0,0 +1,15 @@
+# @package _group_
+
+train:
+  - target: sharpf.utils.abc_utils.torch.TypeCast
+
+val:
+  - target: sharpf.utils.abc_utils.torch.TypeCast
+
+test:
+  - target: sharpf.utils.abc_utils.torch.TypeCast
+
+normalisation:
+  - data: ['standartize', 'quantile']
+
+
diff --git a/sharpf/modeling/meta_arch/depth_segmentator.py b/sharpf/modeling/meta_arch/depth_segmentator.py
index 42a8d30e..adff0af6 100644
--- a/sharpf/modeling/meta_arch/depth_segmentator.py
+++ b/sharpf/modeling/meta_arch/depth_segmentator.py
@@ -56,26 +56,29 @@ def training_step(self, batch, batch_idx):
         return result
 
     def _shared_eval_step(self, batch, batch_idx, prefix):
+        metric_name = 'balanced_accuracy'
+        metric = balanced_accuracy
         points, distances = batch['image'], batch['distances']
         points = points.unsqueeze(1) if points.dim() == 3 else points
         preds = self.forward(points)
 
-        metric = balanced_accuracy(preds, distances)  # (batch)
+        metric_value = metric(preds, distances)  # (batch)
         # loss = hydra.utils.instantiate(self.cfg.meta_arch.loss, preds, distances)
         # self.logger[0].experiment.add_scalars('losses', {f'{prefix}_loss': loss})
         # TODO Consider pl.EvalResult, once there are good examples how to use it
-        return {'balanced_accuracy_sum': metric.sum(),
+        return {f'{metric_name}_sum': metric_value.sum(),
                 'batch_size': torch.tensor(points.size(0), device=self.device)}
 
     def _shared_eval_epoch_end(self, outputs, prefix):
-        rmse_sum = 0
+        metric_name = 'balanced_accuracy'
+        metric_sum = 0
         size = 0
         for output in outputs:
-            rmse_sum += output['rmse_sum']
+            metric_sum += output[f'{metric_name}_sum']
             size += output['batch_size']
-        mean_rmse = gather_sum(rmse_sum) / gather_sum(size)
-        logs = {f'{prefix}_mean_rmse': mean_rmse}
-        return {f'{prefix}_mean_rmse': mean_rmse, 'log': logs}
+        mean_metric = gather_sum(metric_sum) / gather_sum(size)
+        logs = {f'{prefix}_mean_{metric_name}': mean_metric}
+        return {f'{prefix}_mean_{metric_name}': mean_metric, 'log': logs}
 
     def validation_step(self, batch, batch_idx):
         return self._shared_eval_step(batch, batch_idx, prefix='val')
@@ -98,6 +101,11 @@ def _get_dataset(self, partition):
         if hasattr(self, f'{partition}_set') and getattr(self, f'{partition}_set') is not None:
             return getattr(self, f'{partition}_set')
         transform = CompositeTransform([hydra.utils.instantiate(tf) for tf in self.cfg.transforms[partition]])
+        if 'normalisation' in self.cfg.transforms.keys:
+            normalisation = self.cfg.transforms['normalisation']
+        else:
+            normalisation = None
+            
         return DepthDataset(
             data_dir=self.data_dir,
             io=DepthMapIO,
@@ -106,7 +114,8 @@ def _get_dataset(self, partition):
             task=self.task,
             partition=partition,
             transform=transform,
-            max_loaded_files=self.cfg.data.max_loaded_files
+            max_loaded_files=self.cfg.data.max_loaded_files,
+            normalisation=normalisation
         )
 
     def _get_dataloader(self, partition):

From bba6ff1da7c421da0416bddfd751227e20612493 Mon Sep 17 00:00:00 2001
From: "g.bobrovskih" <g.bobrovskih@an01.zhores>
Date: Thu, 20 Aug 2020 15:10:24 +0300
Subject: [PATCH 03/12] corrected depth dataset inheritance

---
 .../modeling/meta_arch/depth_segmentator.py   |  2 +-
 sharpf/utils/abc_utils/hdf5/dataset.py        | 56 +++----------------
 2 files changed, 10 insertions(+), 48 deletions(-)

diff --git a/sharpf/modeling/meta_arch/depth_segmentator.py b/sharpf/modeling/meta_arch/depth_segmentator.py
index adff0af6..f5e733dc 100644
--- a/sharpf/modeling/meta_arch/depth_segmentator.py
+++ b/sharpf/modeling/meta_arch/depth_segmentator.py
@@ -105,7 +105,7 @@ def _get_dataset(self, partition):
             normalisation = self.cfg.transforms['normalisation']
         else:
             normalisation = None
-            
+
         return DepthDataset(
             data_dir=self.data_dir,
             io=DepthMapIO,
diff --git a/sharpf/utils/abc_utils/hdf5/dataset.py b/sharpf/utils/abc_utils/hdf5/dataset.py
index e09015d4..d4063555 100644
--- a/sharpf/utils/abc_utils/hdf5/dataset.py
+++ b/sharpf/utils/abc_utils/hdf5/dataset.py
@@ -137,7 +137,7 @@ def __getitem__(self, index):
 class DepthDataset(LotsOfHdf5Files):
 
     def __init__(self, io, data_dir, data_label, target_label, task, partition=None,
-                 transform=None, max_loaded_files=0):
+                 transform=None, normalisation=['quantile', 'standartize'], max_loaded_files=0):
         super().__init__(data_dir=data_dir, io=io,
                          data_label=data_label, target_label=target_label,
                          labels=None,
@@ -146,27 +146,7 @@ def __init__(self, io, data_dir, data_label, target_label, task, partition=None,
                          max_loaded_files=max_loaded_files)
         self.task = task
         self.quality = self._get_quantity()
-
-        if None is not partition:
-            data_dir = os.path.join(data_dir, partition)
-        filenames = glob.glob(os.path.join(data_dir, '*.hdf5'))
-        self.files = []
-
-        for i, filename in enumerate(filenames):
-            print('preload', True * bool(i > max_loaded_files))
-            self.files.append(Hdf5File(filename, data_label, target_label,
-                                       transform=transform, preload=bool(i < max_loaded_files)))
-
-        self.cum_num_items = np.cumsum([len(f) for f in self.files])
-        self.current_file_idx = 0
-        self.max_loaded_files = max_loaded_files
-        self.loaded_file_indexes = 0
-        self.previous_loaded_id = 0
-
-    def __len__(self):
-        if len(self.cum_num_items) > 0:
-            return self.cum_num_items[-1]
-        return 0
+        self.normalisation = normalisation
 
     def _get_quantity(self):
         data_dir_split = self.data_dir.split('_')
@@ -201,32 +181,16 @@ def standartize(self, data):
 
         return standart_data
 
-    def _getdata(self, index):
-
-        file_index = np.searchsorted(self.cum_num_items, index, side='right')
-        relative_index = index - self.cum_num_items[file_index] if file_index > 0 else index
-
-        data, target = self.files[file_index][relative_index]
-        if file_index != self.previous_loaded_id:
-            self.loaded_file_indexes += 1
-
-        # loaded_file_indexes = [i for i, f in enumerate(self.files) if f.is_loaded()]
-        if self.loaded_file_indexes > self.max_loaded_files:
-            print('unloading {}'.format(self.previous_loaded_id))
-            self.files[self.previous_loaded_id].unload()
-            self.loaded_file_indexes -= 1
-        self.previous_loaded_id = file_index
-
-        return data, target
-
     def __getitem__(self, index):
 
-        data, target = self._getdata(index)
+        data, target = super.__getitem__(index)
         mask_1 = (np.copy(data) != 0.0).astype(float)  # mask for object
         mask_2 = np.where(data == 0)  # mask for background
 
-        data = self.quantile_normalize(data)
-        data = self.standartize(data)
+        if 'quantile' in self.normalisation:
+            data = self.quantile_normalize(data)
+        if 'standartize' in self.normalisation:
+            data = self.standartize(data)
 
         dist_new = np.copy(target)
         dist_mask = dist_new * mask_1  # select object points
@@ -236,14 +200,12 @@ def __getitem__(self, index):
         if self.task == 'two-heads':
             # regression + segmentation (or two-head network) has to targets:
             # distance field and segmented close-to-sharp region of the object
-            target = torch.cat(
-                [torch.FloatTensor(dist_mask).unsqueeze(0), torch.FloatTensor(close_to_sharp).unsqueeze(0)], dim=0)
+            target = torch.cat([torch.FloatTensor(dist_mask).unsqueeze(0), torch.FloatTensor(close_to_sharp).unsqueeze(0)], dim=0)
         if self.task == 'segmentation':
             target = torch.FloatTensor(close_to_sharp)
         elif self.task == 'regression':
             target = torch.FloatTensor(dist_mask)
 
         data = torch.FloatTensor(data).unsqueeze(0)
-        data = torch.cat([data, data, data], dim=0)
 
-        return data, target
+        return {'data': data, 'target': target}

From 5cb47c9722fffcb7d5375c8c527184d31119acc9 Mon Sep 17 00:00:00 2001
From: "g.bobrovskih" <g.bobrovskih@an01.zhores>
Date: Fri, 21 Aug 2020 13:36:25 +0300
Subject: [PATCH 04/12] added balanced accuracy and ~after run~ fixes

---
 configs/meta_arch/depth-segmentator.yaml      | 10 ++++-----
 sharpf/modeling/meta_arch/depth_regressor.py  |  4 ++--
 .../modeling/meta_arch/depth_segmentator.py   |  4 +++-
 sharpf/modeling/model/pixel_regressor.py      | 22 +++++++++++++++++++
 sharpf/utils/abc_utils/hdf5/__init__.py       |  2 +-
 sharpf/utils/abc_utils/hdf5/dataset.py        | 11 ++++++----
 train_net.py                                  |  4 +++-
 7 files changed, 43 insertions(+), 14 deletions(-)

diff --git a/configs/meta_arch/depth-segmentator.yaml b/configs/meta_arch/depth-segmentator.yaml
index 9d3d16f9..769fa231 100644
--- a/configs/meta_arch/depth-segmentator.yaml
+++ b/configs/meta_arch/depth-segmentator.yaml
@@ -1,6 +1,6 @@
 # @package _group_
-#
-# target: sharpf.modeling.DepthSegmentator
-# monitor: val_balanced_accuracy #TODO: add balanced accuracy metric class
-# loss:
-#   target: torch.nn.BCEWithLogitsLoss
+
+target: sharpf.modeling.DepthSegmentator
+monitor: val_balanced_accuracy #TODO: add balanced accuracy metric class
+loss:
+   target: torch.nn.functional.binary_cross_entropy_with_logits
diff --git a/sharpf/modeling/meta_arch/depth_regressor.py b/sharpf/modeling/meta_arch/depth_regressor.py
index 80c1c644..f8f24cb6 100644
--- a/sharpf/modeling/meta_arch/depth_regressor.py
+++ b/sharpf/modeling/meta_arch/depth_regressor.py
@@ -12,7 +12,7 @@
 from sharpf.utils.comm import get_batch_size
 from ..model.build import build_model
 from ...data import DepthMapIO
-from ...utils.abc_utils import LotsOfHdf5Files
+from ...utils.abc_utils.hdf5.dataset import LotsOfHdf5Files, DepthDataset
 from ...utils.abc_utils.torch import CompositeTransform
 from ...utils.config import flatten_omegaconf
 
@@ -98,7 +98,7 @@ def _get_dataset(self, partition):
         if hasattr(self, f'{partition}_set') and getattr(self, f'{partition}_set') is not None:
             return getattr(self, f'{partition}_set')
         transform = CompositeTransform([hydra.utils.instantiate(tf) for tf in self.cfg.transforms[partition]])
-        return LotsOfHdf5Files(
+        return DepthDataset(
             data_dir=self.data_dir,
             io=DepthMapIO,
             data_label=self.cfg.data.data_label,
diff --git a/sharpf/modeling/meta_arch/depth_segmentator.py b/sharpf/modeling/meta_arch/depth_segmentator.py
index f5e733dc..772563a2 100644
--- a/sharpf/modeling/meta_arch/depth_segmentator.py
+++ b/sharpf/modeling/meta_arch/depth_segmentator.py
@@ -13,7 +13,7 @@
 from sharpf.utils.losses import balanced_accuracy
 from ..model.build import build_model
 from ...data import DepthMapIO
-from ...utils.abc_utils import LotsOfHdf5Files, DepthDataset
+from ...utils.abc_utils.hdf5 import DepthDataset
 from ...utils.abc_utils.torch import CompositeTransform
 from ...utils.config import flatten_omegaconf
 
@@ -47,9 +47,11 @@ def forward(self, x):
         return self.model(x)
 
     def training_step(self, batch, batch_idx):
+        print('training step')
         points, distances = batch['image'], batch['distances']
         points = points.unsqueeze(1) if points.dim() == 3 else points
         preds = self.forward(points)
+        print(preds.shape, distances.shape)
         loss = hydra.utils.instantiate(self.cfg.meta_arch.loss, preds, distances)
         result = TrainResult(minimize=loss)
         result.log('train_loss', loss, prog_bar=True)
diff --git a/sharpf/modeling/model/pixel_regressor.py b/sharpf/modeling/model/pixel_regressor.py
index 269e72ef..6de62f41 100644
--- a/sharpf/modeling/model/pixel_regressor.py
+++ b/sharpf/modeling/model/pixel_regressor.py
@@ -27,3 +27,25 @@ def from_config(cls, cfg: DictConfig):
             "feature_extractor": hydra.utils.instantiate(cfg.feature_extractor),
             "regression_head": nn.Sequential(*[hydra.utils.instantiate(node) for node in cfg.regression_head])
         }
+
+# still believe that single class or abstract class for pixel-task model would be better
+@MODEL_REGISTRY.register()
+class PixelSegmentator(nn.Module):
+    @configurable
+    def __init__(self, feature_extractor, segmentation_head):
+        super().__init__()
+        self.feature_extractor = feature_extractor
+        self.segmentation_head = segmentation_head
+
+    def initialize(self):
+        initialize_head(self.segmentation_head)
+
+    def forward(self, x):
+        return self.segmentation_head(self.feature_extractor(x))
+
+    @classmethod
+    def from_config(cls, cfg: DictConfig):
+        return {
+            "feature_extractor": hydra.utils.instantiate(cfg.feature_extractor),
+            "segmentation_head": nn.Sequential(*[hydra.utils.instantiate(node) for node in cfg.segmentation_head])
+        }
diff --git a/sharpf/utils/abc_utils/hdf5/__init__.py b/sharpf/utils/abc_utils/hdf5/__init__.py
index 7d90035e..02555e6f 100644
--- a/sharpf/utils/abc_utils/hdf5/__init__.py
+++ b/sharpf/utils/abc_utils/hdf5/__init__.py
@@ -1 +1 @@
-from .dataset import Hdf5File, LotsOfHdf5Files
+from .dataset import Hdf5File, LotsOfHdf5Files, DepthDataset
diff --git a/sharpf/utils/abc_utils/hdf5/dataset.py b/sharpf/utils/abc_utils/hdf5/dataset.py
index d4063555..df3d0e01 100644
--- a/sharpf/utils/abc_utils/hdf5/dataset.py
+++ b/sharpf/utils/abc_utils/hdf5/dataset.py
@@ -11,6 +11,7 @@
 
 log = logging.getLogger(__name__)
 
+high_res_quantile = 7.4776
 
 class Hdf5File(Dataset):
     def __init__(self, filename, io, data_label=None, target_label=None, labels=None, preload=True,
@@ -144,6 +145,7 @@ def __init__(self, io, data_dir, data_label, target_label, task, partition=None,
                          partition=partition,
                          transform=transform,
                          max_loaded_files=max_loaded_files)
+        self.data_dir = data_dir
         self.task = task
         self.quality = self._get_quantity()
         self.normalisation = normalisation
@@ -183,7 +185,8 @@ def standartize(self, data):
 
     def __getitem__(self, index):
 
-        data, target = super.__getitem__(index)
+        item = super().__getitem__(index)
+        data, target = item['image'], item['distances']
         mask_1 = (np.copy(data) != 0.0).astype(float)  # mask for object
         mask_2 = np.where(data == 0)  # mask for background
 
@@ -202,10 +205,10 @@ def __getitem__(self, index):
             # distance field and segmented close-to-sharp region of the object
             target = torch.cat([torch.FloatTensor(dist_mask).unsqueeze(0), torch.FloatTensor(close_to_sharp).unsqueeze(0)], dim=0)
         if self.task == 'segmentation':
-            target = torch.FloatTensor(close_to_sharp)
+            target = torch.FloatTensor(close_to_sharp).unsqueeze(0)
         elif self.task == 'regression':
-            target = torch.FloatTensor(dist_mask)
+            target = torch.FloatTensor(dist_mask).unsqueeze(0)
 
         data = torch.FloatTensor(data).unsqueeze(0)
 
-        return {'data': data, 'target': target}
+        return {'image': data, 'distances': target}
diff --git a/train_net.py b/train_net.py
index 1e2ea637..1aa3f93b 100644
--- a/train_net.py
+++ b/train_net.py
@@ -14,6 +14,8 @@
 
 from sharpf.utils.callbacks import FitDurationCallback
 from sharpf.utils.collect_env import collect_env_info
+from sharpf.modeling.meta_arch.depth_regressor import DepthRegressor
+from sharpf.modeling.meta_arch.depth_segmentator import DepthSegmentator
 
 from configs import trainer, optimizer, scheduler
 
@@ -36,7 +38,7 @@ def main(cfg: DictConfig):
     log.info(f"Original working directory: {hydra.utils.get_original_cwd()}")
     seed_everything(cfg.seed)
 
-    model = instantiate(cfg.meta_arch, cfg=cfg)
+    model = DepthRegressor(cfg)
     if cfg.weights is not None:
         model.load_state_dict(torch.load(cfg.weights)['state_dict'])
 

From 92b454b7d3f0e5b86755fe2915bfa88969b28853 Mon Sep 17 00:00:00 2001
From: "g.bobrovskih" <g.bobrovskih@an01.zhores>
Date: Fri, 21 Aug 2020 15:35:30 +0300
Subject: [PATCH 05/12] added balanced accuracy for real

---
 sharpf/utils/losses.py | 12 ++++++++++++
 1 file changed, 12 insertions(+)
 create mode 100644 sharpf/utils/losses.py

diff --git a/sharpf/utils/losses.py b/sharpf/utils/losses.py
new file mode 100644
index 00000000..7b4c38e6
--- /dev/null
+++ b/sharpf/utils/losses.py
@@ -0,0 +1,12 @@
+import torch
+import numpy as np
+
+def balanced_accuracy(y_pred, y_true):
+    print('balanced accuracy print', y_pred.shape, y_true.shape)
+    tpr = np.sum((y_pred[y_pred == 1] == y_true[y_true == 1]).float(), axis=1)
+    tnr = np.sum((y_pred[y_pred == 0] == y_true[y_true == 1]).float(), axis=1)
+    tpr /= (tpr + np.sum((y_pred[y_pred == 0] == y_true[y_true == 1]).float(), axis=1))
+    tnr /= (tnr + np.sum((y_pred[y_pred == 1] == y_true[y_true == 0]).float().sum(), axis=1))
+    acc = (tpr + tnr) / 2
+    print(acc.shape)
+    return acc
\ No newline at end of file

From a75cb7436df5a51241c72e6bace03ea1fe330e54 Mon Sep 17 00:00:00 2001
From: "g.bobrovskih" <g.bobrovskih@an01.zhores>
Date: Fri, 21 Aug 2020 16:42:11 +0300
Subject: [PATCH 06/12] small fixes

---
 configs/config.yaml                            | 4 ++--
 sharpf/modeling/meta_arch/depth_segmentator.py | 2 --
 sharpf/utils/abc_utils/hdf5/dataset.py         | 8 +++++++-
 train_net.py                                   | 4 +---
 4 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/configs/config.yaml b/configs/config.yaml
index 0f304cca..638d1b8e 100644
--- a/configs/config.yaml
+++ b/configs/config.yaml
@@ -12,8 +12,8 @@ data_loader:
     total_batch_size: 16
     num_workers: 2
   val:
-    total_batch_size: 16
-    num_workers: 2
+    total_batch_size: 4
+    num_workers: 0
   test:
     total_batch_size: 16
     num_workers: 2
diff --git a/sharpf/modeling/meta_arch/depth_segmentator.py b/sharpf/modeling/meta_arch/depth_segmentator.py
index 772563a2..f0f0082e 100644
--- a/sharpf/modeling/meta_arch/depth_segmentator.py
+++ b/sharpf/modeling/meta_arch/depth_segmentator.py
@@ -47,11 +47,9 @@ def forward(self, x):
         return self.model(x)
 
     def training_step(self, batch, batch_idx):
-        print('training step')
         points, distances = batch['image'], batch['distances']
         points = points.unsqueeze(1) if points.dim() == 3 else points
         preds = self.forward(points)
-        print(preds.shape, distances.shape)
         loss = hydra.utils.instantiate(self.cfg.meta_arch.loss, preds, distances)
         result = TrainResult(minimize=loss)
         result.log('train_loss', loss, prog_bar=True)
diff --git a/sharpf/utils/abc_utils/hdf5/dataset.py b/sharpf/utils/abc_utils/hdf5/dataset.py
index df3d0e01..c3da7287 100644
--- a/sharpf/utils/abc_utils/hdf5/dataset.py
+++ b/sharpf/utils/abc_utils/hdf5/dataset.py
@@ -200,15 +200,21 @@ def __getitem__(self, index):
         dist_mask[mask_2] = 1.0  # background points has max distance to sharp features
         close_to_sharp = np.array((dist_mask != np.nan) & (dist_mask < 1.)).astype(float)
 
+        output = {}
+
         if self.task == 'two-heads':
             # regression + segmentation (or two-head network) has to targets:
             # distance field and segmented close-to-sharp region of the object
             target = torch.cat([torch.FloatTensor(dist_mask).unsqueeze(0), torch.FloatTensor(close_to_sharp).unsqueeze(0)], dim=0)
+            output['distance_and_close_to_sharp'] = target
         if self.task == 'segmentation':
             target = torch.FloatTensor(close_to_sharp).unsqueeze(0)
+            output['close_to_sharp_mask] = target
         elif self.task == 'regression':
             target = torch.FloatTensor(dist_mask).unsqueeze(0)
+            output['distance_to_sharp'] = target
 
         data = torch.FloatTensor(data).unsqueeze(0)
+        output['image'] = data
 
-        return {'image': data, 'distances': target}
+        return output
diff --git a/train_net.py b/train_net.py
index 1aa3f93b..1e2ea637 100644
--- a/train_net.py
+++ b/train_net.py
@@ -14,8 +14,6 @@
 
 from sharpf.utils.callbacks import FitDurationCallback
 from sharpf.utils.collect_env import collect_env_info
-from sharpf.modeling.meta_arch.depth_regressor import DepthRegressor
-from sharpf.modeling.meta_arch.depth_segmentator import DepthSegmentator
 
 from configs import trainer, optimizer, scheduler
 
@@ -38,7 +36,7 @@ def main(cfg: DictConfig):
     log.info(f"Original working directory: {hydra.utils.get_original_cwd()}")
     seed_everything(cfg.seed)
 
-    model = DepthRegressor(cfg)
+    model = instantiate(cfg.meta_arch, cfg=cfg)
     if cfg.weights is not None:
         model.load_state_dict(torch.load(cfg.weights)['state_dict'])
 

From c87b3c563675dea6f7781a8827caea375fbd1dff Mon Sep 17 00:00:00 2001
From: "g.bobrovskih" <g.bobrovskih@an01.zhores>
Date: Sat, 22 Aug 2020 13:13:28 +0300
Subject: [PATCH 07/12] changed balanced accuracy computation, changed dict
 keys in depth ds, fixed cfgs

---
 configs/config.yaml                           |  5 --
 configs/meta_arch/depth-segmentator.yaml      |  7 +-
 configs/model/unet-segmentator.yaml           |  6 +-
 configs/transforms/depth-norm.yaml            |  6 +-
 experiments/my_exp/.hydra/config.yaml         | 52 +++++++--------
 sharpf/modeling/__init__.py                   |  2 +-
 sharpf/modeling/meta_arch/__init__.py         |  1 +
 sharpf/modeling/meta_arch/depth_regressor.py  | 22 ++-----
 .../modeling/meta_arch/depth_segmentator.py   | 64 +++++++++++--------
 sharpf/utils/abc_utils/hdf5/dataset.py        |  2 +-
 10 files changed, 79 insertions(+), 88 deletions(-)

diff --git a/configs/config.yaml b/configs/config.yaml
index 1aff5f84..b06ac839 100644
--- a/configs/config.yaml
+++ b/configs/config.yaml
@@ -12,13 +12,8 @@ data_loader:
     total_batch_size: 16
     num_workers: 0  # depending on RAM capacity set this value carefully
   val:
-<<<<<<< HEAD
-    total_batch_size: 4
-    num_workers: 0
-=======
     total_batch_size: 16
     num_workers: 0  # depending on RAM capacity set this value carefully
->>>>>>> pl_hydra
   test:
     total_batch_size: 16
     num_workers: 0  # depending on RAM capacity set this value carefully
diff --git a/configs/meta_arch/depth-segmentator.yaml b/configs/meta_arch/depth-segmentator.yaml
index 769fa231..53d3e914 100644
--- a/configs/meta_arch/depth-segmentator.yaml
+++ b/configs/meta_arch/depth-segmentator.yaml
@@ -1,6 +1,7 @@
 # @package _group_
 
-target: sharpf.modeling.DepthSegmentator
-monitor: val_balanced_accuracy #TODO: add balanced accuracy metric class
+pl_class:
+  _target_: sharpf.modeling.DepthSegmentator
+monitor: val_balanced_accuracy 
 loss:
-   target: torch.nn.functional.binary_cross_entropy_with_logits
+  _target_: torch.nn.functional.binary_cross_entropy_with_logits
diff --git a/configs/model/unet-segmentator.yaml b/configs/model/unet-segmentator.yaml
index be4c3666..90d383eb 100644
--- a/configs/model/unet-segmentator.yaml
+++ b/configs/model/unet-segmentator.yaml
@@ -2,7 +2,7 @@
 model_name: PixelSegmentator
 params:
   feature_extractor:
-    target: sharpf.modeling.Unet
+    _target_: sharpf.modeling.Unet
     params:
       encoder_name: resnet50
       decoder_use_batchnorm: true
@@ -10,10 +10,10 @@ params:
       decoder_attention_type: null
       in_channels: 1
   segmentation_head:
-    - target: torch.nn.Conv2d
+    - _target_: torch.nn.Conv2d
       params:
         in_channels: 16
         out_channels: 1
         kernel_size: 3
         padding: 1
-    - target: torch.nn.Sigmoid
\ No newline at end of file
+    - _target_: torch.nn.Sigmoid
\ No newline at end of file
diff --git a/configs/transforms/depth-norm.yaml b/configs/transforms/depth-norm.yaml
index 7b5b6165..09ee240b 100644
--- a/configs/transforms/depth-norm.yaml
+++ b/configs/transforms/depth-norm.yaml
@@ -1,13 +1,13 @@
 # @package _group_
 
 train:
-  - target: sharpf.utils.abc_utils.torch.TypeCast
+  - _target_: sharpf.utils.abc_utils.torch.TypeCast
 
 val:
-  - target: sharpf.utils.abc_utils.torch.TypeCast
+  - _target_: sharpf.utils.abc_utils.torch.TypeCast
 
 test:
-  - target: sharpf.utils.abc_utils.torch.TypeCast
+  - _target_: sharpf.utils.abc_utils.torch.TypeCast
 
 normalisation:
   - data: ['standartize', 'quantile']
diff --git a/experiments/my_exp/.hydra/config.yaml b/experiments/my_exp/.hydra/config.yaml
index 31d24f3c..17ca7ed5 100644
--- a/experiments/my_exp/.hydra/config.yaml
+++ b/experiments/my_exp/.hydra/config.yaml
@@ -1,13 +1,13 @@
 data_loader:
   train:
     total_batch_size: 16
-    num_workers: 2
+    num_workers: 0
   val:
-    total_batch_size: 4
+    total_batch_size: 16
     num_workers: 0
   test:
     total_batch_size: 16
-    num_workers: 2
+    num_workers: 0
 eval_only: false
 weights: null
 seed: 123
@@ -22,7 +22,7 @@ model:
   model_name: PixelSegmentator
   params:
     feature_extractor:
-      target: sharpf.modeling.Unet
+      _target_: sharpf.modeling.Unet
       params:
         encoder_name: resnet50
         decoder_use_batchnorm: true
@@ -35,29 +35,30 @@ model:
         decoder_attention_type: null
         in_channels: 1
     segmentation_head:
-    - target: torch.nn.Conv2d
+    - _target_: torch.nn.Conv2d
       params:
         in_channels: 16
         out_channels: 1
         kernel_size: 3
         padding: 1
-    - target: torch.nn.Sigmoid
+    - _target_: torch.nn.Sigmoid
 transforms:
   train:
-  - target: sharpf.utils.abc_utils.torch.TypeCast
+  - _target_: sharpf.utils.abc_utils.torch.TypeCast
   val:
-  - target: sharpf.utils.abc_utils.torch.TypeCast
+  - _target_: sharpf.utils.abc_utils.torch.TypeCast
   test:
-  - target: sharpf.utils.abc_utils.torch.TypeCast
+  - _target_: sharpf.utils.abc_utils.torch.TypeCast
   normalisation:
   - data:
     - standartize
     - quantile
 meta_arch:
-  target: sharpf.modeling.DepthSegmentator
+  pl_class:
+    _target_: sharpf.modeling.DepthSegmentator
   monitor: val_balanced_accuracy
   loss:
-    target: torch.nn.functional.binary_cross_entropy_with_logits
+    _target_: torch.nn.functional.binary_cross_entropy_with_logits
 trainer:
   default_root_dir: null
   gradient_clip_val: 0.0
@@ -85,6 +86,7 @@ trainer:
   log_save_interval: 100
   row_log_interval: 50
   distributed_backend: null
+  sync_batchnorm: false
   precision: 32
   weights_summary: top
   weights_save_path: null
@@ -99,20 +101,18 @@ trainer:
   terminate_on_nan: false
   auto_scale_batch_size: false
   prepare_data_per_node: true
-  amp_level: O1
+  amp_backend: native
 scheduler:
-  target: torch.optim.lr_scheduler.StepLR
-  params:
-    step_size: 20
-    gamma: 0.1
-    last_epoch: -1
+  _target_: torch.optim.lr_scheduler.StepLR
+  step_size: 20
+  gamma: 0.1
+  last_epoch: -1
 opt:
-  target: torch.optim.Adam
-  params:
-    betas:
-    - 0.9
-    - 0.999
-    lr: 0.001
-    eps: 1.0e-08
-    weight_decay: 0.0
-    amsgrad: false
+  _target_: torch.optim.Adam
+  betas:
+  - 0.9
+  - 0.999
+  lr: 0.001
+  eps: 1.0e-08
+  weight_decay: 0.0
+  amsgrad: false
diff --git a/sharpf/modeling/__init__.py b/sharpf/modeling/__init__.py
index 22232382..c48eda09 100644
--- a/sharpf/modeling/__init__.py
+++ b/sharpf/modeling/__init__.py
@@ -1,4 +1,4 @@
-from .meta_arch import PointSharpnessRegressor, DepthRegressor
+from .meta_arch import PointSharpnessRegressor, DepthRegressor, DepthSegmentator
 from .model import MODEL_REGISTRY, build_model, DGCNN, Unet, PixelRegressor
 from .modules import (
     AggregationMax,
diff --git a/sharpf/modeling/meta_arch/__init__.py b/sharpf/modeling/meta_arch/__init__.py
index 7840ba94..1aac6294 100644
--- a/sharpf/modeling/meta_arch/__init__.py
+++ b/sharpf/modeling/meta_arch/__init__.py
@@ -1,2 +1,3 @@
 from .point_sharpness_regressor import PointSharpnessRegressor
 from .depth_regressor import DepthRegressor
+from .depth_segmentator import DepthSegmentator
diff --git a/sharpf/modeling/meta_arch/depth_regressor.py b/sharpf/modeling/meta_arch/depth_regressor.py
index 1c7205b2..79af283c 100644
--- a/sharpf/modeling/meta_arch/depth_regressor.py
+++ b/sharpf/modeling/meta_arch/depth_regressor.py
@@ -27,15 +27,9 @@ class DepthRegressor(LightningModule):
 
     def __init__(self, cfg):
         super().__init__()
-<<<<<<< HEAD
-        self.hparams = flatten_omegaconf(cfg)  # there should be better official way later
-        self.cfg = cfg
+        self.hparams = cfg  # there should be better official way later
         self.task = 'regression'
-        self.model = build_model(cfg.model)
-=======
-        self.hparams = cfg
         self.model = build_model(self.hparams.model)
->>>>>>> pl_hydra
         self.example_input_array = torch.rand(1, 1, 64, 64)
         self.data_dir = hydra.utils.to_absolute_path(self.hparams.data.data_dir)
 
@@ -50,7 +44,7 @@ def forward(self, x):
         return self.model(x)
 
     def training_step(self, batch, batch_idx):
-        points, distances = batch['image'], batch['distances']
+        points, distances = batch['image'], batch['distance_to_sharp']
         points = points.unsqueeze(1) if points.dim() == 3 else points
         preds = self.forward(points)
         loss = hydra.utils.instantiate(self.hparams.meta_arch.loss, preds, distances)
@@ -59,7 +53,7 @@ def training_step(self, batch, batch_idx):
         return result
 
     def _shared_eval_step(self, batch, batch_idx, prefix):
-        points, distances = batch['image'], batch['distances']
+        points, distances = batch['image'], batch['distance_to_sharp']
         points = points.unsqueeze(1) if points.dim() == 3 else points
         preds = self.forward(points)
 
@@ -101,7 +95,7 @@ def configure_optimizers(self):
     def _get_dataset(self, partition):
         if hasattr(self, f'{partition}_set') and getattr(self, f'{partition}_set') is not None:
             return getattr(self, f'{partition}_set')
-<<<<<<< HEAD
+
         transform = CompositeTransform([hydra.utils.instantiate(tf) for tf in self.cfg.transforms[partition]])
         return DepthDataset(
             data_dir=self.data_dir,
@@ -109,14 +103,6 @@ def _get_dataset(self, partition):
             data_label=self.cfg.data.data_label,
             target_label=self.cfg.data.target_label,
             task=self.task,
-=======
-        transform = CompositeTransform([hydra.utils.instantiate(tf) for tf in self.hparams.transforms[partition]])
-        return LotsOfHdf5Files(
-            data_dir=self.data_dir,
-            io=DepthMapIO,
-            data_label=self.hparams.data.data_label,
-            target_label=self.hparams.data.target_label,
->>>>>>> pl_hydra
             partition=partition,
             transform=transform,
             max_loaded_files=self.hparams.data.max_loaded_files
diff --git a/sharpf/modeling/meta_arch/depth_segmentator.py b/sharpf/modeling/meta_arch/depth_segmentator.py
index f0f0082e..ec6becb4 100644
--- a/sharpf/modeling/meta_arch/depth_segmentator.py
+++ b/sharpf/modeling/meta_arch/depth_segmentator.py
@@ -7,15 +7,15 @@
 from pytorch_lightning import TrainResult
 from pytorch_lightning.core.lightning import LightningModule
 from pytorch_lightning.metrics import tensor_metric
+from pytorch_lightning.metrics.sklearns import BalancedAccuracy
 from torch.utils.data import DataLoader
 
 from sharpf.utils.comm import get_batch_size
-from sharpf.utils.losses import balanced_accuracy
+from pytorch_lightning.metrics.functional import stat_scores
 from ..model.build import build_model
 from ...data import DepthMapIO
 from ...utils.abc_utils.hdf5 import DepthDataset
 from ...utils.abc_utils.torch import CompositeTransform
-from ...utils.config import flatten_omegaconf
 
 log = logging.getLogger(__name__)
 
@@ -29,17 +29,16 @@ class DepthSegmentator(LightningModule):
 
     def __init__(self, cfg):
         super().__init__()
-        self.hparams = flatten_omegaconf(cfg)  # there should be better official way later
-        self.cfg = cfg
+        self.hparams = cfg
         self.task = 'segmentation'
-        self.model = build_model(cfg.model)
+        self.model = build_model(self.hparams.model)
         self.example_input_array = torch.rand(1, 1, 64, 64)
-        self.data_dir = hydra.utils.to_absolute_path(self.cfg.data.data_dir)
+        self.data_dir = hydra.utils.to_absolute_path(self.hparams.data.data_dir)
 
-        dist_backend = self.cfg.trainer.distributed_backend
+        dist_backend = self.hparams.trainer.distributed_backend
         if (dist_backend is not None and 'ddp' in dist_backend) or (
-                dist_backend is None and self.cfg.trainer.gpus is not None and (
-                self.cfg.trainer.gpus > 1 or self.cfg.trainer.num_nodes > 1)):
+                dist_backend is None and self.hparams.trainer.gpus is not None and (
+                self.hparams.trainer.gpus > 1 or self.hparams.trainer.num_nodes > 1)):
             log.info('Converting BatchNorm to SyncBatchNorm. Do not forget other batch-dimension dependent operations.')
             self.model = nn.SyncBatchNorm.convert_sync_batchnorm(self.model)
 
@@ -47,36 +46,45 @@ def forward(self, x):
         return self.model(x)
 
     def training_step(self, batch, batch_idx):
-        points, distances = batch['image'], batch['distances']
+        points, target = batch['image'], batch['close_to_sharp_mask']
         points = points.unsqueeze(1) if points.dim() == 3 else points
         preds = self.forward(points)
-        loss = hydra.utils.instantiate(self.cfg.meta_arch.loss, preds, distances)
+        loss = hydra.utils.instantiate(self.hparams.meta_arch.loss, preds, target)
         result = TrainResult(minimize=loss)
         result.log('train_loss', loss, prog_bar=True)
         return result
 
     def _shared_eval_step(self, batch, batch_idx, prefix):
-        metric_name = 'balanced_accuracy'
-        metric = balanced_accuracy
-        points, distances = batch['image'], batch['distances']
+        stat_names = ['tpr', 'tnr']
+        points, target = batch['image'], batch['close_to_sharp_mask']
         points = points.unsqueeze(1) if points.dim() == 3 else points
         preds = self.forward(points)
 
-        metric_value = metric(preds, distances)  # (batch)
+        statistics = torch.Tensor(list(map(lambda a: stat_scores(a[0], a[1], class_index=1),
+                                           [preds, target]))).T.to(preds.device) # return: tp, fp, tn, fn, sup
+                                                                # dim: (5, batch)
+
+        tpr = statistics[0] / (statistics[0] + statistics[-2])  # 1(pred=1| true=1) / (1(pred=1| true=1) + 1(pred=0|true=1))
+        tnr = statistics[2] / (statistics[2] + statistics[1])  # 1(pred=0| true=0) / (1(pred=0| true=0) + 1(pred=1|true=0))
+
         # loss = hydra.utils.instantiate(self.cfg.meta_arch.loss, preds, distances)
         # self.logger[0].experiment.add_scalars('losses', {f'{prefix}_loss': loss})
         # TODO Consider pl.EvalResult, once there are good examples how to use it
-        return {f'{metric_name}_sum': metric_value.sum(),
+        return {f'{stat_names[0]}': tpr,
+                f'{stat_names[1]}': tnr,
                 'batch_size': torch.tensor(points.size(0), device=self.device)}
 
     def _shared_eval_epoch_end(self, outputs, prefix):
-        metric_name = 'balanced_accuracy'
+        stat_names = ['tpr', 'tnr']
         metric_sum = 0
         size = 0
         for output in outputs:
-            metric_sum += output[f'{metric_name}_sum']
+            metric_sum += sum((output[f'{stat_names[0]}'] + output[f'{stat_names[1]}']).double() / 2)
             size += output['batch_size']
+
+        metric_name = 'balanced_accuracy'
         mean_metric = gather_sum(metric_sum) / gather_sum(size)
+
         logs = {f'{prefix}_mean_{metric_name}': mean_metric}
         return {f'{prefix}_mean_{metric_name}': mean_metric, 'log': logs}
 
@@ -93,35 +101,35 @@ def test_epoch_end(self, outputs):
         return self._shared_eval_epoch_end(outputs, prefix='test')
 
     def configure_optimizers(self):
-        optimizer = hydra.utils.instantiate(self.cfg.opt, params=self.parameters())
-        scheduler = hydra.utils.instantiate(self.cfg.scheduler, optimizer=optimizer)
+        optimizer = hydra.utils.instantiate(self.hparams.opt, params=self.parameters())
+        scheduler = hydra.utils.instantiate(self.hparams.scheduler, optimizer=optimizer)
         return [optimizer], [scheduler]
 
     def _get_dataset(self, partition):
         if hasattr(self, f'{partition}_set') and getattr(self, f'{partition}_set') is not None:
             return getattr(self, f'{partition}_set')
-        transform = CompositeTransform([hydra.utils.instantiate(tf) for tf in self.cfg.transforms[partition]])
-        if 'normalisation' in self.cfg.transforms.keys:
-            normalisation = self.cfg.transforms['normalisation']
+        transform = CompositeTransform([hydra.utils.instantiate(tf) for tf in self.hparams.transforms[partition]])
+        if 'normalisation' in self.hparams.transforms.keys():
+            normalisation = self.hparams.transforms['normalisation']
         else:
             normalisation = None
 
         return DepthDataset(
             data_dir=self.data_dir,
             io=DepthMapIO,
-            data_label=self.cfg.data.data_label,
-            target_label=self.cfg.data.target_label,
+            data_label=self.hparams.data.data_label,
+            target_label=self.hparams.data.target_label,
             task=self.task,
             partition=partition,
             transform=transform,
-            max_loaded_files=self.cfg.data.max_loaded_files,
+            max_loaded_files=self.hparams.data.max_loaded_files,
             normalisation=normalisation
         )
 
     def _get_dataloader(self, partition):
         dataset = self._get_dataset(partition)
-        num_workers = self.cfg.data_loader[partition].num_workers
-        batch_size = get_batch_size(self.cfg.data_loader[partition].total_batch_size)
+        num_workers = self.hparams.data_loader[partition].num_workers
+        batch_size = get_batch_size(self.hparams.data_loader[partition].total_batch_size)
         return DataLoader(dataset, batch_size=batch_size, num_workers=num_workers, pin_memory=True)
 
     def setup(self, stage: str):
diff --git a/sharpf/utils/abc_utils/hdf5/dataset.py b/sharpf/utils/abc_utils/hdf5/dataset.py
index c3da7287..d96e228c 100644
--- a/sharpf/utils/abc_utils/hdf5/dataset.py
+++ b/sharpf/utils/abc_utils/hdf5/dataset.py
@@ -209,7 +209,7 @@ def __getitem__(self, index):
             output['distance_and_close_to_sharp'] = target
         if self.task == 'segmentation':
             target = torch.FloatTensor(close_to_sharp).unsqueeze(0)
-            output['close_to_sharp_mask] = target
+            output['close_to_sharp_mask'] = target
         elif self.task == 'regression':
             target = torch.FloatTensor(dist_mask).unsqueeze(0)
             output['distance_to_sharp'] = target

From 07c1a1e5ffd294623b14c4f2f0fc982832ed903e Mon Sep 17 00:00:00 2001
From: rakhimovv <risonyo@gmail.com>
Date: Tue, 25 Aug 2020 00:05:43 +0300
Subject: [PATCH 08/12] delete garbage

---
 0                                        |   0
 experiments/my_exp/.hydra/config.yaml    | 118 --------------------
 experiments/my_exp/.hydra/hydra.yaml     | 132 -----------------------
 experiments/my_exp/.hydra/overrides.yaml |   6 --
 4 files changed, 256 deletions(-)
 delete mode 100644 0
 delete mode 100644 experiments/my_exp/.hydra/config.yaml
 delete mode 100644 experiments/my_exp/.hydra/hydra.yaml
 delete mode 100644 experiments/my_exp/.hydra/overrides.yaml

diff --git a/0 b/0
deleted file mode 100644
index e69de29b..00000000
diff --git a/experiments/my_exp/.hydra/config.yaml b/experiments/my_exp/.hydra/config.yaml
deleted file mode 100644
index 17ca7ed5..00000000
--- a/experiments/my_exp/.hydra/config.yaml
+++ /dev/null
@@ -1,118 +0,0 @@
-data_loader:
-  train:
-    total_batch_size: 16
-    num_workers: 0
-  val:
-    total_batch_size: 16
-    num_workers: 0
-  test:
-    total_batch_size: 16
-    num_workers: 0
-eval_only: false
-weights: null
-seed: 123
-data:
-  resolution: high
-  size: 64k
-  data_dir: data/abc/images/${data.resolution}/${data.size}
-  data_label: image
-  target_label: distances
-  max_loaded_files: 5
-model:
-  model_name: PixelSegmentator
-  params:
-    feature_extractor:
-      _target_: sharpf.modeling.Unet
-      params:
-        encoder_name: resnet50
-        decoder_use_batchnorm: true
-        decoder_channels:
-        - 256
-        - 128
-        - 64
-        - 32
-        - 16
-        decoder_attention_type: null
-        in_channels: 1
-    segmentation_head:
-    - _target_: torch.nn.Conv2d
-      params:
-        in_channels: 16
-        out_channels: 1
-        kernel_size: 3
-        padding: 1
-    - _target_: torch.nn.Sigmoid
-transforms:
-  train:
-  - _target_: sharpf.utils.abc_utils.torch.TypeCast
-  val:
-  - _target_: sharpf.utils.abc_utils.torch.TypeCast
-  test:
-  - _target_: sharpf.utils.abc_utils.torch.TypeCast
-  normalisation:
-  - data:
-    - standartize
-    - quantile
-meta_arch:
-  pl_class:
-    _target_: sharpf.modeling.DepthSegmentator
-  monitor: val_balanced_accuracy
-  loss:
-    _target_: torch.nn.functional.binary_cross_entropy_with_logits
-trainer:
-  default_root_dir: null
-  gradient_clip_val: 0.0
-  process_position: 0
-  num_nodes: 1
-  num_processes: 1
-  gpus: 1
-  auto_select_gpus: false
-  tpu_cores: null
-  log_gpu_memory: null
-  progress_bar_refresh_rate: 1
-  overfit_batches: 0.0
-  track_grad_norm: -1
-  check_val_every_n_epoch: 1
-  fast_dev_run: false
-  accumulate_grad_batches: 1
-  max_epochs: 1
-  min_epochs: 1
-  max_steps: null
-  min_steps: null
-  limit_train_batches: 1.0
-  limit_val_batches: 1.0
-  limit_test_batches: 1.0
-  val_check_interval: 1.0
-  log_save_interval: 100
-  row_log_interval: 50
-  distributed_backend: null
-  sync_batchnorm: false
-  precision: 32
-  weights_summary: top
-  weights_save_path: null
-  num_sanity_val_steps: 2
-  truncated_bptt_steps: null
-  resume_from_checkpoint: null
-  benchmark: false
-  deterministic: false
-  reload_dataloaders_every_epoch: false
-  auto_lr_find: false
-  replace_sampler_ddp: true
-  terminate_on_nan: false
-  auto_scale_batch_size: false
-  prepare_data_per_node: true
-  amp_backend: native
-scheduler:
-  _target_: torch.optim.lr_scheduler.StepLR
-  step_size: 20
-  gamma: 0.1
-  last_epoch: -1
-opt:
-  _target_: torch.optim.Adam
-  betas:
-  - 0.9
-  - 0.999
-  lr: 0.001
-  eps: 1.0e-08
-  weight_decay: 0.0
-  amsgrad: false
diff --git a/experiments/my_exp/.hydra/hydra.yaml b/experiments/my_exp/.hydra/hydra.yaml
deleted file mode 100644
index 1818c131..00000000
--- a/experiments/my_exp/.hydra/hydra.yaml
+++ /dev/null
@@ -1,132 +0,0 @@
-hydra:
-  run:
-    dir: experiments/my_exp
-  sweep:
-    dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S}
-    subdir: ${hydra.job.num}
-  hydra_logging:
-    version: 1
-    formatters:
-      simple:
-        format: '[%(asctime)s][HYDRA] %(message)s'
-    handlers:
-      console:
-        class: logging.StreamHandler
-        formatter: simple
-        stream: ext://sys.stdout
-    root:
-      level: INFO
-      handlers:
-      - console
-    loggers:
-      logging_example:
-        level: DEBUG
-    disable_existing_loggers: false
-  job_logging:
-    version: 1
-    formatters:
-      simple:
-        format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s'
-    handlers:
-      console:
-        class: logging.StreamHandler
-        formatter: simple
-        stream: ext://sys.stdout
-      file:
-        class: logging.FileHandler
-        formatter: simple
-        filename: ${hydra.job.name}.log
-    root:
-      level: INFO
-      handlers:
-      - console
-      - file
-    disable_existing_loggers: false
-  sweeper:
-    _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper
-    max_batch_size: null
-  launcher:
-    _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher
-  help:
-    app_name: ${hydra.job.name}
-    header: '${hydra.help.app_name} is powered by Hydra.
-
-      '
-    footer: 'Powered by Hydra (https://hydra.cc)
-
-      Use --hydra-help to view Hydra specific help
-
-      '
-    template: '${hydra.help.header}
-
-      == Configuration groups ==
-
-      Compose your configuration from those groups (group=option)
-
-
-      $APP_CONFIG_GROUPS
-
-
-      == Config ==
-
-      Override anything in the config (foo.bar=value)
-
-
-      $CONFIG
-
-
-      ${hydra.help.footer}
-
-      '
-  hydra_help:
-    hydra_help: ???
-    template: 'Hydra (${hydra.runtime.version})
-
-      See https://hydra.cc for more info.
-
-
-      == Flags ==
-
-      $FLAGS_HELP
-
-
-      == Configuration groups ==
-
-      Compose your configuration from those groups (For example, append hydra/job_logging=disabled
-      to command line)
-
-
-      $HYDRA_CONFIG_GROUPS
-
-
-      Use ''--cfg hydra'' to Show the Hydra config.
-
-      '
-  output_subdir: .hydra
-  overrides:
-    hydra:
-    - hydra.run.dir=experiments/my_exp
-    task:
-    - trainer.gpus=1
-    - trainer.max_epochs=1
-    - model=unet-segmentator
-    - data=abc-depth
-    - transforms=depth-norm
-    - meta_arch=depth-segmentator
-  job:
-    name: train_net
-    override_dirname: data=abc-depth,meta_arch=depth-segmentator,model=unet-segmentator,trainer.gpus=1,trainer.max_epochs=1,transforms=depth-norm
-    id: ???
-    num: ???
-    config_name: config
-    env_set: {}
-    env_copy: []
-    config:
-      override_dirname:
-        kv_sep: '='
-        item_sep: ','
-        exclude_keys: []
-  runtime:
-    version: 1.0.0rc4
-    cwd: /trinity/home/g.bobrovskih/sharp_features_pl_hydra_orig
-  verbose: false
diff --git a/experiments/my_exp/.hydra/overrides.yaml b/experiments/my_exp/.hydra/overrides.yaml
deleted file mode 100644
index f1cff667..00000000
--- a/experiments/my_exp/.hydra/overrides.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-- trainer.gpus=1
-- trainer.max_epochs=1
-- model=unet-segmentator
-- data=abc-depth
-- transforms=depth-norm
-- meta_arch=depth-segmentator

From 56e1d2e9aee5445fc5f5438a58a3e74116122c39 Mon Sep 17 00:00:00 2001
From: rakhimovv <risonyo@gmail.com>
Date: Tue, 25 Aug 2020 01:58:31 +0300
Subject: [PATCH 09/12] add more distributed tools

---
 sharpf/utils/comm.py | 222 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 222 insertions(+)

diff --git a/sharpf/utils/comm.py b/sharpf/utils/comm.py
index d58908b8..b9e8d553 100644
--- a/sharpf/utils/comm.py
+++ b/sharpf/utils/comm.py
@@ -1,3 +1,14 @@
+"""
+This file contains primitives for multi-gpu communication.
+This is useful when doing distributed training.
+"""
+
+import functools
+import logging
+import pickle
+
+import numpy as np
+import torch
 import torch.distributed as dist
 
 
@@ -15,3 +26,214 @@ def get_batch_size(total_batch_size):
         f"Total batch size ({total_batch_size}) must be divisible by the number of gpus ({world_size})."
     batch_size = total_batch_size // world_size
     return batch_size
+
+
+def get_rank() -> int:  # rank of the calling process, or 0 outside a distributed run
+    if not dist.is_available():
+        return 0  # torch.distributed reports itself unavailable
+    if not dist.is_initialized():
+        return 0  # no process group has been initialised
+    return dist.get_rank()
+
+
+def is_main_process() -> bool:
+    return get_rank() == 0  # rank 0 is treated as the main process
+
+
+def synchronize():
+    """
+    Barrier across all processes (``dist.barrier``) when using distributed training.
+    Returns immediately if torch.distributed is unavailable, uninitialised, or world size is 1.
+    """
+    if not dist.is_available():
+        return
+    if not dist.is_initialized():
+        return
+    world_size = dist.get_world_size()
+    if world_size == 1:
+        return  # a single process has nothing to wait for
+    dist.barrier()
+
+
+@functools.lru_cache()
+def _get_global_gloo_group():
+    """
+    Return a process group containing all ranks: a new gloo group when the default
+    backend is NCCL, otherwise the default group. The result is cached.
+    """
+    if dist.get_backend() == "nccl":
+        return dist.new_group(backend="gloo")  # separate gloo group alongside the NCCL default
+    else:
+        return dist.group.WORLD  # reuse the default group unchanged
+
+
+def _serialize_to_tensor(data, group):  # pickle ``data`` into a 1-D byte tensor for ``group``
+    backend = dist.get_backend(group)
+    assert backend in ["gloo", "nccl"]
+    device = torch.device("cpu" if backend == "gloo" else "cuda")  # gloo -> CPU, nccl -> CUDA
+
+    buffer = pickle.dumps(data)
+    if len(buffer) > 1024 ** 3:  # payload larger than 1024**3 bytes: warn, but still send it
+        logger = logging.getLogger(__name__)
+        logger.warning(
+            "Rank {} trying to all-gather {:.2f} GB of data on device {}".format(
+                get_rank(), len(buffer) / (1024 ** 3), device
+            )
+        )
+    storage = torch.ByteStorage.from_buffer(buffer)
+    tensor = torch.ByteTensor(storage).to(device=device)
+    return tensor
+
+
+def _pad_to_largest_tensor(tensor, group):
+    """
+    Zero-pad the 1-D uint8 ``tensor`` to the largest length found across ``group``.
+
+    Returns (list[int], Tensor): each rank's unpadded length, and the padded tensor.
+    """
+    world_size = dist.get_world_size(group=group)
+    assert (
+        world_size >= 1
+    ), "comm.gather/all_gather must be called from ranks within the given group!"
+    local_size = torch.tensor([tensor.numel()], dtype=torch.int64, device=tensor.device)
+    size_list = [
+        torch.zeros([1], dtype=torch.int64, device=tensor.device) for _ in range(world_size)
+    ]
+    dist.all_gather(size_list, local_size, group=group)
+    size_list = [int(size.item()) for size in size_list]
+
+    max_size = max(size_list)
+
+    # torch all_gather needs equal shapes on every rank, so pad up to max_size;
+    # sizes are compared as plain ints rather than through the 1-element tensor
+    if tensor.numel() != max_size:
+        padding = torch.zeros((max_size - tensor.numel(),), dtype=torch.uint8, device=tensor.device)
+        tensor = torch.cat((tensor, padding), dim=0)
+    return size_list, tensor
+
+
+def all_gather(data, group=None):
+    """
+    Run all_gather on arbitrary picklable data (not necessarily tensors).
+
+    Args:
+        data: any picklable object; it is pickled, exchanged, and unpickled
+        group: a torch process group. When None, the group returned by
+            ``_get_global_gloo_group()`` is used.
+
+    Returns:
+        list[data]: one entry per rank; ``[data]`` when only one process runs
+    """
+    if get_world_size() == 1:
+        return [data]
+    if group is None:
+        group = _get_global_gloo_group()
+    if dist.get_world_size(group) == 1:
+        return [data]
+
+    tensor = _serialize_to_tensor(data, group)
+
+    size_list, tensor = _pad_to_largest_tensor(tensor, group)
+    max_size = max(size_list)
+
+    # receive buffers: one per rank, each sized to the padded length
+    tensor_list = [
+        torch.empty((max_size,), dtype=torch.uint8, device=tensor.device) for _ in size_list
+    ]
+    dist.all_gather(tensor_list, tensor, group=group)
+
+    data_list = []
+    for size, tensor in zip(size_list, tensor_list):
+        buffer = tensor.cpu().numpy().tobytes()[:size]  # cut the zero padding off before unpickling
+        data_list.append(pickle.loads(buffer))
+
+    return data_list
+
+
+def gather(data, dst=0, group=None):
+    """
+    Run gather on arbitrary picklable data (not necessarily tensors).
+
+    Args:
+        data: any picklable object; it is pickled, sent, and unpickled on ``dst``
+        dst (int): destination rank
+        group: a torch process group. When None, the group returned by
+            ``_get_global_gloo_group()`` is used.
+
+    Returns:
+        list[data]: on dst, a list of data gathered from each rank. Otherwise,
+            an empty list. With a single process, ``[data]`` is returned.
+    """
+    if get_world_size() == 1:
+        return [data]
+    if group is None:
+        group = _get_global_gloo_group()
+    if dist.get_world_size(group=group) == 1:
+        return [data]
+    rank = dist.get_rank(group=group)
+
+    tensor = _serialize_to_tensor(data, group)
+    size_list, tensor = _pad_to_largest_tensor(tensor, group)
+
+    # only the destination rank allocates receive buffers and decodes the payloads
+    if rank == dst:
+        max_size = max(size_list)
+        tensor_list = [
+            torch.empty((max_size,), dtype=torch.uint8, device=tensor.device) for _ in size_list
+        ]
+        dist.gather(tensor, tensor_list, dst=dst, group=group)
+
+        data_list = []
+        for size, tensor in zip(size_list, tensor_list):
+            buffer = tensor.cpu().numpy().tobytes()[:size]  # cut the zero padding off before unpickling
+            data_list.append(pickle.loads(buffer))
+        return data_list
+    else:
+        dist.gather(tensor, [], dst=dst, group=group)  # non-destination ranks only send
+        return []
+
+
+def shared_random_seed():
+    """
+    Returns:
+        int: a random number that is the same across all workers.
+            If workers need a shared RNG, they can use this shared seed to
+            create one.
+
+    All workers must call this function, otherwise it will deadlock.
+    """
+    ints = np.random.randint(2 ** 31)  # each worker draws its own candidate below 2**31
+    all_ints = all_gather(ints)
+    return all_ints[0]  # every worker keeps the first gathered entry, so all agree
+
+
+def reduce_dict(input_dict, average=True):
+    """
+    Reduce the values in the dictionary from all processes so that the process with
+    rank 0 holds the reduced results.
+
+    Args:
+        input_dict (dict): inputs to be reduced. All values must be scalar CUDA tensors.
+        average (bool): if True, rank 0 divides the reduced values by the world size
+
+    Returns:
+        a dict with the same keys as input_dict; ``input_dict`` itself if world size < 2.
+    """
+    world_size = get_world_size()
+    if world_size < 2:
+        return input_dict
+    with torch.no_grad():
+        names = []
+        values = []
+        # sort the keys so that they are consistent across processes
+        for k in sorted(input_dict.keys()):
+            names.append(k)
+            values.append(input_dict[k])
+        values = torch.stack(values, dim=0)  # one tensor, so a single reduce covers every key
+        dist.reduce(values, dst=0)
+        if dist.get_rank() == 0 and average:
+            # only main process gets accumulated, so only divide by
+            # world_size in this case
+            values /= world_size
+        reduced_dict = {k: v for k, v in zip(names, values)}
+    return reduced_dict

From 4b45ef42720d10fc406df56487e82af163ab10d1 Mon Sep 17 00:00:00 2001
From: rakhimovv <risonyo@gmail.com>
Date: Tue, 25 Aug 2020 03:00:10 +0300
Subject: [PATCH 10/12] fix metric calculation

---
 sharpf/modeling/__init__.py                   |  1 +
 .../modeling/meta_arch/depth_segmentator.py   | 70 ++++++++-----------
 sharpf/modeling/metrics.py                    | 21 ++++++
 sharpf/utils/losses.py                        | 12 ----
 4 files changed, 52 insertions(+), 52 deletions(-)
 create mode 100644 sharpf/modeling/metrics.py
 delete mode 100644 sharpf/utils/losses.py

diff --git a/sharpf/modeling/__init__.py b/sharpf/modeling/__init__.py
index c48eda09..205828c4 100644
--- a/sharpf/modeling/__init__.py
+++ b/sharpf/modeling/__init__.py
@@ -1,4 +1,5 @@
 from .meta_arch import PointSharpnessRegressor, DepthRegressor, DepthSegmentator
+from .metrics import balanced_accuracy
 from .model import MODEL_REGISTRY, build_model, DGCNN, Unet, PixelRegressor
 from .modules import (
     AggregationMax,
diff --git a/sharpf/modeling/meta_arch/depth_segmentator.py b/sharpf/modeling/meta_arch/depth_segmentator.py
index ec6becb4..166bdd40 100644
--- a/sharpf/modeling/meta_arch/depth_segmentator.py
+++ b/sharpf/modeling/meta_arch/depth_segmentator.py
@@ -3,15 +3,13 @@
 import hydra
 import torch
 import torch.nn as nn
-import torch.nn.functional as F
 from pytorch_lightning import TrainResult
 from pytorch_lightning.core.lightning import LightningModule
-from pytorch_lightning.metrics import tensor_metric
-from pytorch_lightning.metrics.sklearns import BalancedAccuracy
+from pytorch_lightning.metrics.functional import stat_scores
 from torch.utils.data import DataLoader
 
-from sharpf.utils.comm import get_batch_size
-from pytorch_lightning.metrics.functional import stat_scores
+from sharpf.utils.comm import get_batch_size, all_gather, synchronize
+from ..metrics import balanced_accuracy
 from ..model.build import build_model
 from ...data import DepthMapIO
 from ...utils.abc_utils.hdf5 import DepthDataset
@@ -20,11 +18,6 @@
 log = logging.getLogger(__name__)
 
 
-@tensor_metric()
-def gather_sum(x: torch.Tensor) -> torch.Tensor:
-    return x
-
-
 class DepthSegmentator(LightningModule):
 
     def __init__(self, cfg):
@@ -42,51 +35,48 @@ def __init__(self, cfg):
             log.info('Converting BatchNorm to SyncBatchNorm. Do not forget other batch-dimension dependent operations.')
             self.model = nn.SyncBatchNorm.convert_sync_batchnorm(self.model)
 
-    def forward(self, x):
+    def forward(self, x, as_mask=True):
+        # as_mask=False falls through to the single self.model(x) call below (model runs once per call).
+        if as_mask:
+            return (self.model(x).sigmoid() > 0.5).long()
         return self.model(x)
 
     def training_step(self, batch, batch_idx):
         points, target = batch['image'], batch['close_to_sharp_mask']
         points = points.unsqueeze(1) if points.dim() == 3 else points
-        preds = self.forward(points)
+        preds = self.forward(points, as_mask=False)
         loss = hydra.utils.instantiate(self.hparams.meta_arch.loss, preds, target)
         result = TrainResult(minimize=loss)
         result.log('train_loss', loss, prog_bar=True)
         return result
 
     def _shared_eval_step(self, batch, batch_idx, prefix):
-        stat_names = ['tpr', 'tnr']
         points, target = batch['image'], batch['close_to_sharp_mask']
         points = points.unsqueeze(1) if points.dim() == 3 else points
-        preds = self.forward(points)
-
-        statistics = torch.Tensor(list(map(lambda a: stat_scores(a[0], a[1], class_index=1),
-                                           [preds, target]))).T.to(preds.device) # return: tp, fp, tn, fn, sup
-                                                                # dim: (5, batch)
-
-        tpr = statistics[0] / (statistics[0] + statistics[-2])  # 1(pred=1| true=1) / (1(pred=1| true=1) + 1(pred=0|true=1))
-        tnr = statistics[2] / (statistics[2] + statistics[1])  # 1(pred=0| true=0) / (1(pred=0| true=0) + 1(pred=1|true=0))
-
-        # loss = hydra.utils.instantiate(self.cfg.meta_arch.loss, preds, distances)
-        # self.logger[0].experiment.add_scalars('losses', {f'{prefix}_loss': loss})
-        # TODO Consider pl.EvalResult, once there are good examples how to use it
-        return {f'{stat_names[0]}': tpr,
-                f'{stat_names[1]}': tnr,
-                'batch_size': torch.tensor(points.size(0), device=self.device)}
+        preds = self.forward(points, as_mask=True)
+        stats = [list(stat_scores(preds[i], target[i], class_index=1)) for i in range(preds.size(0))]
+        tp, fp, tn, fn, sup = torch.Tensor(stats).to(preds.device).T.unsqueeze(1)  # each of size (1, batch)
+        return {'tp': tp, 'fp': fp, 'tn': tn, 'fn': fn, 'sup': sup}
 
     def _shared_eval_epoch_end(self, outputs, prefix):
-        stat_names = ['tpr', 'tnr']
-        metric_sum = 0
-        size = 0
-        for output in outputs:
-            metric_sum += sum((output[f'{stat_names[0]}'] + output[f'{stat_names[1]}']).double() / 2)
-            size += output['batch_size']
-
-        metric_name = 'balanced_accuracy'
-        mean_metric = gather_sum(metric_sum) / gather_sum(size)
-
-        logs = {f'{prefix}_mean_{metric_name}': mean_metric}
-        return {f'{prefix}_mean_{metric_name}': mean_metric, 'log': logs}
+        # gather across sub batches
+        tp = torch.cat([output['tp'] for output in outputs])
+        fp = torch.cat([output['fp'] for output in outputs])
+        tn = torch.cat([output['tn'] for output in outputs])
+        fn = torch.cat([output['fn'] for output in outputs])
+
+        # gather results across gpus
+        synchronize()
+        tp = torch.cat(all_gather(tp))
+        fp = torch.cat(all_gather(fp))
+        tn = torch.cat(all_gather(tn))
+        fn = torch.cat(all_gather(fn))
+
+        # calculate metrics
+        ba = balanced_accuracy(tp, fp, tn, fn)
+
+        logs = {f'{prefix}_balanced_accuracy': ba}
+        return {f'{prefix}_balanced_accuracy': ba, 'log': logs}
 
     def validation_step(self, batch, batch_idx):
         return self._shared_eval_step(batch, batch_idx, prefix='val')
diff --git a/sharpf/modeling/metrics.py b/sharpf/modeling/metrics.py
new file mode 100644
index 00000000..b6889ee7
--- /dev/null
+++ b/sharpf/modeling/metrics.py
@@ -0,0 +1,21 @@
+import torch
+
+
+def balanced_accuracy(tp: torch.Tensor, fp: torch.Tensor, tn: torch.Tensor, fn: torch.Tensor) -> torch.Tensor:
+    """
+    Calculate balanced accuracy for one class based on provided statistics
+
+    Args:
+        tp (Tensor): of shape (B, 1). True positive.
+        fo (Tensor): of shape (B, 1). False positive stats of a shape.
+        tn (Tensor): of shape (B, 1). True negative stats of a shape.
+        fn (Tensor): of shape (B, 1). False negative stats of a shape.
+
+    Returns:
+        torch.Tensor: balanced accuracy value
+    """
+    tpr = tp / (tp + fn)  # (B, 1); 0/0 yields NaN for float inputs when tp + fn == 0
+    tnr = tn / (tn + fp)  # (B, 1); 0/0 yields NaN for float inputs when tn + fp == 0
+    tpr = torch.where(torch.isnan(tpr), tnr, tpr)  # (B, 1); an undefined rate borrows the other one
+    tnr = torch.where(torch.isnan(tnr), tpr, tnr)  # (B, 1); uses the tpr already patched above
+    return 0.5 * torch.mean(tpr + tnr)  # mean over all elements of (tpr + tnr) / 2
diff --git a/sharpf/utils/losses.py b/sharpf/utils/losses.py
deleted file mode 100644
index 7b4c38e6..00000000
--- a/sharpf/utils/losses.py
+++ /dev/null
@@ -1,12 +0,0 @@
-import torch
-import numpy as np
-
-def balanced_accuracy(y_pred, y_true):
-    print('balanced accuracy print', y_pred.shape, y_true.shape)
-    tpr = np.sum((y_pred[y_pred == 1] == y_true[y_true == 1]).float(), axis=1)
-    tnr = np.sum((y_pred[y_pred == 0] == y_true[y_true == 1]).float(), axis=1)
-    tpr /= (tpr + np.sum((y_pred[y_pred == 0] == y_true[y_true == 1]).float(), axis=1))
-    tnr /= (tnr + np.sum((y_pred[y_pred == 1] == y_true[y_true == 0]).float().sum(), axis=1))
-    acc = (tpr + tnr) / 2
-    print(acc.shape)
-    return acc
\ No newline at end of file

From 56d07cbf45ebf86d70634344e59ebc40ad0bcd09 Mon Sep 17 00:00:00 2001
From: rakhimovv <risonyo@gmail.com>
Date: Tue, 25 Aug 2020 03:02:15 +0300
Subject: [PATCH 11/12] fix typos

---
 sharpf/modeling/metrics.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/sharpf/modeling/metrics.py b/sharpf/modeling/metrics.py
index b6889ee7..666bc942 100644
--- a/sharpf/modeling/metrics.py
+++ b/sharpf/modeling/metrics.py
@@ -6,10 +6,10 @@ def balanced_accuracy(tp: torch.Tensor, fp: torch.Tensor, tn: torch.Tensor, fn:
     Calculate balanced accuracy for one class based on provided statistics
 
     Args:
-        tp (Tensor): of shape (B, 1). True positive.
-        fo (Tensor): of shape (B, 1). False positive stats of a shape.
-        tn (Tensor): of shape (B, 1). True negative stats of a shape.
-        fn (Tensor): of shape (B, 1). False negative stats of a shape.
+        tp (Tensor): of shape (B, 1). True positive values.
+        fp (Tensor): of shape (B, 1). False positive values.
+        tn (Tensor): of shape (B, 1). True negative values.
+        fn (Tensor): of shape (B, 1). False negative values.
 
     Returns:
         torch.Tensor: balanced accuracy value

From aba17ccff2e447f23bde7237a4b7b2cd2a910d60 Mon Sep 17 00:00:00 2001
From: rakhimovv <risonyo@gmail.com>
Date: Tue, 25 Aug 2020 03:06:07 +0300
Subject: [PATCH 12/12] fix dimensions

---
 sharpf/modeling/meta_arch/depth_segmentator.py | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/sharpf/modeling/meta_arch/depth_segmentator.py b/sharpf/modeling/meta_arch/depth_segmentator.py
index 166bdd40..86f9c25d 100644
--- a/sharpf/modeling/meta_arch/depth_segmentator.py
+++ b/sharpf/modeling/meta_arch/depth_segmentator.py
@@ -55,22 +55,22 @@ def _shared_eval_step(self, batch, batch_idx, prefix):
         points = points.unsqueeze(1) if points.dim() == 3 else points
         preds = self.forward(points, as_mask=True)
         stats = [list(stat_scores(preds[i], target[i], class_index=1)) for i in range(preds.size(0))]
-        tp, fp, tn, fn, sup = torch.Tensor(stats).to(preds.device).T.unsqueeze(1)  # each of size (1, batch)
+        tp, fp, tn, fn, sup = torch.Tensor(stats).to(preds.device).T.unsqueeze(2)  # each of size (batch, 1)
         return {'tp': tp, 'fp': fp, 'tn': tn, 'fn': fn, 'sup': sup}
 
     def _shared_eval_epoch_end(self, outputs, prefix):
         # gather across sub batches
-        tp = torch.cat([output['tp'] for output in outputs])
-        fp = torch.cat([output['fp'] for output in outputs])
-        tn = torch.cat([output['tn'] for output in outputs])
-        fn = torch.cat([output['fn'] for output in outputs])
+        tp = torch.cat([output['tp'] for output in outputs], dim=0)
+        fp = torch.cat([output['fp'] for output in outputs], dim=0)
+        tn = torch.cat([output['tn'] for output in outputs], dim=0)
+        fn = torch.cat([output['fn'] for output in outputs], dim=0)
 
         # gather results across gpus
         synchronize()
-        tp = torch.cat(all_gather(tp))
-        fp = torch.cat(all_gather(fp))
-        tn = torch.cat(all_gather(tn))
-        fn = torch.cat(all_gather(fn))
+        tp = torch.cat(all_gather(tp.cpu()), dim=0).to(tp.device)  # pickle CPU copies: unpickled CUDA tensors return to their source device
+        fp = torch.cat(all_gather(fp.cpu()), dim=0).to(fp.device)
+        tn = torch.cat(all_gather(tn.cpu()), dim=0).to(tn.device)
+        fn = torch.cat(all_gather(fn.cpu()), dim=0).to(fn.device)
 
         # calculate metrics
         ba = balanced_accuracy(tp, fp, tn, fn)