Transfer learning and convolutional #2

Merged
merged 5 commits into from Jun 19, 2022
Changes from 4 commits
354 changes: 338 additions & 16 deletions notebooks/Data analysis.ipynb

Large diffs are not rendered by default.

25 changes: 16 additions & 9 deletions src/catdog/catdog/dataset.py
@@ -3,31 +3,38 @@

 from PIL import Image
 from torch.utils.data import Dataset
+from torchvision import transforms

 from definitions import IMG_PATH


 class CatDogDataset(Dataset):
-    def __init__(self, cat_dog_df, transforms=None, img_output_size=(500,500)):
+    def __init__(self, cat_dog_df, transformations=None, feature_scaling=255, img_output_size=(500, 500), img_path="../data/images/"):
Collaborator
Do you really want img_path hardcoded there? I had replaced it with definitions.IMG_PATH.

Owner Author
No, I must not have looked closely enough when merging the file with main. I'll fix it in the next commit.

         self.files = (IMG_PATH + cat_dog_df["file"]).values
         self.width = cat_dog_df["width"].values
         self.height = cat_dog_df["height"].values
         self.target = np.where(cat_dog_df["class"].values == "cat", 1, 0).astype(np.float32)
         self.bbox = cat_dog_df[["xmin", "ymin", "xmax", "ymax"]].values.astype(np.float32)
-        self.resizer = fn.Resize(img_output_size)
+        self.resizer = transforms.Resize(img_output_size)

+        self.to_tensor = transforms.ToTensor()
+        self.normalizer = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
Collaborator
Isn't this hardcoded too?

Collaborator
I see now that these values are used as a PyTorch standard. Do we want to use them for all our models, even the ones that weren't pretrained?
pytorch/vision#1439

Owner Author
That's exactly why I put them there. I mean, yes, I didn't question it much. We could compute those values from our own dataset and replace them, but honestly I'm not too worried about it.

Collaborator
Agreed.
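If we ever do want dataset-specific statistics instead of the ImageNet ones, a minimal sketch of how they could be computed (assumes a loader yielding unnormalized tensors in [0, 1]; the helper name is ours):

```python
import torch
from torch.utils.data import DataLoader

def channel_stats(loader: DataLoader):
    """Per-channel mean/std over batches of (N, 3, H, W) tensors scaled to [0, 1]."""
    count, mean_sum, sq_sum = 0, torch.zeros(3), torch.zeros(3)
    for imgs, *_ in loader:
        n = imgs.size(0)
        mean_sum += imgs.mean(dim=(0, 2, 3)) * n   # per-channel mean of this batch
        sq_sum += (imgs ** 2).mean(dim=(0, 2, 3)) * n
        count += n
    mean = mean_sum / count
    std = (sq_sum / count - mean ** 2).sqrt()      # std via E[x^2] - E[x]^2
    return mean, std
```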

+        self.scaling = feature_scaling

+        self.transformations = transformations
-        self.transforms = transforms

     def __len__(self):
         return len(self.files)

     def __getitem__(self, idx):
-        resized_img = self.resizer(Image.open(self.files[idx]).convert("RGB"))
-        np_img = np.asarray(resized_img)
+        resized_img = self.resizer(self.to_tensor(Image.open(self.files[idx]).convert("RGB")))
+        img = self.normalizer(resized_img)

         bbox = self.bbox[idx]
-        if self.transforms is not None:
-            np_img, bbox = self.transforms(np_img, bbox)
+        if self.transformations is not None:
+            np_img, bbox = self.transformations(img, bbox)
Collaborator
Maybe we want to rename np_img here?

Owner Author
Actually yes, it's a bug. I'll fix it in the next commit.


-        torch_img = fn.functional.to_tensor(np_img)
-        return torch_img, self.target[idx], bbox
+        return img, self.target[idx], bbox
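For reference, a sketch of how the updated dataset would be consumed downstream (assumes a DataFrame with the columns used above; the batch size is illustrative):

```python
from torch.utils.data import DataLoader

dataset = CatDogDataset(cat_dog_df=df, img_output_size=(500, 500))  # df loaded elsewhere
loader = DataLoader(dataset, batch_size=32, shuffle=True)

imgs, targets, bboxes = next(iter(loader))
# imgs:    (32, 3, 500, 500) normalized float tensors
# targets: (32,) with 1.0 = cat, 0.0 = dog
# bboxes:  (32, 4) as xmin, ymin, xmax, ymax
```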
16 changes: 11 additions & 5 deletions src/catdog/catdog/models/base.py
@@ -23,13 +23,19 @@ def forward(self, x):


 class CatDogClassifier(pl.LightningModule):
-    def __init__(self, *args: Any, **kwargs: Any):
+    def __init__(self, optimizer_params=None, bbox_alpha=1, *args: Any, **kwargs: Any):
         super().__init__(*args, **kwargs)
         self.AUROC = torchmetrics.AUROC()  # num_classes=2 was not meant for binary problems, but multiclass problems
         self.Precision = torchmetrics.Precision()
         self.Recall = torchmetrics.Recall()
         self.batch_size = 32  # TODO(cgiudice): either move this to a config file or receive it as parameter

+        optimizer_params = self.get_default_optimizer_params() if not optimizer_params else optimizer_params
+        self.save_hyperparameters("optimizer_params", "bbox_alpha")
+
+    def forward_pass(self, img):
+        raise NotImplementedError
+
     def get_default_optimizer_params(self):
         return {"lr": 0.02}

@@ -41,7 +47,7 @@ def forward(self, x):
"""
with torch.no_grad():
x = self.preprocess_img(x)
pred_class, pred_bbox = self.model(x)
pred_class, pred_bbox = self.forward_pass(x)
return pred_class, pred_bbox

def preprocess_img(self, img):
@@ -50,10 +56,10 @@ def preprocess_img(self, img):
     def _shared_step(self, batch):
         img, target, bbox = batch
         model_input = self.preprocess_img(img)
-        pred_target, pred_bbox = self.model(model_input)
+        pred_target, pred_bbox = self.forward_pass(model_input)
         # adds one dimension to target, just the way torch likes it
         target = target.unsqueeze(1)

         classification_loss = F.binary_cross_entropy(pred_target, target)
         bbox_loss = F.mse_loss(pred_bbox, bbox)
         loss = classification_loss + self.hparams.bbox_alpha * bbox_loss
@@ -74,7 +80,7 @@ def validation_step(self, batch, batch_idx):
return {"img": imgs[sel_idx], "sel_idx": sel_idx, "target": target,
"pred_target": pred_target, "pred_bbox": pred_bbox,
"classification_loss": classification_loss, "bbox_loss": bbox_loss, "loss": loss}

def test_step(self, batch, batch_idx):
target, pred_target, bbox, pred_bbox, classification_loss, bbox_loss, loss = self._shared_step(batch)
imgs = batch[0]
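On bbox_alpha: the total loss is binary cross-entropy for the class plus alpha times the MSE of the box, so alpha sets the trade-off between the two tasks. A toy check of the arithmetic (illustrative numbers):

```python
import torch
import torch.nn.functional as F

pred_target = torch.tensor([[0.8]])
target = torch.tensor([[1.0]])
pred_bbox = torch.tensor([[0.1, 0.1, 0.6, 0.6]])
bbox = torch.tensor([[0.0, 0.0, 0.5, 0.5]])

classification_loss = F.binary_cross_entropy(pred_target, target)  # -ln(0.8) ~= 0.223
bbox_loss = F.mse_loss(pred_bbox, bbox)                            # mean of four 0.1^2 errors = 0.01
loss = classification_loss + 1 * bbox_loss                         # with bbox_alpha = 1
```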
35 changes: 35 additions & 0 deletions src/catdog/catdog/models/convolutional.py
@@ -0,0 +1,35 @@
+import torch.nn
+from .base import CatDogClassifier, CatDogOutput
+from catdog.utils.image import appropiate_padding
+
+
+class ConvolutionalClassifier(CatDogClassifier):
+    def __init__(self, input_shape, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+
+        in_channels, current_height, current_width = input_shape
+        conv1 = torch.nn.Conv2d(in_channels=in_channels, out_channels=32, kernel_size=(3, 3),
+                                # padding=appropiate_padding((current_height, current_width), (3,3))
+                                )
+        conv2 = torch.nn.Conv2d(in_channels=32, out_channels=64, kernel_size=(3, 3),
+                                # padding=appropiate_padding((current_height, current_width), (3,3))
+                                )
+        conv3 = torch.nn.Conv2d(in_channels=64, out_channels=128, kernel_size=(3, 3),
+                                # padding=appropiate_padding((current_height, current_width), (3,3))
+                                )
+
+        self.model = torch.nn.Sequential(conv1, torch.nn.ReLU(), torch.nn.MaxPool2d((2, 2)),
+                                         conv2, torch.nn.ReLU(), torch.nn.MaxPool2d((2, 2)),
+                                         conv3, torch.nn.ReLU(), torch.nn.MaxPool2d((2, 2)),
+                                         torch.nn.Flatten(start_dim=1),
+                                         CatDogOutput(36992)
+                                         )
+
+    def configure_optimizers(self):
+        params = self.hparams["optimizer_params"]
+        params = self.get_default_optimizer_params() if not params else params
+        return torch.optim.Adam(self.parameters(), **params)
+
+    def forward_pass(self, img):
+        return self.model(img)
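The 36992 passed to CatDogOutput is the flattened feature size, i.e. 128 channels × 17 × 17, which is what three valid 3×3 convolutions each followed by 2×2 max-pooling produce from 150×150 inputs. Rather than hardcoding it, the size can be derived with a dry run (a sketch; `features` stands for the conv/pool/flatten prefix of the Sequential above):

```python
import torch

def flattened_size(features: torch.nn.Module, input_shape) -> int:
    """Run a dummy batch through the feature extractor to get the flattened width."""
    with torch.no_grad():
        return features(torch.zeros(1, *input_shape)).shape[1]

# flattened_size(features, (3, 150, 150)) -> 36992  (= 128 * 17 * 17)
```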
8 changes: 5 additions & 3 deletions src/catdog/catdog/models/feedforward.py
@@ -3,10 +3,9 @@


 class MLPClassifier(CatDogClassifier):
-    def __init__(self, input_size, activation=torch.nn.ReLU, hidden_sizes=(),
-                 optimizer_params=None, bbox_alpha=1):
-
-        super().__init__()
+    def __init__(self, input_size, activation=torch.nn.ReLU, hidden_sizes=(), *args, **kwargs):
+        super().__init__(*args, **kwargs)
+
         layers = []
         if hidden_sizes:
             sizes = [input_size, *hidden_sizes]
@@ -33,3 +32,6 @@ def configure_optimizers(self):
         params = self.hparams["optimizer_params"]
         params = self.get_default_optimizer_params() if not params else params
         return torch.optim.Adam(self.parameters(), **params)
+
+    def forward_pass(self, img):
+        return self.model(img)
23 changes: 23 additions & 0 deletions src/catdog/catdog/models/transfer_learning.py
@@ -0,0 +1,23 @@
+from typing import Any
+
+from catdog.models.base import CatDogClassifier, CatDogOutput
+from torch.optim import Adam
+import torchvision
+
+class TransferLearningClassifier(CatDogClassifier):
+    def __init__(self, *args: Any, **kwargs: Any):
+        super().__init__(*args, **kwargs)
+
+        self.resnet = torchvision.models.resnet18(pretrained=True)
+        for param in self.resnet.parameters():
+            param.requires_grad = False
+        self.resnet.fc = CatDogOutput(512)
+
+    def configure_optimizers(self):
+        params = self.hparams.get("optimizer_params", None)
+        params = self.get_default_optimizer_params() if not params else params
+        # Only optimize the parameters for the last layer
+        return Adam(self.resnet.fc.parameters(), **params)
+
+    def forward_pass(self, img):
+        return self.resnet(img)
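Because every backbone parameter gets requires_grad = False and only resnet.fc is handed to Adam, training updates just the new head. A quick sanity check (sketch):

```python
model = TransferLearningClassifier()

trainable = [name for name, p in model.named_parameters() if p.requires_grad]
print(trainable)  # expected: only the resnet.fc.* (CatDogOutput) parameters
```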
7 changes: 7 additions & 0 deletions src/catdog/catdog/utils/image.py
@@ -3,6 +3,13 @@
 import matplotlib.patches as patches


+def appropiate_padding(input_shape, kernel_shape):
+    def n_pad(x, k):
+        # padding needed so the dimension becomes a multiple of the kernel size;
+        # the final % k maps "already a multiple" to 0 instead of k
+        return (k - (x % k)) % k
+
+    return [n_pad(n, k) for n, k in zip(input_shape, kernel_shape)]
+
+
 def plot_image_bbox(img, category, xmin, ymin, xmax, ymax, ax=None):
     ax = plt.gca() if not ax else ax
     xmin *= img.shape[0]
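A usage sketch for the padding helper above (values follow from the modulo arithmetic, including the % k fix noted in the comment):

```python
# 500 % 3 == 2, so one extra pixel per dimension makes 501 a multiple of 3
appropiate_padding((500, 500), (3, 3))  # -> [1, 1]
# already a multiple of the kernel size: no padding needed
appropiate_padding((6, 6), (3, 3))      # -> [0, 0]
```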