Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add oversampler. #236

Merged
merged 2 commits into from
Oct 29, 2021
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 26 additions & 2 deletions continuum/tasks/utils.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,37 @@
from typing import Tuple, List


import torch
import numpy as np

from continuum.tasks.base import BaseTaskSet
from continuum.tasks.base import BaseTaskSet, TaskType
from continuum.tasks.task_set import TaskSet


arthurdouillard marked this conversation as resolved.
Show resolved Hide resolved
def get_balanced_sampler(taskset, log=False):
    """Create a sampler that rebalances the classes of a task set.

    Give the returned sampler to the dataloader through its `sampler` argument.

    Every sample gets a weight that only depends on the number of samples of
    its class (`count`), `total` being the number of samples in the task set:

    * ``log=False``: ``1 / count``, so that every class present in the task
      set is drawn with the same probability.
    * ``log=True``: ``log(1 + total / count)``, a smoothed log of the inverse
      class frequency. Rare classes are still favored, but far less than with
      the plain inverse count.

    :param taskset: A pytorch dataset that implements the TaskSet interface.
    :param log: Use log weights. If enabled, there will still be some imbalance
                but on the other hand, the oversampling/downsampling won't be
                as violent.
    :return: A PyTorch `WeightedRandomSampler` drawing `len(taskset)` indices.
    :raises NotImplementedError: If the task set data type is segmentation,
                                 object detection, or text.
    """
    if taskset.data_type in (TaskType.SEGMENTATION, TaskType.OBJ_DETECTION, TaskType.TEXT):
        raise NotImplementedError(
            "Samplers are not yet available for the "
            f"{taskset.data_type} type."
        )

    # NOTE(review): assumes the targets are a 1-D array of non-negative
    # integer class ids, as required by `np.bincount` -- confirm for new
    # task types.
    y = taskset.get_raw_samples()[1]
    nb_per_class = np.bincount(y)

    # A task often holds a slice of the label space (e.g. classes 5-9): the
    # missing ids get a zero count. Only compute weights for the classes that
    # are present, to avoid dividing by zero; the zero entries left for absent
    # classes are never read by the lookup below.
    present = nb_per_class > 0
    weights_per_class = np.zeros(len(nb_per_class), dtype=np.float64)
    if log:
        # `np.log(1 / count)` is <= 0 for every class, and the sampler
        # (`torch.multinomial`) rejects negative weights. The smoothed log of
        # the inverse frequency is strictly positive and still decreases with
        # the class size.
        weights_per_class[present] = np.log1p(nb_per_class.sum() / nb_per_class[present])
    else:
        weights_per_class[present] = 1.0 / nb_per_class[present]

    # Per-sample weights: each sample inherits the weight of its class.
    weights = weights_per_class[y]

    return torch.utils.data.sampler.WeightedRandomSampler(weights, len(taskset))


def split_train_val(dataset: BaseTaskSet, val_split: float = 0.1) -> Tuple[BaseTaskSet, BaseTaskSet]:
Expand Down