Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve image source parsing for Folder dataset #784

Merged
merged 13 commits into from
Dec 13, 2022
12 changes: 12 additions & 0 deletions anomalib/config/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,6 +205,18 @@ def get_configurable_parameters(
# Project Configs
project_path = Path(config.project.path) / config.model.name / config.dataset.name

# Backward compatibility for the folder dataset: copy deprecated keys onto their new names.
# The format is lower-cased so this check agrees with the other format checks on config.dataset.format.
if config.dataset.format.lower() == "folder":
    if "mask" in config.dataset:
        warn(
            DeprecationWarning(
                "mask will be deprecated in favor of mask_dir in config.dataset in a future release."
            )
        )
        # `mask` is the old name of `mask_dir`.
        config.dataset.mask_dir = config.dataset.mask
    if "path" in config.dataset:
        warn(DeprecationWarning("path will be deprecated in favor of root in config.dataset in a future release."))
        # `path` is the old name of `root`; copy it over so code reading config.dataset.root keeps working.
        config.dataset.root = config.dataset.path

# add category subfolder if needed
if config.dataset.format.lower() in ("btech", "mvtec"):
project_path = project_path / config.dataset.category
Expand Down
4 changes: 2 additions & 2 deletions anomalib/data/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,12 +63,12 @@ def get_datamodule(config: Union[DictConfig, ListConfig]) -> AnomalibDataModule:
)
elif config.dataset.format.lower() == "folder":
datamodule = Folder(
root=config.dataset.path,
root=config.dataset.root,
normal_dir=config.dataset.normal_dir,
abnormal_dir=config.dataset.abnormal_dir,
task=config.dataset.task,
normal_test_dir=config.dataset.normal_test_dir,
mask_dir=config.dataset.mask,
mask_dir=config.dataset.mask_dir,
extensions=config.dataset.extensions,
normal_split_ratio=config.dataset.normal_split_ratio,
image_size=(config.dataset.image_size[0], config.dataset.image_size[1]),
Expand Down
55 changes: 44 additions & 11 deletions anomalib/data/folder.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,8 +63,33 @@ def _prepare_files_labels(
return filenames, labels


def _resolve_path(folder: Union[Path, str], root: Optional[Union[Path, str]] = None) -> Path:
"""Combines root and folder and returns the absolute path.

This allows users to pass either a root directory and relative paths, or absolute paths to each of the
image sources. This function makes sure that the samples dataframe always contains absolute paths.

Args:
folder (Optional[Union[Path, str]]): Folder location containing image or mask data.
root (Optional[Union[Path, str]]): Root directory for the dataset.
"""
folder = Path(folder)
djdameln marked this conversation as resolved.
Show resolved Hide resolved
if folder.is_absolute():
# path is absolute; return unmodified
path = folder
# path is relative.
elif root is None:
# no root provided; return absolute path
path = folder.resolve()
else:
# root provided; prepend root and return absolute path
path = (Path(root) / folder).resolve()
return path


def make_folder_dataset(
normal_dir: Union[str, Path],
root: Optional[Union[str, Path]] = None,
abnormal_dir: Optional[Union[str, Path]] = None,
normal_test_dir: Optional[Union[str, Path]] = None,
mask_dir: Optional[Union[str, Path]] = None,
Expand All @@ -75,6 +100,7 @@ def make_folder_dataset(

Args:
normal_dir (Union[str, Path]): Path to the directory containing normal images.
root (Optional[Union[str, Path]]): Path to the root directory of the dataset.
abnormal_dir (Optional[Union[str, Path]], optional): Path to the directory containing abnormal images.
normal_test_dir (Optional[Union[str, Path]], optional): Path to the directory containing
normal images for the test dataset. Normal test images will be a split of `normal_dir`
Expand All @@ -89,6 +115,11 @@ def make_folder_dataset(
Returns:
DataFrame: an output dataframe containing samples for the requested split (ie., train or test)
"""
normal_dir = _resolve_path(normal_dir, root)
abnormal_dir = _resolve_path(abnormal_dir, root) if abnormal_dir is not None else None
normal_test_dir = _resolve_path(normal_test_dir, root) if normal_test_dir is not None else None
mask_dir = _resolve_path(mask_dir, root) if mask_dir is not None else None
assert normal_dir.is_dir(), "A folder location must be provided in normal_dir."

filenames = []
labels = []
Expand Down Expand Up @@ -118,7 +149,8 @@ def make_folder_dataset(
samples["mask_path"] = ""
for index, row in samples.iterrows():
if row.label_index == 1:
samples.loc[index, "mask_path"] = str(mask_dir / row.image_path.name)
rel_image_path = row.image_path.relative_to(abnormal_dir)
samples.loc[index, "mask_path"] = str(mask_dir / rel_image_path)

# make sure all the files exist
# samples.image_path does NOT need to be checked because we build the df based on that
Expand Down Expand Up @@ -152,9 +184,8 @@ class FolderDataset(AnomalibDataset):
pre_process (PreProcessor): Image Pre-processor to apply transform.
split (Optional[Union[Split, str]]): Fixed subset split that follows from folder structure on file system.
Choose from [Split.FULL, Split.TRAIN, Split.TEST]

root (Union[str, Path]): Root folder of the dataset.
normal_dir (Union[str, Path]): Path to the directory containing normal images.
root (Optional[Union[str, Path]]): Root folder of the dataset.
abnormal_dir (Optional[Union[str, Path]], optional): Path to the directory containing abnormal images.
normal_test_dir (Optional[Union[str, Path]], optional): Path to the directory containing
normal images for the test dataset. Defaults to None.
Expand All @@ -174,8 +205,8 @@ def __init__(
self,
task: TaskType,
pre_process: PreProcessor,
root: Union[str, Path],
normal_dir: Union[str, Path],
root: Optional[Union[str, Path]] = None,
abnormal_dir: Optional[Union[str, Path]] = None,
normal_test_dir: Optional[Union[str, Path]] = None,
mask_dir: Optional[Union[str, Path]] = None,
Expand All @@ -186,9 +217,10 @@ def __init__(
super().__init__(task, pre_process)

self.split = split
self.normal_dir = Path(root) / Path(normal_dir)
self.abnormal_dir = Path(root) / Path(abnormal_dir) if abnormal_dir else None
self.normal_test_dir = Path(root) / Path(normal_test_dir) if normal_test_dir else None
self.root = root
self.normal_dir = normal_dir
self.abnormal_dir = abnormal_dir
self.normal_test_dir = normal_test_dir
self.mask_dir = mask_dir
self.extensions = extensions

Expand All @@ -197,6 +229,7 @@ def __init__(
def _setup(self):
"""Assign samples."""
self.samples = make_folder_dataset(
root=self.root,
normal_dir=self.normal_dir,
abnormal_dir=self.abnormal_dir,
normal_test_dir=self.normal_test_dir,
Expand All @@ -210,10 +243,10 @@ class Folder(AnomalibDataModule):
"""Folder DataModule.

Args:
root (Union[str, Path]): Path to the root folder containing normal and abnormal dirs.
normal_dir (Union[str, Path]): Name of the directory containing normal images.
Defaults to "normal".
abnormal_dir (Union[str, Path]): Name of the directory containing abnormal images.
root (Optional[Union[str, Path]]): Path to the root folder containing normal and abnormal dirs.
abnormal_dir (Optional[Union[str, Path]]): Name of the directory containing abnormal images.
Defaults to "abnormal".
normal_test_dir (Optional[Union[str, Path]], optional): Path to the directory containing
normal images for the test dataset. Defaults to None.
Expand Down Expand Up @@ -243,9 +276,9 @@ class Folder(AnomalibDataModule):

def __init__(
self,
root: Union[str, Path],
normal_dir: Union[str, Path],
abnormal_dir: Union[str, Path],
root: Optional[Union[str, Path]] = None,
abnormal_dir: Optional[Union[str, Path]] = None,
normal_test_dir: Optional[Union[str, Path]] = None,
mask_dir: Optional[Union[str, Path]] = None,
normal_split_ratio: float = 0.2,
Expand Down
2 changes: 1 addition & 1 deletion anomalib/models/cflow/config.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
dataset:
name: mvtec #options: [mvtec, btech, folder]
name: mvtec
format: mvtec
path: ./datasets/MVTec
category: bottle
Expand Down
2 changes: 1 addition & 1 deletion anomalib/models/dfkde/config.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
dataset:
name: mvtec #options: [mvtec, btech, folder]
name: mvtec
format: mvtec
path: ./datasets/MVTec
category: bottle
Expand Down
2 changes: 1 addition & 1 deletion anomalib/models/dfm/config.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
dataset:
name: mvtec #options: [mvtec, btech, folder]
name: mvtec
format: mvtec
path: ./datasets/MVTec
category: bottle
Expand Down
2 changes: 1 addition & 1 deletion anomalib/models/draem/config.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
dataset:
name: mvtec #options: [mvtec, btech, folder]
name: mvtec
format: mvtec
path: ./datasets/MVTec
category: bottle
Expand Down
2 changes: 1 addition & 1 deletion anomalib/models/fastflow/config.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
dataset:
name: mvtec #options: [mvtec, btech, folder]
name: mvtec
format: mvtec
path: ./datasets/MVTec
task: segmentation
Expand Down
2 changes: 1 addition & 1 deletion anomalib/models/ganomaly/config.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
dataset:
name: mvtec #options: [mvtec, btech, folder]
name: mvtec
format: mvtec
path: ./datasets/MVTec
category: bottle
Expand Down
2 changes: 1 addition & 1 deletion anomalib/models/padim/config.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
dataset:
name: mvtec #options: [mvtec, btech, folder]
name: mvtec
format: mvtec
path: ./datasets/MVTec
category: bottle
Expand Down
2 changes: 1 addition & 1 deletion anomalib/models/patchcore/config.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
dataset:
name: mvtec #options: [mvtec, btech, folder]
name: mvtec
format: mvtec
path: ./datasets/MVTec
task: segmentation
Expand Down
2 changes: 1 addition & 1 deletion anomalib/models/reverse_distillation/config.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
dataset:
name: mvtec #options: [mvtec, btech, folder]
name: mvtec
format: mvtec
path: ./datasets/MVTec
category: bottle
Expand Down
2 changes: 1 addition & 1 deletion anomalib/models/stfpm/config.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
dataset:
name: mvtec #options: [mvtec, btech, folder]
name: mvtec
format: mvtec
path: ./datasets/MVTec
category: bottle
Expand Down
4 changes: 2 additions & 2 deletions docs/source/how_to_guides/train_custom_data.rst
Original file line number Diff line number Diff line change
Expand Up @@ -71,11 +71,11 @@ Let's choose `Padim algorithm <https://arxiv.org/pdf/2011.08785.pdf>`_, copy the
dataset:
name: hazelnut
format: folder
path: ./datasets/Hazelnut_toy
root: ./datasets/Hazelnut_toy
normal_dir: good # name of the folder containing normal images.
abnormal_dir: colour # name of the folder containing abnormal images.
task: classification # classification or segmentation
mask: null #optional
mask_dir: null #optional
normal_test_dir: null # optional
extensions: null
split_ratio: 0.2 # normal images ratio to create a test split
Expand Down
4 changes: 2 additions & 2 deletions tests/pre_merge/datasets/test_datamodule.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ def make_folder_data_module(task="classification", batch_size=1, val_split_mode=
root=root,
normal_dir="good",
abnormal_dir="broken_large",
mask_dir=os.path.join(root, "ground_truth/broken_large"),
mask_dir="ground_truth/broken_large",
normal_split_ratio=0.2,
image_size=(256, 256),
train_batch_size=batch_size,
Expand Down Expand Up @@ -129,7 +129,7 @@ def data_sample():
root=root,
normal_dir="good",
abnormal_dir="broken_large",
mask_dir=os.path.join(root, "ground_truth/broken_large"),
mask_dir="ground_truth/broken_large",
normal_split_ratio=0.2,
image_size=(256, 256),
train_batch_size=1,
Expand Down