Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Relabel support for Darknet (YOLO) annotation files #138

Merged
merged 3 commits into from
Feb 5, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,23 @@ $ cvdata_filter --format darknet \
--boxes_per_class car:6000 truck:6000
```

## Relabel annotations
The module/script `cvdata/relabel.py` or the corresponding script entry point `cvdata_relabel`
can be used to change (relabel) the class labels in the annotation files of a dataset. For example,
to relabel all PASCAL annotation files in a directory from "dog" to "beagle":
```bash
$ cvdata_relabel --labels_dir /data/cvdata/pascal \
--old dog --new beagle --format pascal
```
Since Darknet (YOLO) annotation files use index values that correspond to entries
in a class labels file, use integer values for the `--old` and `--new`
arguments:
```bash
$ cvdata_relabel --labels_dir /data/cvdata/darknet \
--old 1 --new 4 --format darknet
```
This function currently supports `darknet`, `kitti`, and `pascal` formats.

## Remove duplicates
The module/script `cvdata/duplicates.py` or the corresponding script entry point
`cvdata_duplicates` can be used to remove duplicate images from a directory. This
Expand Down
51 changes: 49 additions & 2 deletions src/cvdata/relabel.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,35 @@
_logger = logging.getLogger(__name__)


# ------------------------------------------------------------------------------
def relabel_darknet(
        file_path: str,
        old_index: int,
        new_index: int,
):
    """
    Replaces the label index values of a Darknet (YOLO) annotation file.

    The file is rewritten in place: every line is re-emitted with its
    whitespace-separated fields joined by single spaces, and each line whose
    first field equals the old label index gets the new label index instead.

    :param file_path: path of the Darknet (YOLO) file
    :param old_index: label index value which if found will be replaced by the
        new label index
    :param new_index: new label index value
    :raises TypeError: if the file path is not a string or path-like object, or
        if either label index is not an integer (booleans are rejected)
    :raises ValueError: if either label index is less than zero
    """
    import numbers
    import os

    # arguments validation, all of it done before the file is touched

    # a None path must be rejected here: fileinput treats None as "use the
    # files named in sys.argv (or stdin)" and would rewrite those in place
    if not isinstance(file_path, (str, os.PathLike)):
        raise TypeError(
            f"Invalid file path argument {file_path!r}, "
            "must be a string or path-like object",
        )

    for index in (old_index, new_index):
        # bool is a subclass of int but str(True) is "True", and a float such
        # as 1.0 would be written as "1.0", neither of which is a label index
        if isinstance(index, bool) or not isinstance(index, numbers.Integral):
            raise TypeError(
                f"Invalid label index argument {index!r}, must be an integer",
            )

    if (old_index < 0) or (new_index < 0):
        raise ValueError("Invalid label index argument, must be equal or greater than zero")

    # the string forms are loop invariant, so build them once
    old_label = str(int(old_index))
    new_label = str(int(new_index))

    # replace the label indices in-place (with inplace=True anything printed
    # inside the loop is written to the file being read)
    with fileinput.FileInput(file_path, inplace=True) as file_input:
        for line in file_input:
            # split() with no separator also discards the line terminator
            parts = line.split()
            if parts and (parts[0] == old_label):
                parts[0] = new_label
            print(" ".join(parts))


# ------------------------------------------------------------------------------
def relabel_kitti(
file_path: str,
Expand All @@ -29,7 +58,7 @@ def relabel_kitti(
"""
Replaces the label values of a KITTI annotation file.

:param file_path: path of the KITTI file to have labels replaced
:param file_path: path of the KITTI file
:param old_label: label value which if found will be replaced by the new label
:param new_label: new label value
"""
Expand All @@ -53,7 +82,7 @@ def relabel_pascal(
"""
Replaces the label values of a PASCAL VOC annotation file.

:param file_path: path of the PASCAL VOC file to have labels replaced
:param file_path: path of the PASCAL VOC file
:param old_label: label value which if found will be replaced by the new label
:param new_label: new label value
"""
Expand Down Expand Up @@ -92,6 +121,21 @@ def _validate_args(
raise ValueError(f"File path argument {file_path} is not a valid file path")


# ------------------------------------------------------------------------------
def _relabel_darknet(arguments: Dict):
    """
    Unpacks a dictionary of arguments and calls the function for replacing the
    labels of a Darknet (YOLO) annotation file.

    Label index values given as strings are converted to integers before the
    call; values of any other type are passed through unchanged.

    :param arguments: dictionary of function arguments, should include:
        "file_path": path of the Darknet (YOLO) file to have labels renamed
        "old": label index which if found will be renamed
        "new": new label index value
    :raises ValueError: if a string label index is not a valid integer literal
    """

    def _to_index(value):
        # NOTE(review): the old/new values are presumably strings when they
        # originate from the command line, and relabel_darknet compares them
        # against zero, which fails for strings -- confirm against the parser
        return int(value) if isinstance(value, str) else value

    relabel_darknet(
        arguments["file_path"],
        _to_index(arguments["old"]),
        _to_index(arguments["new"]),
    )


# ------------------------------------------------------------------------------
def _relabel_kitti(arguments: Dict):
"""
Expand Down Expand Up @@ -161,6 +205,9 @@ def main():
elif args["format"] == "pascal":
file_ext = ".xml"
relabel_function = _relabel_pascal
elif args["format"] == "darknet":
file_ext = ".txt"
relabel_function = _relabel_darknet
else:
raise ValueError("Only KITTI and PASCAL annotation files are supported")

Expand Down
55 changes: 53 additions & 2 deletions tests/test_relabel.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,57 @@
logging.disable(logging.CRITICAL)


# ------------------------------------------------------------------------------
@pytest.mark.usefixtures(
    "data_dir",
)
def test_relabel_darknet(
        data_dir,
):
    """
    Test for the cvdata.relabel.relabel_darknet() function

    :param data_dir: temporary directory into which test files will be loaded
    """
    darknet_file_name = "darknet_1.txt"
    darknet_file_path = str(data_dir.join(darknet_file_name))

    # confirm that a relabeling won't occur if the old value is not present
    relabel.relabel_darknet(darknet_file_path, 58, 59)
    expected_darknet_file_name = "expected_darknet_1.txt"
    expected_darknet_file_path = str(data_dir.join(expected_darknet_file_name))
    assert text_files_equal(
        darknet_file_path,
        expected_darknet_file_path,
    )

    # confirm that relabeling occurred as expected
    relabel.relabel_darknet(darknet_file_path, 3, 2)
    expected_darknet_file_name = "expected_darknet_2.txt"
    expected_darknet_file_path = str(data_dir.join(expected_darknet_file_name))
    assert text_files_equal(
        darknet_file_path,
        expected_darknet_file_path,
    )

    # confirm that various invalid arguments raise an error; every call gets
    # its own pytest.raises context because once a call inside a context
    # raises, the statements after it in that context are never executed
    invalid_type_arguments = [
        (darknet_file_path, None, 0),
        (darknet_file_path, 0, None),
        (1, 0, 1),
        (1.0, "strings won't work", 0),
        (darknet_file_path, 1, "strings won't work"),
    ]
    for invalid_arguments in invalid_type_arguments:
        with pytest.raises(TypeError):
            relabel.relabel_darknet(*invalid_arguments)

    # a path that does not exist is reported when the file is opened
    with pytest.raises(FileNotFoundError):
        relabel.relabel_darknet("/not/present", 0, 1)

    invalid_value_arguments = [
        (darknet_file_path, -5, 1),
        (darknet_file_path, 1, -4),
    ]
    for invalid_arguments in invalid_value_arguments:
        with pytest.raises(ValueError):
            relabel.relabel_darknet(*invalid_arguments)

    # TODO(review): float indices (1.0), boolean indices (True) and a None
    # file path were listed under the TypeError context before but were never
    # reached; assert them here once it is confirmed that relabel_darknet
    # rejects those argument types

    # the rejected calls above must have left the annotation file untouched
    assert text_files_equal(
        darknet_file_path,
        expected_darknet_file_path,
    )


# ------------------------------------------------------------------------------
@pytest.mark.usefixtures(
"data_dir",
Expand Down Expand Up @@ -44,7 +95,7 @@ def test_relabel_kitti(
expected_kitti_file_path,
)

# confirm that invalid argument types raise an error
# confirm that various invalid arguments raise an error
with pytest.raises(ValueError):
relabel.relabel_kitti(None, "don't care", "don't care")
relabel.relabel_kitti(kitti_file_path, None, "don't care")
Expand Down Expand Up @@ -95,7 +146,7 @@ def test_relabel_pascal(
etree_after_relabel.getroot(),
)

# confirm that invalid argument types raise an error
# confirm that various invalid arguments raise an error
with pytest.raises(ValueError):
relabel.relabel_pascal(None, "don't care", "don't care")
relabel.relabel_pascal(pascal_file_path, None, "don't care")
Expand Down
1 change: 1 addition & 0 deletions tests/test_relabel/darknet_1.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
3 0.4995 0.2012 0.3994 0.0456
1 change: 1 addition & 0 deletions tests/test_relabel/expected_darknet_1.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
3 0.4995 0.2012 0.3994 0.0456
1 change: 1 addition & 0 deletions tests/test_relabel/expected_darknet_2.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
2 0.4995 0.2012 0.3994 0.0456