Skip to content

Commit

Permalink
Merge pull request #138 from monocongo/issue_137_relabel_darknet
Browse files Browse the repository at this point in the history
Relabel support for Darknet (YOLO) annotation files
  • Loading branch information
monocongo authored Feb 5, 2020
2 parents f8e02ec + 5d811bd commit 1855ee4
Show file tree
Hide file tree
Showing 6 changed files with 122 additions and 4 deletions.
17 changes: 17 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,23 @@ $ cvdata_filter --format darknet \
--boxes_per_class car:6000 truck:6000
```

## Relabel annotations
The module/script `cvdata/relabel.py` or the corresponding script entry point `cvdata_relabel`
can be used to rename the labels used in the annotation files of a dataset. For example,
to relabel all PASCAL annotation files in a directory from "dog" to "beagle":
```bash
$ cvdata_relabel --labels_dir /data/cvdata/pascal \
--old dog --new beagle --format pascal
```
Since Darknet (YOLO) annotation files use index values that correspond to entries
in a class labels file, use integer values for the `--old` and `--new`
arguments:
```bash
$ cvdata_relabel --labels_dir /data/cvdata/darknet \
--old 1 --new 4 --format darknet
```
This function currently supports `darknet`, `kitti`, and `pascal` formats.

## Remove duplicates
The module/script `cvdata/duplicates.py` or the corresponding script entry point
`cvdata_duplicates` can be used to remove duplicate images from a directory. This
Expand Down
51 changes: 49 additions & 2 deletions src/cvdata/relabel.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,35 @@
_logger = logging.getLogger(__name__)


# ------------------------------------------------------------------------------
def relabel_darknet(
        file_path: str,
        old_index: int,
        new_index: int,
):
    """
    Replaces the label index values of a Darknet (YOLO) annotation file.

    The file is rewritten in place: every line is split on whitespace and
    written back with its fields joined by single spaces, and the first
    field is replaced by the new index wherever it equals the old index.

    :param file_path: path of the Darknet (YOLO) file
    :param old_index: label index value which if found will be replaced by the
        new label index
    :param new_index: new label index value
    :raises TypeError: if the file path is not a string or path-like object,
        or if either index is not an integer (booleans are rejected)
    :raises ValueError: if either index is negative
    """

    # imported locally so the function does not rely on the module's imports
    import numbers
    import os

    # arguments validation
    # A None path must be rejected explicitly: fileinput.FileInput(None) falls
    # back to the files named in sys.argv and would rewrite those in place.
    if not isinstance(file_path, (str, os.PathLike)):
        raise TypeError(
            f"Invalid file path argument {file_path!r}, "
            "must be a string or path-like object"
        )
    for index in (old_index, new_index):
        # bool is a subclass of int, so it has to be excluded explicitly;
        # floats are rejected because str(1.0) would be written as "1.0"
        if isinstance(index, bool) or not isinstance(index, numbers.Integral):
            raise TypeError(
                f"Invalid label index argument {index!r}, must be an integer"
            )
    if (old_index < 0) or (new_index < 0):
        raise ValueError("Invalid label index argument, must be equal or greater than zero")

    # the labels are compared and written as text, so convert them only once
    old_label = str(old_index)
    new_label = str(new_index)

    # replace the label indices in-place (with inplace=True anything printed
    # inside the loop is written to the file instead of standard output)
    with fileinput.FileInput(file_path, inplace=True) as file_input:
        for line in file_input:
            parts = line.rstrip("\r\n").split()
            if parts and (parts[0] == old_label):
                parts[0] = new_label
            print(" ".join(parts))


# ------------------------------------------------------------------------------
def relabel_kitti(
file_path: str,
Expand All @@ -29,7 +58,7 @@ def relabel_kitti(
"""
Replaces the label values of a KITTI annotation file.
:param file_path: path of the KITTI file to have labels replaced
:param file_path: path of the KITTI file
:param old_label: label value which if found will be replaced by the new label
:param new_label: new label value
"""
Expand All @@ -53,7 +82,7 @@ def relabel_pascal(
"""
Replaces the label values of a PASCAL VOC annotation file.
:param file_path: path of the PASCAL VOC file to have labels replaced
:param file_path: path of the PASCAL VOC file
:param old_label: label value which if found will be replaced by the new label
:param new_label: new label value
"""
Expand Down Expand Up @@ -92,6 +121,21 @@ def _validate_args(
raise ValueError(f"File path argument {file_path} is not a valid file path")


# ------------------------------------------------------------------------------
def _relabel_darknet(arguments: Dict):
    """
    Unpacks a dictionary of arguments and calls the function for replacing the
    labels of a Darknet (YOLO) annotation file.

    :param arguments: dictionary of function arguments, should include:
        "file_path": path of the Darknet (YOLO) file to have labels renamed
        "old": label index which if found will be renamed
        "new": new label index value
    :raises ValueError: if a string index value cannot be parsed as an integer
    """

    old_index = arguments["old"]
    new_index = arguments["new"]

    # NOTE(review): the argument parser is not visible from here; presumably
    # "old"/"new" arrive as strings because the same options carry text labels
    # for the other formats -- confirm. String values are converted so the
    # integer validation in relabel_darknet() does not fail on them.
    if isinstance(old_index, str):
        old_index = int(old_index)
    if isinstance(new_index, str):
        new_index = int(new_index)

    relabel_darknet(arguments["file_path"], old_index, new_index)


# ------------------------------------------------------------------------------
def _relabel_kitti(arguments: Dict):
"""
Expand Down Expand Up @@ -161,6 +205,9 @@ def main():
elif args["format"] == "pascal":
file_ext = ".xml"
relabel_function = _relabel_pascal
elif args["format"] == "darknet":
file_ext = ".txt"
relabel_function = _relabel_darknet
else:
raise ValueError("Only KITTI and PASCAL annotation files are supported")

Expand Down
55 changes: 53 additions & 2 deletions tests/test_relabel.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,57 @@
logging.disable(logging.CRITICAL)


# ------------------------------------------------------------------------------
@pytest.mark.usefixtures(
    "data_dir",
)
def test_relabel_darknet(
        data_dir,
):
    """
    Test for the cvdata.relabel.relabel_darknet() function

    Every invalid call is wrapped in its own ``pytest.raises`` context: a
    ``with pytest.raises(...)`` block is left as soon as the first exception
    is raised, so stacking several calls in one block would only exercise the
    first of them.

    :param data_dir: temporary directory into which test files will be loaded
    """
    darknet_file_path = str(data_dir.join("darknet_1.txt"))

    # confirm that a relabeling won't occur if the old value is not present
    relabel.relabel_darknet(darknet_file_path, 58, 59)
    assert text_files_equal(
        darknet_file_path,
        str(data_dir.join("expected_darknet_1.txt")),
    )

    # confirm that relabeling occurred as expected
    relabel.relabel_darknet(darknet_file_path, 3, 2)
    assert text_files_equal(
        darknet_file_path,
        str(data_dir.join("expected_darknet_2.txt")),
    )

    # confirm that arguments of an invalid type raise an error
    type_error_cases = [
        (darknet_file_path, None, 0),
        (darknet_file_path, 0, None),
        (1, 0, 1),
        (1.0, "strings won't work", 0),
        (darknet_file_path, 1, "strings won't work"),
    ]
    for invalid_args in type_error_cases:
        with pytest.raises(TypeError):
            relabel.relabel_darknet(*invalid_args)

    # confirm that a path to a missing file raises an error
    with pytest.raises(FileNotFoundError):
        relabel.relabel_darknet("/not/present", 0, 1)

    # confirm that negative index values raise an error
    value_error_cases = [
        (darknet_file_path, -5, 1),
        (darknet_file_path, 1, -4),
    ]
    for invalid_args in value_error_cases:
        with pytest.raises(ValueError):
            relabel.relabel_darknet(*invalid_args)


# ------------------------------------------------------------------------------
@pytest.mark.usefixtures(
"data_dir",
Expand Down Expand Up @@ -44,7 +95,7 @@ def test_relabel_kitti(
expected_kitti_file_path,
)

# confirm that invalid argument types raise an error
# confirm that various invalid arguments raise an error
with pytest.raises(ValueError):
relabel.relabel_kitti(None, "don't care", "don't care")
relabel.relabel_kitti(kitti_file_path, None, "don't care")
Expand Down Expand Up @@ -95,7 +146,7 @@ def test_relabel_pascal(
etree_after_relabel.getroot(),
)

# confirm that invalid argument types raise an error
# confirm that various invalid arguments raise an error
with pytest.raises(ValueError):
relabel.relabel_pascal(None, "don't care", "don't care")
relabel.relabel_pascal(pascal_file_path, None, "don't care")
Expand Down
1 change: 1 addition & 0 deletions tests/test_relabel/darknet_1.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
3 0.4995 0.2012 0.3994 0.0456
1 change: 1 addition & 0 deletions tests/test_relabel/expected_darknet_1.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
3 0.4995 0.2012 0.3994 0.0456
1 change: 1 addition & 0 deletions tests/test_relabel/expected_darknet_2.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
2 0.4995 0.2012 0.3994 0.0456

0 comments on commit 1855ee4

Please sign in to comment.