diff --git a/.github/workflows/code_scan.yml b/.github/workflows/code_scan.yml index 2bc01036cd..fde7def50a 100644 --- a/.github/workflows/code_scan.yml +++ b/.github/workflows/code_scan.yml @@ -31,14 +31,14 @@ jobs: mkdir -p .ci/base/docs pip-compile -o .ci/base/docs/requirements.txt docs/requirements.txt - name: Run Trivy Scan (full, csv) - uses: aquasecurity/trivy-action@6e7b7d1fd3e4fef0c5fa8cce1229c54b2c9bd0d8 # 0.24.0 + uses: aquasecurity/trivy-action@18f2510ee396bbf400402947b394f2dd8c87dbb0 # 0.29.0 with: trivy-config: ".ci/trivy-csv.yaml" scan-type: 'fs' scan-ref: ".ci/" scanners: vuln,secret - name: Run Trivy Scan (prod, spdx.json) - uses: aquasecurity/trivy-action@6e7b7d1fd3e4fef0c5fa8cce1229c54b2c9bd0d8 # 0.24.0 + uses: aquasecurity/trivy-action@18f2510ee396bbf400402947b394f2dd8c87dbb0 # 0.29.0 with: trivy-config: ".ci/trivy-json.yaml" scan-type: 'fs' diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml index 93ef850ddc..c6a290e0da 100644 --- a/.github/workflows/codeql.yml +++ b/.github/workflows/codeql.yml @@ -52,7 +52,7 @@ jobs: # Initializes the CodeQL tools for scanning. - name: Initialize CodeQL - uses: github/codeql-action/init@afb54ba388a7dca6ecae48f608c4ff05ff4cc77a # v3.25.15 + uses: github/codeql-action/init@f09c1c0a94de965c15400f5634aa42fac8fb8f88 # v3.27.5 with: languages: ${{ matrix.language }} # If you wish to specify custom queries, you can do so here or in a config file. 
@@ -73,7 +73,7 @@ jobs: python -m build - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@afb54ba388a7dca6ecae48f608c4ff05ff4cc77a # v3.25.15 + uses: github/codeql-action/analyze@f09c1c0a94de965c15400f5634aa42fac8fb8f88 # v3.27.5 with: category: "/language:${{matrix.language}}" - name: Generate Security Report diff --git a/.github/workflows/issue_assignment.yml b/.github/workflows/issue_assignment.yml index 8cba4d3f45..16caac999b 100644 --- a/.github/workflows/issue_assignment.yml +++ b/.github/workflows/issue_assignment.yml @@ -14,9 +14,9 @@ jobs: steps: - name: Auto-assign Issue - uses: pozil/auto-assign-issue@v2.0.0 + uses: pozil/auto-assign-issue@v2.0.1 with: repo-token: ${{ secrets.GITHUB_TOKEN }} - assignees: vinnamkim,jihyeonyi,sooahleex,itrushkin + assignees: jihyeonyi,sooahleex,itrushkin numOfAssignee: 1 allowSelfAssign: false diff --git a/.github/workflows/pr_check.yml b/.github/workflows/pr_check.yml index 3870725d6e..1b35dc3af6 100644 --- a/.github/workflows/pr_check.yml +++ b/.github/workflows/pr_check.yml @@ -62,6 +62,6 @@ jobs: run: | tox -vvv -e tests-py${{ matrix.tox-env-py }}-${{ matrix.tox-env-os }} -- tests/integration - name: Upload coverage reports to Codecov - uses: codecov/codecov-action@v4 + uses: codecov/codecov-action@v5 with: flags: ${{ matrix.os }}_Python-${{ matrix.python-version }} diff --git a/.github/workflows/publish_to_pypi.yml b/.github/workflows/publish_to_pypi.yml index 061b8f9f25..b4653f07ac 100644 --- a/.github/workflows/publish_to_pypi.yml +++ b/.github/workflows/publish_to_pypi.yml @@ -80,12 +80,12 @@ jobs: file_glob: true - name: Publish package distributions to PyPI if: ${{ steps.check-tag.outputs.match != '' }} - uses: pypa/gh-action-pypi-publish@v1.9.0 + uses: pypa/gh-action-pypi-publish@v1.12.2 with: password: ${{ secrets.PYPI_API_TOKEN }} - name: Publish package distributions to TestPyPI if: ${{ steps.check-tag.outputs.match == '' }} - uses: pypa/gh-action-pypi-publish@v1.9.0 + uses: 
pypa/gh-action-pypi-publish@v1.12.2 with: password: ${{ secrets.TESTPYPI_API_TOKEN }} repository-url: https://test.pypi.org/legacy/ diff --git a/.github/workflows/scorecard.yml b/.github/workflows/scorecard.yml index 8f9d09195f..693bff2bbc 100644 --- a/.github/workflows/scorecard.yml +++ b/.github/workflows/scorecard.yml @@ -67,6 +67,6 @@ jobs: # Upload the results to GitHub's code scanning dashboard. - name: "Upload to code-scanning" - uses: github/codeql-action/upload-sarif@afb54ba388a7dca6ecae48f608c4ff05ff4cc77a # v3.25.15 + uses: github/codeql-action/upload-sarif@f09c1c0a94de965c15400f5634aa42fac8fb8f88 # v3.27.5 with: sarif_file: results.sarif diff --git a/3rd-party.txt b/3rd-party.txt index 0401a59ee0..85d2f2edf2 100644 --- a/3rd-party.txt +++ b/3rd-party.txt @@ -7518,5 +7518,22 @@ Apache-2.0 See the License for the specific language governing permissions and limitations under the License. ------------------------------------------------------------- +portalocker + +BSD-3-Clause + +Copyright 2022 Rick van Hattem + +Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +------------------------------------------------------------- * Other names and brands may be claimed as the property of others. diff --git a/CHANGELOG.md b/CHANGELOG.md index c246c0f18c..22af027194 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,16 +5,79 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## \[unreleased\] +## \[Unreleased\] + +### New features +- Convert Cuboid2D annotation to/from 3D data + () +- Add label groups for hierarchical classification in ImageNet + () + +### Enhancements +- Enhance 'id_from_image_name' transform to ensure each identifier is unique + () +- Optimize path assignment to handle point cloud in JSON without images + () +- Add documentation for framework conversion + () + +### Bug fixes +- Fix assertion to compare hashkeys against expected value + () + +## Q4 2024 Release 1.10.0 + +### New features +- Support KITTI 3D format + (, ) +- Add PseudoLabeling transform for unlabeled dataset + () + +### Enhancements +- Raise an appropriate error when exporting a datumaro dataset if its subset name contains path separators. 
+ () +- Update docs for transform plugins + () +- Update ov ir model for explorer openvino launcher with CLIP ViT-L/14@336px model + () +- Optimize path assignment to handle point cloud in JSON without images + () +- Set TabularTransform to process clean transform in parallel + () + +### Bug fixes +- Fix datumaro format to load visibility information from Points annotations + () + +## Q4 2024 Release 1.9.1 +### Enhancements +- Support multiple labels for kaggle format + () +- Use DataFrame.map instead of DataFrame.applymap + () + +### Bug fixes +- Fix StreamDataset merging when importing in eager mode + () + +## Q3 2024 Release 1.9.0 ### New features - Add a new CLI command: datum format () +- Add a new Cuboid2D annotation type + () +- Support language dataset for DmTorchDataset + () ### Enhancements - Change _Shape to Shape and add comments for subclasses of Shape () +- Fix `kitti_raw` importer and exporter for dimensions (height, width, length) in meters + () ### Bug fixes +- Fix KITTI-3D importer and exporter + () ## Q3 2024 Release 1.8.0 ### New features diff --git a/README.md b/README.md index fd63a4f743..067d007604 100644 --- a/README.md +++ b/README.md @@ -2,6 +2,7 @@ [![Build status](https://github.com/openvinotoolkit/datumaro/actions/workflows/health_check.yml/badge.svg)](https://github.com/openvinotoolkit/datumaro/actions/workflows/health_check.yml) [![codecov](https://codecov.io/gh/openvinotoolkit/datumaro/branch/develop/graph/badge.svg?token=FG25VU096Q)](https://codecov.io/gh/openvinotoolkit/datumaro) +[![Downloads](https://static.pepy.tech/badge/datumaro)](https://pepy.tech/project/datumaro) A framework and CLI tool to build, transform, and analyze datasets. 
diff --git a/docker/segment-anything/requirements.txt b/docker/segment-anything/requirements.txt index 8d1056bd21..99e2828731 100644 --- a/docker/segment-anything/requirements.txt +++ b/docker/segment-anything/requirements.txt @@ -371,38 +371,33 @@ numpy==1.26.4 \ # onnxruntime # opencv-python # pycocotools -onnx==1.16.0 \ - --hash=sha256:034ae21a2aaa2e9c14119a840d2926d213c27aad29e5e3edaa30145a745048e1 \ - --hash=sha256:03a627488b1a9975d95d6a55582af3e14c7f3bb87444725b999935ddd271d352 \ - --hash=sha256:0e60ca76ac24b65c25860d0f2d2cdd96d6320d062a01dd8ce87c5743603789b8 \ - --hash=sha256:0efeb46985de08f0efe758cb54ad3457e821a05c2eaf5ba2ccb8cd1602c08084 \ - --hash=sha256:209fe84995a28038e29ae8369edd35f33e0ef1ebc3bddbf6584629823469deb1 \ - --hash=sha256:237c6987c6c59d9f44b6136f5819af79574f8d96a760a1fa843bede11f3822f7 \ - --hash=sha256:257858cbcb2055284f09fa2ae2b1cfd64f5850367da388d6e7e7b05920a40c90 \ - --hash=sha256:298f28a2b5ac09145fa958513d3d1e6b349ccf86a877dbdcccad57713fe360b3 \ - --hash=sha256:30f02beaf081c7d9fa3a8c566a912fc4408e28fc33b1452d58f890851691d364 \ - --hash=sha256:3e0860fea94efde777e81a6f68f65761ed5e5f3adea2e050d7fbe373a9ae05b3 \ - --hash=sha256:5202559070afec5144332db216c20f2fff8323cf7f6512b0ca11b215eacc5bf3 \ - --hash=sha256:62a2e27ae8ba5fc9b4a2620301446a517b5ffaaf8566611de7a7c2160f5bcf4c \ - --hash=sha256:66300197b52beca08bc6262d43c103289c5d45fde43fb51922ed1eb83658cf0c \ - --hash=sha256:70a90649318f3470985439ea078277c9fb2a2e6e2fd7c8f3f2b279402ad6c7e6 \ - --hash=sha256:71839546b7f93be4fa807995b182ab4b4414c9dbf049fee11eaaced16fcf8df2 \ - --hash=sha256:7449241e70b847b9c3eb8dae622df8c1b456d11032a9d7e26e0ee8a698d5bf86 \ - --hash=sha256:7532343dc5b8b5e7c3e3efa441a3100552f7600155c4db9120acd7574f64ffbf \ - --hash=sha256:7665217c45a61eb44718c8e9349d2ad004efa0cb9fbc4be5c6d5e18b9fe12b52 \ - --hash=sha256:7755cbd5f4e47952e37276ea5978a46fc8346684392315902b5ed4a719d87d06 \ - --hash=sha256:77579e7c15b4df39d29465b216639a5f9b74026bdd9e4b6306cd19a32dcfe67c \ - 
--hash=sha256:7fb29a9a692b522deef1f6b8f2145da62c0c43ea1ed5b4c0f66f827fdc28847d \ - --hash=sha256:81b4ee01bc554e8a2b11ac6439882508a5377a1c6b452acd69a1eebb83571117 \ - --hash=sha256:8cf3e518b1b1b960be542e7c62bed4e5219e04c85d540817b7027029537dec92 \ - --hash=sha256:9eadbdce25b19d6216f426d6d99b8bc877a65ed92cbef9707751c6669190ba4f \ - --hash=sha256:ae0029f5e47bf70a1a62e7f88c80bca4ef39b844a89910039184221775df5e43 \ - --hash=sha256:c392faeabd9283ee344ccb4b067d1fea9dfc614fa1f0de7c47589efd79e15e78 \ - --hash=sha256:d7886c05aa6d583ec42f6287678923c1e343afc4350e49d5b36a0023772ffa22 \ - --hash=sha256:ddf14a3d32234f23e44abb73a755cb96a423fac7f004e8f046f36b10214151ee \ - --hash=sha256:e5752bbbd5717304a7643643dba383a2fb31e8eb0682f4e7b7d141206328a73b \ - --hash=sha256:ec22a43d74eb1f2303373e2fbe7fbcaa45fb225f4eb146edfed1356ada7a9aea \ - --hash=sha256:f51179d4af3372b4f3800c558d204b592c61e4b4a18b8f61e0eea7f46211221a +onnx==1.17.0 \ + --hash=sha256:0141c2ce806c474b667b7e4499164227ef594584da432fd5613ec17c1855e311 \ + --hash=sha256:081ec43a8b950171767d99075b6b92553901fa429d4bc5eb3ad66b36ef5dbe3a \ + --hash=sha256:0e906e6a83437de05f8139ea7eaf366bf287f44ae5cc44b2850a30e296421f2f \ + --hash=sha256:23b8d56a9df492cdba0eb07b60beea027d32ff5e4e5fe271804eda635bed384f \ + --hash=sha256:317870fca3349d19325a4b7d1b5628f6de3811e9710b1e3665c68b073d0e68d7 \ + --hash=sha256:3193a3672fc60f1a18c0f4c93ac81b761bc72fd8a6c2035fa79ff5969f07713e \ + --hash=sha256:38b5df0eb22012198cdcee527cc5f917f09cce1f88a69248aaca22bd78a7f023 \ + --hash=sha256:3d955ba2939878a520a97614bcf2e79c1df71b29203e8ced478fa78c9a9c63c2 \ + --hash=sha256:3e19fd064b297f7773b4c1150f9ce6213e6d7d041d7a9201c0d348041009cdcd \ + --hash=sha256:48ca1a91ff73c1d5e3ea2eef20ae5d0e709bb8a2355ed798ffc2169753013fd3 \ + --hash=sha256:4a183c6178be001bf398260e5ac2c927dc43e7746e8638d6c05c20e321f8c949 \ + --hash=sha256:4f3fb5cc4e2898ac5312a7dc03a65133dd2abf9a5e520e69afb880a7251ec97a \ + 
--hash=sha256:5ca7a0894a86d028d509cdcf99ed1864e19bfe5727b44322c11691d834a1c546 \ + --hash=sha256:659b8232d627a5460d74fd3c96947ae83db6d03f035ac633e20cd69cfa029227 \ + --hash=sha256:67e1c59034d89fff43b5301b6178222e54156eadd6ab4cd78ddc34b2f6274a66 \ + --hash=sha256:76884fe3e0258c911c749d7d09667fb173365fd27ee66fcedaf9fa039210fd13 \ + --hash=sha256:8167295f576055158a966161f8ef327cb491c06ede96cc23392be6022071b6ed \ + --hash=sha256:95c03e38671785036bb704c30cd2e150825f6ab4763df3a4f1d249da48525957 \ + --hash=sha256:d545335cb49d4d8c47cc803d3a805deb7ad5d9094dc67657d66e568610a36d7d \ + --hash=sha256:d6fc3a03fc0129b8b6ac03f03bc894431ffd77c7d79ec023d0afd667b4d35869 \ + --hash=sha256:dfd777d95c158437fda6b34758f0877d15b89cbe9ff45affbedc519b35345cf9 \ + --hash=sha256:e4673276b558b5b572b960b7f9ef9214dce9305673683eb289bb97a7df379a4b \ + --hash=sha256:ea5023a8dcdadbb23fd0ed0179ce64c1f6b05f5b5c34f2909b4e927589ebd0e4 \ + --hash=sha256:ecf2b617fd9a39b831abea2df795e17bac705992a35a98e1f0363f005c4a5247 \ + --hash=sha256:f01a4b63d4e1d8ec3e2f069e7b798b2955810aa434f7361f01bc8ca08d69cce4 \ + --hash=sha256:f0e437f8f2f0c36f629e9743d28cf266312baa90be6a899f405f78f2d4cb2e1d # via segment_anything (./segment-anything/setup.py) onnxruntime==1.17.1 \ --hash=sha256:2dff1a24354220ac30e4a4ce2fb1df38cb1ea59f7dac2c116238d63fe7f4c5ff \ diff --git a/docs/requirements.txt b/docs/requirements.txt index 8607e58ff3..7f346f353b 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -5,9 +5,9 @@ opencv-python-headless==4.10.0.84 # docs -markupsafe==2.1.5 +markupsafe==3.0.2 nbconvert>=7.2.3 -ipython==8.26.0 +ipython==8.29.0 sphinx==7.2.6 pydata-sphinx-theme==0.15.2 sphinx-copybutton diff --git a/docs/source/docs/command-reference/context_free/prune.md b/docs/source/docs/command-reference/context_free/prune.md index 0142c0f597..18c30b21e5 100644 --- a/docs/source/docs/command-reference/context_free/prune.md +++ b/docs/source/docs/command-reference/context_free/prune.md @@ -13,7 +13,7 @@ Prune supports 
various methodology. By default, datasets are updated in-place. The `-o/--output-dir` option can be used to specify another output directory. When updating in-place, use the `--overwirte` parameter (in-place updates fail by default to prevent data loss), unless a project target is modified. -The current project (`-p/--project`) is also used as a context for plugins, so it can be useful for datasest paths having custom formats. When not specified, the current project's working tree is used. +The current project (`-p/--project`) is also used as a context for plugins, so it can be useful for dataset paths having custom formats. When not specified, the current project's working tree is used. The command can be applied to a dataset or a project build target, a stage or the combined `project` target, in which case all the project targets will be affected. diff --git a/docs/source/docs/command-reference/context_free/transform.md b/docs/source/docs/command-reference/context_free/transform.md index c33bdc8359..c79cc94a99 100644 --- a/docs/source/docs/command-reference/context_free/transform.md +++ b/docs/source/docs/command-reference/context_free/transform.md @@ -101,7 +101,10 @@ Basic dataset item manipulations: - [`remove_images`](#remove_images) - Removes specific images - [`remove_annotations`](#remove_annotations) - Removes annotations - [`remove_attributes`](#remove_attributes) - Removes attributes -- [`astype_annotations`](#astype_annotations) - Convert annotation type +- [`astype_annotations`](#astype_annotations) - Transforms annotation types +- [`pseudo_labeling`](#pseudo_labeling) - Generates pseudo labels for unlabeled data +- [`correct`](#correct) - Corrects annotation types +- [`clean`](#clean) - Removes noisy data for tabular dataset Subset manipulations: - [`random_split`](#random_split) - Splits dataset into subsets @@ -173,15 +176,36 @@ Examples: #### `id_from_image_name` -Renames items in the dataset using image file name (without extension). 
+Renames items in the dataset based on the image file name, excluding the extension. +When 'ensure_unique' is enabled, a random suffix is appended to ensure each identifier is unique +in cases where the image name is not distinct. By default, the random suffix is three characters long, +but this can be adjusted with the 'suffix_length' parameter. Usage: ```console -id_from_image_name [-h] +id_from_image_name [-h] [-u] [-l SUFFIX_LENGTH] ``` Optional arguments: -- `-h`, `--help` (flag) - Show this help message and exit +- `-h`, `--help` (flag) - show this help message and exit +- `-u`, `--ensure_unique` (flag) - Appends a random suffix to ensure each identifier is unique if the image name is duplicated +- `-l`, `--suffix_length` (int) - Alters the length of the random suffix if `ensure_unique` is enabled (default: 3) + +Examples: +- Renames items without duplication check + ```console + datum transform -t id_from_image_name + ``` + +- Renames items with duplication check + ```console + datum transform -t id_from_image_name -- --ensure_unique + ``` + +- Renames items with duplication check and alters the suffix length (default: 3) + ```console + datum transform -t id_from_image_name -- --ensure_unique --suffix_length 2 + ``` #### `reindex` @@ -826,6 +850,36 @@ bbox_values_decrement [-h] Optional arguments: - `-h`, `--help` (flag) - Show this help message and exit +#### `pseudo_labeling` + +Assigns pseudo-labels to items in a dataset based on their similarity to predefined labels. This class is useful for semi-supervised learning when dealing with missing or uncertain labels. + +The process includes: + +- Similarity Computation: Uses hashing techniques to compute the similarity between items and predefined labels. +- Pseudo-Label Assignment: Assigns the most similar label as a pseudo-label to each item. + +Attributes: + +- `extractor` (IDataset) - Provides access to dataset items and their annotations.
+- `labels` (Optional[List[str]]) - List of predefined labels for pseudo-labeling. Defaults to all available labels if not provided. +- `explorer` (Optional[Explorer]) - Computes hash keys for items and labels. If not provided, a new Explorer is created. + +Usage: +```console +pseudo_labeling [-h] [--labels LABELS] +``` + +Optional arguments: +- `-h`, `--help` (flag) - Show this help message and exit +- `--labels` (str) - Comma-separated list of label names for pseudo-labeling + +Examples: +- Assign pseudo-labels based on predefined labels + ```console + datum transform -t pseudo_labeling -- --labels 'label1,label2' + ``` + #### `correct` Correct the dataset from a validation report @@ -838,3 +892,27 @@ correct [-h] [-r REPORT_PATH] Optional arguments: - `-h`, `--help` (flag) - Show this help message and exit - `-r`, `--reports` (str) - A validation report from a 'validate' CLI (default=validation_reports.json) + +#### `clean` + +Refines and preprocesses media items in a dataset, focusing on string, numeric, and categorical data. This transform is designed to clean and improve the quality of the data, making it more suitable for analysis and modeling. + +The cleaning process includes: + +- String Data: Removes unnecessary characters using NLP techniques. +- Numeric Data: Identifies and handles outliers and missing values. +- Categorical Data: Cleans and refines categorical information.
+ +Usage: +```console +clean [-h] +``` + +Optional arguments: +- `-h`, `--help` (flag) - Show this help message and exit + +Examples: +- Clean and preprocess dataset items + ```console + datum transform -t clean + ``` diff --git a/docs/source/docs/command-reference/context_free/validate.md b/docs/source/docs/command-reference/context_free/validate.md index 52284876d1..cdf52856f5 100644 --- a/docs/source/docs/command-reference/context_free/validate.md +++ b/docs/source/docs/command-reference/context_free/validate.md @@ -58,7 +58,7 @@ Examples: datum validate -p -t classification -- -ir 40 ``` -### List of validation items (annomaly types) +### List of validation items (anomaly types) | Anomaly Type | Description | Task Type | | ------------ | ----------- | --------- | diff --git a/docs/source/docs/command-reference/helper/format.md b/docs/source/docs/command-reference/helper/format.md index 20f6f95f71..f9617f1c57 100644 --- a/docs/source/docs/command-reference/helper/format.md +++ b/docs/source/docs/command-reference/helper/format.md @@ -13,7 +13,7 @@ usage: datum format [-h] [-li | -le] [-d DELIMITER] Parameters: - `-h, --help` - Print the help message and exit. -- `-d DELIMITER, --delimiter DELIMITER` - Seperator used to list data format names (default: `\n`). For example, `datum format -d ','` command displays +- `-d DELIMITER, --delimiter DELIMITER` - Separator used to list data format names (default: `\n`). For example, `datum format -d ','` command displays ```console Supported import formats: ade20k2017,ade20k2020,align_celeba,... 
diff --git a/docs/source/docs/data-formats/formats/ava_action.md b/docs/source/docs/data-formats/formats/ava_action.md index 792cee9af4..3d8e2ac93b 100644 --- a/docs/source/docs/data-formats/formats/ava_action.md +++ b/docs/source/docs/data-formats/formats/ava_action.md @@ -7,7 +7,7 @@ The AVA action format specification is available The dataset has annotations for recognizing an action per instance from video frames like visual tracking task. Specifically, the AVA action dataset contains frame indices, -bounding box cooridnates, actions, and tracking ids in the annotation file. The action +bounding box coordinates, actions, and tracking ids in the annotation file. The action categories are described in `ava_action_list_v2.2.pbtxt`. For the ease use for object detection, the AVA action dataset provides the bounding box proposals from `Faster R-CNN`. diff --git a/docs/source/docs/data-formats/formats/cityscapes.md b/docs/source/docs/data-formats/formats/cityscapes.md index 0f5ac2b58b..9c11a3aa0e 100644 --- a/docs/source/docs/data-formats/formats/cityscapes.md +++ b/docs/source/docs/data-formats/formats/cityscapes.md @@ -137,7 +137,7 @@ Extra options for exporting to Cityscapes format: #... datum project export -f cityscapes -- --label-map mycolormap.txt ``` -or you can use original cityscapes colomap: +or you can use original cityscapes colormap: ``` bash datum project export -f cityscapes -- --label-map cityscapes ``` diff --git a/docs/source/docs/data-formats/formats/coco.md b/docs/source/docs/data-formats/formats/coco.md index a6ed039f70..c007cfc552 100644 --- a/docs/source/docs/data-formats/formats/coco.md +++ b/docs/source/docs/data-formats/formats/coco.md @@ -122,7 +122,7 @@ For the panoptic task, a dataset directory should have the following structure: Annotation files must have the names like `_.json`. The year is treated as a part of the subset name. 
-If the annotation file name does't match this pattern, use one of the +If the annotation file name doesn't match this pattern, use one of the task-specific formats instead of plain `coco`: `coco_captions`, `coco_image_info`, `coco_instances`, `coco_labels`, `coco_panoptic`, `coco_person_keypoints`, `coco_stuff`. In this case all items of the diff --git a/docs/source/docs/data-formats/formats/datumaro.md b/docs/source/docs/data-formats/formats/datumaro.md index b12f1af6a1..0e9f1abfe8 100644 --- a/docs/source/docs/data-formats/formats/datumaro.md +++ b/docs/source/docs/data-formats/formats/datumaro.md @@ -73,6 +73,8 @@ A Datumaro dataset directory should have the following structure: └── ... ``` +Note that the subset name shouldn't contain path separators. + If your dataset is not following the above directory structure, it cannot detect and import your dataset as the Datumaro format properly. diff --git a/docs/source/docs/data-formats/formats/datumaro_binary.md b/docs/source/docs/data-formats/formats/datumaro_binary.md index 7b724b3734..a970d135a5 100644 --- a/docs/source/docs/data-formats/formats/datumaro_binary.md +++ b/docs/source/docs/data-formats/formats/datumaro_binary.md @@ -113,6 +113,8 @@ A DatumaroBinary dataset directory should have the following structure: └── ... ``` +Note that the subset name shouldn't contain path separators. + If your dataset is not following the above directory structure, it cannot detect and import your dataset as the DatumaroBinary format properly. diff --git a/docs/source/docs/data-formats/formats/kaggle.md b/docs/source/docs/data-formats/formats/kaggle.md index e7d25b38b8..dd517934cc 100644 --- a/docs/source/docs/data-formats/formats/kaggle.md +++ b/docs/source/docs/data-formats/formats/kaggle.md @@ -46,7 +46,7 @@ At this time, it's essential to specify the column names for media and label suc ## Import Kaggle Image Txt dataset -Another `kaggle_image_txt` format replaces only `columns` with an order of informations in `.txt`. 
+Another `kaggle_image_txt` format replaces only `columns` with an order of information in `.txt`. For instance, dataset can be created by ```python diff --git a/docs/source/docs/data-formats/formats/kitti.md b/docs/source/docs/data-formats/formats/kitti.md index 5c5b2fbc63..12896b05ba 100644 --- a/docs/source/docs/data-formats/formats/kitti.md +++ b/docs/source/docs/data-formats/formats/kitti.md @@ -175,7 +175,7 @@ Extra options for exporting to KITTI format: datum project export -f kitti -- --label-map mycolormap.txt ``` -or you can use original kitti colomap: +or you can use original kitti colormap: ``` bash datum project export -f kitti -- --label-map kitti ``` diff --git a/docs/source/docs/data-formats/formats/mapillary_vistas.md b/docs/source/docs/data-formats/formats/mapillary_vistas.md index 95e4990c01..9bb9f58b5b 100644 --- a/docs/source/docs/data-formats/formats/mapillary_vistas.md +++ b/docs/source/docs/data-formats/formats/mapillary_vistas.md @@ -12,7 +12,7 @@ Supported annotation types: - `Mask` (class, instances, panoptic) - `Polygon` -Supported atttibutes: +Supported attributes: - `is_crowd`(boolean; on panoptic `mask`): Indicates that the annotation covers multiple instances of the same class. diff --git a/docs/source/docs/data-formats/formats/mot.md b/docs/source/docs/data-formats/formats/mot.md index 040055f51a..a03ffc7479 100644 --- a/docs/source/docs/data-formats/formats/mot.md +++ b/docs/source/docs/data-formats/formats/mot.md @@ -18,7 +18,7 @@ Supported annotation attributes: You can download the MOT challenge dataset [here](https://motchallenge.net). 
-A Datumaro project with the MOT challange source can be created in the following way: +A Datumaro project with the MOT challenge source can be created in the following way: ``` bash datum project create @@ -43,7 +43,7 @@ The MOT challenge dataset directory should have the following structure: └── seqinfo.ini (optional) ``` -`seqinfo.ini` is provided by the MOT challange dataset but it is optional in Datumaro. +`seqinfo.ini` is provided by the MOT challenge dataset but it is optional in Datumaro. It includes `imdir` field which is the name of directory having image files. If this file is given, Datumaro will find the image files from the directory written in the `imdir` field. @@ -52,7 +52,7 @@ run `datum project info`, which will display the project information. ## Export to other formats -Datumaro can convert the MOT challange dataset into any other format [Datumaro supports](/docs/data-formats/formats/index.rst). +Datumaro can convert the MOT challenge dataset into any other format [Datumaro supports](/docs/data-formats/formats/index.rst). Such conversion will only be successful if the output format can represent the type of dataset you want to convert, diff --git a/docs/source/docs/data-formats/formats/mots.md b/docs/source/docs/data-formats/formats/mots.md index 782bab9544..e382d82843 100644 --- a/docs/source/docs/data-formats/formats/mots.md +++ b/docs/source/docs/data-formats/formats/mots.md @@ -16,9 +16,9 @@ Supported annotation attributes: ## Import MOTS dataset -You can download the PNG format of MOTS challange dataset [here](https://www.vision.rwth-aachen.de/page/mots). +You can download the PNG format of MOTS challenge dataset [here](https://www.vision.rwth-aachen.de/page/mots). 
-A Datumaro project with the MOTS challange source can be created in the following way: +A Datumaro project with the MOTS challenge source can be created in the following way: ``` bash datum project create @@ -28,7 +28,7 @@ datum project import --format mots It is possible to specify project name and project directory. Run `datum project create --help` for more information. -The MOTS challange dataset directory should have the following structure: +The MOTS challenge dataset directory should have the following structure: ``` @@ -55,7 +55,7 @@ run `datum project info`, which will display the project information. ## Export to other formats -Datumaro can convert the MOTS challange dataset into any other format [Datumaro supports](/docs/data-formats/formats/index.rst). +Datumaro can convert the MOTS challenge dataset into any other format [Datumaro supports](/docs/data-formats/formats/index.rst). Such conversion will only be successful if the output format can represent the type of dataset you want to convert, diff --git a/docs/source/docs/data-formats/formats/pascal_voc.md b/docs/source/docs/data-formats/formats/pascal_voc.md index ee70abb2ad..f6f457d209 100644 --- a/docs/source/docs/data-formats/formats/pascal_voc.md +++ b/docs/source/docs/data-formats/formats/pascal_voc.md @@ -221,7 +221,7 @@ datum project export -f voc -- --tasks detection,classification # person:255,0,0:head: datum project export -f voc_segmentation -- --label-map mycolormap.txt ``` -or you can use original voc colomap: +or you can use original voc colormap: ``` bash datum project export -f voc_segmentation -- --label-map voc ``` diff --git a/docs/source/docs/data-formats/formats/yolo_ultralytics.md b/docs/source/docs/data-formats/formats/yolo_ultralytics.md index a550499bb6..ca1b82fd48 100644 --- a/docs/source/docs/data-formats/formats/yolo_ultralytics.md +++ b/docs/source/docs/data-formats/formats/yolo_ultralytics.md @@ -93,7 +93,7 @@ To add custom classes, you can use 
[`dataset_meta.json`](/docs/data-formats/form ## Export to YOLO-Ultralytics format Datumaro can convert [any other image dataset format](/docs/data-formats/formats/index.rst) which has bounding box annotations into YOLO-Ultralytics format. -After the successful conversion, you can train your own detecter with the exported dataset and [Ultralytics YOLOv8 trainer](https://github.com/ultralytics/ultralytics). +After the successful conversion, you can train your own detector with the exported dataset and [Ultralytics YOLOv8 trainer](https://github.com/ultralytics/ultralytics). > Note, if you want to see the end-to-end Jupyter-notebook example from the dataset conversion to the training, please see this [link](https://github.com/openvinotoolkit/datumaro/blob/develop/notebooks/08_e2e_example_yolo_ultralytics_trainer.ipynb). diff --git a/docs/source/docs/jupyter_notebook_examples/e2e_example.rst b/docs/source/docs/jupyter_notebook_examples/e2e_example.rst index 9c214881c0..0cc20b7843 100644 --- a/docs/source/docs/jupyter_notebook_examples/e2e_example.rst +++ b/docs/source/docs/jupyter_notebook_examples/e2e_example.rst @@ -11,6 +11,7 @@ Here we provide E2E examples from Datumaro to model trainers. notebooks/10_noisy_label_detection_cls notebooks/13_noisy_label_detection_det notebooks/16_missing_annotation_detection + notebooks/22_framework_converter .. grid:: 1 2 2 2 :gutter: 2 @@ -42,3 +43,10 @@ Here we provide E2E examples from Datumaro to model trainers. :color: primary :outline: :expand: + + .. grid-item-card:: + + .. 
button-ref:: notebooks/22_framework_converter + :color: primary + :outline: + :expand: diff --git a/docs/source/docs/level-up/advanced_skills/12_project_versioning.rst b/docs/source/docs/level-up/advanced_skills/13_project_versioning.rst similarity index 99% rename from docs/source/docs/level-up/advanced_skills/12_project_versioning.rst rename to docs/source/docs/level-up/advanced_skills/13_project_versioning.rst index 161b63c78b..d4fffd91e0 100644 --- a/docs/source/docs/level-up/advanced_skills/12_project_versioning.rst +++ b/docs/source/docs/level-up/advanced_skills/13_project_versioning.rst @@ -1,5 +1,5 @@ ============================ -Level 12: Project Versioning +Level 13: Project Versioning ============================ Project versioning is a concept unique to Datumaro. Datumaro project includes a data source and revision tree, diff --git a/docs/source/docs/level-up/advanced_skills/13_pseudo_label_generation.rst b/docs/source/docs/level-up/advanced_skills/14_pseudo_label_generation.rst similarity index 68% rename from docs/source/docs/level-up/advanced_skills/13_pseudo_label_generation.rst rename to docs/source/docs/level-up/advanced_skills/14_pseudo_label_generation.rst index 99c632f8fe..cd444be1e2 100644 --- a/docs/source/docs/level-up/advanced_skills/13_pseudo_label_generation.rst +++ b/docs/source/docs/level-up/advanced_skills/14_pseudo_label_generation.rst @@ -1,5 +1,5 @@ ================================= -Level 13: Pseudo Label Generation +Level 14: Pseudo Label Generation ================================= TBD diff --git a/docs/source/docs/level-up/advanced_skills/14_data_pruning.rst b/docs/source/docs/level-up/advanced_skills/15_data_pruning.rst similarity index 99% rename from docs/source/docs/level-up/advanced_skills/14_data_pruning.rst rename to docs/source/docs/level-up/advanced_skills/15_data_pruning.rst index 66c044f7e8..5c299e50f3 100644 --- a/docs/source/docs/level-up/advanced_skills/14_data_pruning.rst +++ 
b/docs/source/docs/level-up/advanced_skills/15_data_pruning.rst @@ -1,5 +1,5 @@ ===================================================== -Level 14: Dataset Pruning +Level 15: Dataset Pruning ===================================================== diff --git a/docs/source/docs/level-up/advanced_skills/index.rst b/docs/source/docs/level-up/advanced_skills/index.rst index 59cfeefd1b..36e5cb26e5 100644 --- a/docs/source/docs/level-up/advanced_skills/index.rst +++ b/docs/source/docs/level-up/advanced_skills/index.rst @@ -5,16 +5,16 @@ Advanced Skills :maxdepth: 1 :hidden: - 12_project_versioning - 13_pseudo_label_generation - 14_data_pruning + 13_project_versioning + 14_pseudo_label_generation + 15_data_pruning .. grid:: 1 2 2 2 :gutter: 2 .. grid-item-card:: - .. button-ref:: 12_project_versioning + .. button-ref:: 13_project_versioning :color: primary :outline: :expand: @@ -25,7 +25,7 @@ Advanced Skills .. grid-item-card:: - .. button-ref:: 13_pseudo_label_generation + .. button-ref:: 14_pseudo_label_generation :color: primary :outline: :expand: @@ -36,7 +36,7 @@ Advanced Skills .. grid-item-card:: - .. button-ref:: 14_data_pruning + .. button-ref:: 15_data_pruning :color: primary :outline: :expand: diff --git a/docs/source/docs/level-up/basic_skills/03_dataset_import_export.rst b/docs/source/docs/level-up/basic_skills/03_dataset_import_export.rst index f0345e3201..a40251bfe3 100644 --- a/docs/source/docs/level-up/basic_skills/03_dataset_import_export.rst +++ b/docs/source/docs/level-up/basic_skills/03_dataset_import_export.rst @@ -19,7 +19,7 @@ Convert data format =================== Users sometimes need to compare, merge, or manage various kinds of public datasets in a unified -system. To achieve this, Datumaro not only has ``import`` and ``export`` funcionalities, but also +system. To achieve this, Datumaro not only has ``import`` and ``export`` functionalities, but also provides ``convert``, which shortens the import and export into a single command line. 
Let's convert the Cityscapes data into the MS-COCO format, which is described in :ref:`here `. diff --git a/docs/source/docs/level-up/intermediate_skills/11_data_generation.rst b/docs/source/docs/level-up/intermediate_skills/11_data_generation.rst index 90a00ee5d6..b0a2aa5ffd 100644 --- a/docs/source/docs/level-up/intermediate_skills/11_data_generation.rst +++ b/docs/source/docs/level-up/intermediate_skills/11_data_generation.rst @@ -10,7 +10,7 @@ since the manual annotations is quite expensive work. Base on the [FractalDB]_, Datumaro provides a fractal image dataset (FractalDB) generator that can be utilized to pre-train the vision models. Learning visual features of FractalDB is known to increase the performance of Vision Transformer (ViT) models. -Note that a fractal patterns in FractalDB is calculated mathmatically using the interated function system (IFS) with random parameters. +Note that the fractal patterns in FractalDB are calculated mathematically using the iterated function system (IFS) with random parameters. We thus don't need to concern about any privacy issues. diff --git a/docs/source/docs/level-up/intermediate_skills/12_framework_conversion.rst b/docs/source/docs/level-up/intermediate_skills/12_framework_conversion.rst new file mode 100644 index 0000000000..f3468c326b --- /dev/null +++ b/docs/source/docs/level-up/intermediate_skills/12_framework_conversion.rst @@ -0,0 +1,56 @@ +============================== +Level 12: Framework Conversion +============================== + +Datumaro allows seamless conversion of datasets to popular deep learning frameworks, such as PyTorch and TensorFlow. +This is particularly useful when you are working with a dataset that needs to be used across different frameworks +without manual reformatting. + +Datumaro provides the FrameworkConverter class, which can be used to convert a dataset for various tasks +like classification, detection, and segmentation.
+ +Supported Tasks + - Classification + - Multilabel Classification + - Detection + - Instance Segmentation + - Semantic Segmentation + - Tabular Data + +.. tab-set:: + + .. tab-item:: Python + + With the PyTorch framework, you can convert a Datumaro dataset like this: + + .. code-block:: python + + from datumaro.plugins.framework_converter import FrameworkConverter + from torchvision import transforms + + transform = transforms.Compose([transforms.ToTensor()]) + dm_dataset = ... # Load your dataset here + + First, we have to specify the dataset, subset, and task + + .. code-block:: python + + multi_framework_dataset = FrameworkConverter(dm_dataset, subset="train", task="classification") + train_dataset = multi_framework_dataset.to_framework(framework="torch", transform=transform) + + Through this, we convert the dataset to PyTorch format + + .. code-block:: python + + from torch.utils.data import DataLoader + train_loader = DataLoader(train_dataset, batch_size=32) + + Now we can use the train_dataset with PyTorch DataLoader + +In this example: + +- `subset="train"` indicates that we are working with the training portion of the dataset. + +- `task="classification"` specifies that this is a classification task. + +- The dataset is converted to PyTorch-compatible format using the `to_framework` method. diff --git a/docs/source/docs/level-up/intermediate_skills/index.rst b/docs/source/docs/level-up/intermediate_skills/index.rst index 8e7e2bba20..a4ec91fd3c 100644 --- a/docs/source/docs/level-up/intermediate_skills/index.rst +++ b/docs/source/docs/level-up/intermediate_skills/index.rst @@ -13,6 +13,7 @@ Intermediate Skills 09_data_filtering 10_data_exploration 11_data_generation + 12_framework_conversion .. grid:: 1 2 2 2 :gutter: 2 @@ -102,3 +103,14 @@ Intermediate Skills :bdg-info:`CLI` :bdg-warning:`Python` + + .. grid-item-card:: + + .. 
button-ref:: 12_framework_conversion + :color: primary + :outline: + :expand: + + Level 12: Framework Conversion + + :bdg-warning:`Python` diff --git a/docs/source/docs/release_notes.rst b/docs/source/docs/release_notes.rst index 221e4658b0..ceb18dfc7f 100644 --- a/docs/source/docs/release_notes.rst +++ b/docs/source/docs/release_notes.rst @@ -4,6 +4,54 @@ Release Notes .. toctree:: :maxdepth: 1 +v1.10.0 (2024 Q4) +----------------- + +New features +^^^^^^^^^^^^ +- Support KITTI 3D format +- Add PseudoLabeling transform for unlabeled dataset + +Enhancements +^^^^^^^^^^^^ +- Raise an appropriate error when exporting a datumaro dataset if its subset name contains path separators. +- Update docs for transform plugins +- Update ov ir model for explorer openvino launcher with CLIP ViT-L/14@336px model +- Optimize path assignment to handle point cloud in JSON without images +- Set TabularTransform to process clean transform in parallel + +Bug fixes +^^^^^^^^^ +- Fix datumaro format to load visibility information from Points annotations + +v1.9.1 (2024 Q3) +---------------- + +Enhancements +^^^^^^^^^^^^ +- Support multiple labels for kaggle format +- Use DataFrame.map instead of DataFrame.applymap + +Bug fixes +^^^^^^^^^ +- Fix StreamDataset merging when importing in eager mode + +v1.9.0 (2024 Q3) +---------------- + +New features +^^^^^^^^^^^^ +- Add a new CLI command: datum format +- Support language dataset for DmTorchDataset + +Enhancements +^^^^^^^^^^^^ +- Change _Shape to Shape and add comments for subclasses of Shape + +Bug fixes +^^^^^^^^^ +- Fix KITTI-3D importer and exporter + v1.8.0 (2024 Q3) ---------------- diff --git a/notebooks/21_kaggle_data_cleaning.ipynb b/notebooks/21_kaggle_data_cleaning.ipynb index 97980509b9..6078dcbc8a 100644 --- a/notebooks/21_kaggle_data_cleaning.ipynb +++ b/notebooks/21_kaggle_data_cleaning.ipynb @@ -51,16 +51,24 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 1, "metadata": {}, "outputs": [ + { + "name": "stderr", +
"output_type": "stream", + "text": [ + "/home/sooah/.pyenv/versions/datum/lib/python3.11/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", + " from .autonotebook import tqdm as notebook_tqdm\n" + ] + }, { "data": { "text/plain": [ "['tabular']" ] }, - "execution_count": 3, + "execution_count": 1, "metadata": {}, "output_type": "execute_result" } @@ -69,7 +77,7 @@ "import datumaro as dm\n", "from datumaro.components.environment import DEFAULT_ENVIRONMENT\n", "\n", - "data_path = \"/home/sooah/data/corona_nlp\"\n", + "data_path = \"~/data\"\n", "detected_formats = DEFAULT_ENVIRONMENT.detect_dataset(data_path)\n", "detected_formats" ] @@ -83,28 +91,28 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Dataset\n", - "\tsize=44955\n", - "\tsource_path=/home/sooah/data/corona_nlp\n", + "\tsize=2000\n", + "\tsource_path=/home/sooah/data/corona_nlp_1k\n", "\tmedia_type=\n", "\tann_types=set()\n", "\tannotated_items_count=0\n", "\tannotations_count=0\n", "subsets\n", - "\tCorona_NLP_test: # of items=3798, # of annotated items=0, # of annotations=0\n", - "\tCorona_NLP_train: # of items=41157, # of annotated items=0, # of annotations=0\n", + "\ttest: # of items=1000, # of annotated items=0, # of annotations=0\n", + "\ttrain: # of items=1000, # of annotated items=0, # of annotations=0\n", "infos\n", "\tcategories\n", - "\ttabular: []" + "\t14: []" ] }, - "execution_count": 4, + "execution_count": 2, "metadata": {}, "output_type": "execute_result" } @@ -148,28 +156,28 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Dataset\n", - "\tsize=44955\n", - "\tsource_path=/home/sooah/data/corona_nlp\n", + "\tsize=2000\n", + "\tsource_path=/home/sooah/data/corona_nlp_1k\n", "\tmedia_type=\n", 
"\tann_types={}\n", - "\tannotated_items_count=44955\n", - "\tannotations_count=44955\n", + "\tannotated_items_count=2000\n", + "\tannotations_count=2000\n", "subsets\n", - "\tCorona_NLP_test: # of items=3798, # of annotated items=3798, # of annotations=3798\n", - "\tCorona_NLP_train: # of items=41157, # of annotated items=41157, # of annotations=41157\n", + "\ttest: # of items=1000, # of annotated items=1000, # of annotations=1000\n", + "\ttrain: # of items=1000, # of annotated items=1000, # of annotations=1000\n", "infos\n", "\tcategories\n", - "\ttabular: ['Sentiment']" + "\t14: ['Sentiment']" ] }, - "execution_count": 5, + "execution_count": 3, "metadata": {}, "output_type": "execute_result" } @@ -200,16 +208,16 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "DatasetItem(id='0@Corona_NLP_train', subset='Corona_NLP_train', media=TableRow(row_idx:0, data:{'OriginalTweet': '@MeNyrbie @Phil_Gahan @Chrisitv https://t.co/iFz9FAn2Pa and https://t.co/xX6ghGFzCC and https://t.co/I2NlzdxNo8', 'Location': 'London', 'Sentiment': 'Neutral'}), annotations=[Tabular(id=0, attributes={}, group=0, object_id=-1, values={'Sentiment': 'Neutral'})], attributes={})" + "DatasetItem(id='0@test', subset='test', media=TableRow(row_idx:0, data:{'OriginalTweet': 'TRENDING: New Yorkers encounter empty supermarket shelves (pictured, Wegmans in Brooklyn), sold-out online grocers (FoodKick, MaxDelivery) as #coronavirus-fearing shoppers stock up https://t.co/Gr76pcrLWh https://t.co/ivMKMsqdT1', 'Location': 'NYC', 'Sentiment': 'Extremely Negative'}), annotations=[Tabular(id=0, attributes={}, group=0, object_id=-1, values={'Sentiment': 'Extremely Negative'})], attributes={})" ] }, - "execution_count": 7, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } @@ -227,15 +235,15 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 5, "metadata": {}, "outputs": [ { "name": 
"stdout", "output_type": "stream", "text": [ - "media : {'OriginalTweet': '@MeNyrbie @Phil_Gahan @Chrisitv https://t.co/iFz9FAn2Pa and https://t.co/xX6ghGFzCC and https://t.co/I2NlzdxNo8', 'Location': 'London', 'Sentiment': 'Neutral'}\n", - "annotations : [Tabular(id=0, attributes={}, group=0, object_id=-1, values={'Sentiment': 'Neutral'})]\n" + "media : {'OriginalTweet': 'TRENDING: New Yorkers encounter empty supermarket shelves (pictured, Wegmans in Brooklyn), sold-out online grocers (FoodKick, MaxDelivery) as #coronavirus-fearing shoppers stock up https://t.co/Gr76pcrLWh https://t.co/ivMKMsqdT1', 'Location': 'NYC', 'Sentiment': 'Extremely Negative'}\n", + "annotations : [Tabular(id=0, attributes={}, group=0, object_id=-1, values={'Sentiment': 'Extremely Negative'})]\n" ] } ], @@ -266,28 +274,28 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Dataset\n", - "\tsize=44955\n", - "\tsource_path=/home/sooah/data/corona_nlp\n", + "\tsize=2000\n", + "\tsource_path=/home/sooah/data/corona_nlp_1k\n", "\tmedia_type=\n", "\tann_types={}\n", - "\tannotated_items_count=44955\n", - "\tannotations_count=44955\n", + "\tannotated_items_count=2000\n", + "\tannotations_count=2000\n", "subsets\n", - "\tCorona_NLP_test: # of items=3798, # of annotated items=3798, # of annotations=3798\n", - "\tCorona_NLP_train: # of items=41157, # of annotated items=41157, # of annotations=41157\n", + "\ttest: # of items=1000, # of annotated items=1000, # of annotations=1000\n", + "\ttrain: # of items=1000, # of annotated items=1000, # of annotations=1000\n", "infos\n", "\tcategories\n", - "\tlabel: ['Sentiment:Extremely Negative', 'Sentiment:Extremely Positive', 'Sentiment:Negative', 'Sentiment:Neutral', 'Sentiment:Positive']" + "\t1: ['Sentiment:Extremely Negative', 'Sentiment:Extremely Positive', 'Sentiment:Negative', 'Sentiment:Neutral', 'Sentiment:Positive']" ] }, - "execution_count": 9, + "execution_count": 
6, "metadata": {}, "output_type": "execute_result" } @@ -299,14 +307,14 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "annotations : [Label(id=0, attributes={}, group=0, object_id=-1, label=3)]\n" + "annotations : [Label(id=0, attributes={}, group=0, object_id=-1, label=0)]\n" ] } ], @@ -344,7 +352,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 8, "metadata": {}, "outputs": [], "source": [ @@ -377,7 +385,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 9, "metadata": {}, "outputs": [ { @@ -385,17 +393,17 @@ "output_type": "stream", "text": [ "Statistics summary\n", - "Total number of annotation : 44955\n", + "Total number of annotation : 2000\n", "The number of items without any annotation : 0\n", "The number of items with missing annotation : 0\n", "\n", "\n", "Result of label distribution\n", " Sentiment:Extremely Negative Sentiment:Extremely Positive \\\n", - "0 6073 7223 \n", + "0 309 310 \n", "\n", " Sentiment:Negative Sentiment:Neutral Sentiment:Positive \n", - "0 10958 8332 12369 \n", + "0 568 318 495 \n", "The number of empty label for Sentiment is 0\n", "\n", "\n" @@ -403,7 +411,7 @@ }, { "data": { - "image/png": "", + "image/png": "", "text/plain": [ "
" ] @@ -464,18 +472,18 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "{'OriginalTweet': '@MeNyrbie @Phil_Gahan @Chrisitv https://t.co/iFz9FAn2Pa and https://t.co/xX6ghGFzCC and https://t.co/I2NlzdxNo8',\n", - " 'Location': 'London',\n", - " 'Sentiment': 'Neutral'}" + "{'OriginalTweet': 'TRENDING: New Yorkers encounter empty supermarket shelves (pictured, Wegmans in Brooklyn), sold-out online grocers (FoodKick, MaxDelivery) as #coronavirus-fearing shoppers stock up https://t.co/Gr76pcrLWh https://t.co/ivMKMsqdT1',\n", + " 'Location': 'NYC',\n", + " 'Sentiment': 'Extremely Negative'}" ] }, - "execution_count": 13, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } @@ -486,18 +494,18 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "{'OriginalTweet': 'I hate grocery shopping in general but I swear IÂ\\x92m doing it online next shop, can not deal with the swathes of panic buyers at all! 
#COVID?19 #coronavirus #coronavirusuk #anxiety #panicbuyinguk #morons',\n", - " 'Location': 'Portsmouth, England',\n", - " 'Sentiment': 'Extremely Negative'}" + "{'OriginalTweet': '@NileshShah68 I have summarized the most important points from the paper in this thread:\\r\\r\\nhttps://t.co/dTZg4vg8VM',\n", + " 'Location': 'Hyderabad, India',\n", + " 'Sentiment': 'Positive'}" ] }, - "execution_count": 14, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } @@ -520,7 +528,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 12, "metadata": {}, "outputs": [], "source": [ @@ -529,18 +537,18 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "{'OriginalTweet': 'menyrbie philgahan chrisitv',\n", - " 'Location': 'london',\n", - " 'Sentiment': 'Neutral'}" + "{'OriginalTweet': 'trending new yorkers encounter empty supermarket shelves pictured wegmans brooklyn soldout online grocers foodkick maxdelivery coronavirusfearing shoppers stock',\n", + " 'Location': 'nyc',\n", + " 'Sentiment': 'Extremely Negative'}" ] }, - "execution_count": 16, + "execution_count": 13, "metadata": {}, "output_type": "execute_result" } @@ -551,18 +559,18 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "{'OriginalTweet': 'hate grocery shopping general swear im online next shop deal swathes panic buyers covid coronavirus coronavirusuk anxiety panicbuyinguk morons',\n", - " 'Location': 'portsmouth england',\n", - " 'Sentiment': 'Extremely Negative'}" + "{'OriginalTweet': 'nileshshah summarized important points paper thread',\n", + " 'Location': 'hyderabad india',\n", + " 'Sentiment': 'Positive'}" ] }, - "execution_count": 17, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } @@ -581,15 +589,16 @@ }, { "cell_type": "code", - "execution_count": 18, + 
"execution_count": 15, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Before Clean : I hate grocery shopping in general but I swear I’m doing it online next shop, can not deal with the swathes of panic buyers at all! #COVID?19 #coronavirus #coronavirusuk #anxiety #panicbuyinguk #morons\n", - "After Clean : hate grocery shopping general swear im online next shop deal swathes panic buyers covid coronavirus coronavirusuk anxiety panicbuyinguk morons\n" + "Before Clean : @NileshShah68 I have summarized the most important points from the paper in this thread:\n", + "https://t.co/dTZg4vg8VM\n", + "After Clean : nileshshah summarized important points paper thread\n" ] } ], @@ -602,20 +611,391 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Export refiend data into Datumaro format\n", + "## Convert Datumaro dataset into PyTorch dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "train_iter = iter([value.media.data[\"OriginalTweet\"] for value in result])" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-10-16 16:36:27.631387: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n", + "2024-10-16 16:36:27.645753: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n", + "2024-10-16 16:36:27.649957: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n", + "2024-10-16 16:36:27.659912: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow 
binary is optimized to use available CPU instructions in performance-critical operations.\n", + "To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", + "2024-10-16 16:36:28.583817: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n" + ] + } + ], + "source": [ + "from datumaro.plugins.framework_converter import FrameworkConverter\n", + "from torchtext.data.utils import get_tokenizer\n", + "from torchtext.vocab import build_vocab_from_iterator\n", + "\n", + "tokenizer = get_tokenizer(\"basic_english\")\n", + "\n", + "\n", + "def yield_tokens(data_iter):\n", + " for _, text in data_iter:\n", + " yield tokenizer(text)\n", + "\n", + "\n", + "vocab = build_vocab_from_iterator(train_iter, specials=[\"\"])\n", + "vocab.set_default_index(vocab[\"\"])\n", + "\n", + "train_dataset = FrameworkConverter(result, subset=\"train\", task=\"tabular\")\n", + "dm_torch_train_dataset = train_dataset.to_framework(\n", + " framework=\"torch\", target={\"input\": \"OriginalTweet\"}, tokenizer=tokenizer, vocab=vocab\n", + ")\n", + "val_dataset = FrameworkConverter(result, subset=\"test\", task=\"tabular\")\n", + "dm_torch_val_dataset = val_dataset.to_framework(\n", + " framework=\"torch\", target={\"input\": \"OriginalTweet\"}, tokenizer=tokenizer, vocab=vocab\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 4. Modeling\n", "\n", - "We can export the refined data in the Datumaro format. Additionally, it is possible to export the data in various other formats. For more details, please refer to this [link](https://openvinotoolkit.github.io/datumaro/latest/docs/command-reference/context/export.html#export).\n" + "- Showcase how to use your tool for tasks such as feature extraction, model training, or evaluation on the dataset.\n", + "- Compare it with standard methods to show its advantages." 
] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/sooah/.pyenv/versions/datum/lib/python3.11/site-packages/torch/nn/modules/rnn.py:82: UserWarning: dropout option adds dropout after all but last recurrent layer, so non-zero dropout expects num_layers greater than 1, but got dropout=0.5 and num_layers=1\n", + " warnings.warn(\"dropout option adds dropout after all but last \"\n" + ] + } + ], + "source": [ + "import torch\n", + "import torch.nn as nn\n", + "import torch.optim as optim\n", + "\n", + "# Define a simple RNN-based model for text classification\n", + "\n", + "\n", + "class SentimentRNN(nn.Module):\n", + " def __init__(self, vocab_size, embed_size, hidden_size, output_size, num_layers=1, dropout=0.5):\n", + " super(SentimentRNN, self).__init__()\n", + " self.embedding = nn.Embedding(vocab_size, embed_size)\n", + " self.rnn = nn.LSTM(embed_size, hidden_size, num_layers, batch_first=True, dropout=dropout)\n", + " self.fc = nn.Linear(hidden_size, output_size)\n", + "\n", + " def forward(self, x):\n", + " x = self.embedding(x)\n", + " _, (hidden, _) = self.rnn(x)\n", + " out = self.fc(hidden[-1])\n", + " return out\n", + "\n", + "\n", + "# Example: Model initialization\n", + "vocab_size = len(vocab) # This should be the size of your vocabulary\n", + "embed_size = 128\n", + "hidden_size = 256\n", + "output_size = 5 # Five sentiment classes: extremely negative, extremely positive, negative, neutral, positive\n", + "\n", + "model = SentimentRNN(vocab_size, embed_size, hidden_size, output_size)" + ] + }, + { + "cell_type": "code", + "execution_count": 19, "metadata": {}, "outputs": [], "source": [ - "save_path = \"/home/sooah/data/refined_corona_nlp\"\n", - "result.export(save_path, \"datumaro\", save_media=True)" + "import numpy as np\n", + "from torch.utils.data import DataLoader\n", + "\n", + "# Define Loss and Optimizer\n", + "criterion =
nn.CrossEntropyLoss()\n", + "optimizer = optim.Adam(model.parameters(), lr=0.001)\n", + "\n", + "\n", + "def custom_collate_fn(batch):\n", + " # Separate inputs and outputs\n", + " inputs, outputs = zip(*batch)\n", + "\n", + " # Find the maximum length in the inputs and outputs\n", + " max_input_length = max(len(input_) for input_ in inputs)\n", + "\n", + " # Pad all inputs and outputs to the maximum length\n", + " padded_inputs = [\n", + " np.pad(input_, (0, max_input_length - len(input_)), mode=\"constant\") for input_ in inputs\n", + " ]\n", + "\n", + " # Convert to tensors\n", + " padded_inputs = torch.tensor(padded_inputs, dtype=torch.long)\n", + " padded_outputs = torch.stack(outputs) # Assuming labels are integers for classification\n", + "\n", + " return padded_inputs, padded_outputs\n", + "\n", + "\n", + "# Create DataLoader for your dataset\n", + "train_loader = DataLoader(\n", + " dm_torch_train_dataset, batch_size=32, shuffle=True, collate_fn=custom_collate_fn\n", + ")\n", + "val_loader = DataLoader(dm_torch_val_dataset, batch_size=32, collate_fn=custom_collate_fn)" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/tmp/ipykernel_633257/52718613.py:18: UserWarning: Creating a tensor from a list of numpy.ndarrays is extremely slow. Please consider converting the list to a single numpy.ndarray with numpy.array() before converting to a tensor. 
(Triggered internally at ../torch/csrc/utils/tensor_new.cpp:261.)\n", + " padded_inputs = torch.tensor(padded_inputs, dtype=torch.long)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 1, Loss: 1.590895850211382 | Validation Loss: 1.5764841102063656\n", + "Epoch 2, Loss: 1.5879455283284187 | Validation Loss: 1.5764116831123829\n", + "Epoch 3, Loss: 1.581930335611105 | Validation Loss: 1.5686759762465954\n", + "Epoch 4, Loss: 1.5809518098831177 | Validation Loss: 1.5715479329228401\n", + "Epoch 5, Loss: 1.5830248109996319 | Validation Loss: 1.5709160640835762\n" + ] + } + ], + "source": [ + "# Training Loop\n", + "def train(model, train_loader, val_loader, criterion, optimizer, num_epochs=100):\n", + " model.train()\n", + " train_losses = []\n", + " val_losses = []\n", + " for epoch in range(num_epochs):\n", + " running_loss = 0.0\n", + " for batch in train_loader:\n", + " inputs, labels = batch\n", + " outputs = model(inputs)\n", + "\n", + " loss = criterion(outputs, labels)\n", + " optimizer.zero_grad()\n", + " loss.backward()\n", + " optimizer.step()\n", + "\n", + " running_loss += loss.item()\n", + " # print(f'Epoch {epoch+1}, Loss: {running_loss/len(train_loader)}')\n", + " train_losses.append(running_loss)\n", + "\n", + " # Validation Loop (optional)\n", + " model.eval()\n", + " val_loss = 0.0\n", + " with torch.no_grad():\n", + " for batch in val_loader:\n", + " inputs, labels = batch\n", + " outputs = model(inputs)\n", + " loss = criterion(outputs, labels)\n", + " val_loss += loss.item()\n", + " val_losses.append(val_loss)\n", + "\n", + " if epoch % 5 == 0:\n", + " print(\n", + " f\"Epoch {epoch+1}, Loss: {running_loss/len(train_loader)} | Validation Loss: {val_loss/len(val_loader)}\"\n", + " )\n", + " return train_losses, val_losses\n", + "\n", + "\n", + "# Run the training\n", + "train_losses, val_losses = train(model, train_loader, val_loader, criterion, optimizer)" + ] + }, + { + "cell_type": "code", + 
"execution_count": 21, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "def plot_loss(train_losses, val_losses):\n", + " plt.figure(figsize=(10, 6))\n", + " plt.plot(train_losses, label=\"Training Loss\")\n", + " plt.plot(val_losses, label=\"Validation Loss\")\n", + " plt.xlabel(\"Epochs\")\n", + " plt.ylabel(\"Loss\")\n", + " plt.title(\"Training and Validation Loss over Epochs\")\n", + " plt.legend()\n", + " plt.show()\n", + "\n", + "\n", + "# Assuming you have stored losses in lists\n", + "plot_loss(train_losses, val_losses)" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: seaborn in /home/sooah/.pyenv/versions/3.11.9/envs/datum/lib/python3.11/site-packages (0.13.2)\n", + "Requirement already satisfied: numpy!=1.24.0,>=1.20 in /home/sooah/.pyenv/versions/3.11.9/envs/datum/lib/python3.11/site-packages (from seaborn) (1.26.4)\n", + "Requirement already satisfied: pandas>=1.2 in /home/sooah/.pyenv/versions/3.11.9/envs/datum/lib/python3.11/site-packages (from seaborn) (2.2.3)\n", + "Requirement already satisfied: matplotlib!=3.6.1,>=3.4 in /home/sooah/.pyenv/versions/3.11.9/envs/datum/lib/python3.11/site-packages (from seaborn) (3.9.2)\n", + "Requirement already satisfied: contourpy>=1.0.1 in /home/sooah/.pyenv/versions/3.11.9/envs/datum/lib/python3.11/site-packages (from matplotlib!=3.6.1,>=3.4->seaborn) (1.3.0)\n", + "Requirement already satisfied: cycler>=0.10 in /home/sooah/.pyenv/versions/3.11.9/envs/datum/lib/python3.11/site-packages (from matplotlib!=3.6.1,>=3.4->seaborn) (0.12.1)\n", + "Requirement already satisfied: fonttools>=4.22.0 in /home/sooah/.pyenv/versions/3.11.9/envs/datum/lib/python3.11/site-packages (from matplotlib!=3.6.1,>=3.4->seaborn) (4.54.1)\n", + "Requirement already satisfied: kiwisolver>=1.3.1 in /home/sooah/.pyenv/versions/3.11.9/envs/datum/lib/python3.11/site-packages (from 
matplotlib!=3.6.1,>=3.4->seaborn) (1.4.7)\n", + "Requirement already satisfied: packaging>=20.0 in /home/sooah/.pyenv/versions/3.11.9/envs/datum/lib/python3.11/site-packages (from matplotlib!=3.6.1,>=3.4->seaborn) (24.1)\n", + "Requirement already satisfied: pillow>=8 in /home/sooah/.pyenv/versions/3.11.9/envs/datum/lib/python3.11/site-packages (from matplotlib!=3.6.1,>=3.4->seaborn) (10.4.0)\n", + "Requirement already satisfied: pyparsing>=2.3.1 in /home/sooah/.pyenv/versions/3.11.9/envs/datum/lib/python3.11/site-packages (from matplotlib!=3.6.1,>=3.4->seaborn) (3.2.0)\n", + "Requirement already satisfied: python-dateutil>=2.7 in /home/sooah/.pyenv/versions/3.11.9/envs/datum/lib/python3.11/site-packages (from matplotlib!=3.6.1,>=3.4->seaborn) (2.9.0.post0)\n", + "Requirement already satisfied: pytz>=2020.1 in /home/sooah/.pyenv/versions/3.11.9/envs/datum/lib/python3.11/site-packages (from pandas>=1.2->seaborn) (2024.2)\n", + "Requirement already satisfied: tzdata>=2022.7 in /home/sooah/.pyenv/versions/3.11.9/envs/datum/lib/python3.11/site-packages (from pandas>=1.2->seaborn) (2024.2)\n", + "Requirement already satisfied: six>=1.5 in /home/sooah/.pyenv/versions/3.11.9/envs/datum/lib/python3.11/site-packages (from python-dateutil>=2.7->matplotlib!=3.6.1,>=3.4->seaborn) (1.16.0)\n" + ] + } + ], + "source": [ + "! pip install seaborn" ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "from sklearn.metrics import confusion_matrix\n", + "import seaborn as sns\n", + "\n", + "\n", + "def plot_confusion_matrix(model, val_loader):\n", + " all_preds = []\n", + " all_labels = []\n", + "\n", + " model.eval()\n", + " with torch.no_grad():\n", + " for batch in val_loader:\n", + " inputs, labels = batch\n", + " outputs = model(inputs)\n", + " preds = torch.argmax(outputs, dim=1)\n", + " all_preds.extend(preds.cpu().numpy())\n", + " all_labels.extend(labels.cpu().numpy())\n", + "\n", + " cm = confusion_matrix(all_labels, all_preds)\n", + "\n", + " plt.figure(figsize=(10, 7))\n", + " sns.heatmap(cm, annot=True, fmt=\"d\", cmap=\"Blues\")\n", + " plt.xlabel(\"Predicted\")\n", + " plt.ylabel(\"True\")\n", + " plt.title(\"Confusion Matrix\")\n", + " plt.show()\n", + "\n", + "\n", + "plot_confusion_matrix(model, val_loader)" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Accuracy of class 0: 0.00%\n", + "Accuracy of class 1: 0.62%\n", + "Accuracy of class 2: 95.03%\n", + "Accuracy of class 3: 0.00%\n", + "Accuracy of class 4: 1.68%\n" + ] + } + ], + "source": [ + "def per_class_accuracy(model, val_loader, num_classes):\n", + " class_correct = [0] * num_classes\n", + " class_total = [0] * num_classes\n", + "\n", + " model.eval()\n", + " with torch.no_grad():\n", + " for batch in val_loader:\n", + " inputs, labels = batch\n", + " outputs = model(inputs)\n", + " preds = torch.argmax(outputs, dim=1)\n", + "\n", + " for i in range(len(labels)):\n", + " label = labels[i]\n", + " if preds[i] == label:\n", + " class_correct[label] += 1\n", + " class_total[label] += 1\n", + "\n", + " for i in range(num_classes):\n", + " print(f\"Accuracy of class {i}: {100 * class_correct[i] / class_total[i]:.2f}%\")\n", + "\n", + "\n", + "# Assuming the dataset has 5 classes\n", + 
"per_class_accuracy(model, val_loader, 5)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { @@ -634,7 +1014,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.13" + "version": "3.11.9" } }, "nbformat": 4, diff --git a/notebooks/22_framework_converter.ipynb b/notebooks/22_framework_converter.ipynb new file mode 100644 index 0000000000..9ba6cdb1b1 --- /dev/null +++ b/notebooks/22_framework_converter.ipynb @@ -0,0 +1,443 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Data Framework Convert\n", + "\n", + "In this notebook, we will demonstrate how to leverage the Datumaro to manage datasets and seamlessly integrate them into a PyTorch training pipeline. This tutorial will walk through preparing a dataset using Datumaro and converting it into a format suitable for PyTorch model training and validation.\n", + "\n", + "Specifically, we will:\n", + "\n", + "- Load and inspect a dataset using Datumaro.\n", + "- Convert the dataset to a PyTorch-friendly format.\n", + "- Implement a simple training and validation pipeline using PyTorch.\n", + "\n", + "By the end of this notebook, you will understand how Datumaro can simplify dataset management tasks and improve the efficiency of your deep learning pipelines.\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Prerequisite\n", + "\n", + "### Download dataset\n", + "\n", + "We will be using a dataset from Kaggle for this tutorial. First, we’ll download the dataset. 
Please refer to [this guide](20_kaggle_data_import.ipynb) on how to download datasets from Kaggle.\n", + "\n", + "In this notebook, we use the [ananthu017/emotion-detection-fer](https://www.kaggle.com/datasets/ananthu017/emotion-detection-fer/data) dataset, as shown below." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "# !kaggle datasets download ananthu017/emotion-detection-fer --unzip --path ./emotion-detection-fer" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Dataset Preparation\n", + "\n", + "### Import a dataset\n", + "\n", + "\n", + "The dataset is organized in the following directory structure:\n", + "\n", + "```\n", + ".\n", + "├── test\n", + "│ ├── angry\n", + "│ ├── disgusted\n", + "│ ├── fearful\n", + "│ ├── happy\n", + "│ ├── neutral\n", + "│ ├── sad\n", + "│ └── surprised\n", + "└── train\n", + " ├── angry\n", + " ├── disgusted\n", + " ├── fearful\n", + " ├── happy\n", + " ├── neutral\n", + " ├── sad\n", + " └── surprised\n", + "```\n", + "\n", + "In our `emotion-detection-fer` folder, the dataset is divided into two main directories: `train` and `test`. Each of these directories contains subfolders for each emotion category, including \"angry,\" \"disgusted,\" \"fearful,\" \"happy,\" \"neutral,\" \"sad,\" and \"surprised.\" Each subfolder contains images corresponding to that emotion, allowing for organized access during training and testing phases. We used `datumaro` to inspect the dataset directory structure, and it appears that the dataset is well-structured for a classification task."
+ ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Detected data format is 'imagenet_with_subset_dirs'\n", + "Dataset\n", + "\tsize=35887\n", + "\tsource_path=/home/sooah/data/emotion-detection-fer\n", + "\tmedia_type=\n", + "\tann_types={}\n", + "\tannotated_items_count=35887\n", + "\tannotations_count=35887\n", + "subsets\n", + "\ttest: # of items=7178, # of annotated items=7178, # of annotations=7178\n", + "\ttrain: # of items=28709, # of annotated items=28709, # of annotations=28709\n", + "infos\n", + "\tcategories\n", + "\t1: ['angry', 'disgusted', 'fearful', 'happy', 'neutral', 'sad', 'surprised']\n", + "\n" + ] + } + ], + "source": [ + "import datumaro as dm\n", + "\n", + "dataset_dir = \"/home/sooah/data/emotion-detection-fer\"\n", + "formats = dm.Dataset.detect(dataset_dir)\n", + "print(f\"Detected data format is '{formats}'\")\n", + "\n", + "dataset = dm.Dataset.import_from(dataset_dir, formats)\n", + "print(dataset)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Based on the information provided:\n", + "- The total size of the dataset is 35,887 items.\n", + "- The dataset is divided into two subsets:\n", + " - The 'test' subset contains 7,178 items.\n", + " - The 'train' subset contains 28,709 items.\n", + "\n", + "This breakdown gives us insight into the scale of our dataset and the distribution of items across its subsets, with a clear emphasis on a larger training set to enhance model performance.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Convert Datumaro dataset into PyTorch dataset\n", + "\n", + "The process of converting a Datumaro dataset into a PyTorch dataset involves utilizing the `FrameworkConverter` from the Datumaro library. This allows us to seamlessly transform our dataset for compatibility with PyTorch's training and validation pipeline. 
In the code, we first define a set of transformations using `torchvision.transforms`, specifically converting images to tensor format. We then create PyTorch-compatible datasets for both the training and testing subsets by specifying the respective subset names and the classification task. Finally, we can check the number of items in both datasets to ensure they have been correctly prepared for model training and evaluation. This approach not only streamlines the data preprocessing step but also leverages the robust capabilities of the PyTorch framework for building and deploying deep learning models." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-10-23 15:32:28.371272: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n", + "2024-10-23 15:32:28.383616: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n", + "2024-10-23 15:32:28.387695: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n", + "2024-10-23 15:32:28.396903: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n", + "To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", + "2024-10-23 15:32:29.470910: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Converted train dataset len is '28709'\n", +
"Converted val dataset len is '7178'\n" + ] + } + ], + "source": [ + "from torchvision import transforms\n", + "from datumaro.plugins.framework_converter import FrameworkConverter\n", + "\n", + "transform = transforms.Compose([transforms.ToTensor()])\n", + "\n", + "multi_framework_dataset = FrameworkConverter(dataset, subset=\"train\", task=\"classification\")\n", + "train_dataset = multi_framework_dataset.to_framework(\n", + " framework=\"torch\",\n", + " transform=transform,\n", + ")\n", + "\n", + "multi_framework_dataset = FrameworkConverter(dataset, subset=\"test\", task=\"classification\")\n", + "val_dataset = multi_framework_dataset.to_framework(\n", + " framework=\"torch\",\n", + " transform=transform,\n", + ")\n", + "\n", + "print(f\"Converted train dataset len is '{len(train_dataset)}'\")\n", + "print(f\"Converted val dataset len is '{len(val_dataset)}'\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Building the PyTorch Training and Validation Pipeline\n", + "\n", + "### Creating Data Loaders for Efficient Data Handling\n", + "\n", + "In this section, we establish our data loaders for both training and validation datasets, which are essential for efficient data handling during the model training process. By utilizing PyTorch's `DataLoader`, we ensure that our training data is shuffled randomly for better generalization, while the validation data is loaded in a deterministic manner to facilitate accurate performance evaluation. The specified batch size of 64 allows for manageable processing of data during each training iteration.
With these loaders in place, we can seamlessly feed our datasets into the training loop for effective model training and validation.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Training Loader Batches: 449\n", + "Validation Loader Batches: 113\n" + ] + } + ], + "source": [ + "from torch.utils.data import DataLoader\n", + "\n", + "training_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)\n", + "validation_loader = DataLoader(val_dataset, batch_size=64, shuffle=False)\n", + "\n", + "print(f\"Training Loader Batches: {len(training_loader)}\")\n", + "print(f\"Validation Loader Batches: {len(validation_loader)}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Modeling\n", + "\n", + "### Model Architecture Definition\n", + "\n", + "In this section, we define our model architecture by leveraging the pre-trained MobileNetV2 model, which is well-suited for image classification tasks. By utilizing transfer learning, we can capitalize on the learned features from the ImageNet dataset, which enhances our model's performance on the emotion detection task. We replace the first convolution so that the network accepts single-channel input, and we modify the final fully connected layer to match the number of classes in our specific dataset, ensuring the model outputs predictions relevant to the emotions present in the images. Finally, we transfer the model to the GPU, enabling efficient training and inference processes. This approach helps us build a robust foundation for our emotion detection pipeline."
+ ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "from torchvision.models import mobilenet_v2\n", + "import torch\n", + "\n", + "model = mobilenet_v2(weights=\"IMAGENET1K_V1\")\n", + "model.features[0] = torch.nn.Conv2d(\n", + " 1, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False\n", + ")\n", + "# Get the number of input features for the last layer\n", + "num_features = model.classifier[1].in_features\n", + "\n", + "# Create a new classifier layer with the number of classes\n", + "num_classes = len(dataset.categories()[dm.AnnotationType.label])\n", + "model.classifier[1] = torch.nn.Linear(num_features, num_classes)\n", + "\n", + "# Move the model to the GPU (this call requires a CUDA device)\n", + "model = model.cuda() # If using GPU" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Training and Validation Loop\n", + "\n", + "In this section, we implement the training and validation loop for our emotion detection model. The `top_k_accuracy` function calculates the top-k accuracy for the model predictions, allowing us to evaluate performance more robustly. We define a cross-entropy loss function suitable for multi-class classification tasks and use the Stochastic Gradient Descent (SGD) optimizer to adjust the model's parameters. Throughout the training process, we report the loss for every 100 batches, providing insights into the model's learning progress. After each epoch, we evaluate the model on the validation dataset, averaging the per-batch top-1 accuracy to gauge its effectiveness in classifying the emotions."
+ ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "EPOCH 1:\n", + "\t [TRAIN] batch 100 loss: 1.8547\n", + "\t [TRAIN] batch 200 loss: 1.7383\n", + "\t [TRAIN] batch 300 loss: 1.6348\n", + "\t [TRAIN] batch 400 loss: 1.5933\n", + "\t [VAL] validation accuracy: 40.84%\n", + "EPOCH 2:\n", + "\t [TRAIN] batch 100 loss: 1.4857\n", + "\t [TRAIN] batch 200 loss: 1.4612\n", + "\t [TRAIN] batch 300 loss: 1.4012\n", + "\t [TRAIN] batch 400 loss: 1.3967\n", + "\t [VAL] validation accuracy: 48.21%\n", + "EPOCH 3:\n", + "\t [TRAIN] batch 100 loss: 1.2735\n", + "\t [TRAIN] batch 200 loss: 1.2806\n", + "\t [TRAIN] batch 300 loss: 1.2650\n", + "\t [TRAIN] batch 400 loss: 1.2792\n", + "\t [VAL] validation accuracy: 51.14%\n", + "EPOCH 4:\n", + "\t [TRAIN] batch 100 loss: 1.1394\n", + "\t [TRAIN] batch 200 loss: 1.1445\n", + "\t [TRAIN] batch 300 loss: 1.1760\n", + "\t [TRAIN] batch 400 loss: 1.1557\n", + "\t [VAL] validation accuracy: 52.51%\n", + "EPOCH 5:\n", + "\t [TRAIN] batch 100 loss: 1.0302\n", + "\t [TRAIN] batch 200 loss: 1.0563\n", + "\t [TRAIN] batch 300 loss: 1.0757\n", + "\t [TRAIN] batch 400 loss: 1.0815\n", + "\t [VAL] validation accuracy: 52.39%\n", + "EPOCH 6:\n", + "\t [TRAIN] batch 100 loss: 0.9378\n", + "\t [TRAIN] batch 200 loss: 0.9302\n", + "\t [TRAIN] batch 300 loss: 1.0006\n", + "\t [TRAIN] batch 400 loss: 0.9811\n", + "\t [VAL] validation accuracy: 51.48%\n", + "EPOCH 7:\n", + "\t [TRAIN] batch 100 loss: 0.8105\n", + "\t [TRAIN] batch 200 loss: 0.8475\n", + "\t [TRAIN] batch 300 loss: 0.9001\n", + "\t [TRAIN] batch 400 loss: 0.9265\n", + "\t [VAL] validation accuracy: 54.57%\n", + "EPOCH 8:\n", + "\t [TRAIN] batch 100 loss: 0.7378\n", + "\t [TRAIN] batch 200 loss: 0.7624\n", + "\t [TRAIN] batch 300 loss: 0.8293\n", + "\t [TRAIN] batch 400 loss: 0.8538\n", + "\t [VAL] validation accuracy: 54.17%\n", + "EPOCH 9:\n", + "\t [TRAIN] batch 100 loss: 
0.6630\n", + "\t [TRAIN] batch 200 loss: 0.6890\n", + "\t [TRAIN] batch 300 loss: 0.7210\n", + "\t [TRAIN] batch 400 loss: 0.7865\n", + "\t [VAL] validation accuracy: 52.42%\n", + "EPOCH 10:\n", + "\t [TRAIN] batch 100 loss: 0.5968\n", + "\t [TRAIN] batch 200 loss: 0.6473\n", + "\t [TRAIN] batch 300 loss: 0.6737\n", + "\t [TRAIN] batch 400 loss: 0.7167\n", + "\t [VAL] validation accuracy: 55.29%\n" + ] + } + ], + "source": [ + "def top_k_accuracy(output, labels, k=1):\n", + " \"\"\"Compute the top-k accuracy given model output and labels.\"\"\"\n", + " with torch.no_grad():\n", + " batch_size = labels.size(0)\n", + " _, pred = output.topk(k, 1, True, True)\n", + " pred = pred.t()\n", + " correct = pred.eq(labels.view(1, -1).expand_as(pred))\n", + " correct_k = correct[:k].view(-1).float().sum(0, keepdim=True)\n", + " return correct_k.mul_(100.0 / batch_size).item()\n", + "\n", + "\n", + "# Define loss function and optimizer\n", + "loss_fn = torch.nn.CrossEntropyLoss()\n", + "optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9)\n", + "\n", + "EPOCHS = 10\n", + "for epoch in range(EPOCHS):\n", + " print(f\"EPOCH {epoch + 1}:\")\n", + "\n", + " # Training phase\n", + " model.train()\n", + " running_loss = 0.0\n", + " for i, data in enumerate(training_loader):\n", + " inputs, labels = data\n", + " inputs, labels = inputs.cuda(), labels.cuda()\n", + "\n", + " optimizer.zero_grad()\n", + " outputs = model(inputs)\n", + "\n", + " loss = loss_fn(outputs, labels)\n", + " loss.backward()\n", + " optimizer.step()\n", + "\n", + " # Gather data and report\n", + " running_loss += loss.item()\n", + " if (i + 1) % 100 == 0:\n", + " print(f\"\\t [TRAIN] batch {i + 1} loss: {running_loss / 100:.4f}\")\n", + " running_loss = 0.0\n", + "\n", + " # Validation phase\n", + " model.eval()\n", + " accs = 0.0\n", + " with torch.no_grad():\n", + " for i, vdata in enumerate(validation_loader):\n", + " inputs, labels = vdata\n", + " inputs, labels = inputs.cuda(), 
labels.cuda()\n", + "\n", + " outputs = model(inputs)\n", + " top1_acc = top_k_accuracy(outputs, labels, k=1)\n", + " accs += top1_acc\n", + "\n", + " avg_accs = accs / (i + 1)\n", + " print(f\"\\t [VAL] validation accuracy: {avg_accs:.2f}%\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Model Fine-Tuning and Further Improvements\n", + "\n", + "While MobileNetV2 provided a solid baseline performance for this emotion detection task, further fine-tuning can help improve results. Experimenting with different architectures—such as ResNet or EfficientNet—or adjusting layers and hyperparameters in MobileNetV2 could yield a better fit to the unique characteristics of the dataset. Additionally, applying transfer learning from models pretrained on large face or emotion recognition datasets might enhance the model's ability to capture subtle facial expressions, leading to higher accuracy in emotion detection." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Conclusion\n", + "\n", + "In this notebook, we explored the use of Datumaro for data management, transforming the emotion-detection-fer dataset into a PyTorch-compatible format. This process enabled us to easily handle image-based datasets, including various pre-processing steps and dataset partitioning for training and validation.\n", + "\n", + "Leveraging MobileNetV2, a lightweight yet effective model architecture, we demonstrated its application for facial emotion recognition. MobileNetV2, with its efficient design and lower computational requirements, performed well on the dataset, making it a practical choice for projects that prioritize speed and model efficiency.\n", + "\n", + "Through the completed training and validation pipeline, we showcased how MobileNetV2 can be fine-tuned for specific emotion detection tasks. 
Datumaro’s robust data management features allowed us to streamline the dataset preparation, ensuring efficient handling and compatibility with PyTorch.\n", + "\n", + "Future improvements could involve experimenting with data augmentation, testing more complex model architectures, or further tuning hyperparameters to optimize accuracy. We hope this notebook serves as a comprehensive guide for leveraging Datumaro and MobileNetV2 in similar emotion detection or classification tasks." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "datum", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.9" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/requirements-core.txt b/requirements-core.txt index bb0c7a38a1..a1cf376c06 100644 --- a/requirements-core.txt +++ b/requirements-core.txt @@ -7,7 +7,7 @@ matplotlib>=3.3.1 networkx>=2.6 nibabel>=3.2.1 numpy<2,>=1.23.4 -orjson==3.10.6 +orjson==3.10.12 Pillow>=10.3.0 ruamel.yaml>=0.17.0 shapely>=1.7 @@ -64,3 +64,6 @@ json-stream # TabularValidator nltk + +# torch converter for language +portalocker diff --git a/setup.py b/setup.py index acc6925fdc..91b1b51e8c 100644 --- a/setup.py +++ b/setup.py @@ -85,7 +85,7 @@ def parse_requirements(filename=CORE_REQUIREMENTS_FILE): extras_require={ "tf": ["tensorflow"], "tfds": ["tensorflow-datasets<4.9.3"], - "torch": ["torch", "torchvision"], + "torch": ["torch", "torchvision", "torchtext==0.16.0"], "default": DEFAULT_REQUIREMENTS, }, ext_modules=ext_modules, diff --git a/src/datumaro/components/algorithms/hash_key_inference/base.py b/src/datumaro/components/algorithms/hash_key_inference/base.py index 0eb7c6101f..9b9d9a578b 100644 --- 
a/src/datumaro/components/algorithms/hash_key_inference/base.py +++ b/src/datumaro/components/algorithms/hash_key_inference/base.py @@ -1,4 +1,4 @@ -# Copyright (C) 2023 Intel Corporation +# Copyright (C) 2023-2024 Intel Corporation # # SPDX-License-Identifier: MIT @@ -21,13 +21,13 @@ def __init__(self, *datasets: Sequence[Dataset]) -> None: @property def model(self): if self._model is None: - self._model = explorer.ExplorerLauncher(model_name="clip_visual_ViT-B_32") + self._model = explorer.ExplorerLauncher(model_name="clip_visual_vit_l_14_336px_int8") return self._model @property def text_model(self): if self._text_model is None: - self._text_model = explorer.ExplorerLauncher(model_name="clip_text_ViT-B_32") + self._text_model = explorer.ExplorerLauncher(model_name="clip_text_vit_l_14_336px_int8") return self._text_model def _compute_hash_key(self, datasets, datasets_to_infer): diff --git a/src/datumaro/components/annotation.py b/src/datumaro/components/annotation.py index c68af00c4a..1a16ef2ed6 100644 --- a/src/datumaro/components/annotation.py +++ b/src/datumaro/components/annotation.py @@ -24,6 +24,7 @@ ) import attr +import cv2 import numpy as np import shapely.geometry as sg from attr import asdict, attrs, field @@ -50,6 +51,7 @@ class AnnotationType(IntEnum): feature_vector = 13 tabular = 14 rotated_bbox = 15 + cuboid_2d = 16 COORDINATE_ROUNDING_DIGITS = 2 @@ -260,8 +262,8 @@ class HashKey(Annotation): @hash_key.validator def _validate(self, attribute, value: np.ndarray): - """Check whether value is a 1D Numpy array having 64 np.uint8 values""" - if value.ndim != 1 or value.shape[0] != 64 or value.dtype != np.uint8: + """Check whether value is a 1D Numpy array having 96 np.uint8 values""" + if value.ndim != 1 or value.shape[0] != 96 or value.dtype != np.uint8: raise ValueError(value) def __eq__(self, other): @@ -1363,6 +1365,224 @@ def wrap(item, **kwargs): return attr.evolve(item, **d) +@attrs(slots=True, init=False, order=False) +class 
Cuboid2D(Annotation): + """ + Cuboid2D annotation class. This class represents a 3D bounding box by the 2D image coordinates + of its eight projected corners, ordered in the following way: + [(x1, y1), (x2, y2), (x3, y3), (x4, y4), (x5, y5), (x6, y6), (x7, y7), (x8, y8)]. + + + 2---3 + /| /| + 1-+-4 | + | 5 + 6 + |/ |/ + 8---7 + + Attributes: + _type (AnnotationType): The type of annotation, set to `AnnotationType.cuboid_2d`. + + Methods: + __init__: Initializes the Cuboid2D with its coordinates. + wrap: Creates a new Cuboid2D instance with updated attributes. + """ + + _type = AnnotationType.cuboid_2d + points = field(default=None) + label: Optional[int] = field( + converter=attr.converters.optional(int), default=None, kw_only=True + ) + z_order: int = field(default=0, validator=default_if_none(int), kw_only=True) + y_3d: float = field(default=None, validator=default_if_none(float), kw_only=True) + + def __init__( + self, + _points: Iterable[Tuple[float, float]], + *args, + **kwargs, + ): + kwargs.pop("points", None) # comes from wrap() + self.__attrs_init__(points=_points, *args, **kwargs) + + @staticmethod + def _get_plane_equation(points): + """Calculates coefficients of the plane equation from three points.""" + x1, y1, z1 = points[0, 0], points[0, 1], points[0, 2] + x2, y2, z2 = points[1, 0], points[1, 1], points[1, 2] + x3, y3, z3 = points[2, 0], points[2, 1], points[2, 2] + a1 = x2 - x1 + b1 = y2 - y1 + c1 = z2 - z1 + a2 = x3 - x1 + b2 = y3 - y1 + c2 = z3 - z1 + a = b1 * c2 - b2 * c1 + b = a2 * c1 - a1 * c2 + c = a1 * b2 - b1 * a2 + d = -a * x1 - b * y1 - c * z1 + return np.array([a, b, c, d]) + + @staticmethod + def _get_denorm(Tr_velo_to_cam_homo): + """Calculates the negated plane coefficients (non-normalized normal) of the LiDAR z = 0 plane after the given transform.
+ Args: + Tr_velo_to_cam_homo (np.ndarray): Homogeneous (4x4) LiDAR-to-camera transformation matrix + Returns: + np.ndarray: vector""" + ground_points_lidar = np.array([[0.0, 0.0, 0.0], [0.0, 1.0, 0.0], [1.0, 1.0, 0.0]]) + ground_points_lidar = np.concatenate( + (ground_points_lidar, np.ones((ground_points_lidar.shape[0], 1))), axis=1 + ) + ground_points_cam = np.matmul(Tr_velo_to_cam_homo, ground_points_lidar.T).T + denorm = -1 * Cuboid2D._get_plane_equation(ground_points_cam) + return denorm + + @staticmethod + def _get_3d_points(dim, location, rotation_y, denorm): + """Get corner points according to the 3D bounding box parameters. + + Args: + dim (List[float]): The dimensions of the 3D bounding box as [h, w, l]. + location (List[float]): The location of the 3D bounding box as [x, y, z]. + rotation_y (float): The rotation angle around the y-axis. + + Returns: + np.ndarray: The corner points of the 3D bounding box. + """ + + c, s = np.cos(rotation_y), np.sin(rotation_y) + R = np.array([[c, 0, s], [0, 1, 0], [-s, 0, c]], dtype=np.float32) + l, w, h = dim[2], dim[1], dim[0] + x_corners = [l / 2, l / 2, -l / 2, -l / 2, l / 2, l / 2, -l / 2, -l / 2] + y_corners = [0, 0, 0, 0, -h, -h, -h, -h] + z_corners = [w / 2, -w / 2, -w / 2, w / 2, w / 2, -w / 2, -w / 2, w / 2] + + corners = np.array([x_corners, y_corners, z_corners], dtype=np.float32) + corners_3d = np.dot(R, corners) + + denorm = denorm[:3] + denorm_norm = denorm / np.sqrt(denorm[0] ** 2 + denorm[1] ** 2 + denorm[2] ** 2) + ori_denorm = np.array([0.0, -1.0, 0.0]) + theta = -1 * math.acos(np.dot(denorm_norm, ori_denorm)) + n_vector = np.cross(denorm, ori_denorm) + n_vector_norm = n_vector / np.sqrt(n_vector[0] ** 2 + n_vector[1] ** 2 + n_vector[2] ** 2) + rotation_matrix, j = cv2.Rodrigues(theta * n_vector_norm) + corners_3d = np.dot(rotation_matrix, corners_3d) + corners_3d = corners_3d + np.array(location, dtype=np.float32).reshape(3, 1) + return corners_3d.transpose(1, 0) + + @staticmethod + def
_project_to_2d(pts_3d, P): + """Project 3D points to 2D image plane. + + Args: + pts_3d (np.ndarray): The 3D points to project. + P (np.ndarray): The projection matrix. + + Returns: + np.ndarray: The 2D points projected to the image + """ + # Convert to homogeneous coordinates + pts_3d = pts_3d.T + pts_3d_homo = np.vstack((pts_3d, np.ones(pts_3d.shape[1]))) + pts_2d = P @ pts_3d_homo + pts_2d[0, :] = np.divide(pts_2d[0, :], pts_2d[2, :]) + pts_2d[1, :] = np.divide(pts_2d[1, :], pts_2d[2, :]) + pts_2d = pts_2d[:2, :].T + + return pts_2d + + @classmethod + def from_3d( + cls, + dim: np.ndarray, + location: np.ndarray, + rotation_y: float, + P: np.ndarray, + Tr_velo_to_cam: np.ndarray, + ) -> Cuboid2D: + """Creates an instance of Cuboid2D class from 3D bounding box parameters. + + Args: + dim (np.ndarray): 3 scalars describing height, width and length (in this order) of a 3D bounding box + location (np.ndarray): (x, y, z) coordinates of the middle of the top face. + rotation_y (float): rotation along the Y-axis (from -pi to pi) + P (np.ndarray): Camera-to-Image transformation matrix (3x4) + Tr_velo_to_cam (np.ndarray): LiDAR-to-Camera transformation matrix (3x4) + + Returns: + Cuboid2D: Projection points for the given bounding box + """ + Tr_velo_to_cam_homo = np.eye(4) + Tr_velo_to_cam_homo[:3, :4] = Tr_velo_to_cam + denorm = cls._get_denorm(Tr_velo_to_cam_homo) + pts_3d = cls._get_3d_points(dim, location, rotation_y, denorm) + y_3d = np.mean(pts_3d[:4, 1]) + pts_2d = cls._project_to_2d(pts_3d, P) + + return cls(list(map(tuple, pts_2d)), y_3d=y_3d) + + def to_3d(self, P_inv: np.ndarray) -> tuple[np.ndarray, np.ndarray, float]: + """Reconstructs 3D object Velodyne coordinates (dimensions, location and rotation along the Y-axis) + from the given Cuboid2D instance.
+ + Args: + P_inv (np.ndarray): Pseudo-inverse of Camera-to-Image projection matrix + Returns: + tuple: dimensions, location and rotation along the Y-axis + """ + recon_3d = [] + for idx, coord_2d in enumerate(self.points): + coord_2d = np.append(coord_2d, 1) + coord_3d = P_inv @ coord_2d + if idx < 4: + coord_3d = coord_3d * self.y_3d / coord_3d[1] + else: + coord_3d = coord_3d * recon_3d[idx - 4][0] / coord_3d[0] + recon_3d.append(coord_3d[:3]) + recon_3d = np.array(recon_3d) + + x = np.mean(recon_3d[:, 0]) + z = np.mean(recon_3d[:, 2]) + + yaws = [] + pairs = [(0, 1), (3, 2), (4, 5), (7, 6)] + for p in pairs: + delta_x = recon_3d[p[0]][0] - recon_3d[p[1]][0] + delta_z = recon_3d[p[0]][2] - recon_3d[p[1]][2] + yaws.append(np.arctan2(delta_x, delta_z)) + yaw = np.mean(yaws) + + widths = [] + pairs = [(0, 1), (2, 3), (4, 5), (6, 7)] + for p in pairs: + delta_x = np.sqrt( + (recon_3d[p[0]][0] - recon_3d[p[1]][0]) ** 2 + + (recon_3d[p[0]][2] - recon_3d[p[1]][2]) ** 2 + ) + widths.append(delta_x) + w = np.mean(widths) + + lengths = [] + pairs = [(1, 2), (0, 3), (5, 6), (4, 7)] + for p in pairs: + delta_z = np.sqrt( + (recon_3d[p[0]][0] - recon_3d[p[1]][0]) ** 2 + + (recon_3d[p[0]][2] - recon_3d[p[1]][2]) ** 2 + ) + lengths.append(delta_z) + l = np.mean(lengths) + + heights = [] + pairs = [(0, 4), (1, 5), (2, 6), (3, 7)] + for p in pairs: + delta_y = np.abs(recon_3d[p[0]][1] - recon_3d[p[1]][1]) + heights.append(delta_y) + h = np.mean(heights) + return np.array([h, w, l]), np.array([x, self.y_3d, z]), yaw + + @attrs(slots=True, order=False) class PointsCategories(Categories): """ diff --git a/src/datumaro/components/annotations/matcher.py b/src/datumaro/components/annotations/matcher.py index db9322722a..eb7c874cc4 100644 --- a/src/datumaro/components/annotations/matcher.py +++ b/src/datumaro/components/annotations/matcher.py @@ -35,6 +35,7 @@ "ImageAnnotationMatcher", "HashKeyMatcher", "FeatureVectorMatcher", + "Cuboid2DMatcher", ] @@ -378,3 +379,8 @@ def 
distance(self, a, b): b = Points([p for pt in b.as_polygon() for p in pt]) return OKS(a, b, sigma=self.sigma) + + +@attrs +class Cuboid2DMatcher(ShapeMatcher): + pass diff --git a/src/datumaro/components/annotations/merger.py b/src/datumaro/components/annotations/merger.py index c1c356f81b..8ff7593a61 100644 --- a/src/datumaro/components/annotations/merger.py +++ b/src/datumaro/components/annotations/merger.py @@ -12,6 +12,7 @@ AnnotationMatcher, BboxMatcher, CaptionsMatcher, + Cuboid2DMatcher, Cuboid3dMatcher, FeatureVectorMatcher, HashKeyMatcher, @@ -210,3 +211,8 @@ class TabularMerger(AnnotationMerger, TabularMatcher): @attrs class RotatedBboxMerger(_ShapeMerger, RotatedBboxMatcher): pass + + +@attrs +class Cuboid2DMerger(_ShapeMerger, Cuboid2DMatcher): + pass diff --git a/src/datumaro/components/dataset.py b/src/datumaro/components/dataset.py index 2652c99a7d..574ec7cc33 100644 --- a/src/datumaro/components/dataset.py +++ b/src/datumaro/components/dataset.py @@ -1023,17 +1023,22 @@ class _MergedStreamDataset(cls): def __init__(self, *sources: IDataset): from datumaro.components.hl_ops import HLOps - self.merged = HLOps.merge(*sources, merge_policy=merge_policy) + self._merged = HLOps.merge(*sources, merge_policy=merge_policy) + self._data = self._merged._data + self._env = env + self._format = DEFAULT_FORMAT + self._source_path = None + self._options = {} def __iter__(self): - yield from self.merged + yield from self._merged @property def is_stream(self): return True def subsets(self) -> Dict[str, DatasetSubset]: - return self.merged.subsets() + return self._merged.subsets() return _MergedStreamDataset(*sources) diff --git a/src/datumaro/components/environment.py b/src/datumaro/components/environment.py index 52080e5eda..150125aedd 100644 --- a/src/datumaro/components/environment.py +++ b/src/datumaro/components/environment.py @@ -275,7 +275,7 @@ def merge(cls, envs: Sequence["Environment"]) -> "Environment": merged = Environment() def _register(registry: 
PluginRegistry): - merged.register_plugins(plugin for plugin in registry) + merged.register_plugins(list(registry._items.values())) for env in envs: _register(env.extractors) diff --git a/src/datumaro/components/errors.py b/src/datumaro/components/errors.py index c850fcc551..446c16a548 100644 --- a/src/datumaro/components/errors.py +++ b/src/datumaro/components/errors.py @@ -342,6 +342,16 @@ def __str__(self): return f"Item {self.item_id} is repeated in the source sequence." +@define(auto_exc=False) +class PathSeparatorInSubsetNameError(DatasetError): + subset: str = field() + + def __str__(self): + return ( + f"Failed to export the subset '{self.subset}': subset name contains path separator(s)." + ) + + class DatasetQualityError(DatasetError): pass diff --git a/src/datumaro/components/hl_ops/__init__.py b/src/datumaro/components/hl_ops/__init__.py index 54091aa703..22aaa32aa2 100644 --- a/src/datumaro/components/hl_ops/__init__.py +++ b/src/datumaro/components/hl_ops/__init__.py @@ -282,13 +282,14 @@ def merge( merger = get_merger(merge_policy, **kwargs) merged = merger(*datasets) env = Environment.merge( - dataset.env - for dataset in datasets - if hasattr( - dataset, "env" - ) # TODO: Sometimes, there is dataset which is not exactly "Dataset", - # e.g., VocClassificationBase. this should be fixed and every object from - # Dataset.import_from should have "Dataset" type. + [ + dataset.env + for dataset in datasets + if hasattr(dataset, "env") + # TODO: Sometimes, there is dataset which is not exactly "Dataset", + # e.g., VocClassificationBase. this should be fixed and every object from + # Dataset.import_from should have "Dataset" type. 
+ ] ) if report_path: merger.save_merge_report(report_path) diff --git a/src/datumaro/components/merge/intersect_merge.py b/src/datumaro/components/merge/intersect_merge.py index 26677661ea..bb545f950d 100644 --- a/src/datumaro/components/merge/intersect_merge.py +++ b/src/datumaro/components/merge/intersect_merge.py @@ -19,6 +19,7 @@ AnnotationMerger, BboxMerger, CaptionsMerger, + Cuboid2DMerger, Cuboid3dMerger, EllipseMerger, FeatureVectorMerger, @@ -455,6 +456,8 @@ def _for_type(t, **kwargs): return _make(TabularMerger, **kwargs) elif t is AnnotationType.rotated_bbox: return _make(RotatedBboxMerger, **kwargs) + elif t is AnnotationType.cuboid_2d: + return _make(Cuboid2DMerger, **kwargs) else: raise NotImplementedError("Type %s is not supported" % t) diff --git a/src/datumaro/components/transformer.py b/src/datumaro/components/transformer.py index c5d743bbc3..3d9b91c660 100644 --- a/src/datumaro/components/transformer.py +++ b/src/datumaro/components/transformer.py @@ -72,6 +72,80 @@ def __iter__(self): yield item +class TabularTransform(Transform): + """A transformation class for processing dataset items in batches with optional parallelism. + + This class takes a dataset extractor, batch size, and number of worker threads to process + dataset items. Depending on the number of workers specified, it can process items either + sequentially (single-process) or in parallel (multi-process), making it efficient for + batch transformations. + + Parameters: + extractor: The dataset extractor to obtain items from. + batch_size: The batch size for processing items. Default is 1. + num_workers: The number of worker threads to use for parallel processing. + Set to 0 for single-process mode. Default is 0. 
+ """ + + def __init__( + self, + extractor: IDataset, + batch_size: int = 1, + num_workers: int = 0, + ): + super().__init__(extractor) + self._batch_size = batch_size + if not (isinstance(num_workers, int) and num_workers >= 0): + raise ValueError( + f"num_workers should be a non negative integer, but it is {num_workers}" + ) + self._num_workers = num_workers + + def __iter__(self) -> Iterator[DatasetItem]: + if self._num_workers == 0: + return self._iter_single_proc() + return self._iter_multi_procs() + + def _iter_multi_procs(self): + with ThreadPool(processes=self._num_workers) as pool: + + def _producer_gen(): + for batch in take_by(self._extractor, self._batch_size): + future = pool.apply_async( + func=self._process_batch, + args=(batch,), + ) + yield future + + with consumer_generator(producer_generator=_producer_gen()) as consumer_gen: + for future in consumer_gen: + for item in future.get(): + yield item + + def _iter_single_proc(self) -> Iterator[DatasetItem]: + for batch in take_by(self._extractor, self._batch_size): + for item in self._process_batch(batch=batch): + yield item + + def transform_item(self, item: DatasetItem) -> Optional[DatasetItem]: + """ + Returns a modified copy of the input item. + + Avoid changing and returning the input item, because it can lead to + unexpected problems. Use wrap_item() or item.wrap() to simplify copying. + """ + + raise NotImplementedError() + + def _process_batch( + self, + batch: List[DatasetItem], + ) -> List[DatasetItem]: + results = [self.transform_item(item) for item in batch] + + return results + + class ModelTransform(Transform): """A transformation class for applying a model's inference to dataset items. 
diff --git a/src/datumaro/components/visualizer.py b/src/datumaro/components/visualizer.py index 7030165871..1f184a4e3b 100644 --- a/src/datumaro/components/visualizer.py +++ b/src/datumaro/components/visualizer.py @@ -19,6 +19,7 @@ AnnotationType, Bbox, Caption, + Cuboid2D, Cuboid3d, DepthAnnotation, Ellipse, @@ -661,6 +662,39 @@ def _draw_cuboid_3d( ) -> None: raise NotImplementedError(f"{ann.type} is not implemented yet.") + def _draw_cuboid_2d( + self, + ann: Cuboid2D, + label_categories: Optional[LabelCategories], + fig: Figure, + ax: Axes, + context: List, + ) -> None: + import matplotlib.patches as patches + + points = ann.points + color = self._get_color(ann) + label_text = label_categories[ann.label].name if label_categories is not None else ann.label + + # Define the faces based on vertex indices + + faces = [ + [points[i] for i in [0, 1, 2, 3]], # Top face + [points[i] for i in [4, 5, 6, 7]], # Bottom face + [points[i] for i in [0, 1, 5, 4]], # Front face + [points[i] for i in [1, 2, 6, 5]], # Right face + [points[i] for i in [2, 3, 7, 6]], # Back face + [points[i] for i in [3, 0, 4, 7]], # Left face + ] + ax.text(points[0][0], points[0][1] - self.text_y_offset, label_text, color=color) + + # Draw each face + for face in faces: + polygon = patches.Polygon( + face, fill=False, linewidth=self.bbox_linewidth, edgecolor=color + ) + ax.add_patch(polygon) + def _draw_super_resolution_annotation( self, ann: SuperResolutionAnnotation, diff --git a/src/datumaro/plugins/data_formats/common_semantic_segmentation.py b/src/datumaro/plugins/data_formats/common_semantic_segmentation.py index 4e9f55f625..7845ffc406 100644 --- a/src/datumaro/plugins/data_formats/common_semantic_segmentation.py +++ b/src/datumaro/plugins/data_formats/common_semantic_segmentation.py @@ -3,7 +3,6 @@ # SPDX-License-Identifier: MIT import errno -import glob import os.path as osp from typing import List, Optional @@ -69,11 +68,11 @@ def __init__( self._image_prefix = image_prefix 
self._mask_prefix = mask_prefix - meta_file = glob.glob(osp.join(path, "**", DATASET_META_FILE), recursive=True) - if is_meta_file(meta_file[0]): - self._root_dir = osp.dirname(meta_file[0]) + meta_file = osp.join(path, DATASET_META_FILE) + if is_meta_file(meta_file): + self._root_dir = osp.dirname(meta_file) - label_map = parse_meta_file(meta_file[0]) + label_map = parse_meta_file(meta_file) self._categories = make_categories(label_map) else: raise FileNotFoundError(errno.ENOENT, "Dataset meta info file was not found", path) @@ -163,11 +162,10 @@ def build_cmdline_parser(cls, **kwargs): @classmethod def detect(cls, context: FormatDetectionContext) -> FormatDetectionConfidence: - path = context.require_file(f"**/{DATASET_META_FILE}") - path = osp.dirname(path) + context.require_file(DATASET_META_FILE) - context.require_file(osp.join(path, CommonSemanticSegmentationPath.IMAGES_DIR, "**", "*")) - context.require_file(osp.join(path, CommonSemanticSegmentationPath.MASKS_DIR, "**", "*")) + context.require_file(osp.join(CommonSemanticSegmentationPath.IMAGES_DIR, "**", "*")) + context.require_file(osp.join(CommonSemanticSegmentationPath.MASKS_DIR, "**", "*")) return FormatDetectionConfidence.MEDIUM diff --git a/src/datumaro/plugins/data_formats/datumaro/base.py b/src/datumaro/plugins/data_formats/datumaro/base.py index ee7a8cdc21..5278782822 100644 --- a/src/datumaro/plugins/data_formats/datumaro/base.py +++ b/src/datumaro/plugins/data_formats/datumaro/base.py @@ -11,6 +11,7 @@ AnnotationType, Bbox, Caption, + Cuboid2D, Cuboid3d, Ellipse, GroupType, @@ -182,8 +183,7 @@ def _parse_item(self, item_desc: Dict) -> Optional[DatasetItem]: pcd_info = item_desc.get("point_cloud") if media and pcd_info: raise MediaTypeError(STR_MULTIPLE_MEDIA) - if pcd_info: - pcd_path = pcd_info.get("path") + if pcd_info and (pcd_path := pcd_info.get("path")): point_cloud = osp.join(self._pcd_dir, self._subset, pcd_path) related_images = None @@ -338,6 +338,7 @@ def _load_annotations(self, item: 
Dict): points, label=label_id, id=ann_id, + visibility=ann.get("visibility"), attributes=attributes, group=group, object_id=object_id, @@ -378,6 +379,18 @@ def _load_annotations(self, item: Dict): elif ann_type == AnnotationType.hash_key: continue + elif ann_type == AnnotationType.cuboid_2d: + loaded.append( + Cuboid2D( + list(map(tuple, points)), + label=label_id, + id=ann_id, + attributes=attributes, + group=group, + object_id=object_id, + z_order=z_order, + ) + ) else: raise NotImplementedError() except Exception as e: diff --git a/src/datumaro/plugins/data_formats/datumaro/exporter.py b/src/datumaro/plugins/data_formats/datumaro/exporter.py index 494492cbe8..a470b66b8f 100644 --- a/src/datumaro/plugins/data_formats/datumaro/exporter.py +++ b/src/datumaro/plugins/data_formats/datumaro/exporter.py @@ -20,6 +20,7 @@ Annotation, Bbox, Caption, + Cuboid2D, Cuboid3d, Ellipse, HashKey, @@ -37,6 +38,7 @@ from datumaro.components.crypter import NULL_CRYPTER from datumaro.components.dataset_base import DatasetItem from datumaro.components.dataset_item_storage import ItemStatus +from datumaro.components.errors import PathSeparatorInSubsetNameError from datumaro.components.exporter import ExportContextComponent, Exporter from datumaro.components.media import Image, MediaElement, PointCloud, Video, VideoFrame from datumaro.util import cast, dump_json_file @@ -184,7 +186,8 @@ def context_save_media( if context.save_media: fname = context.make_video_filename(item) - context.save_video(item, fname=fname, subdir=item.subset) + subdir = item.subset.replace(os.sep, "_") if item.subset else None + context.save_video(item, fname=fname, subdir=subdir) item.media = Video( path=fname, step=video._step, @@ -199,7 +202,8 @@ def context_save_media( if context.save_media: fname = context.make_video_filename(item) - context.save_video(item, fname=fname, subdir=item.subset) + subdir = item.subset.replace(os.sep, "_") if item.subset else None + context.save_video(item, fname=fname, 
subdir=subdir) item.media = VideoFrame(Video(fname), video_frame.index) yield @@ -209,8 +213,9 @@ def context_save_media( if context.save_media: # Temporarily update image path and save it. - fname = context.make_image_filename(item) - context.save_image(item, encryption=encryption, fname=fname, subdir=item.subset) + fname = context.make_image_filename(item, name=str(item.id).replace(os.sep, "_")) + subdir = item.subset.replace(os.sep, "_") if item.subset else None + context.save_image(item, encryption=encryption, fname=fname, subdir=subdir) item.media = Image.from_file(path=fname, size=image._size) yield @@ -219,14 +224,18 @@ def context_save_media( pcd = item.media_as(PointCloud) if context.save_media: - pcd_fname = context.make_pcd_filename(item) - context.save_point_cloud(item, fname=pcd_fname, subdir=item.subset) + pcd_name = str(item.id).replace(os.sep, "_") + pcd_fname = context.make_pcd_filename(item, name=pcd_name) + subdir = item.subset.replace(os.sep, "_") if item.subset else None + context.save_point_cloud(item, fname=pcd_fname, subdir=subdir) extra_images = [] for i, extra_image in enumerate(pcd.extra_images): extra_images.append( Image.from_file( - path=context.make_pcd_extra_image_filename(item, i, extra_image) + path=context.make_pcd_extra_image_filename( + item, i, extra_image, name=f"{pcd_name}/extra_image_{i}" + ) ) ) @@ -311,6 +320,8 @@ def _gen_item_desc(self, item: DatasetItem, *args, **kwargs) -> Dict: converted_ann = self._convert_ellipse_object(ann) elif isinstance(ann, HashKey): continue + elif isinstance(ann, Cuboid2D): + converted_ann = self._convert_cuboid_2d_object(ann) else: raise NotImplementedError() annotations.append(converted_ann) @@ -435,6 +446,18 @@ def _convert_cuboid_3d_object(self, obj): def _convert_ellipse_object(self, obj: Ellipse): return self._convert_shape_object(obj) + def _convert_cuboid_2d_object(self, obj: Cuboid2D): + converted = self._convert_annotation(obj) + + converted.update( + { + "label_id": cast(obj.label, 
int), + "points": obj.points, + "z_order": obj.z_order, + } + ) + return converted + class _StreamSubsetWriter(_SubsetWriter): def __init__( @@ -492,18 +515,27 @@ def create_writer( default_image_ext=self._default_image_ext, ) + if os.path.sep in subset: + raise PathSeparatorInSubsetNameError(subset) + return ( _SubsetWriter( context=self, subset=subset, - ann_file=osp.join(self._annotations_dir, subset + self.PATH_CLS.ANNOTATION_EXT), + ann_file=osp.join( + self._annotations_dir, + subset + self.PATH_CLS.ANNOTATION_EXT, + ), export_context=export_context, ) if not self._stream else _StreamSubsetWriter( context=self, subset=subset, - ann_file=osp.join(self._annotations_dir, subset + self.PATH_CLS.ANNOTATION_EXT), + ann_file=osp.join( + self._annotations_dir, + subset + self.PATH_CLS.ANNOTATION_EXT, + ), export_context=export_context, ) ) diff --git a/src/datumaro/plugins/data_formats/datumaro_binary/exporter.py b/src/datumaro/plugins/data_formats/datumaro_binary/exporter.py index a1c86d5061..0b257334fb 100644 --- a/src/datumaro/plugins/data_formats/datumaro_binary/exporter.py +++ b/src/datumaro/plugins/data_formats/datumaro_binary/exporter.py @@ -1,4 +1,4 @@ -# Copyright (C) 2023 Intel Corporation +# Copyright (C) 2024 Intel Corporation # # SPDX-License-Identifier: MIT @@ -15,7 +15,7 @@ from datumaro.components.crypter import NULL_CRYPTER, Crypter from datumaro.components.dataset_base import DatasetItem, IDataset -from datumaro.components.errors import DatumaroError +from datumaro.components.errors import DatumaroError, PathSeparatorInSubsetNameError from datumaro.components.exporter import ExportContext, ExportContextComponent, Exporter from datumaro.plugins.data_formats.datumaro.exporter import DatumaroExporter from datumaro.plugins.data_formats.datumaro.exporter import _SubsetWriter as __SubsetWriter @@ -309,6 +309,9 @@ def create_writer( default_image_ext=self._default_image_ext, ) + if osp.sep in subset: + raise PathSeparatorInSubsetNameError(subset) + return 
_SubsetWriter( context=self, subset=subset, diff --git a/src/datumaro/plugins/data_formats/datumaro_binary/mapper/__init__.py b/src/datumaro/plugins/data_formats/datumaro_binary/mapper/__init__.py index cefedf4cbd..01ee56d60a 100644 --- a/src/datumaro/plugins/data_formats/datumaro_binary/mapper/__init__.py +++ b/src/datumaro/plugins/data_formats/datumaro_binary/mapper/__init__.py @@ -22,6 +22,7 @@ "CaptionMapper", "Cuboid3dMapper", "EllipseMapper", + "Cuboid2DMapper", # common "Mapper", "DictMapper", diff --git a/src/datumaro/plugins/data_formats/datumaro_binary/mapper/annotation.py b/src/datumaro/plugins/data_formats/datumaro_binary/mapper/annotation.py index 4c7269719e..c26658bc64 100644 --- a/src/datumaro/plugins/data_formats/datumaro_binary/mapper/annotation.py +++ b/src/datumaro/plugins/data_formats/datumaro_binary/mapper/annotation.py @@ -12,6 +12,7 @@ AnnotationType, Bbox, Caption, + Cuboid2D, Cuboid3d, Ellipse, Label, @@ -270,6 +271,33 @@ def backward(cls, _bytes: bytes, offset: int = 0) -> Tuple[Ellipse, int]: return Ellipse(x, y, x2, y2, **shape_dict), offset +class Cuboid2DMapper(AnnotationMapper): + ann_type = AnnotationType.cuboid_2d + + @classmethod + def forward(cls, ann: Shape) -> bytes: + _bytearray = bytearray() + _bytearray.extend(struct.pack(" Tuple[Ellipse, int]: + ann_dict, offset = super().backward_dict(_bytes, offset) + label, z_order = struct.unpack_from(" bytes: _bytearray.extend(Cuboid3dMapper.forward(ann)) elif isinstance(ann, Ellipse): _bytearray.extend(EllipseMapper.forward(ann)) + elif isinstance(ann, Cuboid2D): + _bytearray.extend(Cuboid2DMapper.forward(ann)) else: raise NotImplementedError() diff --git a/src/datumaro/plugins/data_formats/imagenet.py b/src/datumaro/plugins/data_formats/imagenet.py index 673cfd132d..30b9dfaa5a 100644 --- a/src/datumaro/plugins/data_formats/imagenet.py +++ b/src/datumaro/plugins/data_formats/imagenet.py @@ -48,8 +48,18 @@ def _load_categories(self, path): path = Path(path) for dirname in sorted(d for d 
in path.rglob("*") if d.is_dir()): dirname = dirname.relative_to(path) + level = len(dirname.parts) if str(dirname) != ImagenetPath.IMAGE_DIR_NO_LABEL: - label_cat.add(str(dirname)) + parent = None + if level > 1: + parent = str(dirname.parents[0]) + if not any([g.name == parent for g in label_cat.label_groups]): + label_cat.add_label_group(parent, [str(dirname.name)], group_type=0) + else: + g = next(x for x in label_cat.label_groups if x.name == parent) + g.labels.append(str(dirname.name)) + label_cat.add(str(dirname), parent) + return {AnnotationType.label: label_cat} def _load_items(self, path): diff --git a/src/datumaro/plugins/data_formats/kaggle/base.py b/src/datumaro/plugins/data_formats/kaggle/base.py index d21b1434c1..06d2ef9a15 100644 --- a/src/datumaro/plugins/data_formats/kaggle/base.py +++ b/src/datumaro/plugins/data_formats/kaggle/base.py @@ -77,13 +77,31 @@ def _parse_bbox_coords(self, bbox_str): # expected to output [x1, y1, x2, y2] return [float(coord.strip()) for coord in coords] - def _load_annotations(self, datas: list, indices: Dict[str, int], bbox_flag: bool): + def _load_annotations( + self, datas: list, indices: Dict[str, Union[int, Dict[str, int]]], bbox_flag: bool + ): if "label" in indices: - label_name = str(datas[indices["label"]]) - label, cat = self._label_cat.find(label_name) - if not cat: - self._label_cat.add(label_name) - label, _ = self._label_cat.find(label_name) + label_indices = indices["label"] + if isinstance(label_indices, dict): + labels = [] + list_values = datas[1:] + index_to_label = {v: k for k, v in label_indices.items()} + present_labels = [ + index_to_label[i + 1] for i, value in enumerate(list_values) if value == "1" + ] + + for label_name in present_labels: + label, cat = self._label_cat.find(label_name) + if not cat: + self._label_cat.add(label_name) + label, _ = self._label_cat.find(label_name) + labels.append(Label(label=label)) + else: + label_name = str(datas[indices["label"]]) + label, cat = 
self._label_cat.find(label_name) + if not cat: + self._label_cat.add(label_name) + label, _ = self._label_cat.find(label_name) else: _, cat = self._label_cat.find("object") if not cat: @@ -91,7 +109,11 @@ def _load_annotations(self, datas: list, indices: Dict[str, int], bbox_flag: boo label = 0 if "label" in indices and not bbox_flag: + label_indices = indices["label"] + if isinstance(label_indices, dict): + return labels return Label(label=label) + if bbox_flag: if "bbox" in indices: coords = self._parse_bbox_coords(datas[indices["bbox"]]) @@ -125,7 +147,14 @@ def _load_items(self, ann_file: str, columns: Dict[str, Union[str, list]]): indices = {"media": df_fields.index(columns["media"])} if "label" in columns: - indices.update({"label": df_fields.index(columns["label"])}) + label_columns = columns["label"] + if isinstance(label_columns, list): + indices_label = {} + for label in label_columns: + indices_label[label] = df_fields.index(label) + indices.update({"label": indices_label}) + else: + indices.update({"label": df_fields.index(label_columns)}) bbox_flag = False bbox_index = columns.get("bbox") @@ -165,16 +194,30 @@ def _load_items(self, ann_file: str, columns: Dict[str, Union[str, list]]): continue ann = self._load_annotations(data_info, indices, bbox_flag) - self._ann_types.add(ann.type) - if item_id in items: - items[item_id].annotations.append(ann) + if isinstance(ann, list): + for label in ann: + self._ann_types.add(label.type) + if item_id in items: + for label in ann: + items[item_id].annotations.append(label) + else: + items[item_id] = DatasetItem( + id=item_id, + subset=self._subset, + media=Image.from_file(path=media_path), + annotations=ann, + ) else: - items[item_id] = DatasetItem( - id=item_id, - subset=self._subset, - media=Image.from_file(path=media_path), - annotations=[ann], - ) + self._ann_types.add(ann.type) + if item_id in items: + items[item_id].annotations.append(ann) + else: + items[item_id] = DatasetItem( + id=item_id, + 
subset=self._subset, + media=Image.from_file(path=media_path), + annotations=[ann], + ) return items.values() def categories(self): diff --git a/src/datumaro/plugins/data_formats/kitti/importer.py b/src/datumaro/plugins/data_formats/kitti/importer.py index 2880301901..c1e53fad0c 100644 --- a/src/datumaro/plugins/data_formats/kitti/importer.py +++ b/src/datumaro/plugins/data_formats/kitti/importer.py @@ -99,7 +99,7 @@ class KittiDetectionImporter(KittiImporter): @classmethod def detect(cls, context: FormatDetectionContext) -> FormatDetectionConfidence: # left color camera label files - context.require_file(f"**/label_2/*{cls._ANNO_EXT}") + context.require_file(f"**/label_2/*_*{cls._ANNO_EXT}") return cls.DETECT_CONFIDENCE @classmethod diff --git a/src/datumaro/plugins/data_formats/kitti_3d/__init__.py b/src/datumaro/plugins/data_formats/kitti_3d/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/src/datumaro/plugins/data_formats/kitti_3d/base.py b/src/datumaro/plugins/data_formats/kitti_3d/base.py new file mode 100644 index 0000000000..c385512e2e --- /dev/null +++ b/src/datumaro/plugins/data_formats/kitti_3d/base.py @@ -0,0 +1,155 @@ +# Copyright (C) 2024 Intel Corporation +# +# SPDX-License-Identifier: MIT + +import glob +import logging +import os +import os.path as osp +from typing import List, Optional, Type, TypeVar + +from datumaro.components.annotation import AnnotationType, Bbox +from datumaro.components.dataset_base import DatasetItem, SubsetBase +from datumaro.components.errors import InvalidAnnotationError +from datumaro.components.importer import ImportContext +from datumaro.components.media import Image +from datumaro.util.image import find_images + +from .format import Kitti3DLabelMap, Kitti3dPath, make_kitti3d_categories + +T = TypeVar("T") + + +class Kitti3dBase(SubsetBase): + # https://www.cvlibs.net/datasets/kitti/eval_object.php?obj_benchmark=3d + + def __init__( + self, + path: str, + *, + subset: Optional[str] = None, + 
ctx: Optional[ImportContext] = None, + ): + assert osp.isdir(path), path + + self._path = path + + if not subset: + folder_path = path.rsplit(Kitti3dPath.LABEL_DIR, 1)[0] + img_dir = osp.join(folder_path, Kitti3dPath.IMAGE_DIR) + if any(os.path.isdir(os.path.join(img_dir, item)) for item in os.listdir(img_dir)): + subset = osp.split(path)[-1] + self._path = folder_path + super().__init__(subset=subset, ctx=ctx) + + self._categories = make_kitti3d_categories(Kitti3DLabelMap) + self._items = self._load_items() + + def _load_items(self) -> List[DatasetItem]: + items = [] + + image_dir = osp.join(self._path, Kitti3dPath.IMAGE_DIR) + image_path_by_id = { + osp.split(osp.splitext(osp.relpath(p, image_dir))[0])[-1]: p + for p in find_images(image_dir, recursive=True) + } + + if self._subset == "default": + ann_dir = osp.join(self._path, Kitti3dPath.LABEL_DIR) + else: + ann_dir = osp.join(self._path, Kitti3dPath.LABEL_DIR, self._subset) + + label_categories = self._categories[AnnotationType.label] + + for labels_path in sorted(glob.glob(osp.join(ann_dir, "**", "*.txt"), recursive=True)): + item_id = osp.splitext(osp.relpath(labels_path, ann_dir))[0] + anns = [] + + try: + with open(labels_path, "r", encoding="utf-8") as f: + lines = f.readlines() + except IOError as e: + logging.error(f"Error reading file {labels_path}: {e}") + continue + + for line_idx, line in enumerate(lines): + line = line.split() + if len(line) not in [15, 16]: + logging.warning( + f"Unexpected line length {len(line)} in file {labels_path} at line {line_idx + 1}" + ) + continue + + label_name = line[0] + label_id = label_categories.find(label_name)[0] + if label_id is None: + label_id = label_categories.add(label_name) + + try: + x1 = self._parse_field(line[4], float, "bbox left-top x") + y1 = self._parse_field(line[5], float, "bbox left-top y") + x2 = self._parse_field(line[6], float, "bbox right-bottom x") + y2 = self._parse_field(line[7], float, "bbox right-bottom y") + + attributes = { + 
"truncated": self._parse_field(line[1], float, "truncated"), + "occluded": self._parse_field(line[2], int, "occluded"), + "alpha": self._parse_field(line[3], float, "alpha"), + "dimensions": [ + self._parse_field(line[8], float, "height (in meters)"), + self._parse_field(line[9], float, "width (in meters)"), + self._parse_field(line[10], float, "length (in meters)"), + ], + "location": [ + self._parse_field(line[11], float, "x (in meters)"), + self._parse_field(line[12], float, "y (in meters)"), + self._parse_field(line[13], float, "z (in meters)"), + ], + "rotation_y": self._parse_field(line[14], float, "rotation_y"), + } + except ValueError as e: + logging.error(f"Error parsing line {line_idx + 1} in file {labels_path}: {e}") + continue + + anns.append( + Bbox( + x=x1, + y=y1, + w=x2 - x1, + h=y2 - y1, + id=line_idx, + attributes=attributes, + label=label_id, + ) + ) + self._ann_types.add(AnnotationType.bbox) + + image = image_path_by_id.pop(item_id, None) + if image: + image = Image.from_file(path=image) + + if self._subset == "default": + calib_path = osp.join(self._path, Kitti3dPath.CALIB_DIR, item_id + ".txt") + else: + calib_path = osp.join( + self._path, Kitti3dPath.CALIB_DIR, self._subset, item_id + ".txt" + ) + items.append( + DatasetItem( + id=item_id, + subset=self._subset, + media=image, + attributes={"calib_path": calib_path}, + annotations=anns, + ) + ) + + return items + + def _parse_field(self, value: str, desired_type: Type[T], field_name: str) -> T: + try: + return desired_type(value) + except Exception as e: + raise InvalidAnnotationError( + f"Can't parse {field_name} from '{value}'. 
Expected {desired_type}" + ) from e diff --git a/src/datumaro/plugins/data_formats/kitti_3d/format.py b/src/datumaro/plugins/data_formats/kitti_3d/format.py new file mode 100644 index 0000000000..c61f2b1f3f --- /dev/null +++ b/src/datumaro/plugins/data_formats/kitti_3d/format.py @@ -0,0 +1,43 @@ +# Copyright (C) 2024 Intel Corporation +# +# SPDX-License-Identifier: MIT + +import os.path as osp + +from datumaro.components.annotation import AnnotationType, LabelCategories + + +class Kitti3dPath: + PCD_DIR = osp.join("velodyne") + IMAGE_DIR = "image_2" + LABEL_DIR = "label_2" + CALIB_DIR = "calib" + + +Kitti3DLabelMap = [ + "DontCare", + "Car", + "Pedestrian", + "Van", + "Truck", + "Cyclist", + "Sitter", + "Train", + "Motorcycle", + "Bus", + "Misc", +] + + +def make_kitti3d_categories(label_map=None): + if label_map is None: + label_map = Kitti3DLabelMap + + categories = {} + common_attrs = {"truncated", "occluded", "alpha", "dimensions", "location", "rotation_y"} + label_categories = LabelCategories(attributes=common_attrs) + for label in label_map: + label_categories.add(label) + categories[AnnotationType.label] = label_categories + + return categories diff --git a/src/datumaro/plugins/data_formats/kitti_3d/importer.py b/src/datumaro/plugins/data_formats/kitti_3d/importer.py new file mode 100644 index 0000000000..2840218af7 --- /dev/null +++ b/src/datumaro/plugins/data_formats/kitti_3d/importer.py @@ -0,0 +1,53 @@ +# Copyright (C) 2024 Intel Corporation +# +# SPDX-License-Identifier: MIT + +import os.path as osp +from typing import List + +from datumaro.components.errors import DatasetImportError +from datumaro.components.format_detection import FormatDetectionConfidence, FormatDetectionContext +from datumaro.components.importer import Importer + +from .format import Kitti3dPath + + +class Kitti3dImporter(Importer): + _ANNO_EXT = ".txt" + + @classmethod + def detect(cls, context: FormatDetectionContext) -> FormatDetectionConfidence: + 
context.require_file(f"{Kitti3dPath.CALIB_DIR}/*.txt") + cls._check_ann_file(context.require_file(f"{Kitti3dPath.LABEL_DIR}/*.txt"), context) + return FormatDetectionConfidence.MEDIUM + + @classmethod + def _check_ann_file(cls, fpath: str, context: FormatDetectionContext) -> bool: + with context.probe_text_file( + fpath, "Requirements for the annotation file of Kitti 3D format" + ) as fp: + for line in fp: + fields = line.rstrip("\n").split(" ") + if len(fields) == 15 or len(fields) == 16: + return True + raise DatasetImportError( + f"Kitti 3D format txt file should have 15 or 16 fields for " + f"each line, but the read line has {len(fields)} fields: " + f"fields={fields}." + ) + raise DatasetImportError("Empty file is not allowed.") + + @classmethod + def get_file_extensions(cls) -> List[str]: + return [cls._ANNO_EXT] + + @classmethod + def find_sources(cls, path): + # return [{"url": path, "format": "kitti3d"}] + sources = cls._find_sources_recursive( + path, "", "kitti3d", dirname=Kitti3dPath.LABEL_DIR, file_filter=lambda p: osp.isdir(p) + ) + if len(sources) == 0: + return [{"url": path, "format": "kitti3d"}] + else: + return sources diff --git a/src/datumaro/plugins/data_formats/kitti_raw/base.py b/src/datumaro/plugins/data_formats/kitti_raw/base.py index 92e04cc88e..836ad28574 100644 --- a/src/datumaro/plugins/data_formats/kitti_raw/base.py +++ b/src/datumaro/plugins/data_formats/kitti_raw/base.py @@ -1,4 +1,4 @@ -# Copyright (C) 2021-2023 Intel Corporation +# Copyright (C) 2021-2024 Intel Corporation # # SPDX-License-Identifier: MIT @@ -182,7 +182,7 @@ def _parse_attr(cls, value): @classmethod def _parse_track(cls, track_id, track, categories): common_attrs = {k: cls._parse_attr(v) for k, v in track["attributes"].items()} - scale = [track["scale"][k] for k in ["w", "h", "l"]] + scale = [track["scale"][k] for k in ["h", "w", "l"]] label = categories[AnnotationType.label].find(track["label"])[0] kf_occluded = False diff --git 
a/src/datumaro/plugins/data_formats/kitti_raw/exporter.py b/src/datumaro/plugins/data_formats/kitti_raw/exporter.py index 8e2f250d29..3d01b1d822 100644 --- a/src/datumaro/plugins/data_formats/kitti_raw/exporter.py +++ b/src/datumaro/plugins/data_formats/kitti_raw/exporter.py @@ -1,4 +1,4 @@ -# Copyright (C) 2021 Intel Corporation +# Copyright (C) 2021-2024 Intel Corporation # # SPDX-License-Identifier: MIT @@ -339,8 +339,8 @@ def _create_tracklets(self, subset): if not track: track = { "objectType": label, - "h": ann.scale[1], - "w": ann.scale[0], + "h": ann.scale[0], + "w": ann.scale[1], "l": ann.scale[2], "first_frame": frame_id, "poses": [], @@ -348,7 +348,7 @@ def _create_tracklets(self, subset): } tracks[track_id] = track else: - if [track["w"], track["h"], track["l"]] != ann.scale: + if [track["h"], track["w"], track["l"]] != ann.scale: # Tracks have fixed scale in the format raise DatasetExportError( "Item %s: mismatching track shapes, " diff --git a/src/datumaro/plugins/framework_converter.py b/src/datumaro/plugins/framework_converter.py index 556005e1b7..1aeb51138b 100644 --- a/src/datumaro/plugins/framework_converter.py +++ b/src/datumaro/plugins/framework_converter.py @@ -1,4 +1,4 @@ -# Copyright (C) 2023 Intel Corporation +# Copyright (C) 2023-2024 Intel Corporation # # SPDX-License-Identifier: MIT @@ -17,6 +17,7 @@ "detection": AnnotationType.bbox, "instance_segmentation": AnnotationType.polygon, "semantic_segmentation": AnnotationType.mask, + "tabular": [AnnotationType.label, AnnotationType.caption], } @@ -88,7 +89,10 @@ def _gen_item(self, idx: int): if ann.type == TASK_ANN_TYPE[self.task] ] label = mask_tools.merge_masks((mask, label_id) for mask, label_id in masks) - + elif self.task == "tabular": + label = [ + ann.as_dict() for ann in item.annotations if ann.type in TASK_ANN_TYPE[self.task] + ] return image, label @@ -103,15 +107,61 @@ def __init__( task: str, transform: Optional[Callable] = None, target_transform: Optional[Callable] = None, + 
target: Optional[str] = None, + tokenizer: Optional[tuple[Callable, Callable]] = None, + vocab: Optional[tuple[Callable, Callable]] = None, ): super().__init__(dataset=dataset, subset=subset, task=task) self.transform = transform self.target_transform = target_transform + if self.task == "tabular": + if not isinstance(target, dict): + raise ValueError( + "Target should be a dictionary with 'input' and 'output' keys." + ) + self.input_target = target.get("input") + self.output_target = target.get("output") + if not self.input_target: + raise ValueError( + "Please provide target column for tabular task which is used for input" + ) + + if not (tokenizer and vocab): + raise ValueError("Both tokenizer and vocab must be provided for tabular task") + self.tokenizer = tokenizer + self.vocab = vocab + def __getitem__(self, idx): image, label = self._gen_item(idx) + if self.task == "tabular": + try: + text = image[self.input_target] + except TypeError: + text = image()[self.input_target] + + if self.output_target: + src_tokenizer, tgt_tokenizer = self.tokenizer + src_vocab, tgt_vocab = self.vocab + src_tokens = src_tokenizer(text) + src_token_ids = src_vocab(src_tokens) + + label_text = label[0]["caption"].split(f"{self.output_target}:")[-1] + tgt_tokens = tgt_tokenizer(label_text) + tgt_token_ids = tgt_vocab(tgt_tokens) + + return torch.tensor(src_token_ids, dtype=torch.long), torch.tensor( + tgt_token_ids, dtype=torch.long + ) + else: + tokens = self.tokenizer(text) + token_ids = self.vocab(tokens) + return torch.tensor(token_ids, dtype=torch.long), torch.tensor( + label[0]["label"], dtype=torch.long + ) + if len(image.shape) == 2: image = np.expand_dims(image, axis=-1) diff --git a/src/datumaro/plugins/openvino_plugin/launcher.py b/src/datumaro/plugins/openvino_plugin/launcher.py index bdc924a949..9802ab0ca6 100644 --- a/src/datumaro/plugins/openvino_plugin/launcher.py +++ b/src/datumaro/plugins/openvino_plugin/launcher.py @@ -1,4 +1,4 @@ -# Copyright (C) 2019-2021 Intel 
Corporation +# Copyright (C) 2019-2024 Intel Corporation # # SPDX-License-Identifier: MIT @@ -92,6 +92,8 @@ class BuiltinOpenvinoModelInfo(OpenvinoModelInfo): downloadable_models = { "clip_text_ViT-B_32", "clip_visual_ViT-B_32", + "clip_visual_vit_l_14_336px_int8", + "clip_text_vit_l_14_336px_int8", "googlenet-v4-tf", } diff --git a/src/datumaro/plugins/openvino_plugin/samples/clip_text_vit_l_14_336px_int8_interp.py b/src/datumaro/plugins/openvino_plugin/samples/clip_text_vit_l_14_336px_int8_interp.py new file mode 100644 index 0000000000..3e7b6ad5a2 --- /dev/null +++ b/src/datumaro/plugins/openvino_plugin/samples/clip_text_vit_l_14_336px_int8_interp.py @@ -0,0 +1,30 @@ +# Copyright (C) 2024 Intel Corporation +# +# SPDX-License-Identifier: MIT + +from typing import List, Tuple + +from datumaro.components.abstracts import IModelInterpreter +from datumaro.components.abstracts.model_interpreter import LauncherInputType, ModelPred, PrepInfo +from datumaro.components.annotation import Annotation, AnnotationType, LabelCategories +from datumaro.components.dataset_base import DatasetItem +from datumaro.components.errors import DatumaroError +from datumaro.components.media import Image +from datumaro.plugins.openvino_plugin.samples.utils import gen_hash_key + + +class ClipTextViTL14ModelInterpreter(IModelInterpreter): + def preprocess(self, inp: DatasetItem) -> Tuple[LauncherInputType, PrepInfo]: + img = inp.media_as(Image).data + return img, None + + def postprocess(self, pred: ModelPred, info: PrepInfo) -> List[Annotation]: + feature_vector = pred.get("output") + if feature_vector is None: + raise DatumaroError('"output" key should exist in the model prediction.') + + return [gen_hash_key(feature_vector)] + + def get_categories(self): + label_categories = LabelCategories() + return {AnnotationType.label: label_categories} diff --git a/src/datumaro/plugins/openvino_plugin/samples/clip_visual_vit_l_14_336px_int8_interp.py 
b/src/datumaro/plugins/openvino_plugin/samples/clip_visual_vit_l_14_336px_int8_interp.py new file mode 100644 index 0000000000..320059357a --- /dev/null +++ b/src/datumaro/plugins/openvino_plugin/samples/clip_visual_vit_l_14_336px_int8_interp.py @@ -0,0 +1,52 @@ +# Copyright (C) 2024 Intel Corporation +# +# SPDX-License-Identifier: MIT + +import os.path as osp +from typing import List, Tuple + +import cv2 +import numpy as np + +from datumaro.components.abstracts import IModelInterpreter +from datumaro.components.abstracts.model_interpreter import LauncherInputType, ModelPred, PrepInfo +from datumaro.components.annotation import Annotation, AnnotationType, LabelCategories +from datumaro.components.dataset_base import DatasetItem +from datumaro.components.errors import DatumaroError +from datumaro.components.media import Image +from datumaro.plugins.openvino_plugin.samples.utils import gen_hash_key +from datumaro.util.samples import get_samples_path + + +class ClipViTL14ModelInterpreter(IModelInterpreter): + mean = (255 * np.array([0.485, 0.456, 0.406])).reshape(1, 1, 3) + std = (255 * np.array([0.229, 0.224, 0.225])).reshape(1, 1, 3) + + def preprocess(self, inp: DatasetItem) -> Tuple[LauncherInputType, PrepInfo]: + img = inp.media_as(Image).data + img = cv2.resize(img, (336, 336)) + img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) + img = (img - self.mean) / self.std + + if img.ndim == 3 and img.shape[2] in {3, 4}: + img = np.transpose(img, (2, 0, 1)) + return img, None + + def postprocess(self, pred: ModelPred, info: PrepInfo) -> List[Annotation]: + feature_vector = pred.get("output") + if feature_vector is None: + raise DatumaroError('"output" key should exist in the model prediction.') + + return [gen_hash_key(feature_vector)] + + def get_categories(self): + label_categories = LabelCategories() + openvino_plugin_samples_dir = get_samples_path() + imagenet_class_path = osp.join(openvino_plugin_samples_dir, "imagenet.class") + + with open(imagenet_class_path, "r", 
encoding="utf-8") as file: + labels = [line.strip() for line in file] + for label in labels: + label_categories.add(label) + + return {AnnotationType.label: label_categories} diff --git a/src/datumaro/plugins/specs.json b/src/datumaro/plugins/specs.json index 8891b79287..395ff510b0 100644 --- a/src/datumaro/plugins/specs.json +++ b/src/datumaro/plugins/specs.json @@ -799,6 +799,21 @@ ] } }, + { + "import_path": "datumaro.plugins.data_formats.kitti_3d.base.Kitti3dBase", + "plugin_name": "kitti3d", + "plugin_type": "DatasetBase" + }, + { + "import_path": "datumaro.plugins.data_formats.kitti_3d.importer.Kitti3dImporter", + "plugin_name": "kitti3d", + "plugin_type": "Importer", + "metadata": { + "file_extensions": [ + ".txt" + ] + } + }, { "import_path": "datumaro.plugins.data_formats.kitti_raw.base.KittiRawBase", "plugin_name": "kitti_raw", @@ -1855,13 +1870,13 @@ "plugin_type": "Transform" }, { - "import_path": "datumaro.plugins.transforms.Correct", - "plugin_name": "correct", + "import_path": "datumaro.plugins.transforms.Clean", + "plugin_name": "clean", "plugin_type": "Transform" }, { - "import_path": "datumaro.plugins.transforms.Clean", - "plugin_name": "clean", + "import_path": "datumaro.plugins.transforms.Correct", + "plugin_name": "correct", "plugin_type": "Transform" }, { @@ -1929,6 +1944,11 @@ "plugin_name": "remove_annotations", "plugin_type": "Transform" }, + { + "import_path": "datumaro.plugins.transforms.PseudoLabeling", + "plugin_name": "pseudo_labeling", + "plugin_type": "Transform" + }, { "import_path": "datumaro.plugins.transforms.RemoveAttributes", "plugin_name": "remove_attributes", diff --git a/src/datumaro/plugins/transforms.py b/src/datumaro/plugins/transforms.py index 6060b0ad3b..59062cd349 100644 --- a/src/datumaro/plugins/transforms.py +++ b/src/datumaro/plugins/transforms.py @@ -9,6 +9,7 @@ import os.path as osp import random import re +import string from collections import Counter, defaultdict from copy import deepcopy from enum import Enum, 
auto @@ -22,6 +23,8 @@ from pandas.api.types import CategoricalDtype import datumaro.util.mask_tools as mask_tools +from datumaro.components.algorithms.hash_key_inference.explorer import Explorer +from datumaro.components.algorithms.hash_key_inference.hashkey_util import calculate_hamming from datumaro.components.annotation import ( AnnotationType, Bbox, @@ -40,6 +43,7 @@ TabularCategories, ) from datumaro.components.cli_plugin import CliPlugin +from datumaro.components.dataset import Dataset from datumaro.components.dataset_base import DEFAULT_SUBSET_NAME, DatasetInfo, DatasetItem, IDataset from datumaro.components.errors import ( AnnotationTypeError, @@ -60,8 +64,8 @@ UndefinedAttribute, UndefinedLabel, ) -from datumaro.components.media import Image, TableRow -from datumaro.components.transformer import ItemTransform, Transform +from datumaro.components.media import Image, TableRow, VideoFrame +from datumaro.components.transformer import ItemTransform, TabularTransform, Transform from datumaro.util import NOTSET, filter_dict, parse_json_file, parse_str_enum_value, take_by from datumaro.util.annotation_util import find_group_leader, find_instances from datumaro.util.tabular_util import emoji_pattern @@ -592,12 +596,94 @@ def __iter__(self): class IdFromImageName(ItemTransform, CliPlugin): """ - Renames items in the dataset using image file name (without extension). + Renames items in the dataset based on the image file name, excluding the extension.|n + When 'ensure_unique' is enabled, a random suffix is appended to ensure each identifier is unique + in cases where the image name is not distinct. By default, the random suffix is three characters long, + but this can be adjusted with the 'suffix_length' parameter.|n + |n + Examples:|n + |n + |s|s- Renames items without duplication check:|n + + .. code-block:: + + |s|s|s|s%(prog)s|n + |n + |s|s- Renames items with duplication check:|n + + .. 
code-block:: + + |s|s|s|s%(prog)s --ensure_unique|n + |n + |s|s- Renames items with duplication check and alters the suffix length(default: 3):|n + + .. code-block:: + + |s|s|s|s%(prog)s --ensure_unique --suffix_length 2 """ + DEFAULT_RETRY = 1000 + SUFFIX_LETTERS = string.ascii_lowercase + string.digits + + @classmethod + def build_cmdline_parser(cls, **kwargs): + parser = super().build_cmdline_parser(**kwargs) + parser.add_argument( + "-u", + "--ensure_unique", + action="store_true", + help="Appends a random suffix to ensure each identifier is unique if the image name is duplicated.", + ) + parser.add_argument( + "-l", + "--suffix_length", + type=int, + default=3, + help="Alters the length of the random suffix if the 'ensure_unique' is enabled.", + ) + + return parser + + def __init__(self, extractor, ensure_unique: bool = False, suffix_length: int = 3): + super().__init__(extractor) + self._length = "parent" + self._ensure_unique = ensure_unique + self._names: set[str] = set() + self._suffix_length = suffix_length + if suffix_length <= 0: + raise ValueError( + f"The 'suffix_length' must be greater than 0. Received: {suffix_length}." + ) + self._max_retry = min( + self.DEFAULT_RETRY, pow(len(self.SUFFIX_LETTERS), self._suffix_length) + ) + + def _add_unique_suffix(self, name): + count = 0 + while name in self._names: + suffix = "".join( + random.choices(self.SUFFIX_LETTERS, k=self._suffix_length) # nosec B311 + ) + new_name = f"{name}__{suffix}" + if new_name not in self._names: + name = new_name + break + count += 1 + if count == self._max_retry: + raise Exception( + f"Too many duplicate names. Failed to generate a unique suffix after {self._max_retry} attempts." 
+ ) + + self._names.add(name) + return name + def transform_item(self, item): if isinstance(item.media, Image) and hasattr(item.media, "path"): name = osp.splitext(osp.basename(item.media.path))[0] + if isinstance(item.media, VideoFrame): + name += f"_frame-{item.media.index}" + if self._ensure_unique: + name = self._add_unique_suffix(name) return self.wrap_item(item, id=name) else: log.debug("Can't change item id for item '%s': " "item has no path info" % item.id) @@ -1348,9 +1434,21 @@ def transform_item(self, item: DatasetItem): class Correct(Transform, CliPlugin): """ - Correct the dataset from a validation report. - A user can should feed into validation_reports.json from validator to correct the dataset. - This helps to refine the dataset by rejecting undefined labels, missing annotations, and outliers. + This class provides functionality to correct and refine a dataset based on a validation report.|n + It processes a validation report (typically in JSON format) to identify and rectify various |n + dataset issues, such as undefined labels, missing annotations, outliers, empty labels/captions,|n + and unnecessary characters in captions. The correction process includes:|n + |n + - Adding missing labels and attributes.|n + - Removing or adjusting annotations with invalid or anomalous values.|n + - Filling in missing labels and captions with appropriate values.|n + - Removing unnecessary characters from text-based annotations like captions.|n + - Handling outliers by capping values within specified bounds.|n + - Updating dataset categories and annotations according to the corrections.|n + |n + The class is designed to be used as part of a command-line interface (CLI) and can be |n + configured with different validation reports. 
It integrates with the dataset extraction |n + process, ensuring that corrections are applied consistently across the dataset.|n """ @classmethod @@ -1746,13 +1844,15 @@ def __iter__(self): class AstypeAnnotations(ItemTransform): """ - Enables the conversion of annotation types for the categories and individual items within a dataset.|n + Converts the types of annotations within a dataset based on a specified mapping.|n |n - Based on a specified mapping, it transforms the annotation types,|n - changing them to 'Label' if they are categorical, and to 'Caption' if they are of type string, float, or integer.|n + This transform changes annotations to 'Label' if they are categorical, and to 'Caption' + if they are of type string, float, or integer. This is particularly useful when working + with tabular data that needs to be converted into a format suitable for specific machine + learning tasks.|n |n Examples:|n - - Convert type of `title` annotation|n + - Converts the type of a `title` annotation:|n .. 
code-block:: @@ -1847,7 +1947,7 @@ def transform_item(self, item: DatasetItem): return self.wrap_item(item, annotations=annotations) -class Clean(ItemTransform): +class Clean(TabularTransform): """ A class used to refine the media items in a dataset.|n |n @@ -1866,8 +1966,10 @@ class Clean(ItemTransform): def __init__( self, extractor: IDataset, + batch_size: int = 1, + num_workers: int = 0, ): - super().__init__(extractor) + super().__init__(extractor, batch_size, num_workers) self._outlier_value = {} self._missing_value = {} @@ -1957,7 +2059,7 @@ def refine_tabular_media(self, item): or item.media.table.dtype(col) is int ] - df[str_cols] = df[str_cols].applymap(lambda x: self.remove_unnecessary_char(x)) + df[str_cols] = df[str_cols].map(lambda x: self.remove_unnecessary_char(x)) if not (self._outlier_value): self.check_outlier(media.table.data[float_cols + int_cols], float_cols + int_cols) @@ -2004,3 +2106,64 @@ def transform_item(self, item): refined_annotations.append(ann) return self.wrap_item(item, media=refined_media, annotations=refined_annotations) + + +class PseudoLabeling(ItemTransform): + """ + A class used to assign pseudo-labels to items in a dataset based on + their similarity to predefined labels.|n + |n + This class leverages hashing techniques to compute the similarity + between dataset items and a set of predefined labels.|n + It assigns the most similar label as a pseudo-label to each item. + This is particularly useful in semi-supervised + learning scenarios where some labels are missing or uncertain.|n + |n + Attributes:|n + - extractor : IDataset|n + The dataset extractor that provides access to dataset items and their annotations.|n + - labels : Optional[List[str]]|n + A list of label names to be used for pseudo-labeling. + If not provided, all available labels in the dataset will be used.|n + - explorer : Optional[Explorer]|n + An optional Explorer object used to compute hash keys for items and labels. 
+ If not provided, a new Explorer will be created.|n + """ + + def __init__( + self, + extractor: IDataset, + labels: Optional[List[str]] = None, + explorer: Optional[Explorer] = None, + ): + super().__init__(extractor) + + self._categories = self._extractor.categories() + self._labels = labels + self._explorer = explorer + self._label_indices = self._categories[AnnotationType.label]._indices + + if not self._labels: + self._labels = list(self._label_indices.keys()) + if not self._explorer: + self._explorer = Explorer(Dataset.from_iterable(list(self._extractor))) + + label_hashkeys = [ + np.unpackbits(self._explorer._get_hash_key_from_text_query(label).hash_key, axis=-1) + for label in self._labels + ] + self._label_hashkeys = np.stack(label_hashkeys, axis=0) + + def categories(self): + return self._categories + + def transform_item(self, item: DatasetItem): + hashkey_ = np.unpackbits(self._explorer._get_hash_key_from_item_query(item).hash_key) + logits = calculate_hamming(hashkey_, self._label_hashkeys) + inverse_distances = 1.0 / (logits + 1e-6) + probs = inverse_distances / np.sum(inverse_distances) + ind = np.argsort(probs)[::-1] + + pseudo = np.array(self._labels)[ind][0] + pseudo_annotation = [Label(label=self._label_indices[pseudo])] + return self.wrap_item(item, annotations=pseudo_annotation) diff --git a/src/datumaro/version.py b/src/datumaro/version.py index b3de187d2e..7972985981 100644 --- a/src/datumaro/version.py +++ b/src/datumaro/version.py @@ -1 +1 @@ -__version__ = "1.9.0rc0" +__version__ = "1.10.0rc1" diff --git a/tests/assets/explore_dataset/bird/ILSVRC2012_val_00003676.JPEG b/tests/assets/explore_dataset/bird/0.JPEG similarity index 100% rename from tests/assets/explore_dataset/bird/ILSVRC2012_val_00003676.JPEG rename to tests/assets/explore_dataset/bird/0.JPEG diff --git a/tests/assets/explore_dataset/bird/1.JPEG b/tests/assets/explore_dataset/bird/1.JPEG new file mode 100755 index 0000000000..8a0ed69866 Binary files /dev/null and 
b/tests/assets/explore_dataset/bird/1.JPEG differ diff --git a/tests/assets/explore_dataset/bird/2.JPEG b/tests/assets/explore_dataset/bird/2.JPEG new file mode 100755 index 0000000000..8a0ed69866 Binary files /dev/null and b/tests/assets/explore_dataset/bird/2.JPEG differ diff --git a/tests/assets/explore_dataset/bird/3.JPEG b/tests/assets/explore_dataset/bird/3.JPEG new file mode 100755 index 0000000000..8a0ed69866 Binary files /dev/null and b/tests/assets/explore_dataset/bird/3.JPEG differ diff --git a/tests/assets/explore_dataset/bird/4.JPEG b/tests/assets/explore_dataset/bird/4.JPEG new file mode 100755 index 0000000000..8a0ed69866 Binary files /dev/null and b/tests/assets/explore_dataset/bird/4.JPEG differ diff --git a/tests/assets/explore_dataset/bird/5.JPEG b/tests/assets/explore_dataset/bird/5.JPEG new file mode 100755 index 0000000000..8a0ed69866 Binary files /dev/null and b/tests/assets/explore_dataset/bird/5.JPEG differ diff --git a/tests/assets/explore_dataset/bird/ILSVRC2012_val_00001563.JPEG b/tests/assets/explore_dataset/bird/ILSVRC2012_val_00001563.JPEG deleted file mode 100755 index 06fad85759..0000000000 Binary files a/tests/assets/explore_dataset/bird/ILSVRC2012_val_00001563.JPEG and /dev/null differ diff --git a/tests/assets/explore_dataset/bird/ILSVRC2012_val_00019750.JPEG b/tests/assets/explore_dataset/bird/ILSVRC2012_val_00019750.JPEG deleted file mode 100755 index 1367daaab7..0000000000 Binary files a/tests/assets/explore_dataset/bird/ILSVRC2012_val_00019750.JPEG and /dev/null differ diff --git a/tests/assets/explore_dataset/bird/ILSVRC2012_val_00033708.JPEG b/tests/assets/explore_dataset/bird/ILSVRC2012_val_00033708.JPEG deleted file mode 100755 index 3d52b00ff1..0000000000 Binary files a/tests/assets/explore_dataset/bird/ILSVRC2012_val_00033708.JPEG and /dev/null differ diff --git a/tests/assets/explore_dataset/bird/ILSVRC2012_val_00044891.JPEG b/tests/assets/explore_dataset/bird/ILSVRC2012_val_00044891.JPEG deleted file mode 100755 index 
ef4e8e8863..0000000000 Binary files a/tests/assets/explore_dataset/bird/ILSVRC2012_val_00044891.JPEG and /dev/null differ diff --git a/tests/assets/explore_dataset/bird/ILSVRC2012_val_00045503.JPEG b/tests/assets/explore_dataset/bird/ILSVRC2012_val_00045503.JPEG deleted file mode 100755 index 564b29da5b..0000000000 Binary files a/tests/assets/explore_dataset/bird/ILSVRC2012_val_00045503.JPEG and /dev/null differ diff --git a/tests/assets/explore_dataset/cat/ILSVRC2012_val_00024500.JPEG b/tests/assets/explore_dataset/cat/0.JPEG similarity index 100% rename from tests/assets/explore_dataset/cat/ILSVRC2012_val_00024500.JPEG rename to tests/assets/explore_dataset/cat/0.JPEG diff --git a/tests/assets/explore_dataset/cat/1.JPEG b/tests/assets/explore_dataset/cat/1.JPEG new file mode 100755 index 0000000000..1f0266df0e Binary files /dev/null and b/tests/assets/explore_dataset/cat/1.JPEG differ diff --git a/tests/assets/explore_dataset/cat/2.JPEG b/tests/assets/explore_dataset/cat/2.JPEG new file mode 100755 index 0000000000..1f0266df0e Binary files /dev/null and b/tests/assets/explore_dataset/cat/2.JPEG differ diff --git a/tests/assets/explore_dataset/cat/3.JPEG b/tests/assets/explore_dataset/cat/3.JPEG new file mode 100755 index 0000000000..1f0266df0e Binary files /dev/null and b/tests/assets/explore_dataset/cat/3.JPEG differ diff --git a/tests/assets/explore_dataset/cat/4.JPEG b/tests/assets/explore_dataset/cat/4.JPEG new file mode 100755 index 0000000000..1f0266df0e Binary files /dev/null and b/tests/assets/explore_dataset/cat/4.JPEG differ diff --git a/tests/assets/explore_dataset/cat/5.JPEG b/tests/assets/explore_dataset/cat/5.JPEG new file mode 100755 index 0000000000..1f0266df0e Binary files /dev/null and b/tests/assets/explore_dataset/cat/5.JPEG differ diff --git a/tests/assets/explore_dataset/cat/ILSVRC2012_val_00004894.JPEG b/tests/assets/explore_dataset/cat/ILSVRC2012_val_00004894.JPEG deleted file mode 100755 index cdc827fdd1..0000000000 Binary files 
a/tests/assets/explore_dataset/cat/ILSVRC2012_val_00004894.JPEG and /dev/null differ diff --git a/tests/assets/explore_dataset/cat/ILSVRC2012_val_00010218.JPEG b/tests/assets/explore_dataset/cat/ILSVRC2012_val_00010218.JPEG deleted file mode 100755 index b5b21d2a90..0000000000 Binary files a/tests/assets/explore_dataset/cat/ILSVRC2012_val_00010218.JPEG and /dev/null differ diff --git a/tests/assets/explore_dataset/cat/ILSVRC2012_val_00015372.JPEG b/tests/assets/explore_dataset/cat/ILSVRC2012_val_00015372.JPEG deleted file mode 100755 index de06ec2003..0000000000 Binary files a/tests/assets/explore_dataset/cat/ILSVRC2012_val_00015372.JPEG and /dev/null differ diff --git a/tests/assets/explore_dataset/cat/ILSVRC2012_val_00015898.JPEG b/tests/assets/explore_dataset/cat/ILSVRC2012_val_00015898.JPEG deleted file mode 100755 index 7b9949b2b7..0000000000 Binary files a/tests/assets/explore_dataset/cat/ILSVRC2012_val_00015898.JPEG and /dev/null differ diff --git a/tests/assets/explore_dataset/cat/ILSVRC2012_val_00049996.JPEG b/tests/assets/explore_dataset/cat/ILSVRC2012_val_00049996.JPEG deleted file mode 100755 index acf01454d6..0000000000 Binary files a/tests/assets/explore_dataset/cat/ILSVRC2012_val_00049996.JPEG and /dev/null differ diff --git a/tests/assets/explore_dataset/dog/ILSVRC2012_val_00001698.JPEG b/tests/assets/explore_dataset/dog/0.JPEG similarity index 100% rename from tests/assets/explore_dataset/dog/ILSVRC2012_val_00001698.JPEG rename to tests/assets/explore_dataset/dog/0.JPEG diff --git a/tests/assets/explore_dataset/dog/1.JPEG b/tests/assets/explore_dataset/dog/1.JPEG new file mode 100755 index 0000000000..b370f35998 Binary files /dev/null and b/tests/assets/explore_dataset/dog/1.JPEG differ diff --git a/tests/assets/explore_dataset/dog/2.JPEG b/tests/assets/explore_dataset/dog/2.JPEG new file mode 100755 index 0000000000..b370f35998 Binary files /dev/null and b/tests/assets/explore_dataset/dog/2.JPEG differ diff --git 
a/tests/assets/explore_dataset/dog/3.JPEG b/tests/assets/explore_dataset/dog/3.JPEG new file mode 100755 index 0000000000..b370f35998 Binary files /dev/null and b/tests/assets/explore_dataset/dog/3.JPEG differ diff --git a/tests/assets/explore_dataset/dog/4.JPEG b/tests/assets/explore_dataset/dog/4.JPEG new file mode 100755 index 0000000000..b370f35998 Binary files /dev/null and b/tests/assets/explore_dataset/dog/4.JPEG differ diff --git a/tests/assets/explore_dataset/dog/5.JPEG b/tests/assets/explore_dataset/dog/5.JPEG new file mode 100755 index 0000000000..b370f35998 Binary files /dev/null and b/tests/assets/explore_dataset/dog/5.JPEG differ diff --git a/tests/assets/explore_dataset/dog/ILSVRC2012_val_00002749.JPEG b/tests/assets/explore_dataset/dog/ILSVRC2012_val_00002749.JPEG deleted file mode 100755 index e638550f1b..0000000000 Binary files a/tests/assets/explore_dataset/dog/ILSVRC2012_val_00002749.JPEG and /dev/null differ diff --git a/tests/assets/explore_dataset/dog/ILSVRC2012_val_00016303.JPEG b/tests/assets/explore_dataset/dog/ILSVRC2012_val_00016303.JPEG deleted file mode 100755 index a655729100..0000000000 Binary files a/tests/assets/explore_dataset/dog/ILSVRC2012_val_00016303.JPEG and /dev/null differ diff --git a/tests/assets/explore_dataset/dog/ILSVRC2012_val_00021389.JPEG b/tests/assets/explore_dataset/dog/ILSVRC2012_val_00021389.JPEG deleted file mode 100755 index 6411284302..0000000000 Binary files a/tests/assets/explore_dataset/dog/ILSVRC2012_val_00021389.JPEG and /dev/null differ diff --git a/tests/assets/explore_dataset/dog/ILSVRC2012_val_00036055.JPEG b/tests/assets/explore_dataset/dog/ILSVRC2012_val_00036055.JPEG deleted file mode 100755 index 618835bafb..0000000000 Binary files a/tests/assets/explore_dataset/dog/ILSVRC2012_val_00036055.JPEG and /dev/null differ diff --git a/tests/assets/explore_dataset/dog/ILSVRC2012_val_00047918.JPEG b/tests/assets/explore_dataset/dog/ILSVRC2012_val_00047918.JPEG deleted file mode 100755 index 
78f81dd202..0000000000 Binary files a/tests/assets/explore_dataset/dog/ILSVRC2012_val_00047918.JPEG and /dev/null differ diff --git a/tests/assets/explore_dataset/monkey/ILSVRC2012_val_00016946.JPEG b/tests/assets/explore_dataset/monkey/0.JPEG similarity index 100% rename from tests/assets/explore_dataset/monkey/ILSVRC2012_val_00016946.JPEG rename to tests/assets/explore_dataset/monkey/0.JPEG diff --git a/tests/assets/explore_dataset/monkey/1.JPEG b/tests/assets/explore_dataset/monkey/1.JPEG new file mode 100755 index 0000000000..65dce62178 Binary files /dev/null and b/tests/assets/explore_dataset/monkey/1.JPEG differ diff --git a/tests/assets/explore_dataset/monkey/2.JPEG b/tests/assets/explore_dataset/monkey/2.JPEG new file mode 100755 index 0000000000..65dce62178 Binary files /dev/null and b/tests/assets/explore_dataset/monkey/2.JPEG differ diff --git a/tests/assets/explore_dataset/monkey/3.JPEG b/tests/assets/explore_dataset/monkey/3.JPEG new file mode 100755 index 0000000000..65dce62178 Binary files /dev/null and b/tests/assets/explore_dataset/monkey/3.JPEG differ diff --git a/tests/assets/explore_dataset/monkey/4.JPEG b/tests/assets/explore_dataset/monkey/4.JPEG new file mode 100755 index 0000000000..65dce62178 Binary files /dev/null and b/tests/assets/explore_dataset/monkey/4.JPEG differ diff --git a/tests/assets/explore_dataset/monkey/5.JPEG b/tests/assets/explore_dataset/monkey/5.JPEG new file mode 100755 index 0000000000..65dce62178 Binary files /dev/null and b/tests/assets/explore_dataset/monkey/5.JPEG differ diff --git a/tests/assets/explore_dataset/monkey/ILSVRC2012_val_00004458.JPEG b/tests/assets/explore_dataset/monkey/ILSVRC2012_val_00004458.JPEG deleted file mode 100755 index da5af498ee..0000000000 Binary files a/tests/assets/explore_dataset/monkey/ILSVRC2012_val_00004458.JPEG and /dev/null differ diff --git a/tests/assets/explore_dataset/monkey/ILSVRC2012_val_00021490.JPEG b/tests/assets/explore_dataset/monkey/ILSVRC2012_val_00021490.JPEG deleted 
file mode 100755 index e4db6fee06..0000000000 Binary files a/tests/assets/explore_dataset/monkey/ILSVRC2012_val_00021490.JPEG and /dev/null differ diff --git a/tests/assets/explore_dataset/monkey/ILSVRC2012_val_00021520.JPEG b/tests/assets/explore_dataset/monkey/ILSVRC2012_val_00021520.JPEG deleted file mode 100755 index 748e0cb50f..0000000000 Binary files a/tests/assets/explore_dataset/monkey/ILSVRC2012_val_00021520.JPEG and /dev/null differ diff --git a/tests/assets/explore_dataset/monkey/ILSVRC2012_val_00044586.JPEG b/tests/assets/explore_dataset/monkey/ILSVRC2012_val_00044586.JPEG deleted file mode 100755 index 3cd8792bf1..0000000000 Binary files a/tests/assets/explore_dataset/monkey/ILSVRC2012_val_00044586.JPEG and /dev/null differ diff --git a/tests/assets/explore_dataset/monkey/ILSVRC2012_val_00047365.JPEG b/tests/assets/explore_dataset/monkey/ILSVRC2012_val_00047365.JPEG deleted file mode 100755 index 7b37be01a4..0000000000 Binary files a/tests/assets/explore_dataset/monkey/ILSVRC2012_val_00047365.JPEG and /dev/null differ diff --git a/tests/assets/kaggle_dataset/image_csv_multi_label/ann.csv b/tests/assets/kaggle_dataset/image_csv_multi_label/ann.csv new file mode 100644 index 0000000000..57b6540a15 --- /dev/null +++ b/tests/assets/kaggle_dataset/image_csv_multi_label/ann.csv @@ -0,0 +1,7 @@ +image_name,dog,cat,person +1.jpg,1,0,0 +2.jpg,0,1,0 +3.jpg,0,0,1 +4.jpg,1,1,0 +5.jpg,1,0,1 +6.jpg,0,1,1 diff --git a/tests/assets/kaggle_dataset/image_csv_multi_label/ann_wo_ext.csv b/tests/assets/kaggle_dataset/image_csv_multi_label/ann_wo_ext.csv new file mode 100644 index 0000000000..dd01be80e0 --- /dev/null +++ b/tests/assets/kaggle_dataset/image_csv_multi_label/ann_wo_ext.csv @@ -0,0 +1,7 @@ +image_name,dog,cat,person +1,1,0,0 +2,0,1,0 +3,0,0,1 +4,1,1,0 +5,1,0,1 +6,0,1,1 diff --git a/tests/assets/kaggle_dataset/image_csv_multi_label/images/1.jpg b/tests/assets/kaggle_dataset/image_csv_multi_label/images/1.jpg new file mode 100644 index 0000000000..8689b95631 
Binary files /dev/null and b/tests/assets/kaggle_dataset/image_csv_multi_label/images/1.jpg differ diff --git a/tests/assets/kaggle_dataset/image_csv_multi_label/images/2.jpg b/tests/assets/kaggle_dataset/image_csv_multi_label/images/2.jpg new file mode 100644 index 0000000000..8689b95631 Binary files /dev/null and b/tests/assets/kaggle_dataset/image_csv_multi_label/images/2.jpg differ diff --git a/tests/assets/kaggle_dataset/image_csv_multi_label/images/3.jpg b/tests/assets/kaggle_dataset/image_csv_multi_label/images/3.jpg new file mode 100644 index 0000000000..8689b95631 Binary files /dev/null and b/tests/assets/kaggle_dataset/image_csv_multi_label/images/3.jpg differ diff --git a/tests/assets/kaggle_dataset/image_csv_multi_label/images/4.jpg b/tests/assets/kaggle_dataset/image_csv_multi_label/images/4.jpg new file mode 100644 index 0000000000..8689b95631 Binary files /dev/null and b/tests/assets/kaggle_dataset/image_csv_multi_label/images/4.jpg differ diff --git a/tests/assets/kaggle_dataset/image_csv_multi_label/images/5.jpg b/tests/assets/kaggle_dataset/image_csv_multi_label/images/5.jpg new file mode 100644 index 0000000000..8689b95631 Binary files /dev/null and b/tests/assets/kaggle_dataset/image_csv_multi_label/images/5.jpg differ diff --git a/tests/assets/kaggle_dataset/image_csv_multi_label/images/6.jpg b/tests/assets/kaggle_dataset/image_csv_multi_label/images/6.jpg new file mode 100644 index 0000000000..8689b95631 Binary files /dev/null and b/tests/assets/kaggle_dataset/image_csv_multi_label/images/6.jpg differ diff --git a/tests/assets/kitti_dataset/kitti_3d/calib/000001.txt b/tests/assets/kitti_dataset/kitti_3d/calib/000001.txt new file mode 100644 index 0000000000..367f0b263a --- /dev/null +++ b/tests/assets/kitti_dataset/kitti_3d/calib/000001.txt @@ -0,0 +1,7 @@ +P0: 7.215377000000e+02 0.000000000000e+00 6.095593000000e+02 0.000000000000e+00 0.000000000000e+00 7.215377000000e+02 1.728540000000e+02 0.000000000000e+00 0.000000000000e+00 
0.000000000000e+00 1.000000000000e+00 0.000000000000e+00 +P1: 7.215377000000e+02 0.000000000000e+00 6.095593000000e+02 -3.875744000000e+02 0.000000000000e+00 7.215377000000e+02 1.728540000000e+02 0.000000000000e+00 0.000000000000e+00 0.000000000000e+00 1.000000000000e+00 0.000000000000e+00 +P2: 7.215377000000e+02 0.000000000000e+00 6.095593000000e+02 4.485728000000e+01 0.000000000000e+00 7.215377000000e+02 1.728540000000e+02 2.163791000000e-01 0.000000000000e+00 0.000000000000e+00 1.000000000000e+00 2.745884000000e-03 +P3: 7.215377000000e+02 0.000000000000e+00 6.095593000000e+02 -3.395242000000e+02 0.000000000000e+00 7.215377000000e+02 1.728540000000e+02 2.199936000000e+00 0.000000000000e+00 0.000000000000e+00 1.000000000000e+00 2.729905000000e-03 +R0_rect: 9.999239000000e-01 9.837760000000e-03 -7.445048000000e-03 -9.869795000000e-03 9.999421000000e-01 -4.278459000000e-03 7.402527000000e-03 4.351614000000e-03 9.999631000000e-01 +Tr_velo_to_cam: 7.533745000000e-03 -9.999714000000e-01 -6.166020000000e-04 -4.069766000000e-03 1.480249000000e-02 7.280733000000e-04 -9.998902000000e-01 -7.631618000000e-02 9.998621000000e-01 7.523790000000e-03 1.480755000000e-02 -2.717806000000e-01 +Tr_imu_to_velo: 9.999976000000e-01 7.553071000000e-04 -2.035826000000e-03 -8.086759000000e-01 -7.854027000000e-04 9.998898000000e-01 -1.482298000000e-02 3.195559000000e-01 2.024406000000e-03 1.482454000000e-02 9.998881000000e-01 -7.997231000000e-01 \ No newline at end of file diff --git a/tests/assets/kitti_dataset/kitti_3d/image_2/000001.png b/tests/assets/kitti_dataset/kitti_3d/image_2/000001.png new file mode 100644 index 0000000000..e6f3cff877 Binary files /dev/null and b/tests/assets/kitti_dataset/kitti_3d/image_2/000001.png differ diff --git a/tests/assets/kitti_dataset/kitti_3d/label_2/000001.txt b/tests/assets/kitti_dataset/kitti_3d/label_2/000001.txt new file mode 100644 index 0000000000..a2fdc0fa6f --- /dev/null +++ b/tests/assets/kitti_dataset/kitti_3d/label_2/000001.txt @@ -0,0 +1,3 
@@ +Truck 0.00 0 -1.57 600 150 630 190 2.85 2.63 12.34 0.47 1.49 69.44 -1.56 +Car 0.00 3 -1.65 650 160 700 200 1.86 0.60 2.02 4.59 1.32 45.84 -1.55 +DontCare -1 -1 -10 500 170 590 190 -1 -1 -1 -1000 -1000 -1000 -10 diff --git a/tests/assets/kitti_dataset/kitti_3d_with_subset/calib/test/000002.txt b/tests/assets/kitti_dataset/kitti_3d_with_subset/calib/test/000002.txt new file mode 100755 index 0000000000..2b8496d5be --- /dev/null +++ b/tests/assets/kitti_dataset/kitti_3d_with_subset/calib/test/000002.txt @@ -0,0 +1,7 @@ +P0: 7.215377000000e+02 0.000000000000e+00 6.095593000000e+02 0.000000000000e+00 0.000000000000e+00 7.215377000000e+02 1.728540000000e+02 0.000000000000e+00 0.000000000000e+00 0.000000000000e+00 1.000000000000e+00 0.000000000000e+00 +P1: 7.215377000000e+02 0.000000000000e+00 6.095593000000e+02 -3.875744000000e+02 0.000000000000e+00 7.215377000000e+02 1.728540000000e+02 0.000000000000e+00 0.000000000000e+00 0.000000000000e+00 1.000000000000e+00 0.000000000000e+00 +P2: 7.215377000000e+02 0.000000000000e+00 6.095593000000e+02 4.485728000000e+01 0.000000000000e+00 7.215377000000e+02 1.728540000000e+02 2.163791000000e-01 0.000000000000e+00 0.000000000000e+00 1.000000000000e+00 2.745884000000e-03 +P3: 7.215377000000e+02 0.000000000000e+00 6.095593000000e+02 -3.395242000000e+02 0.000000000000e+00 7.215377000000e+02 1.728540000000e+02 2.199936000000e+00 0.000000000000e+00 0.000000000000e+00 1.000000000000e+00 2.729905000000e-03 +R0_rect: 9.999239000000e-01 9.837760000000e-03 -7.445048000000e-03 -9.869795000000e-03 9.999421000000e-01 -4.278459000000e-03 7.402527000000e-03 4.351614000000e-03 9.999631000000e-01 +Tr_velo_to_cam: 7.533745000000e-03 -9.999714000000e-01 -6.166020000000e-04 -4.069766000000e-03 1.480249000000e-02 7.280733000000e-04 -9.998902000000e-01 -7.631618000000e-02 9.998621000000e-01 7.523790000000e-03 1.480755000000e-02 -2.717806000000e-01 +Tr_imu_to_velo: 9.999976000000e-01 7.553071000000e-04 -2.035826000000e-03 -8.086759000000e-01 
-7.854027000000e-04 9.998898000000e-01 -1.482298000000e-02 3.195559000000e-01 2.024406000000e-03 1.482454000000e-02 9.998881000000e-01 -7.997231000000e-01 diff --git a/tests/assets/kitti_dataset/kitti_3d_with_subset/calib/train/000000.txt b/tests/assets/kitti_dataset/kitti_3d_with_subset/calib/train/000000.txt new file mode 100755 index 0000000000..108a6b1170 --- /dev/null +++ b/tests/assets/kitti_dataset/kitti_3d_with_subset/calib/train/000000.txt @@ -0,0 +1,7 @@ +P0: 7.070493000000e+02 0.000000000000e+00 6.040814000000e+02 0.000000000000e+00 0.000000000000e+00 7.070493000000e+02 1.805066000000e+02 0.000000000000e+00 0.000000000000e+00 0.000000000000e+00 1.000000000000e+00 0.000000000000e+00 +P1: 7.070493000000e+02 0.000000000000e+00 6.040814000000e+02 -3.797842000000e+02 0.000000000000e+00 7.070493000000e+02 1.805066000000e+02 0.000000000000e+00 0.000000000000e+00 0.000000000000e+00 1.000000000000e+00 0.000000000000e+00 +P2: 7.070493000000e+02 0.000000000000e+00 6.040814000000e+02 4.575831000000e+01 0.000000000000e+00 7.070493000000e+02 1.805066000000e+02 -3.454157000000e-01 0.000000000000e+00 0.000000000000e+00 1.000000000000e+00 4.981016000000e-03 +P3: 7.070493000000e+02 0.000000000000e+00 6.040814000000e+02 -3.341081000000e+02 0.000000000000e+00 7.070493000000e+02 1.805066000000e+02 2.330660000000e+00 0.000000000000e+00 0.000000000000e+00 1.000000000000e+00 3.201153000000e-03 +R0_rect: 9.999128000000e-01 1.009263000000e-02 -8.511932000000e-03 -1.012729000000e-02 9.999406000000e-01 -4.037671000000e-03 8.470675000000e-03 4.123522000000e-03 9.999556000000e-01 +Tr_velo_to_cam: 6.927964000000e-03 -9.999722000000e-01 -2.757829000000e-03 -2.457729000000e-02 -1.162982000000e-03 2.749836000000e-03 -9.999955000000e-01 -6.127237000000e-02 9.999753000000e-01 6.931141000000e-03 -1.143899000000e-03 -3.321029000000e-01 +Tr_imu_to_velo: 9.999976000000e-01 7.553071000000e-04 -2.035826000000e-03 -8.086759000000e-01 -7.854027000000e-04 9.998898000000e-01 -1.482298000000e-02 
3.195559000000e-01 2.024406000000e-03 1.482454000000e-02 9.998881000000e-01 -7.997231000000e-01 diff --git a/tests/assets/kitti_dataset/kitti_3d_with_subset/calib/val/000001.txt b/tests/assets/kitti_dataset/kitti_3d_with_subset/calib/val/000001.txt new file mode 100755 index 0000000000..2b8496d5be --- /dev/null +++ b/tests/assets/kitti_dataset/kitti_3d_with_subset/calib/val/000001.txt @@ -0,0 +1,7 @@ +P0: 7.215377000000e+02 0.000000000000e+00 6.095593000000e+02 0.000000000000e+00 0.000000000000e+00 7.215377000000e+02 1.728540000000e+02 0.000000000000e+00 0.000000000000e+00 0.000000000000e+00 1.000000000000e+00 0.000000000000e+00 +P1: 7.215377000000e+02 0.000000000000e+00 6.095593000000e+02 -3.875744000000e+02 0.000000000000e+00 7.215377000000e+02 1.728540000000e+02 0.000000000000e+00 0.000000000000e+00 0.000000000000e+00 1.000000000000e+00 0.000000000000e+00 +P2: 7.215377000000e+02 0.000000000000e+00 6.095593000000e+02 4.485728000000e+01 0.000000000000e+00 7.215377000000e+02 1.728540000000e+02 2.163791000000e-01 0.000000000000e+00 0.000000000000e+00 1.000000000000e+00 2.745884000000e-03 +P3: 7.215377000000e+02 0.000000000000e+00 6.095593000000e+02 -3.395242000000e+02 0.000000000000e+00 7.215377000000e+02 1.728540000000e+02 2.199936000000e+00 0.000000000000e+00 0.000000000000e+00 1.000000000000e+00 2.729905000000e-03 +R0_rect: 9.999239000000e-01 9.837760000000e-03 -7.445048000000e-03 -9.869795000000e-03 9.999421000000e-01 -4.278459000000e-03 7.402527000000e-03 4.351614000000e-03 9.999631000000e-01 +Tr_velo_to_cam: 7.533745000000e-03 -9.999714000000e-01 -6.166020000000e-04 -4.069766000000e-03 1.480249000000e-02 7.280733000000e-04 -9.998902000000e-01 -7.631618000000e-02 9.998621000000e-01 7.523790000000e-03 1.480755000000e-02 -2.717806000000e-01 +Tr_imu_to_velo: 9.999976000000e-01 7.553071000000e-04 -2.035826000000e-03 -8.086759000000e-01 -7.854027000000e-04 9.998898000000e-01 -1.482298000000e-02 3.195559000000e-01 2.024406000000e-03 1.482454000000e-02 
9.998881000000e-01 -7.997231000000e-01 diff --git a/tests/assets/kitti_dataset/kitti_3d_with_subset/image_2/test/000002.png b/tests/assets/kitti_dataset/kitti_3d_with_subset/image_2/test/000002.png new file mode 100755 index 0000000000..e6f3cff877 Binary files /dev/null and b/tests/assets/kitti_dataset/kitti_3d_with_subset/image_2/test/000002.png differ diff --git a/tests/assets/kitti_dataset/kitti_3d_with_subset/image_2/train/000000.png b/tests/assets/kitti_dataset/kitti_3d_with_subset/image_2/train/000000.png new file mode 100755 index 0000000000..e6f3cff877 Binary files /dev/null and b/tests/assets/kitti_dataset/kitti_3d_with_subset/image_2/train/000000.png differ diff --git a/tests/assets/kitti_dataset/kitti_3d_with_subset/image_2/val/000001.png b/tests/assets/kitti_dataset/kitti_3d_with_subset/image_2/val/000001.png new file mode 100755 index 0000000000..e6f3cff877 Binary files /dev/null and b/tests/assets/kitti_dataset/kitti_3d_with_subset/image_2/val/000001.png differ diff --git a/tests/assets/kitti_dataset/kitti_3d_with_subset/label_2/test/000002.txt b/tests/assets/kitti_dataset/kitti_3d_with_subset/label_2/test/000002.txt new file mode 100644 index 0000000000..3aca4b3e55 --- /dev/null +++ b/tests/assets/kitti_dataset/kitti_3d_with_subset/label_2/test/000002.txt @@ -0,0 +1,2 @@ +Car 0.88 3 -0.69 0 190 400 380 1.60 1.57 3.23 -2.70 1.74 3.68 -1.29 +DontCare -1 -1 -10 800 160 825 185 -1 -1 -1 -1000 -1000 -1000 -10 diff --git a/tests/assets/kitti_dataset/kitti_3d_with_subset/label_2/train/000000.txt b/tests/assets/kitti_dataset/kitti_3d_with_subset/label_2/train/000000.txt new file mode 100644 index 0000000000..9d035bc092 --- /dev/null +++ b/tests/assets/kitti_dataset/kitti_3d_with_subset/label_2/train/000000.txt @@ -0,0 +1 @@ +Pedestrian 0.00 0 -0.20 700 150 800 300 1.89 0.48 1.20 1.84 1.47 8.41 0.01 diff --git a/tests/assets/kitti_dataset/kitti_3d_with_subset/label_2/val/000001.txt b/tests/assets/kitti_dataset/kitti_3d_with_subset/label_2/val/000001.txt new 
file mode 100644 index 0000000000..2bac65fc4a --- /dev/null +++ b/tests/assets/kitti_dataset/kitti_3d_with_subset/label_2/val/000001.txt @@ -0,0 +1,2 @@ +Pedestrian 0.00 0 1.94 330 180 360 240 1.87 0.96 0.65 -8.50 2.07 23.02 1.59 +DontCare -1 -1 -10 600 170 620 185 -1 -1 -1 -1000 -1000 -1000 -10 diff --git a/tests/integration/cli/test_kitti_raw_format.py b/tests/integration/cli/test_kitti_raw_format.py index 884cc02708..f7810bc981 100644 --- a/tests/integration/cli/test_kitti_raw_format.py +++ b/tests/integration/cli/test_kitti_raw_format.py @@ -33,13 +33,13 @@ def test_can_convert_to_kitti_raw(self): annotations=[ Cuboid3d( position=[1, 2, 3], - scale=[7.95, -3.62, -1.03], + scale=[-3.62, 7.95, -1.03], label=1, attributes={"occluded": False, "track_id": 1}, ), Cuboid3d( position=[1, 1, 0], - scale=[8.34, 23.01, -0.76], + scale=[23.01, 8.34, -0.76], label=0, attributes={"occluded": False, "track_id": 2}, ), @@ -65,7 +65,7 @@ def test_can_convert_to_kitti_raw(self): annotations=[ Cuboid3d( position=[0, 1, 0], - scale=[8.34, 23.01, -0.76], + scale=[23.01, 8.34, -0.76], rotation=[1, 1, 3], label=0, attributes={"occluded": True, "track_id": 2}, @@ -92,7 +92,7 @@ def test_can_convert_to_kitti_raw(self): annotations=[ Cuboid3d( position=[1, 2, 3], - scale=[-9.41, 13.54, 0.24], + scale=[13.54, -9.41, 0.24], label=1, attributes={"occluded": False, "track_id": 3}, ) diff --git a/tests/unit/components/test_transformer.py b/tests/unit/components/test_transformer.py index 197e6b317a..2055e01fe0 100644 --- a/tests/unit/components/test_transformer.py +++ b/tests/unit/components/test_transformer.py @@ -2,7 +2,7 @@ # # SPDX-License-Identifier: MIT -from typing import List, Tuple +from typing import List, Optional, Tuple import pytest @@ -11,7 +11,7 @@ from datumaro.components.dataset import Dataset from datumaro.components.dataset_base import DatasetItem from datumaro.components.launcher import Launcher -from datumaro.components.transformer import ModelTransform +from 
datumaro.components.transformer import ModelTransform, TabularTransform class MockLauncher(Launcher): @@ -64,3 +64,30 @@ def test_model_transform( assert item.annotations == [Annotation(id=0), Annotation(id=1)] else: assert item.annotations == [Annotation(id=1)] + + +class TabularTransformTest: + @pytest.fixture + def fxt_dataset(self): + return Dataset.from_iterable( + [DatasetItem(id=f"item_{i}", annotations=[Annotation(id=0)]) for i in range(10)] + ) + + @pytest.mark.parametrize("batch_size", [1, 10]) + @pytest.mark.parametrize("num_workers", [0, 2]) + def test_tabular_transform(self, fxt_dataset, batch_size, num_workers): + class MockTabularTransform(TabularTransform): + def transform_item(self, item: DatasetItem) -> Optional[DatasetItem]: + # Mock transformation logic + item.annotations.append(Annotation(id=1)) + return item + + transform = MockTabularTransform( + extractor=fxt_dataset, + batch_size=batch_size, + num_workers=num_workers, + ) + + for idx, item in enumerate(transform): + assert item.id == f"item_{idx}" + assert item.annotations == [Annotation(id=0), Annotation(id=1)] diff --git a/tests/unit/data_formats/conftest.py b/tests/unit/data_formats/conftest.py index c4c8c32f95..12351ee037 100644 --- a/tests/unit/data_formats/conftest.py +++ b/tests/unit/data_formats/conftest.py @@ -10,6 +10,8 @@ from datumaro import Dataset +from tests.utils.test_utils import TestDir + @pytest.fixture def fxt_dummy_dataset(): @@ -35,12 +37,12 @@ def fxt_export_kwargs(): @pytest.fixture def fxt_dataset_dir_with_subset_dirs(test_dir: str, request: pytest.FixtureRequest): fxt_dataset_dir = request.param + with TestDir(f"{test_dir}_with_subsets") as new_test_dir: + for subset in ["train", "val", "test"]: + dst = os.path.join(new_test_dir, subset) + shutil.copytree(fxt_dataset_dir, dst) - for subset in ["train", "val", "test"]: - dst = os.path.join(test_dir, subset) - shutil.copytree(fxt_dataset_dir, dst) - - yield test_dir + yield new_test_dir @pytest.fixture diff --git 
a/tests/unit/data_formats/datumaro/conftest.py b/tests/unit/data_formats/datumaro/conftest.py index e600ae957c..71fc8b1cd0 100644 --- a/tests/unit/data_formats/datumaro/conftest.py +++ b/tests/unit/data_formats/datumaro/conftest.py @@ -15,6 +15,7 @@ AnnotationType, Bbox, Caption, + Cuboid2D, Cuboid3d, Ellipse, Label, @@ -90,7 +91,7 @@ def fxt_test_datumaro_format_dataset(): }, ), Points( - [1, 2, 2, 0, 1, 1], + [1, 2, 0, 0, 1, 1], label=0, id=5, z_order=4, @@ -98,6 +99,7 @@ def fxt_test_datumaro_format_dataset(): "x": 1, "y": "2", }, + visibility=[1, 0, 2], ), Mask( label=3, @@ -122,6 +124,25 @@ def fxt_test_datumaro_format_dataset(): "y": "2", }, ), + Cuboid2D( + [ + (1, 1), + (3, 1), + (3, 3), + (1, 3), + (1.5, 1.5), + (3.5, 1.5), + (3.5, 3.5), + (1.5, 3.5), + ], + label=3, + id=5, + z_order=2, + attributes={ + "x": 1, + "y": "2", + }, + ), ], ), DatasetItem( @@ -201,6 +222,191 @@ def fxt_test_datumaro_format_dataset(): ) +@pytest.fixture +def fxt_test_datumaro_format_dataset_with_path_separator(): + label_categories = LabelCategories(attributes={"a", "b", "score"}) + for i in range(5): + label_categories.add("cat" + str(i), attributes={"x", "y"}) + + mask_categories = MaskCategories(generate_colormap(len(label_categories.items))) + + points_categories = PointsCategories() + for index, _ in enumerate(label_categories.items): + points_categories.add(index, ["cat1", "cat2"], joints=[[0, 1]]) + + sep = os.path.sep + return Dataset.from_iterable( + [ + DatasetItem( + id="100/0", + subset=f"my{sep}train", + media=Image.from_numpy(data=np.ones((10, 6, 3))), + annotations=[ + Caption("hello", id=1), + Caption("world", id=2, group=5), + Label( + 2, + id=3, + attributes={ + "x": 1, + "y": "2", + }, + ), + Bbox( + 1, + 2, + 3, + 4, + label=4, + id=4, + z_order=1, + attributes={ + "score": 1.0, + }, + ), + Bbox( + 5, + 6, + 7, + 8, + id=5, + group=5, + attributes={ + "a": 1.5, + "b": "text", + }, + ), + Points( + [1, 2, 2, 0, 1, 1], + label=0, + id=5, + z_order=4, + 
attributes={ + "x": 1, + "y": "2", + }, + ), + Mask( + label=3, + id=5, + z_order=2, + image=np.ones((2, 3)), + attributes={ + "x": 1, + "y": "2", + }, + ), + Ellipse( + 5, + 6, + 7, + 8, + label=3, + id=5, + z_order=2, + attributes={ + "x": 1, + "y": "2", + }, + ), + Cuboid2D( + [ + (1, 1), + (3, 1), + (3, 3), + (1, 3), + (1.5, 1.5), + (3.5, 1.5), + (3.5, 3.5), + (1.5, 3.5), + ], + label=3, + id=5, + z_order=2, + attributes={ + "x": 1, + "y": "2", + }, + ), + ], + ), + DatasetItem( + id=21, + media=Image.from_numpy(data=np.ones((10, 6, 3))), + subset="train", + annotations=[ + Caption("test"), + Label(2), + Bbox(1, 2, 3, 4, label=5, id=42, group=42), + ], + ), + DatasetItem( + id=2, + media=Image.from_numpy(data=np.ones((10, 6, 3))), + subset=f"my{sep}val", + annotations=[ + PolyLine([1, 2, 3, 4, 5, 6, 7, 8], id=11, z_order=1), + Polygon([1, 2, 3, 4, 5, 6, 7, 8], id=12, z_order=4), + ], + ), + DatasetItem( + id="1/1", + media=Image.from_numpy(data=np.ones((10, 6, 3))), + subset="test", + annotations=[ + Cuboid3d( + [1.0, 2.0, 3.0], + [2.0, 2.0, 4.0], + [1.0, 3.0, 4.0], + id=6, + label=0, + attributes={"occluded": True}, + group=6, + ) + ], + ), + DatasetItem( + id=42, + media=Image.from_numpy(data=np.ones((10, 6, 3))), + subset=f"my{sep}test", + attributes={"a1": 5, "a2": "42"}, + ), + DatasetItem( + id=42, + media=Image.from_numpy(data=np.ones((10, 6, 3))), + # id and group integer value can be higher than 32bits limits (COCO instances). 
+ annotations=[ + Mask( + id=900100087038, group=900100087038, image=np.ones((2, 3), dtype=np.uint8) + ), + RleMask( + rle=mask_tools.encode(np.ones((2, 3), dtype=np.uint8, order="F")), + id=900100087038, + group=900100087038, + ), + ], + ), + DatasetItem( + id="1/b/c", + media=Image.from_file(path="1/b/c.qq", size=(2, 4)), + ), + ], + categories={ + AnnotationType.label: label_categories, + AnnotationType.mask: mask_categories, + AnnotationType.points: points_categories, + }, + infos={ + "string": "test", + "int": 0, + "float": 0.0, + "string_list": ["test0", "test1", "test2"], + "int_list": [0, 1, 2], + "float_list": [0.0, 0.1, 0.2], + }, + ) + + @pytest.fixture def fxt_test_datumaro_format_video_dataset(test_dir) -> Dataset: video_path = osp.join(test_dir, "video.avi") diff --git a/tests/unit/data_formats/datumaro/test_datumaro_format.py b/tests/unit/data_formats/datumaro/test_datumaro_format.py index 2492c072b6..bb03455b0d 100644 --- a/tests/unit/data_formats/datumaro/test_datumaro_format.py +++ b/tests/unit/data_formats/datumaro/test_datumaro_format.py @@ -14,6 +14,7 @@ from datumaro.components.dataset_base import DatasetItem from datumaro.components.environment import Environment +from datumaro.components.errors import PathSeparatorInSubsetNameError from datumaro.components.importer import DatasetImportError from datumaro.components.media import Image from datumaro.components.project import Dataset @@ -155,6 +156,31 @@ def test_can_save_and_load( stream=stream, ) + @mark_requirement(Requirements.DATUM_GENERAL_REQ) + @pytest.mark.parametrize("require_media", [True, False]) + @pytest.mark.parametrize("stream", [True, False]) + def test_cannot_export_dataset_with_subset_containing_path_separators( + self, + fxt_test_datumaro_format_dataset_with_path_separator, + test_dir, + fxt_import_kwargs, + fxt_export_kwargs, + stream, + require_media, + helper_tc, + ): + with pytest.raises(PathSeparatorInSubsetNameError): + self._test_save_and_load( + helper_tc, + 
fxt_test_datumaro_format_dataset_with_path_separator, + partial(self.exporter.convert, save_media=True, stream=stream, **fxt_export_kwargs), + test_dir, + compare=compare_datasets, + require_media=require_media, + importer_args=fxt_import_kwargs, + stream=stream, + ) + @mark_requirement(Requirements.DATUM_GENERAL_REQ) def test_export_video_only_once( self, diff --git a/tests/unit/data_formats/test_common_semantic_segmentation_format.py b/tests/unit/data_formats/test_common_semantic_segmentation_format.py index b7297e4885..1d03e9ba01 100644 --- a/tests/unit/data_formats/test_common_semantic_segmentation_format.py +++ b/tests/unit/data_formats/test_common_semantic_segmentation_format.py @@ -2,8 +2,10 @@ # # SPDX-License-Identifier: MIT +import os +import shutil from collections import OrderedDict -from typing import Any, Dict, Optional +from typing import Any, Dict import numpy as np import pytest @@ -11,6 +13,7 @@ from datumaro.components.annotation import Mask from datumaro.components.dataset import Dataset from datumaro.components.dataset_base import DatasetItem +from datumaro.components.errors import DatasetImportError from datumaro.components.media import Image from datumaro.plugins.data_formats.common_semantic_segmentation import ( CommonSemanticSegmentationImporter, @@ -143,6 +146,40 @@ def test_can_import( fxt_dataset_dir, fxt_expected_dataset, fxt_import_kwargs, request ) + @pytest.mark.parametrize( + ["fxt_dataset_dir", "fxt_expected_dataset", "fxt_import_kwargs"], + [ + (DUMMY_DATASET_DIR, "fxt_dataset", {}), + ( + DUMMY_NON_STANDARD_DATASET_DIR, + "fxt_non_standard_dataset", + {"image_prefix": "image_", "mask_prefix": "gt_"}, + ), + ], + indirect=["fxt_expected_dataset"], + ids=IDS, + ) + def test_cannot_import_nested( + self, + fxt_dataset_dir: str, + fxt_expected_dataset: Dataset, + fxt_import_kwargs: Dict[str, Any], + request: pytest.FixtureRequest, + test_dir: str, + ): + shutil.copytree(fxt_dataset_dir, test_dir, dirs_exist_ok=True) + subdir_name = 
"subdir" + subdir = os.path.join(test_dir, subdir_name) + os.makedirs(subdir) + for _file in os.listdir(test_dir): + if _file != subdir_name: + file_path = os.path.join(test_dir, _file) + shutil.move(file_path, subdir) + with pytest.raises(DatasetImportError) as exc_info: + super().test_can_import(test_dir, fxt_expected_dataset, fxt_import_kwargs, request) + assert exc_info.value.__cause__ is not None + assert isinstance(exc_info.value.__cause__, FileNotFoundError) + class CommonSemanticSegmentationWithSubsetDirsImporterTest(TestDataFormatBase): IMPORTER = CommonSemanticSegmentationWithSubsetDirsImporter diff --git a/tests/unit/data_formats/test_kaggle.py b/tests/unit/data_formats/test_kaggle.py index 90071c71fc..262c741e74 100644 --- a/tests/unit/data_formats/test_kaggle.py +++ b/tests/unit/data_formats/test_kaggle.py @@ -20,6 +20,9 @@ from tests.utils.test_utils import compare_datasets DUMMY_DATASET_IMAGE_CSV_DIR = get_test_asset_path("kaggle_dataset", "image_csv") +DUMMY_DATASET_IMAGE_CSV_MULTI_LB_DIR = get_test_asset_path( + "kaggle_dataset", "image_csv_multi_label" +) DUMMY_DATASET_IMAGE_CSV_DET_DIR = get_test_asset_path("kaggle_dataset", "image_csv_det") DUMMY_DATASET_IMAGE_TXT_DIR = get_test_asset_path("kaggle_dataset", "image_txt") DUMMY_DATASET_IMAGE_TXT_DET_DIR = get_test_asset_path("kaggle_dataset", "image_txt_det") @@ -72,6 +75,51 @@ def fxt_img_dataset() -> Dataset: ) +@pytest.fixture +def fxt_img_multi_label_dataset() -> Dataset: + return Dataset.from_iterable( + [ + DatasetItem( + id="1", + subset="default", + media=Image.from_numpy(data=np.ones((5, 10, 3))), + annotations=[Label(label=0)], + ), + DatasetItem( + id="2", + subset="default", + media=Image.from_numpy(data=np.ones((5, 10, 3))), + annotations=[Label(label=1)], + ), + DatasetItem( + id="3", + subset="default", + media=Image.from_numpy(data=np.ones((5, 10, 3))), + annotations=[Label(label=2)], + ), + DatasetItem( + id="4", + subset="default", + media=Image.from_numpy(data=np.ones((5, 10, 
3))), + annotations=[Label(label=0), Label(label=1)], + ), + DatasetItem( + id="5", + subset="default", + media=Image.from_numpy(data=np.ones((5, 10, 3))), + annotations=[Label(label=0), Label(label=2)], + ), + DatasetItem( + id="6", + subset="default", + media=Image.from_numpy(data=np.ones((5, 10, 3))), + annotations=[Label(label=1), Label(label=2)], + ), + ], + categories=["dog", "cat", "person"], + ) + + @pytest.fixture def fxt_img_det_dataset() -> Dataset: return Dataset.from_iterable( @@ -321,6 +369,8 @@ def fxt_coco_dataset() -> Dataset: IDS = [ "IMAGE_CSV", "IMAGE_CSV_WO_EXT", + "IMAGE_CSV_MULTI_LB", + "IMAGE_CSV_MULTI_LB_WO_EXT", "IMAGE_CSV_DET", "IMAGE_CSV_DET2", "IMAGE_CSV_DET3", @@ -372,6 +422,26 @@ def test_can_detect(self, fxt_dataset_dir: str): "columns": {"media": "image_name", "label": "label_name"}, }, ), + ( + DUMMY_DATASET_IMAGE_CSV_MULTI_LB_DIR, + "images", + "fxt_img_multi_label_dataset", + KaggleImageCsvBase, + { + "ann_file": osp.join(DUMMY_DATASET_IMAGE_CSV_MULTI_LB_DIR, "ann.csv"), + "columns": {"media": "image_name", "label": ["dog", "cat", "person"]}, + }, + ), + ( + DUMMY_DATASET_IMAGE_CSV_MULTI_LB_DIR, + "images", + "fxt_img_multi_label_dataset", + KaggleImageCsvBase, + { + "ann_file": osp.join(DUMMY_DATASET_IMAGE_CSV_MULTI_LB_DIR, "ann_wo_ext.csv"), + "columns": {"media": "image_name", "label": ["dog", "cat", "person"]}, + }, + ), ( DUMMY_DATASET_IMAGE_CSV_DET_DIR, "images", diff --git a/tests/unit/operations/test_statistics.py b/tests/unit/operations/test_statistics.py index bb92c53308..7f28be820a 100644 --- a/tests/unit/operations/test_statistics.py +++ b/tests/unit/operations/test_statistics.py @@ -10,7 +10,16 @@ import numpy as np import pytest -from datumaro.components.annotation import Bbox, Caption, Ellipse, Label, Mask, Points, RotatedBbox +from datumaro.components.annotation import ( + Bbox, + Caption, + Cuboid2D, + Ellipse, + Label, + Mask, + Points, + RotatedBbox, +) from datumaro.components.dataset import Dataset from 
datumaro.components.dataset_base import DatasetItem from datumaro.components.errors import DatumaroError @@ -232,6 +241,25 @@ def test_stats(self): "tiny": True, }, ), + Cuboid2D( + [ + (1, 1), + (3, 1), + (3, 3), + (1, 3), + (1.5, 1.5), + (3.5, 1.5), + (3.5, 3.5), + (1.5, 3.5), + ], + label=3, + id=5, + z_order=2, + attributes={ + "x": 1, + "y": "2", + }, + ), ], ), DatasetItem(id=3), @@ -242,7 +270,7 @@ def test_stats(self): expected = { "images count": 4, - "annotations count": 12, + "annotations count": 13, "unannotated images count": 2, "unannotated images": ["3", "2.2"], "annotations by type": { @@ -277,33 +305,34 @@ def test_stats(self): "hash_key": {"count": 0}, "feature_vector": {"count": 0}, "tabular": {"count": 0}, + "cuboid_2d": {"count": 1}, "unknown": {"count": 0}, }, "annotations": { "labels": { - "count": 6, + "count": 7, "distribution": { - "label_0": [1, 1 / 6], + "label_0": [1, 1 / 7], "label_1": [0, 0.0], - "label_2": [3, 3 / 6], - "label_3": [2, 2 / 6], + "label_2": [3, 3 / 7], + "label_3": [3, 3 / 7], }, "attributes": { "x": { - "count": 2, # annotations with no label are skipped + "count": 3, # annotations with no label are skipped "values count": 2, "values present": ["1", "2"], "distribution": { - "1": [1, 1 / 2], - "2": [1, 1 / 2], + "1": [2, 2 / 3], + "2": [1, 1 / 3], }, }, "y": { - "count": 2, # annotations with no label are skipped + "count": 3, # annotations with no label are skipped "values count": 1, "values present": ["2"], "distribution": { - "2": [2, 2 / 2], + "2": [3, 3 / 3], }, }, # must not include "special" attributes like "occluded" @@ -403,6 +432,7 @@ def _get_stats_template(label_names: list): "feature_vector": {"count": 0}, "tabular": {"count": 0}, "rotated_bbox": {"count": 0}, + "cuboid_2d": {"count": 0}, "unknown": {"count": 0}, }, "annotations": { diff --git a/tests/unit/test_annotation.py b/tests/unit/test_annotation.py index ddcd718bf7..d1a6824e94 100644 --- a/tests/unit/test_annotation.py +++ 
b/tests/unit/test_annotation.py @@ -13,6 +13,7 @@ from datumaro.components.annotation import ( Annotations, + Cuboid2D, Ellipse, ExtractedMask, HashKey, @@ -45,7 +46,7 @@ def test_get_points(self, fxt_ellipses: List[Ellipse]): class HashKeyTest: @pytest.fixture def fxt_hashkeys_same(self): - hash_key = np.random.randint(0, 256, size=(64,), dtype=np.uint8) + hash_key = np.random.randint(0, 256, size=(96,), dtype=np.uint8) hashkey1 = HashKey(hash_key=hash_key) hashkey2 = HashKey(hash_key=hash_key) return hashkey1, hashkey2 @@ -53,8 +54,8 @@ def fxt_hashkeys_same(self): @pytest.fixture def fxt_hashkeys_diff(self): np.random.seed(3003) - hashkey1 = HashKey(hash_key=np.random.randint(0, 256, size=(64,), dtype=np.uint8)) - hashkey2 = HashKey(hash_key=np.random.randint(0, 256, size=(64,), dtype=np.uint8)) + hashkey1 = HashKey(hash_key=np.random.randint(0, 256, size=(96,), dtype=np.uint8)) + hashkey2 = HashKey(hash_key=np.random.randint(0, 256, size=(96,), dtype=np.uint8)) return hashkey1, hashkey2 @pytest.mark.parametrize( @@ -62,7 +63,7 @@ def fxt_hashkeys_diff(self): ) def test_compare_hashkey(self, fxt_hashkeys, expected, request): hashkey1, hashkey2 = request.getfixturevalue(fxt_hashkeys) - assert (expected, hashkey1 == hashkey2) + assert (hashkey1 == hashkey2) == expected class RotatedBboxTest: @@ -142,3 +143,66 @@ def test_get_semantic_seg_mask_binary_mask(self, fxt_index_mask, dtype): semantic_seg_mask = annotations.get_semantic_seg_mask(ignore_index=255, dtype=dtype) assert np.allclose(semantic_seg_mask, fxt_index_mask) + + +class Cuboid2DTest: + @pytest.fixture + def fxt_cuboid_2d(self): + return Cuboid2D( + [ + (684.172, 237.810), + (750.486, 237.673), + (803.791, 256.313), + (714.712, 256.542), + (684.035, 174.227), + (750.263, 174.203), + (803.399, 170.990), + (714.476, 171.015), + ], + y_3d=1.49, + ) + + @pytest.fixture + def fxt_kitti_data(self): + dimensions = np.array([1.49, 1.56, 4.34]) + location = np.array([2.51, 1.49, 14.75]) + rot_y = -1.59 + + return 
dimensions, location, rot_y + + @pytest.fixture + def fxt_P2(self): + return np.array( + [ + [7.215377000000e02, 0.000000000000e00, 6.095593000000e02, 4.485728000000e01], + [0.000000000000e00, 7.215377000000e02, 1.728540000000e02, 2.163791000000e-01], + [0.000000000000e00, 0.000000000000e00, 1.000000000000e00, 2.745884000000e-03], + ] + ) + + @pytest.fixture + def fxt_velo_to_cam(self): + return np.array( + [ + [7.533745000000e-03, -9.999714000000e-01, -6.166020000000e-04, -4.069766000000e-03], + [1.480249000000e-02, 7.280733000000e-04, -9.998902000000e-01, -7.631618000000e-02], + [9.998621000000e-01, 7.523790000000e-03, 1.480755000000e-02, -2.717806000000e-01], + ] + ) + + def test_create_from_3d(self, fxt_kitti_data, fxt_cuboid_2d, fxt_P2, fxt_velo_to_cam): + actual = Cuboid2D.from_3d( + dim=fxt_kitti_data[0], + location=fxt_kitti_data[1], + rotation_y=fxt_kitti_data[2], + P=fxt_P2, + Tr_velo_to_cam=fxt_velo_to_cam, + ) + + assert np.allclose(actual.points, fxt_cuboid_2d.points, atol=1e-3) + + def test_to_3d(self, fxt_kitti_data, fxt_cuboid_2d, fxt_P2): + P_inv = np.linalg.pinv(fxt_P2) + actual = fxt_cuboid_2d.to_3d(P_inv) + for act, exp in zip(actual, fxt_kitti_data): + assert np.allclose(act, exp, atol=2) diff --git a/tests/unit/test_environment.py b/tests/unit/test_environment.py index 11f08ae386..5fed7cd8ed 100644 --- a/tests/unit/test_environment.py +++ b/tests/unit/test_environment.py @@ -5,7 +5,8 @@ import pytest import datumaro.components.lazy_plugin -from datumaro.components.environment import Environment, PluginRegistry +from datumaro.components.environment import DEFAULT_ENVIRONMENT, Environment, PluginRegistry +from datumaro.components.exporter import Exporter real_find_spec = datumaro.components.lazy_plugin.find_spec @@ -77,3 +78,17 @@ def test_extra_deps_req(self, fxt_tf_failure_env): ) assert "tf_detection_api" not in loaded_plugin_names + + def test_merge_default_env(self): + merged_env = Environment.merge([DEFAULT_ENVIRONMENT, 
DEFAULT_ENVIRONMENT]) + assert merged_env is DEFAULT_ENVIRONMENT + + def test_merge_custom_env(self): + class TestPlugin(Exporter): + pass + + envs = [Environment(), Environment()] + envs[0].exporters.register("test_plugin", TestPlugin) + + merged = Environment.merge(envs) + assert "test_plugin" in merged.exporters diff --git a/tests/unit/test_explorer.py b/tests/unit/test_explorer.py index dc0ae61e0e..9cf78b5773 100644 --- a/tests/unit/test_explorer.py +++ b/tests/unit/test_explorer.py @@ -1,21 +1,17 @@ -import os.path as osp -from copy import deepcopy -from functools import partial from unittest import TestCase +from unittest.mock import patch import numpy as np from datumaro.components.algorithms.hash_key_inference.explorer import Explorer -from datumaro.components.annotation import AnnotationType, Caption, Label +from datumaro.components.annotation import AnnotationType, HashKey from datumaro.components.dataset import Dataset from datumaro.components.dataset_base import DatasetItem from datumaro.components.errors import MediaTypeError -from datumaro.components.media import Image -from datumaro.plugins.data_formats.datumaro.exporter import DatumaroExporter +from datumaro.util.meta_file_util import load_hash_key from tests.requirements import Requirements, mark_requirement from tests.utils.assets import get_test_asset_path -from tests.utils.test_utils import TestDir class ExplorerTest(TestCase): @@ -171,3 +167,71 @@ def test_pointcloud_assert(self): with self.assertRaises(MediaTypeError) as capture: Explorer(imported_dataset) self.assertIn("PointCloud", str(capture.exception)) + + +class MetaFileTest(TestCase): + @mark_requirement(Requirements.DATUM_GENERAL_REQ) + def test_no_hashkey_dir(self): + """ + Test that the function returns the original dataset if the hashkey directory doesn't exist. 
+ """ + dataset = [DatasetItem(id="000001", subset="test")] + with patch("os.path.isdir") as mock_isdir: + mock_isdir.return_value = False + result = load_hash_key("invalid_path", dataset) + self.assertEqual(result, dataset) + + @mark_requirement(Requirements.DATUM_GENERAL_REQ) + def test_no_hashkey_file(self): + """ + Test that the function returns the original dataset if the hashkey file doesn't exist. + """ + dataset = [DatasetItem(id="000001", subset="test")] + with patch("os.path.isdir") as mock_isdir, patch( + "datumaro.util.meta_file_util.has_hashkey_file" + ) as mock_has_hashkey_file: + mock_isdir.return_value = True + mock_has_hashkey_file.return_value = False + result = load_hash_key("hashkey_dir", dataset) + self.assertEqual(result, dataset) + + @mark_requirement(Requirements.DATUM_GENERAL_REQ) + def test_load_hash_key(self): + """ + Test that the function successfully parses the hashkey file and adds HashKey annotations to the dataset items. + """ + dataset = [ + DatasetItem(id="000001", subset="train", annotations=[]), + DatasetItem(id="000002", subset="val", annotations=[]), + ] + expected_hashkey1 = np.ones((96,), dtype=np.uint8) + expected_hashkey2 = np.zeros((96,), dtype=np.uint8) + hashkey_dict = { + "train/000001": expected_hashkey1.tolist(), + "val/000002": expected_hashkey2.tolist(), + } + + with patch("os.path.isdir") as mock_isdir, patch( + "datumaro.util.meta_file_util.has_hashkey_file" + ) as mock_has_hashkey_file, patch( + "datumaro.util.meta_file_util.parse_hashkey_file" + ) as mock_parse_hashkey_file: + mock_isdir.return_value = True + mock_has_hashkey_file.return_value = True + mock_parse_hashkey_file.return_value = hashkey_dict + + result = load_hash_key("hashkey_dir", dataset) + + self.assertEqual(len(result), len(dataset)) + self.assertEqual(result[0].id, dataset[0].id) + self.assertEqual(result[0].subset, dataset[0].subset) + + # Check if HashKey annotations are added + self.assertEqual(len(result[0].annotations), 1) + 
self.assertIsInstance(result[0].annotations[0], HashKey) + self.assertTrue(np.array_equal(result[0].annotations[0].hash_key, expected_hashkey1)) + + # Check if HashKey annotations are added for the second item as well + self.assertEqual(len(result[1].annotations), 1) + self.assertIsInstance(result[1].annotations[0], HashKey) + self.assertTrue(np.array_equal(result[1].annotations[0].hash_key, expected_hashkey2)) diff --git a/tests/unit/test_framework_converter.py b/tests/unit/test_framework_converter.py index 0933884293..83fd9a97c5 100644 --- a/tests/unit/test_framework_converter.py +++ b/tests/unit/test_framework_converter.py @@ -1,4 +1,4 @@ -# Copyright (C) 2023 Intel Corporation +# Copyright (C) 2023-2024 Intel Corporation # # SPDX-License-Identifier: MIT @@ -13,14 +13,16 @@ from datumaro.components.annotation import ( AnnotationType, Bbox, + Caption, Label, LabelCategories, Mask, Polygon, + Tabular, ) from datumaro.components.dataset import Dataset from datumaro.components.dataset_base import DatasetItem -from datumaro.components.media import Image +from datumaro.components.media import Image, Table, TableRow from datumaro.plugins.framework_converter import ( TASK_ANN_TYPE, DmTfDataset, @@ -36,6 +38,8 @@ try: import torch + from torchtext.data.utils import get_tokenizer + from torchtext.vocab import build_vocab_from_iterator from torchvision import datasets, transforms except ImportError: TORCH_AVAILABLE = False @@ -142,6 +146,89 @@ def fxt_dataset(): ) +@pytest.fixture +def fxt_tabular_label_dataset(): + table = Table.from_list( + [ + { + "label": 1, + "text": "I rented I AM CURIOUS-YELLOW from my video store because of all the controversy that surrounded it when it was first released in 1967. I also heard that at first it was seized by U.S. customs if it ever tried to enter this country, therefore being a fan of films considered " + "controversial" + " I really had to see this for myself.

The plot is centered around a young Swedish drama student named Lena who wants to learn everything she can about life. In particular she wants to focus her attentions to making some sort of documentary on what the average Swede thought about certain political issues such as the Vietnam War and race issues in the United States. In between asking politicians and ordinary denizens of Stockholm about their opinions on politics, she has sex with her drama teacher, classmates, and married men.

What kills me about I AM CURIOUS-YELLOW is that 40 years ago, this was considered pornographic. Really, the sex and nudity scenes are few and far between, even then it's not shot like some cheaply made porno. While my countrymen mind find it shocking, in reality sex and nudity are a major staple in Swedish cinema. Even Ingmar Bergman, arguably their answer to good old boy John Ford, had sex scenes in his films.

I do commend the filmmakers for the fact that any sex shown in the film is shown for artistic purposes rather than just to shock people and make money to be shown in pornographic theaters in America. I AM CURIOUS-YELLOW is a good film for anyone wanting to study the meat and potatoes (no pun intended) of Swedish cinema. But really, this film doesn't have much of a plot.", + } + ] + ) + return Dataset.from_iterable( + [ + DatasetItem( + id=0, + subset="train", + media=TableRow(table=table, index=0), + annotations=[Label(id=0, attributes={}, group=0, object_id=-1, label=0)], + ) + ], + categories={ + AnnotationType.label: LabelCategories.from_iterable( + [("label:1", "label"), ("label:2", "label")] + ) + }, + media_type=TableRow, + ) + + +@pytest.fixture +def fxt_tabular_caption_dataset(): + table = Table.from_list( + [ + { + "source": "Zwei junge weiße Männer sind im Freien in der Nähe vieler Büsche.", + "target": "Two young, White males are outside near many bushes.", + } + ] + ) + return Dataset.from_iterable( + [ + DatasetItem( + id=0, + subset="train", + media=TableRow(table=table, index=0), + annotations=[ + Caption("target:Two young, White males are outside near many bushes.") + ], + ) + ], + categories={}, + media_type=TableRow, + ) + + +@pytest.fixture +def fxt_dummy_tokenizer(): + def dummy_tokenizer(text): + return text.split() + + return dummy_tokenizer + + +@pytest.fixture +def data_iter(): + return [(1, "This is a sample text"), (2, "Another sample text")] + + +@pytest.fixture +def fxt_dummy_vocab(fxt_dummy_tokenizer, data_iter): + vocab = build_vocab_from_iterator( + map(fxt_dummy_tokenizer, (text for _, text in data_iter)), specials=[""] + ) + vocab.set_default_index(vocab[""]) + return vocab + + +@pytest.fixture +def fxt_tabular_fixture(fxt_dummy_tokenizer, fxt_dummy_vocab): + return {"target": {"input": "text"}, "tokenizer": fxt_dummy_tokenizer, "vocab": fxt_dummy_vocab} + + @pytest.mark.new @mark_requirement(Requirements.DATUM_GENERAL_REQ) class 
FrameworkConverterFactoryTest(TestCase): @@ -173,38 +260,49 @@ def test_create_converter_tf_importerror(self): @mark_requirement(Requirements.DATUM_GENERAL_REQ) class MultiframeworkConverterTest: @pytest.mark.parametrize( - "fxt_subset,fxt_task", + "fxt_dataset_type,fxt_subset,fxt_task", [ ( + "fxt_dataset", "train", "classification", ), ( + "fxt_dataset", "val", "multilabel_classification", ), ( + "fxt_dataset", "train", "detection", ), ( + "fxt_dataset", "val", "instance_segmentation", ), ( + "fxt_dataset", "train", "semantic_segmentation", ), + ("fxt_tabular_label_dataset", "train", "tabular"), ], ) - def test_multi_framework_dataset(self, fxt_dataset: Dataset, fxt_subset: str, fxt_task: str): + def test_multi_framework_dataset( + self, fxt_dataset_type: str, fxt_subset: str, fxt_task: str, request + ): + dataset = request.getfixturevalue(fxt_dataset_type) dm_multi_framework_dataset = _MultiFrameworkDataset( - dataset=fxt_dataset, subset=fxt_subset, task=fxt_task + dataset=dataset, subset=fxt_subset, task=fxt_task ) for idx in range(len(dm_multi_framework_dataset)): image, label = dm_multi_framework_dataset._gen_item(idx) - assert isinstance(image, np.ndarray) + if fxt_task == "tabular": + image = image() + assert isinstance(image, (np.ndarray, dict)) if fxt_task == "classification": assert isinstance(label, int) elif fxt_task == "multilabel_classification": @@ -213,6 +311,8 @@ def test_multi_framework_dataset(self, fxt_dataset: Dataset, fxt_subset: str, fx assert isinstance(label, list) if fxt_task == "semantic_segmentation": assert isinstance(label, np.ndarray) + elif fxt_task == "tabular": + assert isinstance(label, list) @pytest.mark.skipif(not TORCH_AVAILABLE, reason="PyTorch is not installed") @pytest.mark.parametrize( @@ -261,7 +361,6 @@ def test_can_convert_torch_framework( fxt_subset: str, fxt_task: str, fxt_convert_kwargs: Dict[str, Any], - request: pytest.FixtureRequest, ): multi_framework_dataset = FrameworkConverter(fxt_dataset, subset=fxt_subset, 
task=fxt_task) @@ -294,7 +393,12 @@ def test_can_convert_torch_framework( if ann.type == TASK_ANN_TYPE[fxt_task] ] label = np.sum(masks, axis=0, dtype=np.uint8) - + elif fxt_task == "tabular": + label = [ + ann.as_dict() + for ann in exp_item.annotations + if ann.type in TASK_ANN_TYPE[fxt_task] + ] if fxt_convert_kwargs.get("transform", None): actual = dm_torch_item[0].permute(1, 2, 0).mul(255.0).to(torch.uint8).numpy() assert np.array_equal(image, actual) @@ -374,6 +478,130 @@ def test_can_convert_torch_framework_detection(self): assert torch_ann["bbox"] == [x1, y1, x2 - x1, y2 - y1] assert torch_ann["iscrowd"] == dm_ann["attributes"]["is_crowd"] + @pytest.mark.skipif(not TORCH_AVAILABLE, reason="PyTorch is not installed") + def test_can_convert_torch_framework_tabular_label(self, fxt_tabular_label_dataset): + class IMDBDataset(Dataset): + def __init__(self, data_iter, vocab, transform=None): + self.data = list(data_iter) + self.vocab = vocab + self.transform = transform + self.tokenizer = get_tokenizer("basic_english") + + def __len__(self): + return len(self.data) + + def __getitem__(self, idx): + label, text = self.data[idx] + token_ids = [self.vocab[token] for token in self.tokenizer(text)] + + if self.transform: + token_ids = self.transform(token_ids) + + return torch.tensor(token_ids, dtype=torch.long), torch.tensor( + label, dtype=torch.long + ) + + # Prepare data and tokenizer + # First item of IMDB + first_item = ( + 1, + "I rented I AM CURIOUS-YELLOW from my video store because of all the controversy that surrounded it when it was first released in 1967. I also heard that at first it was seized by U.S. customs if it ever tried to enter this country, therefore being a fan of films considered \"controversial\" I really had to see this for myself.

The plot is centered around a young Swedish drama student named Lena who wants to learn everything she can about life. In particular she wants to focus her attentions to making some sort of documentary on what the average Swede thought about certain political issues such as the Vietnam War and race issues in the United States. In between asking politicians and ordinary denizens of Stockholm about their opinions on politics, she has sex with her drama teacher, classmates, and married men.

What kills me about I AM CURIOUS-YELLOW is that 40 years ago, this was considered pornographic. Really, the sex and nudity scenes are few and far between, even then it's not shot like some cheaply made porno. While my countrymen mind find it shocking, in reality sex and nudity are a major staple in Swedish cinema. Even Ingmar Bergman, arguably their answer to good old boy John Ford, had sex scenes in his films.

I do commend the filmmakers for the fact that any sex shown in the film is shown for artistic purposes rather than just to shock people and make money to be shown in pornographic theaters in America. I AM CURIOUS-YELLOW is a good film for anyone wanting to study the meat and potatoes (no pun intended) of Swedish cinema. But really, this film doesn't have much of a plot.", + ) + tokenizer = get_tokenizer("basic_english") + + # Build vocabulary + vocab = build_vocab_from_iterator([tokenizer(first_item[1])], specials=[""]) + vocab.set_default_index(vocab[""]) + + # Create torch dataset + torch_dataset = IMDBDataset(iter([first_item]), vocab) + + # Convert to dm_torch_dataset + dm_dataset = fxt_tabular_label_dataset + multi_framework_dataset = FrameworkConverter(dm_dataset, subset="train", task="tabular") + dm_torch_dataset = multi_framework_dataset.to_framework( + framework="torch", target={"input": "text"}, tokenizer=tokenizer, vocab=vocab + ) + + # Verify equality of items in torch_dataset and dm_torch_dataset + label_indices = dm_dataset.categories().get(AnnotationType.label)._indices + torch_item = torch_dataset[0] + dm_item = dm_torch_dataset[0] + assert torch.equal(torch_item[0], dm_item[0]), "Token IDs do not match" + + # Extract and compare labels + torch_item_label = str(torch_item[1].item()) + dm_item_label = list(label_indices.keys())[list(label_indices.values()).index(0)].split( + ":" + )[-1] + assert torch_item_label == dm_item_label, "Labels do not match" + + @pytest.mark.skipif(not TORCH_AVAILABLE, reason="PyTorch is not installed") + def test_can_convert_torch_framework_tabular_caption(self, fxt_tabular_caption_dataset): + class Multi30kDataset(Dataset): + def __init__(self, dataset, src_tokenizer, tgt_tokenizer, src_vocab, tgt_vocab): + self.dataset = list(dataset) + self.src_tokenizer = src_tokenizer + self.tgt_tokenizer = tgt_tokenizer + self.src_vocab = src_vocab + self.tgt_vocab = tgt_vocab + + def __len__(self): + return len(self.dataset) + + def 
_data_process(self, text, tokenizer, vocab): + tokens = tokenizer(text) + token_ids = [vocab[token] for token in tokens] + return torch.tensor(token_ids, dtype=torch.long) + + def __getitem__(self, idx): + src, tgt = self.dataset[idx] + src_tensor = self._data_process(src, self.src_tokenizer, self.src_vocab) + tgt_tensor = self._data_process(tgt, self.tgt_tokenizer, self.tgt_vocab) + return src_tensor, tgt_tensor + + # Prepare data and tokenizer + # First item of Multi30k + first_item = ( + "Zwei junge weiße Männer sind im Freien in der Nähe vieler Büsche.", + "Two young, White males are outside near many bushes.", + ) + + dummy_tokenizer = str.split + + def build_single_vocab(item, tokenizer, specials): + tokens = tokenizer(item) + vocab = build_vocab_from_iterator([tokens], specials=specials) + vocab.set_default_index(vocab[""]) + return vocab + + # Build vocabularies + specials = ["", "", "", ""] + src_vocab = build_single_vocab(first_item[0], dummy_tokenizer, specials) + tgt_vocab = build_single_vocab(first_item[1], dummy_tokenizer, specials) + + # Create torch dataset + torch_dataset = Multi30kDataset( + iter([first_item]), dummy_tokenizer, dummy_tokenizer, src_vocab, tgt_vocab + ) + + # Convert to dm_torch_dataset + dm_dataset = fxt_tabular_caption_dataset + multi_framework_dataset = FrameworkConverter(dm_dataset, subset="train", task="tabular") + dm_torch_dataset = multi_framework_dataset.to_framework( + framework="torch", + target={"input": "source", "output": "target"}, + tokenizer=(dummy_tokenizer, dummy_tokenizer), + vocab=(src_vocab, tgt_vocab), + ) + + # Verify equality of items in torch_dataset and dm_torch_dataset + torch_item = torch_dataset[0] + dm_item = dm_torch_dataset[0] + + assert torch.equal(torch_item[0], dm_item[0]), "Token IDs for de do not match" + assert torch.equal(torch_item[1], dm_item[1]), "Token IDs for en do not match" + @pytest.mark.skipif(not TF_AVAILABLE, reason="Tensorflow is not installed") @pytest.mark.parametrize( 
"fxt_subset,fxt_task,fxt_convert_kwargs", diff --git a/tests/unit/test_hashkey.py b/tests/unit/test_hashkey.py index 13b60222d0..da360a8e64 100644 --- a/tests/unit/test_hashkey.py +++ b/tests/unit/test_hashkey.py @@ -46,7 +46,7 @@ def fxt_dataset_dir_with_hash_key(test_dir, fxt_data_format): test_asset_dir = test_asset_dir_map[fxt_data_format] dataset = Dataset.import_from(test_asset_dir, format=fxt_data_format) for item in dataset: - hash_key = HashKey(hash_key=np.random.randint(0, 256, size=(64,), dtype=np.uint8)) + hash_key = HashKey(hash_key=np.random.randint(0, 256, size=(96,), dtype=np.uint8)) item.annotations += [hash_key] if fxt_data_format == "wider_face": diff --git a/tests/unit/test_imagenet_format.py b/tests/unit/test_imagenet_format.py index 6e3ca2abec..5bd8a1c9bb 100644 --- a/tests/unit/test_imagenet_format.py +++ b/tests/unit/test_imagenet_format.py @@ -7,6 +7,7 @@ import pytest from datumaro.components.annotation import AnnotationType, Label, LabelCategories +from datumaro.components.contexts.importer import ImportErrorPolicy from datumaro.components.dataset import Dataset, StreamDataset from datumaro.components.dataset_base import DatasetItem from datumaro.components.environment import Environment @@ -181,6 +182,12 @@ class ImagenetImporterTest: IMPORTER_NAME = ImagenetImporter.NAME def _create_expected_dataset(self): + label_categories = LabelCategories.from_iterable( + ("label_0", "label_1", f"{Path('label_1', 'label_1_1')}") + ) + label_categories[-1].parent = "label_1" + label_categories.add_label_group(name="label_1", labels=["label_1_1"], group_type=0) + return Dataset.from_iterable( [ DatasetItem( @@ -203,18 +210,16 @@ def _create_expected_dataset(self): annotations=[Label(1)], ), ], - categories={ - AnnotationType.label: LabelCategories.from_iterable( - ("label_0", "label_1", f"{Path('label_1', 'label_1_1')}") - ), - }, + categories={AnnotationType.label: label_categories}, ) @mark_requirement(Requirements.DATUM_GENERAL_REQ) 
@pytest.mark.parametrize("dataset_cls, is_stream", [(Dataset, False), (StreamDataset, True)]) def test_can_import(self, dataset_cls, is_stream, helper_tc): expected_dataset = self._create_expected_dataset() - dataset = dataset_cls.import_from(self.DUMMY_DATASET_DIR, self.IMPORTER_NAME) + dataset = dataset_cls.import_from( + self.DUMMY_DATASET_DIR, self.IMPORTER_NAME, error_policy=ImportErrorPolicy() + ) assert dataset.is_stream == is_stream compare_datasets(helper_tc, expected_dataset, dataset, require_media=True) @@ -240,7 +245,9 @@ class ImagenetWithSubsetDirsImporterTest(ImagenetImporterTest): @mark_requirement(Requirements.DATUM_GENERAL_REQ) @pytest.mark.parametrize("dataset_cls, is_stream", [(Dataset, False), (StreamDataset, True)]) def test_can_import(self, dataset_cls, is_stream, helper_tc): - dataset = dataset_cls.import_from(self.DUMMY_DATASET_DIR, self.IMPORTER_NAME) + dataset = dataset_cls.import_from( + self.DUMMY_DATASET_DIR, self.IMPORTER_NAME, error_policy=ImportErrorPolicy() + ) assert dataset.is_stream == is_stream for subset_name, subset in dataset.subsets().items(): diff --git a/tests/unit/test_kitti_3d_format.py b/tests/unit/test_kitti_3d_format.py new file mode 100644 index 0000000000..3dbadb2507 --- /dev/null +++ b/tests/unit/test_kitti_3d_format.py @@ -0,0 +1,292 @@ +import os.path as osp +from unittest import TestCase + +import numpy as np + +from datumaro.components.annotation import AnnotationType, Bbox, LabelCategories +from datumaro.components.dataset_base import DatasetItem +from datumaro.components.environment import Environment +from datumaro.components.media import Image, PointCloud +from datumaro.components.project import Dataset +from datumaro.plugins.data_formats.kitti_3d.importer import Kitti3dImporter + +from tests.requirements import Requirements, mark_requirement +from tests.utils.assets import get_test_asset_path +from tests.utils.test_utils import compare_datasets_3d + +DUMMY_DATASET_DIR = 
get_test_asset_path("kitti_dataset", "kitti_3d") +DUMMY_SUBSET_DATASET_DIR = get_test_asset_path("kitti_dataset", "kitti_3d_with_subset") + + +class Kitti3DImporterTest(TestCase): + @mark_requirement(Requirements.DATUM_GENERAL_REQ) + def test_can_detect(self): + detected_formats = Environment().detect_dataset(DUMMY_DATASET_DIR) + self.assertEqual([Kitti3dImporter.NAME], detected_formats) + + @mark_requirement(Requirements.DATUM_GENERAL_REQ) + def test_can_load(self): + """ + Description: + Ensure that the dataset can be loaded correctly from the KITTI3D format. + + Expected results: + The loaded dataset should have the same number of data items as the expected dataset. + The data items in the loaded dataset should have the same attributes and values as the expected data items. + The point clouds and images associated with the data items should be loaded correctly. + + Steps: + 1. Prepare an expected dataset with known data items, point clouds, images, and attributes. + 2. Load the dataset from the KITTI3D format. + 3. Compare the loaded dataset with the expected dataset. 
+ """ + + image1 = Image.from_file(path=osp.join(DUMMY_DATASET_DIR, "image_2", "000001.png")) + + expected_label_cat = LabelCategories( + attributes={"occluded", "truncated", "alpha", "dimensions", "location", "rotation_y"} + ) + expected_label_list = [ + "DontCare", + "Car", + "Pedestrian", + "Van", + "Truck", + "Cyclist", + "Sitter", + "Train", + "Motorcycle", + "Bus", + "Misc", + ] + for label in expected_label_list: + expected_label_cat.add(label) + expected_dataset = Dataset.from_iterable( + [ + DatasetItem( + id="000001", + annotations=[ + Bbox( + 600, # x1 + 150, # y1 + 30, # x2-x1 + 40, # y2-y1 + label=4, + id=0, + attributes={ + "truncated": 0.0, + "occluded": 0, + "alpha": -1.57, + "dimensions": [2.85, 2.63, 12.34], + "location": [0.47, 1.49, 69.44], + "rotation_y": -1.56, + }, + ), + Bbox( + 650, # x1 + 160, # y1 + 50, # x2-x1 + 40, # y2-y1 + label=1, + id=1, + attributes={ + "truncated": 0.0, + "occluded": 3, + "alpha": -1.65, + "dimensions": [1.86, 0.6, 2.02], + "location": [4.59, 1.32, 45.84], + "rotation_y": -1.55, + }, + ), + Bbox( + 500, # x1 + 170, # y1 + 90, # x2-x1 + 20, # y2-y1 + label=0, + id=2, + attributes={ + "truncated": -1.0, + "occluded": -1, + "alpha": -10.0, + "dimensions": [-1.0, -1.0, -1.0], + "location": [-1000.0, -1000.0, -1000.0], + "rotation_y": -10.0, + }, + ), + ], + media=image1, + attributes={"calib_path": osp.join(DUMMY_DATASET_DIR, "calib", "000001.txt")}, + ), + ], + categories={AnnotationType.label: expected_label_cat}, + ) + + parsed_dataset = Dataset.import_from(DUMMY_DATASET_DIR, "kitti3d") + + compare_datasets_3d(self, expected_dataset, parsed_dataset, require_point_cloud=True) + + @mark_requirement(Requirements.DATUM_GENERAL_REQ) + def test_can_load_with_subset(self): + """ + Description: + Ensure that the dataset can be loaded correctly from the KITTI3D format with a specified subset of data items. 
+ + Expected results: + The loaded dataset should contain only the specified subset of data items from the original dataset. + The data items in the loaded dataset should have the same attributes and values as the expected data items. + + Steps: + 1. Prepare an expected dataset with a subset of data items from the original dataset. + 2. Load the dataset from the KITTI3D format, specifying the subset of data items to load. + 3. Compare the loaded dataset with the expected dataset. + """ + expected_label_cat = LabelCategories( + attributes={"occluded", "truncated", "alpha", "dimensions", "location", "rotation_y"} + ) + expected_label_list = [ + "DontCare", + "Car", + "Pedestrian", + "Van", + "Truck", + "Cyclist", + "Sitter", + "Train", + "Motorcycle", + "Bus", + "Misc", + ] + for label in expected_label_list: + expected_label_cat.add(label) + expected_dataset = Dataset.from_iterable( + [ + DatasetItem( + id="000000", + subset="train", + annotations=[ + Bbox( + 700, # x1 + 150, # y1 + 100, # x2-x1 + 150, # y2-y1 + label=2, + id=0, + attributes={ + "truncated": 0.0, + "occluded": 0, + "alpha": -0.2, + "dimensions": [1.89, 0.48, 1.20], + "location": [1.84, 1.47, 8.41], + "rotation_y": 0.01, + }, + ), + ], + media=Image.from_file( + path=osp.join(DUMMY_SUBSET_DATASET_DIR, "image_2", "train", "000000.png") + ), + attributes={ + "calib_path": osp.join( + DUMMY_SUBSET_DATASET_DIR, "calib", "train", "000000.txt" + ) + }, + ), + DatasetItem( + id="000001", + subset="val", + annotations=[ + Bbox( + 330, # x1 + 180, # y1 + 30, # x2-x1 + 60, # y2-y1 + label=2, + id=0, + attributes={ + "truncated": 0.0, + "occluded": 0, + "alpha": 1.94, + "dimensions": [1.87, 0.96, 0.65], + "location": [-8.50, 2.07, 23.02], + "rotation_y": 1.59, + }, + ), + Bbox( + 600, # x1 + 170, # y1 + 20, # x2-x1 + 15, # y2-y1 + label=0, + id=1, + attributes={ + "truncated": -1, + "occluded": -1, + "alpha": -10, + "dimensions": [-1, -1, -1], + "location": [-1000, -1000, -1000], + "rotation_y": -10, + }, + ), 
+ ], + media=Image.from_file( + path=osp.join(DUMMY_SUBSET_DATASET_DIR, "image_2", "val", "000001.png") + ), + attributes={ + "calib_path": osp.join( + DUMMY_SUBSET_DATASET_DIR, "calib", "val", "000001.txt" + ) + }, + ), + DatasetItem( + id="000002", + subset="test", + annotations=[ + Bbox( + 0, # x1 + 190, # y1 + 400, # x2-x1 + 190, # y2-y1 + label=1, + id=0, + attributes={ + "truncated": 0.88, + "occluded": 3, + "alpha": -0.69, + "dimensions": [1.60, 1.57, 3.23], + "location": [-2.70, 1.74, 3.68], + "rotation_y": -1.29, + }, + ), + Bbox( + 800, # x1 + 160, # y1 + 25, # x2-x1 + 25, # y2-y1 + label=0, + id=1, + attributes={ + "truncated": -1, + "occluded": -1, + "alpha": -10, + "dimensions": [-1, -1, -1], + "location": [-1000, -1000, -1000], + "rotation_y": -10, + }, + ), + ], + media=Image.from_file( + path=osp.join(DUMMY_SUBSET_DATASET_DIR, "image_2", "test", "000002.png") + ), + attributes={ + "calib_path": osp.join( + DUMMY_SUBSET_DATASET_DIR, "calib", "test", "000002.txt" + ) + }, + ), + ], + categories={AnnotationType.label: expected_label_cat}, + ) + + parsed_dataset = Dataset.import_from(DUMMY_SUBSET_DATASET_DIR, "kitti3d") + + compare_datasets_3d(self, expected_dataset, parsed_dataset, require_point_cloud=True) diff --git a/tests/unit/test_kitti_raw_format.py b/tests/unit/test_kitti_raw_format.py index e8ab776b75..498e99b20f 100644 --- a/tests/unit/test_kitti_raw_format.py +++ b/tests/unit/test_kitti_raw_format.py @@ -52,13 +52,13 @@ def test_can_load(self): annotations=[ Cuboid3d( position=[1, 2, 3], - scale=[7.95, -3.62, -1.03], + scale=[-3.62, 7.95, -1.03], label=1, attributes={"occluded": False, "track_id": 1}, ), Cuboid3d( position=[1, 1, 0], - scale=[8.34, 23.01, -0.76], + scale=[23.01, 8.34, -0.76], label=0, attributes={"occluded": False, "track_id": 2}, ), @@ -71,7 +71,7 @@ def test_can_load(self): annotations=[ Cuboid3d( position=[0, 1, 0], - scale=[8.34, 23.01, -0.76], + scale=[23.01, 8.34, -0.76], rotation=[1, 1, 3], label=0, 
attributes={"occluded": True, "track_id": 2}, @@ -85,7 +85,7 @@ def test_can_load(self): annotations=[ Cuboid3d( position=[1, 2, 3], - scale=[-9.41, 13.54, 0.24], + scale=[13.54, -9.41, 0.24], label=1, attributes={"occluded": False, "track_id": 3}, ) @@ -161,7 +161,7 @@ def test_can_save_and_load(self): Cuboid3d(position=[1.4, 2.1, 1.4], label=1, attributes={"track_id": 2}), Cuboid3d( position=[11.4, -0.1, 4.2], - scale=[2, 1, 2], + scale=[1, 2, 2], label=0, attributes={"track_id": 3}, ), @@ -172,7 +172,7 @@ def test_can_save_and_load(self): annotations=[ Cuboid3d( position=[0.4, -1, 2.24], - scale=[2, 1, 2], + scale=[1, 2, 2], label=0, attributes={"track_id": 3}, ), @@ -185,7 +185,7 @@ def test_can_save_and_load(self): annotations=[ Cuboid3d( position=[0.4, -1, 3.24], - scale=[2, 1, 2], + scale=[1, 2, 2], label=0, attributes={"track_id": 3}, ), @@ -244,7 +244,7 @@ def test_can_save_and_load(self): ), Cuboid3d( position=[11.4, -0.1, 4.2], - scale=[2, 1, 2], + scale=[1, 2, 2], label=0, attributes={"occluded": False, "track_id": 3}, ), @@ -256,7 +256,7 @@ def test_can_save_and_load(self): annotations=[ Cuboid3d( position=[0.4, -1, 2.24], - scale=[2, 1, 2], + scale=[1, 2, 2], label=0, attributes={"occluded": False, "track_id": 3}, ), @@ -271,7 +271,7 @@ def test_can_save_and_load(self): annotations=[ Cuboid3d( position=[0.4, -1, 3.24], - scale=[2, 1, 2], + scale=[1, 2, 2], label=0, attributes={"occluded": False, "track_id": 3}, ), diff --git a/tests/unit/test_transforms.py b/tests/unit/test_transforms.py index 24db8a76e6..ebd7e58664 100644 --- a/tests/unit/test_transforms.py +++ b/tests/unit/test_transforms.py @@ -2,9 +2,11 @@ import argparse import logging as log +import os import os.path as osp import random from unittest import TestCase +from unittest.mock import MagicMock, patch import numpy as np import pandas as pd @@ -14,6 +16,7 @@ import datumaro.plugins.transforms as transforms import datumaro.util.mask_tools as mask_tools +from 
datumaro.components.algorithms.hash_key_inference.explorer import Explorer from datumaro.components.annotation import ( AnnotationType, Bbox, @@ -31,10 +34,10 @@ Tabular, TabularCategories, ) -from datumaro.components.dataset import Dataset +from datumaro.components.dataset import Dataset, eager_mode from datumaro.components.dataset_base import DatasetItem from datumaro.components.errors import AnnotationTypeError -from datumaro.components.media import Image, Table, TableRow +from datumaro.components.media import Image, Table, TableRow, Video, VideoFrame from ..requirements import Requirements, mark_bug, mark_requirement @@ -418,26 +421,6 @@ def test_shapes_to_boxes(self): actual = transforms.ShapesToBoxes(source_dataset) compare_datasets(self, target_dataset, actual) - @mark_requirement(Requirements.DATUM_GENERAL_REQ) - def test_id_from_image(self): - source_dataset = Dataset.from_iterable( - [ - DatasetItem(id=1, media=Image.from_file(path="path.jpg")), - DatasetItem(id=2), - DatasetItem(id=3, media=Image.from_numpy(data=np.ones([5, 5, 3]))), - ] - ) - target_dataset = Dataset.from_iterable( - [ - DatasetItem(id="path", media=Image.from_file(path="path.jpg")), - DatasetItem(id=2), - DatasetItem(id=3, media=Image.from_numpy(data=np.ones([5, 5, 3]))), - ] - ) - - actual = transforms.IdFromImageName(source_dataset) - compare_datasets(self, target_dataset, actual) - @mark_requirement(Requirements.DATUM_GENERAL_REQ) def test_boxes_to_masks(self): source_dataset = Dataset.from_iterable( @@ -1225,6 +1208,84 @@ def test_annotation_reindex(self, fxt_dataset: Dataset, reindex_each_item: bool) ) +class IdFromImageNameTest: + @pytest.fixture + def fxt_dataset(self, n_labels=3, n_anns=5, n_items=7) -> Dataset: + video = Video("video.mp4") + video._frame_size = MagicMock(return_value=(32, 32)) + video.get_frame_data = MagicMock(return_value=np.ndarray((32, 32, 3), dtype=np.uint8)) + return Dataset.from_iterable( + [ + DatasetItem(id=1, media=Image.from_file(path="path1.jpg")), 
+ DatasetItem(id=2, media=Image.from_file(path="path1.jpg")), + DatasetItem(id=3, media=Image.from_file(path="path1.jpg")), + DatasetItem(id=4, media=VideoFrame(video, index=30)), + DatasetItem(id=5, media=VideoFrame(video, index=30)), + DatasetItem(id=6, media=VideoFrame(video, index=60)), + DatasetItem(id=7), + DatasetItem(id=8, media=Image.from_numpy(data=np.ones([5, 5, 3]))), + ] + ) + + @mark_requirement(Requirements.DATUM_GENERAL_REQ) + @pytest.mark.parametrize("ensure_unique", [True, False]) + def test_id_from_image(self, fxt_dataset, ensure_unique): + source_dataset: Dataset = fxt_dataset + actual_dataset = transforms.IdFromImageName(source_dataset, ensure_unique=ensure_unique) + + unique_names: set[str] = set() + for src, actual in zip(source_dataset, actual_dataset): + if not isinstance(src.media, Image) or not hasattr(src.media, "path"): + src == actual + else: + if isinstance(src.media, VideoFrame): + expected_id = f"video_frame-{src.media.index}" + else: + expected_id = os.path.splitext(src.media.path)[0] + if ensure_unique: + assert actual.id.startswith(expected_id) + assert actual.wrap(id=src.id) == src + assert actual.id not in unique_names + unique_names.add(actual.id) + else: + assert src.wrap(id=expected_id) == actual + + @mark_requirement(Requirements.DATUM_GENERAL_REQ) + def test_id_from_image_wrong_suffix_length(self, fxt_dataset): + with pytest.raises(ValueError) as e: + transforms.IdFromImageName(fxt_dataset, ensure_unique=True, suffix_length=0) + assert str(e.value).startswith("The 'suffix_length' must be greater than 0.") + + @mark_requirement(Requirements.DATUM_GENERAL_REQ) + def test_id_from_image_too_many_duplication(self, fxt_dataset): + with patch("datumaro.plugins.transforms.IdFromImageName.DEFAULT_RETRY", 1), patch( + "datumaro.plugins.transforms.IdFromImageName.SUFFIX_LETTERS", "a" + ), pytest.raises(Exception) as e: + with eager_mode(): + fxt_dataset.transform( + "id_from_image_name", + ensure_unique=True, + suffix_length=1, + ) + 
assert str(e.value).startswith("Too many duplicate names.") + + @mark_requirement(Requirements.DATUM_GENERAL_REQ) + @pytest.mark.parametrize( + "args,ensure_unique,suffix_length", + [ + ([], False, 3), + (["--ensure_unique", "--suffix_length", "2"], True, 2), + ], + ids=["default", "ensure_unique"], + ) + def test_parser(self, args, ensure_unique, suffix_length): + parser = transforms.IdFromImageName.build_cmdline_parser() + args = parser.parse_args(args) + + assert hasattr(args, "ensure_unique") and args.ensure_unique == ensure_unique + assert hasattr(args, "suffix_length") and args.suffix_length == suffix_length + + class AstypeAnnotationsTest(TestCase): def setUp(self): self.table = Table.from_list( @@ -1673,3 +1734,53 @@ def test_transform_clean_after_astype_ann(self): result_item = result.__getitem__(i) self.assertEqual(expected_item.annotations, result_item.annotations) self.assertEqual(expected_item.media, result_item.media) + + +class PseudoLabelingTest(TestCase): + def setUp(self): + self.data_path = get_test_asset_path("explore_dataset") + self.categories = ["bird", "cat", "dog", "monkey"] + self.source = Dataset.from_iterable( + [ + DatasetItem( + id=0, + media=Image.from_file(path=os.path.join(self.data_path, "dog", "0.JPEG")), + ), + DatasetItem( + id=1, + media=Image.from_file(path=os.path.join(self.data_path, "cat", "0.JPEG")), + ), + ], + categories=self.categories, + ) + + @mark_requirement(Requirements.DATUM_GENERAL_REQ) + def test_transform_pseudolabeling_with_labels(self): + dataset = self.source + labels = self.categories + explorer = Explorer(dataset) + result = dataset.transform("pseudo_labeling", labels=labels, explorer=explorer) + + label_indices = dataset.categories()[AnnotationType.label]._indices + for item, expected in zip(result, ["dog", "cat"]): + self.assertEqual(item.annotations[0].label, label_indices[expected]) + + @mark_requirement(Requirements.DATUM_GENERAL_REQ) + def test_transform_pseudolabeling_without_labels(self): + dataset 
= self.source + explorer = Explorer(dataset) + result = dataset.transform("pseudo_labeling", explorer=explorer) + + label_indices = dataset.categories()[AnnotationType.label]._indices + for item, expected in zip(result, ["dog", "cat"]): + self.assertEqual(item.annotations[0].label, label_indices[expected]) + + @mark_requirement(Requirements.DATUM_GENERAL_REQ) + def test_transform_pseudolabeling_without_explorer(self): + dataset = self.source + labels = self.categories + result = dataset.transform("pseudo_labeling", labels=labels) + + label_indices = dataset.categories()[AnnotationType.label]._indices + for item, expected in zip(result, ["dog", "cat"]): + self.assertEqual(item.annotations[0].label, label_indices[expected]) diff --git a/tests/unit/test_visualizer.py b/tests/unit/test_visualizer.py index 676cbfb966..6d0d1c15a9 100644 --- a/tests/unit/test_visualizer.py +++ b/tests/unit/test_visualizer.py @@ -474,7 +474,7 @@ def setUpClass(cls): super().setUpClass() for item in cls.dataset: - item.annotations.append(HashKey(np.ones(64).astype(np.uint8))) + item.annotations.append(HashKey(np.ones(96).astype(np.uint8))) @mark_requirement(Requirements.DATUM_GENERAL_REQ) def test_vis_one_sample(self): diff --git a/tests/utils/test_utils.py b/tests/utils/test_utils.py index f9a2e72d59..3a25c1d158 100644 --- a/tests/utils/test_utils.py +++ b/tests/utils/test_utils.py @@ -108,6 +108,17 @@ def compare_categories(test, expected, actual): sorted(expected[AnnotationType.label].items, key=lambda t: t.name), sorted(actual[AnnotationType.label].items, key=lambda t: t.name), ) + if expected[AnnotationType.label].label_groups: + assert len(expected[AnnotationType.label].label_groups) == len( + actual[AnnotationType.label].label_groups + ) + for expected_group, actual_group in zip( + expected[AnnotationType.label].label_groups, + actual[AnnotationType.label].label_groups, + ): + test.assertEqual(set(expected_group.labels), set(actual_group.labels)) + 
test.assertEqual(expected_group.group_type, actual_group.group_type) + if AnnotationType.mask in expected: test.assertEqual( expected[AnnotationType.mask].colormap,