Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

removes decord #33987

Merged
merged 3 commits into from
Oct 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion docker/transformers-all-latest-gpu/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ RUN python3 -m pip install --no-cache-dir git+https://github.com/huggingface/pef
RUN python3 -m pip install --no-cache-dir git+https://github.com/huggingface/optimum@main#egg=optimum

# For video model testing
RUN python3 -m pip install --no-cache-dir decord av==9.2.0
RUN python3 -m pip install --no-cache-dir av==9.2.0

# Some slow tests require bnb
RUN python3 -m pip install --no-cache-dir bitsandbytes
Expand Down
3 changes: 1 addition & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,6 @@
"cookiecutter==1.7.3",
"dataclasses",
"datasets!=2.5.0",
"decord==0.6.0",
"deepspeed>=0.9.3",
"diffusers",
"dill<0.3.5",
Expand Down Expand Up @@ -313,7 +312,7 @@ def run(self):
extras["torch-vision"] = deps_list("torchvision") + extras["vision"]
extras["natten"] = deps_list("natten")
extras["codecarbon"] = deps_list("codecarbon")
extras["video"] = deps_list("decord", "av")
extras["video"] = deps_list("av")

extras["sentencepiece"] = deps_list("sentencepiece", "protobuf")
extras["tiktoken"] = deps_list("tiktoken", "blobfile")
Expand Down
2 changes: 0 additions & 2 deletions src/transformers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -936,7 +936,6 @@
"is_av_available",
"is_bitsandbytes_available",
"is_datasets_available",
"is_decord_available",
"is_faiss_available",
"is_flax_available",
"is_keras_nlp_available",
Expand Down Expand Up @@ -5833,7 +5832,6 @@
is_av_available,
is_bitsandbytes_available,
is_datasets_available,
is_decord_available,
is_faiss_available,
is_flax_available,
is_keras_nlp_available,
Expand Down
1 change: 0 additions & 1 deletion src/transformers/dependency_versions_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
"cookiecutter": "cookiecutter==1.7.3",
"dataclasses": "dataclasses",
"datasets": "datasets!=2.5.0",
"decord": "decord==0.6.0",
"deepspeed": "deepspeed>=0.9.3",
"diffusers": "diffusers",
"dill": "dill<0.3.5",
Expand Down
43 changes: 32 additions & 11 deletions src/transformers/models/git/convert_git_to_pytorch.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import argparse
from pathlib import Path

import av
import numpy as np
import requests
import torch
Expand Down Expand Up @@ -193,10 +194,27 @@ def prepare_img(model_name):


def prepare_video():
from decord import VideoReader, cpu
def read_video_pyav(container, indices):
"""
Decode the video with PyAV decoder.

# set seed for reproducibility
np.random.seed(0)
Args:
container (`av.container.input.InputContainer`): PyAV container.
indices (`List[int]`): List of frame indices to decode.

Returns:
result (`np.ndarray`): NumPy array of decoded frames of shape (num_frames, height, width, 3).
"""
frames = []
container.seek(0)
start_index = indices[0]
end_index = indices[-1]
for i, frame in enumerate(container.decode(video=0)):
if i > end_index:
break
if i >= start_index and i in indices:
frames.append(frame)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
frames.append(frame)
frames.append(frame.to_rgb())

any reason we don't use this?
or directly convert before stacking?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good question — we apply the conversion once at the end, transforming the frames directly into a NumPy array as they are stacked:

return np.stack([x.to_ndarray(format="rgb24") for x in frames])

It's more efficient than converting the frames to rgb:
https://github.com/PyAV-Org/PyAV/blob/main/av/video/frame.pyx#L252
https://github.com/PyAV-Org/PyAV/blob/main/av/audio/frame.pyx#L168

return np.stack([x.to_ndarray(format="rgb24") for x in frames])

def sample_frame_indices(clip_len, frame_sample_rate, seg_len):
"""
Expand All @@ -217,16 +235,19 @@ def sample_frame_indices(clip_len, frame_sample_rate, seg_len):
indices = np.clip(indices, start_idx, end_idx - 1).astype(np.int64)
return indices

# video clip consists of 300 frames (10 seconds at 30 FPS)
file_path = hf_hub_download(repo_id="nielsr/video-demo", filename="eating_spaghetti.mp4", repo_type="dataset")
videoreader = VideoReader(file_path, num_threads=1, ctx=cpu(0))
# set seed for reproducibility
np.random.seed(0)

# sample 6 frames
videoreader.seek(0)
indices = sample_frame_indices(clip_len=6, frame_sample_rate=4, seg_len=len(videoreader))
video = videoreader.get_batch(indices).asnumpy()
file_path = hf_hub_download(repo_id="nielsr/video-demo", filename="eating_spaghetti.mp4", repo_type="dataset")
with av.open(file_path) as container:
# sample 6 frames
num_frames = 6
indices = sample_frame_indices(
clip_len=num_frames, frame_sample_rate=4, seg_len=container.streams.video[0].frames
)
frames = read_video_pyav(container, indices)

return video
return frames


@torch.no_grad()
Expand Down
8 changes: 0 additions & 8 deletions src/transformers/testing_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,6 @@
is_compressed_tensors_available,
is_cv2_available,
is_cython_available,
is_decord_available,
is_detectron2_available,
is_eetq_available,
is_essentia_available,
Expand Down Expand Up @@ -758,13 +757,6 @@ def require_spacy(test_case):
return unittest.skipUnless(is_spacy_available(), "test requires spacy")(test_case)


def require_decord(test_case):
"""
Decorator marking a test that requires decord. These tests are skipped when decord isn't installed.
"""
return unittest.skipUnless(is_decord_available(), "test requires decord")(test_case)


def require_torch_multi_gpu(test_case):
"""
Decorator marking a test that requires a multi-GPU setup (in PyTorch). These tests are skipped on a machine without
Expand Down
1 change: 0 additions & 1 deletion src/transformers/utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,6 @@
is_cv2_available,
is_cython_available,
is_datasets_available,
is_decord_available,
is_detectron2_available,
is_eetq_available,
is_essentia_available,
Expand Down
10 changes: 0 additions & 10 deletions src/transformers/utils/import_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,6 @@ def _is_package_available(pkg_name: str, return_version: bool = False) -> Union[
# `importlib.metadata.util` doesn't work with `opencv-python-headless`.
_cv2_available = importlib.util.find_spec("cv2") is not None
_datasets_available = _is_package_available("datasets")
_decord_available = importlib.util.find_spec("decord") is not None
_detectron2_available = _is_package_available("detectron2")
# We need to check both `faiss` and `faiss-cpu`.
_faiss_available = importlib.util.find_spec("faiss") is not None
Expand Down Expand Up @@ -1173,10 +1172,6 @@ def is_ccl_available():
return _is_ccl_available


def is_decord_available():
return _decord_available


def is_sudachi_available():
return _sudachipy_available

Expand Down Expand Up @@ -1547,10 +1542,6 @@ def is_liger_kernel_available():
Please note that you may need to restart your runtime after installation.
"""

DECORD_IMPORT_ERROR = """
{0} requires the decord library but it was not found in your environment. You can install it with pip: `pip install
decord`. Please note that you may need to restart your runtime after installation.
"""

CYTHON_IMPORT_ERROR = """
{0} requires the Cython library but it was not found in your environment. You can install it with pip: `pip install
Expand Down Expand Up @@ -1612,7 +1603,6 @@ def is_liger_kernel_available():
("scipy", (is_scipy_available, SCIPY_IMPORT_ERROR)),
("accelerate", (is_accelerate_available, ACCELERATE_IMPORT_ERROR)),
("oneccl_bind_pt", (is_ccl_available, CCL_IMPORT_ERROR)),
("decord", (is_decord_available, DECORD_IMPORT_ERROR)),
("cython", (is_cython_available, CYTHON_IMPORT_ERROR)),
("jieba", (is_jieba_available, JIEBA_IMPORT_ERROR)),
("peft", (is_peft_available, PEFT_IMPORT_ERROR)),
Expand Down
6 changes: 3 additions & 3 deletions tests/test_pipeline_mixin.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
from transformers.pipelines import AudioClassificationPipeline, AutomaticSpeechRecognitionPipeline
from transformers.testing_utils import (
is_pipeline_test,
require_decord,
require_av,
require_pytesseract,
require_timm,
require_torch,
Expand Down Expand Up @@ -583,14 +583,14 @@ def test_pipeline_translation_fp16(self):
@is_pipeline_test
@require_torch_or_tf
@require_vision
@require_decord
@require_av
def test_pipeline_video_classification(self):
self.run_task_tests(task="video-classification")

@is_pipeline_test
@require_vision
@require_decord
@require_torch
@require_av
def test_pipeline_video_classification_fp16(self):
self.run_task_tests(task="video-classification", torch_dtype="float16")

Expand Down
Loading