diff --git a/src/datasets/features/video.py b/src/datasets/features/video.py index c7d579be94a..df5cb78bdd5 100644 --- a/src/datasets/features/video.py +++ b/src/datasets/features/video.py @@ -10,7 +10,7 @@ from ..download.download_config import DownloadConfig from ..table import array_cast from ..utils.file_utils import is_local_path, xopen -from ..utils.py_utils import no_op_if_value_is_null, string_to_dict +from ..utils.py_utils import string_to_dict if TYPE_CHECKING: @@ -236,41 +236,6 @@ def cast_storage(self, storage: Union[pa.StringArray, pa.StructArray, pa.ListArr ) return array_cast(storage, self.pa_type) - def embed_storage(self, storage: pa.StructArray) -> pa.StructArray: - """Embed video files into the Arrow array. - - Args: - storage (`pa.StructArray`): - PyArrow array to embed. - - Returns: - `pa.StructArray`: Array in the Video arrow storage type, that is - `pa.struct({"bytes": pa.binary(), "path": pa.string()})`. - """ - - @no_op_if_value_is_null - def path_to_bytes(path): - with xopen(path, "rb") as f: - bytes_ = f.read() - return bytes_ - - bytes_array = pa.array( - [ - (path_to_bytes(x["path"]) if x["bytes"] is None else x["bytes"]) if x is not None else None - for x in storage.to_pylist() - ], - type=pa.binary(), - ) - path_array = pa.array( - [ - (os.path.basename(path) if os.path.isfile(path) else path) if path is not None else None - for path in storage.field("path").to_pylist() - ], - type=pa.string(), - ) - storage = pa.StructArray.from_arrays([bytes_array, path_array], ["bytes", "path"], mask=bytes_array.is_null()) - return array_cast(storage, self.pa_type) - def video_to_bytes(video: "VideoReader") -> bytes: """Convert a decord Video object to bytes using native compression if possible"""