diff --git a/docling_core/transforms/chunker/base.py b/docling_core/transforms/chunker/base.py index ad89c17..49a99bd 100644 --- a/docling_core/transforms/chunker/base.py +++ b/docling_core/transforms/chunker/base.py @@ -51,7 +51,7 @@ class BaseChunker(BaseModel, ABC): delim: str = DFLT_DELIM @abstractmethod - def chunk(self, dl_doc: DLDocument, **kwargs) -> Iterator[BaseChunk]: + def chunk(self, dl_doc: DLDocument, **kwargs: Any) -> Iterator[BaseChunk]: """Chunk the provided document. Args: diff --git a/docling_core/transforms/chunker/hybrid_chunker.py b/docling_core/transforms/chunker/hybrid_chunker.py index 5f81d2e..13bd60d 100644 --- a/docling_core/transforms/chunker/hybrid_chunker.py +++ b/docling_core/transforms/chunker/hybrid_chunker.py @@ -6,7 +6,7 @@ """Hybrid chunker implementation leveraging both doc structure & token awareness.""" import warnings -from typing import Iterable, Iterator, Optional, Union +from typing import Any, Iterable, Iterator, Optional, Union from pydantic import BaseModel, ConfigDict, PositiveInt, TypeAdapter, model_validator from typing_extensions import Self @@ -258,7 +258,7 @@ def _merge_chunks_with_matching_metadata(self, chunks: list[DocChunk]): return output_chunks - def chunk(self, dl_doc: DoclingDocument, **kwargs) -> Iterator[BaseChunk]: + def chunk(self, dl_doc: DoclingDocument, **kwargs: Any) -> Iterator[BaseChunk]: r"""Chunk the provided document. Args: diff --git a/pyproject.toml b/pyproject.toml index 66d63e3..7fa3367 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -127,6 +127,7 @@ module = [ "jsonref.*", "jsonschema.*", "requests.*", + "semchunk.*", "tabulate.*", "transformers.*", "yaml.*",