diff --git a/docling/cli/main.py b/docling/cli/main.py index 7ca7adf2..b06354c8 100644 --- a/docling/cli/main.py +++ b/docling/cli/main.py @@ -372,11 +372,13 @@ def convert( else: raise RuntimeError(f"Unexpected PDF backend type {pdf_backend}") + pdf_format_option = PdfFormatOption( + pipeline_options=pipeline_options, + backend=backend, # pdf_backend + ) format_options: Dict[InputFormat, FormatOption] = { - InputFormat.PDF: PdfFormatOption( - pipeline_options=pipeline_options, - backend=backend, # pdf_backend - ) + InputFormat.PDF: pdf_format_option, + InputFormat.IMAGE: pdf_format_option, } doc_converter = DocumentConverter( allowed_formats=from_formats, diff --git a/docling/datamodel/pipeline_options.py b/docling/datamodel/pipeline_options.py index 63e0d3c6..9be3ee82 100644 --- a/docling/datamodel/pipeline_options.py +++ b/docling/datamodel/pipeline_options.py @@ -143,7 +143,11 @@ class PdfPipelineOptions(PipelineOptions): table_structure_options: TableStructureOptions = TableStructureOptions() ocr_options: Union[ - EasyOcrOptions, TesseractCliOcrOptions, TesseractOcrOptions, OcrMacOptions + EasyOcrOptions, + TesseractCliOcrOptions, + TesseractOcrOptions, + OcrMacOptions, + RapidOcrOptions, ] = Field(EasyOcrOptions(), discriminator="kind") images_scale: float = 1.0 diff --git a/docling/pipeline/standard_pdf_pipeline.py b/docling/pipeline/standard_pdf_pipeline.py index 40105a38..6f5e1542 100644 --- a/docling/pipeline/standard_pdf_pipeline.py +++ b/docling/pipeline/standard_pdf_pipeline.py @@ -97,7 +97,9 @@ def download_models_hf( local_dir: Optional[Path] = None, force: bool = False ) -> Path: from huggingface_hub import snapshot_download + from huggingface_hub.utils import disable_progress_bars + disable_progress_bars() download_path = snapshot_download( repo_id="ds4sd/docling-models", force_download=force,