Skip to content

Commit

Permalink
chore: Add example for inspection of picture content
Browse files Browse the repository at this point in the history
Signed-off-by: Christoph Auer <[email protected]>
  • Loading branch information
cau-git committed Dec 18, 2024
1 parent 1418fa1 commit 687c469
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 0 deletions.
29 changes: 29 additions & 0 deletions docs/examples/inspect_picture_content.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
from docling_core.types.doc import TextItem

from docling.datamodel.base_models import InputFormat
from docling.datamodel.pipeline_options import PdfPipelineOptions
from docling.document_converter import DocumentConverter, PdfFormatOption

# Example: list the text elements nested inside each picture of a converted PDF.

# Relative path to the sample PDF (presumably resolved from the repository
# root -- run the script from there, or adjust the path).
source = "tests/data/amt_handbook_sample.pdf"

# PDF pipeline settings: request page images and set the image scale to 2.
# NOTE(review): page images look like a prerequisite for the optional
# `picture.get_image(doc)` call below -- confirm against the docling docs.
pipeline_options = PdfPipelineOptions()
pipeline_options.images_scale = 2
pipeline_options.generate_page_images = True

# Build a converter that applies the options above to PDF inputs.
doc_converter = DocumentConverter(
    format_options={InputFormat.PDF: PdfFormatOption(pipeline_options=pipeline_options)}
)

result = doc_converter.convert(source)

doc = result.document

for picture in doc.pictures:
    # Optional: uncomment to display the picture itself.
    # picture.get_image(doc).show()
    print(picture.caption_text(doc), " contains these elements:")

    # Walk the items below this picture; `traverse_pictures=True` is passed so
    # the iteration descends into picture content. The second tuple element
    # is not needed here, hence the underscore-prefixed name.
    for item, _level in doc.iterate_items(root=picture, traverse_pictures=True):
        # Only text items are printed; any other item type is skipped.
        if isinstance(item, TextItem):
            print(item.text)

    # Blank separator between pictures.
    print("\n")
Binary file added tests/data/amt_handbook_sample.pdf
Binary file not shown.

0 comments on commit 687c469

Please sign in to comment.