Skip to content

Commit

Permalink
Inferring image DPI from pptx file
Browse files Browse the repository at this point in the history
Signed-off-by: Maksym Lysak <[email protected]>
  • Loading branch information
Maksym Lysak committed Nov 18, 2024
1 parent 2240008 commit b132c68
Show file tree
Hide file tree
Showing 2 changed files with 4 additions and 2 deletions.
4 changes: 3 additions & 1 deletion docling/backend/mspowerpoint_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -273,14 +273,16 @@ def handle_pictures(self, shape, parent_slide, slide_ind, doc):
# Get the image bytes
image = shape.image
image_bytes = image.blob
+ im_dpi, _ = image.dpi

# Open it with PIL
pil_image = Image.open(BytesIO(image_bytes))

# shape has picture
prov = self.generate_prov(shape, slide_ind, "")
doc.add_picture(
parent=parent_slide,
- image=ImageRef.from_pil(image=pil_image, dpi=72),
+ image=ImageRef.from_pil(image=pil_image, dpi=im_dpi),
caption=None,
prov=prov,
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@
"footnotes": [],
"image": {
"mimetype": "image/png",
- "dpi": 72,
+ "dpi": 300,
"size": {
"width": 268.0,
"height": 268.0
Expand Down

0 comments on commit b132c68

Please sign in to comment.