diff --git a/docling/backend/mspowerpoint_backend.py b/docling/backend/mspowerpoint_backend.py index f595e4bd..995969d4 100644 --- a/docling/backend/mspowerpoint_backend.py +++ b/docling/backend/mspowerpoint_backend.py @@ -16,7 +16,7 @@ TableCell, TableData, ) -from PIL import Image +from PIL import Image, UnidentifiedImageError from pptx import Presentation from pptx.enum.shapes import MSO_SHAPE_TYPE, PP_PLACEHOLDER @@ -120,6 +120,7 @@ def handle_text_elements(self, shape, parent_slide, slide_ind, doc): bullet_type = "None" list_text = "" list_label = GroupLabel.LIST + doc_label = DocItemLabel.LIST_ITEM prov = self.generate_prov(shape, slide_ind, shape.text.strip()) # Identify if shape contains lists @@ -276,16 +277,19 @@ def handle_pictures(self, shape, parent_slide, slide_ind, doc): im_dpi, _ = image.dpi # Open it with PIL - pil_image = Image.open(BytesIO(image_bytes)) - - # shape has picture - prov = self.generate_prov(shape, slide_ind, "") - doc.add_picture( - parent=parent_slide, - image=ImageRef.from_pil(image=pil_image, dpi=im_dpi), - caption=None, - prov=prov, - ) + try: + pil_image = Image.open(BytesIO(image_bytes)) + + # shape has picture + prov = self.generate_prov(shape, slide_ind, "") + doc.add_picture( + parent=parent_slide, + image=ImageRef.from_pil(image=pil_image, dpi=im_dpi), + caption=None, + prov=prov, + ) + except (UnidentifiedImageError, OSError) as e: + _log.warning(f"Warning: image cannot be loaded by Pillow: {e}") return def handle_tables(self, shape, parent_slide, slide_ind, doc):