From 26d403d7a70f65fdf67b48cd0f23769086d6dad3 Mon Sep 17 00:00:00 2001 From: Steve Canny Date: Thu, 23 May 2024 14:30:55 -0700 Subject: [PATCH] fix: add missing params to ElementMetadata (#3092) A couple of parameters needed for DOCX image extraction were not added as parameters to the `ElementMetadata` constructor when they were added as known fields. Also repair a couple of gaps in alphabetical ordering caused by recent additions. --- unstructured/documents/elements.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/unstructured/documents/elements.py b/unstructured/documents/elements.py index f189f482de..3d1506c905 100644 --- a/unstructured/documents/elements.py +++ b/unstructured/documents/elements.py @@ -234,13 +234,15 @@ def __init__( filename: Optional[str | pathlib.Path] = None, filetype: Optional[str] = None, header_footer_type: Optional[str] = None, + image_base64: Optional[str] = None, + image_mime_type: Optional[str] = None, image_path: Optional[str] = None, is_continuation: Optional[bool] = None, languages: Optional[list[str]] = None, last_modified: Optional[str] = None, + link_start_indexes: Optional[list[int]] = None, link_texts: Optional[list[str]] = None, link_urls: Optional[list[str]] = None, - link_start_indexes: Optional[list[int]] = None, links: Optional[list[Link]] = None, orig_elements: Optional[list[Element]] = None, page_name: Optional[str] = None, @@ -251,8 +253,8 @@ def __init__( sent_to: Optional[list[str]] = None, signature: Optional[str] = None, subject: Optional[str] = None, - text_as_html: Optional[str] = None, table_as_cells: Optional[dict[str, str | int]] = None, + text_as_html: Optional[str] = None, url: Optional[str] = None, ) -> None: self.attached_to_filename = attached_to_filename @@ -274,6 +276,8 @@ def __init__( self.filetype = filetype self.header_footer_type = header_footer_type + self.image_base64 = image_base64 + self.image_mime_type = image_mime_type self.image_path = image_path self.is_continuation 
= is_continuation self.languages = languages