From b116c465a2af6327cffa95de0745506404cb39f9 Mon Sep 17 00:00:00 2001 From: Christoph Auer <60343111+cau-git@users.noreply.github.com> Date: Tue, 17 Dec 2024 16:31:04 +0100 Subject: [PATCH] fix: Fixes for legacy-doc handling (#115) * Fixes for legacy-doc handling Signed-off-by: Christoph Auer * Fixes for legacy-doc handling Signed-off-by: Christoph Auer --------- Signed-off-by: Christoph Auer --- docling_core/types/legacy_doc/document.py | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/docling_core/types/legacy_doc/document.py b/docling_core/types/legacy_doc/document.py index f14e37a..9532774 100644 --- a/docling_core/types/legacy_doc/document.py +++ b/docling_core/types/legacy_doc/document.py @@ -550,17 +550,18 @@ def export_to_markdown( # noqa: C901 elif ( isinstance(item, Table) - and item.data + and (item.data or item.text) and item_type in main_text_labels ): md_table = "" table = [] - for row in item.data: - tmp = [] - for col in row: - tmp.append(col.text) - table.append(tmp) + if item.data is not None: + for row in item.data: + tmp = [] + for col in row: + tmp.append(col.text) + table.append(tmp) if len(table) > 1 and len(table[0]) > 0: try: @@ -579,7 +580,9 @@ def export_to_markdown( # noqa: C901 if item.text: markdown_text = item.text if not strict_text: - markdown_text += "\n\n" + md_table + markdown_text += ( + "\n\n" if len(markdown_text) > 0 else "" + ) + md_table elif isinstance(item, Figure) and item_type in main_text_labels: @@ -587,7 +590,9 @@ def export_to_markdown( # noqa: C901 if item.text: markdown_text = item.text if not strict_text: - markdown_text += f"\n{image_placeholder}" + markdown_text += ( + "\n" if len(markdown_text) > 0 else "" + ) + image_placeholder if markdown_text: md_texts.append(markdown_text)