Skip to content

Commit

Permalink
fix: Fixes for legacy-doc handling (#115)
Browse files Browse the repository at this point in the history
* Fixes for legacy-doc handling

Signed-off-by: Christoph Auer <[email protected]>

* Fixes for legacy-doc handling

Signed-off-by: Christoph Auer <[email protected]>

---------

Signed-off-by: Christoph Auer <[email protected]>
  • Loading branch information
cau-git authored Dec 17, 2024
1 parent ee49c60 commit b116c46
Showing 1 changed file with 13 additions and 8 deletions.
21 changes: 13 additions & 8 deletions docling_core/types/legacy_doc/document.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -550,17 +550,18 @@ def export_to_markdown( # noqa: C901

  elif (
      isinstance(item, Table)
-     and item.data
+     and (item.data or item.text)
      and item_type in main_text_labels
  ):

      md_table = ""
      table = []
-     for row in item.data:
-         tmp = []
-         for col in row:
-             tmp.append(col.text)
-         table.append(tmp)
+     if item.data is not None:
+         for row in item.data:
+             tmp = []
+             for col in row:
+                 tmp.append(col.text)
+             table.append(tmp)

      if len(table) > 1 and len(table[0]) > 0:
          try:
Expand All @@ -579,15 +580,19 @@ def export_to_markdown( # noqa: C901
      if item.text:
          markdown_text = item.text
      if not strict_text:
-         markdown_text += "\n\n" + md_table
+         markdown_text += (
+             "\n\n" if len(markdown_text) > 0 else ""
+         ) + md_table

  elif isinstance(item, Figure) and item_type in main_text_labels:

      markdown_text = ""
      if item.text:
          markdown_text = item.text
      if not strict_text:
-         markdown_text += f"\n{image_placeholder}"
+         markdown_text += (
+             "\n" if len(markdown_text) > 0 else ""
+         ) + image_placeholder

  if markdown_text:
      md_texts.append(markdown_text)
Expand Down

0 comments on commit b116c46

Please sign in to comment.