Skip to content

Commit

Permalink
Fixed Arabic text extraction test file and whitespace
Browse files Browse the repository at this point in the history
  • Loading branch information
naourass committed Feb 2, 2023
1 parent 56d9a1a commit 25fa0d3
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 5 deletions.
3 changes: 2 additions & 1 deletion pypdf/_page.py
Original file line number Diff line number Diff line change
Expand Up @@ -1691,7 +1691,8 @@ def process_operation(operator: bytes, operands: List) -> None:
)
text = ""
elif (
abs(delta_y) < f * 0.3
rtl_dir == False
and abs(delta_y) < f * 0.3
and abs(delta_x) > current_spacewidth() * f * 15
):
if (output + text)[-1] != " ":
Expand Down
11 changes: 7 additions & 4 deletions tests/test_page.py
Original file line number Diff line number Diff line change
Expand Up @@ -832,11 +832,14 @@ def test_empyt_password_1088():
reader = PdfReader(stream)
len(reader.pages)


@pytest.mark.xfail(reason="#1088 / #1126")
@pytest.mark.external
# @pytest.mark.xfail(reason="#1088 / #1126")
def test_arab_text_extraction():
reader = PdfReader(SAMPLE_ROOT / "015-arabic/habibi.pdf")
assert reader.pages[0].extract_text() == "habibi حَبيبي"
# previous habibi.pdf not used anymore and can be deleted
url = "https://github.com/py-pdf/pypdf/files/10567398/habibi-fixed.pdf"
name = "habibi-fixed-020220231036.pdf"
reader = PdfReader(BytesIO(get_pdf_from_url(url, name=name)))
assert reader.pages[0].extract_text() == "habibi ﺣَﺒﯿﺒﻲ"


@pytest.mark.samples
Expand Down

0 comments on commit 25fa0d3

Please sign in to comment.