Skip to content

Commit

Permalink
TST: Add test for arab text (#1127)
Browse files Browse the repository at this point in the history
  • Loading branch information
MartinThoma authored Jul 17, 2022
1 parent ae0ff49 commit 0b693e1
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 4 deletions.
2 changes: 1 addition & 1 deletion sample-files
13 changes: 10 additions & 3 deletions tests/test_page.py
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,7 @@
 import os
 from copy import deepcopy
 from io import BytesIO
+from pathlib import Path
 
 import pytest

@@ -16,11 +17,11 @@
 TESTS_ROOT = os.path.abspath(os.path.dirname(__file__))
 PROJECT_ROOT = os.path.dirname(TESTS_ROOT)
 RESOURCE_ROOT = os.path.join(PROJECT_ROOT, "resources")
-EXTERNAL_ROOT = os.path.join(PROJECT_ROOT, "sample-files")
+EXTERNAL_ROOT = Path(PROJECT_ROOT) / "sample-files"
 
 
 def get_all_sample_files():
-    with open(os.path.join(EXTERNAL_ROOT, "files.json")) as fp:
+    with open(EXTERNAL_ROOT / "files.json") as fp:
         data = fp.read()
     meta = json.loads(data)
     return meta
@@ -37,7 +38,7 @@ def get_all_sample_files():
 )
 @pytest.mark.filterwarnings("ignore::PyPDF2.errors.PdfReadWarning")
 def test_read(meta):
-    pdf_path = os.path.join(EXTERNAL_ROOT, meta["path"])
+    pdf_path = EXTERNAL_ROOT / meta["path"]
     reader = PdfReader(pdf_path)
     reader.pages[0]
     assert len(reader.pages) == meta["pages"]
@@ -342,3 +343,9 @@ def test_empyt_password_1088():
     stream = BytesIO(get_pdf_from_url(url, name=name))
     reader = PdfReader(stream)
     len(reader.pages)
+
+
+@pytest.mark.xfail(reason="#1088 / #1126")
+def test_arab_text_extraction():
+    reader = PdfReader(EXTERNAL_ROOT / "015-arabic/habibi.pdf")
+    assert reader.pages[0].extract_text() == "habibi حَبيبي"

0 comments on commit 0b693e1

Please sign in to comment.