diff --git a/PyPDF2/_cmap.py b/PyPDF2/_cmap.py index 36d12f476..595abce7f 100644 --- a/PyPDF2/_cmap.py +++ b/PyPDF2/_cmap.py @@ -42,7 +42,11 @@ def build_char_map( pass # I conside the space_code is available on one byte if isinstance(space_code, str): - sp = space_code.encode("charmap")[0] + try: # one byte + sp = space_code.encode("charmap")[0] + except Exception: + sp = space_code.encode("utf-16-be") + sp = sp[0] + 256 * sp[1] else: sp = space_code sp_width = compute_space_width(ft, sp, space_width) diff --git a/tests/test_workflows.py b/tests/test_workflows.py index 3191928ea..71e6a0240 100644 --- a/tests/test_workflows.py +++ b/tests/test_workflows.py @@ -149,6 +149,8 @@ def test_rotate_45(): (True, "https://arxiv.org/pdf/2201.00200.pdf", [0, 1, 5, 6]), (True, "https://arxiv.org/pdf/2201.00022.pdf", [0, 1, 5, 10]), (True, "https://arxiv.org/pdf/2201.00029.pdf", [0, 1, 6, 10]), + # #1145 + (True, "https://github.com/py-pdf/PyPDF2/files/9174594/2017.pdf", [0]), # 6 instead of 5: as there is an issue in page 5 (missing objects) # and too complex to handle the warning without hiding real regressions (True, "https://arxiv.org/pdf/1601.03642.pdf", [0, 1, 5, 7]),