Skip to content

Commit

Permalink
Possible solution to #207
Browse files Browse the repository at this point in the history
  • Loading branch information
mara004 committed Apr 20, 2023
1 parent dea9c70 commit 0c17bcc
Show file tree
Hide file tree
Showing 3 changed files with 20 additions and 6 deletions.
2 changes: 2 additions & 0 deletions docs/devel/changelog_staging.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,5 @@
<!-- List character: dash (-) -->

# Changelog for next release
- `PdfTextPage.get_rect()`: Added missing return code check and updated docs regarding dependence on `count_rects()`.
Fixed related test code that was broken but disabled by accident (missing asserts). Thanks to Guy Rosin for reporting {issue}`207`.
6 changes: 5 additions & 1 deletion src/pypdfium2/_helpers/textpage.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,12 +167,16 @@ def get_charbox(self, index, loose=False):
def get_rect(self, index):
    """
    Get the bounding box of a text rectangle at the given index.

    Note that :meth:`.count_rects` must be called once with default parameters
    before subsequent :meth:`.get_rect` calls for this function to work (due to PDFium's API).

    Parameters:
        index: Index of the text rectangle to look up.
    Returns:
        Float values for left, bottom, right and top in PDF canvas units.
    Raises:
        PdfiumError: If ``FPDFText_GetRect()`` reports failure.
    """
    left, bottom, right, top = c_double(), c_double(), c_double(), c_double()
    # The PDFium call takes the out-parameters in left, top, right, bottom order,
    # whereas this method returns them as left, bottom, right, top.
    success = pdfium_c.FPDFText_GetRect(self, index, left, top, right, bottom)
    if not success:
        raise PdfiumError("Failed to get rectangle. (Make sure count_rects() was called with default params once before subsequent get_rect() calls.)")
    return (left.value, bottom.value, right.value, top.value)


Expand Down
18 changes: 13 additions & 5 deletions tests_old/test_text.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,10 +77,11 @@ def test_getrectboxes(textpage):
def _get_rects(textpage, search_result):
    """
    Collect the text rectangles that belong to a search result.

    Parameters:
        textpage: Object providing ``count_rects()`` and ``get_rect()``.
        search_result: ``None`` (no match), or a ``(char_index, char_count)`` pair.
    Returns:
        A list with one ``get_rect()`` result per rectangle; empty if *search_result* is ``None``.
    """
    # TODO add helper?
    if search_result is None:
        return []
    c_index, c_count = search_result
    # Index of the first rectangle to fetch: rectangles counted for the characters
    # before the match, minus one.
    # NOTE(review): presumably the match shares its rectangle with the preceding text - confirm.
    r_index = textpage.count_rects(0, c_index) - 1
    r_count = textpage.count_rects(c_index, c_count)
    # Call count_rects() with default parameters once more right before the get_rect()
    # calls, so that the rectangle indices used below refer to the default counting.
    textpage.count_rects()
    return [textpage.get_rect(i) for i in range(r_index, r_index + r_count)]

Expand All @@ -101,9 +102,16 @@ def test_search_text(textpage):
assert occ_4x is None
assert occ_1a == occ_1b and occ_2a == occ_2b

_get_rects(textpage, occ_1a) == [ (292, 678, 329, 690) ]
_get_rects(textpage, occ_2a) == [ (324, 641, 360, 653) ]
_get_rects(textpage, occ_3a) == [ (305, 549, 341, 561) ]
occs = (occ_1a, occ_2a, occ_3a)
exp_rectlists = [
[ (57, 675, 511, 690) ],
[ (58, 638, 537, 653) ],
[ (58, 549, 367, 561) ],
]

for occ, exp_rects in zip(occs, exp_rectlists):
rects = _get_rects(textpage, occ)
assert [pytest.approx(r, abs=0.5) for r in rects] == exp_rects


def test_get_index(textpage):
Expand Down

0 comments on commit 0c17bcc

Please sign in to comment.