Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: allow computing page images on-demand with scale and cache them #36

Merged
merged 3 commits into from
Aug 20, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion docling/backend/docling_parse_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,9 @@ def get_text_cells(self) -> Iterable[Cell]:
cell_counter += 1

def draw_clusters_and_cells():
image = self.get_page_image()
image = (
self.get_page_image()
) # make new image to avoid drawing on the saved ones
draw = ImageDraw.Draw(image)
for c in cells:
x0, y0, x1, y1 = c.bbox.as_tuple()
Expand Down
4 changes: 3 additions & 1 deletion docling/backend/pypdfium2_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,9 @@ def merge_group(group: List[Cell]) -> Cell:
return merged_cells

def draw_clusters_and_cells():
image = self.get_page_image()
image = (
self.get_page_image()
) # make new image to avoid drawing on the saved ones
draw = ImageDraw.Draw(image)
for c in cells:
x0, y0, x1, y1 = c.bbox.as_tuple()
Expand Down
25 changes: 20 additions & 5 deletions docling/datamodel/base_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -234,14 +234,29 @@ class Page(BaseModel):
model_config = ConfigDict(arbitrary_types_allowed=True)

page_no: int
page_hash: str = None
size: PageSize = None
image: Image = None
page_hash: Optional[str] = None
size: Optional[PageSize] = None
cells: List[Cell] = None
predictions: PagePredictions = PagePredictions()
assembled: AssembledUnit = None
assembled: Optional[AssembledUnit] = None

_backend: PdfPageBackend = None # Internal PDF backend
_backend: Optional[PdfPageBackend] = (
None # Internal PDF backend. By default it is cleared during assembling.
)
_image_cache: Dict[float, Image] = (
{}
) # Cache of images in different scales. By default it is cleared during assembling.

def get_image(self, scale: float = 1.0) -> Optional[Image]:
    """Return the page image rendered at ``scale``, caching the result.

    While a backend is attached, the first request for a given scale is
    rendered through ``self._backend.get_page_image`` and stored in
    ``self._image_cache`` under that scale; later requests for the same
    scale return the stored object without rendering again. When no
    backend is attached (``self._backend is None``) nothing new can be
    rendered, so only images already present in the cache are served.

    The returned object is the cached instance itself, not a copy, so
    in-place modifications by the caller are visible to later callers.

    Args:
        scale: Rendering scale forwarded to the backend; also used as
            the cache key.

    Returns:
        The cached or freshly rendered image, or ``None`` when no backend
        is attached and no image is cached for ``scale``.
    """
    if self._backend is None:
        # Without a backend we can only hand out what was rendered earlier.
        return self._image_cache.get(scale)
    if scale not in self._image_cache:
        self._image_cache[scale] = self._backend.get_page_image(scale=scale)
    return self._image_cache[scale]

@property
def image(self) -> Optional[Image]:
    """Return the page image at the default scale of ``get_image``.

    Read-only accessor that delegates to ``self.get_image()`` with no
    arguments, so it follows the same caching rules and may return
    ``None``.
    """
    return self.get_image()


class DocumentStream(BaseModel):
Expand Down
8 changes: 3 additions & 5 deletions docling/document_converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,9 +189,7 @@ def process_document(self, in_doc: InputDocument) -> ConvertedDocument:

# Remove page images (can be disabled)
if not self.assemble_options.keep_page_images:
assembled_page.image = (
None # Comment this if you want to visualize page images
)
assembled_page._image_cache = {}

# Unload backend
assembled_page._backend.unload()
Expand Down Expand Up @@ -231,7 +229,7 @@ def initialize_page(self, doc: InputDocument, page: Page) -> Page:

# Generate the page image and store it in the page object
def populate_page_images(self, doc: InputDocument, page: Page) -> Page:
page.image = page._backend.get_page_image()
page.get_image() # this will trigger storing the image in the internal cache

return page

Expand All @@ -247,7 +245,7 @@ def draw_text_boxes(image, cells):
draw.rectangle([(x0, y0), (x1, y1)], outline="red")
image.show()

# draw_text_boxes(page.image, cells)
# draw_text_boxes(page.get_image(scale=1.0), cells)

return page

Expand Down
2 changes: 1 addition & 1 deletion docling/models/easyocr_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ def __call__(self, page_batch: Iterable[Page]) -> Iterable[Page]:

for page in page_batch:
# rects = page._fpage.
high_res_image = page._backend.get_page_image(scale=self.scale)
high_res_image = page.get_image(scale=self.scale)
im = numpy.array(high_res_image)
result = self.reader.readtext(im)

Expand Down
4 changes: 3 additions & 1 deletion docling/models/layout_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -267,7 +267,9 @@ def postprocess(self, clusters: List[Cluster], cells: List[Cell], page_height):
def __call__(self, page_batch: Iterable[Page]) -> Iterable[Page]:
for page in page_batch:
clusters = []
for ix, pred_item in enumerate(self.layout_predictor.predict(page.image)):
for ix, pred_item in enumerate(
self.layout_predictor.predict(page.get_image(scale=1.0))
):
cluster = Cluster(
id=ix,
label=pred_item["label"],
Expand Down
12 changes: 4 additions & 8 deletions docling/models/table_structure_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,9 @@ def __init__(self, config):
self.scale = 2.0 # Scale up table input images to 144 dpi

def draw_table_and_cells(self, page: Page, tbl_list: List[TableElement]):
image = page._backend.get_page_image()
image = (
page._backend.get_page_image()
) # make new image to avoid drawing on the saved ones
draw = ImageDraw.Draw(image)

for table_element in tbl_list:
Expand Down Expand Up @@ -94,13 +96,7 @@ def __call__(self, page_batch: Iterable[Page]) -> Iterable[Page]:
"width": page.size.width * self.scale,
"height": page.size.height * self.scale,
}
# add image to page input.
if self.scale == 1.0:
page_input["image"] = numpy.asarray(page.image)
else: # render new page image on the fly at desired scale
page_input["image"] = numpy.asarray(
page._backend.get_page_image(scale=self.scale)
)
page_input["image"] = numpy.asarray(page.get_image(scale=self.scale))

table_clusters, table_bboxes = zip(*in_tables)

Expand Down
Loading