Skip to content

Commit

Permalink
add example images
Browse files Browse the repository at this point in the history
  • Loading branch information
ppaanngggg committed Mar 5, 2024
1 parent 53a6e2f commit 0e0fe29
Show file tree
Hide file tree
Showing 5 changed files with 55 additions and 2 deletions.
8 changes: 6 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,10 @@
🤗 <a href="https://huggingface.co/hantian/layoutreader">Hugging Face</a>
</p>

TODO: a result example
<p align="center">
<img src="./example/page_0.png" width="400" alt="page_0"/>
<img src="./example/page_1.png" width="400" alt="page_1"/>
</p>

## Why this repo?

Expand All @@ -29,7 +32,8 @@ from v3.helpers import prepare_inputs, boxes2inputs, parse_logits

model = LayoutLMv3ForTokenClassification.from_pretrained("hantian/layoutreader")

boxes = [[...], ...] # list of [left, top, right, bottom], bboxes of spans
# list of [left, top, right, bottom] bboxes of spans; coordinates should range from 0 to 1000
boxes = [[...], ...]
inputs = boxes2inputs(boxes)
inputs = prepare_inputs(inputs, model)
logits = model(**inputs).logits.cpu().squeeze(0)
Expand Down
Binary file added example/page_0.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added example/page_1.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
49 changes: 49 additions & 0 deletions example/predict_and_draw.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
import cv2
import fitz
import requests

# Please `python main.py` first
#
# For every page of ./test.pdf this script:
#   1. renders the page to ./page_<n>.png,
#   2. collects the bbox of every text span on the page,
#   3. posts the boxes plus page size to the local prediction server,
#   4. reorders the boxes by the returned "orders" list, and
#   5. draws each box with its position in that order onto the PNG.

# Seconds to wait for the prediction server before giving up; without a
# timeout a stalled server would hang the script indefinitely.
REQUEST_TIMEOUT = 60

# The context manager releases the PDF handle even if a page fails.
with fitz.open("./test.pdf") as doc:
    for page_no, page in enumerate(doc):
        page_img_file = f"./page_{page_no}.png"
        pix = page.get_pixmap()
        pix.save(page_img_file)
        dicts = page.get_text("dict")
        # get width, height and boxes
        width = dicts["width"]
        height = dicts["height"]
        # Blocks without a "lines" key are skipped, as they carry no spans.
        boxes = [
            span["bbox"]
            for block in dicts["blocks"]
            if "lines" in block
            for line in block["lines"]
            for span in line["spans"]
        ]
        # send to server to predict orders
        r = requests.post(
            "http://localhost:8000/predict",
            json={"boxes": boxes, "width": width, "height": height},
            timeout=REQUEST_TIMEOUT,
        )
        # Fail loudly on an HTTP error instead of on a missing "orders" key.
        r.raise_for_status()
        orders = r.json()["orders"]
        # reorder boxes; each entry of `orders` is used as an index into `boxes`
        boxes = [boxes[box_idx] for box_idx in orders]
        # draw boxes
        img = cv2.imread(page_img_file)
        if img is None:
            # cv2.imread signals failure by returning None rather than raising.
            raise FileNotFoundError(f"cannot read rendered page: {page_img_file}")
        # NOTE(review): bbox values are used directly as pixel coordinates,
        # which presumes the pixmap was rendered at the page's native scale
        # (get_pixmap() is called without arguments) -- confirm.
        for idx, box in enumerate(boxes):
            x0, y0, x1, y1 = (round(coord) for coord in box)
            cv2.rectangle(img, (x0, y0), (x1, y1), (0, 0, 255), 1)
            # Label each box with its position in the predicted order.
            cv2.putText(
                img,
                str(idx),
                (x1, y1),
                cv2.FONT_HERSHEY_PLAIN,
                0.5,
                (0, 0, 255),
                1,
            )
        cv2.imwrite(page_img_file, img)
Binary file added example/test.pdf
Binary file not shown.

0 comments on commit 0e0fe29

Please sign in to comment.