add example images

ppaanngggg · Mar 5, 2024 · 0e0fe29 · 0e0fe29
1 parent 53a6e2f
commit 0e0fe29
Show file tree

Hide file tree

Showing 5 changed files with 55 additions and 2 deletions.
diff --git a/README.md b/README.md
@@ -4,7 +4,10 @@
         🤗 <a href="https://huggingface.co/hantian/layoutreader">Hugging Face</a>
 </p>
 
-TODO: a result example
+<p align="center">
+  <img src="./example/page_0.png" width="400"  alt="page_0"/>
+  <img src="./example/page_1.png" width="400"  alt="page_1"/> 
+</p>
 
 ## Why this repo?
 
@@ -29,7 +32,8 @@ from v3.helpers import prepare_inputs, boxes2inputs, parse_logits
 
 model = LayoutLMv3ForTokenClassification.from_pretrained("hantian/layoutreader")
 
-boxes = [[...], ...]  # list of [left, top, right, bottom], bboxes of spans
+# list of [left, top, right, bottom] bboxes of spans; coordinates should range from 0 to 1000
+boxes = [[...], ...]  
 inputs = boxes2inputs(boxes)
 inputs = prepare_inputs(inputs, model)
 logits = model(**inputs).logits.cpu().squeeze(0)

diff --git a/example/page_0.png b/example/page_0.png
diff --git a/example/page_1.png b/example/page_1.png
diff --git a/example/predict_and_draw.py b/example/predict_and_draw.py
@@ -0,0 +1,49 @@
+import cv2
+import fitz
+import requests
+
+# Run `python main.py` first (presumably it starts the server this script posts to at localhost:8000 - confirm)
+
+doc = fitz.open("./test.pdf")
+for i, page in enumerate(doc):
+    page_img_file = f"./page_{i}.png"
+    pix = page.get_pixmap()  # render the page; saved below as ./page_{i}.png
+    pix.save(page_img_file)
+    dicts = page.get_text("dict")
+    # read the page width and height, and collect the bbox of every text span
+    width = dicts["width"]
+    height = dicts["height"]
+    boxes = []
+    for block in dicts["blocks"]:
+        if "lines" not in block:  # skip blocks that carry no "lines" entry
+            continue
+        for line in block["lines"]:
+            for span in line["spans"]:
+                boxes.append(span["bbox"])
+    # post the boxes and page size to the server and read "orders" from its JSON reply
+    r = requests.post(
+        "http://localhost:8000/predict",
+        json={"boxes": boxes, "width": width, "height": height},
+    )
+    orders = r.json()["orders"]
+    # rearrange boxes so that position k holds boxes[orders[k]]
+    boxes = [boxes[i] for i in orders]
+    # reload the saved page image, outline each box and label it with its position in the new order
+    img = cv2.imread(page_img_file)
+    for idx, box in enumerate(boxes):
+        x0, y0, x1, y1 = box
+        x0 = round(x0)  # round every coordinate to an integer before passing it to cv2
+        y0 = round(y0)
+        x1 = round(x1)
+        y1 = round(y1)
+        cv2.rectangle(img, (x0, y0), (x1, y1), (0, 0, 255), 1)
+        cv2.putText(
+            img,
+            str(idx),
+            (x1, y1),  # the label is anchored at the (x1, y1) corner of the box
+            cv2.FONT_HERSHEY_PLAIN,
+            0.5,
+            (0, 0, 255),
+            1,
+        )
+    cv2.imwrite(page_img_file, img)  # overwrite the page image with the annotated version
diff --git a/example/test.pdf b/example/test.pdf