Skip to content

Commit

Permalink
Replace hf_hub_download with wrapper
Browse files: browse the repository at this point in the history
  • Loading branch information
qued committed May 7, 2024
1 parent ae8ec91 commit 7bbbcd1
Show file tree
Hide file tree
Showing 4 changed files with 26 additions and 18 deletions.
5 changes: 2 additions & 3 deletions unstructured_inference/models/chipper.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
import torch
import transformers
from cv2.typing import MatLike
from huggingface_hub import hf_hub_download
from PIL.Image import Image
from transformers import DonutProcessor, VisionEncoderDecoderModel
from transformers.generation.logits_process import LogitsProcessor
Expand All @@ -22,7 +21,7 @@
from unstructured_inference.models.unstructuredmodel import (
UnstructuredElementExtractionModel,
)
from unstructured_inference.utils import LazyDict, strip_tags
from unstructured_inference.utils import LazyDict, strip_tags, download_if_needed_and_get_local_path

MODEL_TYPES: Dict[str, Union[LazyDict, dict]] = {
"chipperv1": {
Expand Down Expand Up @@ -115,7 +114,7 @@ def initialize(
token=auth_token,
)
if swap_head:
lm_head_file = hf_hub_download(
lm_head_file = download_if_needed_and_get_local_path(
repo_id=pre_trained_model_repo,
filename="lm_head.pth",
token=auth_token,
Expand Down
15 changes: 9 additions & 6 deletions unstructured_inference/models/detectron2.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
from pathlib import Path
from typing import Any, Dict, Final, List, Optional, Union

from huggingface_hub import hf_hub_download
from layoutparser.models.detectron2.layoutmodel import (
Detectron2LayoutModel,
is_detectron2_available,
Expand All @@ -17,7 +16,11 @@
from unstructured_inference.models.unstructuredmodel import (
UnstructuredObjectDetectionModel,
)
from unstructured_inference.utils import LazyDict, LazyEvaluateInfo
from unstructured_inference.utils import (
LazyDict,
LazyEvaluateInfo,
download_if_needed_and_get_local_path,
)

DETECTRON_CONFIG: Final = "lp://PubLayNet/faster_rcnn_R_50_FPN_3x/config"
DEFAULT_LABEL_MAP: Final[Dict[int, str]] = {
Expand All @@ -35,12 +38,12 @@
MODEL_TYPES = {
"detectron2_lp": LazyDict(
model_path=LazyEvaluateInfo(
hf_hub_download,
download_if_needed_and_get_local_path,
"layoutparser/detectron2",
"PubLayNet/faster_rcnn_R_50_FPN_3x/model_final.pth",
),
config_path=LazyEvaluateInfo(
hf_hub_download,
download_if_needed_and_get_local_path,
"layoutparser/detectron2",
"PubLayNet/faster_rcnn_R_50_FPN_3x/config.yml",
),
Expand All @@ -49,12 +52,12 @@
),
"checkbox": LazyDict(
model_path=LazyEvaluateInfo(
hf_hub_download,
download_if_needed_and_get_local_path,
"unstructuredio/oer-checkbox",
"detectron2_finetuned_oer_checkbox.pth",
),
config_path=LazyEvaluateInfo(
hf_hub_download,
download_if_needed_and_get_local_path,
"unstructuredio/oer-checkbox",
"detectron2_oer_checkbox.json",
),
Expand Down
11 changes: 7 additions & 4 deletions unstructured_inference/models/detectron2onnx.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
import cv2
import numpy as np
import onnxruntime
from huggingface_hub import hf_hub_download
from huggingface_hub.constants import HUGGINGFACE_HUB_CACHE
from onnxruntime.capi import _pybind_state as C
from onnxruntime.quantization import QuantType, quantize_dynamic
Expand All @@ -16,7 +15,11 @@
from unstructured_inference.models.unstructuredmodel import (
UnstructuredObjectDetectionModel,
)
from unstructured_inference.utils import LazyDict, LazyEvaluateInfo
from unstructured_inference.utils import (
LazyDict,
LazyEvaluateInfo,
download_if_needed_and_get_local_path,
)

onnxruntime.set_default_logger_severity(logger_onnx.getEffectiveLevel())

Expand All @@ -34,7 +37,7 @@
MODEL_TYPES: Dict[str, Union[LazyDict, dict]] = {
"detectron2_onnx": LazyDict(
model_path=LazyEvaluateInfo(
hf_hub_download,
download_if_needed_and_get_local_path,
"unstructuredio/detectron2_faster_rcnn_R_50_FPN_3x",
"model.onnx",
),
Expand All @@ -52,7 +55,7 @@
},
"detectron2_mask_rcnn": LazyDict(
model_path=LazyEvaluateInfo(
hf_hub_download,
download_if_needed_and_get_local_path,
"unstructuredio/detectron2_mask_rcnn_X_101_32x8d_FPN_3x",
"model.onnx",
),
Expand Down
13 changes: 8 additions & 5 deletions unstructured_inference/models/yolox.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,17 @@
import cv2
import numpy as np
import onnxruntime
from huggingface_hub import hf_hub_download
from onnxruntime.capi import _pybind_state as C
from PIL import Image as PILImage

from unstructured_inference.constants import ElementType, Source
from unstructured_inference.inference.layoutelement import LayoutElement
from unstructured_inference.models.unstructuredmodel import UnstructuredObjectDetectionModel
from unstructured_inference.utils import LazyDict, LazyEvaluateInfo
from unstructured_inference.utils import (
LazyDict,
LazyEvaluateInfo,
download_if_needed_and_get_local_path,
)

YOLOX_LABEL_MAP = {
0: ElementType.CAPTION,
Expand All @@ -34,23 +37,23 @@
MODEL_TYPES = {
"yolox": LazyDict(
model_path=LazyEvaluateInfo(
hf_hub_download,
download_if_needed_and_get_local_path,
"unstructuredio/yolo_x_layout",
"yolox_l0.05.onnx",
),
label_map=YOLOX_LABEL_MAP,
),
"yolox_tiny": LazyDict(
model_path=LazyEvaluateInfo(
hf_hub_download,
download_if_needed_and_get_local_path,
"unstructuredio/yolo_x_layout",
"yolox_tiny.onnx",
),
label_map=YOLOX_LABEL_MAP,
),
"yolox_quantized": LazyDict(
model_path=LazyEvaluateInfo(
hf_hub_download,
download_if_needed_and_get_local_path,
"unstructuredio/yolo_x_layout",
"yolox_l0.05_quantized.onnx",
),
Expand Down

0 comments on commit 7bbbcd1

Please sign in to comment.