Skip to content

Commit

Permalink
[tokenizer] Fixes model zoo import bug (#3103)
Browse files Browse the repository at this point in the history
  • Loading branch information
frankfliu authored Apr 23, 2024
1 parent ec7d6fd commit 357b9b4
Show file tree
Hide file tree
Showing 3 changed files with 25 additions and 15 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,7 @@ def save_onnx_model(self, model_info, args: Namespace, temp_dir: str):
if not os.path.exists(temp_dir):
os.makedirs(temp_dir)

model_name = model_id.split("/")[-1]
logging.info(f"Saving onnxruntime model: {model_name}.onnx ...")
logging.info(f"Saving onnxruntime model: {model_id} ...")

from optimum.commands.optimum_cli import main

Expand All @@ -72,7 +71,7 @@ def save_onnx_model(self, model_info, args: Namespace, temp_dir: str):
include_types = "token_type_id" in inputs

tokenizer = AutoTokenizer.from_pretrained(model_id)
hf_pipeline = PipelineHolder(model, tokenizer)
hf_pipeline = PipelineHolder(tokenizer, model)
size = self.save_to_model_zoo(model_info, args.output_dir,
"OnnxRuntime", temp_dir, hf_pipeline,
include_types)
Expand Down
28 changes: 18 additions & 10 deletions extensions/tokenizers/src/main/python/huggingface_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,8 @@ def list_models(self, args: Namespace) -> List[dict]:
existing_model = self.processed_models.get(model_id)
if existing_model:
existing_model["downloads"] = model_info.downloads
if not args.retry_failed:
if not args.retry_failed or existing_model[
"result"] == "success":
logging.info(f"Skip converted model: {model_id}.")
continue

Expand All @@ -119,15 +120,22 @@ def list_models(self, args: Namespace) -> List[dict]:
with open(config) as f:
config = json.load(f)

if "sentence-similarity" in model_info.tags:
task = "sentence-similarity"
else:
task, architecture = self.to_supported_task(config)
if not task:
logging.info(
f"Unsupported model architecture: {architecture} for {model_id}."
)
continue
task, architecture = self.to_supported_task(config)
if not task:
if "sentence-similarity" in model_info.tags:
task = "sentence-similarity"

if not task:
logging.info(
f"Unsupported model architecture: {architecture} for {model_id}."
)
continue

if args.category and args.category != task:
logging.info(
f"Skip {model_id}, expect task: {args.category}, detected {task}."
)
continue

model = {
"model_info": model_info,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

import requests
import torch
from transformers import AutoTokenizer, AutoModel
from transformers import AutoTokenizer, AutoModel, AutoConfig

from huggingface_converter import HuggingfaceConverter, PipelineHolder
from huggingface_hub import hf_hub_download
Expand Down Expand Up @@ -76,7 +76,10 @@ def get_extra_arguments(self, hf_pipeline, model_id: str,
pass

if not "maxLength" in args:
config = hf_pipeline.model.config
if hasattr(hf_pipeline.model, "config"):
config = hf_pipeline.model.config
else:
config = AutoConfig.from_pretrained(model_id)
tokenizer = hf_pipeline.tokenizer
if hasattr(config, "max_position_embeddings") and hasattr(
tokenizer, "model_max_length"):
Expand Down

0 comments on commit 357b9b4

Please sign in to comment.