Commit d817328

Update ITREX version in ONNXRT WOQ example and fix bugs in hf models (#1333)

Signed-off-by: yuwenzho <[email protected]>
yuwenzho authored Oct 19, 2023
1 parent ee617a4 commit d817328
Showing 6 changed files with 9 additions and 5 deletions.
@@ -483,6 +483,8 @@ def eval_func(model, *args):
     if model_args.model_name_or_path == 'mrm8488/spanbert-finetuned-squadv1':
         fp32_op_names = ['/bert/embeddings/word_embeddings/Gather',
                          '/bert/encoder/layer.[5-7|9]/output/dense/MatMul']
+    elif model_args.model_name_or_path == 'salti/bert-base-multilingual-cased-finetuned-squad':
+        fp32_op_names = ['/bert/encoder/layer.[4-5]/output/dense/MatMul']
     elif model_args.model_name_or_path == 'distilbert-base-uncased-distilled-squad':
         fp32_op_names = ['/distilbert/transformer/layer.[1-5]/ffn/lin[1-2]/MatMul']
     elif model_args.model_name_or_path == 'deepset/roberta-large-squad2':
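For context (not part of this commit): the bracketed segments in these `fp32_op_names` entries look like regular-expression patterns over ONNX node names, which is presumably how Neural Compressor decides which ops stay in FP32. A minimal sketch of that matching under this assumption, with made-up node names:

```python
import re

# Assumption: entries in fp32_op_names are regex patterns matched against
# full ONNX node names, so 'layer.[4-5]' covers layers 4 and 5.
fp32_op_names = ['/bert/encoder/layer.[4-5]/output/dense/MatMul']

# Hypothetical node names, for illustration only.
node_names = [
    '/bert/encoder/layer.3/output/dense/MatMul',
    '/bert/encoder/layer.4/output/dense/MatMul',
    '/bert/encoder/layer.5/output/dense/MatMul',
]

for name in node_names:
    keep_fp32 = any(re.fullmatch(pattern, name) for pattern in fp32_op_names)
    print(name, '-> keep FP32' if keep_fp32 else '-> quantize')
```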
@@ -25,7 +25,7 @@
 import onnxruntime as ort
 from torch.nn.functional import pad
 from torch.utils.data import DataLoader
-from intel_extension_for_transformers.evaluation.lm_eval import evaluate
+from intel_extension_for_transformers.llm.evaluation.lm_eval import evaluate
 from optimum.onnxruntime import ORTModelForCausalLM
 from transformers import LlamaConfig, LlamaTokenizer
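An aside on the rename above: a script that must run against both old and new ITREX releases could use a small compatibility shim (a sketch, not part of this commit):

```python
try:
    # Newer ITREX releases expose the lm_eval wrapper under the llm subpackage.
    from intel_extension_for_transformers.llm.evaluation.lm_eval import evaluate
except ImportError:
    # Older releases (e.g. the git revision previously pinned in
    # requirements.txt) used the original module path.
    from intel_extension_for_transformers.evaluation.lm_eval import evaluate
```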
@@ -1,5 +1,5 @@
-git+https://github.com/intel/intel-extension-for-transformers.git@b8302f99a93e5f09a80431cee2fb384755062664
 git+https://github.com/EleutherAI/lm-evaluation-harness.git@83dbfbf6070324f3e5872f63e49d49ff7ef4c9b3
+intel-extension-for-transformers
 torch
 transformers
 accelerate
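Because this swaps a pinned git revision for the latest PyPI release of ITREX, a quick standard-library check (illustrative only) confirms which version an environment actually picked up:

```python
from importlib.metadata import PackageNotFoundError, version

try:
    # Reports the installed release after `pip install -r requirements.txt`.
    print("intel-extension-for-transformers", version("intel-extension-for-transformers"))
except PackageNotFoundError:
    print("intel-extension-for-transformers is not installed")
```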
@@ -12,6 +12,8 @@
 pip install -r requirements.txt
 ```
 > Note: Validated ONNX Runtime [Version](/docs/source/installation_guide.md#validated-software-environment).
+> Note: Weight-only quantization in Intel® Neural Compressor is still under development. We encourage you to use the `master` branch to access the latest features.
+
 ## 2. Prepare Model
 
 ```bash
@@ -26,7 +26,7 @@
 import onnxruntime as ort
 from torch.nn.functional import pad
 from torch.utils.data import DataLoader
-from intel_extension_for_transformers.evaluation.lm_eval import evaluate
+from intel_extension_for_transformers.llm.evaluation.lm_eval import evaluate
 from optimum.onnxruntime import ORTModelForCausalLM
 from transformers import LlamaConfig, LlamaTokenizer
@@ -1,5 +1,5 @@
-git+https://github.com/intel/intel-extension-for-transformers.git@b8302f99a93e5f09a80431cee2fb384755062664
 git+https://github.com/EleutherAI/lm-evaluation-harness.git@83dbfbf6070324f3e5872f63e49d49ff7ef4c9b3
+intel-extension-for-transformers
 torch
 transformers
 accelerate
@@ -8,4 +8,4 @@ onnxruntime
 onnxruntime-extensions; python_version < '3.11'
 datasets
 optimum
-evaluate
\ No newline at end of file
+evaluate