* fix bugs when calling the INC fit API
* export perm from the GPTQ object
* move quantizers and perms to the INC model
* support perm in WeightOnlyLinear (#1118)
* support GPTQ model compression with saved scale (#1119)
* support GPTQ scale
* remove zero point when the scheme is sym
* enhance WeightOnlyLinear for parallel execution
* add log for compression
* use model.to() to select the inference device for WeightOnlyLinear
* support lm_head RTN quantization in GPTQ export
* update the regularization layer-wise config when calling gptq_quantize directly
* align with the ipex-gpu requirement
* improve coverage

---------

Signed-off-by: YIYANGCAI <[email protected]>
Signed-off-by: Xin He <[email protected]>
Signed-off-by: wenhuach21 <[email protected]>
Signed-off-by: He, Xin3 <[email protected]>
Co-authored-by: xinhe <[email protected]>
Co-authored-by: wenhuach21 <[email protected]>
commit 6ba7837 (1 parent: 88adfc9)
Showing 19 changed files with 1,965 additions and 234 deletions.
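The changes above touch Intel Neural Compressor's weight-only GPTQ flow (the fit API, WeightOnlyLinear, gptq_quantize). For orientation, the sketch below shows how that flow is typically driven. It is a hedged sketch only: it assumes the neural_compressor 2.x post-training API (PostTrainingQuantConfig, quantization.fit) and common weight-only config keys, which may differ between releases, and the model id and random-token calibration loader are placeholders rather than this commit's actual usage.

# Hedged sketch: drive INC's weight-only GPTQ path (neural_compressor 2.x API assumed).
# The model id and the random-token calibration loader are illustrative only; the real
# example builds calibration samples with datautils.get_loaders (added below).
import torch
from torch.utils.data import DataLoader
from transformers import AutoModelForCausalLM
from neural_compressor import PostTrainingQuantConfig, quantization

model = AutoModelForCausalLM.from_pretrained("facebook/opt-125m")

# Eight fake calibration sequences of 512 token ids each (batch shape: [1, 512]).
calib_dataloader = DataLoader([torch.randint(0, 32000, (512,)) for _ in range(8)], batch_size=1)

conf = PostTrainingQuantConfig(
    approach="weight_only",
    op_type_dict={
        ".*": {  # apply to every supported op
            "weight": {
                "bits": 4,
                "group_size": 128,
                "scheme": "sym",
                "algorithm": "GPTQ",
            },
        },
    },
)

q_model = quantization.fit(model, conf, calib_dataloader=calib_dataloader)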
...ytorch/nlp/huggingface_models/language-modeling/quantization/ptq_weight_only/datautils.py (249 additions, 0 deletions)
@@ -0,0 +1,249 @@
import numpy as np
import torch
import datasets

# cache_dir = "~/.cache/"
cache_dir = None


def set_seed(seed):
    np.random.seed(seed)
    torch.random.manual_seed(seed)


def get_wikitext2(nsamples, seed, seqlen, model):
    from datasets import load_dataset
    traindata = load_dataset('wikitext', 'wikitext-2-raw-v1', split='train', cache_dir=cache_dir)
    testdata = load_dataset('wikitext', 'wikitext-2-raw-v1', split='test', cache_dir=cache_dir)

    from transformers import AutoTokenizer
    tokenizer = AutoTokenizer.from_pretrained(model, use_fast=False, cache_dir=cache_dir)
    trainenc = tokenizer("\n\n".join(traindata['text']), return_tensors='pt')
    testenc = tokenizer("\n\n".join(testdata['text']), return_tensors='pt')

    import random
    random.seed(seed)
    trainloader = []
    for _ in range(nsamples):
        i = random.randint(0, trainenc.input_ids.shape[1] - seqlen - 1)
        j = i + seqlen
        inp = trainenc.input_ids[:, i:j]
        tar = inp.clone()
        tar[:, :-1] = -100
        trainloader.append((inp, tar))
    return trainloader, testenc


def get_ptb(nsamples, seed, seqlen, model):
    from datasets import load_dataset
    traindata = load_dataset('ptb_text_only', 'penn_treebank', split='train', cache_dir=cache_dir)
    valdata = load_dataset('ptb_text_only', 'penn_treebank', split='validation', cache_dir=cache_dir)

    from transformers import AutoTokenizer
    tokenizer = AutoTokenizer.from_pretrained(model, use_fast=False, cache_dir=cache_dir)
    trainenc = tokenizer("\n\n".join(traindata['sentence']), return_tensors='pt')
    testenc = tokenizer("\n\n".join(valdata['sentence']), return_tensors='pt')

    import random
    random.seed(seed)
    trainloader = []
    for _ in range(nsamples):
        i = random.randint(0, trainenc.input_ids.shape[1] - seqlen - 1)
        j = i + seqlen
        inp = trainenc.input_ids[:, i:j]
        tar = inp.clone()
        tar[:, :-1] = -100
        trainloader.append((inp, tar))
    return trainloader, testenc


def get_c4(nsamples, seed, seqlen, model):
    from datasets import load_dataset

    traindata = load_dataset(
        'allenai/c4', 'allenai--c4', data_files={'train': 'en/c4-train.00000-of-01024.json.gz'}, split='train',
        cache_dir=cache_dir
    )
    valdata = load_dataset(
        'allenai/c4', 'allenai--c4', data_files={'validation': 'en/c4-validation.00000-of-00008.json.gz'},
        split='validation',
        cache_dir=cache_dir
    )

    from transformers import AutoTokenizer
    tokenizer = AutoTokenizer.from_pretrained(model, use_fast=False)

    import random
    random.seed(seed)
    trainloader = []
    for _ in range(nsamples):
        while True:
            i = random.randint(0, len(traindata) - 1)
            trainenc = tokenizer(traindata[i]['text'], return_tensors='pt')
            if trainenc.input_ids.shape[1] >= seqlen:
                break
        i = random.randint(0, trainenc.input_ids.shape[1] - seqlen - 1)
        j = i + seqlen
        inp = trainenc.input_ids[:, i:j]
        tar = inp.clone()
        tar[:, :-1] = -100
        trainloader.append((inp, tar))

    import random
    random.seed(0)
    valenc = []
    for _ in range(256):
        while True:
            i = random.randint(0, len(valdata) - 1)
            tmp = tokenizer(valdata[i]['text'], return_tensors='pt')
            if tmp.input_ids.shape[1] >= seqlen:
                break
        i = random.randint(0, tmp.input_ids.shape[1] - seqlen - 1)
        j = i + seqlen
        valenc.append(tmp.input_ids[:, i:j])
    valenc = torch.hstack(valenc)

    class TokenizerWrapper:
        def __init__(self, input_ids):
            self.input_ids = input_ids

    valenc = TokenizerWrapper(valenc)

    return trainloader, valenc


def get_pile(nsamples, seed, seqlen, model):
    from datasets import load_dataset

    traindata = load_dataset(
        'NeelNanda/pile-10k', split='train',
        cache_dir=cache_dir
    )

    valdata = load_dataset(
        'allenai/c4', 'allenai--c4', data_files={'validation': 'en/c4-validation.00000-of-00008.json.gz'},
        split='validation',
        cache_dir=cache_dir
    )

    from transformers import AutoTokenizer
    tokenizer = AutoTokenizer.from_pretrained(model, use_fast=False, cache_dir=cache_dir)

    import random
    random.seed(seed)
    trainloader = []
    for _ in range(nsamples):
        while True:
            i = random.randint(0, len(traindata) - 1)
            trainenc = tokenizer(traindata[i]['text'], return_tensors='pt')
            if trainenc.input_ids.shape[1] > seqlen:
                break
        i = random.randint(0, trainenc.input_ids.shape[1] - seqlen - 1)
        j = i + seqlen
        inp = trainenc.input_ids[:, i:j]
        tar = inp.clone()
        tar[:, :-1] = -100
        trainloader.append((inp, tar))

    import random
    random.seed(0)
    valenc = []
    for _ in range(256):
        while True:
            i = random.randint(0, len(valdata) - 1)
            tmp = tokenizer(valdata[i]['text'], return_tensors='pt')
            if tmp.input_ids.shape[1] >= seqlen:
                break
        i = random.randint(0, tmp.input_ids.shape[1] - seqlen - 1)
        j = i + seqlen
        valenc.append(tmp.input_ids[:, i:j])
    valenc = torch.hstack(valenc)

    class TokenizerWrapper:
        def __init__(self, input_ids):
            self.input_ids = input_ids

    valenc = TokenizerWrapper(valenc)

    return trainloader, valenc


def get_ptb_new(nsamples, seed, seqlen, model):
    from datasets import load_dataset
    traindata = load_dataset('ptb_text_only', 'penn_treebank', split='train', cache_dir=cache_dir)
    testdata = load_dataset('ptb_text_only', 'penn_treebank', split='test', cache_dir=cache_dir)

    from transformers import AutoTokenizer
    tokenizer = AutoTokenizer.from_pretrained(model, use_fast=False, cache_dir=cache_dir)
    trainenc = tokenizer(" ".join(traindata['sentence']), return_tensors='pt')
    testenc = tokenizer(" ".join(testdata['sentence']), return_tensors='pt')

    import random
    random.seed(seed)
    trainloader = []
    for _ in range(nsamples):
        i = random.randint(0, trainenc.input_ids.shape[1] - seqlen - 1)
        j = i + seqlen
        inp = trainenc.input_ids[:, i:j]
        tar = inp.clone()
        tar[:, :-1] = -100
        trainloader.append((inp, tar))
    return trainloader, testenc


def get_c4_new(nsamples, seed, seqlen, model):
    from datasets import load_dataset
    traindata = load_dataset(
        'allenai/c4', 'allenai--c4', data_files={'train': 'en/c4-train.00000-of-01024.json.gz'}, split='train'
    )
    valdata = load_dataset(
        'allenai/c4', 'allenai--c4', data_files={'validation': 'en/c4-validation.00000-of-00008.json.gz'},
        split='validation'
    )

    from transformers import AutoTokenizer
    tokenizer = AutoTokenizer.from_pretrained(model, use_fast=False, cache_dir=cache_dir)

    import random
    random.seed(seed)
    trainloader = []
    for _ in range(nsamples):
        while True:
            i = random.randint(0, len(traindata) - 1)
            trainenc = tokenizer(traindata[i]['text'], return_tensors='pt')
            if trainenc.input_ids.shape[1] >= seqlen:
                break
        i = random.randint(0, trainenc.input_ids.shape[1] - seqlen - 1)
        j = i + seqlen
        inp = trainenc.input_ids[:, i:j]
        tar = inp.clone()
        tar[:, :-1] = -100
        trainloader.append((inp, tar))

    valenc = tokenizer(' '.join(valdata[:1100]['text']), return_tensors='pt')
    valenc = valenc.input_ids[:, :(256 * seqlen)]

    class TokenizerWrapper:
        def __init__(self, input_ids):
            self.input_ids = input_ids

    valenc = TokenizerWrapper(valenc)

    return trainloader, valenc


def get_loaders(
    name, nsamples=128, seed=0, seqlen=2048, model=''
):
    if 'wikitext2' in name:
        return get_wikitext2(nsamples, seed, seqlen, model)
    if 'ptb' in name:
        if 'new' in name:
            return get_ptb_new(nsamples, seed, seqlen, model)
        return get_ptb(nsamples, seed, seqlen, model)
    if 'c4' in name:
        if 'new' in name:
            return get_c4_new(nsamples, seed, seqlen, model)
        return get_c4(nsamples, seed, seqlen, model)
    if 'pile' in name:
        return get_pile(nsamples, seed, seqlen, model)
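A minimal usage sketch for the helper above: it builds the GPTQ calibration samples and the tokenized test split in one call. The checkpoint name is an assumption; any Hugging Face causal-LM id that provides a slow tokenizer works.

from datautils import get_loaders

# 128 calibration samples of 2048 tokens each, plus the tokenized wikitext2 test split.
trainloader, testenc = get_loaders("wikitext2", nsamples=128, seed=0, seqlen=2048, model="facebook/opt-125m")

inp, tar = trainloader[0]
print(inp.shape)                 # torch.Size([1, 2048]); labels mask all but the last token
print(testenc.input_ids.shape)   # full test corpus, used later for perplexity evaluation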
.../huggingface_models/language-modeling/quantization/ptq_weight_only/evaluation/__init__.py (17 additions, 0 deletions)
@@ -0,0 +1,17 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright (c) 2022 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .lm_eval.evaluator import evaluate
...face_models/language-modeling/quantization/ptq_weight_only/evaluation/lm_eval/__init__.py (16 additions, 0 deletions)
@@ -0,0 +1,16 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright (c) 2022 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.