GPTQ feature enhance (#1104)
* fix bugs when calling the INC fit API.

* export perm from gptq object

* move quantizers and perms to INC model

* support perm in WeightOnlyLinear (#1118)

* support gptq model compression with saved scale (#1119)

* support gptq scale

* remove zero point when using the sym scheme

* enhance WeightOnlyLinear for parallel

* add log for compression

* set the inference device via model.to() for WeightOnlyLinear

* support lm_head rtn quantize in gptq export

* update regularization/layer-wise config when calling gptq_quantize directly.

* align with ipex-gpu requirement

* improve coverage

---------

Signed-off-by: YIYANGCAI <[email protected]>
Signed-off-by: Xin He <[email protected]>
Signed-off-by: wenhuach21 <[email protected]>
Signed-off-by: He, Xin3 <[email protected]>
Co-authored-by: xinhe <[email protected]>
Co-authored-by: wenhuach21 <[email protected]>
3 people authored Aug 1, 2023
1 parent 88adfc9 commit 6ba7837
Showing 19 changed files with 1,965 additions and 234 deletions.
@@ -103,3 +103,10 @@ quantized_model = load(tuned_checkpoint, model)
```
--------
For more details, please refer to the [sample code](./run_clm.py).

# (May Remove Later) Run the GPTQ algorithm
```
sh run-gptq-llm.sh
# You may need to move run-gptq-llm.sh to the root directory of neural compressor and adjust the paths of the Python files it calls.
# Make sure the pile dataset has been downloaded first.
```
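For reference, the script ultimately drives GPTQ weight-only quantization through Intel Neural Compressor's `fit` API. Below is a minimal sketch of what such a call can look like, assuming the INC 2.x `PostTrainingQuantConfig` weight-only interface; the model name, the `datautils` import path, and the dataloader wrapper are illustrative placeholders rather than the exact contents of `run-gptq-llm.sh`.
```
# Hypothetical sketch: GPTQ weight-only quantization through the INC fit API.
# Model name, import path, and dataloader wrapper are assumptions for illustration.
from transformers import AutoModelForCausalLM
from neural_compressor import PostTrainingQuantConfig, quantization
from datautils import get_loaders  # the calibration helper added in this commit (path assumed)

model_name = "facebook/opt-125m"  # placeholder model
model = AutoModelForCausalLM.from_pretrained(model_name)

# Build calibration samples from the pile dataset via the new helper.
trainloader, _ = get_loaders("pile", nsamples=128, seqlen=2048, model=model_name)

class CalibDataloader:
    """Thin iterable over (input_ids, labels) pairs with the batch_size attribute INC expects."""
    batch_size = 1
    def __iter__(self):
        for inp, tar in trainloader:
            yield inp, tar

conf = PostTrainingQuantConfig(
    approach="weight_only",
    op_type_dict={
        ".*": {"weight": {"bits": 4, "group_size": 128, "scheme": "sym", "algorithm": "GPTQ"}},
    },
)
q_model = quantization.fit(model, conf, calib_dataloader=CalibDataloader())
```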
@@ -0,0 +1,249 @@
import numpy as np
import torch
import datasets

# cache_dir = "~/.cache/"
cache_dir = None


def set_seed(seed):
    """Seed NumPy and PyTorch RNGs so that calibration sampling is reproducible."""
    np.random.seed(seed)
    torch.random.manual_seed(seed)


def get_wikitext2(nsamples, seed, seqlen, model):
    """Build a calibration loader and test encodings from wikitext-2-raw-v1."""
    from datasets import load_dataset
    traindata = load_dataset('wikitext', 'wikitext-2-raw-v1', split='train', cache_dir=cache_dir)
    testdata = load_dataset('wikitext', 'wikitext-2-raw-v1', split='test', cache_dir=cache_dir)

    from transformers import AutoTokenizer
    tokenizer = AutoTokenizer.from_pretrained(model, use_fast=False, cache_dir=cache_dir)
    trainenc = tokenizer("\n\n".join(traindata['text']), return_tensors='pt')
    testenc = tokenizer("\n\n".join(testdata['text']), return_tensors='pt')

    import random
    random.seed(seed)
    trainloader = []
    for _ in range(nsamples):
        # Sample a random seqlen-token window; mask every label except the last one.
        i = random.randint(0, trainenc.input_ids.shape[1] - seqlen - 1)
        j = i + seqlen
        inp = trainenc.input_ids[:, i:j]
        tar = inp.clone()
        tar[:, :-1] = -100
        trainloader.append((inp, tar))
    return trainloader, testenc


def get_ptb(nsamples, seed, seqlen, model):
    """Build a calibration loader and validation encodings from Penn Treebank."""
    from datasets import load_dataset
    traindata = load_dataset('ptb_text_only', 'penn_treebank', split='train', cache_dir=cache_dir)
    valdata = load_dataset('ptb_text_only', 'penn_treebank', split='validation', cache_dir=cache_dir)

    from transformers import AutoTokenizer
    tokenizer = AutoTokenizer.from_pretrained(model, use_fast=False, cache_dir=cache_dir)
    trainenc = tokenizer("\n\n".join(traindata['sentence']), return_tensors='pt')
    testenc = tokenizer("\n\n".join(valdata['sentence']), return_tensors='pt')

    import random
    random.seed(seed)
    trainloader = []
    for _ in range(nsamples):
        i = random.randint(0, trainenc.input_ids.shape[1] - seqlen - 1)
        j = i + seqlen
        inp = trainenc.input_ids[:, i:j]
        tar = inp.clone()
        tar[:, :-1] = -100
        trainloader.append((inp, tar))
    return trainloader, testenc


def get_c4(nsamples, seed, seqlen, model):
    """Build a calibration loader from a C4 training shard and a 256-window validation tensor."""
    from datasets import load_dataset

    traindata = load_dataset(
        'allenai/c4', 'allenai--c4', data_files={'train': 'en/c4-train.00000-of-01024.json.gz'}, split='train',
        cache_dir=cache_dir
    )
    valdata = load_dataset(
        'allenai/c4', 'allenai--c4', data_files={'validation': 'en/c4-validation.00000-of-00008.json.gz'},
        split='validation',
        cache_dir=cache_dir
    )

    from transformers import AutoTokenizer
    tokenizer = AutoTokenizer.from_pretrained(model, use_fast=False)

    import random
    random.seed(seed)
    trainloader = []
    for _ in range(nsamples):
        # Keep drawing documents until one is long enough to slice a seqlen window from.
        while True:
            i = random.randint(0, len(traindata) - 1)
            trainenc = tokenizer(traindata[i]['text'], return_tensors='pt')
            if trainenc.input_ids.shape[1] >= seqlen:
                break
        i = random.randint(0, trainenc.input_ids.shape[1] - seqlen - 1)
        j = i + seqlen
        inp = trainenc.input_ids[:, i:j]
        tar = inp.clone()
        tar[:, :-1] = -100
        trainloader.append((inp, tar))

    # Re-seed so that the validation windows are deterministic across runs.
    random.seed(0)
    valenc = []
    for _ in range(256):
        while True:
            i = random.randint(0, len(valdata) - 1)
            tmp = tokenizer(valdata[i]['text'], return_tensors='pt')
            if tmp.input_ids.shape[1] >= seqlen:
                break
        i = random.randint(0, tmp.input_ids.shape[1] - seqlen - 1)
        j = i + seqlen
        valenc.append(tmp.input_ids[:, i:j])
    valenc = torch.hstack(valenc)

    class TokenizerWrapper:
        def __init__(self, input_ids):
            self.input_ids = input_ids

    valenc = TokenizerWrapper(valenc)

    return trainloader, valenc


def get_pile(nsamples, seed, seqlen, model):
    """Build a calibration loader from NeelNanda/pile-10k and a C4 validation tensor."""
    from datasets import load_dataset

    traindata = load_dataset(
        'NeelNanda/pile-10k', split='train',
        cache_dir=cache_dir
    )

    valdata = load_dataset(
        'allenai/c4', 'allenai--c4', data_files={'validation': 'en/c4-validation.00000-of-00008.json.gz'},
        split='validation',
        cache_dir=cache_dir
    )

    from transformers import AutoTokenizer
    tokenizer = AutoTokenizer.from_pretrained(model, use_fast=False, cache_dir=cache_dir)

    import random
    random.seed(seed)
    trainloader = []
    for _ in range(nsamples):
        # Keep drawing documents until one is long enough to slice a seqlen window from.
        while True:
            i = random.randint(0, len(traindata) - 1)
            trainenc = tokenizer(traindata[i]['text'], return_tensors='pt')
            if trainenc.input_ids.shape[1] > seqlen:
                break
        i = random.randint(0, trainenc.input_ids.shape[1] - seqlen - 1)
        j = i + seqlen
        inp = trainenc.input_ids[:, i:j]
        tar = inp.clone()
        tar[:, :-1] = -100
        trainloader.append((inp, tar))

    # Re-seed so that the validation windows are deterministic across runs.
    random.seed(0)
    valenc = []
    for _ in range(256):
        while True:
            i = random.randint(0, len(valdata) - 1)
            tmp = tokenizer(valdata[i]['text'], return_tensors='pt')
            if tmp.input_ids.shape[1] >= seqlen:
                break
        i = random.randint(0, tmp.input_ids.shape[1] - seqlen - 1)
        j = i + seqlen
        valenc.append(tmp.input_ids[:, i:j])
    valenc = torch.hstack(valenc)

    class TokenizerWrapper:
        def __init__(self, input_ids):
            self.input_ids = input_ids

    valenc = TokenizerWrapper(valenc)

    return trainloader, valenc


def get_ptb_new(nsamples, seed, seqlen, model):
    """Penn Treebank variant that joins sentences with spaces and evaluates on the test split."""
    from datasets import load_dataset
    traindata = load_dataset('ptb_text_only', 'penn_treebank', split='train', cache_dir=cache_dir)
    testdata = load_dataset('ptb_text_only', 'penn_treebank', split='test', cache_dir=cache_dir)

    from transformers import AutoTokenizer
    tokenizer = AutoTokenizer.from_pretrained(model, use_fast=False, cache_dir=cache_dir)
    trainenc = tokenizer(" ".join(traindata['sentence']), return_tensors='pt')
    testenc = tokenizer(" ".join(testdata['sentence']), return_tensors='pt')

    import random
    random.seed(seed)
    trainloader = []
    for _ in range(nsamples):
        i = random.randint(0, trainenc.input_ids.shape[1] - seqlen - 1)
        j = i + seqlen
        inp = trainenc.input_ids[:, i:j]
        tar = inp.clone()
        tar[:, :-1] = -100
        trainloader.append((inp, tar))
    return trainloader, testenc


def get_c4_new(nsamples, seed, seqlen, model):
    """C4 variant that evaluates on the first 256 * seqlen tokens of the validation shard."""
    from datasets import load_dataset
    traindata = load_dataset(
        'allenai/c4', 'allenai--c4', data_files={'train': 'en/c4-train.00000-of-01024.json.gz'}, split='train'
    )
    valdata = load_dataset(
        'allenai/c4', 'allenai--c4', data_files={'validation': 'en/c4-validation.00000-of-00008.json.gz'},
        split='validation'
    )

    from transformers import AutoTokenizer
    tokenizer = AutoTokenizer.from_pretrained(model, use_fast=False, cache_dir=cache_dir)

    import random
    random.seed(seed)
    trainloader = []
    for _ in range(nsamples):
        # Keep drawing documents until one is long enough to slice a seqlen window from.
        while True:
            i = random.randint(0, len(traindata) - 1)
            trainenc = tokenizer(traindata[i]['text'], return_tensors='pt')
            if trainenc.input_ids.shape[1] >= seqlen:
                break
        i = random.randint(0, trainenc.input_ids.shape[1] - seqlen - 1)
        j = i + seqlen
        inp = trainenc.input_ids[:, i:j]
        tar = inp.clone()
        tar[:, :-1] = -100
        trainloader.append((inp, tar))

    valenc = tokenizer(' '.join(valdata[:1100]['text']), return_tensors='pt')
    valenc = valenc.input_ids[:, :(256 * seqlen)]

    class TokenizerWrapper:
        def __init__(self, input_ids):
            self.input_ids = input_ids

    valenc = TokenizerWrapper(valenc)

    return trainloader, valenc


def get_loaders(name, nsamples=128, seed=0, seqlen=2048, model=''):
    """Dispatch to the dataset-specific loader based on the dataset name."""
    if 'wikitext2' in name:
        return get_wikitext2(nsamples, seed, seqlen, model)
    if 'ptb' in name:
        if 'new' in name:
            return get_ptb_new(nsamples, seed, seqlen, model)
        return get_ptb(nsamples, seed, seqlen, model)
    if 'c4' in name:
        if 'new' in name:
            return get_c4_new(nsamples, seed, seqlen, model)
        return get_c4(nsamples, seed, seqlen, model)
    if 'pile' in name:
        return get_pile(nsamples, seed, seqlen, model)
    # Reject unknown dataset names instead of silently returning None.
    raise ValueError(f"Unsupported calibration dataset: {name}")
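For reference, a small usage sketch of `get_loaders` (the model name here is a placeholder). Each calibration sample is an (input_ids, labels) pair in which every label except the last one is masked to -100:
```
trainloader, testenc = get_loaders("wikitext2", nsamples=2, seed=0, seqlen=128, model="facebook/opt-125m")
inp, tar = trainloader[0]
print(inp.shape)                           # torch.Size([1, 128]): one seqlen-long window per sample
print((tar[:, :-1] == -100).all().item())  # True: only the final position keeps its label
print(testenc.input_ids.shape)             # the fully tokenized evaluation split
```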
@@ -0,0 +1,17 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright (c) 2022 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .lm_eval.evaluator import evaluate
@@ -0,0 +1,16 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright (c) 2022 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
