Enhance 3.x torch UT (#1787)
Enhance 3.x torch UT
---------

Signed-off-by: yuwenzho <[email protected]>
yuwenzho authored May 13, 2024
1 parent a4675c7 commit 7c0b700
Showing 8 changed files with 144 additions and 510 deletions.
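The pattern applied across these test files is to fold the separate "with_new_api" tests into the base tests, so each test compares the one-shot `quantize` API against the two-step `prepare` + `convert` API on the same FP32 model. Below is a minimal sketch of that pattern, assembled from the HQQ calls that appear verbatim in the diffs that follow; only the imports are filled in, and the CPU tests additionally pin `hqq_global_option.use_half = False`, which is omitted here.

from copy import deepcopy

import torch
from transformers import AutoModelForCausalLM

from neural_compressor.torch.quantization import convert, get_default_hqq_config, prepare, quantize

fp32_model = AutoModelForCausalLM.from_pretrained("facebook/opt-125m")
example_inputs = torch.tensor([[10, 20, 30, 40, 50, 60]], dtype=torch.long, device="cpu")
quant_config = get_default_hqq_config()

# prepare + convert API
model = prepare(deepcopy(fp32_model), quant_config)
model = convert(model)
q_label_1 = model(example_inputs)[0]

# quantize API
model = quantize(deepcopy(fp32_model), quant_config)
q_label_2 = model(example_inputs)[0]

# both flows are expected to produce identical outputs
assert torch.all(q_label_1.eq(q_label_2))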
130 changes: 9 additions & 121 deletions test/3x/torch/quantization/test_static_quant.py
@@ -47,127 +47,6 @@ def setup_class(self):
def teardown_class(self):
pass

@pytest.mark.skipif(not is_ipex_available(), reason="Requires IPEX")
def test_static_quant_default(self):
fp32_model = copy.deepcopy(self.fp32_model)
quant_config = get_default_static_config()
example_inputs = self.input
q_model = quantize(fp32_model, quant_config=quant_config, run_fn=run_fn, example_inputs=example_inputs)
assert q_model is not None, "Quantization failed!"

@pytest.mark.skipif(not is_ipex_available(), reason="Requires IPEX")
def test_static_quant_fallback(self):
fp32_model = copy.deepcopy(self.fp32_model)
quant_config = get_default_static_config()
example_inputs = self.input
# fallback by op_type
quant_config.set_local(torch.nn.modules.linear.Linear, StaticQuantConfig(w_dtype="fp32", act_dtype="fp32"))
q_model = quantize(fp32_model, quant_config=quant_config, run_fn=run_fn, example_inputs=example_inputs)
assert q_model is not None, "Quantization failed!"

# fallback by op_name
quant_config.set_local("fc1", StaticQuantConfig(w_dtype="fp32", act_dtype="fp32"))
q_model = quantize(fp32_model, quant_config=quant_config, run_fn=run_fn, example_inputs=example_inputs)
assert q_model is not None, "Quantization failed!"

@pytest.mark.skipif(not is_ipex_available(), reason="Requires IPEX")
@pytest.mark.parametrize(
"act_sym, act_algo",
[
(True, "kl"),
(True, "minmax"),
(False, "kl"),
(False, "minmax"),
],
)
def test_static_quant_params(self, act_sym, act_algo):
fp32_model = copy.deepcopy(self.fp32_model)
quant_config = StaticQuantConfig(act_sym=act_sym, act_algo=act_algo)
example_inputs = self.input
q_model = quantize(fp32_model, quant_config=quant_config, run_fn=run_fn, example_inputs=example_inputs)
assert q_model is not None, "Quantization failed!"

@pytest.mark.skipif(not is_ipex_available(), reason="Requires IPEX")
def test_static_quant_accuracy(self):
class M(torch.nn.Module):
def __init__(self):
super().__init__()
self.linear = torch.nn.Linear(2, 2, False)

def forward(self, x):
x = self.linear(x)
x = x + x
return x

model = M()

def run_fn(model):
model(torch.randn(3, 2))

fp32_model = copy.deepcopy(model)
fp32_model.linear.weight = torch.nn.Parameter(torch.tensor([[0.0, 1.0], [1.0, 0.0]]))
example_inputs = torch.zeros(3, 2)
quant_config = StaticQuantConfig(act_sym=True, act_algo="kl")
q_model = quantize(fp32_model, quant_config=quant_config, run_fn=run_fn, example_inputs=example_inputs)
output1 = fp32_model(example_inputs)
output2 = q_model(example_inputs)
# set a big atol to avoid random issue
assert torch.allclose(output1, output2, atol=2e-2), "Accuracy gap atol > 0.02 is unexpected. Please check."

@pytest.mark.skipif(not is_ipex_available(), reason="Requires IPEX")
def test_static_quant_save_load(self):
from intel_extension_for_pytorch.quantization import convert, prepare

example_inputs = torch.zeros(1, 30)
try:
qconfig = ipex.quantization.default_static_qconfig_mapping
except:
from torch.ao.quantization import MinMaxObserver, PerChannelMinMaxObserver, QConfig

qconfig = QConfig(
activation=MinMaxObserver.with_args(qscheme=torch.per_tensor_affine, dtype=torch.quint8),
weight=PerChannelMinMaxObserver.with_args(dtype=torch.qint8, qscheme=torch.per_channel_symmetric),
)
user_model = copy.deepcopy(self.fp32_model)
user_model = prepare(user_model.eval(), qconfig, example_inputs=example_inputs, inplace=True)

def run_fn(model):
model(example_inputs)

run_fn(user_model)
with torch.no_grad():
user_model = convert(user_model.eval(), inplace=True).eval()
user_model(example_inputs)
user_model = torch.jit.trace(user_model.eval(), example_inputs, strict=False)
user_model = torch.jit.freeze(user_model.eval())
user_model(example_inputs)
user_model(example_inputs)
ipex_out = user_model(example_inputs)

fp32_model = copy.deepcopy(self.fp32_model)
quant_config = get_default_static_config()
q_model = quantize(fp32_model, quant_config=quant_config, run_fn=run_fn, example_inputs=example_inputs)
assert q_model is not None, "Quantization failed!"
inc_out = q_model(example_inputs)
# set a big atol to avoid random issue
assert torch.allclose(inc_out, ipex_out, atol=2e-02), "Unexpected result. Please double check."
q_model.save("saved_results")

from neural_compressor.torch.quantization import load

# load
loaded_model = load("saved_results")
assert isinstance(loaded_model, torch.jit.ScriptModule)


class TestStaticQuantWithNewAPI:
def setup_class(self):
self.fp32_model = build_simple_torch_model()
self.input = torch.randn(1, 30)

def teardown_class(self):
pass

@pytest.mark.skipif(not is_ipex_available(), reason="Requires IPEX")
def test_static_quant_default(self):
fp32_model = copy.deepcopy(self.fp32_model)
@@ -292,3 +171,12 @@ def run_fn(model):
# load
loaded_model = load("saved_results")
assert isinstance(loaded_model, torch.jit.ScriptModule)

@pytest.mark.skipif(not is_ipex_available(), reason="Requires IPEX")
def test_static_quant_with_quantize_API(self):
# quantize API
fp32_model = copy.deepcopy(self.fp32_model)
quant_config = get_default_static_config()
example_inputs = self.input
q_model = quantize(fp32_model, quant_config=quant_config, run_fn=run_fn, example_inputs=example_inputs)
assert q_model is not None, "Quantization failed!"
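For contrast with the `quantize` call in the test above, here is a rough sketch of the two-step static-quantization flow. The prepare/calibrate/convert ordering follows the usage visible elsewhere in this commit, but the exact `prepare` signature (in particular the `example_inputs` keyword) and the stand-in model are assumptions, not lines from this diff; like the tests, running it requires IPEX.

import torch

from neural_compressor.torch.quantization import convert, get_default_static_config, prepare

# stand-in for the build_simple_torch_model() helper used by these tests
fp32_model = torch.nn.Sequential(torch.nn.Linear(30, 50), torch.nn.ReLU(), torch.nn.Linear(50, 30))
example_inputs = torch.randn(1, 30)
quant_config = get_default_static_config()

def run_fn(model):
    # calibration pass over representative inputs (the test file defines its own run_fn)
    model(example_inputs)

# prepare + convert API: calibrate between the two steps
prepared_model = prepare(fp32_model, quant_config=quant_config, example_inputs=example_inputs)  # example_inputs kwarg assumed
run_fn(prepared_model)
q_model = convert(prepared_model)
assert q_model is not None, "Quantization failed!"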
30 changes: 14 additions & 16 deletions test/3x/torch/quantization/weight_only/hqq/test_hqq_cpu.py
@@ -65,29 +65,27 @@ def force_not_half(self, monkeypatch):
monkeypatch.setattr(hqq_global_option, "use_half", False)

def test_hqq_quant(self, force_use_cpu, force_not_half):
from neural_compressor.torch.quantization import get_default_hqq_config, quantize
from neural_compressor.torch.quantization import convert, get_default_hqq_config, prepare, quantize

hqq_global_option.use_half = False
model = AutoModelForCausalLM.from_pretrained("facebook/opt-125m")
fp32_model = AutoModelForCausalLM.from_pretrained("facebook/opt-125m")
example_inputs = torch.tensor([[10, 20, 30, 40, 50, 60]], dtype=torch.long, device="cpu")
# test_default_config
quant_config = get_default_hqq_config()
model = quantize(model, quant_config)
q_label = model(example_inputs)[0]
print(q_label)

def test_hqq_quant_with_new_api(self, force_use_cpu, force_not_half):
from neural_compressor.torch.quantization import convert, get_default_hqq_config, prepare

hqq_global_option.use_half = False
model = AutoModelForCausalLM.from_pretrained("facebook/opt-125m")
example_inputs = torch.tensor([[10, 20, 30, 40, 50, 60]], dtype=torch.long, device="cpu")
# test_default_config
quant_config = get_default_hqq_config()
model = prepare(model, quant_config)
# prepare + convert API
model = prepare(deepcopy(fp32_model), quant_config)
model = convert(model)
q_label = model(example_inputs)[0]
print(q_label)
q_label_1 = model(example_inputs)[0]

# quantize API
model = quantize(deepcopy(fp32_model), quant_config)
q_label_2 = model(example_inputs)[0]

# compare the results of calling `prepare` + `convert` and calling `quantize`
assert torch.all(
q_label_1.eq(q_label_2)
), "The results of calling `prepare` + `convert` and calling `quantize` should be equal."

@pytest.mark.parametrize(
"nbits, group_size, quant_zero, quant_scale, scale_quant_group_size",
69 changes: 12 additions & 57 deletions test/3x/torch/quantization/weight_only/hqq/test_hqq_cuda.py
@@ -64,73 +64,28 @@ def setup_class(cls):
hqq_global_option.use_half = True

def test_hqq_quant(self):
from neural_compressor.torch.quantization import get_default_hqq_config, quantize
from neural_compressor.torch.quantization import convert, get_default_hqq_config, prepare, quantize

model = AutoModelForCausalLM.from_pretrained("facebook/opt-125m")
fp32_model = AutoModelForCausalLM.from_pretrained("facebook/opt-125m")
example_inputs = torch.tensor(
[[10, 20, 30, 40, 50, 60]], dtype=torch.long, device=auto_detect_accelerator().current_device()
)
# test_default_config
quant_config = get_default_hqq_config()
model = quantize(model, quant_config)
q_label = model(example_inputs)[0]
print(q_label)

def test_hqq_quant_with_new_api(self):
from neural_compressor.torch.quantization import convert, get_default_hqq_config, prepare

model = AutoModelForCausalLM.from_pretrained("facebook/opt-125m")
example_inputs = torch.tensor(
[[10, 20, 30, 40, 50, 60]], dtype=torch.long, device=auto_detect_accelerator().current_device()
)
# test_default_config
quant_config = get_default_hqq_config()
model = prepare(model, quant_config)
# prepare + convert API
model = prepare(deepcopy(fp32_model), quant_config)
model = convert(model)
q_label = model(example_inputs)[0]
print(q_label)

@pytest.mark.parametrize(
"nbits, group_size, quant_zero, quant_scale, scale_quant_group_size",
[
(4, 64, True, False, 128),
(4, 64, False, False, 128),
(4, 64, True, True, 128),
(4, 64, False, True, 128),
(8, 64, True, False, 128),
(8, 64, False, False, 128),
(8, 64, True, True, 128),
(8, 64, False, True, 128),
(4, 64, True, False, 64),
(4, 64, False, False, 64),
(4, 64, True, True, 64),
(4, 64, False, True, 64),
],
)
def test_hqq_module_cuda(
self,
nbits,
group_size,
quant_zero,
quant_scale,
scale_quant_group_size,
):
_common_cuda_test(
nbits=nbits,
group_size=group_size,
quant_zero=quant_zero,
quant_scale=quant_scale,
scale_quant_group_size=scale_quant_group_size,
)
q_label_1 = model(example_inputs)[0]

# quantize API
model = quantize(deepcopy(fp32_model), quant_config)
q_label_2 = model(example_inputs)[0]

@pytest.mark.skipif(not torch.cuda.is_available(), reason="test requires a GPU")
class TestHQQCUDAWithNewAPI:
@classmethod
def setup_class(cls):
torch.manual_seed(0)
torch.cuda.manual_seed(0)
hqq_global_option.use_half = True
# compare the results of calling `prepare` + `convert` and calling `quantize`
assert torch.all(
q_label_1.eq(q_label_2)
), "The results of calling `prepare` + `convert` and calling `quantize` should be equal."

@pytest.mark.parametrize(
"nbits, group_size, quant_zero, quant_scale, scale_quant_group_size",
66 changes: 26 additions & 40 deletions test/3x/torch/quantization/weight_only/test_autoround.py
@@ -49,29 +49,20 @@ def test_autoround(self):
quant_config = AutoRoundConfig(n_samples=20, seqlen=10, iters=10, scale_dtype="fp32")
logger.info(f"Test AutoRound with config {quant_config}")

qdq_model = quantize(
model=gpt_j_model,
quant_config=quant_config,
run_fn=get_autoround_default_run_fn,
run_args=(
tokenizer,
"NeelNanda/pile-10k",
20,
10,
),
)
"""run_args of get_autoround_default_run_fn:
run_fn = get_autoround_default_run_fn
run_args = (
tokenizer,
dataset_name="NeelNanda/pile-10k",
n_samples=512,
seqlen=2048,
seed=42,
bs=8,
dataset_split: str = "train",
dataloader=None,
"""

q_model = qdq_model
"NeelNanda/pile-10k",
20,
10,
)
fp32_model = gpt_j_model

# prepare + convert API
model = prepare(model=fp32_model, quant_config=quant_config)
run_fn(model, *run_args)
q_model = convert(model)

out2 = q_model(inp)
assert torch.allclose(out1[0], out2[0], atol=1e-1)
assert "transformer.h.0.attn.k_proj" in q_model.autoround_config.keys()
@@ -116,33 +107,28 @@ def test_quantizer(self):
assert "scale" in q_model.autoround_config["transformer.h.0.attn.k_proj"].keys()
assert torch.float32 == q_model.autoround_config["transformer.h.0.attn.k_proj"]["scale_dtype"]

def test_prepare_and_convert_api(self):
def test_autoround_with_quantize_API(self):
inp = torch.ones([1, 10], dtype=torch.long)
gpt_j_model = copy.deepcopy(self.gptj)
tokenizer = transformers.AutoTokenizer.from_pretrained(
"hf-internal-testing/tiny-random-GPTJForCausalLM", trust_remote_code=True
)

out1 = gpt_j_model(inp)

quant_config = get_default_AutoRound_config()
logger.info(f"Test AutoRound with config {quant_config}")

run_fn = get_autoround_default_run_fn
run_args = (
tokenizer,
"NeelNanda/pile-10k",
20,
10,
# quantize API
q_model = quantize(
model=gpt_j_model,
quant_config=quant_config,
run_fn=get_autoround_default_run_fn,
run_args=(
tokenizer,
"NeelNanda/pile-10k",
20,
10,
),
)
fp32_model = gpt_j_model

# quantizer execute
model = prepare(model=fp32_model, quant_config=quant_config)
run_fn(model, *run_args)
q_model = convert(model)

out2 = q_model(inp)
assert torch.allclose(out1[0], out2[0], atol=1e-1)
assert "transformer.h.0.attn.k_proj" in q_model.autoround_config.keys()
assert "scale" in q_model.autoround_config["transformer.h.0.attn.k_proj"].keys()
assert torch.float32 == q_model.autoround_config["transformer.h.0.attn.k_proj"]["scale_dtype"]