Enhance 3.x common logger and update 3.x torch example (#1783)
Signed-off-by: yuwenzho <[email protected]>
Signed-off-by: chensuyue <[email protected]>
yuwenzho authored May 15, 2024
1 parent 7c0b700 commit 1cb844b
Showing 17 changed files with 163 additions and 96 deletions.
43 changes: 23 additions & 20 deletions .azure-pipelines/scripts/models/env_setup.sh
@@ -83,27 +83,30 @@ if [[ "${inc_new_api}" == "false" ]]; then
 fi
 
 cd ${model_src_dir}
-pip install ruamel.yaml==0.17.40
-pip install psutil
-pip install protobuf==4.23.4
-if [[ "${framework}" == "tensorflow" ]]; then
-    if [[ "${fwk_ver}" == *"-official" ]]; then
-        pip install tensorflow==${fwk_ver%-official}
-    else
-        pip install intel-tensorflow==${fwk_ver}
+
+if [[ "${fwk_ver}" != "latest" ]]; then
+    pip install ruamel.yaml==0.17.40
+    pip install psutil
+    pip install protobuf==4.23.4
+    if [[ "${framework}" == "tensorflow" ]]; then
+        if [[ "${fwk_ver}" == *"-official" ]]; then
+            pip install tensorflow==${fwk_ver%-official}
+        else
+            pip install intel-tensorflow==${fwk_ver}
+        fi
+    elif [[ "${framework}" == "pytorch" ]]; then
+        pip install torch==${fwk_ver} -f https://download.pytorch.org/whl/torch_stable.html
+        pip install torchvision==${torch_vision_ver} -f https://download.pytorch.org/whl/torch_stable.html
+    elif [[ "${framework}" == "onnxrt" ]]; then
+        pip install onnx==1.15.0
+        pip install onnxruntime==${fwk_ver}
+    elif [[ "${framework}" == "mxnet" ]]; then
+        pip install numpy==1.23.5
+        echo "re-install pycocotools resolve the issue with numpy..."
+        pip uninstall pycocotools -y
+        pip install --no-cache-dir pycocotools
+        pip install mxnet==${fwk_ver}
     fi
-elif [[ "${framework}" == "pytorch" ]]; then
-    pip install torch==${fwk_ver} -f https://download.pytorch.org/whl/torch_stable.html
-    pip install torchvision==${torch_vision_ver} -f https://download.pytorch.org/whl/torch_stable.html
-elif [[ "${framework}" == "onnxrt" ]]; then
-    pip install onnx==1.15.0
-    pip install onnxruntime==${fwk_ver}
-elif [[ "${framework}" == "mxnet" ]]; then
-    pip install numpy==1.23.5
-    echo "re-install pycocotools resolve the issue with numpy..."
-    pip uninstall pycocotools -y
-    pip install --no-cache-dir pycocotools
-    pip install mxnet==${fwk_ver}
 fi
 
 if [ -f "requirements.txt" ]; then
28 changes: 21 additions & 7 deletions .azure-pipelines/scripts/models/run_model_trigger_common.sh
@@ -48,6 +48,13 @@ do
 esac
 done
 
+function check_results() {
+    local control_phrase=$1
+    if [ $(grep "${control_phrase}" ${log_dir}/${model}/${framework}-${model}-tune.log | wc -l) == 0 ];then
+        $BOLD_RED && echo "====== Quantization FAILED!! ======" && $RESET; exit 1
+    fi
+}
+
 log_dir="/neural-compressor/.azure-pipelines/scripts/models"
 SCRIPTS_PATH="/neural-compressor/.azure-pipelines/scripts/models"
 if [[ "${inc_new_api}" == "3x"* ]]; then
@@ -90,16 +97,19 @@ elif [ "${mode}" == "tuning" ]; then
         2>&1 | tee -a ${log_dir}/${model}/${framework}-${model}-tune.log
     $BOLD_YELLOW && echo "====== check tuning status. ======" && $RESET
     if [[ "${inc_new_api}" == "3x"* ]]; then
-        control_phrase="Quantization end."
+        control_phrase_1="Preparation end."
+        check_results $control_phrase_1
+        control_phrase_2="Conversion end."
+        check_results $control_phrase_2
     else
         control_phrase="model which meet accuracy goal."
+        check_results $control_phrase
+        if [ $(grep "${control_phrase}" ${log_dir}/${model}/${framework}-${model}-tune.log | grep "Not found" | wc -l) == 1 ];then
+            $BOLD_RED && echo "====== Quantization FAILED!! ======" && $RESET; exit 1
+        fi
     fi
-    if [ $(grep "${control_phrase}" ${log_dir}/${model}/${framework}-${model}-tune.log | wc -l) == 0 ];then
-        $BOLD_RED && echo "====== Quantization FAILED!! ======" && $RESET; exit 1
-    fi
-    if [ $(grep "${control_phrase}" ${log_dir}/${model}/${framework}-${model}-tune.log | grep "Not found" | wc -l) == 1 ];then
-        $BOLD_RED && echo "====== Quantization FAILED!! ======" && $RESET; exit 1
-    fi
+
 
     $BOLD_GREEN && echo "====== Quantization SUCCEED!! ======" && $RESET
 elif [ "${mode}" == "fp32_benchmark" ]; then
     cd ${WORK_SOURCE_DIR}/${model_src_dir}
@@ -149,6 +159,10 @@ elif [ "${mode}" == "collect_log" ]; then
     cd ${WORK_SOURCE_DIR}/${model_src_dir}
     $BOLD_YELLOW && echo "workspace ${WORK_SOURCE_DIR}/${model_src_dir}" && $RESET
     $BOLD_YELLOW && echo "====== collect logs of model ${model} =======" && $RESET
+    if [ "${framework}" == "pytorch" ] && [ "${fwk_ver}" == "latest" ]; then
+        fwk_ver=$(python -c "import torch; print(torch.__version__)")
+    fi
+
     python -u ${SCRIPTS_PATH}/collect_log_model.py \
         --framework=${framework} \
         --fwk_ver=${fwk_ver} \
17 changes: 11 additions & 6 deletions .azure-pipelines/scripts/models/run_pytorch_models_trigger.sh
@@ -21,12 +21,6 @@ do
 esac
 done
 
-echo "specify FWs version..."
-source /neural-compressor/.azure-pipelines/scripts/fwk_version.sh 'latest'
-FRAMEWORK="pytorch"
-FRAMEWORK_VERSION=${pytorch_version}
-TORCH_VISION_VERSION=${torchvision_version}
-
 dataset_location=""
 input_model=""
 yaml=""
@@ -72,6 +66,17 @@ elif [ "${model}" == "opt_125m_woq_gptq_int4_dq_ggml" ]; then
     tuning_cmd="bash run_quant.sh --topology=opt_125m_woq_gptq_int4_dq_ggml"
 fi
 
+echo "Specify FWs version..."
+
+FRAMEWORK="pytorch"
+source /neural-compressor/.azure-pipelines/scripts/fwk_version.sh 'latest'
+if [[ "${inc_new_api}" == "3x"* ]]; then
+    FRAMEWORK_VERSION="latest"
+else
+    FRAMEWORK_VERSION=${pytorch_version}
+    TORCH_VISION_VERSION=${torchvision_version}
+fi
+
 
 /bin/bash run_model_trigger_common.sh \
     --yaml=${yaml} \
Changes to the 3.x torch example (file path not shown in this capture):
@@ -236,11 +236,11 @@ def get_user_model():
 
     # 3.x api
     if args.approach == 'weight_only':
-        from neural_compressor.torch.quantization import RTNConfig, GPTQConfig, quantize
+        from neural_compressor.torch.quantization import RTNConfig, GPTQConfig, prepare, convert, quantize
         from neural_compressor.torch.utils import get_double_quant_config
         weight_sym = True if args.woq_scheme == "sym" else False
         double_quant_config_dict = get_double_quant_config(args.double_quant_type)
 
         if args.woq_algo == "RTN":
             if args.double_quant_type is not None:
                 double_quant_config_dict.update(
@@ -269,9 +269,8 @@ def get_user_model():
                     double_quant_group_size=args.double_quant_group_size,
                 )
             quant_config.set_local("lm_head", RTNConfig(dtype="fp32"))
-            user_model = quantize(
-                model=user_model, quant_config=quant_config
-            )
+            user_model = prepare(model=user_model, quant_config=quant_config)
+            user_model = convert(model=user_model)
         elif args.woq_algo == "GPTQ":
             from utils import DataloaderPreprocessor
             dataloaderPreprocessor = DataloaderPreprocessor(
@@ -326,24 +325,24 @@ def run_fn_for_gptq(model, dataloader_for_calibration, *args):
                     double_quant_group_size=args.double_quant_group_size,
                 )
             quant_config.set_local("lm_head", GPTQConfig(dtype="fp32"))
-            user_model = quantize(
-                model=user_model, quant_config=quant_config, run_fn=run_fn_for_gptq, run_args=(dataloader_for_calibration, )
-            )
+            user_model = prepare(model=user_model, quant_config=quant_config)
+            run_fn_for_gptq(user_model, dataloader_for_calibration)
+            user_model = convert(user_model)
     else:
         if args.sq:
-            from neural_compressor.torch.quantization import SmoothQuantConfig, quantize
+            from neural_compressor.torch.quantization import SmoothQuantConfig
 
             # alpha can be a float number of a list of float number.
             args.alpha = args.alpha if args.alpha == "auto" else eval(args.alpha)
             if re.search("falcon", user_model.config.model_type):
                 quant_config = SmoothQuantConfig(alpha=args.alpha, folding=False)
             else:
                 quant_config = SmoothQuantConfig(alpha=args.alpha, folding=True)
 
             if re.search("gpt", user_model.config.model_type):
                 quant_config.set_local("add", SmoothQuantConfig(w_dtype="fp32", act_dtype="fp32"))
         else:
-            from neural_compressor.torch.quantization import quantize, get_default_static_config, StaticQuantConfig
+            from neural_compressor.torch.quantization import get_default_static_config, StaticQuantConfig
 
             quant_config = get_default_static_config()
             if re.search("gpt", user_model.config.model_type):
@@ -364,12 +363,23 @@ def run_fn(model):
                 except ValueError:
                     pass
             return
 
         from utils import get_example_inputs
         example_inputs = get_example_inputs(user_model, calib_dataloader)
-        user_model = quantize(
-            model=user_model, quant_config=quant_config, example_inputs=example_inputs, run_fn=run_fn
-        )
+        if args.sq:
+            # currently, smooth quant only support quantize API
+            # TODO: support prepare/convert API for smooth quant
+            from neural_compressor.torch.quantization import quantize
+
+            user_model = quantize(
+                model=user_model, quant_config=quant_config, example_inputs=example_inputs, run_fn=run_fn
+            )
+        else:
+            from neural_compressor.torch.quantization import prepare, convert
+
+            user_model = prepare(model=user_model, quant_config=quant_config, example_inputs=example_inputs)
+            run_fn(user_model)
+            user_model = convert(user_model)
     user_model.save(args.output_dir)
 
 
@@ -394,7 +404,7 @@ def run_fn(model):
     user_model.eval()
     from intel_extension_for_transformers.transformers.llm.evaluation.lm_eval import evaluate, LMEvalParser
     eval_args = LMEvalParser(
-        model="hf",
+        model="hf",
         user_model=user_model,
         tokenizer=tokenizer,
         batch_size=args.batch_size,
@@ -417,7 +427,7 @@
 
     samples = args.iters * args.batch_size
     eval_args = LMEvalParser(
-        model="hf",
+        model="hf",
         user_model=user_model,
         tokenizer=tokenizer,
         batch_size=args.batch_size,
@@ -436,4 +446,4 @@ def run_fn(model):
     print("Accuracy: %.5f" % acc)
     print('Throughput: %.3f samples/sec' % (samples / (end - start)))
     print('Latency: %.3f ms' % ((end - start) * 1000 / samples))
-    print('Batch size = %d' % args.batch_size)
\ No newline at end of file
+    print('Batch size = %d' % args.batch_size)
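Net effect of the example changes above: apart from smooth quant, which still goes through quantize() (see the TODO in the diff), the 3.x paths now run an explicit prepare -> calibrate -> convert sequence instead of a single quantize() call. A minimal sketch of that pattern, assuming an already-loaded model; the RTN config is just one of the configs the example supports:

    # Illustrative sketch of the 3.x prepare/convert flow (not part of the diff).
    from neural_compressor.torch.quantization import RTNConfig, prepare, convert

    quant_config = RTNConfig()                               # pick a supported algorithm config
    model = prepare(model=model, quant_config=quant_config)  # instrument the FP32 model
    # run calibration here if the algorithm needs it, e.g. run_fn_for_gptq(model, dataloader)
    model = convert(model=model)                             # produce the quantized model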
4 changes: 3 additions & 1 deletion neural_compressor/common/__init__.py
@@ -17,7 +17,7 @@
     logger,
     Logger,
     TuningLogger,
-    log_quant_execution,
+    log_process,
     set_random_seed,
     set_resume_from,
     set_workspace,
@@ -32,6 +32,8 @@
     "level",
     "logger",
     "Logger",
+    "TuningLogger",
+    "log_process",
     "set_workspace",
     "set_random_seed",
     "set_resume_from",
9 changes: 9 additions & 0 deletions neural_compressor/common/utils/constants.py
@@ -45,3 +45,12 @@
 from typing import Callable, Union
 
 OP_NAME_OR_MODULE_TYPE = Union[str, Callable]
+
+# mode name
+from enum import Enum
+
+
+class Mode(Enum):
+    PREPARE = "prepare"
+    CONVERT = "convert"
+    QUANTIZE = "quantize"
25 changes: 21 additions & 4 deletions neural_compressor/common/utils/logger.py
@@ -20,6 +20,8 @@
 import logging
 import os
 
+from neural_compressor.common.utils import Mode
+
 __all__ = [
     "level",
     "Logger",  # TODO: not expose it
@@ -140,6 +142,17 @@ def warning(msg, *args, **kwargs):
 logger = Logger
 
 
+def _get_log_msg(mode):
+    log_msg = None
+    if mode == Mode.QUANTIZE:
+        log_msg = "Quantization"
+    elif mode == Mode.PREPARE:  # pragma: no cover
+        log_msg = "Preparation"
+    elif mode == Mode.CONVERT:  # pragma: no cover
+        log_msg = "Conversion"
+    return log_msg
+
+
 class TuningLogger:
     """A unified logger for the tuning/quantization process.
@@ -155,12 +168,16 @@ def trial_start(cls, trial_index: int = None) -> None:
         logger.info("%d-trail started.", trial_index)
 
     @classmethod
-    def quantization_start(cls, stacklevel=2) -> None:
-        logger.info("Quantization started.", stacklevel=stacklevel)
+    def execution_start(cls, mode=Mode.QUANTIZE, stacklevel=2):
+        log_msg = _get_log_msg(mode)
+        assert log_msg is not None, "Please check `mode` in execution_start function of TuningLogger class."
+        logger.info("{} started.".format(log_msg), stacklevel=stacklevel)
 
     @classmethod
-    def quantization_end(cls, stacklevel=2) -> None:
-        logger.info("Quantization end.", stacklevel=stacklevel)
+    def execution_end(cls, mode=Mode.QUANTIZE, stacklevel=2):
+        log_msg = _get_log_msg(mode)
+        assert log_msg is not None, "Please check `mode` in execution_end function of TuningLogger class."
+        logger.info("{} end.".format(log_msg), stacklevel=stacklevel)
 
     @classmethod
     def evaluation_start(cls) -> None:
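With this change each phase logs its own start/end phrase, which is exactly what the updated check_results logic in run_model_trigger_common.sh greps for ("Preparation end." and "Conversion end."). A hypothetical call sequence, not taken from this commit:

    # Illustrative only: behavior of the new mode-aware logging methods.
    from neural_compressor.common.utils import Mode, TuningLogger

    TuningLogger.execution_start(mode=Mode.PREPARE)  # logs "Preparation started."
    # ... preparation work ...
    TuningLogger.execution_end(mode=Mode.PREPARE)    # logs "Preparation end."
    TuningLogger.execution_start(mode=Mode.CONVERT)  # logs "Conversion started."
    # ... conversion work ...
    TuningLogger.execution_end(mode=Mode.CONVERT)    # logs "Conversion end."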
27 changes: 17 additions & 10 deletions neural_compressor/common/utils/utility.py
@@ -22,15 +22,15 @@
 import cpuinfo
 import psutil
 
-from neural_compressor.common.utils import TuningLogger, logger
+from neural_compressor.common.utils import Mode, TuningLogger, logger
 
 __all__ = [
     "set_workspace",
     "set_random_seed",
     "set_resume_from",
     "set_tensorboard",
     "dump_elapsed_time",
-    "log_quant_execution",
+    "log_process",
     "singleton",
     "LazyImport",
     "CpuInfo",
@@ -206,14 +206,21 @@ def set_tensorboard(tensorboard: bool):
 default_tuning_logger = TuningLogger()
 
 
-def log_quant_execution(func):
-    def wrapper(*args, **kwargs):
-        default_tuning_logger.quantization_start(stacklevel=4)
+def log_process(mode=Mode.QUANTIZE):
+    def log_process_wrapper(func):
+        def inner_wrapper(*args, **kwargs):
+            start_log = default_tuning_logger.execution_start
+            end_log = default_tuning_logger.execution_end
 
-        # Call the original function
-        result = func(*args, **kwargs)
+            start_log(mode=mode, stacklevel=4)
+
+            # Call the original function
+            result = func(*args, **kwargs)
 
-        default_tuning_logger.quantization_end(stacklevel=4)
-        return result
+            end_log(mode=mode, stacklevel=4)
+
+            return result
 
-    return wrapper
+        return inner_wrapper
+
+    return log_process_wrapper
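log_process is now a decorator factory: it takes the phase as an argument and returns the actual decorator, so each entry point declares which phase it logs. A usage sketch — the decorated function below is hypothetical, not from this commit:

    # Illustrative only: applying log_process to a hypothetical entry point.
    from neural_compressor.common.utils import Mode, log_process


    @log_process(mode=Mode.PREPARE)
    def prepare_model(model, quant_config):
        # "Preparation started." is logged before this body runs,
        # "Preparation end." after it returns.
        return model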
4 changes: 2 additions & 2 deletions neural_compressor/onnxrt/quantization/autotune.py
@@ -75,10 +75,10 @@ def autotune(
         if calibration_data_reader is not None:
             calibration_data_reader.rewind()
         tuning_logger.trial_start(trial_index=trial_index)
-        tuning_logger.quantization_start()
+        tuning_logger.execution_start()
         logger.debug("quant config: {}".format(quant_config))
         q_model = _quantize(model_input, quant_config=quant_config, calibration_data_reader=calibration_data_reader)
-        tuning_logger.quantization_end()
+        tuning_logger.execution_end()
         tuning_logger.evaluation_start()
         with tempfile.TemporaryDirectory(prefix="ort.quant.") as tmp_dir:
             # evaluate API requires str input
(Diffs for the remaining 8 changed files were not loaded in this view.)