-
Notifications
You must be signed in to change notification settings - Fork 259
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
add tf resnet quantization notebook (#1195)
Signed-off-by: Spycsh <[email protected]>
- Loading branch information
Showing
4 changed files
with
385 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
# Notebook of using Intel Neural Compressor to do int8 quantization on ResNet | ||
|
||
You can follow the steps in `resnet_quantization.ipynb` to see how to quantize ResNet step by step. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,79 @@ | ||
import tensorflow as tf | ||
import numpy as np | ||
from tqdm import tqdm | ||
import datasets | ||
from datasets import load_dataset | ||
import argparse | ||
import os | ||
|
||
# Command-line interface: the model to benchmark is the only input.
# The module docstring (if any) is passed as ``description``; the first
# positional argument of ArgumentParser is ``prog``, which is not what a
# docstring is meant for.
parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument("--input_model", type=str, required=True)
args = parser.parse_args()

# Loading the dataset in streaming mode returns an IterableDataset.
calib_dataset = load_dataset('imagenet-1k', split='train', streaming=True, use_auth_token=True)
eval_dataset = load_dataset('imagenet-1k', split='validation', streaming=True, use_auth_token=True)
|
||
# Number of records to sample. The (misspelt) name is kept as-is because
# code further down the script refers to it.
MAX_SAMPLE_LENGTG = 1000


def sample_data(dataset, max_sample_length):
    """Collect the leading records of ``dataset`` into an in-memory dataset.

    Args:
        dataset: Iterable of records exposing ``'image'`` and ``'label'`` keys.
        max_sample_length: Maximum number of records to take from the start
            of the iterable.

    Returns:
        The result of ``datasets.Dataset.from_dict`` on the collected
        ``image`` and ``label`` columns.
    """
    data = {"image": [], "label": []}
    for i, record in enumerate(dataset):
        # Honour the caller-supplied limit instead of the module constant.
        if i >= max_sample_length:
            break
        data["image"].append(record['image'])
        data["label"].append(record['label'])
    return datasets.Dataset.from_dict(data)
|
||
# Take a bounded sample of the validation stream for the benchmark run.
sub_eval_dataset = sample_data(eval_dataset, MAX_SAMPLE_LENGTG)

from neural_compressor.data.transforms.imagenet_transform import TensorflowResizeCropImagenetTransform

# Target height and width passed to the resize/crop transform.
height, width = 224, 224
transform = TensorflowResizeCropImagenetTransform(height, width)
|
||
|
||
class CustomDataloader:
    """Batch image/label records into NumPy arrays.

    Each record must provide an ``'image'`` and a ``'label'`` entry. Records
    whose image does not convert to a 3-D array with three entries along the
    last axis are skipped.
    """

    def __init__(self, dataset, batch_size=1, transform_fn=None):
        """Store the dataset and pre-compute the nominal batch count.

        Args:
            dataset: Sized iterable of records; it is iterated record by
                record at runtime.
            batch_size: Number of samples per yielded batch.
            transform_fn: Optional callable invoked with ``(image, label)``;
                the first element of its result is used as the processed
                image. When omitted, the module-level ``transform`` is used.
        """
        import math

        self.dataset = dataset
        self.batch_size = batch_size
        self.transform_fn = transform_fn
        # Upper bound on the number of batches: skipped records are not
        # known until iteration, so they cannot be subtracted here.
        self.length = math.ceil(len(self.dataset) / self.batch_size)

    def __iter__(self):
        """Yield ``(inputs, labels)`` array pairs of up to ``batch_size`` samples."""
        # Resolve the transform lazily so the module-level default is only
        # required when no explicit callable was supplied.
        apply_transform = self.transform_fn if self.transform_fn is not None else transform
        batch_inputs = []
        labels = []
        for record in self.dataset:
            # record e.g.: {'image': <PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=408x500 ...>, 'label': 91}
            img = record['image']
            label = record['label']
            # Skip the wrong shapes; convert once and reuse the shape.
            img_shape = np.array(img).shape
            if len(img_shape) != 3 or img_shape[-1] != 3:
                continue
            img_resized = apply_transform((img, label))  # (img, label)
            batch_inputs.append(np.array(img_resized[0]))
            labels.append(label)
            # Count accepted samples, not dataset indices, so that skipped
            # records cannot distort the batch size.
            if len(batch_inputs) == self.batch_size:
                yield np.array(batch_inputs), np.array(labels)
                batch_inputs = []
                labels = []
        # Emit the remaining samples instead of silently dropping them.
        if batch_inputs:
            yield np.array(batch_inputs), np.array(labels)

    def __len__(self):
        """Return the nominal number of batches computed at construction."""
        return self.length
|
||
from neural_compressor import quantization | ||
from neural_compressor.config import PostTrainingQuantConfig | ||
from neural_compressor.utils.create_obj_from_config import create_dataloader | ||
|
||
# Loader over the sampled validation subset, one image per batch.
eval_dataloader = CustomDataloader(dataset=sub_eval_dataset, batch_size=1)

from neural_compressor.benchmark import fit
from neural_compressor.config import BenchmarkConfig

# Separate loader instance that is handed to the benchmark run.
bench_dataloader = CustomDataloader(dataset=sub_eval_dataset, batch_size=1)

# Benchmark settings: 100 iterations on a single instance using 4 cores.
conf = BenchmarkConfig(
    iteration=100,
    cores_per_instance=4,
    num_of_instance=1,
)

# Benchmark the model given on the command line.
fit(args.input_model, conf, b_dataloader=bench_dataloader)
301 changes: 301 additions & 0 deletions
301
examples/notebook/tensorflow/resnet/resnet_quantization.ipynb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,301 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"# Intel® Neural Compressor Sample for Tensorflow" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"## Introduction\n", | ||
"\n", | ||
"This is a demo to show how to use Intel® Neural Compressor to do quantization on ResNet." | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"## Prepare Environment" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"!conda install python==3.10 -y\n", | ||
"!pip install neural-compressor\n", | ||
"!wget -nc https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_6/resnet50_fp32_pretrained_model.pb\n", | ||
"!pip install tensorflow\n", | ||
"!pip install datasets\n", | ||
"!pip install git+https://github.com/huggingface/huggingface_hub" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"import tensorflow as tf\n", | ||
"import numpy as np\n", | ||
"import datasets" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"## Create Dataloader" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# log in to Hugging Face to download the imagenet-1k dataset\n", | ||
"# you should replace this read-only token with your own by creating one at https://huggingface.co/settings/tokens\n", | ||
"# !huggingface-cli login --token <YOUR HUGGINGFACE TOKEN>\n", | ||
"!huggingface-cli login --token hf_xxxxxxxxxxxxxxxxxxxxxx" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"from datasets import load_dataset\n", | ||
"# loading the dataset in streaming mode returns an IterableDataset\n", | ||
"calib_dataset = load_dataset('imagenet-1k', split='train', streaming=True, use_auth_token=True)\n", | ||
"eval_dataset = load_dataset('imagenet-1k', split='validation', streaming=True, use_auth_token=True)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# We can select only a subset of the dataset for demo, here just select 1k samples\n", | ||
"MAX_SAMPLE_LENGTG=1000\n", | ||
"def sample_data(dataset, max_sample_length):\n", | ||
" data = {\"image\": [], \"label\": []}\n", | ||
" for i, record in enumerate(dataset):\n", | ||
" if i >= MAX_SAMPLE_LENGTG:\n", | ||
" break\n", | ||
" data[\"image\"].append(record['image'])\n", | ||
" data[\"label\"].append(record['label'])\n", | ||
" return datasets.Dataset.from_dict(data)\n", | ||
"\n", | ||
"sub_calib_dataset = sample_data(calib_dataset, MAX_SAMPLE_LENGTG)\n", | ||
"sub_eval_dataset = sample_data(eval_dataset, MAX_SAMPLE_LENGTG)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"from neural_compressor.data.transforms.imagenet_transform import TensorflowResizeCropImagenetTransform\n", | ||
"height = width = 224\n", | ||
"transform = TensorflowResizeCropImagenetTransform(height, width)\n", | ||
"\n", | ||
"class CustomDataloader:\n", | ||
" def __init__(self, dataset, batch_size=1):\n", | ||
" '''dataset is a iterable dataset and will be loaded record by record at runtime.'''\n", | ||
" self.dataset = dataset\n", | ||
" self.batch_size = batch_size\n", | ||
" import math\n", | ||
" self.length = math.ceil(len(self.dataset) / self.batch_size)\n", | ||
" \n", | ||
" def __iter__(self):\n", | ||
" batch_inputs = []\n", | ||
" labels = []\n", | ||
" for idx, record in enumerate(self.dataset):\n", | ||
" # record e.g.: {'image': <PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=408x500 ...>, 'label': 91}\n", | ||
" img = record['image']\n", | ||
" label = record['label']\n", | ||
" # skip the wrong shapes\n", | ||
" if len(np.array(img).shape) != 3 or np.array(img).shape[-1] != 3:\n", | ||
" continue\n", | ||
" img_resized = transform((img, label)) # (img, label)\n", | ||
" batch_inputs.append(np.array(img_resized[0]))\n", | ||
" labels.append(label)\n", | ||
" if (idx+1) % self.batch_size == 0:\n", | ||
" yield np.array(batch_inputs), np.array(labels) # (bs, 224, 224, 3), (bs,)\n", | ||
" batch_inputs = []\n", | ||
" labels = []\n", | ||
" def __len__(self):\n", | ||
" return self.length" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"calib_dataloader = CustomDataloader(dataset=sub_calib_dataset, batch_size=32)\n", | ||
"eval_dataloader = CustomDataloader(dataset=sub_eval_dataset, batch_size=32)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"## Quantization\n", | ||
"\n", | ||
"Next comes the core quantization logic. `quantization.fit` is the main entry point for converting the base model into a quantized model. We pass it the prepared calibration dataloader and an evaluation function that runs over the evaluation dataloader. After conversion, we obtain the quantized int8 model and save it locally. " | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"from tqdm import tqdm\n", | ||
"import time\n", | ||
"from neural_compressor import quantization\n", | ||
"from neural_compressor.config import PostTrainingQuantConfig\n", | ||
"\n", | ||
"conf = PostTrainingQuantConfig(calibration_sampling_size=[50, 100], excluded_precisions = ['bf16'])\n", | ||
"\n", | ||
"def eval_func(model):\n", | ||
" from neural_compressor.model import Model\n", | ||
" model = Model(model)\n", | ||
" ans = []\n", | ||
" total_cnt = 0\n", | ||
" total_hit = 0\n", | ||
" latency_list = []\n", | ||
" for idx, (batch_inputs, labels) in enumerate(tqdm(eval_dataloader)):\n", | ||
" feed_dict = dict(zip(model.input_tensor, [batch_inputs]))\n", | ||
" start = time.time()\n", | ||
" preds = model.sess.run(model.output_tensor, feed_dict)\n", | ||
" end = time.time()\n", | ||
" latency_list.append(end-start)\n", | ||
" ans = np.argmax(preds[0], axis=-1)\n", | ||
" labels += 1 # label shift\n", | ||
" total_cnt += len(labels)\n", | ||
" total_hit += np.sum(ans == labels)\n", | ||
" acc = total_hit / total_cnt\n", | ||
" latency = np.array(latency_list).mean() / eval_dataloader.batch_size\n", | ||
" return acc\n", | ||
"\n", | ||
"q_model = quantization.fit(\"./resnet50_fp32_pretrained_model.pb\", conf=conf, calib_dataloader=calib_dataloader, eval_func=eval_func)\n", | ||
"q_model.save(\"resnet50_int8.pb\")" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"## Benchmark\n", | ||
"\n", | ||
"Now we have two models in the current directory: the original fp32 model `resnet50_fp32_pretrained_model.pb` and the quantized int8 model `resnet50_int8.pb`. Next, we compare their performance.\n", | ||
"\n", | ||
"\n", | ||
"To avoid conflicts between the Jupyter notebook kernel and the benchmark process, we provide a separate script, `resnet_benchmark.py`, and run it directly to do the benchmarks." | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"### FP32 benchmark" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"!python resnet_benchmark.py --input_model resnet50_fp32_pretrained_model.pb 2>&1|tee fp32_benchmark.log" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"### INT8 benchmark" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"!python resnet_benchmark.py --input_model resnet50_int8.pb 2>&1|tee int8_benchmark.log" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"Finally, you will find the performance results in the logs, like the following:\n", | ||
"\n", | ||
"* fp32_benchmark.log\n", | ||
"\n", | ||
"```\n", | ||
"2023-08-28 22:46:39 [INFO] ********************************************\n", | ||
"2023-08-28 22:46:39 [INFO] |****Multiple Instance Benchmark Summary*****|\n", | ||
"2023-08-28 22:46:39 [INFO] +---------------------------------+----------+\n", | ||
"2023-08-28 22:46:39 [INFO] | Items | Result |\n", | ||
"2023-08-28 22:46:39 [INFO] +---------------------------------+----------+\n", | ||
"2023-08-28 22:46:39 [INFO] | Latency average [second/sample] | 0.027209 |\n", | ||
"2023-08-28 22:46:39 [INFO] | Throughput sum [samples/second] | 36.753 |\n", | ||
"2023-08-28 22:46:39 [INFO] +---------------------------------+----------+\n", | ||
"```\n", | ||
"\n", | ||
"* int8_benchmark.log\n", | ||
"\n", | ||
"```\n", | ||
"2023-08-28 22:48:35 [INFO] ********************************************\n", | ||
"2023-08-28 22:48:35 [INFO] |****Multiple Instance Benchmark Summary*****|\n", | ||
"2023-08-28 22:48:35 [INFO] +---------------------------------+----------+\n", | ||
"2023-08-28 22:48:35 [INFO] | Items | Result |\n", | ||
"2023-08-28 22:48:35 [INFO] +---------------------------------+----------+\n", | ||
"2023-08-28 22:48:35 [INFO] | Latency average [second/sample] | 0.006855 |\n", | ||
"2023-08-28 22:48:35 [INFO] | Throughput sum [samples/second] | 145.874 |\n", | ||
"2023-08-28 22:48:35 [INFO] +---------------------------------+----------+\n", | ||
"```\n", | ||
"\n", | ||
"As shown in the logs, the int8/fp32 performance gain is about 145.87/36.75 = 3.97x" | ||
] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "spycsh-neuralchat", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.10.0" | ||
}, | ||
"orig_nbformat": 4 | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 2 | ||
} |