依赖安装注意事项:NNCF 与所用后端框架(如 OpenVINO)的版本必须相互匹配(版本对应关系见 https://github.com/openvinotoolkit/nncf/blob/develop/docs/Installation.md),否则容易遇到各种兼容性问题。
import nncf
import openvino as ov
from paddle.io import DataLoader
from dataset import SimpleDataSet, DecodeImage, DetResizeForTest, NormalizeImage, ToCHWImage, KeepKeys
def transform_fn(data_item):
    """Return the model input taken from one calibration data item.

    Passed to ``nncf.Dataset`` as the transform function, so it is called
    once per item yielded by the data loader.

    Args:
        data_item: An indexable item produced by the data loader. Only the
            first element is used; presumably this is the image batch, since
            the pipeline keeps the keys ``['image', 'shape']`` in that order
            (verify against the dataset implementation).

    Returns:
        The first element of ``data_item``.
    """
    return data_item[0]
def quantization(
    model_path: str,
    output_path: str,
    *,
    data_dir: str = 'D:/datasets/hukouben_crop_direction',
    label_file: str = 'D:/datasets/hukouben_crop_direction/text_det/Label.txt',
) -> None:
    """Quantize a model with NNCF and save the result with OpenVINO.

    The model is read with ``ov.Core().read_model``, calibrated on the
    dataset described by ``data_dir`` / ``label_file`` and quantized with
    ``nncf.quantize`` using its default settings, then written with
    ``ov.save_model``.

    Args:
        model_path: Path of the model to read (the script passes an ONNX
            file).
        output_path: Path of the OpenVINO ``.xml`` file to write. Its parent
            directory is created if it does not exist.
        data_dir: Root directory of the calibration images. Defaults to the
            path that was previously hard-coded here.
        label_file: Label file describing the calibration samples. Defaults
            to the path that was previously hard-coded here.
    """
    from pathlib import Path

    model = ov.Core().read_model(model_path)

    # Example preprocessing for a text-detection model trained with
    # PaddleOCR; adapt it to the preprocessing the model was trained with.
    transforms = [
        DecodeImage(img_mode='BGR', channel_first=False),
        DetResizeForTest(image_shape=[640, 640]),
        NormalizeImage(
            scale=1. / 255.,
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
            order='hwc',
        ),
        ToCHWImage(),
        # Order matters: transform_fn picks element 0 as the model input.
        KeepKeys(keep_keys=['image', 'shape']),
    ]

    val_dataset = SimpleDataSet(
        data_dir=data_dir,
        label_file_list=label_file,
        do_shuffle=False,
        mode='val',
        ratio_list=[1],
        delimiter='\t',
        transforms=transforms,
        seed=None,
    )
    data_loader = DataLoader(val_dataset, batch_size=1)

    # nncf.Dataset applies transform_fn to every item from the loader to
    # obtain the tensor that is fed to the model during calibration.
    calibration_dataset = nncf.Dataset(data_loader, transform_fn)
    quantized_model = nncf.quantize(model, calibration_dataset)

    # Make sure the target directory exists so saving does not depend on it
    # having been created beforehand.
    Path(output_path).parent.mkdir(parents=True, exist_ok=True)
    ov.save_model(quantized_model, output_path)
if __name__ == '__main__':
    # Path of the source ONNX model.
    model_path = 'hukouben_text_det_mv3_db++.onnx'
    # Path where the OpenVINO model is saved.
    output_path = './output/ser_640_640_model_quant.xml'
    quantization(model_path, output_path)
nncf.quantize 函数还有一些其他可调参数。对于 transformer 类模型,需要设置 model_type 参数。个人实践中曾遇到这样一个模型:基本构建块为 transformer 结构,但自注意力模块被替换为由 CNN 组成的模块,其余部分不变。不将 model_type 设置为 transformer 时,量化后的模型性能下降,推理速度略有提升;将 model_type 设置为 transformer 后,模型性能相差无几,但推理速度降低。上述经历仅供参考。nncf 还提供其他模型量化策略,以上代码只是其中的一部分,已在实际应用中验证可行。