【发布时间】:2021-02-12 18:19:09
【问题描述】:
我目前已经使用以下类脚本量化了一个张量流模型:
class QuantModel():
    """Convert a Keras model to TFLite, optionally with full-integer
    (int8) post-training quantization.

    Parameters
    ----------
    model : tf.keras.Model
        The trained Keras model to convert.
    data : sequence
        Representative input data; indexing such as ``data[0]`` must
        return one input in the correct format to be fed forward
        through the network.
    """

    def __init__(self, model=None, data=None):
        # FIX: the original defaults were `model=tf.keras.Model` (the
        # class object itself, not a model instance — useless as a
        # default) and `data=[]` (a mutable default shared across all
        # instances). None sentinels avoid both pitfalls while keeping
        # explicit-argument callers unaffected.
        self.data = [] if data is None else data
        self.model = model

    def quant_model_int8(self):
        """Quantize the model to full int8 and write it to
        ``converted_model2.tflite``.

        Returns the serialized TFLite flatbuffer (bytes). May produce
        mixed quantization if custom ops (e.g. LogMelSpectrogram) are
        enabled via ``allow_custom_ops``.
        """
        converter = tf.lite.TFLiteConverter.from_keras_model(self.model)
        converter.representative_dataset = self.representative_data_gen
        converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
        converter.inference_input_type = tf.int8  # or tf.uint8
        converter.inference_output_type = tf.int8  # or tf.uint8
        # converter.allow_custom_ops = True
        converter.optimizations = [tf.lite.Optimize.DEFAULT]
        tflite_model_quant = converter.convert()
        # FIX: use a context manager so the file handle is closed; the
        # original bare open(...).write(...) leaked the handle.
        with open("converted_model2.tflite", "wb") as f:
            f.write(tflite_model_quant)
        return tflite_model_quant

    def convert_tflite_no_quant(self):
        """Convert without quantization (weights and activations stay
        float32) and write the result to ``converted_model.tflite``.

        Returns the serialized TFLite flatbuffer (bytes).
        """
        converter = tf.lite.TFLiteConverter.from_keras_model(self.model)
        tflite_model = converter.convert()
        with open("converted_model.tflite", "wb") as f:
            f.write(tflite_model)
        return tflite_model

    def representative_data_gen(self):
        # Model has only one input, so each yielded list has one element.
        # NOTE(review): this yields the entire dataset as ONE calibration
        # sample; the TFLite convention is to yield samples one at a time
        # (`for d in self.data: yield [d]`). Kept as-is because the
        # poster reports calibration succeeded — confirm the intended
        # shape before changing.
        yield [self.data]
我能够成功地量化我的模型,但是输入和输出是 int8,因为这些是您量化后的选项。
现在要运行模型,我使用 tf.quantization.quantize 将输入数据更改为 qint 数据格式并通过我的网络提供。所以正如预期的那样,我得到了一个 int8 的输出。
我想将输出转换回 float32 并检查它。为此,我正在使用 tf.dequantize。但是,这只适用于 tf.qint8 数据类型。
想知道如何处理这个问题,是否有人遇到过类似的问题?
# Load the TFLite model and allocate tensors.
interpreter = tf.lite.Interpreter(model_path="converted_model2.tflite")
interpreter.allocate_tensors()

# Get input and output tensor metadata.
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

data_arr = np.load('Data_Mel.npy')
print(data_arr.shape)
sample = data_arr[0]
print(sample.shape)

# FIX: a fully int8-quantized TFLite model expects a plain np.int8 array
# quantized with the model's OWN input scale/zero-point (stored by the
# converter in input_details[0]['quantization']). The original used
# tf.quantization.quantize(..., tf.qint8) with the data's min/max, which
# (a) produces a tf.qint8 tensor the interpreter cannot consume and
# (b) uses the wrong quantization parameters anyway — which is also why
# tf.dequantize later only accepted qint8.
in_scale, in_zero_point = input_details[0]['quantization']
input_data = np.round(sample / in_scale + in_zero_point).astype(np.int8)
# NOTE(review): if the model was exported with a leading batch dimension,
# add it with np.expand_dims(input_data, 0) — check against
# input_details[0]['shape'] before running.
input_shape = input_details[0]['shape']
print(input_data.shape)

interpreter.set_tensor(input_details[0]['index'], input_data)
interpreter.invoke()

# `get_tensor()` returns a copy of the tensor data.
# Use `tensor()` in order to get a pointer to the tensor.
output_data = interpreter.get_tensor(output_details[0]['index'])
print(output_data.dtype)

# FIX: dequantize with the OUTPUT tensor's scale/zero-point from
# output_details — the output has its own quantization parameters; the
# input data's min/max (used by the original via tf.dequantize) does not
# apply to the output, so no tf.qint8 round-trip is needed at all.
out_scale, out_zero_point = output_details[0]['quantization']
output_data = (output_data.astype(np.float32) - out_zero_point) * out_scale
print(output_data)
【问题讨论】:
标签: tensorflow2.0 tensorflow-lite