# Input 0
qlinear_matmul_inputs.append(quantized_input_names[0])
qlinear_matmul_inputs.append(scale_names[0])
qlinear_matmul_inputs.append(zero_point_names[0])
# Input 1
qlinear_matmul_inputs.append(quantized_input_names[1])
qlinear_matmul_inputs.append(scale_names[1])
qlinear_matmul_inputs.append(zero_point_names[1])
# Output
qlinear_matmul_inputs.append(output_scale_name)
qlinear_matmul_inputs.append(output_zp_name)
qlinear_matmul_node = onnx.helper.make_node("QLinearMatMul", qlinear_matmul_inputs,
                                            [qlinear_matmul_output], qlinear_matmul_name)
nodes.append(qlinear_matmul_node)
# Create an entry for this quantized value
q_output = QuantizedValue(node.output[0], qlinear_matmul_output, output_scale_name, output_zp_name,
                          QuantizedValueType.Input)
self.quantized_value_map[node.output[0]] = q_output
return nodes
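
# Illustrative sketch (not from the original file): the affine quantization
# scheme that QLinearMatMul assumes for each quantized input/output tensor,
# i.e. real_value = (quantized_value - zero_point) * scale. The helper names
# demo_quantize / demo_dequantize are hypothetical.
import numpy as np

def demo_quantize(x, scale, zero_point):
    # Map float32 values to uint8 with rounding and saturation.
    return np.clip(np.round(x / scale) + zero_point, 0, 255).astype(np.uint8)

def demo_dequantize(q, scale, zero_point):
    # Recover approximate float32 values from uint8 data.
    return (q.astype(np.float32) - zero_point) * scale
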
quantized_per_channel_data_list.append(quantized_per_channel_data)
channel_index = 0 # (M x C/group x kH x kW)
# combine per_channel_data into one
reshape_dims = list(initializer.dims)  # copy the dims so we can modify them
reshape_dims[channel_index] = 1 # only one per channel for reshape
quantized_weights = np.asarray(quantized_per_channel_data_list[0]).reshape(reshape_dims)
for i in range(1, len(quantized_per_channel_data_list)):
    channel_weights = np.asarray(quantized_per_channel_data_list[i]).reshape(reshape_dims)
    quantized_weights = np.concatenate((quantized_weights, channel_weights), axis=0)
weight = QuantizedInitializer(initializer.name, initializer, rmin_list, rmax_list, zero_point_list,
                              scale_list, weights, quantized_weights.flatten().tolist(), channel_index, qType)
# Make entry for this quantized weight
assert weight.name not in self.quantized_value_map
quantized_value = QuantizedValue(weight.name, weight.name + "_quantized", weight.name + "_scale",
                                 weight.name + "_zero_point", QuantizedValueType.Initializer, None, qType)
self.quantized_value_map[weight.name] = quantized_value
return weight
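
# Illustrative sketch (not from the original file) of per-channel weight
# quantization along axis 0: each output channel gets its own scale, which is
# what the loop above accumulates via quantized_per_channel_data_list.
# demo_quantize_per_channel is a hypothetical helper.
import numpy as np

def demo_quantize_per_channel(weights):
    # weights: float32 array shaped (M x C/group x kH x kW); axis 0 is the channel axis.
    scales, quantized = [], []
    for channel in weights:
        rmax = float(np.max(np.abs(channel)))
        scale = rmax / 127.0 if rmax > 0 else 1.0
        scales.append(scale)
        quantized.append(np.clip(np.round(channel / scale), -127, 127).astype(np.int8))
    return np.stack(quantized), np.asarray(scales)
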
# Input 1
qlinear_conv_inputs.append(quantized_input_names[1])
qlinear_conv_inputs.append(scale_names[1])
qlinear_conv_inputs.append(zero_point_names[1])
# Output
qlinear_conv_inputs.append(output_scale_name)
qlinear_conv_inputs.append(output_zp_name)
if bias_present:
    qlinear_conv_inputs.append(quantized_bias_name)
qlinear_conv_node = onnx.helper.make_node("QLinearConv", qlinear_conv_inputs,
                                          [qlinear_conv_output], qlinear_conv_name, **kwargs)
nodes.append(qlinear_conv_node)
# Create an entry for this quantized value
q_output = QuantizedValue(node.output[0], qlinear_conv_output, output_scale_name, output_zp_name,
                          QuantizedValueType.Input)
self.quantized_value_map[node.output[0]] = q_output
return nodes
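
# For reference, a minimal sketch (assumed tensor names, not from the original
# file) of the input ordering QLinearConv expects per the ONNX operator spec:
# x, x_scale, x_zero_point, w, w_scale, w_zero_point, y_scale, y_zero_point,
# and an optional int32 bias B. This matches the append order built above.
import onnx

demo_qlinear_conv = onnx.helper.make_node(
    "QLinearConv",
    inputs=["x_q", "x_scale", "x_zp",    # quantized input and its parameters
            "w_q", "w_scale", "w_zp",    # quantized weight and its parameters
            "y_scale", "y_zp",           # output quantization parameters
            "bias_q"],                   # optional int32 bias
    outputs=["y_q"],
    name="demo_qlinear_conv_node",
)
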
def _get_quantized_weight(self, initializer, qType):
    '''
    :param initializer: TensorProto initializer
    :param qType: type to quantize to
    :return: Weight class with quantization information
    '''
    weights_data = self.find_weight_data(initializer)
    rmin, rmax, zero_point, scale, quantized_weights_data = quantize_data(
        weights_data.flatten().tolist(), _get_qrange_for_qType(qType), qType)
    weight = QuantizedInitializer(initializer.name, initializer, [rmin], [rmax], [zero_point], [scale],
                                  weights_data, quantized_weights_data, axis=None, qType=qType)
    # Log an entry for this quantized weight
    assert weight.name not in self.quantized_value_map
    quantized_value = QuantizedValue(weight.name, weight.name + "_quantized", weight.name + "_scale",
                                     weight.name + "_zero_point", QuantizedValueType.Initializer, None, qType)
    self.quantized_value_map[weight.name] = quantized_value
    return weight
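
# Illustrative sketch (an assumption; the real quantize_data helper may differ)
# of computing asymmetric uint8 quantization parameters matching the return
# values above: rmin/rmax bound the data, scale maps that range onto the
# quantized range, and zero_point is where real 0.0 lands.
import numpy as np

def demo_quantize_data(data, qrange=255.0):
    data = np.asarray(data, dtype=np.float32)
    rmin = min(float(data.min()), 0.0)   # include 0 so it is exactly representable
    rmax = max(float(data.max()), 0.0)
    scale = (rmax - rmin) / qrange if rmax > rmin else 1.0
    zero_point = int(round(-rmin / scale))
    quantized = np.clip(np.round(data / scale) + zero_point, 0, qrange).astype(np.uint8)
    return rmin, rmax, zero_point, scale, quantized
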
# Update the bias initializer with the quantized int32 data
bias_np_data = np.asarray(quantized_data, dtype=np.int32).reshape(bias_initializer.dims)
packed_bias_initializer = onnx.numpy_helper.from_array(bias_np_data, quantized_bias_name)
self.model.graph.initializer.extend([packed_bias_initializer])
bias_value_info = onnx.helper.make_tensor_value_info(quantized_bias_name, onnx_proto.TensorProto.INT32, bias_initializer.dims)
self.model.graph.input.extend([bias_value_info])
# Log entries for this quantized bias value
quantized_bias_entry = QuantizedInitializer(bias_name, bias_initializer, [0], [0], [0], [bias_scale],
                                            bias_data, quantized_data, qType=onnx_proto.TensorProto.INT32)
self._quantized_weights.append(quantized_bias_entry)
assert bias_name not in self.quantized_value_map
quantized_value = QuantizedValue(bias_name, quantized_bias_name, "", "",
                                 QuantizedValueType.Initializer, None, onnx_proto.TensorProto.INT32)
self.quantized_value_map[bias_name] = quantized_value
return quantized_bias_name
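
# Illustrative sketch (hypothetical helper) of the int32 bias quantization used
# with QLinearConv: the bias scale is the product of the input scale and the
# weight scale, and the zero point is 0, which is why the entry above records
# [0] zero points and a single [bias_scale].
import numpy as np

def demo_quantize_bias(bias_data, input_scale, weight_scale):
    bias_scale = input_scale * weight_scale
    quantized = np.round(np.asarray(bias_data, dtype=np.float32) / bias_scale).astype(np.int32)
    return quantized, bias_scale
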
def _quantize_gather_ops(self, node, new_nodes_list):
    assert node.op_type == "Gather"
    (quantized_input_names, zero_point_names, scale_names, nodes) = \
        self._quantize_inputs(node, [0], new_nodes_list)
    gather_new_output = node.output[0] + "_quantized"
    # Create an entry for this quantized value
    q_output = QuantizedValue(node.output[0], gather_new_output, scale_names[0], zero_point_names[0],
                              QuantizedValueType.Input)
    self.quantized_value_map[node.output[0]] = q_output
    gather_original_output = node.output[0]
    node.output[0] = gather_new_output
    node.input[0] = quantized_input_names[0]
    nodes.append(node)
    return nodes
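
# Illustrative check (not from the original file) of why Gather can run
# directly on quantized data with the input's scale/zero point reused for the
# output: Gather only selects rows, so quantize-then-gather equals
# gather-then-quantize.
import numpy as np

table = np.array([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]], dtype=np.float32)
scale, zp = 0.05, 0
table_q = np.clip(np.round(table / scale) + zp, 0, 255).astype(np.uint8)
indices = np.array([2, 0])
dequantized = (table_q[indices].astype(np.float32) - zp) * scale
assert np.allclose(dequantized, table[indices], atol=scale)
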