import numpy as np
import onnx
from onnx import onnx_pb as onnx_proto

from ..quant_utils import (
    TENSOR_NAME_QUANT_SUFFIX,
    BiasToQuantize,
    QuantizedValue,
    QuantizedValueType,
    attribute_to_kwarg,
    find_by_name,
    get_mul_node,
)
from .base_operator import QuantOperatorBase
from .qdq_base_operator import QDQOperatorBase
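
# This module provides three strategies for quantizing a Conv node:
#   * ConvInteger - dynamic quantization: ConvInteger + Cast + Mul nodes rebuild the float output.
#   * QLinearConv - static quantization: a single QLinearConv node with fixed output scale/zero point.
#   * QDQConv     - QDQ format: tensors are only marked for QuantizeLinear/DequantizeLinear insertion.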


class ConvInteger(QuantOperatorBase):
    def __init__(self, onnx_quantizer, onnx_node):
        super().__init__(onnx_quantizer, onnx_node)

    def add_bias(self, nodes, scaled_output):
        """
        Handle the Conv bias by adding a "Reshape" node on the bias followed by an "Add" node.

        parameter nodes: list that newly created nodes are appended to
        parameter scaled_output: output of the quantized conv before the bias is applied
        return: None; the "Add" node writes to the original Conv node's output name
        """
        node = self.node
        model = self.quantizer.model

        # Add tensors for the shape to be reshaped to
        weight = find_by_name(node.input[1], model.initializer())
        if weight is None:
            raise ValueError("Expected {} to be an initializer".format(node.input[1]))

        # Add reshape for correct broadcast
        output = node.output[0]
        reshape_input_data = node.input[2]  # bias of Conv
        reshape_input_shape = output + "_bias_reshape_shape"
        reshape_output = output + "_bias_reshape_output"

        # The bias is 1-D (one value per output channel); reshape it to (1, -1, 1, ...)
        # so it broadcasts along the channel axis of the conv output.
        shape = np.ones((len(weight.dims)), dtype=np.int64)
        shape[1] = -1
        init_shape = onnx.helper.make_tensor(
            reshape_input_shape, onnx_proto.TensorProto.INT64, [len(weight.dims)], shape
        )
        model.add_initializer(init_shape)

        reshape_node = onnx.helper.make_node("Reshape", [reshape_input_data, reshape_input_shape], [reshape_output])
        nodes.append(reshape_node)

        # Add an Add operation for bias
        add_node = onnx.helper.make_node("Add", [scaled_output, reshape_output], [output], output + "_bias_add")
        nodes.append(add_node)
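
    # ConvInteger produces an int32 accumulator, so the float result is recovered as
    #   y = float(ConvInteger(x_q, w_q, x_zp, w_zp)) * (x_scale * w_scale)
    # The Cast and the two Mul nodes built below implement this dequantization.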
    def quantize(self):
        node = self.node
        assert node.op_type == "Conv"

        # Get quantized inputs for both the activation (input[0]) and the weight (input[1])
        (
            quantized_input_names,
            zero_point_names,
            scale_names,
            nodes,
        ) = self.quantizer.quantize_activation(node, [0])
        (
            quantized_input_names_weight,
            zero_point_names_weight,
            scale_names_weight,
            nodes_weight,
        ) = self.quantizer.quantize_weight(node, [1], reduce_range=self.quantizer.reduce_range)
        quantized_input_names.extend(quantized_input_names_weight)
        zero_point_names.extend(zero_point_names_weight)
        scale_names.extend(scale_names_weight)
        nodes.extend(nodes_weight)

        conv_integer_output = node.output[0] + "_output_quantized"
        conv_integer_name = node.name + "_quant" if node.name != "" else ""

        kwargs = {}
        for attribute in node.attribute:
            kwargs.update(attribute_to_kwarg(attribute))
        conv_integer_node = onnx.helper.make_node(
            "ConvInteger", quantized_input_names + zero_point_names, [conv_integer_output], conv_integer_name, **kwargs
        )
        nodes.append(conv_integer_node)

        # Add cast operation to cast the ConvInteger output to float.
        cast_op_output = conv_integer_output + "_cast_output"
        cast_node = onnx.helper.make_node(
            "Cast",
            [conv_integer_output],
            [cast_op_output],
            conv_integer_output + "_cast",
            to=onnx_proto.TensorProto.FLOAT,
        )
        nodes.append(cast_node)

        # Add mul operation to multiply the scales of the two inputs.
        assert len(scale_names) == 2
        if conv_integer_name != "":
            scales_mul_op = conv_integer_name + "_scales_mul"
        else:
            scales_mul_op = scale_names[0] + "_" + scale_names[1] + "_mul"

        scales_mul_node = find_by_name(scales_mul_op, self.quantizer.new_nodes)
        if scales_mul_node is None:
            scales_mul_node = get_mul_node(scale_names, scales_mul_op + ":0", scales_mul_op)
            nodes.append(scales_mul_node)

        scales_mul_op_output = scales_mul_node.output[0]

        has_bias = len(node.input) == 3
        scaled_output_name = node.output[0] if not has_bias else node.output[0] + "quant_scaled_output"

        # Add mul operation to multiply the scales product with the output of ConvInteger;
        # without a bias, this directly produces the original Conv node's output.
        output_scale_mul_op = conv_integer_name + "_output_scale_mul" if conv_integer_name != "" else ""
        nodes.append(
            get_mul_node(
                [cast_op_output, scales_mul_op_output],
                scaled_output_name,
                output_scale_mul_op,
            )
        )
        if has_bias:
            self.add_bias(nodes, scaled_output_name)

        self.quantizer.new_nodes += nodes
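

# QLinearConv fuses the integer convolution and the requantization of its output
# into a single node, so it requires the output's static scale and zero point in
# addition to the quantized inputs.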
class QLinearConv(QuantOperatorBase):
    def __init__(self, onnx_quantizer, onnx_node):
        super().__init__(onnx_quantizer, onnx_node)

    def quantize(self):
        node = self.node
        assert node.op_type == "Conv"

        (
            data_found,
            output_scale_name,
            output_zp_name,
            _,
            _,
        ) = self.quantizer._get_quantization_params(node.output[0])

        if self.quantizer.is_input_a_initializer(node.input[1]) and self.quantizer.is_per_channel():
            (
                quantized_input_names,
                zero_point_names,
                scale_names,
                nodes,
            ) = self.quantizer.quantize_activation(node, [0])
            quant_weight_tuple = self.quantizer.quantize_weight_per_channel(
                node.input[1], onnx_proto.TensorProto.INT8, 0
            )
            quantized_input_names.append(quant_weight_tuple[0])
            zero_point_names.append(quant_weight_tuple[1])
            scale_names.append(quant_weight_tuple[2])
        else:
            (
                quantized_input_names,
                zero_point_names,
                scale_names,
                nodes,
            ) = self.quantizer.quantize_activation(node, [0])
            (
                quantized_input_names_weight,
                zero_point_names_weight,
                scale_names_weight,
                nodes_weight,
            ) = self.quantizer.quantize_weight(node, [1], reduce_range=self.quantizer.reduce_range)
            quantized_input_names.extend(quantized_input_names_weight)
            zero_point_names.extend(zero_point_names_weight)
            scale_names.extend(scale_names_weight)
            nodes.extend(nodes_weight)
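
        # Without static quantization parameters for the output (or quantized
        # inputs), fall back to the base operator's default handling.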
        if not data_found or quantized_input_names is None:
            return super().quantize()

        quantized_bias_name = ""
        bias_present = False
        if len(node.input) == 3:
            quantized_bias_name = self.quantizer.quantize_bias_static(node.input[2], node.input[0], node.input[1])
            bias_present = True

        qlinear_conv_output = node.output[0] + TENSOR_NAME_QUANT_SUFFIX
        qlinear_conv_name = node.name + "_quant" if node.name != "" else ""

        kwargs = {}
        for attribute in node.attribute:
            kwargs.update(attribute_to_kwarg(attribute))

        qlinear_conv_inputs = []
        # Input 0
        qlinear_conv_inputs.append(quantized_input_names[0])
        qlinear_conv_inputs.append(scale_names[0])
        qlinear_conv_inputs.append(zero_point_names[0])
        # Input 1
        qlinear_conv_inputs.append(quantized_input_names[1])
        qlinear_conv_inputs.append(scale_names[1])
        qlinear_conv_inputs.append(zero_point_names[1])
        # Output
        qlinear_conv_inputs.append(output_scale_name)
        qlinear_conv_inputs.append(output_zp_name)

        if bias_present:
            qlinear_conv_inputs.append(quantized_bias_name)

        qlinear_conv_node = onnx.helper.make_node(
            "QLinearConv", qlinear_conv_inputs, [qlinear_conv_output], qlinear_conv_name, **kwargs
        )
        nodes.append(qlinear_conv_node)

        # Create an entry for this quantized value
        q_output = QuantizedValue(
            node.output[0],
            qlinear_conv_output,
            output_scale_name,
            output_zp_name,
            QuantizedValueType.Input,
        )
        self.quantizer.quantized_value_map[node.output[0]] = q_output

        self.quantizer.new_nodes += nodes
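

# QDQConv does not rewrite the Conv node itself. It only marks the activation,
# weight, and bias tensors for quantization so the quantizer can later insert
# QuantizeLinear/DequantizeLinear pairs around the original node.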
class QDQConv(QDQOperatorBase):
    def __init__(self, onnx_quantizer, onnx_node):
        super().__init__(onnx_quantizer, onnx_node)

    def quantize(self):
        node = self.node
        assert node.op_type == "Conv"
        self.quantizer.quantize_activation_tensor(node.input[0])
        if not self.disable_qdq_for_node_output:
            self.quantizer.quantize_activation_tensor(node.output[0])

        if self.quantizer.is_per_channel():
            self.quantizer.quantize_weight_tensor_per_channel(node.input[1], 0)
        else:
            self.quantizer.quantize_weight_tensor(node.input[1])

        if len(node.input) == 3:
            self.quantizer.quantize_bias_tensor(node.input[2], node.input[0], node.input[1])
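

# Usage note: these classes are not invoked directly. The quantizer typically keeps
# an op_type -> handler registry and calls quantize() on each matched node. A sketch
# (the registry names here are illustrative assumptions, not necessarily this
# codebase's actual names):
#
#   IntegerOpsRegistry = {"Conv": ConvInteger}   # dynamic quantization
#   QLinearOpsRegistry = {"Conv": QLinearConv}   # static quantization
#   QDQRegistry = {"Conv": QDQConv}              # QDQ format
#
# The quantizer walks the graph, looks up each node's op_type, instantiates the
# matching handler, and calls its quantize() method.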