图片解析应用
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

86 lines
3.0 KiB

  1. import onnx
  2. from ..quant_utils import TENSOR_NAME_QUANT_SUFFIX, QuantizedValue, QuantizedValueType, attribute_to_kwarg, ms_domain
  3. from .base_operator import QuantOperatorBase
  4. from .qdq_base_operator import QDQOperatorBase
  5. class QLinearSoftmax(QuantOperatorBase):
  6. def quantize(self):
  7. node = self.node
  8. # set limitations for softmax output scale and zp, because the output of softmax is always 0-1
  9. if self.quantizer.activation_qType == onnx.onnx_pb.TensorProto.UINT8:
  10. out_scale = 1 / 256.0
  11. out_zero_point = 0
  12. else:
  13. out_scale = 1 / 256.0
  14. out_zero_point = -128
  15. # only try to quantize when given quantization parameters for it
  16. (
  17. data_found,
  18. output_scale_name,
  19. output_zp_name,
  20. _,
  21. _,
  22. ) = self.quantizer._get_quantization_params(node.output[0], out_scale, out_zero_point)
  23. # get quantized input tensor names, quantize input if needed
  24. (
  25. quantized_input_names,
  26. input_zero_point_names,
  27. input_scale_names,
  28. nodes,
  29. ) = self.quantizer.quantize_activation(node, [0])
  30. if not data_found or quantized_input_names is None:
  31. return super().quantize()
  32. # Create an entry for output quantized value.
  33. qlinear_output_name = node.output[0] + TENSOR_NAME_QUANT_SUFFIX
  34. quantized_output_value = QuantizedValue(
  35. node.output[0],
  36. qlinear_output_name,
  37. output_scale_name,
  38. output_zp_name,
  39. QuantizedValueType.Input,
  40. )
  41. self.quantizer.quantized_value_map[node.output[0]] = quantized_output_value
  42. # Create qlinear softmax node for given type
  43. kwargs = {}
  44. for attribute in node.attribute:
  45. kwargs.update(attribute_to_kwarg(attribute))
  46. kwargs["domain"] = ms_domain
  47. # make qlinearsoft has the real opset_version, its default SinceVersion would be 1
  48. kwargs["opset"] = self.quantizer.opset_version
  49. qlinear_node_name = node.name + "_quant" if node.name != "" else ""
  50. qnode = onnx.helper.make_node(
  51. "QLinear" + node.op_type,
  52. [
  53. quantized_input_names[0],
  54. input_scale_names[0],
  55. input_zero_point_names[0],
  56. output_scale_name,
  57. output_zp_name,
  58. ],
  59. [qlinear_output_name],
  60. qlinear_node_name,
  61. **kwargs,
  62. )
  63. # add all newly created nodes
  64. nodes.append(qnode)
  65. self.quantizer.new_nodes += nodes
  66. return None
  67. class QDQSoftmax(QDQOperatorBase):
  68. def quantize(self):
  69. super().quantize()
  70. if self.quantizer.activation_qType == onnx.onnx_pb.TensorProto.UINT8:
  71. out_scale = 1 / 256.0
  72. out_zero_point = 0
  73. else:
  74. out_scale = 1 / 256.0
  75. out_zero_point = -128
  76. self.quantizer.set_quant_scale_zp(self.node.output[0], (out_scale, out_zero_point))