图片解析应用
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

90 lines
4.1 KiB

  1. import numpy as np
  2. import onnx
  3. from ..quant_utils import (
  4. TENSOR_NAME_QUANT_SUFFIX,
  5. QuantizedValue,
  6. QuantizedValueType,
  7. attribute_to_kwarg,
  8. quantize_nparray,
  9. )
  10. from .base_operator import QuantOperatorBase
  11. class QPad(QuantOperatorBase):
  12. def __init__(self, onnx_quantizer, onnx_node):
  13. super().__init__(onnx_quantizer, onnx_node)
  14. def quantize(self):
  15. node = self.node
  16. assert node.op_type == "Pad"
  17. # Only after version 11, it has the optional constant_value
  18. # If input[0] is not quantized, do not quanitize this node
  19. if (self.quantizer.opset_version < 11) or (node.input[0] not in self.quantizer.quantized_value_map):
  20. super().quantize()
  21. return
  22. quantized_input_value = self.quantizer.quantized_value_map[node.input[0]]
  23. kwargs = {}
  24. for attribute in node.attribute:
  25. kv = attribute_to_kwarg(attribute)
  26. kwargs.update(kv)
  27. if "mode" not in kwargs or kwargs["mode"] == b"constant":
  28. if len(node.input) > 2: # There is 3rd input 'constant_value'
  29. zp_tensor = self.quantizer.model.get_initializer(quantized_input_value.zp_name)
  30. scale_tensor = self.quantizer.model.get_initializer(quantized_input_value.scale_name)
  31. if zp_tensor is None or scale_tensor is None:
  32. super().quantize()
  33. return
  34. padding_constant_initializer = self.quantizer.model.get_initializer(node.input[2])
  35. if padding_constant_initializer is not None:
  36. zp_array = onnx.numpy_helper.to_array(zp_tensor)
  37. zp_value = zp_array.item() if zp_array.ndim == 0 else zp_array[0]
  38. scale_array = onnx.numpy_helper.to_array(scale_tensor)
  39. scale_value = scale_array.item() if scale_array.ndim == 0 else scale_array[0]
  40. padding_constant_array = onnx.numpy_helper.to_array(padding_constant_initializer)
  41. quantized_padding_constant_array = quantize_nparray(
  42. self.quantizer.activation_qType,
  43. padding_constant_array,
  44. scale_value,
  45. zp_value,
  46. )
  47. quantized_padding_constant_name = node.input[2] + TENSOR_NAME_QUANT_SUFFIX
  48. quantized_padding_constant_initializer = onnx.numpy_helper.from_array(
  49. quantized_padding_constant_array,
  50. quantized_padding_constant_name,
  51. )
  52. # Suppose this padding constant initializer only used by the node
  53. self.quantizer.model.remove_initializer(padding_constant_initializer)
  54. self.quantizer.model.add_initializer(quantized_padding_constant_initializer)
  55. node.input[2] = quantized_padding_constant_name
  56. else:
  57. # TODO: check quantize_inputs after sub graph is supported
  58. pad_value_qnodes = self.quantizer._get_quantize_input_nodes(
  59. node,
  60. 2,
  61. self.quantizer.activation_qType,
  62. quantized_input_value.scale_name,
  63. quantized_input_value.zp_name,
  64. )
  65. self.quantizer.new_nodes.extend(pad_value_qnodes)
  66. node.input[2] = pad_value_qnodes[0].output[0]
  67. else:
  68. node.input.extend([quantized_input_value.zp_name]) # pad zero_point for original zero
  69. # Create an entry for output quantized value
  70. quantized_output_value = QuantizedValue(
  71. node.output[0],
  72. node.output[0] + TENSOR_NAME_QUANT_SUFFIX,
  73. quantized_input_value.scale_name,
  74. quantized_input_value.zp_name,
  75. QuantizedValueType.Input,
  76. )
  77. self.quantizer.quantized_value_map[node.output[0]] = quantized_output_value
  78. node.input[0] = quantized_input_value.q_name
  79. node.output[0] = quantized_output_value.q_name
  80. self.quantizer.new_nodes += [node]