CnOCRService/Lib/site-packages/onnxruntime/quantization/operators/direct_q8.py


								from ..quant_utils import TENSOR_NAME_QUANT_SUFFIX, QuantizedValue, QuantizedValueType

								from .base_operator import QuantOperatorBase

								from .qdq_base_operator import QDQOperatorBase


								# For operators that support 8bits operations directly, and output could

								# reuse input[0]'s type, zeropoint, scale; For example,Transpose, Reshape, etc.

								class Direct8BitOp(QuantOperatorBase):

								    def __init__(self, onnx_quantizer, onnx_node):

								        super().__init__(onnx_quantizer, onnx_node)


								    def quantize(self):

								        node = self.node


								        if not self.quantizer.force_quantize_no_input_check:

								            # Keep backward compatiblity

								            # Quantize when input[0] is quantized already. Otherwise keep it.

								            quantized_input_value = self.quantizer.find_quantized_value(node.input[0])

								            if quantized_input_value is None:

								                self.quantizer.new_nodes += [node]

								                return


								            quantized_output_value = QuantizedValue(

								                node.output[0],

								                node.output[0] + TENSOR_NAME_QUANT_SUFFIX,

								                quantized_input_value.scale_name,

								                quantized_input_value.zp_name,

								                quantized_input_value.value_type,

								            )

								            self.quantizer.quantized_value_map[node.output[0]] = quantized_output_value


								            node.input[0] = quantized_input_value.q_name

								            node.output[0] = quantized_output_value.q_name

								            self.quantizer.new_nodes += [node]


								        else:

								            # Force quantize those ops if possible, use exclude node list if this is not you want

								            if not self.quantizer.is_valid_quantize_weight(node.input[0]):

								                super().quantize()

								                return


								            (

								                quantized_input_names,

								                zero_point_names,

								                scale_names,

								                nodes,

								            ) = self.quantizer.quantize_activation(node, [0])

								            if quantized_input_names is None:

								                return super().quantize()


								            # Create an entry for output quantized value

								            quantized_output_value = QuantizedValue(

								                node.output[0],

								                node.output[0] + TENSOR_NAME_QUANT_SUFFIX,

								                scale_names[0],

								                zero_point_names[0],

								                QuantizedValueType.Input,

								            )

								            self.quantizer.quantized_value_map[node.output[0]] = quantized_output_value


								            node.input[0] = quantized_input_names[0]

								            node.output[0] = quantized_output_value.q_name

								            nodes.append(node)


								            self.quantizer.new_nodes += nodes


								class QDQDirect8BitOp(QDQOperatorBase):

								    def __init__(self, onnx_quantizer, onnx_node):

								        super().__init__(onnx_quantizer, onnx_node)


								    def quantize(self):

								        if self.quantizer.force_quantize_no_input_check:

								            self.quantizer.quantize_activation_tensor(self.node.input[0])

								            if not self.disable_qdq_for_node_output:

								                self.quantizer.quantize_activation_tensor(self.node.output[0], self.node.input[0])

								        elif self.quantizer.is_tensor_quantized(self.node.input[0]) and not self.disable_qdq_for_node_output:

								            self.quantizer.quantize_activation_tensor(self.node.output[0], self.node.input[0])