MTtranslateService/Lib/site-packages/onnxruntime/transformers/fusion_group_norm.py


								# -------------------------------------------------------------------------

								# Copyright (c) Microsoft Corporation.  All rights reserved.

								# Licensed under the MIT License.

								# --------------------------------------------------------------------------

								from logging import getLogger

								from typing import Dict


								import numpy as np

								from fusion_base import Fusion

								from fusion_utils import FusionUtils

								from onnx import TensorProto, helper

								from onnx_model import OnnxModel


								logger = getLogger(__name__)


								class FusionGroupNorm(Fusion):

								    def __init__(self, model: OnnxModel):

								        super().__init__(model, "GroupNorm", "Add")


								    def fuse(self, add_node, input_name_to_nodes: Dict, output_name_to_node: Dict):

								        """

								         Fuse Group Normalization subgraph into one node GroupNorm.

								         The following is the pattern with swish activation:

								               +----------------Shape-------------------------------+

								               |                                                    |

								               |    (0, 32, -1)                                     v     (512x1x1) (512x1x1) (optional)

								           [Root] --> Reshape -------> InstanceNormalization --> Reshape ---> Mul --> Add --> Mul--> [output]

								        Bx512xHxW                 (scale=ones(32), B=zeros(32))                        |       ^     Bx512xHxW

								                                                                                       |       |

								                                                                                       +--->Sigmoid (optional)

								        The Mul and Sigmoid before output is for Swish activation. They are optional.

								        """

								        nodes = self.model.match_parent_path(

								            add_node, ["Mul", "Reshape", "InstanceNormalization", "Reshape"], [0, 0, 0, 0], output_name_to_node

								        )

								        if nodes is None:

								            return


								        weight_mul, reshape_4d, instance_norm, reshape_3d = nodes

								        root = reshape_3d.input[0]


								        parents = self.model.match_parent_path(reshape_4d, ["Shape"], [1], output_name_to_node)

								        if parents is None:

								            return

								        if parents[0].input[0] != root:

								            return

								        shape_node = parents[0]


								        # Check whether it has swish activation.

								        swish_mul = self.model.find_first_child_by_type(add_node, "Mul")

								        swish_sigmoid = None

								        if swish_mul is not None:

								            sigmoid_path = self.model.match_parent_path(swish_mul, ["Sigmoid"], [None], output_name_to_node)

								            if sigmoid_path is not None:

								                swish_sigmoid = sigmoid_path[0]


								        weight_input = weight_mul.input[1 - self.model.input_index(reshape_4d.output[0], weight_mul)]

								        if not self.model.is_constant_with_specified_dimension(weight_input, 3, "group norm weight"):

								            return


								        bias_input = add_node.input[1 - self.model.input_index(weight_mul.output[0], add_node)]

								        if not self.model.is_constant_with_specified_dimension(bias_input, 3, "layernorm bias"):

								            return


								        weight = self.model.get_constant_value(weight_input)

								        if weight is None:

								            return


								        if not (len(weight.shape) == 3 and weight.shape[1] == 1 and weight.shape[2] == 1):

								            return


								        bias = self.model.get_constant_value(bias_input)

								        if bias is None:

								            return

								        if not (len(bias.shape) == 3 and bias.shape[1] == 1 and bias.shape[2] == 1):

								            return


								        weight_elements = int(np.prod(weight.shape))

								        bias_elements = int(np.prod(bias.shape))

								        if weight_elements != bias_elements:

								            return


								        instance_norm_scale = self.model.get_constant_value(instance_norm.input[1])

								        if instance_norm_scale is None:

								            return

								        instance_norm_bias = self.model.get_constant_value(instance_norm.input[2])

								        if instance_norm_bias is None:

								            return


								        if not (

								            len(instance_norm_scale.shape) == 1

								            and len(instance_norm_bias.shape) == 1

								            and instance_norm_scale.shape == instance_norm_bias.shape

								            and instance_norm_scale.shape[0] == 32

								        ):

								            logger.info("InstanceNormalization groups=%d", instance_norm_scale.shape[0])

								            return


								        if not np.allclose(np.ones_like(instance_norm_scale), instance_norm_scale):

								            return

								        if not np.allclose(np.zeros_like(instance_norm_bias), instance_norm_bias):

								            return


								        group_norm_name = self.model.create_node_name("GroupNorm", name_prefix="GroupNorm")


								        if weight_elements not in [320, 640, 960, 1280, 1920, 2560] + [128, 256, 512]:

								            logger.info("GroupNorm channels=%d", weight_elements)


								        gamma = helper.make_tensor(

								            name=group_norm_name + "_gamma",

								            data_type=TensorProto.FLOAT,

								            dims=[weight_elements],

								            vals=weight.flatten().tolist(),

								        )

								        self.model.add_initializer(gamma, self.this_graph_name)


								        beta = helper.make_tensor(

								            name=group_norm_name + "_beta",

								            data_type=TensorProto.FLOAT,

								            dims=[bias_elements],

								            vals=bias.flatten().tolist(),

								        )

								        self.model.add_initializer(beta, self.this_graph_name)


								        last_node = add_node

								        subgraph_nodes = [add_node, weight_mul, reshape_4d, instance_norm, reshape_3d, shape_node]

								        has_swish_activation = swish_mul and swish_sigmoid

								        if swish_mul and swish_sigmoid:

								            subgraph_nodes.extend([swish_mul, swish_sigmoid])

								            last_node = swish_mul


								        if not self.model.is_safe_to_fuse_nodes(

								            subgraph_nodes,

								            last_node.output,

								            input_name_to_nodes,

								            output_name_to_node,

								        ):

								            self.nodes_to_remove.extend([last_node])

								        else:

								            self.nodes_to_remove.extend(subgraph_nodes)


								        # instance_norm_scale might from Constant node. Use prune graph to clear it.

								        self.prune_graph = True


								        # Right now GroupNorm only support float16 input. Need add a Cast in fp32 model.

								        utils = FusionUtils(self.model)


								        input = root

								        output = last_node.output[0]

								        if weight.dtype == np.float32:

								            # Add a Cast node to get float16 input for GroupNorm

								            cast_input, _cast_node = utils.cast_input(root, "float16")

								            input = cast_input


								            # Add a Cast node to convert back to float32 after GroupNorm

								            output = group_norm_name + "_out"

								            cast_node = helper.make_node("Cast", inputs=[group_norm_name + "_out"], outputs=[last_node.output[0]])

								            cast_node.attribute.extend([helper.make_attribute("to", int(TensorProto.FLOAT))])

								            self.model.add_node(cast_node)


								        # NCHW to NHWC

								        transpose_input = helper.make_node(

								            "Transpose",

								            [input],

								            [input + "_NHWC"],

								            name=self.model.create_node_name("Transpose", name_prefix="Transpose_NCHW_to_NHWC"),

								            perm=[0, 2, 3, 1],

								        )


								        new_node = helper.make_node(

								            "GroupNorm",

								            inputs=[input + "_NHWC", group_norm_name + "_gamma", group_norm_name + "_beta"],

								            outputs=[output + "_NHWC"],

								            name=group_norm_name,

								        )


								        new_node.attribute.extend(instance_norm.attribute)

								        new_node.attribute.extend([helper.make_attribute("groups", 32)])

								        new_node.attribute.extend([helper.make_attribute("activation", 1 if has_swish_activation else 0)])

								        new_node.domain = "com.microsoft"


								        # NHWC to NCHW

								        transpose_output = helper.make_node(

								            "Transpose",

								            [output + "_NHWC"],

								            [output],

								            name=self.model.create_node_name("Transpose", name_prefix="Transpose_NHWC_to_NCHW"),

								            perm=[0, 3, 1, 2],

								        )


								        self.nodes_to_add.append(new_node)

								        self.nodes_to_add.append(transpose_input)

								        self.nodes_to_add.append(transpose_output)


								        self.node_name_to_graph_name[new_node.name] = self.this_graph_name

								        self.node_name_to_graph_name[transpose_input.name] = self.this_graph_name

								        self.node_name_to_graph_name[transpose_output.name] = self.this_graph_name