m2m model translation
# -------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License. See License.txt in the project root for
# license information.
# --------------------------------------------------------------------------
import logging
import os
import sys
from pathlib import Path
from typing import Dict, List, Union

import torch
from t5_decoder import T5Decoder, T5DecoderHelper, T5DecoderInit
from t5_encoder import T5Encoder, T5EncoderHelper
from t5_encoder_decoder_init import T5EncoderDecoderInit, T5EncoderDecoderInitHelper
from transformers import MT5ForConditionalGeneration, T5ForConditionalGeneration

from onnxruntime import InferenceSession

sys.path.append(os.path.join(os.path.dirname(__file__), "..", ".."))
from float16 import float_to_float16_max_diff
from onnx_model import OnnxModel
from optimizer import optimize_model

logger = logging.getLogger(__name__)

PRETRAINED_T5_MODELS = ["t5-small", "t5-base", "t5-large", "t5-3b", "t5-11b"]
PRETRAINED_MT5_MODELS = ["google/mt5-small", "google/mt5-base", "google/mt5-large", "google/mt5-xl", "google/mt5-xxl"]


class T5Helper:
    @staticmethod
    def get_onnx_path(
        output_dir: str,
        model_name_or_path: str,
        suffix: str = "",
        new_folder: bool = False,
    ) -> str:
        """Build ONNX model path.

        Args:
            output_dir (str): output directory
            model_name_or_path (str): pretrained model name, or path to the model checkpoint
            suffix (str, optional): suffix like "_encoder" or "_decoder_fp16" will be appended to the file name. Defaults to "".
            new_folder (bool, optional): create a new directory for the model. Defaults to False.

        Returns:
            str: path of onnx model
        """
        model_name = model_name_or_path
        if os.path.isdir(model_name_or_path):
            model_name = Path(model_name_or_path).parts[-1]
        else:
            model_name = model_name.split("/")[-1]
        model_name += suffix

        directory = os.path.join(output_dir, model_name) if new_folder else output_dir
        return os.path.join(directory, model_name + ".onnx")
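
    # Illustrative example (values not from the original source): with output_dir="./onnx_models",
    # model_name_or_path="t5-small", suffix="_encoder" and new_folder=False, get_onnx_path returns
    # "./onnx_models/t5-small_encoder.onnx"; with new_folder=True it returns
    # "./onnx_models/t5-small_encoder/t5-small_encoder.onnx".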

    @staticmethod
    def load_model(
        model_name_or_path: str,
        cache_dir: str,
        device: torch.device,
        merge_encoder_and_decoder_init: bool = True,
        model_type: str = "t5",
    ) -> Dict[str, torch.nn.Module]:
        """Load model given a pretrained name or path, then build models for ONNX conversion.

        Args:
            model_name_or_path (str): pretrained model name or path
            cache_dir (str): cache directory
            device (torch.device): device to run the model
            merge_encoder_and_decoder_init (bool, optional): whether to merge encoder and decoder initialization into one ONNX model. Defaults to True.
            model_type (str, optional): model type, "t5" or "mt5". Defaults to "t5".

        Returns:
            Dict[str, torch.nn.Module]: mapping from name to modules for ONNX conversion.
        """
        if model_type == "t5":
            model = T5ForConditionalGeneration.from_pretrained(model_name_or_path, cache_dir=cache_dir)
        elif model_type == "mt5":
            model = MT5ForConditionalGeneration.from_pretrained(model_name_or_path, cache_dir=cache_dir)
        else:
            raise ValueError("only support model_type=t5 or mt5")

        decoder = T5Decoder(model.decoder, model.lm_head, model.config)
        decoder.eval().to(device)

        if merge_encoder_and_decoder_init:
            encoder_decoder_init = T5EncoderDecoderInit(
                model.encoder,
                model.decoder,
                model.lm_head,
                model.config,
                decoder_start_token_id=None,
            )
            return {"encoder_decoder_init": encoder_decoder_init, "decoder": decoder}
        else:
            encoder = T5Encoder(model.encoder, model.config)
            encoder.eval().to(device)
            decoder_init = T5DecoderInit(model.decoder, model.lm_head, model.config)
            decoder_init.eval().to(device)
            return {
                "encoder": encoder,
                "decoder": decoder,
                "decoder_init": decoder_init,
            }
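
    # Usage sketch (illustrative values, not part of the original file): with the default
    # merge_encoder_and_decoder_init=True the call below returns
    # {"encoder_decoder_init": ..., "decoder": ...}; with merge_encoder_and_decoder_init=False
    # it returns separate "encoder", "decoder" and "decoder_init" modules.
    #   models = T5Helper.load_model("t5-small", cache_dir="./cache", device=torch.device("cpu"))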

    @staticmethod
    def export_onnx(
        model: Union[T5Encoder, T5Decoder, T5DecoderInit, T5EncoderDecoderInit],
        device: torch.device,
        onnx_model_path: str,
        verbose: bool = True,
        use_external_data_format: bool = False,
        use_decoder_input_ids: bool = True,
        use_int32_inputs: bool = False,
    ):
        if isinstance(model, T5Encoder):
            T5EncoderHelper.export_onnx(
                model,
                device,
                onnx_model_path,
                verbose,
                use_external_data_format,
                use_int32_inputs,
            )
        elif isinstance(model, T5EncoderDecoderInit):
            T5EncoderDecoderInitHelper.export_onnx(
                model,
                device,
                onnx_model_path,
                use_decoder_input_ids,
                verbose,
                use_external_data_format,
                use_int32_inputs,
            )
        else:
            T5DecoderHelper.export_onnx(
                model,
                device,
                onnx_model_path,
                verbose,
                use_external_data_format,
                use_int32_inputs,
            )
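
    # Example call (hypothetical path, not from the original source): each wrapped module is
    # exported separately, e.g.
    #   T5Helper.export_onnx(models["decoder"], device, "./onnx_models/t5-small_decoder.onnx")
    # Note that T5Decoder and T5DecoderInit both fall through to T5DecoderHelper.export_onnx above.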

    @staticmethod
    def auto_mixed_precision(
        onnx_model: OnnxModel,
        op_block_list: List[str] = [
            "Pow",
            "ReduceMean",
            "Add",
            "Sqrt",
            "Div",
            "Mul",
            "Softmax",
            "Relu",
        ],
    ):
        """Convert model to mixed precision.

        It detects whether the original model has fp16 precision weights, and sets parameters for float16 conversion automatically.

        Args:
            onnx_model (OnnxModel): optimized ONNX model
            op_block_list (List[str], optional): operators to keep in float32. Defaults to ["Pow", "ReduceMean", "Add", "Sqrt", "Div", "Mul", "Softmax", "Relu"]

        Returns:
            parameters(dict): a dictionary of parameters used in float16 conversion
        """
        op_full_set = set([node.op_type for node in onnx_model.nodes()])
        fp32_op_set = set(op_block_list)
        fp16_op_set = op_full_set.difference(fp32_op_set)
        logger.info(f"fp32 op: {fp32_op_set} fp16 op: {fp16_op_set}")

        # logits is the first output
        logits_output_name = onnx_model.graph().output[0].name

        # We use the weight in the last MatMul node to detect whether the model is stored with float16 weights from training.
        is_weight_fp16_precision = False
        output_name_to_node = onnx_model.output_name_to_node()
        assert logits_output_name in output_name_to_node
        node = output_name_to_node[logits_output_name]
        last_matmul_node = None
        if node.op_type == "MatMul":
            last_matmul_node = node
            logger.info(f"Found last MatMul node for logits: {node.name}")
            initializer = None
            for input in node.input:
                initializer = onnx_model.get_initializer(input)
                if initializer is not None:
                    break

            # When the max difference of value after converting float to float16 is lower than a threshold (1e-6),
            # we can deduce that the weights are stored in float16 precision.
            max_diff = float_to_float16_max_diff(initializer)
            logger.debug(f"max diff of converting weights in last MatMul node {node.name}: {max_diff}")
            is_weight_fp16_precision = max_diff < 1e-6
        else:
            logger.warning(f"Failed to find MatMul node for logits. Found {node.op_type} of node {node.name}")

        keep_io_types = []
        node_block_list = []
        if (not is_weight_fp16_precision) and (last_matmul_node is not None):
            # When the original weights are float32, keeping logits and the last MatMul in float32 could give better precision.
            keep_io_types = [logits_output_name]
            node_block_list = [last_matmul_node.name]

        parameters = {
            "keep_io_types": keep_io_types,
            "op_block_list": op_block_list,
            "node_block_list": node_block_list,
            "force_fp16_initializers": is_weight_fp16_precision,
        }

        logger.info(f"auto_mixed_precision parameters: {parameters}")
        onnx_model.convert_float_to_float16(use_symbolic_shape_infer=True, **parameters)

        return parameters
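
    # Illustrative result (a sketch, assuming the first graph output is named "logits" and the
    # exported weights are float32): auto_mixed_precision would return something like
    #   {"keep_io_types": ["logits"], "op_block_list": [...], "node_block_list": ["<last MatMul name>"],
    #    "force_fp16_initializers": False}
    # so that the logits output and the final projection MatMul stay in float32.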

    @staticmethod
    def optimize_onnx(
        onnx_model_path: str,
        optimized_model_path: str,
        is_float16: bool,
        num_attention_heads: int,
        hidden_size: int,
        use_external_data_format: bool = False,
        auto_mixed_precision: bool = True,
    ):
        """Optimize ONNX model with an option to convert it to use mixed precision."""
        m = optimize_model(
            onnx_model_path,
            model_type="bert",  # TODO: support optimization for t5
            num_heads=num_attention_heads,
            hidden_size=hidden_size,
            opt_level=0,
            optimization_options=None,
            use_gpu=False,
        )
        if is_float16:
            if auto_mixed_precision:
                T5Helper.auto_mixed_precision(m)
            else:
                m.convert_model_float32_to_float16(cast_input_output=False)

        m.save_model_to_file(optimized_model_path, use_external_data_format, all_tensors_to_one_file=True)
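
    # Example call (hypothetical paths; num_attention_heads and hidden_size come from the model
    # config, e.g. 8 and 512 for t5-small):
    #   T5Helper.optimize_onnx("t5-small_decoder.onnx", "t5-small_decoder_fp16.onnx",
    #                          is_float16=True, num_attention_heads=8, hidden_size=512)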

    @staticmethod
    def verify_onnx(
        model: Union[T5Encoder, T5Decoder, T5DecoderInit, T5EncoderDecoderInit],
        ort_session: InferenceSession,
        device: torch.device,
        use_int32_inputs: bool,
    ):
        """Compare the result from PyTorch and OnnxRuntime to verify the ONNX model is good."""
        if isinstance(model, T5Encoder):
            return T5EncoderHelper.verify_onnx(model, ort_session, device, use_int32_inputs)

        if isinstance(model, T5EncoderDecoderInit):
            return T5EncoderDecoderInitHelper.verify_onnx(model, ort_session, device, use_int32_inputs)

        return T5DecoderHelper.verify_onnx(model, ort_session, device, use_int32_inputs)
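

# Minimal end-to-end sketch (not part of the original file; the model name, directories and the
# CPU execution provider are assumptions for illustration only).
if __name__ == "__main__":
    device = torch.device("cpu")

    # Load the PyTorch modules to convert (merged encoder/decoder-init plus decoder by default).
    models = T5Helper.load_model("t5-small", cache_dir="./cache_models", device=device)

    for name, module in models.items():
        # Build an output path such as "./onnx_models/t5-small_decoder.onnx".
        onnx_path = T5Helper.get_onnx_path("./onnx_models", "t5-small", suffix="_" + name)
        os.makedirs(os.path.dirname(onnx_path), exist_ok=True)

        # Export the module to ONNX.
        T5Helper.export_onnx(module, device, onnx_path, use_external_data_format=False)

        # Verify the exported model against PyTorch using ONNX Runtime on CPU.
        session = InferenceSession(onnx_path, providers=["CPUExecutionProvider"])
        T5Helper.verify_onnx(module, session, device, use_int32_inputs=False)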