m2m model translation
# -------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
# --------------------------------------------------------------------------
# This file is modified from https://github.com/microsoft/onnxconverter-common/blob/master/onnxconverter_common/float16.py
# Modifications: keep_io_types can be a list of names; convert initializers if needed to preserve precision; add force_fp16_initializers option.
import itertools
import logging
from typing import Dict

import numpy as np
import onnx
from onnx import helper, numpy_helper
from onnx import onnx_pb as onnx_proto
from packaging import version

logger = logging.getLogger(__name__)


def _npfloat16_to_int(np_list):
    """
    Convert numpy float16 to python int.

    :param np_list: numpy float16 list
    :return int_list: python int list
    """
    return [int(bin(_.view("H"))[2:].zfill(16), 2) for _ in np_list]
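
# Illustrative example (not part of the original module): np.float16(1.0) has the
# bit pattern 0x3C00, so _npfloat16_to_int([np.float16(1.0)]) returns [15360].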


def convert_np_to_float16(np_array, min_positive_val=5.96e-08, max_finite_val=65504.0):
    """
    Convert float32 numpy array to float16 without changing sign or finiteness.
    Positive values less than min_positive_val are mapped to min_positive_val.
    Positive finite values greater than max_finite_val are mapped to max_finite_val.
    Similar for negative values. NaN, 0, inf, and -inf are unchanged.
    """

    def between(a, b, c):
        return np.logical_and(a < b, b < c)

    np_array = np.where(between(0, np_array, min_positive_val), min_positive_val, np_array)
    np_array = np.where(between(-min_positive_val, np_array, 0), -min_positive_val, np_array)
    np_array = np.where(between(max_finite_val, np_array, float("inf")), max_finite_val, np_array)
    np_array = np.where(between(float("-inf"), np_array, -max_finite_val), -max_finite_val, np_array)
    return np.float16(np_array)
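
# Illustrative example (not part of the original module): out-of-range values are
# clamped rather than flushed to zero or overflowed to infinity, e.g.
#   convert_np_to_float16(np.array([1e-10, 1e10], dtype=np.float32))
# yields approximately [5.96e-08, 65504.0] as float16.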


def convert_tensor_float_to_float16(tensor, min_positive_val=5.96e-08, max_finite_val=65504.0):
    """Convert tensor float to float16.

    Args:
        tensor (TensorProto): the tensor to convert.
        min_positive_val (float, optional): minimal positive value. Defaults to 5.96e-08.
        max_finite_val (float, optional): maximal finite value. Defaults to 65504.0.

    Raises:
        ValueError: input type is not TensorProto.

    Returns:
        TensorProto: the converted tensor.
    """
    if not isinstance(tensor, onnx_proto.TensorProto):
        raise ValueError("Expected input type is an ONNX TensorProto but got %s" % type(tensor))

    if tensor.data_type == onnx_proto.TensorProto.FLOAT:
        tensor.data_type = onnx_proto.TensorProto.FLOAT16
        # convert float_data (float type) to float16 and write to int32_data
        if tensor.float_data:
            float16_data = convert_np_to_float16(np.array(tensor.float_data), min_positive_val, max_finite_val)
            int_list = _npfloat16_to_int(float16_data)
            tensor.int32_data[:] = int_list
            tensor.float_data[:] = []
        # convert raw_data (bytes type)
        if tensor.raw_data:
            # convert tensor.raw_data to a float32 array
            float32_list = np.frombuffer(tensor.raw_data, dtype="float32")
            # convert float32 to float16
            float16_list = convert_np_to_float16(float32_list, min_positive_val, max_finite_val)
            # convert float16 to bytes and write back to raw_data
            tensor.raw_data = float16_list.tobytes()
    return tensor
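
# Illustrative example (not part of the original module): a float32 tensor built
# with numpy_helper stores its payload in raw_data and is converted in place:
#   t = numpy_helper.from_array(np.ones((2, 2), dtype=np.float32), name="w")
#   t = convert_tensor_float_to_float16(t)
#   assert t.data_type == onnx_proto.TensorProto.FLOAT16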


def make_value_info_from_tensor(tensor):
    """Create a ValueInfoProto with the tensor's name, data type, and shape."""
    shape = numpy_helper.to_array(tensor).shape
    return helper.make_tensor_value_info(tensor.name, tensor.data_type, shape)


DEFAULT_OP_BLOCK_LIST = [
    "ArrayFeatureExtractor",
    "Binarizer",
    "CastMap",
    "CategoryMapper",
    "DictVectorizer",
    "FeatureVectorizer",
    "Imputer",
    "LabelEncoder",
    "LinearClassifier",
    "LinearRegressor",
    "Normalizer",
    "OneHotEncoder",
    "SVMClassifier",
    "SVMRegressor",
    "Scaler",
    "TreeEnsembleClassifier",
    "TreeEnsembleRegressor",
    "ZipMap",
    "NonMaxSuppression",
    "TopK",
    "RoiAlign",
    "Resize",
    "Range",
    "CumSum",
    "Min",
    "Max",
    "Upsample",
]
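
# Illustrative example (not part of the original module): callers can extend the
# default list to keep additional op types in float32, e.g.
#   convert_float_to_float16(model, op_block_list=DEFAULT_OP_BLOCK_LIST + ["Softmax"])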


class InitializerTracker:
    """Keep track of a float32 initializer and the nodes that consume it."""

    def __init__(self, initializer: onnx_proto.TensorProto):
        self.initializer = initializer
        self.fp32_nodes = []
        self.fp16_nodes = []

    def add_node(self, node: onnx_proto.NodeProto, is_node_blocked):
        if is_node_blocked:
            self.fp32_nodes.append(node)
        else:
            self.fp16_nodes.append(node)
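
# Illustrative note (not part of the original module): an initializer consumed only
# by blocked (float32) nodes is left untouched unless force_fp16_initializers is set;
# one consumed by any unblocked node is converted to float16, and Cast nodes are
# inserted later for its remaining float32 consumers.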


def convert_float_to_float16(
    model,
    min_positive_val=5.96e-08,
    max_finite_val=65504.0,
    keep_io_types=False,
    disable_shape_infer=False,
    op_block_list=None,
    node_block_list=None,
    force_fp16_initializers=False,
):
    """Convert tensor float type in the ONNX ModelProto input to tensor float16.

    Args:
        model (ModelProto): The ONNX model to convert.
        min_positive_val (float, optional): minimal positive value. Defaults to 5.96e-08.
        max_finite_val (float, optional): maximal finite value of float16. Defaults to 65504.0.
        keep_io_types (Union[bool, List[str]], optional): boolean, or a list of float32 input/output names.
            If True, all model inputs/outputs are left as float32. Defaults to False.
        disable_shape_infer (bool, optional): skip running onnx shape/type inference.
            Useful if shape inference has already been done. Defaults to False.
        op_block_list (List[str], optional): list of op types to leave as float32.
            Defaults to None, which will use `float16.DEFAULT_OP_BLOCK_LIST`.
        node_block_list (List[str], optional): list of node names to leave as float32. Defaults to None.
        force_fp16_initializers (bool): force converting all float initializers to float16.
            Defaults to False, which converts only those needed to avoid precision loss.

    Raises:
        ValueError: input type is not ModelProto.

    Returns:
        ModelProto: converted model.
    """
    assert (
        min_positive_val >= 5.96e-08
    ), "invalid min_positive_val. smallest positive float16 value: subnormal 5.96e-08, and normalized 6.104e-05"
    assert max_finite_val <= float(np.finfo(np.float16).max), "invalid max_finite_val. largest float16 value: 65504"

    func_infer_shape = None
    if not disable_shape_infer and version.parse(onnx.__version__) >= version.parse("1.2.0"):
        try:
            from onnx.shape_inference import infer_shapes

            func_infer_shape = infer_shapes
        finally:
            pass

    if not isinstance(model, onnx_proto.ModelProto):
        raise ValueError("Expected model type is an ONNX ModelProto but got %s" % type(model))

    # create blocklists
    if op_block_list is None:
        op_block_list = DEFAULT_OP_BLOCK_LIST
    if node_block_list is None:
        node_block_list = []
    op_block_list = set(op_block_list)
    node_block_list = set(node_block_list)

    logger.debug(
        f"fp16 parameters: min_positive_val={min_positive_val} max_finite_val={max_finite_val} "
        f"keep_io_types={keep_io_types} disable_shape_infer={disable_shape_infer} "
        f"op_block_list={op_block_list} node_block_list={node_block_list} "
        f"force_fp16_initializers={force_fp16_initializers}"
    )

    # create a queue for BFS
    queue = []
    value_info_list = []
    node_list = []

    # type inference on input model
    if func_infer_shape is not None:
        model = func_infer_shape(model)
    queue.append(model)

    name_mapping = {}
    graph_io_to_skip = set()
    io_casts = set()

    fp32_inputs = [n.name for n in model.graph.input if n.type.tensor_type.elem_type == onnx_proto.TensorProto.FLOAT]
    fp32_outputs = [n.name for n in model.graph.output if n.type.tensor_type.elem_type == onnx_proto.TensorProto.FLOAT]
    if isinstance(keep_io_types, list):
        fp32_inputs = [n for n in fp32_inputs if n in keep_io_types]
        fp32_outputs = [n for n in fp32_outputs if n in keep_io_types]
    elif not keep_io_types:
        fp32_inputs = []
        fp32_outputs = []

    for i, n in enumerate(model.graph.input):
        if n.name in fp32_inputs:
            output_name = "graph_input_cast_" + str(i)
            name_mapping[n.name] = output_name
            graph_io_to_skip.add(n.name)

            node_name = "graph_input_cast" + str(i)
            new_value_info = model.graph.value_info.add()
            new_value_info.CopyFrom(n)
            new_value_info.name = output_name
            new_value_info.type.tensor_type.elem_type = onnx_proto.TensorProto.FLOAT16
            # add Cast node (from tensor(float) to tensor(float16)) after graph input
            new_node = [helper.make_node("Cast", [n.name], [output_name], to=10, name=node_name)]
            model.graph.node.extend(new_node)
            value_info_list.append(new_value_info)
            io_casts.add(node_name)

    for i, n in enumerate(model.graph.output):
        if n.name in fp32_outputs:
            input_name = "graph_output_cast_" + str(i)
            name_mapping[n.name] = input_name
            graph_io_to_skip.add(n.name)

            node_name = "graph_output_cast" + str(i)
            # add Cast node (from tensor(float16) to tensor(float)) before graph output
            new_value_info = model.graph.value_info.add()
            new_value_info.CopyFrom(n)
            new_value_info.name = input_name
            new_value_info.type.tensor_type.elem_type = onnx_proto.TensorProto.FLOAT16
            new_node = [helper.make_node("Cast", [input_name], [n.name], to=1, name=node_name)]
            model.graph.node.extend(new_node)
            value_info_list.append(new_value_info)
            io_casts.add(node_name)
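
    # Illustrative note (not part of the original module): in the Cast nodes above,
    # to=1 is onnx_proto.TensorProto.FLOAT and to=10 is onnx_proto.TensorProto.FLOAT16.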

    fp32_initializers: Dict[str, InitializerTracker] = {}
    while queue:
        next_level = []
        for q in queue:
            # if q is model, push q.graph (GraphProto)
            if isinstance(q, onnx_proto.ModelProto):
                next_level.append(q.graph)
            # if q is model.graph, push q.node.attribute (AttributeProto)
            if isinstance(q, onnx_proto.GraphProto):
                for n in q.initializer:  # TensorProto type
                    if n.data_type == onnx_proto.TensorProto.FLOAT:
                        assert n.name not in fp32_initializers
                        fp32_initializers[n.name] = InitializerTracker(n)

                for n in q.node:
                    # if n is in the block list (its op type doesn't support float16), skip conversion
                    # and save the node for further processing
                    if n.name in io_casts:
                        continue
                    for i in range(len(n.input)):
                        if n.input[i] in name_mapping:
                            n.input[i] = name_mapping[n.input[i]]
                    for i in range(len(n.output)):
                        if n.output[i] in name_mapping:
                            n.output[i] = name_mapping[n.output[i]]

                    is_node_blocked = n.op_type in op_block_list or n.name in node_block_list
                    for node_input in n.input:
                        if node_input in fp32_initializers:
                            fp32_initializers[node_input].add_node(n, is_node_blocked)
                    if is_node_blocked:
                        node_list.append(n)
                    else:
                        if n.op_type == "Cast":
                            for attr in n.attribute:
                                if attr.name == "to" and attr.i == 1:
                                    attr.i = 10
                                    break
                        for attr in n.attribute:
                            next_level.append(attr)
            # if q is model.graph.node.attribute, push q.g and q.graphs (GraphProto)
            # and process node.attribute.t and node.attribute.tensors (TensorProto)
            if isinstance(q, onnx_proto.AttributeProto):
                next_level.append(q.g)
                for n in q.graphs:
                    next_level.append(n)
                q.t.CopyFrom(convert_tensor_float_to_float16(q.t, min_positive_val, max_finite_val))
                for n in q.tensors:
                    # conversion mutates the TensorProto in place
                    n = convert_tensor_float_to_float16(n, min_positive_val, max_finite_val)
            # if q is graph, process input, output and value_info (ValueInfoProto)
            if isinstance(q, onnx_proto.GraphProto):
                # Note that float initializers tracked by fp32_initializers will be processed later.
                # For all ValueInfoProto with tensor(float) type in input, output and value_info, convert
                # them to tensor(float16) except map and seq(map), and save them in value_info_list for
                # further processing.
                for n in itertools.chain(q.input, q.output, q.value_info):
                    if n.type.tensor_type.elem_type == onnx_proto.TensorProto.FLOAT:
                        if n.name not in graph_io_to_skip:
                            n.type.tensor_type.elem_type = onnx_proto.TensorProto.FLOAT16
                            value_info_list.append(n)
                    if n.type.HasField("sequence_type"):
                        if n.type.sequence_type.elem_type.tensor_type.elem_type == onnx_proto.TensorProto.FLOAT:
                            if n.name not in graph_io_to_skip:
                                n.type.sequence_type.elem_type.tensor_type.elem_type = onnx_proto.TensorProto.FLOAT16
                                value_info_list.append(n)
        queue = next_level

    for key, value in fp32_initializers.items():
        # By default, to avoid precision loss, do not convert an initializer to fp16 when it is used only by fp32 nodes.
        if force_fp16_initializers or value.fp16_nodes:
            value.initializer = convert_tensor_float_to_float16(value.initializer, min_positive_val, max_finite_val)
            value_info_list.append(make_value_info_from_tensor(value.initializer))
            if value.fp32_nodes and not force_fp16_initializers:
                logger.info(
                    "initializer {} is used by both fp32 and fp16 nodes. Consider adding these nodes to the block list: {}".format(
                        key, value.fp16_nodes
                    )
                )

    # process the nodes in the block list that don't support tensor(float16)
    for node in node_list:
        # if an input name is in value_info_list, the input is tensor(float16) type;
        # insert a float16-to-float Cast node before the node, change the node's
        # input name, and create a new value_info for the new name
        for i in range(len(node.input)):
            node_input = node.input[i]
            for value_info in value_info_list:
                if node_input == value_info.name:
                    # create new value_info for current node's new input name
                    new_value_info = model.graph.value_info.add()
                    new_value_info.CopyFrom(value_info)
                    output_name = node.name + "_input_cast_" + str(i)
                    new_value_info.name = output_name
                    new_value_info.type.tensor_type.elem_type = onnx_proto.TensorProto.FLOAT
                    # add Cast node (from tensor(float16) to tensor(float)) before current node
                    node_name = node.name + "_input_cast" + str(i)
                    new_node = [helper.make_node("Cast", [node_input], [output_name], to=1, name=node_name)]
                    model.graph.node.extend(new_node)
                    # change current node's input name
                    node.input[i] = output_name
                    break
        # if an output name is in value_info_list, the output is tensor(float16) type; insert a
        # float-to-float16 Cast node after the node, change the node's output name, and create a
        # new value_info for the new name
        for i in range(len(node.output)):
            output = node.output[i]
            for value_info in value_info_list:
                if output == value_info.name:
                    # create new value_info for current node's new output
                    new_value_info = model.graph.value_info.add()
                    new_value_info.CopyFrom(value_info)
                    input_name = node.name + "_output_cast_" + str(i)
                    new_value_info.name = input_name
                    new_value_info.type.tensor_type.elem_type = onnx_proto.TensorProto.FLOAT
                    # add Cast node (from tensor(float) to tensor(float16)) after current node
                    node_name = node.name + "_output_cast" + str(i)
                    new_node = [helper.make_node("Cast", [input_name], [output], to=10, name=node_name)]
                    model.graph.node.extend(new_node)
                    # change current node's output name
                    node.output[i] = input_name
                    break

    return model
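
# Illustrative example (not part of the original module): keep only a named graph
# input in float32 while converting the rest of the model; "model.onnx" and
# "input_ids" are placeholders:
#   model = convert_float_to_float16(onnx.load("model.onnx"), keep_io_types=["input_ids"])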


def float_to_float16_max_diff(tensor, min_positive_val=5.96e-08, max_finite_val=65504.0):
    """Measure the maximum absolute difference after converting a float tensor to float16."""
    if not isinstance(tensor, onnx_proto.TensorProto):
        raise ValueError("Expected input type is an ONNX TensorProto but got %s" % type(tensor))
    if tensor.data_type != onnx_proto.TensorProto.FLOAT:
        raise ValueError("Expected tensor data type is float.")

    float32_data = None
    if tensor.float_data:
        float32_data = np.array(tensor.float_data)
    if tensor.raw_data:
        float32_data = np.frombuffer(tensor.raw_data, dtype="float32")
    if float32_data is None:
        raise RuntimeError("external data not loaded!")

    float16_data = convert_np_to_float16(float32_data, min_positive_val, max_finite_val)
    return np.amax(np.abs(float32_data - np.float32(float16_data)))
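

# ---------------------------------------------------------------------------
# Usage sketch (illustrative, not part of the original module). The file names
# below are placeholders: convert a model on disk to float16 while keeping the
# graph inputs/outputs in float32.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    fp32_model = onnx.load("model.onnx")
    fp16_model = convert_float_to_float16(fp32_model, keep_io_types=True)
    onnx.save(fp16_model, "model_fp16.onnx")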