m2m model translation
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

141 lines
4.8 KiB

6 months ago
# --------------------------------------------------------------------------
# Copyright (c) Microsoft, Intel Corporation. All rights reserved.
# Licensed under the MIT License. See License.txt in the project root for
# license information.
# --------------------------------------------------------------------------
import argparse
import logging
import sys

# Project-local helper that performs the actual optimization / shape
# inference pipeline; this module is only the command-line front end.
from .shape_inference import quant_pre_process

# Module-level logger per the standard logging.getLogger(__name__) pattern.
logger = logging.getLogger(__name__)
  11. def parse_arguments():
  12. parser = argparse.ArgumentParser(
  13. description="""Model optimizer and shape inferencer, in preparation for quantization,
  14. Consists of three optional steps:
  15. 1. Symbolic shape inference (best for transformer models).
  16. 2. Model optimization.
  17. 3. ONNX shape inference.
  18. Model quantization with QDQ format, i.e. inserting QuantizeLinear/DeQuantizeLinear on
  19. the tensor, requires tensor shape information to perform its best. Currently, shape inferencing
  20. works best with optimized model. As a result, it is highly recommended to run quantization
  21. on optimized model with shape information. This is the tool for optimization and shape
  22. inferencing.
  23. Essentially this tool performs the following three (skippable) steps:
  24. 1. Symbolic shape inference.
  25. 2. Model optimization
  26. 3. ONNX shape inference"""
  27. )
  28. parser.add_argument("--input", required=True, help="Path to the input model file")
  29. parser.add_argument("--output", required=True, help="Path to the output model file")
  30. parser.add_argument(
  31. "--skip_optimization",
  32. type=bool,
  33. default=False,
  34. help="Skip model optimization step if true. It's a known issue that ORT"
  35. " optimization has difficulty with model size greater than 2GB, rerun with"
  36. " this option to get around this issue.",
  37. )
  38. parser.add_argument(
  39. "--skip_onnx_shape",
  40. type=bool,
  41. default=False,
  42. help="Skip ONNX shape inference. Symbolic shape inference is most effective"
  43. " with transformer based models. Skipping all shape inferences may"
  44. " reduce the effectiveness of quantization, as a tensor with unknown"
  45. " shape can not be quantized.",
  46. )
  47. parser.add_argument(
  48. "--skip_symbolic_shape",
  49. type=bool,
  50. default=False,
  51. help="Skip symbolic shape inference. Symbolic shape inference is most"
  52. " effective with transformer based models. Skipping all shape"
  53. " inferences may reduce the effectiveness of quantization, as a tensor"
  54. " with unknown shape can not be quantized.",
  55. )
  56. parser.add_argument(
  57. "--auto_merge",
  58. help="Automatically merge symbolic dims when confliction happens",
  59. action="store_true",
  60. default=False,
  61. )
  62. parser.add_argument(
  63. "--int_max",
  64. help="maximum value for integer to be treated as boundless for ops like slice",
  65. type=int,
  66. default=2**31 - 1,
  67. )
  68. parser.add_argument(
  69. "--guess_output_rank",
  70. help="guess output rank to be the same as input 0 for unknown ops",
  71. action="store_true",
  72. default=False,
  73. )
  74. parser.add_argument(
  75. "--verbose",
  76. help="Prints detailed logs of inference, 0: turn off, 1: warnings, 3: detailed",
  77. type=int,
  78. default=0,
  79. )
  80. parser.add_argument(
  81. "--save_as_external_data",
  82. help="Saving an ONNX model to external data",
  83. action="store_true",
  84. default=False,
  85. )
  86. parser.add_argument(
  87. "--all_tensors_to_one_file",
  88. help="Saving all the external data to one file",
  89. action="store_true",
  90. default=False,
  91. )
  92. parser.add_argument(
  93. "--external_data_location",
  94. help="The file location to save the external file",
  95. default="./",
  96. )
  97. parser.add_argument(
  98. "--external_data_size_threshold",
  99. help="The size threshold for external data",
  100. type=int,
  101. default=1024,
  102. )
  103. return parser.parse_args()
  104. if __name__ == "__main__":
  105. args = parse_arguments()
  106. if args.skip_optimization and args.skip_onnx_shape and args.skip_symbolic_shape:
  107. logger.error("Skipping all three steps, nothing to be done. Quitting...")
  108. sys.exit()
  109. if (not args.skip_optimization) and args.save_as_external_data:
  110. logger.error("ORT model optimization does not support external data yet!")
  111. sys.exit()
  112. logger.info("input model: %s", args.input)
  113. logger.info("output model: %s", args.output)
  114. quant_pre_process(
  115. args.input,
  116. args.output,
  117. args.skip_optimization,
  118. args.skip_onnx_shape,
  119. args.skip_symbolic_shape,
  120. args.auto_merge,
  121. args.int_max,
  122. args.guess_output_rank,
  123. args.verbose,
  124. args.save_as_external_data,
  125. args.all_tensors_to_one_file,
  126. args.external_data_location,
  127. args.external_data_size_threshold,
  128. )