m2m模型翻译
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 

141 lines
4.8 KiB

# --------------------------------------------------------------------------
# Copyright (c) Microsoft, Intel Corporation. All rights reserved.
# Licensed under the MIT License. See License.txt in the project root for
# license information.
# --------------------------------------------------------------------------
import argparse
import logging
import sys
from .shape_inference import quant_pre_process
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
def _str2bool(value):
    """Convert a command-line token to a real boolean.

    ``argparse`` with ``type=bool`` calls ``bool()`` on the raw string, which
    is ``True`` for every non-empty value -- including ``"False"`` and ``"0"``.
    This converter interprets the text instead.

    Args:
        value: The raw option value (a string), or an actual ``bool``.

    Returns:
        ``True`` or ``False``. The empty string maps to ``False``, matching
        what ``bool("")`` returned before.

    Raises:
        argparse.ArgumentTypeError: If the text is not a recognised boolean.
    """
    if isinstance(value, bool):
        return value
    token = value.strip().lower()
    if token in ("true", "t", "yes", "y", "1", "on"):
        return True
    if token in ("false", "f", "no", "n", "0", "off", ""):
        return False
    raise argparse.ArgumentTypeError("expected a boolean value, got %r" % (value,))


def parse_arguments(argv=None):
    """Build the command-line parser for the pre-processing tool and parse arguments.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default), ``argparse`` reads ``sys.argv[1:]``.

    Returns:
        argparse.Namespace with the attributes ``input``, ``output``,
        ``skip_optimization``, ``skip_onnx_shape``, ``skip_symbolic_shape``,
        ``auto_merge``, ``int_max``, ``guess_output_rank``, ``verbose``,
        ``save_as_external_data``, ``all_tensors_to_one_file``,
        ``external_data_location`` and ``external_data_size_threshold``.
    """
    parser = argparse.ArgumentParser(
        description="""Model optimizer and shape inferencer, in preparation for quantization,
Consists of three optional steps:
1. Symbolic shape inference (best for transformer models).
2. Model optimization.
3. ONNX shape inference.
Model quantization with QDQ format, i.e. inserting QuantizeLinear/DeQuantizeLinear on
the tensor, requires tensor shape information to perform its best. Currently, shape inferencing
works best with optimized model. As a result, it is highly recommended to run quantization
on optimized model with shape information. This is the tool for optimization and shape
inferencing.
Essentially this tool performs the following three (skippable) steps:
1. Symbolic shape inference.
2. Model optimization
3. ONNX shape inference"""
    )

    parser.add_argument("--input", required=True, help="Path to the input model file")
    parser.add_argument("--output", required=True, help="Path to the output model file")

    # The three --skip_* options keep accepting an explicit value
    # ("--skip_optimization True") for backward compatibility, but the value is
    # now actually interpreted, so "False"/"0" no longer enable the skip.
    # nargs="?" with const=True additionally allows the bare flag form.
    parser.add_argument(
        "--skip_optimization",
        type=_str2bool,
        nargs="?",
        const=True,
        default=False,
        metavar="BOOL",
        help="Skip model optimization step if true. It's a known issue that ORT"
        " optimization has difficulty with model size greater than 2GB, rerun with"
        " this option to get around this issue.",
    )
    parser.add_argument(
        "--skip_onnx_shape",
        type=_str2bool,
        nargs="?",
        const=True,
        default=False,
        metavar="BOOL",
        help="Skip ONNX shape inference. Symbolic shape inference is most effective"
        " with transformer based models. Skipping all shape inferences may"
        " reduce the effectiveness of quantization, as a tensor with unknown"
        " shape can not be quantized.",
    )
    parser.add_argument(
        "--skip_symbolic_shape",
        type=_str2bool,
        nargs="?",
        const=True,
        default=False,
        metavar="BOOL",
        help="Skip symbolic shape inference. Symbolic shape inference is most"
        " effective with transformer based models. Skipping all shape"
        " inferences may reduce the effectiveness of quantization, as a tensor"
        " with unknown shape can not be quantized.",
    )

    # Options for the symbolic shape inference step.
    parser.add_argument(
        "--auto_merge",
        help="Automatically merge symbolic dims when confliction happens",
        action="store_true",
        default=False,
    )
    parser.add_argument(
        "--int_max",
        help="maximum value for integer to be treated as boundless for ops like slice",
        type=int,
        default=2**31 - 1,
    )
    parser.add_argument(
        "--guess_output_rank",
        help="guess output rank to be the same as input 0 for unknown ops",
        action="store_true",
        default=False,
    )
    parser.add_argument(
        "--verbose",
        help="Prints detailed logs of inference, 0: turn off, 1: warnings, 3: detailed",
        type=int,
        default=0,
    )

    # Options controlling how the output model is written.
    parser.add_argument(
        "--save_as_external_data",
        help="Saving an ONNX model to external data",
        action="store_true",
        default=False,
    )
    parser.add_argument(
        "--all_tensors_to_one_file",
        help="Saving all the external data to one file",
        action="store_true",
        default=False,
    )
    parser.add_argument(
        "--external_data_location",
        help="The file location to save the external file",
        default="./",
    )
    parser.add_argument(
        "--external_data_size_threshold",
        help="The size threshold for external data",
        type=int,
        default=1024,
    )

    return parser.parse_args(argv)
if __name__ == "__main__":
    # Command-line entry point: parse the options, reject combinations that
    # cannot do useful work, then hand everything to quant_pre_process.
    args = parse_arguments()

    # With every step skipped there is nothing to run. Exit with a non-zero
    # status so calling scripts can tell that no output was produced.
    if args.skip_optimization and args.skip_onnx_shape and args.skip_symbolic_shape:
        logger.error("Skipping all three steps, nothing to be done. Quitting...")
        sys.exit(1)

    # External-data output is only permitted when optimization is skipped.
    if (not args.skip_optimization) and args.save_as_external_data:
        logger.error("ORT model optimization does not support external data yet!")
        sys.exit(1)

    logger.info("input model: %s", args.input)
    logger.info("output model: %s", args.output)

    # Arguments are passed positionally, in the same order as before.
    quant_pre_process(
        args.input,
        args.output,
        args.skip_optimization,
        args.skip_onnx_shape,
        args.skip_symbolic_shape,
        args.auto_merge,
        args.int_max,
        args.guess_output_rank,
        args.verbose,
        args.save_as_external_data,
        args.all_tensors_to_one_file,
        args.external_data_location,
        args.external_data_size_threshold,
    )