# -------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
# --------------------------------------------------------------------------
# This is a tool to compare the inference results of the original model and the optimized model.
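#
# Example usage (a sketch only; the .onnx file names are placeholders and the script file
# name is assumed to be compare_bert_results.py):
#
#   python compare_bert_results.py --baseline_model bert_base.onnx \
#       --optimized_model bert_base_opt.onnx --batch_size 1 --sequence_length 128 --samples 100
#
# Add --use_gpu to run both sessions on CUDA, and --verbose to print per-case differences.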

import argparse
import statistics
from pathlib import Path

import numpy as np
import psutil
from bert_perf_test import create_session, onnxruntime_inference
from bert_test_data import generate_test_data, get_bert_inputs, output_test_data


def run_model(model_path, all_inputs, use_gpu, disable_optimization):
    import onnxruntime

    graph_optimization_level = None
    if disable_optimization:
        graph_optimization_level = onnxruntime.GraphOptimizationLevel.ORT_DISABLE_ALL

    intra_op_num_threads = psutil.cpu_count(logical=False)

    session = create_session(
        model_path, use_gpu, "cuda" if use_gpu else "cpu", intra_op_num_threads, graph_optimization_level
    )

    output_names = [output.name for output in session.get_outputs()]
    results, latency_list = onnxruntime_inference(session, all_inputs, output_names)
    return results, latency_list, output_names


def compare(baseline_results, treatment_results, verbose, rtol=1e-3, atol=1e-4):
    # Validate the output of baseline and treatment, to make sure the results are similar.
    diff_count = 0
    max_rel_diff = 0
    max_abs_diff = 0
    for test_case_id, results in enumerate(baseline_results):
        case_passed = True
        for i in range(len(results)):
            treatment_output = treatment_results[test_case_id][i]
            rel_diff = np.amax(np.abs((treatment_output - results[i]) / results[i]))
            abs_diff = np.amax(np.abs(treatment_output - results[i]))
            max_rel_diff = max(max_rel_diff, rel_diff)
            max_abs_diff = max(max_abs_diff, abs_diff)

            if not np.allclose(results[i].tolist(), treatment_output.tolist(), rtol=rtol, atol=atol):
                if case_passed:
                    case_passed = False
                    diff_count += 1

                if verbose:
                    print("case {} output {}".format(test_case_id, i))
                    print("baseline={}\ntreatment={}".format(results[i].tolist(), treatment_output))
                    print("rel_diff={} abs_diff={}".format(rel_diff, abs_diff))

    if diff_count == 0:
        print(
            "100% passed for {} random inputs given thresholds (rtol={}, atol={}).".format(
                len(baseline_results), rtol, atol
            )
        )
    else:
        print(
            "WARNING: {} out of {} results NOT passed for thresholds (rtol={}, atol={}).".format(
                diff_count, len(baseline_results), rtol, atol
            )
        )

    print("maximum absolute difference={}".format(max_abs_diff))
    print("maximum relative difference={}".format(max_rel_diff))
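

# Note on the pass criterion above (per the numpy.allclose documentation): an element passes when
# |baseline - treatment| <= atol + rtol * |treatment|, since the relative part scales with the
# second argument of allclose (the treatment output here). For example, with the defaults
# rtol=1e-3 and atol=1e-4, a treatment value of magnitude 2.0 tolerates an absolute difference
# of up to 1e-4 + 1e-3 * 2.0 = 2.1e-3.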


def run_test(
    baseline_model,
    optimized_model,
    output_dir,
    batch_size,
    sequence_length,
    use_gpu,
    test_cases,
    seed,
    verbose,
    rtol,
    atol,
    input_ids_name,
    segment_ids_name,
    input_mask_name,
):
    # Try to deduce input names from the optimized model.
    input_ids, segment_ids, input_mask = get_bert_inputs(
        optimized_model, input_ids_name, segment_ids_name, input_mask_name
    )

    # Use random mask length for accuracy test. It might introduce slight inflation in latency reported in this script.
    all_inputs = generate_test_data(
        batch_size,
        sequence_length,
        test_cases,
        seed,
        verbose,
        input_ids,
        segment_ids,
        input_mask,
        random_mask_length=True,
    )

    baseline_results, baseline_latency, output_names = run_model(
        baseline_model, all_inputs, use_gpu, disable_optimization=True
    )
    if verbose:
        print(
            "baseline average latency (all optimizations disabled): {} ms".format(
                statistics.mean(baseline_latency) * 1000
            )
        )

    if output_dir is not None:
        for i, inputs in enumerate(all_inputs):
            output_test_data(output_dir, i, inputs)

    treatment_results, treatment_latency, treatment_output_names = run_model(
        optimized_model, all_inputs, use_gpu, disable_optimization=False
    )
    if verbose:
        print("treatment average latency: {} ms".format(statistics.mean(treatment_latency) * 1000))

    # Validate the output of baseline and treatment, to make sure the results are similar.
    compare(baseline_results, treatment_results, verbose, rtol, atol)
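

# A minimal sketch of driving run_test directly from Python instead of the command line.
# The model paths are placeholders; the remaining values mirror the argparse defaults below
# (or are arbitrary example values for the required batch_size and sequence_length arguments).
#
#   run_test(
#       baseline_model="bert_base.onnx",
#       optimized_model="bert_base_opt.onnx",
#       output_dir=None,
#       batch_size=1,
#       sequence_length=128,
#       use_gpu=False,
#       test_cases=100,
#       seed=3,
#       verbose=False,
#       rtol=1e-3,
#       atol=1e-4,
#       input_ids_name=None,
#       segment_ids_name=None,
#       input_mask_name=None,
#   )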


def parse_arguments():
    parser = argparse.ArgumentParser()
    parser.add_argument("--baseline_model", required=True, type=str, help="baseline onnx model path.")

    parser.add_argument(
        "--optimized_model",
        required=True,
        type=str,
        default=None,
        help="path of the optimized model. It shall have the same inputs as the baseline model.",
    )

    parser.add_argument(
        "--output_dir",
        required=False,
        type=str,
        default=None,
        help="output test data path. If not specified, test data will not be saved.",
    )

    parser.add_argument("--batch_size", required=True, type=int, help="batch size of input")

    parser.add_argument(
        "--sequence_length",
        required=True,
        type=int,
        help="maximum sequence length of input",
    )

    parser.add_argument("--rtol", required=False, type=float, default=1e-3, help="relative tolerance")

    parser.add_argument("--atol", required=False, type=float, default=1e-4, help="absolute tolerance")

    parser.add_argument(
        "--samples",
        required=False,
        type=int,
        default=100,
        help="number of test cases to be generated",
    )

    parser.add_argument("--seed", required=False, type=int, default=3, help="random seed")

    parser.add_argument("--use_gpu", required=False, action="store_true", help="use GPU")
    parser.set_defaults(use_gpu=False)

    parser.add_argument(
        "--verbose",
        required=False,
        action="store_true",
        help="print verbose information",
    )
    parser.set_defaults(verbose=False)

    parser.add_argument(
        "--input_ids",
        required=False,
        type=str,
        default=None,
        help="input name for input ids",
    )

    parser.add_argument(
        "--segment_ids",
        required=False,
        type=str,
        default=None,
        help="input name for segment ids",
    )

    parser.add_argument(
        "--input_mask",
        required=False,
        type=str,
        default=None,
        help="input name for attention mask",
    )

    args = parser.parse_args()
    return args


def main():
    args = parse_arguments()

    if args.output_dir is not None:
        # Create the output directory if it does not exist.
        path = Path(args.output_dir)
        path.mkdir(parents=True, exist_ok=True)

    run_test(
        args.baseline_model,
        args.optimized_model,
        args.output_dir,
        args.batch_size,
        args.sequence_length,
        args.use_gpu,
        args.samples,
        args.seed,
        args.verbose,
        args.rtol,
        args.atol,
        args.input_ids,
        args.segment_ids,
        args.input_mask,
    )


if __name__ == "__main__":
    main()