m2m model translation

# -------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
# --------------------------------------------------------------------------
#
# This script evaluates the accuracy of ONNX models on the question-answering task using the SQuAD dataset.
# Example of evaluating the raw and the optimized model with CUDA on Linux:
# pip3 install datasets evaluate optimum transformers onnxruntime-gpu
# python3 eval_squad.py -m distilbert-base-cased-distilled-squad
# python3 -m onnxruntime.transformers.optimizer --output optimized_fp16.onnx --num_heads 12 --hidden_size 768 \
#     --input /home/$USER/.cache/huggingface/hub/distilbert-base-cased-distilled-squad/model.onnx \
#     --use_mask_index --float16
# python3 eval_squad.py -m distilbert-base-cased-distilled-squad --onnx optimized_fp16.onnx
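#
# An illustrative CPU-only run on a subset of the data (flags are the ones defined
# in parse_arguments below; assumes the CPU onnxruntime package is installed):
# python3 eval_squad.py -m distilbert-base-cased-distilled-squad --provider CPUExecutionProvider -t 100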

import argparse
import csv
import os

try:
    from importlib.metadata import PackageNotFoundError, version
except ImportError:
    from importlib_metadata import PackageNotFoundError, version

from pathlib import Path
from typing import Any, Dict, List, Optional

import torch
from datasets import load_dataset
from evaluate import evaluator
from optimum.onnxruntime import ORTModelForQuestionAnswering
from optimum.onnxruntime.modeling_ort import ORTModel
from transformers import AutoTokenizer, pipeline

PRETRAINED_SQUAD_MODELS = [
    "bert-large-uncased-whole-word-masking-finetuned-squad",
    "deepset/roberta-base-squad2",
    "distilbert-base-cased-distilled-squad",
]


def get_package_version(package_name: str):
    try:
        return version(package_name)
    except PackageNotFoundError:
        return None
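
# Example (illustrative): get_package_version("onnxruntime-gpu") returns a version
# string such as "1.16.0" when that package is installed, or None otherwise.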


def load_onnx_model(
    model_id: str, onnx_path: Optional[str] = None, provider="CUDAExecutionProvider", use_io_binding: bool = False
):
    """Load an ONNX model given a pretrained model name and an optional ONNX model path.
    If onnx_path is None, the default ONNX model exported by optimum will be used.

    Args:
        model_id (str): pretrained model name or checkpoint path
        onnx_path (Optional[str], optional): path of the ONNX model to evaluate. Defaults to None.
        provider (str, optional): execution provider for inference. Defaults to "CUDAExecutionProvider".
        use_io_binding (bool, optional): whether to use I/O binding on GPU. Defaults to False.

    Returns:
        model: ORTModel for the ONNX model
        onnx_path: the path of the ONNX model
    """
    model = ORTModelForQuestionAnswering.from_pretrained(model_id, from_transformers=True)

    if onnx_path is not None:
        model.latest_model_name = Path(onnx_path).name
        if provider != "CPUExecutionProvider":
            model.device = torch.device("cuda:0")
            model.model = ORTModel.load_model(onnx_path, provider)
        else:
            model.device = torch.device("cpu")
            model.model = ORTModel.load_model(onnx_path)
    else:
        onnx_path = os.path.join(model.model_save_dir.as_posix(), model.latest_model_name)
        if provider != "CPUExecutionProvider":
            model.to("cuda")

    model.use_io_binding = use_io_binding
    return model, onnx_path
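
# A minimal usage sketch (model id and path are illustrative):
#   model, onnx_path = load_onnx_model(
#       "distilbert-base-cased-distilled-squad", onnx_path="optimized_fp16.onnx"
#   )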


def output_details(results: List[Dict[str, Any]], csv_filename: str):
    """Output a CSV file with details of each test result.

    Args:
        results (List[Dict[str, Any]]): list of JSON results.
        csv_filename (str): path of the output CSV file
    """
    with open(csv_filename, mode="a", newline="", encoding="ascii") as csv_file:
        column_names = [
            "pretrained_model_name",
            "onnx_path",
            "provider",
            "disable_fused_attention",
            "batch_size",
            "sequence_length",
            "use_io_binding",
            "exact",
            "f1",
            "total",
            "HasAns_exact",
            "HasAns_f1",
            "HasAns_total",
            "best_exact",
            "best_exact_thresh",
            "best_f1",
            "best_f1_thresh",
            "total_time_in_seconds",
            "samples_per_second",
            "latency_in_seconds",
        ]

        csv_writer = csv.DictWriter(csv_file, fieldnames=column_names)
        csv_writer.writeheader()
        for result in results:
            csv_writer.writerow(result)
        csv_file.flush()

    print(f"Detail results are saved to csv file: {csv_filename}")
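
# Note: the CSV is opened in append mode ("a"), so repeated runs accumulate rows
# (and repeat the header row) in the same file rather than overwriting it.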


def output_summary(results: List[Dict[str, Any]], csv_filename: str, metric_name: str):
    """Output a CSV file with a summary of a metric over combinations of batch_size and sequence_length.

    Args:
        results (List[Dict[str, Any]]): list of JSON results.
        csv_filename (str): path of the output CSV file
        metric_name (str): the metric to summarize
    """
    with open(csv_filename, mode="a", newline="", encoding="ascii") as csv_file:
        header_names = [
            "pretrained_model_name",
            "onnx_path",
            "provider",
            "disable_fused_attention",
            "use_io_binding",
        ]

        model_list = list({result["onnx_path"] for result in results})
        model_list.sort()

        batch_sizes = list({result["batch_size"] for result in results})
        batch_sizes.sort()

        sequence_lengths = list({result["sequence_length"] for result in results})
        sequence_lengths.sort()

        key_names = []
        for sequence_length in sequence_lengths:
            for batch_size in batch_sizes:
                key_names.append(f"b{batch_size}_s{sequence_length}")

        csv_writer = csv.DictWriter(csv_file, fieldnames=header_names + key_names)
        csv_writer.writeheader()

        for model in model_list:
            row = {}

            # Metric value for a given pair of batch_size and sequence_length.
            # Assume that (onnx_path, batch_size, sequence_length) is unique, so keep the first occurrence only.
            values = {k: "" for k in key_names}

            for result in results:
                if result["onnx_path"] == model and result[metric_name]:
                    headers = {k: v for k, v in result.items() if k in header_names}
                    if not row:
                        row.update(headers)

                    batch_size = result["batch_size"]
                    sequence_length = result["sequence_length"]
                    key = f"b{batch_size}_s{sequence_length}"
                    if key in key_names:
                        values[key] = result[metric_name]

            if row:
                for key in key_names:
                    row[key] = values[key] if key in values else ""
                csv_writer.writerow(row)

        csv_file.flush()

    print(f"Summary results for {metric_name} are saved to csv file: {csv_filename}")
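
# Illustrative summary layout for metric "f1" with the default batch size 1 and
# sequence length 384 (the metric value is a placeholder, not a measured result):
#   pretrained_model_name,onnx_path,provider,disable_fused_attention,use_io_binding,b1_s384
#   distilbert-base-cased-distilled-squad,model.onnx,CUDAExecutionProvider,False,False,<f1>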


def main():
    args = parse_arguments()
    print(args)

    for name in ["onnxruntime-gpu", "onnxruntime", "onnx", "torch", "transformers", "optimum", "datasets", "evaluate"]:
        package_version = get_package_version(name)
        if package_version:
            print(f"{name} version", package_version)

    pretrained_model_name = args.model_name
    if args.onnx and not os.path.exists(args.onnx):
        raise RuntimeError(f"Onnx model path does not exist: {args.onnx}")

    disable_fused_attention = os.environ.get("ORT_DISABLE_FUSED_ATTENTION", "0") == "1"

    all_results = []
    tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name)
    for sequence_length in args.sequence_lengths:
        tokenizer.model_max_length = sequence_length
        tokenizer.doc_stride = min(sequence_length // 2, 128)

        ort_model, onnx_path = load_onnx_model(pretrained_model_name, args.onnx, args.provider, args.use_io_binding)
        print(ort_model.config)

        if sequence_length > ort_model.config.max_position_embeddings:
            raise RuntimeError(f"sequence length should not be larger than {ort_model.config.max_position_embeddings}")

        qa_pipeline = pipeline(
            "question-answering", model=ort_model, tokenizer=tokenizer, question_first=True, batch_size=args.batch_size
        )

        task_evaluator = evaluator("question-answering")
        squad_dataset = load_dataset("squad", split=f"validation[:{args.total}]" if args.total > 0 else "validation")

        result = task_evaluator.compute(
            model_or_pipeline=qa_pipeline,
            data=squad_dataset,
            metric="squad_v2",
            squad_v2_format=True,
        )

        result["provider"] = args.provider
        result["disable_fused_attention"] = disable_fused_attention
        result["pretrained_model_name"] = pretrained_model_name
        result["onnx_path"] = onnx_path
        result["batch_size"] = args.batch_size
        result["sequence_length"] = sequence_length
        result["use_io_binding"] = args.use_io_binding
        print(result)
        all_results.append(result)

    output_details(all_results, "detail.csv")

    for metric_name in ["f1", "exact", "samples_per_second"]:
        output_summary(all_results, f"{metric_name}.csv", metric_name)
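
# main() writes detail.csv with one row per evaluated sequence length, plus one
# summary CSV per metric (f1.csv, exact.csv, samples_per_second.csv). Setting
# ORT_DISABLE_FUSED_ATTENTION=1 in the environment is recorded in the
# disable_fused_attention column of those files.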


def parse_arguments(argv=None):
    parser = argparse.ArgumentParser()

    parser.add_argument(
        "-m",
        "--model_name",
        required=False,
        type=str,
        default=PRETRAINED_SQUAD_MODELS[0],
        help=f"Checkpoint directory or pre-trained model names in the list: {PRETRAINED_SQUAD_MODELS}",
    )

    parser.add_argument(
        "-s",
        "--sequence_lengths",
        nargs="+",
        type=int,
        default=[384],
        help="Sequence lengths for onnx model inputs. It could have multiple values.",
    )

    parser.add_argument(
        "-b",
        "--batch_size",
        type=int,
        default=1,
        help="batch size for inference.",
    )

    parser.add_argument("-t", "--total", type=int, default=0, help="Total samples to test. 0 means all samples.")

    parser.add_argument(
        "--onnx",
        required=False,
        type=str,
        default=None,
        help="Optional onnx model path. If not specified, optimum will be used to export onnx model for testing.",
    )

    parser.add_argument(
        "--provider",
        required=False,
        default="CUDAExecutionProvider",
        help="Select which Execution Provider to use for runs. Default is CUDAExecutionProvider.",
    )

    parser.add_argument("--use_io_binding", required=False, action="store_true", help="Use IO Binding for GPU.")
    parser.set_defaults(use_io_binding=False)

    args = parser.parse_args(argv)
    return args


if __name__ == "__main__":
    main()
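
# An illustrative invocation sweeping several sequence lengths on a subset of the
# validation split (the specific values here are examples, not recommendations):
#   python3 eval_squad.py -m deepset/roberta-base-squad2 -s 128 256 384 -b 8 -t 1000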