# -------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
# --------------------------------------------------------------------------
#
# This script evaluates the accuracy of ONNX models on the SQuAD dataset for the question-answering task.
# Example of evaluating the raw and the optimized model with CUDA on Linux:
# pip3 install datasets evaluate optimum transformers onnxruntime-gpu
# python3 eval_squad.py -m distilbert-base-cased-distilled-squad
# python3 -m onnxruntime.transformers.optimizer --output optimized_fp16.onnx --num_heads 12 --hidden_size 768 \
# --input /home/$USER/.cache/huggingface/hub/distilbert-base-cased-distilled-squad/model.onnx \
# --use_mask_index --float16
# python3 eval_squad.py -m distilbert-base-cased-distilled-squad --onnx optimized_fp16.onnx
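#
# A CPU-only run is also possible (a sketch, assuming the CPU onnxruntime package is installed):
# pip3 install datasets evaluate optimum transformers onnxruntime
# python3 eval_squad.py -m distilbert-base-cased-distilled-squad --provider CPUExecutionProvider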
import argparse
import csv
import os

try:
    from importlib.metadata import PackageNotFoundError, version
except ImportError:
    from importlib_metadata import PackageNotFoundError, version

from pathlib import Path
from typing import Any, Dict, List, Optional

import torch
from datasets import load_dataset
from evaluate import evaluator
from optimum.onnxruntime import ORTModelForQuestionAnswering
from optimum.onnxruntime.modeling_ort import ORTModel
from transformers import AutoTokenizer, pipeline

PRETRAINED_SQUAD_MODELS = [
    "bert-large-uncased-whole-word-masking-finetuned-squad",
    "deepset/roberta-base-squad2",
    "distilbert-base-cased-distilled-squad",
]


def get_package_version(package_name: str):
    try:
        return version(package_name)
    except PackageNotFoundError:
        return None
def load_onnx_model(
    model_id: str, onnx_path: Optional[str] = None, provider="CUDAExecutionProvider", use_io_binding: bool = False
):
    """Load an ONNX model given a pretrained model name and an optional ONNX model path.
    If onnx_path is None, the default ONNX model exported by optimum will be used.

    Args:
        model_id (str): pretrained model name or checkpoint path
        onnx_path (Optional[str], optional): path of the ONNX model to evaluate. Defaults to None.
        provider (str, optional): ONNX Runtime execution provider. Defaults to "CUDAExecutionProvider".
        use_io_binding (bool, optional): whether to use IO binding on GPU. Defaults to False.

    Returns:
        model: ORTModel for the ONNX model
        onnx_path: the path of the ONNX model
    """
    model = ORTModelForQuestionAnswering.from_pretrained(model_id, from_transformers=True)

    if onnx_path is not None:
        # Replace the default exported model with the user-provided ONNX model.
        model.latest_model_name = Path(onnx_path).name
        if provider != "CPUExecutionProvider":
            model.device = torch.device("cuda:0")
            model.model = ORTModel.load_model(onnx_path, provider)
        else:
            model.device = torch.device("cpu")
            model.model = ORTModel.load_model(onnx_path)
    else:
        onnx_path = os.path.join(model.model_save_dir.as_posix(), model.latest_model_name)

    if provider != "CPUExecutionProvider":
        model.to("cuda")

    model.use_io_binding = use_io_binding
    return model, onnx_path
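

# A minimal usage sketch of load_onnx_model (illustrative only; "optimized_fp16.onnx"
# is a hypothetical path, and the model name is one of PRETRAINED_SQUAD_MODELS):
#   model, onnx_path = load_onnx_model(
#       "distilbert-base-cased-distilled-squad",
#       onnx_path="optimized_fp16.onnx",
#       provider="CUDAExecutionProvider",
#       use_io_binding=True,
#   )
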
def output_details(results: List[Dict[str, Any]], csv_filename: str):
    """Output a CSV file with the details of each test result.

    Args:
        results (List[Dict[str, Any]]): list of JSON results.
        csv_filename (str): path of the output CSV file
    """
    with open(csv_filename, mode="a", newline="", encoding="ascii") as csv_file:
        column_names = [
            "pretrained_model_name",
            "onnx_path",
            "provider",
            "disable_fused_attention",
            "batch_size",
            "sequence_length",
            "use_io_binding",
            "exact",
            "f1",
            "total",
            "HasAns_exact",
            "HasAns_f1",
            "HasAns_total",
            "best_exact",
            "best_exact_thresh",
            "best_f1",
            "best_f1_thresh",
            "total_time_in_seconds",
            "samples_per_second",
            "latency_in_seconds",
        ]

        csv_writer = csv.DictWriter(csv_file, fieldnames=column_names)
        csv_writer.writeheader()
        for result in results:
            csv_writer.writerow(result)
        csv_file.flush()

    print(f"Detail results are saved to csv file: {csv_filename}")
def output_summary(results: List[Dict[str, Any]], csv_filename: str, metric_name: str):
    """Output a CSV file summarizing a metric over combinations of batch_size and sequence_length.

    Args:
        results (List[Dict[str, Any]]): list of JSON results.
        csv_filename (str): path of the output CSV file
        metric_name (str): the metric to summarize
    """
    with open(csv_filename, mode="a", newline="", encoding="ascii") as csv_file:
        header_names = [
            "pretrained_model_name",
            "onnx_path",
            "provider",
            "disable_fused_attention",
            "use_io_binding",
        ]

        model_list = list({result["onnx_path"] for result in results})
        model_list.sort()

        batch_sizes = list({result["batch_size"] for result in results})
        batch_sizes.sort()

        sequence_lengths = list({result["sequence_length"] for result in results})
        sequence_lengths.sort()

        key_names = []
        for sequence_length in sequence_lengths:
            for batch_size in batch_sizes:
                key_names.append(f"b{batch_size}_s{sequence_length}")

        csv_writer = csv.DictWriter(csv_file, fieldnames=header_names + key_names)
        csv_writer.writeheader()

        for model in model_list:
            row = {}

            # Metric value for a given pair of batch_size and sequence_length.
            # If (onnx_path, batch_size, sequence_length) is not unique, the last occurrence wins.
            values = {k: "" for k in key_names}

            for result in results:
                if result["onnx_path"] == model and result[metric_name]:
                    headers = {k: v for k, v in result.items() if k in header_names}
                    if not row:
                        row.update(headers)
                    batch_size = result["batch_size"]
                    sequence_length = result["sequence_length"]
                    key = f"b{batch_size}_s{sequence_length}"
                    if key in key_names:
                        values[key] = result[metric_name]

            if row:
                for key in key_names:
                    row[key] = values[key]
                csv_writer.writerow(row)

        csv_file.flush()

    print(f"Summary results for {metric_name} are saved to csv file: {csv_filename}")
def main():
    args = parse_arguments()
    print(args)

    for name in ["onnxruntime-gpu", "onnxruntime", "onnx", "torch", "transformers", "optimum", "datasets", "evaluate"]:
        package_version = get_package_version(name)
        if package_version:
            print(f"{name} version", package_version)

    pretrained_model_name = args.model_name
    if args.onnx and not os.path.exists(args.onnx):
        raise RuntimeError(f"Onnx model path does not exist: {args.onnx}")

    disable_fused_attention = os.environ.get("ORT_DISABLE_FUSED_ATTENTION", "0") == "1"

    all_results = []
    tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name)
    for sequence_length in args.sequence_lengths:
        tokenizer.model_max_length = sequence_length
        tokenizer.doc_stride = min(sequence_length // 2, 128)
        ort_model, onnx_path = load_onnx_model(pretrained_model_name, args.onnx, args.provider, args.use_io_binding)
        print(ort_model.config)

        if sequence_length > ort_model.config.max_position_embeddings:
            raise RuntimeError(f"sequence length should not be larger than {ort_model.config.max_position_embeddings}")

        qa_pipeline = pipeline(
            "question-answering", model=ort_model, tokenizer=tokenizer, question_first=True, batch_size=args.batch_size
        )

        task_evaluator = evaluator("question-answering")
        squad_dataset = load_dataset("squad", split=f"validation[:{args.total}]" if args.total > 0 else "validation")

        result = task_evaluator.compute(
            model_or_pipeline=qa_pipeline,
            data=squad_dataset,
            metric="squad_v2",
            squad_v2_format=True,
        )

        result["provider"] = args.provider
        result["disable_fused_attention"] = disable_fused_attention
        result["pretrained_model_name"] = pretrained_model_name
        result["onnx_path"] = onnx_path
        result["batch_size"] = args.batch_size
        result["sequence_length"] = sequence_length
        result["use_io_binding"] = args.use_io_binding
        print(result)
        all_results.append(result)

    output_details(all_results, "detail.csv")

    for metric_name in ["f1", "exact", "samples_per_second"]:
        output_summary(all_results, f"{metric_name}.csv", metric_name)
def parse_arguments(argv=None):
    parser = argparse.ArgumentParser()

    parser.add_argument(
        "-m",
        "--model_name",
        required=False,
        type=str,
        default=PRETRAINED_SQUAD_MODELS[0],
        help=f"Checkpoint directory or pre-trained model names in the list: {PRETRAINED_SQUAD_MODELS}",
    )

    parser.add_argument(
        "-s",
        "--sequence_lengths",
        nargs="+",
        type=int,
        default=[384],
        help="Sequence lengths for ONNX model inputs. Multiple values are allowed.",
    )

    parser.add_argument(
        "-b",
        "--batch_size",
        type=int,
        default=1,
        help="Batch size for inference.",
    )

    parser.add_argument("-t", "--total", type=int, default=0, help="Total samples to test. 0 means all samples.")

    parser.add_argument(
        "--onnx",
        required=False,
        type=str,
        default=None,
        help="Optional ONNX model path. If not specified, optimum will be used to export an ONNX model for testing.",
    )

    parser.add_argument(
        "--provider",
        required=False,
        default="CUDAExecutionProvider",
        help="Select which execution provider to use for runs. Default is CUDAExecutionProvider.",
    )

    parser.add_argument("--use_io_binding", required=False, action="store_true", help="Use IO Binding for GPU.")
    parser.set_defaults(use_io_binding=False)

    args = parser.parse_args(argv)
    return args
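

# Example invocation sweeping several sequence lengths (a sketch; the model choice
# and sample count are arbitrary):
#   python3 eval_squad.py -m deepset/roberta-base-squad2 -s 128 256 384 -b 4 -t 1000
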
if __name__ == "__main__":
    main()