# -------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
# --------------------------------------------------------------------------
#
# This script evaluates the accuracy of ONNX models for the question-answering task on the SQuAD dataset.
#
# Example to evaluate the raw and optimized models for CUDA on Linux:
#   pip3 install datasets evaluate optimum transformers onnxruntime-gpu
#   python3 eval_squad.py -m distilbert-base-cased-distilled-squad
#   python3 -m onnxruntime.transformers.optimizer --output optimized_fp16.onnx --num_heads 12 --hidden_size 768 \
#       --input /home/$USER/.cache/huggingface/hub/distilbert-base-cased-distilled-squad/model.onnx \
#       --use_mask_index --float16
#   python3 eval_squad.py -m distilbert-base-cased-distilled-squad --onnx optimized_fp16.onnx
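#
# Example sketch to evaluate on CPU (assuming the CPU-only onnxruntime package is installed instead of onnxruntime-gpu):
#   pip3 install datasets evaluate optimum transformers onnxruntime
#   python3 eval_squad.py -m distilbert-base-cased-distilled-squad --provider CPUExecutionProvider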

import argparse
import csv
import os

try:
    from importlib.metadata import PackageNotFoundError, version
except ImportError:
    from importlib_metadata import PackageNotFoundError, version

from pathlib import Path
from typing import Any, Dict, List, Optional

import torch
from datasets import load_dataset
from evaluate import evaluator
from optimum.onnxruntime import ORTModelForQuestionAnswering
from optimum.onnxruntime.modeling_ort import ORTModel
from transformers import AutoTokenizer, pipeline

PRETRAINED_SQUAD_MODELS = [
    "bert-large-uncased-whole-word-masking-finetuned-squad",
    "deepset/roberta-base-squad2",
    "distilbert-base-cased-distilled-squad",
]


def get_package_version(package_name: str):
    """Return the installed version of a package, or None if it is not installed."""
    try:
        return version(package_name)
    except PackageNotFoundError:
        return None


def load_onnx_model(
    model_id: str, onnx_path: Optional[str] = None, provider="CUDAExecutionProvider", use_io_binding: bool = False
):
    """Load an ONNX model given a pretrained model name and an optional ONNX model path. If onnx_path is None,
    the default ONNX model exported by optimum will be used.

    Args:
        model_id (str): pretrained model name or checkpoint path
        onnx_path (Optional[str], optional): path of the ONNX model to evaluate. Defaults to None.
        provider (str, optional): execution provider, e.g. "CUDAExecutionProvider" or "CPUExecutionProvider".
            Defaults to "CUDAExecutionProvider".
        use_io_binding (bool, optional): whether to use I/O binding. Defaults to False.

    Returns:
        model: ORTModel for the ONNX model
        onnx_path: the path of the ONNX model
    """
    model = ORTModelForQuestionAnswering.from_pretrained(model_id, from_transformers=True)

    if onnx_path is not None:
        # Replace the model exported by optimum with the user-provided ONNX model.
        model.latest_model_name = Path(onnx_path).name
        if provider != "CPUExecutionProvider":
            model.device = torch.device("cuda:0")
            model.model = ORTModel.load_model(onnx_path, provider)
        else:
            model.device = torch.device("cpu")
            model.model = ORTModel.load_model(onnx_path)
    else:
        onnx_path = os.path.join(model.model_save_dir.as_posix(), model.latest_model_name)
        if provider != "CPUExecutionProvider":
            model.to("cuda")

    model.use_io_binding = use_io_binding

    return model, onnx_path
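
# Illustrative direct use of load_onnx_model (the ONNX path below is hypothetical, matching the optimizer example above):
#   model, onnx_path = load_onnx_model("distilbert-base-cased-distilled-squad", "optimized_fp16.onnx")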


def output_details(results: List[Dict[str, Any]], csv_filename: str):
    """Output a CSV file with details of each test result.

    Args:
        results (List[Dict[str, Any]]): list of JSON results.
        csv_filename (str): path of the output CSV file
    """
    with open(csv_filename, mode="a", newline="", encoding="ascii") as csv_file:
        column_names = [
            "pretrained_model_name",
            "onnx_path",
            "provider",
            "disable_fused_attention",
            "batch_size",
            "sequence_length",
            "use_io_binding",
            "exact",
            "f1",
            "total",
            "HasAns_exact",
            "HasAns_f1",
            "HasAns_total",
            "best_exact",
            "best_exact_thresh",
            "best_f1",
            "best_f1_thresh",
            "total_time_in_seconds",
            "samples_per_second",
            "latency_in_seconds",
        ]

        csv_writer = csv.DictWriter(csv_file, fieldnames=column_names)
        csv_writer.writeheader()
        for result in results:
            csv_writer.writerow(result)

        csv_file.flush()

    print(f"Detail results are saved to csv file: {csv_filename}")


def output_summary(results: List[Dict[str, Any]], csv_filename: str, metric_name: str):
    """Output a CSV file with a summary of a metric on combinations of batch_size and sequence_length.

    Args:
        results (List[Dict[str, Any]]): list of JSON results.
        csv_filename (str): path of the output CSV file
        metric_name (str): the metric to summarize
    """
    with open(csv_filename, mode="a", newline="", encoding="ascii") as csv_file:
        header_names = [
            "pretrained_model_name",
            "onnx_path",
            "provider",
            "disable_fused_attention",
            "use_io_binding",
        ]

        model_list = list(set([result["onnx_path"] for result in results]))
        model_list.sort()

        batch_sizes = list(set([result["batch_size"] for result in results]))
        batch_sizes.sort()

        sequence_lengths = list(set([result["sequence_length"] for result in results]))
        sequence_lengths.sort()

        key_names = []
        for sequence_length in sequence_lengths:
            for batch_size in batch_sizes:
                key_names.append(f"b{batch_size}_s{sequence_length}")

        csv_writer = csv.DictWriter(csv_file, fieldnames=header_names + key_names)
        csv_writer.writeheader()

        for model in model_list:
            row = {}

            # Metric value for a given pair of batch_size and sequence_length.
            # Assume that (onnx_path, batch_size, sequence_length) is unique so there is only one value per key.
            values = {}
            values.update({k: "" for k in key_names})

            for result in results:
                if result["onnx_path"] == model and result[metric_name]:
                    headers = {k: v for k, v in result.items() if k in header_names}
                    if not row:
                        row.update(headers)

                    batch_size = result["batch_size"]
                    sequence_length = result["sequence_length"]
                    key = f"b{batch_size}_s{sequence_length}"
                    if key in key_names:
                        values[key] = result[metric_name]

            if row:
                for key in key_names:
                    row[key] = values[key] if key in values else ""
                csv_writer.writerow(row)

        csv_file.flush()

    print(f"Summary results for {metric_name} are saved to csv file: {csv_filename}")


def main():
    args = parse_arguments()
    print(args)

    for name in ["onnxruntime-gpu", "onnxruntime", "onnx", "torch", "transformers", "optimum", "datasets", "evaluate"]:
        package_version = get_package_version(name)
        if package_version:
            print(f"{name} version", package_version)

    pretrained_model_name = args.model_name
    if args.onnx and not os.path.exists(args.onnx):
        raise RuntimeError(f"Onnx model path does not exist: {args.onnx}")

    disable_fused_attention = os.environ.get("ORT_DISABLE_FUSED_ATTENTION", "0") == "1"

    all_results = []
    tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name)
    for sequence_length in args.sequence_lengths:
        tokenizer.model_max_length = sequence_length
        tokenizer.doc_stride = min(sequence_length // 2, 128)
        ort_model, onnx_path = load_onnx_model(pretrained_model_name, args.onnx, args.provider, args.use_io_binding)

        print(ort_model.config)
        if sequence_length > ort_model.config.max_position_embeddings:
            raise RuntimeError(f"sequence length should not be larger than {ort_model.config.max_position_embeddings}")

        qa_pipeline = pipeline(
            "question-answering", model=ort_model, tokenizer=tokenizer, question_first=True, batch_size=args.batch_size
        )

        task_evaluator = evaluator("question-answering")
        squad_dataset = load_dataset("squad", split=f"validation[:{args.total}]" if args.total > 0 else "validation")

        result = task_evaluator.compute(
            model_or_pipeline=qa_pipeline,
            data=squad_dataset,
            metric="squad_v2",
            squad_v2_format=True,
        )

        result["provider"] = args.provider
        result["disable_fused_attention"] = disable_fused_attention
        result["pretrained_model_name"] = pretrained_model_name
        result["onnx_path"] = onnx_path
        result["batch_size"] = args.batch_size
        result["sequence_length"] = sequence_length
        result["use_io_binding"] = args.use_io_binding
        print(result)

        all_results.append(result)

    output_details(all_results, "detail.csv")

    for metric_name in ["f1", "exact", "samples_per_second"]:
        output_summary(all_results, f"{metric_name}.csv", metric_name)


def parse_arguments(argv=None):
    parser = argparse.ArgumentParser()

    parser.add_argument(
        "-m",
        "--model_name",
        required=False,
        type=str,
        default=PRETRAINED_SQUAD_MODELS[0],
        help=f"Checkpoint directory or pre-trained model names in the list: {PRETRAINED_SQUAD_MODELS}",
    )

    parser.add_argument(
        "-s",
        "--sequence_lengths",
        nargs="+",
        type=int,
        default=[384],
        help="Sequence lengths for onnx model inputs. It could have multiple values.",
    )

    parser.add_argument(
        "-b",
        "--batch_size",
        type=int,
        default=1,
        help="batch size for inference.",
    )

    parser.add_argument("-t", "--total", type=int, default=0, help="Total samples to test. 0 means all samples.")

    parser.add_argument(
        "--onnx",
        required=False,
        type=str,
        default=None,
        help="Optional onnx model path. If not specified, optimum will be used to export onnx model for testing.",
    )

    parser.add_argument(
        "--provider",
        required=False,
        default="CUDAExecutionProvider",
        help="Select which Execution Provider to use for runs. Default is CUDAExecutionProvider.",
    )

    parser.add_argument("--use_io_binding", required=False, action="store_true", help="Use IO Binding for GPU.")
    parser.set_defaults(use_io_binding=False)

    args = parser.parse_args(argv)

    return args
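
# Example invocation sketch (the argument values below are illustrative only, not defaults):
#   python3 eval_squad.py -m deepset/roberta-base-squad2 -s 256 384 -b 4 -t 1000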


if __name__ == "__main__":
    main()
|