# -------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
# --------------------------------------------------------------------------
#
# This script evaluates the accuracy of ONNX models for the question-answering task on the SQuAD data set.
#
# Example to evaluate the raw and optimized models for CUDA on Linux:
#    pip3 install datasets evaluate optimum transformers onnxruntime-gpu
#    python3 eval_squad.py -m distilbert-base-cased-distilled-squad
#    python3 -m onnxruntime.transformers.optimizer --output optimized_fp16.onnx --num_heads 12 --hidden_size 768 \
#            --input /home/$USER/.cache/huggingface/hub/distilbert-base-cased-distilled-squad/model.onnx \
#            --use_mask_index --float16
#    python3 eval_squad.py -m distilbert-base-cased-distilled-squad --onnx optimized_fp16.onnx
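#
# A CPU-only run is also possible (command sketch; assumes the CPU onnxruntime package is installed):
#    pip3 install datasets evaluate optimum transformers onnxruntime
#    python3 eval_squad.py -m distilbert-base-cased-distilled-squad --provider CPUExecutionProvider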

import argparse
import csv
import os

try:
    from importlib.metadata import PackageNotFoundError, version
except ImportError:
    # Backport for Python < 3.8.
    from importlib_metadata import PackageNotFoundError, version

from pathlib import Path
from typing import Any, Dict, List, Optional

import torch
from datasets import load_dataset
from evaluate import evaluator
from optimum.onnxruntime import ORTModelForQuestionAnswering
from optimum.onnxruntime.modeling_ort import ORTModel
from transformers import AutoTokenizer, pipeline

PRETRAINED_SQUAD_MODELS = [
    "bert-large-uncased-whole-word-masking-finetuned-squad",
    "deepset/roberta-base-squad2",
    "distilbert-base-cased-distilled-squad",
]


def get_package_version(package_name: str):
    try:
        return version(package_name)
    except PackageNotFoundError:
        return None


def load_onnx_model(
    model_id: str, onnx_path: Optional[str] = None, provider="CUDAExecutionProvider", use_io_binding: bool = False
):
    """Load an ONNX model given a pretrained model name and an optional ONNX model path.
    If onnx_path is None, the default ONNX model exported by optimum will be used.

    Args:
        model_id (str): pretrained model name or checkpoint path
        onnx_path (Optional[str], optional): path of the ONNX model to evaluate. Defaults to None.

    Returns:
        model: ORTModel for the ONNX model
        onnx_path: the path of the ONNX model
    """
    model = ORTModelForQuestionAnswering.from_pretrained(model_id, from_transformers=True)

    if onnx_path is not None:
        # Evaluate a user-provided ONNX model instead of the default optimum export.
        model.latest_model_name = Path(onnx_path).name
        if provider != "CPUExecutionProvider":
            model.device = torch.device("cuda:0")
            model.model = ORTModel.load_model(onnx_path, provider)
        else:
            model.device = torch.device("cpu")
            model.model = ORTModel.load_model(onnx_path)
    else:
        onnx_path = os.path.join(model.model_save_dir.as_posix(), model.latest_model_name)
        if provider != "CPUExecutionProvider":
            model.to("cuda")

    model.use_io_binding = use_io_binding
    return model, onnx_path
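

# A minimal usage sketch (the model name and provider here are just examples):
#
#   model, onnx_path = load_onnx_model(
#       "distilbert-base-cased-distilled-squad", provider="CPUExecutionProvider"
#   )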


def output_details(results: List[Dict[str, Any]], csv_filename: str):
    """Output a CSV file with the details of each test result.

    Args:
        results (List[Dict[str, Any]]): list of JSON results.
        csv_filename (str): path of the output CSV file
    """
    with open(csv_filename, mode="a", newline="", encoding="ascii") as csv_file:
        column_names = [
            "pretrained_model_name",
            "onnx_path",
            "provider",
            "disable_fused_attention",
            "batch_size",
            "sequence_length",
            "use_io_binding",
            "exact",
            "f1",
            "total",
            "HasAns_exact",
            "HasAns_f1",
            "HasAns_total",
            "best_exact",
            "best_exact_thresh",
            "best_f1",
            "best_f1_thresh",
            "total_time_in_seconds",
            "samples_per_second",
            "latency_in_seconds",
        ]

        csv_writer = csv.DictWriter(csv_file, fieldnames=column_names)
        csv_writer.writeheader()
        for result in results:
            csv_writer.writerow(result)

        csv_file.flush()

    print(f"Detail results are saved to csv file: {csv_filename}")


def output_summary(results: List[Dict[str, Any]], csv_filename: str, metric_name: str):
    """Output a CSV file with a summary of a metric over combinations of batch_size and sequence_length.

    Args:
        results (List[Dict[str, Any]]): list of JSON results.
        csv_filename (str): path of the output CSV file
        metric_name (str): the metric to summarize
    """
    with open(csv_filename, mode="a", newline="", encoding="ascii") as csv_file:
        header_names = [
            "pretrained_model_name",
            "onnx_path",
            "provider",
            "disable_fused_attention",
            "use_io_binding",
        ]

        model_list = list({result["onnx_path"] for result in results})
        model_list.sort()

        batch_sizes = list({result["batch_size"] for result in results})
        batch_sizes.sort()

        sequence_lengths = list({result["sequence_length"] for result in results})
        sequence_lengths.sort()
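
        # One summary column per (batch_size, sequence_length) combination, named like b1_s384.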
        key_names = []
        for sequence_length in sequence_lengths:
            for batch_size in batch_sizes:
                key_names.append(f"b{batch_size}_s{sequence_length}")

        csv_writer = csv.DictWriter(csv_file, fieldnames=header_names + key_names)
        csv_writer.writeheader()

        for model in model_list:
            row = {}

            # Metric value for each (batch_size, sequence_length) pair. Assumes (onnx_path,
            # batch_size, sequence_length) is unique; if duplicates exist, the last occurrence wins.
            values = {k: "" for k in key_names}

            for result in results:
                if result["onnx_path"] == model and result[metric_name]:
                    headers = {k: v for k, v in result.items() if k in header_names}
                    if not row:
                        row.update(headers)

                    batch_size = result["batch_size"]
                    sequence_length = result["sequence_length"]
                    key = f"b{batch_size}_s{sequence_length}"
                    if key in key_names:
                        values[key] = result[metric_name]

            if row:
                for key in key_names:
                    row[key] = values[key] if key in values else ""
                csv_writer.writerow(row)
        csv_file.flush()

    print(f"Summary results for {metric_name} are saved to csv file: {csv_filename}")


def main():
    args = parse_arguments()
    print(args)

    for name in ["onnxruntime-gpu", "onnxruntime", "onnx", "torch", "transformers", "optimum", "datasets", "evaluate"]:
        package_version = get_package_version(name)
        if package_version:
            print(f"{name} version", package_version)

    pretrained_model_name = args.model_name
    if args.onnx and not os.path.exists(args.onnx):
        raise RuntimeError(f"Onnx model path does not exist: {args.onnx}")
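
    # Read here only to record in the result rows whether fused attention kernels were
    # disabled (via the ORT_DISABLE_FUSED_ATTENTION environment variable) for this run.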
    disable_fused_attention = os.environ.get("ORT_DISABLE_FUSED_ATTENTION", "0") == "1"

    all_results = []
    tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name)
    for sequence_length in args.sequence_lengths:
        # Cap the tokenizer at the target sequence length; doc_stride is the overlap
        # between chunks when a long context is split into multiple features.
        tokenizer.model_max_length = sequence_length
        tokenizer.doc_stride = min(sequence_length // 2, 128)
        ort_model, onnx_path = load_onnx_model(pretrained_model_name, args.onnx, args.provider, args.use_io_binding)

        print(ort_model.config)
        if sequence_length > ort_model.config.max_position_embeddings:
            raise RuntimeError(f"sequence length should not be larger than {ort_model.config.max_position_embeddings}")
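
        # Wrap the ORT model in a transformers question-answering pipeline so that the
        # evaluator below can feed it SQuAD examples in batches.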
        qa_pipeline = pipeline(
            "question-answering", model=ort_model, tokenizer=tokenizer, question_first=True, batch_size=args.batch_size
        )

        task_evaluator = evaluator("question-answering")
        squad_dataset = load_dataset("squad", split=f"validation[:{args.total}]" if args.total > 0 else "validation")
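
        # The squad_v2 metric with squad_v2_format=True also reports HasAns_* and best_*
        # fields, which correspond to the columns written by output_details.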
        result = task_evaluator.compute(
            model_or_pipeline=qa_pipeline,
            data=squad_dataset,
            metric="squad_v2",
            squad_v2_format=True,
        )

        result["provider"] = args.provider
        result["disable_fused_attention"] = disable_fused_attention
        result["pretrained_model_name"] = pretrained_model_name
        result["onnx_path"] = onnx_path
        result["batch_size"] = args.batch_size
        result["sequence_length"] = sequence_length
        result["use_io_binding"] = args.use_io_binding
        print(result)

        all_results.append(result)

    output_details(all_results, "detail.csv")

    for metric_name in ["f1", "exact", "samples_per_second"]:
        output_summary(all_results, f"{metric_name}.csv", metric_name)


def parse_arguments(argv=None):
    parser = argparse.ArgumentParser()

    parser.add_argument(
        "-m",
        "--model_name",
        required=False,
        type=str,
        default=PRETRAINED_SQUAD_MODELS[0],
        help=f"Checkpoint directory or pre-trained model names in the list: {PRETRAINED_SQUAD_MODELS}",
    )

    parser.add_argument(
        "-s",
        "--sequence_lengths",
        nargs="+",
        type=int,
        default=[384],
        help="Sequence lengths for ONNX model inputs. Multiple values are allowed.",
    )

    parser.add_argument(
        "-b",
        "--batch_size",
        type=int,
        default=1,
        help="Batch size for inference.",
    )

    parser.add_argument("-t", "--total", type=int, default=0, help="Total samples to test. 0 means all samples.")

    parser.add_argument(
        "--onnx",
        required=False,
        type=str,
        default=None,
        help="Optional ONNX model path. If not specified, optimum will be used to export an ONNX model for testing.",
    )

    parser.add_argument(
        "--provider",
        required=False,
        default="CUDAExecutionProvider",
        help="Select which Execution Provider to use for runs. Default is CUDAExecutionProvider.",
    )

    parser.add_argument("--use_io_binding", required=False, action="store_true", help="Use IO Binding for GPU.")
    parser.set_defaults(use_io_binding=False)

    args = parser.parse_args(argv)
    return args
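

# parse_arguments also accepts an explicit argv list, which is handy for quick
# experiments (the arguments shown are illustrative):
#
#   args = parse_arguments(["-m", "deepset/roberta-base-squad2", "-s", "256", "384"])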


if __name__ == "__main__":
    main()