MTtranslateService/Lib/site-packages/onnxruntime/transformers/models/stable_diffusion/benchmark.py

# -------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation.  All rights reserved.
# Licensed under the MIT License.
# --------------------------------------------------------------------------

import argparse
import os
import time

SD_MODELS = {
    "1.5": "runwayml/stable-diffusion-v1-5",
    "2.0": "stabilityai/stable-diffusion-2",
    "2.1": "stabilityai/stable-diffusion-2-1",
}


def get_test_settings():
    height = 512
    width = 512
    num_inference_steps = 50
    prompts = [
        "a photo of an astronaut riding a horse on mars",
        "cute grey cat with blue eyes, wearing a bowtie, acrylic painting",
        "a cute magical flying dog, fantasy art drawn by disney concept artists, highly detailed, digital painting",
        "an illustration of a house with large barn with many cute flower pots and beautiful blue sky scenery",
        "one apple sitting on a table, still life, reflective, full color photograph, centered, close-up product",
        "background texture of stones, masterpiece, artistic, stunning photo, award winner photo",
        "new international organic style house, tropical surroundings, architecture, 8k, hdr",
        "beautiful Renaissance Revival Estate, Hobbit-House, detailed painting, warm colors, 8k, trending on Artstation",
        "blue owl, big green eyes, portrait, intricate metal design, unreal engine, octane render, realistic",
        "delicate elvish moonstone necklace on a velvet background, symmetrical intricate motifs, leaves, flowers, 8k",
    ]

    return height, width, num_inference_steps, prompts


def get_ort_pipeline(model_name: str, directory: str, provider: str, disable_safety_checker: bool):
    from diffusers import OnnxStableDiffusionPipeline

    import onnxruntime

    if directory is not None:
        assert os.path.exists(directory)
        session_options = onnxruntime.SessionOptions()
        pipe = OnnxStableDiffusionPipeline.from_pretrained(
            directory,
            provider=provider,
            sess_options=session_options,
        )
    else:
        pipe = OnnxStableDiffusionPipeline.from_pretrained(
            model_name,
            revision="onnx",
            provider=provider,
            use_auth_token=True,
        )

    if disable_safety_checker:
        pipe.safety_checker = None
        pipe.feature_extractor = None

    return pipe


def get_torch_pipeline(model_name: str, disable_safety_checker: bool):
    from diffusers import StableDiffusionPipeline
    from torch import channels_last, float16

    pipe = StableDiffusionPipeline.from_pretrained(
        model_name, torch_dtype=float16, revision="fp16", use_auth_token=True
    ).to("cuda")

    pipe.unet.to(memory_format=channels_last)  # in-place operation

    if disable_safety_checker:
        pipe.safety_checker = None
        pipe.feature_extractor = None

    return pipe


def get_image_filename_prefix(engine: str, model_name: str, batch_size: int, disable_safety_checker: bool):
    short_model_name = model_name.split("/")[-1].replace("stable-diffusion-", "sd")
    return f"{engine}_{short_model_name}_b{batch_size}" + ("" if disable_safety_checker else "_safe")


def run_ort_pipeline(pipe, batch_size: int, image_filename_prefix: str):
    from diffusers import OnnxStableDiffusionPipeline

    assert isinstance(pipe, OnnxStableDiffusionPipeline)

    height, width, num_inference_steps, prompts = get_test_settings()

    pipe("warm up", height, width, num_inference_steps=2)

    latency_list = []
    for i, prompt in enumerate(prompts):
        input_prompts = [prompt] * batch_size
        inference_start = time.time()
        image = pipe(input_prompts, height, width, num_inference_steps).images[0]
        inference_end = time.time()

        latency = inference_end - inference_start
        latency_list.append(latency)
        print(f"Inference took {latency} seconds")
        image.save(f"{image_filename_prefix}_{i}.jpg")
    print("Average latency in seconds:", sum(latency_list) / len(latency_list))


def run_torch_pipeline(pipe, batch_size: int, image_filename_prefix: str):
    import torch

    height, width, num_inference_steps, prompts = get_test_settings()

    pipe("warm up", height, width, num_inference_steps=2)

    torch.set_grad_enabled(False)

    latency_list = []
    for i, prompt in enumerate(prompts):
        input_prompts = [prompt] * batch_size
        torch.cuda.synchronize()
        inference_start = time.time()
        image = pipe(input_prompts, height, width, num_inference_steps).images[0]
        torch.cuda.synchronize()
        inference_end = time.time()

        latency = inference_end - inference_start
        latency_list.append(latency)
        print(f"Inference took {latency} seconds")
        image.save(f"{image_filename_prefix}_{i}.jpg")

    print("Average latency in seconds:", sum(latency_list) / len(latency_list))


def run_ort(model_name: str, directory: str, provider: str, batch_size: int, disable_safety_checker: bool):
    load_start = time.time()
    pipe = get_ort_pipeline(model_name, directory, provider, disable_safety_checker)
    load_end = time.time()
    print(f"Model loading took {load_end - load_start} seconds")

    image_filename_prefix = get_image_filename_prefix("ort", model_name, batch_size, disable_safety_checker)
    run_ort_pipeline(pipe, batch_size, image_filename_prefix)


def run_torch(model_name: str, batch_size: int, disable_safety_checker: bool):
    import torch

    torch.backends.cudnn.enabled = True
    torch.backends.cudnn.benchmark = True
    # torch.backends.cuda.matmul.allow_tf32 = True

    torch.set_grad_enabled(False)

    load_start = time.time()
    pipe = get_torch_pipeline(model_name, disable_safety_checker)
    load_end = time.time()
    print(f"Model loading took {load_end - load_start} seconds")

    image_filename_prefix = get_image_filename_prefix("torch", model_name, batch_size, disable_safety_checker)
    with torch.inference_mode():
        run_torch_pipeline(pipe, batch_size, image_filename_prefix)


def parse_arguments():
    parser = argparse.ArgumentParser()

    parser.add_argument(
        "-e",
        "--engine",
        required=False,
        type=str,
        default="onnxruntime",
        choices=["onnxruntime", "torch"],
        help="Engines to benchmark. Default is onnxruntime.",
    )

    parser.add_argument(
        "-v",
        "--version",
        required=True,
        type=str,
        choices=list(SD_MODELS.keys()),
        help="Stable diffusion version like 1.5, 2.0 or 2.1",
    )

    parser.add_argument(
        "-p",
        "--pipeline",
        required=False,
        type=str,
        default=None,
        help="Directory of saved onnx pipeline. It could be output directory of optimize_pipeline.py.",
    )

    parser.add_argument(
        "--enable_safety_checker",
        required=False,
        action="store_true",
        help="Enable safety checker",
    )
    parser.set_defaults(enable_safety_checker=False)

    parser.add_argument("-b", "--batch_size", type=int, default=1)

    args = parser.parse_args()
    return args


def main():
    args = parse_arguments()
    print(args)

    sd_model = SD_MODELS[args.version]
    if args.engine == "onnxruntime":
        assert args.pipeline, "--pipeline should be specified for onnxruntime engine"

        if args.batch_size > 1:
            # Need remove a line https://github.com/huggingface/diffusers/blob/a66f2baeb782e091dde4e1e6394e46f169e5ba58/src/diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion.py#L307
            #    in diffuers to run batch_size > 1.
            assert (
                args.enable_safety_checker
            ), "batch_size > 1 is not compatible with safety checker due to a bug in diffuers"

        provider = "CUDAExecutionProvider"  # TODO: use ["CUDAExecutionProvider", "CPUExecutionProvider"] in diffuers
        run_ort(sd_model, args.pipeline, provider, args.batch_size, not args.enable_safety_checker)
    else:
        run_torch(sd_model, args.batch_size, not args.enable_safety_checker)


if __name__ == "__main__":
    main()