# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.

import argparse
import logging
import os
import pathlib
import tempfile
from collections import deque
from enum import IntEnum

import onnx

from ..onnx_model_utils import (
    get_producer_consumer_maps,
    is_fixed_size_tensor,
    iterate_graph_per_graph_func,
    iterate_graph_per_node_func,
    optimize_model,
)


class _SupportedOpsChecker:
    """
    Class to process the md file with the list of supported ops and caveats for an execution provider.
    e.g. /tools/ci_build/github/android/nnapi_supported_ops.md
         /tools/ci_build/github/apple/coreml_supported_ops.md
    """

    def __init__(self, filename):
        self._filename = filename
        self._ops = {}  # op to caveats
        self._ops_seen = set()

        with open(filename, "r") as f:
            for line in f.readlines():
                # we're looking for a markdown table with 2 columns. first is op name. second is caveats
                # op name is domain:op
                if line.startswith("|"):
                    pieces = line.strip().split("|")
                    if len(pieces) == 4:  # pre-first '|', op, caveat, post-last '|'
                        domain_op = pieces[1]
                        caveat = pieces[2]
                        caveat = caveat.replace("<br/>", " ")  # remove some HTML tags
                        # skip lines that don't have the ':' which separates the domain and op
                        # e.g. the table header will fail this check
                        if ":" in domain_op:
                            self._ops[domain_op] = caveat
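
    # For illustration, rows in the format parsed above look like the following (the ops and caveats here are
    # made-up examples; see the real .md files referenced in the class docstring):
    #
    #   |Operator|Note|
    #   |--------|----|
    #   |ai.onnx:Add||
    #   |ai.onnx:Conv|Only 2D Conv is supported.|
    #
    # The header and separator rows are skipped because their first column has no ':'.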

    def is_op_supported(self, node):
        domain = node.domain if node.domain else "ai.onnx"
        domain_op = domain + ":" + node.op_type

        is_supported = domain_op in self._ops
        if is_supported:
            self._ops_seen.add(domain_op)

        return is_supported

    def get_caveats(self):
        caveats = []
        for op in sorted(self._ops_seen):
            caveat = self._ops[op]
            if caveat:
                caveats.append(f"{op}:{caveat}")

        return caveats


class PartitioningInfo:
    class TryWithEP(IntEnum):
        NO = 0
        MAYBE = 1
        YES = 2

    def __init__(self):
        self.num_nodes = -1  # main graph only
        self.num_supported_nodes = -1
        self.num_partitions = -1
        self.num_nodes_in_subgraphs = -1  # nodes not covered as we don't currently handle subgraphs in nnapi/coreml
        self.supported_ops_checker = None
        self.supported_groups = []
        self.unsupported_ops = set()
        self.nodes_unsupported_due_to_op = -1
        self.nodes_unsupported_due_to_dynamic_input = -1

    def suitability(self):
        # for now add up all the nodes. if there are subgraphs, the percentage of covered nodes will be reduced by
        # all nodes in the subgraphs.
        num_nodes = self.num_nodes + self.num_nodes_in_subgraphs

        # semi-arbitrary choices that err on the side of MAYBE.
        # having 1 partition is always preferred, but if that is small it may not be useful.
        # having 2 partitions may be okay if they cover most nodes.
        # with more than 2 partitions the device copy cost is almost guaranteed to outweigh the benefit of using
        # the NPU.
        # NOTE: This assumes the EP is not CPU based and there is device copy overhead to consider.
        pct_supported = self.num_supported_nodes / num_nodes * 100
        if self.num_partitions == 1:
            if pct_supported > 75:
                return PartitioningInfo.TryWithEP.YES
            elif pct_supported > 50:
                return PartitioningInfo.TryWithEP.MAYBE
            else:
                return PartitioningInfo.TryWithEP.NO

        if self.num_partitions == 2:
            if pct_supported > 75:
                return PartitioningInfo.TryWithEP.MAYBE
            else:
                return PartitioningInfo.TryWithEP.NO

        return PartitioningInfo.TryWithEP.NO
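
    # For reference, the heuristic above as a table (percentages are of all nodes, including nodes in subgraphs):
    #
    #   num_partitions | >75% supported | >50% supported | otherwise
    #   ---------------+----------------+----------------+----------
    #   1              | YES            | MAYBE          | NO
    #   2              | MAYBE          | NO             | NO
    #   3+             | NO             | NO             | NO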

    def dump_analysis(self, logger: logging.Logger, ep_name: str):
        """
        Analyze the partitioning information and log the analysis.
        :param logger: Logger to use
        :param ep_name: Execution provider name to use in the log messages
        """
        num_nodes = self.num_nodes + self.num_nodes_in_subgraphs
        logger.info(
            f"{self.num_partitions} partitions with a total of {self.num_supported_nodes}/{num_nodes} "
            f"nodes can be handled by the {ep_name} EP."
        )
        if self.num_nodes_in_subgraphs:
            logger.info(f"{self.num_nodes_in_subgraphs} nodes are in subgraphs, which are currently not handled.")

        if self.supported_groups:
            logger.info(
                f'Partition sizes: [{", ".join([str(len(partition)) for partition in self.supported_groups])}]'
            )

        logger.info(f"Unsupported nodes due to operator={self.nodes_unsupported_due_to_op}")
        if self.nodes_unsupported_due_to_dynamic_input:
            logger.info(
                "Unsupported nodes due to input having a dynamic shape=%d",
                self.nodes_unsupported_due_to_dynamic_input,
            )

        if logger.getEffectiveLevel() <= logging.DEBUG:
            # Enable this manually if you need to look at specific partitions.
            # for group in supported_groups:
            #     logger.debug(f'Nodes in group: {",".join([f"{node.name}:{node.op_type}" for node in group])}')

            if self.unsupported_ops:
                logger.info(f'Unsupported ops: {",".join(sorted(self.unsupported_ops))}')

            caveats = self.supported_ops_checker.get_caveats()
            if caveats:
                indent = " " * 5
                logger.debug(
                    "Caveats that have not been checked and may result in a node not being supported: "
                    f'{"".join([os.linesep + indent + caveat for caveat in caveats])}'
                )

        pct_nodes_using_ep = self.num_supported_nodes / num_nodes * 100
        if self.num_partitions == 0:
            logger.info(f"{ep_name} cannot run any nodes in this model.")
        elif self.num_partitions == 1:
            if pct_nodes_using_ep > 75:
                logger.info(
                    f"{ep_name} should work well for this model as there is one partition "
                    f"covering {pct_nodes_using_ep:.1f}% of the nodes in the model."
                )
            elif pct_nodes_using_ep > 50:
                logger.info(
                    f"{ep_name} may work well for this model, however only {pct_nodes_using_ep:.1f}% of nodes "
                    "will use it. Performance testing is required to validate."
                )
            else:
                logger.info(
                    f"{ep_name} will probably not work well for this model as only {pct_nodes_using_ep:.1f}% "
                    "of nodes will use it."
                )
        elif self.num_partitions == 2 and pct_nodes_using_ep > 75:
            logger.info(
                f"{ep_name} can be considered for this model as there are two partitions "
                f"covering {pct_nodes_using_ep:.1f}% of the nodes. "
                "Performance testing is required to validate."
            )
        else:
            logger.info(
                f"{ep_name} is not recommended with this model as there are {self.num_partitions} partitions "
                f"covering {pct_nodes_using_ep:.1f}% of the nodes in the model. "
                "This will most likely result in worse performance than just using the CPU EP."
            )


def check_partitioning(
    graph: onnx.GraphProto,
    supported_ops_checker: _SupportedOpsChecker,
    require_fixed_input_sizes: bool = False,
    value_info: dict = None,
):
    """
    Estimate the partitions the graph will be split into for the nodes that supported_ops_checker considers
    supported. The check on whether a node is supported is purely based on the operator type. Additional
    limitations (e.g. the NNAPI EP only supports 2D Conv) are not checked, so partitions may not be 100% accurate.
    The limitations for operators in the partitions are printed so the user can manually check.
    :param graph: Graph to process.
    :param supported_ops_checker: Checker with info on supported ops.
    :param require_fixed_input_sizes: If True, require that the inputs to a potentially supported node are
                                      fixed size tensors for it to be considered as supported.
                                      If True, onnx.shape_inference.infer_shapes should have been run on the model
                                      to populate the shape information.
    :param value_info: Map of value name to ValueInfoProto. Required if require_fixed_input_sizes is True to look up
                       the shape of a value.
    :return: PartitioningInfo instance with details.
    """
    if require_fixed_input_sizes and not value_info:
        raise ValueError("value_info must be provided if require_fixed_input_sizes is True.")

    node_to_producers, node_to_consumers = get_producer_consumer_maps(graph)

    # initializers have fixed sizes.
    # TODO: when adding subgraph support we also need to match against initializers in ancestor graphs as they are
    # accessible from the outer scope (unless shadowed locally)
    initializers = [i.name for i in graph.initializer]

    def _is_fixed_shape_value(value):
        if value in value_info:
            return is_fixed_size_tensor(value_info[value])

        if value in initializers:
            return True

        # if something has an unknown shape (e.g. something downstream of a Reshape with a dynamic input for the
        # shape) it won't have an entry in value_info
        return False
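
    # e.g. a value whose inferred shape is [1, 3, 224, 224] is a fixed size tensor; one with a symbolic or unknown
    # dimension such as ['batch', 3, 224, 224] is not, and a value with no value_info entry at all is treated as
    # having a dynamic shape.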

    #
    # Replicate logic from /onnxruntime/core/providers/partitioning_utils.cc:CreateSupportedPartitionNodeGroups
    # to roughly estimate the number of partitions for the nodes that are considered supported.
    #
    # We keep the structure and variable names as close as possible to the C++ implementation to simplify keeping
    # them in sync if future updates are needed.
    #
    # we don't currently support a callback for additional group closure checks in the python implementation
    on_group_closed_fn = None

    supported_groups = []

    # number of inputs from unprocessed nodes (in-degree) per node
    in_degree = {}

    # nodes that are ready to process
    nodes_to_process = deque()  # deque of Node instances

    # nodes that will be processed when considering the next partition node group
    nodes_to_process_with_next_group = deque()

    # initialize in-degrees and find root nodes
    for node in graph.node:
        node_input_edge_count = len(node_to_producers[node]) if node in node_to_producers else 0
        in_degree[node] = node_input_edge_count
        if node_input_edge_count == 0:
            # node is only dependent on graph input or initializers
            nodes_to_process.append(node)

    # currently we don't support checking subgraphs in the partitioning as they're not handled by NNAPI/CoreML.
    # check how many nodes are in that blind spot so we can adjust the recommendation accordingly.
    # note: need to pass count in a list so that it's effectively by reference
    def _count_subgraph_nodes(cur_graph: onnx.GraphProto, original_graph: onnx.GraphProto, count: list):
        if cur_graph != original_graph:
            count[0] += len(cur_graph.node)

    nodes_in_subgraphs = [0]  # list with a single value
    iterate_graph_per_graph_func(graph, _count_subgraph_nodes, original_graph=graph, count=nodes_in_subgraphs)

    supported_group = []
    # the partition node group's border is the aggregate of its nodes' output nodes
    supported_group_border = set()
    num_supported_nodes = 0
    num_unsupported_nodes_due_to_op = 0
    num_unsupported_nodes_due_to_dynamic_input = 0
    unsupported_ops = set()

    def close_group():
        if supported_group:
            keep_partition = not on_group_closed_fn or on_group_closed_fn(supported_group)

            if keep_partition:
                supported_groups.append(supported_group.copy())

            supported_group.clear()
            supported_group_border.clear()
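
    # The loop below is essentially Kahn's topological sort: a node is processed once all of its producers have
    # been processed, and consecutive supported nodes are accumulated into the current partition node group.
    # An unsupported node on the group's border is deferred via nodes_to_process_with_next_group, so the group is
    # only closed once all reachable supported nodes have been added to it.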

    while nodes_to_process or nodes_to_process_with_next_group:
        if not nodes_to_process:
            close_group()
            nodes_to_process = nodes_to_process_with_next_group
            nodes_to_process_with_next_group = deque()
            continue

        node = nodes_to_process.popleft()

        is_op_supported = supported_ops_checker.is_op_supported(node)
        is_input_shape_supported = not require_fixed_input_sizes or all(_is_fixed_shape_value(i) for i in node.input)
        is_node_supported = is_op_supported and is_input_shape_supported

        if not is_node_supported:
            if node in supported_group_border:
                # an unsupported node on the border will be processed after the current partition node group
                # so skip any additional processing/counting here
                nodes_to_process_with_next_group.append(node)
                continue

            if not is_op_supported:
                unsupported_ops.add(f'{node.domain if node.domain else "ai.onnx"}:{node.op_type}')
                num_unsupported_nodes_due_to_op += 1
            else:
                num_unsupported_nodes_due_to_dynamic_input += 1

        if is_node_supported:
            num_supported_nodes += 1

            # add node to the partition node group
            supported_group.append(node)

            # remove node from the border and add its outputs to the border
            if node in supported_group_border:
                supported_group_border.remove(node)

            # for each consumer node add to supported_group_border
            if node in node_to_consumers:
                for consumer in node_to_consumers[node]:
                    supported_group_border.add(consumer)

        # adjust in-degrees of the node outputs and add any new nodes to process
        if node in node_to_consumers:
            for consumer in node_to_consumers[node]:
                consumer_node_in_degree = in_degree[consumer]
                consumer_node_in_degree -= 1
                if consumer_node_in_degree == 0:
                    nodes_to_process.append(consumer)

                in_degree[consumer] = consumer_node_in_degree

    close_group()

    # find any subgraphs and check whether the nodes in them are supported. this won't change the partitioning as
    # we skip Scan/Loop/If nodes, but it will provide additional info on operators that are not supported if we
    # changed that.
    iterate_graph_per_node_func(graph, supported_ops_checker.is_op_supported)

    num_nodes = len(graph.node)
    num_partitions = len(supported_groups)

    info = PartitioningInfo()
    info.num_nodes = num_nodes
    info.num_supported_nodes = num_supported_nodes
    info.num_partitions = num_partitions
    info.num_nodes_in_subgraphs = nodes_in_subgraphs[0]
    info.supported_ops_checker = supported_ops_checker
    info.supported_groups = supported_groups
    info.unsupported_ops = unsupported_ops
    info.nodes_unsupported_due_to_op = num_unsupported_nodes_due_to_op
    info.nodes_unsupported_due_to_dynamic_input = num_unsupported_nodes_due_to_dynamic_input

    return info
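
# A minimal sketch of using check_partitioning directly (the paths here are illustrative; the helpers below
# locate the real op-support .md files for the NNAPI and CoreML EPs):
#
#   model = onnx.load("model.onnx")
#   supported_ops = _SupportedOpsChecker("nnapi_supported_ops.md")
#   info = check_partitioning(model.graph, supported_ops)
#   print(info.suitability().name)  # NO / MAYBE / YES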


def _check_ep_partitioning(model, supported_ops_config, value_info: dict = None):
    supported_ops = _SupportedOpsChecker(supported_ops_config)
    partition_info = check_partitioning(model.graph, supported_ops, value_info is not None, value_info)
    return partition_info


def check_nnapi_partitions(model, value_info: dict = None):
    # if we're running in the ORT python package the file should be local. otherwise assume we're running from the
    # ORT repo
    script_dir = pathlib.Path(__file__).parent
    local_config = script_dir / "nnapi_supported_ops.md"
    if local_config.exists():
        config_path = local_config
    else:
        ort_root = script_dir.parents[3]
        config_path = ort_root / "tools" / "ci_build" / "github" / "android" / "nnapi_supported_ops.md"

    return _check_ep_partitioning(model, config_path, value_info)


def check_coreml_partitions(model, value_info: dict = None):
    # if we're running in the ORT python package the file should be local. otherwise assume we're running from the
    # ORT repo
    script_dir = pathlib.Path(__file__).parent
    local_config = script_dir / "coreml_supported_ops.md"
    if local_config.exists():
        config_path = local_config
    else:
        ort_root = script_dir.parents[3]
        config_path = ort_root / "tools" / "ci_build" / "github" / "apple" / "coreml_supported_ops.md"

    return _check_ep_partitioning(model, config_path, value_info)


def check_shapes(graph: onnx.GraphProto, logger: logging.Logger = None):
    """
    Check the shapes of graph inputs, values and graph outputs to determine if they have static or dynamic sizes.
    NNAPI and CoreML do not support dynamically sized values.
    :param graph: Graph to check. If shape inferencing has been run the checks on values will be meaningful.
    :param logger: Optional logger for diagnostic information.
    :return: Tuple of the list of inputs with dynamic shapes and the number of dynamic values found.
    """
    # it's OK if the input is dynamically sized and we do a Resize early to a fixed size.
    # it's not good if lots of ops have dynamic inputs

    num_fixed_values = 0
    num_dynamic_values = 0

    dynamic_inputs = []
    for i in graph.input:
        if not is_fixed_size_tensor(i):
            dynamic_inputs.append(i)
            # split/join to remove repeated whitespace and newlines from str(i)
            if logger:
                logger.info(f"Input is not a fixed size tensor: {' '.join(str(i).split())}")
            num_dynamic_values += 1
        else:
            num_fixed_values += 1

    dynamic_outputs = []
    for o in graph.output:
        if not is_fixed_size_tensor(o):
            dynamic_outputs.append(o)
            if logger:
                logger.info(f"Output is not a fixed size tensor: {' '.join(str(o).split())}")
            num_dynamic_values += 1
        else:
            num_fixed_values += 1

    # check we have value info.
    # special case some test graphs with a single node which only have graph input and output values, and
    # a model where all inputs are dynamic (results in no value_info)
    if logger and not graph.value_info and not (len(graph.node) == 1 or len(dynamic_inputs) == len(graph.input)):
        logger.warning(
            "Unable to check shapes within model. "
            "ONNX shape inferencing should be run on the model prior to checking."
        )

    for vi in graph.value_info:
        if is_fixed_size_tensor(vi):
            num_fixed_values += 1
        else:
            num_dynamic_values += 1

    if logger:
        logger.info(
            f"Num values with fixed shape={num_fixed_values}. Num values with dynamic shape={num_dynamic_values}"
        )

        if dynamic_inputs:
            if dynamic_outputs:
                logger.info(
                    "Model has dynamic inputs and outputs. Consider re-exporting model with fixed sizes "
                    "if NNAPI or CoreML can be used with this model."
                )
            else:
                logger.info(
                    "Model has dynamically sized inputs but fixed sized outputs. If the sizes become fixed "
                    "early in the model (e.g. pre-processing of a dynamic input size results in a fixed input "
                    "size for the majority of the model) performance with NNAPI and CoreML, if applicable, "
                    "should not be significantly impacted."
                )

    return dynamic_inputs, num_dynamic_values
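
# For example, an input declared with a symbolic batch dimension such as ['batch', 3, 224, 224] is reported as
# dynamic by check_shapes. The remediation this tool suggests later is
# python -m onnxruntime.tools.make_dynamic_shape_fixed.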


def checker(model_path, logger: logging.Logger):
    model = onnx.load(model_path)
    model_with_shape_info = onnx.shape_inference.infer_shapes(model)

    # create lookup map for efficiency
    value_to_shape = {}
    for v in model_with_shape_info.graph.input:
        value_to_shape[v.name] = v
    for v in model_with_shape_info.graph.output:
        value_to_shape[v.name] = v
    for v in model_with_shape_info.graph.value_info:
        value_to_shape[v.name] = v

    dynamic_inputs, num_dynamic_values = check_shapes(model_with_shape_info.graph)

    def check_ep(ep_name, checker_func):
        logger.info(f"Checking {ep_name}")

        # check with shape info first so the set of supported nodes takes into account values with dynamic shapes
        partition_info = checker_func(model_with_shape_info, value_to_shape)
        if logger.getEffectiveLevel() <= logging.DEBUG:
            partition_info.dump_analysis(logger, ep_name)

        suitability = partition_info.suitability()
        logger.info(f"Model should perform well with {ep_name} as is: {suitability.name}")

        if suitability != PartitioningInfo.TryWithEP.YES and dynamic_inputs:
            logger.info("Checking if model will perform better if the dynamic shapes are fixed...")
            partition_info_with_fixed_shapes = checker_func(model_with_shape_info)
            if logger.getEffectiveLevel() <= logging.DEBUG:
                # analyze and log detailed info
                logger.info("Partition information if the model was updated to make the shapes fixed:")
                partition_info_with_fixed_shapes.dump_analysis(logger, ep_name)

            fixed_shape_suitability = partition_info_with_fixed_shapes.suitability()
            logger.info(
                f"Model should perform well with {ep_name} if modified to have fixed input shapes: "
                f"{fixed_shape_suitability.name}"
            )
            if fixed_shape_suitability != PartitioningInfo.TryWithEP.NO:
                logger.info("Shapes can be altered using python -m onnxruntime.tools.make_dynamic_shape_fixed")

            if fixed_shape_suitability.value > suitability.value:
                suitability = fixed_shape_suitability

        return suitability

    nnapi_suitability = check_ep("NNAPI", check_nnapi_partitions)
    coreml_suitability = check_ep("CoreML", check_coreml_partitions)

    if (
        nnapi_suitability != PartitioningInfo.TryWithEP.YES or coreml_suitability != PartitioningInfo.TryWithEP.YES
    ) and logger.getEffectiveLevel() > logging.DEBUG:
        logger.info("Re-run with log level of DEBUG for more details on the NNAPI/CoreML issues.")

    logger.info("---------------")
    return nnapi_suitability != PartitioningInfo.TryWithEP.NO or coreml_suitability != PartitioningInfo.TryWithEP.NO


def analyze_model(model_path: pathlib.Path, skip_optimize: bool = False, logger: logging.Logger = None):
    """
    Analyze the provided model to determine if it's likely to work well with the NNAPI or CoreML Execution
    Providers.
    :param model_path: Model to analyze.
    :param skip_optimize: Skip optimizing to BASIC level before checking. When exporting to ORT format we will do
                          this optimization.
    :param logger: Logger for output.
    :return: True if either the NNAPI or CoreML Execution Providers may work well with this model.
    """
    if not logger:
        logger = logging.getLogger("usability_checker")
        logger.setLevel(logging.INFO)

    logger.info(f"Checking {model_path} for usability with ORT Mobile.")

    with tempfile.TemporaryDirectory() as tmp:
        if not skip_optimize:
            tmp_path = pathlib.Path(tmp) / model_path.name
            optimize_model(model_path, tmp_path)
            model_path = tmp_path

        try_eps = checker(str(model_path.resolve(strict=True)), logger)

    return try_eps
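
# A minimal sketch of calling analyze_model programmatically (the model path is illustrative):
#
#   import logging
#   import pathlib
#
#   logging.basicConfig(level=logging.INFO)  # ensure the INFO-level analysis output is visible
#   may_work = analyze_model(pathlib.Path("model.onnx"))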


def parse_args():
    parser = argparse.ArgumentParser(
        os.path.basename(__file__), description="""Analyze an ONNX model for usage with ORT Mobile."""
    )

    parser.add_argument(
        "--log_level", choices=["debug", "info", "warning", "error"], default="info", help="Logging level"
    )
    parser.add_argument(
        "--skip_optimize",
        action="store_true",
        help="Don't optimize the model to BASIC level prior to analyzing. "
        "Optimization will occur when exporting the model to ORT format, so in general "
        "this should not be skipped unless you have a specific reason to do so.",
    )
    parser.add_argument("model_path", type=pathlib.Path, help="Path to the ONNX model.")

    return parser.parse_args()


def run_analyze_model():
    args = parse_args()
    logger = logging.getLogger("default")

    if args.log_level == "debug":
        logger.setLevel(logging.DEBUG)
    elif args.log_level == "info":
        logger.setLevel(logging.INFO)
    elif args.log_level == "warning":
        logger.setLevel(logging.WARNING)
    else:
        logger.setLevel(logging.ERROR)

    model_path = args.model_path.resolve()
    analyze_model(model_path, args.skip_optimize, logger)


if __name__ == "__main__":
    run_analyze_model()
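
# Example invocation. The relative import at the top of this file means it must be run as a module; the module
# path below assumes the layout of the ORT python package and may differ when running from a repo checkout:
#
#   python -m onnxruntime.tools.mobile_helpers.usability_checker --log_level debug model.onnx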