m2m model translation
# -------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
# --------------------------------------------------------------------------
# It is used to dump machine information for Notebooks

import argparse
import json
import logging
import platform
import sys
from os import environ
from typing import Dict, List, Tuple, Union

import cpuinfo
import psutil
from py3nvml.py3nvml import (
    NVMLError,
    nvmlDeviceGetCount,
    nvmlDeviceGetHandleByIndex,
    nvmlDeviceGetMemoryInfo,
    nvmlDeviceGetName,
    nvmlInit,
    nvmlShutdown,
    nvmlSystemGetDriverVersion,
)

class MachineInfo:
    """Class encapsulating Machine Info logic."""

    def __init__(self, silent=False, logger=None):
        self.silent = silent

        if logger is None:
            logging.basicConfig(
                format="%(asctime)s - %(name)s - %(levelname)s: %(message)s",
                level=logging.INFO,
            )
            self.logger = logging.getLogger(__name__)
        else:
            self.logger = logger

        self.machine_info = None
        try:
            self.machine_info = self.get_machine_info()
        except Exception:
            self.logger.exception("Exception in getting machine info.")
            self.machine_info = None

    def get_machine_info(self):
        """Get machine info in metric format"""
        gpu_info = self.get_gpu_info_by_nvml()
        cpu_info = cpuinfo.get_cpu_info()

        machine_info = {
            "gpu": gpu_info,
            "cpu": self.get_cpu_info(),
            "memory": self.get_memory_info(),
            "os": platform.platform(),
            "python": self._try_get(cpu_info, ["python_version"]),
            "packages": self.get_related_packages(),
            "onnxruntime": self.get_onnxruntime_info(),
            "pytorch": self.get_pytorch_info(),
            "tensorflow": self.get_tensorflow_info(),
        }
        return machine_info

    def get_memory_info(self) -> Dict:
        """Get memory info"""
        mem = psutil.virtual_memory()
        return {"total": mem.total, "available": mem.available}

    def _try_get(self, cpu_info: Dict, names: List) -> str:
        """Return the value of the first key in names present in cpu_info; list/tuple values are joined with commas."""
        for name in names:
            if name in cpu_info:
                value = cpu_info[name]
                if isinstance(value, (list, tuple)):
                    return ",".join([str(i) for i in value])
                return value
        return ""

    def get_cpu_info(self) -> Dict:
        """Get CPU info"""
        cpu_info = cpuinfo.get_cpu_info()

        return {
            "brand": self._try_get(cpu_info, ["brand", "brand_raw"]),
            "cores": psutil.cpu_count(logical=False),
            "logical_cores": psutil.cpu_count(logical=True),
            "hz": self._try_get(cpu_info, ["hz_actual"]),
            "l2_cache": self._try_get(cpu_info, ["l2_cache_size"]),
            "flags": self._try_get(cpu_info, ["flags"]),
            "processor": platform.uname().processor,
        }

    def get_gpu_info_by_nvml(self) -> Dict:
        """Get GPU info using nvml"""
        gpu_info_list = []
        driver_version = None
        try:
            nvmlInit()
            driver_version = nvmlSystemGetDriverVersion()
            device_count = nvmlDeviceGetCount()
            for i in range(device_count):
                handle = nvmlDeviceGetHandleByIndex(i)
                info = nvmlDeviceGetMemoryInfo(handle)
                gpu_info = {}
                gpu_info["memory_total"] = info.total
                gpu_info["memory_available"] = info.free
                gpu_info["name"] = nvmlDeviceGetName(handle)
                gpu_info_list.append(gpu_info)
            nvmlShutdown()
        except NVMLError as error:
            if not self.silent:
                self.logger.error("Error fetching GPU information using nvml: %s", error)
            return None

        result = {"driver_version": driver_version, "devices": gpu_info_list}

        if "CUDA_VISIBLE_DEVICES" in environ:
            result["cuda_visible"] = environ["CUDA_VISIBLE_DEVICES"]
        return result

    def get_related_packages(self) -> Dict[str, str]:
        """Get versions of related packages that are installed."""
        import pkg_resources

        installed_packages = pkg_resources.working_set
        related_packages = [
            "onnxruntime-gpu",
            "onnxruntime",
            "ort-nightly-gpu",
            "ort-nightly",
            "onnx",
            "transformers",
            "protobuf",
            "sympy",
            "torch",
            "tensorflow",
            "flatbuffers",
            "numpy",
            "onnxconverter-common",
        ]
        return {i.key: i.version for i in installed_packages if i.key in related_packages}

    def get_onnxruntime_info(self) -> Dict:
        """Get onnxruntime version and whether the CUDA execution provider is available."""
        try:
            import onnxruntime

            return {
                "version": onnxruntime.__version__,
                "support_gpu": "CUDAExecutionProvider" in onnxruntime.get_available_providers(),
            }
        except ImportError as error:
            if not self.silent:
                self.logger.exception(error)
            return None
        except Exception as exception:
            if not self.silent:
                self.logger.exception(exception)
            return None

    def get_pytorch_info(self) -> Dict:
        """Get PyTorch version and CUDA availability."""
        try:
            import torch

            return {
                "version": torch.__version__,
                "support_gpu": torch.cuda.is_available(),
                "cuda": torch.version.cuda,
            }
        except ImportError as error:
            if not self.silent:
                self.logger.exception(error)
            return None
        except Exception as exception:
            if not self.silent:
                self.logger.exception(exception)
            return None

    def get_tensorflow_info(self) -> Dict:
        """Get TensorFlow version and whether it was built with CUDA."""
        try:
            import tensorflow as tf

            return {
                "version": tf.version.VERSION,
                "git_version": tf.version.GIT_VERSION,
                "support_gpu": tf.test.is_built_with_cuda(),
            }
        except ImportError as error:
            # ModuleNotFoundError is a subclass of ImportError, so a single handler covers both.
            if not self.silent:
                self.logger.exception(error)
            return None


def parse_arguments():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--silent",
        required=False,
        action="store_true",
        help="Do not print error message",
    )
    parser.set_defaults(silent=False)
    args = parser.parse_args()
    return args


def get_machine_info(silent=True) -> str:
    machine = MachineInfo(silent)
    return json.dumps(machine.machine_info, indent=2)


if __name__ == "__main__":
    args = parse_arguments()
    print(get_machine_info(args.silent))
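
A minimal usage sketch follows, assuming the module above is saved as machine_info.py (the filename is an assumption) and that psutil, py-cpuinfo, and py3nvml are installed. It shows the two ways the module is meant to be consumed: the JSON-string helper and the raw dictionary on a MachineInfo instance.

# Usage sketch; assumes the file above is saved as machine_info.py.
from machine_info import MachineInfo, get_machine_info

# Print the collected machine info as pretty-printed JSON, suppressing error logs.
print(get_machine_info(silent=True))

# Or work with the raw dictionary directly, e.g. to check onnxruntime GPU support.
info = MachineInfo(silent=True).machine_info
if info and info.get("onnxruntime"):
    print("onnxruntime GPU support:", info["onnxruntime"]["support_gpu"])

Running the script directly (python machine_info.py) prints the same JSON; passing --silent suppresses error messages when GPU or framework queries fail.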