# -------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
# --------------------------------------------------------------------------
# It is used to dump machine information for Notebooks

import argparse
import json
import logging
import platform
import sys
from os import environ
from typing import Dict, List, Tuple, Union

import cpuinfo
import psutil
from py3nvml.py3nvml import (
    NVMLError,
    nvmlDeviceGetCount,
    nvmlDeviceGetHandleByIndex,
    nvmlDeviceGetMemoryInfo,
    nvmlDeviceGetName,
    nvmlInit,
    nvmlShutdown,
    nvmlSystemGetDriverVersion,
)


class MachineInfo:
    """Class encapsulating Machine Info logic."""

    def __init__(self, silent=False, logger=None):
        """Collect machine info at construction time.

        Args:
            silent: suppress error logging for optional probes (GPU/frameworks).
            logger: optional pre-configured logger; a default one is created otherwise.
        """
        self.silent = silent

        if logger is None:
            logging.basicConfig(
                format="%(asctime)s - %(name)s - %(levelname)s: %(message)s",
                level=logging.INFO,
            )
            self.logger = logging.getLogger(__name__)
        else:
            self.logger = logger

        # Result is stored on the instance; None signals collection failure.
        self.machine_info = None
        try:
            self.machine_info = self.get_machine_info()
        except Exception:
            self.logger.exception("Exception in getting machine info.")
            self.machine_info = None

    def get_machine_info(self) -> Dict:
        """Get machine info in metric format."""
        gpu_info = self.get_gpu_info_by_nvml()
        cpu_info = cpuinfo.get_cpu_info()

        machine_info = {
            "gpu": gpu_info,
            "cpu": self.get_cpu_info(),
            "memory": self.get_memory_info(),
            "os": platform.platform(),
            "python": self._try_get(cpu_info, ["python_version"]),
            "packages": self.get_related_packages(),
            "onnxruntime": self.get_onnxruntime_info(),
            "pytorch": self.get_pytorch_info(),
            "tensorflow": self.get_tensorflow_info(),
        }
        return machine_info

    def get_memory_info(self) -> Dict:
        """Get memory info (bytes)."""
        mem = psutil.virtual_memory()
        return {"total": mem.total, "available": mem.available}

    def _try_get(self, cpu_info: Dict, names: List) -> str:
        """Return the first present key's value from cpu_info.

        List/tuple values are joined with commas; returns "" when no key is found.
        NOTE: a scalar value is returned as-is (may be non-str, e.g. cache size).
        """
        for name in names:
            if name in cpu_info:
                value = cpu_info[name]
                if isinstance(value, (list, tuple)):
                    return ",".join([str(i) for i in value])
                return value
        return ""

    def get_cpu_info(self) -> Dict:
        """Get CPU info."""
        cpu_info = cpuinfo.get_cpu_info()

        return {
            "brand": self._try_get(cpu_info, ["brand", "brand_raw"]),
            "cores": psutil.cpu_count(logical=False),
            "logical_cores": psutil.cpu_count(logical=True),
            "hz": self._try_get(cpu_info, ["hz_actual"]),
            "l2_cache": self._try_get(cpu_info, ["l2_cache_size"]),
            "flags": self._try_get(cpu_info, ["flags"]),
            "processor": platform.uname().processor,
        }

    def get_gpu_info_by_nvml(self) -> Dict:
        """Get GPU info using nvml. Returns None when NVML is unavailable."""
        gpu_info_list = []
        driver_version = None
        nvml_initialized = False
        try:
            nvmlInit()
            nvml_initialized = True
            driver_version = nvmlSystemGetDriverVersion()
            device_count = nvmlDeviceGetCount()
            for i in range(device_count):
                handle = nvmlDeviceGetHandleByIndex(i)
                info = nvmlDeviceGetMemoryInfo(handle)
                gpu_info = {
                    "memory_total": info.total,
                    "memory_available": info.free,
                    "name": nvmlDeviceGetName(handle),
                }
                gpu_info_list.append(gpu_info)
        except NVMLError as error:
            if not self.silent:
                self.logger.error("Error fetching GPU information using nvml: %s", error)
            return None
        finally:
            # Always release NVML once it was initialized, even if a later
            # NVML call raised mid-loop (the original leaked the handle here).
            if nvml_initialized:
                try:
                    nvmlShutdown()
                except NVMLError:
                    pass

        result = {"driver_version": driver_version, "devices": gpu_info_list}

        if "CUDA_VISIBLE_DEVICES" in environ:
            result["cuda_visible"] = environ["CUDA_VISIBLE_DEVICES"]
        return result

    def get_related_packages(self) -> Dict[str, str]:
        """Map installed package name -> version for packages relevant to ORT benchmarks."""
        import pkg_resources

        installed_packages = pkg_resources.working_set
        related_packages = [
            "onnxruntime-gpu",
            "onnxruntime",
            "ort-nightly-gpu",
            "ort-nightly",
            "onnx",
            "transformers",
            "protobuf",
            "sympy",
            "torch",
            "tensorflow",
            "flatbuffers",
            "numpy",
            "onnxconverter-common",
        ]
        return {i.key: i.version for i in installed_packages if i.key in related_packages}

    def get_onnxruntime_info(self) -> Dict:
        """Get onnxruntime version and GPU support; None when unavailable."""
        try:
            import onnxruntime

            return {
                "version": onnxruntime.__version__,
                "support_gpu": "CUDAExecutionProvider" in onnxruntime.get_available_providers(),
            }
        except ImportError as error:
            if not self.silent:
                self.logger.exception(error)
            return None
        except Exception as exception:
            if not self.silent:
                # logger.exception takes only the message; extra positional
                # args are %-formatted into it (the original passed a stray
                # False, raising TypeError inside logging).
                self.logger.exception(exception)
            return None

    def get_pytorch_info(self) -> Dict:
        """Get PyTorch version and CUDA info; None when unavailable."""
        try:
            import torch

            return {
                "version": torch.__version__,
                "support_gpu": torch.cuda.is_available(),
                "cuda": torch.version.cuda,
            }
        except ImportError as error:
            if not self.silent:
                self.logger.exception(error)
            return None
        except Exception as exception:
            if not self.silent:
                self.logger.exception(exception)
            return None

    def get_tensorflow_info(self) -> Dict:
        """Get TensorFlow version and CUDA build info; None when unavailable."""
        try:
            import tensorflow as tf

            return {
                "version": tf.version.VERSION,
                "git_version": tf.version.GIT_VERSION,
                "support_gpu": tf.test.is_built_with_cuda(),
            }
        # ModuleNotFoundError is a subclass of ImportError, so one handler
        # covers both (the original's second handler was unreachable).
        except ImportError as error:
            if not self.silent:
                self.logger.exception(error)
            return None
        except Exception as exception:
            # Fallback kept consistent with the onnxruntime/pytorch probes.
            if not self.silent:
                self.logger.exception(exception)
            return None


def parse_arguments():
    """Parse command-line arguments for this script."""
    parser = argparse.ArgumentParser()

    parser.add_argument(
        "--silent",
        required=False,
        action="store_true",
        help="Do not print error message",
    )
    parser.set_defaults(silent=False)

    args = parser.parse_args()
    return args


def get_machine_info(silent=True) -> str:
    """Return machine info as a JSON string (value is null on collection failure)."""
    machine = MachineInfo(silent)
    return json.dumps(machine.machine_info, indent=2)


if __name__ == "__main__":
    args = parse_arguments()
    print(get_machine_info(args.silent))