# -------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
# --------------------------------------------------------------------------

# It is used to dump machine information for Notebooks
|
import argparse
import json
import logging
import platform
import sys
from os import environ
from typing import Dict, List, Tuple, Union

import cpuinfo
import psutil
from py3nvml.py3nvml import (
    NVMLError,
    nvmlDeviceGetCount,
    nvmlDeviceGetHandleByIndex,
    nvmlDeviceGetMemoryInfo,
    nvmlDeviceGetName,
    nvmlInit,
    nvmlShutdown,
    nvmlSystemGetDriverVersion,
)
|
|
|
|
|
|
class MachineInfo:
    """Class encapsulating Machine Info logic.

    Collects hardware and software environment details (CPU, GPU, memory,
    OS, Python, and ML-framework versions) so notebooks can report the
    machine a benchmark ran on. Collection happens eagerly in ``__init__``;
    the result is stored in ``machine_info`` (``None`` on failure).
    """

    def __init__(self, silent=False, logger=None):
        """
        Args:
            silent: when True, suppress error logging for expected failures
                (e.g. no GPU / framework not installed).
            logger: optional preconfigured logger; a default module logger
                is created when omitted.
        """
        self.silent = silent

        if logger is None:
            logging.basicConfig(
                format="%(asctime)s - %(name)s - %(levelname)s: %(message)s",
                level=logging.INFO,
            )
            self.logger = logging.getLogger(__name__)
        else:
            self.logger = logger

        # Gather everything up front; keep None on failure so callers can
        # still serialize the attribute without special-casing.
        self.machine_info = None
        try:
            self.machine_info = self.get_machine_info()
        except Exception:
            self.logger.exception("Exception in getting machine info.")
            self.machine_info = None

    def get_machine_info(self) -> Dict:
        """Get machine info in metric format."""
        gpu_info = self.get_gpu_info_by_nvml()
        # Fetch cpuinfo once and reuse it for both the "cpu" and "python"
        # fields: cpuinfo.get_cpu_info() is expensive (it probes the CPU).
        cpu_info = cpuinfo.get_cpu_info()

        machine_info = {
            "gpu": gpu_info,
            "cpu": self.get_cpu_info(cpu_info),
            "memory": self.get_memory_info(),
            "os": platform.platform(),
            "python": self._try_get(cpu_info, ["python_version"]),
            "packages": self.get_related_packages(),
            "onnxruntime": self.get_onnxruntime_info(),
            "pytorch": self.get_pytorch_info(),
            "tensorflow": self.get_tensorflow_info(),
        }
        return machine_info

    def get_memory_info(self) -> Dict:
        """Get memory info (total and available bytes)."""
        mem = psutil.virtual_memory()
        return {"total": mem.total, "available": mem.available}

    def _try_get(self, cpu_info: Dict, names: List) -> str:
        """Return the value of the first key in ``names`` found in ``cpu_info``.

        List/tuple values are flattened to a comma-separated string; when no
        key is present an empty string is returned.
        """
        for name in names:
            if name in cpu_info:
                value = cpu_info[name]
                if isinstance(value, (list, tuple)):
                    return ",".join(str(i) for i in value)
                return value
        return ""

    def get_cpu_info(self, cpu_info: Dict = None) -> Dict:
        """Get CPU info.

        Args:
            cpu_info: optional pre-fetched result of ``cpuinfo.get_cpu_info()``;
                fetched here when omitted (keeps old no-arg calls working).
        """
        if cpu_info is None:
            cpu_info = cpuinfo.get_cpu_info()

        return {
            # "brand" is the pre-v6 py-cpuinfo key, "brand_raw" the newer one.
            "brand": self._try_get(cpu_info, ["brand", "brand_raw"]),
            "cores": psutil.cpu_count(logical=False),
            "logical_cores": psutil.cpu_count(logical=True),
            "hz": self._try_get(cpu_info, ["hz_actual"]),
            "l2_cache": self._try_get(cpu_info, ["l2_cache_size"]),
            "flags": self._try_get(cpu_info, ["flags"]),
            "processor": platform.uname().processor,
        }

    def get_gpu_info_by_nvml(self) -> Dict:
        """Get GPU info using nvml; returns None when NVML is unavailable."""
        gpu_info_list = []
        driver_version = None

        try:
            nvmlInit()
        except NVMLError as error:
            # Expected on machines without an NVIDIA driver.
            if not self.silent:
                self.logger.error("Error fetching GPU information using nvml: %s", error)
            return None

        try:
            driver_version = nvmlSystemGetDriverVersion()
            device_count = nvmlDeviceGetCount()
            for i in range(device_count):
                handle = nvmlDeviceGetHandleByIndex(i)
                info = nvmlDeviceGetMemoryInfo(handle)
                gpu_info = {
                    "memory_total": info.total,
                    "memory_available": info.free,
                    "name": nvmlDeviceGetName(handle),
                }
                gpu_info_list.append(gpu_info)
        except NVMLError as error:
            if not self.silent:
                self.logger.error("Error fetching GPU information using nvml: %s", error)
            return None
        finally:
            # Always release NVML, even when a query above fails.
            nvmlShutdown()

        result = {"driver_version": driver_version, "devices": gpu_info_list}

        if "CUDA_VISIBLE_DEVICES" in environ:
            result["cuda_visible"] = environ["CUDA_VISIBLE_DEVICES"]
        return result

    def get_related_packages(self) -> Dict[str, str]:
        """Return {package_name: version} for installed ML-related packages."""
        # Local import: pkg_resources (setuptools) is slow to import and only
        # needed here.
        import pkg_resources

        installed_packages = pkg_resources.working_set
        related_packages = [
            "onnxruntime-gpu",
            "onnxruntime",
            "ort-nightly-gpu",
            "ort-nightly",
            "onnx",
            "transformers",
            "protobuf",
            "sympy",
            "torch",
            "tensorflow",
            "flatbuffers",
            "numpy",
            "onnxconverter-common",
        ]
        return {i.key: i.version for i in installed_packages if i.key in related_packages}

    def get_onnxruntime_info(self) -> Dict:
        """Return onnxruntime version/GPU support, or None when unavailable."""
        try:
            import onnxruntime

            return {
                "version": onnxruntime.__version__,
                "support_gpu": "CUDAExecutionProvider" in onnxruntime.get_available_providers(),
            }
        except ImportError as error:
            if not self.silent:
                self.logger.exception(error)
            return None
        except Exception as exception:
            if not self.silent:
                self.logger.exception(exception)
            return None

    def get_pytorch_info(self) -> Dict:
        """Return torch version/CUDA info, or None when unavailable."""
        try:
            import torch

            return {
                "version": torch.__version__,
                "support_gpu": torch.cuda.is_available(),
                "cuda": torch.version.cuda,
            }
        except ImportError as error:
            if not self.silent:
                self.logger.exception(error)
            return None
        except Exception as exception:
            if not self.silent:
                self.logger.exception(exception)
            return None

    def get_tensorflow_info(self) -> Dict:
        """Return tensorflow version/CUDA build info, or None when unavailable."""
        try:
            import tensorflow as tf

            return {
                "version": tf.version.VERSION,
                "git_version": tf.version.GIT_VERSION,
                "support_gpu": tf.test.is_built_with_cuda(),
            }
        except ImportError as error:
            if not self.silent:
                self.logger.exception(error)
            return None
        except Exception as exception:
            # Keep error handling consistent with the other framework probes;
            # the previous ModuleNotFoundError clause was unreachable
            # (it is a subclass of ImportError).
            if not self.silent:
                self.logger.exception(exception)
            return None
|
|
|
|
|
def parse_arguments():
    """Parse command-line arguments.

    Returns:
        argparse.Namespace with a boolean ``silent`` attribute (default False).
    """
    parser = argparse.ArgumentParser()

    parser.add_argument(
        "--silent",
        required=False,
        action="store_true",
        help="Do not print error message",
    )
    # NOTE: action="store_true" already defaults to False, so the previous
    # parser.set_defaults(silent=False) call was redundant and is removed.

    args = parser.parse_args()
    return args
|
|
|
|
|
|
def get_machine_info(silent=True) -> str:
    """Collect machine information and return it as a pretty-printed JSON string."""
    collector = MachineInfo(silent)
    return json.dumps(collector.machine_info, indent=2)
|
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: dump machine info as JSON to stdout.
    cli_args = parse_arguments()
    print(get_machine_info(cli_args.silent))