# Unlike the rest of PyTorch, this file must be Python 2 compliant.
| # This script outputs relevant system environment info |
| # Run it with `python collect_env.py`. |
| import datetime |
| import locale |
| import re |
| import subprocess |
| import sys |
| import os |
| from collections import namedtuple |
| |
| |
| try: |
| import torch |
| TORCH_AVAILABLE = True |
| except (ImportError, NameError, AttributeError, OSError): |
| TORCH_AVAILABLE = False |
| |
| # System Environment Information |
| SystemEnv = namedtuple('SystemEnv', [ |
| 'torch_version', |
| 'is_debug_build', |
| 'cuda_compiled_version', |
| 'gcc_version', |
| 'clang_version', |
| 'cmake_version', |
| 'os', |
| 'libc_version', |
| 'python_version', |
| 'python_platform', |
| 'is_cuda_available', |
| 'cuda_runtime_version', |
| 'cuda_module_loading', |
| 'nvidia_driver_version', |
| 'nvidia_gpu_models', |
| 'cudnn_version', |
| 'pip_version', # 'pip' or 'pip3' |
| 'pip_packages', |
| 'conda_packages', |
| 'hip_compiled_version', |
| 'hip_runtime_version', |
| 'miopen_runtime_version', |
| 'caching_allocator_config', |
| 'is_xnnpack_available', |
| 'cpu_info', |
| ]) |
| |
| |
| def run(command): |
| """Returns (return-code, stdout, stderr)""" |
    shell = isinstance(command, str)
| p = subprocess.Popen(command, stdout=subprocess.PIPE, |
| stderr=subprocess.PIPE, shell=shell) |
| raw_output, raw_err = p.communicate() |
| rc = p.returncode |
| if get_platform() == 'win32': |
| enc = 'oem' |
| else: |
| enc = locale.getpreferredencoding() |
| output = raw_output.decode(enc) |
| err = raw_err.decode(enc) |
| return rc, output.strip(), err.strip() |
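
# Illustrative usage (shown as a comment so the module stays side-effect free):
#   rc, out, err = run('gcc --version')        # str command -> shell=True
#   rc, out, err = run(['gcc', '--version'])   # list command -> shell=False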
| |
| |
| def run_and_read_all(run_lambda, command): |
| """Runs command using run_lambda; reads and returns entire output if rc is 0""" |
| rc, out, _ = run_lambda(command) |
| if rc != 0: |
| return None |
| return out |
| |
| |
| def run_and_parse_first_match(run_lambda, command, regex): |
| """Runs command using run_lambda, returns the first regex match if it exists""" |
| rc, out, _ = run_lambda(command) |
| if rc != 0: |
| return None |
| match = re.search(regex, out) |
| if match is None: |
| return None |
| return match.group(1) |
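
# Illustrative: if `gcc --version` prints "gcc (Ubuntu 11.3.0) 11.3.0",
# run_and_parse_first_match(run, 'gcc --version', r'gcc (.*)') returns
# "(Ubuntu 11.3.0) 11.3.0" (sample output; the actual text varies by system).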
| |
| def run_and_return_first_line(run_lambda, command): |
| """Runs command using run_lambda and returns first line if output is not empty""" |
| rc, out, _ = run_lambda(command) |
| if rc != 0: |
| return None |
| return out.split('\n')[0] |
| |
| |
| def get_conda_packages(run_lambda): |
| conda = os.environ.get('CONDA_EXE', 'conda') |
    out = run_and_read_all(run_lambda, '"{}" list'.format(conda))
| if out is None: |
| return out |
| |
| return "\n".join( |
| line |
| for line in out.splitlines() |
| if not line.startswith("#") |
| and any( |
| name in line |
| for name in { |
| "torch", |
| "numpy", |
| "cudatoolkit", |
| "soumith", |
| "mkl", |
| "magma", |
| "triton", |
| } |
| ) |
| ) |
| |
| def get_gcc_version(run_lambda): |
| return run_and_parse_first_match(run_lambda, 'gcc --version', r'gcc (.*)') |
| |
| def get_clang_version(run_lambda): |
| return run_and_parse_first_match(run_lambda, 'clang --version', r'clang version (.*)') |
| |
| |
| def get_cmake_version(run_lambda): |
| return run_and_parse_first_match(run_lambda, 'cmake --version', r'cmake (.*)') |
| |
| |
| def get_nvidia_driver_version(run_lambda): |
| if get_platform() == 'darwin': |
| cmd = 'kextstat | grep -i cuda' |
| return run_and_parse_first_match(run_lambda, cmd, |
| r'com[.]nvidia[.]CUDA [(](.*?)[)]') |
| smi = get_nvidia_smi() |
| return run_and_parse_first_match(run_lambda, smi, r'Driver Version: (.*?) ') |
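
# Note: on Linux/Windows the version is parsed from the nvidia-smi header
# table, which contains a field like "Driver Version: 525.105.17"
# (illustrative value).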
| |
| |
| def get_gpu_info(run_lambda): |
| if get_platform() == 'darwin' or (TORCH_AVAILABLE and hasattr(torch.version, 'hip') and torch.version.hip is not None): |
| if TORCH_AVAILABLE and torch.cuda.is_available(): |
| return torch.cuda.get_device_name(None) |
| return None |
| smi = get_nvidia_smi() |
| uuid_regex = re.compile(r' \(UUID: .+?\)') |
| rc, out, _ = run_lambda(smi + ' -L') |
| if rc != 0: |
| return None |
| # Anonymize GPUs by removing their UUID |
| return re.sub(uuid_regex, '', out) |
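
# Illustrative `nvidia-smi -L` line before anonymization:
#   GPU 0: NVIDIA A100-SXM4-40GB (UUID: GPU-xxxxxxxx-xxxx-xxxx)
# The " (UUID: ...)" suffix is what the regex above removes.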
| |
| |
| def get_running_cuda_version(run_lambda): |
| return run_and_parse_first_match(run_lambda, 'nvcc --version', r'release .+ V(.*)') |
| |
| |
| def get_cudnn_version(run_lambda): |
| """This will return a list of libcudnn.so; it's hard to tell which one is being used""" |
| if get_platform() == 'win32': |
| system_root = os.environ.get('SYSTEMROOT', 'C:\\Windows') |
| cuda_path = os.environ.get('CUDA_PATH', "%CUDA_PATH%") |
| where_cmd = os.path.join(system_root, 'System32', 'where') |
| cudnn_cmd = '{} /R "{}\\bin" cudnn*.dll'.format(where_cmd, cuda_path) |
| elif get_platform() == 'darwin': |
| # CUDA libraries and drivers can be found in /usr/local/cuda/. See |
| # https://docs.nvidia.com/cuda/cuda-installation-guide-mac-os-x/index.html#install |
| # https://docs.nvidia.com/deeplearning/sdk/cudnn-install/index.html#installmac |
| # Use CUDNN_LIBRARY when cudnn library is installed elsewhere. |
| cudnn_cmd = 'ls /usr/local/cuda/lib/libcudnn*' |
| else: |
| cudnn_cmd = 'ldconfig -p | grep libcudnn | rev | cut -d" " -f1 | rev' |
| rc, out, _ = run_lambda(cudnn_cmd) |
    # `where`/`grep` return 1 on permission errors or when nothing is found
    if len(out) == 0 or (rc != 1 and rc != 0):
        lib = os.environ.get('CUDNN_LIBRARY')
        if lib is not None and os.path.isfile(lib):
            return os.path.realpath(lib)
| return None |
| files_set = set() |
| for fn in out.split('\n'): |
| fn = os.path.realpath(fn) # eliminate symbolic links |
| if os.path.isfile(fn): |
| files_set.add(fn) |
| if not files_set: |
| return None |
| # Alphabetize the result because the order is non-deterministic otherwise |
| files = sorted(files_set) |
| if len(files) == 1: |
| return files[0] |
| result = '\n'.join(files) |
| return 'Probably one of the following:\n{}'.format(result) |
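
# Illustrative return value when several candidates are found (paths are
# examples only):
#   Probably one of the following:
#   /usr/lib/x86_64-linux-gnu/libcudnn.so.8.9.2
#   /usr/local/cuda/lib64/libcudnn.so.8.9.2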
| |
| |
| def get_nvidia_smi(): |
| # Note: nvidia-smi is currently available only on Windows and Linux |
| smi = 'nvidia-smi' |
| if get_platform() == 'win32': |
| system_root = os.environ.get('SYSTEMROOT', 'C:\\Windows') |
| program_files_root = os.environ.get('PROGRAMFILES', 'C:\\Program Files') |
| legacy_path = os.path.join(program_files_root, 'NVIDIA Corporation', 'NVSMI', smi) |
| new_path = os.path.join(system_root, 'System32', smi) |
| smis = [new_path, legacy_path] |
| for candidate_smi in smis: |
| if os.path.exists(candidate_smi): |
| smi = '"{}"'.format(candidate_smi) |
| break |
| return smi |
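
# The found path is quoted because string commands run through the shell
# (see run() above); the legacy location lives under 'Program Files',
# which contains a space.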
| |
| |
| # example outputs of CPU infos |
| # * linux |
| # Architecture: x86_64 |
| # CPU op-mode(s): 32-bit, 64-bit |
| # Address sizes: 46 bits physical, 48 bits virtual |
| # Byte Order: Little Endian |
| # CPU(s): 128 |
| # On-line CPU(s) list: 0-127 |
| # Vendor ID: GenuineIntel |
| # Model name: Intel(R) Xeon(R) Platinum 8375C CPU @ 2.90GHz |
| # CPU family: 6 |
| # Model: 106 |
| # Thread(s) per core: 2 |
| # Core(s) per socket: 32 |
| # Socket(s): 2 |
| # Stepping: 6 |
| # BogoMIPS: 5799.78 |
| # Flags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr |
| # sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc arch_perfmon rep_good nopl |
| # xtopology nonstop_tsc cpuid aperfmperf tsc_known_freq pni pclmulqdq monitor ssse3 fma cx16 |
| # pcid sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand |
| # hypervisor lahf_lm abm 3dnowprefetch invpcid_single ssbd ibrs ibpb stibp ibrs_enhanced |
| # fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid avx512f avx512dq rdseed adx smap |
| # avx512ifma clflushopt clwb avx512cd sha_ni avx512bw avx512vl xsaveopt xsavec xgetbv1 |
| # xsaves wbnoinvd ida arat avx512vbmi pku ospke avx512_vbmi2 gfni vaes vpclmulqdq |
| # avx512_vnni avx512_bitalg tme avx512_vpopcntdq rdpid md_clear flush_l1d arch_capabilities |
| # Virtualization features: |
| # Hypervisor vendor: KVM |
| # Virtualization type: full |
| # Caches (sum of all): |
| # L1d: 3 MiB (64 instances) |
| # L1i: 2 MiB (64 instances) |
| # L2: 80 MiB (64 instances) |
| # L3: 108 MiB (2 instances) |
| # NUMA: |
| # NUMA node(s): 2 |
| # NUMA node0 CPU(s): 0-31,64-95 |
| # NUMA node1 CPU(s): 32-63,96-127 |
| # Vulnerabilities: |
| # Itlb multihit: Not affected |
| # L1tf: Not affected |
| # Mds: Not affected |
| # Meltdown: Not affected |
| # Mmio stale data: Vulnerable: Clear CPU buffers attempted, no microcode; SMT Host state unknown |
| # Retbleed: Not affected |
| # Spec store bypass: Mitigation; Speculative Store Bypass disabled via prctl and seccomp |
| # Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization |
| # Spectre v2: Mitigation; Enhanced IBRS, IBPB conditional, RSB filling, PBRSB-eIBRS SW sequence |
| # Srbds: Not affected |
| # Tsx async abort: Not affected |
| # * win32 |
| # Architecture=9 |
| # CurrentClockSpeed=2900 |
| # DeviceID=CPU0 |
| # Family=179 |
| # L2CacheSize=40960 |
| # L2CacheSpeed= |
| # Manufacturer=GenuineIntel |
| # MaxClockSpeed=2900 |
| # Name=Intel(R) Xeon(R) Platinum 8375C CPU @ 2.90GHz |
| # ProcessorType=3 |
| # Revision=27142 |
| # |
| # Architecture=9 |
| # CurrentClockSpeed=2900 |
| # DeviceID=CPU1 |
| # Family=179 |
| # L2CacheSize=40960 |
| # L2CacheSpeed= |
| # Manufacturer=GenuineIntel |
| # MaxClockSpeed=2900 |
| # Name=Intel(R) Xeon(R) Platinum 8375C CPU @ 2.90GHz |
| # ProcessorType=3 |
| # Revision=27142 |
| |
| def get_cpu_info(run_lambda): |
| rc, out, err = 0, '', '' |
| if get_platform() == 'linux': |
| rc, out, err = run_lambda('lscpu') |
| elif get_platform() == 'win32': |
        rc, out, err = run_lambda('wmic cpu get Name,Manufacturer,Family,Architecture,ProcessorType,DeviceID,'
                                  'CurrentClockSpeed,MaxClockSpeed,L2CacheSize,L2CacheSpeed,Revision /VALUE')
| elif get_platform() == 'darwin': |
| rc, out, err = run_lambda("sysctl -n machdep.cpu.brand_string") |
    cpu_info = out if rc == 0 else err
    return cpu_info
| |
| |
| def get_platform(): |
| if sys.platform.startswith('linux'): |
| return 'linux' |
| elif sys.platform.startswith('win32'): |
| return 'win32' |
| elif sys.platform.startswith('cygwin'): |
| return 'cygwin' |
| elif sys.platform.startswith('darwin'): |
| return 'darwin' |
| else: |
| return sys.platform |
| |
| |
| def get_mac_version(run_lambda): |
| return run_and_parse_first_match(run_lambda, 'sw_vers -productVersion', r'(.*)') |
| |
| |
| def get_windows_version(run_lambda): |
| system_root = os.environ.get('SYSTEMROOT', 'C:\\Windows') |
| wmic_cmd = os.path.join(system_root, 'System32', 'Wbem', 'wmic') |
| findstr_cmd = os.path.join(system_root, 'System32', 'findstr') |
| return run_and_read_all(run_lambda, '{} os get Caption | {} /v Caption'.format(wmic_cmd, findstr_cmd)) |
| |
| |
| def get_lsb_version(run_lambda): |
| return run_and_parse_first_match(run_lambda, 'lsb_release -a', r'Description:\t(.*)') |
| |
| |
| def check_release_file(run_lambda): |
| return run_and_parse_first_match(run_lambda, 'cat /etc/*-release', |
| r'PRETTY_NAME="(.*)"') |
| |
| |
| def get_os(run_lambda): |
| from platform import machine |
| platform = get_platform() |
| |
| if platform == 'win32' or platform == 'cygwin': |
| return get_windows_version(run_lambda) |
| |
| if platform == 'darwin': |
| version = get_mac_version(run_lambda) |
| if version is None: |
| return None |
| return 'macOS {} ({})'.format(version, machine()) |
| |
| if platform == 'linux': |
| # Ubuntu/Debian based |
| desc = get_lsb_version(run_lambda) |
| if desc is not None: |
| return '{} ({})'.format(desc, machine()) |
| |
| # Try reading /etc/*-release |
| desc = check_release_file(run_lambda) |
| if desc is not None: |
| return '{} ({})'.format(desc, machine()) |
| |
| return '{} ({})'.format(platform, machine()) |
| |
| # Unknown platform |
| return platform |
| |
| |
| def get_python_platform(): |
| import platform |
| return platform.platform() |
| |
| |
| def get_libc_version(): |
| import platform |
| if get_platform() != 'linux': |
| return 'N/A' |
| return '-'.join(platform.libc_ver()) |
| |
| |
| def get_pip_packages(run_lambda): |
| """Returns `pip list` output. Note: will also find conda-installed pytorch |
| and numpy packages.""" |
| # People generally have `pip` as `pip` or `pip3` |
| # But here it is invoked as `python -mpip` |
| def run_with_pip(pip): |
        out = run_and_read_all(run_lambda, pip + ["list", "--format=freeze"])
        if out is None:
            return out
| return "\n".join( |
| line |
| for line in out.splitlines() |
| if any( |
| name in line |
| for name in { |
| "torch", |
| "numpy", |
| "mypy", |
| "flake8", |
| "triton", |
| } |
| ) |
| ) |
| |
    pip_version = 'pip3' if sys.version_info.major == 3 else 'pip'
| out = run_with_pip([sys.executable, '-mpip']) |
| |
| return pip_version, out |
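
# Illustrative `pip list --format=freeze` lines that survive the filter:
#   numpy==1.24.3
#   torch==2.0.1
# (sample versions; the freeze format is always "name==version")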
| |
| |
| def get_cachingallocator_config(): |
| ca_config = os.environ.get('PYTORCH_CUDA_ALLOC_CONF', '') |
| return ca_config |
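
# PYTORCH_CUDA_ALLOC_CONF holds comma-separated key:value options for the
# CUDA caching allocator, e.g. 'max_split_size_mb:128' (sample value).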
| |
| |
| def get_cuda_module_loading_config(): |
| if TORCH_AVAILABLE and torch.cuda.is_available(): |
| torch.cuda.init() |
| config = os.environ.get('CUDA_MODULE_LOADING', '') |
| return config |
| else: |
| return "N/A" |
| |
| |
| def is_xnnpack_available(): |
| if TORCH_AVAILABLE: |
| import torch.backends.xnnpack |
| return str(torch.backends.xnnpack.enabled) # type: ignore[attr-defined] |
| else: |
| return "N/A" |
| |
| def get_env_info(): |
| run_lambda = run |
| pip_version, pip_list_output = get_pip_packages(run_lambda) |
| |
| if TORCH_AVAILABLE: |
| version_str = torch.__version__ |
| debug_mode_str = str(torch.version.debug) |
| cuda_available_str = str(torch.cuda.is_available()) |
| cuda_version_str = torch.version.cuda |
| if not hasattr(torch.version, 'hip') or torch.version.hip is None: # cuda version |
| hip_compiled_version = hip_runtime_version = miopen_runtime_version = 'N/A' |
| else: # HIP version |
| def get_version_or_na(cfg, prefix): |
| _lst = [s.rsplit(None, 1)[-1] for s in cfg if prefix in s] |
| return _lst[0] if _lst else 'N/A' |
| |
| cfg = torch._C._show_config().split('\n') |
| hip_runtime_version = get_version_or_na(cfg, 'HIP Runtime') |
| miopen_runtime_version = get_version_or_na(cfg, 'MIOpen') |
| cuda_version_str = 'N/A' |
| hip_compiled_version = torch.version.hip |
| else: |
| version_str = debug_mode_str = cuda_available_str = cuda_version_str = 'N/A' |
| hip_compiled_version = hip_runtime_version = miopen_runtime_version = 'N/A' |
| |
| sys_version = sys.version.replace("\n", " ") |
| |
| return SystemEnv( |
| torch_version=version_str, |
| is_debug_build=debug_mode_str, |
| python_version='{} ({}-bit runtime)'.format(sys_version, sys.maxsize.bit_length() + 1), |
| python_platform=get_python_platform(), |
| is_cuda_available=cuda_available_str, |
| cuda_compiled_version=cuda_version_str, |
| cuda_runtime_version=get_running_cuda_version(run_lambda), |
| cuda_module_loading=get_cuda_module_loading_config(), |
| nvidia_gpu_models=get_gpu_info(run_lambda), |
| nvidia_driver_version=get_nvidia_driver_version(run_lambda), |
| cudnn_version=get_cudnn_version(run_lambda), |
| hip_compiled_version=hip_compiled_version, |
| hip_runtime_version=hip_runtime_version, |
| miopen_runtime_version=miopen_runtime_version, |
| pip_version=pip_version, |
| pip_packages=pip_list_output, |
| conda_packages=get_conda_packages(run_lambda), |
| os=get_os(run_lambda), |
| libc_version=get_libc_version(), |
| gcc_version=get_gcc_version(run_lambda), |
| clang_version=get_clang_version(run_lambda), |
| cmake_version=get_cmake_version(run_lambda), |
| caching_allocator_config=get_cachingallocator_config(), |
| is_xnnpack_available=is_xnnpack_available(), |
| cpu_info=get_cpu_info(run_lambda), |
| ) |
| |
| env_info_fmt = """ |
| PyTorch version: {torch_version} |
| Is debug build: {is_debug_build} |
| CUDA used to build PyTorch: {cuda_compiled_version} |
| ROCM used to build PyTorch: {hip_compiled_version} |
| |
| OS: {os} |
| GCC version: {gcc_version} |
| Clang version: {clang_version} |
| CMake version: {cmake_version} |
| Libc version: {libc_version} |
| |
| Python version: {python_version} |
| Python platform: {python_platform} |
| Is CUDA available: {is_cuda_available} |
| CUDA runtime version: {cuda_runtime_version} |
| CUDA_MODULE_LOADING set to: {cuda_module_loading} |
| GPU models and configuration: {nvidia_gpu_models} |
| Nvidia driver version: {nvidia_driver_version} |
| cuDNN version: {cudnn_version} |
| HIP runtime version: {hip_runtime_version} |
| MIOpen runtime version: {miopen_runtime_version} |
| Is XNNPACK available: {is_xnnpack_available} |
| |
| CPU: |
| {cpu_info} |
| |
| Versions of relevant libraries: |
| {pip_packages} |
| {conda_packages} |
| """.strip() |
| |
| |
| def pretty_str(envinfo): |
| def replace_nones(dct, replacement='Could not collect'): |
| for key in dct.keys(): |
| if dct[key] is not None: |
| continue |
| dct[key] = replacement |
| return dct |
| |
| def replace_bools(dct, true='Yes', false='No'): |
| for key in dct.keys(): |
| if dct[key] is True: |
| dct[key] = true |
| elif dct[key] is False: |
| dct[key] = false |
| return dct |
| |
| def prepend(text, tag='[prepend]'): |
| lines = text.split('\n') |
| updated_lines = [tag + line for line in lines] |
| return '\n'.join(updated_lines) |
| |
| def replace_if_empty(text, replacement='No relevant packages'): |
| if text is not None and len(text) == 0: |
| return replacement |
| return text |
| |
| def maybe_start_on_next_line(string): |
| # If `string` is multiline, prepend a \n to it. |
| if string is not None and len(string.split('\n')) > 1: |
| return '\n{}\n'.format(string) |
| return string |
| |
| mutable_dict = envinfo._asdict() |
| |
| # If nvidia_gpu_models is multiline, start on the next line |
| mutable_dict['nvidia_gpu_models'] = \ |
| maybe_start_on_next_line(envinfo.nvidia_gpu_models) |
| |
| # If the machine doesn't have CUDA, report some fields as 'No CUDA' |
| dynamic_cuda_fields = [ |
| 'cuda_runtime_version', |
| 'nvidia_gpu_models', |
| 'nvidia_driver_version', |
| ] |
| all_cuda_fields = dynamic_cuda_fields + ['cudnn_version'] |
| all_dynamic_cuda_fields_missing = all( |
| mutable_dict[field] is None for field in dynamic_cuda_fields) |
| if TORCH_AVAILABLE and not torch.cuda.is_available() and all_dynamic_cuda_fields_missing: |
| for field in all_cuda_fields: |
| mutable_dict[field] = 'No CUDA' |
| if envinfo.cuda_compiled_version is None: |
| mutable_dict['cuda_compiled_version'] = 'None' |
| |
| # Replace True with Yes, False with No |
| mutable_dict = replace_bools(mutable_dict) |
| |
| # Replace all None objects with 'Could not collect' |
| mutable_dict = replace_nones(mutable_dict) |
| |
| # If either of these are '', replace with 'No relevant packages' |
| mutable_dict['pip_packages'] = replace_if_empty(mutable_dict['pip_packages']) |
| mutable_dict['conda_packages'] = replace_if_empty(mutable_dict['conda_packages']) |
| |
| # Tag conda and pip packages with a prefix |
    # If they were previously None, they'll show up as e.g. '[conda] Could not collect'
| if mutable_dict['pip_packages']: |
| mutable_dict['pip_packages'] = prepend(mutable_dict['pip_packages'], |
| '[{}] '.format(envinfo.pip_version)) |
| if mutable_dict['conda_packages']: |
| mutable_dict['conda_packages'] = prepend(mutable_dict['conda_packages'], |
| '[conda] ') |
| mutable_dict['cpu_info'] = envinfo.cpu_info |
| return env_info_fmt.format(**mutable_dict) |
| |
| |
| def get_pretty_env_info(): |
| return pretty_str(get_env_info()) |
| |
| |
| def main(): |
| print("Collecting environment information...") |
| output = get_pretty_env_info() |
| print(output) |
| |
| if TORCH_AVAILABLE and hasattr(torch, 'utils') and hasattr(torch.utils, '_crash_handler'): |
| minidump_dir = torch.utils._crash_handler.DEFAULT_MINIDUMP_DIR |
| if sys.platform == "linux" and os.path.exists(minidump_dir): |
        dumps = [os.path.join(minidump_dir, dump) for dump in os.listdir(minidump_dir)]
        if dumps:
            latest = max(dumps, key=os.path.getctime)
            ctime = os.path.getctime(latest)
            creation_time = datetime.datetime.fromtimestamp(ctime).strftime('%Y-%m-%d %H:%M:%S')
            msg = "\n*** Detected a minidump at {} created on {}, ".format(latest, creation_time) + \
                  "if this is related to your bug please include it when you file a report ***"
            print(msg, file=sys.stderr)
| |
| |
| if __name__ == '__main__': |
| main() |