blob: aa52ced783b894e1fa0e8c14a78da577e9041d3f [file] [log] [blame]
# This file contains a simple parser that parses the reports
# produced by cuda-memcheck.
class ParseError(Exception):
    """Signal that a cuda-memcheck report could not be parsed.

    Raised by the simple parser in this file whenever the report text does
    not have the structure the parser expects.
    """
class Report:
    """Hold the parsed errors of a report together with its error summary.

    Attributes are set in the constructor:
        text: the summary line exactly as it was passed in.
        num_errors: the error count read from the summary line.
        errors: the sequence of errors passed in.
    """

    def __init__(self, text, errors):
        """Build a report from its summary line and its errors.

        Args:
            text: the summary line, for example ``ERROR SUMMARY: 1 error``
                or ``ERROR SUMMARY: 2 errors``. The count is taken from the
                third whitespace-separated token.
            errors: the errors collected for this report.

        Raises:
            ParseError: if the count in the summary differs from
                ``len(errors)`` and the difference is not explained by the
                10000-error display cap handled below.
        """
        self.text = text
        tokens = text.strip().split()
        self.num_errors = int(tokens[2])
        self.errors = errors

        found = len(errors)
        if found == self.num_errors:
            return

        # When there are more than 10k errors, cuda-memcheck only displays
        # 10k of them, so a summary above 10000 paired with exactly 10000
        # collected errors is accepted and the count is clamped.
        display_capped = found == 10000 and self.num_errors > 10000
        if not display_capped:
            raise ParseError("Number of errors does not match")
        self.num_errors = 10000
class Error:
    """One error section taken from the output of cuda-memcheck.

    Each error in the report has an error message and a backtrace. It looks like:

    ========= Program hit cudaErrorInvalidValue (error 1) due to "invalid argument" on CUDA API call to cudaGetLastError.
    ========= Saved host backtrace up to driver entry point at error
    ========= Host Frame:/usr/lib/x86_64-linux-gnu/libcuda.so.1 [0x38c7b3]
    ========= Host Frame:/usr/local/cuda/lib64/libcudart.so.10.1 (cudaGetLastError + 0x163) [0x4c493]
    ========= Host Frame:/home/xgao/anaconda3/lib/python3.7/site-packages/torch/lib/libtorch.so [0x5b77a05]
    ========= Host Frame:/home/xgao/anaconda3/lib/python3.7/site-packages/torch/lib/libtorch.so [0x39d6d1d]
    ========= .....
    """

    def __init__(self, lines):
        """Split the lines of one error section into message and stack.

        Args:
            lines: the lines of the section, in order. The first line
                becomes ``message``. The second line (the "Saved host
                backtrace ..." line in the example above) is skipped, and
                every remaining line is stripped of surrounding whitespace
                and stored in ``stack``.
        """
        self.message = lines[0]
        self.stack = [frame.strip() for frame in lines[2:]]
def parse(message):
    """Parse the report of cuda-memcheck into errors and a summary.

    This parser is meant to be simple: it only splits the report into
    separate errors and a summary, where each error is further split into
    an error message and a backtrace. No further details are parsed.

    A report contains multiple errors and a summary on how many errors are
    detected. It looks like:

    ========= CUDA-MEMCHECK
    ========= Program hit cudaErrorInvalidValue (error 1) due to "invalid argument" on CUDA API call to cudaPointerGetAttributes.
    ========= Saved host backtrace up to driver entry point at error
    ========= Host Frame:/usr/lib/x86_64-linux-gnu/libcuda.so.1 [0x38c7b3]
    ========= Host Frame:/usr/local/cuda/lib64/libcudart.so.10.1 (cudaPointerGetAttributes + 0x1a9) [0x428b9]
    ========= Host Frame:/home/xgao/anaconda3/lib/python3.7/site-packages/torch/lib/libtorch.so [0x5b778a9]
    ========= .....
    =========
    ========= Program hit cudaErrorInvalidValue (error 1) due to "invalid argument" on CUDA API call to cudaGetLastError.
    ========= Saved host backtrace up to driver entry point at error
    ========= Host Frame:/usr/lib/x86_64-linux-gnu/libcuda.so.1 [0x38c7b3]
    ========= Host Frame:/usr/local/cuda/lib64/libcudart.so.10.1 (cudaGetLastError + 0x163) [0x4c493]
    ========= .....
    =========
    ========= .....
    =========
    ========= Program hit cudaErrorInvalidValue (error 1) due to "invalid argument" on CUDA API call to cudaGetLastError.
    ========= Saved host backtrace up to driver entry point at error
    ========= Host Frame:/usr/lib/x86_64-linux-gnu/libcuda.so.1 [0x38c7b3]
    ========= .....
    ========= Host Frame:python (_PyEval_EvalFrameDefault + 0x6a0) [0x1d0ad0]
    ========= Host Frame:python (_PyEval_EvalCodeWithName + 0xbb9) [0x116db9]
    =========
    ========= ERROR SUMMARY: 4 errors

    Lines before the ``========= CUDA-MEMCHECK`` header, and lines that do
    not start with ``=========``, are ignored. Parsing stops at the first
    ``ERROR SUMMARY:`` line. An error section that is still open at that
    point (not yet closed by a blank ``=========`` line) is not recorded.

    Args:
        message: the full text containing the cuda-memcheck report.

    Returns:
        A ``Report`` built from the summary line and the collected errors.

    Raises:
        ParseError: if no ``ERROR SUMMARY:`` line is found after the
            header. ``Report`` may also raise it when the summary count
            does not match the number of collected errors.
    """
    head = '========='
    # Every report line is the marker, one separator character, then the payload.
    payload_start = len(head) + 1
    errors = []
    started = False
    # Lines of the error section currently being collected, or None when
    # we are between sections.
    section = None
    for line in message.splitlines():
        if line == head + ' CUDA-MEMCHECK':
            started = True
            continue
        if not started or not line.startswith(head):
            continue
        payload = line[payload_start:]
        if payload.startswith('ERROR SUMMARY:'):
            return Report(payload, errors)
        if payload == '':
            # A blank separator closes the open section. When no section is
            # open (for example two separators in a row) it is skipped, so
            # it cannot start a bogus section with an empty message that
            # would swallow the next error's real message.
            if section is not None:
                errors.append(Error(section))
                section = None
            continue
        if section is None:
            section = [payload]
        else:
            section.append(payload)
    raise ParseError("No error summary found")