| ## @package parser |
| # Module caffe2.python.docs.parser |
| |
| |
| |
| |
| import re |
| |
| |
class Parser(object):
    """Rewrite markdown-like text by running an ordered list of regex rules.

    Each rule in ``regexes`` pairs a pattern with a callback. When the
    pattern matches, the callback receives the match object and a fresh
    clone of the formatter, and records its output on that clone; the clone's
    ``dump()`` result then replaces the matched text.

    The formatter passed to the constructor must provide ``clone()`` and
    ``dump()``, plus the ``addCode``, ``addRaw``, ``addLink`` and
    ``addEmphasis`` methods invoked by the rules below.
    """

    # List of tuples (regex_str, lambda(regex_match, formatter)).
    # If a lambda returns a truthy value, the formatter output is spliced back
    # into the surrounding text, which stays unparsed and is searched again
    # with the same regex. Otherwise the output is frozen as a parsed segment
    # and the rule is only applied to text that hasn't been parsed yet.
    regexes = [
        # Code blocks of various formats
        ('````(.+?)````',
         lambda m, f: f.addCode(m.group(1))
         ),
        ('```(.+?)```',
         lambda m, f: f.addCode(m.group(1))
         ),
        # Runs of lines indented by a multiple of two spaces
        (r'((( {2})+)(\S.*)(\n\s*\n|\n))+',
         lambda m, f: f.addCode(m.group(0))
         ),
        # A newline not preceded by a period becomes a space. addRaw's return
        # value is discarded by `or True`, so this rule always merges.
        (r'([^\.])\n',
         lambda m, f: f.addRaw('{c} '.format(c=m.group(1))) or True
         ),
        ('`(.+?)`',
         lambda m, f: f.addCode(m.group(1), True)
         ),
        # Make links clickable
        ('http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]'
         r'|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+',
         lambda m, f: f.addLink(m.group(0), m.group(0))
         ),
        (r'\*\*(.+?)\*\*',
         lambda m, f: f.addEmphasis(m.group(1), 2)
         ),
        (r'\*(.+?)\*',
         lambda m, f: f.addEmphasis(m.group(1), 1)
         ),
    ]

    def __init__(self, text, formatter):
        """Store the text to parse and the formatter used to render matches.

        Args:
            text: The raw string to be parsed.
            formatter: Object providing ``clone()``, ``dump()`` and the
                ``add*`` methods called by the rules in ``regexes``.
        """
        self.text = text
        self.lines = []
        self.formatter = formatter

    def parseText(self):
        """Apply every rule in ``regexes`` to ``self.text`` in order.

        The text is kept as an ordered list of ``(label, text)`` segments.
        Segments labelled PARSED hold formatter output and are never searched
        again; UNPARSED segments are searched by each rule in turn. On
        completion ``self.lines`` holds the segment texts in document order
        and ``self.text`` holds them joined by single spaces.
        """
        UNPARSED = 0
        PARSED = 1
        parsed_block = [(UNPARSED, self.text)]
        for regex, func in self.regexes:
            index = 0
            while index < len(parsed_block):
                label, text = parsed_block[index]

                # Already been parsed
                if label == PARSED:
                    index += 1
                    continue

                match = re.search(regex, text)
                if not match:
                    index += 1
                    continue

                start, end = match.span(0)
                f = self.formatter.clone()
                merge = func(match, f)

                if merge:
                    # Splice the output back in and search the same segment
                    # again with this regex (index is deliberately unchanged).
                    merged = text[:start] + f.dump() + text[end:]
                    parsed_block[index] = (UNPARSED, merged)
                    continue

                # Replace the segment by [prefix?, parsed, tail?], keeping
                # document order. The index only moves past pieces that were
                # actually inserted: advancing it unconditionally would, for
                # a match at offset 0, put the parsed piece after the next
                # segment and skip the tail.
                pieces = []
                if text[:start]:
                    pieces.append((UNPARSED, text[:start]))
                pieces.append((PARSED, f.dump()))
                next_index = index + len(pieces)
                if text[end:]:
                    pieces.append((UNPARSED, text[end:]))
                parsed_block[index:index + 1] = pieces

                # Resume at the tail (if any) so later matches of this regex
                # in the same text are found; otherwise at the next segment.
                index = next_index

        # Rebuild rather than extend, so a repeated call does not duplicate
        # the output of the previous one.
        self.lines = [segment for _, segment in parsed_block]
        self.text = ' '.join(self.lines)

    def parse(self):
        """Run ``parseText`` and return the resulting text."""
        self.parseText()
        return self.text