| # Copyright 2023 The Khronos Group Inc. |
| # |
| # SPDX-License-Identifier: Apache-2.0 |
| |
| """Utilities for automatic transformation of spec sources. Most of the logic |
| has to do with detecting asciidoc markup or block types that should not be |
| transformed (tables, code) and ignoring them. It is very likely there are many |
| asciidoc constructs not yet accounted for in the script, our usage of asciidoc |
| markup is intentionally somewhat limited. |
| """ |
| |
| import re |
| import sys |
| from reflib import logDiag, logWarn |
| |
| # Vulkan-specific - will consolidate into scripts/ like OpenXR soon |
| sys.path.insert(0, 'xml') |
| |
| from apiconventions import APIConventions |
| conventions = APIConventions() |
| |
| # Start of an asciidoctor conditional |
| # ifdef:: |
| # ifndef:: |
| conditionalStart = re.compile(r'^(ifdef|ifndef)::') |
| |
| # Markup that always ends a paragraph |
| # empty line or whitespace |
| # [block options] |
| # [[anchor]] |
| # // comment |
| # <<<< page break |
| # :attribute-setting |
| # macro-directive::terms |
| # + standalone list item continuation |
| # label:: labelled list - label must be standalone |
| endPara = re.compile(r'^( *|\[.*\]|//.*|<<<<|:.*|[a-z]+::.*|\+|.*::)$') |
| |
| # Special case of markup ending a paragraph, used to track the current |
| # command/structure. This allows for either OpenXR or Vulkan API path |
| # conventions. Nominally it should use the file suffix defined by the API |
| # conventions (conventions.file_suffix), except that XR uses '.txt' for |
| # generated API include files, not '.adoc' like its other includes. |
| includePat = re.compile( |
| r'include::(?P<directory_traverse>((../){1,4}|\{generated\}/)(generated/)?)(?P<generated_type>[\w]+)/(?P<category>\w+)/(?P<entity_name>[^./]+).adoc[\[][\]]') |
| |
| # Markup that is OK in a contiguous paragraph but otherwise passed through |
| # .anything (except .., which indicates a literal block) |
| # === Section Titles |
| # image::path_to_image[attributes] (apparently a single colon is OK but less idiomatic) |
| endParaContinue = re.compile(r'^(\.[^.].*|=+ .*|image:.*\[.*\])$') |
| |
| # Markup for block delimiters whose contents *should* be reformatted |
| # -- (exactly two) (open block) |
| # **** (4 or more) (sidebar block) |
| # ==== (4 or more) (example block) |
| # ____ (4 or more) (quote block) |
| blockTransform = re.compile(r'^(--|[*=_]{4,})$') |
| |
| # Fake block delimiters for "common" VU statements |
| blockCommonTransform = '// Common Valid Usage\n' |
| |
| # Markup for block delimiters whose contents should *not* be transformed |
| # |=== (3 or more) (table) |
| # ``` (3 or more) (listing block) |
| # //// (4 or more) (comment block) |
| # ---- (4 or more) (listing block) |
| # .... (4 or more) (literal block) |
| # ++++ (4 or more) (passthrough block) |
| blockPassthrough = re.compile(r'^(\|={3,}|[`]{3}|[\-+./]{4,})$') |
| |
| # Markup for introducing lists (hanging paragraphs) |
| # * bullet |
| # ** bullet |
| # -- bullet |
| # . bullet |
| # :: bullet (no longer supported by asciidoctor 2) |
| # {empty}:: bullet |
| # 1. list item |
| # <1> source listing callout |
| beginBullet = re.compile(r'^ *([-*.]+|\{empty\}::|::|[0-9]+[.]|<([0-9]+)>) ') |
| |
| class TransformState: |
| """State machine for transforming documents. |
| |
| Represents the state of the transform operation""" |
| def __init__(self): |
| self.blockStack = [ None ] |
| """The last element is a line with the asciidoc block delimiter that is |
| currently in effect, such as '--', '----', '****', '====', or '++++'. |
| This affects whether or not the block contents should be transformed.""" |
| self.transformStack = [ True ] |
| """The last element is True or False if the current blockStack contents |
| should be transformed.""" |
| self.vuStack = [ False ] |
| """the last element is True or False if the current blockStack contents |
| are an explicit Valid Usage block.""" |
| |
| self.para = [] |
| """list of lines in the paragraph being accumulated. |
| When this is non-empty, there is a current paragraph.""" |
| |
| self.lastTitle = False |
| """true if the previous line was a document title line |
| (e.g. :leveloffset: 0 - no attempt to track changes to this is made).""" |
| |
| self.leadIndent = 0 |
| """indent level (in spaces) of the first line of a paragraph.""" |
| |
| self.hangIndent = 0 |
| """indent level of the remaining lines of a paragraph.""" |
| |
| self.lineNumber = 0 |
| """line number being read from the input file.""" |
| |
| self.defaultApiName = '{refpage}' |
| self.apiName = self.defaultApiName |
| """String name of an API structure or command for VUID tag generation, |
| or {refpage} if one has not been included in this file yet.""" |
| |
| def incrLineNumber(self): |
| self.lineNumber = self.lineNumber + 1 |
| |
| def isOpenBlockDelimiter(self, line): |
| """Returns True if line is an open block delimiter. |
| This does not and should not match the listing block delimiter, |
| which is used inside refpage blocks both as a listing block and, |
| via an extension, as a nested open block.""" |
| return line.rstrip() == '--' |
| |
| def resetPara(self): |
| """Reset the paragraph, including its indentation level""" |
| self.para = [] |
| self.leadIndent = 0 |
| self.hangIndent = 0 |
| |
| def endBlock(self, line, transform, vuBlock): |
| """If beginning a block, tag whether or not to transform the contents. |
| |
| vuBlock is True if the previous line indicates this is a Valid Usage |
| block.""" |
| if self.blockStack[-1] == line: |
| logDiag('endBlock line', self.lineNumber, |
| ': popping block end depth:', len(self.blockStack), |
| ':', line, end='') |
| |
| # Reset apiName at the end of an open block. |
| # Open blocks cannot be nested (at present), so this is safe. |
| if self.isOpenBlockDelimiter(line): |
| logDiag('reset apiName to empty at line', self.lineNumber) |
| self.apiName = self.defaultApiName |
| else: |
| logDiag('NOT resetting apiName to default at line', |
| self.lineNumber) |
| |
| self.blockStack.pop() |
| self.transformStack.pop() |
| self.vuStack.pop() |
| else: |
| # Start a block |
| self.blockStack.append(line) |
| self.transformStack.append(transform) |
| self.vuStack.append(vuBlock) |
| |
| logDiag('endBlock transform =', transform, ' line', self.lineNumber, |
| ': pushing block start depth', len(self.blockStack), |
| ':', line, end='') |
| |
| def addLine(self, line, indent): |
| """Add a line to the current paragraph""" |
| if self.para == []: |
| # Begin a new paragraph |
| self.para = [line] |
| self.leadIndent = indent |
| self.hangIndent = indent |
| else: |
| # Add a line to a paragraph. Increase the hanging indentation |
| # level - once. |
| if self.hangIndent == self.leadIndent: |
| self.hangIndent = indent |
| self.para.append(line) |
| |
| |
| class TransformCallbackState: |
| """State given to the transformer callback object, derived from |
| TransformState.""" |
| def __init__(self, state): |
| self.isVU = state.vuStack[-1] if len(state.vuStack) > 0 else False |
| """Whether this paragraph is a VU.""" |
| |
| self.apiName = state.apiName |
| """String name of an API structure or command this paragraph belongs |
| to.""" |
| |
| self.leadIndent = state.leadIndent |
| """indent level (in spaces) of the first line of a paragraph.""" |
| |
| self.hangIndent = state.hangIndent |
| """indent level of the remaining lines of a paragraph.""" |
| |
| self.lineNumber = state.lineNumber |
| """line number being read from the input file.""" |
| |
| |
| class DocTransformer: |
| """A transformer that recursively goes over all spec files under a path. |
| |
| The transformer goes over all spec files under a path and does some basic |
| parsing. In particular, it tracks which section the current text belongs |
| to, whether it references a VU, etc and processes them in 'paragraph' |
| granularity. |
| The transformer takes a callback object with the following methods: |
| |
| - transformParagraph: Called when a paragraph is parsed. The paragraph |
| along with some information (such as whether it is a VU) is passed. The |
| function may transform the paragraph as necessary. |
| - onEmbeddedVUConditional: Called when an embedded VU conditional is |
| encountered. |
| """ |
| def __init__(self, |
| filename, |
| outfile, |
| callback): |
| self.filename = filename |
| """base name of file being read from.""" |
| |
| self.outfile = outfile |
| """file handle to write to.""" |
| |
| self.state = TransformState() |
| """State of transformation""" |
| |
| self.callback = callback |
| """The transformation callback object""" |
| |
| def printLines(self, lines): |
| """Print an array of lines with newlines already present""" |
| if len(lines) > 0: |
| logDiag(':: printLines:', len(lines), 'lines: ', lines[0], end='') |
| |
| if self.outfile is not None: |
| for line in lines: |
| print(line, file=self.outfile, end='') |
| |
| def emitPara(self): |
| """Emit a paragraph, possibly transforming it depending on the block |
| context. |
| |
| Resets the paragraph accumulator.""" |
| if self.state.para != []: |
| transformedPara = self.state.para |
| |
| if self.state.transformStack[-1]: |
| callbackState = TransformCallbackState(self.state) |
| |
| transformedPara = self.callback.transformParagraph( |
| self.state.para, |
| callbackState) |
| |
| self.printLines(transformedPara) |
| |
| self.state.resetPara() |
| |
| def endPara(self, line): |
| """'line' ends a paragraph and should itself be emitted. |
| line may be None to indicate EOF or other exception.""" |
| logDiag('endPara line', self.state.lineNumber, ': emitting paragraph') |
| |
| # Emit current paragraph, this line, and reset tracker |
| self.emitPara() |
| |
| if line: |
| self.printLines([line]) |
| |
| def endParaContinue(self, line): |
| """'line' ends a paragraph (unless there is already a paragraph being |
| accumulated, e.g. len(para) > 0 - currently not implemented)""" |
| self.endPara(line) |
| |
| def endBlock(self, line, transform = False, vuBlock = False): |
| """'line' begins or ends a block. |
| |
| If beginning a block, tag whether or not to transform the contents. |
| |
| vuBlock is True if the previous line indicates this is a Valid Usage |
| block.""" |
| self.endPara(line) |
| self.state.endBlock(line, transform, vuBlock) |
| |
| def endParaBlockTransform(self, line, vuBlock): |
| """'line' begins or ends a block. The paragraphs in the block *should* be |
| reformatted (e.g. a NOTE).""" |
| self.endBlock(line, transform = True, vuBlock = vuBlock) |
| |
| def endParaBlockPassthrough(self, line): |
| """'line' begins or ends a block. The paragraphs in the block should |
| *not* be reformatted (e.g. a code listing).""" |
| self.endBlock(line, transform = False) |
| |
| def addLine(self, line): |
| """'line' starts or continues a paragraph. |
| |
| Paragraphs may have "hanging indent", e.g. |
| |
| ``` |
| * Bullet point... |
| ... continued |
| ``` |
| |
| In this case, when the higher indentation level ends, so does the |
| paragraph.""" |
| logDiag('addLine line', self.state.lineNumber, ':', line, end='') |
| |
| # See https://stackoverflow.com/questions/13648813/what-is-the-pythonic-way-to-count-the-leading-spaces-in-a-string |
| indent = len(line) - len(line.lstrip()) |
| |
| # A hanging paragraph ends due to a less-indented line. |
| if self.state.para != [] and indent < self.state.hangIndent: |
| logDiag('addLine: line reduces indentation, emit paragraph') |
| self.emitPara() |
| |
| # A bullet point (or something that looks like one) always ends the |
| # current paragraph. |
| if beginBullet.match(line): |
| logDiag('addLine: line matches beginBullet, emit paragraph') |
| self.emitPara() |
| |
| self.state.addLine(line, indent) |
| |
| def apiMatch(self, oldname, newname): |
| """Returns whether oldname and newname match, up to an API suffix. |
| This should use the API map instead of this heuristic, since aliases |
| like VkPhysicalDeviceVariablePointerFeaturesKHR -> |
| VkPhysicalDeviceVariablePointersFeatures are not recognized.""" |
| upper = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' |
| return oldname.rstrip(upper) == newname.rstrip(upper) |
| |
| def transformFile(self, lines): |
| """Transform lines, and possibly output to to the given file.""" |
| |
| for line in lines: |
| self.state.incrLineNumber() |
| |
| # Is this a title line (leading '= ' followed by text)? |
| thisTitle = False |
| |
| # The logic here is broken. If we are in a non-transformable block and |
| # this line *does not* end the block, it should always be |
| # accumulated. |
| |
| # Test for a blockCommonTransform delimiter comment first, to avoid |
| # treating it solely as a end-Paragraph marker comment. |
| if line == blockCommonTransform: |
| # Starting or ending a pseudo-block for "common" VU statements. |
| self.endParaBlockTransform(line, vuBlock = True) |
| |
| elif blockTransform.match(line): |
| # Starting or ending a block whose contents may be transformed. |
| # Blocks cannot be nested. |
| |
| # Is this is an explicit Valid Usage block? |
| vuBlock = (self.state.lineNumber > 1 and |
| lines[self.state.lineNumber-2] == '.Valid Usage\n') |
| |
| self.endParaBlockTransform(line, vuBlock) |
| |
| elif endPara.match(line): |
| # Ending a paragraph. Emit the current paragraph, if any, and |
| # prepare to begin a new paragraph. |
| |
| self.endPara(line) |
| |
| # If this is an include:: line starting the definition of a |
| # structure or command, track that for use in VUID generation. |
| |
| matches = includePat.search(line) |
| if matches is not None: |
| generated_type = matches.group('generated_type') |
| include_type = matches.group('category') |
| if generated_type == 'api' and include_type in ('protos', 'structs', 'funcpointers'): |
| apiName = matches.group('entity_name') |
| if self.state.apiName != self.state.defaultApiName: |
| # This happens when there are multiple API include |
| # lines in a single block. The style guideline is to |
| # always place the API which others are promoted to |
| # first. In virtually all cases, the promoted API |
| # will differ solely in the vendor suffix (or |
| # absence of it), which is benign. |
| if not self.apiMatch(self.state.apiName, apiName): |
| logDiag(f'Promoted API name mismatch at line {self.state.lineNumber}: {apiName} does not match self.state.apiName (this is OK if it is just a spelling alias)') |
| else: |
| self.state.apiName = apiName |
| |
| elif endParaContinue.match(line): |
| # For now, always just end the paragraph. |
| # Could check see if len(para) > 0 to accumulate. |
| |
| self.endParaContinue(line) |
| |
| # If it is a title line, track that |
| if line[0:2] == '= ': |
| thisTitle = True |
| |
| elif blockPassthrough.match(line): |
| # Starting or ending a block whose contents must not be |
| # transformed. These are tables, etc. Blocks cannot be nested. |
| # Note that the use of a listing block masquerading as an |
| # open block, via an extension, will not be formatted even |
| # though it should be. |
| # Fixing this would require looking at the previous line |
| # state for the '[open]' tag, and there are so few cases of |
| # this in the spec markup that it is not worth the trouble. |
| |
| self.endParaBlockPassthrough(line) |
| elif self.state.lastTitle: |
| # The previous line was a document title line. This line |
| # is the author / credits line and must not be transformed. |
| |
| self.endPara(line) |
| else: |
| # Just accumulate a line to the current paragraph. Watch out for |
| # hanging indents / bullet-points and track that indent level. |
| |
| self.addLine(line) |
| |
| # Commented out now that VU extractor supports this, but may |
| # need to refactor through a conventions object enable if |
| # OpenXR still needs this. |
| |
| # This test looks for disallowed conditionals inside Valid Usage |
| # blocks, by checking if (a) this line does not start a new VU |
| # (bullet point) and (b) the previous line starts an asciidoctor |
| # conditional (ifdef:: or ifndef::). |
| # if (self.state.vuStack[-1] |
| # and not beginBullet.match(line) |
| # and conditionalStart.match(lines[self.state.lineNumber-2])): |
| # self.callback.onEmbeddedVUConditional(self.state) |
| |
| self.state.lastTitle = thisTitle |
| |
| # Cleanup at end of file |
| self.endPara(None) |
| |
| # Check for sensible block nesting |
| if len(self.state.blockStack) > 1: |
| logWarn('file', self.filename, |
| 'mismatched asciidoc block delimiters at EOF:', |
| self.state.blockStack[-1]) |
| |