| #!/usr/bin/python3 |
| # |
| # Copyright 2022-2023 The Khronos Group Inc. |
| # SPDX-License-Identifier: Apache-2.0 |
| |
| """Used to convert files from the asciidoctor spec tree to Antora module |
| format. Success is highly dependent on strict adherence to Vulkan spec |
| authoring conventions. |
| |
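| Usage: `antora-prep.py [-root path] -component path [-pageHeaders file] [-xrefpath path] [-pagemappath path] [-filelist file] files` |
| |
| - `-root` is the root path (repository root, usually) relative to which spec |
| files are processed. Defaults to current directory if not specified. |
| - `-component` is the path to the module and component in which converted |
| files are written (e.g. the component directory under which pages/, |
| partials/, images/, etc. are located). |
| - `-pageHeaders` is an optional file whose contents are injected after the |
| title of each converted page. |
| - `-xrefpath` is an optional directory containing xrefMap.py, which maps |
| anchors to chapter anchors. |
| - `-pagemappath` is an optional path to which the pageMap.cjs output file is |
| written. |
| - `-filelist` is an optional file containing a list of filenames to convert, |
| one per line. |
| - `files` are asciidoc source files from the spec to convert. |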
| |
| Image files are linked from the component 'images' directory. |
| |
| Asciidoc markup files (.adoc) are scanned for the first title markup and |
| classified as partials or pages depending on whether it is a top-level title |
| or not. All .adoc files are rewritten to the component 'partials' directory, to |
| allow transclusion of pages to work (otherwise the transclusions would also |
| have to be rewritten). |
| |
| pages then have additional markup injected immediately following the page |
| title to set custom attributes needed for the build. pages are then |
| symbolically linked from the component 'pages' directory to the actual |
| rewritten file in the 'partials' directory to follow Antora conventions. |
| """ |
| |
| # For error and file-loading interfaces only |
| import argparse |
| import importlib |
| import os |
| import re |
| import sys |
| from generator import enquote |
| from reflib import loadFile, logDiag, logWarn, logErr, setLogFile, getBranch |
| from pathlib import Path |
| |
# Matches a line consisting solely of an asciidoc block anchor,
# '[[anchor]]' or '[[anchor,text]]', capturing the anchor name.
titleAnchorPat = re.compile(r'^\[\[(?P<anchor>[^,]+).*\]\]$')

# Matches a top-level title: a single '=' or '#', one space, then title text
# starting with an upper-case letter.
titlePat = re.compile(r'^[=#] (?P<title>[A-Z].*)')

# Matches a second-level or deeper title: two or more '=' or '#' markers.
subtitlePat = re.compile(r'^[=#]{2,} (?P<title>[A-Z].*)')

# File categories. Each value doubles as the name of the corresponding
# subdirectory of the component directory.
Pages = 'pages'
Partials = 'partials'
Images = 'images'
| |
def undefquote(s):
    """Render a value for emission into JavaScript source.

    - s - string to quote, or None

    Returns the literal text `undefined` when `s` is None, otherwise the
    result of passing `s` to enquote().
    """

    return 'undefined' if s is None else enquote(s)
| |
| |
def mapAnchor(anchor, title, pageMap, xrefMap, closeAnchor):
    """Rewrite a <<anchor{, title}>> xref -> xref:pagemap#anchor[{title}]

    - anchor - anchor name
    - title - xref description or '' if not specified, in which case the
      anchor text from the xrefMap is used if available
    - pageMap - dictionary mapping a page anchor to a page name
    - xrefMap - dictionary mapping an anchor to a (page anchor, title) pair
    - closeAnchor - True if closing >> is on this line, False otherwise

    Returns the Antora xref markup as a string. When closeAnchor is False
    the closing ']' is omitted, so the caller can supply it once the
    terminating '>>' is found on a following line.

    If the page the anchor belongs to cannot be determined, a message is
    written to stderr and the bare anchor is used as the xref target.
    """

    # Determine which page anchor this anchor comes from
    # If it cannot be determined, use the unmapped anchor
    #@ Simplify the page anchor if pageName == current page
    try:
        (pageAnchor, mappedTitle) = xrefMap[anchor]

        # Only infer a title when none was supplied *and* the xref closes on
        # this line. Otherwise either an explicit (possibly partial) title
        # was given, or the title is on the next line.
        if title == '' and closeAnchor:
            title = mappedTitle

            # If the title is *still* empty, make a note of it and just use
            # the anchor name
            if title == '':
                print(f'No title found for anchor {anchor}', file=sys.stderr)
                title = anchor

        # Page the page anchor comes from
        pageName = pageMap[pageAnchor]
        print(f'mapAnchor: anchor {anchor} pageAnchor {pageAnchor} -> pageName = {pageName}')

        xref = f'{pageName}#{anchor}'
    except (KeyError, TypeError, ValueError):
        # KeyError: anchor or page anchor is not in the maps.
        # TypeError / ValueError: xrefMap entry is not a two-element pair.
        # Anything else is a real error and is allowed to propagate.
        print(f'Cannot determine which page {anchor} comes from, passing through to Antora intact', file=sys.stderr)
        xref = f'{anchor}'

    # Remove extraneous whitespace (including any trailing newline captured
    # when the title runs to the end of the line)
    title = ' '.join(title.split())

    if closeAnchor:
        return f'xref:{xref}[{title}]'

    return f'xref:{xref}[{title}'
| |
def replaceAnchorText(match, pageMap, xrefMap):
    """Rewrite <<anchor,text>> to xref:newanchor[text]

    - match - match object; group 1 is the anchor, group 2 is the text
    - pageMap, xrefMap - per rewriteXrefs below

    Returns the replacement markup produced by mapAnchor for an xref that
    is closed on the same line.
    """

    (anchor, text) = match.group(1, 2)
    return mapAnchor(anchor, text, pageMap, xrefMap, closeAnchor=True)
| |
def replaceAnchorOnly(match, pageMap, xrefMap):
    """Rewrite <<anchor>> to xref:newanchor[]

    - match - match object; group 1 is the anchor
    - pageMap, xrefMap - per rewriteXrefs below

    Returns the replacement markup produced by mapAnchor, called with an
    empty title for an xref that is closed on the same line.
    """

    return mapAnchor(match.group(1), '', pageMap, xrefMap, closeAnchor=True)
| |
def replaceAnchorTrailingText(match, pageMap, xrefMap):
    """Rewrite <<anchor, to xref:newanchor[

    - match - match object; group 1 is the anchor, group 2 is the text
      (may be empty)
    - pageMap, xrefMap - per rewriteXrefs below

    Returns the replacement markup produced by mapAnchor for an xref whose
    closing >> is not on this line, so the markup is left unclosed.
    """

    (anchor, text) = match.group(1, 2)
    return mapAnchor(anchor, text, pageMap, xrefMap, closeAnchor=False)
| |
class DocFile:
    """Information about a markup file being converted"""

    def __init__(self):
        """Constructor

        - lines - text of file as list of strings
        - root - common base directory for src files
        - component - path to component directory for outputs
        - srcpath - absolute path to file source
        - relpath - path to file source relative to root
        - dstpath - path to output file destination
        - dstlink - path to an alias (symlink to) dstpath, used for
          files that need to be in both partials and pages directories.
        - category - file type - Pages, Partials, or Images. These are
          string variables containing the corresponding component
          subdirectory name.
        - title - title text found by findTitle, else ''
        - titleLine - index in lines of the title line
        - titleAnchor - title anchor found by findTitle, else ''
        - anchors - asciidoc anchors found in the file
        - includes - asciidoc includes found in the file
        - pageMap - dictionary mapping a page anchor to a source file
          relpath
        - xrefMap - dictionary mapping an anchor within a page to a page
          anchor
        """

        self.lines = None
        self.root = None
        self.component = None
        self.srcpath = None
        self.relpath = None
        self.dstpath = None
        self.dstlink = None
        self.category = None
        self.title = ''
        self.titleLine = 0
        self.titleAnchor = ''
        self.anchors = set()
        self.includes = set()

        self.pageMap = {}
        self.xrefMap = {}

    def findTitle(self):
        """Find category (Pages or Partials) and title in a .adoc markup
        file.

        Heuristic is to search the beginning of the file for a top-level
        asciidoc title, preceded by an anchor for the page.

        Returns (category, title, titleLine, titleAnchor):

        - category - Pages if a top-level title is found first, otherwise
          Partials
        - title - text of the title that was found, or 'NO TITLE FOUND'
        - titleLine - index in self.lines of the title, or -1 if none found
        - titleAnchor - the most recent anchor line seen before the title,
          or '' if there was none
        """

        # Chapter title block must be within this many lines of start of file
        maxLines = 30

        # Default, if page anchor not found
        titleAnchor = ''

        for lineno, line in enumerate(self.lines[:maxLines]):
            # Look for an anchor, which must precede the title to apply to
            # it (really, must precede it by exactly one line).
            match = titleAnchorPat.match(line)
            if match is not None:
                titleAnchor = match.group('anchor')
                continue

            # If we find a top-level title, it is a page.
            match = titlePat.match(line)
            if match is not None:
                return (Pages, match.group('title'), lineno, titleAnchor)

            # If we find a second-level or above title, it is a partial
            match = subtitlePat.match(line)
            if match is not None:
                return (Partials, match.group('title'), lineno, titleAnchor)

        # If we do not find a match in the first maxLines lines, assume it
        # is a partial.
        return (Partials, 'NO TITLE FOUND', -1, titleAnchor)

    def populate(self,
                 filename,
                 root,
                 component):
        """Populate data structures given file content and location.

        - filename - file to scan
        - root - absolute path to root under which all source files are
          read
        - component - absolute path to module / component directory under
          which all destination files are written

        Raises RuntimeError if the file content cannot be loaded.
        """

        # Load file content
        self.srcpath = os.path.abspath(filename)
        self.lines, _ = loadFile(self.srcpath)
        if self.lines is None:
            raise RuntimeError(f'No such file {self.srcpath}')

        # Miscellaneous relevant paths
        self.root = root
        self.relpath = os.path.relpath(self.srcpath, root)
        self.component = component

        # Determine file category.
        # Only .adoc files are candidates for pages, which is verified by
        # looking at the file header for a top-level title.
        # .svg .jpg .png are always images
        # Anything else is a partial
        (_, fileext) = os.path.splitext(filename)

        # Defaults. titleAnchor stays None for anything that is not an
        # .adoc file; for .adoc files findTitle() replaces it with a string.
        self.title = ''
        self.titleLine = 0
        self.titleAnchor = None

        if fileext in ('.svg', '.jpg', '.png'):
            self.category = Images
        elif fileext == '.adoc':
            (self.category,
             self.title,
             self.titleLine,
             self.titleAnchor) = self.findTitle()
        else:
            self.category = Partials

        # Determine destination path based on category
        # images/ are treated specially since there is only a single
        # directory and the component directory is already named Images.
        componentPath = Path(self.component)
        if self.category == Partials:
            self.dstpath = componentPath / Partials / self.relpath
        elif self.category == Pages:
            # Save the page in partials/, link from pages/
            self.dstpath = componentPath / Partials / self.relpath
            self.dstlink = componentPath / Pages / self.relpath
        else:
            # Images go under images/, not under images/images/
            # This could fail if there were ever top-level images but as all
            # images used in the spec are required to be specified relative
            # to {images}, it is OK.
            self.dstpath = componentPath / self.relpath

    def rewriteXrefs(self, pageMap=None, xrefMap=None):
        """Rewrite asciidoc <<>> xrefs into Antora xref: xrefs, including
        altering the xref target. self.lines is modified in place.

        - pageMap - map from page anchors to page names (default: empty)
        - xrefMap - map from anchors within a page to the page anchor
          (default: empty)
        """

        # Use None defaults rather than shared mutable {} defaults, since
        # the maps are stored on the instance below.
        if pageMap is None:
            pageMap = {}
        if xrefMap is None:
            xrefMap = {}

        # Keep a reference to the maps in use on the instance.
        self.pageMap = pageMap
        self.xrefMap = xrefMap

        # Xref markup may be broken across lines, and may or may not include
        # anchor text. Track whether the closing >> is being looked for at
        # start of line, or not.
        withinXref = False

        for lineno, line in enumerate(self.lines):
            if withinXref:
                # Could use line.replace, but that does not return a match
                # count, so we cannot tell if the '>>' is missing.
                (line, count) = re.subn(r'>>', r']', line, count=1)
                if count == 0:
                    print(f'WARNING: No closing >> found on line {lineno} of {self.relpath}', file=sys.stderr)
                elif line[0] != ' ' and self.lines[lineno-1][-1] not in '[ ':
                    # Add whitespace corresponding to crushed-out newline on
                    # previous line, so title words do not run together.
                    self.lines[lineno-1] += ' '
                withinXref = False

            # Now look for all xrefs starting on this line and remap them,
            # including remapping the anchor.

            # First, complete xrefs with alt-text (<<anchor, text>>)
            (line, _) = re.subn(r'<<([^,>]*),([^>]+)>>',
                lambda match: replaceAnchorText(match, pageMap, xrefMap),
                line)

            # Next, complete xrefs without alt-text (<<anchor>>)
            (line, _) = re.subn(r'<<([^,>]*)>>',
                lambda match: replaceAnchorOnly(match, pageMap, xrefMap),
                line)

            # Finally, if there is a trailing '<<anchor,' at EOL, remap it
            # and set the flag so the terminating '>>' on the next line will
            # be mapped into an xref closing ']'.
            (line, count) = re.subn(r'<<([^,>]*),([^>]*)$',
                lambda match: replaceAnchorTrailingText(match, pageMap, xrefMap),
                line)
            if count > 0:
                withinXref = True

            self.lines[lineno] = line

    def __str__(self):
        """Return a multi-line, human-readable summary of this file."""

        # Report the instance's own source path; lines is None until the
        # file has been populated.
        numLines = len(self.lines) if self.lines is not None else 0

        lines = [
            f'Input file {self.srcpath}: {numLines} lines',
            f'root = {self.root} component = {self.component} relpath = {self.relpath}',
            f'category = {self.category} dstpath = {self.dstpath}',
            f'title = {self.title}',
            f'titleAnchor = {self.titleAnchor}',
        ]
        return '\n'.join(lines)

    def removeDestination(self, path, text, overwrite):
        """Remove a destination file, if it exists and overwrite is true.
        Ensure the destination directory exists.

        - path - file pathname
        - text - descriptive text for errors
        - overwrite - if True, replace existing output file

        Raises RuntimeError if the destination exists and overwrite is
        False.
        """

        # lexists (rather than exists) also detects a dangling symlink left
        # by a previous run, which would otherwise make os.symlink fail.
        if os.path.lexists(path):
            if not overwrite:
                raise RuntimeError(f'Will not overwrite {text}: {path}')
            os.remove(path)

        # A bare filename has no directory component to create.
        dirname = os.path.dirname(path)
        if dirname:
            os.makedirs(dirname, exist_ok=True)

    def rewriteFile(self, overwrite = True, pageHeaders = None):
        """Write source file to component directory. Images are just symlinked
        to the external file. Pages are rewritten to Partials, then
        symlinked to Pages.

        - overwrite - if True, replace existing output files
        - pageHeaders - if not None, a list of strings to inject
          following the chapter heading in each page

        <<>>-style xrefs are assumed to be rewritten prior to calling
        rewriteFile.

        May still need to rewrite custom macros.

        Raises RuntimeError if an output exists and overwrite is False, if
        the output file cannot be opened, or if a page has no Pages link
        path.
        """

        self.removeDestination(self.dstpath, 'destination file', overwrite)

        if self.category == Images:
            # Just symlink destination image to source
            os.symlink(self.srcpath, self.dstpath)
        elif self.category == Partials:
            self.writeFile(self.dstpath)
        elif self.category == Pages:
            if pageHeaders is not None:
                # Inject page headers immediately following page title.
                # Add blank lines before and after the pageHeaders to avoid
                # coalescing with file content.
                split = self.titleLine + 1
                self.lines = (self.lines[:split]
                              + ['\n'] + pageHeaders + ['\n']
                              + self.lines[split:])

            self.writeFile(self.dstpath)

            if self.dstlink is None:
                raise RuntimeError(f'Wrote Page {self.dstpath} to Partials, but no Pages link supplied')

            self.removeDestination(self.dstlink, 'destination link', overwrite)
            os.symlink(self.dstpath, self.dstlink)

    def writeFile(self, path):
        """Write self.lines[] to file at specified path

        Lines are written verbatim, with no line terminator added.

        Raises RuntimeError if the file cannot be opened for writing.
        """

        try:
            fp = open(path, 'w', encoding='utf8')
        except OSError as err:
            raise RuntimeError(f'Cannot open output file {path}') from err

        # The context manager closes the file even if a write fails.
        with fp:
            fp.writelines(self.lines)
| |
def testHarness():
    """Exercise DocFile.rewriteXrefs on a small in-memory sample.

    Prints the sample lines before and after rewriting. The sample covers
    xrefs with and without alt-text, and xrefs split across two lines.
    """

    def printFile(label, lines):
        """Print a label, a separator rule, then each of the lines."""
        print(label)
        print('------------------')
        for text in lines:
            print(text)

    # Maps used to resolve the sample xrefs
    samplePageMap = {
        'ext'  : 'file/ext.adoc',
        'core' : 'file/core.adoc',
    }
    sampleXrefMap = {
        'ext'       : [ 'ext', '' ],
        'ext-label' : [ 'ext', 'LABELLED ext-label' ],
        'core'      : [ 'core', 'Core Title' ],
        'core-label': [ 'core', 'Core Label Title' ],
    }

    # Test harness
    sample = DocFile()
    sample.lines = [
        '<<ext,ext chapter>> <<ext-label,',
        'ext chapter/label>>',
        '<<core>>, <<core-label, core chapter/label',
        '>>'
    ]

    printFile('Original File', sample.lines)

    sample.rewriteXrefs(samplePageMap, sampleXrefMap)

    printFile('Edited File', sample.lines)
| |
if __name__ == '__main__':
    # Script entry point: parse arguments, classify every input file, then
    # rewrite xrefs and write the converted files into the component
    # directory. Optionally writes the page map as a .cjs file.
    parser = argparse.ArgumentParser()

    parser.add_argument('-root', action='store', dest='root',
                        default=os.getcwd(),
                        help='Specify root directory under which files are located (default current directory)')
    parser.add_argument('-pageHeaders', action='store', dest='pageHeaders',
                        default=None,
                        help='Specify file whose contents are injected after title of each converted page')
    parser.add_argument('-component', action='store', dest='component',
                        required=True,
                        help='Specify module / component directory in which converted files are written')
    #parser.add_argument('-htmlspec', action='store', dest='htmlspec',
    #                    default=None, required=False,
    #                    help='Specify HTML of generated spec to extract anchor mapping from')
    parser.add_argument('-xrefpath', action='store', dest='xrefpath',
                        default=None, required=False,
                        help='Specify path to xrefMap.py containing map of anchors to chapter anchors')
    parser.add_argument('-pagemappath', action='store', dest='pagemappath',
                        default=None, required=False,
                        help='Specify path to output pageMap.cjs containing map of anchors to chapter anchors')
    parser.add_argument('-filelist', action='store',
                        default=None, required=False,
                        help='Specify file containing a list of filenames to convert, one/line')
    parser.add_argument('files', metavar='filename', nargs='*',
                        help='Specify name of a single file to convert')

    args = parser.parse_args()

    args.root = os.path.abspath(args.root)
    args.component = os.path.abspath(args.component)

    if args.pageHeaders is not None:
        # Replace the filename with the file content (a list of lines).
        # Fail loudly if it cannot be read, as is done for -filelist below,
        # rather than silently converting pages without headers.
        pageHeadersPath = args.pageHeaders
        args.pageHeaders, _ = loadFile(pageHeadersPath)
        if args.pageHeaders is None:
            raise RuntimeError(f'Error reading pageHeaders file {pageHeadersPath}')

    # Flip to True to run the built-in xref rewriting sample instead
    if False:
        testHarness()
        sys.exit(0)

    # Initialize dictionaries
    pageInfo = {}
    pageMap = {}

    # The xrefmap is imported from the 'xrefMap' module, if it exists
    try:
        if args.xrefpath is not None:
            sys.path.append(args.xrefpath)
        from xrefMap import xrefMap
    except ImportError:
        # Covers both a missing module and a module lacking the xrefMap
        # name. Other errors in the module are allowed to propagate.
        print('WARNING: No module xrefMap containing xrefMap dictionary', file=sys.stderr)
        xrefMap = {}

    # If a file containing a list of files was specified, add each one.
    # Could try using os.walk() instead, but that is very slow.
    if args.filelist is not None:
        count = 0
        lines, _ = loadFile(args.filelist)
        if lines is None:
            raise RuntimeError(f'Error reading filelist {args.filelist}')
        for line in lines:
            path = line.rstrip()
            # Only .adoc paths starting with a letter are accepted. The
            # emptiness check keeps blank lines from raising IndexError.
            if path and path[0].isalpha() and path.endswith('.adoc'):
                args.files.append(path)
                count += 1
        print(f'Read {count} paths from {args.filelist}')

    for filename in args.files:
        # Create data structure representing the file.
        docFile = DocFile()
        docFile.populate(filename = filename,
                         root = args.root,
                         component = args.component)

        # Save information about the file under its relpath
        pageInfo[docFile.relpath] = docFile

        # Save mapping from page anchor to its relpath
        # NOTE(review): findTitle() returns '' rather than None when no
        # anchor is found, so .adoc files without a title anchor are
        # recorded under the '' key - confirm this is intended.
        if docFile.titleAnchor is not None:
            pageMap[docFile.titleAnchor] = docFile.relpath

    # All files have been read and classified.
    # Rewrite them in memory, then write them out.

    for docFile in pageInfo.values():
        # Look for <<>>-style anchors and rewrite them to Antora xref-style
        # anchors using the pageMap (of top-level anchors to page names) and
        # xrefmap (of anchors to top-level anchors).
        docFile.rewriteXrefs(pageMap, xrefMap)
        docFile.rewriteFile(overwrite = True, pageHeaders = args.pageHeaders)

    # Write the pageMap to a .cjs file for use in the Antora build's
    # specmacros extensions. The xrefMap is already written in JS form.
    if args.pagemappath is not None:
        try:
            fp = open(args.pagemappath, 'w', encoding='utf8')
        except OSError as err:
            raise RuntimeError(f'Cannot open output pageMap.cjs file {args.pagemappath}') from err

        # The context manager closes the file even if a write fails.
        with fp:
            print('exports.pageMap = {', file=fp)
            for pageAnchor in sorted(pageMap):
                pageName = pageMap[pageAnchor]
                print(f'    {undefquote(pageAnchor)} : {undefquote(pageName)},', file=fp)
            print('}', file=fp)
| |
| ## if not os.path.exists(args.xrefmap): |
| ## raise UserWarning(f'Specified xrefmap {args.xrefmap} does not exist') |
| ## if args.xrefmap[-3:] != '.py': |
| ## raise UserWarning(f'Specified xrefmap {args.xrefmap} is not a .py file') |
| ## |
| ## abspath = os.path.abspath(args.xrefmap) |
| ## xrefdir = os.path.dirname(os.path.abspath(args.xrefmap)) |
| ## sys.path.append(dir) |
| ## |
| ## xrefbase = os.path.split(args.xrefmap)[1] |
| ## xrefbase = os.path.splitext(xrefbase)[0] |
| ## |
| ## raise UserWarning(f'Specified xrefmap {args.xrefmap} does not exist') |