blob: f08c8210669a613cef7d1907d0d36542a329209c [file] [log] [blame]
#!/usr/bin/python3
#
# Copyright 2022-2023 The Khronos Group Inc.
# SPDX-License-Identifier: Apache-2.0
"""Used to convert files from the asciidoctor spec tree to Antora module
format. Success is highly dependent on strict adherence to Vulkan spec
authoring conventions.
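Usage: `antora-prep.py [-root path] -component path [-pageHeaders file]
[-xrefpath dir] [-pagemappath file] [-filelist file] [files]`
- `-root` is the root path (repository root, usually) relative to which spec
files are processed. Defaults to current directory if not specified.
- `-component` is the path to the module and component in which converted
files are written (e.g. the component directory under which pages/,
partials/, images/, etc. are located).
- `-pageHeaders` is a file whose contents are injected after the title of
each converted page.
- `-xrefpath` is a directory added to the module search path, from which the
`xrefMap` module (map of anchors to chapter anchors) is imported.
- `-pagemappath` is the output `pageMap.cjs` file, to which the map of page
anchors to page paths is written.
- `-filelist` is a file containing a list of filenames to convert, one per
line.
- `files` are asciidoc source files from the spec to convert.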
Image files are linked from the component 'images' directory.
Asciidoc markup files (.adoc) are scanned for the first title markup and
classified as partials or pages depending on whether it is a top-level title
or not. All .adoc files are rewritten to the component 'partials' directory, to
allow transclusion of pages to work (otherwise the transclusions would also
have to be rewritten).
Pages then have additional markup injected immediately following the page
title to set custom attributes needed for the build. Pages are then
symbolically linked from the component 'pages' directory to the actual
rewritten file in the 'partials' directory to follow Antora conventions.
"""
# For error and file-loading interfaces only
import argparse
import importlib
import os
import re
import sys
from generator import enquote
from reflib import loadFile, logDiag, logWarn, logErr, setLogFile, getBranch
from pathlib import Path
# Patterns used to classify a markup file from its leading lines.

# An anchor line of the form [[anchor]] or [[anchor,text]]; only the part
# before the first comma is captured as the anchor name.
titleAnchorPat = re.compile(r'^\[\[(?P<anchor>[^,]+).*\]\]$')

# A top-level title: a single '=' or '#', a space, then capitalized text.
titlePat = re.compile(r'^[=#] (?P<title>[A-Z].*)')

# A second-level or deeper title: two or more '=' or '#' characters.
subtitlePat = re.compile(r'^[=#]{2,} (?P<title>[A-Z].*)')

# File categories. Each value is also the name of the corresponding
# subdirectory of the Antora component directory.
Pages, Partials, Images = 'pages', 'partials', 'images'
def undefquote(s):
    """Return `s` quoted by enquote() for use in generated JavaScript, or
    the bare JavaScript `undefined` token when `s` is None."""

    return 'undefined' if s is None else enquote(s)
def mapAnchor(anchor, title, pageMap, xrefMap, closeAnchor):
    """Rewrite a <<anchor{, title}>> xref -> xref:pagemap#anchor[{title}]

    - anchor - anchor name
    - title - xref description or '' if not specified, in which case the
      anchor text from the xrefMap is used if available
    - pageMap, xrefMap - per DocFile.rewriteXrefs
    - closeAnchor - True if closing >> is on this line, False otherwise

    Returns the Antora xref markup as a string. When closeAnchor is False
    the trailing ']' is omitted, so the caller can supply it once the
    closing '>>' is found on a following line.

    If the anchor cannot be resolved through xrefMap and pageMap, the
    anchor is passed through without a page name and a message is written
    to stderr.
    """

    # TODO: Simplify the page anchor if pageName == current page

    # Determine which page anchor this anchor comes from.
    # If it cannot be determined, use the unmapped anchor.
    try:
        if title != '' or not closeAnchor:
            # Either a (possibly up to a line break) title is supplied, or
            # title is on the next line
            (pageAnchor, _) = xrefMap[anchor]
        else:
            # No explicit title. Infer one from anchor and xrefMap.
            (pageAnchor, title) = xrefMap[anchor]

            # If the title is *still* empty, make a note of it and just use
            # the anchor name
            if title == '':
                print(f'No title found for anchor {anchor}', file=sys.stderr)
                title = anchor

        # Page the page anchor comes from
        pageName = pageMap[pageAnchor]
        print(f'mapAnchor: anchor {anchor} pageAnchor {pageAnchor} -> pageName = {pageName}')

        xref = f'{pageName}#{anchor}'
    except (LookupError, TypeError, ValueError):
        # LookupError: anchor or page anchor missing from the maps.
        # TypeError / ValueError: malformed xrefMap entry that cannot be
        # unpacked into (pageAnchor, title).
        # Anything else (including KeyboardInterrupt) is allowed to
        # propagate rather than being silently swallowed.
        print(f'Cannot determine which page {anchor} comes from, passing through to Antora intact', file=sys.stderr)
        xref = f'{anchor}'

    # Remove extraneous whitespace (including any embedded newline)
    title = ' '.join(title.split())

    if closeAnchor:
        return f'xref:{xref}[{title}]'
    else:
        return f'xref:{xref}[{title}'
def replaceAnchorText(match, pageMap, xrefMap):
    """Rewrite <<anchor,text>> to xref:newanchor[text]

    - match - match object; group 1 is the anchor, group 2 is the text
    - pageMap, xrefMap - per rewriteXrefs below

    The xref is complete on this line, so the closing bracket is emitted.
    """

    anchor, text = match.group(1, 2)
    return mapAnchor(anchor, text, pageMap, xrefMap, closeAnchor=True)
def replaceAnchorOnly(match, pageMap, xrefMap):
    """Rewrite <<anchor>> to xref:newanchor[]

    - match - match object; group 1 is the anchor
    - pageMap, xrefMap - per rewriteXrefs below

    No text is supplied, so an empty title is passed to mapAnchor.
    """

    return mapAnchor(match.group(1), '', pageMap, xrefMap, closeAnchor=True)
def replaceAnchorTrailingText(match, pageMap, xrefMap):
    """Rewrite <<anchor, to xref:newanchor[

    - match - match object; group 1 is the anchor, group 2 is the text
      (which may be empty)
    - pageMap, xrefMap - per rewriteXrefs below

    The xref continues on the next line, so the closing bracket is left
    off.
    """

    anchor, text = match.group(1, 2)
    return mapAnchor(anchor, text, pageMap, xrefMap, closeAnchor=False)
class DocFile:
    """Information about a markup file being converted"""

    def __init__(self):
        """Constructor

        - lines - text of file as list of strings
        - root - common base directory for src files
        - component - path to component directory for outputs
        - srcpath - absolute path to file source
        - relpath - path to file source relative to root
        - dstpath - path to output file destination
        - dstlink - path to an alias (symlink to) dstpath, used for
          files that need to be in both partials and pages directories.
        - category - file type - Pages, Partials, or Images. These are
          string variables containing the corresponding component
          subdirectory name.
        - title - page title for Pages, else ''
        - titleLine - index into lines of the title line
        - titleAnchor - page title anchor for Pages, else ''
        - anchors - asciidoc anchors found in the file
        - includes - asciidoc includes found in the file
        - pageMap - dictionary mapping a page anchor to a source file
          relpath
        - xrefMap - dictionary mapping an anchor within a page to a page
          anchor
        """

        self.lines = None
        self.root = None
        self.component = None
        self.srcpath = None
        self.relpath = None
        self.dstpath = None
        self.dstlink = None
        self.category = None
        self.title = ''
        # Initialised here so rewriteFile never reads an unset attribute
        self.titleLine = 0
        self.titleAnchor = ''
        self.anchors = set()
        self.includes = set()
        self.pageMap = {}
        self.xrefMap = {}

    def findTitle(self):
        """Find category (Pages or Partials) and title, for Pages, in a
        .adoc markup file.

        Heuristic is to search the beginning of the file for a top-level
        asciidoc title, preceded by an anchor for the page.

        Returns (category, title, titleLine, titleAnchor). titleAnchor is
        '' if no anchor line precedes the title. If no title is found in
        the lines scanned, the file is treated as a partial with the
        placeholder title 'NO TITLE FOUND' and titleLine -1."""

        # Chapter title block must be within this many lines of start of file
        maxLines = min(30, len(self.lines))

        # Default, if page anchor not found
        titleAnchor = ''

        for lineno in range(maxLines):
            line = self.lines[lineno]

            # Look for an anchor, which must precede the title to apply to
            # it (really, must precede it by exactly one line). If several
            # anchor lines are seen, the most recent one wins.
            match = titleAnchorPat.match(line)
            if match is not None:
                titleAnchor = match.group('anchor')
                continue

            # If we find a top-level title, it is a page.
            match = titlePat.match(line)
            if match is not None:
                return (Pages, match.group('title'), lineno, titleAnchor)

            # If we find a second-level or above title, it is a partial
            match = subtitlePat.match(line)
            if match is not None:
                return (Partials, match.group('title'), lineno, titleAnchor)

        # If we do not find a match in the first maxLines lines, assume it
        # is a partial.
        return (Partials, 'NO TITLE FOUND', -1, titleAnchor)

    def populate(self,
                 filename,
                 root,
                 component):
        """Populate data structures given file content and location.

        - filename - file to scan
        - root - absolute path to root under which all source files are
          read
        - component - absolute path to module / component directory under
          which all destination files are written

        Raises RuntimeError if the file content cannot be loaded.
        """

        # Load file content
        self.srcpath = os.path.abspath(filename)
        self.lines, _ = loadFile(self.srcpath)
        if self.lines is None:
            raise RuntimeError(f'No such file {self.srcpath}')

        # Miscellaneous relevant paths
        self.root = root
        self.relpath = os.path.relpath(self.srcpath, root)
        self.component = component

        # Determine file category.
        # Only .adoc files are candidates for pages, which is verified by
        # looking at the file header for a top-level title.
        # .svg .jpg .png are always images
        # Anything else is a partial
        (_, fileext) = os.path.splitext(filename)

        # Defaults
        self.title = ''
        self.titleLine = 0
        self.titleAnchor = None

        if fileext in ('.svg', '.jpg', '.png'):
            self.category = Images
        elif fileext == '.adoc':
            (self.category,
             self.title,
             self.titleLine,
             self.titleAnchor) = self.findTitle()
        else:
            self.category = Partials

        # Determine destination path based on category
        # images/ are treated specially since there is only a single
        # directory and the component directory is already named Images.
        if self.category == Partials:
            self.dstpath = Path(self.component) / Partials / self.relpath
        elif self.category == Pages:
            # Save the page in partials/, link from pages/
            self.dstpath = Path(self.component) / Partials / self.relpath
            self.dstlink = Path(self.component) / Pages / self.relpath
        else:
            # Images go under images/, not under images/images/
            # This could fail if there were ever top-level images but as all
            # images used in the spec are required to be specified relative
            # to {images}, it is OK.
            self.dstpath = Path(self.component) / self.relpath

    def rewriteXrefs(self, pageMap=None, xrefMap=None):
        """Rewrite asciidoc <<>> xrefs into Antora xref: xrefs, including
        altering the xref target.

        - pageMap - map from page anchors to page names
        - xrefMap - map from anchors within a page to the page anchor

        Either map may be omitted, in which case an empty map is used.
        self.lines is modified in place."""

        # Avoid mutable default arguments shared between calls
        if pageMap is None:
            pageMap = {}
        if xrefMap is None:
            xrefMap = {}

        # Keep the maps available as members
        self.pageMap = pageMap
        self.xrefMap = xrefMap

        # Xref markup may be broken across lines, and may or may not include
        # anchor text. Track whether the closing >> is being looked for at
        # start of line, or not.
        withinXref = False

        for lineno in range(len(self.lines)):
            line = self.lines[lineno]

            if withinXref:
                # Could use line.replace, but that does not return a match
                # count, so we cannot tell if the '>>' is missing.
                (line, count) = re.subn(r'>>', r']', line, count=1)
                if count == 0:
                    print(f'WARNING: No closing >> found on line {lineno} of {self.relpath}', file=sys.stderr)
                elif line[0] != ' ' and self.lines[lineno-1][-1] not in '[ ':
                    # Add whitespace corresponding to crushed-out newline on
                    # previous line, so title words do not run together.
                    self.lines[lineno-1] += ' '
                # Only the immediately following line is searched for the
                # closing '>>', whether or not it was found.
                withinXref = False

            # Now look for all xrefs starting on this line and remap them,
            # including remapping the anchor.

            # First, complete xrefs with alt-text (<<anchor, text>>)
            (line, count) = re.subn(r'<<([^,>]*),([^>]+)>>',
                lambda match: replaceAnchorText(match, pageMap, xrefMap),
                line)

            # Next, complete xrefs without alt-text (<<anchor>>)
            (line, count) = re.subn(r'<<([^,>]*)>>',
                lambda match: replaceAnchorOnly(match, pageMap, xrefMap),
                line)

            # Finally, if there is a trailing '<<anchor,' at EOL, remap it
            # and set the flag so the terminating '>>' on the next line will
            # be mapped into an xref closing ']'.
            (line, count) = re.subn(r'<<([^,>]*),([^>]*)$',
                lambda match: replaceAnchorTrailingText(match, pageMap, xrefMap),
                line)
            if count > 0:
                withinXref = True

            self.lines[lineno] = line

    def __str__(self):
        """Return a multi-line, human-readable summary of this file."""

        # Use the object's own source path instead of a global 'filename',
        # and tolerate an object which has not been populated yet.
        numLines = len(self.lines) if self.lines is not None else 0
        lines = [
            f'Input file {self.srcpath}: {numLines} lines',
            f'root = {self.root} component = {self.component} relpath = {self.relpath}',
            f'category = {self.category} dstpath = {self.dstpath}',
            f'title = {self.title}',
            f'titleAnchor = {self.titleAnchor}',
        ]
        return '\n'.join(lines)

    def removeDestination(self, path, text, overwrite):
        """Remove a destination file, if it exists and overwrite is true.
        Ensure the destination directory exists.

        - path - file pathname
        - text - descriptive text for errors
        - overwrite - if True, replace existing output file

        Raises RuntimeError if the destination exists and overwrite is
        False.
        """

        # lexists() is also True for a dangling symlink, which exists()
        # would miss, causing a later os.symlink() to the same path to fail.
        if os.path.lexists(path):
            if overwrite:
                os.remove(path)
            else:
                raise RuntimeError(f'Will not overwrite {text}: {path}')

        # A bare filename has no directory component to create
        dirname = os.path.dirname(path)
        if dirname:
            os.makedirs(dirname, exist_ok=True)

    def rewriteFile(self, overwrite = True, pageHeaders = None):
        """Write source file to component directory. Images are just symlinked
        to the external file. Pages are rewritten to Partials, then
        symlinked to Pages.

        - overwrite - if True, replace existing output files
        - pageHeaders - if not None, a list of strings to inject
          following the chapter heading in each page

        <<>>-style xrefs are assumed to be rewritten prior to calling
        rewriteFile.

        May still need to rewrite custom macros.

        Raises RuntimeError if an existing output may not be overwritten,
        or if a page has no link path (dstlink) set.
        """

        self.removeDestination(self.dstpath, 'destination file', overwrite)

        if self.category == Images:
            # Just symlink destination image to source
            os.symlink(self.srcpath, self.dstpath)
        elif self.category == Partials:
            self.writeFile(self.dstpath)
        elif self.category == Pages:
            if pageHeaders is not None:
                # Inject page headers immediately following page title.
                # Add blank lines before and after the pageHeaders to avoid
                # coalescing with file content.
                lines = self.lines[0:self.titleLine+1]
                lines += ['\n'] + pageHeaders + ['\n']
                lines = lines + self.lines[self.titleLine+1:]
                self.lines = lines

            self.writeFile(self.dstpath)

            if self.dstlink is None:
                # The exception was previously constructed but never raised
                raise RuntimeError(f'Wrote Page {self.dstpath} to Partials, but no Pages link supplied')
            else:
                self.removeDestination(self.dstlink, 'destination link', overwrite)
                os.symlink(self.dstpath, self.dstlink)

    def writeFile(self, path):
        """Write self.lines[] to file at specified path

        Lines are written as-is, with no line terminator added.

        Raises RuntimeError if the file cannot be opened for writing.
        """

        try:
            fp = open(path, 'w', encoding='utf8')
        except OSError as err:
            raise RuntimeError(f'Cannot open output file {path}') from err

        # Close the file even if a write fails
        with fp:
            for line in self.lines:
                print(line, file=fp, end='')
def testHarness():
    """Run rewriteXrefs over a small in-memory sample and print the lines
    before and after rewriting.

    The sample covers xrefs with and without text, and xrefs whose text
    or closing '>>' is on the following line.
    """

    def printFile(label, lines):
        # Label, separator, then one line of the file per output line
        print(label, '------------------', *lines, sep='\n')

    sample = DocFile()
    sample.lines = [
        '<<ext,ext chapter>> <<ext-label,',
        'ext chapter/label>>',
        '<<core>>, <<core-label, core chapter/label',
        '>>'
    ]

    # Page anchor -> page name
    pages = {
        'ext' : 'file/ext.adoc',
        'core' : 'file/core.adoc',
    }

    # Anchor -> [ page anchor, title ]
    xrefs = {
        'ext' : [ 'ext', '' ],
        'ext-label' : [ 'ext', 'LABELLED ext-label' ],
        'core' : [ 'core', 'Core Title' ],
        'core-label': [ 'core', 'Core Label Title' ],
    }

    printFile('Original File', sample.lines)
    sample.rewriteXrefs(pages, xrefs)
    printFile('Edited File', sample.lines)
if __name__ == '__main__':
    # Script entry point: parse arguments, classify every input file,
    # rewrite xrefs in memory, write the results under the component
    # directory, and optionally write the page map for the Antora build.
    parser = argparse.ArgumentParser()

    parser.add_argument('-root', action='store', dest='root',
                        default=os.getcwd(),
                        help='Specify root directory under which files are located (default current directory)')
    parser.add_argument('-pageHeaders', action='store', dest='pageHeaders',
                        default=None,
                        help='Specify file whose contents are injected after title of each converted page')
    parser.add_argument('-component', action='store', dest='component',
                        required=True,
                        help='Specify module / component directory in which converted files are written')
    #parser.add_argument('-htmlspec', action='store', dest='htmlspec',
    #                    default=None, required=False,
    #                    help='Specify HTML of generated spec to extract anchor mapping from')
    parser.add_argument('-xrefpath', action='store', dest='xrefpath',
                        default=None, required=False,
                        help='Specify path to xrefMap.py containing map of anchors to chapter anchors')
    parser.add_argument('-pagemappath', action='store', dest='pagemappath',
                        default=None, required=False,
                        help='Specify path to output pageMap.cjs containing map of anchors to chapter anchors')
    parser.add_argument('-filelist', action='store',
                        default=None, required=False,
                        help='Specify file containing a list of filenames to convert, one/line')
    parser.add_argument('files', metavar='filename', nargs='*',
                        help='Specify name of a single file to convert')

    args = parser.parse_args()

    args.root = os.path.abspath(args.root)
    args.component = os.path.abspath(args.component)

    if args.pageHeaders is not None:
        # Replace the path with the file content (a list of lines).
        # A load failure is reported rather than silently producing pages
        # without the requested headers, matching the filelist handling.
        pageHeadersPath = args.pageHeaders
        args.pageHeaders, _ = loadFile(pageHeadersPath)
        if args.pageHeaders is None:
            raise RuntimeError(f'Error reading pageHeaders file {pageHeadersPath}')

    # Developer toggle: change to True to run the built-in test harness
    if False:
        testHarness()
        sys.exit(0)

    # Initialize dictionaries
    pageInfo = {}
    pageMap = {}

    # The xrefmap is imported from the 'xrefMap' module, if it exists
    try:
        if args.xrefpath is not None:
            sys.path.append(args.xrefpath)
        from xrefMap import xrefMap
    except ImportError:
        # Missing module, or module without an xrefMap name. Other errors
        # (e.g. a syntax error in a generated xrefMap.py) are not masked.
        print('WARNING: No module xrefMap containing xrefMap dictionary', file=sys.stderr)
        xrefMap = {}

    # If a file containing a list of files was specified, add each one.
    # Could try using os.walk() instead, but that is very slow.
    if args.filelist is not None:
        count = 0
        lines, _ = loadFile(args.filelist)
        if lines is None:
            raise RuntimeError(f'Error reading filelist {args.filelist}')
        for line in lines:
            path = line.rstrip()
            # Skip blank lines (which would otherwise raise IndexError on
            # path[0]), entries not starting with a letter, and non-.adoc
            # files.
            if path and path[0].isalpha() and path.endswith('.adoc'):
                args.files.append(path)
                count = count + 1
        print(f'Read {count} paths from {args.filelist}')

    for filename in args.files:
        # Create data structure representing the file.
        docFile = DocFile()
        docFile.populate(filename = filename,
                         root = args.root,
                         component = args.component)

        # Save information about the file under its relpath
        pageInfo[docFile.relpath] = docFile

        # Save mapping from page anchor to its relpath
        # NOTE(review): .adoc files with no title anchor report '' rather
        # than None, so they are all recorded under the '' key - confirm
        # whether that is intended.
        if docFile.titleAnchor is not None:
            pageMap[docFile.titleAnchor] = docFile.relpath

    # All files have been read and classified.
    # Rewrite them in memory, then write them out.
    for docFile in pageInfo.values():
        # Look for <<>>-style anchors and rewrite them to Antora xref-style
        # anchors using the pageMap (of top-level anchors to page names) and
        # xrefmap (of anchors to top-level anchors).
        docFile.rewriteXrefs(pageMap, xrefMap)
        docFile.rewriteFile(overwrite = True, pageHeaders = args.pageHeaders)

    # Write the pageMap to a .cjs file for use in the Antora build's
    # specmacros extensions. The xrefMap is already written in JS form.
    if args.pagemappath is not None:
        try:
            fp = open(args.pagemappath, 'w', encoding='utf8')
        except OSError as err:
            raise RuntimeError(f'Cannot open output pageMap.cjs file {args.pagemappath}') from err

        # Close the file even if a write fails
        with fp:
            print('exports.pageMap = {', file=fp)
            for pageAnchor in sorted(pageMap):
                pageName = pageMap[pageAnchor]
                print(f' {undefquote(pageAnchor)} : {undefquote(pageName)},', file=fp)
            print('}', file=fp)
## if not os.path.exists(args.xrefmap):
## raise UserWarning(f'Specified xrefmap {args.xrefmap} does not exist')
## if args.xrefmap[-3:] != '.py':
## raise UserWarning(f'Specified xrefmap {args.xrefmap} is not a .py file')
##
## abspath = os.path.abspath(args.xrefmap)
## xrefdir = os.path.dirname(os.path.abspath(args.xrefmap))
## sys.path.append(dir)
##
## xrefbase = os.path.split(args.xrefmap)[1]
## xrefbase = os.path.splitext(xrefbase)[0]
##
## raise UserWarning(f'Specified xrefmap {args.xrefmap} does not exist')