blob: e5e9ae0a69086dd788877a75372e24706c7c8ff3 [file] [log] [blame]
const fs = require("fs").promises;
const jsdom = require("jsdom");
const { JSDOM } = jsdom;
const path = require("path");
/**
* Run this after outputting html into 'dist'
* It will update ../../../docs/ldml/*.anchors.json
* Use source control to see if the links have changed.
*/
// We would ideally run marked and process the output here.
// But that might introduce duplicate code.
// Status icons prefixed to console messages so the kind of message is visible at a glance.
// Printed once per processed page when the run completes.
const DONE_ICON = "✅";
// Marks progress steps (reading HTML, collecting and checking links).
const GEAR_ICON = "⚙️";
// Not referenced in the visible part of this script.
const NONE_ICON = "∅";
// Not referenced in the visible part of this script.
const PACKAGE_ICON = "📦";
// Printed when an individual HTML file is being read.
const SECTION_ICON = "📍";
// Not referenced in the visible part of this script.
const TYPE_ICON = "📂";
// Marks warnings: duplicate anchors, unrecognized links, and the missing-link summary.
const WARN_ICON = "⚠️";
// Marks the closing hint about checking external links.
const POINT_ICON = "👉";
// Marks each broken internal link.
const MISSING_ICON = "❌";
/**
 * Build the markdown link for a section page, optionally pointing at an anchor.
 *
 * @param {string} targetSection e.g. 'tr35-info'
 * @param {string} anchor e.g. 'Parts'; when falsy, only the page is returned
 * @returns e.g. 'tr35-info.md#Parts', or 'tr35-info.md' when there is no anchor
 */
function constructLink(targetSection, anchor) {
  // A missing or empty anchor yields the bare page name with no trailing '#'.
  const fragment = anchor ? `#${anchor}` : "";
  return `${targetSection}.md${fragment}`;
}
/**
 * Parse one rendered HTML page, collect the anchors it defines and the
 * internal links it contains, and write the sorted anchor list to
 * `../../../docs/ldml/<basename>.anchors.json` (relative to the working directory).
 *
 * Duplicate anchors and links that are neither intra-page, `tr35*.html`, nor
 * http/https/mailto/ftp are reported on stderr but do not abort processing.
 *
 * @param {string} infile path to the input .html file
 * @returns {Promise<[string, string[], string[]]>} `[basename, anchorList, targetList]`:
 *   the file name without `.html`, the sorted anchors defined on the page, and the
 *   sorted internal link targets in `page.md#anchor` form
 */
async function extractAnchors(infile) {
  // Link classifiers, built once per page rather than once per link.
  const INTRA_PAGE_LINK = /^#(.*)$/; // starts with # => 1=anchor
  const TR_SECTION_LINK = /^(tr35(?:[^.]*))\.html(?:#(.*))?$/; // => 1=basename, 2=anchor
  const EXTERNAL_LINK = /^(http|https|mailto|ftp):.*$/; // scheme

  const basename = path.basename(infile, ".html");
  const dirname = "../../../docs/ldml";
  console.log(`${SECTION_ICON} Reading ${infile}`);
  let rawHtml = await fs.readFile(infile, "utf-8");
  // oh the irony of removing a BOM before posting to unicode.org
  // The text is already decoded, so the BOM is the single code unit U+FEFF
  // (not three bytes); strip exactly one character.
  if (rawHtml.charCodeAt(0) === 0xfeff) {
    rawHtml = rawHtml.slice(1);
  }
  // Spin up a JSDOM so we can walk the parsed document
  const dom = new JSDOM(rawHtml);
  const document = dom.window.document;
  const anchors = new Set();
  const targets = new Set();

  /** Record an anchor name; warn if this page already defined it. */
  function addAnchor(n) {
    if (!n) return;
    if (anchors.has(n)) {
      console.error(`${WARN_ICON} ${constructLink(basename)}: Duplicate anchor: #${n}`);
    } else {
      anchors.add(n);
    }
  }

  /** Classify an href and record it as a target if it is an internal link. */
  function addTarget(href) {
    const intraPage = INTRA_PAGE_LINK.exec(href);
    if (intraPage) {
      // same page
      targets.add(constructLink(basename, intraPage[1]));
      return;
    }
    const trSection = TR_SECTION_LINK.exec(href);
    if (trSection) {
      // another page; group 2 is undefined when the link has no fragment
      targets.add(constructLink(trSection[1], trSection[2]));
      return;
    }
    if (EXTERNAL_LINK.test(href)) {
      // external: nothing to do
      // TODO: add to list of external links?
      return;
    }
    // Error on all other links
    console.error(`${WARN_ICON} ${basename}: Unknown anchor: ${href}`);
  }

  // extract anchors: every element's id, plus the legacy name attribute on <a>
  for (const el of document.getElementsByTagName("*")) {
    addAnchor(el.getAttribute("id"));
    if (el.tagName === "A") {
      addAnchor(el.getAttribute("name"));
    }
  }

  // extract targets
  for (const a of document.getElementsByTagName("A")) {
    const href = a.getAttribute("href");
    if (href) {
      addTarget(href);
    }
  }

  // Locale-neutral collation keeps the output order stable across machines.
  const coll = new Intl.Collator(["und"]);
  const anchorList = Array.from(anchors).sort(coll.compare);
  const anchorFile = path.join(dirname, `${basename}.anchors.json`);
  await fs.writeFile(anchorFile, JSON.stringify(anchorList, null, " "));
  const targetList = Array.from(targets).sort(coll.compare);
  return [basename, anchorList, targetList];
}
/**
 * Run extractAnchors over every `.html` file found directly in `./dist`.
 * The files are processed concurrently; the first failure rejects the result.
 * @returns {Promise<Array>} one `[basename, anchorList, targetList]` entry per
 *   HTML file, as returned by extractAnchors
 */
async function extractAll() {
  // Declared locally: the bare assignment previously created an implicit global.
  const outbox = "./dist";
  const entries = await fs.readdir(outbox);
  const fileList = entries
    .filter((f) => /\.html$/.test(f))
    .map((f) => path.join(outbox, f));
  // Wrap the call so only the path is forwarded, not map's index/array arguments.
  return Promise.all(fileList.map((f) => extractAnchors(f)));
}
/**
 * Extract anchors and links from every page, then verify that each internal
 * link target resolves to a known page or to an anchor defined on that page.
 * Each broken link is reported on stderr together with the page(s) that
 * reference it, and `process.exitCode` is set to 1 if any are found.
 * @returns {Promise<string[]>} basenames of the pages that were processed
 */
async function checkAll() {
  console.log(`${GEAR_ICON} Reading HTML`);
  const checked = await extractAll();
  console.log(`${GEAR_ICON} Collecting internal links`);
  const allInternalTargets = new Set();
  const allInternalAnchors = new Set();
  // e.g. "tr35-info" => Set(["tr35-keyboards.md#Element_keyboard", …])
  const sectionToTargets = new Map();
  for (const [sourceSection, anchorList, targetList] of checked) {
    // The page itself is a valid target, example: 'tr35-collation.md'
    allInternalAnchors.add(constructLink(sourceSection));
    for (const target of targetList) {
      allInternalTargets.add(target);
    }
    sectionToTargets.set(sourceSection, new Set(targetList)); // for error reporting
    for (const anchor of anchorList) {
      allInternalAnchors.add(constructLink(sourceSection, anchor)); // tr35-collation.md#Parts
    }
  }
  console.log(`${GEAR_ICON} Checking ${allInternalTargets.size} internal links against ${allInternalAnchors.size} anchors`);
  const missingInternalLinks = new Set();
  for (const expectedAnchor of allInternalTargets) {
    if (!allInternalAnchors.has(expectedAnchor)) {
      missingInternalLinks.add(expectedAnchor);
    }
  }
  if (missingInternalLinks.size > 0) {
    // Loop variable is declared here; it was previously an implicit global.
    for (const expectedAnchor of missingInternalLinks) {
      // coalesce: list every page that links to this missing anchor
      const sourceSections = [...sectionToTargets]
        .filter(([, sectionTargets]) => sectionTargets.has(expectedAnchor)) // Does this section target this anchor?
        .map(([section]) => constructLink(section)) // drop the set
        .join(' & ') // join section name(s)
        || '(unknown section(s))'; // error
      console.error(`${MISSING_ICON} Broken internal link: ${sourceSections}: (${expectedAnchor})`);
    }
    console.error(`${WARN_ICON} ${missingInternalLinks.size} missing links.`);
    process.exitCode = 1;
  }
  console.log(`${POINT_ICON} use: 'lychee --cache docs/ldml' to check external links`);
  return checked.map(([section]) => section);
}
// Entry point: run the link check, then list every processed page.
// The rejection handler is passed as the second argument to then(), so it
// only handles a failure of checkAll() itself.
checkAll().then(
  (sections) => {
    for (const section of sections) {
      console.log(`${DONE_ICON} ${constructLink(section)}`);
    }
  },
  (err) => {
    console.error(err);
    process.exitCode = 1;
  }
);