Enable URL rewriting for GitHub license requests.

* Also enables the new headless mode in Puppeteer.
* Adds a `.puppeteerrc.cjs` so that App Engine Standard downloads the correct version of Puppeteer.

Test: curl -d '{"url": "https://github.com/google/desugar_jdk_libs/blob/master/LICENSE"}' -H 'Content-Type: application/json' -X POST 'http://localhost:8080/convert/licenses'

Fixes: b/285652313
Change-Id: I8fdc0ea21008dd4cabd5d81b28bf7bfa3ddc8079
diff --git a/development/fetchLicenses/url-transforms.ts b/development/fetchLicenses/url-transforms.ts
new file mode 100644
index 0000000..85b2430
--- /dev/null
+++ b/development/fetchLicenses/url-transforms.ts
@@ -0,0 +1,71 @@
+/**
+ * Rewrites a license URL into a form that can be fetched directly.
+ *
+ * The only rewrite currently applied is for GitHub file views
+ * (`https://github.com/<owner>/<repo>/blob/<ref>/<path>`), which are mapped to
+ * the matching `https://raw.githubusercontent.com` URL. Every other URL,
+ * including malformed ones, is returned unchanged.
+ *
+ * @param url The URL to inspect.
+ * @returns The rewritten URL, or `url` itself when no rewrite applies.
+ */
+export function transformUrl(url: string): string {
+  if (isGitHub(url)) {
+    // Transform https://github.com URLs to https://raw.githubusercontent.com
+    // because GitHub applies DDoS protection which prevents us from being
+    // able to pull the contents of the LICENSE file.
+    return rawGithubUrl(url);
+  }
+  return url;
+}
+
+/**
+ * Maps a GitHub file-view URL to its raw.githubusercontent.com equivalent.
+ *
+ * Only the `blob` segment in its fixed position directly after
+ * `<owner>/<repo>` is dropped; later path segments that happen to be named
+ * `blob` are preserved. Query string and fragment are carried over.
+ *
+ * @param url The GitHub URL to convert.
+ * @returns The raw-content URL, or `url` unchanged when it is not a file view
+ *     (for example a repository root or a `tree` listing) or cannot be parsed.
+ */
+function rawGithubUrl(url: string): string {
+  const parsed = parseUrl(url);
+  if (parsed === undefined) {
+    return url;
+  }
+  // Empty segments come from leading, trailing or doubled slashes.
+  const segments = parsed.pathname.split('/').filter((s) => s.length > 0);
+  const [owner, repo, marker, ...refAndPath] = segments;
+  // A file view needs <owner>/<repo>/blob/<ref>/<path>: at least five segments.
+  if (marker !== 'blob' || refAndPath.length < 2) {
+    return url;
+  }
+  const rawPath = [owner, repo, ...refAndPath].join('/');
+  return `https://raw.githubusercontent.com/${rawPath}${parsed.search}${parsed.hash}`;
+}
+
+/**
+ * Reports whether `url` is an HTTPS URL whose host is exactly `github.com`.
+ *
+ * The host is compared after parsing so that look-alike hosts such as
+ * `github.com.example.org` are not treated as GitHub.
+ */
+function isGitHub(url: string): boolean {
+  const parsed = parseUrl(url);
+  return (
+    parsed !== undefined &&
+    parsed.protocol === 'https:' &&
+    parsed.host === 'github.com'
+  );
+}
+
+/** Parses `url`, returning `undefined` instead of throwing when it is malformed. */
+function parseUrl(url: string): URL | undefined {
+  try {
+    return new URL(url);
+  } catch {
+    return undefined;
+  }
+}