src/com/google/doclava/PageMetadata.java - platform/external/doclava - Git at Google

 /*
  * Copyright (C) 2013 Google Inc.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
  *
  * http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 package com.google.doclava;

 import java.io.*;
 import java.text.BreakIterator;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.Comparator;
 import java.util.List;
 import java.util.regex.Pattern;
 import java.util.regex.Matcher;
 import java.io.File;

 import com.google.clearsilver.jsilver.data.Data;

 import org.ccil.cowan.tagsoup.*;
 import org.xml.sax.XMLReader;
 import org.xml.sax.InputSource;
 import org.xml.sax.Attributes;
 import org.xml.sax.helpers.DefaultHandler;

 import org.w3c.dom.Node;
 import org.w3c.dom.NodeList;

 import javax.xml.transform.dom.DOMResult;
 import javax.xml.transform.sax.SAXSource;
 import javax.xml.transform.Transformer;
 import javax.xml.transform.TransformerFactory;
 import javax.xml.xpath.XPath;
 import javax.xml.xpath.XPathConstants;
 import javax.xml.xpath.XPathExpression;
 import javax.xml.xpath.XPathFactory;

 /**
 * Metadata associated with a specific documentation page. Extracts
 * metadata based on the page's declared hdf vars (meta.tags and others)
 * as well as implicit data relating to the page, such as url, type, etc.
 * Includes a Node class that represents a page's metadata and can be
 * attached as a parent or child within the tree of metadata nodes built
 * for all pages. Node also includes methods for rendering the node tree
 * to a json file in docs output, which is then used by JavaScript to load
 * metadata objects into html pages.
 */

 public class PageMetadata {
   // Source file handed to the constructor.
   File mSource;
   // Destination path handed to the constructor; a trailing '/' is appended
   // there when the path is longer than one character and lacks one.
   String mDest;
   // NOTE(review): not assigned anywhere in the visible part of this file.
   String mTagList;
   // When true, meta.tags values and the sample.group value are lowercased
   // during normalization.
   static boolean sLowercaseTags = true;
   // When true, page.tags (keyword) values are lowercased during normalization.
   static boolean sLowercaseKeywords = true;
   //static String linkPrefix = (Doclava.META_DBG) ? "/" : "http://developer.android.com/";
   /**
    * Regex pattern to match javadoc {@literal @}link and similar inline tags.
    * Lazily skips from the opening brace-at sequence to the last whitespace,
    * dot, or hash character that precedes a run of letters, digits,
    * parentheses, or underscores ending at the closing brace, and captures
    * that run (the root symbol) as $1.
    */
   private static final Pattern JD_TAG_PATTERN =
       Pattern.compile("\\{@.*?[\\s\\.\\#]([A-Za-z\\(\\)\\d_]+)(?=\u007D)\u007D");

   public PageMetadata(File source, String dest, List<Node> taglist) {
     mSource = source;
     mDest = dest;

     if (dest != null) {
       int len = dest.length();
       if (len > 1 && dest.charAt(len - 1) != '/') {
         mDest = dest + '/';
       } else {
         mDest = dest;
       }
     }
   }

   /**
   * Sorts the given root nodes by type name, renders them (resources
   * grouped by type, followed by types grouped by tag) into one string,
   * and writes that string through the jd_lists_unified.cs template to
   * jd_lists_unified.js.
   *
   * @param rootTypeNodesList A list of root metadata nodes, each
   *        representing a type and its member child pages. Sorted in place.
   * @deprecated
   */
   public static void WriteList(List<Node> rootTypeNodesList) {
     Collections.sort(rootTypeNodesList, BY_TYPE_NAME);

     Node.Builder topBuilder = new Node.Builder();
     topBuilder.setLabel("TOP");
     topBuilder.setChildren(rootTypeNodesList);
     Node top = topBuilder.build();

     // Render the whole tree into a single string.
     StringBuilder rendered = new StringBuilder();
     top.renderTypeResources(rendered);
     top.renderTypesByTag(rendered);

     // Hand the rendered string to the template and write the js file.
     Data hdf = Doclava.makeHDF();
     hdf.setValue("reference_tree", rendered.toString());
     ClearPage.write(hdf, "jd_lists_unified.cs",  "jd_lists_unified.js");
   }

   /**
   * Sorts the given per-language root nodes by language name and, for each
   * language, sorts its type nodes by type name, renders them, and writes
   * the result through the jd_lists_unified.cs template to
   * jd_lists_unified_LANG.js. The matching jd_extras_LANG.js file, if
   * present, is then appended via appendExtrasMetadata.
   *
   * @param rootNodesList A list of root metadata nodes, one per language,
   *        each holding type nodes and their member child pages. Sorted
   *        in place, as are each node's children.
   */
   public static void WriteListByLang(List<Node> rootNodesList) {
     Collections.sort(rootNodesList, BY_LANG_NAME);
     for (Node langRoot : rootNodesList) {
       String lang = langRoot.getLang();
       String suffix = "_" + lang;

       List<Node> typeRoots = langRoot.getChildren();
       Collections.sort(typeRoots, BY_TYPE_NAME);
       Node top = new Node.Builder().setLabel("TOP").setChildren(typeRoots).build();

       // Render this language's tree into a single string.
       StringBuilder rendered = new StringBuilder();
       top.renderLangResources(rendered, lang);

       Data hdf = Doclava.makeHDF();
       hdf.setValue("reference_tree", rendered.toString());
       hdf.setValue("metadata.lang", lang);

       String unifiedName = "jd_lists_unified" + suffix + ".js";
       String extrasName = "jd_extras" + suffix + ".js";
       // Write the unified list for this language, then append its extras.
       ClearPage.write(hdf, "jd_lists_unified.cs", unifiedName);
       appendExtrasMetadata(extrasName, unifiedName);
     }
   }

   /**
   * Sorts the given per-language root nodes by language name and, for each
   * language, renders only the samples metadata. When a language has
   * samples, the result is written through the jd_lists_unified.cs
   * template to android_samples_metadata_LANG.js. Only used by devsite
   * (ds) builds.
   *
   * @param rootNodesList A list of root metadata nodes, one per language,
   *        each holding type nodes and their member child pages. Sorted
   *        in place, as are each node's children.
   */
   public static void WriteSamplesListByLang(List<Node> rootNodesList) {
     Collections.sort(rootNodesList, BY_LANG_NAME);
     for (Node langRoot : rootNodesList) {
       String lang = langRoot.getLang();

       List<Node> typeRoots = langRoot.getChildren();
       Collections.sort(typeRoots, BY_TYPE_NAME);
       Node top = new Node.Builder().setLabel("TOP").setChildren(typeRoots).build();

       // Render the samples for this language into a single string.
       StringBuilder rendered = new StringBuilder();
       boolean hasSamples = top.renderSamplesResources(rendered, lang);

       Data hdf = Doclava.makeHDF();
       hdf.setValue("reference_tree", rendered.toString());
       hdf.setValue("metadata.lang", lang);

       if (!hasSamples) {
         // Nothing to publish for this language.
         continue;
       }
       hdf.setValue("samples_only", "1");
       String samplesName = "android_samples_metadata" + "_" + lang + ".js";
       ClearPage.write(hdf, "jd_lists_unified.cs", samplesName);
     }
   }

   /**
   * Extract supported metadata values from a page and add them as
   * a child node under the root node for the page's language and type.
   * Some metadata values are normalized. When the page has no
   * page.metaDescription or no page.image, an attempt is first made to
   * infer them from the page content. Pages whose excludeFromSuggestions
   * var is "true" are not added to the tree.
   *
   * @param docfile Identifies the page; passed through to inferMetadata.
   * @param dest The output path for the file. Not read by this method.
   * @param filename Path of the page, used to derive its link and language.
   * @param hdf Data object holding the page's metadata values; inferred
   *        and normalized values are written back to it.
   * @param tagList The list of root nodes to which the page's node is added.
   */
   public static void setPageMetadata(String docfile, String dest, String filename,
       Data hdf, List<Node> tagList) {
     // The author can opt the page out of the metadata tree.
     boolean excluded = "true".equals(hdf.getValue("excludeFromSuggestions",""));

     // Fill in a missing summary and/or image from itemprop/markup.
     boolean missingSummary = "".equals(hdf.getValue("page.metaDescription", ""));
     boolean missingImage = "".equals(hdf.getValue("page.image", ""));
     if (missingSummary || missingImage) {
       inferMetadata(docfile, hdf, missingSummary, missingImage);
     }

     if (excluded) {
       return;
     }

     // Collect the available metadata into a node and attach it to the tree.
     Node pageMeta = new Node.Builder().build();
     pageMeta.setLabel(getTitleNormalized(hdf, "page.title"));
     pageMeta.setCategory(hdf.getValue("page.category",""));
     pageMeta.setSummary(hdf.getValue("page.metaDescription",""));
     pageMeta.setLink(getPageUrlNormalized(filename));
     pageMeta.setGroup(getStringValueNormalized(hdf,"sample.group"));
     pageMeta.setKeywords(getPageTagsNormalized(hdf, "page.tags"));
     pageMeta.setTags(getPageTagsNormalized(hdf, "meta.tags"));
     pageMeta.setImage(getImageUrlNormalized(hdf.getValue("page.image", "")));
     pageMeta.setLang(getLangStringNormalized(hdf, filename));
     pageMeta.setType(getStringValueNormalized(hdf, "page.type"));
     pageMeta.setTimestamp(hdf.getValue("page.timestamp",""));
     appendMetaNodeByLang(pageMeta, tagList);
   }

   /**
   * Attempt to infer page metadata based on the contents of the
   * page. The page's commentText is parsed into a dom tree and values
   * are selected in this order: 1. dom node specifically tagged with
   * microdata (itemprop). 2. first qualified p or img node. Results are
   * written to page.metaDescription and page.image in hdf.
   *
   * Any exception raised while parsing or querying is caught; it is
   * reported only when Doclava.META_DBG is set and the hdf values are
   * then left as they were.
   *
   * @param docfile The file from which to extract metadata (used for
   *        debug output only).
   * @param hdf Data object in which to store the metadata values.
   * @param needsSummary Whether to extract summary metadata.
   * @param needsImage Whether to extract image metadata.
   */
   public static void inferMetadata(String docfile, Data hdf,
       Boolean needsSummary, Boolean needsImage) {
     String sum = "";
     String imageUrl = "";
     String sumFrom = needsSummary ? "none" : "hdf";
     String imgFrom = needsImage ? "none" : "hdf";
     String filedata = hdf.getValue("commentText", "");
     if (Doclava.META_DBG) System.out.println("----- " + docfile + "\n");

     try {
       XPathFactory xpathFac = XPathFactory.newInstance();
       XPath xpath = xpathFac.newXPath();
       XMLReader reader = new Parser();
       reader.setFeature(Parser.namespacesFeature, false);
       reader.setFeature(Parser.namespacePrefixesFeature, false);
       reader.setFeature(Parser.ignoreBogonsFeature, true);

       Transformer transformer = TransformerFactory.newInstance().newTransformer();
       DOMResult result = new DOMResult();
       // Give the parser characters rather than platform-encoded bytes so
       // that non-ASCII text is not damaged on hosts whose default charset
       // cannot represent it.
       InputSource pageSource = new InputSource(new StringReader(filedata));
       transformer.transform(new SAXSource(reader, pageSource), result);
       org.w3c.dom.Node htmlNode = result.getNode();

       if (needsSummary) {
         StringBuilder sumStrings = new StringBuilder();
         // First choice: text under the first element tagged itemprop=description.
         XPathExpression itempropDescExpr = xpath.compile("/descendant-or-self::*"
             + "[@itemprop='description'][1]//text()[string(.)]");
         org.w3c.dom.NodeList nodes = (org.w3c.dom.NodeList) itempropDescExpr.evaluate(htmlNode,
             XPathConstants.NODESET);
         if (nodes.getLength() > 0) {
           for (int i = 0; i < nodes.getLength(); i++) {
             sumStrings.append(nodes.item(i).getNodeValue());
           }
           sumFrom = "itemprop";
         } else {
           // Fallback: text of class-less paragraphs outside notice/sidebox containers.
           XPathExpression firstParaExpr = xpath.compile("//p[not(../../../"
               + "@class='notice-developers') and not(../@class='sidebox')"
               + "and not(@class)]//text()");
           nodes = (org.w3c.dom.NodeList) firstParaExpr.evaluate(htmlNode, XPathConstants.NODESET);
           if (nodes.getLength() > 0) {
             for (int i = 0; i < nodes.getLength(); i++) {
               sumStrings.append(nodes.item(i).getNodeValue()).append(" ");
             }
             sumFrom = "markup";
           }
         }
         // Normalize whatever text was collected; too-short text becomes "".
         sum = sumStrings.toString().trim();
         if (!"".equals(sum)) {
           sum = getSummaryNormalized(sum);
         }
         if ("".equals(sum)) {
            if (Doclava.META_DBG) System.out.println("Warning: description too short! ("
             + sum.length() + "chars) ...\n\n");
         }
         // Store the summary (possibly empty) to the file hdf data.
         hdf.setValue("page.metaDescription", sum);
       }
       if (needsImage) {
         // First choice: src of an element tagged itemprop=image.
         XPathExpression itempropImageExpr = xpath.compile("//*[@itemprop='image']/@src");
         org.w3c.dom.NodeList imgNodes = (org.w3c.dom.NodeList) itempropImageExpr.evaluate(htmlNode,
             XPathConstants.NODESET);
         if (imgNodes.getLength() > 0) {
           imageUrl = getImageUrlNormalized(imgNodes.item(0).getNodeValue());
           imgFrom = "itemprop";
         } else {
           // Fallback: the first img src that survives qualification.
           XPathExpression firstImgExpr = xpath.compile("//img/@src");
           imgNodes = (org.w3c.dom.NodeList) firstImgExpr.evaluate(htmlNode, XPathConstants.NODESET);
           for (int i = 0; i < imgNodes.getLength(); i++) {
             String tx = imgNodes.item(i).getNodeValue();
             imageUrl = getImageUrlNormalized(tx);
             if (!"".equals(imageUrl)) {
               imgFrom = "markup";
               break;
             }
             // This img src did not qualify, keep looking...
             if (Doclava.META_DBG) System.out.println("    >>>>> Discarded image: " + tx);
           }
         }
         // Store the image url (possibly empty) to the file hdf data.
         hdf.setValue("page.image", imageUrl);
       }
       if (Doclava.META_DBG) System.out.println("Image (" + imgFrom + "): " + imageUrl);
       if (Doclava.META_DBG) System.out.println("Summary (" + sumFrom + "): " + sum.length()
           + " chars\n\n" + sum + "\n");
     } catch (Exception e) {
       // Inference is best-effort: a page that cannot be parsed simply keeps
       // whatever metadata it already had.
       if (Doclava.META_DBG) System.out.println("    >>>>> Exception: " + e + "\n");
     }
   }

   /**
   * Normalize a comma-delimited, multi-string value. Removes double
   * quotes, splits on commas (ASCII or ideographic), optionally
   * lowercases each part, trims whitespace, and re-joins the parts as a
   * comma-separated list of double-quoted strings. For meta.tags, spaces
   * inside a tag are removed, page.tags is used as the source when
   * meta.tags is empty, and the result is written back to hdf.
   *
   * @param hdf Data object in which the metadata values are stored.
   * @param tag The hdf var from which the metadata was extracted.
   * @return A normalized string value for the specified tag; empty when
   *         there is no value.
   */
   public static String getPageTagsNormalized(Data hdf, String tag) {
     StringBuilder quoted = new StringBuilder();
     String raw = hdf.getValue(tag, "");
     boolean isMetaTags = tag.equals("meta.tags");
     if (isMetaTags && raw.equals("")) {
       // No meta tags available, so fall back to the page keywords.
       raw = hdf.getValue("page.tags", "");
     }

     if (!raw.equals("")) {
       boolean lowercase = (isMetaTags && sLowercaseTags)
           || (tag.equals("page.tags") && sLowercaseKeywords);
       String[] parts = raw.replaceAll("\"", "").split("[,\u3001]");
       String separator = "";
       for (String part : parts) {
         String value = lowercase ? part.toLowerCase() : part;
         if (isMetaTags) {
           value = value.replaceAll(" ","");
         }
         quoted.append(separator).append("\"").append(value.trim()).append("\"");
         separator = ",";
       }
     }

     String normalized = quoted.toString();
     if (isMetaTags) {
       // Write this back to hdf to expose through js.
       hdf.setValue(tag, normalized);
     }
     return normalized;
   }

   /**
   * Normalize a string for which only a single value is supported.
   * Remove double quotes, keep the string up to the first comma, remove
   * a single leading forward slash, optionally make lowercase for easier
   * matching (sample.group only), and trim any whitespace.
   *
   * @param hdf Data object in which the metadata values are stored.
   * @param tag The hdf var from which the metadata should be extracted.
   * @return A normalized string value for the specified tag; empty when
   *         there is no value.
   */
   public static String getStringValueNormalized(Data hdf, String tag) {
     // Strings are immutable: the stripped value has to be assigned back,
     // otherwise the quotes silently survive.
     String value = hdf.getValue(tag, "").replace("\"", "");
     if ("".equals(value)) {
       return value;
     }

     // Only the first of several comma-separated values is supported.
     int comma = value.indexOf(",");
     if (comma != -1) {
       value = value.substring(0, comma);
     }
     if (value.startsWith("/")) {
       value = value.substring(1);
     }
     if ("sample.group".equals(tag) && sLowercaseTags) {
       value = value.toLowerCase();
     }
     return value.trim();
   }

   /**
   * Normalize a page title. Extract the string, remove any
   * span elements (with their content), escape quotes and markup
   * characters, and trim any whitespace.
   *
   * @param hdf Data object in which the metadata values are stored.
   * @param tag The hdf var from which the metadata should be extracted.
   * @return A normalized string value for the specified tag; empty when
   *         there is no value.
   */
   public static String getTitleNormalized(Data hdf, String tag) {
     String title = hdf.getValue(tag, "");
     if (title.isEmpty()) {
       return "";
     }
     // Spans must be removed BEFORE escaping: once '<' has been turned into
     // an entity the span markup can no longer be recognized. replaceAll
     // also copes with a title that consists of nothing but a span.
     title = title.replaceAll("<span(.*?)</span>", "");
     return escapeString(title).trim();
   }

   /**
   * Extract and normalize a page's language string based on the
   * lowercased dir path. A path of the form intl/LANG/REST, where LANG is
   * one of DocFile.DEVSITE_VALID_LANGS, yields LANG; every other path is
   * assigned the default lang string of "en". The page's canonical path
   * (REST for a recognized intl path, otherwise the filename unchanged)
   * is stored in data as path.canonical.
   *
   * @param data Data object that receives the path.canonical value.
   * @param filename A path string to the file relative to root.
   * @return A normalized lang value.
   */
   public static String getLangStringNormalized(Data data, String filename) {
     String[] segments = filename.toLowerCase().split("\\/", 3);
     String lang = "en";
     String pathCanonical = filename;
     // All three segments (intl, lang code, remaining path) must be present;
     // shorter paths such as "intl" or "intl/ja" fall back to the default
     // instead of indexing past the end of the array.
     if (segments.length == 3 && "intl".equals(segments[0])) {
       for (String validLang : DocFile.DEVSITE_VALID_LANGS) {
         if (validLang.equals(segments[1])) {
           lang = segments[1];
           // The root url, exclusive of intl/nn.
           pathCanonical = segments[2];
           break;
         }
       }
     }
     data.setValue("path.canonical", pathCanonical);
     return lang;
   }

   /**
   * Normalize a page summary string and truncate as needed. Input
   * shorter than 50 chars is discarded (assumed to be too little
   * context). Otherwise a leading/trailing double quote is removed,
   * whitespace runs are collapsed to one space, javadoc inline tags are
   * reduced to their root symbol, and the text is escaped. Text longer
   * than 250 chars is cut at the first word boundary following that
   * position. An ellipsis character is always appended.
   *
   * @param s String extracted from the page as its summary.
   * @return A normalized string value, or empty when s is too short.
   */
   public static String getSummaryNormalized(String s) {
     final int maxChars = 250;
     final int minChars = 50;
     if (s.length() < minChars) {
       return "";
     }

     String text = s.replaceAll("^\"|\"$", "").replaceAll("\\s+", " ");
     text = escapeString(JD_TAG_PATTERN.matcher(text).replaceAll("$1"));

     // NOTE(review): truncation runs on the escaped text, so a cut may land
     // inside an entity - confirm whether that matters to consumers.
     BreakIterator words = BreakIterator.getWordInstance();
     words.setText(text);
     int cut = (text.length() > maxChars) ? words.following(maxChars) : words.last();
     return text.substring(0, cut).concat("\u2026" );
   }

   /**
   * Replaces double quote, single quote, less-than, greater-than, and
   * forward slash characters with their html entity equivalents.
   *
   * @param s The string to escape.
   * @return The escaped string.
   */
   public static String escapeString(String s) {
     return s.replace("\"", "&quot;")
         .replace("\'", "&#39;")
         .replace("<", "&lt;")
         .replace(">", "&gt;")
         .replace("/", "&#47;");
   }

   // Disqualify img src urls that include any of these substrings; used by
   // getImageUrlNormalized via inList (plain substring match, no patterns).
   public static String[] IMAGE_EXCLUDE = {"/triangle-", "favicon","android-logo",
       "icon_play.png", "robot-tiny"};

   /**
   * Tests whether a string contains any of the given substrings.
   *
   * @param s The string to search in.
   * @param list The substrings to look for.
   * @return true if at least one entry of list occurs in s.
   */
   public static boolean inList(String s, String[] list) {
     for (int i = 0; i < list.length; i++) {
       if (s.indexOf(list[i]) >= 0) {
         return true;
       }
     }
     return false;
   }

   /**
   * Normalize an img src url by removing docRoot and a single leading
   * slash for local image references. These are added later
   * in js to support offline mode and keep path reference
   * format consistent with hrefs. Urls that are null, empty, or contain
   * one of the IMAGE_EXCLUDE substrings do not qualify.
   *
   * @param url Abs or rel url sourced from img src.
   * @return Normalized url if qualified, else empty
   */
   public static String getImageUrlNormalized(String url) {
     boolean qualifies = url != null && !url.equals("") && !inList(url, IMAGE_EXCLUDE);
     if (!qualifies) {
       return "";
     }
     // The lookahead keeps protocol-relative urls ("//host/...") intact.
     return url.replace("{@docRoot}", "").replaceFirst("^/(?!/)", "");
   }

   /**
   * Normalize an href url by removing docRoot and a single leading
   * slash for local page references. These are added later
   * in js to support offline mode and keep path reference
   * format consistent. When Doclava.USE_DEVSITE_LOCALE_OUTPUT_PATHS is
   * set, a leading "en/" is removed as well.
   *
   * @param url Abs or rel page url sourced from href
   * @return Normalized url, either abs or rel to root; empty when url is
   *         null or empty
   */
   public static String getPageUrlNormalized(String url) {
     if (url == null || url.equals("")) {
       return "";
     }
     String normalized = url.replace("{@docRoot}", "");
     if (Doclava.USE_DEVSITE_LOCALE_OUTPUT_PATHS) {
       normalized = normalized.replaceFirst("^en/", "");
     }
     // The lookahead keeps protocol-relative urls ("//host/...") intact.
     return normalized.replaceFirst("^/(?!/)", "");
   }

   /**
   * Given a metadata node, file it under the root node for its language.
   * Within that language the node is placed by type via
   * appendMetaNodeByType. If no root node matches the node's language,
   * one is created and added to the list first.
   *
   * @param gNode The node to attach beneath a language root node.
   * @param rootList The current list of language root nodes.
   * @return The updated list of root nodes (the same list instance).
   */
   public static List<Node> appendMetaNodeByLang(Node gNode, List<Node> rootList) {
     String lang = gNode.getLang();

     // Use the first existing root node with the same language, if any.
     for (Node langRoot : rootList) {
       if (langRoot.getLang().equals(lang)) {
         appendMetaNodeByType(gNode, langRoot.getChildren());
         return rootList;
       }
     }

     // No root for this language yet: create one, then file the node by type.
     List<Node> typeRoots = new ArrayList<Node>();
     Node langRoot = new Node.Builder().setChildren(typeRoots).setLang(lang).build();
     rootList.add(langRoot);
     appendMetaNodeByType(gNode, typeRoots);
     return rootList;
   }

   /**
   * Given a metadata node, add it as a child of the root node for its
   * type. If there is no root node that matches the node's type, create
   * one holding the metadata node as its only child.
   *
   * @param gNode The node to attach beneath a type root node.
   * @param rootList The current list of type root nodes.
   * @return The updated list of root nodes (the same list instance).
   */
   public static List<Node> appendMetaNodeByType(Node gNode, List<Node> rootList) {
     String type = gNode.getType();

     // Use the first existing root node with the same type, if any.
     for (Node typeRoot : rootList) {
       if (typeRoot.getType().equals(type)) {
         typeRoot.getChildren().add(gNode);
         return rootList;
       }
     }

     // No root for this type yet: create one that starts out with this node.
     List<Node> members = new ArrayList<Node>();
     members.add(gNode);
     rootList.add(new Node.Builder().setChildren(members).setType(type).build());
     return rootList;
   }

   /**
   * Given a metadata node, index each of its children under the root
   * node for every tag the child carries. A root tag node is labelled
   * with the tag and its tag list holds the positions (as strings) of the
   * children that carry the tag. If there is no root node matching a
   * tag, one is created for it.
   *
   * @param gNode The node whose children are to be indexed by tag.
   * @param rootTagNodesList The current list of root tag nodes.
   * @return The updated list of root nodes (the same list instance).
   */
   public static List<Node> appendMetaNodeByTagIndex(Node gNode, List<Node> rootTagNodesList) {
     List<Node> children = gNode.getChildren();
     for (int iter = 0; iter < children.size(); iter++) {
       List<String> nodeTags = children.get(iter).getTags();
       if (nodeTags == null) {
         continue;
       }
       String childIndex = String.valueOf(iter);
       for (String t : nodeTags) { //process each of the meta.tags
         // Must be reset for every tag; otherwise one matched tag would
         // stop root nodes from being created for the child's other tags.
         boolean matched = false;
         for (Node n : rootTagNodesList) {
           if (n.getLabel().equals(t)) {
             n.getTags().add(childIndex);
             matched = true;
             break; // add to the first root node only
           }
         }
         if (!matched) {
           List<String> mtaglist = new ArrayList<String>(); // indexes of children with this tag
           mtaglist.add(childIndex);
           rootTagNodesList.add(new Node.Builder().setLabel(t).setTags(mtaglist).build());
         }
       }
     }
     return rootTagNodesList;
   }

   /**
   * Append the contents of jd_extras to jd_lists_unified for each language.
   * Does nothing unless the extras file exists in ClearPage.outputDir and
   * is not a directory.
   *
   * @param extrasFilename The lang-specific extras file to append.
   * @param unifiedFilename The lang-specific unified metadata file.
   */
   public static void appendExtrasMetadata (String extrasFilename, String unifiedFilename) {
     File extras = new File(ClearPage.outputDir + "/" + extrasFilename);
     boolean usable = extras.exists() && !extras.isDirectory();
     if (!usable) {
       return;
     }
     ClearPage.copyFile(true, extras, unifiedFilename, true);
   }

   /** Orders nodes by their label, using String's natural ordering. */
   public static final Comparator<Node> BY_TAG_NAME = new Comparator<Node>() {
     @Override
     public int compare(Node left, Node right) {
       String leftLabel = left.getLabel();
       return leftLabel.compareTo(right.getLabel());
     }
   };

   /** Orders nodes by their type, using String's natural ordering. */
   public static final Comparator<Node> BY_TYPE_NAME = new Comparator<Node>() {
     @Override
     public int compare(Node left, Node right) {
       String leftType = left.getType();
       return leftType.compareTo(right.getType());
     }
   };

     /** Orders nodes by their lang, using String's natural ordering. */
     public static final Comparator<Node> BY_LANG_NAME = new Comparator<Node>() {
       @Override
       public int compare(Node left, Node right) {
         String leftLang = left.getLang();
         return leftLang.compareTo(right.getLang());
       }
     };

   /**
   * A node for storing page metadata. Use Builder.build() to instantiate.
   */
   public static class Node {

     private String mLabel; // holds page.title or similar identifier
     private String mCategory; // subtabs, example 'training' 'guides'
     private String mSummary; // Summary for card or similar use
     private String mLink; //link href for item click
     private String mGroup; // from sample.group in _index.jd
     private List<String> mKeywords; // from page.tags
     private List<String> mTags; // from meta.tags
     private String mImage; // holds an href, fully qualified or relative to root
     private List<Node> mChildren; // child nodes; the render methods walk this list
     private String mLang; // language of the page; root nodes are grouped and sorted by it
     private String mType; // design, develop, distribute, youtube, blog, etc
     private String mTimestamp; // optional timestamp eg 1447452827

     /**
     * Creates a node holding exactly the values collected by the builder.
     * Lists are taken over by reference, not copied.
     *
     * @param builder The builder whose values are copied into the node.
     */
     private Node(Builder builder) {
       // Scalar values.
       this.mLabel = builder.mLabel;
       this.mCategory = builder.mCategory;
       this.mSummary = builder.mSummary;
       this.mLink = builder.mLink;
       this.mGroup = builder.mGroup;
       this.mImage = builder.mImage;
       this.mLang = builder.mLang;
       this.mType = builder.mType;
       this.mTimestamp = builder.mTimestamp;
       // List values.
       this.mKeywords = builder.mKeywords;
       this.mTags = builder.mTags;
       this.mChildren = builder.mChildren;
     }

     /**
     * Collects the values for a Node. Every setter returns the builder so
     * calls can be chained; build() creates the node. Values that were
     * never set stay null.
     */
     private static class Builder {
       private String mLabel;
       private String mCategory;
       private String mSummary;
       private String mLink;
       private String mGroup;
       private String mImage;
       private String mLang;
       private String mType;
       private String mTimestamp;
       private List<String> mKeywords = null;
       private List<String> mTags = null;
       private List<Node> mChildren = null;

       public Builder setLabel(String mLabel) {
         this.mLabel = mLabel;
         return this;
       }

       public Builder setCategory(String mCategory) {
         this.mCategory = mCategory;
         return this;
       }

       public Builder setSummary(String mSummary) {
         this.mSummary = mSummary;
         return this;
       }

       public Builder setLink(String mLink) {
         this.mLink = mLink;
         return this;
       }

       public Builder setGroup(String mGroup) {
         this.mGroup = mGroup;
         return this;
       }

       public Builder setKeywords(List<String> mKeywords) {
         this.mKeywords = mKeywords;
         return this;
       }

       public Builder setTags(List<String> mTags) {
         this.mTags = mTags;
         return this;
       }

       public Builder setImage(String mImage) {
         this.mImage = mImage;
         return this;
       }

       public Builder setChildren(List<Node> mChildren) {
         this.mChildren = mChildren;
         return this;
       }

       public Builder setLang(String mLang) {
         this.mLang = mLang;
         return this;
       }

       public Builder setType(String mType) {
         this.mType = mType;
         return this;
       }

       public Builder setTimestamp(String mTimestamp) {
         this.mTimestamp = mTimestamp;
         return this;
       }

       /** Creates a Node from the values set so far. */
       public Node build() {
         return new Node(this);
       }
     }

     /**
     * Render a tree of metadata nodes organized by type. Each child is
     * written as a js array variable named after its uppercased type
     * followed by _RESOURCES. Writes "null" when there are no children.
     * @param buf Output buffer to render to.
     */
     void renderTypeResources(StringBuilder buf) {
       List<Node> typeRoots = mChildren; //list of type rootnodes
       if (typeRoots == null || typeRoots.size() == 0) {
         buf.append("null");
         return;
       }
       for (Node typeRoot : typeRoots) {
         buf.append("var ").append(typeRoot.mType.toUpperCase()).append("_RESOURCES = [");
         typeRoot.renderTypes(buf); //render this type's children
         buf.append("\n];\n\n");
       }
     }

     /**
     * Render a tree of metadata nodes organized by lang. Each child is
     * written as an array assigned to the property named after its type
     * on the METADATA entry for the given language. Writes "null" when
     * there are no children.
     * @param buf Output buffer to render to.
     * @param langname The language key used in the rendered METADATA entry.
     */
     void renderLangResources(StringBuilder buf, String langname) {
       List<Node> typeRoots = mChildren; //list of type rootnodes
       if (typeRoots == null || typeRoots.size() == 0) {
         buf.append("null");
         return;
       }
       for (Node typeRoot : typeRoots) {
         buf.append("METADATA['").append(langname).append("'].")
             .append(typeRoot.mType).append(" = [");
         typeRoot.renderTypes(buf); //render this lang's children
         buf.append("\n];\n\n");
       }
     }

     /**
     * Render the children of type 'develop' to extract samples
     * metadata; children of any other type are skipped. Writes "null"
     * when there are no children at all. Only used by devsite (ds) builds.
     * @param buf Output buffer to render to.
     * @param langname Not read by this method.
     * @return true if samples were rendered to buf
     */
     boolean renderSamplesResources(StringBuilder buf, String langname) {
       List<Node> typeRoots = mChildren; //list of type rootnodes
       if (typeRoots == null || typeRoots.size() == 0) {
         buf.append("null");
         return false;
       }
       boolean rendered = false;
       for (Node typeRoot : typeRoots) {
         //samples are always in type 'develop', so restrict
         if ("develop".equals(typeRoot.mType)) {
           rendered = typeRoot.renderTypeForSamples(buf);
         }
       }
       return rendered;
     }

     /**
     * Render all metadata nodes for a specific type.
     * @param buf Output buffer to render to.
     */
     void renderTypes(StringBuilder buf) {
       final List<Node> nodes = mChildren;
       if (nodes == null || nodes.isEmpty()) {
         // No children: emit the "nulltype" placeholder.
         buf.append("nulltype");
         return;
       }
       final int lastIndex = nodes.size() - 1;
       for (int i = 0; i <= lastIndex; i++) {
         final Node node = nodes.get(i);
         buf.append("\n      {\n");
         // title, summary and group go through renderStrWithUcs so that
         // characters outside printable ASCII are written as escapes.
         buf.append("        \"title\":\"");
         renderStrWithUcs(buf, node.mLabel);
         buf.append("\",\n");
         buf.append("        \"summary\":\"");
         renderStrWithUcs(buf, node.mSummary);
         buf.append("\",\n");
         buf.append("        \"url\":\"" + node.mLink + "\",\n");
         // Optional entries are omitted when their value is the empty string.
         if (!"".equals(node.mImage)) {
           buf.append("        \"image\":\"" + node.mImage + "\",\n");
         }
         if (!"".equals(node.mGroup)) {
           buf.append("        \"group\":\"");
           renderStrWithUcs(buf, node.mGroup);
           buf.append("\",\n");
         }
         if (!"".equals(node.mCategory)) {
           buf.append("        \"category\":\"" + node.mCategory + "\",\n");
         }
         // Compare string contents rather than references: a reference check
         // against "" is true for any empty string that is not the interned
         // literal, which would emit an empty "type" entry.
         if (node.mType != null && !node.mType.isEmpty()) {
           buf.append("        \"type\":\"" + node.mType + "\",\n");
         }
         node.renderArrayType(buf, node.mKeywords, "keywords");
         node.renderArrayType(buf, node.mTags, "tags");
         if (!"".equals(node.mTimestamp)) {
           buf.append("        \"timestamp\":\"" + node.mTimestamp + "\",\n");
         }
         // "lang" is always last, so it carries no trailing comma.
         buf.append("        \"lang\":\"" + node.mLang + "\"");
         buf.append("\n      }");
         if (i != lastIndex) {
           buf.append(", ");
         }
       }
     }

     /**
     * Render all metadata nodes for samples only.
     * Only used by devsite (ds) builds.
     * @param buf Output buffer to render to.
     * @return whether any samples were rendered to buf
     */
     boolean renderTypeForSamples(StringBuilder buf) {
       final List<Node> nodes = mChildren;
       if (nodes == null || nodes.isEmpty()) {
         // No children: emit the "nulltype" placeholder and report no samples.
         buf.append("nulltype");
         return false;
       }
       boolean typeHasSamples = false;
       for (Node node : nodes) {
         // valid samples must have category 'samples'
         if (!"samples".equals(node.mCategory)) {
           continue;
         }
         // Write the separator before every entry except the first rendered
         // one. Keying it on the list index instead leaves a trailing ", "
         // whenever the last child is not a sample.
         if (typeHasSamples) {
           buf.append(", ");
         }
         typeHasSamples = true;
         buf.append("\n      {\n");
         // title, summary and group go through renderStrWithUcs so that
         // characters outside printable ASCII are written as escapes.
         buf.append("        \"title\":\"");
         renderStrWithUcs(buf, node.mLabel);
         buf.append("\",\n");
         buf.append("        \"summary\":\"");
         renderStrWithUcs(buf, node.mSummary);
         buf.append("\",\n");
         buf.append("        \"url\":\"" + node.mLink + "\",\n");
         // Optional entries are omitted when their value is the empty string.
         if (!"".equals(node.mImage)) {
           buf.append("        \"image\":\"" + node.mImage + "\",\n");
         }
         if (!"".equals(node.mGroup)) {
           buf.append("        \"group\":\"");
           renderStrWithUcs(buf, node.mGroup);
           buf.append("\",\n");
         }
         if (!"".equals(node.mCategory)) {
           buf.append("        \"category\":\"" + node.mCategory + "\",\n");
         }
         // Compare string contents rather than references; a reference check
         // against "" does not reliably detect an empty string.
         if (node.mType != null && !node.mType.isEmpty()) {
           buf.append("        \"type\":\"" + node.mType + "\",\n");
         }
         node.renderArrayType(buf, node.mKeywords, "keywords");
         node.renderArrayType(buf, node.mTags, "tags");
         if (!"".equals(node.mTimestamp)) {
           buf.append("        \"timestamp\":\"" + node.mTimestamp + "\",\n");
         }
         // "lang" is always last, so it carries no trailing comma.
         buf.append("        \"lang\":\"" + node.mLang + "\"");
         buf.append("\n      }");
       }
       return typeHasSamples;
     }

     /**
     * Build and render a list of tags associated with each type.
     * @param buf Output buffer to render to.
     */
     void renderTypesByTag(StringBuilder buf) {
       final List<Node> typeRoots = mChildren; // list of root nodes
       if (typeRoots == null || typeRoots.isEmpty()) {
         // No children: emit the bare "null" placeholder.
         buf.append("null");
         return;
       }
       for (Node typeRoot : typeRoots) {
         // The upper-cased type name becomes part of a JavaScript variable
         // name, so use a fixed locale instead of the default one.
         final String typeName = typeRoot.mType.toUpperCase(java.util.Locale.ROOT);
         buf.append("var ").append(typeName).append("_BY_TAG = {");
         // Start from an empty, properly typed list and use the list that
         // appendMetaNodeByTagIndex returns for this type root.
         final List<Node> tagNodes =
             appendMetaNodeByTagIndex(typeRoot, new ArrayList<Node>());
         typeRoot.renderTagIndices(buf, tagNodes);
         buf.append("\n};\n\n");
       }
     }

     /**
     * Render a list of tags associated with a type, including the
     * tag's indices in the type array.
     * @param buf Output buffer to render to.
     * @param tagList Node tree of types to render.
     */
     void renderTagIndices(StringBuilder buf, List<Node> tagList) {
       // A missing or empty list produces no output at all.
       if (tagList == null || tagList.isEmpty()) {
         return;
       }
       boolean firstEntry = true;
       for (Node tagNode : tagList) {
         // Entries are separated by ", "; none follows the last one.
         if (!firstEntry) {
           buf.append(", ");
         }
         firstEntry = false;
         // Each entry is written as label:[values of the node's mTags list].
         buf.append("\n    ").append(tagNode.mLabel).append(":[");
         renderArrayValue(buf, tagNode.mTags);
         buf.append("]");
       }
     }

     /**
     * Render key:arrayvalue pair.
     * @param buf Output buffer to render to.
     * @param type The list value to render as an arrayvalue.
     * @param key The key for the pair.
     */
     void renderArrayType(StringBuilder buf, List<String> type, String key) {
       // Writes one indented line: "key": [values], followed by a comma and newline.
       buf.append("        \"").append(key).append("\": [");
       renderArrayValue(buf, type);
       buf.append("],\n");
     }

     /**
     * Render an array value to buf, with special handling of unicode characters.
     * @param buf Output buffer to render to.
     * @param type The list value to render as an arrayvalue.
     */
     void renderArrayValue(StringBuilder buf, List<String> type) {
       // A null list renders as nothing.
       if (type == null) {
         return;
       }
       String separator = "";
       for (String value : type) {
         // Comma between values, none before the first or after the last.
         buf.append(separator);
         separator = ",";
         // Values are written as-is apart from the escaping in renderStrWithUcs;
         // no quotes are added here.
         renderStrWithUcs(buf, value.toString());
       }
     }

     /**
     * Render a string that can include ucs2 encoded characters.
     * @param buf Output buffer to render to.
     * @param chars String to append to buf with any necessary encoding
     */
     void renderStrWithUcs(StringBuilder buf, String chars) {
       // A null string renders as nothing instead of throwing.
       if (chars == null) {
         return;
       }
       final int length = chars.length();
       for (int i = 0; i < length; i++) {
         final char c = chars.charAt(i);
         if (c >= ' ' && c <= '~' && c != '\\') {
           // Printable ASCII other than the backslash is copied unchanged.
           buf.append(c);
         } else {
           // Everything else is written as a backslash-u escape with four hex
           // digits, one escape per UTF-16 code unit. A character outside the
           // Basic Multilingual Plane therefore becomes two escapes (high and
           // low surrogate). Formatting the whole code point in one escape
           // would yield five or more hex digits, of which JavaScript and
           // JSON parsers read only the first four.
           buf.append(String.format("\\u%04x", (int) c));
         }
       }
     }

     /**
     * Returns this node's label.
     * @return the current value of the label field
     */
     public String getLabel() {
       return this.mLabel;
     }

     /**
     * Sets this node's label.
     * @param label the new label value
     */
     public void setLabel(String label) {
       this.mLabel = label;
     }

     /**
     * Returns this node's category.
     * @return the current value of the category field
     */
     public String getCategory() {
       return this.mCategory;
     }

     /**
     * Sets this node's category.
     * @param title the new category value
     */
     public void setCategory(String title) {
       this.mCategory = title;
     }

     /**
     * Returns this node's summary text.
     * @return the current value of the summary field
     */
     public String getSummary() {
       return this.mSummary;
     }

     /**
     * Sets this node's summary text.
     * @param summary the new summary value
     */
     public void setSummary(String summary) {
       this.mSummary = summary;
     }

     /**
     * Returns this node's link.
     * @return the current value of the link field
     */
     public String getLink() {
       return this.mLink;
     }

     /**
     * Sets this node's link.
     * @param ref the new link value
     */
     public void setLink(String ref) {
       this.mLink = ref;
     }

     /**
     * Returns this node's group.
     * @return the current value of the group field
     */
     public String getGroup() {
       return this.mGroup;
     }

     /**
     * Sets this node's group.
     * @param group the new group value
     */
     public void setGroup(String group) {
       this.mGroup = group;
     }

     /**
     * Returns this node's tag list.
     * @return the current tag list; may be null, since setTags stores null
     *     for an empty string
     */
     public List<String> getTags() {
       return this.mTags;
     }

     /**
     * Sets this node's tags from a comma-separated string.
     * @param tags comma-separated tag values; a null or empty string clears
     *     the tag list to null
     */
     public void setTags(String tags) {
       // Treat null like the empty string; calling split on null would throw.
       if (tags == null || tags.isEmpty()) {
         mTags = null;
         return;
       }
       // Split on commas only; the parts are stored without trimming.
       final List<String> tagList = new ArrayList<String>();
       Collections.addAll(tagList, tags.split(","));
       mTags = tagList;
     }

     /**
     * Returns this node's keyword list.
     * @return the current keyword list; may be null, since setKeywords stores
     *     null for an empty string
     */
     public List<String> getKeywords() {
       return this.mKeywords;
     }

     /**
     * Sets this node's keywords from a comma-separated string.
     * @param keywords comma-separated keyword values; a null or empty string
     *     clears the keyword list to null
     */
     public void setKeywords(String keywords) {
       // Treat null like the empty string; calling split on null would throw.
       if (keywords == null || keywords.isEmpty()) {
         mKeywords = null;
         return;
       }
       // Split on commas only; the parts are stored without trimming.
       final List<String> keywordList = new ArrayList<String>();
       Collections.addAll(keywordList, keywords.split(","));
       mKeywords = keywordList;
     }

     /**
     * Returns this node's image reference.
     * @return the current value of the image field
     */
     public String getImage() {
       return this.mImage;
     }

     /**
     * Sets this node's image reference.
     * @param ref the new image value
     */
     public void setImage(String ref) {
       this.mImage = ref;
     }

     /**
     * Returns this node's child nodes.
     * @return the current child list; the render methods treat a null or
     *     empty list as having no children
     */
     public List<Node> getChildren() {
       return this.mChildren;
     }

     /**
     * Replaces this node's child list.
     * @param node the new list of child nodes
     */
     public void setChildren(List<Node> node) {
       this.mChildren = node;
     }

     /**
     * Returns this node's language.
     * @return the current value of the lang field
     */
     public String getLang() {
       return this.mLang;
     }

     /**
     * Sets this node's language.
     * @param lang the new lang value
     */
     public void setLang(String lang) {
       this.mLang = lang;
     }

     /**
     * Returns this node's type.
     * @return the current value of the type field
     */
     public String getType() {
       return this.mType;
     }

     /**
     * Returns this node's timestamp.
     * @return the current value of the timestamp field
     */
     public String getTimestamp() {
       return this.mTimestamp;
     }

     /**
     * Sets this node's type.
     * @param type the new type value
     */
     public void setType(String type) {
       this.mType = type;
     }

     /**
     * Sets this node's timestamp.
     * @param timestamp the new timestamp value
     */
     public void setTimestamp(String timestamp) {
       this.mTimestamp = timestamp;
     }
   }
 }