| /* GENERATED SOURCE. DO NOT MODIFY. */ |
| // © 2016 and later: Unicode, Inc. and others. |
| // License & terms of use: http://www.unicode.org/copyright.html |
| /* |
| ********************************************************************** |
| * Copyright (c) 2002-2016, International Business Machines |
| * Corporation and others. All Rights Reserved. |
| ********************************************************************** |
| */ |
| |
| package android.icu.text; |
| |
| import java.util.ArrayList; |
| import java.util.Arrays; |
| import java.util.Collection; |
| import java.util.HashSet; |
| import java.util.List; |
| import java.util.Set; |
| import java.util.SortedSet; |
| import java.util.TreeSet; |
| |
| import android.icu.impl.Assert; |
| import android.icu.impl.RBBIDataWrapper; |
| import android.icu.text.RBBIRuleBuilder.IntPair; |
| |
| /** |
| * This class is part of the RBBI rule compiler. |
| * It builds the state transition table used by the RBBI runtime |
| * from the expression syntax tree generated by the rule scanner. |
| * |
| * This class is part of the RBBI implementation only. |
| * There is no user-visible public API here. |
| */ |
| class RBBITableBuilder { |
| |
| // |
| // RBBIStateDescriptor - The DFA is initially constructed as a set of these descriptors, |
| // one for each state. |
| static class RBBIStateDescriptor { |
| boolean fMarked; |
| int fAccepting; |
| int fLookAhead; |
| SortedSet<Integer> fTagVals; |
| int fTagsIdx; |
| Set<RBBINode> fPositions; // Set of parse tree positions associated |
| // with this state. Unordered (it's a set). |
| // UVector contents are RBBINode * |
| |
| int[] fDtran; // Transitions out of this state. |
| // indexed by input character |
| // contents is int index of dest state |
| // in RBBITableBuilder.fDStates |
| |
| RBBIStateDescriptor(int maxInputSymbol) { |
| fTagVals = new TreeSet<>(); |
| fPositions = new HashSet<>(); |
| fDtran = new int[maxInputSymbol+1]; // fDtran needs to be pre-sized. |
| // It is indexed by input symbols, and will |
| // hold the next state number for each |
| // symbol. |
| } |
| } |
| |
| |
| private RBBIRuleBuilder fRB; |
| |
| /** The array index into RBBIRuleBuilder.fTreeRoots for the parse tree to operate on. */ |
| private int fRootIx; |
| |
| /** D states (Aho's terminology). Index is state number. */ |
| private List<RBBIStateDescriptor> fDStates; |
| |
| /** Synthesized safe table, a List of row arrays. */ |
| private List<short[]> fSafeTable; |
| |
| private static final int MAX_STATE_FOR_8BITS_TABLE = 255; |
| |
| /** Map from rule number (fVal in look ahead nodes) to sequential lookahead index. */ |
| int[] fLookAheadRuleMap; |
| |
| /** Counter used when assigning lookahead rule numbers. |
| * Contains the last look-ahead number already in use. |
| * The first look-ahead number is 2; Number 1 (ACCEPTING_UNCONDITIONAL) is reserved |
| * for non-lookahead accepting states. See the declarations of RBBIStateTableRowT. */ |
| int fLASlotsInUse = RBBIDataWrapper.ACCEPTING_UNCONDITIONAL; |
| |
| //----------------------------------------------------------------------------- |
| // |
| // Constructor for RBBITableBuilder. |
| // |
| // rootNode is an index into the array of root nodes that is held by |
| // the overall RBBIRuleBuilder. |
| //----------------------------------------------------------------------------- |
| RBBITableBuilder(RBBIRuleBuilder rb, int rootNodeIx) { |
| fRootIx = rootNodeIx; |
| fRB = rb; |
| fDStates = new ArrayList<>(); |
| } |
| |
| |
| |
| |
| //----------------------------------------------------------------------------- |
| // |
| // RBBITableBuilder::buildForwardTable - This is the main function for building |
| // the DFA state transition table from the RBBI rules parse tree. |
| // |
| //----------------------------------------------------------------------------- |
| void buildForwardTable() { |
| // If there were no rules, just return. This situation can easily arise |
| // for the reverse rules. |
| if (fRB.fTreeRoots[fRootIx]==null) { |
| return; |
| } |
| |
| // |
| // Walk through the tree, replacing any references to $variables with a copy of the |
| // parse tree for the substitution expression. |
| // |
| fRB.fTreeRoots[fRootIx] = fRB.fTreeRoots[fRootIx].flattenVariables(0); |
| if (fRB.fDebugEnv!=null && fRB.fDebugEnv.indexOf("ftree")>=0) { |
| System.out.println("Parse tree after flattening variable references."); |
| fRB.fTreeRoots[fRootIx].printTree(true); |
| } |
| |
| // |
| // If the rules contained any references to {bof} |
| // add a {bof} <cat> <former root of tree> to the |
| // tree. Means that all matches must start out with the |
| // {bof} fake character. |
| // |
| if (fRB.fSetBuilder.sawBOF()) { |
| RBBINode bofTop = new RBBINode(RBBINode.opCat); |
| RBBINode bofLeaf = new RBBINode(RBBINode.leafChar); |
| bofTop.fLeftChild = bofLeaf; |
| bofTop.fRightChild = fRB.fTreeRoots[fRootIx]; |
| bofLeaf.fParent = bofTop; |
| bofLeaf.fVal = 2; // Reserved value for {bof}. |
| fRB.fTreeRoots[fRootIx] = bofTop; |
| } |
| |
| // |
| // Add a unique right-end marker to the expression. |
| // Appears as a cat-node, left child being the original tree, |
| // right child being the end marker. |
| // |
| RBBINode cn = new RBBINode(RBBINode.opCat); |
| cn.fLeftChild = fRB.fTreeRoots[fRootIx]; |
| fRB.fTreeRoots[fRootIx].fParent = cn; |
| RBBINode endMarkerNode = cn.fRightChild = new RBBINode(RBBINode.endMark); |
| cn.fRightChild.fParent = cn; |
| fRB.fTreeRoots[fRootIx] = cn; |
| |
| // |
| // Replace all references to UnicodeSets with the tree for the equivalent |
| // expression. |
| // |
| fRB.fTreeRoots[fRootIx].flattenSets(); |
| if (fRB.fDebugEnv!=null && fRB.fDebugEnv.indexOf("stree")>=0) { |
| System.out.println("Parse tree after flattening Unicode Set references."); |
| fRB.fTreeRoots[fRootIx].printTree(true); |
| } |
| |
| |
| // |
| // calculate the functions nullable, firstpos, lastpos and followpos on |
| // nodes in the parse tree. |
| // See the algorithm description in Aho. |
| // Understanding how this works by looking at the code alone will be |
| // nearly impossible. |
| // |
| calcNullable(fRB.fTreeRoots[fRootIx]); |
| calcFirstPos(fRB.fTreeRoots[fRootIx]); |
| calcLastPos(fRB.fTreeRoots[fRootIx]); |
| calcFollowPos(fRB.fTreeRoots[fRootIx]); |
| if (fRB.fDebugEnv!=null && fRB.fDebugEnv.indexOf("pos")>=0) { |
| System.out.print("\n"); |
| printPosSets(fRB.fTreeRoots[fRootIx]); |
| } |
| |
| // |
| // For "chained" rules, modify the followPos sets |
| // |
| if (fRB.fChainRules) { |
| calcChainedFollowPos(fRB.fTreeRoots[fRootIx], endMarkerNode); |
| } |
| |
| // |
| // BOF (start of input) test fixup. |
| // |
| if (fRB.fSetBuilder.sawBOF()) { |
| bofFixup(); |
| } |
| |
| // |
| // Build the DFA state transition tables. |
| // |
| buildStateTable(); |
| mapLookAheadRules(); |
| flagAcceptingStates(); |
| flagLookAheadStates(); |
| flagTaggedStates(); |
| |
| // |
| // Update the global table of rule status {tag} values |
| // The rule builder has a global vector of status values that are common |
| // for all tables. Merge the ones from this table into the global set. |
| // |
| mergeRuleStatusVals(); |
| } |
| |
| |
| |
| //----------------------------------------------------------------------------- |
| // |
| // calcNullable. Impossible to explain succinctly. See Aho, section 3.9 |
| // |
| //----------------------------------------------------------------------------- |
| void calcNullable(RBBINode n) { |
| if (n == null) { |
| return; |
| } |
| if (n.fType == RBBINode.setRef || |
| n.fType == RBBINode.endMark ) { |
| // These are non-empty leaf node types. |
| n.fNullable = false; |
| return; |
| } |
| |
| if (n.fType == RBBINode.lookAhead || n.fType == RBBINode.tag) { |
| // Lookahead marker node. It's a leaf, so no recursion on children. |
| // It's nullable because it does not match any literal text from the input stream. |
| n.fNullable = true; |
| return; |
| } |
| |
| |
| // The node is not a leaf. |
| // Calculate nullable on its children. |
| calcNullable(n.fLeftChild); |
| calcNullable(n.fRightChild); |
| |
| // Apply functions from table 3.40 in Aho |
| if (n.fType == RBBINode.opOr) { |
| n.fNullable = n.fLeftChild.fNullable || n.fRightChild.fNullable; |
| } |
| else if (n.fType == RBBINode.opCat) { |
| n.fNullable = n.fLeftChild.fNullable && n.fRightChild.fNullable; |
| } |
| else if (n.fType == RBBINode.opStar || n.fType == RBBINode.opQuestion) { |
| n.fNullable = true; |
| } |
| else { |
| n.fNullable = false; |
| } |
| } |
| |
| |
| |
| |
| //----------------------------------------------------------------------------- |
| // |
| // calcFirstPos. Impossible to explain succinctly. See Aho, section 3.9 |
| // |
| //----------------------------------------------------------------------------- |
| void calcFirstPos(RBBINode n) { |
| if (n == null) { |
| return; |
| } |
| if (n.fType == RBBINode.leafChar || |
| n.fType == RBBINode.endMark || |
| n.fType == RBBINode.lookAhead || |
| n.fType == RBBINode.tag) { |
| // These are non-empty leaf node types. |
| n.fFirstPosSet.add(n); |
| return; |
| } |
| |
| // The node is not a leaf. |
| // Calculate firstPos on its children. |
| calcFirstPos(n.fLeftChild); |
| calcFirstPos(n.fRightChild); |
| |
| // Apply functions from table 3.40 in Aho |
| if (n.fType == RBBINode.opOr) { |
| n.fFirstPosSet.addAll(n.fLeftChild.fFirstPosSet); |
| n.fFirstPosSet.addAll(n.fRightChild.fFirstPosSet); |
| } |
| else if (n.fType == RBBINode.opCat) { |
| n.fFirstPosSet.addAll(n.fLeftChild.fFirstPosSet); |
| if (n.fLeftChild.fNullable) { |
| n.fFirstPosSet.addAll(n.fRightChild.fFirstPosSet); |
| } |
| } |
| else if (n.fType == RBBINode.opStar || |
| n.fType == RBBINode.opQuestion || |
| n.fType == RBBINode.opPlus) { |
| n.fFirstPosSet.addAll(n.fLeftChild.fFirstPosSet); |
| } |
| } |
| |
| |
| |
| //----------------------------------------------------------------------------- |
| // |
| // calcLastPos. Impossible to explain succinctly. See Aho, section 3.9 |
| // |
| //----------------------------------------------------------------------------- |
| void calcLastPos(RBBINode n) { |
| if (n == null) { |
| return; |
| } |
| if (n.fType == RBBINode.leafChar || |
| n.fType == RBBINode.endMark || |
| n.fType == RBBINode.lookAhead || |
| n.fType == RBBINode.tag) { |
| // These are non-empty leaf node types. |
| n.fLastPosSet.add(n); |
| return; |
| } |
| |
| // The node is not a leaf. |
| // Calculate lastPos on its children. |
| calcLastPos(n.fLeftChild); |
| calcLastPos(n.fRightChild); |
| |
| // Apply functions from table 3.40 in Aho |
| if (n.fType == RBBINode.opOr) { |
| n.fLastPosSet.addAll(n.fLeftChild.fLastPosSet); |
| n.fLastPosSet.addAll(n.fRightChild.fLastPosSet); |
| } |
| else if (n.fType == RBBINode.opCat) { |
| n.fLastPosSet.addAll(n.fRightChild.fLastPosSet); |
| if (n.fRightChild.fNullable) { |
| n.fLastPosSet.addAll(n.fLeftChild.fLastPosSet); |
| } |
| } |
| else if (n.fType == RBBINode.opStar || |
| n.fType == RBBINode.opQuestion || |
| n.fType == RBBINode.opPlus) { |
| n.fLastPosSet.addAll(n.fLeftChild.fLastPosSet); |
| } |
| } |
| |
| |
| |
| //----------------------------------------------------------------------------- |
| // |
| // calcFollowPos. Impossible to explain succinctly. See Aho, section 3.9 |
| // |
| //----------------------------------------------------------------------------- |
| void calcFollowPos(RBBINode n) { |
| if (n == null || |
| n.fType == RBBINode.leafChar || |
| n.fType == RBBINode.endMark) { |
| return; |
| } |
| |
| calcFollowPos(n.fLeftChild); |
| calcFollowPos(n.fRightChild); |
| |
| // Aho rule #1 |
| if (n.fType == RBBINode.opCat) { |
| for (RBBINode i /* is 'i' in Aho's description */ : n.fLeftChild.fLastPosSet) { |
| i.fFollowPos.addAll(n.fRightChild.fFirstPosSet); |
| } |
| } |
| |
| // Aho rule #2 |
| if (n.fType == RBBINode.opStar || |
| n.fType == RBBINode.opPlus) { |
| for (RBBINode i /* again, n and i are the names from Aho's description */ : n.fLastPosSet) { |
| i.fFollowPos.addAll(n.fFirstPosSet); |
| } |
| } |
| } |
| |
| //----------------------------------------------------------------------------- |
| // |
| // addRuleRootNodes Recursively walk a parse tree, adding all nodes flagged |
| // as roots of a rule to a destination vector. |
| // |
| //----------------------------------------------------------------------------- |
| void addRuleRootNodes(List<RBBINode> dest, RBBINode node) { |
| if (node == null) { |
| return; |
| } |
| if (node.fRuleRoot) { |
| dest.add(node); |
| // Note: rules cannot nest. If we found a rule start node, |
| // no child node can also be a start node. |
| return; |
| } |
| addRuleRootNodes(dest, node.fLeftChild); |
| addRuleRootNodes(dest, node.fRightChild); |
| } |
| |
| //----------------------------------------------------------------------------- |
| // |
| // calcChainedFollowPos. Modify the previously calculated followPos sets |
| // to implement rule chaining. NOT described by Aho |
| // |
| //----------------------------------------------------------------------------- |
| void calcChainedFollowPos(RBBINode tree, RBBINode endMarkNode) { |
| |
| List<RBBINode> leafNodes = new ArrayList<>(); |
| |
| // get a list all leaf nodes |
| tree.findNodes(leafNodes, RBBINode.leafChar); |
| |
| // Collect all leaf nodes that can start matches for rules |
| // with inbound chaining enabled, which is the union of the |
| // firstPosition sets from each of the rule root nodes. |
| |
| List<RBBINode> ruleRootNodes = new ArrayList<>(); |
| addRuleRootNodes(ruleRootNodes, tree); |
| |
| Set<RBBINode> matchStartNodes = new HashSet<>(); |
| for (RBBINode node: ruleRootNodes) { |
| if (node.fChainIn) { |
| matchStartNodes.addAll(node.fFirstPosSet); |
| } |
| } |
| |
| // Iterate over all leaf nodes, |
| // |
| for (RBBINode endNode : leafNodes) { |
| |
| // Identify leaf nodes that correspond to overall rule match positions. |
| // These include the endMarkNode in their followPos sets. |
| // |
| // Note: do not consider other end marker nodes, those that are added to |
| // look-ahead rules. These can't chain; a match immediately stops |
| // further matching. This leaves exactly one end marker node, the one |
| // at the end of the complete tree. |
| |
| if (!endNode.fFollowPos.contains(endMarkNode)) { |
| continue; |
| } |
| |
| // We've got a node that can end a match. |
| |
| // Now iterate over the nodes that can start a match, looking for ones |
| // with the same char class as our ending node. |
| for (RBBINode startNode : matchStartNodes) { |
| if (startNode.fType != RBBINode.leafChar) { |
| continue; |
| } |
| |
| if (endNode.fVal == startNode.fVal) { |
| // The end val (character class) of one possible match is the |
| // same as the start of another. |
| |
| // Add all nodes from the followPos of the start node to the |
| // followPos set of the end node, which will have the effect of |
| // letting matches transition from a match state at endNode |
| // to the second char of a match starting with startNode. |
| endNode.fFollowPos.addAll(startNode.fFollowPos); |
| } |
| } |
| } |
| } |
| |
| |
| //----------------------------------------------------------------------------- |
| // |
| // bofFixup. Fixup for state tables that include {bof} beginning of input testing. |
| // Do an swizzle similar to chaining, modifying the followPos set of |
| // the bofNode to include the followPos nodes from other {bot} nodes |
| // scattered through the tree. |
| // |
| // This function has much in common with calcChainedFollowPos(). |
| // |
| //----------------------------------------------------------------------------- |
| void bofFixup() { |
| // |
| // The parse tree looks like this ... |
| // fTree root --. <cat> |
| // / \ |
| // <cat> <#end node> |
| // / \ |
| // <bofNode> rest |
| // of tree |
| // |
| // We will be adding things to the followPos set of the <bofNode> |
| // |
| RBBINode bofNode = fRB.fTreeRoots[fRootIx].fLeftChild.fLeftChild; |
| Assert.assrt(bofNode.fType == RBBINode.leafChar); |
| Assert.assrt(bofNode.fVal == 2); |
| |
| // Get all nodes that can be the start a match of the user-written rules |
| // (excluding the fake bofNode) |
| // We want the nodes that can start a match in the |
| // part labeled "rest of tree" |
| // |
| Set<RBBINode> matchStartNodes = fRB.fTreeRoots[fRootIx].fLeftChild.fRightChild.fFirstPosSet; |
| for (RBBINode startNode : matchStartNodes) { |
| if (startNode.fType != RBBINode.leafChar) { |
| continue; |
| } |
| |
| if (startNode.fVal == bofNode.fVal) { |
| // We found a leaf node corresponding to a {bof} that was |
| // explicitly written into a rule. |
| // Add everything from the followPos set of this node to the |
| // followPos set of the fake bofNode at the start of the tree. |
| // |
| bofNode.fFollowPos.addAll(startNode.fFollowPos); |
| } |
| } |
| } |
| |
| //----------------------------------------------------------------------------- |
| // |
| // buildStateTable() Determine the set of runtime DFA states and the |
| // transition tables for these states, by the algorithm |
| // of fig. 3.44 in Aho. |
| // |
| // Most of the comments are quotes of Aho's psuedo-code. |
| // |
| //----------------------------------------------------------------------------- |
| void buildStateTable() { |
| // |
| // Add a dummy state 0 - the stop state. Not from Aho. |
| int lastInputSymbol = fRB.fSetBuilder.getNumCharCategories() - 1; |
| RBBIStateDescriptor failState = new RBBIStateDescriptor(lastInputSymbol); |
| fDStates.add(failState); |
| |
| // initially, the only unmarked state in Dstates is firstpos(root), |
| // where toot is the root of the syntax tree for (r)#; |
| RBBIStateDescriptor initialState = new RBBIStateDescriptor(lastInputSymbol); |
| initialState.fPositions.addAll(fRB.fTreeRoots[fRootIx].fFirstPosSet); |
| fDStates.add(initialState); |
| |
| // while there is an unmarked state T in Dstates do begin |
| for (;;) { |
| RBBIStateDescriptor T = null; |
| int tx; |
| for (tx=1; tx<fDStates.size(); tx++) { |
| RBBIStateDescriptor temp = fDStates.get(tx); |
| if (temp.fMarked == false) { |
| T = temp; |
| break; |
| } |
| } |
| if (T == null) { |
| break; |
| } |
| |
| // mark T; |
| T.fMarked = true; |
| |
| // for each input symbol a do begin |
| int a; |
| for (a = 1; a<=lastInputSymbol; a++) { |
| // let U be the set of positions that are in followpos(p) |
| // for some position p in T |
| // such that the symbol at position p is a; |
| Set<RBBINode> U = null; |
| for (RBBINode p : T.fPositions) { |
| if ((p.fType == RBBINode.leafChar) && (p.fVal == a)) { |
| if (U == null) { |
| U = new HashSet<>(); |
| } |
| U.addAll(p.fFollowPos); |
| } |
| } |
| |
| // if U is not empty and not in DStates then |
| int ux = 0; |
| boolean UinDstates = false; |
| if (U != null) { |
| Assert.assrt(U.size() > 0); |
| int ix; |
| for (ix=0; ix<fDStates.size(); ix++) { |
| RBBIStateDescriptor temp2; |
| temp2 = fDStates.get(ix); |
| if (U.equals(temp2.fPositions)) { |
| U = temp2.fPositions; |
| ux = ix; |
| UinDstates = true; |
| break; |
| } |
| } |
| |
| // Add U as an unmarked state to Dstates |
| if (!UinDstates) |
| { |
| RBBIStateDescriptor newState = new RBBIStateDescriptor(lastInputSymbol); |
| newState.fPositions = U; |
| fDStates.add(newState); |
| ux = fDStates.size()-1; |
| } |
| |
| // Dtran[T, a] := U; |
| T.fDtran[a] = ux; |
| } |
| } |
| } |
| } |
| |
| /** |
| * mapLookAheadRules |
| * |
| */ |
| void mapLookAheadRules() { |
| fLookAheadRuleMap = new int[fRB.fScanner.numRules() + 1]; |
| |
| for (RBBIStateDescriptor sd: fDStates) { |
| int laSlotForState = 0; |
| |
| // Establish the look-ahead slot for this state, if the state covers |
| // any look-ahead nodes - corresponding to the '/' in look-ahead rules. |
| |
| // If any of the look-ahead nodes already have a slot assigned, use it, |
| // otherwise assign a new one. |
| |
| boolean sawLookAheadNode = false; |
| for (RBBINode node: sd.fPositions) { |
| if (node.fType != RBBINode.lookAhead) { |
| continue; |
| } |
| sawLookAheadNode = true; |
| int ruleNum = node.fVal; // Set when rule was originally parsed. |
| assert(ruleNum < fLookAheadRuleMap.length); |
| assert(ruleNum > 0); |
| int laSlot = fLookAheadRuleMap[ruleNum]; |
| if (laSlot != 0) { |
| if (laSlotForState == 0) { |
| laSlotForState = laSlot; |
| } else { |
| // TODO: figure out if this can fail, change to setting an error code if so. |
| assert(laSlot == laSlotForState); |
| } |
| } |
| } |
| if (!sawLookAheadNode) { |
| continue; |
| } |
| |
| if (laSlotForState == 0) { |
| laSlotForState = ++fLASlotsInUse; |
| } |
| |
| // For each look ahead node covered by this state, |
| // set the mapping from the node's rule number to the look ahead slot. |
| // There can be multiple nodes/rule numbers going to the same la slot. |
| |
| for (RBBINode node: sd.fPositions) { |
| if (node.fType != RBBINode.lookAhead) { |
| continue; |
| } |
| int ruleNum = node.fVal; // Set when rule was originally parsed. |
| int existingVal = fLookAheadRuleMap[ruleNum]; |
| assert(existingVal == 0 || existingVal == laSlotForState); |
| fLookAheadRuleMap[ruleNum] = laSlotForState; |
| } |
| } |
| |
| } |
| |
| //----------------------------------------------------------------------------- |
| // |
| // flagAcceptingStates Identify accepting states. |
| // First get a list of all of the end marker nodes. |
| // Then, for each state s, |
| // if s contains one of the end marker nodes in its list of tree positions then |
| // s is an accepting state. |
| // |
| //----------------------------------------------------------------------------- |
| void flagAcceptingStates() { |
| List<RBBINode> endMarkerNodes = new ArrayList<>(); |
| RBBINode endMarker; |
| int i; |
| int n; |
| |
| fRB.fTreeRoots[fRootIx].findNodes(endMarkerNodes, RBBINode.endMark); |
| |
| for (i=0; i<endMarkerNodes.size(); i++) { |
| endMarker = endMarkerNodes.get(i); |
| for (n=0; n<fDStates.size(); n++) { |
| RBBIStateDescriptor sd = fDStates.get(n); |
| if (sd.fPositions.contains(endMarker)) { |
| // Any non-zero value for fAccepting means this is an accepting node. |
| // The value is what will be returned to the user as the break status. |
| // If no other value was specified, force it to ACCEPTING_UNCONDITIONAL (1). |
| |
| if (sd.fAccepting==0) { |
| // State hasn't been marked as accepting yet. Do it now. |
| sd.fAccepting = fLookAheadRuleMap[endMarker.fVal]; |
| if (sd.fAccepting == 0) { |
| sd.fAccepting = RBBIDataWrapper.ACCEPTING_UNCONDITIONAL; |
| } |
| } |
| if (sd.fAccepting==RBBIDataWrapper.ACCEPTING_UNCONDITIONAL && endMarker.fVal != 0) { |
| // Both lookahead and non-lookahead accepting for this state. |
| // Favor the look-ahead, because a look-ahead match needs to |
| // immediately stop the run-time engine. First match, not longest. |
| sd.fAccepting = fLookAheadRuleMap[endMarker.fVal]; |
| } |
| // implicit else: |
| // if sd.fAccepting already had a value other than 0 or 1, leave it be. |
| } |
| } |
| } |
| } |
| |
| |
| //----------------------------------------------------------------------------- |
| // |
| // flagLookAheadStates Very similar to flagAcceptingStates, above. |
| // |
| //----------------------------------------------------------------------------- |
| void flagLookAheadStates() { |
| List<RBBINode> lookAheadNodes = new ArrayList<>(); |
| RBBINode lookAheadNode; |
| int i; |
| int n; |
| |
| fRB.fTreeRoots[fRootIx].findNodes(lookAheadNodes, RBBINode.lookAhead); |
| for (i=0; i<lookAheadNodes.size(); i++) { |
| lookAheadNode = lookAheadNodes.get(i); |
| for (n=0; n<fDStates.size(); n++) { |
| RBBIStateDescriptor sd = fDStates.get(n); |
| if (sd.fPositions.contains(lookAheadNode)) { |
| int lookaheadSlot = fLookAheadRuleMap[lookAheadNode.fVal]; |
| assert(sd.fLookAhead == 0 || sd.fLookAhead == lookaheadSlot); |
| sd.fLookAhead = lookaheadSlot; |
| } |
| } |
| } |
| } |
| |
| |
| |
| |
| //----------------------------------------------------------------------------- |
| // |
| // flagTaggedStates |
| // |
| //----------------------------------------------------------------------------- |
| void flagTaggedStates() { |
| List<RBBINode> tagNodes = new ArrayList<>(); |
| RBBINode tagNode; |
| int i; |
| int n; |
| |
| fRB.fTreeRoots[fRootIx].findNodes(tagNodes, RBBINode.tag); |
| for (i=0; i<tagNodes.size(); i++) { // For each tag node t (all of 'em) |
| tagNode = tagNodes.get(i); |
| |
| for (n=0; n<fDStates.size(); n++) { // For each state s (row in the state table) |
| RBBIStateDescriptor sd = fDStates.get(n); |
| if (sd.fPositions.contains(tagNode)) { // if s include the tag node t |
| sd.fTagVals.add(tagNode.fVal); |
| } |
| } |
| } |
| } |
| |
| |
| |
| //----------------------------------------------------------------------------- |
| // |
| // mergeRuleStatusVals |
| // |
| // Allocate positions in the global array of rule status {tag} values |
| // |
| // The RBBI runtime uses an array of {sets of status values} that can |
| // be returned for boundaries. Each accepting state that has non-zero |
| // status includes an index into this array. The format of the array |
| // is |
| // Num of status values in group 1 |
| // status val |
| // status val |
| // ... |
| // Num of status vals in group 2 |
| // status val |
| // status val |
| // ... |
| // etc. |
| // |
| // |
| //----------------------------------------------------------------------------- |
| |
| void mergeRuleStatusVals() { |
| // |
| // The basic outline of what happens here is this... |
| // |
| // for each state in this state table |
| // if the status tag list for this state is in the global statuses list |
| // record where and |
| // continue with the next state |
| // else |
| // add the tag list for this state to the global list. |
| // |
| int n; |
| |
| // Pre-load a single tag of {0} into the table. |
| // We will need this as a default, for rule sets with no explicit tagging, |
| // or with explicit tagging of {0}. |
| if (fRB.fRuleStatusVals.size() == 0) { |
| fRB.fRuleStatusVals.add(1); // Num of statuses in group |
| fRB.fRuleStatusVals.add(0); // and our single status of zero |
| |
| SortedSet<Integer> s0 = new TreeSet<>(); // mapping for rules with no explicit tagging |
| fRB.fStatusSets.put(s0, 0); // (key is an empty set). |
| |
| SortedSet<Integer> s1 = new TreeSet<>(); // mapping for rules with explicit tagging of {0} |
| s1.add(0); |
| fRB.fStatusSets.put(s1, 0); |
| } |
| |
| // For each state, check whether the state's status tag values are |
| // already entered into the status values array, and add them if not. |
| for (n=0; n<fDStates.size(); n++) { |
| RBBIStateDescriptor sd = fDStates.get(n); |
| Set<Integer> statusVals = sd.fTagVals; |
| Integer arrayIndexI = fRB.fStatusSets.get(statusVals); |
| if (arrayIndexI == null) { |
| // This is the first encounter of this set of status values. |
| // Add them to the statusSets map, This map associates |
| // the set of status values with an index in the runtime status |
| // values array. |
| arrayIndexI = fRB.fRuleStatusVals.size(); |
| fRB.fStatusSets.put(statusVals, arrayIndexI); |
| |
| // Add the new set of status values to the vector of values that |
| // will eventually become the array used by the runtime engine. |
| fRB.fRuleStatusVals.add(statusVals.size()); |
| fRB.fRuleStatusVals.addAll(statusVals); |
| } |
| |
| // Save the runtime array index back into the state descriptor. |
| sd.fTagsIdx = arrayIndexI.intValue(); |
| } |
| } |
| |
| |
| |
| |
| |
| |
| |
| //----------------------------------------------------------------------------- |
| // |
| // printPosSets Debug function. Dump Nullable, firstpos, lastpos and followpos |
| // for each node in the tree. |
| // |
| //----------------------------------------------------------------------------- |
| |
| void printPosSets(RBBINode n) { |
| if (n==null) { |
| return; |
| } |
| RBBINode.printNode(n); |
| System.out.print(" Nullable: " + n.fNullable); |
| |
| System.out.print(" firstpos: "); |
| printSet(n.fFirstPosSet); |
| |
| System.out.print(" lastpos: "); |
| printSet(n.fLastPosSet); |
| |
| System.out.print(" followpos: "); |
| printSet(n.fFollowPos); |
| |
| printPosSets(n.fLeftChild); |
| printPosSets(n.fRightChild); |
| } |
| |
| |
| |
| /** |
| * Find duplicate (redundant) character classes. Begin looking with categories.first. |
| * Duplicates, if found are returned in the categories parameter. |
| * This is an iterator-like function, used to identify character classes |
| * (state table columns) that can be eliminated. |
| * @param categories in/out parameter, specifies where to start looking for duplicates, |
| * and returns the first pair of duplicates found, if any. |
| * @return true if duplicate char classes were found, false otherwise. |
| * @hide draft / provisional / internal are hidden on Android |
| */ |
| boolean findDuplCharClassFrom(RBBIRuleBuilder.IntPair categories) { |
| int numStates = fDStates.size(); |
| int numCols = fRB.fSetBuilder.getNumCharCategories(); |
| |
| int table_base = 0; |
| int table_dupl = 0; |
| for (; categories.first < numCols-1; ++categories.first) { |
| // Note: dictionary & non-dictionary columns cannot be merged. |
| // The limitSecond value prevents considering mixed pairs. |
| // Dictionary categories are >= DictCategoriesStart. |
| // Non dict categories are < DictCategoriesStart. |
| int limitSecond = categories.first < fRB.fSetBuilder.getDictCategoriesStart() ? |
| fRB.fSetBuilder.getDictCategoriesStart() : numCols; |
| for (categories.second=categories.first+1; categories.second < limitSecond; ++categories.second) { |
| for (int state=0; state<numStates; state++) { |
| RBBIStateDescriptor sd = fDStates.get(state); |
| table_base = sd.fDtran[categories.first]; |
| table_dupl = sd.fDtran[categories.second]; |
| if (table_base != table_dupl) { |
| break; |
| } |
| } |
| if (table_base == table_dupl) { |
| return true; |
| } |
| } |
| } |
| return false; |
| } |
| |
| /** |
| * Remove a column from the state table. Used when two character categories |
| * have been found equivalent, and merged together, to eliminate the unneeded table column. |
| */ |
| void removeColumn(int column) { |
| int numStates = fDStates.size(); |
| for (int state=0; state<numStates; state++) { |
| RBBIStateDescriptor sd = fDStates.get(state); |
| assert(column < sd.fDtran.length); |
| int[] newArray = Arrays.copyOf(sd.fDtran, sd.fDtran.length - 1); |
| System.arraycopy(sd.fDtran, column+1, newArray, column, newArray.length - column); |
| sd.fDtran = newArray; |
| } |
| } |
| |
| |
| /** |
| * Find duplicate (redundant) states, beginning at the specified pair, |
| * within this state table. This is an iterator-like function, used to |
| * identify states (state table rows) that can be eliminated. |
| * @param states in/out parameter, specifies where to start looking for duplicates, |
| * and returns the first pair of duplicates found, if any. |
| * @return true if duplicate states were found, false otherwise. |
| * @hide draft / provisional / internal are hidden on Android |
| */ |
| boolean findDuplicateState(RBBIRuleBuilder.IntPair states) { |
| int numStates = fDStates.size(); |
| int numCols = fRB.fSetBuilder.getNumCharCategories(); |
| |
| for (; states.first<numStates-1; ++states.first) { |
| RBBIStateDescriptor firstSD = fDStates.get(states.first); |
| for (states.second=states.first+1; states.second<numStates; ++states.second) { |
| RBBIStateDescriptor duplSD = fDStates.get(states.second); |
| if (firstSD.fAccepting != duplSD.fAccepting || |
| firstSD.fLookAhead != duplSD.fLookAhead || |
| firstSD.fTagsIdx != duplSD.fTagsIdx) { |
| continue; |
| } |
| boolean rowsMatch = true; |
| for (int col=0; col < numCols; ++col) { |
| int firstVal = firstSD.fDtran[col]; |
| int duplVal = duplSD.fDtran[col]; |
| if (!((firstVal == duplVal) || |
| ((firstVal == states.first || firstVal == states.second) && |
| (duplVal == states.first || duplVal == states.second)))) { |
| rowsMatch = false; |
| break; |
| } |
| } |
| if (rowsMatch) { |
| return true; |
| } |
| } |
| } |
| return false; |
| } |
| |
| /** |
| * Find the next duplicate state in the safe reverse table. An iterator function. |
| * @param states in/out parameter, specifies where to start looking for duplicates, |
| * and returns the first pair of duplicates found, if any. |
| * @return true if duplicate states were found, false otherwise. |
| * @hide draft / provisional / internal are hidden on Android |
| */ |
| boolean findDuplicateSafeState(RBBIRuleBuilder.IntPair states) { |
| int numStates = fSafeTable.size(); |
| |
| for (; states.first<numStates-1; ++states.first) { |
| short[] firstRow = fSafeTable.get(states.first); |
| for (states.second=states.first+1; states.second<numStates; ++states.second) { |
| short[] duplRow = fSafeTable.get(states.second); |
| boolean rowsMatch = true; |
| int numCols = firstRow.length; |
| for (int col=0; col < numCols; ++col) { |
| int firstVal = firstRow[col]; |
| int duplVal = duplRow[col]; |
| if (!((firstVal == duplVal) || |
| ((firstVal == states.first || firstVal == states.second) && |
| (duplVal == states.first || duplVal == states.second)))) { |
| rowsMatch = false; |
| break; |
| } |
| } |
| if (rowsMatch) { |
| return true; |
| } |
| } |
| } |
| return false; |
| } |
| |
| /** |
| * Remove a duplicate state (row) from the state table. All references to the deleted (second) state |
| * are redirected to first state. |
| * @param duplStates The duplicate pair of states. |
| * @hide draft / provisional / internal are hidden on Android |
| */ |
| void removeState(IntPair duplStates) { |
| final int keepState = duplStates.first; |
| final int duplState = duplStates.second; |
| assert(keepState < duplState); |
| assert(duplState < fDStates.size()); |
| |
| fDStates.remove(duplState); |
| |
| int numStates = fDStates.size(); |
| int numCols = fRB.fSetBuilder.getNumCharCategories(); |
| for (int state=0; state<numStates; ++state) { |
| RBBIStateDescriptor sd = fDStates.get(state); |
| for (int col=0; col<numCols; col++) { |
| int existingVal = sd.fDtran[col]; |
| int newVal = existingVal; |
| if (existingVal == duplState) { |
| newVal = keepState; |
| } else if (existingVal > duplState) { |
| newVal = existingVal - 1; |
| } |
| sd.fDtran[col] = newVal; |
| } |
| } |
| } |
| |
| /** |
| * Remove a duplicate state from the safe table. |
| * @param duplStates The duplicate pair of states. The first is kept, the second is removed. |
| * All references to the second in the state table are retargeted |
| * to the first. |
| * @hide draft / provisional / internal are hidden on Android |
| */ |
| void removeSafeState(IntPair duplStates) { |
| final int keepState = duplStates.first; |
| final int duplState = duplStates.second; |
| assert(keepState < duplState); |
| assert(duplState < fSafeTable.size()); |
| |
| fSafeTable.remove(duplState); |
| int numStates = fSafeTable.size(); |
| for (int state=0; state<numStates; ++state) { |
| short[] row = fSafeTable.get(state); |
| for (int col=0; col<row.length; col++) { |
| int existingVal = row[col]; |
| int newVal = existingVal; |
| if (existingVal == duplState) { |
| newVal = keepState; |
| } else if (existingVal > duplState) { |
| newVal = existingVal - 1; |
| } |
| row[col] = (short)newVal; |
| } |
| } |
| } |
| |
| |
| /** |
| * Check for, and remove duplicate states (table rows). |
| * @return the number of states removed. |
| * @hide draft / provisional / internal are hidden on Android |
| */ |
| int removeDuplicateStates() { |
| IntPair dupls = new IntPair(3, 0); |
| int numStatesRemoved = 0; |
| |
| while (findDuplicateState(dupls)) { |
| // System.out.printf("Removing duplicate states (%d, %d)\n", dupls.first, dupls.second); |
| removeState(dupls); |
| ++numStatesRemoved; |
| } |
| return numStatesRemoved; |
| } |
| |
| |
| /** |
| * Calculate the size in bytes of the serialized form of this state transition table, |
| * which is identical to the ICU4C runtime form. |
| * Refer to common/rbbidata.h from ICU4C for the declarations of the structures |
| * being matched by this calculation. |
| */ |
| int getTableSize() { |
| if (fRB.fTreeRoots[fRootIx] == null) { |
| return 0; |
| } |
| int size = RBBIDataWrapper.RBBIStateTable.fHeaderSize; // The header, with no rows to the table. |
| int numRows = fDStates.size(); |
| int numCols = fRB.fSetBuilder.getNumCharCategories(); |
| boolean use8Bits = numRows <= MAX_STATE_FOR_8BITS_TABLE; |
| int rowSize = (use8Bits ? 1 : 2 ) * (RBBIDataWrapper.NEXTSTATES + numCols); |
| size += numRows * rowSize; |
| size = (size + 7) & ~7; // round up to a multiple of 8 bytes |
| return size; |
| } |
| |
| |
| |
| /** |
| * Create a RBBIDataWrapper.RBBIStateTable for a newly compiled table. |
| * RBBIDataWrapper.RBBIStateTable is similar to struct RBBIStateTable in ICU4C, |
| * in common/rbbidata.h |
| */ |
| RBBIDataWrapper.RBBIStateTable exportTable() { |
| int state; |
| int col; |
| |
| RBBIDataWrapper.RBBIStateTable table = new RBBIDataWrapper.RBBIStateTable(); |
| if (fRB.fTreeRoots[fRootIx] == null) { |
| return table; |
| } |
| |
| Assert.assrt(fRB.fSetBuilder.getNumCharCategories() < 0x7fff && |
| fDStates.size() < 0x7fff); |
| table.fNumStates = fDStates.size(); |
| table.fDictCategoriesStart = fRB.fSetBuilder.getDictCategoriesStart(); |
| table.fLookAheadResultsSize = |
| fLASlotsInUse == RBBIDataWrapper.ACCEPTING_UNCONDITIONAL ? 0 : fLASlotsInUse + 1; |
| boolean use8Bits = table.fNumStates <= MAX_STATE_FOR_8BITS_TABLE; |
| |
| // Size of table size in shorts. |
| int rowLen = RBBIDataWrapper.NEXTSTATES + fRB.fSetBuilder.getNumCharCategories(); // Row Length in shorts. |
| int tableSize; |
| if (use8Bits) { |
| tableSize = (getTableSize() - RBBIDataWrapper.RBBIStateTable.fHeaderSize); // fTable length in bytes. |
| table.fTable = new char[tableSize]; |
| table.fRowLen = rowLen; // Row length in bytes. |
| } else { |
| tableSize = (getTableSize() - RBBIDataWrapper.RBBIStateTable.fHeaderSize) / 2; // fTable length in shorts. |
| table.fTable = new char[tableSize]; |
| table.fRowLen = rowLen * 2; // Row length in bytes. |
| } |
| |
| if (fRB.fLookAheadHardBreak) { |
| table.fFlags |= RBBIDataWrapper.RBBI_LOOKAHEAD_HARD_BREAK; |
| } |
| if (fRB.fSetBuilder.sawBOF()) { |
| table.fFlags |= RBBIDataWrapper.RBBI_BOF_REQUIRED; |
| } |
| if (use8Bits) { |
| table.fFlags |= RBBIDataWrapper.RBBI_8BITS_ROWS; |
| } |
| |
| int numCharCategories = fRB.fSetBuilder.getNumCharCategories(); |
| for (state=0; state<table.fNumStates; state++) { |
| RBBIStateDescriptor sd = fDStates.get(state); |
| int row = state*rowLen; |
| if (use8Bits) { |
| Assert.assrt (0 <= sd.fAccepting && sd.fAccepting <= 255); |
| Assert.assrt (0 <= sd.fLookAhead && sd.fLookAhead <= 255); |
| } else { |
| Assert.assrt (0 <= sd.fAccepting && sd.fAccepting <= 0xffff); |
| Assert.assrt (0 <= sd.fLookAhead && sd.fLookAhead <= 0xffff); |
| } |
| table.fTable[row + RBBIDataWrapper.ACCEPTING] = (char)sd.fAccepting; |
| table.fTable[row + RBBIDataWrapper.LOOKAHEAD] = (char)sd.fLookAhead; |
| table.fTable[row + RBBIDataWrapper.TAGSIDX] = (char)sd.fTagsIdx; |
| for (col=0; col<numCharCategories; col++) { |
| if (use8Bits) { |
| Assert.assrt (0 <= sd.fDtran[col] && sd.fDtran[col] <= MAX_STATE_FOR_8BITS_TABLE); |
| } |
| table.fTable[row + RBBIDataWrapper.NEXTSTATES + col] = (char)sd.fDtran[col]; |
| } |
| } |
| return table; |
| } |
| |
| /** |
| * Synthesize a safe state table from the main state table. |
| */ |
| void buildSafeReverseTable() { |
| // Safe Reverse table construction is described in more detail in the corresponding |
| // function in ICU4C, in source/common/rbbitblb.cpp. Not duplicated here because |
| // it is too likely to get out of sync. |
| |
| // Each safe pair is stored as two chars in the safePair stringBuilder. |
| StringBuilder safePairs = new StringBuilder(); |
| |
| int numCharClasses = fRB.fSetBuilder.getNumCharCategories(); |
| int numStates = fDStates.size(); |
| |
| for (int c1=0; c1<numCharClasses; ++c1) { |
| for (int c2=0; c2 < numCharClasses; ++c2) { |
| int wantedEndState = -1; |
| int endState = 0; |
| for (int startState = 1; startState < numStates; ++startState) { |
| RBBIStateDescriptor startStateD = fDStates.get(startState); |
| int s2 = startStateD.fDtran[c1]; |
| RBBIStateDescriptor s2StateD = fDStates.get(s2); |
| endState = s2StateD.fDtran[c2]; |
| if (wantedEndState < 0) { |
| wantedEndState = endState; |
| } else { |
| if (wantedEndState != endState) { |
| break; |
| } |
| } |
| } |
| if (wantedEndState == endState) { |
| safePairs.append((char)c1); |
| safePairs.append((char)c2); |
| // System.out.printf("(%d, %d) ", c1, c2); |
| } |
| } |
| // System.out.printf("\n"); |
| } |
| |
| // Populate the initial safe table. |
| // The table as a whole is a List<short[]> |
| // Row 0 is the stop state. |
| // Row 1 is the start sate. |
| // Row 2 and beyond are other states, initially one per char class, but |
| // after initial construction, many of the states will be combined, compacting the table.) |
| // The String holds the nextState data only. The four leading fields of a row, fAccepting, |
| // fLookAhead, etc. are not needed for the safe table, and are omitted at this stage of building. |
| |
| assert(fSafeTable == null); |
| fSafeTable = new ArrayList<>(); |
| for (int row=0; row<numCharClasses + 2; ++row) { |
| fSafeTable.add(new short[numCharClasses]); |
| } |
| |
| // From the start state, each input char class transitions to the state for that input. |
| short[] startState = fSafeTable.get(1); |
| for (int charClass=0; charClass < numCharClasses; ++charClass) { |
| // Note: +2 to skip the start & stop state rows. |
| startState[charClass] = (short)(charClass+2); |
| } |
| |
| // Initially make every other state table row look like the start state row |
| // (except for the stop state, which remains all 0) |
| for (int row=2; row<numCharClasses+2; ++row) { |
| System.arraycopy(startState, 0, fSafeTable.get(row), 0, startState.length); |
| } |
| |
| // Run through the safe pairs, set the next state to zero when pair has been seen. |
| // Zero being the stop state, meaning we found a safe point. |
| for (int pairIdx=0; pairIdx<safePairs.length(); pairIdx+=2) { |
| int c1 = safePairs.charAt(pairIdx); |
| int c2 = safePairs.charAt(pairIdx + 1); |
| |
| short[] rowState = fSafeTable.get(c2 + 2); |
| rowState[c1] = 0; |
| } |
| |
| // Remove duplicate or redundant rows from the table. |
| RBBIRuleBuilder.IntPair states = new RBBIRuleBuilder.IntPair(1, 0); |
| while (findDuplicateSafeState(states)) { |
| // System.out.printf("Removing duplicate safe states (%d, %d)\n", states.first, states.second); |
| removeSafeState(states); |
| } |
| } |
| |
| |
| /** |
| * Calculate the size of the runtime form of this safe state table. |
| */ |
| int getSafeTableSize() { |
| if (fSafeTable == null) { |
| return 0; |
| } |
| int size = RBBIDataWrapper.RBBIStateTable.fHeaderSize; // The header, with no rows to the table. |
| int numRows = fSafeTable.size(); |
| int numCols = fSafeTable.get(0).length; |
| boolean use8Bits = numRows <= MAX_STATE_FOR_8BITS_TABLE; |
| |
| int rowSize = (use8Bits ? 1 : 2 ) * (RBBIDataWrapper.NEXTSTATES + numCols); |
| size += numRows * rowSize; |
| // TODO: there are redundant round-up. Figure out best place, get rid of the rest. |
| size = (size + 7) & ~7; // round up to a multiple of 8 bytes |
| return size; |
| } |
| |
| |
| /** |
| * Create a RBBIDataWrapper.RBBIStateTable for the safe reverse table. |
| * RBBIDataWrapper.RBBIStateTable is similar to struct RBBIStateTable in ICU4C, |
| * in common/rbbidata.h |
| */ |
| RBBIDataWrapper.RBBIStateTable exportSafeTable() { |
| RBBIDataWrapper.RBBIStateTable table = new RBBIDataWrapper.RBBIStateTable(); |
| table.fNumStates = fSafeTable.size(); |
| boolean use8Bits = table.fNumStates <= MAX_STATE_FOR_8BITS_TABLE; |
| int numCharCategories = fSafeTable.get(0).length; |
| |
| // Size of table size in shorts. |
| int rowLen = RBBIDataWrapper.NEXTSTATES + numCharCategories; |
| // TODO: tableSize is basically numStates * numCharCategories, |
| // except for alignment padding. Clean up here, and in main exportTable(). |
| int tableSize = (getSafeTableSize() - RBBIDataWrapper.RBBIStateTable.fHeaderSize); // fTable length in bytes. |
| if (use8Bits) { |
| table.fFlags |= RBBIDataWrapper.RBBI_8BITS_ROWS; |
| table.fTable = new char[tableSize]; |
| table.fRowLen = rowLen; // Row length in bytes. |
| } else { |
| tableSize /= 2; // fTable length in shorts. |
| table.fTable = new char[tableSize]; |
| table.fRowLen = rowLen * 2; // Row length in bytes. |
| } |
| |
| for (int state=0; state<table.fNumStates; state++) { |
| short[] rowArray = fSafeTable.get(state); |
| int row = state * rowLen; |
| |
| for (int col=0; col<numCharCategories; col++) { |
| if (use8Bits) { |
| Assert.assrt (rowArray[col] <= MAX_STATE_FOR_8BITS_TABLE); |
| } |
| table.fTable[row + RBBIDataWrapper.NEXTSTATES + col] = (char)rowArray[col]; |
| } |
| } |
| return table; |
| } |
| |
| |
| //----------------------------------------------------------------------------- |
| // |
| // printSet Debug function. Print the contents of a set of Nodes |
| // |
| //----------------------------------------------------------------------------- |
| |
| void printSet(Collection<RBBINode> s) { |
| for (RBBINode n : s) { |
| RBBINode.printInt(n.fSerialNum, 8); |
| } |
| System.out.println(); |
| } |
| |
| |
| |
| //----------------------------------------------------------------------------- |
| // |
| // printStates Debug Function. Dump the fully constructed state transition table. |
| // |
| //----------------------------------------------------------------------------- |
| |
| void printStates() { |
| int c; // input "character" |
| int n; // state number |
| |
| System.out.print("state | i n p u t s y m b o l s \n"); |
| System.out.print(" | Acc LA Tag"); |
| for (c=0; c<fRB.fSetBuilder.getNumCharCategories(); c++) { |
| RBBINode.printInt(c, 4); |
| } |
| System.out.print("\n"); |
| System.out.print(" |---------------"); |
| for (c=0; c<fRB.fSetBuilder.getNumCharCategories(); c++) { |
| System.out.print("----"); |
| } |
| System.out.print("\n"); |
| |
| for (n=0; n<fDStates.size(); n++) { |
| RBBIStateDescriptor sd = fDStates.get(n); |
| RBBINode.printInt(n, 5); |
| System.out.print(" | "); |
| |
| RBBINode.printInt(sd.fAccepting, 3); |
| RBBINode.printInt(sd.fLookAhead, 4); |
| RBBINode.printInt(sd.fTagsIdx, 6); |
| System.out.print(" "); |
| for (c=0; c<fRB.fSetBuilder.getNumCharCategories(); c++) { |
| RBBINode.printInt(sd.fDtran[c], 4); |
| } |
| System.out.print("\n"); |
| } |
| System.out.print("\n\n"); |
| } |
| |
| |
| /** |
| * Debug Function. Dump the fully constructed safe reverse table. |
| */ |
| void printReverseTable() { |
| int c; // input "character" |
| |
| System.out.printf(" Safe Reverse Table \n"); |
| if (fSafeTable == null) { |
| System.out.printf(" --- nullptr ---\n"); |
| return; |
| } |
| int numCharCategories = fSafeTable.get(0).length; |
| System.out.printf("state | i n p u t s y m b o l s \n"); |
| System.out.printf(" | Acc LA Tag"); |
| for (c=0; c< numCharCategories; c++) { |
| System.out.printf(" %2d", c); |
| } |
| System.out.printf("\n"); |
| System.out.printf(" |---------------"); |
| for (c=0; c<numCharCategories; c++) { |
| System.out.printf("---"); |
| } |
| System.out.printf("\n"); |
| |
| for (int n=0; n<fSafeTable.size(); n++) { |
| short rowArray[] = fSafeTable.get(n); |
| System.out.printf(" %3d | " , n); |
| System.out.printf("%3d %3d %5d ", 0, 0, 0); // Accepting, LookAhead, Tags |
| for (c=0; c<numCharCategories; c++) { |
| System.out.printf(" %2d", rowArray[c]); |
| } |
| System.out.printf("\n"); |
| } |
| System.out.printf("\n\n"); |
| } |
| |
| |
| |
| |
| |
| //----------------------------------------------------------------------------- |
| // |
| // printRuleStatusTable Debug Function. Dump the common rule status table |
| // |
| //----------------------------------------------------------------------------- |
| |
| void printRuleStatusTable() { |
| int thisRecord = 0; |
| int nextRecord = 0; |
| int i; |
| List<Integer> tbl = fRB.fRuleStatusVals; |
| |
| System.out.print("index | tags \n"); |
| System.out.print("-------------------\n"); |
| |
| while (nextRecord < tbl.size()) { |
| thisRecord = nextRecord; |
| nextRecord = thisRecord + tbl.get(thisRecord).intValue() + 1; |
| RBBINode.printInt(thisRecord, 7); |
| for (i=thisRecord+1; i<nextRecord; i++) { |
| int val = tbl.get(i).intValue(); |
| RBBINode.printInt(val, 7); |
| } |
| System.out.print("\n"); |
| } |
| System.out.print("\n\n"); |
| } |
| |
| |
| |
| } |