/*
 * Copyright (C) 2008 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
| |
| package android.os; |
| |
| import android.annotation.IntDef; |
| import android.util.Log; |
| import android.util.proto.ProtoOutputStream; |
| |
| import java.lang.annotation.Retention; |
| import java.lang.annotation.RetentionPolicy; |
| import java.util.Arrays; |
| |
| /** |
| * A simple pattern matcher, which is safe to use on untrusted data: it does |
| * not provide full reg-exp support, only simple globbing that can not be |
| * used maliciously. |
| */ |
| @android.ravenwood.annotation.RavenwoodKeepWholeClass |
| public class PatternMatcher implements Parcelable { |
| /** |
| * Pattern type: the given pattern must exactly match the string it is |
| * tested against. |
| */ |
| public static final int PATTERN_LITERAL = 0; |
| |
| /** |
| * Pattern type: the given pattern must match the |
| * beginning of the string it is tested against. |
| */ |
| public static final int PATTERN_PREFIX = 1; |
| |
| /** |
| * Pattern type: the given pattern is interpreted with a |
| * simple glob syntax for matching against the string it is tested against. |
| * In this syntax, you can use the '*' character to match against zero or |
| * more occurrences of the character immediately before. If the |
| * character before it is '.' it will match any character. The character |
| * '\' can be used as an escape. This essentially provides only the '*' |
| * wildcard part of a normal regexp. |
| */ |
| public static final int PATTERN_SIMPLE_GLOB = 2; |
| |
| /** |
| * Pattern type: the given pattern is interpreted with a regular |
| * expression-like syntax for matching against the string it is tested |
| * against. Supported tokens include dot ({@code .}) and sets ({@code [...]}) |
| * with full support for character ranges and the not ({@code ^}) modifier. |
| * Supported modifiers include star ({@code *}) for zero-or-more, plus ({@code +}) |
| * for one-or-more and full range ({@code {...}}) support. This is a simple |
| * evaluation implementation in which matching is done against the pattern in |
| * real time with no backtracking support. |
| */ |
| public static final int PATTERN_ADVANCED_GLOB = 3; |
| |
| /** |
| * Pattern type: the given pattern must match the |
| * end of the string it is tested against. |
| */ |
| public static final int PATTERN_SUFFIX = 4; |
| |
| /** @hide */ |
| @IntDef(value = { |
| PATTERN_LITERAL, |
| PATTERN_PREFIX, |
| PATTERN_SIMPLE_GLOB, |
| PATTERN_ADVANCED_GLOB, |
| PATTERN_SUFFIX, |
| }) |
| @Retention(RetentionPolicy.SOURCE) |
| public @interface PatternType {} |
| |
| // token types for advanced matching |
| private static final int TOKEN_TYPE_LITERAL = 0; |
| private static final int TOKEN_TYPE_ANY = 1; |
| private static final int TOKEN_TYPE_SET = 2; |
| private static final int TOKEN_TYPE_INVERSE_SET = 3; |
| |
| // Return for no match |
| private static final int NO_MATCH = -1; |
| |
| private static final String TAG = "PatternMatcher"; |
| |
| // Parsed placeholders for advanced patterns |
| private static final int PARSED_TOKEN_CHAR_SET_START = -1; |
| private static final int PARSED_TOKEN_CHAR_SET_INVERSE_START = -2; |
| private static final int PARSED_TOKEN_CHAR_SET_STOP = -3; |
| private static final int PARSED_TOKEN_CHAR_ANY = -4; |
| private static final int PARSED_MODIFIER_RANGE_START = -5; |
| private static final int PARSED_MODIFIER_RANGE_STOP = -6; |
| private static final int PARSED_MODIFIER_ZERO_OR_MORE = -7; |
| private static final int PARSED_MODIFIER_ONE_OR_MORE = -8; |
| |
| private final String mPattern; |
| private final int mType; |
| private final int[] mParsedPattern; |
| |
| |
| private static final int MAX_PATTERN_STORAGE = 2048; |
| // workspace to use for building a parsed advanced pattern; |
| private static final int[] sParsedPatternScratch = new int[MAX_PATTERN_STORAGE]; |
| |
| public PatternMatcher(String pattern, int type) { |
| mPattern = pattern; |
| mType = type; |
| if (mType == PATTERN_ADVANCED_GLOB) { |
| mParsedPattern = parseAndVerifyAdvancedPattern(pattern); |
| } else { |
| mParsedPattern = null; |
| } |
| } |
| |
| public final String getPath() { |
| return mPattern; |
| } |
| |
| public final int getType() { |
| return mType; |
| } |
| |
| public boolean match(String str) { |
| return matchPattern(str, mPattern, mParsedPattern, mType); |
| } |
| |
| public String toString() { |
| String type = "? "; |
| switch (mType) { |
| case PATTERN_LITERAL: |
| type = "LITERAL: "; |
| break; |
| case PATTERN_PREFIX: |
| type = "PREFIX: "; |
| break; |
| case PATTERN_SIMPLE_GLOB: |
| type = "GLOB: "; |
| break; |
| case PATTERN_ADVANCED_GLOB: |
| type = "ADVANCED: "; |
| break; |
| case PATTERN_SUFFIX: |
| type = "SUFFIX: "; |
| break; |
| } |
| return "PatternMatcher{" + type + mPattern + "}"; |
| } |
| |
| /** @hide */ |
| public void dumpDebug(ProtoOutputStream proto, long fieldId) { |
| long token = proto.start(fieldId); |
| proto.write(PatternMatcherProto.PATTERN, mPattern); |
| proto.write(PatternMatcherProto.TYPE, mType); |
| // PatternMatcherProto.PARSED_PATTERN is too much to dump, but the field is reserved to |
| // match the current data structure. |
| proto.end(token); |
| } |
| |
| /** |
| * Perform a check on the matcher for the pattern type of {@link #PATTERN_ADVANCED_GLOB}. |
| * Return true if it passed. |
| * @hide |
| */ |
| public boolean check() { |
| try { |
| if (mType == PATTERN_ADVANCED_GLOB) { |
| return Arrays.equals(mParsedPattern, parseAndVerifyAdvancedPattern(mPattern)); |
| } |
| } catch (IllegalArgumentException e) { |
| Log.w(TAG, "Failed to verify advanced pattern: " + e.getMessage()); |
| return false; |
| } |
| return true; |
| } |
| |
| public int describeContents() { |
| return 0; |
| } |
| |
| public void writeToParcel(Parcel dest, int flags) { |
| dest.writeString(mPattern); |
| dest.writeInt(mType); |
| dest.writeIntArray(mParsedPattern); |
| } |
| |
| public PatternMatcher(Parcel src) { |
| mPattern = src.readString(); |
| mType = src.readInt(); |
| mParsedPattern = src.createIntArray(); |
| } |
| |
| public static final @android.annotation.NonNull Parcelable.Creator<PatternMatcher> CREATOR |
| = new Parcelable.Creator<PatternMatcher>() { |
| public PatternMatcher createFromParcel(Parcel source) { |
| return new PatternMatcher(source); |
| } |
| |
| public PatternMatcher[] newArray(int size) { |
| return new PatternMatcher[size]; |
| } |
| }; |
| |
| static boolean matchPattern(String match, String pattern, int[] parsedPattern, int type) { |
| if (match == null) return false; |
| if (type == PATTERN_LITERAL) { |
| return pattern.equals(match); |
| } if (type == PATTERN_PREFIX) { |
| return match.startsWith(pattern); |
| } else if (type == PATTERN_SIMPLE_GLOB) { |
| return matchGlobPattern(pattern, match); |
| } else if (type == PATTERN_ADVANCED_GLOB) { |
| return matchAdvancedPattern(parsedPattern, match); |
| } else if (type == PATTERN_SUFFIX) { |
| return match.endsWith(pattern); |
| } |
| return false; |
| } |
| |
| static boolean matchGlobPattern(String pattern, String match) { |
| final int NP = pattern.length(); |
| if (NP <= 0) { |
| return match.length() <= 0; |
| } |
| final int NM = match.length(); |
| int ip = 0, im = 0; |
| char nextChar = pattern.charAt(0); |
| while ((ip<NP) && (im<NM)) { |
| char c = nextChar; |
| ip++; |
| nextChar = ip < NP ? pattern.charAt(ip) : 0; |
| final boolean escaped = (c == '\\'); |
| if (escaped) { |
| c = nextChar; |
| ip++; |
| nextChar = ip < NP ? pattern.charAt(ip) : 0; |
| } |
| if (nextChar == '*') { |
| if (!escaped && c == '.') { |
| if (ip >= (NP-1)) { |
| // at the end with a pattern match, so |
| // all is good without checking! |
| return true; |
| } |
| ip++; |
| nextChar = pattern.charAt(ip); |
| // Consume everything until the next character in the |
| // pattern is found. |
| if (nextChar == '\\') { |
| ip++; |
| nextChar = ip < NP ? pattern.charAt(ip) : 0; |
| } |
| do { |
| if (match.charAt(im) == nextChar) { |
| break; |
| } |
| im++; |
| } while (im < NM); |
| if (im == NM) { |
| // Whoops, the next character in the pattern didn't |
| // exist in the match. |
| return false; |
| } |
| ip++; |
| nextChar = ip < NP ? pattern.charAt(ip) : 0; |
| im++; |
| } else { |
| // Consume only characters matching the one before '*'. |
| do { |
| if (match.charAt(im) != c) { |
| break; |
| } |
| im++; |
| } while (im < NM); |
| ip++; |
| nextChar = ip < NP ? pattern.charAt(ip) : 0; |
| } |
| } else { |
| if (c != '.' && match.charAt(im) != c) return false; |
| im++; |
| } |
| } |
| |
| if (ip >= NP && im >= NM) { |
| // Reached the end of both strings, all is good! |
| return true; |
| } |
| |
| // One last check: we may have finished the match string, but still |
| // have a '.*' at the end of the pattern, which should still count |
| // as a match. |
| if (ip == NP-2 && pattern.charAt(ip) == '.' |
| && pattern.charAt(ip+1) == '*') { |
| return true; |
| } |
| |
| return false; |
| } |
| |
| /** |
| * Parses the advanced pattern and returns an integer array representation of it. The integer |
| * array treats each field as a character if positive and a unique token placeholder if |
| * negative. This method will throw on any pattern structure violations. |
| */ |
| synchronized static int[] parseAndVerifyAdvancedPattern(String pattern) { |
| int ip = 0; |
| final int LP = pattern.length(); |
| |
| int it = 0; |
| |
| boolean inSet = false; |
| boolean inRange = false; |
| boolean inCharClass = false; |
| |
| boolean addToParsedPattern; |
| |
| while (ip < LP) { |
| if (it > MAX_PATTERN_STORAGE - 3) { |
| throw new IllegalArgumentException("Pattern is too large!"); |
| } |
| |
| char c = pattern.charAt(ip); |
| addToParsedPattern = false; |
| |
| switch (c) { |
| case '[': |
| if (inSet) { |
| addToParsedPattern = true; // treat as literal or char class in set |
| } else { |
| if (pattern.charAt(ip + 1) == '^') { |
| sParsedPatternScratch[it++] = PARSED_TOKEN_CHAR_SET_INVERSE_START; |
| ip++; // skip over the '^' |
| } else { |
| sParsedPatternScratch[it++] = PARSED_TOKEN_CHAR_SET_START; |
| } |
| ip++; // move to the next pattern char |
| inSet = true; |
| continue; |
| } |
| break; |
| case ']': |
| if (!inSet) { |
| addToParsedPattern = true; // treat as literal outside of set |
| } else { |
| int parsedToken = sParsedPatternScratch[it - 1]; |
| if (parsedToken == PARSED_TOKEN_CHAR_SET_START || |
| parsedToken == PARSED_TOKEN_CHAR_SET_INVERSE_START) { |
| throw new IllegalArgumentException( |
| "You must define characters in a set."); |
| } |
| sParsedPatternScratch[it++] = PARSED_TOKEN_CHAR_SET_STOP; |
| inSet = false; |
| inCharClass = false; |
| } |
| break; |
| case '{': |
| if (!inSet) { |
| if (it == 0 || isParsedModifier(sParsedPatternScratch[it - 1])) { |
| throw new IllegalArgumentException("Modifier must follow a token."); |
| } |
| sParsedPatternScratch[it++] = PARSED_MODIFIER_RANGE_START; |
| ip++; |
| inRange = true; |
| } |
| break; |
| case '}': |
| if (inRange) { // only terminate the range if we're currently in one |
| sParsedPatternScratch[it++] = PARSED_MODIFIER_RANGE_STOP; |
| inRange = false; |
| } |
| break; |
| case '*': |
| if (!inSet) { |
| if (it == 0 || isParsedModifier(sParsedPatternScratch[it - 1])) { |
| throw new IllegalArgumentException("Modifier must follow a token."); |
| } |
| sParsedPatternScratch[it++] = PARSED_MODIFIER_ZERO_OR_MORE; |
| } |
| break; |
| case '+': |
| if (!inSet) { |
| if (it == 0 || isParsedModifier(sParsedPatternScratch[it - 1])) { |
| throw new IllegalArgumentException("Modifier must follow a token."); |
| } |
| sParsedPatternScratch[it++] = PARSED_MODIFIER_ONE_OR_MORE; |
| } |
| break; |
| case '.': |
| if (!inSet) { |
| sParsedPatternScratch[it++] = PARSED_TOKEN_CHAR_ANY; |
| } |
| break; |
| case '\\': // escape |
| if (ip + 1 >= LP) { |
| throw new IllegalArgumentException("Escape found at end of pattern!"); |
| } |
| c = pattern.charAt(++ip); |
| addToParsedPattern = true; |
| break; |
| default: |
| addToParsedPattern = true; |
| break; |
| } |
| if (inSet) { |
| if (inCharClass) { |
| sParsedPatternScratch[it++] = c; |
| inCharClass = false; |
| } else { |
| // look forward for character class |
| if (ip + 2 < LP |
| && pattern.charAt(ip + 1) == '-' |
| && pattern.charAt(ip + 2) != ']') { |
| inCharClass = true; |
| sParsedPatternScratch[it++] = c; // set first token as lower end of range |
| ip++; // advance past dash |
| } else { // literal |
| sParsedPatternScratch[it++] = c; // set first token as literal |
| sParsedPatternScratch[it++] = c; // set second set as literal |
| } |
| } |
| } else if (inRange) { |
| int endOfSet = pattern.indexOf('}', ip); |
| if (endOfSet < 0) { |
| throw new IllegalArgumentException("Range not ended with '}'"); |
| } |
| String rangeString = pattern.substring(ip, endOfSet); |
| int commaIndex = rangeString.indexOf(','); |
| try { |
| final int rangeMin; |
| final int rangeMax; |
| if (commaIndex < 0) { |
| int parsedRange = Integer.parseInt(rangeString); |
| rangeMin = rangeMax = parsedRange; |
| } else { |
| rangeMin = Integer.parseInt(rangeString.substring(0, commaIndex)); |
| if (commaIndex == rangeString.length() - 1) { // e.g. {n,} (n or more) |
| rangeMax = Integer.MAX_VALUE; |
| } else { |
| rangeMax = Integer.parseInt(rangeString.substring(commaIndex + 1)); |
| } |
| } |
| if (rangeMin > rangeMax) { |
| throw new IllegalArgumentException( |
| "Range quantifier minimum is greater than maximum"); |
| } |
| sParsedPatternScratch[it++] = rangeMin; |
| sParsedPatternScratch[it++] = rangeMax; |
| } catch (NumberFormatException e) { |
| throw new IllegalArgumentException("Range number format incorrect", e); |
| } |
| ip = endOfSet; |
| continue; // don't increment ip |
| } else if (addToParsedPattern) { |
| sParsedPatternScratch[it++] = c; |
| } |
| ip++; |
| } |
| if (inSet) { |
| throw new IllegalArgumentException("Set was not terminated!"); |
| } |
| return Arrays.copyOf(sParsedPatternScratch, it); |
| } |
| |
| private static boolean isParsedModifier(int parsedChar) { |
| return parsedChar == PARSED_MODIFIER_ONE_OR_MORE || |
| parsedChar == PARSED_MODIFIER_ZERO_OR_MORE || |
| parsedChar == PARSED_MODIFIER_RANGE_STOP || |
| parsedChar == PARSED_MODIFIER_RANGE_START; |
| } |
| |
| static boolean matchAdvancedPattern(int[] parsedPattern, String match) { |
| |
| // create indexes |
| int ip = 0, im = 0; |
| |
| // one-time length check |
| final int LP = parsedPattern.length, LM = match.length(); |
| |
| // The current character being analyzed in the pattern |
| int patternChar; |
| |
| int tokenType; |
| |
| int charSetStart = 0, charSetEnd = 0; |
| |
| while (ip < LP) { // we still have content in the pattern |
| |
| patternChar = parsedPattern[ip]; |
| // get the match type of the next verb |
| |
| switch (patternChar) { |
| case PARSED_TOKEN_CHAR_ANY: |
| tokenType = TOKEN_TYPE_ANY; |
| ip++; |
| break; |
| case PARSED_TOKEN_CHAR_SET_START: |
| case PARSED_TOKEN_CHAR_SET_INVERSE_START: |
| tokenType = patternChar == PARSED_TOKEN_CHAR_SET_START |
| ? TOKEN_TYPE_SET |
| : TOKEN_TYPE_INVERSE_SET; |
| charSetStart = ip + 1; // start from the char after the set start |
| while (++ip < LP && parsedPattern[ip] != PARSED_TOKEN_CHAR_SET_STOP); |
| charSetEnd = ip - 1; // we're on the set stop, end is the previous |
| ip++; // move the pointer to the next pattern entry |
| break; |
| default: |
| charSetStart = ip; |
| tokenType = TOKEN_TYPE_LITERAL; |
| ip++; |
| break; |
| } |
| |
| final int minRepetition; |
| final int maxRepetition; |
| |
| // look for a match length modifier |
| if (ip >= LP) { |
| minRepetition = maxRepetition = 1; |
| } else { |
| patternChar = parsedPattern[ip]; |
| switch (patternChar) { |
| case PARSED_MODIFIER_ZERO_OR_MORE: |
| minRepetition = 0; |
| maxRepetition = Integer.MAX_VALUE; |
| ip++; |
| break; |
| case PARSED_MODIFIER_ONE_OR_MORE: |
| minRepetition = 1; |
| maxRepetition = Integer.MAX_VALUE; |
| ip++; |
| break; |
| case PARSED_MODIFIER_RANGE_START: |
| minRepetition = parsedPattern[++ip]; |
| maxRepetition = parsedPattern[++ip]; |
| ip += 2; // step over PARSED_MODIFIER_RANGE_STOP and on to the next token |
| break; |
| default: |
| minRepetition = maxRepetition = 1; // implied literal |
| break; |
| } |
| } |
| if (minRepetition > maxRepetition) { |
| return false; |
| } |
| |
| // attempt to match as many characters as possible |
| int matched = matchChars(match, im, LM, tokenType, minRepetition, maxRepetition, |
| parsedPattern, charSetStart, charSetEnd); |
| |
| // if we found a conflict, return false immediately |
| if (matched == NO_MATCH) { |
| return false; |
| } |
| |
| // move the match pointer the number of characters matched |
| im += matched; |
| } |
| return ip >= LP && im >= LM; // have parsed entire string and regex |
| } |
| |
| private static int matchChars(String match, int im, final int lm, int tokenType, |
| int minRepetition, int maxRepetition, int[] parsedPattern, |
| int tokenStart, int tokenEnd) { |
| int matched = 0; |
| |
| while(matched < maxRepetition |
| && matchChar(match, im + matched, lm, tokenType, parsedPattern, tokenStart, |
| tokenEnd)) { |
| matched++; |
| } |
| |
| return matched < minRepetition ? NO_MATCH : matched; |
| } |
| |
| private static boolean matchChar(String match, int im, final int lm, int tokenType, |
| int[] parsedPattern, int tokenStart, int tokenEnd) { |
| if (im >= lm) { // we've overrun the string, no match |
| return false; |
| } |
| switch (tokenType) { |
| case TOKEN_TYPE_ANY: |
| return true; |
| case TOKEN_TYPE_SET: |
| for (int i = tokenStart; i < tokenEnd; i += 2) { |
| char matchChar = match.charAt(im); |
| if (matchChar >= parsedPattern[i] && matchChar <= parsedPattern[i + 1]) { |
| return true; |
| } |
| } |
| return false; |
| case TOKEN_TYPE_INVERSE_SET: |
| for (int i = tokenStart; i < tokenEnd; i += 2) { |
| char matchChar = match.charAt(im); |
| if (matchChar >= parsedPattern[i] && matchChar <= parsedPattern[i + 1]) { |
| return false; |
| } |
| } |
| return true; |
| case TOKEN_TYPE_LITERAL: |
| return match.charAt(im) == parsedPattern[tokenStart]; |
| default: |
| return false; |
| } |
| } |
| } |