auto import from //depot/cupcake/@135843

commit: 8e35f3cfc7fba1d1c829dc557ebad6409cbe16a2 [log] [tgz]
author: The Android Open Source Project <[email protected]> Tue Mar 03 19:30:52 2009 -0800
committer: The Android Open Source Project <[email protected]> Tue Mar 03 19:30:52 2009 -0800
tree: 11425ea0b299d6fb89c6d3618a22d97d5bf68d0f
parent: 648161bb0edfc3d43db63caed5cc5213bc6cb78f [diff] [blame]
diff --git a/JavaScriptCore/pcre/pcre_internal.h b/JavaScriptCore/pcre/pcre_internal.h
new file mode 100644
index 0000000..06c3e9d
--- /dev/null
+++ b/JavaScriptCore/pcre/pcre_internal.h

@@ -0,0 +1,423 @@
+/* This is JavaScriptCore's variant of the PCRE library. While this library
+started out as a copy of PCRE, many of the features of PCRE have been
+removed. This library now supports only the regular expression features
+required by the JavaScript language specification, and has only the functions
+needed by JavaScriptCore and the rest of WebKit.
+
+                 Originally written by Philip Hazel
+           Copyright (c) 1997-2006 University of Cambridge
+    Copyright (C) 2002, 2004, 2006, 2007 Apple Inc. All rights reserved.
+
+-----------------------------------------------------------------------------
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+      this list of conditions and the following disclaimer.
+
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+
+    * Neither the name of the University of Cambridge nor the names of its
+      contributors may be used to endorse or promote products derived from
+      this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+-----------------------------------------------------------------------------
+*/
+
+/* This header contains definitions that are shared between the different
+modules, but which are not relevant to the exported API. This includes some
+functions whose names all begin with "_pcre_". */
+
+#ifndef PCRE_INTERNAL_H
+#define PCRE_INTERNAL_H
+
+/* Bit definitions for entries in the pcre_ctypes table. */
+
+#define ctype_space   0x01   /* tested by isSpaceChar() */
+#define ctype_xdigit  0x08   /* presumably hexadecimal digits; not referenced in this header */
+#define ctype_word    0x10   /* alphameric or '_'; tested by isWordChar() */
+
+/* Offsets for the bitmap tables in pcre_cbits. Each table contains a set
+of bits for a class map. Some classes are built by combining these tables. */
+
+#define cbit_space     0      /* \s */
+#define cbit_digit    32      /* \d */
+#define cbit_word     64      /* \w */
+#define cbit_length   96      /* Length of the cbits table: three 32-byte bitmaps */
+
+/* Offsets of the various tables from the base tables pointer, and
+total length. */
+
+#define lcc_offset      0     /* table read by toLowerCase() */
+#define fcc_offset    128     /* table read by flipCase() */
+#define cbits_offset  256     /* area read by classBitmapForChar() */
+#define ctypes_offset (cbits_offset + cbit_length) /* 352: area read by charTypeForChar() */
+#define tables_length (ctypes_offset + 128) /* 480: size of kjs_pcre_default_tables */
+
+#ifndef DFTABLES
+
+// Change the following to 1 to dump used regular expressions at process exit time.
+#define REGEXP_HISTOGRAM 0   /* when nonzero, struct JSRegExp also gains stringOffset and stringLength */
+
+#include "Assertions.h"
+
+#if COMPILER(MSVC)
+#pragma warning(disable: 4232)
+#pragma warning(disable: 4244)
+#endif
+
+#include "pcre.h"
+
+/* The value of LINK_SIZE determines the number of bytes used to store links as
+offsets within the compiled regex. The default is 2, which allows for compiled
+patterns up to 64K long. */
+
+#define LINK_SIZE   2   /* pointer step used by the link "AndAdvance" helpers; the link accessors always read and write exactly 2 bytes */
+
+/* Define DEBUG to get debugging output on stdout. */
+
+#if 0
+#define DEBUG
+#endif
+
+/* Use a macro for debugging printing, 'cause that eliminates the use of #ifdef
+inline, and there are *still* stupid compilers about that don't like indented
+pre-processor statements, or at least there were when I first wrote this. After
+all, it had only been about 10 years then... */
+
+#ifdef DEBUG
+#define DPRINTF(p) printf p   /* p must be a parenthesised argument list, e.g. DPRINTF(("n=%d", n)) */
+#else
+#define DPRINTF(p) /* expands to nothing when DEBUG is not defined */
+#endif
+
+/* PCRE keeps offsets in its compiled code as 2-byte quantities (always stored
+in big-endian order) by default. These are used, for example, to link from the
+start of a subpattern to its alternatives and its end. The use of 2 bytes per
+offset limits the size of the compiled regex to around 64K, which is big enough
+for almost everybody. However, I received a request for an even bigger limit.
+For this reason, and also to make the code easier to maintain, the storing and
+loading of offsets from the byte string is now handled by the functions that are
+defined here. */
+
+/* PCRE uses some other 2-byte quantities that do not change when the size of
+offsets changes. These are used for repeat counts and for other things such as
+capturing parenthesis numbers in back references. */
+
+static inline void put2ByteValue(unsigned char* opcodePtr, int value)
+{
+    ASSERT(value >= 0 && value <= 0xFFFF);
+    opcodePtr[0] = (value >> 8) & 0xFF; /* high-order byte is stored first (big-endian) */
+    opcodePtr[1] = value & 0xFF;        /* low-order byte follows */
+}
+
+static inline int get2ByteValue(const unsigned char* opcodePtr)
+{
+    return opcodePtr[1] | (opcodePtr[0] << 8); /* first byte is the high-order half of the 16-bit value */
+}
+
+static inline void put2ByteValueAndAdvance(unsigned char*& opcodePtr, int value)
+{
+    put2ByteValue(opcodePtr, value); /* write both bytes at the current position */
+    opcodePtr = opcodePtr + 2;       /* then move the caller's pointer just past them */
+}
+
+static inline void putLinkValueAllowZero(unsigned char* opcodePtr, int value)
+{
+    put2ByteValue(opcodePtr, value); /* a link is stored as a plain 2-byte value; zero is accepted here */
+}
+
+static inline int getLinkValueAllowZero(const unsigned char* opcodePtr)
+{
+    return get2ByteValue(opcodePtr); /* reads the 2-byte link at opcodePtr; the result may be zero */
+}
+
+#define MAX_PATTERN_SIZE (1 << 16)   /* 65536: one more than the largest value put2ByteValue() accepts (0xFFFF) */
+
+static inline void putLinkValue(unsigned char* opcodePtr, int value)
+{
+    ASSERT(value); /* asserts the link is non-zero; putLinkValueAllowZero() is the variant without this check */
+    putLinkValueAllowZero(opcodePtr, value);
+}
+
+static inline int getLinkValue(const unsigned char* opcodePtr)
+{
+    int value = getLinkValueAllowZero(opcodePtr); /* 2-byte link stored at opcodePtr */
+    ASSERT(value); /* asserts the stored link is non-zero */
+    return value;
+}
+
+static inline void putLinkValueAndAdvance(unsigned char*& opcodePtr, int value)
+{
+    putLinkValue(opcodePtr, value);    /* asserts value is non-zero, then stores it */
+    opcodePtr = opcodePtr + LINK_SIZE; /* step the caller's pointer over the link field */
+}
+
+static inline void putLinkValueAllowZeroAndAdvance(unsigned char*& opcodePtr, int value)
+{
+    putLinkValueAllowZero(opcodePtr, value); /* stores the link; zero is permitted */
+    opcodePtr = opcodePtr + LINK_SIZE;       /* step the caller's pointer over the link field */
+}
+
+// FIXME: These are really more of a "compiled regexp state" than "regexp options"
+enum RegExpOptions {
+    UseFirstByteOptimizationOption = 0x40000000,  /* firstByte is set */
+    UseRequiredByteOptimizationOption = 0x20000000,  /* reqByte is set */
+    UseMultiLineFirstByteOptimizationOption = 0x10000000,  /* start after \n for multiline */
+    IsAnchoredOption = 0x02000000,  /* NOTE(review): original comment read "can't use partial with this regex", which does not describe anchoring -- confirm */
+    IgnoreCaseOption = 0x00000001,
+    MatchAcrossMultipleLinesOption = 0x00000002
+};
+
+/* Flags added to firstByte or reqByte; a "non-literal" item is either a
+variable-length repeat, or anything other than literal characters. */
+
+#define REQ_IGNORE_CASE 0x0100    /* indicates that case should be ignored */
+#define REQ_VARY     0x0200    /* reqByte followed non-literal item */
+
+/* Miscellaneous definitions */
+
+/* Flag bits and data types for the extended class (OP_XCLASS) for classes that
+contain UTF-8 characters with values greater than 255. */
+
+#define XCL_NOT    0x01    /* Flag: this is a negative class */
+#define XCL_MAP    0x02    /* Flag: a 32-byte map is present */
+
+#define XCL_END       0    /* Marks the end of the individual items */
+#define XCL_SINGLE    1    /* Single item (one multibyte char) follows */
+#define XCL_RANGE     2    /* A range (two multibyte chars) follows */
+
+/* These are escaped items that aren't just an encoding of a particular data
+value such as \n. They must have non-zero values, as check_escape() returns
+their negation. Also, they must appear in the same order as in the opcode
+definitions below, up to ESC_w. The final one must be
+ESC_REF as subsequent values are used for \1, \2, \3, etc. There are two
+tests in the code for an escape > ESC_b and <= ESC_w to
+detect the types that may be repeated. These are the types that consume
+characters. If any new escapes are put in between that don't consume a
+character, that code will have to change. */
+
+enum { ESC_B = 1, ESC_b, ESC_D, ESC_d, ESC_S, ESC_s, ESC_W, ESC_w, ESC_REF }; /* ESC_B..ESC_w (1..8) equal OP_NOT_WORD_BOUNDARY..OP_WORDCHAR */
+
+/* Opcode table: OP_BRA must be last, as all values >= it are used for brackets
+that extract substrings. Starting from 1 (i.e. after OP_END), the values up to
+OP_WORDCHAR must correspond in order to the list of escapes immediately above.
+Note that whenever this list is updated, the two macro definitions that follow
+must also be updated to match. */
+
+#define FOR_EACH_OPCODE(macro) /* applies `macro` to every opcode name, in numeric order */ \
+    macro(END) /* first entry, so it enumerates as 0 */ \
+    \
+    macro(NOT_WORD_BOUNDARY) /* entries 1..8 mirror ESC_B..ESC_w */ \
+    macro(WORD_BOUNDARY) \
+    macro(NOT_DIGIT) \
+    macro(DIGIT) \
+    macro(NOT_WHITESPACE) \
+    macro(WHITESPACE) \
+    macro(NOT_WORDCHAR) \
+    macro(WORDCHAR) \
+    \
+    macro(NOT_NEWLINE) \
+    \
+    macro(CIRC) \
+    macro(DOLL) \
+    macro(BOL) \
+    macro(EOL) \
+    macro(CHAR) \
+    macro(CHAR_IGNORING_CASE) \
+    macro(ASCII_CHAR) \
+    macro(ASCII_LETTER_IGNORING_CASE) \
+    macro(NOT) \
+    \
+    macro(STAR) \
+    macro(MINSTAR) \
+    macro(PLUS) \
+    macro(MINPLUS) \
+    macro(QUERY) \
+    macro(MINQUERY) \
+    macro(UPTO) \
+    macro(MINUPTO) \
+    macro(EXACT) \
+    \
+    macro(NOTSTAR) \
+    macro(NOTMINSTAR) \
+    macro(NOTPLUS) \
+    macro(NOTMINPLUS) \
+    macro(NOTQUERY) \
+    macro(NOTMINQUERY) \
+    macro(NOTUPTO) \
+    macro(NOTMINUPTO) \
+    macro(NOTEXACT) \
+    \
+    macro(TYPESTAR) \
+    macro(TYPEMINSTAR) \
+    macro(TYPEPLUS) \
+    macro(TYPEMINPLUS) \
+    macro(TYPEQUERY) \
+    macro(TYPEMINQUERY) \
+    macro(TYPEUPTO) \
+    macro(TYPEMINUPTO) \
+    macro(TYPEEXACT) \
+    \
+    macro(CRSTAR) \
+    macro(CRMINSTAR) \
+    macro(CRPLUS) \
+    macro(CRMINPLUS) \
+    macro(CRQUERY) \
+    macro(CRMINQUERY) \
+    macro(CRRANGE) \
+    macro(CRMINRANGE) \
+    \
+    macro(CLASS) \
+    macro(NCLASS) \
+    macro(XCLASS) \
+    \
+    macro(REF) \
+    \
+    macro(ALT) \
+    macro(KET) \
+    macro(KETRMAX) \
+    macro(KETRMIN) \
+    \
+    macro(ASSERT) \
+    macro(ASSERT_NOT) \
+    \
+    macro(BRAZERO) \
+    macro(BRAMINZERO) \
+    macro(BRANUMBER) \
+    macro(BRA) /* must stay last: every value at or above it denotes an extracting bracket */
+
+#define OPCODE_ENUM_VALUE(opcode) OP_##opcode,   /* turns a name such as END into the enumerator "OP_END," */
+enum { FOR_EACH_OPCODE(OPCODE_ENUM_VALUE) };   /* yields OP_END == 0 through OP_BRA == 67 with the current list */
+
+/* WARNING WARNING WARNING: There is an implicit assumption in pcre.c and
+study.c that all opcodes are less than 128 in value. This makes handling UTF-8
+character sequences easier. */
+
+/* The highest extraction number before we have to start using additional
+bytes. (Originally PCRE didn't have support for extraction counts higher than
+this number.) The value is limited by the number of opcodes left after OP_BRA,
+i.e. 255 - OP_BRA. We actually set it a bit lower to leave room for additional
+opcodes. */
+
+/* FIXME: Note that OP_BRA + 100 is > 128, so the two comments above
+are in conflict! */
+
+#define EXTRACT_BASIC_MAX  100   /* OP_BRA + 100 is 167 with the current opcode list */
+
+/* The code vector runs on as long as necessary after the end. */
+
+struct JSRegExp {
+    unsigned options; /* presumably a combination of RegExpOptions bits -- TODO confirm against the code that fills it in */
+
+    unsigned short topBracket; /* NOTE(review): looks like the highest capturing-bracket number -- confirm */
+    unsigned short topBackref; /* NOTE(review): looks like the highest back-reference number -- confirm */
+    
+    unsigned short firstByte; /* may carry the REQ_IGNORE_CASE / REQ_VARY flag bits in addition to the byte */
+    unsigned short reqByte;   /* may carry the same flag bits as firstByte */
+
+#if REGEXP_HISTOGRAM
+    size_t stringOffset; /* compiled in only when REGEXP_HISTOGRAM is nonzero */
+    size_t stringLength; /* compiled in only when REGEXP_HISTOGRAM is nonzero */
+#endif
+};
+
+/* Internal shared data tables. These are tables that are used by more than one
+ of the exported public functions. They have to be "external" in the C sense,
+ but are not part of the PCRE public API. The data for these tables is in the
+ pcre_tables.c module. */
+
+#define kjs_pcre_utf8_table1_size 6   /* matches the declared length of kjs_pcre_utf8_table1 */
+
+extern const int    kjs_pcre_utf8_table1[6];
+extern const int    kjs_pcre_utf8_table2[6];
+extern const int    kjs_pcre_utf8_table3[6];
+extern const unsigned char kjs_pcre_utf8_table4[0x40];
+
+extern const unsigned char kjs_pcre_default_tables[tables_length]; /* laid out as lcc, fcc, cbits, ctypes (see the *_offset macros) */
+
+static inline unsigned char toLowerCase(unsigned char c)
+{
+    /* The lower-case table begins lcc_offset bytes into the shared tables. */
+    return kjs_pcre_default_tables[lcc_offset + c];
+}
+
+static inline unsigned char flipCase(unsigned char c)
+{
+    /* The flipped-case table begins fcc_offset bytes into the shared tables. */
+    return kjs_pcre_default_tables[fcc_offset + c];
+}
+
+static inline unsigned char classBitmapForChar(unsigned char c)
+{
+    /* Returns the byte at index c of the class-bitmap area, which begins at cbits_offset. */
+    return kjs_pcre_default_tables[cbits_offset + c];
+}
+
+static inline unsigned char charTypeForChar(unsigned char c)
+{
+    ASSERT(c < tables_length - ctypes_offset); /* the ctypes area has 128 entries; a larger c would read past kjs_pcre_default_tables */
+    return kjs_pcre_default_tables[ctypes_offset + c]; /* ctype_* flag bits recorded for character c */
+}
+
+static inline bool isWordChar(UChar c)
+{
+    return (c < 128) ? (charTypeForChar(c) & ctype_word) != 0 : false; /* only values below 128 are looked up; anything else is never a word character */
+}
+
+static inline bool isSpaceChar(UChar c)
+{
+    return c == 0x00A0 || (c < 128 && (charTypeForChar(c) & ctype_space) != 0); /* U+00A0 is the only value of 128 or above that is accepted */
+}
+
+static inline bool isNewline(UChar nl)
+{
+    return nl == 0x000A || nl == 0x000D || nl == 0x2028 || nl == 0x2029; /* LF, CR, U+2028 LINE SEPARATOR, U+2029 PARAGRAPH SEPARATOR */
+}
+
+static inline bool isBracketStartOpcode(unsigned char opcode)
+{
+    /* Every opcode numbered OP_BRA or above starts a bracket. */
+    if (opcode >= OP_BRA)
+        return true;
+
+    /* Below OP_BRA, only the two assertion opcodes start one. */
+    if (opcode == OP_ASSERT || opcode == OP_ASSERT_NOT)
+        return true;
+
+    return false;
+}
+
+static inline void advanceToEndOfBracket(const unsigned char*& opcodePtr)
+{
+    ASSERT(isBracketStartOpcode(*opcodePtr) || *opcodePtr == OP_ALT);
+    do { /* each hop follows the non-zero link stored right after the current opcode byte */
+        opcodePtr = opcodePtr + getLinkValue(opcodePtr + 1);
+    } while (*opcodePtr == OP_ALT); /* stop at the first landing opcode that is not another alternative */
+}
+
+/* Internal shared functions. These are functions that are used in more
+than one of the source files. They have to have external linkage, but
+are not part of the public API and so not exported from the library. */
+
+extern int kjs_pcre_ucp_othercase(unsigned);   /* NOTE(review): presumably maps a character to its other-case form -- confirm in the defining file */
+extern bool kjs_pcre_xclass(int, const unsigned char*);   /* NOTE(review): presumably tests a character against OP_XCLASS data -- confirm in the defining file */
+
+#endif
+
+#endif
+
+/* End of pcre_internal.h */
commit	8e35f3cfc7fba1d1c829dc557ebad6409cbe16a2	[log] [tgz]
author	The Android Open Source Project <[email protected]>	Tue Mar 03 19:30:52 2009 -0800
committer	The Android Open Source Project <[email protected]>	Tue Mar 03 19:30:52 2009 -0800
tree	11425ea0b299d6fb89c6d3618a22d97d5bf68d0f
parent	648161bb0edfc3d43db63caed5cc5213bc6cb78f [diff] [blame]