Behdad Esfahbod | 7f19ae7 | 2012-09-25 11:22:28 -0400 | [diff] [blame] | 1 | /* |
| 2 | * Copyright © 2011,2012 Google, Inc. |
| 3 | * |
| 4 | * This is part of HarfBuzz, a text shaping library. |
| 5 | * |
| 6 | * Permission is hereby granted, without written agreement and without |
| 7 | * license or royalty fees, to use, copy, modify, and distribute this |
| 8 | * software and its documentation for any purpose, provided that the |
| 9 | * above copyright notice and the following two paragraphs appear in |
| 10 | * all copies of this software. |
| 11 | * |
| 12 | * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR |
| 13 | * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES |
| 14 | * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN |
| 15 | * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH |
| 16 | * DAMAGE. |
| 17 | * |
| 18 | * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, |
| 19 | * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND |
| 20 | * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS |
| 21 | * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO |
| 22 | * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. |
| 23 | * |
| 24 | * Google Author(s): Behdad Esfahbod |
| 25 | */ |
| 26 | |
| 27 | #ifndef HB_UTF_PRIVATE_HH |
| 28 | #define HB_UTF_PRIVATE_HH |
| 29 | |
| 30 | #include "hb-private.hh" |
| 31 | |
| 32 | |
| 33 | /* UTF-8 */ |
| 34 | |
/* Classify a UTF-8 lead byte.
 *
 * Char: value of the lead byte.  It is evaluated several times, so pass an
 *       expression without side effects.
 * Mask: lvalue receiving the mask that selects the payload bits of the lead
 *       byte (0 when the byte cannot start a sequence).
 * Len:  lvalue receiving the total sequence length in bytes (1..4), or 0
 *       when Char cannot start a sequence (a 10xxxxxx continuation byte or
 *       a value with the top five bits set).
 *
 * The body is wrapped in do { } while (0) so that the macro expands to a
 * single statement and can be used safely in an unbraced if/else. */
#define HB_UTF8_COMPUTE(Char, Mask, Len) \
  do { \
    if ((Char) < 128) { (Len) = 1; (Mask) = 0x7f; } \
    else if (((Char) & 0xe0) == 0xc0) { (Len) = 2; (Mask) = 0x1f; } \
    else if (((Char) & 0xf0) == 0xe0) { (Len) = 3; (Mask) = 0x0f; } \
    else if (((Char) & 0xf8) == 0xf0) { (Len) = 4; (Mask) = 0x07; } \
    else { (Len) = 0; (Mask) = 0; } \
  } while (0)
| 41 | |
| 42 | static inline const uint8_t * |
| 43 | hb_utf_next (const uint8_t *text, |
| 44 | const uint8_t *end, |
| 45 | hb_codepoint_t *unicode) |
| 46 | { |
Behdad Esfahbod | 70ea4ac | 2012-09-25 12:30:16 -0400 | [diff] [blame] | 47 | hb_codepoint_t c = *text, mask; |
| 48 | unsigned int len; |
Behdad Esfahbod | 7f19ae7 | 2012-09-25 11:22:28 -0400 | [diff] [blame] | 49 | |
| 50 | /* TODO check for overlong sequences? */ |
| 51 | |
| 52 | HB_UTF8_COMPUTE (c, mask, len); |
| 53 | if (unlikely (!len || (unsigned int) (end - text) < len)) { |
| 54 | *unicode = -1; |
| 55 | return text + 1; |
| 56 | } else { |
| 57 | hb_codepoint_t result; |
| 58 | unsigned int i; |
| 59 | result = c & mask; |
| 60 | for (i = 1; i < len; i++) |
| 61 | { |
| 62 | if (unlikely ((text[i] & 0xc0) != 0x80)) |
| 63 | { |
| 64 | *unicode = -1; |
| 65 | return text + 1; |
| 66 | } |
| 67 | result <<= 6; |
| 68 | result |= (text[i] & 0x3f); |
| 69 | } |
| 70 | *unicode = result; |
| 71 | return text + len; |
| 72 | } |
| 73 | } |
| 74 | |
Behdad Esfahbod | 89ac39d | 2012-09-25 13:59:24 -0400 | [diff] [blame] | 75 | static inline const uint8_t * |
| 76 | hb_utf_prev (const uint8_t *text, |
| 77 | const uint8_t *start, |
| 78 | hb_codepoint_t *unicode) |
| 79 | { |
Behdad Esfahbod | e13f8d2 | 2012-11-13 15:12:06 -0800 | [diff] [blame] | 80 | const uint8_t *end = text--; |
| 81 | while (start < text && (*text & 0xc0) == 0x80 && end - text < 4) |
Behdad Esfahbod | 89ac39d | 2012-09-25 13:59:24 -0400 | [diff] [blame] | 82 | text--; |
| 83 | |
| 84 | hb_codepoint_t c = *text, mask; |
| 85 | unsigned int len; |
| 86 | |
| 87 | /* TODO check for overlong sequences? */ |
| 88 | |
| 89 | HB_UTF8_COMPUTE (c, mask, len); |
| 90 | if (unlikely (!len || (unsigned int) (end - text) != len)) { |
| 91 | *unicode = -1; |
| 92 | return end - 1; |
| 93 | } else { |
| 94 | hb_codepoint_t result; |
| 95 | unsigned int i; |
| 96 | result = c & mask; |
| 97 | for (i = 1; i < len; i++) |
| 98 | { |
| 99 | result <<= 6; |
| 100 | result |= (text[i] & 0x3f); |
| 101 | } |
| 102 | *unicode = result; |
| 103 | return text; |
| 104 | } |
| 105 | } |
| 106 | |
| 107 | |
/* Number of bytes in the zero-terminated UTF-8 string at text, not counting
 * the terminator.  This counts bytes (code units), not code points. */
static inline unsigned int
hb_utf_strlen (const uint8_t *text)
{
  const char *bytes = (const char *) text;
  size_t length = strlen (bytes);

  return (unsigned int) length;
}
| 113 | |
Behdad Esfahbod | 7f19ae7 | 2012-09-25 11:22:28 -0400 | [diff] [blame] | 114 | |
| 115 | /* UTF-16 */ |
| 116 | |
| 117 | static inline const uint16_t * |
| 118 | hb_utf_next (const uint16_t *text, |
| 119 | const uint16_t *end, |
| 120 | hb_codepoint_t *unicode) |
| 121 | { |
Behdad Esfahbod | 4445e5e | 2012-09-25 12:26:12 -0400 | [diff] [blame] | 122 | hb_codepoint_t c = *text++; |
Behdad Esfahbod | 7f19ae7 | 2012-09-25 11:22:28 -0400 | [diff] [blame] | 123 | |
Behdad Esfahbod | 4445e5e | 2012-09-25 12:26:12 -0400 | [diff] [blame] | 124 | if (unlikely (hb_in_range<hb_codepoint_t> (c, 0xd800, 0xdbff))) |
| 125 | { |
Behdad Esfahbod | 7f19ae7 | 2012-09-25 11:22:28 -0400 | [diff] [blame] | 126 | /* high surrogate */ |
Behdad Esfahbod | 4445e5e | 2012-09-25 12:26:12 -0400 | [diff] [blame] | 127 | hb_codepoint_t l; |
| 128 | if (text < end && ((l = *text), likely (hb_in_range<hb_codepoint_t> (l, 0xdc00, 0xdfff)))) |
| 129 | { |
Behdad Esfahbod | 7f19ae7 | 2012-09-25 11:22:28 -0400 | [diff] [blame] | 130 | /* low surrogate */ |
Behdad Esfahbod | 4445e5e | 2012-09-25 12:26:12 -0400 | [diff] [blame] | 131 | *unicode = (c << 10) + l - ((0xd800 << 10) - 0x10000 + 0xdc00); |
Behdad Esfahbod | 7f19ae7 | 2012-09-25 11:22:28 -0400 | [diff] [blame] | 132 | text++; |
| 133 | } else |
| 134 | *unicode = -1; |
| 135 | } else |
| 136 | *unicode = c; |
| 137 | |
| 138 | return text; |
| 139 | } |
| 140 | |
Behdad Esfahbod | 89ac39d | 2012-09-25 13:59:24 -0400 | [diff] [blame] | 141 | static inline const uint16_t * |
| 142 | hb_utf_prev (const uint16_t *text, |
| 143 | const uint16_t *start, |
| 144 | hb_codepoint_t *unicode) |
| 145 | { |
| 146 | hb_codepoint_t c = *--text; |
| 147 | |
| 148 | if (unlikely (hb_in_range<hb_codepoint_t> (c, 0xdc00, 0xdfff))) |
| 149 | { |
| 150 | /* low surrogate */ |
| 151 | hb_codepoint_t h; |
| 152 | if (start < text && ((h = *(text - 1)), likely (hb_in_range<hb_codepoint_t> (h, 0xd800, 0xdbff)))) |
| 153 | { |
| 154 | /* high surrogate */ |
| 155 | *unicode = (h << 10) + c - ((0xd800 << 10) - 0x10000 + 0xdc00); |
| 156 | text--; |
| 157 | } else |
| 158 | *unicode = -1; |
| 159 | } else |
| 160 | *unicode = c; |
| 161 | |
| 162 | return text; |
| 163 | } |
| 164 | |
| 165 | |
/* Number of 16-bit code units before the first zero unit in text.  A
 * surrogate pair counts as two; this is not a code point count. */
static inline unsigned int
hb_utf_strlen (const uint16_t *text)
{
  const uint16_t *cursor = text;

  while (*cursor)
    cursor++;

  return (unsigned int) (cursor - text);
}
| 173 | |
Behdad Esfahbod | 7f19ae7 | 2012-09-25 11:22:28 -0400 | [diff] [blame] | 174 | |
| 175 | /* UTF-32 */ |
| 176 | |
| 177 | static inline const uint32_t * |
| 178 | hb_utf_next (const uint32_t *text, |
Behdad Esfahbod | 0beb66e | 2012-12-05 18:46:04 -0500 | [diff] [blame] | 179 | const uint32_t *end HB_UNUSED, |
Behdad Esfahbod | 7f19ae7 | 2012-09-25 11:22:28 -0400 | [diff] [blame] | 180 | hb_codepoint_t *unicode) |
| 181 | { |
Behdad Esfahbod | 89ac39d | 2012-09-25 13:59:24 -0400 | [diff] [blame] | 182 | *unicode = *text++; |
| 183 | return text; |
| 184 | } |
| 185 | |
| 186 | static inline const uint32_t * |
| 187 | hb_utf_prev (const uint32_t *text, |
Behdad Esfahbod | 0beb66e | 2012-12-05 18:46:04 -0500 | [diff] [blame] | 188 | const uint32_t *start HB_UNUSED, |
Behdad Esfahbod | 89ac39d | 2012-09-25 13:59:24 -0400 | [diff] [blame] | 189 | hb_codepoint_t *unicode) |
| 190 | { |
| 191 | *unicode = *--text; |
| 192 | return text; |
Behdad Esfahbod | 7f19ae7 | 2012-09-25 11:22:28 -0400 | [diff] [blame] | 193 | } |
| 194 | |
/* Number of 32-bit code units before the first zero unit in text. */
static inline unsigned int
hb_utf_strlen (const uint32_t *text)
{
  const uint32_t *cursor = text;

  while (*cursor)
    cursor++;

  return (unsigned int) (cursor - text);
}
| 202 | |
Behdad Esfahbod | 7f19ae7 | 2012-09-25 11:22:28 -0400 | [diff] [blame] | 203 | |
| 204 | #endif /* HB_UTF_PRIVATE_HH */ |