Behdad Esfahbod | 826a1da | 2017-10-15 14:09:05 +0200 | [diff] [blame] | 1 | /* |
| 2 | * Copyright © 2012 Google, Inc. |
| 3 | * |
| 4 | * This is part of HarfBuzz, a text shaping library. |
| 5 | * |
| 6 | * Permission is hereby granted, without written agreement and without |
| 7 | * license or royalty fees, to use, copy, modify, and distribute this |
| 8 | * software and its documentation for any purpose, provided that the |
| 9 | * above copyright notice and the following two paragraphs appear in |
| 10 | * all copies of this software. |
| 11 | * |
| 12 | * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR |
| 13 | * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES |
| 14 | * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN |
| 15 | * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH |
| 16 | * DAMAGE. |
| 17 | * |
| 18 | * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, |
| 19 | * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND |
| 20 | * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS |
| 21 | * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO |
| 22 | * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. |
| 23 | * |
| 24 | * Google Author(s): Behdad Esfahbod |
| 25 | */ |
| 26 | |
Behdad Esfahbod | c77ae40 | 2018-08-25 22:36:36 -0700 | [diff] [blame] | 27 | #ifndef HB_SET_DIGEST_HH |
| 28 | #define HB_SET_DIGEST_HH |
Behdad Esfahbod | 826a1da | 2017-10-15 14:09:05 +0200 | [diff] [blame] | 29 | |
Behdad Esfahbod | c77ae40 | 2018-08-25 22:36:36 -0700 | [diff] [blame] | 30 | #include "hb.hh" |
Behdad Esfahbod | afa71ee | 2022-11-16 16:22:45 -0700 | [diff] [blame] | 31 | #include "hb-machinery.hh" |
Behdad Esfahbod | 826a1da | 2017-10-15 14:09:05 +0200 | [diff] [blame] | 32 | |
| 33 | /* |
Behdad Esfahbod | 2a061cb | 2022-06-08 11:35:50 -0600 | [diff] [blame] | 34 | * The set-digests here implement various "filters" that support |
Behdad Esfahbod | 826a1da | 2017-10-15 14:09:05 +0200 | [diff] [blame] | 35 | * "approximate member query". Conceptually these are like Bloom |
| 36 | * Filter and Quotient Filter, however, much smaller, faster, and |
| 37 | * designed to fit the requirements of our uses for glyph coverage |
| 38 | * queries. |
| 39 | * |
| 40 | * Our filters are highly accurate if the lookup covers fairly local |
| 41 | * set of glyphs, but fully flooded and ineffective if coverage is |
| 42 | * all over the place. |
| 43 | * |
Behdad Esfahbod | 2a061cb | 2022-06-08 11:35:50 -0600 | [diff] [blame] | 44 | * The way these are used is that the filter is first populated by |
| 45 | * a lookup's or subtable's Coverage table(s), and then when we |
| 46 | * want to apply the lookup or subtable to a glyph, before trying |
| 47 | * to apply, we ask the filter if the glyph may be covered. If it's |
Behdad Esfahbod | 859f7d4 | 2023-04-28 12:22:11 -0600 | [diff] [blame] | 48 | * not, we return early. We can also match a digest against another |
| 49 | * digest. |
Behdad Esfahbod | 2a061cb | 2022-06-08 11:35:50 -0600 | [diff] [blame] | 50 | * |
Behdad Esfahbod | 859f7d4 | 2023-04-28 12:22:11 -0600 | [diff] [blame] | 51 | * We use these filters at three levels: |
| 52 | * - If the digest for all the glyphs in the buffer as a whole |
| 53 | * does not match the digest for the lookup, skip the lookup. |
| 54 | * - For each glyph, if it doesn't match the lookup digest, |
| 55 | * skip it. |
| 56 | * - For each glyph, if it doesn't match the subtable digest, |
| 57 | * skip it. |
Behdad Esfahbod | 2a061cb | 2022-06-08 11:35:50 -0600 | [diff] [blame] | 58 | * |
Behdad Esfahbod | 6453737 | 2022-06-08 11:37:12 -0600 | [diff] [blame] | 59 | * The main filter we use is a combination of three bits-pattern |
Behdad Esfahbod | 6a1edb8 | 2022-06-08 11:38:17 -0600 | [diff] [blame] | 60 | * filters. A bits-pattern filter checks a number of bits (5 or 6) |
Behdad Esfahbod | 2a061cb | 2022-06-08 11:35:50 -0600 | [diff] [blame] | 61 | * of the input number (glyph-id in this case) and checks whether |
Behdad Esfahbod | d4ddb3a | 2022-06-08 11:45:14 -0600 | [diff] [blame] | 62 | * its pattern is amongst the patterns of any of the accepted values. |
| 63 | * The accepted patterns are represented as a "long" integer. The |
Behdad Esfahbod | 2a061cb | 2022-06-08 11:35:50 -0600 | [diff] [blame] | 64 | * check is done using four bitwise operations only. |
Behdad Esfahbod | 826a1da | 2017-10-15 14:09:05 +0200 | [diff] [blame] | 65 | */ |
| 66 | |
| 67 | template <typename mask_t, unsigned int shift> |
Behdad Esfahbod | 6453737 | 2022-06-08 11:37:12 -0600 | [diff] [blame] | 68 | struct hb_set_digest_bits_pattern_t |
Behdad Esfahbod | 826a1da | 2017-10-15 14:09:05 +0200 | [diff] [blame] | 69 | { |
Behdad Esfahbod | 70a52d6 | 2019-01-22 12:15:23 +0100 | [diff] [blame] | 70 | static constexpr unsigned mask_bytes = sizeof (mask_t); |
| 71 | static constexpr unsigned mask_bits = sizeof (mask_t) * 8; |
Behdad Esfahbod | f398097 | 2019-01-25 16:08:25 +0100 | [diff] [blame] | 72 | static constexpr unsigned num_bits = 0 |
| 73 | + (mask_bytes >= 1 ? 3 : 0) |
| 74 | + (mask_bytes >= 2 ? 1 : 0) |
| 75 | + (mask_bytes >= 4 ? 1 : 0) |
| 76 | + (mask_bytes >= 8 ? 1 : 0) |
| 77 | + (mask_bytes >= 16? 1 : 0) |
| 78 | + 0; |
Behdad Esfahbod | 826a1da | 2017-10-15 14:09:05 +0200 | [diff] [blame] | 79 | |
Behdad Esfahbod | 606bf57 | 2018-09-16 19:33:48 +0200 | [diff] [blame] | 80 | static_assert ((shift < sizeof (hb_codepoint_t) * 8), ""); |
| 81 | static_assert ((shift + num_bits <= sizeof (hb_codepoint_t) * 8), ""); |
Behdad Esfahbod | 826a1da | 2017-10-15 14:09:05 +0200 | [diff] [blame] | 82 | |
Ebrahim Byagowi | e412008 | 2018-12-17 21:31:01 +0330 | [diff] [blame] | 83 | void init () { mask = 0; } |
Behdad Esfahbod | 826a1da | 2017-10-15 14:09:05 +0200 | [diff] [blame] | 84 | |
Behdad Esfahbod | 23e4a3c | 2024-05-12 10:49:46 -0600 | [diff] [blame] | 85 | static hb_set_digest_bits_pattern_t full () { hb_set_digest_bits_pattern_t d; d.mask = (mask_t) -1; return d; } |
| 86 | |
Behdad Esfahbod | e2ab6c7 | 2024-05-12 15:25:13 -0600 | [diff] [blame] | 87 | void union_ (const hb_set_digest_bits_pattern_t &o) { mask |= o.mask; } |
Behdad Esfahbod | a053b84 | 2022-11-16 14:39:25 -0700 | [diff] [blame] | 88 | |
Ebrahim Byagowi | b2ebaa9 | 2018-12-16 22:38:10 +0330 | [diff] [blame] | 89 | void add (hb_codepoint_t g) { mask |= mask_for (g); } |
Behdad Esfahbod | 826a1da | 2017-10-15 14:09:05 +0200 | [diff] [blame] | 90 | |
Ebrahim Byagowi | b2ebaa9 | 2018-12-16 22:38:10 +0330 | [diff] [blame] | 91 | bool add_range (hb_codepoint_t a, hb_codepoint_t b) |
| 92 | { |
Behdad Esfahbod | e8948d6 | 2023-07-02 15:35:18 -0600 | [diff] [blame] | 93 | if (mask == (mask_t) -1) return false; |
Behdad Esfahbod | 826a1da | 2017-10-15 14:09:05 +0200 | [diff] [blame] | 94 | if ((b >> shift) - (a >> shift) >= mask_bits - 1) |
Behdad Esfahbod | e8948d6 | 2023-07-02 15:35:18 -0600 | [diff] [blame] | 95 | { |
Behdad Esfahbod | 826a1da | 2017-10-15 14:09:05 +0200 | [diff] [blame] | 96 | mask = (mask_t) -1; |
Behdad Esfahbod | e8948d6 | 2023-07-02 15:35:18 -0600 | [diff] [blame] | 97 | return false; |
| 98 | } |
| 99 | else |
| 100 | { |
Behdad Esfahbod | 826a1da | 2017-10-15 14:09:05 +0200 | [diff] [blame] | 101 | mask_t ma = mask_for (a); |
| 102 | mask_t mb = mask_for (b); |
| 103 | mask |= mb + (mb - ma) - (mb < ma); |
Behdad Esfahbod | e8948d6 | 2023-07-02 15:35:18 -0600 | [diff] [blame] | 104 | return true; |
Behdad Esfahbod | 826a1da | 2017-10-15 14:09:05 +0200 | [diff] [blame] | 105 | } |
| 106 | } |
Behdad Esfahbod | 5d02572 | 2017-12-14 19:33:55 -0800 | [diff] [blame] | 107 | |
Behdad Esfahbod | 0fe62c1 | 2017-12-13 13:12:20 -0800 | [diff] [blame] | 108 | template <typename T> |
Ebrahim Byagowi | b2ebaa9 | 2018-12-16 22:38:10 +0330 | [diff] [blame] | 109 | void add_array (const T *array, unsigned int count, unsigned int stride=sizeof(T)) |
Behdad Esfahbod | 0fe62c1 | 2017-12-13 13:12:20 -0800 | [diff] [blame] | 110 | { |
| 111 | for (unsigned int i = 0; i < count; i++) |
Behdad Esfahbod | 5d02572 | 2017-12-14 19:33:55 -0800 | [diff] [blame] | 112 | { |
| 113 | add (*array); |
Behdad Esfahbod | afa71ee | 2022-11-16 16:22:45 -0700 | [diff] [blame] | 114 | array = &StructAtOffsetUnaligned<T> ((const void *) array, stride); |
Behdad Esfahbod | 5d02572 | 2017-12-14 19:33:55 -0800 | [diff] [blame] | 115 | } |
| 116 | } |
| 117 | template <typename T> |
Behdad Esfahbod | d3a2f99 | 2021-04-02 08:32:41 -0600 | [diff] [blame] | 118 | void add_array (const hb_array_t<const T>& arr) { add_array (&arr, arr.len ()); } |
| 119 | template <typename T> |
Ebrahim Byagowi | b2ebaa9 | 2018-12-16 22:38:10 +0330 | [diff] [blame] | 120 | bool add_sorted_array (const T *array, unsigned int count, unsigned int stride=sizeof(T)) |
Behdad Esfahbod | 5d02572 | 2017-12-14 19:33:55 -0800 | [diff] [blame] | 121 | { |
Behdad Esfahbod | 95b9763 | 2022-11-16 14:15:01 -0700 | [diff] [blame] | 122 | add_array (array, count, stride); |
Behdad Esfahbod | 5d02572 | 2017-12-14 19:33:55 -0800 | [diff] [blame] | 123 | return true; |
Behdad Esfahbod | 0fe62c1 | 2017-12-13 13:12:20 -0800 | [diff] [blame] | 124 | } |
Behdad Esfahbod | d3a2f99 | 2021-04-02 08:32:41 -0600 | [diff] [blame] | 125 | template <typename T> |
| 126 | bool add_sorted_array (const hb_sorted_array_t<const T>& arr) { return add_sorted_array (&arr, arr.len ()); } |
Behdad Esfahbod | 826a1da | 2017-10-15 14:09:05 +0200 | [diff] [blame] | 127 | |
Behdad Esfahbod | 15b6c32 | 2022-11-16 15:59:13 -0700 | [diff] [blame] | 128 | bool may_have (const hb_set_digest_bits_pattern_t &o) const |
| 129 | { return mask & o.mask; } |
| 130 | |
Ebrahim Byagowi | b2ebaa9 | 2018-12-16 22:38:10 +0330 | [diff] [blame] | 131 | bool may_have (hb_codepoint_t g) const |
Behdad Esfahbod | b6fed6f | 2022-05-29 06:33:34 -0600 | [diff] [blame] | 132 | { return mask & mask_for (g); } |
Behdad Esfahbod | 826a1da | 2017-10-15 14:09:05 +0200 | [diff] [blame] | 133 | |
Behdad Esfahbod | 5158255 | 2024-05-11 09:25:22 -0600 | [diff] [blame] | 134 | bool operator [] (hb_codepoint_t g) const |
| 135 | { return may_have (g); } |
| 136 | |
Behdad Esfahbod | 826a1da | 2017-10-15 14:09:05 +0200 | [diff] [blame] | 137 | private: |
| 138 | |
Ebrahim Byagowi | b2ebaa9 | 2018-12-16 22:38:10 +0330 | [diff] [blame] | 139 | static mask_t mask_for (hb_codepoint_t g) |
| 140 | { return ((mask_t) 1) << ((g >> shift) & (mask_bits - 1)); } |
Behdad Esfahbod | 5158255 | 2024-05-11 09:25:22 -0600 | [diff] [blame] | 141 | mask_t mask = 0; |
Behdad Esfahbod | 826a1da | 2017-10-15 14:09:05 +0200 | [diff] [blame] | 142 | }; |
| 143 | |
| 144 | template <typename head_t, typename tail_t> |
| 145 | struct hb_set_digest_combiner_t |
| 146 | { |
Ebrahim Byagowi | e412008 | 2018-12-17 21:31:01 +0330 | [diff] [blame] | 147 | void init () |
Ebrahim Byagowi | b2ebaa9 | 2018-12-16 22:38:10 +0330 | [diff] [blame] | 148 | { |
Behdad Esfahbod | 826a1da | 2017-10-15 14:09:05 +0200 | [diff] [blame] | 149 | head.init (); |
| 150 | tail.init (); |
| 151 | } |
| 152 | |
Behdad Esfahbod | 23e4a3c | 2024-05-12 10:49:46 -0600 | [diff] [blame] | 153 | static hb_set_digest_combiner_t full () { hb_set_digest_combiner_t d; d.head = head_t::full(); d.tail = tail_t::full (); return d; } |
| 154 | |
Behdad Esfahbod | e2ab6c7 | 2024-05-12 15:25:13 -0600 | [diff] [blame] | 155 | void union_ (const hb_set_digest_combiner_t &o) |
Behdad Esfahbod | a053b84 | 2022-11-16 14:39:25 -0700 | [diff] [blame] | 156 | { |
Behdad Esfahbod | e2ab6c7 | 2024-05-12 15:25:13 -0600 | [diff] [blame] | 157 | head.union_ (o.head); |
| 158 | tail.union_(o.tail); |
Behdad Esfahbod | a053b84 | 2022-11-16 14:39:25 -0700 | [diff] [blame] | 159 | } |
| 160 | |
Ebrahim Byagowi | b2ebaa9 | 2018-12-16 22:38:10 +0330 | [diff] [blame] | 161 | void add (hb_codepoint_t g) |
| 162 | { |
Behdad Esfahbod | 826a1da | 2017-10-15 14:09:05 +0200 | [diff] [blame] | 163 | head.add (g); |
| 164 | tail.add (g); |
| 165 | } |
| 166 | |
Ebrahim Byagowi | b2ebaa9 | 2018-12-16 22:38:10 +0330 | [diff] [blame] | 167 | bool add_range (hb_codepoint_t a, hb_codepoint_t b) |
| 168 | { |
Behdad Esfahbod | cb73ba7 | 2023-07-02 15:27:26 -0600 | [diff] [blame] | 169 | return (int) head.add_range (a, b) | (int) tail.add_range (a, b); |
Behdad Esfahbod | 826a1da | 2017-10-15 14:09:05 +0200 | [diff] [blame] | 170 | } |
Behdad Esfahbod | 0fe62c1 | 2017-12-13 13:12:20 -0800 | [diff] [blame] | 171 | template <typename T> |
Ebrahim Byagowi | b2ebaa9 | 2018-12-16 22:38:10 +0330 | [diff] [blame] | 172 | void add_array (const T *array, unsigned int count, unsigned int stride=sizeof(T)) |
Behdad Esfahbod | 0fe62c1 | 2017-12-13 13:12:20 -0800 | [diff] [blame] | 173 | { |
Behdad Esfahbod | 5d02572 | 2017-12-14 19:33:55 -0800 | [diff] [blame] | 174 | head.add_array (array, count, stride); |
| 175 | tail.add_array (array, count, stride); |
| 176 | } |
| 177 | template <typename T> |
Behdad Esfahbod | d3a2f99 | 2021-04-02 08:32:41 -0600 | [diff] [blame] | 178 | void add_array (const hb_array_t<const T>& arr) { add_array (&arr, arr.len ()); } |
| 179 | template <typename T> |
Ebrahim Byagowi | b2ebaa9 | 2018-12-16 22:38:10 +0330 | [diff] [blame] | 180 | bool add_sorted_array (const T *array, unsigned int count, unsigned int stride=sizeof(T)) |
Behdad Esfahbod | 5d02572 | 2017-12-14 19:33:55 -0800 | [diff] [blame] | 181 | { |
Behdad Esfahbod | 20654cd | 2022-11-16 14:15:58 -0700 | [diff] [blame] | 182 | return head.add_sorted_array (array, count, stride) && |
| 183 | tail.add_sorted_array (array, count, stride); |
Behdad Esfahbod | 0fe62c1 | 2017-12-13 13:12:20 -0800 | [diff] [blame] | 184 | } |
Behdad Esfahbod | d3a2f99 | 2021-04-02 08:32:41 -0600 | [diff] [blame] | 185 | template <typename T> |
| 186 | bool add_sorted_array (const hb_sorted_array_t<const T>& arr) { return add_sorted_array (&arr, arr.len ()); } |
Behdad Esfahbod | 826a1da | 2017-10-15 14:09:05 +0200 | [diff] [blame] | 187 | |
Behdad Esfahbod | 15b6c32 | 2022-11-16 15:59:13 -0700 | [diff] [blame] | 188 | bool may_have (const hb_set_digest_combiner_t &o) const |
| 189 | { |
| 190 | return head.may_have (o.head) && tail.may_have (o.tail); |
| 191 | } |
| 192 | |
Ebrahim Byagowi | b2ebaa9 | 2018-12-16 22:38:10 +0330 | [diff] [blame] | 193 | bool may_have (hb_codepoint_t g) const |
| 194 | { |
Behdad Esfahbod | 826a1da | 2017-10-15 14:09:05 +0200 | [diff] [blame] | 195 | return head.may_have (g) && tail.may_have (g); |
| 196 | } |
| 197 | |
Behdad Esfahbod | 5158255 | 2024-05-11 09:25:22 -0600 | [diff] [blame] | 198 | bool operator [] (hb_codepoint_t g) const |
| 199 | { return may_have (g); } |
| 200 | |
Behdad Esfahbod | 826a1da | 2017-10-15 14:09:05 +0200 | [diff] [blame] | 201 | private: |
| 202 | head_t head; |
| 203 | tail_t tail; |
| 204 | }; |
| 205 | |
| 206 | |
| 207 | /* |
| 208 | * hb_set_digest_t |
| 209 | * |
| 210 | * This is a combination of digests that performs "best". |
| 211 | * There is not much science to this: it's a result of intuition |
| 212 | * and testing. |
| 213 | */ |
Behdad Esfahbod | 3b2929e | 2021-09-21 12:21:02 -0600 | [diff] [blame] | 214 | using hb_set_digest_t = |
Behdad Esfahbod | 826a1da | 2017-10-15 14:09:05 +0200 | [diff] [blame] | 215 | hb_set_digest_combiner_t |
| 216 | < |
Behdad Esfahbod | 6453737 | 2022-06-08 11:37:12 -0600 | [diff] [blame] | 217 | hb_set_digest_bits_pattern_t<unsigned long, 4>, |
Behdad Esfahbod | 3b2929e | 2021-09-21 12:21:02 -0600 | [diff] [blame] | 218 | hb_set_digest_combiner_t |
| 219 | < |
Behdad Esfahbod | 6453737 | 2022-06-08 11:37:12 -0600 | [diff] [blame] | 220 | hb_set_digest_bits_pattern_t<unsigned long, 0>, |
| 221 | hb_set_digest_bits_pattern_t<unsigned long, 9> |
Behdad Esfahbod | 3b2929e | 2021-09-21 12:21:02 -0600 | [diff] [blame] | 222 | > |
Behdad Esfahbod | 826a1da | 2017-10-15 14:09:05 +0200 | [diff] [blame] | 223 | > |
Behdad Esfahbod | 3b2929e | 2021-09-21 12:21:02 -0600 | [diff] [blame] | 224 | ; |
Behdad Esfahbod | 826a1da | 2017-10-15 14:09:05 +0200 | [diff] [blame] | 225 | |
| 226 | |
Behdad Esfahbod | c77ae40 | 2018-08-25 22:36:36 -0700 | [diff] [blame] | 227 | #endif /* HB_SET_DIGEST_HH */ |