blob: 59e8f4559f5921e1ccaf766c421bbb0cc0f58df2 [file] [log] [blame]
Behdad Esfahbod0b08adb2012-04-23 22:41:09 -04001/*
2 * Copyright © 2012 Google, Inc.
3 *
4 * This is part of HarfBuzz, a text shaping library.
5 *
6 * Permission is hereby granted, without written agreement and without
7 * license or royalty fees, to use, copy, modify, and distribute this
8 * software and its documentation for any purpose, provided that the
9 * above copyright notice and the following two paragraphs appear in
10 * all copies of this software.
11 *
12 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
13 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
14 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
15 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
16 * DAMAGE.
17 *
18 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
19 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
20 * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
21 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
22 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
23 *
24 * Google Author(s): Behdad Esfahbod
25 */
26
27#ifndef HB_SET_PRIVATE_HH
28#define HB_SET_PRIVATE_HH
29
30#include "hb-private.hh"
Behdad Esfahbod0b08adb2012-04-23 22:41:09 -040031#include "hb-object-private.hh"
32
33
/*
 * The set digests here implement various "filters" that support
 * "approximate member query".  Conceptually these are like Bloom
 * Filter and Quotient Filter, however, much smaller, faster, and
 * designed to fit the requirements of our uses for glyph coverage
 * queries.  As a result, our filters have a much higher
 * false-positive rate.
 */
41
Behdad Esfahbodc7851ef2013-04-17 17:45:39 -040042template <typename mask_t, unsigned int shift>
Behdad Esfahbod60a30352012-08-01 21:06:27 -040043struct hb_set_digest_lowest_bits_t
44{
45 ASSERT_POD ();
46
Behdad Esfahbode6f19af2013-05-02 13:59:46 -040047 static const unsigned int mask_bytes = sizeof (mask_t);
48 static const unsigned int mask_bits = sizeof (mask_t) * 8;
Behdad Esfahbodc7851ef2013-04-17 17:45:39 -040049 static const unsigned int num_bits = 0
Behdad Esfahbode6f19af2013-05-02 13:59:46 -040050 + (mask_bytes >= 1 ? 3 : 0)
51 + (mask_bytes >= 2 ? 1 : 0)
52 + (mask_bytes >= 4 ? 1 : 0)
53 + (mask_bytes >= 8 ? 1 : 0)
54 + (mask_bytes >= 16? 1 : 0)
Behdad Esfahbodc7851ef2013-04-17 17:45:39 -040055 + 0;
56
57 ASSERT_STATIC (shift < sizeof (hb_codepoint_t) * 8);
58 ASSERT_STATIC (shift + num_bits <= sizeof (hb_codepoint_t) * 8);
Behdad Esfahbod60a30352012-08-01 21:06:27 -040059
60 inline void init (void) {
61 mask = 0;
62 }
63
64 inline void add (hb_codepoint_t g) {
65 mask |= mask_for (g);
66 }
67
68 inline void add_range (hb_codepoint_t a, hb_codepoint_t b) {
Behdad Esfahbode6f19af2013-05-02 13:59:46 -040069 if ((b >> shift) - (a >> shift) >= mask_bits - 1)
Behdad Esfahbod048e3b52012-08-04 18:04:57 -070070 mask = (mask_t) -1;
71 else {
72 mask_t ma = mask_for (a);
73 mask_t mb = mask_for (b);
74 mask |= mb + (mb - ma) - (mb < ma);
75 }
Behdad Esfahbod60a30352012-08-01 21:06:27 -040076 }
77
78 inline bool may_have (hb_codepoint_t g) const {
79 return !!(mask & mask_for (g));
80 }
81
82 private:
83
Behdad Esfahbodf9a61102013-04-17 19:01:49 -040084 static inline mask_t mask_for (hb_codepoint_t g) {
Behdad Esfahbode6f19af2013-05-02 13:59:46 -040085 return ((mask_t) 1) << ((g >> shift) & (mask_bits - 1));
Behdad Esfahbodc7851ef2013-04-17 17:45:39 -040086 }
Behdad Esfahbod60a30352012-08-01 21:06:27 -040087 mask_t mask;
88};
89
Behdad Esfahbodb40f2c02013-04-16 23:21:38 -040090template <typename head_t, typename tail_t>
91struct hb_set_digest_combiner_t
Behdad Esfahbod60a30352012-08-01 21:06:27 -040092{
93 ASSERT_POD ();
94
95 inline void init (void) {
Behdad Esfahbodb40f2c02013-04-16 23:21:38 -040096 head.init ();
97 tail.init ();
Behdad Esfahbod60a30352012-08-01 21:06:27 -040098 }
99
100 inline void add (hb_codepoint_t g) {
Behdad Esfahbodb40f2c02013-04-16 23:21:38 -0400101 head.add (g);
102 tail.add (g);
Behdad Esfahbod60a30352012-08-01 21:06:27 -0400103 }
104
105 inline void add_range (hb_codepoint_t a, hb_codepoint_t b) {
Behdad Esfahbodb40f2c02013-04-16 23:21:38 -0400106 head.add_range (a, b);
107 tail.add_range (a, b);
Behdad Esfahbod60a30352012-08-01 21:06:27 -0400108 }
109
110 inline bool may_have (hb_codepoint_t g) const {
Behdad Esfahbodb40f2c02013-04-16 23:21:38 -0400111 return head.may_have (g) && tail.may_have (g);
Behdad Esfahbod60a30352012-08-01 21:06:27 -0400112 }
113
114 private:
Behdad Esfahbodb40f2c02013-04-16 23:21:38 -0400115 head_t head;
116 tail_t tail;
Behdad Esfahbod60a30352012-08-01 21:06:27 -0400117};
118
Behdad Esfahbod0d5798a2013-04-17 18:19:21 -0400119
120/*
121 * hb_set_digest_t
122 *
123 * This is a combination of digests that performs "best".
124 * There is not much science to this: it's a result of intuition
125 * and testing.
126 */
127typedef hb_set_digest_combiner_t
128<
129 hb_set_digest_lowest_bits_t<unsigned long, 4>,
130 hb_set_digest_combiner_t
131 <
132 hb_set_digest_lowest_bits_t<unsigned long, 0>,
133 hb_set_digest_lowest_bits_t<unsigned long, 9>
134 >
135> hb_set_digest_t;
136
137
138
139/*
140 * hb_set_t
141 */
Behdad Esfahbodb40f2c02013-04-16 23:21:38 -0400142
Behdad Esfahbod60a30352012-08-01 21:06:27 -0400143
/* TODO Make this faster and more memory-efficient. */
Behdad Esfahbod0b08adb2012-04-23 22:41:09 -0400145
Behdad Esfahbod1bc1cb32012-06-16 15:21:55 -0400146struct hb_set_t
Behdad Esfahbod0b08adb2012-04-23 22:41:09 -0400147{
Behdad Esfahbod6220e5f2012-06-06 03:30:09 -0400148 hb_object_header_t header;
149 ASSERT_POD ();
Behdad Esfahbod8165f272013-01-02 22:50:36 -0600150 bool in_error;
Behdad Esfahbod6220e5f2012-06-06 03:30:09 -0400151
Behdad Esfahbod1827dc22012-04-24 16:56:37 -0400152 inline void init (void) {
Behdad Esfahbodcd7ea4f2014-08-14 12:57:02 -0400153 hb_object_init (this);
Behdad Esfahbod1827dc22012-04-24 16:56:37 -0400154 clear ();
155 }
Behdad Esfahboda5e39fe2012-04-25 00:14:46 -0400156 inline void fini (void) {
157 }
Behdad Esfahbod0b08adb2012-04-23 22:41:09 -0400158 inline void clear (void) {
Behdad Esfahbod7b1b7202013-01-02 23:02:59 -0600159 if (unlikely (hb_object_is_inert (this)))
160 return;
161 in_error = false;
Behdad Esfahbod0b08adb2012-04-23 22:41:09 -0400162 memset (elts, 0, sizeof elts);
163 }
Behdad Esfahbodaec89de2012-11-15 16:15:42 -0800164 inline bool is_empty (void) const {
Behdad Esfahbod6c6ccaf2012-04-24 14:21:15 -0400165 for (unsigned int i = 0; i < ARRAY_LENGTH (elts); i++)
166 if (elts[i])
167 return false;
168 return true;
169 }
Behdad Esfahbod5caece62012-04-23 23:03:12 -0400170 inline void add (hb_codepoint_t g)
Behdad Esfahbod0b08adb2012-04-23 22:41:09 -0400171 {
Behdad Esfahbod7b1b7202013-01-02 23:02:59 -0600172 if (unlikely (in_error)) return;
Behdad Esfahbod20cbc1f2013-09-06 15:29:22 -0400173 if (unlikely (g == INVALID)) return;
Behdad Esfahbod5caece62012-04-23 23:03:12 -0400174 if (unlikely (g > MAX_G)) return;
175 elt (g) |= mask (g);
Behdad Esfahbod0b08adb2012-04-23 22:41:09 -0400176 }
Behdad Esfahbod67bb9e82012-06-09 02:02:46 -0400177 inline void add_range (hb_codepoint_t a, hb_codepoint_t b)
178 {
Behdad Esfahbod7b1b7202013-01-02 23:02:59 -0600179 if (unlikely (in_error)) return;
Behdad Esfahbodaec89de2012-11-15 16:15:42 -0800180 /* TODO Speedup */
Behdad Esfahbod67bb9e82012-06-09 02:02:46 -0400181 for (unsigned int i = a; i < b + 1; i++)
182 add (i);
183 }
Behdad Esfahbod5caece62012-04-23 23:03:12 -0400184 inline void del (hb_codepoint_t g)
Behdad Esfahbod0b08adb2012-04-23 22:41:09 -0400185 {
Behdad Esfahbod7b1b7202013-01-02 23:02:59 -0600186 if (unlikely (in_error)) return;
Behdad Esfahbod5caece62012-04-23 23:03:12 -0400187 if (unlikely (g > MAX_G)) return;
188 elt (g) &= ~mask (g);
Behdad Esfahbod0b08adb2012-04-23 22:41:09 -0400189 }
Behdad Esfahbodaec89de2012-11-15 16:15:42 -0800190 inline void del_range (hb_codepoint_t a, hb_codepoint_t b)
191 {
Behdad Esfahbod7b1b7202013-01-02 23:02:59 -0600192 if (unlikely (in_error)) return;
Behdad Esfahbodaec89de2012-11-15 16:15:42 -0800193 /* TODO Speedup */
194 for (unsigned int i = a; i < b + 1; i++)
195 del (i);
196 }
Behdad Esfahbod0b08adb2012-04-23 22:41:09 -0400197 inline bool has (hb_codepoint_t g) const
198 {
199 if (unlikely (g > MAX_G)) return false;
200 return !!(elt (g) & mask (g));
201 }
202 inline bool intersects (hb_codepoint_t first,
203 hb_codepoint_t last) const
204 {
205 if (unlikely (first > MAX_G)) return false;
206 if (unlikely (last > MAX_G)) last = MAX_G;
207 unsigned int end = last + 1;
208 for (hb_codepoint_t i = first; i < end; i++)
209 if (has (i))
210 return true;
211 return false;
212 }
Behdad Esfahbodaec89de2012-11-15 16:15:42 -0800213 inline bool is_equal (const hb_set_t *other) const
Behdad Esfahbod6c6ccaf2012-04-24 14:21:15 -0400214 {
215 for (unsigned int i = 0; i < ELTS; i++)
216 if (elts[i] != other->elts[i])
217 return false;
218 return true;
219 }
220 inline void set (const hb_set_t *other)
221 {
Behdad Esfahbod7b1b7202013-01-02 23:02:59 -0600222 if (unlikely (in_error)) return;
Behdad Esfahbod6c6ccaf2012-04-24 14:21:15 -0400223 for (unsigned int i = 0; i < ELTS; i++)
224 elts[i] = other->elts[i];
225 }
226 inline void union_ (const hb_set_t *other)
227 {
Behdad Esfahbod7b1b7202013-01-02 23:02:59 -0600228 if (unlikely (in_error)) return;
Behdad Esfahbod6c6ccaf2012-04-24 14:21:15 -0400229 for (unsigned int i = 0; i < ELTS; i++)
230 elts[i] |= other->elts[i];
231 }
232 inline void intersect (const hb_set_t *other)
233 {
Behdad Esfahbod7b1b7202013-01-02 23:02:59 -0600234 if (unlikely (in_error)) return;
Behdad Esfahbod6c6ccaf2012-04-24 14:21:15 -0400235 for (unsigned int i = 0; i < ELTS; i++)
236 elts[i] &= other->elts[i];
237 }
238 inline void subtract (const hb_set_t *other)
239 {
Behdad Esfahbod7b1b7202013-01-02 23:02:59 -0600240 if (unlikely (in_error)) return;
Behdad Esfahbod6c6ccaf2012-04-24 14:21:15 -0400241 for (unsigned int i = 0; i < ELTS; i++)
242 elts[i] &= ~other->elts[i];
243 }
Behdad Esfahbod62c3e112012-05-25 13:48:00 -0400244 inline void symmetric_difference (const hb_set_t *other)
245 {
Behdad Esfahbod7b1b7202013-01-02 23:02:59 -0600246 if (unlikely (in_error)) return;
Behdad Esfahbod62c3e112012-05-25 13:48:00 -0400247 for (unsigned int i = 0; i < ELTS; i++)
248 elts[i] ^= other->elts[i];
249 }
Behdad Esfahbod8165f272013-01-02 22:50:36 -0600250 inline void invert (void)
251 {
Behdad Esfahbod7b1b7202013-01-02 23:02:59 -0600252 if (unlikely (in_error)) return;
Behdad Esfahbod8165f272013-01-02 22:50:36 -0600253 for (unsigned int i = 0; i < ELTS; i++)
254 elts[i] = ~elts[i];
255 }
Behdad Esfahbodaec89de2012-11-15 16:15:42 -0800256 inline bool next (hb_codepoint_t *codepoint) const
Behdad Esfahbod29ce4462012-05-25 14:17:54 -0400257 {
Behdad Esfahbod20cbc1f2013-09-06 15:29:22 -0400258 if (unlikely (*codepoint == INVALID)) {
Behdad Esfahbod29ce4462012-05-25 14:17:54 -0400259 hb_codepoint_t i = get_min ();
Behdad Esfahbod20cbc1f2013-09-06 15:29:22 -0400260 if (i != INVALID) {
Behdad Esfahbod29ce4462012-05-25 14:17:54 -0400261 *codepoint = i;
262 return true;
Behdad Esfahbod20cbc1f2013-09-06 15:29:22 -0400263 } else {
264 *codepoint = INVALID;
Behdad Esfahbod29ce4462012-05-25 14:17:54 -0400265 return false;
Behdad Esfahbod20cbc1f2013-09-06 15:29:22 -0400266 }
Behdad Esfahbod29ce4462012-05-25 14:17:54 -0400267 }
268 for (hb_codepoint_t i = *codepoint + 1; i < MAX_G + 1; i++)
269 if (has (i)) {
270 *codepoint = i;
271 return true;
272 }
Behdad Esfahbod20cbc1f2013-09-06 15:29:22 -0400273 *codepoint = INVALID;
Behdad Esfahbod29ce4462012-05-25 14:17:54 -0400274 return false;
275 }
Behdad Esfahbodaec89de2012-11-15 16:15:42 -0800276 inline bool next_range (hb_codepoint_t *first, hb_codepoint_t *last) const
277 {
278 hb_codepoint_t i;
279
280 i = *last;
281 if (!next (&i))
Behdad Esfahbod20cbc1f2013-09-06 15:29:22 -0400282 {
283 *last = *first = INVALID;
Behdad Esfahbodaec89de2012-11-15 16:15:42 -0800284 return false;
Behdad Esfahbod20cbc1f2013-09-06 15:29:22 -0400285 }
Behdad Esfahbodaec89de2012-11-15 16:15:42 -0800286
287 *last = *first = i;
288 while (next (&i) && i == *last + 1)
289 (*last)++;
290
291 return true;
292 }
293
294 inline unsigned int get_population (void) const
295 {
296 unsigned int count = 0;
297 for (unsigned int i = 0; i < ELTS; i++)
298 count += _hb_popcount32 (elts[i]);
299 return count;
300 }
Behdad Esfahbodf039e792012-05-17 20:55:12 -0400301 inline hb_codepoint_t get_min (void) const
Behdad Esfahbod6c6ccaf2012-04-24 14:21:15 -0400302 {
303 for (unsigned int i = 0; i < ELTS; i++)
304 if (elts[i])
Behdad Esfahbodfa3d0a02013-05-14 15:30:55 -0400305 for (unsigned int j = 0; j < BITS; j++)
Behdad Esfahbod6c6ccaf2012-04-24 14:21:15 -0400306 if (elts[i] & (1 << j))
307 return i * BITS + j;
Behdad Esfahbod20cbc1f2013-09-06 15:29:22 -0400308 return INVALID;
Behdad Esfahbod6c6ccaf2012-04-24 14:21:15 -0400309 }
Behdad Esfahbodf039e792012-05-17 20:55:12 -0400310 inline hb_codepoint_t get_max (void) const
Behdad Esfahbod6c6ccaf2012-04-24 14:21:15 -0400311 {
312 for (unsigned int i = ELTS; i; i--)
313 if (elts[i - 1])
314 for (unsigned int j = BITS; j; j--)
315 if (elts[i - 1] & (1 << (j - 1)))
316 return (i - 1) * BITS + (j - 1);
Behdad Esfahbod20cbc1f2013-09-06 15:29:22 -0400317 return INVALID;
Behdad Esfahbod6c6ccaf2012-04-24 14:21:15 -0400318 }
Behdad Esfahbod0b08adb2012-04-23 22:41:09 -0400319
320 typedef uint32_t elt_t;
Behdad Esfahbod29ce4462012-05-25 14:17:54 -0400321 static const unsigned int MAX_G = 65536 - 1; /* XXX Fix this... */
Behdad Esfahbod0b08adb2012-04-23 22:41:09 -0400322 static const unsigned int SHIFT = 5;
323 static const unsigned int BITS = (1 << SHIFT);
324 static const unsigned int MASK = BITS - 1;
Behdad Esfahbod6c6ccaf2012-04-24 14:21:15 -0400325 static const unsigned int ELTS = (MAX_G + 1 + (BITS - 1)) / BITS;
Behdad Esfahbod20cbc1f2013-09-06 15:29:22 -0400326 static const hb_codepoint_t INVALID = HB_SET_VALUE_INVALID;
Behdad Esfahbod0b08adb2012-04-23 22:41:09 -0400327
328 elt_t &elt (hb_codepoint_t g) { return elts[g >> SHIFT]; }
329 elt_t elt (hb_codepoint_t g) const { return elts[g >> SHIFT]; }
330 elt_t mask (hb_codepoint_t g) const { return elt_t (1) << (g & MASK); }
331
Behdad Esfahbodb5fa37c2012-05-10 23:09:48 +0200332 elt_t elts[ELTS]; /* XXX 8kb */
Behdad Esfahbod0b08adb2012-04-23 22:41:09 -0400333
334 ASSERT_STATIC (sizeof (elt_t) * 8 == BITS);
Behdad Esfahbod1a2a4a02012-05-05 22:38:20 +0200335 ASSERT_STATIC (sizeof (elt_t) * 8 * ELTS > MAX_G);
Behdad Esfahbod0b08adb2012-04-23 22:41:09 -0400336};
337
338
339
340#endif /* HB_SET_PRIVATE_HH */