blob: b718b94e69a19793d41eea44757826bf731150a4 [file] [log] [blame]
/*
 * Copyright © 2012 Google, Inc.
 *
 * This is part of HarfBuzz, a text shaping library.
 *
 * Permission is hereby granted, without written agreement and without
 * license or royalty fees, to use, copy, modify, and distribute this
 * software and its documentation for any purpose, provided that the
 * above copyright notice and the following two paragraphs appear in
 * all copies of this software.
 *
 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
 * DAMAGE.
 *
 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
 * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
 *
 * Google Author(s): Behdad Esfahbod
 */
26
Behdad Esfahbodc77ae402018-08-25 22:36:36 -070027#ifndef HB_SET_DIGEST_HH
28#define HB_SET_DIGEST_HH
Behdad Esfahbod826a1da2017-10-15 14:09:05 +020029
Behdad Esfahbodc77ae402018-08-25 22:36:36 -070030#include "hb.hh"
Behdad Esfahbodafa71ee2022-11-16 16:22:45 -070031#include "hb-machinery.hh"
Behdad Esfahbod826a1da2017-10-15 14:09:05 +020032
/*
 * The set-digests here implement various "filters" that support
 * "approximate member query". Conceptually these are like Bloom
 * Filters and Quotient Filters, but much smaller, faster, and
 * designed to fit the requirements of our uses for glyph coverage
 * queries.
 *
 * Our filters are highly accurate if the lookup covers a fairly local
 * set of glyphs, but fully flooded and ineffective if coverage is
 * all over the place.
 *
 * The way these are used is that the filter is first populated by
 * a lookup's or subtable's Coverage table(s), and then when we
 * want to apply the lookup or subtable to a glyph, before trying
 * to apply, we ask the filter if the glyph may be covered. If it
 * may not be, we return early. We can also match a digest against
 * another digest.
 *
 * We use these filters at three levels:
 *   - If the digest for all the glyphs in the buffer as a whole
 *     does not match the digest for the lookup, skip the lookup.
 *   - For each glyph, if it doesn't match the lookup digest,
 *     skip it.
 *   - For each glyph, if it doesn't match the subtable digest,
 *     skip it.
 *
 * The main filter we use is a combination of three bits-pattern
 * filters. A bits-pattern filter checks a number of bits (5 or 6)
 * of the input number (glyph-id in this case) and checks whether
 * its pattern is amongst the patterns of any of the accepted values.
 * The accepted patterns are represented as a "long" integer. The
 * check is done using four bitwise operations only.
 */
66
67template <typename mask_t, unsigned int shift>
Behdad Esfahbod64537372022-06-08 11:37:12 -060068struct hb_set_digest_bits_pattern_t
Behdad Esfahbod826a1da2017-10-15 14:09:05 +020069{
Behdad Esfahbod70a52d62019-01-22 12:15:23 +010070 static constexpr unsigned mask_bytes = sizeof (mask_t);
71 static constexpr unsigned mask_bits = sizeof (mask_t) * 8;
Behdad Esfahbodf3980972019-01-25 16:08:25 +010072 static constexpr unsigned num_bits = 0
73 + (mask_bytes >= 1 ? 3 : 0)
74 + (mask_bytes >= 2 ? 1 : 0)
75 + (mask_bytes >= 4 ? 1 : 0)
76 + (mask_bytes >= 8 ? 1 : 0)
77 + (mask_bytes >= 16? 1 : 0)
78 + 0;
Behdad Esfahbod826a1da2017-10-15 14:09:05 +020079
Behdad Esfahbod606bf572018-09-16 19:33:48 +020080 static_assert ((shift < sizeof (hb_codepoint_t) * 8), "");
81 static_assert ((shift + num_bits <= sizeof (hb_codepoint_t) * 8), "");
Behdad Esfahbod826a1da2017-10-15 14:09:05 +020082
Ebrahim Byagowie4120082018-12-17 21:31:01 +033083 void init () { mask = 0; }
Behdad Esfahbod826a1da2017-10-15 14:09:05 +020084
Behdad Esfahbod23e4a3c2024-05-12 10:49:46 -060085 static hb_set_digest_bits_pattern_t full () { hb_set_digest_bits_pattern_t d; d.mask = (mask_t) -1; return d; }
86
Behdad Esfahbode2ab6c72024-05-12 15:25:13 -060087 void union_ (const hb_set_digest_bits_pattern_t &o) { mask |= o.mask; }
Behdad Esfahboda053b842022-11-16 14:39:25 -070088
Ebrahim Byagowib2ebaa92018-12-16 22:38:10 +033089 void add (hb_codepoint_t g) { mask |= mask_for (g); }
Behdad Esfahbod826a1da2017-10-15 14:09:05 +020090
Ebrahim Byagowib2ebaa92018-12-16 22:38:10 +033091 bool add_range (hb_codepoint_t a, hb_codepoint_t b)
92 {
Behdad Esfahbode8948d62023-07-02 15:35:18 -060093 if (mask == (mask_t) -1) return false;
Behdad Esfahbod826a1da2017-10-15 14:09:05 +020094 if ((b >> shift) - (a >> shift) >= mask_bits - 1)
Behdad Esfahbode8948d62023-07-02 15:35:18 -060095 {
Behdad Esfahbod826a1da2017-10-15 14:09:05 +020096 mask = (mask_t) -1;
Behdad Esfahbode8948d62023-07-02 15:35:18 -060097 return false;
98 }
99 else
100 {
Behdad Esfahbod826a1da2017-10-15 14:09:05 +0200101 mask_t ma = mask_for (a);
102 mask_t mb = mask_for (b);
103 mask |= mb + (mb - ma) - (mb < ma);
Behdad Esfahbode8948d62023-07-02 15:35:18 -0600104 return true;
Behdad Esfahbod826a1da2017-10-15 14:09:05 +0200105 }
106 }
Behdad Esfahbod5d025722017-12-14 19:33:55 -0800107
Behdad Esfahbod0fe62c12017-12-13 13:12:20 -0800108 template <typename T>
Ebrahim Byagowib2ebaa92018-12-16 22:38:10 +0330109 void add_array (const T *array, unsigned int count, unsigned int stride=sizeof(T))
Behdad Esfahbod0fe62c12017-12-13 13:12:20 -0800110 {
111 for (unsigned int i = 0; i < count; i++)
Behdad Esfahbod5d025722017-12-14 19:33:55 -0800112 {
113 add (*array);
Behdad Esfahbodafa71ee2022-11-16 16:22:45 -0700114 array = &StructAtOffsetUnaligned<T> ((const void *) array, stride);
Behdad Esfahbod5d025722017-12-14 19:33:55 -0800115 }
116 }
117 template <typename T>
Behdad Esfahbodd3a2f992021-04-02 08:32:41 -0600118 void add_array (const hb_array_t<const T>& arr) { add_array (&arr, arr.len ()); }
119 template <typename T>
Ebrahim Byagowib2ebaa92018-12-16 22:38:10 +0330120 bool add_sorted_array (const T *array, unsigned int count, unsigned int stride=sizeof(T))
Behdad Esfahbod5d025722017-12-14 19:33:55 -0800121 {
Behdad Esfahbod95b97632022-11-16 14:15:01 -0700122 add_array (array, count, stride);
Behdad Esfahbod5d025722017-12-14 19:33:55 -0800123 return true;
Behdad Esfahbod0fe62c12017-12-13 13:12:20 -0800124 }
Behdad Esfahbodd3a2f992021-04-02 08:32:41 -0600125 template <typename T>
126 bool add_sorted_array (const hb_sorted_array_t<const T>& arr) { return add_sorted_array (&arr, arr.len ()); }
Behdad Esfahbod826a1da2017-10-15 14:09:05 +0200127
Behdad Esfahbod15b6c322022-11-16 15:59:13 -0700128 bool may_have (const hb_set_digest_bits_pattern_t &o) const
129 { return mask & o.mask; }
130
Ebrahim Byagowib2ebaa92018-12-16 22:38:10 +0330131 bool may_have (hb_codepoint_t g) const
Behdad Esfahbodb6fed6f2022-05-29 06:33:34 -0600132 { return mask & mask_for (g); }
Behdad Esfahbod826a1da2017-10-15 14:09:05 +0200133
Behdad Esfahbod51582552024-05-11 09:25:22 -0600134 bool operator [] (hb_codepoint_t g) const
135 { return may_have (g); }
136
Behdad Esfahbod826a1da2017-10-15 14:09:05 +0200137 private:
138
Ebrahim Byagowib2ebaa92018-12-16 22:38:10 +0330139 static mask_t mask_for (hb_codepoint_t g)
140 { return ((mask_t) 1) << ((g >> shift) & (mask_bits - 1)); }
Behdad Esfahbod51582552024-05-11 09:25:22 -0600141 mask_t mask = 0;
Behdad Esfahbod826a1da2017-10-15 14:09:05 +0200142};
143
144template <typename head_t, typename tail_t>
145struct hb_set_digest_combiner_t
146{
Ebrahim Byagowie4120082018-12-17 21:31:01 +0330147 void init ()
Ebrahim Byagowib2ebaa92018-12-16 22:38:10 +0330148 {
Behdad Esfahbod826a1da2017-10-15 14:09:05 +0200149 head.init ();
150 tail.init ();
151 }
152
Behdad Esfahbod23e4a3c2024-05-12 10:49:46 -0600153 static hb_set_digest_combiner_t full () { hb_set_digest_combiner_t d; d.head = head_t::full(); d.tail = tail_t::full (); return d; }
154
Behdad Esfahbode2ab6c72024-05-12 15:25:13 -0600155 void union_ (const hb_set_digest_combiner_t &o)
Behdad Esfahboda053b842022-11-16 14:39:25 -0700156 {
Behdad Esfahbode2ab6c72024-05-12 15:25:13 -0600157 head.union_ (o.head);
158 tail.union_(o.tail);
Behdad Esfahboda053b842022-11-16 14:39:25 -0700159 }
160
Ebrahim Byagowib2ebaa92018-12-16 22:38:10 +0330161 void add (hb_codepoint_t g)
162 {
Behdad Esfahbod826a1da2017-10-15 14:09:05 +0200163 head.add (g);
164 tail.add (g);
165 }
166
Ebrahim Byagowib2ebaa92018-12-16 22:38:10 +0330167 bool add_range (hb_codepoint_t a, hb_codepoint_t b)
168 {
Behdad Esfahbodcb73ba72023-07-02 15:27:26 -0600169 return (int) head.add_range (a, b) | (int) tail.add_range (a, b);
Behdad Esfahbod826a1da2017-10-15 14:09:05 +0200170 }
Behdad Esfahbod0fe62c12017-12-13 13:12:20 -0800171 template <typename T>
Ebrahim Byagowib2ebaa92018-12-16 22:38:10 +0330172 void add_array (const T *array, unsigned int count, unsigned int stride=sizeof(T))
Behdad Esfahbod0fe62c12017-12-13 13:12:20 -0800173 {
Behdad Esfahbod5d025722017-12-14 19:33:55 -0800174 head.add_array (array, count, stride);
175 tail.add_array (array, count, stride);
176 }
177 template <typename T>
Behdad Esfahbodd3a2f992021-04-02 08:32:41 -0600178 void add_array (const hb_array_t<const T>& arr) { add_array (&arr, arr.len ()); }
179 template <typename T>
Ebrahim Byagowib2ebaa92018-12-16 22:38:10 +0330180 bool add_sorted_array (const T *array, unsigned int count, unsigned int stride=sizeof(T))
Behdad Esfahbod5d025722017-12-14 19:33:55 -0800181 {
Behdad Esfahbod20654cd2022-11-16 14:15:58 -0700182 return head.add_sorted_array (array, count, stride) &&
183 tail.add_sorted_array (array, count, stride);
Behdad Esfahbod0fe62c12017-12-13 13:12:20 -0800184 }
Behdad Esfahbodd3a2f992021-04-02 08:32:41 -0600185 template <typename T>
186 bool add_sorted_array (const hb_sorted_array_t<const T>& arr) { return add_sorted_array (&arr, arr.len ()); }
Behdad Esfahbod826a1da2017-10-15 14:09:05 +0200187
Behdad Esfahbod15b6c322022-11-16 15:59:13 -0700188 bool may_have (const hb_set_digest_combiner_t &o) const
189 {
190 return head.may_have (o.head) && tail.may_have (o.tail);
191 }
192
Ebrahim Byagowib2ebaa92018-12-16 22:38:10 +0330193 bool may_have (hb_codepoint_t g) const
194 {
Behdad Esfahbod826a1da2017-10-15 14:09:05 +0200195 return head.may_have (g) && tail.may_have (g);
196 }
197
Behdad Esfahbod51582552024-05-11 09:25:22 -0600198 bool operator [] (hb_codepoint_t g) const
199 { return may_have (g); }
200
Behdad Esfahbod826a1da2017-10-15 14:09:05 +0200201 private:
202 head_t head;
203 tail_t tail;
204};
205
206
/*
 * hb_set_digest_t
 *
 * This is a combination of digests that performs "best".
 * There is not much science to this: it's a result of intuition
 * and testing.
 */
Behdad Esfahbod3b2929e2021-09-21 12:21:02 -0600214using hb_set_digest_t =
Behdad Esfahbod826a1da2017-10-15 14:09:05 +0200215 hb_set_digest_combiner_t
216 <
Behdad Esfahbod64537372022-06-08 11:37:12 -0600217 hb_set_digest_bits_pattern_t<unsigned long, 4>,
Behdad Esfahbod3b2929e2021-09-21 12:21:02 -0600218 hb_set_digest_combiner_t
219 <
Behdad Esfahbod64537372022-06-08 11:37:12 -0600220 hb_set_digest_bits_pattern_t<unsigned long, 0>,
221 hb_set_digest_bits_pattern_t<unsigned long, 9>
Behdad Esfahbod3b2929e2021-09-21 12:21:02 -0600222 >
Behdad Esfahbod826a1da2017-10-15 14:09:05 +0200223 >
Behdad Esfahbod3b2929e2021-09-21 12:21:02 -0600224;
Behdad Esfahbod826a1da2017-10-15 14:09:05 +0200225
226
Behdad Esfahbodc77ae402018-08-25 22:36:36 -0700227#endif /* HB_SET_DIGEST_HH */