blob: 3c302b1daf68eb8d6381c27a9d40d0b5c4e62889 [file] [log] [blame]
Behdad Esfahbod0b08adb2012-04-23 22:41:09 -04001/*
2 * Copyright © 2012 Google, Inc.
3 *
4 * This is part of HarfBuzz, a text shaping library.
5 *
6 * Permission is hereby granted, without written agreement and without
7 * license or royalty fees, to use, copy, modify, and distribute this
8 * software and its documentation for any purpose, provided that the
9 * above copyright notice and the following two paragraphs appear in
10 * all copies of this software.
11 *
12 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
13 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
14 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
15 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
16 * DAMAGE.
17 *
18 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
19 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
20 * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
21 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
22 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
23 *
24 * Google Author(s): Behdad Esfahbod
25 */
26
27#ifndef HB_SET_PRIVATE_HH
28#define HB_SET_PRIVATE_HH
29
30#include "hb-private.hh"
Behdad Esfahbod0b08adb2012-04-23 22:41:09 -040031#include "hb-object-private.hh"
32
33
/*
 * The set digests here implement various "filters" that support
 * "approximate member query".  Conceptually these are like Bloom
 * Filter and Quotient Filter, however, much smaller, faster, and
 * designed to fit the requirements of our uses for glyph coverage
 * queries.
 *
 * Our filters are highly accurate if the lookup covers a fairly local
 * set of glyphs, but fully flooded and ineffective if coverage is
 * all over the place.
 *
 * The frozen-set can be used instead of a digest, to trade more
 * memory for 100% accuracy, but in practice, that doesn't look like
 * an attractive trade-off.
 */
49
Behdad Esfahbodc7851ef2013-04-17 17:45:39 -040050template <typename mask_t, unsigned int shift>
Behdad Esfahbod60a30352012-08-01 21:06:27 -040051struct hb_set_digest_lowest_bits_t
52{
53 ASSERT_POD ();
54
Behdad Esfahbode6f19af2013-05-02 13:59:46 -040055 static const unsigned int mask_bytes = sizeof (mask_t);
56 static const unsigned int mask_bits = sizeof (mask_t) * 8;
Behdad Esfahbodc7851ef2013-04-17 17:45:39 -040057 static const unsigned int num_bits = 0
Behdad Esfahbode6f19af2013-05-02 13:59:46 -040058 + (mask_bytes >= 1 ? 3 : 0)
59 + (mask_bytes >= 2 ? 1 : 0)
60 + (mask_bytes >= 4 ? 1 : 0)
61 + (mask_bytes >= 8 ? 1 : 0)
62 + (mask_bytes >= 16? 1 : 0)
Behdad Esfahbodc7851ef2013-04-17 17:45:39 -040063 + 0;
64
65 ASSERT_STATIC (shift < sizeof (hb_codepoint_t) * 8);
66 ASSERT_STATIC (shift + num_bits <= sizeof (hb_codepoint_t) * 8);
Behdad Esfahbod60a30352012-08-01 21:06:27 -040067
68 inline void init (void) {
69 mask = 0;
70 }
71
72 inline void add (hb_codepoint_t g) {
73 mask |= mask_for (g);
74 }
75
76 inline void add_range (hb_codepoint_t a, hb_codepoint_t b) {
Behdad Esfahbode6f19af2013-05-02 13:59:46 -040077 if ((b >> shift) - (a >> shift) >= mask_bits - 1)
Behdad Esfahbod048e3b52012-08-04 18:04:57 -070078 mask = (mask_t) -1;
79 else {
80 mask_t ma = mask_for (a);
81 mask_t mb = mask_for (b);
82 mask |= mb + (mb - ma) - (mb < ma);
83 }
Behdad Esfahbod60a30352012-08-01 21:06:27 -040084 }
85
86 inline bool may_have (hb_codepoint_t g) const {
87 return !!(mask & mask_for (g));
88 }
89
90 private:
91
Behdad Esfahbodf9a61102013-04-17 19:01:49 -040092 static inline mask_t mask_for (hb_codepoint_t g) {
Behdad Esfahbode6f19af2013-05-02 13:59:46 -040093 return ((mask_t) 1) << ((g >> shift) & (mask_bits - 1));
Behdad Esfahbodc7851ef2013-04-17 17:45:39 -040094 }
Behdad Esfahbod60a30352012-08-01 21:06:27 -040095 mask_t mask;
96};
97
Behdad Esfahbodb40f2c02013-04-16 23:21:38 -040098template <typename head_t, typename tail_t>
99struct hb_set_digest_combiner_t
Behdad Esfahbod60a30352012-08-01 21:06:27 -0400100{
101 ASSERT_POD ();
102
103 inline void init (void) {
Behdad Esfahbodb40f2c02013-04-16 23:21:38 -0400104 head.init ();
105 tail.init ();
Behdad Esfahbod60a30352012-08-01 21:06:27 -0400106 }
107
108 inline void add (hb_codepoint_t g) {
Behdad Esfahbodb40f2c02013-04-16 23:21:38 -0400109 head.add (g);
110 tail.add (g);
Behdad Esfahbod60a30352012-08-01 21:06:27 -0400111 }
112
113 inline void add_range (hb_codepoint_t a, hb_codepoint_t b) {
Behdad Esfahbodb40f2c02013-04-16 23:21:38 -0400114 head.add_range (a, b);
115 tail.add_range (a, b);
Behdad Esfahbod60a30352012-08-01 21:06:27 -0400116 }
117
118 inline bool may_have (hb_codepoint_t g) const {
Behdad Esfahbodb40f2c02013-04-16 23:21:38 -0400119 return head.may_have (g) && tail.may_have (g);
Behdad Esfahbod60a30352012-08-01 21:06:27 -0400120 }
121
122 private:
Behdad Esfahbodb40f2c02013-04-16 23:21:38 -0400123 head_t head;
124 tail_t tail;
Behdad Esfahbod60a30352012-08-01 21:06:27 -0400125};
126
Behdad Esfahbod0d5798a2013-04-17 18:19:21 -0400127
128/*
129 * hb_set_digest_t
130 *
131 * This is a combination of digests that performs "best".
132 * There is not much science to this: it's a result of intuition
133 * and testing.
134 */
135typedef hb_set_digest_combiner_t
136<
137 hb_set_digest_lowest_bits_t<unsigned long, 4>,
138 hb_set_digest_combiner_t
139 <
140 hb_set_digest_lowest_bits_t<unsigned long, 0>,
141 hb_set_digest_lowest_bits_t<unsigned long, 9>
142 >
143> hb_set_digest_t;
144
145
146
147/*
148 * hb_set_t
149 */
Behdad Esfahbodb40f2c02013-04-16 23:21:38 -0400150
Behdad Esfahbod60a30352012-08-01 21:06:27 -0400151
/* TODO Make this faster and more memory-efficient. */
Behdad Esfahbod0b08adb2012-04-23 22:41:09 -0400153
Behdad Esfahbod1bc1cb32012-06-16 15:21:55 -0400154struct hb_set_t
Behdad Esfahbod0b08adb2012-04-23 22:41:09 -0400155{
Behdad Esfahbod7b7129c2015-01-28 21:46:07 -0800156 friend struct hb_frozen_set_t;
157
Behdad Esfahbod6220e5f2012-06-06 03:30:09 -0400158 hb_object_header_t header;
159 ASSERT_POD ();
Behdad Esfahbod8165f272013-01-02 22:50:36 -0600160 bool in_error;
Behdad Esfahbod6220e5f2012-06-06 03:30:09 -0400161
Behdad Esfahbod1827dc22012-04-24 16:56:37 -0400162 inline void init (void) {
Behdad Esfahbodcd7ea4f2014-08-14 12:57:02 -0400163 hb_object_init (this);
Behdad Esfahbod1827dc22012-04-24 16:56:37 -0400164 clear ();
165 }
Behdad Esfahboda5e39fe2012-04-25 00:14:46 -0400166 inline void fini (void) {
167 }
Behdad Esfahbod0b08adb2012-04-23 22:41:09 -0400168 inline void clear (void) {
Behdad Esfahbod7b1b7202013-01-02 23:02:59 -0600169 if (unlikely (hb_object_is_inert (this)))
170 return;
171 in_error = false;
Behdad Esfahbod0b08adb2012-04-23 22:41:09 -0400172 memset (elts, 0, sizeof elts);
173 }
Behdad Esfahbodaec89de2012-11-15 16:15:42 -0800174 inline bool is_empty (void) const {
Behdad Esfahbod6c6ccaf2012-04-24 14:21:15 -0400175 for (unsigned int i = 0; i < ARRAY_LENGTH (elts); i++)
176 if (elts[i])
177 return false;
178 return true;
179 }
Behdad Esfahbod5caece62012-04-23 23:03:12 -0400180 inline void add (hb_codepoint_t g)
Behdad Esfahbod0b08adb2012-04-23 22:41:09 -0400181 {
Behdad Esfahbod7b1b7202013-01-02 23:02:59 -0600182 if (unlikely (in_error)) return;
Behdad Esfahbod20cbc1f2013-09-06 15:29:22 -0400183 if (unlikely (g == INVALID)) return;
Behdad Esfahbod5caece62012-04-23 23:03:12 -0400184 if (unlikely (g > MAX_G)) return;
185 elt (g) |= mask (g);
Behdad Esfahbod0b08adb2012-04-23 22:41:09 -0400186 }
Behdad Esfahbod67bb9e82012-06-09 02:02:46 -0400187 inline void add_range (hb_codepoint_t a, hb_codepoint_t b)
188 {
Behdad Esfahbod7b1b7202013-01-02 23:02:59 -0600189 if (unlikely (in_error)) return;
Behdad Esfahbodaec89de2012-11-15 16:15:42 -0800190 /* TODO Speedup */
Behdad Esfahbod67bb9e82012-06-09 02:02:46 -0400191 for (unsigned int i = a; i < b + 1; i++)
192 add (i);
193 }
Behdad Esfahbod5caece62012-04-23 23:03:12 -0400194 inline void del (hb_codepoint_t g)
Behdad Esfahbod0b08adb2012-04-23 22:41:09 -0400195 {
Behdad Esfahbod7b1b7202013-01-02 23:02:59 -0600196 if (unlikely (in_error)) return;
Behdad Esfahbod5caece62012-04-23 23:03:12 -0400197 if (unlikely (g > MAX_G)) return;
198 elt (g) &= ~mask (g);
Behdad Esfahbod0b08adb2012-04-23 22:41:09 -0400199 }
Behdad Esfahbodaec89de2012-11-15 16:15:42 -0800200 inline void del_range (hb_codepoint_t a, hb_codepoint_t b)
201 {
Behdad Esfahbod7b1b7202013-01-02 23:02:59 -0600202 if (unlikely (in_error)) return;
Behdad Esfahbodaec89de2012-11-15 16:15:42 -0800203 /* TODO Speedup */
204 for (unsigned int i = a; i < b + 1; i++)
205 del (i);
206 }
Behdad Esfahbod0b08adb2012-04-23 22:41:09 -0400207 inline bool has (hb_codepoint_t g) const
208 {
209 if (unlikely (g > MAX_G)) return false;
210 return !!(elt (g) & mask (g));
211 }
212 inline bool intersects (hb_codepoint_t first,
213 hb_codepoint_t last) const
214 {
215 if (unlikely (first > MAX_G)) return false;
216 if (unlikely (last > MAX_G)) last = MAX_G;
217 unsigned int end = last + 1;
218 for (hb_codepoint_t i = first; i < end; i++)
219 if (has (i))
220 return true;
221 return false;
222 }
Behdad Esfahbodaec89de2012-11-15 16:15:42 -0800223 inline bool is_equal (const hb_set_t *other) const
Behdad Esfahbod6c6ccaf2012-04-24 14:21:15 -0400224 {
225 for (unsigned int i = 0; i < ELTS; i++)
226 if (elts[i] != other->elts[i])
227 return false;
228 return true;
229 }
230 inline void set (const hb_set_t *other)
231 {
Behdad Esfahbod7b1b7202013-01-02 23:02:59 -0600232 if (unlikely (in_error)) return;
Behdad Esfahbod6c6ccaf2012-04-24 14:21:15 -0400233 for (unsigned int i = 0; i < ELTS; i++)
234 elts[i] = other->elts[i];
235 }
236 inline void union_ (const hb_set_t *other)
237 {
Behdad Esfahbod7b1b7202013-01-02 23:02:59 -0600238 if (unlikely (in_error)) return;
Behdad Esfahbod6c6ccaf2012-04-24 14:21:15 -0400239 for (unsigned int i = 0; i < ELTS; i++)
240 elts[i] |= other->elts[i];
241 }
242 inline void intersect (const hb_set_t *other)
243 {
Behdad Esfahbod7b1b7202013-01-02 23:02:59 -0600244 if (unlikely (in_error)) return;
Behdad Esfahbod6c6ccaf2012-04-24 14:21:15 -0400245 for (unsigned int i = 0; i < ELTS; i++)
246 elts[i] &= other->elts[i];
247 }
248 inline void subtract (const hb_set_t *other)
249 {
Behdad Esfahbod7b1b7202013-01-02 23:02:59 -0600250 if (unlikely (in_error)) return;
Behdad Esfahbod6c6ccaf2012-04-24 14:21:15 -0400251 for (unsigned int i = 0; i < ELTS; i++)
252 elts[i] &= ~other->elts[i];
253 }
Behdad Esfahbod62c3e112012-05-25 13:48:00 -0400254 inline void symmetric_difference (const hb_set_t *other)
255 {
Behdad Esfahbod7b1b7202013-01-02 23:02:59 -0600256 if (unlikely (in_error)) return;
Behdad Esfahbod62c3e112012-05-25 13:48:00 -0400257 for (unsigned int i = 0; i < ELTS; i++)
258 elts[i] ^= other->elts[i];
259 }
Behdad Esfahbod8165f272013-01-02 22:50:36 -0600260 inline void invert (void)
261 {
Behdad Esfahbod7b1b7202013-01-02 23:02:59 -0600262 if (unlikely (in_error)) return;
Behdad Esfahbod8165f272013-01-02 22:50:36 -0600263 for (unsigned int i = 0; i < ELTS; i++)
264 elts[i] = ~elts[i];
265 }
Behdad Esfahbodaec89de2012-11-15 16:15:42 -0800266 inline bool next (hb_codepoint_t *codepoint) const
Behdad Esfahbod29ce4462012-05-25 14:17:54 -0400267 {
Behdad Esfahbod20cbc1f2013-09-06 15:29:22 -0400268 if (unlikely (*codepoint == INVALID)) {
Behdad Esfahbod29ce4462012-05-25 14:17:54 -0400269 hb_codepoint_t i = get_min ();
Behdad Esfahbod20cbc1f2013-09-06 15:29:22 -0400270 if (i != INVALID) {
Behdad Esfahbod29ce4462012-05-25 14:17:54 -0400271 *codepoint = i;
272 return true;
Behdad Esfahbod20cbc1f2013-09-06 15:29:22 -0400273 } else {
274 *codepoint = INVALID;
Behdad Esfahbod29ce4462012-05-25 14:17:54 -0400275 return false;
Behdad Esfahbod20cbc1f2013-09-06 15:29:22 -0400276 }
Behdad Esfahbod29ce4462012-05-25 14:17:54 -0400277 }
278 for (hb_codepoint_t i = *codepoint + 1; i < MAX_G + 1; i++)
279 if (has (i)) {
280 *codepoint = i;
281 return true;
282 }
Behdad Esfahbod20cbc1f2013-09-06 15:29:22 -0400283 *codepoint = INVALID;
Behdad Esfahbod29ce4462012-05-25 14:17:54 -0400284 return false;
285 }
Behdad Esfahbodaec89de2012-11-15 16:15:42 -0800286 inline bool next_range (hb_codepoint_t *first, hb_codepoint_t *last) const
287 {
288 hb_codepoint_t i;
289
290 i = *last;
291 if (!next (&i))
Behdad Esfahbod20cbc1f2013-09-06 15:29:22 -0400292 {
293 *last = *first = INVALID;
Behdad Esfahbodaec89de2012-11-15 16:15:42 -0800294 return false;
Behdad Esfahbod20cbc1f2013-09-06 15:29:22 -0400295 }
Behdad Esfahbodaec89de2012-11-15 16:15:42 -0800296
297 *last = *first = i;
298 while (next (&i) && i == *last + 1)
299 (*last)++;
300
301 return true;
302 }
303
304 inline unsigned int get_population (void) const
305 {
306 unsigned int count = 0;
307 for (unsigned int i = 0; i < ELTS; i++)
308 count += _hb_popcount32 (elts[i]);
309 return count;
310 }
Behdad Esfahbodf039e792012-05-17 20:55:12 -0400311 inline hb_codepoint_t get_min (void) const
Behdad Esfahbod6c6ccaf2012-04-24 14:21:15 -0400312 {
313 for (unsigned int i = 0; i < ELTS; i++)
314 if (elts[i])
Behdad Esfahbodfa3d0a02013-05-14 15:30:55 -0400315 for (unsigned int j = 0; j < BITS; j++)
Behdad Esfahbod6c6ccaf2012-04-24 14:21:15 -0400316 if (elts[i] & (1 << j))
317 return i * BITS + j;
Behdad Esfahbod20cbc1f2013-09-06 15:29:22 -0400318 return INVALID;
Behdad Esfahbod6c6ccaf2012-04-24 14:21:15 -0400319 }
Behdad Esfahbodf039e792012-05-17 20:55:12 -0400320 inline hb_codepoint_t get_max (void) const
Behdad Esfahbod6c6ccaf2012-04-24 14:21:15 -0400321 {
322 for (unsigned int i = ELTS; i; i--)
323 if (elts[i - 1])
324 for (unsigned int j = BITS; j; j--)
325 if (elts[i - 1] & (1 << (j - 1)))
326 return (i - 1) * BITS + (j - 1);
Behdad Esfahbod20cbc1f2013-09-06 15:29:22 -0400327 return INVALID;
Behdad Esfahbod6c6ccaf2012-04-24 14:21:15 -0400328 }
Behdad Esfahbod0b08adb2012-04-23 22:41:09 -0400329
330 typedef uint32_t elt_t;
Behdad Esfahbod29ce4462012-05-25 14:17:54 -0400331 static const unsigned int MAX_G = 65536 - 1; /* XXX Fix this... */
Behdad Esfahbod0b08adb2012-04-23 22:41:09 -0400332 static const unsigned int SHIFT = 5;
333 static const unsigned int BITS = (1 << SHIFT);
334 static const unsigned int MASK = BITS - 1;
Behdad Esfahbod6c6ccaf2012-04-24 14:21:15 -0400335 static const unsigned int ELTS = (MAX_G + 1 + (BITS - 1)) / BITS;
Behdad Esfahbod20cbc1f2013-09-06 15:29:22 -0400336 static const hb_codepoint_t INVALID = HB_SET_VALUE_INVALID;
Behdad Esfahbod0b08adb2012-04-23 22:41:09 -0400337
338 elt_t &elt (hb_codepoint_t g) { return elts[g >> SHIFT]; }
Behdad Esfahbod7b7129c2015-01-28 21:46:07 -0800339 elt_t const &elt (hb_codepoint_t g) const { return elts[g >> SHIFT]; }
Behdad Esfahbod0b08adb2012-04-23 22:41:09 -0400340 elt_t mask (hb_codepoint_t g) const { return elt_t (1) << (g & MASK); }
341
Behdad Esfahbodb5fa37c2012-05-10 23:09:48 +0200342 elt_t elts[ELTS]; /* XXX 8kb */
Behdad Esfahbod0b08adb2012-04-23 22:41:09 -0400343
344 ASSERT_STATIC (sizeof (elt_t) * 8 == BITS);
Behdad Esfahbod1a2a4a02012-05-05 22:38:20 +0200345 ASSERT_STATIC (sizeof (elt_t) * 8 * ELTS > MAX_G);
Behdad Esfahbod0b08adb2012-04-23 22:41:09 -0400346};
347
Behdad Esfahbod7b7129c2015-01-28 21:46:07 -0800348struct hb_frozen_set_t
349{
350 static const unsigned int SHIFT = hb_set_t::SHIFT;
351 static const unsigned int BITS = hb_set_t::BITS;
352 static const unsigned int MASK = hb_set_t::MASK;
353 typedef hb_set_t::elt_t elt_t;
354
355 inline void init (const hb_set_t &set)
356 {
357 start = count = 0;
358 elts = NULL;
359
360 unsigned int max = set.get_max ();
361 if (max == set.INVALID)
362 return;
363 unsigned int min = set.get_min ();
364 const elt_t &min_elt = set.elt (min);
Behdad Esfahbod7b7129c2015-01-28 21:46:07 -0800365
366 start = min & ~MASK;
367 count = max - start + 1;
368 unsigned int num_elts = (count + BITS - 1) / BITS;
369 unsigned int elts_size = num_elts * sizeof (elt_t);
370 elts = (elt_t *) malloc (elts_size);
371 if (unlikely (!elts))
372 {
373 start = count = 0;
374 return;
375 }
376 memcpy (elts, &min_elt, elts_size);
377 }
378
379 inline void fini (void)
380 {
381 if (elts)
382 free (elts);
383 }
384
385 inline bool has (hb_codepoint_t g) const
386 {
387 /* hb_codepoint_t is unsigned. */
388 g -= start;
389 if (unlikely (g > count)) return false;
390 return !!(elt (g) & mask (g));
391 }
392
393 elt_t const &elt (hb_codepoint_t g) const { return elts[g >> SHIFT]; }
394 elt_t mask (hb_codepoint_t g) const { return elt_t (1) << (g & MASK); }
395
396 private:
397 hb_codepoint_t start, count;
398 elt_t *elts;
399};
Behdad Esfahbod0b08adb2012-04-23 22:41:09 -0400400
401
402#endif /* HB_SET_PRIVATE_HH */