/*
 * Copyright © 2012  Google, Inc.
 *
 * This is part of HarfBuzz, a text shaping library.
 *
 * Permission is hereby granted, without written agreement and without
 * license or royalty fees, to use, copy, modify, and distribute this
 * software and its documentation for any purpose, provided that the
 * above copyright notice and the following two paragraphs appear in
 * all copies of this software.
 *
 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
 * DAMAGE.
 *
 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
 * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
 *
 * Google Author(s): Behdad Esfahbod
 */

#ifndef HB_SET_DIGEST_HH
#define HB_SET_DIGEST_HH

#include "hb.hh"

/*
 * The set digests here implement various "filters" that support
 * "approximate member query": they can tell that a codepoint is
 * definitely not in a set, or that it might be.  Conceptually these
 * are like a Bloom filter or a quotient filter, but much smaller,
 * faster, and designed to fit the requirements of our uses for glyph
 * coverage queries.
 *
 * Our filters are highly accurate if the lookup covers a fairly local
 * set of glyphs, but become fully flooded and ineffective if the
 * coverage is spread all over the place.
 *
 * A frozen set could be used instead of a digest, trading more memory
 * for 100% accuracy, but in practice that does not look like an
 * attractive trade-off.
 */

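/* An illustrative sketch (assuming a 64-bit unsigned long, as used by
 * hb_set_digest_t below): with mask_t == unsigned long and shift == 4,
 * a codepoint g occupies bit ((g >> 4) & 63) of the mask.  Adding the
 * local range U+0600..U+06FF sets only 16 of the 64 bits, so most
 * unrelated codepoints are still rejected; adding codepoints scattered
 * across all of Unicode eventually sets every bit and "floods" the
 * digest, making may_have() always return true.
 */
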
template <typename mask_t, unsigned int shift>
struct hb_set_digest_lowest_bits_t
{
  static constexpr unsigned mask_bytes = sizeof (mask_t);
  static constexpr unsigned mask_bits = sizeof (mask_t) * 8;
  static constexpr unsigned num_bits = 0
    + (mask_bytes >= 1 ? 3 : 0)
    + (mask_bytes >= 2 ? 1 : 0)
    + (mask_bytes >= 4 ? 1 : 0)
    + (mask_bytes >= 8 ? 1 : 0)
    + (mask_bytes >= 16? 1 : 0)
    + 0;

  static_assert ((shift < sizeof (hb_codepoint_t) * 8), "");
  static_assert ((shift + num_bits <= sizeof (hb_codepoint_t) * 8), "");

  void init () { mask = 0; }

  void add (hb_codepoint_t g) { mask |= mask_for (g); }

  bool add_range (hb_codepoint_t a, hb_codepoint_t b)
  {
    if ((b >> shift) - (a >> shift) >= mask_bits - 1)
      mask = (mask_t) -1;
    else {
      mask_t ma = mask_for (a);
      mask_t mb = mask_for (b);
      /* Set every bit from a's bit up to b's bit, inclusive; the
       * arithmetic wraps correctly when b's bit falls below a's. */
      mask |= mb + (mb - ma) - (mb < ma);
    }
    return true;
  }

  template <typename T>
  void add_array (const T *array, unsigned int count, unsigned int stride=sizeof(T))
  {
    for (unsigned int i = 0; i < count; i++)
    {
      add (*array);
      array = (const T *) (stride + (const char *) array);
    }
  }
  template <typename T>
  bool add_sorted_array (const T *array, unsigned int count, unsigned int stride=sizeof(T))
  {
    for (unsigned int i = 0; i < count; i++)
    {
      add (*array);
      array = (const T *) (stride + (const char *) array);
    }
    return true;
  }

  bool may_have (hb_codepoint_t g) const
  { return !!(mask & mask_for (g)); }

  private:

  static mask_t mask_for (hb_codepoint_t g)
  { return ((mask_t) 1) << ((g >> shift) & (mask_bits - 1)); }
  mask_t mask;
};
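
/* A minimal usage sketch (the codepoints are arbitrary examples, and a
 * 64-bit unsigned long mask is assumed):
 *
 *   hb_set_digest_lowest_bits_t<unsigned long, 0> digest;
 *   digest.init ();
 *   digest.add (0x41u);       // sets bit (0x41 & 63), i.e. bit 1
 *   digest.may_have (0x41u);  // true
 *   digest.may_have (0x42u);  // false: bit 2 was never set
 *   digest.may_have (0x01u);  // true as well: a false positive, since
 *                             // 0x01 hashes to the same bit as 0x41
 *
 * may_have() can report false positives but never false negatives,
 * which is exactly the "approximate member query" guarantee described
 * above.
 */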
109
110template <typename head_t, typename tail_t>
111struct hb_set_digest_combiner_t
112{
Ebrahim Byagowie4120082018-12-17 21:31:01 +0330113 void init ()
Ebrahim Byagowib2ebaa92018-12-16 22:38:10 +0330114 {
Behdad Esfahbod826a1da2017-10-15 14:09:05 +0200115 head.init ();
116 tail.init ();
117 }
118
Ebrahim Byagowib2ebaa92018-12-16 22:38:10 +0330119 void add (hb_codepoint_t g)
120 {
Behdad Esfahbod826a1da2017-10-15 14:09:05 +0200121 head.add (g);
122 tail.add (g);
123 }
124
Ebrahim Byagowib2ebaa92018-12-16 22:38:10 +0330125 bool add_range (hb_codepoint_t a, hb_codepoint_t b)
126 {
Behdad Esfahbod826a1da2017-10-15 14:09:05 +0200127 head.add_range (a, b);
128 tail.add_range (a, b);
Behdad Esfahbod81f27df2017-12-16 06:12:06 -0800129 return true;
Behdad Esfahbod826a1da2017-10-15 14:09:05 +0200130 }
Behdad Esfahbod0fe62c12017-12-13 13:12:20 -0800131 template <typename T>
Ebrahim Byagowib2ebaa92018-12-16 22:38:10 +0330132 void add_array (const T *array, unsigned int count, unsigned int stride=sizeof(T))
Behdad Esfahbod0fe62c12017-12-13 13:12:20 -0800133 {
Behdad Esfahbod5d025722017-12-14 19:33:55 -0800134 head.add_array (array, count, stride);
135 tail.add_array (array, count, stride);
136 }
137 template <typename T>
Ebrahim Byagowib2ebaa92018-12-16 22:38:10 +0330138 bool add_sorted_array (const T *array, unsigned int count, unsigned int stride=sizeof(T))
Behdad Esfahbod5d025722017-12-14 19:33:55 -0800139 {
140 head.add_sorted_array (array, count, stride);
141 tail.add_sorted_array (array, count, stride);
142 return true;
Behdad Esfahbod0fe62c12017-12-13 13:12:20 -0800143 }
Behdad Esfahbod826a1da2017-10-15 14:09:05 +0200144
Ebrahim Byagowib2ebaa92018-12-16 22:38:10 +0330145 bool may_have (hb_codepoint_t g) const
146 {
Behdad Esfahbod826a1da2017-10-15 14:09:05 +0200147 return head.may_have (g) && tail.may_have (g);
148 }
149
150 private:
151 head_t head;
152 tail_t tail;
153};
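
/* Note (illustrative): the combiner answers may_have() with the logical
 * AND of its two sub-digests, so a codepoint must collide in every
 * component to produce a false positive.  Combining digests that look at
 * different bit ranges of the codepoint (different shift values), e.g.
 *
 *   hb_set_digest_combiner_t<
 *     hb_set_digest_lowest_bits_t<unsigned long, 0>,
 *     hb_set_digest_lowest_bits_t<unsigned long, 9> > d;
 *
 * makes accidental collisions far less likely than any single digest.
 */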


/*
 * hb_set_digest_t
 *
 * This is a combination of digests that performs "best".
 * There is not much science to this: it's a result of intuition
 * and testing.
 */
typedef hb_set_digest_combiner_t
<
  hb_set_digest_lowest_bits_t<unsigned long, 4>,
  hb_set_digest_combiner_t
  <
    hb_set_digest_lowest_bits_t<unsigned long, 0>,
    hb_set_digest_lowest_bits_t<unsigned long, 9>
  >
> hb_set_digest_t;
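
/* Example use (a sketch: the glyph array and the exact-coverage check are
 * hypothetical stand-ins, not HarfBuzz API):
 *
 *   hb_set_digest_t digest;
 *   digest.init ();
 *   digest.add_array (glyphs, num_glyphs);
 *   ...
 *   if (digest.may_have (gid))
 *     check_coverage_exactly (gid);  // hypothetical slow, exact path
 *   // else: gid is definitely not covered; skip the expensive lookup.
 */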


#endif /* HB_SET_DIGEST_HH */