/*
 * Copyright © 2012  Google, Inc.
 *
 *  This is part of HarfBuzz, a text shaping library.
 *
 * Permission is hereby granted, without written agreement and without
 * license or royalty fees, to use, copy, modify, and distribute this
 * software and its documentation for any purpose, provided that the
 * above copyright notice and the following two paragraphs appear in
 * all copies of this software.
 *
 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
 * DAMAGE.
 *
 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
 * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
 *
 * Google Author(s): Behdad Esfahbod
 */

#ifndef HB_SET_DIGEST_HH
#define HB_SET_DIGEST_HH

#include "hb.hh"

/*
 * The set digests here implement various "filters" that support
 * "approximate membership queries".  Conceptually they are like a
 * Bloom filter or quotient filter, but much smaller and faster,
 * designed to fit the requirements of our glyph-coverage queries.
 *
 * Our filters are highly accurate if a lookup covers a fairly
 * localized set of glyphs, but become fully flooded, and hence
 * ineffective, if the coverage is spread all over the place.
 *
 * The frozen-set can be used instead of a digest, trading more
 * memory for 100% accuracy, but in practice that does not look
 * like an attractive trade-off.
 */
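
/*
 * Illustrative usage sketch (not part of this header's API surface):
 * a client collects the coverage of a lookup into a digest once, then
 * uses may_have () to cheaply reject glyphs that cannot possibly be
 * covered.  A false positive from may_have () is possible; a false
 * rejection is not.  The coverage range below is hypothetical.
 *
 *   hb_set_digest_t digest;
 *   digest.init ();
 *   digest.add_range (0x0600u, 0x06FFu);
 *
 *   if (!digest.may_have (glyph))
 *     return;  // Definitely not covered; skip the expensive check.
 *   // ...otherwise consult the actual coverage table.
 */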

template <typename mask_t, unsigned int shift>
struct hb_set_digest_lowest_bits_t
{
  static constexpr unsigned mask_bytes = sizeof (mask_t);
  static constexpr unsigned mask_bits = sizeof (mask_t) * 8;
  static constexpr unsigned num_bits = 0
    + (mask_bytes >= 1 ? 3 : 0)
    + (mask_bytes >= 2 ? 1 : 0)
    + (mask_bytes >= 4 ? 1 : 0)
    + (mask_bytes >= 8 ? 1 : 0)
    + (mask_bytes >= 16 ? 1 : 0)
    + 0;
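  /* num_bits is log2 (mask_bits): e.g. for an 8-byte mask_t this
   * evaluates to 3+1+1+1 = 6, and 1 << 6 == 64 == mask_bits.  It is
   * the number of codepoint bits (after `shift`) that the filter
   * can distinguish. */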

  static_assert ((shift < sizeof (hb_codepoint_t) * 8), "");
  static_assert ((shift + num_bits <= sizeof (hb_codepoint_t) * 8), "");

  void init () { mask = 0; }

  void add (hb_codepoint_t g) { mask |= mask_for (g); }

  bool add_range (hb_codepoint_t a, hb_codepoint_t b)
  {
    if ((b >> shift) - (a >> shift) >= mask_bits - 1)
      mask = (mask_t) -1;
    else {
      mask_t ma = mask_for (a);
      mask_t mb = mask_for (b);
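      /* Set every bit from ma's bit up to mb's bit, inclusive,
       * wrapping around the mask: when mb >= ma this is simply
       * (mb << 1) - ma; when mb < ma, the unsigned wraparound of
       * (mb - ma) together with the extra -1 fills both ends of
       * the mask instead. */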
      mask |= mb + (mb - ma) - (mb < ma);
    }
    return true;
  }

  template <typename T>
  void add_array (const T *array, unsigned int count, unsigned int stride=sizeof (T))
  {
    for (unsigned int i = 0; i < count; i++)
    {
      add (*array);
      array = (const T *) (stride + (const char *) array);
    }
  }
  template <typename T>
  bool add_sorted_array (const T *array, unsigned int count, unsigned int stride=sizeof (T))
  {
    for (unsigned int i = 0; i < count; i++)
    {
      add (*array);
      array = (const T *) (stride + (const char *) array);
    }
    return true;
  }

  bool may_have (hb_codepoint_t g) const
  { return !!(mask & mask_for (g)); }

  private:

  static mask_t mask_for (hb_codepoint_t g)
  { return ((mask_t) 1) << ((g >> shift) & (mask_bits - 1)); }
  mask_t mask;
};

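/*
 * The combiner below forwards every operation to two sub-digests and
 * ANDs their answers: a glyph "may" be in the set only if both halves
 * agree, so the false-positive rates of the two filters roughly
 * multiply (assuming the filters are independent).
 */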
template <typename head_t, typename tail_t>
struct hb_set_digest_combiner_t
{
  void init ()
  {
    head.init ();
    tail.init ();
  }

  void add (hb_codepoint_t g)
  {
    head.add (g);
    tail.add (g);
  }

  bool add_range (hb_codepoint_t a, hb_codepoint_t b)
  {
    head.add_range (a, b);
    tail.add_range (a, b);
    return true;
  }
  template <typename T>
  void add_array (const T *array, unsigned int count, unsigned int stride=sizeof (T))
  {
    head.add_array (array, count, stride);
    tail.add_array (array, count, stride);
  }
  template <typename T>
  bool add_sorted_array (const T *array, unsigned int count, unsigned int stride=sizeof (T))
  {
    head.add_sorted_array (array, count, stride);
    tail.add_sorted_array (array, count, stride);
    return true;
  }

  bool may_have (hb_codepoint_t g) const
  {
    return head.may_have (g) && tail.may_have (g);
  }

  private:
  head_t head;
  tail_t tail;
};


/*
 * hb_set_digest_t
 *
 * This is the combination of digests that performs best.
 * There is not much science to this: it is the result of
 * intuition and testing.
 */
typedef hb_set_digest_combiner_t
<
  hb_set_digest_lowest_bits_t<unsigned long, 4>,
  hb_set_digest_combiner_t
  <
    hb_set_digest_lowest_bits_t<unsigned long, 0>,
    hb_set_digest_lowest_bits_t<unsigned long, 9>
  >
> hb_set_digest_t;
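
/*
 * Size sketch (assuming LP64, where unsigned long is 64 bits): the
 * digest is three 64-bit masks, i.e. 24 bytes total.  With
 * num_bits == 6, the three filters key on codepoint bits 4..9, 0..5,
 * and 9..14 respectively, so they flood at different scales of
 * glyph-ID locality.
 */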


#endif /* HB_SET_DIGEST_HH */