[kern/kerx] Use a bit-vector instead of bit-set for left/right filtering

Shows ~2% speedup shaping of HelveticaNeue.ttc.
diff --git a/src/hb-aat-layout-common.hh b/src/hb-aat-layout-common.hh
index fa34afa..4db01fa 100644
--- a/src/hb-aat-layout-common.hh
+++ b/src/hb-aat-layout-common.hh
@@ -35,6 +35,7 @@
 #include "hb-cache.hh"
 #include "hb-bit-set.hh"
 #include "hb-bit-page.hh"
+#include "hb-bit-vector.hh"
 
 
 namespace OT {
@@ -73,8 +74,8 @@
   const hb_sorted_vector_t<hb_aat_map_t::range_flags_t> *range_flags = nullptr;
   bool using_buffer_glyph_set = false;
   hb_bit_set_t buffer_glyph_set;
-  const hb_bit_set_t *left_set = nullptr;
-  const hb_bit_set_t *right_set = nullptr;
+  const hb_bit_vector_t *left_set = nullptr;
+  const hb_bit_vector_t *right_set = nullptr;
   const hb_bit_set_t *machine_glyph_set = nullptr;
   hb_aat_class_cache_t *machine_class_cache = nullptr;
   hb_mask_t subtable_flags = 0;
diff --git a/src/hb-aat-layout-kerx-table.hh b/src/hb-aat-layout-kerx-table.hh
index 4b980ca..ea5fef3 100644
--- a/src/hb-aat-layout-kerx-table.hh
+++ b/src/hb-aat-layout-kerx-table.hh
@@ -30,7 +30,7 @@
 
 #include "hb-kern.hh"
 #include "hb-aat-layout-ankr-table.hh"
-#include "hb-set-digest.hh"
+#include "hb-bit-vector.hh"
 
 /*
  * kerx -- Extended Kerning
@@ -394,7 +394,7 @@
   template <typename set_t>
   void collect_glyphs (set_t &left_set, set_t &right_set, unsigned num_glyphs) const
   {
-    set_t set;
+    hb_bit_set_t set;
     machine.collect_glyphs (set, num_glyphs);
     left_set.union_ (set);
     right_set.union_ (set);
@@ -671,7 +671,7 @@
   template <typename set_t>
   void collect_glyphs (set_t &left_set, set_t &right_set, unsigned num_glyphs) const
   {
-    set_t set;
+    hb_bit_set_t set;
     machine.collect_glyphs (set, num_glyphs);
     left_set.union_ (set);
     right_set.union_ (set);
@@ -921,7 +921,7 @@
  * The 'kerx' Table
  */
 
-using kern_accelerator_data_t = hb_vector_t<hb_pair_t<hb_bit_set_t, hb_bit_set_t>>;
+using kern_accelerator_data_t = hb_vector_t<hb_pair_t<hb_bit_vector_t, hb_bit_vector_t>>;
 
 template <typename T>
 struct KerxTable
@@ -1106,9 +1106,19 @@
     unsigned int count = thiz()->tableCount;
     for (unsigned int i = 0; i < count; i++)
     {
-      hb_bit_set_t left_set, right_set;
+      hb_min_max_t left_range, right_range;
+      st->collect_glyphs (left_range, right_range, num_glyphs);
+
+      hb_bit_vector_t left_set (left_range.get_min (), left_range.get_max ());
+      hb_bit_vector_t right_set (right_range.get_min (), right_range.get_max ());
       st->collect_glyphs (left_set, right_set, num_glyphs);
-      accel_data.push (hb_pair (left_set, right_set));
+
+      auto pair = accel_data.push ();
+      if (unlikely (accel_data.in_error ()))
+	break;
+      pair->first = std::move (left_set);
+      pair->second = std::move (right_set);
+
       st = &StructAfter<SubTable> (*st);
     }
 
diff --git a/src/hb-bit-vector.hh b/src/hb-bit-vector.hh
new file mode 100644
index 0000000..23044dc
--- /dev/null
+++ b/src/hb-bit-vector.hh
@@ -0,0 +1,189 @@
+/*
+ *  This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Author(s): Behdad Esfahbod
+ */
+
+#ifndef HB_BIT_VECTOR_HH
+#define HB_BIT_VECTOR_HH
+
+#include "hb.hh"
+
+struct hb_min_max_t
+{
+  void add (hb_codepoint_t v) { min_v = hb_min (min_v, v); max_v = hb_max (max_v, v); }
+  void add_range (hb_codepoint_t a, hb_codepoint_t b)
+  {
+    min_v = hb_min (min_v, a);
+    max_v = hb_max (max_v, b);
+  }
+
+  template <typename set_t>
+  void union_ (const set_t &set)
+  {
+    hb_codepoint_t set_min = set.get_min ();
+    if (unlikely (set_min == HB_CODEPOINT_INVALID))
+      return;
+    hb_codepoint_t set_max = set.get_max ();
+    min_v = hb_min (min_v, set_min);
+    max_v = hb_max (max_v, set_max);
+  }
+
+  hb_codepoint_t get_min () const { return min_v; }
+  hb_codepoint_t get_max () const { return max_v; }
+
+  private:
+  hb_codepoint_t min_v = HB_CODEPOINT_INVALID;
+  hb_codepoint_t max_v = 0;
+};
+
+struct hb_bit_vector_t
+{
+  hb_bit_vector_t () = default;
+  hb_bit_vector_t (const hb_bit_vector_t &other) = delete;
+  hb_bit_vector_t &operator= (const hb_bit_vector_t &other) = delete;
+
+  // Move
+  hb_bit_vector_t (hb_bit_vector_t &&other)
+		: min_v (other.min_v), max_v (other.max_v), count (other.count), elts (other.elts)
+  {
+    other.min_v = other.max_v = other.count = 0;
+    other.elts = nullptr;
+  }
+  hb_bit_vector_t &operator= (hb_bit_vector_t &&other)
+  {
+    hb_swap (min_v, other.min_v);
+    hb_swap (max_v, other.max_v);
+    hb_swap (count, other.count);
+    hb_swap (elts, other.elts);
+    return *this;
+  }
+
+  hb_bit_vector_t (unsigned min_v, unsigned max_v)
+    : min_v (min_v), max_v (max_v)
+  {
+    if (unlikely (min_v > max_v))
+    {
+      min_v = max_v = count = 0;
+      return;
+    }
+
+    unsigned num = (max_v - min_v + sizeof (elt_t) * 8) / (sizeof (elt_t) * 8);
+    elts = (elt_t *) hb_calloc (num, sizeof (elt_t));
+    if (unlikely (!elts))
+    {
+      min_v = max_v = count = 0;
+      return;
+    }
+
+    count = max_v - min_v + 1;
+  }
+  ~hb_bit_vector_t ()
+  {
+    hb_free (elts);
+  }
+
+  void add (hb_codepoint_t g) { elt (g) |= mask (g); }
+  void del (hb_codepoint_t g) { elt (g) &= ~mask (g); }
+  void set (hb_codepoint_t g, bool value) { if (value) add (g); else del (g); }
+  bool get (hb_codepoint_t g) const { return elt (g) & mask (g); }
+  bool may_have (hb_codepoint_t g) const { return get (g); }
+
+  bool operator [] (hb_codepoint_t g) const { return get (g); }
+  bool operator () (hb_codepoint_t g) const { return get (g); }
+
+  void add_range (hb_codepoint_t a, hb_codepoint_t b)
+  {
+    if (unlikely (!count || a > b || a < min_v || b > max_v))
+      return;
+
+    elt_t *la = &elt (a);
+    elt_t *lb = &elt (b);
+    if (la == lb)
+      *la |= (mask (b) << 1) - mask(a);
+    else
+    {
+      *la |= ~(mask (a) - 1llu);
+      la++;
+
+      hb_memset (la, 0xff, (char *) lb - (char *) la);
+
+      *lb |= ((mask (b) << 1) - 1llu);
+    }
+  }
+  void del_range (hb_codepoint_t a, hb_codepoint_t b)
+  {
+    if (unlikely (!count || a > b || a < min_v || b > max_v))
+      return;
+
+    elt_t *la = &elt (a);
+    elt_t *lb = &elt (b);
+    if (la == lb)
+      *la &= ~((mask (b) << 1llu) - mask(a));
+    else
+    {
+      *la &= mask (a) - 1;
+      la++;
+
+      hb_memset (la, 0, (char *) lb - (char *) la);
+
+      *lb &= ~((mask (b) << 1) - 1llu);
+    }
+  }
+  void set_range (hb_codepoint_t a, hb_codepoint_t b, bool v)
+  { if (v) add_range (a, b); else del_range (a, b); }
+
+  template <typename set_t>
+  void union_ (const set_t &set)
+  {
+    for (hb_codepoint_t g : set)
+      add (g);
+  }
+
+  typedef unsigned long long elt_t;
+  static const unsigned int ELT_BITS = sizeof (elt_t) * 8;
+  static constexpr unsigned ELT_MASK = ELT_BITS - 1;
+
+  static constexpr elt_t zero = 0;
+
+  elt_t &elt (hb_codepoint_t g)
+  {
+    g -= min_v;
+    if (unlikely (g >= count))
+      return Crap(elt_t);
+    return elts[g / ELT_BITS];
+  }
+  const elt_t& elt (hb_codepoint_t g) const
+  {
+    g -= min_v;
+    if (unlikely (g >= count))
+      return Null(elt_t);
+    return elts[g / ELT_BITS];
+  }
+
+  static constexpr elt_t mask (hb_codepoint_t g) { return elt_t (1) << (g & ELT_MASK); }
+
+  hb_codepoint_t min_v = 0, max_v = 0, count = 0;
+  elt_t *elts = nullptr;
+};
+
+
+#endif /* HB_BIT_VECTOR_HH */
diff --git a/src/hb-ot-kern-table.hh b/src/hb-ot-kern-table.hh
index d11a913..95f3fd3 100644
--- a/src/hb-ot-kern-table.hh
+++ b/src/hb-ot-kern-table.hh
@@ -89,7 +89,7 @@
   template <typename set_t>
   void collect_glyphs (set_t &left_set, set_t &right_set, unsigned num_glyphs) const
   {
-    set_t set;
+    hb_bit_set_t set;
     if (likely (glyphCount))
       set.add_range (0, glyphCount - 1);
     left_set.union_ (set);
diff --git a/src/meson.build b/src/meson.build
index d4262be..e0e13d7 100644
--- a/src/meson.build
+++ b/src/meson.build
@@ -28,6 +28,7 @@
   'hb-atomic.hh',
   'hb-bimap.hh',
   'hb-bit-page.hh',
+  'hb-bit-vector.hh',
   'hb-blob.cc',
   'hb-blob.hh',
   'hb-buffer-serialize.cc',