[ot-font] Add a cmap cache

Speeds up Roboto shaping by 7%, at a cost of 1 KB of memory per face.
diff --git a/src/hb-ot-cmap-table.hh b/src/hb-ot-cmap-table.hh
index 3c7d2f0..c4a0987 100644
--- a/src/hb-ot-cmap-table.hh
+++ b/src/hb-ot-cmap-table.hh
@@ -1901,20 +1901,37 @@
       if (unlikely (!this->get_glyph_funcZ)) return false;
       return this->get_glyph_funcZ (this->get_glyph_data, unicode, glyph);
     }
+
+    template <typename cache_t = void>
+    inline bool _cached_get (hb_codepoint_t unicode,
+			     hb_codepoint_t *glyph,
+			     cache_t *cache) const /* cache may be null, in which case no caching is done */
+    {
+      unsigned v;
+      if (cache && cache->get (unicode, &v)) /* value already cached for this codepoint */
+      {
+        *glyph = v;
+	return true;
+      }
+      bool ret = this->get_glyph_funcZ (this->get_glyph_data, unicode, glyph); /* not null-checked here; get_nominal_glyphs checks before calling */
+      if (cache && ret) /* only successful lookups are stored */
+	cache->set (unicode, *glyph);
+      return ret;
+    }
+
+    template <typename cache_t = void>
     unsigned int get_nominal_glyphs (unsigned int count,
 				     const hb_codepoint_t *first_unicode,
 				     unsigned int unicode_stride,
 				     hb_codepoint_t *first_glyph,
-				     unsigned int glyph_stride) const
+				     unsigned int glyph_stride,
+				     cache_t *cache = nullptr) const
     {
       if (unlikely (!this->get_glyph_funcZ)) return 0;
 
-      hb_cmap_get_glyph_func_t get_glyph_funcZ = this->get_glyph_funcZ;
-      const void *get_glyph_data = this->get_glyph_data;
-
       unsigned int done;
       for (done = 0;
-	   done < count && get_glyph_funcZ (get_glyph_data, *first_unicode, first_glyph);
+	   done < count && _cached_get (*first_unicode, first_glyph, cache);
 	   done++)
       {
 	first_unicode = &StructAtOffsetUnaligned<hb_codepoint_t> (first_unicode, unicode_stride);
diff --git a/src/hb-ot-font.cc b/src/hb-ot-font.cc
index 0fc6a10..df5ee6c 100644
--- a/src/hb-ot-font.cc
+++ b/src/hb-ot-font.cc
@@ -60,12 +60,17 @@
  * never need to call these functions directly.
  **/
 
+using hb_ot_font_cmap_cache_t    = hb_cache_t<21, 16, 8, true>;
 using hb_ot_font_advance_cache_t = hb_cache_t<24, 16, 8, true>;
 
+static hb_user_data_key_t hb_ot_font_cmap_cache_user_data_key;
+
 struct hb_ot_font_t
 {
   const hb_ot_face_t *ot_face;
 
+  hb_ot_font_cmap_cache_t *cmap_cache;
+
   /* h_advance caching */
   mutable hb_atomic_int_t cached_coords_serial;
   mutable hb_atomic_ptr_t<hb_ot_font_advance_cache_t> advance_cache;
@@ -80,6 +85,31 @@
 
   ot_font->ot_face = &font->face->table;
 
+  auto *cmap_cache  = (hb_ot_font_cmap_cache_t *) hb_face_get_user_data (font->face,
+									 &hb_ot_font_cmap_cache_user_data_key);
+  if (!cmap_cache)
+  {
+    cmap_cache = (hb_ot_font_cmap_cache_t *) hb_malloc (sizeof (hb_ot_font_cmap_cache_t));
+    if (unlikely (!cmap_cache)) goto out;
+    cmap_cache->init ();
+    if (!hb_face_set_user_data (font->face,
+				&hb_ot_font_cmap_cache_user_data_key,
+				cmap_cache,
+				hb_free,
+				false))
+    {
+      /* Normally we would retry after this, but that would
+       * infinite-loop if the face is the empty-face.
+       * Just let it go and this font will be uncached if it
+       * happened to collide with another thread creating the
+       * cache at the same time. */
+      hb_free (cmap_cache);
+      cmap_cache = nullptr;
+    }
+  }
+  out:
+  ot_font->cmap_cache = cmap_cache;
+
   return ot_font;
 }
 
@@ -121,7 +151,8 @@
   const hb_ot_face_t *ot_face = ot_font->ot_face;
   return ot_face->cmap->get_nominal_glyphs (count,
 					    first_unicode, unicode_stride,
-					    first_glyph, glyph_stride);
+					    first_glyph, glyph_stride,
+					    ot_font->cmap_cache);
 }
 
 static hb_bool_t