[layout] Build lookup accelerators lazily on-demand

Drastically reduces memory consumption for large multi-script fonts,
because an accelerator is now allocated only for lookups that are actually used.
diff --git a/src/hb-ot-font.cc b/src/hb-ot-font.cc
index 42f3143..92f9e39 100644
--- a/src/hb-ot-font.cc
+++ b/src/hb-ot-font.cc
@@ -121,8 +121,7 @@
   hb_ot_font_t *ot_font = (hb_ot_font_t *) font_data;
 
   auto *cache = ot_font->advance_cache.get_relaxed ();
-  if (cache)
-    hb_free (cache);
+  hb_free (cache);
 
   hb_free (ot_font);
 }
diff --git a/src/hb-ot-layout-gpos-table.hh b/src/hb-ot-layout-gpos-table.hh
index afc02f5..0cfa139 100644
--- a/src/hb-ot-layout-gpos-table.hh
+++ b/src/hb-ot-layout-gpos-table.hh
@@ -64,11 +64,8 @@
   c->set_lookup_props (l.get_props ());
 
   bool ret = false;
-  if (lookup_index < gpos->lookup_count)
-  {
-    auto &accel = gpos->accels[lookup_index];
-    ret = accel.apply (c, l.get_subtable_count (), false);
-  }
+  auto *accel = gpos->get_accel (lookup_index);
+  ret = accel && accel->apply (c, l.get_subtable_count (), false);
 
   c->set_lookup_index (saved_lookup_index);
   c->set_lookup_props (saved_lookup_props);
diff --git a/src/hb-ot-layout-gsub-table.hh b/src/hb-ot-layout-gsub-table.hh
index 6a3d12c..fd8a68b 100644
--- a/src/hb-ot-layout-gsub-table.hh
+++ b/src/hb-ot-layout-gsub-table.hh
@@ -77,11 +77,8 @@
   c->set_lookup_props (l.get_props ());
 
   bool ret = false;
-  if (lookup_index < gsub->lookup_count)
-  {
-    auto &accel = gsub->accels[lookup_index];
-    ret = accel.apply (c, l.get_subtable_count (), false);
-  }
+  auto *accel = gsub->get_accel (lookup_index);
+  ret = accel && accel->apply (c, l.get_subtable_count (), false);
 
   c->set_lookup_index (saved_lookup_index);
   c->set_lookup_props (saved_lookup_props);
diff --git a/src/hb-ot-layout-gsubgpos.hh b/src/hb-ot-layout-gsubgpos.hh
index 1b9be91..8d0f400 100644
--- a/src/hb-ot-layout-gsubgpos.hh
+++ b/src/hb-ot-layout-gsubgpos.hh
@@ -4013,37 +4013,40 @@
 struct hb_ot_layout_lookup_accelerator_t
 {
   template <typename TLookup>
-  void init (const TLookup &lookup)
+  static hb_ot_layout_lookup_accelerator_t *create (const TLookup &lookup)
   {
     unsigned count = lookup.get_subtable_count ();
-    subtables = (hb_accelerate_subtables_context_t::hb_applicable_t *)
-		hb_calloc (count, sizeof (hb_accelerate_subtables_context_t::hb_applicable_t));
-    if (unlikely (!subtables))
-      return;
 
-    hb_accelerate_subtables_context_t c_accelerate_subtables (subtables);
+    unsigned size = sizeof (hb_ot_layout_lookup_accelerator_t) -
+		    HB_VAR_ARRAY * sizeof (hb_accelerate_subtables_context_t::hb_applicable_t) +
+		    count * sizeof (hb_accelerate_subtables_context_t::hb_applicable_t);
+
+    auto *thiz = (hb_ot_layout_lookup_accelerator_t *) hb_calloc (1, size);
+    if (unlikely (!thiz))
+      return nullptr;
+
+    hb_accelerate_subtables_context_t c_accelerate_subtables (thiz->subtables);
     lookup.dispatch (&c_accelerate_subtables);
 
-    digest.init ();
-    for (auto& subtable : hb_iter (subtables, count))
-      digest.add (subtable.digest);
+    thiz->digest.init ();
+    for (auto& subtable : hb_iter (thiz->subtables, count))
+      thiz->digest.add (subtable.digest);
 
 #ifndef HB_NO_OT_LAYOUT_LOOKUP_CACHE
-    cache_user_idx = c_accelerate_subtables.cache_user_idx;
+    thiz->cache_user_idx = c_accelerate_subtables.cache_user_idx;
     for (unsigned i = 0; i < count; i++)
-      if (i != cache_user_idx)
-	subtables[i].apply_cached_func = subtables[i].apply_func;
+      if (i != thiz->cache_user_idx)
+	thiz->subtables[i].apply_cached_func = thiz->subtables[i].apply_func;
 #endif
+
+    return thiz;
   }
-  void fini () { hb_free (subtables); }
 
   bool may_have (hb_codepoint_t g) const
   { return digest.may_have (g); }
 
   bool apply (hb_ot_apply_context_t *c, unsigned subtables_count, bool use_cache) const
   {
-     if (unlikely (!subtables)) return false;
-
 #ifndef HB_NO_OT_LAYOUT_LOOKUP_CACHE
     if (use_cache)
     {
@@ -4084,10 +4087,10 @@
 
   hb_set_digest_t digest;
   private:
-  hb_accelerate_subtables_context_t::hb_applicable_t *subtables;
 #ifndef HB_NO_OT_LAYOUT_LOOKUP_CACHE
   unsigned cache_user_idx = (unsigned) -1;
 #endif
+  hb_accelerate_subtables_context_t::hb_applicable_t subtables[HB_VAR_ARRAY];
 };
 
 template <typename Types>
@@ -4450,28 +4453,47 @@
 
       this->lookup_count = table->get_lookup_count ();
 
-      this->accels = (hb_ot_layout_lookup_accelerator_t *) hb_calloc (this->lookup_count, sizeof (hb_ot_layout_lookup_accelerator_t));
+      this->accels = (hb_atomic_ptr_t<hb_ot_layout_lookup_accelerator_t> *) hb_calloc (this->lookup_count, sizeof (*accels));
       if (unlikely (!this->accels))
       {
 	this->lookup_count = 0;
 	this->table.destroy ();
 	this->table = hb_blob_get_empty ();
       }
-
-      for (unsigned int i = 0; i < this->lookup_count; i++)
-	this->accels[i].init (table->get_lookup (i));
     }
     ~accelerator_t ()
     {
       for (unsigned int i = 0; i < this->lookup_count; i++)
-	this->accels[i].fini ();
+	hb_free (this->accels[i]);
       hb_free (this->accels);
       this->table.destroy ();
     }
 
+    hb_ot_layout_lookup_accelerator_t *get_accel (unsigned lookup_index) const
+    {
+      if (unlikely (lookup_index >= lookup_count)) return nullptr;
+
+    retry:
+      auto *accel = accels[lookup_index].get_acquire ();
+      if (unlikely (!accel))
+      {
+	accel = hb_ot_layout_lookup_accelerator_t::create (table->get_lookup (lookup_index));
+	if (unlikely (!accel))
+	  return nullptr;
+
+	if (unlikely (!accels[lookup_index].cmpexch (nullptr, accel)))
+	{
+	  hb_free (accel);
+	  goto retry;
+	}
+      }
+
+      return accel;
+    }
+
     hb_blob_ptr_t<T> table;
     unsigned int lookup_count;
-    hb_ot_layout_lookup_accelerator_t *accels;
+    hb_atomic_ptr_t<hb_ot_layout_lookup_accelerator_t> *accels;
   };
 
   protected:
diff --git a/src/hb-ot-layout.cc b/src/hb-ot-layout.cc
index dcaf62d..44c1ea6 100644
--- a/src/hb-ot-layout.cc
+++ b/src/hb-ot-layout.cc
@@ -1487,11 +1487,13 @@
 				      unsigned int          glyphs_length,
 				      hb_bool_t             zero_context)
 {
-  if (unlikely (lookup_index >= face->table.GSUB->lookup_count)) return false;
+  auto &gsub = face->table.GSUB;
+  if (unlikely (lookup_index >= gsub->lookup_count)) return false;
   OT::hb_would_apply_context_t c (face, glyphs, glyphs_length, (bool) zero_context);
 
-  const OT::SubstLookup& l = face->table.GSUB->table->get_lookup (lookup_index);
-  return l.would_apply (&c, &face->table.GSUB->accels[lookup_index]);
+  const OT::SubstLookup& l = gsub->table->get_lookup (lookup_index);
+  auto *accel = gsub->get_accel (lookup_index);
+  return accel && l.would_apply (&c, accel);
 }
 
 
@@ -1830,11 +1832,9 @@
   typedef OT::SubstLookup Lookup;
 
   GSUBProxy (hb_face_t *face) :
-    table (*face->table.GSUB->table),
-    accels (face->table.GSUB->accels) {}
+    accel (*face->table.GSUB) {}
 
-  const GSUB &table;
-  const OT::hb_ot_layout_lookup_accelerator_t *accels;
+  const GSUB::accelerator_t &accel;
 };
 
 struct GPOSProxy
@@ -1844,11 +1844,9 @@
   typedef OT::PosLookup Lookup;
 
   GPOSProxy (hb_face_t *face) :
-    table (*face->table.GPOS->table),
-    accels (face->table.GPOS->accels) {}
+    accel (*face->table.GPOS) {}
 
-  const GPOS &table;
-  const OT::hb_ot_layout_lookup_accelerator_t *accels;
+  const GPOS::accelerator_t &accel;
 };
 
 
@@ -1911,12 +1909,13 @@
 	      const typename Proxy::Lookup &lookup,
 	      const OT::hb_ot_layout_lookup_accelerator_t &accel)
 {
-  bool ret = false;
   hb_buffer_t *buffer = c->buffer;
   unsigned subtable_count = lookup.get_subtable_count ();
 
   if (unlikely (!buffer->len || !c->lookup_mask))
-    return ret;
+    return false;
+
+  bool ret = false;
 
   c->set_lookup_props (lookup.get_props ());
 
@@ -1962,6 +1961,10 @@
       auto &lookup = lookups[table_index][i];
 
       unsigned int lookup_index = lookup.index;
+
+      auto *accel = proxy.accel.get_accel (lookup_index);
+      if (unlikely (!accel)) continue;
+
       if (buffer->messaging () &&
 	  !buffer->message (font, "start lookup %u feature '%c%c%c%c'", lookup_index, HB_UNTAG (lookup.feature_tag))) continue;
 
@@ -1969,7 +1972,7 @@
        * (plus some past glyphs).
        *
        * Only try applying the lookup if there is any overlap. */
-      if (proxy.accels[lookup_index].digest.may_have (c.digest))
+      if (accel->digest.may_have (c.digest))
       {
 	c.set_lookup_index (lookup_index);
 	c.set_lookup_mask (lookup.mask);
@@ -1979,8 +1982,8 @@
 	c.set_per_syllable (lookup.per_syllable);
 
 	apply_string<Proxy> (&c,
-			     proxy.table.get_lookup (lookup_index),
-			     proxy.accels[lookup_index]);
+			     proxy.accel.table->get_lookup (lookup_index),
+			     *accel);
       }
       else if (buffer->messaging ())
 	(void) buffer->message (font, "skipped lookup %u feature '%c%c%c%c' because no glyph matches", lookup_index, HB_UNTAG (lookup.feature_tag));
diff --git a/src/hb-ot-shaper-arabic-fallback.hh b/src/hb-ot-shaper-arabic-fallback.hh
index 6ba1ec7..e7a6900 100644
--- a/src/hb-ot-shaper-arabic-fallback.hh
+++ b/src/hb-ot-shaper-arabic-fallback.hh
@@ -228,7 +228,7 @@
 
   hb_mask_t mask_array[ARABIC_FALLBACK_MAX_LOOKUPS];
   OT::SubstLookup *lookup_array[ARABIC_FALLBACK_MAX_LOOKUPS];
-  OT::hb_ot_layout_lookup_accelerator_t accel_array[ARABIC_FALLBACK_MAX_LOOKUPS];
+  OT::hb_ot_layout_lookup_accelerator_t *accel_array[ARABIC_FALLBACK_MAX_LOOKUPS];
 };
 
 #if defined(_WIN32) && !defined(HB_NO_WIN1256)
@@ -278,7 +278,7 @@
       fallback_plan->lookup_array[j] = const_cast<OT::SubstLookup*> (&(&manifest+manifest[i].lookupOffset));
       if (fallback_plan->lookup_array[j])
       {
-	fallback_plan->accel_array[j].init (*fallback_plan->lookup_array[j]);
+	fallback_plan->accel_array[j] = OT::hb_ot_layout_lookup_accelerator_t::create (*fallback_plan->lookup_array[j]);
 	j++;
       }
     }
@@ -308,7 +308,7 @@
       fallback_plan->lookup_array[j] = arabic_fallback_synthesize_lookup (plan, font, i);
       if (fallback_plan->lookup_array[j])
       {
-	fallback_plan->accel_array[j].init (*fallback_plan->lookup_array[j]);
+	fallback_plan->accel_array[j] = OT::hb_ot_layout_lookup_accelerator_t::create (*fallback_plan->lookup_array[j]);
 	j++;
       }
     }
@@ -355,7 +355,7 @@
   for (unsigned int i = 0; i < fallback_plan->num_lookups; i++)
     if (fallback_plan->lookup_array[i])
     {
-      fallback_plan->accel_array[i].fini ();
+      hb_free (fallback_plan->accel_array[i]);
       if (fallback_plan->free_lookups)
 	hb_free (fallback_plan->lookup_array[i]);
     }
@@ -372,9 +372,10 @@
   for (unsigned int i = 0; i < fallback_plan->num_lookups; i++)
     if (fallback_plan->lookup_array[i]) {
       c.set_lookup_mask (fallback_plan->mask_array[i]);
-      hb_ot_layout_substitute_lookup (&c,
-				      *fallback_plan->lookup_array[i],
-				      fallback_plan->accel_array[i]);
+      if (fallback_plan->accel_array[i])
+	hb_ot_layout_substitute_lookup (&c,
+					*fallback_plan->lookup_array[i],
+					*fallback_plan->accel_array[i]);
     }
 }