Update upb for php. (#2662)

diff --git a/php/ext/google/protobuf/def.c b/php/ext/google/protobuf/def.c
index 6ea2cc9..0ce39ec 100644
--- a/php/ext/google/protobuf/def.c
+++ b/php/ext/google/protobuf/def.c
@@ -209,14 +209,14 @@
 
 static void descriptor_pool_init_c_instance(DescriptorPool *pool TSRMLS_DC) {
   zend_object_std_init(&pool->std, descriptor_pool_type TSRMLS_CC);
-  pool->symtab = upb_symtab_new(&pool->symtab);
+  pool->symtab = upb_symtab_new();
 
   ALLOC_HASHTABLE(pool->pending_list);
   zend_hash_init(pool->pending_list, 1, NULL, ZVAL_PTR_DTOR, 0);
 }
 
 static void descriptor_pool_free_c(DescriptorPool *pool TSRMLS_DC) {
-  upb_symtab_unref(pool->symtab, &pool->symtab);
+  upb_symtab_free(pool->symtab);
 
   zend_hash_destroy(pool->pending_list);
   FREE_HASHTABLE(pool->pending_list);
diff --git a/php/ext/google/protobuf/encode_decode.c b/php/ext/google/protobuf/encode_decode.c
index eafe1ae..32a0fbe 100644
--- a/php/ext/google/protobuf/encode_decode.c
+++ b/php/ext/google/protobuf/encode_decode.c
@@ -675,7 +675,7 @@
     case UPB_TYPE_INT64:
     case UPB_TYPE_UINT64:
     case UPB_TYPE_DOUBLE:
-      upb_shim_set(h, f, offset, -1);
+      upb_msg_setscalarhandler(h, f, offset, -1);
       break;
     case UPB_TYPE_STRING:
     case UPB_TYPE_BYTES: {
diff --git a/php/ext/google/protobuf/upb.c b/php/ext/google/protobuf/upb.c
index 98daafc..e0c56f8 100644
--- a/php/ext/google/protobuf/upb.c
+++ b/php/ext/google/protobuf/upb.c
@@ -1,33 +1,4 @@
-// Protocol Buffers - Google's data interchange format
-// Copyright 2008 Google Inc.  All rights reserved.
-// https://developers.google.com/protocol-buffers/
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
+// Amalgamated source file
 #include "upb.h"
 
 
@@ -150,21 +121,6 @@
 
 const upb_filedef *upb_def_file(const upb_def *d) { return d->file; }
 
-upb_def *upb_def_dup(const upb_def *def, const void *o) {
-  switch (def->type) {
-    case UPB_DEF_MSG:
-      return upb_msgdef_upcast_mutable(
-          upb_msgdef_dup(upb_downcast_msgdef(def), o));
-    case UPB_DEF_FIELD:
-      return upb_fielddef_upcast_mutable(
-          upb_fielddef_dup(upb_downcast_fielddef(def), o));
-    case UPB_DEF_ENUM:
-      return upb_enumdef_upcast_mutable(
-          upb_enumdef_dup(upb_downcast_enumdef(def), o));
-    default: UPB_ASSERT(false); return NULL;
-  }
-}
-
 static bool upb_def_init(upb_def *def, upb_deftype_t type,
                          const struct upb_refcounted_vtbl *vtbl,
                          const void *owner) {
@@ -311,6 +267,7 @@
   /* Sort fields.  upb internally relies on UPB_TYPE_MESSAGE fields having the
    * lowest indexes, but we do not publicly guarantee this. */
   upb_msg_field_iter j;
+  upb_msg_oneof_iter k;
   int i;
   uint32_t selector;
   int n = upb_msgdef_numfields(m);
@@ -395,6 +352,13 @@
 #undef TRY
 #endif
 
+  for(upb_msg_oneof_begin(&k, m), i = 0;
+      !upb_msg_oneof_done(&k);
+      upb_msg_oneof_next(&k), i++) {
+    upb_oneofdef *o = upb_msg_iter_oneof(&k);
+    o->index = i;
+  }
+
   upb_gfree(fields);
   return true;
 }
@@ -512,21 +476,6 @@
   return NULL;
 }
 
-upb_enumdef *upb_enumdef_dup(const upb_enumdef *e, const void *owner) {
-  upb_enum_iter i;
-  upb_enumdef *new_e = upb_enumdef_new(owner);
-  if (!new_e) return NULL;
-  for(upb_enum_begin(&i, e); !upb_enum_done(&i); upb_enum_next(&i)) {
-    bool success = upb_enumdef_addval(
-        new_e, upb_enum_iter_name(&i),upb_enum_iter_number(&i), NULL);
-    if (!success) {
-      upb_enumdef_unref(new_e, owner);
-      return NULL;
-    }
-  }
-  return new_e;
-}
-
 bool upb_enumdef_freeze(upb_enumdef *e, upb_status *status) {
   upb_def *d = upb_enumdef_upcast_mutable(e);
   return upb_def_freeze(&d, 1, status);
@@ -761,7 +710,8 @@
   return f;
 }
 
-upb_fielddef *upb_fielddef_dup(const upb_fielddef *f, const void *owner) {
+static upb_fielddef *upb_fielddef_dup(const upb_fielddef *f,
+                                      const void *owner) {
   const char *srcname;
   upb_fielddef *newf = upb_fielddef_new(owner);
   if (!newf) return NULL;
@@ -1476,7 +1426,9 @@
   return NULL;
 }
 
-upb_msgdef *upb_msgdef_dup(const upb_msgdef *m, const void *owner) {
+static upb_oneofdef *upb_oneofdef_dup(const upb_oneofdef *o, const void *owner);
+
+static upb_msgdef *upb_msgdef_dup(const upb_msgdef *m, const void *owner) {
   bool ok;
   upb_msg_field_iter i;
   upb_msg_oneof_iter o;
@@ -1812,7 +1764,8 @@
   return NULL;
 }
 
-upb_oneofdef *upb_oneofdef_dup(const upb_oneofdef *o, const void *owner) {
+static upb_oneofdef *upb_oneofdef_dup(const upb_oneofdef *o,
+                                      const void *owner) {
   bool ok;
   upb_oneof_iter i;
   upb_oneofdef *newo = upb_oneofdef_new(owner);
@@ -1861,6 +1814,10 @@
   return upb_strtable_count(&o->ntof);
 }
 
+uint32_t upb_oneofdef_index(const upb_oneofdef *o) {
+  return o->index;  /* Stored index; a getter only, nothing is computed. */
+}
+
 bool upb_oneofdef_addfield(upb_oneofdef *o, upb_fielddef *f,
                            const void *ref_donor,
                            upb_status *s) {
@@ -2156,6 +2113,354 @@
     return false;
   }
 }
+
+void upb_symtab_free(upb_symtab *s) {
+  /* Drop the ref held (with s as owner) on every def, then free the table. */
+  upb_strtable_iter it;
+  for (upb_strtable_begin(&it, &s->symtab); !upb_strtable_done(&it);
+       upb_strtable_next(&it)) {
+    upb_def_unref(upb_value_getptr(upb_strtable_iter_value(&it)), s);
+  }
+  upb_strtable_uninit(&s->symtab);
+  upb_gfree(s);
+}
+
+upb_symtab *upb_symtab_new() {
+  /* Returns a new, empty symtab, or NULL if either allocation fails. */
+  upb_symtab *s = upb_gmalloc(sizeof(*s));
+  if (s && !upb_strtable_init(&s->symtab, UPB_CTYPE_PTR)) {
+    upb_gfree(s);  /* Table setup failed: don't leak or hand out the shell. */
+    s = NULL;
+  }
+  return s;
+}
+
+const upb_def *upb_symtab_lookup(const upb_symtab *s, const char *sym) {
+  /* Exact-name lookup; yields NULL when sym is not in the table. */
+  upb_value found;
+  if (!upb_strtable_lookup(&s->symtab, sym, &found)) return NULL;
+  return upb_value_getptr(found);
+}
+
+const upb_msgdef *upb_symtab_lookupmsg(const upb_symtab *s, const char *sym) {
+  /* NULL unless sym is present and its def dyncasts to a msgdef. */
+  const upb_def *def = upb_symtab_lookup(s, sym);
+  if (!def) return NULL;
+  return upb_dyncast_msgdef(def);
+}
+
+const upb_enumdef *upb_symtab_lookupenum(const upb_symtab *s, const char *sym) {
+  /* NULL unless sym is present and its def dyncasts to an enumdef. */
+  const upb_def *def = upb_symtab_lookup(s, sym);
+  if (!def) return NULL;
+  return upb_dyncast_enumdef(def);
+}
+
+/* Looks up sym in t.  Only absolute names (leading '.') are supported; base is
+ * accepted for the relative case but is currently ignored. */
+static upb_def *upb_resolvename(const upb_strtable *t,
+                                const char *base, const char *sym) {
+  upb_value hit;
+  if (sym[0] == '\0') return NULL;
+  if (sym[0] != '.') {
+    /* Relative names would be resolved by stripping components from base.
+     * TODO: This branch is totally broken, but currently not used. */
+    (void)base;
+    UPB_ASSERT(false);
+    return NULL;
+  }
+
+  /* Absolute name: a single lookup with the leading '.' sliced off. */
+  if (!upb_strtable_lookup(t, sym + 1, &hit)) return NULL;
+  return upb_value_getptr(hit);
+}
+
+const upb_def *upb_symtab_resolve(const upb_symtab *s, const char *base,
+                                  const char *sym) {
+  /* Thin public wrapper: resolve sym against this symtab's own table. */
+  return upb_resolvename(&s->symtab, base, sym);
+}
+
+/* Adds defs to s (freezing them plus freeze_also); on error returns false with
+ * status set.  TODO(haberman): needs far more testing of error conditions. */
+static bool symtab_add(upb_symtab *s, upb_def *const*defs, size_t n,
+                       void *ref_donor, upb_refcounted *freeze_also,
+                       upb_status *status) {
+  size_t i;
+  size_t add_n;
+  size_t freeze_n;
+  upb_strtable_iter iter;
+  upb_refcounted **add_objs = NULL;
+  upb_def **add_defs = NULL;
+  size_t add_objs_size;
+  upb_strtable addtab;
+
+  if (n == 0 && !freeze_also) {
+    return true;
+  }
+
+  if (!upb_strtable_init(&addtab, UPB_CTYPE_PTR)) {
+    upb_status_seterrmsg(status, "out of memory");
+    return false;
+  }
+
+  /* Add new defs to our "add" set. */
+  for (i = 0; i < n; i++) {
+    upb_def *def = defs[i];
+    const char *fullname;
+    upb_fielddef *f;
+
+    if (upb_def_isfrozen(def)) {
+      upb_status_seterrmsg(status, "added defs must be mutable");
+      goto err;
+    }
+    fullname = upb_def_fullname(def);
+    if (!fullname) {
+      upb_status_seterrmsg(
+          status, "Anonymous defs cannot be added to a symtab");
+      goto err;
+    }
+
+    f = upb_dyncast_fielddef_mutable(def);
+
+    if (f) {
+      if (!upb_fielddef_containingtypename(f)) {
+        upb_status_seterrmsg(status,
+                             "Standalone fielddefs must have a containing type "
+                             "(extendee) name set");
+        goto err;
+      }
+    } else {
+      if (upb_strtable_lookup(&addtab, fullname, NULL)) {
+        upb_status_seterrf(status, "Conflicting defs named '%s'", fullname);
+        goto err;
+      }
+      if (upb_strtable_lookup(&s->symtab, fullname, NULL)) {
+        upb_status_seterrf(status, "Symtab already has a def named '%s'",
+                           fullname);
+        goto err;
+      }
+      if (!upb_strtable_insert(&addtab, fullname, upb_value_ptr(def)))
+        goto oom_err;
+      upb_def_donateref(def, ref_donor, s);  /* Only once addtab tracks it. */
+      def->came_from_user = true;
+    }
+  }
+
+  /* Add standalone fielddefs (ie. extensions) to the appropriate messages.
+   * If the appropriate message only exists in the existing symtab, duplicate
+   * it so we have a mutable copy we can add the fields to. */
+  for (i = 0; i < n; i++) {
+    upb_def *def = defs[i];
+    upb_fielddef *f = upb_dyncast_fielddef_mutable(def);
+    const char *msgname;
+    upb_value v;
+    upb_msgdef *m;
+
+    if (!f) continue;
+    msgname = upb_fielddef_containingtypename(f);
+    /* We validated this earlier in this function. */
+    UPB_ASSERT(msgname);
+
+    /* If the extendee name is absolutely qualified, move past the initial ".".
+     * TODO(haberman): it is not obvious what it would mean if this was not
+     * absolutely qualified. */
+    if (msgname[0] == '.') {
+      msgname++;
+    }
+
+    if (upb_strtable_lookup(&addtab, msgname, &v)) {
+      /* Extendee is in the set of defs the user asked us to add. */
+      m = upb_value_getptr(v);
+    } else {
+      /* Need to find and dup the extendee from the existing symtab. */
+      const upb_msgdef *frozen_m = upb_symtab_lookupmsg(s, msgname);
+      if (!frozen_m) {
+        upb_status_seterrf(status,
+                           "Tried to extend message %s that does not exist "
+                           "in this SymbolTable.",
+                           msgname);
+        goto err;
+      }
+      m = upb_msgdef_dup(frozen_m, s);
+      if (!m) goto oom_err;
+      if (!upb_strtable_insert(&addtab, msgname, upb_value_ptr(m))) {
+        upb_msgdef_unref(m, s);
+        goto oom_err;
+      }
+    }
+
+    if (!upb_msgdef_addfield(m, f, ref_donor, status)) {
+      goto err;
+    }
+  }
+
+  /* Now using the table, resolve symbolic references for subdefs. */
+  upb_strtable_begin(&iter, &addtab);
+  for (; !upb_strtable_done(&iter); upb_strtable_next(&iter)) {
+    const char *base;
+    upb_def *def = upb_value_getptr(upb_strtable_iter_value(&iter));
+    upb_msgdef *m = upb_dyncast_msgdef_mutable(def);
+    upb_msg_field_iter j;
+
+    if (!m) continue;
+    /* Type names are resolved relative to the message in which they appear. */
+    base = upb_msgdef_fullname(m);
+
+    for(upb_msg_field_begin(&j, m);
+        !upb_msg_field_done(&j);
+        upb_msg_field_next(&j)) {
+      upb_fielddef *f = upb_msg_iter_field(&j);
+      const char *name = upb_fielddef_subdefname(f);
+      if (name && !upb_fielddef_subdef(f)) {
+        /* Try the lookup in the current set of to-be-added defs first. If not
+         * there, try existing defs. */
+        upb_def *subdef = upb_resolvename(&addtab, base, name);
+        if (subdef == NULL) {
+          subdef = upb_resolvename(&s->symtab, base, name);
+        }
+        if (subdef == NULL) {
+          upb_status_seterrf(
+              status, "couldn't resolve name '%s' in message '%s'", name, base);
+          goto err;
+        } else if (!upb_fielddef_setsubdef(f, subdef, status)) {
+          goto err;
+        }
+      }
+    }
+  }
+
+  /* We need an array of the defs in addtab, for passing to
+   * upb_refcounted_freeze(). */
+  add_objs_size = upb_strtable_count(&addtab);
+  if (freeze_also) {
+    add_objs_size++;
+  }
+
+  add_defs = upb_gmalloc(sizeof(void*) * add_objs_size);
+  if (add_defs == NULL) goto oom_err;
+  upb_strtable_begin(&iter, &addtab);
+  for (add_n = 0; !upb_strtable_done(&iter); upb_strtable_next(&iter)) {
+    add_defs[add_n++] = upb_value_getptr(upb_strtable_iter_value(&iter));
+  }
+
+  /* Validate defs. */
+  if (!_upb_def_validate(add_defs, add_n, status)) {
+    goto err;
+  }
+
+  /* Cheat a little and give the array a new type.
+   * This is probably undefined behavior, but this code will be deleted soon. */
+  add_objs = (upb_refcounted**)add_defs;
+
+  freeze_n = add_n;
+  if (freeze_also) {
+    add_objs[freeze_n++] = freeze_also;
+  }
+
+  if (!upb_refcounted_freeze(add_objs, freeze_n, status,
+                             UPB_MAX_MESSAGE_DEPTH * 2)) {
+    goto err;
+  }
+
+  /* This must be delayed until all errors have been detected, since error
+   * recovery code uses this table to cleanup defs. */
+  upb_strtable_uninit(&addtab);
+
+  /* TODO(haberman) we don't properly handle errors after this point (like
+   * OOM in upb_strtable_insert() below). */
+  for (i = 0; i < add_n; i++) {
+    upb_def *def = (upb_def*)add_objs[i];
+    const char *name = upb_def_fullname(def);
+    upb_value v;
+    bool success;
+
+    if (upb_strtable_remove(&s->symtab, name, &v)) {
+      const upb_def *def = upb_value_getptr(v);
+      upb_def_unref(def, s);
+    }
+    success = upb_strtable_insert(&s->symtab, name, upb_value_ptr(def));
+    UPB_ASSERT(success == true);
+  }
+  upb_gfree(add_defs);
+  return true;
+
+oom_err:
+  upb_status_seterrmsg(status, "out of memory");
+err: {
+    /* Donate refs back.  NOTE(review): this also covers defs we dup'd. */
+    upb_strtable_begin(&iter, &addtab);
+    for (; !upb_strtable_done(&iter); upb_strtable_next(&iter)) {
+      upb_def *def = upb_value_getptr(upb_strtable_iter_value(&iter));
+      upb_def_donateref(def, s, ref_donor);
+    }
+  }
+  upb_strtable_uninit(&addtab);
+  upb_gfree(add_defs);
+  UPB_ASSERT(!upb_ok(status));
+  return false;
+}
+
+bool upb_symtab_add(upb_symtab *s, upb_def *const*defs, size_t n,
+                    void *ref_donor, upb_status *status) {
+  return symtab_add(s, defs, n, ref_donor, NULL, status);  /* no freeze_also */
+}
+
+bool upb_symtab_addfile(upb_symtab *s, upb_filedef *file, upb_status *status) {
+  /* Adds every def in file to s and freezes file along with them.  Returns
+   * false (with status set) on failure. */
+  size_t n = upb_filedef_defcount(file);
+  size_t i;
+  upb_def **defs;
+  bool ret;
+
+  defs = upb_gmalloc(sizeof(*defs) * n);
+  /* A zero-byte request may legitimately yield NULL, so an empty file must
+   * not be reported as out-of-memory. */
+  if (defs == NULL && n > 0) {
+    upb_status_seterrmsg(status, "Out of memory");
+    return false;
+  }
+  for (i = 0; i < n; i++) {
+    defs[i] = upb_filedef_mutabledef(file, i);
+  }
+
+  ret = symtab_add(s, defs, n, NULL, upb_filedef_upcast_mutable(file), status);
+  upb_gfree(defs);
+  return ret;
+}
+
+/* Iteration. */
+
+static void advance_to_matching(upb_symtab_iter *iter) {
+  /* Skip forward to the next def of the requested type (or to the end).
+   * UPB_DEF_ANY matches everything, so there is nothing to skip. */
+  if (iter->type != UPB_DEF_ANY) {
+    for (; !upb_strtable_done(&iter->iter); upb_strtable_next(&iter->iter)) {
+      if (upb_symtab_iter_def(iter)->type == iter->type) break;
+    }
+  }
+}
+
+void upb_symtab_begin(upb_symtab_iter *iter, const upb_symtab *s,
+                      upb_deftype_t type) {
+  iter->type = type;  /* Filter consulted by advance_to_matching(). */
+  upb_strtable_begin(&iter->iter, &s->symtab);
+  advance_to_matching(iter);  /* Land on the first matching def, if any. */
+}
+
+void upb_symtab_next(upb_symtab_iter *iter) {
+  upb_strtable_next(&iter->iter);  /* Step once, then skip non-matching defs. */
+  advance_to_matching(iter);
+}
+
+bool upb_symtab_done(const upb_symtab_iter *iter) {
+  return upb_strtable_done(&iter->iter);  /* True at end of table. */
+}
+
+const upb_def *upb_symtab_iter_def(const upb_symtab_iter *iter) {
+  return upb_value_getptr(upb_strtable_iter_value(&iter->iter));  /* current */
+}
 /*
 ** TODO(haberman): it's unclear whether a lot of the consistency checks should
 ** UPB_ASSERT() or return false.
@@ -2851,6 +3156,1216 @@
   h->table[UPB_ENDSTR_SELECTOR].attr.handler_data_ = d;
   return true;
 }
+
+
+static bool is_power_of_two(size_t val) {
+  return val != 0 && (val & (val - 1)) == 0;  /* 0 is not a power of two. */
+}
+
+/* Rounds val up to the next multiple of align, which must be a power of 2. */
+static size_t align_up(size_t val, size_t align) {
+  UPB_ASSERT(is_power_of_two(align));
+  return (val + (align - 1)) & ~(align - 1);
+}
+
+static size_t div_round_up(size_t n, size_t d) {
+  return (n + d - 1) / d;  /* ceil(n / d); d must be nonzero. */
+}
+
+bool upb_fieldtype_mapkeyok(upb_fieldtype_t type) {
+  bool is_int = type == UPB_TYPE_INT32 || type == UPB_TYPE_UINT32 ||
+                type == UPB_TYPE_INT64 || type == UPB_TYPE_UINT64;
+  return is_int || type == UPB_TYPE_BOOL || type == UPB_TYPE_STRING;
+}
+
+void *upb_array_pack(const upb_array *arr, void *p, size_t *ofs, size_t size);
+void *upb_map_pack(const upb_map *map, void *p, size_t *ofs, size_t size);
+
+#define CHARPTR_AT(msg, ofs) ((char*)(msg) + (ofs))  /* byte offset into msg */
+#define ENCODE_MAX_NESTING 64
+#define CHECK_TRUE(x) if (!(x)) { return false; }
+
+/** upb_msgval ****************************************************************/
+
+#define upb_alignof(t) offsetof(struct { char c; t x; }, x)
+
+/* These functions will generate real memcpy() calls on ARM sadly, because
+ * the compiler assumes they might not be aligned. */
+
+static upb_msgval upb_msgval_read(const void *p, size_t ofs,
+                                  uint8_t size) {
+  /* Copies size bytes found at p + ofs into the front of a upb_msgval. */
+  upb_msgval out;
+  memcpy(&out, (const char*)p + ofs, size);
+  return out;
+}
+
+static void upb_msgval_write(void *p, size_t ofs, upb_msgval val,
+                             uint8_t size) {
+  /* Stores the first size bytes of val at p + ofs. */
+  memcpy((char*)p + ofs, &val, size);
+}
+
+static size_t upb_msgval_sizeof(upb_fieldtype_t type) {
+  switch (type) {
+    case UPB_TYPE_DOUBLE:
+    case UPB_TYPE_INT64:
+    case UPB_TYPE_UINT64:
+      return 8;
+    case UPB_TYPE_ENUM:
+    case UPB_TYPE_INT32:
+    case UPB_TYPE_UINT32:
+    case UPB_TYPE_FLOAT:
+      return 4;
+    case UPB_TYPE_BOOL:
+      return 1;
+    case UPB_TYPE_MESSAGE:
+      return sizeof(void*);
+    case UPB_TYPE_BYTES:  /* Stored as ptr + len, exactly like strings. */
+    case UPB_TYPE_STRING:
+      return sizeof(char*) + sizeof(size_t);
+  }
+  UPB_UNREACHABLE();
+}
+
+static uint8_t upb_msg_fieldsize(const upb_fielddef *f) {
+  /* Repeated fields occupy one pointer-sized slot; singular fields take the
+   * size reported for their value type. */
+  if (upb_fielddef_isseq(f)) return sizeof(void*);
+
+  return upb_msgval_sizeof(upb_fielddef_type(f));
+}
+
+/* Placeholder: always yields an all-zero upb_value and ignores val.
+ * TODO(haberman): this is broken right now because upb_msgval can hold a
+ * char* / size_t pair, which is too big for a upb_value.  The likely fix is to
+ * heap-allocate the upb_msgval and keep a pointer to it in the tables. */
+static upb_value upb_toval(upb_msgval val) {
+  upb_value zeroed;
+  UPB_UNUSED(val);
+  memset(&zeroed, 0, sizeof(zeroed));  /* XXX */
+  return zeroed;
+}
+
+static upb_msgval upb_msgval_fromval(upb_value val) {
+  upb_msgval zeroed;  /* Placeholder: val is ignored for now (see upb_toval). */
+  UPB_UNUSED(val);
+  memset(&zeroed, 0, sizeof(zeroed));  /* XXX */
+  return zeroed;
+}
+
+static upb_ctype_t upb_fieldtotabtype(upb_fieldtype_t type) {
+  /* Maps a field type to the table value type used to store it. */
+  switch (type) {
+    case UPB_TYPE_BOOL:   return UPB_CTYPE_BOOL;
+    case UPB_TYPE_FLOAT:  return UPB_CTYPE_FLOAT;
+    case UPB_TYPE_DOUBLE: return UPB_CTYPE_DOUBLE;
+    case UPB_TYPE_ENUM:   /* Enums share the int32 representation. */
+    case UPB_TYPE_INT32:  return UPB_CTYPE_INT32;
+    case UPB_TYPE_UINT32: return UPB_CTYPE_UINT32;
+    case UPB_TYPE_INT64:  return UPB_CTYPE_INT64;
+    case UPB_TYPE_UINT64: return UPB_CTYPE_UINT64;
+    case UPB_TYPE_STRING: case UPB_TYPE_BYTES:
+    case UPB_TYPE_MESSAGE: return UPB_CTYPE_CONSTPTR;
+    default: UPB_ASSERT(false); return 0;
+  }
+}
+
+static upb_msgval upb_msgval_fromdefault(const upb_fielddef *f) {
+  /* TODO(haberman): improve/optimize this (maybe use upb_msgval in fielddef) */
+  switch (upb_fielddef_type(f)) {
+    case UPB_TYPE_BOOL:
+      return upb_msgval_bool(upb_fielddef_defaultbool(f));
+    case UPB_TYPE_FLOAT:
+      return upb_msgval_float(upb_fielddef_defaultfloat(f));
+    case UPB_TYPE_DOUBLE:
+      return upb_msgval_double(upb_fielddef_defaultdouble(f));
+    case UPB_TYPE_ENUM:  /* Enum defaults are read through the int32 getter. */
+    case UPB_TYPE_INT32:
+      return upb_msgval_int32(upb_fielddef_defaultint32(f));
+    case UPB_TYPE_UINT32:
+      return upb_msgval_uint32(upb_fielddef_defaultuint32(f));
+    case UPB_TYPE_INT64:
+      return upb_msgval_int64(upb_fielddef_defaultint64(f));
+    case UPB_TYPE_UINT64:
+      return upb_msgval_uint64(upb_fielddef_defaultuint64(f));
+    case UPB_TYPE_STRING:
+    case UPB_TYPE_BYTES: {
+      size_t len;
+      const char *ptr = upb_fielddef_defaultstr(f, &len);
+      return upb_msgval_str(ptr, len);
+    }
+    case UPB_TYPE_MESSAGE:  /* Submessage fields default to a NULL message. */
+      return upb_msgval_msg(NULL);
+    default:
+      UPB_ASSERT(false);
+      return upb_msgval_msg(NULL);
+  }
+}
+
+
+/** upb_msglayout *************************************************************/
+
+struct upb_msglayout {
+  upb_msgfactory *factory;  /* Filled in by upb_msgfactory_getlayout(). */
+  const upb_msgdef *msgdef;  /* Message type this layout was built for. */
+  size_t size;  /* Bytes placed so far; padded to align when construction ends. */
+  size_t extdict_offset;
+  void *default_msg;  /* Allocated only for proto2 messages; NULL otherwise. */
+  uint32_t *field_offsets;  /* Byte offsets, indexed by upb_fielddef_index(). */
+  uint32_t *case_offsets;  /* Oneof case offsets, by upb_oneofdef_index(). */
+  uint32_t *hasbits;  /* Hasbit numbers, indexed by upb_fielddef_index(). */
+  bool has_extdict;
+  uint8_t align;  /* Starts at 1; raised by each upb_msglayout_place() call. */
+};
+
+static void upb_msg_checkfield(const upb_msglayout *l, const upb_fielddef *f) {
+  UPB_ASSERT(l->msgdef == upb_fielddef_containingtype(f));  /* f must be l's. */
+}
+
+static void upb_msglayout_free(upb_msglayout *l) {
+  upb_gfree(l->default_msg);  /* May be NULL when no default was built. */
+  upb_gfree(l);  /* Also releases the offset arrays stored after the header. */
+}
+
+const upb_msgdef *upb_msglayout_msgdef(const upb_msglayout *l) {
+  return l->msgdef;  /* The msgdef this layout was built for. */
+}
+
+static size_t upb_msglayout_place(upb_msglayout *l, size_t size) {
+  /* Reserves size bytes at the next size-aligned offset and returns that
+   * offset.  Also rounds the layout's overall alignment up to size. */
+  const size_t offset = align_up(l->size, size);
+
+  l->align = align_up(l->align, size);
+  l->size = offset + size;
+  return offset;
+}
+
+static uint32_t upb_msglayout_offset(const upb_msglayout *l,
+                                     const upb_fielddef *f) {
+  return l->field_offsets[upb_fielddef_index(f)];  /* Byte offset of f. */
+}
+
+static uint32_t upb_msglayout_hasbit(const upb_msglayout *l,
+                                     const upb_fielddef *f) {
+  return l->hasbits[upb_fielddef_index(f)];  /* A bit number, not a byte. */
+}
+
+static bool upb_msglayout_initdefault(upb_msglayout *l) {
+  /* For non-empty proto2 layouts, allocates a zeroed default message and
+   * stores the defaults of plain scalar fields in it.  Returns false only if
+   * that allocation fails. */
+  const upb_msgdef *m = l->msgdef;
+  upb_msg_field_iter it;
+
+  if (upb_msgdef_syntax(m) != UPB_SYNTAX_PROTO2 || !l->size) {
+    return true;
+  }
+
+  l->default_msg = upb_gmalloc(l->size);
+  if (!l->default_msg) {
+    return false;
+  }
+  memset(l->default_msg, 0, l->size);
+
+  for (upb_msg_field_begin(&it, m); !upb_msg_field_done(&it);
+       upb_msg_field_next(&it)) {
+    const upb_fielddef* f = upb_msg_iter_field(&it);
+
+    /* Oneof members, strings, submessages and repeated fields stay zeroed. */
+    if (upb_fielddef_containingoneof(f) || upb_fielddef_isstring(f) ||
+        upb_fielddef_issubmsg(f) || upb_fielddef_isseq(f)) {
+      continue;
+    }
+    upb_msg_set(l->default_msg, f, upb_msgval_fromdefault(f), l);
+  }
+
+  return true;
+}
+
+static upb_msglayout *upb_msglayout_new(const upb_msgdef *m) {
+  /* Computes the in-memory layout for m; returns NULL if allocation fails. */
+  upb_msg_field_iter it;
+  upb_msg_oneof_iter oit;
+  upb_msglayout *l;
+  size_t hasbit;
+  size_t array_size = upb_msgdef_numfields(m) + upb_msgdef_numoneofs(m);
+
+  if (upb_msgdef_syntax(m) == UPB_SYNTAX_PROTO2) {
+    array_size += upb_msgdef_numfields(m);  /* hasbits (proto2 only). */
+  }
+
+  l = upb_gmalloc(sizeof(*l) + (sizeof(uint32_t) * array_size));
+  if (!l) return NULL;
+
+  memset(l, 0, sizeof(*l));  /* NOTE(review): trailing arrays stay unset. */
+
+  l->msgdef = m;
+  l->align = 1;
+  l->field_offsets = (uint32_t*)CHARPTR_AT(l, sizeof(*l));
+  l->case_offsets = l->field_offsets + upb_msgdef_numfields(m);
+  l->hasbits = l->case_offsets + upb_msgdef_numoneofs(m);
+
+  /* Allocate data offsets in three stages:
+   * 1. hasbits.
+   * 2. regular fields.
+   * 3. oneof fields.
+   * OPT: There is a lot of room for optimization here to minimize the size.
+   * NOTE(review): hasbits[] has room only for proto2, yet stage 1 stores into
+   * it for any field with presence -- confirm that never happens for proto3.
+   */
+
+  /* Allocate hasbits.  Start at sizeof(void*) for upb_alloc*. */
+  for (upb_msg_field_begin(&it, m), hasbit = sizeof(void*) * 8;
+       !upb_msg_field_done(&it);
+       upb_msg_field_next(&it)) {
+    const upb_fielddef* f = upb_msg_iter_field(&it);
+
+    if (upb_fielddef_haspresence(f) && !upb_fielddef_containingoneof(f)) {
+      l->hasbits[upb_fielddef_index(f)] = hasbit++;
+    }
+  }
+
+  /* Account for space used by hasbits. */
+  l->size = div_round_up(hasbit, 8);
+
+  /* Allocate non-oneof fields. */
+  for (upb_msg_field_begin(&it, m); !upb_msg_field_done(&it);
+       upb_msg_field_next(&it)) {
+    const upb_fielddef* f = upb_msg_iter_field(&it);
+    size_t field_size = upb_msg_fieldsize(f);
+    size_t index = upb_fielddef_index(f);
+
+    if (upb_fielddef_containingoneof(f)) {
+      /* Oneofs are handled separately below. */
+      continue;
+    }
+
+    l->field_offsets[index] = upb_msglayout_place(l, field_size);
+  }
+
+  /* Allocate oneof fields.  Each oneof field consists of a uint32 for the case
+   * and space for the actual data. */
+  for (upb_msg_oneof_begin(&oit, m); !upb_msg_oneof_done(&oit);
+       upb_msg_oneof_next(&oit)) {
+    const upb_oneofdef* oneof = upb_msg_iter_oneof(&oit);
+    upb_oneof_iter fit;
+    size_t case_size = sizeof(uint32_t);  /* Could potentially optimize this. */
+    size_t field_size = 0;
+    size_t case_offset;
+    size_t val_offset;
+
+    /* Calculate field size: the max of all field sizes. */
+    for (upb_oneof_begin(&fit, oneof);
+         !upb_oneof_done(&fit);
+         upb_oneof_next(&fit)) {
+      const upb_fielddef* f = upb_oneof_iter_field(&fit);
+      field_size = UPB_MAX(field_size, upb_msg_fieldsize(f));
+    }
+
+    /* Align and allocate the case slot, then the shared value slot. */
+    case_offset = upb_msglayout_place(l, case_size);
+    val_offset = upb_msglayout_place(l, field_size);
+
+    l->case_offsets[upb_oneofdef_index(oneof)] = case_offset;
+
+    /* Assign all fields in the oneof this same offset. */
+    for (upb_oneof_begin(&fit, oneof); !upb_oneof_done(&fit);
+         upb_oneof_next(&fit)) {
+      const upb_fielddef* f = upb_oneof_iter_field(&fit);
+      l->field_offsets[upb_fielddef_index(f)] = val_offset;
+    }
+  }
+
+  /* Size of the entire structure should be a multiple of its greatest
+   * alignment. */
+  l->size = align_up(l->size, l->align);
+
+  if (upb_msglayout_initdefault(l)) {
+    return l;
+  } else {
+    upb_msglayout_free(l);
+    return NULL;
+  }
+}
+
+upb_msgfactory *upb_msglayout_factory(const upb_msglayout *layout) {
+  return layout->factory;  /* Set by upb_msgfactory_getlayout(). */
+}
+
+
+/** upb_msgfactory ************************************************************/
+
+struct upb_msgfactory {
+  const upb_symtab *symtab;  /* Borrowed: upb_msgfactory_free() leaves it be. */
+  upb_inttable layouts;  /* msgdef pointer -> upb_msglayout*, freed with us. */
+  upb_inttable mergehandlers;  /* Handlers we hold a ref on until freed. */
+};
+
+upb_msgfactory *upb_msgfactory_new(const upb_symtab *symtab) {
+  /* Returns a new factory for symtab (not copied), or NULL if out of memory. */
+  upb_msgfactory *ret = upb_gmalloc(sizeof(*ret));
+  if (!ret) return NULL;
+
+  ret->symtab = symtab;
+  upb_inttable_init(&ret->layouts, UPB_CTYPE_PTR);
+  upb_inttable_init(&ret->mergehandlers, UPB_CTYPE_CONSTPTR);
+  return ret;
+}
+
+void upb_msgfactory_free(upb_msgfactory *f) {
+  /* Frees every cached layout, drops our ref on every merge handler, then
+   * tears down both tables and the factory itself. */
+  upb_inttable_iter it;
+
+  for (upb_inttable_begin(&it, &f->layouts); !upb_inttable_done(&it);
+       upb_inttable_next(&it)) {
+    upb_msglayout_free(upb_value_getptr(upb_inttable_iter_value(&it)));
+  }
+  upb_inttable_uninit(&f->layouts);
+
+  for (upb_inttable_begin(&it, &f->mergehandlers); !upb_inttable_done(&it);
+       upb_inttable_next(&it)) {
+    upb_handlers_unref(upb_value_getconstptr(upb_inttable_iter_value(&it)), f);
+  }
+  upb_inttable_uninit(&f->mergehandlers);
+  upb_gfree(f);
+}
+
+const upb_symtab *upb_msgfactory_symtab(const upb_msgfactory *f) {
+  return f->symtab;  /* As passed to upb_msgfactory_new(). */
+}
+
+const upb_msglayout *upb_msgfactory_getlayout(upb_msgfactory *f,
+                                              const upb_msgdef *m) {
+  /* Returns f's layout for m, building and caching it on first use; NULL if
+   * the layout could not be allocated. */
+  upb_value v;
+  upb_msglayout *l;
+  UPB_ASSERT(upb_symtab_lookupmsg(f->symtab, upb_msgdef_fullname(m)) == m);
+  UPB_ASSERT(!upb_msgdef_mapentry(m));
+  if (upb_inttable_lookupptr(&f->layouts, m, &v)) {
+    UPB_ASSERT(upb_value_getptr(v));
+    return upb_value_getptr(v);
+  }
+  l = upb_msglayout_new(m);
+  if (!l) return NULL;  /* Never cache or dereference a failed layout. */
+  l->factory = f;
+  upb_inttable_insertptr(&f->layouts, m, upb_value_ptr(l));
+  return l;
+}
+
+/* Our handlers that we don't expose externally. */
+
+void *upb_msg_startstr(void *msg, const void *hd, size_t size_hint) {
+  /* Start-of-string handler: hd carries the field's byte offset in msg.  Frees
+   * any previous string data and resets the field to an empty (NULL, 0). */
+  const uint32_t ofs = (uintptr_t)hd;
+  const size_t slot_size = upb_msgval_sizeof(UPB_TYPE_STRING);
+  /* We pass NULL here because we know we can get away with it. */
+  upb_alloc *alloc = upb_msg_alloc(msg, NULL);
+  upb_msgval field = upb_msgval_read(msg, ofs, slot_size);
+  UPB_UNUSED(size_hint);
+
+  upb_free(alloc, (void*)field.str.ptr);
+  field.str.ptr = NULL;
+  field.str.len = 0;
+  upb_msgval_write(msg, ofs, field, slot_size);
+  return msg;
+}
+
+size_t upb_msg_str(void *msg, const void *hd, const char *ptr, size_t size,
+                   const upb_bufhandle *handle) {
+  /* String-data handler: appends size bytes from ptr to the string field at
+   * the byte offset carried in hd.  Returns size, or 0 if growing fails. */
+  const uint32_t ofs = (uintptr_t)hd;
+  const size_t slot_size = upb_msgval_sizeof(UPB_TYPE_STRING);
+  /* We pass NULL here because we know we can get away with it. */
+  upb_alloc *alloc = upb_msg_alloc(msg, NULL);
+  upb_msgval field = upb_msgval_read(msg, ofs, slot_size);
+  const size_t oldlen = field.str.len;
+  char *buf;
+  UPB_UNUSED(handle);
+
+  buf = upb_realloc(alloc, (void*)field.str.ptr, oldlen, oldlen + size);
+  if (!buf) {
+    return 0;
+  }
+  memcpy(buf + oldlen, ptr, size);
+  field.str.ptr = buf;
+  field.str.len = oldlen + size;
+  upb_msgval_write(msg, ofs, field, slot_size);
+  return size;
+}
+
+static void callback(const void *closure, upb_handlers *h) {
+  /* Handlers-construction callback (closure is the owning upb_msgfactory):
+   * registers per-field handlers that write into the message layout. */
+  upb_msgfactory *factory = (upb_msgfactory*)closure;
+  const upb_msgdef *md = upb_handlers_msgdef(h);
+  const upb_msglayout* layout = upb_msgfactory_getlayout(factory, md);
+  upb_msg_field_iter it;
+  UPB_UNUSED(factory);
+
+  for (upb_msg_field_begin(&it, md); !upb_msg_field_done(&it);
+       upb_msg_field_next(&it)) {
+    const upb_fielddef *f = upb_msg_iter_field(&it);
+    size_t offset = upb_msglayout_offset(layout, f);
+    upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER;
+    upb_handlerattr_sethandlerdata(&attr, (void*)offset);
+
+    if (upb_fielddef_isseq(f)) continue;  /* No handlers for repeated fields. */
+    if (upb_fielddef_isstring(f)) {
+      upb_handlers_setstartstr(h, f, upb_msg_startstr, &attr);
+      upb_handlers_setstring(h, f, upb_msg_str, &attr);
+    } else {
+      upb_msg_setscalarhandler(h, f, offset, upb_msglayout_hasbit(layout, f));
+    }
+  }
+}
+
+const upb_handlers *upb_msgfactory_getmergehandlers(upb_msgfactory *f,
+                                                    const upb_msgdef *m) {
+  /* Builds frozen merge handlers for |m| via callback() and records them in
+   * the factory's mergehandlers table.
+   * TODO(haberman): properly cache these. */
+  const upb_handlers *handlers = upb_handlers_newfrozen(m, f, callback, f);
+
+  upb_inttable_push(&f->mergehandlers, upb_value_constptr(handlers));
+  return handlers;
+}
+
+const upb_visitorplan *upb_msgfactory_getvisitorplan(upb_msgfactory *f,
+                                                     const upb_handlers *h) {
+  const upb_msgdef *md = upb_handlers_msgdef(h);  /* The plan is the layout. */
+  return (const upb_visitorplan*)upb_msgfactory_getlayout(f, md);
+}
+
+
+/** upb_visitor ***************************************************************/
+
+struct upb_visitor {
+  const upb_msglayout *layout;  /* Layout used to read the visited message. */
+  upb_sink *sink;               /* Sink that receives the visited values. */
+};
+
+static upb_selector_t getsel2(const upb_fielddef *f, upb_handlertype_t type) {
+  upb_selector_t ret;
+  bool ok = upb_handlers_getselector(f, type, &ret);
+  UPB_ASSERT(ok);  /* The lookup is expected to always succeed. */
+  return ret;
+}
+
+static bool upb_visitor_hasfield(const upb_msg *msg, const upb_fielddef *f,
+                                 const upb_msglayout *layout) {
+  /* Decides whether |f| should be emitted for |msg|: repeated fields need a
+   * non-NULL array, proto2 fields use explicit presence, and all other fields
+   * must hold a non-zero / non-empty / non-NULL value. */
+  upb_msgval val;
+
+  if (upb_fielddef_isseq(f)) {
+    return upb_msgval_getarr(upb_msg_get(msg, f, layout)) != NULL;
+  }
+  if (upb_msgdef_syntax(upb_fielddef_containingtype(f)) == UPB_SYNTAX_PROTO2) {
+    return upb_msg_has(msg, f, layout);
+  }
+
+  /* No explicit presence: compare the stored value against zero/empty. */
+  val = upb_msg_get(msg, f, layout);
+  switch (upb_fielddef_type(f)) {
+    case UPB_TYPE_FLOAT:  return upb_msgval_getfloat(val) != 0;
+    case UPB_TYPE_DOUBLE: return upb_msgval_getdouble(val) != 0;
+    case UPB_TYPE_BOOL:   return upb_msgval_getbool(val);
+    case UPB_TYPE_ENUM:   /* Enums are read as int32. */
+    case UPB_TYPE_INT32:  return upb_msgval_getint32(val) != 0;
+    case UPB_TYPE_UINT32: return upb_msgval_getuint32(val) != 0;
+    case UPB_TYPE_INT64:  return upb_msgval_getint64(val) != 0;
+    case UPB_TYPE_UINT64: return upb_msgval_getuint64(val) != 0;
+    case UPB_TYPE_STRING:
+    case UPB_TYPE_BYTES:
+      return upb_msgval_getstr(val) && upb_msgval_getstrlen(val) > 0;
+    case UPB_TYPE_MESSAGE:
+      return upb_msgval_getmsg(val) != NULL;
+  }
+
+  UPB_UNREACHABLE();
+}
+
+static bool upb_visitor_visitmsg2(const upb_msg *msg,
+                                  const upb_msglayout *layout, upb_sink *sink,
+                                  int depth) {
+  const upb_msgdef *md = upb_msglayout_msgdef(layout);
+  upb_msg_field_iter i;
+  upb_status status;
+  /* Pushes |msg| into |sink|: startmsg, present primitive fields, endmsg. */
+  upb_sink_startmsg(sink);
+
+  /* Protect against cycles (possible because users may freely reassign message
+   * and repeated fields) by imposing a maximum recursion depth. */
+  if (depth > ENCODE_MAX_NESTING) {
+    return false;  /* NOTE(review): startmsg was sent; no endmsg follows. */
+  }
+
+  for (upb_msg_field_begin(&i, md);
+       !upb_msg_field_done(&i);
+       upb_msg_field_next(&i)) {
+    upb_fielddef *f = upb_msg_iter_field(&i);
+    upb_msgval val;
+
+    if (!upb_visitor_hasfield(msg, f, layout)) {
+      continue;
+    }
+
+    val = upb_msg_get(msg, f, layout);
+
+    if (upb_fielddef_isseq(f)) {
+      const upb_array *arr = upb_msgval_getarr(val);
+      UPB_ASSERT(arr);
+      /* TODO: putary(ary, f, sink, depth);*/
+    } else if (upb_fielddef_issubmsg(f)) {  /* NOTE(review): read as a map */
+      const upb_map *map = upb_msgval_getmap(val);
+      UPB_ASSERT(map);
+      /* TODO: putmap(map, f, sink, depth);*/
+    } else if (upb_fielddef_isstring(f)) {
+      /* TODO putstr(); */
+    } else {
+      upb_selector_t sel = getsel2(f, upb_handlers_getprimitivehandlertype(f));
+      UPB_ASSERT(upb_fielddef_isprimitive(f));
+      /* Emit the value through the sink method matching the field type. */
+      switch (upb_fielddef_type(f)) {
+        case UPB_TYPE_FLOAT:
+          CHECK_TRUE(upb_sink_putfloat(sink, sel, upb_msgval_getfloat(val)));
+          break;
+        case UPB_TYPE_DOUBLE:
+          CHECK_TRUE(
+              upb_sink_putdouble(sink, sel, upb_msgval_getdouble(val)));
+          break;
+        case UPB_TYPE_BOOL:
+          CHECK_TRUE(upb_sink_putbool(sink, sel, upb_msgval_getbool(val)));
+          break;
+        case UPB_TYPE_ENUM:
+        case UPB_TYPE_INT32:
+          CHECK_TRUE(upb_sink_putint32(sink, sel, upb_msgval_getint32(val)));
+          break;
+        case UPB_TYPE_UINT32:
+          CHECK_TRUE(
+              upb_sink_putuint32(sink, sel, upb_msgval_getuint32(val)));
+          break;
+        case UPB_TYPE_INT64:
+          CHECK_TRUE(upb_sink_putint64(sink, sel, upb_msgval_getint64(val)));
+          break;
+        case UPB_TYPE_UINT64:
+          CHECK_TRUE(
+              upb_sink_putuint64(sink, sel, upb_msgval_getuint64(val)));
+          break;
+        case UPB_TYPE_STRING:
+        case UPB_TYPE_BYTES:
+        case UPB_TYPE_MESSAGE:
+          UPB_UNREACHABLE();
+      }
+    }
+  }
+  /* NOTE(review): |status| is never initialized and its result is ignored. */
+  upb_sink_endmsg(sink, &status);
+  return true;
+}
+
+upb_visitor *upb_visitor_create(upb_env *e, const upb_visitorplan *vp,
+                                upb_sink *output) {
+  upb_visitor *visitor = upb_env_malloc(e, sizeof(*visitor));
+  visitor->layout = (const upb_msglayout*)vp;  /* NOTE(review): no NULL check */
+  visitor->sink = output;
+  return visitor;
+}
+
+bool upb_visitor_visitmsg(upb_visitor *visitor, const upb_msg *msg) {
+  return upb_visitor_visitmsg2(msg, visitor->layout, visitor->sink, 0);
+}
+
+
+/** upb_msg *******************************************************************/
+
+/* If we always read/write as a consistent type to each address, this shouldn't
+ * violate aliasing.  The expansion is parenthesized so that DEREF() composes
+ * safely with postfix operators at the use site. */
+#define DEREF(msg, ofs, type) (*(type*)CHARPTR_AT(msg, ofs))
+
+static upb_inttable *upb_msg_trygetextdict(const upb_msg *msg,
+                                           const upb_msglayout *l) {
+  return l->has_extdict ? DEREF(msg, l->extdict_offset, upb_inttable*) : NULL;
+}
+
+static upb_inttable *upb_msg_getextdict(upb_msg *msg,
+                                        const upb_msglayout *l,
+                                        upb_alloc *a) {
+  /* Returns the message's extension dict, creating it on first use with
+   * memory from |a|.  Returns NULL if the table cannot be allocated or
+   * initialized. */
+  upb_inttable *dict;
+  UPB_ASSERT(l->has_extdict);
+
+  dict = upb_msg_trygetextdict(msg, l);
+  if (dict) return dict;
+
+  dict = upb_malloc(a, sizeof(*dict));
+  if (!dict) return NULL;
+
+  /* Use an 8-byte type to ensure all bytes are copied. */
+  if (!upb_inttable_init2(dict, UPB_CTYPE_INT64, a)) {
+    upb_free(a, dict);
+    return NULL;
+  }
+
+  /* Publish the new table in the message's extension-dict slot. */
+  DEREF(msg, l->extdict_offset, upb_inttable*) = dict;
+
+  return dict;
+}
+
+static uint32_t upb_msg_getoneofint(const upb_msg *msg,
+                                    const upb_oneofdef *o,
+                                    const upb_msglayout *l) {
+  size_t oneof_ofs = l->case_offsets[upb_oneofdef_index(o)];
+  return DEREF(msg, oneof_ofs, uint8_t);  /* Case is stored in one byte. */
+}
+
+static void upb_msg_setoneofcase(const upb_msg *msg,
+                                 const upb_oneofdef *o,
+                                 const upb_msglayout *l,
+                                 uint32_t val) {
+  size_t oneof_ofs = l->case_offsets[upb_oneofdef_index(o)];
+  DEREF(msg, oneof_ofs, uint8_t) = val;  /* Truncates |val| to 8 bits. */
+}
+
+
+static bool upb_msg_oneofis(const upb_msg *msg, const upb_msglayout *l,
+                            const upb_oneofdef *o, const upb_fielddef *f) {
+  return upb_msg_getoneofint(msg, o, l) == upb_fielddef_number(f);
+}
+
+size_t upb_msg_sizeof(const upb_msglayout *l) { return l->size; }  /* bytes */
+
+void upb_msg_init(upb_msg *msg, const upb_msglayout *l, upb_alloc *a) {
+  /* Seed the message from the layout's default image, or zero-fill it. */
+  if (!l->default_msg) {
+    memset(msg, 0, l->size);
+  } else {
+    memcpy(msg, l->default_msg, l->size);
+  }
+  /* Stash the allocator in the leading bytes; upb_msg_alloc() reads it back. */
+  memcpy(msg, &a, sizeof(a));
+}
+
+void upb_msg_uninit(upb_msg *msg, const upb_msglayout *l) {
+  upb_inttable *ext_dict = upb_msg_trygetextdict(msg, l);
+  if (!ext_dict) return;
+  upb_inttable_uninit2(ext_dict, upb_msg_alloc(msg, l));
+  upb_free(upb_msg_alloc(msg, l), ext_dict);  /* Struct itself was malloc'd. */
+}
+
+upb_msg *upb_msg_new(const upb_msglayout *l, upb_alloc *a) {
+  /* Allocates a message of the layout's size from |a| and initializes it.
+   * Returns NULL if the allocation fails. */
+  upb_msg *msg = upb_malloc(a, upb_msg_sizeof(l));
+
+  if (!msg) return NULL;
+  upb_msg_init(msg, l, a);
+  return msg;
+}
+
+void upb_msg_free(upb_msg *msg, const upb_msglayout *l) {
+  upb_msg_uninit(msg, l);  /* Tears down the extension dict, if any. */
+  upb_free(upb_msg_alloc(msg, l), msg);  /* Allocator is read from |msg|. */
+}
+
+upb_alloc *upb_msg_alloc(const upb_msg *msg, const upb_msglayout *l) {
+  upb_alloc *alloc;
+  UPB_UNUSED(l);
+  memcpy(&alloc, msg, sizeof(alloc));  /* Pointer sits at offset 0 of |msg|. */
+  return alloc;
+}
+
+bool upb_msg_has(const upb_msg *msg, const upb_fielddef *f,
+                 const upb_msglayout *l) {
+  /* Returns true if |f|, which must support presence, is set in |msg|. */
+  const upb_oneofdef *o;
+  upb_msg_checkfield(l, f);
+  UPB_ASSERT(upb_fielddef_haspresence(f));
+
+  if (upb_fielddef_isextension(f)) {
+    /* Extensions are set when they are present in the extension dict. */
+    upb_inttable *ext_dict = upb_msg_trygetextdict(msg, l);
+    upb_value v;
+    return ext_dict != NULL &&
+           upb_inttable_lookup32(ext_dict, upb_fielddef_number(f), &v);
+  } else if ((o = upb_fielddef_containingoneof(f)) != NULL) {
+    /* Oneofs are set when the oneof number is set to this field. */
+    return upb_msg_getoneofint(msg, o, l) == upb_fielddef_number(f);
+  } else {
+    /* Other fields are set when their hasbit is set (mask it with '&'). */
+    uint32_t hasbit = l->hasbits[upb_fielddef_index(f)];
+    return (DEREF(msg, hasbit / 8, char) & (1 << (hasbit % 8))) != 0;
+  }
+}
+
+upb_msgval upb_msg_get(const upb_msg *msg, const upb_fielddef *f,
+                       const upb_msglayout *l) {
+  /* Returns the value of |f| in |msg|.  Extensions that are not present and
+   * oneof members that are not the active case yield the field default. */
+  upb_msg_checkfield(l, f);
+
+  if (upb_fielddef_isextension(f)) {
+    upb_inttable *ext_dict = upb_msg_trygetextdict(msg, l);
+    upb_value val;
+    /* The dict is created lazily, so it may be NULL if no extension was set. */
+    if (ext_dict &&
+        upb_inttable_lookup32(ext_dict, upb_fielddef_number(f), &val)) {
+      return upb_msgval_fromval(val);
+    }
+    return upb_msgval_fromdefault(f);
+  } else {
+    size_t ofs = l->field_offsets[upb_fielddef_index(f)];
+    const upb_oneofdef *o = upb_fielddef_containingoneof(f);
+
+    if (o && !upb_msg_oneofis(msg, l, o, f)) {
+      /* Oneof defaults can't come from the message because the memory is reused
+       * by all types in the oneof. */
+      return upb_msgval_fromdefault(f);
+    }
+    return upb_msgval_read(msg, ofs, upb_msg_fieldsize(f));
+  }
+}
+
+bool upb_msg_set(upb_msg *msg, const upb_fielddef *f, upb_msgval val,
+                 const upb_msglayout *l) {
+  /* Sets |f| in |msg| to |val|; false if an extension could not be stored. */
+  upb_alloc *a = upb_msg_alloc(msg, l);
+  upb_msg_checkfield(l, f);
+
+  if (upb_fielddef_isextension(f)) {
+    /* TODO(haberman): introduce table API that can do this in one call. */
+    upb_inttable *ext = upb_msg_getextdict(msg, l, a);
+    upb_value val2 = upb_toval(val);
+    if (!ext) return false;  /* The extension dict could not be created. */
+    if (!upb_inttable_replace(ext, upb_fielddef_number(f), val2) &&
+        !upb_inttable_insert2(ext, upb_fielddef_number(f), val2, a)) {
+      return false;
+    }
+  } else {
+    size_t ofs = l->field_offsets[upb_fielddef_index(f)];
+    const upb_oneofdef *o = upb_fielddef_containingoneof(f);
+
+    if (o) {
+      upb_msg_setoneofcase(msg, o, l, upb_fielddef_number(f));
+    }
+
+    upb_msgval_write(msg, ofs, val, upb_msg_fieldsize(f));
+  }
+  return true;
+}
+
+
+/** upb_array *****************************************************************/
+
+struct upb_array {
+  upb_fieldtype_t type;  /* Element type, as given to upb_array_init(). */
+  uint8_t element_size;  /* upb_msgval_sizeof(type), cached at init. */
+  void *data;   /* Each element is element_size. */
+  size_t len;   /* Measured in elements. */
+  size_t size;  /* Measured in elements. */
+  upb_alloc *alloc;  /* Allocator used for |data| (and by upb_array_free). */
+};
+
+#define DEREF_ARR(arr, i, type) (((type*)(arr)->data)[(i)])
+
+size_t upb_array_sizeof(upb_fieldtype_t type) {
+  UPB_UNUSED(type);  /* The size does not depend on the element type. */
+  return sizeof(upb_array);
+}
+
+void upb_array_init(upb_array *arr, upb_fieldtype_t type, upb_alloc *alloc) {
+  arr->type = type;
+  arr->data = NULL;  /* No storage yet; upb_array_set() allocates on demand. */
+  arr->len = 0;
+  arr->size = 0;
+  arr->element_size = upb_msgval_sizeof(type);
+  arr->alloc = alloc;
+}
+
+void upb_array_uninit(upb_array *arr) {
+  upb_free(arr->alloc, arr->data);  /* Frees element storage only. */
+}
+
+upb_array *upb_array_new(upb_fieldtype_t type, upb_alloc *a) {
+  /* Allocates an empty array of |type| elements from |a|.
+   * Returns NULL if the allocation fails. */
+  upb_array *arr = upb_malloc(a, upb_array_sizeof(type));
+
+  if (!arr) return NULL;
+  upb_array_init(arr, type, a);
+  return arr;
+}
+
+void upb_array_free(upb_array *arr) {
+  upb_array_uninit(arr);      /* Frees the element storage. */
+  upb_free(arr->alloc, arr);  /* Frees the array struct itself. */
+}
+
+size_t upb_array_size(const upb_array *arr) {
+  return arr->len;  /* Elements in use, not the allocated capacity. */
+}
+
+upb_fieldtype_t upb_array_type(const upb_array *arr) {
+  return arr->type;  /* Element type recorded by upb_array_init(). */
+}
+
+upb_msgval upb_array_get(const upb_array *arr, size_t i) {
+  UPB_ASSERT(i < arr->len);  /* |i| must address an existing element. */
+  return upb_msgval_read(arr->data, i * arr->element_size, arr->element_size);
+}
+
+bool upb_array_set(upb_array *arr, size_t i, upb_msgval val) {
+  /* Stores |val| at index |i|, which must be at most the current length.
+   * |i| == length appends one element, doubling the capacity (minimum 8
+   * elements) when the array is full.  Returns false if growing fails. */
+  const size_t elem = arr->element_size;
+  UPB_ASSERT(i <= arr->len);
+
+  if (i == arr->len) {
+    /* Appending: make room first if every slot is already in use. */
+    if (i == arr->size) {
+      size_t new_cap = UPB_MAX(arr->size * 2, 8);
+      void *grown = upb_realloc(arr->alloc, arr->data, arr->size * elem,
+                                new_cap * elem);
+
+      if (!grown) {
+        return false;
+      }
+
+      arr->data = grown;
+      arr->size = new_cap;
+    }
+
+    arr->len = i + 1;
+  }
+
+  upb_msgval_write(arr->data, i * elem, val, elem);
+  return true;
+}
+
+
+/** upb_map *******************************************************************/
+
+struct upb_map {
+  upb_fieldtype_t key_type;  /* Key type, as given to upb_map_init(). */
+  upb_fieldtype_t val_type;  /* Value type, as given to upb_map_init(). */
+  /* We may want to optimize this to use inttable where possible, for greater
+   * efficiency and lower memory footprint. */
+  upb_strtable strtab;
+  upb_alloc *alloc;  /* Allocator used for the table and by upb_map_free(). */
+};
+
+static void upb_map_tokey(upb_fieldtype_t type, upb_msgval *key,
+                          const char **out_key, size_t *out_len) {
+  switch (type) {  /* Yields a byte view of |key|; nothing is copied. */
+    case UPB_TYPE_STRING:
+      /* Point to string data of the input key. */
+      *out_key = key->str.ptr;
+      *out_len = key->str.len;
+      return;
+    case UPB_TYPE_BOOL:
+    case UPB_TYPE_INT32:
+    case UPB_TYPE_UINT32:
+    case UPB_TYPE_INT64:
+    case UPB_TYPE_UINT64:
+      /* Point to the key itself.  XXX: big-endian. */
+      *out_key = (const char*)key;
+      *out_len = upb_msgval_sizeof(type);
+      return;
+    case UPB_TYPE_BYTES:
+    case UPB_TYPE_DOUBLE:
+    case UPB_TYPE_ENUM:
+    case UPB_TYPE_FLOAT:
+    case UPB_TYPE_MESSAGE:
+      break;  /* Cannot be a map key. */
+  }
+  UPB_UNREACHABLE();  /* Only reached for types that cannot be map keys. */
+}
+
+static upb_msgval upb_map_fromkey(upb_fieldtype_t type, const char *key,
+                                  size_t len) {
+  switch (type) {  /* Inverse of upb_map_tokey(): bytes back to a value. */
+    case UPB_TYPE_STRING:
+      return upb_msgval_str(key, len);
+    case UPB_TYPE_BOOL:
+    case UPB_TYPE_INT32:
+    case UPB_TYPE_UINT32:
+    case UPB_TYPE_INT64:
+    case UPB_TYPE_UINT64:
+      return upb_msgval_read(key, 0, upb_msgval_sizeof(type));  /* |len| unused */
+    case UPB_TYPE_BYTES:
+    case UPB_TYPE_DOUBLE:
+    case UPB_TYPE_ENUM:
+    case UPB_TYPE_FLOAT:
+    case UPB_TYPE_MESSAGE:
+      break;  /* Cannot be a map key. */
+  }
+  UPB_UNREACHABLE();  /* Only reached for types that cannot be map keys. */
+}
+
+size_t upb_map_sizeof(upb_fieldtype_t ktype, upb_fieldtype_t vtype) {
+  /* Size does not currently depend on key/value type. */
+  UPB_UNUSED(ktype);
+  UPB_UNUSED(vtype);
+  return sizeof(upb_map);
+}
+
+bool upb_map_init(upb_map *map, upb_fieldtype_t ktype, upb_fieldtype_t vtype,
+                  upb_alloc *a) {
+  /* Initializes |map| in place for the given key and value types.  |a| backs
+   * the string table; returns false if the table cannot be initialized. */
+  upb_ctype_t tabtype = upb_fieldtotabtype(vtype);
+  UPB_ASSERT(upb_fieldtype_mapkeyok(ktype));
+
+  map->key_type = ktype;
+  map->val_type = vtype;
+  map->alloc = a;
+
+  /* The table's success flag is the result of the whole initialization. */
+  return upb_strtable_init2(&map->strtab, tabtype, a);
+}
+
+void upb_map_uninit(upb_map *map) {
+  upb_strtable_uninit2(&map->strtab, map->alloc);  /* Table only, not |map|. */
+}
+
+upb_map *upb_map_new(upb_fieldtype_t ktype, upb_fieldtype_t vtype,
+                     upb_alloc *a) {
+  /* Allocates and initializes a map from |a|.  Returns NULL on failure. */
+  upb_map *map = upb_malloc(a, upb_map_sizeof(ktype, vtype));
+
+  if (!map) return NULL;
+
+  if (!upb_map_init(map, ktype, vtype, a)) {
+    upb_free(a, map);  /* Don't leak the struct when initialization fails. */
+    return NULL;
+  }
+
+  return map;
+}
+
+void upb_map_free(upb_map *map) {
+  upb_map_uninit(map);        /* Releases the string table. */
+  upb_free(map->alloc, map);  /* Frees the map struct itself. */
+}
+
+size_t upb_map_size(const upb_map *map) {
+  return upb_strtable_count(&map->strtab);  /* Count of the backing table. */
+}
+
+upb_fieldtype_t upb_map_keytype(const upb_map *map) {
+  return map->key_type;  /* Key type recorded by upb_map_init(). */
+}
+
+upb_fieldtype_t upb_map_valuetype(const upb_map *map) {
+  return map->val_type;  /* Value type recorded by upb_map_init(). */
+}
+
+bool upb_map_get(const upb_map *map, upb_msgval key, upb_msgval *val) {
+  /* Looks up |key|; on a hit copies the stored value into |*val| and returns
+   * true.  On a miss |*val| is left untouched and false is returned. */
+  upb_value found;
+  const char *kbytes;
+  size_t klen;
+
+  upb_map_tokey(map->key_type, &key, &kbytes, &klen);
+  if (!upb_strtable_lookup2(&map->strtab, kbytes, klen, &found)) {
+    return false;
+  }
+  memcpy(val, &found, sizeof(found));
+  return true;
+}
+
+bool upb_map_set(upb_map *map, upb_msgval key, upb_msgval val,
+                 upb_msgval *removed) {
+  /* Maps |key| to |val|.  An existing entry is replaced and, when |removed|
+   * is non-NULL, its old value is stored there.  False if insertion fails. */
+  const char *key_str;
+  size_t key_len;
+  upb_value tabval = upb_toval(val);
+  upb_value removedtabval;
+  upb_alloc *a = map->alloc;
+
+  upb_map_tokey(map->key_type, &key, &key_str, &key_len);
+  /* TODO(haberman): add overwrite operation to minimize number of lookups. */
+  if (upb_strtable_lookup2(&map->strtab, key_str, key_len, NULL)) {
+    upb_strtable_remove3(&map->strtab, key_str, key_len, &removedtabval, a);
+    if (removed) *removed = upb_msgval_fromval(removedtabval);
+  }
+  return upb_strtable_insert3(&map->strtab, key_str, key_len, tabval, a);
+}
+
+bool upb_map_del(upb_map *map, upb_msgval key) {
+  const char *key_str;
+  size_t key_len;
+  upb_alloc *a = map->alloc;
+  /* Removes |key|; the result is whatever upb_strtable_remove3() returns. */
+  upb_map_tokey(map->key_type, &key, &key_str, &key_len);
+  return upb_strtable_remove3(&map->strtab, key_str, key_len, NULL, a);
+}
+
+
+/** upb_mapiter ***************************************************************/
+
+struct upb_mapiter {
+  upb_strtable_iter iter;    /* Position in the map's string table. */
+  upb_fieldtype_t key_type;  /* Copied from the map; used to decode keys. */
+};
+
+size_t upb_mapiter_sizeof(void) {
+  return sizeof(upb_mapiter);  /* Bytes needed for one iterator. */
+}
+
+void upb_mapiter_begin(upb_mapiter *i, const upb_map *map) {
+  upb_strtable_begin(&i->iter, &map->strtab);  /* Start of the backing table. */
+  i->key_type = map->key_type;  /* Needed later by upb_mapiter_key(). */
+}
+
+upb_mapiter *upb_mapiter_new(const upb_map *t, upb_alloc *a) {
+  /* Allocates an iterator from |a| positioned at the start of |t|.
+   * Returns NULL if the allocation fails. */
+  upb_mapiter *iter = upb_malloc(a, upb_mapiter_sizeof());
+
+  if (iter) {
+    upb_mapiter_begin(iter, t);
+  }
+  return iter;
+}
+
+void upb_mapiter_free(upb_mapiter *i, upb_alloc *a) {
+  upb_free(a, i);  /* Presumably |a| is the allocator given to _new(). */
+}
+
+void upb_mapiter_next(upb_mapiter *i) {
+  upb_strtable_next(&i->iter);  /* Delegates to the table iterator. */
+}
+
+bool upb_mapiter_done(const upb_mapiter *i) {
+  return upb_strtable_done(&i->iter);  /* Delegates to the table iterator. */
+}
+
+upb_msgval upb_mapiter_key(const upb_mapiter *i) {  /* Decoded current key. */
+  return upb_map_fromkey(i->key_type, upb_strtable_iter_key(&i->iter),
+                         upb_strtable_iter_keylength(&i->iter));
+}
+
+upb_msgval upb_mapiter_value(const upb_mapiter *i) {  /* Current value. */
+  return upb_msgval_fromval(upb_strtable_iter_value(&i->iter));
+}
+
+void upb_mapiter_setdone(upb_mapiter *i) {
+  upb_strtable_iter_setdone(&i->iter);  /* Delegates to the table iterator. */
+}
+
+bool upb_mapiter_isequal(const upb_mapiter *i1, const upb_mapiter *i2) {
+  return upb_strtable_iter_isequal(&i1->iter, &i2->iter);  /* key_type unused */
+}
+
+
+/** Handlers for upb_msg ******************************************************/
+
+typedef struct {
+  size_t offset;   /* Byte offset in the message where the value is stored. */
+  int32_t hasbit;  /* Bit index of the field's hasbit; ignored unless > 0. */
+} upb_msg_handlerdata;
+
+/* Fallback implementation if the handler is not specialized by the producer. */
+#define MSG_WRITER(type, ctype)                                               \
+  bool upb_msg_set ## type (void *c, const void *hd, ctype val) {             \
+    uint8_t *m = c;                                                           \
+    const upb_msg_handlerdata *d = hd;                                        \
+    if (d->hasbit > 0)  /* hasbit <= 0: no presence bit is written. */        \
+      *(uint8_t*)&m[d->hasbit / 8] |= 1 << (d->hasbit % 8);                   \
+    *(ctype*)&m[d->offset] = val;                                             \
+    return true;                                                              \
+  }                                                                           \
+
+MSG_WRITER(double, double)
+MSG_WRITER(float,  float)
+MSG_WRITER(int32,  int32_t)
+MSG_WRITER(int64,  int64_t)
+MSG_WRITER(uint32, uint32_t)
+MSG_WRITER(uint64, uint64_t)
+MSG_WRITER(bool,   bool)
+
+bool upb_msg_setscalarhandler(upb_handlers *h, const upb_fielddef *f,
+                              size_t offset, int32_t hasbit) {
+  upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER;
+  bool ok;
+  /* The handler data is heap-allocated; upb_gfree is registered for it below. */
+  upb_msg_handlerdata *d = upb_gmalloc(sizeof(*d));
+  if (!d) return false;
+  d->offset = offset;
+  d->hasbit = hasbit;
+  /* Attach the data to the handler attributes and register its cleanup. */
+  upb_handlerattr_sethandlerdata(&attr, d);
+  upb_handlerattr_setalwaysok(&attr, true);
+  upb_handlers_addcleanup(h, d, upb_gfree);
+  /* TYPE(u, l): for field type UPB_TYPE_u, install writer upb_msg_set<l>. */
+#define TYPE(u, l) \
+  case UPB_TYPE_##u: \
+    ok = upb_handlers_set##l(h, f, upb_msg_set##l, &attr); break;
+
+  ok = false;
+
+  switch (upb_fielddef_type(f)) {
+    TYPE(INT64,  int64);
+    TYPE(INT32,  int32);
+    TYPE(ENUM,   int32);
+    TYPE(UINT64, uint64);
+    TYPE(UINT32, uint32);
+    TYPE(DOUBLE, double);
+    TYPE(FLOAT,  float);
+    TYPE(BOOL,   bool);
+    default: UPB_ASSERT(false); break;  /* ok stays false */
+  }
+#undef TYPE
+
+  upb_handlerattr_uninit(&attr);
+  return ok;
+}
+
+bool upb_msg_getscalarhandlerdata(const upb_handlers *h,
+                                  upb_selector_t s,
+                                  upb_fieldtype_t *type,
+                                  size_t *offset,
+                                  int32_t *hasbit) {
+  const upb_msg_handlerdata *d;
+  upb_func *f = upb_handlers_gethandler(h, s);
+  /* Identify the scalar type by comparing the installed handler pointer. */
+  if ((upb_int64_handlerfunc*)f == upb_msg_setint64) {
+    *type = UPB_TYPE_INT64;
+  } else if ((upb_int32_handlerfunc*)f == upb_msg_setint32) {
+    *type = UPB_TYPE_INT32;
+  } else if ((upb_uint64_handlerfunc*)f == upb_msg_setuint64) {
+    *type = UPB_TYPE_UINT64;
+  } else if ((upb_uint32_handlerfunc*)f == upb_msg_setuint32) {
+    *type = UPB_TYPE_UINT32;
+  } else if ((upb_double_handlerfunc*)f == upb_msg_setdouble) {
+    *type = UPB_TYPE_DOUBLE;
+  } else if ((upb_float_handlerfunc*)f == upb_msg_setfloat) {
+    *type = UPB_TYPE_FLOAT;
+  } else if ((upb_bool_handlerfunc*)f == upb_msg_setbool) {
+    *type = UPB_TYPE_BOOL;
+  } else {
+    return false;  /* Not one of the upb_msg_set* writers; outputs untouched. */
+  }
+  /* One of our writers is installed, so its data is a upb_msg_handlerdata. */
+  d = upb_handlers_gethandlerdata(h, s);
+  *offset = d->offset;
+  *hasbit = d->hasbit;
+  return true;
+}
 /*
 ** upb::RefCounted Implementation
 **
@@ -3703,579 +5218,91 @@
 }
 
 
-/* Fallback implementation if the shim is not specialized by the JIT. */
-#define SHIM_WRITER(type, ctype)                                              \
-  bool upb_shim_set ## type (void *c, const void *hd, ctype val) {            \
-    uint8_t *m = c;                                                           \
-    const upb_shim_data *d = hd;                                              \
-    if (d->hasbit > 0)                                                        \
-      *(uint8_t*)&m[d->hasbit / 8] |= 1 << (d->hasbit % 8);                   \
-    *(ctype*)&m[d->offset] = val;                                             \
-    return true;                                                              \
-  }                                                                           \
-
-SHIM_WRITER(double, double)
-SHIM_WRITER(float,  float)
-SHIM_WRITER(int32,  int32_t)
-SHIM_WRITER(int64,  int64_t)
-SHIM_WRITER(uint32, uint32_t)
-SHIM_WRITER(uint64, uint64_t)
-SHIM_WRITER(bool,   bool)
-#undef SHIM_WRITER
-
-bool upb_shim_set(upb_handlers *h, const upb_fielddef *f, size_t offset,
-                  int32_t hasbit) {
-  upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER;
-  bool ok;
-
-  upb_shim_data *d = upb_gmalloc(sizeof(*d));
-  if (!d) return false;
-  d->offset = offset;
-  d->hasbit = hasbit;
-
-  upb_handlerattr_sethandlerdata(&attr, d);
-  upb_handlerattr_setalwaysok(&attr, true);
-  upb_handlers_addcleanup(h, d, upb_gfree);
-
-#define TYPE(u, l) \
-  case UPB_TYPE_##u: \
-    ok = upb_handlers_set##l(h, f, upb_shim_set##l, &attr); break;
-
-  ok = false;
-
-  switch (upb_fielddef_type(f)) {
-    TYPE(INT64,  int64);
-    TYPE(INT32,  int32);
-    TYPE(ENUM,   int32);
-    TYPE(UINT64, uint64);
-    TYPE(UINT32, uint32);
-    TYPE(DOUBLE, double);
-    TYPE(FLOAT,  float);
-    TYPE(BOOL,   bool);
-    default: UPB_ASSERT(false); break;
-  }
-#undef TYPE
-
-  upb_handlerattr_uninit(&attr);
-  return ok;
-}
-
-const upb_shim_data *upb_shim_getdata(const upb_handlers *h, upb_selector_t s,
-                                      upb_fieldtype_t *type) {
-  upb_func *f = upb_handlers_gethandler(h, s);
-
-  if ((upb_int64_handlerfunc*)f == upb_shim_setint64) {
-    *type = UPB_TYPE_INT64;
-  } else if ((upb_int32_handlerfunc*)f == upb_shim_setint32) {
-    *type = UPB_TYPE_INT32;
-  } else if ((upb_uint64_handlerfunc*)f == upb_shim_setuint64) {
-    *type = UPB_TYPE_UINT64;
-  } else if ((upb_uint32_handlerfunc*)f == upb_shim_setuint32) {
-    *type = UPB_TYPE_UINT32;
-  } else if ((upb_double_handlerfunc*)f == upb_shim_setdouble) {
-    *type = UPB_TYPE_DOUBLE;
-  } else if ((upb_float_handlerfunc*)f == upb_shim_setfloat) {
-    *type = UPB_TYPE_FLOAT;
-  } else if ((upb_bool_handlerfunc*)f == upb_shim_setbool) {
-    *type = UPB_TYPE_BOOL;
-  } else {
-    return NULL;
-  }
-
-  return (const upb_shim_data*)upb_handlers_gethandlerdata(h, s);
-}
-
-
-#include <string.h>
-
-static void upb_symtab_free(upb_refcounted *r) {
-  upb_symtab *s = (upb_symtab*)r;
-  upb_strtable_iter i;
-  upb_strtable_begin(&i, &s->symtab);
-  for (; !upb_strtable_done(&i); upb_strtable_next(&i)) {
-    const upb_def *def = upb_value_getptr(upb_strtable_iter_value(&i));
-    upb_def_unref(def, s);
-  }
-  upb_strtable_uninit(&s->symtab);
-  upb_gfree(s);
-}
-
-upb_symtab *upb_symtab_new(const void *owner) {
-  static const struct upb_refcounted_vtbl vtbl = {NULL, &upb_symtab_free};
-
-  upb_symtab *s = upb_gmalloc(sizeof(*s));
-  if (!s) {
-    return NULL;
-  }
-
-  upb_refcounted_init(upb_symtab_upcast_mutable(s), &vtbl, owner);
-  upb_strtable_init(&s->symtab, UPB_CTYPE_PTR);
-  return s;
-}
-
-void upb_symtab_freeze(upb_symtab *s) {
-  upb_refcounted *r;
-  bool ok;
-
-  UPB_ASSERT(!upb_symtab_isfrozen(s));
-  r = upb_symtab_upcast_mutable(s);
-  /* The symtab does not take ref2's (see refcounted.h) on the defs, because
-   * defs cannot refer back to the table and therefore cannot create cycles.  So
-   * 0 will suffice for maxdepth here. */
-  ok = upb_refcounted_freeze(&r, 1, NULL, 0);
-  UPB_ASSERT(ok);
-}
-
-const upb_def *upb_symtab_lookup(const upb_symtab *s, const char *sym) {
-  upb_value v;
-  upb_def *ret = upb_strtable_lookup(&s->symtab, sym, &v) ?
-      upb_value_getptr(v) : NULL;
-  return ret;
-}
-
-const upb_msgdef *upb_symtab_lookupmsg(const upb_symtab *s, const char *sym) {
-  upb_value v;
-  upb_def *def = upb_strtable_lookup(&s->symtab, sym, &v) ?
-      upb_value_getptr(v) : NULL;
-  return def ? upb_dyncast_msgdef(def) : NULL;
-}
-
-const upb_enumdef *upb_symtab_lookupenum(const upb_symtab *s, const char *sym) {
-  upb_value v;
-  upb_def *def = upb_strtable_lookup(&s->symtab, sym, &v) ?
-      upb_value_getptr(v) : NULL;
-  return def ? upb_dyncast_enumdef(def) : NULL;
-}
-
-/* Given a symbol and the base symbol inside which it is defined, find the
- * symbol's definition in t. */
-static upb_def *upb_resolvename(const upb_strtable *t,
-                                const char *base, const char *sym) {
-  if(strlen(sym) == 0) return NULL;
-  if(sym[0] == '.') {
-    /* Symbols starting with '.' are absolute, so we do a single lookup.
-     * Slice to omit the leading '.' */
-    upb_value v;
-    return upb_strtable_lookup(t, sym + 1, &v) ? upb_value_getptr(v) : NULL;
-  } else {
-    /* Remove components from base until we find an entry or run out.
-     * TODO: This branch is totally broken, but currently not used. */
-    (void)base;
-    UPB_ASSERT(false);
-    return NULL;
-  }
-}
-
-const upb_def *upb_symtab_resolve(const upb_symtab *s, const char *base,
-                                  const char *sym) {
-  upb_def *ret = upb_resolvename(&s->symtab, base, sym);
-  return ret;
-}
-
-/* Starts a depth-first traversal at "def", recursing into any subdefs
- * (ie. submessage types).  Adds duplicates of existing defs to addtab
- * wherever necessary, so that the resulting symtab will be consistent once
- * addtab is added.
- *
- * More specifically, if any def D is found in the DFS that:
- *
- *   1. can reach a def that is being replaced by something in addtab, AND
- *
- *   2. is not itself being replaced already (ie. this name doesn't already
- *      exist in addtab)
- *
- * ...then a duplicate (new copy) of D will be added to addtab.
- *
- * Returns true if this happened for any def reachable from "def."
- *
- * It is slightly tricky to do this correctly in the presence of cycles.  If we
- * detect that our DFS has hit a cycle, we might not yet know if any SCCs on
- * our stack can reach a def in addtab or not.  Once we figure this out, that
- * answer needs to apply to *all* defs in these SCCs, even if we visited them
- * already.  So a straight up one-pass cycle-detecting DFS won't work.
- *
- * To work around this problem, we traverse each SCC (which we already
- * computed, since these defs are frozen) as a single node.  We first compute
- * whether the SCC as a whole can reach any def in addtab, then we dup (or not)
- * the entire SCC.  This requires breaking the encapsulation of upb_refcounted,
- * since that is where we get the data about what SCC we are in. */
-static bool upb_resolve_dfs(const upb_def *def, upb_strtable *addtab,
-                            const void *new_owner, upb_inttable *seen,
-                            upb_status *s) {
-  upb_value v;
-  bool need_dup;
-  const upb_def *base;
-  const void* memoize_key;
-
-  /* Memoize results of this function for efficiency (since we're traversing a
-   * DAG this is not needed to limit the depth of the search).
-   *
-   * We memoize by SCC instead of by individual def. */
-  memoize_key = def->base.group;
-
-  if (upb_inttable_lookupptr(seen, memoize_key, &v))
-    return upb_value_getbool(v);
-
-  /* Visit submessages for all messages in the SCC. */
-  need_dup = false;
-  base = def;
-  do {
-    upb_value v;
-    const upb_msgdef *m;
-
-    UPB_ASSERT(upb_def_isfrozen(def));
-    if (def->type == UPB_DEF_FIELD) continue;
-    if (upb_strtable_lookup(addtab, upb_def_fullname(def), &v)) {
-      need_dup = true;
-    }
-
-    /* For messages, continue the recursion by visiting all subdefs, but only
-     * ones in different SCCs. */
-    m = upb_dyncast_msgdef(def);
-    if (m) {
-      upb_msg_field_iter i;
-      for(upb_msg_field_begin(&i, m);
-          !upb_msg_field_done(&i);
-          upb_msg_field_next(&i)) {
-        upb_fielddef *f = upb_msg_iter_field(&i);
-        const upb_def *subdef;
-
-        if (!upb_fielddef_hassubdef(f)) continue;
-        subdef = upb_fielddef_subdef(f);
-
-        /* Skip subdefs in this SCC. */
-        if (def->base.group == subdef->base.group) continue;
-
-        /* |= to avoid short-circuit; we need its side-effects. */
-        need_dup |= upb_resolve_dfs(subdef, addtab, new_owner, seen, s);
-        if (!upb_ok(s)) return false;
-      }
-    }
-  } while ((def = (upb_def*)def->base.next) != base);
-
-  if (need_dup) {
-    /* Dup all defs in this SCC that don't already have entries in addtab. */
-    def = base;
-    do {
-      const char *name;
-
-      if (def->type == UPB_DEF_FIELD) continue;
-      name = upb_def_fullname(def);
-      if (!upb_strtable_lookup(addtab, name, NULL)) {
-        upb_def *newdef = upb_def_dup(def, new_owner);
-        if (!newdef) goto oom;
-        newdef->came_from_user = false;
-        if (!upb_strtable_insert(addtab, name, upb_value_ptr(newdef)))
-          goto oom;
-      }
-    } while ((def = (upb_def*)def->base.next) != base);
-  }
-
-  upb_inttable_insertptr(seen, memoize_key, upb_value_bool(need_dup));
-  return need_dup;
-
-oom:
-  upb_status_seterrmsg(s, "out of memory");
-  return false;
-}
-
-/* TODO(haberman): we need a lot more testing of error conditions.
- * The came_from_user stuff in particular is not tested. */
-static bool symtab_add(upb_symtab *s, upb_def *const*defs, size_t n,
-                       void *ref_donor, upb_refcounted *freeze_also,
-                       upb_status *status) {
-  size_t i;
-  size_t add_n;
-  size_t freeze_n;
-  upb_strtable_iter iter;
-  upb_refcounted **add_objs = NULL;
-  upb_def **add_defs = NULL;
-  size_t add_objs_size;
-  upb_strtable addtab;
-  upb_inttable seen;
-
-  if (n == 0 && !freeze_also) {
-    return true;
-  }
-
-  UPB_ASSERT(!upb_symtab_isfrozen(s));
-  if (!upb_strtable_init(&addtab, UPB_CTYPE_PTR)) {
-    upb_status_seterrmsg(status, "out of memory");
-    return false;
-  }
-
-  /* Add new defs to our "add" set. */
-  for (i = 0; i < n; i++) {
-    upb_def *def = defs[i];
-    const char *fullname;
-    upb_fielddef *f;
-
-    if (upb_def_isfrozen(def)) {
-      upb_status_seterrmsg(status, "added defs must be mutable");
-      goto err;
-    }
-    UPB_ASSERT(!upb_def_isfrozen(def));
-    fullname = upb_def_fullname(def);
-    if (!fullname) {
-      upb_status_seterrmsg(
-          status, "Anonymous defs cannot be added to a symtab");
-      goto err;
-    }
-
-    f = upb_dyncast_fielddef_mutable(def);
-
-    if (f) {
-      if (!upb_fielddef_containingtypename(f)) {
-        upb_status_seterrmsg(status,
-                             "Standalone fielddefs must have a containing type "
-                             "(extendee) name set");
-        goto err;
-      }
-    } else {
-      if (upb_strtable_lookup(&addtab, fullname, NULL)) {
-        upb_status_seterrf(status, "Conflicting defs named '%s'", fullname);
-        goto err;
-      }
-      /* We need this to back out properly, because if there is a failure we
-       * need to donate the ref back to the caller. */
-      def->came_from_user = true;
-      upb_def_donateref(def, ref_donor, s);
-      if (!upb_strtable_insert(&addtab, fullname, upb_value_ptr(def)))
-        goto oom_err;
-    }
-  }
-
-  /* Add standalone fielddefs (ie. extensions) to the appropriate messages.
-   * If the appropriate message only exists in the existing symtab, duplicate
-   * it so we have a mutable copy we can add the fields to. */
-  for (i = 0; i < n; i++) {
-    upb_def *def = defs[i];
-    upb_fielddef *f = upb_dyncast_fielddef_mutable(def);
-    const char *msgname;
-    upb_value v;
-    upb_msgdef *m;
-
-    if (!f) continue;
-    msgname = upb_fielddef_containingtypename(f);
-    /* We validated this earlier in this function. */
-    UPB_ASSERT(msgname);
-
-    /* If the extendee name is absolutely qualified, move past the initial ".".
-     * TODO(haberman): it is not obvious what it would mean if this was not
-     * absolutely qualified. */
-    if (msgname[0] == '.') {
-      msgname++;
-    }
-
-    if (upb_strtable_lookup(&addtab, msgname, &v)) {
-      /* Extendee is in the set of defs the user asked us to add. */
-      m = upb_value_getptr(v);
-    } else {
-      /* Need to find and dup the extendee from the existing symtab. */
-      const upb_msgdef *frozen_m = upb_symtab_lookupmsg(s, msgname);
-      if (!frozen_m) {
-        upb_status_seterrf(status,
-                           "Tried to extend message %s that does not exist "
-                           "in this SymbolTable.",
-                           msgname);
-        goto err;
-      }
-      m = upb_msgdef_dup(frozen_m, s);
-      if (!m) goto oom_err;
-      if (!upb_strtable_insert(&addtab, msgname, upb_value_ptr(m))) {
-        upb_msgdef_unref(m, s);
-        goto oom_err;
-      }
-    }
-
-    if (!upb_msgdef_addfield(m, f, ref_donor, status)) {
-      goto err;
-    }
-  }
-
-  /* Add dups of any existing def that can reach a def with the same name as
-   * anything in our "add" set. */
-  if (!upb_inttable_init(&seen, UPB_CTYPE_BOOL)) goto oom_err;
-  upb_strtable_begin(&iter, &s->symtab);
-  for (; !upb_strtable_done(&iter); upb_strtable_next(&iter)) {
-    upb_def *def = upb_value_getptr(upb_strtable_iter_value(&iter));
-    upb_resolve_dfs(def, &addtab, s, &seen, status);
-    if (!upb_ok(status)) goto err;
-  }
-  upb_inttable_uninit(&seen);
-
-  /* Now using the table, resolve symbolic references for subdefs. */
-  upb_strtable_begin(&iter, &addtab);
-  for (; !upb_strtable_done(&iter); upb_strtable_next(&iter)) {
-    const char *base;
-    upb_def *def = upb_value_getptr(upb_strtable_iter_value(&iter));
-    upb_msgdef *m = upb_dyncast_msgdef_mutable(def);
-    upb_msg_field_iter j;
-
-    if (!m) continue;
-    /* Type names are resolved relative to the message in which they appear. */
-    base = upb_msgdef_fullname(m);
-
-    for(upb_msg_field_begin(&j, m);
-        !upb_msg_field_done(&j);
-        upb_msg_field_next(&j)) {
-      upb_fielddef *f = upb_msg_iter_field(&j);
-      const char *name = upb_fielddef_subdefname(f);
-      if (name && !upb_fielddef_subdef(f)) {
-        /* Try the lookup in the current set of to-be-added defs first. If not
-         * there, try existing defs. */
-        upb_def *subdef = upb_resolvename(&addtab, base, name);
-        if (subdef == NULL) {
-          subdef = upb_resolvename(&s->symtab, base, name);
-        }
-        if (subdef == NULL) {
-          upb_status_seterrf(
-              status, "couldn't resolve name '%s' in message '%s'", name, base);
-          goto err;
-        } else if (!upb_fielddef_setsubdef(f, subdef, status)) {
-          goto err;
-        }
-      }
-    }
-  }
-
-  /* We need an array of the defs in addtab, for passing to
-   * upb_refcounted_freeze(). */
-  add_objs_size = upb_strtable_count(&addtab);
-  if (freeze_also) {
-    add_objs_size++;
-  }
-
-  add_defs = upb_gmalloc(sizeof(void*) * add_objs_size);
-  if (add_defs == NULL) goto oom_err;
-  upb_strtable_begin(&iter, &addtab);
-  for (add_n = 0; !upb_strtable_done(&iter); upb_strtable_next(&iter)) {
-    add_defs[add_n++] = upb_value_getptr(upb_strtable_iter_value(&iter));
-  }
-
-  /* Validate defs. */
-  if (!_upb_def_validate(add_defs, add_n, status)) {
-    goto err;
-  }
-
-  /* Cheat a little and give the array a new type.
-   * This is probably undefined behavior, but this code will be deleted soon. */
-  add_objs = (upb_refcounted**)add_defs;
-
-  freeze_n = add_n;
-  if (freeze_also) {
-    add_objs[freeze_n++] = freeze_also;
-  }
-
-  if (!upb_refcounted_freeze(add_objs, freeze_n, status,
-                             UPB_MAX_MESSAGE_DEPTH * 2)) {
-    goto err;
-  }
-
-  /* This must be delayed until all errors have been detected, since error
-   * recovery code uses this table to cleanup defs. */
-  upb_strtable_uninit(&addtab);
-
-  /* TODO(haberman) we don't properly handle errors after this point (like
-   * OOM in upb_strtable_insert() below). */
-  for (i = 0; i < add_n; i++) {
-    upb_def *def = (upb_def*)add_objs[i];
-    const char *name = upb_def_fullname(def);
-    upb_value v;
-    bool success;
-
-    if (upb_strtable_remove(&s->symtab, name, &v)) {
-      const upb_def *def = upb_value_getptr(v);
-      upb_def_unref(def, s);
-    }
-    success = upb_strtable_insert(&s->symtab, name, upb_value_ptr(def));
-    UPB_ASSERT(success == true);
-  }
-  upb_gfree(add_defs);
-  return true;
-
-oom_err:
-  upb_status_seterrmsg(status, "out of memory");
-err: {
-    /* For defs the user passed in, we need to donate the refs back.  For defs
-     * we dup'd, we need to just unref them. */
-    upb_strtable_begin(&iter, &addtab);
-    for (; !upb_strtable_done(&iter); upb_strtable_next(&iter)) {
-      upb_def *def = upb_value_getptr(upb_strtable_iter_value(&iter));
-      bool came_from_user = def->came_from_user;
-      def->came_from_user = false;
-      if (came_from_user) {
-        upb_def_donateref(def, s, ref_donor);
-      } else {
-        upb_def_unref(def, s);
-      }
-    }
-  }
-  upb_strtable_uninit(&addtab);
-  upb_gfree(add_defs);
-  UPB_ASSERT(!upb_ok(status));
-  return false;
-}
-
-bool upb_symtab_add(upb_symtab *s, upb_def *const*defs, size_t n,
-                    void *ref_donor, upb_status *status) {
-  return symtab_add(s, defs, n, ref_donor, NULL, status);
-}
-
-bool upb_symtab_addfile(upb_symtab *s, upb_filedef *file, upb_status *status) {
-  size_t n;
-  size_t i;
-  upb_def **defs;
+bool upb_bufsrc_putbuf(const char *buf, size_t len, upb_bytessink *sink) {
+  void *subc;
   bool ret;
-
-  n = upb_filedef_defcount(file);
-  defs = upb_gmalloc(sizeof(*defs) * n);
-
-  if (defs == NULL) {
-    upb_status_seterrmsg(status, "Out of memory");
-    return false;
+  upb_bufhandle handle;
+  upb_bufhandle_init(&handle);
+  upb_bufhandle_setbuf(&handle, buf, 0);
+  ret = upb_bytessink_start(sink, len, &subc);
+  if (ret && len != 0) {
+    ret = (upb_bytessink_putbuf(sink, subc, buf, len, &handle) >= len);
   }
-
-  for (i = 0; i < n; i++) {
-    defs[i] = upb_filedef_mutabledef(file, i);
+  if (ret) {
+    ret = upb_bytessink_end(sink);
   }
-
-  ret = symtab_add(s, defs, n, NULL, upb_filedef_upcast_mutable(file), status);
-
-  upb_gfree(defs);
+  upb_bufhandle_uninit(&handle);
   return ret;
 }
 
-/* Iteration. */
+struct upb_bufsink {
+  upb_byteshandler handler;
+  upb_bytessink sink;
+  upb_env *env;
+  char *ptr;
+  size_t len, size;
+};
 
-static void advance_to_matching(upb_symtab_iter *iter) {
-  if (iter->type == UPB_DEF_ANY)
-    return;
+static void *upb_bufsink_start(void *_sink, const void *hd, size_t size_hint) {
+  upb_bufsink *sink = _sink;
+  UPB_UNUSED(hd);
+  UPB_UNUSED(size_hint);
+  sink->len = 0;
+  return sink;
+}
 
-  while (!upb_strtable_done(&iter->iter) &&
-         iter->type != upb_symtab_iter_def(iter)->type) {
-    upb_strtable_next(&iter->iter);
+/* String handler: appends |len| bytes at |ptr| to the sink's buffer, doubling
+ * its capacity as needed.  Returns |len|, or 0 if the buffer cannot grow. */
+static size_t upb_bufsink_string(void *_sink, const void *hd, const char *ptr,
+                                size_t len, const upb_bufhandle *handle) {
+  upb_bufsink *sink = _sink;
+  size_t new_size = sink->size;
+  UPB_ASSERT(new_size > 0);
+  UPB_UNUSED(hd);
+  UPB_UNUSED(handle);
+  while (sink->len + len > new_size) {
+    new_size *= 2;
   }
+
+  if (new_size != sink->size) {
+    char *grown = upb_env_realloc(sink->env, sink->ptr, sink->size, new_size);
+    if (!grown) return 0;  /* Allocation failed: report nothing consumed. */
+    sink->ptr = grown;
+    sink->size = new_size;
+  }
+  memcpy(sink->ptr + sink->len, ptr, len);
+  sink->len += len;
+  return len;
 }
 
-void upb_symtab_begin(upb_symtab_iter *iter, const upb_symtab *s,
-                      upb_deftype_t type) {
-  upb_strtable_begin(&iter->iter, &s->symtab);
-  iter->type = type;
-  advance_to_matching(iter);
+/* Returns a new sink that buffers into memory from |env|, or NULL on OOM. */
+upb_bufsink *upb_bufsink_new(upb_env *env) {
+  upb_bufsink *sink = upb_env_malloc(env, sizeof(upb_bufsink));
+  if (!sink) return NULL;
+  upb_byteshandler_init(&sink->handler);
+  upb_byteshandler_setstartstr(&sink->handler, upb_bufsink_start, NULL);
+  upb_byteshandler_setstring(&sink->handler, upb_bufsink_string, NULL);
+  upb_bytessink_reset(&sink->sink, &sink->handler, sink);
+  sink->env = env;
+  sink->size = 32;  /* Initial capacity; grown by doubling when it fills. */
+  sink->len = 0;
+  sink->ptr = upb_env_malloc(env, sink->size);
+  if (!sink->ptr) { upb_env_free(env, sink); return NULL; }
+  return sink;
 }
 
-void upb_symtab_next(upb_symtab_iter *iter) {
-  upb_strtable_next(&iter->iter);
-  advance_to_matching(iter);
+void upb_bufsink_free(upb_bufsink *sink) {
+  upb_env_free(sink->env, sink->ptr);
+  upb_env_free(sink->env, sink);
 }
 
-bool upb_symtab_done(const upb_symtab_iter *iter) {
-  return upb_strtable_done(&iter->iter);
+upb_bytessink *upb_bufsink_sink(upb_bufsink *sink) {
+  return &sink->sink;
 }
 
-const upb_def *upb_symtab_iter_def(const upb_symtab_iter *iter) {
-  return upb_value_getptr(upb_strtable_iter_value(&iter->iter));
+const char *upb_bufsink_getdata(const upb_bufsink *sink, size_t *len) {
+  *len = sink->len;
+  return sink->ptr;
 }
 /*
 ** upb_table Implementation
@@ -5285,7 +6312,7 @@
 /* Be conservative and choose 16 in case anyone is using SSE. */
 static const size_t maxalign = 16;
 
-static size_t align_up(size_t size) {
+static size_t align_up_max(size_t size) {
   return ((size + maxalign - 1) / maxalign) * maxalign;
 }
 
@@ -5309,7 +6336,7 @@
 
   block->next = a->block_head;
   block->size = size;
-  block->used = align_up(sizeof(mem_block));
+  block->used = align_up_max(sizeof(mem_block));
   block->owned = owned;
 
   a->block_head = block;
@@ -5342,7 +6369,7 @@
     return NULL;  /* We are an arena, don't need individual frees. */
   }
 
-  size = align_up(size);
+  size = align_up_max(size);
 
   /* TODO(haberman): special-case if this is a realloc of the last alloc? */
 
@@ -5412,6 +6439,10 @@
 
     block = next;
   }
+
+  /* Protect against multiple-uninit. */
+  a->cleanup_head = NULL;
+  a->block_head = NULL;
 }
 
 bool upb_arena_addcleanup(upb_arena *a, upb_cleanup_func *func, void *ud) {
@@ -6481,7 +7512,7 @@
   upb_fielddef *f;
 };
 
-static char *upb_strndup(const char *buf, size_t n) {
+static char *upb_gstrndup(const char *buf, size_t n) {
   char *ret = upb_gmalloc(n + 1);
   if (!ret) return NULL;
   memcpy(ret, buf, n);
@@ -6631,7 +7662,7 @@
   UPB_UNUSED(hd);
   UPB_UNUSED(handle);
 
-  name = upb_strndup(buf, n);
+  name = upb_gstrndup(buf, n);
   /* XXX: see comment at the top of the file. */
   ok = upb_filedef_setname(r->file, name, NULL);
   upb_gfree(name);
@@ -6647,7 +7678,7 @@
   UPB_UNUSED(hd);
   UPB_UNUSED(handle);
 
-  package = upb_strndup(buf, n);
+  package = upb_gstrndup(buf, n);
   /* XXX: see comment at the top of the file. */
   upb_descreader_setscopename(r, package);
   ok = upb_filedef_setpackage(r->file, package, NULL);
@@ -6719,7 +7750,7 @@
   UPB_UNUSED(handle);
   /* XXX: see comment at the top of the file. */
   upb_gfree(r->name);
-  r->name = upb_strndup(buf, n);
+  r->name = upb_gstrndup(buf, n);
   r->saw_name = true;
   return n;
 }
@@ -6770,7 +7801,7 @@
 static size_t enum_onname(void *closure, const void *hd, const char *buf,
                           size_t n, const upb_bufhandle *handle) {
   upb_descreader *r = closure;
-  char *fullname = upb_strndup(buf, n);
+  char *fullname = upb_gstrndup(buf, n);
   UPB_UNUSED(hd);
   UPB_UNUSED(handle);
   /* XXX: see comment at the top of the file. */
@@ -6938,7 +7969,7 @@
 static size_t field_onname(void *closure, const void *hd, const char *buf,
                            size_t n, const upb_bufhandle *handle) {
   upb_descreader *r = closure;
-  char *name = upb_strndup(buf, n);
+  char *name = upb_gstrndup(buf, n);
   UPB_UNUSED(hd);
   UPB_UNUSED(handle);
 
@@ -6951,7 +7982,7 @@
 static size_t field_ontypename(void *closure, const void *hd, const char *buf,
                                size_t n, const upb_bufhandle *handle) {
   upb_descreader *r = closure;
-  char *name = upb_strndup(buf, n);
+  char *name = upb_gstrndup(buf, n);
   UPB_UNUSED(hd);
   UPB_UNUSED(handle);
 
@@ -6964,7 +7995,7 @@
 static size_t field_onextendee(void *closure, const void *hd, const char *buf,
                                size_t n, const upb_bufhandle *handle) {
   upb_descreader *r = closure;
-  char *name = upb_strndup(buf, n);
+  char *name = upb_gstrndup(buf, n);
   UPB_UNUSED(hd);
   UPB_UNUSED(handle);
 
@@ -6984,7 +8015,7 @@
    * type yet, so we save it as a string until the end of the field.
    * XXX: see comment at the top of the file. */
   upb_gfree(r->default_string);
-  r->default_string = upb_strndup(buf, n);
+  r->default_string = upb_gstrndup(buf, n);
   return n;
 }
 
@@ -7005,7 +8036,7 @@
   upb_descreader *r = closure;
   upb_descreader_frame *f = &r->stack[r->stack_len-1];
   upb_oneofdef *o = upb_descreader_getoneof(r, f->oneof_index++);
-  char *name_null_terminated = upb_strndup(buf, n);
+  char *name_null_terminated = upb_gstrndup(buf, n);
   bool ok = upb_oneofdef_setname(o, name_null_terminated, NULL);
   UPB_UNUSED(hd);
   UPB_UNUSED(handle);
@@ -7042,7 +8073,7 @@
   upb_descreader *r = closure;
   upb_msgdef *m = upb_descreader_top(r);
   /* XXX: see comment at the top of the file. */
-  char *name = upb_strndup(buf, n);
+  char *name = upb_gstrndup(buf, n);
   UPB_UNUSED(hd);
   UPB_UNUSED(handle);
 
diff --git a/php/ext/google/protobuf/upb.h b/php/ext/google/protobuf/upb.h
index c83b0e0..5f78045 100644
--- a/php/ext/google/protobuf/upb.h
+++ b/php/ext/google/protobuf/upb.h
@@ -123,20 +123,21 @@
 #define UPB_NORETURN
 #endif
 
+#if __STDC_VERSION__ >= 199901L || __cplusplus >= 201103L
+/* C99/C++11 versions. */
+#include <stdio.h>
+#define _upb_snprintf snprintf
+#define _upb_vsnprintf vsnprintf
+#define _upb_va_copy(a, b) va_copy(a, b)
+#elif defined __GNUC__
 /* A few hacky workarounds for functions not in C89.
  * For internal use only!
  * TODO(haberman): fix these by including our own implementations, or finding
  * another workaround.
  */
-#ifdef __GNUC__
 #define _upb_snprintf __builtin_snprintf
 #define _upb_vsnprintf __builtin_vsnprintf
 #define _upb_va_copy(a, b) __va_copy(a, b)
-#elif __STDC_VERSION__ >= 199901L
-/* C99 versions. */
-#define _upb_snprintf snprintf
-#define _upb_vsnprintf vsnprintf
-#define _upb_va_copy(a, b) va_copy(a, b)
 #else
 #error Need implementations of [v]snprintf and va_copy
 #endif
@@ -280,6 +281,12 @@
  * exist in debug mode.  This turns into regular assert. */
 #define UPB_ASSERT_DEBUGVAR(expr) assert(expr)
 
+#ifdef __GNUC__
+#define UPB_UNREACHABLE() do { assert(0); __builtin_unreachable(); } while(0)
+#else
+#define UPB_UNREACHABLE() do { assert(0); } while(0)
+#endif
+
 /* Generic function type. */
 typedef void upb_func();
 
@@ -513,17 +520,18 @@
 };
 
 UPB_INLINE void *upb_malloc(upb_alloc *alloc, size_t size) {
-  UPB_ASSERT(size > 0);
+  UPB_ASSERT(alloc);
   return alloc->func(alloc, NULL, 0, size);
 }
 
 UPB_INLINE void *upb_realloc(upb_alloc *alloc, void *ptr, size_t oldsize,
                              size_t size) {
-  UPB_ASSERT(size > 0);
+  UPB_ASSERT(alloc);
   return alloc->func(alloc, ptr, oldsize, size);
 }
 
 UPB_INLINE void upb_free(upb_alloc *alloc, void *ptr) {
+  UPB_ASSERT(alloc);  /* Same check macro as upb_malloc()/upb_realloc(). */
   alloc->func(alloc, ptr, 0, 0);
 }
 
@@ -572,11 +580,11 @@
 void upb_arena_init(upb_arena *a);
 void upb_arena_init2(upb_arena *a, void *mem, size_t n, upb_alloc *alloc);
 void upb_arena_uninit(upb_arena *a);
-upb_alloc *upb_arena_alloc(upb_arena *a);
 bool upb_arena_addcleanup(upb_arena *a, upb_cleanup_func *func, void *ud);
 size_t upb_arena_bytesallocated(const upb_arena *a);
 void upb_arena_setnextblocksize(upb_arena *a, size_t size);
 void upb_arena_setmaxblocksize(upb_arena *a, size_t size);
+UPB_INLINE upb_alloc *upb_arena_alloc(upb_arena *a) { return (upb_alloc*)a; }
 
 UPB_END_EXTERN_C
 
@@ -807,7 +815,9 @@
   UPB_CTYPE_CSTR     = 6,
   UPB_CTYPE_PTR      = 7,
   UPB_CTYPE_CONSTPTR = 8,
-  UPB_CTYPE_FPTR     = 9
+  UPB_CTYPE_FPTR     = 9,
+  UPB_CTYPE_FLOAT    = 10,
+  UPB_CTYPE_DOUBLE   = 11
 } upb_ctype_t;
 
 typedef struct {
@@ -881,6 +891,29 @@
 FUNCS(fptr,     fptr,         upb_func*,    uintptr_t,  UPB_CTYPE_FPTR)
 
 #undef FUNCS
+
+UPB_INLINE void upb_value_setfloat(upb_value *val, float cval) {
+  memcpy(&val->val, &cval, sizeof(cval));
+  SET_TYPE(val->ctype, UPB_CTYPE_FLOAT);
+}
+
+UPB_INLINE void upb_value_setdouble(upb_value *val, double cval) {
+  memcpy(&val->val, &cval, sizeof(cval));
+  SET_TYPE(val->ctype, UPB_CTYPE_DOUBLE);
+}
+
+UPB_INLINE upb_value upb_value_float(float cval) {
+  upb_value ret;
+  upb_value_setfloat(&ret, cval);
+  return ret;
+}
+
+UPB_INLINE upb_value upb_value_double(double cval) {
+  upb_value ret;
+  upb_value_setdouble(&ret, cval);
+  return ret;
+}
+
 #undef SET_TYPE
 
 
@@ -1123,6 +1156,13 @@
   return t->t.count;
 }
 
+void upb_inttable_packedsize(const upb_inttable *t, size_t *size);
+void upb_strtable_packedsize(const upb_strtable *t, size_t *size);
+upb_inttable *upb_inttable_pack(const upb_inttable *t, void *p, size_t *ofs,
+                                size_t size);
+upb_strtable *upb_strtable_pack(const upb_strtable *t, void *p, size_t *ofs,
+                                size_t size);
+
 /* Inserts the given key into the hashtable with the given value.  The key must
  * not already exist in the hash table.  For string tables, the key must be
  * NULL-terminated, and the table will make an internal copy of the key.
@@ -1669,6 +1709,7 @@
 class FileDef;
 class MessageDef;
 class OneofDef;
+class SymbolTable;
 }
 #endif
 
@@ -1677,6 +1718,8 @@
                          upb_refcounted)
 UPB_DECLARE_DERIVED_TYPE(upb::FileDef, upb::RefCounted, upb_filedef,
                          upb_refcounted)
+UPB_DECLARE_TYPE(upb::SymbolTable, upb_symtab)
+
 
 /* The maximum message depth that the type graph can have.  This is a resource
  * limit for the C stack since we sometimes need to recursively traverse the
@@ -1710,8 +1753,6 @@
  public:
   typedef upb_deftype_t Type;
 
-  Def* Dup(const void *owner) const;
-
   /* upb::RefCounted methods like Ref()/Unref(). */
   UPB_REFCOUNTED_CPPMETHODS
 
@@ -1757,9 +1798,6 @@
 
 UPB_BEGIN_EXTERN_C
 
-/* Native C API. */
-upb_def *upb_def_dup(const upb_def *def, const void *owner);
-
 /* Include upb_refcounted methods like upb_def_ref()/upb_def_unref(). */
 UPB_REFCOUNTED_CMETHODS(upb_def, upb_def_upcast)
 
@@ -1856,15 +1894,19 @@
  * types defined in descriptor.proto, which gives INT32 and SINT32 separate
  * types (we distinguish the two with the "integer encoding" enum below). */
 typedef enum {
-  UPB_TYPE_FLOAT    = 1,
-  UPB_TYPE_DOUBLE   = 2,
-  UPB_TYPE_BOOL     = 3,
-  UPB_TYPE_STRING   = 4,
-  UPB_TYPE_BYTES    = 5,
-  UPB_TYPE_MESSAGE  = 6,
-  UPB_TYPE_ENUM     = 7,  /* Enum values are int32. */
-  UPB_TYPE_INT32    = 8,
-  UPB_TYPE_UINT32   = 9,
+  /* Types stored in 1 byte. */
+  UPB_TYPE_BOOL     = 1,
+  /* Types stored in 4 bytes. */
+  UPB_TYPE_FLOAT    = 2,
+  UPB_TYPE_INT32    = 3,
+  UPB_TYPE_UINT32   = 4,
+  UPB_TYPE_ENUM     = 5,  /* Enum values are int32. */
+  /* Types stored as pointers (probably 4 or 8 bytes). */
+  UPB_TYPE_STRING   = 6,
+  UPB_TYPE_BYTES    = 7,
+  UPB_TYPE_MESSAGE  = 8,
+  /* Types stored as 8 bytes. */
+  UPB_TYPE_DOUBLE   = 9,
   UPB_TYPE_INT64    = 10,
   UPB_TYPE_UINT64   = 11
 } upb_fieldtype_t;
@@ -1945,13 +1987,6 @@
   /* Returns NULL if memory allocation failed. */
   static reffed_ptr<FieldDef> New();
 
-  /* Duplicates the given field, returning NULL if memory allocation failed.
-   * When a fielddef is duplicated, the subdef (if any) is made symbolic if it
-   * wasn't already.  If the subdef is set but has no name (which is possible
-   * since msgdefs are not required to have a name) the new fielddef's subdef
-   * will be unset. */
-  FieldDef* Dup(const void* owner) const;
-
   /* upb::RefCounted methods like Ref()/Unref(). */
   UPB_REFCOUNTED_CPPMETHODS
 
@@ -2038,16 +2073,10 @@
   bool IsPrimitive() const;
   bool IsMap() const;
 
-  /* Whether this field must be able to explicitly represent presence:
+  /* Returns whether this field explicitly represents presence.
    *
-   * * This is always false for repeated fields (an empty repeated field is
-   *   equivalent to a repeated field with zero entries).
-   *
-   * * This is always true for submessages.
-   *
-   * * For other fields, it depends on the message (see
-   *   MessageDef::SetPrimitivesHavePresence())
-   */
+   * For proto2 messages: Returns true for any scalar (non-repeated) field.
+   * For proto3 messages: Returns true for scalar submessage or oneof fields. */
   bool HasPresence() const;
 
   /* How integers are encoded.  Only meaningful for integer types.
@@ -2206,7 +2235,6 @@
 
 /* Native C API. */
 upb_fielddef *upb_fielddef_new(const void *owner);
-upb_fielddef *upb_fielddef_dup(const upb_fielddef *f, const void *owner);
 
 /* Include upb_refcounted methods like upb_fielddef_ref(). */
 UPB_REFCOUNTED_CMETHODS(upb_fielddef, upb_fielddef_upcast2)
@@ -2416,16 +2444,6 @@
     return FindOneofByName(str.c_str(), str.size());
   }
 
-  /* Returns a new msgdef that is a copy of the given msgdef (and a copy of all
-   * the fields) but with any references to submessages broken and replaced
-   * with just the name of the submessage.  Returns NULL if memory allocation
-   * failed.
-   *
-   * TODO(haberman): which is more useful, keeping fields resolved or
-   * unresolving them?  If there's no obvious answer, Should this functionality
-   * just be moved into symtab.c? */
-  MessageDef* Dup(const void* owner) const;
-
   /* Is this message a map entry? */
   void setmapentry(bool map_entry);
   bool mapentry() const;
@@ -2559,7 +2577,6 @@
 
 bool upb_msgdef_freeze(upb_msgdef *m, upb_status *status);
 
-upb_msgdef *upb_msgdef_dup(const upb_msgdef *m, const void *owner);
 const char *upb_msgdef_fullname(const upb_msgdef *m);
 const char *upb_msgdef_name(const upb_msgdef *m);
 int upb_msgdef_numoneofs(const upb_msgdef *m);
@@ -2709,10 +2726,6 @@
    * first one that was added. */
   const char* FindValueByNumber(int32_t num) const;
 
-  /* Returns a new EnumDef with all the same values.  The new EnumDef will be
-   * owned by the given owner. */
-  EnumDef* Dup(const void* owner) const;
-
   /* Iteration over name/value pairs.  The order is undefined.
    * Adding an enum val invalidates any iterators.
    *
@@ -2740,7 +2753,6 @@
 
 /* Native C API. */
 upb_enumdef *upb_enumdef_new(const void *owner);
-upb_enumdef *upb_enumdef_dup(const upb_enumdef *e, const void *owner);
 
 /* Include upb_refcounted methods like upb_enumdef_ref(). */
 UPB_REFCOUNTED_CMETHODS(upb_enumdef, upb_enumdef_upcast2)
@@ -2785,6 +2797,7 @@
 
 UPB_END_EXTERN_C
 
+
 /* upb::OneofDef **************************************************************/
 
 typedef upb_inttable_iter upb_oneof_iter;
@@ -2849,10 +2862,6 @@
   /* Looks up by tag number. */
   const FieldDef* FindFieldByNumber(uint32_t num) const;
 
-  /* Returns a new OneofDef with all the same fields. The OneofDef will be owned
-   * by the given owner. */
-  OneofDef* Dup(const void* owner) const;
-
   /* Iteration over fields.  The order is undefined. */
   class iterator : public std::iterator<std::forward_iterator_tag, FieldDef*> {
    public:
@@ -2898,16 +2907,16 @@
 
 /* Native C API. */
 upb_oneofdef *upb_oneofdef_new(const void *owner);
-upb_oneofdef *upb_oneofdef_dup(const upb_oneofdef *o, const void *owner);
 
 /* Include upb_refcounted methods like upb_oneofdef_ref(). */
 UPB_REFCOUNTED_CMETHODS(upb_oneofdef, upb_oneofdef_upcast)
 
 const char *upb_oneofdef_name(const upb_oneofdef *o);
-bool upb_oneofdef_setname(upb_oneofdef *o, const char *name, upb_status *s);
-
 const upb_msgdef *upb_oneofdef_containingtype(const upb_oneofdef *o);
 int upb_oneofdef_numfields(const upb_oneofdef *o);
+uint32_t upb_oneofdef_index(const upb_oneofdef *o);
+
+bool upb_oneofdef_setname(upb_oneofdef *o, const char *name, upb_status *s);
 bool upb_oneofdef_addfield(upb_oneofdef *o, upb_fielddef *f,
                            const void *ref_donor,
                            upb_status *s);
@@ -3051,6 +3060,153 @@
 
 UPB_END_EXTERN_C
 
+typedef struct {
+ UPB_PRIVATE_FOR_CPP
+  upb_strtable_iter iter;
+  upb_deftype_t type;
+} upb_symtab_iter;
+
+#ifdef __cplusplus
+
+/* Non-const methods in upb::SymbolTable are NOT thread-safe. */
+class upb::SymbolTable {
+ public:
+  /* Returns a new symbol table.  The caller releases it with Free().
+   * Returns NULL if memory allocation failed. */
+  static SymbolTable* New();
+  static void Free(upb::SymbolTable* table);
+
+  /* For all lookup functions, the returned pointer is not owned by the
+   * caller; it may be invalidated by any non-const call or unref of the
+   * SymbolTable!  To protect against this, take a ref if desired. */
+
+  /* Freezes the symbol table: prevents further modification of it.
+   * After the Freeze() operation is successful, the SymbolTable must only be
+   * accessed via a const pointer.
+   *
+   * Unlike with upb::MessageDef/upb::EnumDef/etc, freezing a SymbolTable is not
+   * a necessary step in using a SymbolTable.  If you have no need for it to be
+   * immutable, there is no need to freeze it ever.  However sometimes it is
+   * useful, and SymbolTables that are statically compiled into the binary are
+   * always frozen by nature. */
+  void Freeze();
+
+  /* Resolves the given symbol using the rules described in descriptor.proto,
+   * namely:
+   *
+   *    If the name starts with a '.', it is fully-qualified.  Otherwise,
+   *    C++-like scoping rules are used to find the type (i.e. first the nested
+   *    types within this message are searched, then within the parent, on up
+   *    to the root namespace).
+   *
+   * If not found, returns NULL. */
+  const Def* Resolve(const char* base, const char* sym) const;
+
+  /* Finds an entry in the symbol table with this exact name.  If not found,
+   * returns NULL. */
+  const Def* Lookup(const char *sym) const;
+  const MessageDef* LookupMessage(const char *sym) const;
+  const EnumDef* LookupEnum(const char *sym) const;
+
+  /* TODO: introduce a C++ iterator, but make it nice and templated so that if
+   * you ask for an iterator of MessageDef the iterated elements are strongly
+   * typed as MessageDef*. */
+
+  /* Adds the given mutable defs to the symtab, resolving all symbols (including
+   * enum default values) and finalizing the defs.  Only one def per name may be
+   * in the list, and the defs may not duplicate any name already in the symtab.
+   * All defs must have a name -- anonymous defs are not allowed.  Anonymous
+   * defs can still be frozen by calling upb_def_freeze() directly.
+   *
+   * The entire operation either succeeds or fails.  If the operation fails,
+   * the symtab is unchanged, false is returned, and status indicates the
+   * error.  The caller passes a ref on all defs to the symtab (even if the
+   * operation fails).
+   *
+   * TODO(haberman): currently failure will leave the symtab unchanged, but may
+   * leave the defs themselves partially resolved.  Does this matter?  If so we
+   * could do a prepass that ensures that all symbols are resolvable and bail
+   * if not, so we don't mutate anything until we know the operation will
+   * succeed. */
+  bool Add(Def*const* defs, size_t n, void* ref_donor, Status* status);
+
+  bool Add(const std::vector<Def*>& defs, void *owner, Status* status) {
+    return Add((Def*const*)&defs[0], defs.size(), owner, status);
+  }
+
+  /* Resolves all subdefs for messages in this file and attempts to freeze the
+   * file.  If this succeeds, adds all the symbols to this SymbolTable
+   * (replacing any existing ones with the same names). */
+  bool AddFile(FileDef* file, Status* s);
+
+ private:
+  UPB_DISALLOW_POD_OPS(SymbolTable, upb::SymbolTable)
+};
+
+#endif  /* __cplusplus */
+
+UPB_BEGIN_EXTERN_C
+
+/* Native C API. */
+
+upb_symtab *upb_symtab_new(void);
+void upb_symtab_free(upb_symtab* s);
+const upb_def *upb_symtab_resolve(const upb_symtab *s, const char *base,
+                                  const char *sym);
+const upb_def *upb_symtab_lookup(const upb_symtab *s, const char *sym);
+const upb_msgdef *upb_symtab_lookupmsg(const upb_symtab *s, const char *sym);
+const upb_enumdef *upb_symtab_lookupenum(const upb_symtab *s, const char *sym);
+bool upb_symtab_add(upb_symtab *s, upb_def *const*defs, size_t n,
+                    void *ref_donor, upb_status *status);
+bool upb_symtab_addfile(upb_symtab *s, upb_filedef *file, upb_status* status);
+
+/* upb_symtab_iter i;
+ * for(upb_symtab_begin(&i, s, type); !upb_symtab_done(&i);
+ *     upb_symtab_next(&i)) {
+ *   const upb_def *def = upb_symtab_iter_def(&i);
+ *    // ...
+ * }
+ *
+ * For C we don't have separate iterators for const and non-const.
+ * It is the caller's responsibility to cast the upb_fielddef* to
+ * const if the upb_msgdef* is const. */
+void upb_symtab_begin(upb_symtab_iter *iter, const upb_symtab *s,
+                      upb_deftype_t type);
+void upb_symtab_next(upb_symtab_iter *iter);
+bool upb_symtab_done(const upb_symtab_iter *iter);
+const upb_def *upb_symtab_iter_def(const upb_symtab_iter *iter);
+
+UPB_END_EXTERN_C
+
+#ifdef __cplusplus
+/* C++ inline wrappers. */
+namespace upb {
+inline SymbolTable* SymbolTable::New() {
+  return upb_symtab_new();
+}
+inline void SymbolTable::Free(SymbolTable* s) {
+  upb_symtab_free(s);
+}
+inline const Def *SymbolTable::Resolve(const char *base,
+                                       const char *sym) const {
+  return upb_symtab_resolve(this, base, sym);
+}
+inline const Def* SymbolTable::Lookup(const char *sym) const {
+  return upb_symtab_lookup(this, sym);
+}
+inline const MessageDef *SymbolTable::LookupMessage(const char *sym) const {
+  return upb_symtab_lookupmsg(this, sym);
+}
+inline bool SymbolTable::Add(
+    Def*const* defs, size_t n, void* ref_donor, Status* status) {
+  return upb_symtab_add(this, (upb_def*const*)defs, n, ref_donor, status);
+}
+inline bool SymbolTable::AddFile(FileDef* file, Status* s) {
+  return upb_symtab_addfile(this, file, s);
+}
+}  /* namespace upb */
+#endif
+
 #ifdef __cplusplus
 
 UPB_INLINE const char* upb_safecstr(const std::string& str) {
@@ -3061,9 +3217,6 @@
 /* Inline C++ wrappers. */
 namespace upb {
 
-inline Def* Def::Dup(const void* owner) const {
-  return upb_def_dup(this, owner);
-}
 inline Def::Type Def::def_type() const { return upb_def_type(this); }
 inline const char* Def::full_name() const { return upb_def_fullname(this); }
 inline const char* Def::name() const { return upb_def_name(this); }
@@ -3113,9 +3266,6 @@
   upb_fielddef *f = upb_fielddef_new(&f);
   return reffed_ptr<FieldDef>(f, &f);
 }
-inline FieldDef* FieldDef::Dup(const void* owner) const {
-  return upb_fielddef_dup(this, owner);
-}
 inline const char* FieldDef::full_name() const {
   return upb_fielddef_fullname(this);
 }
@@ -3355,9 +3505,6 @@
                                                    size_t len) const {
   return upb_msgdef_ntoo(this, name, len);
 }
-inline MessageDef* MessageDef::Dup(const void *owner) const {
-  return upb_msgdef_dup(this, owner);
-}
 inline void MessageDef::setmapentry(bool map_entry) {
   upb_msgdef_setmapentry(this, map_entry);
 }
@@ -3527,9 +3674,6 @@
 inline const char* EnumDef::FindValueByNumber(int32_t num) const {
   return upb_enumdef_iton(this, num);
 }
-inline EnumDef* EnumDef::Dup(const void* owner) const {
-  return upb_enumdef_dup(this, owner);
-}
 
 inline EnumDef::Iterator::Iterator(const EnumDef* e) {
   upb_enum_begin(&iter_, e);
@@ -3836,6 +3980,7 @@
 struct upb_oneofdef {
   upb_refcounted base;
 
+  uint32_t index;  /* Index within oneofs. */
   const char *name;
   upb_strtable ntof;
   upb_inttable itof;
@@ -3845,7 +3990,7 @@
 extern const struct upb_refcounted_vtbl upb_oneofdef_vtbl;
 
 #define UPB_ONEOFDEF_INIT(name, ntof, itof, refs, ref2s) \
-  { UPB_REFCOUNT_INIT(&upb_oneofdef_vtbl, refs, ref2s), name, ntof, itof }
+  { UPB_REFCOUNT_INIT(&upb_oneofdef_vtbl, refs, ref2s), 0, name, ntof, itof }
 
 
 /* upb_symtab *****************************************************************/
@@ -5832,12 +5977,14 @@
 
 #ifdef __cplusplus
 namespace upb {
+class BufferSink;
 class BufferSource;
 class BytesSink;
 class Sink;
 }
 #endif
 
+UPB_DECLARE_TYPE(upb::BufferSink, upb_bufsink)
 UPB_DECLARE_TYPE(upb::BufferSource, upb_bufsrc)
 UPB_DECLARE_TYPE(upb::BytesSink, upb_bytessink)
 UPB_DECLARE_TYPE(upb::Sink, upb_sink)
@@ -6024,6 +6171,13 @@
 
 UPB_BEGIN_EXTERN_C
 
+/* A class for accumulating output string data in a flat buffer. */
+
+upb_bufsink *upb_bufsink_new(upb_env *env);
+void upb_bufsink_free(upb_bufsink *sink);
+upb_bytessink *upb_bufsink_sink(upb_bufsink *sink);
+const char *upb_bufsink_getdata(const upb_bufsink *sink, size_t *len);
+
 /* Inline definitions. */
 
 UPB_INLINE void upb_bytessink_reset(upb_bytessink *s, const upb_byteshandler *h,
@@ -6073,23 +6227,7 @@
                  &s->handler->table[UPB_ENDSTR_SELECTOR].attr));
 }
 
-UPB_INLINE bool upb_bufsrc_putbuf(const char *buf, size_t len,
-                                  upb_bytessink *sink) {
-  void *subc;
-  bool ret;
-  upb_bufhandle handle;
-  upb_bufhandle_init(&handle);
-  upb_bufhandle_setbuf(&handle, buf, 0);
-  ret = upb_bytessink_start(sink, len, &subc);
-  if (ret && len != 0) {
-    ret = (upb_bytessink_putbuf(sink, subc, buf, len, &handle) >= len);
-  }
-  if (ret) {
-    ret = upb_bytessink_end(sink);
-  }
-  upb_bufhandle_uninit(&handle);
-  return ret;
-}
+bool upb_bufsrc_putbuf(const char *buf, size_t len, upb_bytessink *sink);
 
 #define PUTVAL(type, ctype)                                                    \
   UPB_INLINE bool upb_sink_put##type(upb_sink *s, upb_selector_t sel,          \
@@ -6337,267 +6475,407 @@
 
 #endif
 /*
-** For handlers that do very tiny, very simple operations, the function call
-** overhead of calling a handler can be significant.  This file allows the
-** user to define handlers that do something very simple like store the value
-** to memory and/or set a hasbit.  JIT compilers can then special-case these
-** handlers and emit specialized code for them instead of actually calling the
-** handler.
+** upb::Message is a representation for protobuf messages.
 **
-** The functionality is very simple/limited right now but may expand to be able
-** to call another function.
-*/
+** However it differs from other common representations like
+** google::protobuf::Message in one key way: it does not prescribe any
+** ownership between messages and submessages, and it relies on the
+** client to delete each message/submessage/array/map at the appropriate
+** time.
+**
+** A client can access a upb::Message without knowing anything about
+** ownership semantics, but to create or mutate a message a user needs
+** to implement the memory management themselves.
+**
+** Currently all messages, arrays, and maps store a upb_alloc* internally.
+** Mutating operations use this when they require dynamically-allocated
+** memory.  We could potentially eliminate this size overhead later by
+** letting the user flip a bit on the factory that prevents this from
+** being stored.  The user would then need to use separate functions where
+** the upb_alloc* is passed explicitly.  However for handlers to populate
+** such structures, they would need a place to store this upb_alloc* during
+** parsing; upb_handlers don't currently have a good way to accommodate this.
+**
+** TODO: UTF-8 checking?
+**/
 
-#ifndef UPB_SHIM_H
-#define UPB_SHIM_H
+#ifndef UPB_MSG_H_
+#define UPB_MSG_H_
 
 
-typedef struct {
-  size_t offset;
-  int32_t hasbit;
-} upb_shim_data;
-
 #ifdef __cplusplus
 
 namespace upb {
-
-struct Shim {
-  typedef upb_shim_data Data;
-
-  /* Sets a handler for the given field that writes the value to the given
-   * offset and, if hasbit >= 0, sets a bit at the given bit offset.  Returns
-   * true if the handler was set successfully. */
-  static bool Set(Handlers *h, const FieldDef *f, size_t ofs, int32_t hasbit);
-
-  /* If this handler is a shim, returns the corresponding upb::Shim::Data and
-   * stores the type in "type".  Otherwise returns NULL. */
-  static const Data* GetData(const Handlers* h, Handlers::Selector s,
-                             FieldDef::Type* type);
-};
-
-}  /* namespace upb */
+class Array;
+class Map;
+class MapIterator;
+class MessageFactory;
+class MessageLayout;
+class Visitor;
+class VisitorPlan;
+}
 
 #endif
 
+UPB_DECLARE_TYPE(upb::MessageFactory, upb_msgfactory)
+UPB_DECLARE_TYPE(upb::MessageLayout, upb_msglayout)
+UPB_DECLARE_TYPE(upb::Array, upb_array)
+UPB_DECLARE_TYPE(upb::Map, upb_map)
+UPB_DECLARE_TYPE(upb::MapIterator, upb_mapiter)
+UPB_DECLARE_TYPE(upb::Visitor, upb_visitor)
+UPB_DECLARE_TYPE(upb::VisitorPlan, upb_visitorplan)
+
+/* TODO(haberman): C++ accessors */
+
 UPB_BEGIN_EXTERN_C
 
-/* C API. */
-bool upb_shim_set(upb_handlers *h, const upb_fielddef *f, size_t offset,
-                  int32_t hasbit);
-const upb_shim_data *upb_shim_getdata(const upb_handlers *h, upb_selector_t s,
-                                      upb_fieldtype_t *type);
-
-UPB_END_EXTERN_C
-
-#ifdef __cplusplus
-/* C++ Wrappers. */
-namespace upb {
-inline bool Shim::Set(Handlers* h, const FieldDef* f, size_t ofs,
-                      int32_t hasbit) {
-  return upb_shim_set(h, f, ofs, hasbit);
-}
-inline const Shim::Data* Shim::GetData(const Handlers* h, Handlers::Selector s,
-                                       FieldDef::Type* type) {
-  return upb_shim_getdata(h, s, type);
-}
-}  /* namespace upb */
-#endif
-
-#endif  /* UPB_SHIM_H */
-/*
-** upb::SymbolTable (upb_symtab)
-**
-** A symtab (symbol table) stores a name->def map of upb_defs.  Clients could
-** always create such tables themselves, but upb_symtab has logic for resolving
-** symbolic references, and in particular, for keeping a whole set of consistent
-** defs when replacing some subset of those defs.  This logic is nontrivial.
-**
-** This is a mixed C/C++ interface that offers a full API to both languages.
-** See the top-level README for more information.
-*/
-
-#ifndef UPB_SYMTAB_H_
-#define UPB_SYMTAB_H_
+typedef void upb_msg;
 
 
-#ifdef __cplusplus
-#include <vector>
-namespace upb { class SymbolTable; }
-#endif
+/** upb_msglayout *************************************************************/
 
-UPB_DECLARE_DERIVED_TYPE(upb::SymbolTable, upb::RefCounted,
-                         upb_symtab, upb_refcounted)
+/* upb_msglayout represents the memory layout of a given upb_msgdef.  You get
+ * instances of this from a upb_msgfactory, and the factory always owns the
+ * msglayout. */
 
-typedef struct {
- UPB_PRIVATE_FOR_CPP
-  upb_strtable_iter iter;
-  upb_deftype_t type;
-} upb_symtab_iter;
+/* Gets the factory for this layout */
+upb_msgfactory *upb_msglayout_factory(const upb_msglayout *l);
 
-#ifdef __cplusplus
+/* Get the msglayout for a submessage.  This requires that this field is a
+ * submessage, i.e. upb_fielddef_issubmsg(f) == true.
+ *
+ * Since map entry messages don't have layouts, if upb_fielddef_ismap(f) == true
+ * then this function will return the layout for the map's value.  It requires
+ * that the value type of the map field is a submessage. */
+const upb_msglayout *upb_msglayout_sublayout(const upb_msglayout *l,
+                                             const upb_fielddef *f);
 
-/* Non-const methods in upb::SymbolTable are NOT thread-safe. */
-class upb::SymbolTable {
- public:
-  /* Returns a new symbol table with a single ref owned by "owner."
-   * Returns NULL if memory allocation failed. */
-  static reffed_ptr<SymbolTable> New();
+/* Returns the msgdef for this msglayout. */
+const upb_msgdef *upb_msglayout_msgdef(const upb_msglayout *l);
 
-  /* Include RefCounted base methods. */
-  UPB_REFCOUNTED_CPPMETHODS
 
-  /* For all lookup functions, the returned pointer is not owned by the
-   * caller; it may be invalidated by any non-const call or unref of the
-   * SymbolTable!  To protect against this, take a ref if desired. */
+/** upb_visitor ***************************************************************/
 
-  /* Freezes the symbol table: prevents further modification of it.
-   * After the Freeze() operation is successful, the SymbolTable must only be
-   * accessed via a const pointer.
-   *
-   * Unlike with upb::MessageDef/upb::EnumDef/etc, freezing a SymbolTable is not
-   * a necessary step in using a SymbolTable.  If you have no need for it to be
-   * immutable, there is no need to freeze it ever.  However sometimes it is
-   * useful, and SymbolTables that are statically compiled into the binary are
-   * always frozen by nature. */
-  void Freeze();
+/* upb_visitor will visit all the fields of a message and its submessages.  It
+ * uses a upb_visitorplan which you can obtain from a upb_msgfactory. */
 
-  /* Resolves the given symbol using the rules described in descriptor.proto,
-   * namely:
-   *
-   *    If the name starts with a '.', it is fully-qualified.  Otherwise,
-   *    C++-like scoping rules are used to find the type (i.e. first the nested
-   *    types within this message are searched, then within the parent, on up
-   *    to the root namespace).
-   *
-   * If not found, returns NULL. */
-  const Def* Resolve(const char* base, const char* sym) const;
+upb_visitor *upb_visitor_create(upb_env *e, const upb_visitorplan *vp,
+                                upb_sink *output);
+bool upb_visitor_visitmsg(upb_visitor *v, const upb_msg *msg);
 
-  /* Finds an entry in the symbol table with this exact name.  If not found,
-   * returns NULL. */
-  const Def* Lookup(const char *sym) const;
-  const MessageDef* LookupMessage(const char *sym) const;
-  const EnumDef* LookupEnum(const char *sym) const;
 
-  /* TODO: introduce a C++ iterator, but make it nice and templated so that if
-   * you ask for an iterator of MessageDef the iterated elements are strongly
-   * typed as MessageDef*. */
+/** upb_msgfactory ************************************************************/
 
-  /* Adds the given mutable defs to the symtab, resolving all symbols
-   * (including enum default values) and finalizing the defs.  Only one def per
-   * name may be in the list, but defs can replace existing defs in the symtab.
-   * All defs must have a name -- anonymous defs are not allowed.  Anonymous
-   * defs can still be frozen by calling upb_def_freeze() directly.
-   *
-   * Any existing defs that can reach defs that are being replaced will
-   * themselves be replaced also, so that the resulting set of defs is fully
-   * consistent.
-   *
-   * This logic implemented in this method is a convenience; ultimately it
-   * calls some combination of upb_fielddef_setsubdef(), upb_def_dup(), and
-   * upb_freeze(), any of which the client could call themself.  However, since
-   * the logic for doing so is nontrivial, we provide it here.
-   *
-   * The entire operation either succeeds or fails.  If the operation fails,
-   * the symtab is unchanged, false is returned, and status indicates the
-   * error.  The caller passes a ref on all defs to the symtab (even if the
-   * operation fails).
-   *
-   * TODO(haberman): currently failure will leave the symtab unchanged, but may
-   * leave the defs themselves partially resolved.  Does this matter?  If so we
-   * could do a prepass that ensures that all symbols are resolvable and bail
-   * if not, so we don't mutate anything until we know the operation will
-   * succeed.
-   *
-   * TODO(haberman): since the defs must be mutable, refining a frozen def
-   * requires making mutable copies of the entire tree.  This is wasteful if
-   * only a few messages are changing.  We may want to add a way of adding a
-   * tree of frozen defs to the symtab (perhaps an alternate constructor where
-   * you pass the root of the tree?) */
-  bool Add(Def*const* defs, size_t n, void* ref_donor, Status* status);
+/* A upb_msgfactory contains a cache of upb_msglayout, upb_handlers, and
+ * upb_visitorplan objects.  These are the objects necessary to represent,
+ * populate, and visit upb_msg objects.
+ *
+ * These caches are all populated by upb_msgdef, and lazily created on demand.
+ */
 
-  bool Add(const std::vector<Def*>& defs, void *owner, Status* status) {
-    return Add((Def*const*)&defs[0], defs.size(), owner, status);
+/* Creates and destroys a msgfactory, respectively.  The messages for this
+ * msgfactory must come from |symtab| (which should outlive the msgfactory). */
+upb_msgfactory *upb_msgfactory_new(const upb_symtab *symtab);
+void upb_msgfactory_free(upb_msgfactory *f);
+
+const upb_symtab *upb_msgfactory_symtab(const upb_msgfactory *f);
+
+/* The functions to get cached objects, lazily creating them on demand.  These
+ * all require:
+ *
+ * - m is in upb_msgfactory_symtab(f)
+ * - upb_msgdef_mapentry(m) == false (since map messages can't have layouts).
+ *
+ * The returned objects will live for as long as the msgfactory does.
+ *
+ * TODO(haberman): consider making this thread-safe and take a const
+ * upb_msgfactory. */
+const upb_msglayout *upb_msgfactory_getlayout(upb_msgfactory *f,
+                                              const upb_msgdef *m);
+const upb_handlers *upb_msgfactory_getmergehandlers(upb_msgfactory *f,
+                                                    const upb_msgdef *m);
+const upb_visitorplan *upb_msgfactory_getvisitorplan(upb_msgfactory *f,
+                                                     const upb_handlers *h);
+
+
+/** upb_msgval ****************************************************************/
+
+/* A union representing all possible protobuf values.  Used for generic get/set
+ * operations. */
+
+typedef union {
+  bool b;
+  float flt;
+  double dbl;
+  int32_t i32;
+  int64_t i64;
+  uint32_t u32;
+  uint64_t u64;
+  const upb_map* map;
+  const upb_msg* msg;
+  const upb_array* arr;
+  const void* ptr;
+  struct {
+    const char *ptr;
+    size_t len;
+  } str;
+} upb_msgval;
+
+#define ACCESSORS(name, membername, ctype) \
+  UPB_INLINE ctype upb_msgval_get ## name(upb_msgval v) { \
+    return v.membername; \
+  } \
+  UPB_INLINE void upb_msgval_set ## name(upb_msgval *v, ctype cval) { \
+    v->membername = cval; \
+  } \
+  UPB_INLINE upb_msgval upb_msgval_ ## name(ctype v) { \
+    upb_msgval ret; \
+    ret.membername = v; \
+    return ret; \
   }
 
-  /* Resolves all subdefs for messages in this file and attempts to freeze the
-   * file.  If this succeeds, adds all the symbols to this SymbolTable
-   * (replacing any existing ones with the same names). */
-  bool AddFile(FileDef* file, Status* s);
+ACCESSORS(bool,   b,   bool)
+ACCESSORS(float,  flt, float)
+ACCESSORS(double, dbl, double)
+ACCESSORS(int32,  i32, int32_t)
+ACCESSORS(int64,  i64, int64_t)
+ACCESSORS(uint32, u32, uint32_t)
+ACCESSORS(uint64, u64, uint64_t)
+ACCESSORS(map,    map, const upb_map*)
+ACCESSORS(msg,    msg, const upb_msg*)
+ACCESSORS(ptr,    ptr, const void*)
+ACCESSORS(arr,    arr, const upb_array*)
 
- private:
-  UPB_DISALLOW_POD_OPS(SymbolTable, upb::SymbolTable)
-};
+#undef ACCESSORS
 
-#endif  /* __cplusplus */
+UPB_INLINE upb_msgval upb_msgval_str(const char *ptr, size_t len) {
+  upb_msgval ret;
+  ret.str.ptr = ptr;
+  ret.str.len = len;
+  return ret;
+}
 
-UPB_BEGIN_EXTERN_C
+UPB_INLINE const char* upb_msgval_getstr(upb_msgval val) {
+  return val.str.ptr;
+}
 
-/* Native C API. */
+UPB_INLINE size_t upb_msgval_getstrlen(upb_msgval val) {
+  return val.str.len;
+}
 
-/* Include refcounted methods like upb_symtab_ref(). */
-UPB_REFCOUNTED_CMETHODS(upb_symtab, upb_symtab_upcast)
 
-upb_symtab *upb_symtab_new(const void *owner);
-void upb_symtab_freeze(upb_symtab *s);
-const upb_def *upb_symtab_resolve(const upb_symtab *s, const char *base,
-                                  const char *sym);
-const upb_def *upb_symtab_lookup(const upb_symtab *s, const char *sym);
-const upb_msgdef *upb_symtab_lookupmsg(const upb_symtab *s, const char *sym);
-const upb_enumdef *upb_symtab_lookupenum(const upb_symtab *s, const char *sym);
-bool upb_symtab_add(upb_symtab *s, upb_def *const*defs, size_t n,
-                    void *ref_donor, upb_status *status);
-bool upb_symtab_addfile(upb_symtab *s, upb_filedef *file, upb_status* status);
+/** upb_msg *******************************************************************/
 
-/* upb_symtab_iter i;
- * for(upb_symtab_begin(&i, s, type); !upb_symtab_done(&i);
- *     upb_symtab_next(&i)) {
- *   const upb_def *def = upb_symtab_iter_def(&i);
- *    // ...
- * }
+/* A upb_msg represents a protobuf message.  It always corresponds to a specific
+ * upb_msglayout, which describes how it is laid out in memory.
  *
- * For C we don't have separate iterators for const and non-const.
- * It is the caller's responsibility to cast the upb_fielddef* to
- * const if the upb_msgdef* is const. */
-void upb_symtab_begin(upb_symtab_iter *iter, const upb_symtab *s,
-                      upb_deftype_t type);
-void upb_symtab_next(upb_symtab_iter *iter);
-bool upb_symtab_done(const upb_symtab_iter *iter);
-const upb_def *upb_symtab_iter_def(const upb_symtab_iter *iter);
+ * The message will have a fixed size, as returned by upb_msg_sizeof(), which
+ * will be used to store fixed-length fields.  The upb_msg may also allocate
+ * dynamic memory internally to store data such as:
+ *
+ * - extensions
+ * - unknown fields
+ */
+
+/* Returns the size of a message given this layout. */
+size_t upb_msg_sizeof(const upb_msglayout *l);
+
+/* upb_msg_init() / upb_msg_uninit() allow the user to use a pre-allocated
+ * block of memory as a message.  The block's size should be upb_msg_sizeof().
+ * upb_msg_uninit() must be called to release internally-allocated memory
+ * unless the allocator is an arena that does not require freeing.
+ *
+ * Please note that upb_msg_uninit() does *not* free any submessages, maps,
+ * or arrays referred to by this message's fields.  You must free them manually
+ * yourself. */
+void upb_msg_init(upb_msg *msg, const upb_msglayout *l, upb_alloc *a);
+void upb_msg_uninit(upb_msg *msg, const upb_msglayout *l);
+
+/* Like upb_msg_init() / upb_msg_uninit(), except the message's memory is
+ * allocated / freed from the given upb_alloc. */
+upb_msg *upb_msg_new(const upb_msglayout *l, upb_alloc *a);
+void upb_msg_free(upb_msg *msg, const upb_msglayout *l);
+
+/* Returns the upb_alloc for the given message. */
+upb_alloc *upb_msg_alloc(const upb_msg *msg, const upb_msglayout *l);
+
+/* Packs the tree of messages rooted at "msg" into a single hunk of memory,
+ * allocated from the given allocator (NOTE(review): none is passed). */
+void *upb_msg_pack(const upb_msg *msg, const upb_msglayout *l,
+                   void *p, size_t *ofs, size_t size);
+
+/* Read-only message API.  Can be safely called by anyone. */
+
+/* Returns the value associated with this field:
+ *   - for scalar fields (including strings), the value directly.
+ *   - return upb_msg*, or upb_map* for msg/map.
+ *     If the field is unset for these field types, returns NULL.
+ *
+ * TODO(haberman): should we let users store cached array/map/msg
+ * pointers here for fields that are unset?  Could be useful for the
+ * strongly-owned submessage model (ie. generated C API that doesn't use
+ * arenas).
+ */
+upb_msgval upb_msg_get(const upb_msg *msg,
+                       const upb_fielddef *f,
+                       const upb_msglayout *l);
+
+/* May only be called for fields where upb_fielddef_haspresence(f) == true. */
+bool upb_msg_has(const upb_msg *msg,
+                 const upb_fielddef *f,
+                 const upb_msglayout *l);
+
+/* Returns NULL if no field in the oneof is set. */
+const upb_fielddef *upb_msg_getoneofcase(const upb_msg *msg,
+                                         const upb_oneofdef *o,
+                                         const upb_msglayout *l);
+
+/* Returns true if any field in the oneof is set. */
+bool upb_msg_hasoneof(const upb_msg *msg,
+                      const upb_oneofdef *o,
+                      const upb_msglayout *l);
+
+
+/* Mutable message API.  May only be called by the owner of the message who
+ * knows its ownership scheme and how to keep it consistent. */
+
+/* Sets the given field to the given value.  Does not perform any memory
+ * management: if you overwrite a pointer to a msg/array/map/string without
+ * cleaning it up (or using an arena) it will leak.
+ */
+bool upb_msg_set(upb_msg *msg,
+                 const upb_fielddef *f,
+                 upb_msgval val,
+                 const upb_msglayout *l);
+
+/* For a primitive field, set it back to its default. For repeated, string, and
+ * submessage fields set it back to NULL.  This could involve releasing some
+ * internal memory (for example, from an extension dictionary), but it is not
+ * recursive in any way and will not recover any memory that may be used by
+ * arrays/maps/strings/msgs that this field may have pointed to.
+ */
+bool upb_msg_clearfield(upb_msg *msg,
+                        const upb_fielddef *f,
+                        const upb_msglayout *l);
+
+/* Clears all fields in the oneof such that none of them are set. */
+bool upb_msg_clearoneof(upb_msg *msg,
+                        const upb_oneofdef *o,
+                        const upb_msglayout *l);
+
+/* TODO(haberman): copyfrom()/mergefrom()? */
+
+
+/** upb_array *****************************************************************/
+
+/* A upb_array stores data for a repeated field.  The memory management
+ * semantics are the same as upb_msg.  A upb_array allocates dynamic
+ * memory internally for the array elements. */
+
+size_t upb_array_sizeof(upb_fieldtype_t type);
+void upb_array_init(upb_array *arr, upb_fieldtype_t type, upb_alloc *a);
+void upb_array_uninit(upb_array *arr);
+upb_array *upb_array_new(upb_fieldtype_t type, upb_alloc *a);
+void upb_array_free(upb_array *arr);
+
+/* Read-only interface.  Safe for anyone to call. */
+
+size_t upb_array_size(const upb_array *arr);
+upb_fieldtype_t upb_array_type(const upb_array *arr);
+upb_msgval upb_array_get(const upb_array *arr, size_t i);
+
+/* Write interface.  May only be called by the message's owner who can enforce
+ * its memory management invariants. */
+
+bool upb_array_set(upb_array *arr, size_t i, upb_msgval val);
+
+
+/** upb_map *******************************************************************/
+
+/* A upb_map stores data for a map field.  The memory management semantics are
+ * the same as upb_msg, with one notable exception.  upb_map will internally
+ * store a copy of all string keys, but *not* any string values or submessages.
+ * So you must ensure that any string or message values outlive the map, and you
+ * must delete them manually when they are no longer required. */
+
+size_t upb_map_sizeof(upb_fieldtype_t ktype, upb_fieldtype_t vtype);
+bool upb_map_init(upb_map *map, upb_fieldtype_t ktype, upb_fieldtype_t vtype,
+                  upb_alloc *a);
+void upb_map_uninit(upb_map *map);
+upb_map *upb_map_new(upb_fieldtype_t ktype, upb_fieldtype_t vtype, upb_alloc *a);
+void upb_map_free(upb_map *map);
+
+/* Read-only interface.  Safe for anyone to call. */
+
+size_t upb_map_size(const upb_map *map);
+upb_fieldtype_t upb_map_keytype(const upb_map *map);
+upb_fieldtype_t upb_map_valuetype(const upb_map *map);
+bool upb_map_get(const upb_map *map, upb_msgval key, upb_msgval *val);
+
+/* Write interface.  May only be called by the message's owner who can enforce
+ * its memory management invariants. */
+
+/* Sets or overwrites an entry in the map.  Return value indicates whether
+ * the operation succeeded or failed with OOM, and also whether an existing
+ * key was replaced or not. */
+bool upb_map_set(upb_map *map,
+                 upb_msgval key, upb_msgval val,
+                 upb_msgval *valremoved);
+
+/* Deletes an entry in the map.  Returns true if the key was present. */
+bool upb_map_del(upb_map *map, upb_msgval key);
+
+
+/** upb_mapiter ***************************************************************/
+
+/* For iterating over a map.  Map iterators are invalidated by mutations to the
+ * map, but an invalidated iterator will never return junk or crash the process.
+ * An invalidated iterator may return entries that were already returned though,
+ * and if you keep invalidating the iterator during iteration, the program may
+ * enter an infinite loop. */
+
+size_t upb_mapiter_sizeof(void);
+
+void upb_mapiter_begin(upb_mapiter *i, const upb_map *t);
+upb_mapiter *upb_mapiter_new(const upb_map *t, upb_alloc *a);
+void upb_mapiter_free(upb_mapiter *i, upb_alloc *a);
+void upb_mapiter_next(upb_mapiter *i);
+bool upb_mapiter_done(const upb_mapiter *i);
+
+upb_msgval upb_mapiter_key(const upb_mapiter *i);
+upb_msgval upb_mapiter_value(const upb_mapiter *i);
+void upb_mapiter_setdone(upb_mapiter *i);
+bool upb_mapiter_isequal(const upb_mapiter *i1, const upb_mapiter *i2);
+
+
+/** Handlers ******************************************************************/
+
+/* These are the handlers used internally by upb_msgfactory_getmergehandlers().
+ * They write scalar data to a known offset from the message pointer.
+ *
+ * These would be trivial for anyone to implement themselves, but it's better
+ * to use these because some JITs will recognize and specialize these instead
+ * of actually calling the function. */
+
+/* Sets a handler for the given primitive field that will write the data at the
+ * given offset.  If hasbit > 0, also sets a hasbit at the given bit offset
+ * (addressing each byte low to high). */
+bool upb_msg_setscalarhandler(upb_handlers *h,
+                              const upb_fielddef *f,
+                              size_t offset,
+                              int32_t hasbit);
+
+/* If the given handler is a msghandlers_primitive field, returns true and sets
+ * *type, *offset and *hasbit.  Otherwise returns false. */
+bool upb_msg_getscalarhandlerdata(const upb_handlers *h,
+                                  upb_selector_t s,
+                                  upb_fieldtype_t *type,
+                                  size_t *offset,
+                                  int32_t *hasbit);
 
 UPB_END_EXTERN_C
 
-#ifdef __cplusplus
-/* C++ inline wrappers. */
-namespace upb {
-inline reffed_ptr<SymbolTable> SymbolTable::New() {
-  upb_symtab *s = upb_symtab_new(&s);
-  return reffed_ptr<SymbolTable>(s, &s);
-}
-
-inline void SymbolTable::Freeze() {
-  return upb_symtab_freeze(this);
-}
-inline const Def *SymbolTable::Resolve(const char *base,
-                                       const char *sym) const {
-  return upb_symtab_resolve(this, base, sym);
-}
-inline const Def* SymbolTable::Lookup(const char *sym) const {
-  return upb_symtab_lookup(this, sym);
-}
-inline const MessageDef *SymbolTable::LookupMessage(const char *sym) const {
-  return upb_symtab_lookupmsg(this, sym);
-}
-inline bool SymbolTable::Add(
-    Def*const* defs, size_t n, void* ref_donor, Status* status) {
-  return upb_symtab_add(this, (upb_def*const*)defs, n, ref_donor, status);
-}
-inline bool SymbolTable::AddFile(FileDef* file, Status* s) {
-  return upb_symtab_addfile(this, file, s);
-}
-}  /* namespace upb */
-#endif
-
-#endif  /* UPB_SYMTAB_H_ */
+#endif /* UPB_MSG_H_ */
 /*
 ** upb::descriptor::Reader (upb_descreader)
 **