Added a decode API for copying vs. aliasing strings (UPB_DECODE_ALIAS via *_parse_ex) and added benchmarks to test both.

Benchmark output:

$ bazel-bin/benchmarks/benchmark '--benchmark_filter=BM_Parse'
2020-11-11 15:39:04
Running bazel-bin/benchmarks/benchmark
Run on (72 X 3700 MHz CPU s)
CPU Caches:
  L1 Data 32K (x36)
  L1 Instruction 32K (x36)
  L2 Unified 1024K (x36)
  L3 Unified 25344K (x2)
-------------------------------------------------------------------------------------
Benchmark                                              Time           CPU Iterations
-------------------------------------------------------------------------------------
BM_Parse_Upb_FileDesc<UseArena, Copy>               4134 ns       4134 ns     168714   1.69152GB/s
BM_Parse_Upb_FileDesc<UseArena, Alias>              3487 ns       3487 ns     199509   2.00526GB/s
BM_Parse_Upb_FileDesc<InitBlock, Copy>              3727 ns       3726 ns     187581   1.87643GB/s
BM_Parse_Upb_FileDesc<InitBlock, Alias>             3110 ns       3110 ns     224970   2.24866GB/s
BM_Parse_Proto2<FileDesc, NoArena, Copy>           31132 ns      31132 ns      22437   229.995MB/s
BM_Parse_Proto2<FileDesc, UseArena, Copy>          21011 ns      21009 ns      33922   340.812MB/s
BM_Parse_Proto2<FileDesc, InitBlock, Copy>         17976 ns      17975 ns      38808   398.337MB/s
BM_Parse_Proto2<FileDescSV, InitBlock, Alias>      17357 ns      17356 ns      40244   412.539MB/s
diff --git a/benchmarks/benchmark.cc b/benchmarks/benchmark.cc
index cc510bd..7f4765a 100644
--- a/benchmarks/benchmark.cc
+++ b/benchmarks/benchmark.cc
@@ -115,13 +115,31 @@
 }
 BENCHMARK(BM_LoadAdsDescriptor_Proto2);
 
-static void BM_Parse_Upb_FileDesc_WithArena(benchmark::State& state) {
+enum CopyStrings {
+  Copy,
+  Alias,
+};
+
+enum ArenaMode {
+  NoArena,
+  UseArena,
+  InitBlock,
+};
+
+template <ArenaMode AMode, CopyStrings Copy>
+static void BM_Parse_Upb_FileDesc(benchmark::State& state) {
   size_t bytes = 0;
   for (auto _ : state) {
-    upb_arena* arena = upb_arena_new();
+    upb_arena *arena;
+    if (AMode == InitBlock) {
+      arena = upb_arena_init(buf, sizeof(buf), NULL);
+    } else {
+      arena = upb_arena_new();
+    }
     upb_benchmark_FileDescriptorProto* set =
-        upb_benchmark_FileDescriptorProto_parse(descriptor.data,
-                                                descriptor.size, arena);
+        upb_benchmark_FileDescriptorProto_parse_ex(
+            descriptor.data, descriptor.size, arena,
+            Copy == Alias ? UPB_DECODE_ALIAS : 0);
     if (!set) {
       printf("Failed to parse.\n");
       exit(1);
@@ -131,28 +149,16 @@
   }
   state.SetBytesProcessed(state.iterations() * descriptor.size);
 }
-BENCHMARK(BM_Parse_Upb_FileDesc_WithArena);
+BENCHMARK_TEMPLATE(BM_Parse_Upb_FileDesc, UseArena, Copy);
+BENCHMARK_TEMPLATE(BM_Parse_Upb_FileDesc, UseArena, Alias);
+BENCHMARK_TEMPLATE(BM_Parse_Upb_FileDesc, InitBlock, Copy);
+BENCHMARK_TEMPLATE(BM_Parse_Upb_FileDesc, InitBlock, Alias);
 
-static void BM_Parse_Upb_FileDesc_WithInitialBlock(benchmark::State& state) {
-  size_t bytes = 0;
-  for (auto _ : state) {
-    upb_arena* arena = upb_arena_init(buf, sizeof(buf), NULL);
-    upb_benchmark_FileDescriptorProto* set =
-        upb_benchmark_FileDescriptorProto_parse(descriptor.data,
-                                                descriptor.size, arena);
-    if (!set) {
-      printf("Failed to parse.\n");
-      exit(1);
-    }
-    bytes += descriptor.size;
-    upb_arena_free(arena);
-  }
-  state.SetBytesProcessed(state.iterations() * descriptor.size);
-}
-BENCHMARK(BM_Parse_Upb_FileDesc_WithInitialBlock);
+template <ArenaMode AMode, class P>
+struct Proto2Factory;
 
-template <class P>
-struct NoArena {
+template<class P>
+struct Proto2Factory<NoArena, P> {
  public:
   P* GetProto() { return &proto_; }
 
@@ -161,7 +167,7 @@
 };
 
 template <class P>
-struct WithArena {
+struct Proto2Factory<UseArena, P> {
  public:
   P* GetProto() { return protobuf::Arena::CreateMessage<P>(&arena_); }
 
@@ -170,9 +176,9 @@
 };
 
 template <class P>
-struct WithInitialBlock {
+struct Proto2Factory<InitBlock, P> {
  public:
-  WithInitialBlock() : arena_(GetOptions()) {}
+  Proto2Factory() : arena_(GetOptions()) {}
   P* GetProto() { return protobuf::Arena::CreateMessage<P>(&arena_); }
 
  private:
@@ -189,17 +195,15 @@
 using FileDesc = ::upb_benchmark::FileDescriptorProto;
 using FileDescSV = ::upb_benchmark::sv::FileDescriptorProto;
 
-const protobuf::MessageLite::ParseFlags kMergePartial =
-    protobuf::MessageLite::ParseFlags::kMergePartial;
-const protobuf::MessageLite::ParseFlags kAlias =
-    protobuf::MessageLite::ParseFlags::kMergePartialWithAliasing;
-
-template <class P, template <class> class Factory,
-          protobuf::MessageLite::ParseFlags kParseFlags = kMergePartial>
+template <class P, ArenaMode AMode, CopyStrings kCopy>
 void BM_Parse_Proto2(benchmark::State& state) {
   size_t bytes = 0;
+  constexpr protobuf::MessageLite::ParseFlags kParseFlags =
+      kCopy == Copy
+          ? protobuf::MessageLite::ParseFlags::kMergePartial
+          : protobuf::MessageLite::ParseFlags::kMergePartialWithAliasing;
   for (auto _ : state) {
-    Factory<P> proto_factory;
+    Proto2Factory<AMode, P> proto_factory;
     auto proto = proto_factory.GetProto();
     protobuf::StringPiece input(descriptor.data,descriptor.size);
     bool ok = proto->template ParseFrom<kParseFlags>(input);
@@ -211,15 +215,10 @@
   }
   state.SetBytesProcessed(state.iterations() * descriptor.size);
 }
-BENCHMARK_TEMPLATE(BM_Parse_Proto2, FileDesc, NoArena);
-BENCHMARK_TEMPLATE(BM_Parse_Proto2, FileDesc, WithArena);
-BENCHMARK_TEMPLATE(BM_Parse_Proto2, FileDesc, WithInitialBlock);
-//BENCHMARK_TEMPLATE(BM_Parse_Proto2, FileDescSV, NoArena);
-//BENCHMARK_TEMPLATE(BM_Parse_Proto2, FileDescSV, WithArena);
-BENCHMARK_TEMPLATE(BM_Parse_Proto2, FileDescSV, WithInitialBlock);
-//BENCHMARK_TEMPLATE(BM_Parse_Proto2, FileDescSV, NoArena, kAlias);
-//BENCHMARK_TEMPLATE(BM_Parse_Proto2, FileDescSV, WithArena, kAlias);
-BENCHMARK_TEMPLATE(BM_Parse_Proto2, FileDescSV, WithInitialBlock, kAlias);
+BENCHMARK_TEMPLATE(BM_Parse_Proto2, FileDesc, NoArena, Copy);
+BENCHMARK_TEMPLATE(BM_Parse_Proto2, FileDesc, UseArena, Copy);
+BENCHMARK_TEMPLATE(BM_Parse_Proto2, FileDesc, InitBlock, Copy);
+BENCHMARK_TEMPLATE(BM_Parse_Proto2, FileDescSV, InitBlock, Alias);
 
 static void BM_SerializeDescriptor_Proto2(benchmark::State& state) {
   size_t bytes = 0;
diff --git a/upb/decode.c b/upb/decode.c
index d38b46d..f49918e 100644
--- a/upb/decode.c
+++ b/upb/decode.c
@@ -643,10 +643,11 @@
   return decode_msg(d, ptr, msg, decode_totablep(table));
 }
 
-bool upb_decode(const char *buf, size_t size, void *msg, const upb_msglayout *l,
-                upb_arena *arena) {
+bool _upb_decode(const char *buf, size_t size, void *msg,
+                 const upb_msglayout *l, upb_arena *arena, int options) {
   bool ok;
   upb_decstate state;
+  unsigned depth = (unsigned)options >> 16;
 
   if (size == 0) {
     return true;
@@ -660,12 +661,12 @@
   } else {
     state.end = buf + size - 16;
     state.limit = 16;
-    state.alias = true;
+    state.alias = options & UPB_DECODE_ALIAS;
   }
 
   state.limit_ptr = state.end;
   state.unknown_msg = NULL;
-  state.depth = 64;
+  state.depth = depth ? depth : 64;
   state.end_group = DECODE_NOGROUP;
   state.arena.head = arena->head;
   state.arena.last_size = arena->last_size;
diff --git a/upb/decode.h b/upb/decode.h
index 9de8638..00419ab 100644
--- a/upb/decode.h
+++ b/upb/decode.h
@@ -7,15 +7,32 @@
 
 #include "upb/msg.h"
 
+/* Must be last. */
+#include "upb/port_def.inc"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
+enum {
+  UPB_DECODE_ALIAS = 1,
+};
+
+#define UPB_DECODE_MAXDEPTH(depth) ((depth) << 16)
+
+bool _upb_decode(const char *buf, size_t size, upb_msg *msg,
+                 const upb_msglayout *l, upb_arena *arena, int options);
+
+UPB_INLINE
 bool upb_decode(const char *buf, size_t size, upb_msg *msg,
-                const upb_msglayout *l, upb_arena *arena);
+                const upb_msglayout *l, upb_arena *arena) {
+  return _upb_decode(buf, size, msg, l, arena, 0);
+}
 
 #ifdef __cplusplus
 }  /* extern "C" */
 #endif
 
+#include "upb/port_undef.inc"
+
 #endif  /* UPB_DECODE_H_ */
diff --git a/upb/decode_fast.c b/upb/decode_fast.c
index f58f70e..f628e6d 100644
--- a/upb/decode_fast.c
+++ b/upb/decode_fast.c
@@ -763,7 +763,7 @@
   } else if (UPB_LIKELY(size <= 64)) {
     if (UPB_UNLIKELY(common_has < 64)) goto longstr;
     fastdecode_docopy(d, ptr, size, 64, buf, dst);
-  } else if (UPB_LIKELY(size <= 128)) {
+  } else if (UPB_LIKELY(size < 128)) {
     if (UPB_UNLIKELY(common_has < 128)) goto longstr;
     fastdecode_docopy(d, ptr, size, 128, buf, dst);
   } else {
diff --git a/upb/def.c b/upb/def.c
index 19b30fe..74c33cc 100644
--- a/upb/def.c
+++ b/upb/def.c
@@ -2140,8 +2140,8 @@
     if (!_upb_symtab_loaddefinit(s, *deps)) goto err;
   }
 
-  file = google_protobuf_FileDescriptorProto_parse(
-      init->descriptor.data, init->descriptor.size, arena);
+  file = google_protobuf_FileDescriptorProto_parse_ex(
+      init->descriptor.data, init->descriptor.size, arena, UPB_DECODE_ALIAS);
   s->bytes_loaded += init->descriptor.size;
 
   if (!file) {
diff --git a/upbc/generator.cc b/upbc/generator.cc
index f4d1bb9..4f9db9a 100644
--- a/upbc/generator.cc
+++ b/upbc/generator.cc
@@ -348,6 +348,12 @@
         "  $0 *ret = $0_new(arena);\n"
         "  return (ret && upb_decode(buf, size, ret, &$1, arena)) ? ret : NULL;\n"
         "}\n"
+        "UPB_INLINE $0 *$0_parse_ex(const char *buf, size_t size,\n"
+        "                           upb_arena *arena, int options) {\n"
+        "  $0 *ret = $0_new(arena);\n"
+        "  return (ret && _upb_decode(buf, size, ret, &$1, arena, options))\n"
+        "      ? ret : NULL;\n"
+        "}\n"
         "UPB_INLINE char *$0_serialize(const $0 *msg, upb_arena *arena, size_t "
         "*len) {\n"
         "  return upb_encode(msg, &$1, arena, len);\n"