Added benchmarks for `upb_Arena_Fuse()`

I added two variants: one is unbalanced (always fusing a single arena into the group), while the other is balanced (always fusing two groups of equal size).

Unfortunately, it is difficult to benchmark `upb_Arena_Free()` separately from `upb_Arena_Fuse()`, since the only way to get a non-zero refcount is to fuse.

Results on my machine:

```
CPU: Intel Skylake Xeon with HyperThreading (48 cores) dL1:32KB dL2:1024KB dL3:38MB
Benchmark                    Time(ns)        CPU(ns)     Iterations
-------------------------------------------------------------------
BM_ArenaFuseUnbalanced/2           52.2           52.2      1000000  38.300M items/s
BM_ArenaFuseUnbalanced/8          416            416         142315  19.243M items/s
BM_ArenaFuseUnbalanced/64        4034           4033          14306  15.868M items/s
BM_ArenaFuseUnbalanced/128       8302           8301           6837  15.420M items/s
BM_ArenaFuseBalanced/2             54.7           54.7      1000000  36.581M items/s
BM_ArenaFuseBalanced/8            425            425         100000  18.845M items/s
BM_ArenaFuseBalanced/64          4029           4029          14632  15.886M items/s
BM_ArenaFuseBalanced/128         8050           8049           7176  15.902M items/s
```
PiperOrigin-RevId: 518292073
diff --git a/benchmarks/BUILD b/benchmarks/BUILD
index 07207fc..e219c42 100644
--- a/benchmarks/BUILD
+++ b/benchmarks/BUILD
@@ -87,7 +87,9 @@
         ":benchmark_descriptor_sv_cc_proto",
         ":benchmark_descriptor_upb_proto",
         ":benchmark_descriptor_upb_proto_reflection",
+        "//:base",
         "//:descriptor_upb_proto",
+        "//:mem",
         "//:reflection",
         "@com_github_google_benchmark//:benchmark_main",
         "@com_google_absl//absl/container:flat_hash_set",
diff --git a/benchmarks/benchmark.cc b/benchmarks/benchmark.cc
index 8ad30f0..58e76f8 100644
--- a/benchmarks/benchmark.cc
+++ b/benchmarks/benchmark.cc
@@ -35,6 +35,8 @@
 #include "benchmarks/descriptor.upb.h"
 #include "benchmarks/descriptor.upbdefs.h"
 #include "benchmarks/descriptor_sv.pb.h"
+#include "upb/base/log2.h"
+#include "upb/mem/arena.h"
 #include "upb/reflection/def.hpp"
 
 upb_StringView descriptor = benchmarks_descriptor_proto_upbdefinit.descriptor;
@@ -73,6 +75,52 @@
 }
 BENCHMARK(BM_ArenaInitialBlockOneAlloc);
 
+// Benchmarks upb_Arena_Fuse() in the unbalanced case: every arena of a fresh
+// set is fused into arenas[0], one arena per call. The arenas are created and
+// freed inside the measured loop too.
+static void BM_ArenaFuseUnbalanced(benchmark::State& state) {
+  std::vector<upb_Arena*> arenas(state.range(0));
+  const size_t count = arenas.size();
+  size_t items = 0;
+  for (auto _ : state) {
+    for (size_t i = 0; i < count; ++i) arenas[i] = upb_Arena_New();
+    // Index 0 is included, so the first call fuses arenas[0] with itself.
+    for (size_t i = 0; i < count; ++i) upb_Arena_Fuse(arenas[0], arenas[i]);
+    for (size_t i = 0; i < count; ++i) upb_Arena_Free(arenas[i]);
+    items += count;
+  }
+  // One item is counted per arena per benchmark iteration.
+  state.SetItemsProcessed(items);
+}
+BENCHMARK(BM_ArenaFuseUnbalanced)->Range(2, 128);
+
+// Benchmarks upb_Arena_Fuse() with a pairwise pattern: adjacent arenas are
+// fused first, then the distance between fused arenas doubles on each pass.
+// Arena creation and upb_Arena_Free() run inside the measured loop as well.
+static void BM_ArenaFuseBalanced(benchmark::State& state) {
+  std::vector<upb_Arena*> arenas(state.range(0));
+  size_t n = 0;  // Arenas handled so far, reported via SetItemsProcessed().
+  for (auto _ : state) {
+    for (auto& arena : arenas) arena = upb_Arena_New();
+
+    // Perform a series of fuses that keeps the halves balanced.
+    const size_t max = upb_Log2Ceiling(arenas.size());
+    // Named `level`, not `n`, so it does not shadow the item counter above.
+    for (size_t level = 0; level <= max; level++) {
+      // Shift a size_t (not the int literal 1) to match the type of `step`.
+      const size_t step = size_t{1} << level;
+      for (size_t i = 0; i + step < arenas.size(); i += (step * 2)) {
+        upb_Arena_Fuse(arenas[i], arenas[i + step]);
+      }
+    }
+
+    for (auto& arena : arenas) upb_Arena_Free(arena);
+    n += arenas.size();
+  }
+  state.SetItemsProcessed(n);
+}
+BENCHMARK(BM_ArenaFuseBalanced)->Range(2, 128);
+
 enum LoadDescriptorMode {
   NoLayout,
   WithLayout,