In arena, allocate strings on dedicated string blocks so that they can be efficiently destructed.

- StringBlocks form a chunked linked list in which all nodes other than the first are full. When sizeof(void*)==8 and sizeof(std::string)==24, there are 21 strings per 512 byte StringBlock.
- We no longer need to store cleanup nodes for std::string (when using AllocateAlignedWithCleanup) - instead we traverse the StringBlock list.
- Template AllocAligned on is_string in order to avoid comparing the destructor against the string destructor on the allocation hot path.
- Remove specialized cleanup blocks for string/cord. This allows for simplifying the allocation and cleanup code, and we still save memory due to no longer needing cleanup nodes for most strings.

PiperOrigin-RevId: 488736123
diff --git a/src/google/protobuf/arena.cc b/src/google/protobuf/arena.cc
index 327c26e..633548d 100644
--- a/src/google/protobuf/arena.cc
+++ b/src/google/protobuf/arena.cc
@@ -35,10 +35,12 @@
 #include <cstddef>
 #include <cstdint>
 #include <limits>
+#include <string>
 #include <typeinfo>
 
 #include "absl/synchronization/mutex.h"
 #include "google/protobuf/arena_allocation_policy.h"
+#include "google/protobuf/arena_cleanup.h"
 #include "google/protobuf/arena_impl.h"
 #include "google/protobuf/arenaz_sampler.h"
 #include "google/protobuf/port.h"
@@ -162,8 +164,11 @@
   head_.store(b, std::memory_order_relaxed);
   space_used_.store(0, std::memory_order_relaxed);
   space_allocated_.store(b->size, std::memory_order_relaxed);
-  cached_block_length_ = 0;
   cached_blocks_ = nullptr;
+  cached_block_length_ = 0;
+  first_string_block_size_.store(cleanup::StringBlock::kCapacity,
+                                 std::memory_order_relaxed);
+  string_blocks_ = nullptr;
 }
 
 SerialArena* SerialArena::New(Memory mem, ThreadSafeArena& parent) {
@@ -196,14 +201,14 @@
 PROTOBUF_NOINLINE
 void* SerialArena::AllocateAlignedWithCleanupFallback(
     size_t n, size_t align, void (*destructor)(void*)) {
-  size_t required = AlignUpTo(n, align) + cleanup::Size(destructor);
+  size_t required = AlignUpTo(n, align) + cleanup::Size();
   AllocateNewBlock(required);
   return AllocateFromExistingWithCleanupFallback(n, align, destructor);
 }
 
 PROTOBUF_NOINLINE
 void SerialArena::AddCleanupFallback(void* elem, void (*destructor)(void*)) {
-  size_t required = cleanup::Size(destructor);
+  size_t required = cleanup::Size();
   AllocateNewBlock(required);
   AddCleanupFromExisting(elem, destructor);
 }
@@ -265,10 +270,20 @@
       static_cast<uint64_t>(
           ptr() - const_cast<ArenaBlock*>(h)->Pointer(kBlockHeaderSize)),
       current_block_size);
+  // Subtract out the unused capacity on the first string block.
+  const uint8_t first_string_block_size =
+      first_string_block_size_.load(std::memory_order_relaxed);
+  current_space_used -= sizeof(std::string) * (cleanup::StringBlock::kCapacity -
+                                               first_string_block_size);
   return current_space_used + space_used_.load(std::memory_order_relaxed);
 }
 
 void SerialArena::CleanupList() {
+  if (string_blocks_ != nullptr) {
+    cleanup::DestroyStrings(string_blocks_, first_string_block_size_.load(
+                                                std::memory_order_relaxed));
+  }
+
   ArenaBlock* b = head();
   if (b->IsSentry()) return;
 
@@ -281,33 +296,28 @@
       // A prefetch distance of 8 here was chosen arbitrarily.  It makes the
       // pending nodes fill a cacheline which seemed nice.
       constexpr int kPrefetchDist = 8;
-      cleanup::Tag pending_type[kPrefetchDist];
       char* pending_node[kPrefetchDist];
 
       int pos = 0;
-      for (; pos < kPrefetchDist && it < limit; ++pos) {
-        pending_type[pos] = cleanup::Type(it);
+      for (; pos < kPrefetchDist && it < limit; ++pos, it += cleanup::Size()) {
         pending_node[pos] = it;
-        it += cleanup::Size(pending_type[pos]);
       }
 
       if (pos < kPrefetchDist) {
         for (int i = 0; i < pos; ++i) {
-          cleanup::DestroyNode(pending_type[i], pending_node[i]);
+          cleanup::DestroyNode(pending_node[i]);
         }
       } else {
         pos = 0;
         while (it < limit) {
           cleanup::PrefetchNode(it);
-          cleanup::DestroyNode(pending_type[pos], pending_node[pos]);
-          pending_type[pos] = cleanup::Type(it);
+          cleanup::DestroyNode(pending_node[pos]);
           pending_node[pos] = it;
-          it += cleanup::Size(pending_type[pos]);
+          it += cleanup::Size();
           pos = (pos + 1) % kPrefetchDist;
         }
         for (int i = pos; i < pos + kPrefetchDist; ++i) {
-          cleanup::DestroyNode(pending_type[i % kPrefetchDist],
-                               pending_node[i % kPrefetchDist]);
+          cleanup::DestroyNode(pending_node[i % kPrefetchDist]);
         }
       }
     }
@@ -739,16 +749,6 @@
   return space_allocated;
 }
 
-void* ThreadSafeArena::AllocateAlignedWithCleanup(size_t n, size_t align,
-                                                  void (*destructor)(void*)) {
-  SerialArena* arena;
-  if (PROTOBUF_PREDICT_TRUE(GetSerialArenaFast(&arena))) {
-    return arena->AllocateAlignedWithCleanup(n, align, destructor);
-  } else {
-    return AllocateAlignedWithCleanupFallback(n, align, destructor);
-  }
-}
-
 void ThreadSafeArena::AddCleanup(void* elem, void (*cleanup)(void*)) {
   SerialArena* arena;
   if (PROTOBUF_PREDICT_FALSE(!GetSerialArenaFast(&arena))) {
@@ -757,13 +757,6 @@
   arena->AddCleanup(elem, cleanup);
 }
 
-PROTOBUF_NOINLINE
-void* ThreadSafeArena::AllocateAlignedWithCleanupFallback(
-    size_t n, size_t align, void (*destructor)(void*)) {
-  return GetSerialArenaFallback(n + kMaxCleanupNodeSize)
-      ->AllocateAlignedWithCleanup(n, align, destructor);
-}
-
 template <typename Functor>
 void ThreadSafeArena::WalkConstSerialArenaChunk(Functor fn) const {
   const SerialArenaChunk* chunk = head_.load(std::memory_order_acquire);
@@ -882,6 +875,16 @@
   return serial;
 }
 
+// Extern template instantiations for inline functions.
+template void* ThreadSafeArena::AllocateAlignedWithCleanup<false>(
+    size_t n, size_t align, void (*destructor)(void*));
+template void* ThreadSafeArena::AllocateAlignedWithCleanup<true>(
+    size_t n, size_t align, void (*destructor)(void*));
+template void* ThreadSafeArena::AllocateAlignedWithCleanupFallback<false>(
+    size_t n, size_t align, void (*destructor)(void*));
+template void* ThreadSafeArena::AllocateAlignedWithCleanupFallback<true>(
+    size_t n, size_t align, void (*destructor)(void*));
+
 }  // namespace internal
 
 void* Arena::Allocate(size_t n) { return impl_.AllocateAligned(n); }
@@ -890,10 +893,11 @@
   return impl_.AllocateAligned<internal::AllocationClient::kArray>(n);
 }
 
-void* Arena::AllocateAlignedWithCleanup(size_t n, size_t align,
-                                        void (*destructor)(void*)) {
-  return impl_.AllocateAlignedWithCleanup(n, align, destructor);
-}
+// Extern template instantiations for inline functions.
+template void* Arena::AllocateAlignedWithCleanup<false>(
+    size_t n, size_t align, void (*destructor)(void*));
+template void* Arena::AllocateAlignedWithCleanup<true>(
+    size_t n, size_t align, void (*destructor)(void*));
 
 }  // namespace protobuf
 }  // namespace google
diff --git a/src/google/protobuf/arena.h b/src/google/protobuf/arena.h
index e76e66f..66fb0cc 100644
--- a/src/google/protobuf/arena.h
+++ b/src/google/protobuf/arena.h
@@ -562,13 +562,14 @@
     }
   }
 
+  template <bool is_string>
   PROTOBUF_NDEBUG_INLINE void* AllocateInternal(size_t size, size_t align,
                                                 void (*destructor)(void*)) {
     // Monitor allocation if needed.
     if (destructor == nullptr) {
       return AllocateAligned(size, align);
     } else {
-      return AllocateAlignedWithCleanup(size, align, destructor);
+      return AllocateAlignedWithCleanup<is_string>(size, align, destructor);
     }
   }
 
@@ -613,10 +614,11 @@
   template <typename T, typename... Args>
   PROTOBUF_NDEBUG_INLINE T* DoCreateMessage(Args&&... args) {
     return InternalHelper<T>::Construct(
-        AllocateInternal(sizeof(T), alignof(T),
-                         internal::ObjectDestructor<
-                             InternalHelper<T>::is_destructor_skippable::value,
-                             T>::destructor),
+        AllocateInternal<internal::IsString<T>()>(
+            sizeof(T), alignof(T),
+            internal::ObjectDestructor<
+                InternalHelper<T>::is_destructor_skippable::value,
+                T>::destructor),
         this, std::forward<Args>(args)...);
   }
 
@@ -666,9 +668,8 @@
       auto destructor =
           internal::ObjectDestructor<std::is_trivially_destructible<T>::value,
                                      T>::destructor;
-      T* result =
-          new (arena->AllocateInternal(sizeof(T), alignof(T), destructor))
-              T(std::forward<Args>(args)...);
+      T* result = new (arena->AllocateInternal<internal::IsString<T>()>(
+          sizeof(T), alignof(T), destructor)) T(std::forward<Args>(args)...);
       return result;
     }
   }
@@ -681,8 +682,8 @@
       auto destructor =
           internal::ObjectDestructor<std::is_trivially_destructible<T>::value,
                                      T>::destructor;
-      return new (arena->AllocateInternal(sizeof(T), alignof(T), destructor))
-          T(std::forward<Args>(args)...);
+      return new (arena->AllocateInternal<internal::IsString<T>()>(
+          sizeof(T), alignof(T), destructor)) T(std::forward<Args>(args)...);
     }
   }
 
@@ -727,8 +728,11 @@
 
   void* Allocate(size_t n);
   void* AllocateForArray(size_t n);
+  template <bool is_string>
   void* AllocateAlignedWithCleanup(size_t n, size_t align,
-                                   void (*destructor)(void*));
+                                   void (*destructor)(void*)) {
+    return impl_.AllocateAlignedWithCleanup<is_string>(n, align, destructor);
+  }
 
   template <typename Type>
   friend class internal::GenericTypeHandler;
diff --git a/src/google/protobuf/arena_cleanup.h b/src/google/protobuf/arena_cleanup.h
index 0ca60c2..d567e0b 100644
--- a/src/google/protobuf/arena_cleanup.h
+++ b/src/google/protobuf/arena_cleanup.h
@@ -54,14 +54,6 @@
   reinterpret_cast<T*>(object)->~T();
 }
 
-// Tag defines the type of cleanup / cleanup object. This tag is stored in the
-// lowest 2 bits of the `elem` value identifying the type of node. All node
-// types must start with a `uintptr_t` that stores `Tag` in its low two bits.
-enum class Tag : uintptr_t {
-  kDynamic = 0,  // DynamicNode
-  kString = 1,   // StringNode (std::string)
-};
-
 // DynamicNode contains the object (`elem`) that needs to be
 // destroyed, and the function to destroy it (`destructor`)
 // elem must be aligned at minimum on a 4 byte boundary.
@@ -70,37 +62,11 @@
   void (*destructor)(void*);
 };
 
-// StringNode contains a `std::string` object (`elem`) that needs to be
-// destroyed. The lowest 2 bits of `elem` contain the non-zero kString tag.
-struct StringNode {
-  uintptr_t elem;
-};
-
-
-// EnableSpecializedTags() return true if the alignment of tagged objects
-// such as std::string allow us to poke tags in the 2 LSB bits.
-inline constexpr bool EnableSpecializedTags() {
-  // For now we require 2 bits
-  return alignof(std::string) >= 8;
-}
-
-// Adds a cleanup entry identified by `tag` at memory location `pos`.
-inline ABSL_ATTRIBUTE_ALWAYS_INLINE void CreateNode(Tag tag, void* pos,
+// Adds a cleanup entry at memory location `pos`.
+inline ABSL_ATTRIBUTE_ALWAYS_INLINE void CreateNode(void* pos,
                                                     const void* elem_raw,
                                                     void (*destructor)(void*)) {
   auto elem = reinterpret_cast<uintptr_t>(elem_raw);
-  if (EnableSpecializedTags()) {
-    GOOGLE_DCHECK_EQ(elem & 3, 0ULL);  // Must be aligned
-    switch (tag) {
-      case Tag::kString: {
-        StringNode n = {elem | static_cast<uintptr_t>(Tag::kString)};
-        memcpy(pos, &n, sizeof(n));
-        return;
-      }
-      default:
-        break;
-    }
-  }
   DynamicNode n = {elem, destructor};
   memcpy(pos, &n, sizeof(n));
 }
@@ -111,76 +77,63 @@
   (void)elem_address;
 }
 
-// Destroys the node idenitfied by `tag` stored at memory location `pos`.
-inline ABSL_ATTRIBUTE_ALWAYS_INLINE void DestroyNode(Tag tag, const void* pos) {
-  if (EnableSpecializedTags()) {
-    switch (tag) {
-      case Tag::kString: {
-        StringNode n;
-        memcpy(&n, pos, sizeof(n));
-        auto* s = reinterpret_cast<std::string*>(n.elem & ~0x7ULL);
-        // Some compilers don't like fully qualified explicit dtor calls,
-        // so use an alias to avoid having to type `::`.
-        using string_type = std::string;
-        s->~string_type();
-        return;
-      }
-      default:
-        break;
-    }
-  }
+// Destroys the node stored at memory location `pos`.
+inline ABSL_ATTRIBUTE_ALWAYS_INLINE void DestroyNode(const void* pos) {
   DynamicNode n;
   memcpy(&n, pos, sizeof(n));
   n.destructor(reinterpret_cast<void*>(n.elem));
 }
 
-// Returns the `tag` identifying the type of object for `destructor` or
-// kDynamic if `destructor` does not identify a well know object type.
-inline ABSL_ATTRIBUTE_ALWAYS_INLINE Tag Type(void (*destructor)(void*)) {
-  if (EnableSpecializedTags()) {
-    if (destructor == &arena_destruct_object<std::string>) {
-      return Tag::kString;
+// Returns the required size in bytes for a DynamicNode.
+inline ABSL_ATTRIBUTE_ALWAYS_INLINE size_t Size() {
+  return sizeof(DynamicNode);
+}
+
+// Block on the arena for std::strings so that we can destruct them efficiently.
+// This is a linked list. The first block may be partially full, but all other
+// blocks are full.
+struct StringBlock {
+  static constexpr size_t kMaxSize = 512;
+  static constexpr size_t kCapacity =
+      (kMaxSize - sizeof(StringBlock*)) / sizeof(std::string);
+
+  ABSL_ATTRIBUTE_ALWAYS_INLINE void* GetSpace(size_t i) {
+    static_assert(sizeof(StringBlock) <= kMaxSize, "");
+    return space + sizeof(std::string) * i;
+  }
+  ABSL_ATTRIBUTE_ALWAYS_INLINE std::string* Get(size_t i) {
+    return static_cast<std::string*>(GetSpace(i));
+  }
+
+  StringBlock* next = nullptr;
+  alignas(std::string) char space[kCapacity * sizeof(std::string)];
+};
+
+inline ABSL_ATTRIBUTE_ALWAYS_INLINE void DestroyStrings(
+    StringBlock* block, size_t first_block_size) {
+  // Some compilers don't like fully qualified explicit dtor calls,
+  // so use an alias to avoid having to type `::`.
+  using string_type = std::string;
+
+#ifndef PROTO2_OPENSOURCE
+  // Prefetch the next block if non-null.
+  if (block->next != nullptr) ::compiler::PrefetchT0(block->next);
+#endif
+  for (size_t i = 0; i < first_block_size; ++i) {
+    block->Get(i)->~string_type();
+  }
+
+  // All blocks other than the first are full.
+  while (block->next != nullptr) {
+    block = block->next;
+#ifndef PROTO2_OPENSOURCE
+    // Prefetch the next block if non-null.
+    if (block->next != nullptr) ::compiler::PrefetchT0(block->next);
+#endif
+    for (size_t i = 0; i < StringBlock::kCapacity; ++i) {
+      block->Get(i)->~string_type();
     }
   }
-  return Tag::kDynamic;
-}
-
-// Returns the `tag` identifying the type of object stored at memory location
-// `elem`, which represents the first uintptr_t value in the node.
-inline ABSL_ATTRIBUTE_ALWAYS_INLINE Tag Type(void* raw) {
-  if (!EnableSpecializedTags()) return Tag::kDynamic;
-
-  uintptr_t elem;
-  memcpy(&elem, raw, sizeof(elem));
-  switch (static_cast<Tag>(elem & 0x7ULL)) {
-    case Tag::kDynamic:
-      return Tag::kDynamic;
-    case Tag::kString:
-      return Tag::kString;
-    default:
-      GOOGLE_LOG(FATAL) << "Corrupted cleanup tag: " << (elem & 0x7ULL);
-      return Tag::kDynamic;
-  }
-}
-
-// Returns the required size in bytes off the node type identified by `tag`.
-inline ABSL_ATTRIBUTE_ALWAYS_INLINE size_t Size(Tag tag) {
-  if (!EnableSpecializedTags()) return sizeof(DynamicNode);
-
-  switch (tag) {
-    case Tag::kDynamic:
-      return sizeof(DynamicNode);
-    case Tag::kString:
-      return sizeof(StringNode);
-    default:
-      GOOGLE_LOG(FATAL) << "Corrupted cleanup tag: " << static_cast<int>(tag);
-      return sizeof(DynamicNode);
-  }
-}
-
-// Returns the required size in bytes off the node type for `destructor`.
-inline ABSL_ATTRIBUTE_ALWAYS_INLINE size_t Size(void (*destructor)(void*)) {
-  return destructor == nullptr ? 0 : Size(Type(destructor));
 }
 
 }  // namespace cleanup
diff --git a/src/google/protobuf/arena_impl.h b/src/google/protobuf/arena_impl.h
index 9d2de93..0a878df 100644
--- a/src/google/protobuf/arena_impl.h
+++ b/src/google/protobuf/arena_impl.h
@@ -88,6 +88,11 @@
   }
 }
 
+template <typename T>
+constexpr bool IsString() {
+  return std::is_same<std::string, typename std::remove_cv<T>::type>();
+}
+
 // Arena blocks are variable length malloc-ed objects.  The following structure
 // describes the common header for all blocks.
 struct ArenaBlock {
@@ -304,10 +309,19 @@
     GOOGLE_DCHECK_GE(limit_, ptr());
     static_assert(!std::is_trivially_destructible<T>::value,
                   "This function is only for non-trivial types.");
+    if (IsString<T>()) {
+      if (PROTOBUF_PREDICT_FALSE(!HasStringSpace())) {
+        if (!HasSpace(AlignUpTo8(sizeof(cleanup::StringBlock)))) return nullptr;
+        AllocateStringBlock();
+      }
+      void* ptr = AllocateStringFromExisting();
+      PROTOBUF_ASSUME(ptr != nullptr);
+      return ptr;
+    }
 
     constexpr int aligned_size = AlignUpTo8(sizeof(T));
     constexpr auto destructor = cleanup::arena_destruct_object<T>;
-    size_t required = aligned_size + cleanup::Size(destructor);
+    size_t required = aligned_size + cleanup::Size();
     if (PROTOBUF_PREDICT_FALSE(!HasSpace(required))) {
       return nullptr;
     }
@@ -317,10 +331,14 @@
     return ptr;
   }
 
-  PROTOBUF_ALWAYS_INLINE
-  void* AllocateAlignedWithCleanup(size_t n, size_t align,
-                                   void (*destructor)(void*)) {
-    size_t required = AlignUpTo(n, align) + cleanup::Size(destructor);
+  template <bool is_string>
+  PROTOBUF_ALWAYS_INLINE void* AllocateAlignedWithCleanup(
+      size_t n, size_t align, void (*destructor)(void*)) {
+    if (is_string) {
+      if (PROTOBUF_PREDICT_FALSE(!HasStringSpace())) AllocateStringBlock();
+      return AllocateStringFromExisting();
+    }
+    size_t required = AlignUpTo(n, align) + cleanup::Size();
     if (PROTOBUF_PREDICT_FALSE(!HasSpace(required))) {
       return AllocateAlignedWithCleanupFallback(n, align, destructor);
     }
@@ -329,7 +347,7 @@
 
   PROTOBUF_ALWAYS_INLINE
   void AddCleanup(void* elem, void (*destructor)(void*)) {
-    size_t required = cleanup::Size(destructor);
+    size_t required = cleanup::Size();
     if (PROTOBUF_PREDICT_FALSE(!HasSpace(required))) {
       return AddCleanupFallback(elem, destructor);
     }
@@ -337,6 +355,8 @@
   }
 
  private:
+  friend class ThreadSafeArena;
+
   void* AllocateFromExistingWithCleanupFallback(size_t n, size_t align,
                                                 void (*destructor)(void*)) {
     n = AlignUpTo(n, align);
@@ -350,17 +370,37 @@
 
   PROTOBUF_ALWAYS_INLINE
   void AddCleanupFromExisting(void* elem, void (*destructor)(void*)) {
-    cleanup::Tag tag = cleanup::Type(destructor);
-    size_t n = cleanup::Size(tag);
+    size_t n = cleanup::Size();
 
     PROTOBUF_UNPOISON_MEMORY_REGION(limit_ - n, n);
     limit_ -= n;
     GOOGLE_DCHECK_GE(limit_, ptr());
-    cleanup::CreateNode(tag, limit_, elem, destructor);
+    cleanup::CreateNode(limit_, elem, destructor);
   }
 
- private:
-  friend class ThreadSafeArena;
+  PROTOBUF_ALWAYS_INLINE bool HasStringSpace() const {
+    static_assert(
+        cleanup::StringBlock::kCapacity <= std::numeric_limits<uint8_t>::max(),
+        "");
+    return first_string_block_size_.load(std::memory_order_relaxed) <
+           cleanup::StringBlock::kCapacity;
+  }
+  PROTOBUF_ALWAYS_INLINE void* AllocateStringFromExisting() {
+    uint8_t first_string_block_size =
+        first_string_block_size_.load(std::memory_order_relaxed);
+    void* ptr = string_blocks_->GetSpace(first_string_block_size);
+    first_string_block_size_.store(first_string_block_size + 1);
+    return ptr;
+  }
+  void AllocateStringBlock() {
+    GOOGLE_DCHECK_EQ(first_string_block_size_.load(std::memory_order_relaxed),
+              cleanup::StringBlock::kCapacity);
+    auto* new_block = static_cast<cleanup::StringBlock*>(
+        AllocateAligned(AlignUpTo8(sizeof(cleanup::StringBlock))));
+    new_block->next = string_blocks_;
+    string_blocks_ = new_block;
+    first_string_block_size_.store(0, std::memory_order_relaxed);
+  }
 
   // Creates a new SerialArena inside mem using the remaining memory as for
   // future allocations.
@@ -397,8 +437,14 @@
     // Simple linked list.
     CachedBlock* next;
   };
-  uint8_t cached_block_length_ = 0;
   CachedBlock** cached_blocks_ = nullptr;
+  uint8_t cached_block_length_ = 0;
+
+  // Size of the first StringBlock. Other blocks are all full.
+  std::atomic<uint8_t> first_string_block_size_{
+      cleanup::StringBlock::kCapacity};
+  // Pointer to string destruct block list.
+  cleanup::StringBlock* string_blocks_ = nullptr;
 
   // Helper getters/setters to handle relaxed operations on atomic variables.
   ArenaBlock* head() { return head_.load(std::memory_order_relaxed); }
@@ -501,8 +547,17 @@
     return false;
   }
 
+  template <bool is_string>
   void* AllocateAlignedWithCleanup(size_t n, size_t align,
-                                   void (*destructor)(void*));
+                                   void (*destructor)(void*)) {
+    SerialArena* arena;
+    if (PROTOBUF_PREDICT_TRUE(GetSerialArenaFast(&arena))) {
+      return arena->AllocateAlignedWithCleanup<is_string>(n, align, destructor);
+    } else {
+      return AllocateAlignedWithCleanupFallback<is_string>(n, align,
+                                                           destructor);
+    }
+  }
 
   // Add object pointer and cleanup function pointer to the list.
   void AddCleanup(void* elem, void (*cleanup)(void*));
@@ -564,8 +619,13 @@
 
   const AllocationPolicy* AllocPolicy() const { return alloc_policy_.get(); }
   void InitializeWithPolicy(const AllocationPolicy& policy);
-  void* AllocateAlignedWithCleanupFallback(size_t n, size_t align,
-                                           void (*destructor)(void*));
+
+  template <bool is_string>
+  PROTOBUF_NOINLINE void* AllocateAlignedWithCleanupFallback(
+      size_t n, size_t align, void (*destructor)(void*)) {
+    return GetSerialArenaFallback(n + kMaxCleanupNodeSize)
+        ->AllocateAlignedWithCleanup<is_string>(n, align, destructor);
+  }
 
   void Init();
 
diff --git a/src/google/protobuf/repeated_field_unittest.cc b/src/google/protobuf/repeated_field_unittest.cc
index 4906cdd..d919694 100644
--- a/src/google/protobuf/repeated_field_unittest.cc
+++ b/src/google/protobuf/repeated_field_unittest.cc
@@ -223,12 +223,12 @@
 
   size_t used_bytes_if_reusing =
       values.size() * values[0]->Capacity() * (is_ptr ? sizeof(T*) : sizeof(T));
-  // Use a 2% slack for other overhead.
+  // Use a 10% slack for other overhead.
   // If we were not reusing the blocks, the actual value would be ~2x the
   // expected.
   EXPECT_THAT(
       arena.SpaceUsed() - (is_ptr ? sizeof(T) * kNumElems * kNumFields : 0),
-      AllOf(Ge(used_bytes_if_reusing), Le(1.02 * used_bytes_if_reusing)));
+      AllOf(Ge(used_bytes_if_reusing), Le(1.1 * used_bytes_if_reusing)));
 }
 
 TEST(RepeatedField, NaturalGrowthOnArenasReuseBlocks) {
@@ -1129,10 +1129,6 @@
   EXPECT_GE(field.SpaceUsedExcludingSelf(), min_expected_usage);
 }
 
-TEST(RepeatedPtrField, ArenaAllocationSizesMatchExpectedValues) {
-  CheckAllocationSizes<RepeatedPtrField<std::string>>(true);
-}
-
 TEST(RepeatedPtrField, NaturalGrowthOnArenasReuseBlocks) {
   CheckNaturalGrowthOnArenasReuseBlocks<RepeatedPtrField<std::string>>(true);
 }