Trace processor: Use varint encoding for string size in StringPool

Currently StringPool has a 32K limit on the size of a string. Some trace
fields (e.g. legacy_ftrace_output) can be bigger than that. The limit
is due to the string size being encoded as a 16-bit integer.

This CL introduces support for bigger strings by using VarInt encoding.

Bug: 130786269
Change-Id: Ibda6c2d7eb00a6f10348ffb9b16e0b430503228b
diff --git a/src/trace_processor/string_pool.cc b/src/trace_processor/string_pool.cc
index c299947..d660681 100644
--- a/src/trace_processor/string_pool.cc
+++ b/src/trace_processor/string_pool.cc
@@ -22,7 +22,7 @@
 namespace trace_processor {
 
 StringPool::StringPool() {
-  blocks_.emplace_back();
+  blocks_.emplace_back(kDefaultBlockSize);
 
   // Reserve a slot for the null string.
   PERFETTO_CHECK(blocks_.back().TryInsert(NullTermStringView()));
@@ -34,20 +34,21 @@
 StringPool& StringPool::operator=(StringPool&&) = default;
 
 StringPool::Id StringPool::InsertString(base::StringView str, uint64_t hash) {
-  // We shouldn't be writing string with more than 2^16 characters to the pool.
-  PERFETTO_CHECK(str.size() < std::numeric_limits<uint16_t>::max());
-
   // Try and find enough space in the current block for the string and the
-  // metadata (the size of the string + the null terminator + 2 bytes to encode
-  // the size itself).
-  auto* ptr = blocks_.back().TryInsert(str);
+  // metadata (varint-encoded size + the string data + the null terminator).
+  const uint8_t* ptr = blocks_.back().TryInsert(str);
   if (PERFETTO_UNLIKELY(!ptr)) {
     // This means the block did not have enough space. This should only happen
     // on 32-bit platforms as we allocate a 4GB mmap on 64 bit.
     PERFETTO_CHECK(sizeof(uint8_t*) == 4);
 
-    // Add a new block to store the data.
-    blocks_.emplace_back();
+    // Add a new block to store the data. If the string is larger than the
+    // default block size, add a bigger block exclusively for this string.
+    if (str.size() + kMaxMetadataSize > kDefaultBlockSize) {
+      blocks_.emplace_back(str.size() + kMaxMetadataSize);
+    } else {
+      blocks_.emplace_back(kDefaultBlockSize);
+    }
 
     // Try and reserve space again - this time we should definitely succeed.
     ptr = blocks_.back().TryInsert(str);
@@ -61,28 +62,29 @@
   return string_id;
 }
 
-uint8_t* StringPool::Block::TryInsert(base::StringView str) {
+const uint8_t* StringPool::Block::TryInsert(base::StringView str) {
   auto str_size = str.size();
-  auto size = str_size + kMetadataSize;
-  if (static_cast<uint64_t>(pos_) + size >= kBlockSize)
+  if (static_cast<uint64_t>(pos_) + str_size + kMaxMetadataSize > size_)
     return nullptr;
 
   // Get where we should start writing this string.
-  uint8_t* ptr = Get(pos_);
+  uint8_t* begin = Get(pos_);
 
-  // First memcpy the size of the string into the buffer.
-  memcpy(ptr, &str_size, sizeof(str_size));
+  // First write the size of the string using varint encoding.
+  uint8_t* end = protozero::proto_utils::WriteVarInt(str_size, begin);
 
-  // Next the string itself which starts at offset 2.
-  if (PERFETTO_LIKELY(str_size > 0))
-    memcpy(&ptr[2], str.data(), str_size);
+  // Next the string itself.
+  if (PERFETTO_LIKELY(str_size > 0)) {
+    memcpy(end, str.data(), str_size);
+    end += str_size;
+  }
 
   // Finally add a null terminator.
-  ptr[2 + str_size] = '\0';
+  *(end++) = '\0';
 
   // Update the end of the block and return the pointer to the string.
-  pos_ += size;
-  return ptr;
+  pos_ = OffsetOf(end);
+  return begin;
 }
 
 StringPool::Iterator::Iterator(const StringPool* pool) : pool_(pool) {}
@@ -95,8 +97,11 @@
 
   // Find the size of the string at the current offset in the block
   // and increment the offset by that size.
-  auto str_size = GetSize(block.Get(block_offset_));
-  block_offset_ += kMetadataSize + str_size;
+  uint32_t str_size = 0;
+  const uint8_t* ptr = block.Get(block_offset_);
+  ptr = ReadSize(ptr, &str_size);
+  ptr += str_size + 1;
+  block_offset_ = block.OffsetOf(ptr);
 
   // If we're out of bounds for this block, go to the start of the next block.
   if (block.pos() <= block_offset_) {