Add benchmark for client API.

Bug: 160388760
Change-Id: Id8a36f621b3cdedf9d1494d19ead5c5392ab8259
diff --git a/gn/perfetto_benchmarks.gni b/gn/perfetto_benchmarks.gni
index ddca3f2..2bde376 100644
--- a/gn/perfetto_benchmarks.gni
+++ b/gn/perfetto_benchmarks.gni
@@ -29,3 +29,7 @@
   "test:benchmark_main",
   "test:end_to_end_benchmarks",
 ]
+
+if (enable_perfetto_heapprofd) {  # Only added when heapprofd is enabled.
+  perfetto_benchmarks_targets += [ "src/profiling/memory:benchmarks" ]
+}
diff --git a/src/profiling/memory/BUILD.gn b/src/profiling/memory/BUILD.gn
index 32e049b..f0e350f 100644
--- a/src/profiling/memory/BUILD.gn
+++ b/src/profiling/memory/BUILD.gn
@@ -392,3 +392,18 @@
     "../../base",
   ]
 }
+
+if (enable_perfetto_benchmarks) {
+  source_set("benchmarks") {  # Referenced from gn/perfetto_benchmarks.gni.
+    testonly = true
+    deps = [
+      ":client",
+      ":client_api",
+      "../../../gn:benchmark",
+      "../../../gn:default_deps",
+      "../../base",
+      "../../base:test_support",
+    ]
+    sources = [ "client_api_benchmark.cc" ]
+  }
+}
diff --git a/src/profiling/memory/client_api_benchmark.cc b/src/profiling/memory/client_api_benchmark.cc
new file mode 100644
index 0000000..efa142a
--- /dev/null
+++ b/src/profiling/memory/client_api_benchmark.cc
@@ -0,0 +1,231 @@
+// Copyright (C) 2021 The Android Open Source Project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <benchmark/benchmark.h>
+
+#include "perfetto/profiling/memory/heap_profile.h"
+#include "src/profiling/memory/heap_profile_internal.h"
+
+#include "src/profiling/memory/client.h"
+#include "src/profiling/memory/client_api_factory.h"
+
+namespace perfetto {
+namespace profiling {
+
+namespace {
+uint32_t GetHeapId() {  // Registers the heap once (function-local static).
+  static uint32_t heap_id =
+      AHeapProfile_registerHeap(AHeapInfo_create("dev.perfetto.benchmark"));
+  return heap_id;
+}
+
+ClientConfiguration g_client_config;  // Passed to Client by ConstructClient().
+int g_shmem_fd;  // Ring buffer fd, recorded by ConstructClient().
+
+base::UnixSocketRaw& GlobalServerSocket() {  // Heap-allocated, never deleted.
+  static base::UnixSocketRaw* srv_sock = new base::UnixSocketRaw;
+  return *srv_sock;
+}
+
+void DisconnectGlobalServerSocket() {  // Resets the global server socket.
+  base::UnixSocketRaw destroy;
+  std::swap(destroy, GlobalServerSocket());  // Old socket dies with |destroy|.
+}
+
+}  // namespace
+
+// Called by AHeapProfile_initSession (client_api.cc) to construct a client.
+// The Client API must be linked against another compilation unit providing
+// this function, so it stays agnostic of the setup (central heapprofd, fork
+// heapprofd). Here it builds a test Client over a socket pair and a ring
+// buffer made effectively infinite; the buffer fd is kept in g_shmem_fd.
+void StartHeapprofdIfStatic() {}  // No-op in this benchmark binary.
+std::shared_ptr<Client> ConstructClient(
+    UnhookedAllocator<perfetto::profiling::Client> unhooked_allocator) {
+  base::UnixSocketRaw cli_sock;
+  base::UnixSocketRaw& srv_sock = GlobalServerSocket();
+  std::tie(cli_sock, srv_sock) = base::UnixSocketRaw::CreatePairPosix(
+      base::SockFamily::kUnix, base::SockType::kStream);
+  auto ringbuf = SharedRingBuffer::Create(8 * 1048576);
+  PERFETTO_CHECK(ringbuf);  // Checked before the first dereference below.
+  ringbuf->InfiniteBufferForTesting();
+  PERFETTO_CHECK(cli_sock);
+  PERFETTO_CHECK(srv_sock);
+  g_shmem_fd = ringbuf->fd();  // Benchmarks dup() this fd to Attach().
+  return std::allocate_shared<Client>(unhooked_allocator, std::move(cli_sock),
+                                      g_client_config, std::move(*ringbuf),
+                                      getpid(), GetMainThreadStackRange());
+}
+
+static void BM_ClientApiOneTenthAllocation(benchmark::State& state) {
+  const uint32_t heap_id = GetHeapId();
+  // Scenario: all heaps on, default_interval 32000, each report passes 3200.
+  ClientConfiguration client_config{};
+  client_config.default_interval = 32000;
+  client_config.all_heaps = true;
+  g_client_config = client_config;
+  PERFETTO_CHECK(AHeapProfile_initSession(malloc, free));
+  // g_shmem_fd is set in ConstructClient(); the CHECK rejects a zero value.
+  PERFETTO_CHECK(g_shmem_fd);
+  auto ringbuf = SharedRingBuffer::Attach(base::ScopedFile(dup(g_shmem_fd)));
+  // Timed region: one client API call per benchmark iteration.
+  for (auto _ : state) {
+    AHeapProfile_reportAllocation(heap_id, 0x123, 3200);
+  }
+  DisconnectGlobalServerSocket();
+  ringbuf->SetShuttingDown();
+}
+
+BENCHMARK(BM_ClientApiOneTenthAllocation);
+
+static void BM_ClientApiOneHundrethAllocation(benchmark::State& state) {
+  const uint32_t heap_id = GetHeapId();
+  // Scenario: all heaps on, default_interval 32000, each report passes 320.
+  ClientConfiguration client_config{};
+  client_config.default_interval = 32000;
+  client_config.all_heaps = true;
+  g_client_config = client_config;
+  PERFETTO_CHECK(AHeapProfile_initSession(malloc, free));
+  // g_shmem_fd is set in ConstructClient(); the CHECK rejects a zero value.
+  PERFETTO_CHECK(g_shmem_fd);
+  auto ringbuf = SharedRingBuffer::Attach(base::ScopedFile(dup(g_shmem_fd)));
+  // Timed region: one client API call per benchmark iteration.
+  for (auto _ : state) {
+    AHeapProfile_reportAllocation(heap_id, 0x123, 320);
+  }
+  DisconnectGlobalServerSocket();
+  ringbuf->SetShuttingDown();
+}
+
+BENCHMARK(BM_ClientApiOneHundrethAllocation);
+
+static void BM_ClientApiAlmostNoAllocation(benchmark::State& state) {
+  const uint32_t heap_id = GetHeapId();
+  // Scenario: all heaps on, default_interval 1e16, each report passes 1.
+  ClientConfiguration client_config{};
+  client_config.default_interval = 10000000000000000;
+  client_config.all_heaps = true;
+  g_client_config = client_config;
+  PERFETTO_CHECK(AHeapProfile_initSession(malloc, free));
+  // g_shmem_fd is set in ConstructClient(); the CHECK rejects a zero value.
+  PERFETTO_CHECK(g_shmem_fd);
+  auto ringbuf = SharedRingBuffer::Attach(base::ScopedFile(dup(g_shmem_fd)));
+  // Timed region: one client API call per benchmark iteration.
+  for (auto _ : state) {
+    AHeapProfile_reportAllocation(heap_id, 0x123, 1);
+  }
+  DisconnectGlobalServerSocket();
+  ringbuf->SetShuttingDown();
+}
+
+BENCHMARK(BM_ClientApiAlmostNoAllocation);
+
+static void BM_ClientApiSample(benchmark::State& state) {
+  const uint32_t heap_id = GetHeapId();
+  // Scenario: all heaps on, default_interval 32000, reportSample passes 20.
+  ClientConfiguration client_config{};
+  client_config.default_interval = 32000;
+  client_config.all_heaps = true;
+  g_client_config = client_config;
+  PERFETTO_CHECK(AHeapProfile_initSession(malloc, free));
+  // g_shmem_fd is set in ConstructClient(); the CHECK rejects a zero value.
+  PERFETTO_CHECK(g_shmem_fd);
+  auto ringbuf = SharedRingBuffer::Attach(base::ScopedFile(dup(g_shmem_fd)));
+  // Timed region: one client API call per benchmark iteration.
+  for (auto _ : state) {
+    AHeapProfile_reportSample(heap_id, 0x123, 20);
+  }
+  DisconnectGlobalServerSocket();
+  ringbuf->SetShuttingDown();
+}
+
+BENCHMARK(BM_ClientApiSample);
+
+static void BM_ClientApiDisabledHeapAllocation(benchmark::State& state) {
+  const uint32_t heap_id = GetHeapId();
+  // Scenario: all_heaps off, default_interval 32000, each report passes 20.
+  ClientConfiguration client_config{};
+  client_config.default_interval = 32000;
+  client_config.all_heaps = false;
+  g_client_config = client_config;
+  PERFETTO_CHECK(AHeapProfile_initSession(malloc, free));
+  // g_shmem_fd is set in ConstructClient(); the CHECK rejects a zero value.
+  PERFETTO_CHECK(g_shmem_fd);
+  auto ringbuf = SharedRingBuffer::Attach(base::ScopedFile(dup(g_shmem_fd)));
+  // Timed region: one client API call per benchmark iteration.
+  for (auto _ : state) {
+    AHeapProfile_reportAllocation(heap_id, 0x123, 20);
+  }
+  DisconnectGlobalServerSocket();
+  ringbuf->SetShuttingDown();
+}
+
+BENCHMARK(BM_ClientApiDisabledHeapAllocation);
+
+static void BM_ClientApiDisabledHeapFree(benchmark::State& state) {
+  const uint32_t heap_id = GetHeapId();
+  // Scenario: all_heaps off, default_interval 32000, reportFree calls only.
+  ClientConfiguration client_config{};
+  client_config.default_interval = 32000;
+  client_config.all_heaps = false;
+  g_client_config = client_config;
+  PERFETTO_CHECK(AHeapProfile_initSession(malloc, free));
+  // g_shmem_fd is set in ConstructClient(); the CHECK rejects a zero value.
+  PERFETTO_CHECK(g_shmem_fd);
+  auto ringbuf = SharedRingBuffer::Attach(base::ScopedFile(dup(g_shmem_fd)));
+  // Timed region: one client API call per benchmark iteration.
+  for (auto _ : state) {
+    AHeapProfile_reportFree(heap_id, 0x123);
+  }
+  DisconnectGlobalServerSocket();
+  ringbuf->SetShuttingDown();
+}
+
+BENCHMARK(BM_ClientApiDisabledHeapFree);
+
+static void BM_ClientApiEnabledHeapFree(benchmark::State& state) {
+  const uint32_t heap_id = GetHeapId();
+  // Scenario: all heaps on, default_interval 32000, reportFree calls only.
+  ClientConfiguration client_config{};
+  client_config.default_interval = 32000;
+  client_config.all_heaps = true;
+  g_client_config = client_config;
+  PERFETTO_CHECK(AHeapProfile_initSession(malloc, free));
+  // g_shmem_fd is set in ConstructClient(); the CHECK rejects a zero value.
+  PERFETTO_CHECK(g_shmem_fd);
+  auto ringbuf = SharedRingBuffer::Attach(base::ScopedFile(dup(g_shmem_fd)));
+  // Timed region: one client API call per benchmark iteration.
+  for (auto _ : state) {
+    AHeapProfile_reportFree(heap_id, 0x123);
+  }
+  DisconnectGlobalServerSocket();
+  ringbuf->SetShuttingDown();
+}
+
+BENCHMARK(BM_ClientApiEnabledHeapFree);
+
+static void BM_ClientApiMallocFree(benchmark::State& state) {
+  for (auto _ : state) {  // Times a plain malloc(100)/free() pair.
+    volatile char* x = static_cast<char*>(malloc(100));
+    if (x) {
+      x[0] = 'x';  // Store through the volatile pointer.
+      free(const_cast<char*>(x));
+    }
+  }
+}
+
+BENCHMARK(BM_ClientApiMallocFree);
+
+}  // namespace profiling
+}  // namespace perfetto
diff --git a/src/profiling/memory/shared_ring_buffer.cc b/src/profiling/memory/shared_ring_buffer.cc
index 8e1b810..bc56327 100644
--- a/src/profiling/memory/shared_ring_buffer.cc
+++ b/src/profiling/memory/shared_ring_buffer.cc
@@ -175,7 +175,7 @@
     munmap(region, outer_size);
     return;
   }
-  size_ = size;
+  set_size(size);
   meta_ = reinterpret_cast<MetadataPage*>(region);
   mem_ = region + kMetaPageSize;
   mem_fd_ = std::move(mem_fd);
@@ -314,9 +314,10 @@
 SharedRingBuffer& SharedRingBuffer::operator=(
     SharedRingBuffer&& other) noexcept {
   mem_fd_ = std::move(other.mem_fd_);
-  std::tie(meta_, mem_, size_) = std::tie(other.meta_, other.mem_, other.size_);
-  std::tie(other.meta_, other.mem_, other.size_) =
-      std::make_tuple(nullptr, nullptr, 0);
+  std::tie(meta_, mem_, size_, size_mask_) =  // size_mask_ moves with size_.
+      std::tie(other.meta_, other.mem_, other.size_, other.size_mask_);
+  std::tie(other.meta_, other.mem_, other.size_, other.size_mask_) =
+      std::make_tuple(nullptr, nullptr, 0, 0);  // |other| is left zeroed.
   return *this;
 }
 
diff --git a/src/profiling/memory/shared_ring_buffer.h b/src/profiling/memory/shared_ring_buffer.h
index 89a47d9..638ac44 100644
--- a/src/profiling/memory/shared_ring_buffer.h
+++ b/src/profiling/memory/shared_ring_buffer.h
@@ -23,6 +23,7 @@
 #include "src/profiling/memory/scoped_spinlock.h"
 
 #include <atomic>
+#include <limits>
 #include <map>
 #include <memory>
 
@@ -160,6 +161,12 @@
     return meta_->reader_paused.exchange(false, std::memory_order_relaxed);
   }
 
+  void InfiniteBufferForTesting() {
+    // Test-only: inflates size_ so the buffer looks huge; size_mask_ is left
+    // untouched, so at() still wraps positions with the original mask.
+    size_ = std::numeric_limits<size_t>::max() / 2;
+  }
+
   // Exposed for fuzzers.
   struct MetadataPage {
     alignas(uint64_t) std::atomic<bool> spinlock;
@@ -216,6 +223,11 @@
     return result;
   }
 
+  inline void set_size(size_t size) {  // Keeps size_ and size_mask_ in sync.
+    size_ = size;
+    size_mask_ = size - 1;  // Mask for at(); presumably size is a power of 2.
+  }
+
   inline size_t read_avail(const PointerPositions& pos) {
     PERFETTO_DCHECK(pos.write_pos >= pos.read_pos);
     auto res = static_cast<size_t>(pos.write_pos - pos.read_pos);
@@ -227,7 +239,7 @@
     return size_ - read_avail(pos);
   }
 
-  inline uint8_t* at(uint64_t pos) { return mem_ + (pos & (size_ - 1)); }
+  inline uint8_t* at(uint64_t pos) { return mem_ + (pos & size_mask_); }
 
   base::ScopedFile mem_fd_;
   MetadataPage* meta_ = nullptr;  // Start of the mmaped region.
@@ -236,6 +248,7 @@
   // Size of the ring buffer contents, without including metadata or the 2nd
   // mmap.
   size_t size_ = 0;
+  size_t size_mask_ = 0;  // Set to size - 1 by set_size(); at() ANDs with it.
 
   // Remember to update the move ctor when adding new fields.
 };