Log invalid stack bound errors in trace.

Bug: 181012016
Change-Id: I9209044c462a383bd62c4bc9747d557025578c6c
diff --git a/protos/perfetto/trace/perfetto_trace.proto b/protos/perfetto/trace/perfetto_trace.proto
index 9af2389..b6f4360 100644
--- a/protos/perfetto/trace/perfetto_trace.proto
+++ b/protos/perfetto/trace/perfetto_trace.proto
@@ -7465,6 +7465,11 @@
 
   repeated ProcessHeapSamples process_dumps = 5;
   message ProcessHeapSamples {
+    enum ClientError {
+      CLIENT_ERROR_NONE = 0;
+      CLIENT_ERROR_HIT_TIMEOUT = 1;
+      CLIENT_ERROR_INVALID_STACK_BOUNDS = 2;
+    }
     optional uint64 pid = 1;
 
     // This process was profiled from startup.
@@ -7481,8 +7486,12 @@
 
     // If disconnected, this disconnect was caused by the client overrunning
     // the buffer.
+    // Equivalent to client_error == CLIENT_ERROR_HIT_TIMEOUT
+    // on new S builds.
     optional bool buffer_overran = 7;
 
+    optional ClientError client_error = 14;
+
     // If disconnected, this disconnected was caused by the shared memory
     // buffer being corrupted. THIS IS ALWAYS A BUG IN HEAPPROFD OR CLIENT
     // MEMORY CORRUPTION.
diff --git a/protos/perfetto/trace/profiling/profile_packet.proto b/protos/perfetto/trace/profiling/profile_packet.proto
index d137793..86640fd 100644
--- a/protos/perfetto/trace/profiling/profile_packet.proto
+++ b/protos/perfetto/trace/profiling/profile_packet.proto
@@ -88,6 +88,11 @@
 
   repeated ProcessHeapSamples process_dumps = 5;
   message ProcessHeapSamples {
+    enum ClientError {
+      CLIENT_ERROR_NONE = 0;
+      CLIENT_ERROR_HIT_TIMEOUT = 1;
+      CLIENT_ERROR_INVALID_STACK_BOUNDS = 2;
+    }
     optional uint64 pid = 1;
 
     // This process was profiled from startup.
@@ -104,8 +109,12 @@
 
     // If disconnected, this disconnect was caused by the client overrunning
     // the buffer.
+    // Equivalent to client_error == CLIENT_ERROR_HIT_TIMEOUT
+    // on new S builds.
     optional bool buffer_overran = 7;
 
+    optional ClientError client_error = 14;
+
     // If disconnected, this disconnected was caused by the shared memory
     // buffer being corrupted. THIS IS ALWAYS A BUG IN HEAPPROFD OR CLIENT
     // MEMORY CORRUPTION.
diff --git a/src/profiling/memory/client.cc b/src/profiling/memory/client.cc
index 05c7329..04af029 100644
--- a/src/profiling/memory/client.cc
+++ b/src/profiling/memory/client.cc
@@ -46,6 +46,7 @@
 #include "perfetto/ext/base/utils.h"
 #include "src/profiling/memory/sampler.h"
 #include "src/profiling/memory/scoped_spinlock.h"
+#include "src/profiling/memory/shared_ring_buffer.h"
 #include "src/profiling/memory/wire_protocol.h"
 
 namespace perfetto {
@@ -372,6 +373,7 @@
   const char* stackend = GetStackEnd(stackptr);
   if (!stackend) {
     PERFETTO_ELOG("Failed to find stackend.");
+    shmem_.SetErrorState(SharedRingBuffer::kInvalidStackBounds);
     return false;
   }
   uint64_t stack_size = static_cast<uint64_t>(stackend - stackptr);
@@ -422,7 +424,7 @@
     }
   }
   if (IsConnected())
-    shmem_.SetHitTimeout();
+    shmem_.SetErrorState(SharedRingBuffer::kHitTimeout);
   PERFETTO_PLOG("Failed to write to shared ring buffer. Disconnecting.");
   return -1;
 }
diff --git a/src/profiling/memory/heapprofd_producer.cc b/src/profiling/memory/heapprofd_producer.cc
index 28b464b..16e4b49 100644
--- a/src/profiling/memory/heapprofd_producer.cc
+++ b/src/profiling/memory/heapprofd_producer.cc
@@ -39,8 +39,10 @@
 #include "perfetto/ext/tracing/ipc/producer_ipc_client.h"
 #include "perfetto/tracing/core/data_source_config.h"
 #include "perfetto/tracing/core/data_source_descriptor.h"
+#include "protos/perfetto/trace/profiling/profile_packet.pbzero.h"
 #include "src/profiling/common/producer_support.h"
 #include "src/profiling/common/profiler_guardrails.h"
+#include "src/profiling/memory/shared_ring_buffer.h"
 #include "src/profiling/memory/unwound_messages.h"
 #include "src/profiling/memory/wire_protocol.h"
 
@@ -112,6 +114,21 @@
   return fdstat.st_ino == fnstat.st_ino;
 }
 
+protos::pbzero::ProfilePacket::ProcessHeapSamples::ClientError
+ErrorStateToProto(SharedRingBuffer::ErrorState state) {
+  switch (state) {
+    case (SharedRingBuffer::kNoError):
+      return protos::pbzero::ProfilePacket::ProcessHeapSamples::
+          CLIENT_ERROR_NONE;
+    case (SharedRingBuffer::kHitTimeout):
+      return protos::pbzero::ProfilePacket::ProcessHeapSamples::
+          CLIENT_ERROR_HIT_TIMEOUT;
+    case (SharedRingBuffer::kInvalidStackBounds):
+      return protos::pbzero::ProfilePacket::ProcessHeapSamples::
+          CLIENT_ERROR_INVALID_STACK_BOUNDS;
+  }
+}
+
 }  // namespace
 
 bool HeapprofdConfigToClientConfiguration(
@@ -679,7 +696,10 @@
           proto->set_timestamp(dump_timestamp);
           proto->set_from_startup(from_startup);
           proto->set_disconnected(process_state->disconnected);
-          proto->set_buffer_overran(process_state->buffer_overran);
+          proto->set_buffer_overran(process_state->error_state ==
+                                    SharedRingBuffer::kHitTimeout);
+          proto->set_client_error(
+              ErrorStateToProto(process_state->error_state));
           proto->set_buffer_corrupted(process_state->buffer_corrupted);
           proto->set_hit_guardrail(data_source->hit_guardrail);
           if (heap_name)
@@ -1173,7 +1193,7 @@
     return;
   ProcessState& process_state = process_state_it->second;
   process_state.disconnected = !ds.shutting_down;
-  process_state.buffer_overran = stats.hit_timeout;
+  process_state.error_state = stats.error_state;
   process_state.client_spinlock_blocked_us = stats.client_spinlock_blocked_us;
   process_state.buffer_corrupted =
       stats.num_writes_corrupt > 0 || stats.num_reads_corrupt > 0;
diff --git a/src/profiling/memory/heapprofd_producer.h b/src/profiling/memory/heapprofd_producer.h
index 96ef09b..be6ae64 100644
--- a/src/profiling/memory/heapprofd_producer.h
+++ b/src/profiling/memory/heapprofd_producer.h
@@ -42,6 +42,7 @@
 #include "src/profiling/memory/bookkeeping.h"
 #include "src/profiling/memory/bookkeeping_dump.h"
 #include "src/profiling/memory/log_histogram.h"
+#include "src/profiling/memory/shared_ring_buffer.h"
 #include "src/profiling/memory/system_property.h"
 #include "src/profiling/memory/unwinding.h"
 #include "src/profiling/memory/unwound_messages.h"
@@ -185,7 +186,7 @@
     ProcessState(GlobalCallstackTrie* c, bool d)
         : callsites(c), dump_at_max_mode(d) {}
     bool disconnected = false;
-    bool buffer_overran = false;
+    SharedRingBuffer::ErrorState error_state;
     bool buffer_corrupted = false;
 
     uint64_t heap_samples = 0;
diff --git a/src/profiling/memory/shared_ring_buffer.h b/src/profiling/memory/shared_ring_buffer.h
index 638ac44..864c7c1 100644
--- a/src/profiling/memory/shared_ring_buffer.h
+++ b/src/profiling/memory/shared_ring_buffer.h
@@ -73,6 +73,12 @@
     uint64_t bytes_free = 0;
   };
 
+  enum ErrorState : uint64_t {
+    kNoError = 0,
+    kHitTimeout = 1,
+    kInvalidStackBounds = 2,
+  };
+
   struct Stats {
     uint64_t bytes_written;
     uint64_t num_writes_succeeded;
@@ -86,7 +92,7 @@
     // Fields below get set by GetStats as copies of atomics in MetadataPage.
     uint64_t failed_spinlocks;
     uint64_t client_spinlock_blocked_us;
-    bool hit_timeout;
+    ErrorState error_state;
   };
 
   static base::Optional<SharedRingBuffer> Create(size_t);
@@ -119,13 +125,13 @@
     Stats stats = meta_->stats;
     stats.failed_spinlocks =
         meta_->failed_spinlocks.load(std::memory_order_relaxed);
-    stats.hit_timeout = meta_->hit_timeout.load(std::memory_order_relaxed);
+    stats.error_state = meta_->error_state.load(std::memory_order_relaxed);
     stats.client_spinlock_blocked_us =
         meta_->client_spinlock_blocked_us.load(std::memory_order_relaxed);
     return stats;
   }
 
-  void SetHitTimeout() { meta_->hit_timeout.store(true); }
+  void SetErrorState(ErrorState error) { meta_->error_state.store(error); }
 
   // This is used by the caller to be able to hold the SpinLock after
   // BeginWrite has returned. This is so that additional bookkeeping can be
@@ -175,7 +181,7 @@
 
     std::atomic<uint64_t> client_spinlock_blocked_us;
     std::atomic<uint64_t> failed_spinlocks;
-    alignas(uint64_t) std::atomic<bool> hit_timeout;
+    std::atomic<ErrorState> error_state;
     alignas(uint64_t) std::atomic<bool> shutting_down;
     alignas(uint64_t) std::atomic<bool> reader_paused;
     // For stats that are only accessed by a single thread or under the
diff --git a/src/trace_processor/importers/proto/proto_trace_parser.cc b/src/trace_processor/importers/proto/proto_trace_parser.cc
index 96c3833..93b248c 100644
--- a/src/trace_processor/importers/proto/proto_trace_parser.cc
+++ b/src/trace_processor/importers/proto/proto_trace_parser.cc
@@ -311,9 +311,17 @@
     if (entry.buffer_corrupted())
       context_->storage->IncrementIndexedStats(
           stats::heapprofd_buffer_corrupted, pid);
-    if (entry.buffer_overran())
+    if (entry.buffer_overran() ||
+        entry.client_error() ==
+            protos::pbzero::ProfilePacket::ProcessHeapSamples::
+                CLIENT_ERROR_HIT_TIMEOUT) {
       context_->storage->IncrementIndexedStats(stats::heapprofd_buffer_overran,
                                                pid);
+    }
+    if (entry.client_error()) {
+      context_->storage->SetIndexedStats(stats::heapprofd_client_error, pid,
+                                         entry.client_error());
+    }
     if (entry.rejected_concurrent())
       context_->storage->IncrementIndexedStats(
           stats::heapprofd_rejected_concurrent, pid);
diff --git a/src/trace_processor/storage/stats.h b/src/trace_processor/storage/stats.h
index 0e7f3eb..74ad737 100644
--- a/src/trace_processor/storage/stats.h
+++ b/src/trace_processor/storage/stats.h
@@ -136,6 +136,9 @@
   F(heapprofd_buffer_overran,           kIndexed, kDataLoss, kTrace,           \
       "The shared memory buffer between the target and heapprofd overran. "    \
       "The profile was truncated early. Indexed by target upid."),             \
+  F(heapprofd_client_error,             kIndexed, kError,    kTrace,           \
+      "The heapprofd client ran into a problem and disconnected. "             \
+      "See profile_packet.proto  for error codes."),                           \
   F(heapprofd_client_disconnected,      kIndexed, kInfo,     kTrace,    ""),   \
   F(heapprofd_malformed_packet,         kIndexed, kError,    kTrace,    ""),   \
   F(heapprofd_missing_packet,           kSingle,  kError,    kTrace,    ""),   \