Merge "Trace Redaction - Re-implement proc free redaction" into main
diff --git a/src/trace_processor/importers/proto/gpu_event_parser.cc b/src/trace_processor/importers/proto/gpu_event_parser.cc
index 9022cbb..d08d946 100644
--- a/src/trace_processor/importers/proto/gpu_event_parser.cc
+++ b/src/trace_processor/importers/proto/gpu_event_parser.cc
@@ -330,6 +330,7 @@
     ConstBytes blob) {
   protos::pbzero::GpuRenderStageEvent::Decoder event(blob.data, blob.size);
 
+  int32_t pid = 0;
   if (event.has_specifications()) {
     protos::pbzero::GpuRenderStageEvent_Specifications::Decoder spec(
         event.specifications().data, event.specifications().size);
@@ -349,6 +350,13 @@
             context_->storage->InternString(stage.description())));
       }
     }
+    if (spec.has_context_spec()) {
+      protos::pbzero::GpuRenderStageEvent_Specifications_ContextSpec::Decoder
+          context_spec(spec.context_spec());
+      if (context_spec.has_pid()) {
+        pid = context_spec.pid();
+      }
+    }
   }
 
   auto args_callback = [this, &event,
@@ -468,7 +476,8 @@
     row.command_buffer_name = command_buffer_name_id;
     row.submission_id = event.submission_id();
     row.hw_queue_id = static_cast<int64_t>(hw_queue_id);
-
+    row.upid = context_->process_tracker->GetOrCreateProcess(
+        static_cast<uint32_t>(pid));
     context_->slice_tracker->ScopedTyped(
         context_->storage->mutable_gpu_slice_table(), row, args_callback);
   }
diff --git a/src/trace_processor/tables/slice_tables.py b/src/trace_processor/tables/slice_tables.py
index 90c8176..1deca2e 100644
--- a/src/trace_processor/tables/slice_tables.py
+++ b/src/trace_processor/tables/slice_tables.py
@@ -135,6 +135,7 @@
         C('frame_id', CppOptional(CppUint32())),
         C('submission_id', CppOptional(CppUint32())),
         C('hw_queue_id', CppOptional(CppInt64())),
+        C('upid', CppOptional(CppUint32())),
         C('render_subpasses', CppString()),
     ],
     parent=SLICE_TABLE,
@@ -142,17 +143,33 @@
         doc='''''',
         group='Slice',
         columns={
-            'context_id': '''''',
-            'render_target': '''''',
-            'render_target_name': '''''',
-            'render_pass': '''''',
-            'render_pass_name': '''''',
-            'command_buffer': '''''',
-            'command_buffer_name': '''''',
-            'frame_id': '''''',
-            'submission_id': '''''',
-            'hw_queue_id': '''''',
-            'render_subpasses': ''''''
+            'context_id':
+                '''''',
+            'render_target':
+                '''''',
+            'render_target_name':
+                '''''',
+            'render_pass':
+                '''''',
+            'render_pass_name':
+                '''''',
+            'command_buffer':
+                '''''',
+            'command_buffer_name':
+                '''''',
+            'frame_id':
+                '''''',
+            'submission_id':
+                '''''',
+            'hw_queue_id':
+                '''''',
+            'upid':
+                '''
+                  Unique process id of the app that generates this gpu render
+                  stage event.
+                ''',
+            'render_subpasses':
+                ''''''
         }))
 
 GRAPHICS_FRAME_SLICE_TABLE = Table(
diff --git a/src/trace_redaction/redact_sched_switch.cc b/src/trace_redaction/redact_sched_switch.cc
index 87412c1..dc8898d 100644
--- a/src/trace_redaction/redact_sched_switch.cc
+++ b/src/trace_redaction/redact_sched_switch.cc
@@ -27,14 +27,62 @@
 namespace perfetto::trace_redaction {
 
 namespace {
-// TODO(vaage): While simple, this function saves us from declaring the sample
-// lambda each time we use the has_fields pattern. Once its usage increases, and
-// its value is obvious, remove this comment.
 bool IsTrue(bool value) {
   return value;
 }
+
+// Copy a field from 'decoder' to 'message' if the field can be found. Returns
+// false if the field cannot be found.
+bool Passthrough(protozero::ProtoDecoder& decoder,
+                 uint32_t field_id,
+                 protozero::Message* message) {
+  auto field = decoder.FindField(field_id);
+
+  if (field.valid()) {
+    proto_util::AppendField(field, message);
+    return true;
+  }
+
+  return false;
+}
 }  // namespace
 
+int64_t InternTable::Push(const char* data, size_t size) {
+  std::string_view outer(data, size);
+
+  for (size_t i = 0; i < interned_comms_.size(); ++i) {
+    auto view = interned_comms_[i];
+
+    if (view == outer) {
+      return static_cast<int64_t>(i);
+    }
+  }
+
+  // No room for the new string, reject the request.
+  if (comms_length_ + size > comms_.size()) {
+    return -1;
+  }
+
+  auto* head = comms_.data() + comms_length_;
+
+  // Important note, the null byte is not copied.
+  memcpy(head, data, size);
+  comms_length_ += size;
+
+  size_t id = interned_comms_.size();
+  interned_comms_.emplace_back(head, size);
+
+  return static_cast<int64_t>(id);
+}
+
+std::string_view InternTable::Find(size_t index) const {
+  if (index < interned_comms_.size()) {
+    return interned_comms_[index];
+  }
+
+  return {};
+}
+
 // Redact sched switch trace events in an ftrace event bundle:
 //
 //  event {
@@ -106,9 +154,10 @@
 
     if (field.id() ==
         protos::pbzero::FtraceEventBundle::kCompactSchedFieldNumber) {
-      // TODO(vaage): Replace this with logic specific to the comp sched data
-      // type.
-      proto_util::AppendField(field, message);
+      protos::pbzero::FtraceEventBundle::CompactSched::Decoder comp_sched(
+          field.as_bytes());
+      RETURN_IF_ERROR(TransformCompSched(context, cpu.as_int32(), comp_sched,
+                                         message->set_compact_sched()));
       continue;
     }
 
@@ -208,6 +257,176 @@
   return base::OkStatus();
 }
 
+base::Status RedactSchedSwitchHarness::TransformCompSched(
+    const Context& context,
+    int32_t cpu,
+    protos::pbzero::FtraceEventBundle::CompactSched::Decoder& comp_sched,
+    protos::pbzero::FtraceEventBundle::CompactSched* message) const {
+  auto has_switch_fields = {
+      comp_sched.has_switch_timestamp(),
+      comp_sched.has_switch_prev_state(),
+      comp_sched.has_switch_next_pid(),
+      comp_sched.has_switch_next_prio(),
+      comp_sched.has_switch_next_comm_index(),
+  };
+
+  auto has_waking_fields = {
+      comp_sched.has_waking_timestamp(),  comp_sched.has_waking_pid(),
+      comp_sched.has_waking_target_cpu(), comp_sched.has_waking_prio(),
+      comp_sched.has_waking_comm_index(), comp_sched.has_waking_common_flags(),
+  };
+
+  // Populate the intern table once; it will be used by both sched and waking.
+  InternTable intern_table;
+
+  for (auto it = comp_sched.intern_table(); it; ++it) {
+    auto chars = it->as_string();
+    auto index = intern_table.Push(chars.data, chars.size);
+
+    if (index < 0) {
+      return base::ErrStatus(
+          "RedactSchedSwitchHarness: failed to insert string into intern "
+          "table.");
+    }
+  }
+
+  if (std::any_of(has_switch_fields.begin(), has_switch_fields.end(), IsTrue)) {
+    RETURN_IF_ERROR(TransformCompSchedSwitch(context, cpu, comp_sched,
+                                             &intern_table, message));
+  }
+
+  if (std::any_of(has_waking_fields.begin(), has_waking_fields.end(), IsTrue)) {
+    // TODO(vaage): Create and call TransformCompSchedWaking().
+  }
+
+  // IMPORTANT: The intern table can only be added after switch and waking
+  // because switch and/or waking can/will modify the intern table.
+  for (auto view : intern_table.values()) {
+    message->add_intern_table(view.data(), view.size());
+  }
+
+  return base::OkStatus();
+}
+
+base::Status RedactSchedSwitchHarness::TransformCompSchedSwitch(
+    const Context& context,
+    int32_t cpu,
+    protos::pbzero::FtraceEventBundle::CompactSched::Decoder& comp_sched,
+    InternTable* intern_table,
+    protos::pbzero::FtraceEventBundle::CompactSched* message) const {
+  auto has_fields = {
+      comp_sched.has_intern_table(),
+      comp_sched.has_switch_timestamp(),
+      comp_sched.has_switch_prev_state(),
+      comp_sched.has_switch_next_pid(),
+      comp_sched.has_switch_next_prio(),
+      comp_sched.has_switch_next_comm_index(),
+  };
+
+  if (!std::all_of(has_fields.begin(), has_fields.end(), IsTrue)) {
+    return base::ErrStatus(
+        "RedactSchedSwitchHarness: missing required "
+        "FtraceEventBundle::CompactSched switch field.");
+  }
+
+  std::array<bool, 3> parse_errors = {false, false, false};
+
+  auto it_ts = comp_sched.switch_timestamp(&parse_errors.at(0));
+  auto it_pid = comp_sched.switch_next_pid(&parse_errors.at(1));
+  auto it_comm = comp_sched.switch_next_comm_index(&parse_errors.at(2));
+
+  if (std::any_of(parse_errors.begin(), parse_errors.end(), IsTrue)) {
+    return base::ErrStatus(
+        "RedactSchedSwitchHarness: failed to parse CompactSched.");
+  }
+
+  std::string scratch_str;
+
+  protozero::PackedVarInt packed_comm;
+  protozero::PackedVarInt packed_pid;
+
+  // The first it_ts value is an absolute value, all other values are delta
+  // values.
+  uint64_t ts = 0;
+
+  while (it_ts && it_pid && it_comm) {
+    ts += *it_ts;
+
+    auto pid = *it_pid;
+
+    auto comm_index = *it_comm;
+    auto comm = intern_table->Find(comm_index);
+
+    scratch_str.assign(comm);
+
+    for (const auto& transform : transforms_) {
+      transform->Transform(context, ts, cpu, &pid, &scratch_str);
+    }
+
+    auto found = intern_table->Push(scratch_str.data(), scratch_str.size());
+
+    if (found < 0) {
+      return base::ErrStatus(
+          "RedactSchedSwitchHarness: failed to insert string into intern "
+          "table.");
+    }
+
+    packed_comm.Append(found);
+    packed_pid.Append(pid);
+
+    ++it_ts;
+    ++it_pid;
+    ++it_comm;
+  }
+
+  if (it_ts || it_pid || it_comm) {
+    return base::ErrStatus(
+        "RedactSchedSwitchHarness: uneven associative arrays in "
+        "FtraceEventBundle::CompactSched (switch).");
+  }
+
+  message->set_switch_next_pid(packed_pid);
+  message->set_switch_next_comm_index(packed_comm);
+
+  // There's a lot of data in a compact sched message. Most of it is packed data
+  // and most of the data is not going to change. To avoid unpacking, doing
+  // nothing, and then packing... cheat. Find the fields and pass them as opaque
+  // blobs.
+  //
+  // kInternTableFieldNumber:         The intern table will be modified by both
+  //                                  switch events and waking events. It will
+  //                                  be written elsewhere.
+  //
+  // kSwitchNextPidFieldNumber:       The switch pid will change during thread
+  //                                  merging.
+  //
+  // kSwitchNextCommIndexFieldNumber: The switch comm value will change when
+  //                                  clearing thread names and replaced
+  //                                  during thread merging.
+
+  auto passed_through = {
+      Passthrough(comp_sched,
+                  protos::pbzero::FtraceEventBundle::CompactSched::
+                      kSwitchTimestampFieldNumber,
+                  message),
+      Passthrough(comp_sched,
+                  protos::pbzero::FtraceEventBundle::CompactSched::
+                      kSwitchPrevStateFieldNumber,
+                  message),
+      Passthrough(comp_sched,
+                  protos::pbzero::FtraceEventBundle::CompactSched::
+                      kSwitchNextPrioFieldNumber,
+                  message)};
+
+  if (!std::all_of(passed_through.begin(), passed_through.end(), IsTrue)) {
+    return base::ErrStatus(
+        "RedactSchedSwitchHarness: missing required "
+        "FtraceEventBundle::CompactSched switch field.");
+  }
+
+  return base::OkStatus();
+}
+
 // Switch event transformation: Clear the comm value if the thread/process is
 // not part of the target packet.
 base::Status ClearComms::Transform(const Context& context,
diff --git a/src/trace_redaction/redact_sched_switch.h b/src/trace_redaction/redact_sched_switch.h
index 55f9a75..36d6762 100644
--- a/src/trace_redaction/redact_sched_switch.h
+++ b/src/trace_redaction/redact_sched_switch.h
@@ -17,12 +17,33 @@
 #ifndef SRC_TRACE_REDACTION_REDACT_SCHED_SWITCH_H_
 #define SRC_TRACE_REDACTION_REDACT_SCHED_SWITCH_H_
 
-#include "protos/perfetto/trace/ftrace/ftrace_event_bundle.pbzero.h"
-#include "protos/perfetto/trace/ftrace/sched.pbzero.h"
 #include "src/trace_redaction/trace_redaction_framework.h"
 
+#include "protos/perfetto/trace/ftrace/ftrace_event_bundle.pbzero.h"
+#include "protos/perfetto/trace/ftrace/sched.pbzero.h"
+
 namespace perfetto::trace_redaction {
 
+class InternTable {
+ public:
+  int64_t Push(const char* data, size_t size);
+
+  std::string_view Find(size_t index) const;
+
+  const std::vector<std::string_view>& values() const {
+    return interned_comms_;
+  }
+
+ private:
+  constexpr static size_t kExpectedCommLength = 16;
+  constexpr static size_t kMaxElements = 4096;
+
+  std::array<char, kMaxElements * kExpectedCommLength> comms_;
+  size_t comms_length_ = 0;
+
+  std::vector<std::string_view> interned_comms_;
+};
+
 class SchedSwitchTransform {
  public:
   virtual ~SchedSwitchTransform();
@@ -65,6 +86,19 @@
       std::string* scratch_str,
       protos::pbzero::SchedSwitchFtraceEvent* message) const;
 
+  base::Status TransformCompSched(
+      const Context& context,
+      int32_t cpu,
+      protos::pbzero::FtraceEventBundle::CompactSched::Decoder& comp_sched,
+      protos::pbzero::FtraceEventBundle::CompactSched* message) const;
+
+  base::Status TransformCompSchedSwitch(
+      const Context& context,
+      int32_t cpu,
+      protos::pbzero::FtraceEventBundle::CompactSched::Decoder& comp_sched,
+      InternTable* intern_table,
+      protos::pbzero::FtraceEventBundle::CompactSched* message) const;
+
   std::vector<std::unique_ptr<SchedSwitchTransform>> transforms_;
 };
 
diff --git a/src/trace_redaction/redact_sched_switch_unittest.cc b/src/trace_redaction/redact_sched_switch_unittest.cc
index 88280d7..e2374d1 100644
--- a/src/trace_redaction/redact_sched_switch_unittest.cc
+++ b/src/trace_redaction/redact_sched_switch_unittest.cc
@@ -46,9 +46,21 @@
 constexpr auto kCommB = "comm-b";
 constexpr auto kCommNone = "";
 
+class NegatePid : public SchedSwitchTransform {
+ public:
+  base::Status Transform(const Context&,
+                         uint64_t,
+                         int32_t,
+                         int32_t* pid,
+                         std::string*) const override {
+    *pid = -(*pid);
+    return base::OkStatus();
+  }
+};
+
 }  // namespace
 
-class RedactSchedSwitchTest : public testing::Test {
+class RedactSchedSwitchFtraceEventTest : public testing::Test {
  protected:
   void SetUp() override {
     // Create a packet where two pids are swapping back-and-forth.
@@ -103,7 +115,7 @@
 
 // In this case, the target uid will be UID A. That means the comm values for
 // PID B should be removed, and the comm values for PID A should remain.
-TEST_F(RedactSchedSwitchTest, KeepsTargetCommValues) {
+TEST_F(RedactSchedSwitchFtraceEventTest, KeepsTargetCommValues) {
   RedactSchedSwitchHarness redact;
   redact.emplace_transform<ClearComms>();
 
@@ -137,7 +149,7 @@
 // This case is very similar to the "some are connected", expect that it
 // verifies all comm values will be removed when testing against an unused
 // uid.
-TEST_F(RedactSchedSwitchTest, RemovesAllCommsIfPackageDoesntExist) {
+TEST_F(RedactSchedSwitchFtraceEventTest, RemovesAllCommsIfPackageDoesntExist) {
   RedactSchedSwitchHarness redact;
   redact.emplace_transform<ClearComms>();
 
@@ -162,4 +174,181 @@
   ASSERT_EQ(events[1].sched_switch().next_comm(), kCommNone);
 }
 
+class RedactCompactSchedSwitchTest : public testing::Test {
+ protected:
+  void SetUp() override {
+    // PID A and PID B need to be attached to different packages (UID) so that
+    // its possible to include one but not the other.
+    context_.timeline = std::make_unique<ProcessThreadTimeline>();
+    context_.timeline->Append(
+        ProcessThreadTimeline::Event::Open(kTimeA, kPidA, kNoParent, kUidA));
+    context_.timeline->Append(
+        ProcessThreadTimeline::Event::Open(kTimeA, kPidB, kNoParent, kUidB));
+    context_.timeline->Sort();
+
+    auto* bundle = packet_.mutable_ftrace_events();
+    bundle->set_cpu(kCpuA);  // All switch events occur on this CPU
+
+    compact_sched = bundle->mutable_compact_sched();
+
+    compact_sched->add_intern_table(kCommA);
+    compact_sched->add_intern_table(kCommB);
+  }
+
+  void AddSwitchEvent(uint64_t ts,
+                      int32_t next_pid,
+                      int32_t prev_state,
+                      int32_t prio,
+                      uint32_t comm) {
+    compact_sched->add_switch_timestamp(ts);
+    compact_sched->add_switch_next_pid(next_pid);
+    compact_sched->add_switch_prev_state(prev_state);
+    compact_sched->add_switch_next_prio(prio);
+    compact_sched->add_switch_next_comm_index(comm);
+  }
+
+  protos::gen::TracePacket packet_;
+  protos::gen::FtraceEventBundle::CompactSched* compact_sched;
+
+  Context context_;
+};
+
+TEST_F(RedactCompactSchedSwitchTest, KeepsTargetCommValues) {
+  uint32_t kCommIndexA = 0;
+  uint32_t kCommIndexB = 1;
+
+  // The new entry will be appended to the table. Another primitive can be used
+  // to reduce the intern string table.
+  uint32_t kCommIndexNone = 2;
+
+  AddSwitchEvent(kTimeA, kPidA, 0, 0, kCommIndexA);
+  AddSwitchEvent(kTimeB, kPidB, 0, 0, kCommIndexB);
+
+  context_.package_uid = kUidA;
+
+  auto packet_buffer = packet_.SerializeAsString();
+
+  RedactSchedSwitchHarness redact;
+  redact.emplace_transform<ClearComms>();
+
+  ASSERT_OK(redact.Transform(context_, &packet_buffer));
+
+  protos::gen::TracePacket packet;
+  ASSERT_TRUE(packet.ParseFromString(packet_buffer));
+
+  const auto& bundle = packet.ftrace_events();
+  ASSERT_TRUE(bundle.has_compact_sched());
+
+  const auto& compact_sched = bundle.compact_sched();
+
+  // A new entry (empty string) should have been added to the table.
+  ASSERT_EQ(compact_sched.intern_table_size(), 3);
+  ASSERT_EQ(compact_sched.intern_table().back(), kCommNone);
+
+  ASSERT_EQ(compact_sched.switch_next_comm_index_size(), 2);
+  ASSERT_EQ(compact_sched.switch_next_comm_index().at(0), kCommIndexA);
+  ASSERT_EQ(compact_sched.switch_next_comm_index().at(1), kCommIndexNone);
+}
+
+// If two pids use the same comm, but one pid changes, the shared comm should
+// still be available.
+TEST_F(RedactCompactSchedSwitchTest, ChangingSharedCommonRetainsComm) {
+  uint32_t kCommIndexA = 0;
+
+  AddSwitchEvent(kTimeA, kPidA, 0, 0, kCommIndexA);
+  AddSwitchEvent(kTimeB, kPidB, 0, 0, kCommIndexA);
+
+  context_.package_uid = kUidA;
+
+  auto packet_buffer = packet_.SerializeAsString();
+
+  RedactSchedSwitchHarness redact;
+  redact.emplace_transform<ClearComms>();
+
+  ASSERT_OK(redact.Transform(context_, &packet_buffer));
+
+  protos::gen::TracePacket packet;
+  ASSERT_TRUE(packet.ParseFromString(packet_buffer));
+
+  const auto& bundle = packet.ftrace_events();
+  ASSERT_TRUE(bundle.has_compact_sched());
+
+  const auto& compact_sched = bundle.compact_sched();
+
+  // A new entry should have been appended, but comm A (previously shared)
+  // should still exist in the table.
+  ASSERT_EQ(compact_sched.intern_table_size(), 3);
+  ASSERT_EQ(compact_sched.intern_table().front(), kCommA);
+  ASSERT_EQ(compact_sched.intern_table().back(), kCommNone);
+}
+
+TEST_F(RedactCompactSchedSwitchTest, RemovesAllCommsIfPackageDoesntExist) {
+  uint32_t kCommIndexA = 0;
+  uint32_t kCommIndexB = 1;
+
+  // The new entry will be appended to the table. Another primitive can be used
+  // to reduce the intern string table.
+  uint32_t kCommIndexNone = 2;
+
+  AddSwitchEvent(kTimeA, kPidA, 0, 0, kCommIndexA);
+  AddSwitchEvent(kTimeB, kPidB, 0, 0, kCommIndexB);
+
+  context_.package_uid = kUidC;
+
+  auto packet_buffer = packet_.SerializeAsString();
+
+  RedactSchedSwitchHarness redact;
+  redact.emplace_transform<ClearComms>();
+
+  ASSERT_OK(redact.Transform(context_, &packet_buffer));
+
+  protos::gen::TracePacket packet;
+  ASSERT_TRUE(packet.ParseFromString(packet_buffer));
+
+  const auto& bundle = packet.ftrace_events();
+  ASSERT_TRUE(bundle.has_compact_sched());
+
+  const auto& compact_sched = bundle.compact_sched();
+
+  // A new entry (empty string) should have been added to the table.
+  ASSERT_EQ(compact_sched.intern_table_size(), 3);
+  ASSERT_EQ(compact_sched.intern_table().back(), kCommNone);
+
+  ASSERT_EQ(compact_sched.switch_next_comm_index_size(), 2);
+  ASSERT_EQ(compact_sched.switch_next_comm_index().at(0), kCommIndexNone);
+  ASSERT_EQ(compact_sched.switch_next_comm_index().at(1), kCommIndexNone);
+}
+
+TEST_F(RedactCompactSchedSwitchTest, CanChangePid) {
+  uint32_t kCommIndexA = 0;
+  uint32_t kCommIndexB = 1;
+
+  AddSwitchEvent(kTimeA, kPidA, 0, 0, kCommIndexA);
+  AddSwitchEvent(kTimeB, kPidB, 0, 0, kCommIndexB);
+
+  context_.package_uid = kUidA;  // This value is not needed to use NegatePid.
+
+  auto packet_buffer = packet_.SerializeAsString();
+
+  RedactSchedSwitchHarness redact;
+  redact.emplace_transform<NegatePid>();
+
+  ASSERT_OK(redact.Transform(context_, &packet_buffer));
+
+  protos::gen::TracePacket packet;
+  ASSERT_TRUE(packet.ParseFromString(packet_buffer));
+
+  const auto& bundle = packet.ftrace_events();
+  ASSERT_TRUE(bundle.has_compact_sched());
+
+  const auto& compact_sched = bundle.compact_sched();
+
+  // The intern table should not change.
+  ASSERT_EQ(compact_sched.intern_table_size(), 2);
+
+  ASSERT_EQ(compact_sched.switch_next_pid_size(), 2);
+  ASSERT_EQ(compact_sched.switch_next_pid().at(0), -kPidA);
+  ASSERT_EQ(compact_sched.switch_next_pid().at(1), -kPidB);
+}
+
 }  // namespace perfetto::trace_redaction