Trace Redaction - Remove Trace Packets

Most packet types are not needed to investigate most issues. Because
they are unnecessary, those packets are dropped to ensure that no
sensitive information gets exposed.

This transform primitive looks at each packet and checks if the packet's
data type belongs to an allowlist. Since a packet can only have one data
type (as per the proto), it is safe to keep / drop the whole packet
based off of the packet's data type.

Bug: 318576092
Change-Id: Ic1e957784b6d051c7560bc0d35b57f0e35daa40e
diff --git a/Android.bp b/Android.bp
index ece4ad0..4fe80b2 100644
--- a/Android.bp
+++ b/Android.bp
@@ -12462,7 +12462,9 @@
     name: "perfetto_src_trace_redaction_trace_redaction",
     srcs: [
         "src/trace_redaction/find_package_uid.cc",
+        "src/trace_redaction/populate_allow_lists.cc",
         "src/trace_redaction/prune_package_list.cc",
+        "src/trace_redaction/scrub_trace_packet.cc",
         "src/trace_redaction/trace_redaction_framework.cc",
         "src/trace_redaction/trace_redactor.cc",
     ],
@@ -12474,6 +12476,7 @@
     srcs: [
         "src/trace_redaction/find_package_uid_unittest.cc",
         "src/trace_redaction/prune_package_list_unittest.cc",
+        "src/trace_redaction/scrub_trace_packet_unittest.cc",
     ],
 }
 
diff --git a/src/trace_redaction/BUILD.gn b/src/trace_redaction/BUILD.gn
index 4536331..cff9588 100644
--- a/src/trace_redaction/BUILD.gn
+++ b/src/trace_redaction/BUILD.gn
@@ -18,8 +18,12 @@
   sources = [
     "find_package_uid.cc",
     "find_package_uid.h",
+    "populate_allow_lists.cc",
+    "populate_allow_lists.h",
     "prune_package_list.cc",
     "prune_package_list.h",
+    "scrub_trace_packet.cc",
+    "scrub_trace_packet.h",
     "trace_redaction_framework.cc",
     "trace_redaction_framework.h",
     "trace_redactor.cc",
@@ -58,12 +62,14 @@
   sources = [
     "find_package_uid_unittest.cc",
     "prune_package_list_unittest.cc",
+    "scrub_trace_packet_unittest.cc",
   ]
   deps = [
     ":trace_redaction",
     "../../gn:default_deps",
     "../../gn:gtest_and_gmock",
     "../../protos/perfetto/trace:non_minimal_cpp",
+    "../../protos/perfetto/trace:zero",
     "../../protos/perfetto/trace/android:cpp",
     "../../protos/perfetto/trace/ps:cpp",
     "../../protos/perfetto/trace/ps:zero",
diff --git a/src/trace_redaction/populate_allow_lists.cc b/src/trace_redaction/populate_allow_lists.cc
new file mode 100644
index 0000000..b14b1f6
--- /dev/null
+++ b/src/trace_redaction/populate_allow_lists.cc
@@ -0,0 +1,52 @@
+/*
+ * Copyright (C) 2024 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "src/trace_redaction/populate_allow_lists.h"
+
+#include "perfetto/base/status.h"
+#include "src/trace_redaction/trace_redaction_framework.h"
+
+#include "protos/perfetto/trace/trace_packet.pbzero.h"
+
+namespace perfetto::trace_redaction {
+
+base::Status PopulateAllowlists::Build(Context* context) const {
+  if (!context->trace_packet_allow_list.empty()) {
+    return base::ErrStatus("Trace packet allow-list should be empty.");
+  }
+
+  context->trace_packet_allow_list = {
+      protos::pbzero::TracePacket::kProcessTreeFieldNumber,
+      protos::pbzero::TracePacket::kProcessStatsFieldNumber,
+      protos::pbzero::TracePacket::kClockSnapshotFieldNumber,
+      protos::pbzero::TracePacket::kSysStatsFieldNumber,
+      protos::pbzero::TracePacket::kTraceConfigFieldNumber,
+      protos::pbzero::TracePacket::kTraceStatsFieldNumber,
+      protos::pbzero::TracePacket::kSystemInfoFieldNumber,
+      protos::pbzero::TracePacket::kTriggerFieldNumber,
+      protos::pbzero::TracePacket::kCpuInfoFieldNumber,
+      protos::pbzero::TracePacket::kServiceEventFieldNumber,
+      protos::pbzero::TracePacket::kInitialDisplayStateFieldNumber,
+      protos::pbzero::TracePacket::kFrameTimelineEventFieldNumber,
+      protos::pbzero::TracePacket::kAndroidSystemPropertyFieldNumber,
+      protos::pbzero::TracePacket::kSynchronizationMarkerFieldNumber,
+      protos::pbzero::TracePacket::kFtraceEventsFieldNumber,
+  };
+
+  return base::OkStatus();
+}
+
+}  // namespace perfetto::trace_redaction
diff --git a/src/trace_redaction/populate_allow_lists.h b/src/trace_redaction/populate_allow_lists.h
new file mode 100644
index 0000000..5a05d42
--- /dev/null
+++ b/src/trace_redaction/populate_allow_lists.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (C) 2024 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef SRC_TRACE_REDACTION_POPULATE_ALLOW_LISTS_H_
+#define SRC_TRACE_REDACTION_POPULATE_ALLOW_LISTS_H_
+
+#include "perfetto/base/status.h"
+#include "src/trace_redaction/trace_redaction_framework.h"
+
+namespace perfetto::trace_redaction {
+
+// Populates the different allow-lists needed to remove data from the trace.
+// Configuration data in the context can be used to change the contents of the
+// lists.
+class PopulateAllowlists final : public BuildPrimitive {
+ public:
+  base::Status Build(Context* context) const override;
+};
+
+}  // namespace perfetto::trace_redaction
+
+#endif  // SRC_TRACE_REDACTION_POPULATE_ALLOW_LISTS_H_
diff --git a/src/trace_redaction/scrub_trace_packet.cc b/src/trace_redaction/scrub_trace_packet.cc
new file mode 100644
index 0000000..e4f03cf
--- /dev/null
+++ b/src/trace_redaction/scrub_trace_packet.cc
@@ -0,0 +1,64 @@
+/*
+ * Copyright (C) 2024 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "src/trace_redaction/scrub_trace_packet.h"
+
+namespace perfetto::trace_redaction {
+// The TracePacket message has a simple structure. At its core it is one
+// sub-message (e.g. ProcessTree) plus some additional context (e.g.
+// timestamp). This makes the per-packet check binary - does it contain one of
+// the allow-listed messages?
+//
+// This transform will be called P times, where P is the number of packets in
+// the trace.
+//
+// There are A packet types in the allow-list. The allow-list is a set with
+// O(log A) lookup. Since the allow-list is relatively small and constant in
+// size, each lookup can be considered constant time.
+//
+// There are at most F fields, where F is the max number of concurrent fields
+// in a trace packet. Given the limit, this can be considered constant.
+//
+// Altogether, this implementation can be considered linear in relation to the
+// trace size.
+base::Status ScrubTracePacket::Transform(const Context& context,
+                                         std::string* packet) const {
+  if (packet == nullptr || packet->empty()) {
+    return base::ErrStatus("Cannot scrub null or empty trace packet.");
+  }
+
+  const auto& allow_list = context.trace_packet_allow_list;
+
+  if (allow_list.empty()) {
+    return base::ErrStatus("Cannot scrub trace packets, missing allow-list.");
+  }
+
+  protozero::ProtoDecoder d(*packet);
+
+  // A packet should only have one data type (proto oneof), but there are other
+  // values in the packet (e.g. timestamp). If any field is in the allow-list,
+  // keep the whole trace packet unchanged.
+  for (auto f = d.ReadField(); f.valid(); f = d.ReadField()) {
+    if (allow_list.count(f.id()) != 0) {
+      return base::OkStatus();
+    }
+  }
+
+  packet->clear();  // No allow-listed field found: drop the whole packet.
+  return base::OkStatus();
+}
+
+}  // namespace perfetto::trace_redaction
diff --git a/src/trace_redaction/scrub_trace_packet.h b/src/trace_redaction/scrub_trace_packet.h
new file mode 100644
index 0000000..06710d3
--- /dev/null
+++ b/src/trace_redaction/scrub_trace_packet.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright (C) 2024 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef SRC_TRACE_REDACTION_SCRUB_TRACE_PACKET_H_
+#define SRC_TRACE_REDACTION_SCRUB_TRACE_PACKET_H_
+
+#include "src/trace_redaction/trace_redaction_framework.h"
+
+#include "protos/perfetto/trace/trace_packet.pbzero.h"
+
+namespace perfetto::trace_redaction {
+
+// Drops whole trace packets based on an allow-list (e.g. retain ProcessTree
+// packets).
+class ScrubTracePacket final : public TransformPrimitive {
+ public:
+  base::Status Transform(const Context& context,
+                         std::string* packet) const override;
+};
+
+}  // namespace perfetto::trace_redaction
+
+#endif  // SRC_TRACE_REDACTION_SCRUB_TRACE_PACKET_H_
diff --git a/src/trace_redaction/scrub_trace_packet_unittest.cc b/src/trace_redaction/scrub_trace_packet_unittest.cc
new file mode 100644
index 0000000..82b99d4
--- /dev/null
+++ b/src/trace_redaction/scrub_trace_packet_unittest.cc
@@ -0,0 +1,133 @@
+/*
+ * Copyright (C) 2024 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <string>
+
+#include "src/base/test/status_matchers.h"
+#include "src/trace_redaction/scrub_trace_packet.h"
+#include "test/gtest_and_gmock.h"
+
+#include "protos/perfetto/trace/ps/process_tree.gen.h"
+#include "protos/perfetto/trace/trace_packet.pbzero.h"
+
+namespace perfetto::trace_redaction {
+
+TEST(ScrubTracePacketTest, ReturnErrorForNullPacket) {
+  // Have something in the allow-list to avoid that error.
+  Context context;
+  context.trace_packet_allow_list.insert(
+      protos::pbzero::TracePacket::kProcessTreeFieldNumber);
+
+  ScrubTracePacket scrub;
+  ASSERT_FALSE(scrub.Transform(context, nullptr).ok());
+}
+
+TEST(ScrubTracePacketTest, ReturnErrorForEmptyPacket) {
+  // Have something in the allow-list to avoid that error.
+  Context context;
+  context.trace_packet_allow_list.insert(
+      protos::pbzero::TracePacket::kProcessTreeFieldNumber);
+
+  std::string packet_str = "";
+
+  ScrubTracePacket scrub;
+  ASSERT_FALSE(scrub.Transform(context, &packet_str).ok());
+}
+
+TEST(ScrubTracePacketTest, ReturnErrorForEmptyAllowList) {
+  // The context will have no allow-list entries. ScrubTracePacket should fail.
+  Context context;
+
+  protos::gen::TracePacket packet;
+  std::string packet_str = packet.SerializeAsString();
+
+  ScrubTracePacket scrub;
+  ASSERT_FALSE(scrub.Transform(context, &packet_str).ok());
+}
+
+// The whole packet should be dropped (cleared) when it has a data type not
+// included in the allow-list.
+TEST(ScrubTracePacketTest, DropsOutsiderPacketType) {
+  protos::gen::TracePacket packet;
+  packet.set_timestamp(1234);
+  packet.mutable_android_camera_frame_event();  // Creates and sets data.
+
+  std::string packet_str = packet.SerializeAsString();
+  ASSERT_GT(packet_str.size(), 0u);
+
+  // Populate the allow-list with something that doesn't match the data in the
+  // packet.
+  Context context;
+  context.trace_packet_allow_list.insert(
+      protos::pbzero::TracePacket::kProcessTreeFieldNumber);
+
+  ScrubTracePacket scrub;
+  ASSERT_OK(scrub.Transform(context, &packet_str));
+
+  ASSERT_TRUE(packet_str.empty());
+}
+
+// Typically a trace packet should always have a data type (e.g. ProcessTree),
+// but it is possible that another transformation has cleared that data. If
+// that's the case, this primitive should treat it as an outsider.
+TEST(ScrubTracePacketTest, DropsPacketsWithNoType) {
+  protos::gen::TracePacket packet;
+  packet.set_timestamp(1234);
+
+  std::string packet_str = packet.SerializeAsString();
+  ASSERT_GT(packet_str.size(), 0u);
+
+  Context context;
+  context.trace_packet_allow_list.insert(
+      protos::pbzero::TracePacket::kProcessTreeFieldNumber);
+
+  ScrubTracePacket scrub;
+  ASSERT_OK(scrub.Transform(context, &packet_str));
+
+  ASSERT_TRUE(packet_str.empty());
+}
+
+// A packet should not change (at all) if it's in the allow-list.
+TEST(ScrubTracePacketTest, SkipsAllowedPacket) {
+  protos::gen::TracePacket packet;
+  packet.set_timestamp(1234);
+
+  // Add a process tree to the packet. Process trees are in the allow-list.
+  auto* process = packet.mutable_process_tree()->add_processes();
+  process->set_uid(0);
+  process->set_ppid(3);
+  process->set_pid(7);
+
+  std::string original_packet_str = packet.SerializeAsString();
+  ASSERT_GT(original_packet_str.size(), 0u);
+
+  // Make a copy that can be modified by the primitive (even though it shouldn't
+  // be).
+  std::string mutable_packet_str(original_packet_str);
+
+  Context context;
+  context.trace_packet_allow_list.insert(
+      protos::pbzero::TracePacket::kProcessTreeFieldNumber);
+
+  ScrubTracePacket scrub;
+  ASSERT_OK(scrub.Transform(context, &mutable_packet_str));
+
+  // The transform shouldn't have changed the string, so the string before and
+  // after should match.
+  ASSERT_EQ(original_packet_str, mutable_packet_str);
+}
+
+}  // namespace perfetto::trace_redaction
diff --git a/src/trace_redaction/trace_redaction_framework.h b/src/trace_redaction/trace_redaction_framework.h
index c9e7fb4..b481961 100644
--- a/src/trace_redaction/trace_redaction_framework.h
+++ b/src/trace_redaction/trace_redaction_framework.h
@@ -17,10 +17,14 @@
 #ifndef SRC_TRACE_REDACTION_TRACE_REDACTION_FRAMEWORK_H_
 #define SRC_TRACE_REDACTION_TRACE_REDACTION_FRAMEWORK_H_
 
+#include <cstdint>
+#include <optional>
 #include <string>
 #include <vector>
 
+#include "perfetto/base/flat_set.h"
 #include "perfetto/ext/base/status_or.h"
+
 #include "protos/perfetto/trace/trace_packet.gen.h"
 #include "protos/perfetto/trace/trace_packet.pbzero.h"
 
@@ -89,6 +93,31 @@
   //      uid: 1010113
   //    }
   std::optional<uint64_t> package_uid;
+
+  // Trace packets contain a "one of" entry called "data". This field can be
+  // thought of as the message. A trace packet will have other fields alongside
+  // "data" (e.g. "timestamp"). These fields can be thought of as metadata.
+  //
+  // A message should be removed if:
+  //
+  //  ...we know it contains too much sensitive information
+  //
+  //  ...we know it contains sensitive information and we know how to remove
+  //        the sensitive information, but don't have the resources to do it
+  //        right now
+  //
+  //  ...we know it provides little value
+  //
+  // "trace_packet_allow_list" contains the field ids of trace packets we want
+  // to pass onto later transformations. Examples are:
+  //
+  //    - protos::pbzero::TracePacket::kProcessTreeFieldNumber
+  //    - protos::pbzero::TracePacket::kProcessStatsFieldNumber
+  //    - protos::pbzero::TracePacket::kClockSnapshotFieldNumber
+  //
+  // Because "data" is a "one of", if no field in "trace_packet_allow_list" can
+  // be found, the packet should be removed.
+  base::FlatSet<uint32_t> trace_packet_allow_list;
 };
 
 // Responsible for extracting low-level data from the trace and storing it in
diff --git a/src/trace_redaction/trace_redactor.cc b/src/trace_redaction/trace_redactor.cc
index eb20c27..122d2d9 100644
--- a/src/trace_redaction/trace_redactor.cc
+++ b/src/trace_redaction/trace_redactor.cc
@@ -187,12 +187,25 @@
     auto packet = packet_it->as_std_string();
 
     for (const auto& transformer : transformers_) {
+      // If the packet has been cleared, it means a transformation has removed
+      // it from the trace. Stop processing it. This saves transforms from
+      // having to check and handle empty packets.
+      if (packet.empty()) {
+        break;
+      }
+
       if (auto status = transformer->Transform(context, &packet);
           !status.ok()) {
         return status;
       }
     }
 
+    // The packet has been removed from the trace. Don't write an empty packet
+    // to disk.
+    if (packet.empty()) {
+      continue;
+    }
+
     protozero::HeapBuffered<> serializer;
     auto* packet_message =
         serializer->BeginNestedMessage<TracePacket>(Trace::kPacketFieldNumber);