blob: 96f7bf1b95ef039a352b90adb03045061ea547a0 [file] [log] [blame]
/*
* Copyright (C) 2024 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef SRC_TRACE_REDACTION_TRACE_REDACTION_FRAMEWORK_H_
#define SRC_TRACE_REDACTION_TRACE_REDACTION_FRAMEWORK_H_
#include <cstdint>
#include <memory>
#include <optional>
#include <string>
#include "perfetto/base/flat_set.h"
#include "perfetto/base/status.h"
#include "perfetto/ext/base/status_or.h"
#include "protos/perfetto/trace/trace_packet.pbzero.h"
#include "src/trace_redaction/process_thread_timeline.h"
namespace perfetto::trace_redaction {
// Multiple packages can share the same name. This is common when a device has
// multiple users. When this happens, each instance shares the 5 least
// significant digits.
constexpr uint64_t NormalizeUid(uint64_t uid) {
return uid % 1000000;
}
// Primitives should be stateless. All state should be stored in the context.
// Primitives should depend on data in the context, not the origin of the data.
// This allows primitives to be swapped out or work together to populate data
// needed by another primitive.
//
// For this to work, primitives are divided into three types:
//
// `CollectPrimitive` : Reads data from trace packets and saves low-level data
// in the context.
//
// `BuildPrimitive` : Reads low-level data from the context and builds
// high-level (read-optimized) data structures.
//
// `TransformPrimitive`: Reads high-level data from the context and modifies
// trace packets.
class Context {
public:
// The package that should not be redacted. This must be populated before
// running any primitives.
std::string package_name;
// The package list maps a package name to a uid. It is possible for multiple
// package names to map to the same uid, for example:
//
// packages {
// name: "com.google.android.gms"
// uid: 10113
// debuggable: false
// profileable_from_shell: false
// version_code: 235013038
// }
// packages {
// name: "com.google.android.gsf"
// uid: 10113
// debuggable: false
// profileable_from_shell: false
// version_code: 34
// }
//
// The process tree maps processes to packages via the uid value. However
// multiple processes can map to the same uid, only differed by some multiple
// of 100000, for example:
//
// processes {
// pid: 18176
// ppid: 904
// cmdline: "com.google.android.gms.persistent"
// uid: 10113
// }
// processes {
// pid: 21388
// ppid: 904
// cmdline: "com.google.android.gms.persistent"
// uid: 1010113
// }
std::optional<uint64_t> package_uid;
// Trace packets contain a "one of" entry called "data". This field can be
// thought of as the message. A track packet with have other fields along
// side "data" (e.g. "timestamp"). These fields can be thought of as metadata.
//
// A message should be removed if:
//
// ...we know it contains too much sensitive information
//
// ...we know it contains sensitive information and we know how to remove
// the sensitive information, but don't have the resources to do it
// right now
//
// ...we know it provide little value
//
// "trace_packet_allow_list" contains the field ids of trace packets we want
// to pass onto later transformations. Examples are:
//
// - protos::pbzero::TracePacket::kProcessTreeFieldNumber
// - protos::pbzero::TracePacket::kProcessStatsFieldNumber
// - protos::pbzero::TracePacket::kClockSnapshotFieldNumber
//
// Because "data" is a "one of", if no field in "trace_packet_allow_list" can
// be found, it packet should be removed.
base::FlatSet<uint32_t> trace_packet_allow_list;
// Ftrace packets contain a "one of" entry called "event". Within the scope of
// a ftrace event, the event can be considered the payload and other other
// values can be considered metadata (e.g. timestamp and pid).
//
// A ftrace event should be removed if:
//
// ... we know it contains too much sensitive information
//
// ... we know it contains sensitive information and we have some ideas on
// to remove it, but don't have the resources to do it right now (e.g.
// print).
//
// ... we don't see value in including it
//
// "ftrace_packet_allow_list" contains field ids of ftrace packets that we
// want to pass onto later transformations. An example would be:
//
// ... kSchedWakingFieldNumber because it contains cpu activity information
//
// Compared against track days, the rules around removing ftrace packets are
// complicated because...
//
// packet {
// ftrace_packets { <-- ONE-OF (1)
// event { <-- REPEATED (2)
// cpu_idle { } <-- ONE-OF (3)
// }
// event { ... }
// }
// }
//
// 1. A ftrace packet will populate the one-of slot in the trace packet.
//
// 2. A ftrace packet can have multiple events
//
// 3. In this example, a cpu_idle event populates the one-of slot in the
// ftrace event
base::FlatSet<uint32_t> ftrace_packet_allow_list;
// The timeline is a query-focused data structure that connects a pid to a
// uid at specific point in time.
//
// A timeline has two modes:
//
// 1. write-only
// 2. read-only
//
// Attempting to use the timeline incorrectly results in undefined behaviour.
//
// To use a timeline, the primitive needs to be "built" (add events) and then
// "sealed" (transition to read-only).
//
// A timeline must have Sort() called to change from write-only to read-only.
// After Sort(), Flatten() and Reduce() can be called (optional) to improve
// the practical look-up times (compared to theoretical look-up times).
std::unique_ptr<ProcessThreadTimeline> timeline;
};
// Responsible for extracting low-level data from the trace and storing it in
// the context.
class CollectPrimitive {
public:
// When a collect primitive has collected all necessary information, it can
// stop processing packets by returning kRetire. If the primitives wants to
// continue processing packets, it will return kNextPacket.
//
// If a collector encounters an unrecoverable error, base::ErrStatus() is
// returned.
enum class ContinueCollection : bool { kRetire = false, kNextPacket = true };
virtual ~CollectPrimitive();
// Processes a packet and writes low-level data to the context. Returns
// kContinue if the primitive wants more data (i.e. next packet).
virtual base::StatusOr<ContinueCollection> Collect(
const protos::pbzero::TracePacket::Decoder& packet,
Context* context) const = 0;
};
// Responsible for converting low-level data from the context and storing it in
// the context (high-level data).
class BuildPrimitive {
public:
virtual ~BuildPrimitive();
// Reads low-level data from the context and writes high-level data to the
// context.
virtual base::Status Build(Context* context) const = 0;
};
// Responsible for modifying trace packets using data from the context.
class TransformPrimitive {
public:
virtual ~TransformPrimitive();
// Modifies a packet using data from the context.
virtual base::Status Transform(const Context& context,
std::string* packet) const = 0;
};
} // namespace perfetto::trace_redaction
#endif // SRC_TRACE_REDACTION_TRACE_REDACTION_FRAMEWORK_H_