blob: a09f13dfa15026b7ebeef81f7d7bd11da2ec9220 [file] [log] [blame]
/*
* Copyright (C) 2017 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "src/traced/probes/ftrace/ftrace_controller.h"
#include <fcntl.h>
#include <poll.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/utsname.h>
#include <sys/wait.h>
#include <unistd.h>
#include <cstdint>
#include <limits>
#include <memory>
#include <optional>
#include <string>
#include <tuple>
#include <utility>
#include "perfetto/base/build_config.h"
#include "perfetto/base/logging.h"
#include "perfetto/base/time.h"
#include "perfetto/ext/base/file_utils.h"
#include "perfetto/ext/base/metatrace.h"
#include "perfetto/ext/base/scoped_file.h"
#include "perfetto/ext/base/string_utils.h"
#include "perfetto/ext/tracing/core/trace_writer.h"
#include "src/kallsyms/kernel_symbol_map.h"
#include "src/kallsyms/lazy_kernel_symbolizer.h"
#include "src/traced/probes/ftrace/atrace_hal_wrapper.h"
#include "src/traced/probes/ftrace/cpu_reader.h"
#include "src/traced/probes/ftrace/cpu_stats_parser.h"
#include "src/traced/probes/ftrace/event_info.h"
#include "src/traced/probes/ftrace/event_info_constants.h"
#include "src/traced/probes/ftrace/ftrace_config_muxer.h"
#include "src/traced/probes/ftrace/ftrace_config_utils.h"
#include "src/traced/probes/ftrace/ftrace_data_source.h"
#include "src/traced/probes/ftrace/ftrace_metadata.h"
#include "src/traced/probes/ftrace/ftrace_procfs.h"
#include "src/traced/probes/ftrace/ftrace_stats.h"
#include "src/traced/probes/ftrace/proto_translation_table.h"
#include "src/traced/probes/ftrace/vendor_tracepoints.h"
namespace perfetto {
namespace {
constexpr uint32_t kDefaultTickPeriodMs = 100;
constexpr uint32_t kPollBackingTickPeriodMs = 1000;
constexpr uint32_t kMinTickPeriodMs = 1;
constexpr uint32_t kMaxTickPeriodMs = 1000 * 60;
constexpr int kPollRequiredMajorVersion = 6;
constexpr int kPollRequiredMinorVersion = 9;
// Read at most this many pages of data per cpu per read task. If we hit this
// limit on at least one cpu, we stop and repost the read task, letting other
// tasks get some cpu time before continuing reading.
constexpr size_t kMaxPagesPerCpuPerReadTick = 256; // 1 MB per cpu
bool WriteToFile(const char* path, const char* str) {
auto fd = base::OpenFile(path, O_WRONLY);
if (!fd)
return false;
const size_t str_len = strlen(str);
return base::WriteAll(*fd, str, str_len) == static_cast<ssize_t>(str_len);
}
bool ClearFile(const char* path) {
auto fd = base::OpenFile(path, O_WRONLY | O_TRUNC);
return !!fd;
}
std::optional<int64_t> ReadFtraceNowTs(const base::ScopedFile& cpu_stats_fd) {
PERFETTO_CHECK(cpu_stats_fd);
char buf[512];
ssize_t res = PERFETTO_EINTR(pread(*cpu_stats_fd, buf, sizeof(buf) - 1, 0));
if (res <= 0)
return std::nullopt;
buf[res] = '\0';
FtraceCpuStats stats{};
DumpCpuStats(buf, &stats);
return static_cast<int64_t>(stats.now_ts * 1000 * 1000 * 1000);
}
std::map<std::string, std::vector<GroupAndName>> GetAtraceVendorEvents(
FtraceProcfs* tracefs) {
#if PERFETTO_BUILDFLAG(PERFETTO_OS_ANDROID)
if (base::FileExists(vendor_tracepoints::kCategoriesFile)) {
std::map<std::string, std::vector<GroupAndName>> vendor_evts;
base::Status status =
vendor_tracepoints::DiscoverAccessibleVendorTracepointsWithFile(
vendor_tracepoints::kCategoriesFile, &vendor_evts, tracefs);
if (!status.ok()) {
PERFETTO_ELOG("Cannot load vendor categories: %s", status.c_message());
}
return vendor_evts;
} else {
AtraceHalWrapper hal;
return vendor_tracepoints::DiscoverVendorTracepointsWithHal(&hal, tracefs);
}
#else
base::ignore_result(tracefs);
return {};
#endif
}
struct AndroidGkiVersion {
uint64_t version = 0;
uint64_t patch_level = 0;
uint64_t sub_level = 0;
uint64_t release = 0;
uint64_t kmi_gen = 0;
};
#define ANDROID_GKI_UNAME_FMT \
"%" PRIu64 ".%" PRIu64 ".%" PRIu64 "-android%" PRIu64 "-%" PRIu64
std::optional<AndroidGkiVersion> ParseAndroidGkiVersion(const char* s) {
AndroidGkiVersion v = {};
if (sscanf(s, ANDROID_GKI_UNAME_FMT, &v.version, &v.patch_level, &v.sub_level,
&v.release, &v.kmi_gen) != 5) {
return std::nullopt;
}
return v;
}
} // namespace
// Method of last resort to reset ftrace state.
// We don't know what state the rest of the system and process is so as far
// as possible avoid allocations.
bool HardResetFtraceState() {
for (const char* const* item = FtraceProcfs::kTracingPaths; *item; ++item) {
std::string prefix(*item);
PERFETTO_CHECK(base::EndsWith(prefix, "/"));
bool res = true;
res &= WriteToFile((prefix + "tracing_on").c_str(), "0");
res &= WriteToFile((prefix + "buffer_size_kb").c_str(), "4");
// Not checking success because these files might not be accessible on
// older or release builds of Android:
WriteToFile((prefix + "events/enable").c_str(), "0");
WriteToFile((prefix + "events/raw_syscalls/filter").c_str(), "0");
WriteToFile((prefix + "current_tracer").c_str(), "nop");
res &= ClearFile((prefix + "trace").c_str());
if (res)
return true;
}
return false;
}
// static
std::unique_ptr<FtraceController> FtraceController::Create(
base::TaskRunner* runner,
Observer* observer) {
std::unique_ptr<FtraceProcfs> ftrace_procfs =
FtraceProcfs::CreateGuessingMountPoint("");
if (!ftrace_procfs)
return nullptr;
std::unique_ptr<ProtoTranslationTable> table = ProtoTranslationTable::Create(
ftrace_procfs.get(), GetStaticEventInfo(), GetStaticCommonFieldsInfo());
if (!table)
return nullptr;
auto atrace_wrapper = std::make_unique<AtraceWrapperImpl>();
std::map<std::string, std::vector<GroupAndName>> vendor_evts =
GetAtraceVendorEvents(ftrace_procfs.get());
SyscallTable syscalls = SyscallTable::FromCurrentArch();
auto muxer = std::make_unique<FtraceConfigMuxer>(
ftrace_procfs.get(), atrace_wrapper.get(), table.get(),
std::move(syscalls), vendor_evts);
return std::unique_ptr<FtraceController>(new FtraceController(
std::move(ftrace_procfs), std::move(table), std::move(atrace_wrapper),
std::move(muxer), runner, observer));
}
FtraceController::FtraceController(
std::unique_ptr<FtraceProcfs> ftrace_procfs,
std::unique_ptr<ProtoTranslationTable> table,
std::unique_ptr<AtraceWrapper> atrace_wrapper,
std::unique_ptr<FtraceConfigMuxer> muxer,
base::TaskRunner* task_runner,
Observer* observer)
: task_runner_(task_runner),
observer_(observer),
atrace_wrapper_(std::move(atrace_wrapper)),
primary_(std::move(ftrace_procfs), std::move(table), std::move(muxer)),
weak_factory_(this) {}
FtraceController::~FtraceController() {
while (!data_sources_.empty()) {
RemoveDataSource(*data_sources_.begin());
}
PERFETTO_DCHECK(data_sources_.empty());
PERFETTO_DCHECK(primary_.started_data_sources.empty());
PERFETTO_DCHECK(primary_.cpu_readers.empty());
PERFETTO_DCHECK(secondary_instances_.empty());
}
uint64_t FtraceController::NowMs() const {
return static_cast<uint64_t>(base::GetWallTimeMs().count());
}
template <typename F>
void FtraceController::ForEachInstance(F fn) {
fn(&primary_);
for (auto& kv : secondary_instances_) {
fn(kv.second.get());
}
}
void FtraceController::StartIfNeeded(FtraceInstanceState* instance,
const std::string& instance_name) {
if (buffer_watermark_support_ == PollSupport::kUntested) {
buffer_watermark_support_ = VerifyKernelSupportForBufferWatermark();
}
// If instance is already active, then at most we need to update the buffer
// poll callbacks. The periodic |ReadTick| will pick up any updates to the
// period the next time it executes.
if (instance->started_data_sources.size() > 1) {
UpdateBufferWatermarkWatches(instance, instance_name);
return;
}
// Lazily allocate the memory used for reading & parsing ftrace. In the case
// of multiple ftrace instances, this might already be valid.
parsing_mem_.AllocateIfNeeded();
const auto ftrace_clock = instance->ftrace_config_muxer->ftrace_clock();
size_t num_cpus = instance->ftrace_procfs->NumberOfCpus();
PERFETTO_CHECK(instance->cpu_readers.empty());
instance->cpu_readers.reserve(num_cpus);
for (size_t cpu = 0; cpu < num_cpus; cpu++) {
instance->cpu_readers.emplace_back(
cpu, instance->ftrace_procfs->OpenPipeForCpu(cpu),
instance->table.get(), &symbolizer_, ftrace_clock,
&ftrace_clock_snapshot_);
}
// Special case for primary instance: if not using the boot clock, take
// manual clock snapshots so that the trace parser can do a best effort
// conversion back to boot. This is primarily for old kernels that predate
// boot support, and therefore default to "global" clock.
if (instance == &primary_ &&
ftrace_clock != protos::pbzero::FtraceClock::FTRACE_CLOCK_UNSPECIFIED) {
cpu_zero_stats_fd_ = primary_.ftrace_procfs->OpenCpuStats(0 /* cpu */);
MaybeSnapshotFtraceClock();
}
// Set up poll callbacks for the buffers if requested by at least one DS.
UpdateBufferWatermarkWatches(instance, instance_name);
// Start a new repeating read task (even if there is already one posted due
// to a different ftrace instance). Any old tasks will stop due to generation
// checks.
auto generation = ++tick_generation_;
auto tick_period_ms = GetTickPeriodMs();
auto weak_this = weak_factory_.GetWeakPtr();
task_runner_->PostDelayedTask(
[weak_this, generation] {
if (weak_this)
weak_this->ReadTick(generation);
},
tick_period_ms - (NowMs() % tick_period_ms));
}
// We handle the ftrace buffers in a repeating task (ReadTick). On a given tick,
// we iterate over all per-cpu buffers, parse their contents, and then write out
// the serialized packets. This is handled by |CpuReader| instances, which
// attempt to read from their respective per-cpu buffer fd until they catch up
// to the head of the buffer, or hit a transient error.
//
// The readers work in batches of |kParsingBufferSizePages| pages for cache
// locality, and to limit memory usage.
//
// However, the reading happens on the primary thread, shared with the rest of
// the service (including ipc). If there is a lot of ftrace data to read, we
// want to yield to the event loop, re-enqueueing a continuation task at the end
// of the immediate queue (letting other enqueued tasks to run before
// continuing). Therefore we introduce |kMaxPagesPerCpuPerReadTick|.
void FtraceController::ReadTick(int generation) {
metatrace::ScopedEvent evt(metatrace::TAG_FTRACE,
metatrace::FTRACE_READ_TICK);
if (generation != tick_generation_ || GetStartedDataSourcesCount() == 0) {
return;
}
MaybeSnapshotFtraceClock();
// Read all per-cpu buffers.
bool all_cpus_done = ReadPassForInstance(&primary_);
ForEachInstance([&](FtraceInstanceState* instance) {
all_cpus_done &= ReadPassForInstance(instance);
});
observer_->OnFtraceDataWrittenIntoDataSourceBuffers();
auto weak_this = weak_factory_.GetWeakPtr();
if (!all_cpus_done) {
PERFETTO_DLOG("Reposting immediate ReadTick as there's more work.");
task_runner_->PostTask([weak_this, generation] {
if (weak_this)
weak_this->ReadTick(generation);
});
} else {
// Done until next period.
auto tick_period_ms = GetTickPeriodMs();
task_runner_->PostDelayedTask(
[weak_this, generation] {
if (weak_this)
weak_this->ReadTick(generation);
},
tick_period_ms - (NowMs() % tick_period_ms));
}
#if PERFETTO_DCHECK_IS_ON()
// OnFtraceDataWrittenIntoDataSourceBuffers() is supposed to clear
// all metadata, including the |kernel_addrs| map for symbolization.
ForEachInstance([&](FtraceInstanceState* instance) {
for (FtraceDataSource* ds : instance->started_data_sources) {
FtraceMetadata* ftrace_metadata = ds->mutable_metadata();
PERFETTO_DCHECK(ftrace_metadata->kernel_addrs.empty());
PERFETTO_DCHECK(ftrace_metadata->last_kernel_addr_index_written == 0);
}
});
#endif
}
bool FtraceController::ReadPassForInstance(FtraceInstanceState* instance) {
if (instance->started_data_sources.empty())
return true;
bool all_cpus_done = true;
for (size_t i = 0; i < instance->cpu_readers.size(); i++) {
size_t max_pages = kMaxPagesPerCpuPerReadTick;
size_t pages_read = instance->cpu_readers[i].ReadCycle(
&parsing_mem_, max_pages, instance->started_data_sources);
PERFETTO_DCHECK(pages_read <= max_pages);
if (pages_read == max_pages) {
all_cpus_done = false;
}
}
return all_cpus_done;
}
uint32_t FtraceController::GetTickPeriodMs() {
if (data_sources_.empty())
return kDefaultTickPeriodMs;
uint32_t kUnsetPeriod = std::numeric_limits<uint32_t>::max();
uint32_t min_period_ms = kUnsetPeriod;
bool using_poll = true;
ForEachInstance([&](FtraceInstanceState* instance) {
using_poll &= instance->buffer_watches_posted;
for (FtraceDataSource* ds : instance->started_data_sources) {
if (ds->config().has_drain_period_ms()) {
min_period_ms = std::min(min_period_ms, ds->config().drain_period_ms());
}
}
});
// None of the active data sources requested an explicit tick period.
// The historical default is 100ms, but if we know that all instances are also
// using buffer watermark polling, we can raise it. We don't disable the tick
// entirely as it spreads the read work more evenly, and ensures procfs
// scrapes of seen TIDs are not too stale.
if (min_period_ms == kUnsetPeriod) {
return using_poll ? kPollBackingTickPeriodMs : kDefaultTickPeriodMs;
}
if (min_period_ms < kMinTickPeriodMs || min_period_ms > kMaxTickPeriodMs) {
PERFETTO_LOG(
"drain_period_ms was %u should be between %u and %u. "
"Falling back onto a default.",
min_period_ms, kMinTickPeriodMs, kMaxTickPeriodMs);
return kDefaultTickPeriodMs;
}
return min_period_ms;
}
void FtraceController::UpdateBufferWatermarkWatches(
FtraceInstanceState* instance,
const std::string& instance_name) {
PERFETTO_DCHECK(buffer_watermark_support_ != PollSupport::kUntested);
if (buffer_watermark_support_ == PollSupport::kUnsupported)
return;
bool requested_poll = false;
for (const FtraceDataSource* ds : instance->started_data_sources) {
requested_poll |= ds->config().has_drain_buffer_percent();
}
if (!requested_poll || instance->buffer_watches_posted)
return;
auto weak_this = weak_factory_.GetWeakPtr();
for (size_t i = 0; i < instance->cpu_readers.size(); i++) {
int fd = instance->cpu_readers[i].RawBufferFd();
task_runner_->AddFileDescriptorWatch(fd, [weak_this, instance_name, i] {
if (weak_this)
weak_this->OnBufferPastWatermark(instance_name, i,
/*repoll_watermark=*/true);
});
}
instance->buffer_watches_posted = true;
}
void FtraceController::RemoveBufferWatermarkWatches(
FtraceInstanceState* instance) {
if (!instance->buffer_watches_posted)
return;
for (size_t i = 0; i < instance->cpu_readers.size(); i++) {
int fd = instance->cpu_readers[i].RawBufferFd();
task_runner_->RemoveFileDescriptorWatch(fd);
}
instance->buffer_watches_posted = false;
}
// TODO(rsavitski): consider calling OnFtraceData only if we're not reposting
// a continuation. It's a tradeoff between procfs scrape freshness and urgency
// to drain ftrace kernel buffers.
void FtraceController::OnBufferPastWatermark(std::string instance_name,
size_t cpu,
bool repoll_watermark) {
metatrace::ScopedEvent evt(metatrace::TAG_FTRACE,
metatrace::FTRACE_CPU_BUFFER_WATERMARK);
// Instance might have been stopped before this callback runs.
FtraceInstanceState* instance = GetInstance(instance_name);
if (!instance || cpu >= instance->cpu_readers.size())
return;
// Repoll all per-cpu buffers with zero timeout to confirm that at least
// one is still past the watermark. This might not be true if a different
// callback / readtick / flush did a read pass before this callback reached
// the front of the task runner queue.
if (repoll_watermark) {
size_t num_cpus = instance->cpu_readers.size();
std::vector<struct pollfd> pollfds(num_cpus);
for (size_t i = 0; i < num_cpus; i++) {
pollfds[i].fd = instance->cpu_readers[i].RawBufferFd();
pollfds[i].events = POLLIN;
}
int r = PERFETTO_EINTR(poll(pollfds.data(), num_cpus, 0));
if (r < 0) {
PERFETTO_DPLOG("poll failed");
return;
} else if (r == 0) { // no buffers below the watermark -> we're done.
return;
}
// Count the number of readable fds, as some poll results might be POLLERR,
// as seen in cases with offlined cores. It's still fine to attempt reading
// from those buffers as CpuReader will handle the ENODEV.
bool has_readable_fd = false;
for (size_t i = 0; i < num_cpus; i++) {
has_readable_fd |= (pollfds[i].revents & POLLIN);
}
if (!has_readable_fd) {
return;
}
}
MaybeSnapshotFtraceClock();
bool all_cpus_done = ReadPassForInstance(instance);
observer_->OnFtraceDataWrittenIntoDataSourceBuffers();
if (!all_cpus_done) {
// More data to be read, but we want to let other task_runner tasks to run.
// Repost a continuation task.
auto weak_this = weak_factory_.GetWeakPtr();
task_runner_->PostTask([weak_this, instance_name, cpu] {
if (weak_this)
weak_this->OnBufferPastWatermark(instance_name, cpu,
/*repoll_watermark=*/false);
});
}
}
void FtraceController::Flush(FlushRequestID flush_id) {
metatrace::ScopedEvent evt(metatrace::TAG_FTRACE,
metatrace::FTRACE_CPU_FLUSH);
ForEachInstance([&](FtraceInstanceState* instance) { // for clang-format
FlushForInstance(instance);
});
observer_->OnFtraceDataWrittenIntoDataSourceBuffers();
ForEachInstance([&](FtraceInstanceState* instance) {
for (FtraceDataSource* ds : instance->started_data_sources) {
ds->OnFtraceFlushComplete(flush_id);
}
});
}
void FtraceController::FlushForInstance(FtraceInstanceState* instance) {
if (instance->started_data_sources.empty())
return;
// Read all cpus in one go, limiting the per-cpu read amount to make sure we
// don't get stuck chasing the writer if there's a very high bandwidth of
// events.
size_t max_pages = instance->ftrace_config_muxer->GetPerCpuBufferSizePages();
for (size_t i = 0; i < instance->cpu_readers.size(); i++) {
instance->cpu_readers[i].ReadCycle(&parsing_mem_, max_pages,
instance->started_data_sources);
}
}
// We are not implicitly flushing on Stop. The tracing service is supposed to
// ask for an explicit flush before stopping, unless it needs to perform a
// non-graceful stop.
void FtraceController::StopIfNeeded(FtraceInstanceState* instance) {
if (!instance->started_data_sources.empty())
return;
RemoveBufferWatermarkWatches(instance);
instance->cpu_readers.clear();
if (instance == &primary_) {
cpu_zero_stats_fd_.reset();
}
// Muxer cannot change the current_tracer until we close the trace pipe fds
// (i.e. per_cpu). Hence an explicit request here.
instance->ftrace_config_muxer->ResetCurrentTracer();
DestroyIfUnusedSeconaryInstance(instance);
// Clean up global state if done with all data sources.
if (!data_sources_.empty())
return;
if (!retain_ksyms_on_stop_) {
symbolizer_.Destroy();
}
retain_ksyms_on_stop_ = false;
// Note: might have never been allocated if data sources were rejected.
parsing_mem_.Release();
}
bool FtraceController::AddDataSource(FtraceDataSource* data_source) {
if (!ValidConfig(data_source->config()))
return false;
FtraceInstanceState* instance =
GetOrCreateInstance(data_source->config().instance_name());
if (!instance)
return false;
// note: from this point onwards, need to not leak a possibly created
// instance if returning early.
FtraceConfigId config_id = next_cfg_id_++;
if (!instance->ftrace_config_muxer->SetupConfig(
config_id, data_source->config(),
data_source->mutable_setup_errors())) {
DestroyIfUnusedSeconaryInstance(instance);
return false;
}
const FtraceDataSourceConfig* ds_config =
instance->ftrace_config_muxer->GetDataSourceConfig(config_id);
auto it_and_inserted = data_sources_.insert(data_source);
PERFETTO_DCHECK(it_and_inserted.second);
data_source->Initialize(config_id, ds_config);
return true;
}
bool FtraceController::StartDataSource(FtraceDataSource* data_source) {
PERFETTO_DCHECK(data_sources_.count(data_source) > 0);
FtraceConfigId config_id = data_source->config_id();
PERFETTO_CHECK(config_id);
const std::string& instance_name = data_source->config().instance_name();
FtraceInstanceState* instance = GetOrCreateInstance(instance_name);
PERFETTO_CHECK(instance);
if (!instance->ftrace_config_muxer->ActivateConfig(config_id))
return false;
instance->started_data_sources.insert(data_source);
StartIfNeeded(instance, instance_name);
// Parse kernel symbols if required by the config. This can be an expensive
// operation (cpu-bound for 500ms+), so delay the StartDataSource
// acknowledgement until after we're done. This lets a consumer wait for the
// expensive work to be done by waiting on the "all data sources started"
// fence. This helps isolate the effects of the cpu-bound work on
// frequency scaling of cpus when recording benchmarks (b/236143653).
// Note that we're already recording data into the kernel ftrace
// buffers while doing the symbol parsing.
if (data_source->config().symbolize_ksyms()) {
symbolizer_.GetOrCreateKernelSymbolMap();
// If at least one config sets the KSYMS_RETAIN flag, keep the ksysm map
// around in StopIfNeeded().
const auto KRET = FtraceConfig::KSYMS_RETAIN;
retain_ksyms_on_stop_ |= data_source->config().ksyms_mem_policy() == KRET;
}
return true;
}
void FtraceController::RemoveDataSource(FtraceDataSource* data_source) {
size_t removed = data_sources_.erase(data_source);
if (!removed)
return; // can happen if AddDataSource failed
FtraceInstanceState* instance =
GetOrCreateInstance(data_source->config().instance_name());
PERFETTO_CHECK(instance);
instance->ftrace_config_muxer->RemoveConfig(data_source->config_id());
instance->started_data_sources.erase(data_source);
StopIfNeeded(instance);
}
void FtraceController::DumpFtraceStats(FtraceDataSource* data_source,
FtraceStats* stats_out) {
FtraceInstanceState* instance =
GetInstance(data_source->config().instance_name());
PERFETTO_DCHECK(instance);
if (!instance)
return;
DumpAllCpuStats(instance->ftrace_procfs.get(), stats_out);
if (symbolizer_.is_valid()) {
auto* symbol_map = symbolizer_.GetOrCreateKernelSymbolMap();
stats_out->kernel_symbols_parsed =
static_cast<uint32_t>(symbol_map->num_syms());
stats_out->kernel_symbols_mem_kb =
static_cast<uint32_t>(symbol_map->size_bytes() / 1024);
}
}
void FtraceController::MaybeSnapshotFtraceClock() {
if (!cpu_zero_stats_fd_)
return;
auto ftrace_clock = primary_.ftrace_config_muxer->ftrace_clock();
PERFETTO_DCHECK(ftrace_clock != protos::pbzero::FTRACE_CLOCK_UNSPECIFIED);
// Snapshot the boot clock *before* reading CPU stats so that
// two clocks are as close togher as possible (i.e. if it was the
// other way round, we'd skew by the const of string parsing).
ftrace_clock_snapshot_.boot_clock_ts = base::GetBootTimeNs().count();
// A value of zero will cause this snapshot to be skipped.
ftrace_clock_snapshot_.ftrace_clock_ts =
ReadFtraceNowTs(cpu_zero_stats_fd_).value_or(0);
}
FtraceController::PollSupport
FtraceController::VerifyKernelSupportForBufferWatermark() {
struct utsname uts = {};
if (uname(&uts) < 0 || strcmp(uts.sysname, "Linux") != 0)
return PollSupport::kUnsupported;
if (!PollSupportedOnKernelVersion(uts.release))
return PollSupport::kUnsupported;
// buffer_percent exists and is writable
auto* tracefs = primary_.ftrace_procfs.get();
uint32_t current = tracefs->ReadBufferPercent();
if (!tracefs->SetBufferPercent(current ? current : 50)) {
return PollSupport::kUnsupported;
}
// Polling on per_cpu/cpu0/trace_pipe_raw doesn't return errors.
base::ScopedFile fd = tracefs->OpenPipeForCpu(0);
struct pollfd pollset = {};
pollset.fd = fd.get();
pollset.events = POLLIN;
int r = PERFETTO_EINTR(poll(&pollset, 1, 0));
if (r < 0 || (r > 0 && (pollset.revents & POLLERR))) {
return PollSupport::kUnsupported;
}
return PollSupport::kSupported;
}
// Check kernel version since the poll implementation has historical bugs.
// We're looking for at least 6.9 for the following:
// ffe3986fece6 ring-buffer: Only update pages_touched when a new page...
// static
bool FtraceController::PollSupportedOnKernelVersion(const char* uts_release) {
int major = 0, minor = 0;
if (sscanf(uts_release, "%d.%d", &major, &minor) != 2) {
return false;
}
if (major < kPollRequiredMajorVersion ||
(major == kPollRequiredMajorVersion &&
minor < kPollRequiredMinorVersion)) {
// Android: opportunistically detect a few select GKI kernels that are known
// to have the fixes.
std::optional<AndroidGkiVersion> gki = ParseAndroidGkiVersion(uts_release);
if (!gki.has_value())
return false;
// android14-6.1.86 or higher sublevel:
// 2d5f12de4cf5 ring-buffer: Only update pages_touched when a new page...
// android15-6.6.27 or higher sublevel:
// a9cd92bc051f ring-buffer: Only update pages_touched when a new page...
bool gki_patched = (gki->release == 14 && gki->version == 6 &&
gki->patch_level == 1 && gki->sub_level >= 86) ||
(gki->release == 15 && gki->version == 6 &&
gki->patch_level == 6 && gki->sub_level >= 27);
return gki_patched;
}
return true;
}
size_t FtraceController::GetStartedDataSourcesCount() {
size_t cnt = 0;
ForEachInstance([&](FtraceInstanceState* instance) {
cnt += instance->started_data_sources.size();
});
return cnt;
}
FtraceController::FtraceInstanceState::FtraceInstanceState(
std::unique_ptr<FtraceProcfs> ft,
std::unique_ptr<ProtoTranslationTable> ptt,
std::unique_ptr<FtraceConfigMuxer> fcm)
: ftrace_procfs(std::move(ft)),
table(std::move(ptt)),
ftrace_config_muxer(std::move(fcm)) {}
FtraceController::FtraceInstanceState* FtraceController::GetOrCreateInstance(
const std::string& instance_name) {
FtraceInstanceState* maybe_existing = GetInstance(instance_name);
if (maybe_existing)
return maybe_existing;
PERFETTO_DCHECK(!instance_name.empty());
std::unique_ptr<FtraceInstanceState> instance =
CreateSecondaryInstance(instance_name);
if (!instance)
return nullptr;
auto it_and_inserted = secondary_instances_.emplace(
std::piecewise_construct, std::forward_as_tuple(instance_name),
std::forward_as_tuple(std::move(instance)));
PERFETTO_CHECK(it_and_inserted.second);
return it_and_inserted.first->second.get();
}
FtraceController::FtraceInstanceState* FtraceController::GetInstance(
const std::string& instance_name) {
if (instance_name.empty())
return &primary_;
auto it = secondary_instances_.find(instance_name);
return it != secondary_instances_.end() ? it->second.get() : nullptr;
}
void FtraceController::DestroyIfUnusedSeconaryInstance(
FtraceInstanceState* instance) {
if (instance == &primary_)
return;
for (auto it = secondary_instances_.begin(); it != secondary_instances_.end();
++it) {
if (it->second.get() == instance &&
instance->ftrace_config_muxer->GetDataSourcesCount() == 0) {
// no data sources left referencing this secondary instance
secondary_instances_.erase(it);
return;
}
}
PERFETTO_FATAL("Bug in ftrace instance lifetimes");
}
std::unique_ptr<FtraceController::FtraceInstanceState>
FtraceController::CreateSecondaryInstance(const std::string& instance_name) {
std::optional<std::string> instance_path = AbsolutePathForInstance(
primary_.ftrace_procfs->GetRootPath(), instance_name);
if (!instance_path.has_value()) {
PERFETTO_ELOG("Invalid ftrace instance name: \"%s\"",
instance_name.c_str());
return nullptr;
}
auto ftrace_procfs = FtraceProcfs::Create(*instance_path);
if (!ftrace_procfs) {
PERFETTO_ELOG("Failed to create ftrace procfs for \"%s\"",
instance_path->c_str());
return nullptr;
}
auto table = ProtoTranslationTable::Create(
ftrace_procfs.get(), GetStaticEventInfo(), GetStaticCommonFieldsInfo());
if (!table) {
PERFETTO_ELOG("Failed to create proto translation table for \"%s\"",
instance_path->c_str());
return nullptr;
}
// secondary instances don't support atrace and vendor tracepoint HAL
std::map<std::string, std::vector<GroupAndName>> vendor_evts;
auto syscalls = SyscallTable::FromCurrentArch();
auto muxer = std::make_unique<FtraceConfigMuxer>(
ftrace_procfs.get(), atrace_wrapper_.get(), table.get(),
std::move(syscalls), vendor_evts,
/* secondary_instance= */ true);
return std::make_unique<FtraceInstanceState>(
std::move(ftrace_procfs), std::move(table), std::move(muxer));
}
// TODO(rsavitski): we want to eventually add support for the default
// (primary_) tracefs path to be an instance itself, at which point we'll need
// to be careful to distinguish the tracefs mount point from the default
// instance path.
// static
std::optional<std::string> FtraceController::AbsolutePathForInstance(
const std::string& tracefs_root,
const std::string& raw_cfg_name) {
if (base::Contains(raw_cfg_name, '/') ||
base::StartsWith(raw_cfg_name, "..")) {
return std::nullopt;
}
// ARM64 pKVM hypervisor tracing emulates an instance, but is not under
// instances/, we special-case that name for now.
if (raw_cfg_name == "hyp") {
std::string hyp_path = tracefs_root + "hyp/";
PERFETTO_LOG(
"Config specified reserved \"hyp\" instance name, using %s for events.",
hyp_path.c_str());
return std::make_optional(hyp_path);
}
return tracefs_root + "instances/" + raw_cfg_name + "/";
}
FtraceController::Observer::~Observer() = default;
} // namespace perfetto