traced_perf: allow collection and symbolization of kernel frames
If a config asks for kernel frames, we set PERF_SAMPLE_CALLCHAIN on the
perf event attr. The kernel unwinds its own frames, so we only need to
symbolize them in the unwinder, using the kallsyms symbolizer that was
added for traced_probes.
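
As a rough standalone sketch (not part of this patch; the helper name is
made up, and the real EventConfig also sets the sampling period and the
user stack/register sampling fields), the relevant perf_event_attr setup
boils down to:

  #include <linux/perf_event.h>

  #include <cstring>

  perf_event_attr KernelCallchainAttr() {
    perf_event_attr attr;
    std::memset(&attr, 0, sizeof(attr));
    attr.size = sizeof(perf_event_attr);
    // Ask the kernel to record the callchain it unwinds at sample time,
    // but only its own (kernel-mode) frames; user frames are still
    // unwound offline from the sampled stack and registers.
    attr.sample_type |= PERF_SAMPLE_CALLCHAIN;
    attr.exclude_callchain_user = true;
    return attr;
  }
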
The kernel frames are emitted as part of the normal callstack, and can
be identified by the magic "kernel" string used as their mapping name.
This is similar to what we do for synthetic error frames.
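
A similarly hedged sketch of how the kernel half of the callchain is
split off and tagged (KernelFrame and the lookup callback are stand-ins
for unwindstack::FrameData and the kallsyms KernelSymbolMap):

  #include <linux/perf_event.h>  // PERF_CONTEXT_KERNEL

  #include <cstdint>
  #include <functional>
  #include <string>
  #include <vector>

  struct KernelFrame {
    std::string function_name;
    std::string map_name;  // always "kernel" for kernel frames
  };

  std::vector<KernelFrame> SplitKernelCallchain(
      const std::vector<uint64_t>& callchain,
      const std::function<std::string(uint64_t)>& lookup) {
    std::vector<KernelFrame> frames;
    // The payload starts with a context marker; with user callchains
    // excluded, everything after it is a kernel instruction pointer.
    if (callchain.empty() || callchain[0] != PERF_CONTEXT_KERNEL)
      return frames;
    for (size_t i = 1; i < callchain.size(); i++)
      frames.push_back({lookup(callchain[i]), "kernel"});
    return frames;
  }
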
Tested together with aosp/1496216 for the SELinux changes around
/proc/kallsyms access on Android.

Bug: 173124818
Change-Id: I05ef78621d00c60e05bb4833c63505c910a0928a
diff --git a/Android.bp b/Android.bp
index 1756f7b..65c9f37 100644
--- a/Android.bp
+++ b/Android.bp
@@ -8844,6 +8844,7 @@
":perfetto_src_profiling_perf_traced_perf_main",
":perfetto_src_profiling_perf_unwinding",
":perfetto_src_protozero_protozero",
+ ":perfetto_src_traced_probes_ftrace_kallsyms_kallsyms",
":perfetto_src_tracing_common",
":perfetto_src_tracing_core_core",
":perfetto_src_tracing_core_service",
diff --git a/src/profiling/perf/BUILD.gn b/src/profiling/perf/BUILD.gn
index b715de1..c6c48ae 100644
--- a/src/profiling/perf/BUILD.gn
+++ b/src/profiling/perf/BUILD.gn
@@ -92,6 +92,7 @@
":common_types",
"../../../gn:default_deps",
"../../../include/perfetto/ext/tracing/core",
+ "../../../src//traced/probes/ftrace/kallsyms",
"../../../src/base",
"../common:unwind_support",
]
diff --git a/src/profiling/perf/common_types.h b/src/profiling/perf/common_types.h
index 109ff35..8577503 100644
--- a/src/profiling/perf/common_types.h
+++ b/src/profiling/perf/common_types.h
@@ -49,6 +49,7 @@
std::unique_ptr<unwindstack::Regs> regs;
std::vector<char> stack;
bool stack_maxed = false;
+ std::vector<uint64_t> kernel_ips;
};
// Entry in an unwinding queue. Either a sample that requires unwinding, or a
diff --git a/src/profiling/perf/event_config.cc b/src/profiling/perf/event_config.cc
index 896fa70..2e8e58d 100644
--- a/src/profiling/perf/event_config.cc
+++ b/src/profiling/perf/event_config.cc
@@ -164,7 +164,8 @@
samples_per_tick_limit_(samples_per_tick_limit),
target_filter_(std::move(target_filter)),
remote_descriptor_timeout_ms_(remote_descriptor_timeout_ms),
- unwind_state_clear_period_ms_(cfg.unwind_state_clear_period_ms()) {
+ unwind_state_clear_period_ms_(cfg.unwind_state_clear_period_ms()),
+ kernel_frames_(cfg.kernel_frames()) {
auto& pe = perf_event_attr_;
pe.size = sizeof(perf_event_attr);
@@ -190,6 +191,12 @@
// PERF_SAMPLE_REGS_USER:
pe.sample_regs_user =
PerfUserRegsMaskForArch(unwindstack::Regs::CurrentArch());
+
+ // Optional kernel call frames (unwound by the kernel itself):
+ if (kernel_frames_) {
+ pe.sample_type |= PERF_SAMPLE_CALLCHAIN;
+ pe.exclude_callchain_user = true;
+ }
}
} // namespace profiling
diff --git a/src/profiling/perf/event_config.h b/src/profiling/perf/event_config.h
index 7a9ca00..da77075 100644
--- a/src/profiling/perf/event_config.h
+++ b/src/profiling/perf/event_config.h
@@ -59,8 +59,8 @@
uint32_t unwind_state_clear_period_ms() const {
return unwind_state_clear_period_ms_;
}
-
const TargetFilter& filter() const { return target_filter_; }
+ bool kernel_frames() const { return kernel_frames_; }
perf_event_attr* perf_attr() const {
return const_cast<perf_event_attr*>(&perf_event_attr_);
@@ -100,6 +100,9 @@
// Optional period for clearing cached unwinder state. Skipped if zero.
const uint32_t unwind_state_clear_period_ms_;
+
+ // If true, include kernel frames in the callstacks.
+ const bool kernel_frames_;
};
} // namespace profiling
diff --git a/src/profiling/perf/event_reader.cc b/src/profiling/perf/event_reader.cc
index 35054f8..c6ddf3d 100644
--- a/src/profiling/perf/event_reader.cc
+++ b/src/profiling/perf/event_reader.cc
@@ -33,10 +33,17 @@
template <typename T>
const char* ReadValue(T* value_out, const char* ptr) {
- memcpy(value_out, reinterpret_cast<const void*>(ptr), sizeof(T));
+ memcpy(value_out, ptr, sizeof(T));
return ptr + sizeof(T);
}
+template <typename T>
+const char* ReadValues(T* out, const char* ptr, size_t num_values) {
+ size_t sz = sizeof(T) * num_values;
+ memcpy(out, ptr, sz);
+ return ptr + sz;
+}
+
bool IsPowerOfTwo(size_t v) {
return (v != 0 && ((v & (v - 1)) == 0));
}
@@ -281,7 +288,7 @@
const char* record_start) {
if (event_attr_.sample_type &
(~uint64_t(PERF_SAMPLE_TID | PERF_SAMPLE_TIME | PERF_SAMPLE_STACK_USER |
- PERF_SAMPLE_REGS_USER))) {
+ PERF_SAMPLE_REGS_USER | PERF_SAMPLE_CALLCHAIN))) {
PERFETTO_FATAL("Unsupported sampling option");
}
@@ -309,6 +316,14 @@
parse_pos = ReadValue(&sample.timestamp, parse_pos);
}
+ if (event_attr_.sample_type & PERF_SAMPLE_CALLCHAIN) {
+ uint64_t chain_len = 0;
+ parse_pos = ReadValue(&chain_len, parse_pos);
+ sample.kernel_ips.resize(static_cast<size_t>(chain_len));
+ parse_pos = ReadValues<uint64_t>(sample.kernel_ips.data(), parse_pos,
+ static_cast<size_t>(chain_len));
+ }
+
if (event_attr_.sample_type & PERF_SAMPLE_REGS_USER) {
// Can be empty, e.g. if we sampled a kernel thread.
sample.regs = ReadPerfUserRegsData(&parse_pos);
diff --git a/src/profiling/perf/perf_producer.cc b/src/profiling/perf/perf_producer.cc
index 32bc20a..b99b1f5 100644
--- a/src/profiling/perf/perf_producer.cc
+++ b/src/profiling/perf/perf_producer.cc
@@ -257,7 +257,8 @@
// Inform unwinder of the new data source instance, and optionally start a
// periodic task to clear its cached state.
- unwinding_worker_->PostStartDataSource(instance_id);
+ unwinding_worker_->PostStartDataSource(instance_id,
+ ds.event_config.kernel_frames());
if (ds.event_config.unwind_state_clear_period_ms()) {
unwinding_worker_->PostClearCachedStatePeriodic(
instance_id, ds.event_config.unwind_state_clear_period_ms());
diff --git a/src/profiling/perf/unwinding.cc b/src/profiling/perf/unwinding.cc
index fb310cc..1dfb005 100644
--- a/src/profiling/perf/unwinding.cc
+++ b/src/profiling/perf/unwinding.cc
@@ -20,6 +20,8 @@
#include <inttypes.h>
+#include <unwindstack/Unwinder.h>
+
#include "perfetto/ext/base/metatrace.h"
#include "perfetto/ext/base/thread_utils.h"
#include "perfetto/ext/base/utils.h"
@@ -40,18 +42,24 @@
base::MaybeSetThreadName("stack-unwinding");
}
-void Unwinder::PostStartDataSource(DataSourceInstanceID ds_id) {
+void Unwinder::PostStartDataSource(DataSourceInstanceID ds_id,
+ bool kernel_frames) {
// No need for a weak pointer as the associated task runner quits (stops
// running tasks) strictly before the Unwinder's destruction.
- task_runner_->PostTask([this, ds_id] { StartDataSource(ds_id); });
+ task_runner_->PostTask(
+ [this, ds_id, kernel_frames] { StartDataSource(ds_id, kernel_frames); });
}
-void Unwinder::StartDataSource(DataSourceInstanceID ds_id) {
+void Unwinder::StartDataSource(DataSourceInstanceID ds_id, bool kernel_frames) {
PERFETTO_DCHECK_THREAD(thread_checker_);
PERFETTO_DLOG("Unwinder::StartDataSource(%zu)", static_cast<size_t>(ds_id));
auto it_and_inserted = data_sources_.emplace(ds_id, DataSourceState{});
PERFETTO_DCHECK(it_and_inserted.second);
+
+ if (kernel_frames) {
+ kernel_symbolizer_.GetOrCreateKernelSymbolMap();
+ }
}
// c++11: use shared_ptr to transfer resource handles, so that the resources get
@@ -358,7 +366,12 @@
unwind = attempt_unwind();
}
- ret.frames.reserve(unwind.frames.size());
+ // Symbolize kernel-unwound kernel frames (if any).
+ std::vector<FrameData> kernel_frames = SymbolizeKernelCallchain(sample);
+
+ // Concatenate the kernel and userspace frames.
+ ret.frames = std::move(kernel_frames);
+ ret.frames.reserve(ret.frames.size() + unwind.frames.size());
for (unwindstack::FrameData& frame : unwind.frames) {
ret.frames.emplace_back(unwind_state->AnnotateFrame(std::move(frame)));
}
@@ -379,6 +392,38 @@
return ret;
}
+std::vector<FrameData> Unwinder::SymbolizeKernelCallchain(
+ const ParsedSample& sample) {
+ std::vector<FrameData> ret;
+ if (sample.kernel_ips.empty())
+ return ret;
+
+ // The list of addresses contains special context marker values (inserted by
+ // the kernel's unwinding) to indicate which section of the callchain belongs
+ // to the kernel/user mode (if the kernel can successfully unwind user
+ // stacks). In our case, we request only the kernel frames.
+ if (sample.kernel_ips[0] != PERF_CONTEXT_KERNEL) {
+ PERFETTO_DFATAL_OR_ELOG(
+ "Unexpected: 0th frame of callchain is not PERF_CONTEXT_KERNEL.");
+ return ret;
+ }
+
+ auto* kernel_map = kernel_symbolizer_.GetOrCreateKernelSymbolMap();
+ ret.reserve(sample.kernel_ips.size());
+ for (size_t i = 1; i < sample.kernel_ips.size(); i++) {
+ std::string function_name = kernel_map->Lookup(sample.kernel_ips[i]);
+
+ // Synthesise a partially-valid libunwindstack frame struct for the kernel
+ // frame. We reuse the type for convenience. The kernel frames are marked by
+ // a magical "kernel" string as their containing mapping.
+ unwindstack::FrameData frame{};
+ frame.function_name = std::move(function_name);
+ frame.map_name = "kernel";
+ ret.emplace_back(FrameData{std::move(frame), /*build_id=*/""});
+ }
+ return ret;
+}
+
void Unwinder::PostInitiateDataSourceStop(DataSourceInstanceID ds_id) {
task_runner_->PostTask([this, ds_id] { InitiateDataSourceStop(ds_id); });
}
@@ -414,8 +459,10 @@
data_sources_.erase(it);
// Clean up state if there are no more active sources.
- if (data_sources_.empty())
+ if (data_sources_.empty()) {
+ kernel_symbolizer_.Destroy();
ResetAndEnableUnwindstackCache();
+ }
// Inform service thread that the unwinder is done with the source.
delegate_->PostFinishDataSourceStop(ds_id);
diff --git a/src/profiling/perf/unwinding.h b/src/profiling/perf/unwinding.h
index 4b7b091..274d485 100644
--- a/src/profiling/perf/unwinding.h
+++ b/src/profiling/perf/unwinding.h
@@ -35,6 +35,10 @@
#include "src/profiling/perf/common_types.h"
#include "src/profiling/perf/unwind_queue.h"
+// TODO(rsavitski): move kallsyms code to a common location.
+#include "src/traced/probes/ftrace/kallsyms/kernel_symbol_map.h"
+#include "src/traced/probes/ftrace/kallsyms/lazy_kernel_symbolizer.h"
+
namespace perfetto {
namespace profiling {
@@ -85,7 +89,7 @@
~Unwinder() { PERFETTO_DCHECK_THREAD(thread_checker_); }
- void PostStartDataSource(DataSourceInstanceID ds_id);
+ void PostStartDataSource(DataSourceInstanceID ds_id, bool kernel_frames);
void PostAdoptProcDescriptors(DataSourceInstanceID ds_id,
pid_t pid,
base::ScopedFile maps_fd,
@@ -128,7 +132,8 @@
Unwinder(Delegate* delegate, base::UnixTaskRunner* task_runner);
// Marks the data source as valid and active at the unwinding stage.
- void StartDataSource(DataSourceInstanceID ds_id);
+ // Initializes kernel address symbolization if needed.
+ void StartDataSource(DataSourceInstanceID ds_id, bool kernel_frames);
void AdoptProcDescriptors(DataSourceInstanceID ds_id,
pid_t pid,
@@ -149,6 +154,9 @@
UnwindingMetadata* unwind_state,
bool pid_unwound_before);
+ // Returns a list of symbolized kernel frames in the sample (if any).
+ std::vector<FrameData> SymbolizeKernelCallchain(const ParsedSample& sample);
+
// Marks the data source as shutting down at the unwinding stage. It is known
// that no new samples for this source will be pushed into the queue, but we
// need to delay the unwinder state teardown until all previously-enqueued
@@ -196,6 +204,7 @@
Delegate* const delegate_;
UnwindQueue<UnwindEntry, kUnwindQueueCapacity> unwind_queue_;
std::map<DataSourceInstanceID, DataSourceState> data_sources_;
+ LazyKernelSymbolizer kernel_symbolizer_;
PERFETTO_THREAD_CHECKER(thread_checker_)
};