perf: record unwinding errors, pace ticks, make rbuf size configurable
Might need to do randomized per-DS offsets for the ticks (so as not to
align them with other such patterns, including the ReadTick in
traced_probes). But we'll see how big of a problem that is.
Bug: 144281346
Change-Id: I1e457d35211d11f0537d3d9755326844e5f45d2c
diff --git a/src/profiling/memory/unwinding.cc b/src/profiling/memory/unwinding.cc
index 752e9d4..26a393a 100644
--- a/src/profiling/memory/unwinding.cc
+++ b/src/profiling/memory/unwinding.cc
@@ -180,7 +180,7 @@
frame_data.function_name = "ERROR " + std::to_string(error_code);
frame_data.map_name = "ERROR";
- out->frames.emplace_back(frame_data, "");
+ out->frames.emplace_back(std::move(frame_data), "");
out->error = true;
}
return true;
diff --git a/src/profiling/perf/event_config.h b/src/profiling/perf/event_config.h
index b601b33..5749158 100644
--- a/src/profiling/perf/event_config.h
+++ b/src/profiling/perf/event_config.h
@@ -40,8 +40,6 @@
// Describes a single profiling configuration. Bridges the gap between the data
// source config proto, and the raw "perf_event_attr" structs to pass to the
// perf_event_open syscall.
-// Also, for non-sample events (if they're possible), union of structs is
-// interesting.
class EventConfig {
public:
static base::Optional<EventConfig> Create(const DataSourceConfig& ds_config) {
@@ -52,6 +50,7 @@
}
uint32_t target_all_cpus() const { return target_all_cpus_; }
+ size_t ring_buffer_pages() const { return ring_buffer_pages_; }
perf_event_attr* perf_attr() const {
return const_cast<perf_event_attr*>(&perf_event_attr_);
@@ -59,7 +58,8 @@
private:
EventConfig(const protos::pbzero::PerfEventConfig::Decoder& cfg)
- : target_all_cpus_(cfg.all_cpus()) {
+ : target_all_cpus_(cfg.all_cpus()),
+ ring_buffer_pages_(cfg.ring_buffer_pages()) {
auto& pe = perf_event_attr_;
pe.size = sizeof(perf_event_attr);
@@ -88,6 +88,11 @@
// If true, process all system-wide samples.
const bool target_all_cpus_;
+ // Size (in 4k pages) of each per-cpu ring buffer shared with the kernel. If
+ // zero, |EventReader| will choose a default value. Must be a power of two
+ // otherwise.
+ const size_t ring_buffer_pages_;
+
// TODO(rsavitski): if we allow for event groups containing multiple sampled
// counters, we'll need to vary the .type & .config fields per
// perf_event_open.
diff --git a/src/profiling/perf/event_reader.cc b/src/profiling/perf/event_reader.cc
index d381eb7..e7642bb 100644
--- a/src/profiling/perf/event_reader.cc
+++ b/src/profiling/perf/event_reader.cc
@@ -31,7 +31,7 @@
namespace {
-constexpr size_t kDataPagesPerRingBuffer = 256; // 1 MB (256 x 4k pages)
+constexpr size_t kDefaultDataPagesPerRingBuffer = 256; // 1 MB (256 x 4k pages)
template <typename T>
const char* ReadValue(T* value_out, const char* ptr) {
@@ -93,7 +93,7 @@
int perf_fd,
size_t data_page_count) {
// perf_event_open requires the ring buffer to be a power of two in size.
- PERFETTO_CHECK(IsPowerOfTwo(data_page_count));
+ PERFETTO_DCHECK(IsPowerOfTwo(data_page_count));
PerfRingBuffer ret;
@@ -105,7 +105,7 @@
void* mmap_addr = mmap(nullptr, ret.mmap_sz_, PROT_READ | PROT_WRITE,
MAP_SHARED, perf_fd, 0);
if (mmap_addr == MAP_FAILED) {
- PERFETTO_PLOG("failed mmap (check perf_event_mlock_kb in procfs)");
+ PERFETTO_PLOG("failed mmap");
return base::nullopt;
}
@@ -220,8 +220,18 @@
return base::nullopt;
}
- auto ring_buffer =
- PerfRingBuffer::Allocate(perf_fd.get(), kDataPagesPerRingBuffer);
+ // choose a reasonable ring buffer size
+ size_t ring_buffer_pages = kDefaultDataPagesPerRingBuffer;
+ size_t config_pages = event_cfg.ring_buffer_pages();
+ if (config_pages) {
+ if (!IsPowerOfTwo(config_pages)) {
+ PERFETTO_ELOG("kernel buffer size must be a power of two pages");
+ return base::nullopt;
+ }
+ ring_buffer_pages = config_pages;
+ }
+
+ auto ring_buffer = PerfRingBuffer::Allocate(perf_fd.get(), ring_buffer_pages);
if (!ring_buffer.has_value()) {
return base::nullopt;
}
diff --git a/src/profiling/perf/event_reader.h b/src/profiling/perf/event_reader.h
index 7533798..870b2d5 100644
--- a/src/profiling/perf/event_reader.h
+++ b/src/profiling/perf/event_reader.h
@@ -96,6 +96,8 @@
void PauseEvents();
+ uint32_t cpu() const { return cpu_; }
+
~EventReader() = default;
// move-only
diff --git a/src/profiling/perf/perf_producer.cc b/src/profiling/perf/perf_producer.cc
index fdc7d44..a828dc6 100644
--- a/src/profiling/perf/perf_producer.cc
+++ b/src/profiling/perf/perf_producer.cc
@@ -20,6 +20,7 @@
#include <unistd.h>
+#include <unwindstack/Error.h>
#include <unwindstack/Unwinder.h>
#include "perfetto/base/logging.h"
@@ -62,6 +63,10 @@
return static_cast<size_t>(sysconf(_SC_NPROCESSORS_CONF));
}
+uint64_t NowMs() {
+ return static_cast<uint64_t>(base::GetWallTimeMs().count());
+}
+
protos::pbzero::CpuMode ToCpuModeEnum(uint16_t perf_cpu_mode) {
using CpuMode = protos::pbzero::CpuMode;
switch (perf_cpu_mode) {
@@ -80,6 +85,30 @@
}
}
+protos::pbzero::StackUnwindError ToProtoEnum(
+ unwindstack::ErrorCode error_code) {
+ using StackUnwindError = protos::pbzero::StackUnwindError;
+ switch (error_code) {
+ case unwindstack::ERROR_NONE:
+ return StackUnwindError::UNWIND_ERROR_NONE;
+ case unwindstack::ERROR_MEMORY_INVALID:
+ return StackUnwindError::UNWIND_ERROR_MEMORY_INVALID;
+ case unwindstack::ERROR_UNWIND_INFO:
+ return StackUnwindError::UNWIND_ERROR_UNWIND_INFO;
+ case unwindstack::ERROR_UNSUPPORTED:
+ return StackUnwindError::UNWIND_ERROR_UNSUPPORTED;
+ case unwindstack::ERROR_INVALID_MAP:
+ return StackUnwindError::UNWIND_ERROR_INVALID_MAP;
+ case unwindstack::ERROR_MAX_FRAMES_EXCEEDED:
+ return StackUnwindError::UNWIND_ERROR_MAX_FRAMES_EXCEEDED;
+ case unwindstack::ERROR_REPEATED_FRAME:
+ return StackUnwindError::UNWIND_ERROR_REPEATED_FRAME;
+ case unwindstack::ERROR_INVALID_ELF:
+ return StackUnwindError::UNWIND_ERROR_INVALID_ELF;
+ }
+ return StackUnwindError::UNWIND_ERROR_UNKNOWN;
+}
+
} // namespace
PerfProducer::PerfProducer(ProcDescriptorGetter* proc_fd_getter,
@@ -149,7 +178,7 @@
if (weak_this)
weak_this->TickDataSourceRead(instance_id);
},
- kReadTickPeriodMs);
+ kReadTickPeriodMs - (NowMs() % kReadTickPeriodMs));
// Set up unwind queue and kick off a periodic task to process it.
unwind_queues_.emplace(instance_id, std::deque<UnwindEntry>{});
@@ -158,7 +187,7 @@
if (weak_this)
weak_this->TickDataSourceUnwind(instance_id);
},
- kUnwindTickPeriodMs);
+ kUnwindTickPeriodMs - (NowMs() % kUnwindTickPeriodMs));
}
void PerfProducer::StopDataSource(DataSourceInstanceID instance_id) {
@@ -218,7 +247,7 @@
if (weak_this)
weak_this->TickDataSourceRead(ds_id);
},
- kReadTickPeriodMs);
+ kReadTickPeriodMs - (NowMs() % kReadTickPeriodMs));
}
}
@@ -229,9 +258,11 @@
using Status = DataSource::ProcDescriptors::Status;
// TODO(rsavitski): record the loss in the trace.
- auto lost_events_callback = [ds_id](uint64_t lost_events) {
- PERFETTO_ELOG("DataSource instance [%zu] lost [%" PRIu64 "] events",
- static_cast<size_t>(ds_id), lost_events);
+ size_t cpu = reader->cpu();
+ auto lost_events_callback = [ds_id, cpu](uint64_t lost_events) {
+ PERFETTO_ELOG("DataSource instance [%zu] lost [%" PRIu64
+ "] events on cpu [%zu]",
+ static_cast<size_t>(ds_id), lost_events, cpu);
};
for (size_t i = 0; i < max_samples; i++) {
@@ -241,17 +272,13 @@
return false; // caught up to the writer
}
- pid_t pid = sample->pid;
if (!sample->regs) {
- // TODO(rsavitski): don't discard if/when doing stackless events.
- if (pid) { // kernel threads not worth logging (never have regs)
- PERFETTO_DLOG("Dropping sample without register data for pid [%d]",
- static_cast<int>(pid));
- }
+ // skip kernel threads/workers
continue;
}
// Request proc-fds for the process if this is the first time we see it yet.
+ pid_t pid = sample->pid;
auto& fd_entry = ds->proc_fds[pid]; // created if absent
if (fd_entry.status == Status::kInitial) {
@@ -361,7 +388,7 @@
if (weak_this)
weak_this->TickDataSourceUnwind(ds_id);
},
- kUnwindTickPeriodMs);
+ kUnwindTickPeriodMs - (NowMs() % kUnwindTickPeriodMs));
}
}
@@ -457,7 +484,7 @@
// Unwindstack clobbers registers, so make a copy in case we need to retry.
auto working_regs = std::unique_ptr<unwindstack::Regs>{sample.regs->Clone()};
- uint8_t error_code = unwindstack::ERROR_NONE;
+ unwindstack::ErrorCode error_code = unwindstack::ERROR_NONE;
unwindstack::Unwinder unwinder(kUnwindingMaxFrames, &unwind_state.fd_maps,
working_regs.get(), overlay_memory);
@@ -484,10 +511,17 @@
ret.frames.emplace_back(unwind_state.AnnotateFrame(std::move(frame)));
}
- // TODO(rsavitski): we still get a partially-unwound stack on most errors,
- // consider adding a synthetic "error frame" like heapprofd.
- if (error_code != unwindstack::ERROR_NONE)
- ret.unwind_error = true;
+ // In case of an unwinding error, add a synthetic error frame (which will
+ // appear as a caller of the partially-unwound fragment), for easier
+ // visualization of errors.
+ if (error_code != unwindstack::ERROR_NONE) {
+ PERFETTO_DLOG("Unwinding error %" PRIu8, error_code);
+ unwindstack::FrameData frame_data{};
+ frame_data.function_name = "ERROR " + std::to_string(error_code);
+ frame_data.map_name = "ERROR";
+ ret.frames.emplace_back(std::move(frame_data), /*build_id=*/"");
+ ret.unwind_error = error_code;
+ }
return ret;
}
@@ -534,6 +568,9 @@
perf_sample->set_tid(static_cast<uint32_t>(sample.tid));
perf_sample->set_callstack_iid(callstack_iid);
perf_sample->set_cpu_mode(ToCpuModeEnum(sample.cpu_mode));
+ if (sample.unwind_error != unwindstack::ERROR_NONE) {
+ perf_sample->set_unwind_error(ToProtoEnum(sample.unwind_error));
+ }
}
void PerfProducer::InitiateReaderStop(DataSource* ds) {
diff --git a/src/profiling/perf/perf_producer.h b/src/profiling/perf/perf_producer.h
index d4d48fe..106b780 100644
--- a/src/profiling/perf/perf_producer.h
+++ b/src/profiling/perf/perf_producer.h
@@ -23,6 +23,7 @@
#include <unistd.h>
+#include <unwindstack/Error.h>
#include <unwindstack/Regs.h>
#include "perfetto/base/task_runner.h"
@@ -147,7 +148,7 @@
uint64_t timestamp = 0;
uint16_t cpu_mode = PERF_RECORD_MISC_CPUMODE_UNKNOWN;
std::vector<FrameData> frames;
- bool unwind_error = false;
+ unwindstack::ErrorCode unwind_error = unwindstack::ERROR_NONE;
};
void ConnectService();