Merge "traced_perf: re-enable linux build + add x86 regs handling"
diff --git a/buildtools/BUILD.gn b/buildtools/BUILD.gn
index e15a5e5..2ef83f4 100644
--- a/buildtools/BUILD.gn
+++ b/buildtools/BUILD.gn
@@ -999,6 +999,14 @@
public_configs = [ ":libunwindstack_config" ]
}
+config("bionic_kernel_uapi_headers") {
+ visibility = _buildtools_visibility
+ cflags = [
+ "-isystem",
+ rebase_path("bionic/libc/kernel", root_build_dir),
+ ]
+}
+
config("jsoncpp_config") {
visibility = _buildtools_visibility
cflags = [
diff --git a/gn/BUILD.gn b/gn/BUILD.gn
index 0962c9b..b00f447 100644
--- a/gn/BUILD.gn
+++ b/gn/BUILD.gn
@@ -273,13 +273,20 @@
} # if (enable_perfetto_trace_processor_linenoise)
# Only used by src/profiling in standalone and android builds.
-if (enable_perfetto_heapprofd) {
+if (enable_perfetto_heapprofd || enable_perfetto_traced_perf) {
group("libunwindstack") {
public_configs = [ "//buildtools:libunwindstack_config" ]
public_deps = [ "//buildtools:libunwindstack" ]
}
}
+# Used by src/profiling/perf for perf_regs.h.
+if (enable_perfetto_traced_perf) {
+ group("bionic_kernel_uapi_headers") {
+ public_configs = [ "//buildtools:bionic_kernel_uapi_headers" ]
+ }
+}
+
# Zlib is used both by trace_processor and by perfetto_cmd.
if (enable_perfetto_zlib) {
group("zlib") {
diff --git a/gn/perfetto.gni b/gn/perfetto.gni
index 828a493..e6ed051 100644
--- a/gn/perfetto.gni
+++ b/gn/perfetto.gni
@@ -153,7 +153,11 @@
# Build the perf event profiler (traced_perf).
# TODO(b/144281346): under development.
- enable_perfetto_traced_perf = perfetto_build_with_android
+ # TODO(rsavitski): figure out how to make the android-core dependencies build
+ # under gcc (_Atomic and other issues).
+ enable_perfetto_traced_perf =
+ perfetto_build_with_android ||
+ (perfetto_build_standalone && is_clang && is_linux)
# The Trace Processor: offline analytical engine to process traces and compute
# metrics using a SQL engine.
diff --git a/src/profiling/perf/BUILD.gn b/src/profiling/perf/BUILD.gn
index 829ece9..80b0cb8 100644
--- a/src/profiling/perf/BUILD.gn
+++ b/src/profiling/perf/BUILD.gn
@@ -18,9 +18,6 @@
assert(enable_perfetto_traced_perf)
-# TODO(rsavitski): only building in-tree at the moment (so this build file is
-# only used for gen_android_bp, expect bitrot).
-
executable("traced_perf") {
deps = [
":traced_perf_main",
@@ -43,8 +40,11 @@
}
source_set("producer") {
- deps = [
+ public_deps = [
":unwind_support",
+ "../../../include/perfetto/tracing/core",
+ ]
+ deps = [
"../../../gn:default_deps",
"../../../protos/perfetto/config:cpp",
"../../../protos/perfetto/config/profiling:zero",
@@ -53,7 +53,6 @@
"../../../src/base:unix_socket",
"../../../src/tracing/ipc/producer",
]
- public_deps = [ "../../../include/perfetto/tracing/core" ]
sources = [
"event_config.h",
"event_reader.cc",
@@ -64,9 +63,10 @@
}
source_set("unwind_support") {
+ public_deps = [ "../../../gn:libunwindstack" ]
deps = [
+ "../../../gn:bionic_kernel_uapi_headers",
"../../../gn:default_deps",
- "../../../gn:libunwindstack",
"../../../src/base",
]
sources = [
diff --git a/src/profiling/perf/event_reader.cc b/src/profiling/perf/event_reader.cc
index a2d0e87..f7b297f 100644
--- a/src/profiling/perf/event_reader.cc
+++ b/src/profiling/perf/event_reader.cc
@@ -56,7 +56,8 @@
// cpu-scoped?
base::ScopedFile PerfEventOpen(const EventConfig& event_cfg) {
base::ScopedFile perf_fd{
- perf_event_open(event_cfg.perf_attr(), /*pid=*/-1, event_cfg.target_cpu(),
+ perf_event_open(event_cfg.perf_attr(), /*pid=*/-1,
+ static_cast<int>(event_cfg.target_cpu()),
/*group_fd=*/-1, PERF_FLAG_FD_CLOEXEC)};
return perf_fd;
}
diff --git a/src/profiling/perf/unwind_support.cc b/src/profiling/perf/unwind_support.cc
index 47dc3c0..a3cef1b 100644
--- a/src/profiling/perf/unwind_support.cc
+++ b/src/profiling/perf/unwind_support.cc
@@ -28,13 +28,13 @@
#include <unwindstack/Regs.h>
#include <unwindstack/RegsArm.h>
#include <unwindstack/RegsArm64.h>
+#include <unwindstack/RegsX86.h>
+#include <unwindstack/RegsX86_64.h>
#include <unwindstack/UserArm.h>
#include <unwindstack/UserArm64.h>
+#include <unwindstack/UserX86.h>
+#include <unwindstack/UserX86_64.h>
-// TODO(rsavitski): this includes the kernel uapi constant definitions (for
-// register sampling). For now hardcoded for in-tree builds (specifically,
-// bionic/include/kernel/). Standalone builds will need to source the headers
-// from elsewhere (without depending on the host machine's system headers).
#include <uapi/asm-arm/asm/perf_regs.h>
#include <uapi/asm-x86/asm/perf_regs.h>
#define perf_event_arm_regs perf_event_arm64_regs
@@ -46,6 +46,10 @@
namespace {
+constexpr size_t constexpr_max(size_t x, size_t y) {
+ return x > y ? x : y;
+}
+
template <typename T>
const char* ReadValue(T* value_out, const char* ptr) {
memcpy(value_out, reinterpret_cast<const void*>(ptr), sizeof(T));
@@ -57,17 +61,30 @@
// * 64 bit daemon, mixed bitness userspace
// Therefore give the kernel the mask corresponding to our build architecture.
// Register parsing handles the mixed userspace ABI cases.
+// For simplicity, we ask for as many registers as we can, even if not all of
+// them will be used during unwinding.
// TODO(rsavitski): cleanly detect 32 bit builds being side-loaded onto a system
// with 64 bit userspace processes.
uint64_t PerfUserRegsMask(unwindstack::ArchEnum arch) {
- // TODO(rsavitski): support the rest of the architectures.
- switch (arch) {
+ switch (static_cast<uint8_t>(arch)) { // cast to please -Wswitch-enum
case unwindstack::ARCH_ARM64:
return (1ULL << PERF_REG_ARM64_MAX) - 1;
case unwindstack::ARCH_ARM:
return ((1ULL << PERF_REG_ARM_MAX) - 1);
+ // perf on x86_64 doesn't allow sampling ds/es/fs/gs registers. See
+ // arch/x86/kernel/perf_regs.c in the kernel.
+ case unwindstack::ARCH_X86_64:
+ return (((1ULL << PERF_REG_X86_64_MAX) - 1) & ~(1ULL << PERF_REG_X86_DS) &
+ ~(1ULL << PERF_REG_X86_ES) & ~(1ULL << PERF_REG_X86_FS) &
+ ~(1ULL << PERF_REG_X86_GS));
+ // Note: excluding these segment registers might not be necessary on 32-bit
+ // x86, but they won't be used anyway (so follow x86_64).
+ case unwindstack::ARCH_X86:
+ return ((1ULL << PERF_REG_X86_32_MAX) - 1) & ~(1ULL << PERF_REG_X86_DS) &
+ ~(1ULL << PERF_REG_X86_ES) & ~(1ULL << PERF_REG_X86_FS) &
+ ~(1ULL << PERF_REG_X86_GS);
default:
- PERFETTO_FATAL("Unsupported architecture (work in progress)");
+ PERFETTO_FATAL("Unsupported architecture");
}
}
@@ -86,59 +103,100 @@
// Register values as an array, indexed using the kernel uapi perf_events.h enum
// values. Unsampled values will be left as zeroes.
-// TODO(rsavitski): support all relevant architectures (allocate enough space
-// for the widest register bank).
struct RawRegisterData {
- static constexpr uint64_t kMaxSize = PERF_REG_ARM64_MAX;
+ static constexpr uint64_t kMaxSize =
+ constexpr_max(PERF_REG_ARM64_MAX,
+ constexpr_max(PERF_REG_ARM_MAX, PERF_REG_X86_64_MAX));
uint64_t regs[kMaxSize] = {};
};
+// First converts the |RawRegisterData| array to libunwindstack's "user"
+// register structs (which match the ptrace/coredump format, also available at
+// <sys/user.h>), then constructs the relevant unwindstack::Regs subclass out
+// of the latter.
std::unique_ptr<unwindstack::Regs> ToLibUnwindstackRegs(
const RawRegisterData& raw_regs,
unwindstack::ArchEnum arch) {
- // First converts the |RawRegisterData| array to libunwindstack's raw register
- // format, then constructs the relevant unwindstack::Regs subclass out of the
- // latter.
if (arch == unwindstack::ARCH_ARM64) {
static_assert(static_cast<int>(unwindstack::ARM64_REG_R0) ==
- static_cast<int>(PERF_REG_ARM64_X0),
+ static_cast<int>(PERF_REG_ARM64_X0) &&
+ static_cast<int>(unwindstack::ARM64_REG_R0) == 0,
"register layout mismatch");
static_assert(static_cast<int>(unwindstack::ARM64_REG_R30) ==
static_cast<int>(PERF_REG_ARM64_LR),
"register layout mismatch");
-
- unwindstack::arm64_user_regs arm64_user_regs;
- memset(&arm64_user_regs, 0, sizeof(arm64_user_regs));
- memcpy(&arm64_user_regs.regs[unwindstack::ARM64_REG_R0],
- &raw_regs.regs[PERF_REG_ARM64_X0],
- sizeof(uint64_t) * (PERF_REG_ARM64_LR - PERF_REG_ARM64_X0 + 1));
+ // Both the perf_event register order and the "user" format are derived from
+ // "struct pt_regs", so we can directly memcpy the first 31 regs (up to and
+ // including LR).
+ unwindstack::arm64_user_regs arm64_user_regs = {};
+ memcpy(&arm64_user_regs.regs[0], &raw_regs.regs[0],
+ sizeof(uint64_t) * (PERF_REG_ARM64_LR + 1));
arm64_user_regs.sp = raw_regs.regs[PERF_REG_ARM64_SP];
arm64_user_regs.pc = raw_regs.regs[PERF_REG_ARM64_PC];
-
return std::unique_ptr<unwindstack::Regs>(
unwindstack::RegsArm64::Read(&arm64_user_regs));
}
if (arch == unwindstack::ARCH_ARM) {
static_assert(static_cast<int>(unwindstack::ARM_REG_R0) ==
- static_cast<int>(PERF_REG_ARM_R0),
+ static_cast<int>(PERF_REG_ARM_R0) &&
+ static_cast<int>(unwindstack::ARM_REG_R0) == 0,
"register layout mismatch");
static_assert(static_cast<int>(unwindstack::ARM_REG_LAST) ==
static_cast<int>(PERF_REG_ARM_MAX),
"register layout mismatch");
-
- unwindstack::arm_user_regs arm_user_regs;
- memset(&arm_user_regs, 0, sizeof(arm_user_regs));
- for (size_t i = unwindstack::ARM_REG_R0; i < unwindstack::ARM_REG_LAST;
- i++) {
+ // As with arm64, the layouts match, but we need to downcast to u32.
+ unwindstack::arm_user_regs arm_user_regs = {};
+ for (size_t i = 0; i < unwindstack::ARM_REG_LAST; i++) {
arm_user_regs.regs[i] = static_cast<uint32_t>(raw_regs.regs[i]);
}
-
return std::unique_ptr<unwindstack::Regs>(
unwindstack::RegsArm::Read(&arm_user_regs));
}
- PERFETTO_FATAL("Unsupported architecture (work in progress)");
+ if (arch == unwindstack::ARCH_X86_64) {
+ // We've sampled more registers than what libunwindstack will use. Don't
+ // copy over cs/ss/flags.
+ unwindstack::x86_64_user_regs x86_64_user_regs = {};
+ x86_64_user_regs.rax = raw_regs.regs[PERF_REG_X86_AX];
+ x86_64_user_regs.rbx = raw_regs.regs[PERF_REG_X86_BX];
+ x86_64_user_regs.rcx = raw_regs.regs[PERF_REG_X86_CX];
+ x86_64_user_regs.rdx = raw_regs.regs[PERF_REG_X86_DX];
+ x86_64_user_regs.r8 = raw_regs.regs[PERF_REG_X86_R8];
+ x86_64_user_regs.r9 = raw_regs.regs[PERF_REG_X86_R9];
+ x86_64_user_regs.r10 = raw_regs.regs[PERF_REG_X86_R10];
+ x86_64_user_regs.r11 = raw_regs.regs[PERF_REG_X86_R11];
+ x86_64_user_regs.r12 = raw_regs.regs[PERF_REG_X86_R12];
+ x86_64_user_regs.r13 = raw_regs.regs[PERF_REG_X86_R13];
+ x86_64_user_regs.r14 = raw_regs.regs[PERF_REG_X86_R14];
+ x86_64_user_regs.r15 = raw_regs.regs[PERF_REG_X86_R15];
+ x86_64_user_regs.rdi = raw_regs.regs[PERF_REG_X86_DI];
+ x86_64_user_regs.rsi = raw_regs.regs[PERF_REG_X86_SI];
+ x86_64_user_regs.rbp = raw_regs.regs[PERF_REG_X86_BP];
+ x86_64_user_regs.rsp = raw_regs.regs[PERF_REG_X86_SP];
+ x86_64_user_regs.rip = raw_regs.regs[PERF_REG_X86_IP];
+ return std::unique_ptr<unwindstack::Regs>(
+ unwindstack::RegsX86_64::Read(&x86_64_user_regs));
+ }
+
+ if (arch == unwindstack::ARCH_X86) {
+ // We've sampled more registers than what libunwindstack will use. Don't
+ // copy over cs/ss/flags.
+ unwindstack::x86_user_regs x86_user_regs = {};
+ x86_user_regs.eax = static_cast<uint32_t>(raw_regs.regs[PERF_REG_X86_AX]);
+ x86_user_regs.ebx = static_cast<uint32_t>(raw_regs.regs[PERF_REG_X86_BX]);
+ x86_user_regs.ecx = static_cast<uint32_t>(raw_regs.regs[PERF_REG_X86_CX]);
+ x86_user_regs.edx = static_cast<uint32_t>(raw_regs.regs[PERF_REG_X86_DX]);
+ x86_user_regs.ebp = static_cast<uint32_t>(raw_regs.regs[PERF_REG_X86_BP]);
+ x86_user_regs.edi = static_cast<uint32_t>(raw_regs.regs[PERF_REG_X86_DI]);
+ x86_user_regs.esi = static_cast<uint32_t>(raw_regs.regs[PERF_REG_X86_SI]);
+ x86_user_regs.esp = static_cast<uint32_t>(raw_regs.regs[PERF_REG_X86_SP]);
+ x86_user_regs.eip = static_cast<uint32_t>(raw_regs.regs[PERF_REG_X86_IP]);
+ return std::unique_ptr<unwindstack::Regs>(
+ unwindstack::RegsX86::Read(&x86_user_regs));
+ }
+
+ PERFETTO_FATAL("Unsupported architecture");
}
} // namespace
@@ -164,7 +222,7 @@
RawRegisterData raw_regs{};
uint64_t regs_mask = PerfUserRegsMaskForCurrentArch();
for (size_t i = 0; regs_mask && (i < RawRegisterData::kMaxSize); i++) {
- if (regs_mask & (1u << i)) {
+ if (regs_mask & (1ULL << i)) {
parse_pos = ReadValue(&raw_regs.regs[i], parse_pos);
}
}
@@ -178,6 +236,7 @@
// the PC into the R15 slot, and treat the resulting RawRegisterData as an
// arm32 register bank. See "Fundamentals of ARMv8-A" (ARM DOC
// 100878_0100_en), page 28.
+ // x86-64 doesn't need any such fixups.
if (requested_arch == unwindstack::ARCH_ARM64 &&
sampled_abi == PERF_SAMPLE_REGS_ABI_32) {
raw_regs.regs[PERF_REG_ARM_PC] = raw_regs.regs[PERF_REG_ARM64_PC];
diff --git a/tools/gen_android_bp b/tools/gen_android_bp
index bf15131..7c1c444 100755
--- a/tools/gen_android_bp
+++ b/tools/gen_android_bp
@@ -171,9 +171,6 @@
('required', {'libperfetto_android_internal', 'trigger_perfetto'}),
],
'libperfetto_android_internal': [('static_libs', {'libhealthhalutils'}),],
- 'traced_perf': [
- ('include_dirs', {'bionic/libc/kernel'}),
- ],
'trace_processor_shell': [
('dist', {'targets': ['sdk_repo']}),
('stl', 'libc++_static'),
@@ -231,6 +228,10 @@
module.shared_libs.add('libz')
+def enable_uapi_headers(module):
+ module.include_dirs.add('bionic/libc/kernel')
+
+
# Android equivalents for third-party libraries that the upstream project
# depends on.
builtin_deps = {
@@ -245,6 +246,7 @@
'//gn:libunwindstack': enable_libunwindstack,
'//gn:sqlite': enable_sqlite,
'//gn:zlib': enable_zlib,
+ '//gn:bionic_kernel_uapi_headers' : enable_uapi_headers,
}
# ----------------------------------------------------------------------------