Merge "traced_perf: re-enable linux build + add x86 regs handling"
diff --git a/buildtools/BUILD.gn b/buildtools/BUILD.gn
index e15a5e5..2ef83f4 100644
--- a/buildtools/BUILD.gn
+++ b/buildtools/BUILD.gn
@@ -999,6 +999,14 @@
   public_configs = [ ":libunwindstack_config" ]
 }
 
+config("bionic_kernel_uapi_headers") {
+  visibility = _buildtools_visibility
+  cflags = [
+    "-isystem",
+    rebase_path("bionic/libc/kernel", root_build_dir),
+  ]
+}
+
 config("jsoncpp_config") {
   visibility = _buildtools_visibility
   cflags = [
diff --git a/gn/BUILD.gn b/gn/BUILD.gn
index 0962c9b..b00f447 100644
--- a/gn/BUILD.gn
+++ b/gn/BUILD.gn
@@ -273,13 +273,20 @@
 }  # if (enable_perfetto_trace_processor_linenoise)
 
 # Only used by src/profiling in standalone and android builds.
-if (enable_perfetto_heapprofd) {
+if (enable_perfetto_heapprofd || enable_perfetto_traced_perf) {
   group("libunwindstack") {
     public_configs = [ "//buildtools:libunwindstack_config" ]
     public_deps = [ "//buildtools:libunwindstack" ]
   }
 }
 
+# Used by src/profiling/perf for perf_regs.h.
+if (enable_perfetto_traced_perf) {
+  group("bionic_kernel_uapi_headers") {
+    public_configs = [ "//buildtools:bionic_kernel_uapi_headers" ]
+  }
+}
+
 # Zlib is used both by trace_processor and by perfetto_cmd.
 if (enable_perfetto_zlib) {
   group("zlib") {
diff --git a/gn/perfetto.gni b/gn/perfetto.gni
index 828a493..e6ed051 100644
--- a/gn/perfetto.gni
+++ b/gn/perfetto.gni
@@ -153,7 +153,11 @@
 
   # Build the perf event profiler (traced_perf).
   # TODO(b/144281346): under development.
-  enable_perfetto_traced_perf = perfetto_build_with_android
+  # TODO(rsavitski): figure out how to make the android-core dependencies build
+  # under gcc (_Atomic and other issues).
+  enable_perfetto_traced_perf =
+      perfetto_build_with_android ||
+      (perfetto_build_standalone && is_clang && is_linux)
 
   # The Trace Processor: offline analytical engine to process traces and compute
   # metrics using a SQL engine.
diff --git a/src/profiling/perf/BUILD.gn b/src/profiling/perf/BUILD.gn
index 829ece9..80b0cb8 100644
--- a/src/profiling/perf/BUILD.gn
+++ b/src/profiling/perf/BUILD.gn
@@ -18,9 +18,6 @@
 
 assert(enable_perfetto_traced_perf)
 
-# TODO(rsavitski): only building in-tree at the moment (so this build file is
-# only used for gen_android_bp, expect bitrot).
-
 executable("traced_perf") {
   deps = [
     ":traced_perf_main",
@@ -43,8 +40,11 @@
 }
 
 source_set("producer") {
-  deps = [
+  public_deps = [
     ":unwind_support",
+    "../../../include/perfetto/tracing/core",
+  ]
+  deps = [
     "../../../gn:default_deps",
     "../../../protos/perfetto/config:cpp",
     "../../../protos/perfetto/config/profiling:zero",
@@ -53,7 +53,6 @@
     "../../../src/base:unix_socket",
     "../../../src/tracing/ipc/producer",
   ]
-  public_deps = [ "../../../include/perfetto/tracing/core" ]
   sources = [
     "event_config.h",
     "event_reader.cc",
@@ -64,9 +63,10 @@
 }
 
 source_set("unwind_support") {
+  public_deps = [ "../../../gn:libunwindstack" ]
   deps = [
+    "../../../gn:bionic_kernel_uapi_headers",
     "../../../gn:default_deps",
-    "../../../gn:libunwindstack",
     "../../../src/base",
   ]
   sources = [
diff --git a/src/profiling/perf/event_reader.cc b/src/profiling/perf/event_reader.cc
index a2d0e87..f7b297f 100644
--- a/src/profiling/perf/event_reader.cc
+++ b/src/profiling/perf/event_reader.cc
@@ -56,7 +56,8 @@
 // cpu-scoped?
 base::ScopedFile PerfEventOpen(const EventConfig& event_cfg) {
   base::ScopedFile perf_fd{
-      perf_event_open(event_cfg.perf_attr(), /*pid=*/-1, event_cfg.target_cpu(),
+      perf_event_open(event_cfg.perf_attr(), /*pid=*/-1,
+                      static_cast<int>(event_cfg.target_cpu()),
                       /*group_fd=*/-1, PERF_FLAG_FD_CLOEXEC)};
   return perf_fd;
 }
diff --git a/src/profiling/perf/unwind_support.cc b/src/profiling/perf/unwind_support.cc
index 47dc3c0..a3cef1b 100644
--- a/src/profiling/perf/unwind_support.cc
+++ b/src/profiling/perf/unwind_support.cc
@@ -28,13 +28,13 @@
 #include <unwindstack/Regs.h>
 #include <unwindstack/RegsArm.h>
 #include <unwindstack/RegsArm64.h>
+#include <unwindstack/RegsX86.h>
+#include <unwindstack/RegsX86_64.h>
 #include <unwindstack/UserArm.h>
 #include <unwindstack/UserArm64.h>
+#include <unwindstack/UserX86.h>
+#include <unwindstack/UserX86_64.h>
 
-// TODO(rsavitski): this includes the kernel uapi constant definitions (for
-// register sampling). For now hardcoded for in-tree builds (specifically,
-// bionic/include/kernel/). Standalone builds will need to source the headers
-// from elsewhere (without depending on the host machine's system headers).
 #include <uapi/asm-arm/asm/perf_regs.h>
 #include <uapi/asm-x86/asm/perf_regs.h>
 #define perf_event_arm_regs perf_event_arm64_regs
@@ -46,6 +46,10 @@
 
 namespace {
 
+constexpr size_t constexpr_max(size_t x, size_t y) {
+  return x > y ? x : y;
+}
+
 template <typename T>
 const char* ReadValue(T* value_out, const char* ptr) {
   memcpy(value_out, reinterpret_cast<const void*>(ptr), sizeof(T));
@@ -57,17 +61,30 @@
 // * 64 bit daemon, mixed bitness userspace
 // Therefore give the kernel the mask corresponding to our build architecture.
 // Register parsing handles the mixed userspace ABI cases.
+// For simplicity, we ask for as many registers as we can, even if not all of
+// them will be used during unwinding.
 // TODO(rsavitski): cleanly detect 32 bit builds being side-loaded onto a system
 // with 64 bit userspace processes.
 uint64_t PerfUserRegsMask(unwindstack::ArchEnum arch) {
-  // TODO(rsavitski): support the rest of the architectures.
-  switch (arch) {
+  switch (static_cast<uint8_t>(arch)) {  // cast to please -Wswitch-enum
     case unwindstack::ARCH_ARM64:
       return (1ULL << PERF_REG_ARM64_MAX) - 1;
     case unwindstack::ARCH_ARM:
       return ((1ULL << PERF_REG_ARM_MAX) - 1);
+    // perf on x86_64 doesn't allow sampling ds/es/fs/gs registers. See
+    // arch/x86/kernel/perf_regs.c in the kernel.
+    case unwindstack::ARCH_X86_64:
+      return (((1ULL << PERF_REG_X86_64_MAX) - 1) & ~(1ULL << PERF_REG_X86_DS) &
+              ~(1ULL << PERF_REG_X86_ES) & ~(1ULL << PERF_REG_X86_FS) &
+              ~(1ULL << PERF_REG_X86_GS));
+    // Note: excluding these segment registers may be unnecessary on 32-bit x86,
+    // but they aren't used for unwinding anyway (so mirror the x86_64 mask).
+    case unwindstack::ARCH_X86:
+      return ((1ULL << PERF_REG_X86_32_MAX) - 1) & ~(1ULL << PERF_REG_X86_DS) &
+             ~(1ULL << PERF_REG_X86_ES) & ~(1ULL << PERF_REG_X86_FS) &
+             ~(1ULL << PERF_REG_X86_GS);
     default:
-      PERFETTO_FATAL("Unsupported architecture (work in progress)");
+      PERFETTO_FATAL("Unsupported architecture");
   }
 }
 
@@ -86,59 +103,100 @@
 
 // Register values as an array, indexed using the kernel uapi perf_events.h enum
 // values. Unsampled values will be left as zeroes.
-// TODO(rsavitski): support all relevant architectures (allocate enough space
-// for the widest register bank).
 struct RawRegisterData {
-  static constexpr uint64_t kMaxSize = PERF_REG_ARM64_MAX;
+  static constexpr uint64_t kMaxSize =
+      constexpr_max(PERF_REG_ARM64_MAX,
+                    constexpr_max(PERF_REG_ARM_MAX, PERF_REG_X86_64_MAX));
   uint64_t regs[kMaxSize] = {};
 };
 
+// First converts the |RawRegisterData| array to libunwindstack's "user"
+// register structs (which match the ptrace/coredump format, also available at
+// <sys/user.h>), then constructs the relevant unwindstack::Regs subclass out
+// of the latter.
 std::unique_ptr<unwindstack::Regs> ToLibUnwindstackRegs(
     const RawRegisterData& raw_regs,
     unwindstack::ArchEnum arch) {
-  // First converts the |RawRegisterData| array to libunwindstack's raw register
-  // format, then constructs the relevant unwindstack::Regs subclass out of the
-  // latter.
   if (arch == unwindstack::ARCH_ARM64) {
     static_assert(static_cast<int>(unwindstack::ARM64_REG_R0) ==
-                      static_cast<int>(PERF_REG_ARM64_X0),
+                          static_cast<int>(PERF_REG_ARM64_X0) &&
+                      static_cast<int>(unwindstack::ARM64_REG_R0) == 0,
                   "register layout mismatch");
     static_assert(static_cast<int>(unwindstack::ARM64_REG_R30) ==
                       static_cast<int>(PERF_REG_ARM64_LR),
                   "register layout mismatch");
-
-    unwindstack::arm64_user_regs arm64_user_regs;
-    memset(&arm64_user_regs, 0, sizeof(arm64_user_regs));
-    memcpy(&arm64_user_regs.regs[unwindstack::ARM64_REG_R0],
-           &raw_regs.regs[PERF_REG_ARM64_X0],
-           sizeof(uint64_t) * (PERF_REG_ARM64_LR - PERF_REG_ARM64_X0 + 1));
+    // Both the perf_event register order and the "user" format are derived from
+    // "struct pt_regs", so we can directly memcpy the first 31 regs (up to and
+    // including LR).
+    unwindstack::arm64_user_regs arm64_user_regs = {};
+    memcpy(&arm64_user_regs.regs[0], &raw_regs.regs[0],
+           sizeof(uint64_t) * (PERF_REG_ARM64_LR + 1));
     arm64_user_regs.sp = raw_regs.regs[PERF_REG_ARM64_SP];
     arm64_user_regs.pc = raw_regs.regs[PERF_REG_ARM64_PC];
-
     return std::unique_ptr<unwindstack::Regs>(
         unwindstack::RegsArm64::Read(&arm64_user_regs));
   }
 
   if (arch == unwindstack::ARCH_ARM) {
     static_assert(static_cast<int>(unwindstack::ARM_REG_R0) ==
-                      static_cast<int>(PERF_REG_ARM_R0),
+                          static_cast<int>(PERF_REG_ARM_R0) &&
+                      static_cast<int>(unwindstack::ARM_REG_R0) == 0,
                   "register layout mismatch");
     static_assert(static_cast<int>(unwindstack::ARM_REG_LAST) ==
                       static_cast<int>(PERF_REG_ARM_MAX),
                   "register layout mismatch");
-
-    unwindstack::arm_user_regs arm_user_regs;
-    memset(&arm_user_regs, 0, sizeof(arm_user_regs));
-    for (size_t i = unwindstack::ARM_REG_R0; i < unwindstack::ARM_REG_LAST;
-         i++) {
+    // As with arm64, the layouts match, but each value is narrowed to u32.
+    unwindstack::arm_user_regs arm_user_regs = {};
+    for (size_t i = 0; i < unwindstack::ARM_REG_LAST; i++) {
       arm_user_regs.regs[i] = static_cast<uint32_t>(raw_regs.regs[i]);
     }
-
     return std::unique_ptr<unwindstack::Regs>(
         unwindstack::RegsArm::Read(&arm_user_regs));
   }
 
-  PERFETTO_FATAL("Unsupported architecture (work in progress)");
+  if (arch == unwindstack::ARCH_X86_64) {
+    // We've sampled more registers than what libunwindstack will use. Don't
+    // copy over cs/ss/flags.
+    unwindstack::x86_64_user_regs x86_64_user_regs = {};
+    x86_64_user_regs.rax = raw_regs.regs[PERF_REG_X86_AX];
+    x86_64_user_regs.rbx = raw_regs.regs[PERF_REG_X86_BX];
+    x86_64_user_regs.rcx = raw_regs.regs[PERF_REG_X86_CX];
+    x86_64_user_regs.rdx = raw_regs.regs[PERF_REG_X86_DX];
+    x86_64_user_regs.r8 = raw_regs.regs[PERF_REG_X86_R8];
+    x86_64_user_regs.r9 = raw_regs.regs[PERF_REG_X86_R9];
+    x86_64_user_regs.r10 = raw_regs.regs[PERF_REG_X86_R10];
+    x86_64_user_regs.r11 = raw_regs.regs[PERF_REG_X86_R11];
+    x86_64_user_regs.r12 = raw_regs.regs[PERF_REG_X86_R12];
+    x86_64_user_regs.r13 = raw_regs.regs[PERF_REG_X86_R13];
+    x86_64_user_regs.r14 = raw_regs.regs[PERF_REG_X86_R14];
+    x86_64_user_regs.r15 = raw_regs.regs[PERF_REG_X86_R15];
+    x86_64_user_regs.rdi = raw_regs.regs[PERF_REG_X86_DI];
+    x86_64_user_regs.rsi = raw_regs.regs[PERF_REG_X86_SI];
+    x86_64_user_regs.rbp = raw_regs.regs[PERF_REG_X86_BP];
+    x86_64_user_regs.rsp = raw_regs.regs[PERF_REG_X86_SP];
+    x86_64_user_regs.rip = raw_regs.regs[PERF_REG_X86_IP];
+    return std::unique_ptr<unwindstack::Regs>(
+        unwindstack::RegsX86_64::Read(&x86_64_user_regs));
+  }
+
+  if (arch == unwindstack::ARCH_X86) {
+    // We've sampled more registers than what libunwindstack will use. Don't
+    // copy over cs/ss/flags.
+    unwindstack::x86_user_regs x86_user_regs = {};
+    x86_user_regs.eax = static_cast<uint32_t>(raw_regs.regs[PERF_REG_X86_AX]);
+    x86_user_regs.ebx = static_cast<uint32_t>(raw_regs.regs[PERF_REG_X86_BX]);
+    x86_user_regs.ecx = static_cast<uint32_t>(raw_regs.regs[PERF_REG_X86_CX]);
+    x86_user_regs.edx = static_cast<uint32_t>(raw_regs.regs[PERF_REG_X86_DX]);
+    x86_user_regs.ebp = static_cast<uint32_t>(raw_regs.regs[PERF_REG_X86_BP]);
+    x86_user_regs.edi = static_cast<uint32_t>(raw_regs.regs[PERF_REG_X86_DI]);
+    x86_user_regs.esi = static_cast<uint32_t>(raw_regs.regs[PERF_REG_X86_SI]);
+    x86_user_regs.esp = static_cast<uint32_t>(raw_regs.regs[PERF_REG_X86_SP]);
+    x86_user_regs.eip = static_cast<uint32_t>(raw_regs.regs[PERF_REG_X86_IP]);
+    return std::unique_ptr<unwindstack::Regs>(
+        unwindstack::RegsX86::Read(&x86_user_regs));
+  }
+
+  PERFETTO_FATAL("Unsupported architecture");
 }
 
 }  // namespace
@@ -164,7 +222,7 @@
   RawRegisterData raw_regs{};
   uint64_t regs_mask = PerfUserRegsMaskForCurrentArch();
   for (size_t i = 0; regs_mask && (i < RawRegisterData::kMaxSize); i++) {
-    if (regs_mask & (1u << i)) {
+    if (regs_mask & (1ULL << i)) {
       parse_pos = ReadValue(&raw_regs.regs[i], parse_pos);
     }
   }
@@ -178,6 +236,7 @@
   // the PC into the R15 slot, and treat the resulting RawRegisterData as an
   // arm32 register bank. See "Fundamentals of ARMv8-A" (ARM DOC
   // 100878_0100_en), page 28.
+  // x86-64 doesn't need any such fixups.
   if (requested_arch == unwindstack::ARCH_ARM64 &&
       sampled_abi == PERF_SAMPLE_REGS_ABI_32) {
     raw_regs.regs[PERF_REG_ARM_PC] = raw_regs.regs[PERF_REG_ARM64_PC];
diff --git a/tools/gen_android_bp b/tools/gen_android_bp
index bf15131..7c1c444 100755
--- a/tools/gen_android_bp
+++ b/tools/gen_android_bp
@@ -171,9 +171,6 @@
         ('required', {'libperfetto_android_internal', 'trigger_perfetto'}),
     ],
     'libperfetto_android_internal': [('static_libs', {'libhealthhalutils'}),],
-    'traced_perf': [
-        ('include_dirs', {'bionic/libc/kernel'}),
-    ],
     'trace_processor_shell': [
       ('dist', {'targets': ['sdk_repo']}),
       ('stl', 'libc++_static'),
@@ -231,6 +228,10 @@
     module.shared_libs.add('libz')
 
 
+def enable_uapi_headers(module):
+  module.include_dirs.add('bionic/libc/kernel')
+
+
 # Android equivalents for third-party libraries that the upstream project
 # depends on.
 builtin_deps = {
@@ -245,6 +246,7 @@
     '//gn:libunwindstack': enable_libunwindstack,
     '//gn:sqlite': enable_sqlite,
     '//gn:zlib': enable_zlib,
+    '//gn:bionic_kernel_uapi_headers' : enable_uapi_headers,
 }
 
 # ----------------------------------------------------------------------------