Merge "Drop unneeded capabilities from heapprofd.rc."
diff --git a/BUILD b/BUILD
index 2298de7..5a5378c 100644
--- a/BUILD
+++ b/BUILD
@@ -972,24 +972,3 @@
     ],
     main = "tools/gen_merged_sql_metrics.py",
 )
-
-load("//security/fuzzing/blaze:cc_fuzz_target.bzl", "cc_fuzz_target")
-
-cc_fuzz_target(
-    name = "trace_parsing_fuzzer",
-    srcs = ["src/trace_processor/trace_parsing_fuzzer.cc"],
-    componentid = 323270,
-    deps = [
-        ":trace_processor",
-        "//third_party/perfetto/protos:trace_processor_cc_proto",
-    ],
-)
-
-cc_fuzz_target(
-    name = "proto_decoder_fuzzer",
-    srcs = ["src/protozero/proto_decoder_fuzzer.cc"],
-    componentid = 323270,
-    deps = [
-        ":trace_processor",
-    ],
-)
diff --git a/BUILD.extras b/BUILD.extras
index d18d9a2..cda25fb 100644
--- a/BUILD.extras
+++ b/BUILD.extras
@@ -17,24 +17,3 @@
     ],
     main = "tools/gen_merged_sql_metrics.py",
 )
-
-load("//security/fuzzing/blaze:cc_fuzz_target.bzl", "cc_fuzz_target")
-
-cc_fuzz_target(
-    name = "trace_parsing_fuzzer",
-    srcs = ["src/trace_processor/trace_parsing_fuzzer.cc"],
-    componentid = 323270,
-    deps = [
-        ":trace_processor",
-        "//third_party/perfetto/protos:trace_processor_cc_proto",
-    ],
-)
-
-cc_fuzz_target(
-    name = "proto_decoder_fuzzer",
-    srcs = ["src/protozero/proto_decoder_fuzzer.cc"],
-    componentid = 323270,
-    deps = [
-        ":trace_processor",
-    ],
-)
diff --git a/PRESUBMIT.py b/PRESUBMIT.py
index 3c30708..60f8dd9 100644
--- a/PRESUBMIT.py
+++ b/PRESUBMIT.py
@@ -53,7 +53,8 @@
   # If no GN files were modified, bail out.
   def build_file_filter(x): return input_api.FilterSourceFile(
       x,
-      white_list=('.*BUILD[.]gn$', '.*[.]gni$', 'tools/gen_bazel'))
+      white_list=('.*BUILD[.]gn$', '.*[.]gni$', 'tools/gen_bazel',
+          'BUILD[.]extras'))
   if not input_api.AffectedSourceFiles(build_file_filter):
     return []
 
diff --git a/docs/heapprofd.md b/docs/heapprofd.md
index 224fd92..8461e4e 100644
--- a/docs/heapprofd.md
+++ b/docs/heapprofd.md
@@ -159,6 +159,17 @@
 always produced. You can create multiple of these dumps, and they will be
 enumerated in the output directory.
 
+## Symbolization
+If the profiled binary or libraries do not have debug symbols, you can use
+pprof to symbolize offline.
+
+To do so, copy versions of your binary and/or libraries that contain debug
+symbols into a directory. Then run
+`PPROF_BINARY_PATH=thatdirectory pprof heap_profile.${n}.${pid}.gz`, and pprof
+will read symbol information from these files.
+
+You can save the symbolized version by issuing the `proto` command in pprof.
+
 ## Idle page tracking
 This is only available in Android versions newer than Q.
 
diff --git a/protos/perfetto/trace/perfetto_trace.proto b/protos/perfetto/trace/perfetto_trace.proto
index d3cc00d..6ef12a6 100644
--- a/protos/perfetto/trace/perfetto_trace.proto
+++ b/protos/perfetto/trace/perfetto_trace.proto
@@ -3514,18 +3514,16 @@
 
 message GpuCounterEvent {
   // The first trace packet of each session should include counter_spec.
-  message GpuCounterSpec {
-    optional uint32 counter_id = 1;
-    optional string name = 2;
-    optional string description = 3;
-  }
-  repeated GpuCounterSpec counter_specs = 1;
+  optional GpuCounterDescriptor counter_descriptor = 1;
 
   message GpuCounter {
     // required. Identifier for counter.
     optional uint32 counter_id = 1;
     // required. Value of the counter.
-    optional int64 value = 2;
+    oneof value {
+      int64 int_value = 2;
+      double double_value = 3;
+    }
   }
   repeated GpuCounter counters = 2;
 }
@@ -3591,6 +3589,21 @@
 
 // End of protos/perfetto/trace/gpu/gpu_render_stage_event.proto
 
+// Begin of protos/perfetto/common/gpu_counter_descriptor.proto
+
+// Description of GPU counters.
+// This message is sent by a GPU counter producer to specify the counters available in the hardware.
+message GpuCounterDescriptor {
+  message GpuCounterSpec {
+    optional uint32 counter_id = 1;
+    optional string name = 2;
+    optional string description = 3;
+  }
+  repeated GpuCounterSpec specs = 1;
+}
+
+// End of protos/perfetto/common/gpu_counter_descriptor.proto
+
 // Begin of protos/perfetto/config/android/android_log_config.proto
 
 message AndroidLogConfig {
diff --git a/src/profiling/memory/bookkeeping.h b/src/profiling/memory/bookkeeping.h
index 7b562f3..a0797fc 100644
--- a/src/profiling/memory/bookkeeping.h
+++ b/src/profiling/memory/bookkeeping.h
@@ -163,18 +163,18 @@
     friend class GlobalCallstackTrie;
 
     Node(Interned<Frame> frame) : Node(std::move(frame), 0, nullptr) {}
-    Node(Interned<Frame> frame, uint32_t id)
+    Node(Interned<Frame> frame, uint64_t id)
         : Node(std::move(frame), id, nullptr) {}
-    Node(Interned<Frame> frame, uint32_t id, Node* parent)
+    Node(Interned<Frame> frame, uint64_t id, Node* parent)
         : id_(id), parent_(parent), location_(std::move(frame)) {}
 
-    uint32_t id() const { return id_; }
+    uint64_t id() const { return id_; }
 
    private:
     Node* GetOrCreateChild(const Interned<Frame>& loc);
 
     uint64_t ref_count_ = 0;
-    uint32_t id_;
+    uint64_t id_;
     Node* const parent_;
     const Interned<Frame> location_;
     base::LookupSet<Node, const Interned<Frame>, &Node::location_> children_;
@@ -200,7 +200,7 @@
   Interner<Mapping> mapping_interner_;
   Interner<Frame> frame_interner_;
 
-  uint32_t next_callstack_id_ = 0;
+  uint64_t next_callstack_id_ = 0;
 
   Node root_{MakeRootFrame(), ++next_callstack_id_};
 };
diff --git a/src/profiling/memory/bookkeeping_dump.cc b/src/profiling/memory/bookkeeping_dump.cc
index 6eebfe4..81a3a46 100644
--- a/src/profiling/memory/bookkeeping_dump.cc
+++ b/src/profiling/memory/bookkeeping_dump.cc
@@ -28,8 +28,27 @@
 uint32_t kPacketSizeThreshold = 400000;
 }  // namespace
 
+void WriteFixedInternings(TraceWriter* trace_writer) {
+  // Intern ID 0 is explicitly reserved for the empty string in each of the
+  // string tables below, so that unset string fields get mapped to it.
+  constexpr const uint8_t kEmptyString[] = "";
+  auto packet = trace_writer->NewTracePacket();
+  auto* interned_data = packet->set_interned_data();
+  auto build_id = interned_data->add_build_ids();
+  build_id->set_iid(0);
+  build_id->set_str(kEmptyString, 0);
+
+  auto mapping_path = interned_data->add_mapping_paths();
+  mapping_path->set_iid(0);
+  mapping_path->set_str(kEmptyString, 0);
+
+  auto function_name = interned_data->add_function_names();
+  function_name->set_iid(0);
+  function_name->set_str(kEmptyString, 0);
+}
+
 void DumpState::WriteMap(const Interned<Mapping> map) {
-  auto map_it_and_inserted = dumped_mappings_.emplace(map.id());
+  auto map_it_and_inserted = intern_state_->dumped_mappings_.emplace(map.id());
   if (map_it_and_inserted.second) {
     for (const Interned<std::string>& str : map->path_components)
       WriteMappingPathString(str);
@@ -52,7 +71,8 @@
   WriteMap(frame->mapping);
   WriteFunctionNameString(frame->function_name);
   bool inserted;
-  std::tie(std::ignore, inserted) = dumped_frames_.emplace(frame.id());
+  std::tie(std::ignore, inserted) =
+      intern_state_->dumped_frames_.emplace(frame.id());
   if (inserted) {
     auto frame_proto = GetCurrentInternedData()->add_frames();
     frame_proto->set_iid(frame.id());
@@ -64,7 +84,8 @@
 
 void DumpState::WriteBuildIDString(const Interned<std::string>& str) {
   bool inserted;
-  std::tie(std::ignore, inserted) = dumped_strings_.emplace(str.id());
+  std::tie(std::ignore, inserted) =
+      intern_state_->dumped_strings_.emplace(str.id());
   if (inserted) {
     auto interned_string = GetCurrentInternedData()->add_build_ids();
     interned_string->set_iid(str.id());
@@ -75,7 +96,8 @@
 
 void DumpState::WriteMappingPathString(const Interned<std::string>& str) {
   bool inserted;
-  std::tie(std::ignore, inserted) = dumped_strings_.emplace(str.id());
+  std::tie(std::ignore, inserted) =
+      intern_state_->dumped_strings_.emplace(str.id());
   if (inserted) {
     auto interned_string = GetCurrentInternedData()->add_mapping_paths();
     interned_string->set_iid(str.id());
@@ -86,7 +108,8 @@
 
 void DumpState::WriteFunctionNameString(const Interned<std::string>& str) {
   bool inserted;
-  std::tie(std::ignore, inserted) = dumped_strings_.emplace(str.id());
+  std::tie(std::ignore, inserted) =
+      intern_state_->dumped_strings_.emplace(str.id());
   if (inserted) {
     auto interned_string = GetCurrentInternedData()->add_function_names();
     interned_string->set_iid(str.id());
@@ -95,17 +118,10 @@
   }
 }
 
-void DumpState::StartProcessDump(
-    std::function<void(protos::pbzero::ProfilePacket::ProcessHeapSamples*)>
-        fill_process_header) {
-  current_process_fill_header_ = std::move(fill_process_header);
-  current_process_heap_samples_ = nullptr;
-  current_process_idle_allocs_.clear();
-}
-
 void DumpState::WriteAllocation(
     const HeapTracker::CallstackAllocations& alloc) {
-  if (dumped_callstacks_.find(alloc.node->id()) == dumped_callstacks_.end())
+  if (intern_state_->dumped_callstacks_.find(alloc.node->id()) ==
+      intern_state_->dumped_callstacks_.end())
     callstacks_to_dump_.emplace(alloc.node);
 
   auto* heap_samples = GetCurrentProcessHeapSamples();
@@ -143,9 +159,8 @@
     for (const Interned<Frame>& frame : built_callstack)
       callstack->add_frame_ids(frame.id());
 
-    dumped_callstacks_.emplace(node->id());
+    intern_state_->dumped_callstacks_.emplace(node->id());
   }
-  callstacks_to_dump_.clear();
   MakeProfilePacket();
 }
 
@@ -153,13 +168,6 @@
   current_process_idle_allocs_[callstack_id] += bytes;
 }
 
-void DumpState::RejectConcurrent(pid_t pid) {
-  ProfilePacket::ProcessHeapSamples* proto =
-      current_profile_packet_->add_process_dumps();
-  proto->set_pid(static_cast<uint64_t>(pid));
-  proto->set_rejected_concurrent(true);
-}
-
 ProfilePacket::ProcessHeapSamples* DumpState::GetCurrentProcessHeapSamples() {
   if (currently_written() > kPacketSizeThreshold) {
     if (current_profile_packet_)
diff --git a/src/profiling/memory/bookkeeping_dump.h b/src/profiling/memory/bookkeeping_dump.h
index 51450d7..2333f15 100644
--- a/src/profiling/memory/bookkeeping_dump.h
+++ b/src/profiling/memory/bookkeeping_dump.h
@@ -35,39 +35,44 @@
 namespace perfetto {
 namespace profiling {
 
+void WriteFixedInternings(TraceWriter* trace_writer);
+
 class DumpState {
  public:
-  DumpState(TraceWriter* trace_writer) : trace_writer_(trace_writer) {
-    MakeTracePacket();
+  class InternState {
+   private:
+    friend class DumpState;
 
-    constexpr const uint8_t kEmptyString[] = "";
-    // Explicitly reserve intern ID 0 for the empty string, so unset string
-    // fields get mapped to this.
-    auto interned_string = GetCurrentInternedData()->add_build_ids();
-    interned_string->set_iid(0);
-    interned_string->set_str(kEmptyString, 0);
+    std::set<InternID> dumped_strings_;
+    std::set<InternID> dumped_frames_;
+    std::set<InternID> dumped_mappings_;
+    std::set<uint64_t> dumped_callstacks_;
 
-    interned_string = GetCurrentInternedData()->add_mapping_paths();
-    interned_string->set_iid(0);
-    interned_string->set_str(kEmptyString, 0);
+    uint64_t next_index_ = 0;
+  };
 
-    interned_string = GetCurrentInternedData()->add_function_names();
-    interned_string->set_iid(0);
-    interned_string->set_str(kEmptyString, 0);
+  DumpState(
+      TraceWriter* trace_writer,
+      std::function<void(protos::pbzero::ProfilePacket::ProcessHeapSamples*)>
+          process_fill_header,
+      InternState* intern_state)
+      : trace_writer_(trace_writer),
+        intern_state_(intern_state),
+        current_process_fill_header_(std::move(process_fill_header)) {
+    MakeProfilePacket();
   }
 
-  void StartDump() { MakeProfilePacket(); }
-
-  void StartProcessDump(
-      std::function<void(protos::pbzero::ProfilePacket::ProcessHeapSamples*)>
-          fill_process_header);
+  // This should be a temporary object, only used on the stack for dumping a
+  // single process.
+  DumpState(const DumpState&) = delete;
+  DumpState& operator=(const DumpState&) = delete;
+  DumpState(DumpState&&) = delete;
+  DumpState& operator=(DumpState&&) = delete;
 
   void AddIdleBytes(uintptr_t callstack_id, uint64_t bytes);
 
   void WriteAllocation(const HeapTracker::CallstackAllocations& alloc);
   void DumpCallstacks(GlobalCallstackTrie* callsites);
-  void RejectConcurrent(pid_t pid);
-  void Finalize() { current_trace_packet_ = TraceWriter::TracePacketHandle(); }
 
  private:
   void WriteMap(const Interned<Mapping> map);
@@ -93,7 +98,7 @@
     MakeTracePacket();
 
     current_profile_packet_ = current_trace_packet_->set_profile_packet();
-    current_profile_packet_->set_index(next_index_++);
+    current_profile_packet_->set_index(intern_state_->next_index_++);
   }
 
   uint64_t currently_written() {
@@ -104,27 +109,20 @@
   GetCurrentProcessHeapSamples();
   protos::pbzero::InternedData* GetCurrentInternedData();
 
-  std::set<InternID> dumped_strings_;
-  std::set<InternID> dumped_frames_;
-  std::set<InternID> dumped_mappings_;
-  std::set<uint64_t> dumped_callstacks_;
-
   std::set<GlobalCallstackTrie::Node*> callstacks_to_dump_;
 
   TraceWriter* trace_writer_;
+  InternState* intern_state_;
 
   protos::pbzero::ProfilePacket* current_profile_packet_ = nullptr;
   protos::pbzero::InternedData* current_interned_data_ = nullptr;
   TraceWriter::TracePacketHandle current_trace_packet_;
   protos::pbzero::ProfilePacket::ProcessHeapSamples*
       current_process_heap_samples_ = nullptr;
-
   std::function<void(protos::pbzero::ProfilePacket::ProcessHeapSamples*)>
       current_process_fill_header_;
-
   std::map<uintptr_t /* callstack_id */, uint64_t> current_process_idle_allocs_;
 
-  uint64_t next_index_ = 0;
   uint64_t last_written_ = 0;
 };
 
diff --git a/src/profiling/memory/client.cc b/src/profiling/memory/client.cc
index fd20cd1..53738f0 100644
--- a/src/profiling/memory/client.cc
+++ b/src/profiling/memory/client.cc
@@ -142,6 +142,8 @@
     prctl(PR_SET_DUMPABLE, 1);
   }
 
+  size_t num_send_fds = kHandshakeSize;
+
   base::ScopedFile maps(base::OpenFile("/proc/self/maps", O_RDONLY));
   if (!maps) {
     PERFETTO_DFATAL_OR_ELOG("Failed to open /proc/self/maps");
@@ -153,16 +155,23 @@
     return nullptr;
   }
 
+  base::ScopedFile page_idle(base::OpenFile("/proc/self/page_idle", O_RDWR));
+  if (!page_idle) {
+    PERFETTO_LOG("Failed to open /proc/self/page_idle. Continuing.");
+    num_send_fds = kHandshakeSize - 1;
+  }
+
   // Restore original dumpability value if we overrode it.
   unset_dumpable.reset();
 
   int fds[kHandshakeSize];
   fds[kHandshakeMaps] = *maps;
   fds[kHandshakeMem] = *mem;
+  fds[kHandshakePageIdle] = *page_idle;
 
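-  // Send an empty record to transfer fds for /proc/self/maps and
-  // /proc/self/mem.
+  // Send an empty record to transfer fds for /proc/self/maps,
+  // /proc/self/mem and (when it could be opened) /proc/self/page_idle.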
-  if (sock.Send(kSingleByte, sizeof(kSingleByte), fds, kHandshakeSize) !=
+  if (sock.Send(kSingleByte, sizeof(kSingleByte), fds, num_send_fds) !=
       sizeof(kSingleByte)) {
     PERFETTO_DFATAL_OR_ELOG("Failed to send file descriptors.");
     return nullptr;
diff --git a/src/profiling/memory/heapprofd_producer.cc b/src/profiling/memory/heapprofd_producer.cc
index f13541b..6952404 100644
--- a/src/profiling/memory/heapprofd_producer.cc
+++ b/src/profiling/memory/heapprofd_producer.cc
@@ -85,22 +85,6 @@
   return i;
 }
 
-base::Optional<PageIdleChecker> MakePageIdleChecker(base::ScopedFile pagemap) {
-  base::Optional<PageIdleChecker> res;
-  if (!pagemap) {
-    PERFETTO_PLOG("Invalid pagemap.");
-    return res;
-  }
-  base::ScopedFile bitmap(
-      base::OpenFile("/sys/kernel/mm/page_idle/bitmap", O_RDWR));
-  if (!bitmap) {
-    PERFETTO_PLOG("Failed to open /sys/kernel/mm/page_idle/bitmap.");
-    return res;
-  }
-  res = PageIdleChecker(std::move(pagemap), std::move(bitmap));
-  return res;
-}
-
 }  // namespace
 
 const uint64_t LogHistogram::kMaxBucket = 0;
@@ -368,6 +352,7 @@
   data_source.config = heapprofd_config;
   data_source.normalized_cmdlines = std::move(normalized_cmdlines);
 
+  WriteFixedInternings(data_source.trace_writer.get());
   data_sources_.emplace(id, std::move(data_source));
   PERFETTO_DLOG("Set up data source.");
 
@@ -517,12 +502,16 @@
   }
   DataSource& data_source = it->second;
 
-  DumpState& dump_state = data_source.dump_state;
-
-  dump_state.StartDump();
-
-  for (pid_t rejected_pid : data_source.rejected_pids)
-    dump_state.RejectConcurrent(rejected_pid);
+  if (!data_source.rejected_pids.empty()) {
+    auto trace_packet = data_source.trace_writer->NewTracePacket();
+    ProfilePacket* profile_packet = trace_packet->set_profile_packet();
+    for (pid_t rejected_pid : data_source.rejected_pids) {
+      ProfilePacket::ProcessHeapSamples* proto =
+          profile_packet->add_process_dumps();
+      proto->set_pid(static_cast<uint64_t>(rejected_pid));
+      proto->set_rejected_concurrent(true);
+    }
+  }
 
   for (std::pair<const pid_t, ProcessState>& pid_and_process_state :
        data_source.process_states) {
@@ -557,7 +546,9 @@
         bucket->set_count(p.second);
       }
     };
-    dump_state.StartProcessDump(std::move(new_heapsamples));
+
+    DumpState dump_state(data_source.trace_writer.get(),
+                         std::move(new_heapsamples), &data_source.intern_state);
 
     if (process_state.page_idle_checker) {
       PageIdleChecker& page_idle_checker = *process_state.page_idle_checker;
@@ -581,10 +572,9 @@
         });
     if (process_state.page_idle_checker)
       process_state.page_idle_checker->MarkPagesIdle();
+    dump_state.DumpCallstacks(&callsites_);
   }
 
-  dump_state.DumpCallstacks(&callsites_);
-  dump_state.Finalize();
 
   if (has_flush_id) {
     auto weak_producer = weak_factory_.GetWeakPtr();
@@ -670,7 +660,9 @@
   char buf[1];
   self->Receive(buf, sizeof(buf), fds, base::ArraySize(fds));
 
-  static_assert(kHandshakeSize == 2, "change if and else if below.");
+  static_assert(kHandshakeSize == 3, "change if and else if below.");
+  // We deliberately do not check for fds[kHandshakePageIdle] so we can
+  // degrade gracefully on kernels that do not have the file yet.
   if (fds[kHandshakeMaps] && fds[kHandshakeMem]) {
     auto ds_it =
         producer_->data_sources_.find(pending_process.data_source_instance_id);
@@ -685,17 +677,15 @@
 
     ProcessState& process_state = it_and_inserted.first->second;
     if (data_source.config.idle_allocations()) {
-      // We have to open this here, because reading the PFN requires
-      // the process that opened the file to have CAP_SYS_ADMIN. We can work
-      // around this by making this a setenforce 0 only feature, giving
-      // heapprofd very broad capabilities (CAP_SYS_ADMIN and CAP_SYS_PTRACE)
-      // which will get rejected by SELinux on real builds.
-      std::string procfs_path =
-          "/proc/" + std::to_string(self->peer_pid()) + "/pagemap";
-      base::ScopedFile pagemap_fd(
-          base::OpenFile(procfs_path.c_str(), O_RDONLY));
-      process_state.page_idle_checker =
-          MakePageIdleChecker(std::move(pagemap_fd));
+      if (fds[kHandshakePageIdle]) {
+        process_state.page_idle_checker =
+            PageIdleChecker(std::move(fds[kHandshakePageIdle]));
+      } else {
+        PERFETTO_ELOG(
+            "Idle page tracking requested but did not receive "
+            "page_idle file. Continuing without idle page tracking. Please "
+            "check your kernel version.");
+      }
     }
 
     PERFETTO_DLOG("%d: Received FDs.", self->peer_pid());
@@ -717,11 +707,12 @@
     producer_->UnwinderForPID(self->peer_pid())
         .PostHandoffSocket(std::move(handoff_data));
     producer_->pending_processes_.erase(it);
-  } else if (fds[kHandshakeMaps] || fds[kHandshakeMem]) {
+  } else if (fds[kHandshakeMaps] || fds[kHandshakeMem] ||
+             fds[kHandshakePageIdle]) {
     PERFETTO_DFATAL_OR_ELOG("%d: Received partial FDs.", self->peer_pid());
     producer_->pending_processes_.erase(it);
   } else {
-    PERFETTO_DLOG("%d: Received no FDs.", self->peer_pid());
+    PERFETTO_ELOG("%d: Received no FDs.", self->peer_pid());
   }
 }
 
diff --git a/src/profiling/memory/heapprofd_producer.h b/src/profiling/memory/heapprofd_producer.h
index 9e97a7d..91e89f0 100644
--- a/src/profiling/memory/heapprofd_producer.h
+++ b/src/profiling/memory/heapprofd_producer.h
@@ -196,7 +196,7 @@
     std::set<pid_t> rejected_pids;
     std::map<pid_t, ProcessState> process_states;
     std::vector<std::string> normalized_cmdlines;
-    DumpState dump_state{trace_writer.get()};
+    DumpState::InternState intern_state;
   };
 
   struct PendingProcess {
diff --git a/src/profiling/memory/malloc_hooks.cc b/src/profiling/memory/malloc_hooks.cc
index 66e0d58..723a341 100644
--- a/src/profiling/memory/malloc_hooks.cc
+++ b/src/profiling/memory/malloc_hooks.cc
@@ -241,8 +241,11 @@
 
   perfetto::base::Optional<perfetto::base::UnixSocketRaw> sock =
       Client::ConnectToHeapprofd(perfetto::profiling::kHeapprofdSocketFile);
-  if (!sock)
+  if (!sock) {
+    PERFETTO_ELOG("Failed to connect to %s.",
+                  perfetto::profiling::kHeapprofdSocketFile);
     return nullptr;
+  }
   return Client::CreateAndHandshake(std::move(sock.value()),
                                     unhooked_allocator);
 }
diff --git a/src/profiling/memory/page_idle_checker.cc b/src/profiling/memory/page_idle_checker.cc
index 10cd1fe..e0f0ef3 100644
--- a/src/profiling/memory/page_idle_checker.cc
+++ b/src/profiling/memory/page_idle_checker.cc
@@ -23,12 +23,8 @@
 
 namespace perfetto {
 namespace profiling {
-namespace {
 
-constexpr uint64_t kIsInRam = 1ULL << 63;
-constexpr uint64_t kRamPhysicalPageMask = ~(~0ULL << 55);
-
-}  // namespace
+// TODO(fmayer): Be smarter about batching reads and writes to page_idle.
 
 int64_t PageIdleChecker::OnIdlePage(uint64_t addr, size_t size) {
   uint64_t page_nr = addr / base::kPageSize;
@@ -39,37 +35,10 @@
     end_page_nr++;
 
   size_t pages = end_page_nr - page_nr;
-  std::vector<uint64_t> virt_page_infos(pages);
-
-  off64_t virt_off = static_cast<off64_t>(page_nr * sizeof(virt_page_infos[0]));
-  size_t virt_rd_size = pages * sizeof(virt_page_infos[0]);
-  ssize_t rd = ReadAtOffsetClobberSeekPos(*pagemap_fd_, &(virt_page_infos[0]),
-                                          virt_rd_size, virt_off);
-  if (rd != static_cast<ssize_t>(virt_rd_size)) {
-    PERFETTO_ELOG("Invalid read from pagemap: %zd", rd);
-    return -1;
-  }
 
   int64_t idle_mem = 0;
-
   for (size_t i = 0; i < pages; ++i) {
-    if (!virt_page_infos[i]) {
-      PERFETTO_DLOG("Empty pageinfo.");
-      continue;
-    }
-
-    if (!(virt_page_infos[i] & kIsInRam)) {
-      PERFETTO_DLOG("Page is not in RAM.");
-      continue;
-    }
-
-    uint64_t phys_page_nr = virt_page_infos[i] & kRamPhysicalPageMask;
-    if (!phys_page_nr) {
-      PERFETTO_ELOG("Failed to get physical page number.");
-      continue;
-    }
-
-    int idle = IsPageIdle(phys_page_nr);
+    int idle = IsPageIdle(page_nr + i);
     if (idle == -1)
       continue;
 
@@ -81,19 +50,19 @@
       else
         idle_mem += base::kPageSize;
     } else {
-      touched_phys_page_nrs_.emplace(phys_page_nr);
+      touched_virt_page_nrs_.emplace(page_nr + i);
     }
   }
   return idle_mem;
 }
 
 void PageIdleChecker::MarkPagesIdle() {
-  for (uint64_t phys_page_nr : touched_phys_page_nrs_)
-    MarkPageIdle(phys_page_nr);
-  touched_phys_page_nrs_.clear();
+  for (uint64_t virt_page_nr : touched_virt_page_nrs_)
+    MarkPageIdle(virt_page_nr);
+  touched_virt_page_nrs_.clear();
 }
 
-void PageIdleChecker::MarkPageIdle(uint64_t phys_page_nr) {
+void PageIdleChecker::MarkPageIdle(uint64_t virt_page_nr) {
   // The file implements a bitmap where each bit corresponds to a memory page.
   // The bitmap is represented by an array of 8-byte integers, and the page at
   // PFN #i is mapped to bit #i%64 of array element #i/64, byte order i
@@ -102,22 +71,22 @@
   // The kernel ORs the value written with the existing bitmap, so we do not
   // override previously written values.
   // See https://www.kernel.org/doc/Documentation/vm/idle_page_tracking.txt
-  off64_t offset = 8 * (phys_page_nr / 64);
-  size_t bit_offset = phys_page_nr % 64;
-  uint64_t bit_pattern = 1 << bit_offset;
-  if (WriteAtOffsetClobberSeekPos(*bitmap_fd_, &bit_pattern,
+  off64_t offset = 8 * (virt_page_nr / 64);
+  size_t bit_offset = virt_page_nr % 64;
+  uint64_t bit_pattern = 1ULL << bit_offset;
+  if (WriteAtOffsetClobberSeekPos(*page_idle_fd_, &bit_pattern,
                                   sizeof(bit_pattern), offset) !=
       static_cast<ssize_t>(sizeof(bit_pattern))) {
     PERFETTO_PLOG("Failed to write bit pattern at %" PRIi64 ".", offset);
   }
 }
 
-int PageIdleChecker::IsPageIdle(uint64_t phys_page_nr) {
-  off64_t offset = 8 * (phys_page_nr / 64);
-  size_t bit_offset = phys_page_nr % 64;
+int PageIdleChecker::IsPageIdle(uint64_t virt_page_nr) {
+  off64_t offset = 8 * (virt_page_nr / 64);
+  size_t bit_offset = virt_page_nr % 64;
   uint64_t bit_pattern;
-  if (ReadAtOffsetClobberSeekPos(*bitmap_fd_, &bit_pattern, sizeof(bit_pattern),
-                                 offset) !=
+  if (ReadAtOffsetClobberSeekPos(*page_idle_fd_, &bit_pattern,
+                                 sizeof(bit_pattern), offset) !=
       static_cast<ssize_t>(sizeof(bit_pattern))) {
     PERFETTO_PLOG("Failed to read bit pattern at %" PRIi64 ".", offset);
     return -1;
diff --git a/src/profiling/memory/page_idle_checker.h b/src/profiling/memory/page_idle_checker.h
index 529c097..f9c73cd 100644
--- a/src/profiling/memory/page_idle_checker.h
+++ b/src/profiling/memory/page_idle_checker.h
@@ -32,8 +32,8 @@
 
 class PageIdleChecker {
  public:
-  PageIdleChecker(base::ScopedFile pagemap_fd, base::ScopedFile bitmap_fd)
-      : pagemap_fd_(std::move(pagemap_fd)), bitmap_fd_(std::move(bitmap_fd)) {}
+  PageIdleChecker(base::ScopedFile page_idle_fd)
+      : page_idle_fd_(std::move(page_idle_fd)) {}
 
   // Return number of bytes of allocation of size bytes starting at alloc that
   // are on unreferenced pages.
@@ -43,14 +43,13 @@
   void MarkPagesIdle();
 
  private:
-  void MarkPageIdle(uint64_t phys_page_nr);
+  void MarkPageIdle(uint64_t virt_page_nr);
   // Return 1 if page is idle, 0 if it is not idle, or -1 on error.
-  int IsPageIdle(uint64_t phys_page_nr);
+  int IsPageIdle(uint64_t virt_page_nr);
 
-  std::set<uint64_t> touched_phys_page_nrs_;
+  std::set<uint64_t> touched_virt_page_nrs_;
 
-  base::ScopedFile pagemap_fd_;
-  base::ScopedFile bitmap_fd_;
+  base::ScopedFile page_idle_fd_;
 };
 
 }  // namespace profiling
diff --git a/src/profiling/memory/wire_protocol.h b/src/profiling/memory/wire_protocol.h
index 656135c..0eb1562 100644
--- a/src/profiling/memory/wire_protocol.h
+++ b/src/profiling/memory/wire_protocol.h
@@ -120,6 +120,7 @@
 enum HandshakeFDs : size_t {
   kHandshakeMaps = 0,
   kHandshakeMem,
+  kHandshakePageIdle,
   kHandshakeSize,
 };
 
diff --git a/src/trace_processor/metrics/android/span_view_stats.sql b/src/trace_processor/metrics/android/span_view_stats.sql
index 1cd845e..dcc5044 100644
--- a/src/trace_processor/metrics/android/span_view_stats.sql
+++ b/src/trace_processor/metrics/android/span_view_stats.sql
@@ -30,9 +30,7 @@
   MIN(span.{{table_name}}_val) AS min_value,
   MAX(span.{{table_name}}_val) AS max_value,
   SUM(span.{{table_name}}_val * span.dur) / SUM(span.dur) AS avg_value
-FROM {{table_name}}_span AS span
-CROSS JOIN process
-WHERE span.upid = process.upid
-AND process.name IS NOT NULL
+FROM {{table_name}}_span AS span JOIN process USING(upid)
+WHERE process.name IS NOT NULL
 GROUP BY 1
 ORDER BY 1;
diff --git a/src/trace_processor/process_table.cc b/src/trace_processor/process_table.cc
index c045a36..e889ec0 100644
--- a/src/trace_processor/process_table.cc
+++ b/src/trace_processor/process_table.cc
@@ -53,15 +53,15 @@
 }
 
 int ProcessTable::BestIndex(const QueryConstraints& qc, BestIndexInfo* info) {
-  info->estimated_cost = static_cast<uint32_t>(storage_->process_count());
-
-  // If the query has a constraint on the |upid| field, return a reduced cost
-  // because we can do that filter efficiently.
-  const auto& constraints = qc.constraints();
-  if (constraints.size() == 1 && constraints.front().iColumn == Column::kUpid) {
-    info->estimated_cost = IsOpEq(constraints.front().op) ? 1 : 10;
-  }
-
+  // An equality constraint on the |upid| field can be filtered efficiently, so
+  // it gets a cost of 1; any other query costs the number of processes.
+  const auto& cs = qc.constraints();
+  const bool has_upid_eq = std::any_of(
+      cs.begin(), cs.end(), [](const QueryConstraints::Constraint& c) {
+        return c.iColumn == Column::kUpid && sqlite_utils::IsOpEq(c.op);
+      });
+  info->estimated_cost =
+      has_upid_eq ? 1 : static_cast<uint32_t>(storage_->process_count());
   return SQLITE_OK;
 }
 
diff --git a/test/cts/README.md b/test/cts/README.md
index 27670de..c76856d 100644
--- a/test/cts/README.md
+++ b/test/cts/README.md
@@ -1,5 +1,4 @@
-This directory contains the CTS tests for the Perfetto library (at the time of
-writing - a single native GTest suite, and several helper apps).
+This directory contains the CTS tests for the Perfetto library.
 
 # Background
 For information about what CTS is, please go to
@@ -7,7 +6,8 @@
 on the purpose of CTS and how to run these tests.
 
 # Test contents
-The single GTest target contains the following notable test suites:
+The single GTest target (CtsPerfettoTestCases) contains the following notable
+test suites:
 * PerfettoCtsTest - verifies that any Android app can operate as a perfetto
   producer.
 * HeapprofdCtsTest - verifies that Android apps can be heap-profiled, and that
diff --git a/tools/gen_merged_protos b/tools/gen_merged_protos
index e1a6a67..5f68999 100755
--- a/tools/gen_merged_protos
+++ b/tools/gen_merged_protos
@@ -92,6 +92,7 @@
   'protos/perfetto/trace/trigger.proto',
   'protos/perfetto/trace/gpu/gpu_counter_event.proto',
   'protos/perfetto/trace/gpu/gpu_render_stage_event.proto',
+  'protos/perfetto/common/gpu_counter_descriptor.proto',
 )
 
 MERGED_TRACE_PROTO = 'protos/perfetto/trace/perfetto_trace.proto'