Merge "Do not report errors for unsupported trace points"
diff --git a/Android.bp b/Android.bp
index 515e736..cbbed46 100644
--- a/Android.bp
+++ b/Android.bp
@@ -18,13 +18,14 @@
   name: "gen_merged_sql_metrics",
   srcs: [
     "src/trace_processor/metrics/android/android_mem.sql",
+    "src/trace_processor/metrics/android/android_mem_lmk.sql",
   ],
-  cmd: "$(location tools/gen_merged_sql_metrics) --cpp_out=$(out) $(in)",
+  cmd: "$(location tools/gen_merged_sql_metrics.py) --cpp_out=$(out) $(in)",
   out: [
     "src/trace_processor/metrics/sql_metrics.h",
   ],
   tool_files: [
-    "tools/gen_merged_sql_metrics",
+    "tools/gen_merged_sql_metrics.py",
   ],
 }
 
@@ -3214,7 +3215,9 @@
     "src/trace_processor/fuchsia_trace_parser.cc",
     "src/trace_processor/fuchsia_trace_tokenizer.cc",
     "src/trace_processor/fuchsia_trace_utils.cc",
+    "src/trace_processor/heap_profile_tracker.cc",
     "src/trace_processor/instants_table.cc",
+    "src/trace_processor/metrics/metrics.cc",
     "src/trace_processor/process_table.cc",
     "src/trace_processor/process_tracker.cc",
     "src/trace_processor/proto_trace_parser.cc",
diff --git a/BUILD b/BUILD
index d54ab5e..80a7419 100644
--- a/BUILD
+++ b/BUILD
@@ -25,11 +25,12 @@
     name = "gen_merged_sql_metrics",
     srcs = [
         "src/trace_processor/metrics/android/android_mem.sql",
+        "src/trace_processor/metrics/android/android_mem_lmk.sql",
     ],
-    cmd = "$(location gen_merged_sql_metrics_py) --cpp_out=$@ $SRCS",
     outs = [
         "src/trace_processor/metrics/sql_metrics.h",
     ],
+    cmd = "$(location gen_merged_sql_metrics_py) --cpp_out=$@ $(SRCS)",
     tools = [
         "gen_merged_sql_metrics_py",
     ],
@@ -178,6 +179,8 @@
         "src/trace_processor/fuchsia_trace_tokenizer.h",
         "src/trace_processor/fuchsia_trace_utils.cc",
         "src/trace_processor/fuchsia_trace_utils.h",
+        "src/trace_processor/heap_profile_tracker.cc",
+        "src/trace_processor/heap_profile_tracker.h",
         "src/trace_processor/instants_table.cc",
         "src/trace_processor/instants_table.h",
         "src/trace_processor/json_trace_parser.cc",
@@ -186,6 +189,9 @@
         "src/trace_processor/json_trace_tokenizer.h",
         "src/trace_processor/json_trace_utils.cc",
         "src/trace_processor/json_trace_utils.h",
+        "src/trace_processor/metrics/metrics.cc",
+        "src/trace_processor/metrics/metrics.h",
+        "src/trace_processor/metrics/sql_metrics.h",
         "src/trace_processor/null_term_string_view.h",
         "src/trace_processor/process_table.cc",
         "src/trace_processor/process_table.h",
@@ -424,6 +430,8 @@
         "src/trace_processor/fuchsia_trace_tokenizer.h",
         "src/trace_processor/fuchsia_trace_utils.cc",
         "src/trace_processor/fuchsia_trace_utils.h",
+        "src/trace_processor/heap_profile_tracker.cc",
+        "src/trace_processor/heap_profile_tracker.h",
         "src/trace_processor/instants_table.cc",
         "src/trace_processor/instants_table.h",
         "src/trace_processor/json_trace_parser.cc",
@@ -432,6 +440,9 @@
         "src/trace_processor/json_trace_tokenizer.h",
         "src/trace_processor/json_trace_utils.cc",
         "src/trace_processor/json_trace_utils.h",
+        "src/trace_processor/metrics/metrics.cc",
+        "src/trace_processor/metrics/metrics.h",
+        "src/trace_processor/metrics/sql_metrics.h",
         "src/trace_processor/null_term_string_view.h",
         "src/trace_processor/process_table.cc",
         "src/trace_processor/process_table.h",
@@ -626,6 +637,8 @@
         "src/trace_processor/fuchsia_trace_tokenizer.h",
         "src/trace_processor/fuchsia_trace_utils.cc",
         "src/trace_processor/fuchsia_trace_utils.h",
+        "src/trace_processor/heap_profile_tracker.cc",
+        "src/trace_processor/heap_profile_tracker.h",
         "src/trace_processor/instants_table.cc",
         "src/trace_processor/instants_table.h",
         "src/trace_processor/json_trace_parser.cc",
@@ -634,6 +647,9 @@
         "src/trace_processor/json_trace_tokenizer.h",
         "src/trace_processor/json_trace_utils.cc",
         "src/trace_processor/json_trace_utils.h",
+        "src/trace_processor/metrics/metrics.cc",
+        "src/trace_processor/metrics/metrics.h",
+        "src/trace_processor/metrics/sql_metrics.h",
         "src/trace_processor/null_term_string_view.h",
         "src/trace_processor/process_table.cc",
         "src/trace_processor/process_table.h",
@@ -756,22 +772,21 @@
 )
 
 gensignature(
-    name = "trace_processor_shell_sig",
+    name = "trace_processor_sig",
     srcs = [
         ":trace_processor_shell",
-    ],
-)
-
-gensignature(
-    name = "trace_to_text_sig",
-    srcs = [
         ":trace_to_text",
     ],
+    tags = [
+        "__TRACE_PROCESSOR_SIG_TAG1",
+        "__TRACE_PROCESSOR_SIG_TAG2",
+    ],
 )
 
 py_binary(
-    name = "gen_merged_sql_metrics_py"
+    name = "gen_merged_sql_metrics_py",
     srcs = [
-      "tools/gen_merged_sql_metrics"
-    ]
+        "tools/gen_merged_sql_metrics.py",
+    ],
+    main = "tools/gen_merged_sql_metrics.py",
 )
diff --git a/BUILD.extras b/BUILD.extras
index 1139c44..cda25fb 100644
--- a/BUILD.extras
+++ b/BUILD.extras
@@ -1,20 +1,19 @@
 gensignature(
-    name = "trace_processor_shell_sig",
+    name = "trace_processor_sig",
     srcs = [
         ":trace_processor_shell",
-    ],
-)
-
-gensignature(
-    name = "trace_to_text_sig",
-    srcs = [
         ":trace_to_text",
     ],
+    tags = [
+        "__TRACE_PROCESSOR_SIG_TAG1",
+        "__TRACE_PROCESSOR_SIG_TAG2",
+    ],
 )
 
 py_binary(
-    name = "gen_merged_sql_metrics_py"
+    name = "gen_merged_sql_metrics_py",
     srcs = [
-      "tools/gen_merged_sql_metrics"
-    ]
+        "tools/gen_merged_sql_metrics.py",
+    ],
+    main = "tools/gen_merged_sql_metrics.py",
 )
diff --git a/docs/heapprofd.md b/docs/heapprofd.md
index f4faa29..a34334b 100644
--- a/docs/heapprofd.md
+++ b/docs/heapprofd.md
@@ -150,12 +150,48 @@
 
 ### Profile is empty
 Check whether your target process is eligible to be profiled by consulting
-[Target process](#Target_process) above.
+[Target processes](#target-processes) above.
 
 ## Known Issues
 
 * Does not work on x86 platforms (including the Android cuttlefish emulator).
 
+## Ways to count memory
+
+When using heapprofd and interpreting results, it is important to know the
+precise meaning of the different memory metrics that can be obtained from the
+operating system.
+
+**heapprofd** gives you the number of bytes the target program
+requested from the allocator. If you are profiling a Java app from startup,
+allocations that happen early in the application's initialization will not be
+visible to heapprofd. Native services that do not fork from the Zygote
+are not affected by this.
+
+**malloc\_info** is a libc function that gives you information about the
+allocator. This can be triggered on userdebug builds by using
+`am dumpheap -m <PID> /data/local/tmp/heap.txt`. This will in general be more
+than the memory seen by heapprofd because, depending on the allocator, not all
+memory is immediately freed. In particular, jemalloc retains some freed memory
+in thread caches.
+
+**Heap RSS** is the amount of memory requested from the operating system by the
+allocator. This is larger than the previous two numbers because memory can only
+be obtained in page size chunks, and fragmentation causes some of that memory to
+be wasted. This can be obtained by running `adb shell dumpsys meminfo <PID>` and
+looking at the "Private Dirty" column.
+
+|                     | heapprofd         | malloc\_info | RSS |
+|---------------------|-------------------|--------------|-----|
+| from native startup |          x        |      x       |  x  |
+| after zygote init   |          x        |      x       |  x  |
+| before zygote init  |                   |      x       |  x  |
+| thread caches       |                   |      x       |  x  |
+| fragmentation       |                   |              |  x  |
+
+If you observe high RSS or malloc\_info metrics but heapprofd does not match,
+there might be a problem with fragmentation or the allocator.
+
 ## Manual instructions
 *It is not recommended to use these instructions unless you have advanced
 requirements or are developing heapprofd. Proceed with caution*
diff --git a/include/perfetto/trace_processor/trace_processor.h b/include/perfetto/trace_processor/trace_processor.h
index e6751e8..13be4dd 100644
--- a/include/perfetto/trace_processor/trace_processor.h
+++ b/include/perfetto/trace_processor/trace_processor.h
@@ -95,7 +95,8 @@
 
   // Executes a SQLite query on the loaded portion of the trace. The returned
   // iterator can be used to load rows from the result.
-  virtual Iterator ExecuteQuery(const std::string& sql) = 0;
+  virtual Iterator ExecuteQuery(const std::string& sql,
+                                int64_t time_queued = 0) = 0;
 
   // Computes the given metrics on the loded portion of the trace. If
   // successful, the output argument |metrics_proto| will be filled with the
diff --git a/include/perfetto/tracing/core/shared_memory_arbiter.h b/include/perfetto/tracing/core/shared_memory_arbiter.h
index 5dc4c03..660014c 100644
--- a/include/perfetto/tracing/core/shared_memory_arbiter.h
+++ b/include/perfetto/tracing/core/shared_memory_arbiter.h
@@ -58,12 +58,12 @@
   //
   // All StartupTraceWriters created by the registry are bound to the arbiter
   // and the given target buffer. The writers may not be bound immediately if
-  // they are concurrently being written to. The registry will retry on the
-  // arbiter's TaskRunner until all writers were bound successfully.
+  // they are concurrently being written to or if this method isn't called on
+  // the arbiter's TaskRunner. The registry will retry on the arbiter's
+  // TaskRunner until all writers were bound successfully.
   //
-  // Should only be called on the passed TaskRunner's sequence. By calling this
-  // method, the registry's ownership is transferred to the arbiter. The arbiter
-  // will delete the registry once all writers were bound.
+  // By calling this method, the registry's ownership is transferred to the
+  // arbiter. The arbiter will delete the registry once all writers were bound.
   //
   // TODO(eseckler): Make target buffer assignment more flexible (i.e. per
   // writer). For now, embedders can use multiple registries instead.
diff --git a/include/perfetto/tracing/core/startup_trace_writer_registry.h b/include/perfetto/tracing/core/startup_trace_writer_registry.h
index db19862..df48c5f 100644
--- a/include/perfetto/tracing/core/startup_trace_writer_registry.h
+++ b/include/perfetto/tracing/core/startup_trace_writer_registry.h
@@ -91,8 +91,8 @@
   // concurrently being written to. The registry will retry on the passed
   // TaskRunner until all writers were bound successfully.
   //
-  // Calls |on_bound_callback| asynchronously on |trace_writer| once all writers
-  // were bound.
+  // Calls |on_bound_callback| asynchronously on the passed TaskRunner once all
+  // writers were bound.
   void BindToArbiter(
       SharedMemoryArbiterImpl*,
       BufferID target_buffer,
diff --git a/protos/perfetto/trace/interned_data/interned_data.proto b/protos/perfetto/trace/interned_data/interned_data.proto
index 266b061..4120e81 100644
--- a/protos/perfetto/trace/interned_data/interned_data.proto
+++ b/protos/perfetto/trace/interned_data/interned_data.proto
@@ -32,7 +32,7 @@
 // The writer will emit new entries when it encounters new internable values
 // that aren't yet in the index. Data in current and subsequent TracePackets can
 // then refer to the entry by its position (interning ID, abbreviated "iid") in
-// its index.
+// its index. An interning ID with value 0 is considered invalid (not set).
 //
 // Because of the incremental build-up, the interning index will miss data when
 // TracePackets are lost, e.g. because a chunk was overridden in the central
diff --git a/protos/perfetto/trace/perfetto_trace.proto b/protos/perfetto/trace/perfetto_trace.proto
index fc3c02f..2f333e2 100644
--- a/protos/perfetto/trace/perfetto_trace.proto
+++ b/protos/perfetto/trace/perfetto_trace.proto
@@ -2338,7 +2338,7 @@
 // The writer will emit new entries when it encounters new internable values
 // that aren't yet in the index. Data in current and subsequent TracePackets can
 // then refer to the entry by its position (interning ID, abbreviated "iid") in
-// its index.
+// its index. An interning ID with value 0 is considered invalid (not set).
 //
 // Because of the incremental build-up, the interning index will miss data when
 // TracePackets are lost, e.g. because a chunk was overridden in the central
diff --git a/src/base/string_utils.cc b/src/base/string_utils.cc
index 267952b..41df6b6 100644
--- a/src/base/string_utils.cc
+++ b/src/base/string_utils.cc
@@ -16,6 +16,8 @@
 
 #include "perfetto/base/string_utils.h"
 
+#include <algorithm>
+
 #include "perfetto/base/logging.h"
 
 namespace perfetto {
diff --git a/src/profiling/memory/bookkeeping.h b/src/profiling/memory/bookkeeping.h
index 1559459..3ce98df 100644
--- a/src/profiling/memory/bookkeeping.h
+++ b/src/profiling/memory/bookkeeping.h
@@ -208,6 +208,13 @@
     current_trace_packet = trace_writer->NewTracePacket();
     current_profile_packet = current_trace_packet->set_profile_packet();
     current_profile_packet->set_index((*next_index)++);
+
+    // Explicitly reserve intern ID 0 for the empty string, so unset string
+    // fields get mapped to this.
+    auto interned_string = current_profile_packet->add_strings();
+    constexpr const uint8_t kEmptyString[] = "";
+    interned_string->set_id(0);
+    interned_string->set_str(kEmptyString, 0);
   }
 
   void WriteMap(const Interned<Mapping> map);
diff --git a/src/profiling/memory/client.cc b/src/profiling/memory/client.cc
index b05eafa..18801e2 100644
--- a/src/profiling/memory/client.cc
+++ b/src/profiling/memory/client.cc
@@ -247,7 +247,7 @@
 bool Client::RecordMalloc(uint64_t alloc_size,
                           uint64_t total_size,
                           uint64_t alloc_address) {
-  if (getpid() != pid_at_creation_) {
+  if (PERFETTO_UNLIKELY(getpid() != pid_at_creation_)) {
     PERFETTO_LOG("Detected post-fork child situation, stopping profiling.");
     return false;
   }
@@ -306,7 +306,7 @@
 }
 
 bool Client::FlushFreesLocked() {
-  if (getpid() != pid_at_creation_) {
+  if (PERFETTO_UNLIKELY(getpid() != pid_at_creation_)) {
     PERFETTO_LOG("Detected post-fork child situation, stopping profiling.");
     return false;
   }
diff --git a/src/profiling/memory/shared_ring_buffer.cc b/src/profiling/memory/shared_ring_buffer.cc
index 93f075f..f7da664 100644
--- a/src/profiling/memory/shared_ring_buffer.cc
+++ b/src/profiling/memory/shared_ring_buffer.cc
@@ -56,7 +56,7 @@
   base::ScopedFile fd;
 #if PERFETTO_BUILDFLAG(PERFETTO_OS_ANDROID)
   bool is_memfd = false;
-  fd.reset(static_cast<int>(syscall(__NR_memfd_create, "heaprofd_ringbuf",
+  fd.reset(static_cast<int>(syscall(__NR_memfd_create, "heapprofd_ringbuf",
                                     MFD_CLOEXEC | MFD_ALLOW_SEALING)));
   is_memfd = !!fd;
 
diff --git a/src/trace_processor/BUILD.gn b/src/trace_processor/BUILD.gn
index 8bfa043..a1be89e 100644
--- a/src/trace_processor/BUILD.gn
+++ b/src/trace_processor/BUILD.gn
@@ -72,6 +72,8 @@
     "fuchsia_trace_tokenizer.h",
     "fuchsia_trace_utils.cc",
     "fuchsia_trace_utils.h",
+    "heap_profile_tracker.cc",
+    "heap_profile_tracker.h",
     "instants_table.cc",
     "instants_table.h",
     "null_term_string_view.h",
@@ -161,7 +163,7 @@
     "../../protos/perfetto/trace/track_event:zero",
     "../base",
     "../protozero",
-    "metrics:gen_merged_sql_metrics",
+    "metrics:lib",
   ]
   public_deps = [
     "../../include/perfetto/trace_processor",
@@ -219,6 +221,7 @@
     "event_tracker_unittest.cc",
     "filtered_row_index_unittest.cc",
     "ftrace_utils_unittest.cc",
+    "heap_profile_tracker_unittest.cc",
     "null_term_string_view_unittest.cc",
     "process_table_unittest.cc",
     "process_tracker_unittest.cc",
diff --git a/src/trace_processor/heap_profile_tracker.cc b/src/trace_processor/heap_profile_tracker.cc
new file mode 100644
index 0000000..9b9e776
--- /dev/null
+++ b/src/trace_processor/heap_profile_tracker.cc
@@ -0,0 +1,169 @@
+/*
+ * Copyright (C) 2019 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "src/trace_processor/heap_profile_tracker.h"
+
+#include "src/trace_processor/trace_processor_context.h"
+
+#include "perfetto/base/logging.h"
+
+namespace perfetto {
+namespace trace_processor {
+
+// Stores |context| and interns the empty string once up front; FindString()
+// returns that id for source string id 0.
+// NOTE: the empty_ initializer reads context_, so it relies on context_ being
+// declared before empty_ in the class definition.
+HeapProfileTracker::HeapProfileTracker(TraceProcessorContext* context)
+    : context_(context), empty_(context_->storage->InternString({"", 0})) {}
+
+// Defaulted out of line, in the .cc file rather than in the header.
+HeapProfileTracker::~HeapProfileTracker() = default;
+
+// Remembers that source string |id| of profile |pidx| corresponds to the
+// interned string |str|. If the (pidx, id) key is already present the existing
+// entry is left untouched.
+void HeapProfileTracker::AddString(ProfileIndex pidx,
+                                   SourceStringId id,
+                                   StringId str) {
+  const std::pair<ProfileIndex, SourceStringId> key(pidx, id);
+  string_map_.emplace(key, str);
+}
+
+// Resolves the name and build-id strings of |mapping| for profile |pidx|,
+// finds or inserts the corresponding row in the heap profile mappings table
+// and remembers that row under (pidx, id). Returns early, without recording
+// anything, if FindString() cannot resolve either string id.
+void HeapProfileTracker::AddMapping(ProfileIndex pidx,
+                                    SourceMappingId id,
+                                    const SourceMapping& mapping) {
+  // The name is resolved first; the build id is only looked up once the name
+  // has been found.
+  const auto name = FindString(pidx, mapping.name_id);
+  if (!name)
+    return;
+  const auto build = FindString(pidx, mapping.build_id);
+  if (!build)
+    return;
+
+  auto mappings_table = context_->storage->mutable_heap_profile_mappings();
+  const int64_t row = mappings_table->FindOrInsert(
+      build.value(), static_cast<int64_t>(mapping.offset),
+      static_cast<int64_t>(mapping.start), static_cast<int64_t>(mapping.end),
+      static_cast<int64_t>(mapping.load_bias), name.value());
+  mappings_.emplace(std::make_pair(pidx, id), row);
+}
+
+// Resolves the name string and the mapping of |frame| for profile |pidx|,
+// finds or inserts the corresponding row in the heap profile frames table and
+// remembers that row under (pidx, id). An unknown mapping id bumps the
+// heapprofd_invalid_mapping_id stat and drops the frame.
+void HeapProfileTracker::AddFrame(ProfileIndex pidx,
+                                  SourceFrameId id,
+                                  const SourceFrame& frame) {
+  const auto name = FindString(pidx, frame.name_id);
+  if (!name)
+    return;
+
+  const auto mapping = mappings_.find(std::make_pair(pidx, frame.mapping_id));
+  if (mapping == mappings_.end()) {
+    context_->storage->IncrementStats(stats::heapprofd_invalid_mapping_id);
+    PERFETTO_DFATAL("Invalid mapping.");
+    return;
+  }
+
+  auto frames_table = context_->storage->mutable_heap_profile_frames();
+  const int64_t row = frames_table->FindOrInsert(
+      name.value(), mapping->second, static_cast<int64_t>(frame.rel_pc));
+  frames_.emplace(std::make_pair(pidx, id), row);
+}
+
+// Registers callstack |id| of profile |pidx|. |frame_ids| is walked front to
+// back: the frame at index |depth| becomes a callsite whose parent is the
+// callsite produced for index |depth - 1| (0 for the first frame). The id of
+// the last callsite is stored under (pidx, id).
+//
+// If a frame id is unknown, heapprofd_invalid_frame_id is incremented and the
+// callstack is not registered.
+void HeapProfileTracker::AddCallstack(ProfileIndex pidx,
+                                      SourceCallstackId id,
+                                      const SourceCallstack& frame_ids) {
+  int64_t parent_id = 0;
+  // Grown by one frame per iteration so it always holds frame_ids[0..depth],
+  // instead of copying the whole vector and truncating it every time.
+  SourceCallstack prefix;
+  prefix.reserve(frame_ids.size());
+  for (size_t depth = 0; depth < frame_ids.size(); ++depth) {
+    const uint64_t frame_id = frame_ids[depth];
+    prefix.push_back(frame_id);
+
+    // A prefix that was already resolved for this profile maps straight to
+    // its callsite; no table lookup is needed.
+    auto key = std::make_pair(pidx, prefix);
+    auto self_it = callstacks_from_frames_.find(key);
+    if (self_it != callstacks_from_frames_.end()) {
+      parent_id = self_it->second;
+      continue;
+    }
+
+    auto it = frames_.find({pidx, frame_id});
+    if (it == frames_.end()) {
+      context_->storage->IncrementStats(stats::heapprofd_invalid_frame_id);
+      PERFETTO_DFATAL("Unknown frames.");
+      return;
+    }
+    int64_t frame_row = it->second;
+    int64_t self_id =
+        context_->storage->mutable_heap_profile_callsites()->FindOrInsert(
+            static_cast<int64_t>(depth), parent_id, frame_row);
+    // Record the prefix so the lookup above can hit for later callstacks that
+    // share it; without this insertion the cache stays empty forever.
+    callstacks_from_frames_.emplace(std::move(key), self_id);
+    parent_id = self_id;
+  }
+  callstacks_.emplace(std::make_pair(pidx, id), parent_id);
+}
+
+// Writes |alloc| into the heap profile allocations table as two rows that
+// share timestamp, pid and callsite: one carrying the allocation count and
+// bytes, one carrying the negated free count and bytes. An unknown callstack
+// id bumps heapprofd_invalid_callstack_id and nothing is inserted.
+void HeapProfileTracker::AddAllocation(ProfileIndex pidx,
+                                       const SourceAllocation& alloc) {
+  const auto callstack_it = callstacks_.find({pidx, alloc.callstack_id});
+  if (callstack_it == callstacks_.end()) {
+    context_->storage->IncrementStats(stats::heapprofd_invalid_callstack_id);
+    PERFETTO_DFATAL("Unknown callstack %" PRIu64 " : %zu", alloc.callstack_id,
+                    callstacks_.size());
+    return;
+  }
+
+  const int64_t ts = static_cast<int64_t>(alloc.timestamp);
+  const int64_t pid = static_cast<int64_t>(alloc.pid);
+  const int64_t callsite = static_cast<int64_t>(callstack_it->second);
+
+  // Row for the allocations ...
+  context_->storage->mutable_heap_profile_allocations()->Insert(
+      ts, pid, callsite, static_cast<int64_t>(alloc.alloc_count),
+      static_cast<int64_t>(alloc.self_allocated));
+  // ... and row for the frees, stored as negative values.
+  context_->storage->mutable_heap_profile_allocations()->Insert(
+      ts, pid, callsite, -static_cast<int64_t>(alloc.free_count),
+      -static_cast<int64_t>(alloc.self_freed));
+}
+
+// Buffers |alloc| together with its profile index; nothing is written to the
+// storage tables until ApplyAllAllocations() runs.
+void HeapProfileTracker::StoreAllocation(ProfileIndex pidx,
+                                         SourceAllocation alloc) {
+  pending_allocs_.push_back(std::make_pair(pidx, std::move(alloc)));
+}
+
+// Writes every allocation buffered by StoreAllocation() to storage, in the
+// order it was stored, and then empties the buffer so that a repeated call
+// does not insert the same allocations a second time.
+void HeapProfileTracker::ApplyAllAllocations() {
+  for (const auto& p : pending_allocs_)
+    AddAllocation(p.first, p.second);
+  pending_allocs_.clear();
+}
+
+// Test helper: returns the frames-table row recorded for |frame_id| of profile
+// |pidx|, or -1 (after a DFATAL) when no such frame was added.
+int64_t HeapProfileTracker::GetDatabaseFrameIdForTesting(
+    ProfileIndex pidx,
+    SourceFrameId frame_id) {
+  const auto found = frames_.find(std::make_pair(pidx, frame_id));
+  if (found != frames_.end())
+    return found->second;
+
+  PERFETTO_DFATAL("Invalid frame.");
+  return -1;
+}
+
+// Maps source string |id| of profile |pidx| to its interned StringId.
+// Id 0 always resolves to the interned empty string. An id that was never
+// added bumps heapprofd_invalid_string_id and yields an empty Optional.
+base::Optional<StringId> HeapProfileTracker::FindString(ProfileIndex pidx,
+                                                        SourceStringId id) {
+  if (id == 0)
+    return base::Optional<StringId>(empty_);
+
+  const auto found = string_map_.find(std::make_pair(pidx, id));
+  if (found != string_map_.end())
+    return base::Optional<StringId>(found->second);
+
+  context_->storage->IncrementStats(stats::heapprofd_invalid_string_id);
+  PERFETTO_DFATAL("Invalid string.");
+  return base::Optional<StringId>();
+}
+
+}  // namespace trace_processor
+}  // namespace perfetto
diff --git a/src/trace_processor/heap_profile_tracker.h b/src/trace_processor/heap_profile_tracker.h
new file mode 100644
index 0000000..f1ce455
--- /dev/null
+++ b/src/trace_processor/heap_profile_tracker.h
@@ -0,0 +1,133 @@
+/*
+ * Copyright (C) 2019 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef SRC_TRACE_PROCESSOR_HEAP_PROFILE_TRACKER_H_
+#define SRC_TRACE_PROCESSOR_HEAP_PROFILE_TRACKER_H_
+
+#include <deque>
+
+#include "perfetto/trace/profiling/profile_packet.pbzero.h"
+#include "src/trace_processor/trace_storage.h"
+
+namespace std {
+
+// Hashers for the composite keys of the HeapProfileTracker maps.
+//
+// Both use an order-sensitive combine step rather than a plain XOR. With XOR
+// alone, (a, b) and (b, a) hash identically, every (a, a) hashes to 0, and a
+// frame id that occurs twice in a callstack cancels itself out.
+//
+// NOTE(review): these specialize std::hash for standard/builtin types only.
+// The C++ standard allows specializing std templates only when a
+// program-defined type is involved; consider passing a custom hasher to the
+// maps instead.
+
+template <>
+struct hash<std::pair<uint64_t, uint64_t>> {
+  using argument_type = std::pair<uint64_t, uint64_t>;
+  using result_type = size_t;
+
+  result_type operator()(const argument_type& p) const {
+    result_type h = std::hash<uint64_t>{}(p.first);
+    h ^= std::hash<uint64_t>{}(p.second) + 0x9e3779b9 + (h << 6) + (h >> 2);
+    return h;
+  }
+};
+
+template <>
+struct hash<std::pair<uint64_t, std::vector<uint64_t>>> {
+  using argument_type = std::pair<uint64_t, std::vector<uint64_t>>;
+  using result_type = size_t;
+
+  result_type operator()(const argument_type& p) const {
+    result_type h = std::hash<uint64_t>{}(p.first);
+    // Fold the elements in order so that permutations and repeated values
+    // produce different hashes.
+    for (uint64_t v : p.second)
+      h ^= std::hash<uint64_t>{}(v) + 0x9e3779b9 + (h << 6) + (h >> 2);
+    return h;
+  }
+};
+
+}  // namespace std
+namespace perfetto {
+namespace trace_processor {
+
+class TraceProcessorContext;
+
+// Translates the ids used inside heap profile data (strings, mappings, frames,
+// callstacks) into rows of the trace storage's heap profile tables, and
+// buffers allocations until they are applied in one go.
+//
+// All Source*Id values are only meaningful together with a ProfileIndex; every
+// lookup map below is keyed on the (ProfileIndex, id) pair.
+class HeapProfileTracker {
+ public:
+  // Not the same as ProfilePacket.index. This only gets incremented when
+  // encountering a ProfilePacket that is not continued.
+  // This namespaces all other Source*Ids.
+  using ProfileIndex = uint64_t;
+
+  // Id of a string as used by the source profile; 0 resolves to the empty
+  // string (see FindString()).
+  using SourceStringId = uint64_t;
+
+  // A mapping as described by the source profile. build_id and name_id are
+  // source string ids that AddMapping() resolves through FindString().
+  struct SourceMapping {
+    SourceStringId build_id = 0;
+    uint64_t offset = 0;
+    uint64_t start = 0;
+    uint64_t end = 0;
+    uint64_t load_bias = 0;
+    SourceStringId name_id = 0;
+  };
+  using SourceMappingId = uint64_t;
+
+  // A frame as described by the source profile. mapping_id must refer to a
+  // mapping previously passed to AddMapping() for the same profile.
+  struct SourceFrame {
+    SourceStringId name_id = 0;
+    SourceMappingId mapping_id = 0;
+    uint64_t rel_pc = 0;
+  };
+  using SourceFrameId = uint64_t;
+
+  // Sequence of frame ids; AddCallstack() treats index 0 as depth 0 and each
+  // later entry as a child of the preceding one.
+  using SourceCallstack = std::vector<SourceFrameId>;
+  using SourceCallstackId = uint64_t;
+
+  // Allocation totals for one callstack. AddAllocation() stores the alloc_*
+  // and free_* halves as two separate rows, the latter negated.
+  struct SourceAllocation {
+    uint64_t pid = 0;
+    uint64_t timestamp = 0;
+    SourceCallstackId callstack_id = 0;
+    uint64_t self_allocated = 0;
+    uint64_t self_freed = 0;
+    uint64_t alloc_count = 0;
+    uint64_t free_count = 0;
+  };
+
+  explicit HeapProfileTracker(TraceProcessorContext* context);
+
+  // Each Add* call registers one source entity under (ProfileIndex, id).
+  // AddMapping needs its strings, AddFrame its string and mapping, and
+  // AddCallstack its frames to have been added for the same profile already;
+  // otherwise a stat is incremented and the entity is dropped.
+  void AddString(ProfileIndex, SourceStringId, StringId);
+  void AddMapping(ProfileIndex, SourceMappingId, const SourceMapping&);
+  void AddFrame(ProfileIndex, SourceFrameId, const SourceFrame&);
+  void AddCallstack(ProfileIndex, SourceCallstackId, const SourceCallstack&);
+
+  // StoreAllocation() only buffers; ApplyAllAllocations() writes every
+  // buffered allocation to storage.
+  void StoreAllocation(ProfileIndex, SourceAllocation);
+  void ApplyAllAllocations();
+
+  // Returns the frames-table row recorded for the given source frame, or -1
+  // if the frame is unknown.
+  int64_t GetDatabaseFrameIdForTesting(ProfileIndex, SourceFrameId);
+
+  ~HeapProfileTracker();
+
+ private:
+  // Inserts the rows for a single allocation; requires its callstack to be
+  // known.
+  void AddAllocation(ProfileIndex, const SourceAllocation&);
+
+  // Resolves a source string id to an interned StringId, or an empty Optional
+  // if the id was never added.
+  base::Optional<StringId> FindString(ProfileIndex, SourceStringId);
+
+  // (profile, source string id) -> interned string.
+  std::unordered_map<std::pair<ProfileIndex, SourceStringId>, StringId>
+      string_map_;
+  // (profile, source mapping id) -> row in the heap profile mappings table.
+  std::unordered_map<std::pair<ProfileIndex, SourceMappingId>, int64_t>
+      mappings_;
+  // (profile, source frame id) -> row in the heap profile frames table.
+  std::unordered_map<std::pair<ProfileIndex, SourceFrameId>, int64_t> frames_;
+  // (profile, prefix of a callstack's frame ids) -> callsite id; consulted by
+  // AddCallstack().
+  std::unordered_map<std::pair<ProfileIndex, SourceCallstack>, int64_t>
+      callstacks_from_frames_;
+  // (profile, source callstack id) -> id of the callstack's last callsite.
+  std::unordered_map<std::pair<ProfileIndex, SourceCallstackId>, int64_t>
+      callstacks_;
+
+  // Allocations buffered by StoreAllocation() until ApplyAllAllocations().
+  std::vector<std::pair<ProfileIndex, SourceAllocation>> pending_allocs_;
+
+  // Declaration order matters: the constructor initializes empty_ through
+  // context_.
+  TraceProcessorContext* const context_;
+  const StringId empty_;
+};
+
+}  // namespace trace_processor
+}  // namespace perfetto
+
+#endif  // SRC_TRACE_PROCESSOR_HEAP_PROFILE_TRACKER_H_
diff --git a/src/trace_processor/heap_profile_tracker_unittest.cc b/src/trace_processor/heap_profile_tracker_unittest.cc
new file mode 100644
index 0000000..3e095be
--- /dev/null
+++ b/src/trace_processor/heap_profile_tracker_unittest.cc
@@ -0,0 +1,311 @@
+/*
+ * Copyright (C) 2019 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "src/trace_processor/heap_profile_tracker.h"
+
+#include "src/trace_processor/trace_processor_context.h"
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+namespace perfetto {
+namespace trace_processor {
+namespace {
+
+// Profile index of the first packet and the source string ids under which
+// the mapping name, build id and frame name are registered for it.
+constexpr auto kFirstPacket = 0;
+constexpr auto kFirstPacketMappingNameId = 1;
+constexpr auto kFirstPacketBuildId = 2;
+constexpr auto kFirstPacketFrameNameId = 3;
+
+// Source ids of the mapping and the frame added for the first packet.
+constexpr auto kFirstPacketMappingId = 1;
+constexpr auto kFirstPacketFrameId = 1;
+
+// Same as above for the second packet. Note that the mapping name and frame
+// name use different source string ids than in the first packet.
+constexpr auto kSecondPacket = 1;
+constexpr auto kSecondPacketMappingNameId = 3;
+constexpr auto kSecondPacketBuildId = 2;
+constexpr auto kSecondPacketFrameNameId = 1;
+
+// Source ids of the frame and the mapping added for the second packet.
+constexpr auto kSecondPacketFrameId = 2;
+constexpr auto kSecondPacketMappingId = 2;
+
+// Mapping attributes used for the mappings of both packets.
+constexpr auto kMappingOffset = 123;
+constexpr auto kMappingStart = 234;
+constexpr auto kMappingEnd = 345;
+constexpr auto kMappingLoadBias = 456;
+
+// Relative pc used for the frames of both packets.
+static constexpr auto kFrameRelPc = 567;
+
+using ::testing::ElementsAre;
+
+// Test fixture that registers the same mapping, frame and callstack from two
+// different packets, using different source ids in each packet.
+class HeapProfileTrackerDupTest : public ::testing::Test {
+ public:
+  HeapProfileTrackerDupTest() {
+    context.storage.reset(new TraceStorage());
+    context.heap_profile_tracker.reset(new HeapProfileTracker(&context));
+
+    TraceStorage* storage = context.storage.get();
+    mapping_name = storage->InternString("[mapping]");
+    build = storage->InternString("[build id]");
+    frame_name = storage->InternString("[frame]");
+  }
+
+ protected:
+  // Registers the mapping name and build id strings for both packets and then
+  // adds one mapping per packet; both mappings carry the same attributes.
+  void InsertMapping() {
+    HeapProfileTracker* hpt = context.heap_profile_tracker.get();
+
+    hpt->AddString(kFirstPacket, kFirstPacketMappingNameId, mapping_name);
+    hpt->AddString(kSecondPacket, kSecondPacketMappingNameId, mapping_name);
+    hpt->AddString(kFirstPacket, kFirstPacketBuildId, build);
+    hpt->AddString(kSecondPacket, kSecondPacketBuildId, build);
+
+    HeapProfileTracker::SourceMapping first_mapping;
+    first_mapping.name_id = kFirstPacketMappingNameId;
+    first_mapping.build_id = kFirstPacketBuildId;
+    first_mapping.offset = kMappingOffset;
+    first_mapping.start = kMappingStart;
+    first_mapping.end = kMappingEnd;
+    first_mapping.load_bias = kMappingLoadBias;
+    hpt->AddMapping(kFirstPacket, kFirstPacketMappingId, first_mapping);
+
+    HeapProfileTracker::SourceMapping second_mapping;
+    second_mapping.name_id = kSecondPacketMappingNameId;
+    second_mapping.build_id = kSecondPacketBuildId;
+    second_mapping.offset = kMappingOffset;
+    second_mapping.start = kMappingStart;
+    second_mapping.end = kMappingEnd;
+    second_mapping.load_bias = kMappingLoadBias;
+    hpt->AddMapping(kSecondPacket, kSecondPacketMappingId, second_mapping);
+  }
+
+  // Inserts the mappings, registers the frame name string for both packets
+  // and then adds one frame per packet, each pointing at its packet's mapping.
+  void InsertFrame() {
+    InsertMapping();
+
+    HeapProfileTracker* hpt = context.heap_profile_tracker.get();
+    hpt->AddString(kFirstPacket, kFirstPacketFrameNameId, frame_name);
+    hpt->AddString(kSecondPacket, kSecondPacketFrameNameId, frame_name);
+
+    HeapProfileTracker::SourceFrame first_frame;
+    first_frame.rel_pc = kFrameRelPc;
+    first_frame.mapping_id = kFirstPacketMappingId;
+    first_frame.name_id = kFirstPacketFrameNameId;
+    hpt->AddFrame(kFirstPacket, kFirstPacketFrameId, first_frame);
+
+    HeapProfileTracker::SourceFrame second_frame;
+    second_frame.rel_pc = kFrameRelPc;
+    second_frame.mapping_id = kSecondPacketMappingId;
+    second_frame.name_id = kSecondPacketFrameNameId;
+    hpt->AddFrame(kSecondPacket, kSecondPacketFrameId, second_frame);
+  }
+
+  // Inserts the frames and then adds, for each packet, a callstack with source
+  // id 0 that consists of that packet's frame repeated twice.
+  void InsertCallsite() {
+    InsertFrame();
+
+    HeapProfileTracker* hpt = context.heap_profile_tracker.get();
+
+    HeapProfileTracker::SourceCallstack first_stack = {kFirstPacketFrameId,
+                                                       kFirstPacketFrameId};
+    HeapProfileTracker::SourceCallstack second_stack = {kSecondPacketFrameId,
+                                                        kSecondPacketFrameId};
+
+    hpt->AddCallstack(kFirstPacket, 0, first_stack);
+    hpt->AddCallstack(kSecondPacket, 0, second_stack);
+  }
+
+  StringId mapping_name;
+  StringId build;
+  StringId frame_name;
+  TraceProcessorContext context;
+};
+
+// Insert the same mapping from two different packets, with different strings
+// interned, and assert we only store one.
+TEST_F(HeapProfileTrackerDupTest, Mapping) {
+  InsertMapping();
+
+  // Every column is expected to hold exactly one element, i.e. a single
+  // stored mapping.
+  const auto& mappings = context.storage->heap_profile_mappings();
+  EXPECT_THAT(mappings.build_ids(), ElementsAre(build));
+  EXPECT_THAT(mappings.offsets(), ElementsAre(kMappingOffset));
+  EXPECT_THAT(mappings.starts(), ElementsAre(kMappingStart));
+  EXPECT_THAT(mappings.ends(), ElementsAre(kMappingEnd));
+  EXPECT_THAT(mappings.load_biases(), ElementsAre(kMappingLoadBias));
+  EXPECT_THAT(mappings.names(), ElementsAre(mapping_name));
+}
+
+// Insert the same frame from two different packets, with different strings
+// interned, and assert we only store one.
+TEST_F(HeapProfileTrackerDupTest, Frame) {
+  InsertFrame();
+
+  // Every column is expected to hold exactly one element, i.e. a single
+  // stored frame that refers to mapping row 0.
+  const auto& frames = context.storage->heap_profile_frames();
+  EXPECT_THAT(frames.names(), ElementsAre(frame_name));
+  EXPECT_THAT(frames.mappings(), ElementsAre(0));
+  EXPECT_THAT(frames.rel_pcs(), ElementsAre(kFrameRelPc));
+}
+
+// Insert the same callstack from two different packets, assert it is only
+// stored once.
+TEST_F(HeapProfileTrackerDupTest, Callstack) {
+  InsertCallsite();
+
+  // The two-frame callstack is expected to produce exactly two callsite rows
+  // (depth 0 and depth 1), both referring to frame row 0.
+  const auto& callsites = context.storage->heap_profile_callsites();
+  EXPECT_THAT(callsites.frame_depths(), ElementsAre(0, 1));
+  EXPECT_THAT(callsites.parent_callsite_ids(), ElementsAre(0, 0));
+  EXPECT_THAT(callsites.frame_ids(), ElementsAre(0, 0));
+}
+
+// Returns the index of the first callsite row in |storage| whose depth,
+// parent callsite id and frame id equal the given values, or -1 if no row
+// matches.
+int64_t FindCallstack(const TraceStorage& storage,
+                      int64_t depth,
+                      int64_t parent,
+                      int64_t frame_id) {
+  const auto& table = storage.heap_profile_callsites();
+  const auto& depths = table.frame_depths();
+  const auto& parents = table.parent_callsite_ids();
+  const auto& frame_ids = table.frame_ids();
+
+  for (size_t row = 0; row < depths.size(); ++row) {
+    if (depths[row] != depth)
+      continue;
+    if (parents[row] != parent)
+      continue;
+    if (frame_ids[row] != frame_id)
+      continue;
+    return static_cast<int64_t>(row);
+  }
+  return -1;
+}
+
+// Insert multiple mappings, frames and callstacks and check result.
+TEST(HeapProfileTrackerTest, Functional) {
+  TraceProcessorContext context;
+  context.storage.reset(new TraceStorage());
+  context.heap_profile_tracker.reset(new HeapProfileTracker(&context));
+
+  HeapProfileTracker* hpt = context.heap_profile_tracker.get();
+
+  // Everything in this test is registered under a single profile index.
+  constexpr auto kPacket = 0;
+  // Counter handing out a distinct source string id to every string below.
+  uint64_t next_string_intern_id = 1;
+
+  const std::string build_ids[] = {"build1", "build2", "build3"};
+  uint64_t build_id_ids[base::ArraySize(build_ids)];
+  for (size_t i = 0; i < base::ArraySize(build_ids); ++i)
+    build_id_ids[i] = next_string_intern_id++;
+
+  const std::string mapping_names[] = {"map1", "map2", "map3"};
+  uint64_t mapping_name_ids[base::ArraySize(mapping_names)];
+  for (size_t i = 0; i < base::ArraySize(mapping_names); ++i)
+    mapping_name_ids[i] = next_string_intern_id++;
+
+  // Three mappings that differ in build id, load bias and name; offset, start
+  // and end are the same for all of them.
+  HeapProfileTracker::SourceMapping mappings[base::ArraySize(mapping_names)] =
+      {};
+  mappings[0].build_id = build_id_ids[0];
+  mappings[0].offset = 1;
+  mappings[0].start = 2;
+  mappings[0].end = 3;
+  mappings[0].load_bias = 0;
+  mappings[0].name_id = mapping_name_ids[0];
+
+  mappings[1].build_id = build_id_ids[1];
+  mappings[1].offset = 1;
+  mappings[1].start = 2;
+  mappings[1].end = 3;
+  mappings[1].load_bias = 1;
+  mappings[1].name_id = mapping_name_ids[1];
+
+  mappings[2].build_id = build_id_ids[2];
+  mappings[2].offset = 1;
+  mappings[2].start = 2;
+  mappings[2].end = 3;
+  mappings[2].load_bias = 2;
+  mappings[2].name_id = mapping_name_ids[2];
+
+  const std::string function_names[] = {"fun1", "fun2", "fun3", "fun4"};
+  uint64_t function_name_ids[base::ArraySize(function_names)];
+  for (size_t i = 0; i < base::ArraySize(function_names); ++i)
+    function_name_ids[i] = next_string_intern_id++;
+
+  // Four frames with the same rel_pc but different names. |mapping_id| is the
+  // source mapping id, which is the index into |mappings| (see the AddMapping
+  // loop below): frames 0 and 1 share mapping 0, frames 2 and 3 use mappings
+  // 1 and 2.
+  HeapProfileTracker::SourceFrame frames[base::ArraySize(function_names)];
+  frames[0].name_id = function_name_ids[0];
+  frames[0].mapping_id = 0;
+  frames[0].rel_pc = 123;
+
+  frames[1].name_id = function_name_ids[1];
+  frames[1].mapping_id = 0;
+  frames[1].rel_pc = 123;
+
+  frames[2].name_id = function_name_ids[2];
+  frames[2].mapping_id = 1;
+  frames[2].rel_pc = 123;
+
+  frames[3].name_id = function_name_ids[3];
+  frames[3].mapping_id = 2;
+  frames[3].rel_pc = 123;
+
+  // Callstacks are lists of source frame ids, i.e. indices into |frames| (see
+  // the AddFrame loop below). callstacks[0] is a prefix of callstacks[1].
+  HeapProfileTracker::SourceCallstack callstacks[3];
+  callstacks[0] = {2, 1, 0};
+  callstacks[1] = {2, 1, 0, 1, 0};
+  callstacks[2] = {0, 2, 0, 1, 2};
+
+  // Intern every string in the storage and register it with the tracker under
+  // the source string id assigned above.
+  for (size_t i = 0; i < base::ArraySize(build_ids); ++i) {
+    auto interned = context.storage->InternString(
+        {build_ids[i].data(), build_ids[i].size()});
+    hpt->AddString(kPacket, build_id_ids[i], interned);
+  }
+  for (size_t i = 0; i < base::ArraySize(mapping_names); ++i) {
+    auto interned = context.storage->InternString(
+        {mapping_names[i].data(), mapping_names[i].size()});
+    hpt->AddString(kPacket, mapping_name_ids[i], interned);
+  }
+  for (size_t i = 0; i < base::ArraySize(function_names); ++i) {
+    auto interned = context.storage->InternString(
+        {function_names[i].data(), function_names[i].size()});
+    hpt->AddString(kPacket, function_name_ids[i], interned);
+  }
+
+  // Register mappings, frames and callstacks, using the array index as the
+  // source id of each entry.
+  for (size_t i = 0; i < base::ArraySize(mappings); ++i)
+    hpt->AddMapping(kPacket, i, mappings[i]);
+  for (size_t i = 0; i < base::ArraySize(frames); ++i)
+    hpt->AddFrame(kPacket, i, frames[i]);
+  for (size_t i = 0; i < base::ArraySize(callstacks); ++i)
+    hpt->AddCallstack(kPacket, i, callstacks[i]);
+
+  // Walk every callstack frame by frame and check that a callsite row exists
+  // for each (depth, parent, frame) triple. The lookup at depth 0 uses parent
+  // 0; the row found at one depth is the parent for the next depth.
+  for (size_t i = 0; i < base::ArraySize(callstacks); ++i) {
+    int64_t parent = 0;
+    const HeapProfileTracker::SourceCallstack& callstack = callstacks[i];
+    for (size_t depth = 0; depth < callstack.size(); ++depth) {
+      auto frame_id =
+          hpt->GetDatabaseFrameIdForTesting(kPacket, callstack[depth]);
+      ASSERT_NE(frame_id, -1);
+      int64_t self = FindCallstack(
+          *context.storage, static_cast<int64_t>(depth), parent, frame_id);
+      ASSERT_NE(self, -1);
+      parent = self;
+    }
+  }
+}
+
+}  // namespace
+}  // namespace trace_processor
+}  // namespace perfetto
diff --git a/src/trace_processor/metrics/BUILD.gn b/src/trace_processor/metrics/BUILD.gn
index 779cdf7..1f0fdbf 100644
--- a/src/trace_processor/metrics/BUILD.gn
+++ b/src/trace_processor/metrics/BUILD.gn
@@ -14,14 +14,17 @@
 
 import("../../../gn/perfetto.gni")
 
-sql_files = [ "android/android_mem.sql" ]
+# SQL metric files passed to the gen_merged_sql_metrics action below.
+sql_files = [
+  "android/android_mem.sql",
+  "android/android_mem_lmk.sql",
+]
 
 config("gen_config") {
   include_dirs = [ "${root_gen_dir}/${perfetto_root_path}" ]
 }
 
 action("gen_merged_sql_metrics") {
-  script = "../../../tools/gen_merged_sql_metrics"
+  script = "../../../tools/gen_merged_sql_metrics.py"
   generated_header = "${target_gen_dir}/sql_metrics.h"
   args = rebase_path(sql_files, root_build_dir) + [
            "--cpp_out",
@@ -33,3 +36,19 @@
   ]
   public_configs = [ ":gen_config" ]
 }
+
+# Metrics sources (metrics.cc / metrics.h). Depends on the
+# gen_merged_sql_metrics action defined above in this file.
+source_set("lib") {
+  sources = [
+    "metrics.cc",
+    "metrics.h",
+  ]
+  deps = [
+    ":gen_merged_sql_metrics",
+    "../../../buildtools:sqlite",
+    "../../../gn:default_deps",
+    "../../../include/perfetto/trace_processor",
+    "../../../protos/perfetto/metrics:zero",
+    "../../../protos/perfetto/metrics/android:zero",
+    "../../protozero:protozero",
+  ]
+}
diff --git a/src/trace_processor/metrics/android/android_mem.sql b/src/trace_processor/metrics/android/android_mem.sql
index 80daf9c..7646381 100644
--- a/src/trace_processor/metrics/android/android_mem.sql
+++ b/src/trace_processor/metrics/android/android_mem.sql
@@ -15,28 +15,4 @@
 --
 
 -- Create all the views used to generate the Android Memory metrics proto.
-CREATE TABLE last_oom_adj(upid BIG INT PRIMARY KEY, ts BIG INT, score INT);
-
-INSERT INTO last_oom_adj
-SELECT upid, ts, score
-FROM (
-  SELECT ref AS upid,
-         ts,
-         CAST(value AS INT) AS score,
-         row_number() OVER (PARTITION BY counter_id ORDER BY ts DESC) AS rank
-  FROM counter_definitions JOIN counter_values USING(counter_id)
-  WHERE name = 'oom_score_adj'
-  AND ref_type = 'upid')
-WHERE rank = 1;
-
-CREATE VIEW lmk_events AS
-SELECT ref AS upid
-FROM instants
-WHERE name = 'mem.lmk' AND ref_type = 'upid';
-
-CREATE VIEW lmk_by_score AS
-SELECT process.name, last_oom_adj.score
-FROM lmk_events
-LEFT JOIN process ON lmk_events.upid = process.upid
-LEFT JOIN last_oom_adj ON lmk_events.upid = last_oom_adj.upid
-ORDER BY lmk_events.upid;
+SELECT RUN_METRIC('android_mem_lmk.sql');
diff --git a/src/trace_processor/metrics/android/android_mem_lmk.sql b/src/trace_processor/metrics/android/android_mem_lmk.sql
new file mode 100644
index 0000000..e24f736
--- /dev/null
+++ b/src/trace_processor/metrics/android/android_mem_lmk.sql
@@ -0,0 +1,42 @@
+--
+-- Copyright 2019 The Android Open Source Project
+--
+-- Licensed under the Apache License, Version 2.0 (the "License");
+-- you may not use this file except in compliance with the License.
+-- You may obtain a copy of the License at
+--
+--     https://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+--
+
+-- Create all the tables and views used for LMK-related metrics.
+-- last_oom_adj: for every 'oom_score_adj' counter that refers to a upid, the
+-- timestamp and value (cast to INT) of its most recent sample.
+CREATE TABLE last_oom_adj(upid BIG INT PRIMARY KEY, ts BIG INT, score INT);
+
+-- Rank the samples of each counter by descending timestamp and keep only the
+-- newest one (rank = 1).
+INSERT INTO last_oom_adj
+SELECT upid, ts, score
+FROM (
+  SELECT ref AS upid,
+         ts,
+         CAST(value AS INT) AS score,
+         row_number() OVER (PARTITION BY counter_id ORDER BY ts DESC) AS rank
+  FROM counter_definitions JOIN counter_values USING(counter_id)
+  WHERE name = 'oom_score_adj'
+  AND ref_type = 'upid')
+WHERE rank = 1;
+
+-- lmk_events: the upid of every 'mem.lmk' instant that refers to a upid.
+CREATE VIEW lmk_events AS
+SELECT ref AS upid
+FROM instants
+WHERE name = 'mem.lmk' AND ref_type = 'upid';
+
+-- lmk_by_score: one row per LMK event with the process name and the last
+-- recorded oom score; either may be NULL because of the LEFT JOINs.
+CREATE VIEW lmk_by_score AS
+SELECT process.name, last_oom_adj.score
+FROM lmk_events
+LEFT JOIN process ON lmk_events.upid = process.upid
+LEFT JOIN last_oom_adj ON lmk_events.upid = last_oom_adj.upid
+ORDER BY lmk_events.upid;
diff --git a/src/trace_processor/metrics/metrics.cc b/src/trace_processor/metrics/metrics.cc
new file mode 100644
index 0000000..48a6f19
--- /dev/null
+++ b/src/trace_processor/metrics/metrics.cc
@@ -0,0 +1,113 @@
+/*
+ * Copyright (C) 2019 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "src/trace_processor/metrics/metrics.h"
+
+#include "perfetto/base/string_utils.h"
+#include "perfetto/metrics/android/mem_metric.pbzero.h"
+#include "perfetto/metrics/metrics.pbzero.h"
+#include "perfetto/protozero/scattered_heap_buffer.h"
+#include "src/trace_processor/metrics/sql_metrics.h"
+
+namespace perfetto {
+namespace trace_processor {
+namespace metrics {
+
+// Implements the RUN_METRIC(filename) SQL function.
+//
+// The first argument must be a text value naming a bundled metric SQL file.
+// The file's SQL is split into statements on ";\n\n" and every statement is
+// executed on the TraceProcessor stored as the function's user data.
+//
+// A SQLite error is reported on |ctx| when the arguments are invalid, the file
+// is unknown, a statement fails, or a statement produces an output row.
+void RunMetric(sqlite3_context* ctx, int argc, sqlite3_value** argv) {
+  auto* tp = static_cast<TraceProcessor*>(sqlite3_user_data(ctx));
+  if (argc == 0 || sqlite3_value_type(argv[0]) != SQLITE_TEXT) {
+    sqlite3_result_error(ctx, "Invalid call to RUN_METRIC", -1);
+    return;
+  }
+
+  const char* filename =
+      reinterpret_cast<const char*>(sqlite3_value_text(argv[0]));
+  const char* sql = sql_metrics::GetBundledMetric(filename);
+  if (!sql) {
+    sqlite3_result_error(ctx, "Unknown filename provided to RUN_METRIC", -1);
+    return;
+  }
+
+  for (const auto& query : base::SplitString(sql, ";\n\n")) {
+    PERFETTO_DLOG("Executing query in RUN_METRIC: %s", query.c_str());
+
+    auto it = tp->ExecuteQuery(query);
+
+    // Step the statement *before* looking at the error, as ComputeMetrics()
+    // does: checking the error first only catches failures that happened
+    // before the first step, and a failure while stepping would go unreported.
+    const bool has_output = it.Next();
+    if (auto opt_error = it.GetLastError()) {
+      PERFETTO_DLOG("RUN_METRIC query failed: %s", opt_error->c_str());
+      sqlite3_result_error(ctx, "Error when running RUN_METRIC file", -1);
+      return;
+    }
+    if (has_output) {
+      sqlite3_result_error(
+          ctx, "RUN_METRIC functions should not produce any output", -1);
+      return;
+    }
+  }
+}
+
+// Computes the requested metrics and serializes them as a TraceMetrics proto.
+//
+// |tp| is the trace processor the metric SQL is executed on.
+// |metric_names| must contain exactly one entry, "android.mem"; anything else
+// is rejected.
+// |metrics_proto| receives the serialized proto on success.
+//
+// Returns 0 on success and 1 if the metric is unsupported or a query fails.
+int ComputeMetrics(TraceProcessor* tp,
+                   const std::vector<std::string>& metric_names,
+                   std::vector<uint8_t>* metrics_proto) {
+  // TODO(lalitm): stop hardcoding android.mem metric and read the proto
+  // descriptor for this logic instead.
+  if (metric_names.size() != 1 || metric_names[0] != "android.mem") {
+    PERFETTO_ELOG("Only android.mem metric is currently supported");
+    return 1;
+  }
+
+  auto queries = base::SplitString(sql_metrics::kAndroidMem, ";\n\n");
+  for (const auto& query : queries) {
+    PERFETTO_DLOG("Executing query: %s", query.c_str());
+    auto prep_it = tp->ExecuteQuery(query);
+
+    // The return value of Next() is deliberately ignored: a preparation
+    // statement may legitimately yield a row (a SELECT RUN_METRIC(...) call
+    // returns one), so asserting that there is no output would abort debug
+    // builds. Only errors matter here.
+    prep_it.Next();
+    if (auto opt_error = prep_it.GetLastError()) {
+      PERFETTO_ELOG("SQLite error: %s", opt_error->c_str());
+      return 1;
+    }
+  }
+
+  protozero::ScatteredHeapBuffer delegate;
+  protozero::ScatteredStreamWriter writer(&delegate);
+  delegate.set_writer(&writer);
+
+  protos::pbzero::TraceMetrics metrics;
+  metrics.Reset(&writer);
+
+  // TODO(lalitm): all the below is temporary hardcoded queries and proto
+  // filling to ensure that the code above works.
+  auto it = tp->ExecuteQuery("SELECT COUNT(*) from lmk_by_score;");
+  auto has_next = it.Next();
+  if (auto opt_error = it.GetLastError()) {
+    PERFETTO_ELOG("SQLite error: %s", opt_error->c_str());
+    return 1;
+  }
+  // The COUNT(*) query is expected to return exactly one integer row.
+  PERFETTO_CHECK(has_next);
+  PERFETTO_CHECK(it.Get(0).type == SqlValue::Type::kLong);
+
+  auto* memory = metrics.set_android_mem();
+  memory->set_system_metrics()->set_lmks()->set_total_count(
+      static_cast<int32_t>(it.Get(0).long_value));
+  metrics.Finalize();
+
+  *metrics_proto = delegate.StitchSlices();
+
+  has_next = it.Next();
+  PERFETTO_DCHECK(!has_next);
+  return 0;
+}
+
+}  // namespace metrics
+}  // namespace trace_processor
+}  // namespace perfetto
diff --git a/src/trace_processor/metrics/metrics.h b/src/trace_processor/metrics/metrics.h
new file mode 100644
index 0000000..324d436
--- /dev/null
+++ b/src/trace_processor/metrics/metrics.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (C) 2019 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef SRC_TRACE_PROCESSOR_METRICS_METRICS_H_
+#define SRC_TRACE_PROCESSOR_METRICS_METRICS_H_
+
+#include <sqlite3.h>
+#include <vector>
+
+#include "perfetto/trace_processor/trace_processor.h"
+
+namespace perfetto {
+namespace trace_processor {
+namespace metrics {
+
+// This function implements the RUN_METRIC SQL function.
+// The first SQL argument must be a text value naming a bundled metric SQL
+// file; errors are reported on |ctx|.
+void RunMetric(sqlite3_context* ctx, int argc, sqlite3_value** argv);
+
+// Computes the metrics listed in |metric_names| by running SQL on |impl| and
+// stores the serialized metrics proto in |metrics_proto|.
+// Returns 0 on success and 1 on failure.
+int ComputeMetrics(TraceProcessor* impl,
+                   const std::vector<std::string>& metric_names,
+                   std::vector<uint8_t>* metrics_proto);
+
+}  // namespace metrics
+}  // namespace trace_processor
+}  // namespace perfetto
+
+#endif  // SRC_TRACE_PROCESSOR_METRICS_METRICS_H_
diff --git a/src/trace_processor/proto_trace_parser.cc b/src/trace_processor/proto_trace_parser.cc
index e345787..e52e2ab 100644
--- a/src/trace_processor/proto_trace_parser.cc
+++ b/src/trace_processor/proto_trace_parser.cc
@@ -31,6 +31,7 @@
 #include "src/trace_processor/clock_tracker.h"
 #include "src/trace_processor/event_tracker.h"
 #include "src/trace_processor/ftrace_descriptors.h"
+#include "src/trace_processor/heap_profile_tracker.h"
 #include "src/trace_processor/process_tracker.h"
 #include "src/trace_processor/slice_tracker.h"
 #include "src/trace_processor/syscall_tracker.h"
@@ -1280,14 +1281,90 @@
 }
 
 void ProtoTraceParser::ParseProfilePacket(ConstBytes blob) {
+  // Profile index passed to the heap profile tracker together with every
+  // source id decoded from this packet.
+  // NOTE(review): |index| is a local variable, so it is 0 for every packet and
+  // the increment at the end of this function has no lasting effect. If each
+  // non-continued packet is meant to get its own index, this presumably needs
+  // to be state that outlives the call - confirm.
+  uint64_t index = 0;
   protos::pbzero::ProfilePacket::Decoder packet(blob.data, blob.size);
+
   for (auto it = packet.strings(); it; ++it) {
     protos::pbzero::ProfilePacket::InternedString::Decoder entry(it->data(),
                                                                  it->size());
 
     const char* str = reinterpret_cast<const char*>(entry.str().data);
-    context_->storage->InternString(base::StringView(str, entry.str().size));
+    // Intern the string in storage and register the resulting id with the
+    // tracker under the packet's own string id.
+    auto str_id = context_->storage->InternString(
+        base::StringView(str, entry.str().size));
+    context_->heap_profile_tracker->AddString(index, entry.id(), str_id);
   }
+
+  for (auto it = packet.mappings(); it; ++it) {
+    protos::pbzero::ProfilePacket::Mapping::Decoder entry(it->data(),
+                                                          it->size());
+    HeapProfileTracker::SourceMapping src_mapping;
+    src_mapping.build_id = entry.build_id();
+    src_mapping.offset = entry.offset();
+    src_mapping.start = entry.start();
+    src_mapping.end = entry.end();
+    src_mapping.load_bias = entry.load_bias();
+    src_mapping.name_id = 0;
+    // name_id is overwritten on every iteration, so only the last path string
+    // id is kept; it stays 0 when there are none.
+    for (auto path_string_id_it = entry.path_string_ids(); path_string_id_it;
+         ++path_string_id_it)
+      src_mapping.name_id = path_string_id_it->as_uint64();
+    context_->heap_profile_tracker->AddMapping(index, entry.id(), src_mapping);
+  }
+
+  for (auto it = packet.frames(); it; ++it) {
+    protos::pbzero::ProfilePacket::Frame::Decoder entry(it->data(), it->size());
+    HeapProfileTracker::SourceFrame src_frame;
+    src_frame.name_id = entry.function_name_id();
+    src_frame.mapping_id = entry.mapping_id();
+    src_frame.rel_pc = entry.rel_pc();
+
+    context_->heap_profile_tracker->AddFrame(index, entry.id(), src_frame);
+  }
+
+  for (auto it = packet.callstacks(); it; ++it) {
+    protos::pbzero::ProfilePacket::Callstack::Decoder entry(it->data(),
+                                                            it->size());
+    // Collect the frame ids of the callstack in the order they are stored.
+    HeapProfileTracker::SourceCallstack src_callstack;
+    for (auto frame_it = entry.frame_ids(); frame_it; ++frame_it)
+      src_callstack.emplace_back(frame_it->as_uint64());
+
+    context_->heap_profile_tracker->AddCallstack(index, entry.id(),
+                                                 src_callstack);
+  }
+
+  for (auto it = packet.process_dumps(); it; ++it) {
+    protos::pbzero::ProfilePacket::ProcessHeapSamples::Decoder entry(
+        it->data(), it->size());
+
+    int pid = static_cast<int>(entry.pid());
+
+    // Record the error flags of this process dump as stats indexed by pid.
+    if (entry.buffer_corrupted())
+      context_->storage->IncrementIndexedStats(
+          stats::heapprofd_buffer_corrupted, pid);
+    if (entry.buffer_overran())
+      context_->storage->IncrementIndexedStats(stats::heapprofd_buffer_overran,
+                                               pid);
+    if (entry.rejected_concurrent())
+      context_->storage->IncrementIndexedStats(
+          stats::heapprofd_rejected_concurrent, pid);
+
+    for (auto sample_it = entry.samples(); sample_it; ++sample_it) {
+      protos::pbzero::ProfilePacket::HeapSample::Decoder sample(
+          sample_it->data(), sample_it->size());
+
+      HeapProfileTracker::SourceAllocation src_allocation;
+      src_allocation.pid = entry.pid();
+      src_allocation.timestamp = sample.timestamp();
+      src_allocation.callstack_id = sample.callstack_id();
+      src_allocation.self_allocated = sample.self_allocated();
+      src_allocation.self_freed = sample.self_freed();
+      src_allocation.alloc_count = sample.alloc_count();
+      src_allocation.free_count = sample.free_count();
+
+      // Allocations are only stored here, not applied immediately.
+      context_->heap_profile_tracker->StoreAllocation(index, src_allocation);
+    }
+  }
+  if (!packet.continued())
+    index++;
 }
 
 void ProtoTraceParser::ParseSystemInfo(ConstBytes blob) {
diff --git a/src/trace_processor/sql_stats_table.cc b/src/trace_processor/sql_stats_table.cc
index d9bef3d..25e4a17 100644
--- a/src/trace_processor/sql_stats_table.cc
+++ b/src/trace_processor/sql_stats_table.cc
@@ -41,6 +41,8 @@
           Table::Column(Column::kQuery, "query", ColumnType::kString),
           Table::Column(Column::kTimeQueued, "queued", ColumnType::kLong),
           Table::Column(Column::kTimeStarted, "started", ColumnType::kLong),
+          Table::Column(Column::kTimeFirstNext, "first_next",
+                        ColumnType::kLong),
           Table::Column(Column::kTimeEnded, "ended", ColumnType::kLong),
       },
       {Column::kTimeQueued});
@@ -83,16 +85,16 @@
                           sqlite_utils::kSqliteStatic);
       break;
     case Column::kTimeQueued:
-      sqlite3_result_int64(context,
-                           static_cast<int64_t>(stats.times_queued()[row_]));
+      sqlite3_result_int64(context, stats.times_queued()[row_]);
       break;
     case Column::kTimeStarted:
-      sqlite3_result_int64(context,
-                           static_cast<int64_t>(stats.times_started()[row_]));
+      sqlite3_result_int64(context, stats.times_started()[row_]);
+      break;
+    case Column::kTimeFirstNext:
+      sqlite3_result_int64(context, stats.times_first_next()[row_]);
       break;
     case Column::kTimeEnded:
-      sqlite3_result_int64(context,
-                           static_cast<int64_t>(stats.times_ended()[row_]));
+      sqlite3_result_int64(context, stats.times_ended()[row_]);
       break;
   }
   return SQLITE_OK;
diff --git a/src/trace_processor/sql_stats_table.h b/src/trace_processor/sql_stats_table.h
index 4544ed4..ebbe120 100644
--- a/src/trace_processor/sql_stats_table.h
+++ b/src/trace_processor/sql_stats_table.h
@@ -36,7 +36,8 @@
     kQuery = 0,
     kTimeQueued = 1,
     kTimeStarted = 2,
-    kTimeEnded = 3,
+    kTimeFirstNext = 3,
+    kTimeEnded = 4,
   };
 
   // Implementation of the SQLite cursor interface.
diff --git a/src/trace_processor/stats.h b/src/trace_processor/stats.h
index 66ce84d..8443c7f 100644
--- a/src/trace_processor/stats.h
+++ b/src/trace_processor/stats.h
@@ -91,7 +91,14 @@
   F(vmstat_unknown_keys,                        kSingle,  kError, kAnalysis), \
   F(clock_sync_failure,                         kSingle,  kError, kAnalysis), \
   F(process_tracker_errors,                     kSingle,  kError, kAnalysis), \
-  F(json_tokenizer_failure,                     kSingle,  kError, kTrace)
+  F(json_tokenizer_failure,                     kSingle,  kError, kTrace),    \
+  F(heapprofd_buffer_corrupted,                 kIndexed, kError, kTrace),    \
+  F(heapprofd_buffer_overran,                   kIndexed, kError, kTrace),    \
+  F(heapprofd_rejected_concurrent,              kIndexed, kError, kTrace),    \
+  F(heapprofd_invalid_string_id,                kSingle,  kError, kTrace),    \
+  F(heapprofd_invalid_mapping_id,               kSingle,  kError, kTrace),    \
+  F(heapprofd_invalid_frame_id,                 kSingle,  kError, kTrace),    \
+  F(heapprofd_invalid_callstack_id,             kSingle,  kError, kTrace)
 // clang-format on
 
 enum Type {
diff --git a/src/trace_processor/trace_processor_context.cc b/src/trace_processor/trace_processor_context.cc
index 7155348..5483dae 100644
--- a/src/trace_processor/trace_processor_context.cc
+++ b/src/trace_processor/trace_processor_context.cc
@@ -20,6 +20,7 @@
 #include "src/trace_processor/chunked_trace_reader.h"
 #include "src/trace_processor/clock_tracker.h"
 #include "src/trace_processor/event_tracker.h"
+#include "src/trace_processor/heap_profile_tracker.h"
 #include "src/trace_processor/json_trace_parser.h"
 #include "src/trace_processor/process_tracker.h"
 #include "src/trace_processor/proto_trace_parser.h"
diff --git a/src/trace_processor/trace_processor_context.h b/src/trace_processor/trace_processor_context.h
index 2fd83f2..5a4f815 100644
--- a/src/trace_processor/trace_processor_context.h
+++ b/src/trace_processor/trace_processor_context.h
@@ -32,6 +32,7 @@
 class TraceParser;
 class TraceStorage;
 class TraceSorter;
+class HeapProfileTracker;
 
 class TraceProcessorContext {
  public:
@@ -48,6 +49,7 @@
   std::unique_ptr<TraceParser> parser;
   std::unique_ptr<TraceSorter> sorter;
   std::unique_ptr<ChunkedTraceReader> chunk_reader;
+  std::unique_ptr<HeapProfileTracker> heap_profile_tracker;
 };
 
 }  // namespace trace_processor
diff --git a/src/trace_processor/trace_processor_impl.cc b/src/trace_processor/trace_processor_impl.cc
index 04ddd16..588afaa 100644
--- a/src/trace_processor/trace_processor_impl.cc
+++ b/src/trace_processor/trace_processor_impl.cc
@@ -33,8 +33,9 @@
 #include "src/trace_processor/event_tracker.h"
 #include "src/trace_processor/fuchsia_trace_parser.h"
 #include "src/trace_processor/fuchsia_trace_tokenizer.h"
+#include "src/trace_processor/heap_profile_tracker.h"
 #include "src/trace_processor/instants_table.h"
-#include "src/trace_processor/metrics/sql_metrics.h"
+#include "src/trace_processor/metrics/metrics.h"
 #include "src/trace_processor/process_table.h"
 #include "src/trace_processor/process_tracker.h"
 #include "src/trace_processor/proto_trace_parser.h"
@@ -181,6 +182,15 @@
   }
 }
 
+// Registers the RUN_METRIC SQL function on |db| with |tp| as its user data.
+// A registration failure is only logged.
+void CreateMetricsFunctions(TraceProcessorImpl* tp, sqlite3* db) {
+  // metrics::RunMetric casts the user data back to TraceProcessor*, so convert
+  // to that exact pointer type before it decays to void*: a round trip through
+  // void* is only guaranteed to be valid via the same pointer type.
+  TraceProcessor* user_data = tp;
+  auto ret = sqlite3_create_function_v2(db, "RUN_METRIC", -1, SQLITE_UTF8,
+                                        user_data, metrics::RunMetric, nullptr,
+                                        nullptr, sqlite_utils::kSqliteStatic);
+  if (ret) {
+    PERFETTO_ELOG("Error initializing RUN_METRIC");
+  }
+}
+
 // Fuchsia traces have a magic number as documented here:
 // https://fuchsia.googlesource.com/fuchsia/+/HEAD/docs/development/tracing/trace-format/README.md#magic-number-record-trace-info-type-0
 constexpr uint64_t kFuchsiaMagicNumber = 0x0016547846040010;
@@ -211,6 +221,7 @@
   InitializeSqlite(db);
   CreateBuiltinTables(db);
   CreateBuiltinViews(db);
+  CreateMetricsFunctions(this, db);
   db_.reset(std::move(db));
 
   context_.storage.reset(new TraceStorage());
@@ -220,6 +231,7 @@
   context_.process_tracker.reset(new ProcessTracker(&context_));
   context_.syscall_tracker.reset(new SyscallTracker(&context_));
   context_.clock_tracker.reset(new ClockTracker(&context_));
+  context_.heap_profile_tracker.reset(new HeapProfileTracker(&context_));
 
   ArgsTable::RegisterTable(*db_, context_.storage.get());
   ProcessTable::RegisterTable(*db_, context_.storage.get());
@@ -293,11 +305,13 @@
 
   context_.sorter->ExtractEventsForced();
   context_.event_tracker->FlushPendingEvents();
+  context_.heap_profile_tracker->ApplyAllAllocations();
   BuildBoundsTable(*db_, context_.storage->GetTraceTimestampBoundsNs());
 }
 
 TraceProcessor::Iterator TraceProcessorImpl::ExecuteQuery(
-    const std::string& sql) {
+    const std::string& sql,
+    int64_t time_queued) {
   sqlite3_stmt* raw_stmt;
   int err = sqlite3_prepare_v2(*db_, sql.c_str(), static_cast<int>(sql.size()),
                                &raw_stmt, nullptr);
@@ -308,8 +322,14 @@
   } else {
     col_count = static_cast<uint32_t>(sqlite3_column_count(raw_stmt));
   }
-  std::unique_ptr<IteratorImpl> impl(
-      new IteratorImpl(this, *db_, ScopedStmt(raw_stmt), col_count, error));
+
+  base::TimeNanos t_start = base::GetWallTimeNs();
+  uint32_t sql_stats_row =
+      context_.storage->mutable_sql_stats()->RecordQueryBegin(sql, time_queued,
+                                                              t_start.count());
+
+  std::unique_ptr<IteratorImpl> impl(new IteratorImpl(
+      this, *db_, ScopedStmt(raw_stmt), col_count, error, sql_stats_row));
   iterators_.emplace_back(impl.get());
   return TraceProcessor::Iterator(std::move(impl));
 }
@@ -324,66 +344,21 @@
 int TraceProcessorImpl::ComputeMetric(
     const std::vector<std::string>& metric_names,
     std::vector<uint8_t>* metrics_proto) {
-  // TODO(lalitm): stop hardcoding android.mem metric and read the proto
-  // descriptor for this logic instead.
-  if (metric_names.size() != 1 || metric_names[0] != "android.mem") {
-    PERFETTO_ELOG("Only android.mem metric is currently supported");
-    return 1;
-  }
-
-  auto queries = base::SplitString(metrics::kAndroidMem, ";\n\n");
-  for (const auto& query : queries) {
-    PERFETTO_DLOG("Executing query: %s", query.c_str());
-    auto prep_it = ExecuteQuery(query);
-    auto prep_has_next = prep_it.Next();
-    if (auto opt_error = prep_it.GetLastError()) {
-      PERFETTO_ELOG("SQLite error: %s", opt_error->c_str());
-      return 1;
-    }
-    PERFETTO_DCHECK(!prep_has_next);
-  }
-
-  protozero::ScatteredHeapBuffer delegate;
-  protozero::ScatteredStreamWriter writer(&delegate);
-  delegate.set_writer(&writer);
-
-  protos::pbzero::TraceMetrics metrics;
-  metrics.Reset(&writer);
-
-  // TODO(lalitm): all the below is temporary hardcoded queries and proto
-  // filling to ensure that the code above works.
-  auto it = ExecuteQuery("SELECT COUNT(*) from lmk_by_score;");
-  auto has_next = it.Next();
-  if (auto opt_error = it.GetLastError()) {
-    PERFETTO_ELOG("SQLite error: %s", opt_error->c_str());
-    return 1;
-  }
-  PERFETTO_CHECK(has_next);
-  PERFETTO_CHECK(it.Get(0).type == SqlValue::Type::kLong);
-
-  auto* memory = metrics.set_android_mem();
-  memory->set_system_metrics()->set_lmks()->set_total_count(
-      static_cast<int32_t>(it.Get(0).long_value));
-  metrics.Finalize();
-
-  *metrics_proto = delegate.StitchSlices();
-
-  has_next = it.Next();
-  PERFETTO_DCHECK(!has_next);
-
-  return 0;
+  return metrics::ComputeMetrics(this, metric_names, metrics_proto);
 }
 
 TraceProcessor::IteratorImpl::IteratorImpl(TraceProcessorImpl* trace_processor,
                                            sqlite3* db,
                                            ScopedStmt stmt,
                                            uint32_t column_count,
-                                           base::Optional<std::string> error)
+                                           base::Optional<std::string> error,
+                                           uint32_t sql_stats_row)
     : trace_processor_(trace_processor),
       db_(db),
       stmt_(std::move(stmt)),
       column_count_(column_count),
-      error_(error) {}
+      error_(error),
+      sql_stats_row_(sql_stats_row) {}
 
 TraceProcessor::IteratorImpl::~IteratorImpl() {
   if (trace_processor_) {
@@ -391,11 +366,21 @@
     auto it = std::find(its->begin(), its->end(), this);
     PERFETTO_CHECK(it != its->end());
     its->erase(it);
+
+    base::TimeNanos t_end = base::GetWallTimeNs();
+    auto* sql_stats = trace_processor_->context_.storage->mutable_sql_stats();
+    sql_stats->RecordQueryEnd(sql_stats_row_, t_end.count());
   }
 }
 
 void TraceProcessor::IteratorImpl::Reset() {
-  *this = IteratorImpl(nullptr, nullptr, ScopedStmt(), 0, base::nullopt);
+  *this = IteratorImpl(nullptr, nullptr, ScopedStmt(), 0, base::nullopt, 0);
+}
+
+void TraceProcessor::IteratorImpl::RecordFirstNextInSqlStats() {
+  base::TimeNanos t_first_next = base::GetWallTimeNs();
+  auto* sql_stats = trace_processor_->context_.storage->mutable_sql_stats();
+  sql_stats->RecordQueryFirstNext(sql_stats_row_, t_first_next.count());
 }
 
 }  // namespace trace_processor
diff --git a/src/trace_processor/trace_processor_impl.h b/src/trace_processor/trace_processor_impl.h
index fcab0e5..39a3ed8 100644
--- a/src/trace_processor/trace_processor_impl.h
+++ b/src/trace_processor/trace_processor_impl.h
@@ -31,11 +31,6 @@
 
 namespace perfetto {
 
-namespace protos {
-class RawQueryArgs;
-class RawQueryResult;
-}  // namespace protos
-
 namespace trace_processor {
 
 enum TraceType {
@@ -59,7 +54,8 @@
 
   void NotifyEndOfFile() override;
 
-  Iterator ExecuteQuery(const std::string& sql) override;
+  Iterator ExecuteQuery(const std::string& sql,
+                        int64_t time_queued = 0) override;
 
   int ComputeMetric(const std::vector<std::string>& metric_names,
                     std::vector<uint8_t>* metrics) override;
@@ -90,7 +86,8 @@
                sqlite3* db,
                ScopedStmt,
                uint32_t column_count,
-               base::Optional<std::string> error);
+               base::Optional<std::string> error,
+               uint32_t sql_stats_row);
   ~IteratorImpl();
 
   IteratorImpl(IteratorImpl&) noexcept = delete;
@@ -101,6 +98,13 @@
 
   // Methods called by TraceProcessor::Iterator.
   bool Next() {
+    // Delegate to the cc file to prevent trace_storage.h include in this
+    // file.
+    if (!called_next_) {
+      RecordFirstNextInSqlStats();
+      called_next_ = true;
+    }
+
     if (PERFETTO_UNLIKELY(error_.has_value()))
       return false;
 
@@ -149,11 +153,16 @@
   void Reset();
 
  private:
+  void RecordFirstNextInSqlStats();
+
   TraceProcessorImpl* trace_processor_;
   sqlite3* db_ = nullptr;
   ScopedStmt stmt_;
   uint32_t column_count_ = 0;
   base::Optional<std::string> error_;
+
+  uint32_t sql_stats_row_ = 0;
+  bool called_next_ = false;
 };
 
 }  // namespace trace_processor
diff --git a/src/trace_processor/trace_processor_shell.cc b/src/trace_processor/trace_processor_shell.cc
index a75d934..093c853 100644
--- a/src/trace_processor/trace_processor_shell.cc
+++ b/src/trace_processor/trace_processor_shell.cc
@@ -267,7 +267,7 @@
         if (input[0] == 'q')
           break;
       } else {
-        t_end = base::GetWallTimeMs();
+        t_end = base::GetWallTimeNs();
       }
       for (uint32_t i = 0; i < it->ColumnCount(); i++)
         printf("%20s ", it->GetColumName(i).c_str());
diff --git a/src/trace_processor/trace_storage.cc b/src/trace_processor/trace_storage.cc
index ee09777..a0892b4 100644
--- a/src/trace_processor/trace_storage.cc
+++ b/src/trace_processor/trace_storage.cc
@@ -71,25 +71,44 @@
   *this = TraceStorage();
 }
 
-void TraceStorage::SqlStats::RecordQueryBegin(const std::string& query,
-                                              int64_t time_queued,
-                                              int64_t time_started) {
+uint32_t TraceStorage::SqlStats::RecordQueryBegin(const std::string& query,
+                                                  int64_t time_queued,
+                                                  int64_t time_started) {
   if (queries_.size() >= kMaxLogEntries) {
     queries_.pop_front();
     times_queued_.pop_front();
     times_started_.pop_front();
+    times_first_next_.pop_front();
     times_ended_.pop_front();
+    popped_queries_++;
   }
   queries_.push_back(query);
   times_queued_.push_back(time_queued);
   times_started_.push_back(time_started);
+  times_first_next_.push_back(0);
   times_ended_.push_back(0);
+  return static_cast<uint32_t>(popped_queries_ + queries_.size() - 1);
 }
 
-void TraceStorage::SqlStats::RecordQueryEnd(int64_t time_ended) {
-  PERFETTO_DCHECK(!times_ended_.empty());
-  PERFETTO_DCHECK(times_ended_.back() == 0);
-  times_ended_.back() = time_ended;
+void TraceStorage::SqlStats::RecordQueryFirstNext(uint32_t row,
+                                                  int64_t time_first_next) {
+  // This means we've popped this query off the queue of queries before its
+  // first Next() call was recorded. Just silently drop this number.
+  if (popped_queries_ > row)
+    return;
+  uint32_t queue_row = row - popped_queries_;
+  PERFETTO_DCHECK(queue_row < queries_.size());
+  times_first_next_[queue_row] = time_first_next;
+}
+
+void TraceStorage::SqlStats::RecordQueryEnd(uint32_t row, int64_t time_ended) {
+  // This means we've popped this query off the queue of queries before it had
+  // a chance to finish. Just silently drop this number.
+  if (popped_queries_ > row)
+    return;
+  uint32_t queue_row = row - popped_queries_;
+  PERFETTO_DCHECK(queue_row < queries_.size());
+  times_ended_[queue_row] = time_ended;
 }
 
 std::pair<int64_t, int64_t> TraceStorage::GetTraceTimestampBoundsNs() const {
diff --git a/src/trace_processor/trace_storage.h b/src/trace_processor/trace_storage.h
index 47366e1..285296b 100644
--- a/src/trace_processor/trace_storage.h
+++ b/src/trace_processor/trace_storage.h
@@ -443,20 +443,27 @@
   class SqlStats {
    public:
     static constexpr size_t kMaxLogEntries = 100;
-    void RecordQueryBegin(const std::string& query,
-                          int64_t time_queued,
-                          int64_t time_started);
-    void RecordQueryEnd(int64_t time_ended);
+    uint32_t RecordQueryBegin(const std::string& query,
+                              int64_t time_queued,
+                              int64_t time_started);
+    void RecordQueryFirstNext(uint32_t row, int64_t time_first_next);
+    void RecordQueryEnd(uint32_t row, int64_t time_end);
     size_t size() const { return queries_.size(); }
     const std::deque<std::string>& queries() const { return queries_; }
     const std::deque<int64_t>& times_queued() const { return times_queued_; }
     const std::deque<int64_t>& times_started() const { return times_started_; }
+    const std::deque<int64_t>& times_first_next() const {
+      return times_first_next_;
+    }
     const std::deque<int64_t>& times_ended() const { return times_ended_; }
 
    private:
+    uint32_t popped_queries_ = 0;
+
     std::deque<std::string> queries_;
     std::deque<int64_t> times_queued_;
     std::deque<int64_t> times_started_;
+    std::deque<int64_t> times_first_next_;
     std::deque<int64_t> times_ended_;
   };
 
@@ -578,6 +585,133 @@
   };
   using StatsMap = std::array<Stats, stats::kNumKeys>;
 
+  class HeapProfileFrames {
+   public:
+    int64_t FindOrInsert(StringId name_id,
+                         int64_t mapping_row,
+                         int64_t rel_pc) {
+      // TODO(fmayer): More efficiently search for existing entries.
+      for (size_t i = 0; i < names_.size(); ++i) {
+        if (name_id == names_[i] && mapping_row == mappings_[i] &&
+            rel_pc == rel_pcs_[i]) {
+          return static_cast<int64_t>(i);
+        }
+      }
+
+      names_.emplace_back(name_id);
+      mappings_.emplace_back(mapping_row);
+      rel_pcs_.emplace_back(rel_pc);
+      return static_cast<int64_t>(names_.size()) - 1;
+    }
+
+    const std::deque<StringId>& names() const { return names_; }
+    const std::deque<int64_t>& mappings() const { return mappings_; }
+    const std::deque<int64_t>& rel_pcs() const { return rel_pcs_; }
+
+   private:
+    std::deque<StringId> names_;
+    std::deque<int64_t> mappings_;
+    std::deque<int64_t> rel_pcs_;
+  };
+
+  class HeapProfileCallsites {
+   public:
+    int64_t FindOrInsert(int64_t depth, int64_t parent_id, int64_t frame_row) {
+      // TODO(fmayer): More efficiently search for existing entries.
+      for (size_t i = 0; i < frame_depths_.size(); ++i) {
+        if (depth == frame_depths_[i] && parent_id == parent_callsite_ids_[i] &&
+            frame_row == frame_ids_[i]) {
+          return static_cast<int64_t>(i);
+        }
+      }
+      frame_depths_.emplace_back(depth);
+      parent_callsite_ids_.emplace_back(parent_id);
+      frame_ids_.emplace_back(frame_row);
+      return static_cast<int64_t>(frame_depths_.size()) - 1;
+    }
+
+    const std::deque<int64_t>& frame_depths() const { return frame_depths_; }
+    const std::deque<int64_t>& parent_callsite_ids() const {
+      return parent_callsite_ids_;
+    }
+    const std::deque<int64_t>& frame_ids() const { return frame_ids_; }
+
+   private:
+    std::deque<int64_t> frame_depths_;
+    std::deque<int64_t> parent_callsite_ids_;
+    std::deque<int64_t> frame_ids_;
+  };
+
+  class HeapProfileMappings {
+   public:
+    int64_t FindOrInsert(StringId build_id,
+                         int64_t offset,
+                         int64_t start,
+                         int64_t end,
+                         int64_t load_bias,
+                         StringId name_id) {
+      // TODO(fmayer): More efficiently search for existing entries.
+      for (size_t i = 0; i < build_ids_.size(); ++i) {
+        if (build_id == build_ids_[i] && offset == offsets_[i] &&
+            start == starts_[i] && end == ends_[i] &&
+            load_bias == load_biases_[i] && name_id == names_[i]) {
+          return static_cast<int64_t>(i);
+        }
+      }
+
+      build_ids_.emplace_back(build_id);
+      offsets_.emplace_back(offset);
+      starts_.emplace_back(start);
+      ends_.emplace_back(end);
+      load_biases_.emplace_back(load_bias);
+      names_.emplace_back(name_id);
+      return static_cast<int64_t>(build_ids_.size()) - 1;
+    }
+
+    const std::deque<StringId>& build_ids() const { return build_ids_; }
+    const std::deque<int64_t>& offsets() const { return offsets_; }
+    const std::deque<int64_t>& starts() const { return starts_; }
+    const std::deque<int64_t>& ends() const { return ends_; }
+    const std::deque<int64_t>& load_biases() const { return load_biases_; }
+    const std::deque<StringId>& names() const { return names_; }
+
+   private:
+    std::deque<StringId> build_ids_;
+    std::deque<int64_t> offsets_;
+    std::deque<int64_t> starts_;
+    std::deque<int64_t> ends_;
+    std::deque<int64_t> load_biases_;
+    std::deque<StringId> names_;
+  };
+
+  class HeapProfileAllocations {
+   public:
+    void Insert(int64_t timestamp,
+                int64_t pid,
+                int64_t callsite_id,
+                int64_t count,
+                int64_t size) {
+      timestamps_.emplace_back(timestamp);
+      pids_.emplace_back(pid);
+      callsite_ids_.emplace_back(callsite_id);
+      counts_.emplace_back(count);
+      sizes_.emplace_back(size);
+    }
+
+    const std::deque<int64_t>& timestamps() const { return timestamps_; }
+    const std::deque<int64_t>& pids() const { return pids_; }
+    const std::deque<int64_t>& callsite_ids() const { return callsite_ids_; }
+    const std::deque<int64_t>& counts() const { return counts_; }
+    const std::deque<int64_t>& sizes() const { return sizes_; }
+
+   private:
+    std::deque<int64_t> timestamps_;
+    std::deque<int64_t> pids_;
+    std::deque<int64_t> callsite_ids_;
+    std::deque<int64_t> counts_;
+    std::deque<int64_t> sizes_;
+  };
+
   void ResetStorage();
 
   UniqueTid AddEmptyThread(uint32_t tid) {
@@ -621,6 +755,13 @@
     stats_[key].value += increment;
   }
 
+  // Example usage: IncrementIndexedStats(stats::cpu_failure, /*index=*/1);
+  void IncrementIndexedStats(size_t key, int index, int64_t increment = 1) {
+    PERFETTO_DCHECK(key < stats::kNumKeys);
+    PERFETTO_DCHECK(stats::kTypes[key] == stats::kIndexed);
+    stats_[key].indexed_values[index] += increment;
+  }
+
   // Example usage: SetIndexedStats(stats::cpu_failure, 1, 42);
   void SetIndexedStats(size_t key, int index, int64_t value) {
     PERFETTO_DCHECK(key < stats::kNumKeys);
@@ -688,6 +829,34 @@
   const RawEvents& raw_events() const { return raw_events_; }
   RawEvents* mutable_raw_events() { return &raw_events_; }
 
+  const HeapProfileMappings& heap_profile_mappings() const {
+    return heap_profile_mappings_;
+  }
+  HeapProfileMappings* mutable_heap_profile_mappings() {
+    return &heap_profile_mappings_;
+  }
+
+  const HeapProfileFrames& heap_profile_frames() const {
+    return heap_profile_frames_;
+  }
+  HeapProfileFrames* mutable_heap_profile_frames() {
+    return &heap_profile_frames_;
+  }
+
+  const HeapProfileCallsites& heap_profile_callsites() const {
+    return heap_profile_callsites_;
+  }
+  HeapProfileCallsites* mutable_heap_profile_callsites() {
+    return &heap_profile_callsites_;
+  }
+
+  const HeapProfileAllocations& heap_profile_allocations() const {
+    return heap_profile_allocations_;
+  }
+  HeapProfileAllocations* mutable_heap_profile_allocations() {
+    return &heap_profile_allocations_;
+  }
+
   const StringPool& string_pool() const { return string_pool_; }
 
   // |unique_processes_| always contains at least 1 element becuase the 0th ID
@@ -759,6 +928,11 @@
   // trace.
   RawEvents raw_events_;
   AndroidLogs android_log_;
+
+  HeapProfileMappings heap_profile_mappings_;
+  HeapProfileFrames heap_profile_frames_;
+  HeapProfileCallsites heap_profile_callsites_;
+  HeapProfileAllocations heap_profile_allocations_;
 };
 
 }  // namespace trace_processor
diff --git a/src/tracing/core/shared_memory_arbiter_impl.cc b/src/tracing/core/shared_memory_arbiter_impl.cc
index ce55c32..7f646c8 100644
--- a/src/tracing/core/shared_memory_arbiter_impl.cc
+++ b/src/tracing/core/shared_memory_arbiter_impl.cc
@@ -301,6 +301,19 @@
 void SharedMemoryArbiterImpl::BindStartupTraceWriterRegistry(
     std::unique_ptr<StartupTraceWriterRegistry> registry,
     BufferID target_buffer) {
+  if (!task_runner_->RunsTasksOnCurrentThread()) {
+    auto weak_this = weak_ptr_factory_.GetWeakPtr();
+    auto* raw_reg = registry.release();
+    task_runner_->PostTask([weak_this, raw_reg, target_buffer]() {
+      std::unique_ptr<StartupTraceWriterRegistry> owned_reg(raw_reg);
+      if (!weak_this)
+        return;
+      weak_this->BindStartupTraceWriterRegistry(std::move(owned_reg),
+                                                target_buffer);
+    });
+    return;
+  }
+
   // The registry will be owned by the arbiter, so it's safe to capture |this|
   // in the callback.
   auto on_bound_callback = [this](StartupTraceWriterRegistry* bound_registry) {
diff --git a/src/tracing/core/startup_trace_writer_registry.cc b/src/tracing/core/startup_trace_writer_registry.cc
index 6de4ab0..c3fcfce 100644
--- a/src/tracing/core/startup_trace_writer_registry.cc
+++ b/src/tracing/core/startup_trace_writer_registry.cc
@@ -81,6 +81,10 @@
     arbiter_ = arbiter;
     target_buffer_ = target_buffer;
     task_runner_ = task_runner;
+    // Weakptrs should be valid on |task_runner|. For this, the factory needs to
+    // be created on |task_runner|, i.e. BindToArbiter must be called on
+    // |task_runner|.
+    PERFETTO_DCHECK(task_runner_->RunsTasksOnCurrentThread());
     weak_ptr_factory_.reset(
         new base::WeakPtrFactory<StartupTraceWriterRegistry>(this));
     on_bound_callback_ = std::move(on_bound_callback);
diff --git a/tools/gen_android_bp b/tools/gen_android_bp
index b6b5eda..f2719fe 100755
--- a/tools/gen_android_bp
+++ b/tools/gen_android_bp
@@ -519,9 +519,13 @@
         'gen_merged_sql_metrics',
     )
     module.tool_files = [
-        'tools/gen_merged_sql_metrics',
+        'tools/gen_merged_sql_metrics.py',
     ]
-    module.cmd = '$(location tools/gen_merged_sql_metrics) --cpp_out=$(out) $(in)'
+    module.cmd = ' '.join([
+        '$(location tools/gen_merged_sql_metrics.py)',
+        '--cpp_out=$(out)',
+        '$(in)',
+    ])
     module.out = set(
         src[src.index('gen/') + len('gen/'):]
         for src in target_desc.get('outputs', [])
diff --git a/tools/gen_build b/tools/gen_build
index ade417f..d03f8d2 100755
--- a/tools/gen_build
+++ b/tools/gen_build
@@ -277,33 +277,6 @@
     else:
       self.line('{} = {},'.format(key, value), indent=1)
 
-  def rule(self, type, name, visibility=None, srcs=None, hdrs=None, deps=None, is_pbzero=False, **kwargs):
-    self.line('{}('.format(type))
-    self.variable('name', name)
-    if srcs:
-      self.variable('srcs', srcs)
-    if hdrs:
-      self.variable('hdrs', hdrs)
-
-    if type == 'proto_library' and not is_pbzero:
-      if srcs:
-        self.variable('has_services', 1)
-      self.variable('cc_api_version', 2)
-      if srcs:
-        self.variable('cc_generic_services', 1)
-
-    for key, value in kwargs.iteritems():
-      self.variable(key, value)
-
-    # Keep visibility and deps last.
-    if visibility:
-      self.variable('visibility', visibility)
-
-    if type != 'filegroup':
-      self.variable('deps', deps)
-
-    self.line(')')
-
   def header(self):
     self.output.write(header)
 
@@ -311,7 +284,7 @@
 class Target(object):
   """In-memory representation of a BUILD target."""
 
-  def __init__(self, type, name, gn_name=None, **kwargs):
+  def __init__(self, type, name, gn_name=None):
     assert type in ('cc_binary', 'cc_library', 'cc_proto_library',
                     'proto_library', 'filegroup', 'alias',
                     'pbzero_cc_proto_library', 'genrule', )
@@ -322,13 +295,41 @@
     self.deps = set()
     self.visibility = set()
     self.gn_name = gn_name
-    self.args = kwargs
+    self.is_pbzero = False
+    self.src_proto_library = None
+    self.outs = set()
+    self.cmd = None
+    self.tools = set()
 
   def write(self, writer):
     if self.gn_name:
       writer.comment('GN target: {}'.format(self.gn_name))
-    writer.rule(self.type, self.name, visibility=self.visibility,
-                srcs=self.srcs, hdrs=self.hdrs, deps=self.deps, **self.args)
+
+    writer.line('{}('.format(self.type))
+    writer.variable('name', self.name)
+    writer.variable('srcs', self.srcs)
+    writer.variable('hdrs', self.hdrs)
+
+    if self.type == 'proto_library' and not self.is_pbzero:
+      if self.srcs:
+        writer.variable('has_services', 1)
+      writer.variable('cc_api_version', 2)
+      if self.srcs:
+        writer.variable('cc_generic_services', 1)
+
+    writer.variable('src_proto_library', self.src_proto_library)
+
+    writer.variable('outs', self.outs)
+    writer.variable('cmd', self.cmd)
+    writer.variable('tools', self.tools)
+
+    # Keep visibility and deps last.
+    writer.variable('visibility', self.visibility)
+
+    if self.type != 'filegroup':
+      writer.variable('deps', self.deps)
+
+    writer.line(')')
 
 
 class Build(object):
@@ -463,6 +464,9 @@
       if "gen_merged_sql_metrics" in dep_name:
         dep_target = self.create_merged_sql_metrics_target(dep_name)
         target.deps.add(Label("//third_party/perfetto:" + dep_target.name))
+
+        if target.type == 'cc_library' or target.type == 'cc_binary':
+          target.srcs.update(dep_target.outs)
       elif args[0].endswith('/protoc'):
         (proto_target, cc_target) = self.create_proto_target(dep_name)
         if target.type == 'proto_library':
@@ -495,15 +499,15 @@
       'genrule',
       'gen_merged_sql_metrics',
       gn_name=gn_target_name_no_toolchain,
-      outs=set(
-        Label(src[src.index('gen/') + len('gen/'):])
-        for src in target_desc.get('outputs', [])
-      ),
-      cmd = '$(location gen_merged_sql_metrics_py) --cpp_out=$@ $SRCS',
-      tools=[
-        'gen_merged_sql_metrics_py',
-      ],
     )
+    target.outs.update(
+      Label(src[src.index('gen/') + len('gen/'):])
+      for src in target_desc.get('outputs', [])
+    )
+    target.cmd = '$(location gen_merged_sql_metrics_py) --cpp_out=$@ $(SRCS)'
+    target.tools.update([
+      'gen_merged_sql_metrics_py',
+    ])
     target.srcs.update(
       Label(label_to_path(src))
       for src in target_desc.get('inputs', [])
@@ -516,7 +520,6 @@
     target_desc = self.desc[gn_target_name]
     args = target_desc['args']
 
-    is_pbzero = any("pbzero" in arg for arg in args)
     gn_target_name_no_toolchain = label_without_toolchain(gn_target_name)
     stripped_path = gn_target_name_no_toolchain.replace("protos/perfetto/", "")
     pretty_target_name = label_to_target_name_with_path(stripped_path)
@@ -526,21 +529,21 @@
     proto_target = Target(
       'proto_library',
       pretty_target_name,
-      gn_name=gn_target_name_no_toolchain,
-      is_pbzero=is_pbzero
+      gn_name=gn_target_name_no_toolchain
     )
+    proto_target.is_pbzero = any("pbzero" in arg for arg in args)
     proto_target.srcs.update([
       Label(label_to_path(src).replace('protos/', ''))
       for src in target_desc.get('sources', [])
     ])
-    if not is_pbzero:
+    if not proto_target.is_pbzero:
       proto_target.visibility.add("//visibility:public")
     self.proto_build.add_target(proto_target)
 
     for dep_name in self.resolve_dependencies(gn_target_name):
       self.apply_module_dependency(proto_target, dep_name)
 
-    if is_pbzero:
+    if proto_target.is_pbzero:
       # Remove all the protozero srcs from the proto_library.
       proto_target.srcs.difference_update(
           [src for src in proto_target.srcs if not src.label.endswith('.proto')])
@@ -548,21 +551,21 @@
       # Remove all the non-proto deps from the proto_library and add to the cc
       # library.
       cc_deps = [
-          dep for dep in proto_target.deps
-          if not dep.label.startswith('//third_party/perfetto/protos')
+        dep for dep in proto_target.deps
+        if not dep.label.startswith('//third_party/perfetto/protos')
       ]
       proto_target.deps.difference_update(cc_deps)
 
       cc_target_name = proto_target.name + "_cc_proto"
-      cc_target = Target('pbzero_cc_proto_library',
-                        cc_target_name, gn_name=gn_target_name_no_toolchain)
+      cc_target = Target('pbzero_cc_proto_library', cc_target_name,
+                         gn_name=gn_target_name_no_toolchain)
 
       cc_target.deps.add(Label('//third_party/perfetto:libprotozero'))
       cc_target.deps.update(cc_deps)
 
       # Add the proto_library to the cc_target.
-      cc_target.args['src_proto_library'] = "//third_party/perfetto/protos:" + \
-          proto_target.name
+      cc_target.src_proto_library = \
+          "//third_party/perfetto/protos:" + proto_target.name
 
       self.proto_build.add_target(cc_target)
     else:
diff --git a/tools/gen_merged_sql_metrics b/tools/gen_merged_sql_metrics.py
similarity index 88%
rename from tools/gen_merged_sql_metrics
rename to tools/gen_merged_sql_metrics.py
index 6ad35ea..071423c 100755
--- a/tools/gen_merged_sql_metrics
+++ b/tools/gen_merged_sql_metrics.py
@@ -41,12 +41,15 @@
  * AUTOGENERATED BY tools/gen_merged_sql_metrics - DO NOT EDIT
  *******************************************************************************
  */
+
+ #include <string.h>
 '''
 
 NAMESPACE_BEGIN = '''
 namespace perfetto {
 namespace trace_processor {
 namespace metrics {
+namespace sql_metrics {
 '''
 
 FILE_TO_SQL_STRUCT = '''
@@ -56,7 +59,19 @@
 };
 '''
 
+FIND_SQL_FN = '''
+inline const char* GetBundledMetric(const char* filename) {
+  for (const auto& filename_to_sql : sql_metrics::kFileToSql) {
+    if (strcmp(filename_to_sql.filename, filename) == 0) {
+      return filename_to_sql.sql;
+    }
+  }
+  return nullptr;
+}
+'''
+
 NAMESPACE_END = '''
+}  // namespace sql_metrics
 }  // namespace metrics
 }  // namespace trace_processor
 }  // namsepace perfetto
@@ -68,7 +83,7 @@
 def main():
   parser = argparse.ArgumentParser()
   parser.add_argument('--cpp_out', required=True)
-  parser.add_argument('sql_files', action='append')
+  parser.add_argument('sql_files', nargs='*')
   args = parser.parse_args()
 
   # Extract the SQL output from each file.
@@ -99,6 +114,7 @@
       output.write('\n  {{"{}", {}}},\n'.format(name, variable))
     output.write("};\n")
 
+    output.write(FIND_SQL_FN)
     output.write(NAMESPACE_END)
 
   return 0