Merge "tp: fix missing status checks"
diff --git a/CHANGELOG b/CHANGELOG
index 114c03d..d1bba4c 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,6 +1,9 @@
 Unreleased:
   Tracing service and probes:
-    *
+    * Added support for building most targets (including traced, SDK and
+      trace_processor_shell) from Windows using either clang-cl or MSVC 2019.
+    * Added tracebox, a monolithic binary to capture traces with one command
+      on Linux and older versions of Android (tested on Android Oreo).
   Trace Processor:
     *
   UI:
diff --git a/debian/control b/debian/control
index 94a03bb..00f1346 100644
--- a/debian/control
+++ b/debian/control
@@ -7,6 +7,7 @@
   git,
   libprotoc-dev,
   ninja-build,
+  pandoc,
   protobuf-compiler,
   python3,
   zlib1g-dev,
diff --git a/debian/perfetto.manpages b/debian/perfetto.manpages
new file mode 100644
index 0000000..07cb459
--- /dev/null
+++ b/debian/perfetto.manpages
@@ -0,0 +1 @@
+debian/perfetto.1
diff --git a/debian/rules b/debian/rules
index a081c5c..f8d21e0 100755
--- a/debian/rules
+++ b/debian/rules
@@ -34,6 +34,8 @@
 
 override_dh_auto_build:
 	ninja -C out/release perfetto traced traced_probes
+	pandoc docs/reference/perfetto-cli.md -s -t man --shift-heading-level-by=-1 >\
+	 debian/perfetto.1
 
 override_dh_auto_clean:
 	rm -rf out/release
diff --git a/docs/contributing/build-instructions.md b/docs/contributing/build-instructions.md
index 0c12902..9eba8f2 100644
--- a/docs/contributing/build-instructions.md
+++ b/docs/contributing/build-instructions.md
@@ -7,64 +7,99 @@
 
 Perfetto can be built both from the Android tree (AOSP) and standalone.
 Standalone builds are meant only for local testing and are not shipped.
-Due to the reduced dependencies they are faster to iterate on and the
-suggested way to work on Perfetto.
+Due to the reduced dependencies, the standalone workflow is faster to iterate on
+and the suggested way to work on Perfetto, unless you are working on code that
+has non-NDK dependencies into Android internals. Profilers and internal HAL/AIDL
+dependencies will not be built in the standalone build.
 
-## Get the code
+If you are a chromium contributor, AOSP is still the place you should send CLs
+to. The code inside chromium's
+[third_party/perfetto](https://source.chromium.org/chromium/chromium/src/+/main:third_party/perfetto/?q=f:third_party%2Fperfetto&ss=chromium)
+is a direct mirror of the AOSP repo. The
+[AOSP->Chromium autoroller](https://autoroll.skia.org/r/perfetto-chromium-autoroll)
+takes care of keeping chromium's DEPS up to date.
 
-**Standalone checkout**:
+## Standalone builds
+
+#### Get the code
 
 ```bash
 git clone https://android.googlesource.com/platform/external/perfetto/
 ```
 
-**Android tree**:
-
-Perfetto lives in [`external/perfetto` in the AOSP tree](https://cs.android.com/android/platform/superproject/+/master:external/perfetto/).
-
-## Prerequisites
-
-**Standalone checkout**:
-
-All dependent libraries are self-hosted and pulled through:
+#### Pull dependent libraries and toolchains
 
 ```bash
 tools/install-build-deps [--android] [--ui]
 ```
 
-**Android tree**:
+`--android` will pull the Android NDK, emulator and other deps required
+to build for `target_os = "android"`.
 
-See https://source.android.com/setup
+`--ui` will pull NodeJS and all the NPM modules required to build the
+Web UI. See the [UI Development](#ui-development) section below for more.
 
-## Building
+#### Generate the build files via GN
 
-**Standalone checkout**:
-
-If you are a chromium developer and have depot_tools installed you can avoid
-the `tools/` prefix below and just use gn/ninja from depot_tools.
-
-`$ tools/gn args out/android` to generate build files and enter in the editor:
-
-```python
-target_os = "android"                 # Only when building for Android
-target_cpu = "arm" / "arm64" / "x64"
-is_debug = true / false
-cc_wrapper = "ccache"                 # Optionally speed repeated builds with ccache
-```
-
-(See the [Build Configurations](#build-configurations) section below for more)
+Perfetto uses [GN](https://gn.googlesource.com/gn/+/HEAD/docs/quick_start.md)
+as primary build system. See the [Build files](#build-files) section below for
+more.
 
 ```bash
-tools/ninja -C out/android
+tools/gn args out/android
 ```
 
-**Android tree**
+This will open an editor to customize the GN args. Enter:
 
-`mmma external/perfetto`
-or
-`m perfetto traced traced_probes`
+```python
+# Set only when building for Android, omit when building for linux, mac or win.
+target_os = "android"
+target_cpu = "arm" / "arm64" / "x64"
+
+is_debug = true / false
+cc_wrapper = "ccache"             # [Optional] speed up rebuilds with ccache.
+```
+
+See the [Build Configurations](#build-configurations) and
+[Building on Windows](#building-on-windows) sections below for more.
+
+TIP: If you are a chromium developer and have depot_tools installed you can
+avoid the `tools/` prefix below and just use gn/ninja from depot_tools.
+
+#### Build native C/C++ targets
+
+```bash
+# This will build all the targets.
+tools/ninja -C out/android
+
+# Alternatively, list targets explicitly.
+tools/ninja -C out/android \
+  traced \                 # Tracing service.
+  traced_probes \          # Ftrace interop and /proc poller.
+  perfetto \               # Cmdline client.
+  trace_processor_shell \  # Trace parsing.
+  trace_to_text            # Trace conversion.
+...
+```
+
+## Android tree builds
+
+Follow these instructions if you are an AOSP contributor.
+
+The source code lives in [`external/perfetto` in the AOSP tree](https://cs.android.com/android/platform/superproject/+/master:external/perfetto/).
+
+Follow the instructions on https://source.android.com/setup/build/building .
+
+Then:
+
+```bash
+mmma external/perfetto
+# or
+m traced traced_probes perfetto
+```
 
 This will generate artifacts `out/target/product/XXX/system/`.
+
 Executables and shared libraries are stripped by default by the Android build
 system. The unstripped artifacts are kept into `out/target/product/XXX/symbols`.
 
@@ -100,19 +135,6 @@
 source file is changed it, the script will automatically re-build it and show a
 prompt in the web page.
 
-## IDE setup
-
-Use a following command in the checkout directory in order to generate the
-compilation database file:
-
-```bash
-tools/gn gen out/default --export-compile-commands
-```
-
-After generating, it can be used in CLion (File -> Open -> Open As Project),
-Visual Studio Code with C/C++ extension and any other tool and editor that
-supports the compilation database format.
-
 ## Build files
 
 The source of truth of our build file is in the BUILD.gn files, which are based
@@ -120,6 +142,8 @@
 The Android build file ([Android.bp](/Android.bp)) is autogenerated from the GN
 files through `tools/gen_android_bp`, which needs to be invoked whenever a
 change touches GN files or introduces new ones.
+Likewise, the Bazel build file ([BUILD](/BUILD)) is autogenerated through the
+`tools/gen_bazel` script.
 
 A presubmit check checks that the Android.bp is consistent with GN files when
 submitting a CL through `git cl upload`.
@@ -142,15 +166,79 @@
 
 **Mac**
 
-- XCode 9 / clang (currently maintained best-effort).
+- XCode 9 / clang (maintained best-effort).
 
 **Windows**
 
-Windows builds are not currently supported when using the standalone checkout
-and GN. Windows is supported only for a subset of the targets (mainly
-`trace_processor` and the in-process version of the
-[Tracing SDK](/docs/instrumentation/tracing-sdk.md)) in two ways:
-(1) when building through Bazel; (2) when building as part of Chromium.
+- Windows 10 with either MSVC 2019 or clang-cl (maintained best-effort).
+
+### Building on Windows
+
+Building on Windows is possible using either the MSVC 2019 compiler (you don't
+need the full IDE, just the build tools) or the LLVM clang-cl compiler.
+
+The Windows support in standalone builds has been introduced in v16 by
+[r.android.com/1711913](https://r.android.com/1711913).
+
+clang-cl support is more stable because that build configuration is actively
+covered by the Chromium project (Perfetto rolls into chromium and underpins
+chrome://tracing). The MSVC build is maintained best-effort.
+
+The following targets are supported on Windows:
+
+- `trace_processor_shell`: the trace importer and SQL query engine.
+- `trace_to_text`: the trace conversion tool.
+- `traced` and `perfetto`: the tracing service and cmdline client. They use an
+  alternative implementation of the [inter-process tracing protocol](/docs/design-docs/api-and-abi.md#tracing-protocol-abi)
+  based on a TCP socket and named shared memory. This configuration is only for
+  testing / benchmarks and is not shipped in production.
+  Googlers: see [go/perfetto-win](http://go/perfetto-win) for details.
+- `perfetto_unittests` / `perfetto_integrationtests`: although they support only
+  the subset of code that is supported on Windows (e.g. no ftrace).
+
+It is NOT possible to build the Perfetto UI from Windows.
+
+#### Prerequisites
+
+You need all of these both for MSVC and clang-cl:
+
+- [Build Tools for Visual Studio 2019](https://visualstudio.microsoft.com/downloads/#build-tools-for-visual-studio-2019)
+- [Windows 10 SDK](https://developer.microsoft.com/en-us/windows/downloads/windows-10-sdk/)
+- [Python 3](https://www.python.org/downloads/windows/)
+
+The [`win_find_msvc.py`](/gn/standalone/toolchain/win_find_msvc.py) script will
+locate the highest version numbers available from
+`C:\Program Files (x86)\Windows Kits\10` and
+`C:\Program Files (x86)\Microsoft Visual Studio\2019`.
+
+#### Pull dependent libraries and toolchains
+
+```bash
+# This will also download the LLVM clang-cl prebuilt used by chromium.
+python3 tools/install-build-deps
+```
+
+#### Generate build files
+
+```bash
+python3 tools/gn args out/win
+```
+
+In the editor type:
+
+```bash
+is_debug = true | false
+
+is_clang = true  # Will use the hermetic clang-cl toolchain.
+# or
+is_clang = false  # Will use MSVC 2019.
+```
+
+#### Build
+
+```bash
+python3 tools/ninja -C out/win perfetto traced trace_processor_shell
+```
 
 ## Build configurations
 
@@ -304,3 +392,73 @@
 ```
 
 [gn-quickstart]: https://gn.googlesource.com/gn/+/master/docs/quick_start.md
+
+## IDE setup
+
+Use the following command in the checkout directory in order to generate the
+compilation database file:
+
+```bash
+tools/gn gen out/default --export-compile-commands
+```
+
+After generating, it can be used in CLion (File -> Open -> Open As Project),
+Visual Studio Code with C/C++ extension and any other tool and editor that
+supports the compilation database format.
+
+#### Useful extensions
+
+If you are using VS Code we suggest the following extensions:
+
+- [Clang-Format](https://marketplace.visualstudio.com/items?itemName=xaver.clang-format)
+- [C/C++](https://marketplace.visualstudio.com/items?itemName=ms-vscode.cpptools)
+- [clangd](https://marketplace.visualstudio.com/items?itemName=llvm-vs-code-extensions.vscode-clangd)
+- [Native Debug](https://marketplace.visualstudio.com/items?itemName=webfreak.debug)
+- [GNFormat](https://marketplace.visualstudio.com/items?itemName=persidskiy.vscode-gnformat)
+- [ESlint](https://marketplace.visualstudio.com/items?itemName=dbaeumer.vscode-eslint)
+- [markdownlint](https://marketplace.visualstudio.com/items?itemName=DavidAnson.vscode-markdownlint)
+
+#### Useful settings
+
+In `.vscode/settings.json`:
+
+```json
+{
+  "C_Cpp.clang_format_path": "${workspaceRoot}/buildtools/mac/clang-format",
+  "C_Cpp.clang_format_sortIncludes": true,
+  "files.exclude": {
+    "out/*/obj": true,
+    "out/*/gen": true,
+  },
+  "clangd.arguments": [
+    "--compile-commands-dir=${workspaceFolder}/out/mac_debug",
+    "--completion-style=detailed",
+    "--header-insertion=never"
+  ],
+}
+```
+
+Replace `/mac/` with `/linux64/` on Linux.
+
+### Debugging with VSCode
+
+Edit `.vscode/launch.json`:
+
+```json
+{
+  "version": "0.2.0",
+  "configurations": [
+    {
+      "request": "launch",
+      "type": "cppdbg",
+      "name": "Perfetto unittests",
+      "program": "${workspaceRoot}/out/mac_debug/perfetto_unittests",
+      "args": ["--gtest_filter=TracingServiceImplTest.StopTracingTriggerRingBuffer"],
+      "cwd": "${workspaceFolder}/out/mac_debug",
+      "MIMode": "lldb",
+    },
+  ]
+}
+```
+
+Then open the command palette `Meta`+`Shift`+`P` -> `Debug: Start debugging`.
diff --git a/docs/design-docs/heapprofd-design.md b/docs/design-docs/heapprofd-design.md
index fe5a8ef..005b0d3 100644
--- a/docs/design-docs/heapprofd-design.md
+++ b/docs/design-docs/heapprofd-design.md
@@ -182,6 +182,9 @@
 
 The sampling rate is configurable as part of the initial handshake. A sampling rate == 1 will degenerate into the fully-accurate high-overhead mode.
 
+See [Sampling for Memory Profiles](/docs/design-docs/heapprofd-sampling) for
+more details.
+
 Prior art: [crbug.com/812262](http://crbug.com/812262), [crbug.com/803276](http://crbug.com/803276).
 
 ## Implementation Plan
diff --git a/docs/quickstart/android-tracing.md b/docs/quickstart/android-tracing.md
index 2c65d30..3d3b37e 100644
--- a/docs/quickstart/android-tracing.md
+++ b/docs/quickstart/android-tracing.md
@@ -17,6 +17,12 @@
 adb shell setprop persist.traced.enable 1
 ```
 
+If you are running a version of Android older than P, you can still capture a
+trace with Perfetto using the `record_android_trace` script. See instructions
+below in the
+[Recording a trace through the cmdline](#recording-a-trace-through-the-cmdline)
+section.
+
 ## Recording a trace
 
 Command line tools (usage examples below in this page):
@@ -65,6 +71,8 @@
 the command line. It is the equivalent of running `adb shell perfetto` but it
 helps with getting the paths right, auto-pulling the trace once done and opening
 it on the browser.
+Furthermore, on older versions of Android it takes care of sideloading the
+`tracebox` binary to make up for the lack of tracing system services.
 
 If you are already familiar with `systrace` or `atrace`, both cmdline tools
 support a systrace-equivalent syntax:
diff --git a/docs/quickstart/linux-tracing.md b/docs/quickstart/linux-tracing.md
index 93fa823..104b02e 100644
--- a/docs/quickstart/linux-tracing.md
+++ b/docs/quickstart/linux-tracing.md
@@ -17,24 +17,42 @@
 ```bash
 tools/install-build-deps
 ```
-_If the script fails with SSL errors, try invoking it as `python3 tools/install-build-deps`, or upgrading your openssl libraries._
+_If the script fails with SSL errors, try upgrading your openssl package._
 
-3. Generate all most common GN build configurations:
+3. Generate the build configuration
 ```bash
-tools/build_all_configs.py
+tools/gn gen --args='is_debug=false' out/linux
+# Or use `tools/build_all_configs.py` to generate more build configs.
 ```
 
 4. Build the Linux tracing binaries (On Linux it uses a hermetic clang toolchain, downloaded as part of step 2):
 ```bash
-tools/ninja -C out/linux_clang_release traced traced_probes perfetto
+tools/ninja -C out/linux tracebox traced traced_probes perfetto
 ```
-_This step is optional when using the convenience `tools/tmux` script below._
 
 ## Capturing a trace
 
 Due to Perfetto's [service-based architecture](/docs/concepts/service-model.md),
 in order to capture a trace, the `traced` (session daemon) and `traced_probes`
 (probes and ftrace-interop daemon) need to be running.
+As of Perfetto v16, the `tracebox` binary bundles together all the binaries you
+need in a single executable (a bit like `toybox` or `busybox`).
+
+#### Capturing a trace with ftrace and /proc pollers, no SDK
+
+If you are interested in overall system tracing and are not interested in
+testing the SDK, you can use `tracebox` in autostart mode as follows:
+
+```bash
+out/linux/tracebox -o trace_file.perfetto-trace --txt -c test/configs/scheduling.cfg
+```
+
+#### Testing the SDK integration in out-of-process tracing mode (system mode)
+
+If you are using the Perfetto [tracing SDK](/docs/instrumentation/tracing-sdk)
+and want to capture a fused trace that contains both system trace events and
+your custom app trace events, you need to start the `traced` and `traced_probes`
+services ahead of time and then use the `perfetto` cmdline client.
 
 For a quick start, the [tools/tmux](/tools/tmux) script takes care of building,
 setting up and running everything.
@@ -44,8 +62,8 @@
 [ftrace]: https://www.kernel.org/doc/Documentation/trace/ftrace.txt
 
 1. Run the convenience script with an example tracing config (10s duration):
-```
-OUT=out/linux_clang_release CONFIG=test/configs/scheduling.cfg tools/tmux -n
+```bash
+tools/tmux -c test/configs/scheduling.cfg -C out/linux -n
 ```
 This will open a tmux window with three panes, one per the binary involved in
 tracing: `traced`, `traced_probes` and the `perfetto` client cmdline.
@@ -62,9 +80,9 @@
 
 We can now explore the captured trace visually by using a dedicated web-based UI.
 
-NOTE: The UI runs fully in-browser using JavaScript + Web Assembly. The trace
+NOTE: The UI runs in-browser using JavaScript + Web Assembly. The trace
       file is **not** uploaded anywhere by default, unless you explicitly click
-      on the 'Share' link.
+      on the 'Share' link. The 'Share' link is available only to Googlers.
 
 1. Navigate to [ui.perfetto.dev](https://ui.perfetto.dev) in a browser.
 
@@ -75,5 +93,5 @@
    process tracks (rows) into their constituent thread tracks.
    Press "?" for further navigation controls.
 
-Alternatively, you can explore the trace contents issuing SQL queries through 
+Alternatively, you can explore the trace contents issuing SQL queries through
 the [trace processor](/docs/analysis/trace-processor).
diff --git a/docs/reference/heap_profile-cli.md b/docs/reference/heap_profile-cli.md
index 457b699..7a68ea2 100644
--- a/docs/reference/heap_profile-cli.md
+++ b/docs/reference/heap_profile-cli.md
@@ -1,4 +1,10 @@
-# heap_profile
+# HEAP_PROFILE(1)
+
+## NAME
+
+heap_profile - record heap profile on Android device
+
+## DESCRIPTION
 
 `tools/heap_profile` allows to collect native memory profiles on Android.
 See [Recording traces](/docs/data-sources/native-heap-profiler.md) for more
@@ -16,27 +22,78 @@
                     [--print-config] [-o DIRECTORY]
 ```
 
-## Options
-|Option|Description|
-|---|---|
-| -n, --name | Comma-separated list of process names to profile. |
-| -p, --pid | Comma-separated list of PIDs to profile. |
-| -i, --interval | Sampling interval. Default 4096 (4KiB) |
-| -o, --output | Output directory. |
-| -d, --duration | Duration of profile (ms). Default 7 days. |
-| --block-client | When buffer is full, block the client to wait for buffer space. Use with caution as this can significantly slow down the client. This is the default |
-| --no-block-client | When buffer is full, stop the profile early. |
-| --block-client-timeout | If --block-client is given, do not block any allocation for longer than this timeout (us). |
-| -h, --help | Show this help message and exit |
-| --no-start | Do not start heapprofd. |
-| -c, --continuous-dump | Dump interval in ms. 0 to disable continuous dump. |
-| --disable-selinux | Disable SELinux enforcement for duration of profile. |
-| --no-versions | Do not get version information about APKs. |
-| --no-running | Do not target already running processes. Requires Android 11. |
-| --no-startup | Do not target processes that start during the profile. Requires Android 11. |
-| --shmem-size | Size of buffer between client and heapprofd. Default 8MiB. Needs to be a power of two multiple of 4096, at least 8192. |
-| --dump-at-max | Dump the maximum memory usage rather than at the time of the dump. |
-| --disable-fork-teardown | Do not tear down client in forks. This can be useful for programs that use vfork. Android 11+ only. |
-| --simpleperf | Get simpleperf profile of heapprofd. This is only for heapprofd development. |
-| --trace-to-text-binary | Path to local trace to text. For debugging. |
-| --print-config | Print config instead of running. For debugging. |
+## OPTIONS
+`-n`, `--name` _NAMES_
+:    Comma-separated list of process names to profile.
+
+`-p`, `--pid` _PIDS_
+:    Comma-separated list of PIDs to profile.
+
+`-i`, `--interval`
+:    Sampling interval. Default 4096 (4KiB)
+
+`-o`, `--output` _DIRECTORY_
+:    Output directory.
+
+`--all-heaps`
+:    Collect allocations from all heaps registered by target.
+
+`--block-client`
+:    When buffer is full, block the client to wait for buffer space. Use with caution as this can significantly slow down the client. This is the default.
+
+`--block-client-timeout`
+:    If --block-client is given, do not block any allocation for longer than this timeout (us).
+
+`-c`, `--continuous-dump`
+:    Dump interval in ms. 0 to disable continuous dump.
+
+`-d`, `--duration`
+:    Duration of profile (ms). 0 to run until interrupted. Default: until interrupted by user.
+
+`--disable-fork-teardown`
+:    Do not tear down client in forks. This can be useful for programs that use vfork. Android 11+ only.
+
+`--disable-selinux`
+:    Disable SELinux enforcement for duration of profile.
+
+`--dump-at-max`
+:    Dump the maximum memory usage rather than at the time of the dump.
+
+`-h`, `--help`
+:    show this help message and exit
+
+`--heaps` _HEAPS_
+:    Comma-separated list of heaps to collect, e.g: malloc,art. Requires Android 12.
+
+`--idle-allocations`
+:    Keep track of how many bytes were unused since the last dump, per callstack
+
+`--no-android-tree-symbolization`
+:    Do not symbolize using currently lunched target in the Android tree.
+
+`--no-block-client`
+:    When buffer is full, stop the profile early.
+
+`--no-running`
+:    Do not target already running processes. Requires Android 11.
+
+`--no-start`
+:    Do not start heapprofd.
+
+`--no-startup`
+:    Do not target processes that start during the profile. Requires Android 11.
+
+`--no-versions`
+:    Do not get version information about APKs.
+
+`--print-config`
+:    Print config instead of running. For debugging.
+
+`--shmem-size`
+:    Size of buffer between client and heapprofd. Default 8MiB. Needs to be a power of two multiple of 4096, at least 8192.
+
+`--simpleperf`
+:    Get simpleperf profile of heapprofd. This is only for heapprofd development.
+
+`--trace-to-text-binary`
+:    Path to local trace to text. For debugging.
diff --git a/docs/reference/perfetto-cli.md b/docs/reference/perfetto-cli.md
index f0771d6..8de0f31 100644
--- a/docs/reference/perfetto-cli.md
+++ b/docs/reference/perfetto-cli.md
@@ -1,4 +1,10 @@
-# Perfetto CLI
+# PERFETTO(1)
+
+## NAME
+
+perfetto - capture traces
+
+## DESCRIPTION
 
 This section describes how to use the `perfetto` commandline binary to capture
 traces. Examples are given in terms of an Android device connected over ADB.
@@ -6,70 +12,117 @@
 `perfetto` has two modes for configuring the tracing session (i.e. what and how
 to collect):
 
-* __lightweight mode__: all config options are supplied as commandline flags,
+__lightweight mode__
+: all config options are supplied as commandline flags,
   but the available data sources are restricted to ftrace and atrace. This mode
   is similar to
   [`systrace`](https://developer.android.com/topic/performance/tracing/command-line).
-* __normal mode__: the configuration is specified in a protocol buffer. This
-  allows for full customisation of collected traces.
+
+__normal mode__
+: the configuration is specified in a protocol buffer. This allows for full
+  customisation of collected traces.
 
 
-## General options
+## GENERAL OPTIONS
 
 The following table lists the available options when using `perfetto` in either
 mode.
 
-|Option|Description|
-|---|---|
-| `--background \| -d` |Perfetto immediately exits the command-line interface and continues recording your trace in background.|
-|`--out OUT_FILE \| -o OUT_FILE`|Specifies the desired path to the output trace file, or `-` for stdout. `perfetto` writes the output to the file described in the flags above. The output format compiles with the format defined in [AOSP `trace.proto`](/protos/perfetto/trace/trace.proto).|
-|`--dropbox TAG`|Uploads your trace via the [DropBoxManager API](https://developer.android.com/reference/android/os/DropBoxManager.html) using the tag you specify.|
-|`--no-guardrails`|Disables protections against excessive resource usage when enabling the `--dropbox` flag during testing.|
-|`--reset-guardrails`|Resets the persistent state of the guardrails and exits (for testing).|
-|`--query`|Queries the service state and prints it as human-readable text.|
-|`--query-raw`|Similar to `--query`, but prints raw proto-encoded bytes of `tracing_service_state.proto`.|
-|`--help \| -h`|Prints out help text for the `perfetto` tool.|
+`-d`, `--background`
+:    Perfetto immediately exits the command-line interface and continues
+     recording your trace in background.
+
+`-o`, `--out` _OUT_FILE_
+:    Specifies the desired path to the output trace file, or `-` for stdout.
+     `perfetto` writes the output to the file described in the flags above.
+     The output format complies with the format defined in
+     [AOSP `trace.proto`](/protos/perfetto/trace/trace.proto).
+
+`--dropbox` _TAG_
+:    Uploads your trace via the
+     [DropBoxManager API](https://developer.android.com/reference/android/os/DropBoxManager.html)
+     using the tag you specify. Android only.
+
+`--no-guardrails`
+:     Disables protections against excessive resource usage when enabling the
+      `--dropbox` flag during testing.
 
 
-## Lightweight mode
+`--reset-guardrails`
+:     Resets the persistent state of the guardrails and exits (for testing).
+
+`--query`
+:     Queries the service state and prints it as human-readable text.
+
+`--query-raw`
+:     Similar to `--query`, but prints raw proto-encoded bytes of
+      `tracing_service_state.proto`.
+
+`-h`,  `--help`
+:     Prints out help text for the `perfetto` tool.
+
+
+## LIGHTWEIGHT MODE
 
 The general syntax for using `perfetto` in *lightweight mode* is as follows:
 
-<pre class="none">
- adb shell perfetto [ --time <var>TIMESPEC</var> ] [ --buffer <var>SIZE</var> ] [ --size <var>SIZE</var> ]
-           [ <var>ATRACE_CAT</var> | <var>FTRACE_GROUP/FTRACE_NAME</var>]...
-</pre>
+```
+ adb shell perfetto [ --time TIMESPEC ] [ --buffer SIZE ] [ --size SIZE ]
+    [ ATRACE_CAT | FTRACE_GROUP/FTRACE_NAME]...
+```
 
 
 The following table lists the available options when using `perfetto` in
 *lightweight mode*.
 
-|Option|Description|
-|--- |--- |
-|`--time TIME[s\|m\|h] \| -t TIME[s\|m\|h]`|Specifies the trace duration in seconds, minutes, or hours. For example, `--time 1m` specifies a trace duration of 1 minute. The default duration is 10 seconds.|
-|`--buffer SIZE[mb\|gb] \| -b SIZE[mb\|gb`]|Specifies the ring buffer size in megabytes (mb) or gigabytes (gb). The default parameter is `--buffer 32mb`.|
-|`--size SIZE[mb\|gb] \| -s SIZE[mb\|gb]`|Specifies the max file size in megabytes (mb) or gigabytes (gb). By default `perfetto` uses only in-memory ring-buffer.|
+`-t`, `--time` _TIME[s|m|h]_
+:    Specifies the trace duration in seconds, minutes, or hours.
+     For example, `--time 1m` specifies a trace duration of 1 minute.
+     The default duration is 10 seconds.
+
+`-b`, `--buffer` _SIZE[mb|gb]_
+:    Specifies the ring buffer size in megabytes (mb) or gigabytes (gb).
+     The default parameter is `--buffer 32mb`.
+
+`-s`, `--size` _SIZE[mb|gb]_
+:    Specifies the max file size in megabytes (mb) or gigabytes (gb).
+     By default `perfetto` uses only in-memory ring-buffer.
 
 
 This is followed by a list of event specifiers:
 
-|Event|Description|
-|--- |--- |
-|`ATRACE_CAT`|Specifies the atrace categories you want to record a trace for. For example, the following command traces Window Manager using atrace: `adb shell perfetto --out FILE wm`. To record other categories, see this [list of atrace categories](https://android.googlesource.com/platform/frameworks/native/+/refs/tags/android-q-preview-5/cmds/atrace/atrace.cpp#100).|
-|`FTRACE_GROUP/FTRACE_NAME`|Specifies the ftrace events you want to record a trace for. For example, the following command traces sched/sched_switch events: `adb shell perfetto --out FILE sched/sched_switch`|
+`ATRACE_CAT`
+:    Specifies the atrace categories you want to record a trace for.
+     For example, the following command traces Window Manager using atrace:
+     `adb shell perfetto --out FILE wm`. To record other categories, see this
+     [list of atrace categories](https://android.googlesource.com/platform/frameworks/native/+/refs/tags/android-q-preview-5/cmds/atrace/atrace.cpp#100).
+
+`FTRACE_GROUP/FTRACE_NAME`
+:    Specifies the ftrace events you want to record a trace for.
+     For example, the following command traces sched/sched_switch events:
+     `adb shell perfetto --out FILE sched/sched_switch`
 
 
-## Normal mode
+## NORMAL MODE
 
 The general syntax for using `perfetto` in *normal mode* is as follows:
 
-<pre class="none">
- adb shell perfetto [ --txt ] --config <var>CONFIG_FILE</var>
-</pre>
+```
+ adb shell perfetto [ --txt ] --config CONFIG_FILE
+```
 
-The following table lists the available options when using `perfetto` in *normal* mode.
+The following table lists the available options when using `perfetto` in
+*normal* mode.
 
-|Option|Description|
-|--- |--- |
-|`--config CONFIG_FILE \| -c CONFIG_FILE`|Specifies the path to a configuration file. In normal mode, some configurations may be encoded in a configuration protocol buffer. This file must comply with the protocol buffer schema defined in [AOSP `trace_config.proto`](/protos/perfetto/config/data_source_config.proto). You select and configure the data sources using the DataSourceConfig member of the TraceConfig, as defined in [AOSP `data_source_config.proto`](/protos/perfetto/config/data_source_config.proto).|
-|`--txt`|Instructs `perfetto` to parse the config file as pbtxt. This flag is experimental, and it's not recommended that you enable it for production.|
+`-c`, `--config` _CONFIG_FILE_
+:    Specifies the path to a configuration file. In normal mode, some
+     configurations may be encoded in a configuration protocol buffer.
+     This file must comply with the protocol buffer schema defined in AOSP
+     [`trace_config.proto`](/protos/perfetto/config/trace_config.proto).
+     You select and configure the data sources using the DataSourceConfig member
+     of the TraceConfig, as defined in AOSP
+     [`data_source_config.proto`](/protos/perfetto/config/data_source_config.proto).
+
+`--txt`
+:    Instructs `perfetto` to parse the config file as pbtxt. This flag is
+     experimental, and it's not recommended that you enable it for production.
diff --git a/gn/standalone/toolchain/win_find_msvc.py b/gn/standalone/toolchain/win_find_msvc.py
index 41b3476..0badd1b 100644
--- a/gn/standalone/toolchain/win_find_msvc.py
+++ b/gn/standalone/toolchain/win_find_msvc.py
@@ -63,7 +63,7 @@
     filt = lambda x: os.path.exists(os.path.join(x, 'ucrt', 'x64', 'ucrt.lib'))
     out[1] = find_max_subdir(lib_base, filt)
 
-  for version in ['BuildTools', 'Community']:
+  for version in ['BuildTools', 'Community', 'Professional']:
     msvc_base = ('C:\\Program Files (x86)\\Microsoft Visual Studio\\2019\\'
                  '{}\\VC\\Tools\\MSVC').format(version)
     if os.path.exists(msvc_base):
diff --git a/include/perfetto/tracing/event_context.h b/include/perfetto/tracing/event_context.h
index 21cb788..e0c1a16 100644
--- a/include/perfetto/tracing/event_context.h
+++ b/include/perfetto/tracing/event_context.h
@@ -72,7 +72,7 @@
     static_assert(std::is_base_of<protozero::Message, MessageType>::value,
                   "TracedProto can be used only with protozero messages");
 
-    return TracedProto<MessageType>(message, *this);
+    return TracedProto<MessageType>(message, this);
   }
 
  private:
diff --git a/include/perfetto/tracing/traced_proto.h b/include/perfetto/tracing/traced_proto.h
index a52972b..4298bcc 100644
--- a/include/perfetto/tracing/traced_proto.h
+++ b/include/perfetto/tracing/traced_proto.h
@@ -60,8 +60,6 @@
 
   MessageType* message() { return message_; }
 
-  EventContext& context() const { return context_; }
-
   // Write additional untyped values into the same context, which is useful
   // when a given C++ class has a typed representation, but also either has
   // members which can only be written into an untyped context (e.g. they are
@@ -87,14 +85,47 @@
     return TracedDictionary(message_, MessageType::kDebugAnnotations, nullptr);
   }
 
+  // Write a nested message into a field according to the provided metadata.
+  template <typename FieldMetadata>
+  TracedProto<typename FieldMetadata::cpp_field_type> WriteNestedMessage() {
+    static_assert(std::is_base_of<MessageType,
+                                  typename FieldMetadata::message_type>::value,
+                  "Field should belong to the current message");
+    return TracedProto<typename FieldMetadata::cpp_field_type>(
+        message_->template BeginNestedMessage<
+            typename FieldMetadata::cpp_field_type>(FieldMetadata::kFieldId),
+        context_);
+  }
+
+  template <typename FieldMetadata>
+  TracedProto<typename FieldMetadata::cpp_field_type> WriteNestedMessage(
+      protozero::proto_utils::internal::FieldMetadataHelper<FieldMetadata>) {
+    return WriteNestedMessage<FieldMetadata>();
+  }
+
  private:
   friend class EventContext;
+  // Allow TracedProto<Foo> to create TracedProto<Bar>.
+  template <typename T>
+  friend class TracedProto;
 
-  TracedProto(MessageType* message, EventContext& context)
+  // Wraps a raw protozero message using the same context as the current object.
+  template <typename ChildMessageType>
+  TracedProto<ChildMessageType> Wrap(ChildMessageType* message) {
+    return TracedProto<ChildMessageType>(message, context_);
+  }
+
+  // Context might be null here when writing a typed message which is nested
+  // into an untyped legacy trace event macro argument.
+  // TODO(altimin): Turn this into EventContext& when this case is eliminated
+  // and expose it in public API.
+  EventContext* context() const { return context_; }
+
+  TracedProto(MessageType* message, EventContext* context)
       : message_(message), context_(context) {}
 
   MessageType* const message_;
-  EventContext& context_;
+  EventContext* context_;
 };
 
 namespace internal {
@@ -157,11 +188,7 @@
       std::is_same<Check, void>::value>
   Write(TracedProto<Proto> context, ValueType&& value) {
     // TODO(altimin): support TraceFormatTraits here.
-    value.WriteIntoTrace(
-        context.context().Wrap(context.message()
-                                   ->template BeginNestedMessage<
-                                       typename FieldMetadata::cpp_field_type>(
-                                       FieldMetadata::kFieldId)));
+    value.WriteIntoTrace(context.template WriteNestedMessage<FieldMetadata>());
   }
 
   // Nested repeated non-packed field.
@@ -173,11 +200,7 @@
   Write(TracedProto<Proto> context, ValueType&& value) {
     // TODO(altimin): support TraceFormatTraits here.
     for (auto&& item : value) {
-      item.WriteIntoTrace(context.context().Wrap(
-          context.message()
-              ->template BeginNestedMessage<
-                  typename FieldMetadata::cpp_field_type>(
-                  FieldMetadata::kFieldId)));
+      item.WriteIntoTrace(context.template WriteNestedMessage<FieldMetadata>());
     }
   }
 };
diff --git a/infra/perfetto.dev/src/markdown_render.js b/infra/perfetto.dev/src/markdown_render.js
index 8fe5cc9..21d03ed 100644
--- a/infra/perfetto.dev/src/markdown_render.js
+++ b/infra/perfetto.dev/src/markdown_render.js
@@ -158,6 +158,15 @@
   if (cssClass != '') {
     cssClass = ` class="callout ${cssClass}"`;
   }
+
+  // Rudimentary support of definition lists.
+  var colonStart = text.search("\n:")
+  if (colonStart != -1) {
+    var key = text.substring(0, colonStart);
+    var value = text.substring(colonStart + 2);
+    return `<dl><dt><p>${key}</p></dt><dd><p>${value}</p></dd></dl>`
+  }
+
   return `<p${cssClass}>${text}</p>\n`;
 }
 
diff --git a/protos/perfetto/trace_processor/trace_processor.proto b/protos/perfetto/trace_processor/trace_processor.proto
index f32745b..efebc38 100644
--- a/protos/perfetto/trace_processor/trace_processor.proto
+++ b/protos/perfetto/trace_processor/trace_processor.proto
@@ -35,6 +35,94 @@
 //    In this case these messages are used to {,un}marshall HTTP requests and
 //    response made through src/trace_processor/rpc/httpd.cc .
 
+// At lowest level, the wire-format of the RPC protocol is a linear sequence of
+// TraceProcessorRpc messages on each side of the byte pipe.
+// Each message is prefixed by a tag (field = 1, type = length delimited) and a
+// varint encoding its size (this is so the whole stream can also be read /
+// written as if it was a repeated field of TraceProcessorRpcStream).
+
+message TraceProcessorRpcStream {
+  repeated TraceProcessorRpc msg = 1;
+}
+
+message TraceProcessorRpc {
+  // A monotonic counter used only for debugging purposes, to detect if the
+  // underlying stream is missing or duping data. The counter starts at 0 on
+  // each side of the pipe and is incremented on each message.
+  // Do NOT expect that a response has the same |seq| of its corresponding
+  // request: some requests (e.g., a query returning many rows) can yield more
+  // than one response message, bringing the tx and rq seq out of sync.
+  optional int64 seq = 1;
+
+  enum TraceProcessorMethod {
+    TPM_UNSPECIFIED = 0;
+    TPM_APPEND_TRACE_DATA = 1;
+    TPM_FINALIZE_TRACE_DATA = 2;
+    TPM_QUERY_STREAMING = 3;
+    TPM_QUERY_RAW_DEPRECATED = 4;
+    TPM_COMPUTE_METRIC = 5;
+    TPM_GET_METRIC_DESCRIPTORS = 6;
+    TPM_RESTORE_INITIAL_TABLES = 7;
+    TPM_ENABLE_METATRACE = 8;
+    TPM_DISABLE_AND_READ_METATRACE = 9;
+  }
+
+  oneof type {
+    // Client -> TraceProcessor requests.
+    TraceProcessorMethod request = 2;
+
+    // TraceProcessor -> Client responses.
+    TraceProcessorMethod response = 3;
+
+    // This is sent back instead of filling |response| when the client sends a
+    // |request| which is not known by the TraceProcessor service. This can
+    // happen when the client is newer than the service.
+    TraceProcessorMethod invalid_request = 4;
+  }
+
+  // Request/Response arguments.
+  // Not all requests / responses require an argument.
+
+  oneof args {
+    // TraceProcessorMethod request args.
+
+    // For TPM_APPEND_TRACE_DATA.
+    bytes append_trace_data = 101;
+    // For TPM_QUERY_STREAMING.
+    QueryArgs query_args = 103;
+    // For TPM_QUERY_RAW_DEPRECATED.
+    RawQueryArgs raw_query_args = 104;
+    // For TPM_COMPUTE_METRIC.
+    ComputeMetricArgs compute_metric_args = 105;
+
+    // TraceProcessorMethod response args.
+    // For TPM_APPEND_TRACE_DATA.
+    AppendTraceDataResult append_result = 201;
+    // For TPM_QUERY_STREAMING.
+    QueryResult query_result = 203;
+    // For TPM_QUERY_RAW_DEPRECATED.
+    RawQueryResult raw_query_result = 204;
+    // For TPM_COMPUTE_METRIC.
+    ComputeMetricResult metric_result = 205;
+    // For TPM_GET_METRIC_DESCRIPTORS.
+    DescriptorSet metric_descriptors = 206;
+    // For TPM_DISABLE_AND_READ_METATRACE.
+    DisableAndReadMetatraceResult metatrace = 209;
+  }
+}
+
+message AppendTraceDataResult {
+  optional int64 total_bytes_parsed = 1;
+  optional string error = 2;
+}
+
+message QueryArgs {
+  optional string sql_query = 1;
+
+  // Wall time when the query was queued. Used only for query stats.
+  optional uint64 time_queued_ns = 2;
+}
+
 // Input for the /raw_query endpoint.
 message RawQueryArgs {
   optional string sql_query = 1;
@@ -117,7 +205,6 @@
     // NUL-terminated. This is because JS incurs into a non-negligible overhead
     // when decoding strings and one decode + split('\0') is measurably faster
     // than decoding N strings. See goto.google.com/postmessage-benchmark .
-    // \0-concatenated.
     optional string string_cells = 5;
 
     // If true this is the last batch for the query result.
@@ -189,11 +276,3 @@
 message DescriptorSet {
   repeated DescriptorProto descriptors = 1;
 }
-
-// Input for the /get_metric_descriptors endpoint.
-message GetMetricDescriptorsArgs {}
-
-// Output for the /get_metric_descriptors endpoint.
-message GetMetricDescriptorsResult {
-  optional DescriptorSet descriptor_set = 1;
-}
diff --git a/src/android_stats/perfetto_atoms.h b/src/android_stats/perfetto_atoms.h
index 3731c2e..0a857b9 100644
--- a/src/android_stats/perfetto_atoms.h
+++ b/src/android_stats/perfetto_atoms.h
@@ -69,6 +69,7 @@
   kTracedEnableTracingUnknown = 35,
   kTracedStartTracingInvalidSessionState = 36,
   kTracedEnableTracingInvalidFilter = 47,
+  kTracedEnableTracingOobTargetBuffer = 48,
 
   // Checkpoints inside perfetto_cmd after tracing has finished.
   kOnTracingDisabled = 4,
diff --git a/src/profiling/memory/shared_ring_buffer_write_fuzzer.cc b/src/profiling/memory/shared_ring_buffer_write_fuzzer.cc
index 486033e..62b82eb 100644
--- a/src/profiling/memory/shared_ring_buffer_write_fuzzer.cc
+++ b/src/profiling/memory/shared_ring_buffer_write_fuzzer.cc
@@ -70,6 +70,7 @@
   memcpy(&header, data, sizeof(header));
   SharedRingBuffer::MetadataPage& metadata_page = header.metadata_page;
   metadata_page.spinlock.locked = false;
+  metadata_page.spinlock.poisoned = false;
 
   PERFETTO_CHECK(ftruncate(*fd, static_cast<off_t>(total_size_pages *
                                                    base::kPageSize)) == 0);
diff --git a/src/protozero/proto_ring_buffer.cc b/src/protozero/proto_ring_buffer.cc
index 92fe153..631d355 100644
--- a/src/protozero/proto_ring_buffer.cc
+++ b/src/protozero/proto_ring_buffer.cc
@@ -106,7 +106,7 @@
 
   size_t avail = buf_.size() - wr_;
   if (data_len > avail) {
-    // This whole section should be hit extremely rare.
+    // This whole section should be hit extremely rarely.
 
     // Try first just recompacting the buffer by moving everything to the left.
     // This can happen if we received "a message and a bit" on each Append call
diff --git a/src/protozero/proto_ring_buffer.h b/src/protozero/proto_ring_buffer.h
index 06ef539..d71a231 100644
--- a/src/protozero/proto_ring_buffer.h
+++ b/src/protozero/proto_ring_buffer.h
@@ -60,10 +60,10 @@
 // Internally this is similar to a ring-buffer, with the caveat that it never
 // wraps, it only expands. Expansions are rare. The deal is that in most cases
 // the read cursor follows very closely the write cursor. For instance, if the
-// uderlying behaves as a dgram socket, after each Append, the read cursor will
-// chase completely the write cursor. Even if the underyling stream is not
-// always atomic, the expectation is that the read cursor will eventually reach
-// the write one within few messages.
+// underlying transport behaves as a dgram socket, after each Append, the read
+// cursor will chase completely the write cursor. Even if the underlying stream
+// is not always atomic, the expectation is that the read cursor will eventually
+// reach the write one within a few messages.
 // A visual example, imagine we have four messages: 2it 4will 2be 4fine
 // Visually:
 //
diff --git a/src/trace_processor/importers/proto/heap_profile_tracker.cc b/src/trace_processor/importers/proto/heap_profile_tracker.cc
index 968da7d..45a5c7b 100644
--- a/src/trace_processor/importers/proto/heap_profile_tracker.cc
+++ b/src/trace_processor/importers/proto/heap_profile_tracker.cc
@@ -30,6 +30,8 @@
 struct MergedCallsite {
   StringId frame_name;
   StringId mapping_name;
+  base::Optional<StringId> source_file;
+  base::Optional<uint32_t> line_number;
   base::Optional<uint32_t> parent_idx;
   bool operator<(const MergedCallsite& o) const {
     return std::tie(frame_name, mapping_name, parent_idx) <
@@ -62,7 +64,7 @@
     base::Optional<StringId> deobfuscated_name =
         frames_tbl.deobfuscated_name()[frame_idx];
     return {{deobfuscated_name ? *deobfuscated_name : frame_name, mapping_name,
-             base::nullopt}};
+             base::nullopt, base::nullopt, base::nullopt}};
   }
 
   std::vector<MergedCallsite> result;
@@ -74,8 +76,9 @@
        i < symbols_tbl.row_count() &&
        symbols_tbl.symbol_set_id()[i] == *symbol_set_id;
        ++i) {
-    result.emplace_back(
-        MergedCallsite{symbols_tbl.name()[i], mapping_name, base::nullopt});
+    result.emplace_back(MergedCallsite{
+        symbols_tbl.name()[i], mapping_name, symbols_tbl.source_file()[i],
+        symbols_tbl.line_number()[i], base::nullopt});
   }
   std::reverse(result.begin(), result.end());
   return result;
@@ -118,6 +121,7 @@
     auto callsites = GetMergedCallsites(storage, i);
     // Loop below needs to run at least once for parent_idx to get updated.
     PERFETTO_CHECK(!callsites.empty());
+    std::map<MergedCallsite, uint32_t> callsites_to_rowid;
     for (MergedCallsite& merged_callsite : callsites) {
       merged_callsite.parent_idx = parent_idx;
       auto it = merged_callsites_to_table_idx.find(merged_callsite);
@@ -137,14 +141,35 @@
         row.map_name = merged_callsite.mapping_name;
         if (parent_idx)
           row.parent_id = tbl->id()[*parent_idx];
-
         parent_idx = tbl->Insert(std::move(row)).row;
+        callsites_to_rowid[merged_callsite] =
+            static_cast<uint32_t>(merged_callsites_to_table_idx.size());
+
         PERFETTO_CHECK(merged_callsites_to_table_idx.size() ==
                        tbl->row_count());
+      } else {
+        MergedCallsite saved_callsite = it->first;
+        callsites_to_rowid.erase(saved_callsite);
+        if (saved_callsite.source_file != merged_callsite.source_file) {
+          saved_callsite.source_file = base::nullopt;
+        }
+        if (saved_callsite.line_number != merged_callsite.line_number) {
+          saved_callsite.line_number = base::nullopt;
+        }
+        callsites_to_rowid[saved_callsite] = it->second;
       }
       parent_idx = it->second;
     }
 
+    for (const auto& it : callsites_to_rowid) {
+      if (it.first.source_file) {
+        tbl->mutable_source_file()->Set(it.second, *it.first.source_file);
+      }
+      if (it.first.line_number) {
+        tbl->mutable_line_number()->Set(it.second, *it.first.line_number);
+      }
+    }
+
     PERFETTO_CHECK(parent_idx);
     callsite_to_merged_callsite[i] = *parent_idx;
   }
diff --git a/src/trace_processor/importers/proto/profiler_util.cc b/src/trace_processor/importers/proto/profiler_util.cc
index 175942f..c28f2fc 100644
--- a/src/trace_processor/importers/proto/profiler_util.cc
+++ b/src/trace_processor/importers/proto/profiler_util.cc
@@ -104,7 +104,8 @@
     return "com.google.android.gm";
   }
 
-  if (location.find("PrebuiltGmsCore") != std::string::npos) {
+  if (location.find("PrebuiltGmsCore") != std::string::npos ||
+      location.find("com.google.android.gms") != std::string::npos) {
     return "com.google.android.gms";
   }
 
diff --git a/src/trace_processor/importers/proto/stack_profile_tracker.cc b/src/trace_processor/importers/proto/stack_profile_tracker.cc
index 52e4dbd..6bf9a73 100644
--- a/src/trace_processor/importers/proto/stack_profile_tracker.cc
+++ b/src/trace_processor/importers/proto/stack_profile_tracker.cc
@@ -185,7 +185,7 @@
       cur_id = frames->Insert(row).id;
       context_->global_stack_profile_tracker->InsertFrameRow(
           mapping_id, static_cast<uint64_t>(row.rel_pc), *cur_id);
-      if (name.find('.') != std::string::npos) {
+      if (base::Contains(name, '.')) {
         // Java frames always contain a '.'
         base::Optional<std::string> package =
             PackageFromLocation(context_->storage.get(), mapping_name);
diff --git a/src/trace_processor/importers/proto/track_event_parser.cc b/src/trace_processor/importers/proto/track_event_parser.cc
index 7926bd3..6be84c5 100644
--- a/src/trace_processor/importers/proto/track_event_parser.cc
+++ b/src/trace_processor/importers/proto/track_event_parser.cc
@@ -110,6 +110,8 @@
   }
   bool AddJson(const Key& key, const protozero::ConstChars& value) final {
     auto json_value = json::ParseJsonString(value);
+    if (!json_value)
+      return false;
     return json::AddJsonValueToArgs(*json_value, base::StringView(key.flat_key),
                                     base::StringView(key.key), &storage_,
                                     &inserter_);
diff --git a/src/trace_processor/python/perfetto/trace_processor/trace_processor.descriptor b/src/trace_processor/python/perfetto/trace_processor/trace_processor.descriptor
index 3d2bf28..c3672b2 100644
--- a/src/trace_processor/python/perfetto/trace_processor/trace_processor.descriptor
+++ b/src/trace_processor/python/perfetto/trace_processor/trace_processor.descriptor
Binary files differ
diff --git a/src/trace_processor/python/perfetto/trace_processor/trace_processor.descriptor.sha1 b/src/trace_processor/python/perfetto/trace_processor/trace_processor.descriptor.sha1
index 5bbd54a..5f5b733 100644
--- a/src/trace_processor/python/perfetto/trace_processor/trace_processor.descriptor.sha1
+++ b/src/trace_processor/python/perfetto/trace_processor/trace_processor.descriptor.sha1
@@ -2,5 +2,5 @@
 // SHA1(tools/gen_binary_descriptors)
 // 9fc6d77de57ec76a80b76aa282f4c7cf5ce55eec
 // SHA1(protos/perfetto/trace_processor/trace_processor.proto)
-// 8320f306d6d5bbcb5ef6ba8cd62cc70a0994d102
+// ee875dc7384617e22ecb9e9d4ac03df4bba82252
   
\ No newline at end of file
diff --git a/src/trace_processor/rpc/httpd.cc b/src/trace_processor/rpc/httpd.cc
index 9412279..edc4568 100644
--- a/src/trace_processor/rpc/httpd.cc
+++ b/src/trace_processor/rpc/httpd.cc
@@ -71,6 +71,9 @@
   ~HttpServer() override;
   void Run(const char*, const char*);
 
+  // This is non-null only while serving an HTTP request.
+  Client* active_client() { return active_client_; }
+
  private:
   size_t ParseOneHttpRequest(Client* client);
   void HandleRequest(Client*, const HttpRequest&);
@@ -85,9 +88,12 @@
   base::UnixTaskRunner task_runner_;
   std::unique_ptr<base::UnixSocket> sock4_;
   std::unique_ptr<base::UnixSocket> sock6_;
-  std::vector<Client> clients_;
+  std::list<Client> clients_;
+  Client* active_client_ = nullptr;
 };
 
+HttpServer* g_httpd_instance;
+
 void Append(std::vector<char>& buf, const char* str) {
   buf.insert(buf.end(), str, str + strlen(str));
 }
@@ -197,7 +203,9 @@
   // At this point |rxbuf| can contain a partial HTTP request, a full one or
   // more (in case of HTTP Keepalive pipelining).
   for (;;) {
+    active_client_ = client;
     size_t bytes_consumed = ParseOneHttpRequest(client);
+    active_client_ = nullptr;
     if (bytes_consumed == 0)
       break;
     memmove(rxbuf, &rxbuf[bytes_consumed], client->rxbuf_used - bytes_consumed);
@@ -306,6 +314,37 @@
                      });
   }
 
+  if (req.uri == "/rpc") {
+    // Start the chunked reply.
+    strncpy(transfer_encoding_hdr, "Transfer-Encoding: chunked",
+            sizeof(transfer_encoding_hdr));
+    base::UnixSocket* cli_sock = client->sock.get();
+    HttpReply(cli_sock, "200 OK", headers, nullptr, kOmitContentLength);
+
+    static auto resp_fn = [](const void* data, uint32_t len) {
+      char chunk_hdr[32];
+      auto hdr_len = static_cast<size_t>(sprintf(chunk_hdr, "%x\r\n", len));
+      auto* http_client = g_httpd_instance->active_client();
+      PERFETTO_CHECK(http_client);
+      if (data == nullptr) {
+        // Unrecoverable RPC error case.
+        http_client->sock->Shutdown(/*notify=*/true);
+        return;
+      }
+      http_client->sock->Send(chunk_hdr, hdr_len);
+      http_client->sock->Send(data, len);
+      http_client->sock->Send("\r\n", 2);
+    };
+
+    trace_processor_rpc_.SetRpcResponseFunction(resp_fn);
+    trace_processor_rpc_.OnRpcRequest(req.body.data(), req.body.size());
+    trace_processor_rpc_.SetRpcResponseFunction(nullptr);
+
+    // Terminate chunked stream.
+    cli_sock->Send("0\r\n\r\n", 5);
+    return;
+  }
+
   if (req.uri == "/parse") {
     trace_processor_rpc_.Parse(
         reinterpret_cast<const uint8_t*>(req.body.data()), req.body.size());
@@ -381,13 +420,6 @@
                      res.size());
   }
 
-  if (req.uri == "/get_metric_descriptors") {
-    std::vector<uint8_t> res = trace_processor_rpc_.GetMetricDescriptors(
-        reinterpret_cast<const uint8_t*>(req.body.data()), req.body.size());
-    return HttpReply(client->sock.get(), "200 OK", headers, res.data(),
-                     res.size());
-  }
-
   if (req.uri == "/enable_metatrace") {
     trace_processor_rpc_.EnableMetatrace();
     return HttpReply(client->sock.get(), "200 OK", headers);
@@ -407,6 +439,7 @@
 void RunHttpRPCServer(std::unique_ptr<TraceProcessor> preloaded_instance,
                       std::string port_number) {
   HttpServer srv(std::move(preloaded_instance));
+  g_httpd_instance = &srv;
   std::string port = port_number.empty() ? kBindPort : port_number;
   std::string ipv4_addr = "127.0.0.1:" + port;
   std::string ipv6_addr = "[::1]:" + port;
diff --git a/src/trace_processor/rpc/query_result_serializer.h b/src/trace_processor/rpc/query_result_serializer.h
index 9c05e0b..c29d66d 100644
--- a/src/trace_processor/rpc/query_result_serializer.h
+++ b/src/trace_processor/rpc/query_result_serializer.h
@@ -53,6 +53,7 @@
 // chunked-encoded HTTP response, or through a repetition of Wasm calls.
 class QueryResultSerializer {
  public:
+  static constexpr uint32_t kDefaultBatchSplitThreshold = 128 * 1024;
   explicit QueryResultSerializer(Iterator);
   ~QueryResultSerializer();
 
@@ -92,7 +93,7 @@
   // the limit (it splits on the next row *after* the limit is hit).
   // Overridable for testing only.
   uint32_t cells_per_batch_ = 50000;
-  uint32_t batch_split_threshold_ = 1024 * 128;
+  uint32_t batch_split_threshold_ = kDefaultBatchSplitThreshold;
 };
 
 }  // namespace trace_processor
diff --git a/src/trace_processor/rpc/rpc.cc b/src/trace_processor/rpc/rpc.cc
index c63e5cb..a0d4a05 100644
--- a/src/trace_processor/rpc/rpc.cc
+++ b/src/trace_processor/rpc/rpc.cc
@@ -16,39 +16,238 @@
 
 #include "src/trace_processor/rpc/rpc.h"
 
+#include <string.h>
+
 #include <vector>
 
+#include "perfetto/base/logging.h"
 #include "perfetto/base/time.h"
+#include "perfetto/ext/base/utils.h"
 #include "perfetto/protozero/scattered_heap_buffer.h"
+#include "perfetto/protozero/scattered_stream_writer.h"
 #include "perfetto/trace_processor/trace_processor.h"
-#include "protos/perfetto/trace_processor/trace_processor.pbzero.h"
+#include "src/protozero/proto_ring_buffer.h"
 #include "src/trace_processor/rpc/query_result_serializer.h"
 #include "src/trace_processor/tp_metatrace.h"
 
+#include "protos/perfetto/trace_processor/trace_processor.pbzero.h"
+
 namespace perfetto {
 namespace trace_processor {
 
-using ColumnValues = protos::pbzero::RawQueryResult::ColumnValues;
-using ColumnDesc = protos::pbzero::RawQueryResult::ColumnDesc;
-
+namespace {
 // Writes a "Loading trace ..." update every N bytes.
 constexpr size_t kProgressUpdateBytes = 50 * 1000 * 1000;
+using TraceProcessorRpcStream = protos::pbzero::TraceProcessorRpcStream;
+using RpcProto = protos::pbzero::TraceProcessorRpc;
+
+// Most RPC messages are either very small or query results.
+// QueryResultSerializer splits rows into batches of approximately 128KB. Try
+// to avoid extra heap allocations for the nominal case.
+constexpr auto kSliceSize =
+    QueryResultSerializer::kDefaultBatchSplitThreshold + 4096;
+
+// Holds a trace_processor::TraceProcessorRpc pbzero message. Avoids extra
+// copies by doing direct scattered calls from the fragmented heap buffer onto
+// the RpcResponseFunction (the receiver is expected to deal with arbitrary
+// fragmentation anyways). It also takes care of prefixing each message with
+// the proto preamble and varint size.
+class Response {
+ public:
+  Response(int64_t seq, int method);
+  Response(const Response&) = delete;
+  Response& operator=(const Response&) = delete;
+  RpcProto* operator->() { return msg_; }
+  void Send(Rpc::RpcResponseFunction);
+
+ private:
+  RpcProto* msg_ = nullptr;
+
+  // The reason why we use TraceProcessorRpcStream as root message is because
+  // the RPC wire protocol expects each message to be prefixed with a proto
+  // preamble and varint size. This happens to be the same serialization of a
+  // repeated field (this is really the same trick we use between
+  // Trace and TracePacket in trace.proto)
+  protozero::HeapBuffered<TraceProcessorRpcStream> buf_;
+};
+
+Response::Response(int64_t seq, int method) : buf_(kSliceSize, kSliceSize) {
+  msg_ = buf_->add_msg();
+  msg_->set_seq(seq);
+  msg_->set_response(static_cast<RpcProto::TraceProcessorMethod>(method));
+}
+
+void Response::Send(Rpc::RpcResponseFunction send_fn) {
+  buf_->Finalize();
+  for (const auto& slice : buf_.GetSlices()) {
+    auto range = slice.GetUsedRange();
+    send_fn(range.begin, static_cast<uint32_t>(range.size()));
+  }
+}
+
+}  // namespace
 
 Rpc::Rpc(std::unique_ptr<TraceProcessor> preloaded_instance)
     : trace_processor_(std::move(preloaded_instance)),
-      session_id_(base::Uuidv4()) {}
+      session_id_(base::Uuidv4()) {
+  if (!trace_processor_)
+    ResetTraceProcessor();
+}
 
 Rpc::Rpc() : Rpc(nullptr) {}
-
 Rpc::~Rpc() = default;
 
+void Rpc::ResetTraceProcessor() {
+  trace_processor_ = TraceProcessor::CreateInstance(Config());
+  bytes_parsed_ = bytes_last_progress_ = 0;
+  t_parse_started_ = base::GetWallTimeNs().count();
+  // Deliberately not resetting the RPC channel state (rxbuf_, {tx,rx}_seq_id_).
+  // This is invoked from the same client to clear the current trace state
+  // before loading a new one. The RPC channel is orthogonal to that and the
+  // message numbering continues regardless of the reset.
+}
+
+void Rpc::OnRpcRequest(const void* data, size_t len) {
+  rxbuf_.Append(data, len);
+  for (;;) {
+    auto msg = rxbuf_.ReadMessage();
+    if (!msg.valid()) {
+      if (msg.fatal_framing_error)
+        rpc_response_fn_(nullptr, 0);  // Disconnect.
+      break;
+    }
+    ParseRpcRequest(msg.start, msg.len);
+  }
+}
+
+// [data, len] here is a tokenized TraceProcessorRpc proto message, without the
+// size header.
+void Rpc::ParseRpcRequest(const uint8_t* data, size_t len) {
+  RpcProto::Decoder req(data, len);
+
+  // We allow restarting the sequence from 0. This happens when refreshing the
+  // browser while using the external trace_processor_shell --httpd.
+  if (req.seq() != 0 && rx_seq_id_ != 0 && req.seq() != rx_seq_id_ + 1) {
+    PERFETTO_ELOG("RPC request out of order. Expected %" PRId64
+                  ", got %" PRId64,
+                  rx_seq_id_ + 1, req.seq());
+    rpc_response_fn_(nullptr, 0);  // Disconnect.
+    return;
+  }
+  rx_seq_id_ = req.seq();
+
+  // The static cast keeps the switch future-proof against unknown enum values.
+  const int req_type = static_cast<int>(req.request());
+  static const char kErrFieldNotSet[] = "RPC error: request field not set";
+  switch (req_type) {
+    case RpcProto::TPM_APPEND_TRACE_DATA: {
+      Response resp(tx_seq_id_++, req_type);
+      auto* result = resp->set_append_result();
+      if (!req.has_append_trace_data()) {
+        result->set_error(kErrFieldNotSet);
+      } else {
+        protozero::ConstBytes byte_range = req.append_trace_data();
+        util::Status res = Parse(byte_range.data, byte_range.size);
+        if (!res.ok()) {
+          result->set_error(res.message());
+        }
+      }
+      resp.Send(rpc_response_fn_);
+      break;
+    }
+    case RpcProto::TPM_FINALIZE_TRACE_DATA: {
+      Response resp(tx_seq_id_++, req_type);
+      NotifyEndOfFile();
+      resp.Send(rpc_response_fn_);
+      break;
+    }
+    case RpcProto::TPM_QUERY_STREAMING: {
+      if (!req.has_query_args()) {
+        Response resp(tx_seq_id_++, req_type);
+        auto* result = resp->set_query_result();
+        result->set_error(kErrFieldNotSet);
+        resp.Send(rpc_response_fn_);
+      } else {
+        protozero::ConstBytes args = req.query_args();
+        auto it = QueryInternal(args.data, args.size);
+        QueryResultSerializer serializer(std::move(it));
+        for (bool has_more = true; has_more;) {
+          Response resp(tx_seq_id_++, req_type);
+          has_more = serializer.Serialize(resp->set_query_result());
+          resp.Send(rpc_response_fn_);
+        }
+      }
+      break;
+    }
+    case RpcProto::TPM_QUERY_RAW_DEPRECATED: {
+      Response resp(tx_seq_id_++, req_type);
+      auto* result = resp->set_raw_query_result();
+      if (!req.has_raw_query_args()) {
+        result->set_error(kErrFieldNotSet);
+      } else {
+        protozero::ConstBytes args = req.raw_query_args();
+        RawQueryInternal(args.data, args.size, result);
+      }
+      resp.Send(rpc_response_fn_);
+      break;
+    }
+    case RpcProto::TPM_COMPUTE_METRIC: {
+      Response resp(tx_seq_id_++, req_type);
+      auto* result = resp->set_metric_result();
+      if (!req.has_compute_metric_args()) {
+        result->set_error(kErrFieldNotSet);
+      } else {
+        protozero::ConstBytes args = req.compute_metric_args();
+        ComputeMetricInternal(args.data, args.size, result);
+      }
+      resp.Send(rpc_response_fn_);
+      break;
+    }
+    case RpcProto::TPM_GET_METRIC_DESCRIPTORS: {
+      Response resp(tx_seq_id_++, req_type);
+      auto descriptor_set = trace_processor_->GetMetricDescriptors();
+      auto* result = resp->set_metric_descriptors();
+      result->AppendRawProtoBytes(descriptor_set.data(), descriptor_set.size());
+      resp.Send(rpc_response_fn_);
+      break;
+    }
+    case RpcProto::TPM_RESTORE_INITIAL_TABLES: {
+      trace_processor_->RestoreInitialTables();
+      Response resp(tx_seq_id_++, req_type);
+      resp.Send(rpc_response_fn_);
+      break;
+    }
+    case RpcProto::TPM_ENABLE_METATRACE: {
+      trace_processor_->EnableMetatrace();
+      Response resp(tx_seq_id_++, req_type);
+      resp.Send(rpc_response_fn_);
+      break;
+    }
+    case RpcProto::TPM_DISABLE_AND_READ_METATRACE: {
+      Response resp(tx_seq_id_++, req_type);
+      DisableAndReadMetatraceInternal(resp->set_metatrace());
+      resp.Send(rpc_response_fn_);
+      break;
+    }
+    default: {
+      // This can legitimately happen if the client is newer. We reply with a
+      // generic "unknown request" response, so the client can do feature
+      // detection.
+      PERFETTO_DLOG("[RPC] Uknown request type (%d), size=%zu", req_type, len);
+      Response resp(tx_seq_id_++, req_type);
+      resp->set_invalid_request(
+          static_cast<RpcProto::TraceProcessorMethod>(req_type));
+      resp.Send(rpc_response_fn_);
+      break;
+    }
+  }  // switch(req_type)
+}
+
 util::Status Rpc::Parse(const uint8_t* data, size_t len) {
   if (eof_) {
-    // Reset the trace processor state if this is either the first call ever or
-    // if another trace has been previously fully loaded.
-    trace_processor_ = TraceProcessor::CreateInstance(Config());
-    bytes_parsed_ = bytes_last_progress_ = 0;
-    t_parse_started_ = base::GetWallTimeNs().count();
+    // Reset the trace processor state if another trace has been previously
+    // loaded.
+    ResetTraceProcessor();
   }
 
   eof_ = false;
@@ -65,8 +264,6 @@
 }
 
 void Rpc::NotifyEndOfFile() {
-  if (!trace_processor_)
-    return;
   trace_processor_->NotifyEndOfFile();
   eof_ = true;
   MaybePrintProgress();
@@ -89,23 +286,7 @@
 void Rpc::Query(const uint8_t* args,
                 size_t len,
                 QueryResultBatchCallback result_callback) {
-  protos::pbzero::RawQueryArgs::Decoder query(args, len);
-  std::string sql = query.sql_query().ToStdString();
-  PERFETTO_DLOG("[RPC] Query < %s", sql.c_str());
-  PERFETTO_TP_TRACE("RPC_QUERY",
-                    [&](metatrace::Record* r) { r->AddArg("SQL", sql); });
-
-  if (!trace_processor_) {
-    static const char kErr[] = "Query() called before Parse()";
-    PERFETTO_ELOG("[RPC] %s", kErr);
-    protozero::HeapBuffered<protos::pbzero::QueryResult> result;
-    result->set_error(kErr);
-    auto vec = result.SerializeAsArray();
-    result_callback(vec.data(), vec.size(), /*has_more=*/false);
-    return;
-  }
-
-  auto it = trace_processor_->ExecuteQuery(sql.c_str());
+  auto it = QueryInternal(args, len);
   QueryResultSerializer serializer(std::move(it));
 
   std::vector<uint8_t> res;
@@ -116,21 +297,34 @@
   }
 }
 
+Iterator Rpc::QueryInternal(const uint8_t* args, size_t len) {
+  protos::pbzero::RawQueryArgs::Decoder query(args, len);
+  std::string sql = query.sql_query().ToStdString();
+  PERFETTO_DLOG("[RPC] Query < %s", sql.c_str());
+  PERFETTO_TP_TRACE("RPC_QUERY",
+                    [&](metatrace::Record* r) { r->AddArg("SQL", sql); });
+
+  return trace_processor_->ExecuteQuery(sql.c_str());
+}
+
 std::vector<uint8_t> Rpc::RawQuery(const uint8_t* args, size_t len) {
   protozero::HeapBuffered<protos::pbzero::RawQueryResult> result;
+  RawQueryInternal(args, len, result.get());
+  return result.SerializeAsArray();
+}
+
+void Rpc::RawQueryInternal(const uint8_t* args,
+                           size_t len,
+                           protos::pbzero::RawQueryResult* result) {
+  using ColumnValues = protos::pbzero::RawQueryResult::ColumnValues;
+  using ColumnDesc = protos::pbzero::RawQueryResult::ColumnDesc;
+
   protos::pbzero::RawQueryArgs::Decoder query(args, len);
   std::string sql = query.sql_query().ToStdString();
   PERFETTO_DLOG("[RPC] RawQuery < %s", sql.c_str());
   PERFETTO_TP_TRACE("RPC_RAW_QUERY",
                     [&](metatrace::Record* r) { r->AddArg("SQL", sql); });
 
-  if (!trace_processor_) {
-    static const char kErr[] = "RawQuery() called before Parse()";
-    PERFETTO_ELOG("[RPC] %s", kErr);
-    result->set_error(kErr);
-    return result.SerializeAsArray();
-  }
-
   auto it = trace_processor_->ExecuteQuery(sql.c_str());
 
   // This vector contains a standalone protozero message per column. The problem
@@ -245,29 +439,26 @@
   if (!status.ok())
     result->set_error(status.c_message());
   PERFETTO_DLOG("[RPC] RawQuery > %d rows (err: %d)", rows, !status.ok());
-
-  return result.SerializeAsArray();
 }
 
 std::string Rpc::GetCurrentTraceName() {
-  if (!trace_processor_)
-    return "";
   return trace_processor_->GetCurrentTraceName();
 }
 
 void Rpc::RestoreInitialTables() {
-  if (trace_processor_)
-    trace_processor_->RestoreInitialTables();
+  trace_processor_->RestoreInitialTables();
   session_id_ = base::Uuidv4();
 }
 
-std::vector<uint8_t> Rpc::ComputeMetric(const uint8_t* data, size_t len) {
+std::vector<uint8_t> Rpc::ComputeMetric(const uint8_t* args, size_t len) {
   protozero::HeapBuffered<protos::pbzero::ComputeMetricResult> result;
-  if (!trace_processor_) {
-    result->set_error("Null trace processor instance");
-    return result.SerializeAsArray();
-  }
+  ComputeMetricInternal(args, len, result.get());
+  return result.SerializeAsArray();
+}
 
+void Rpc::ComputeMetricInternal(const uint8_t* data,
+                                size_t len,
+                                protos::pbzero::ComputeMetricResult* result) {
   protos::pbzero::ComputeMetricArgs::Decoder args(data, len);
   std::vector<std::string> metric_names;
   for (auto it = args.metric_names(); it; ++it) {
@@ -281,15 +472,16 @@
     }
   });
 
+  PERFETTO_DLOG("[RPC] ComputeMetrics(%zu, %s), format=%d", metric_names.size(),
+                metric_names.empty() ? "" : metric_names.front().c_str(),
+                args.format());
   switch (args.format()) {
     case protos::pbzero::ComputeMetricArgs::BINARY_PROTOBUF: {
       std::vector<uint8_t> metrics_proto;
       util::Status status =
           trace_processor_->ComputeMetric(metric_names, &metrics_proto);
       if (status.ok()) {
-        result->AppendBytes(
-            protos::pbzero::ComputeMetricResult::kMetricsFieldNumber,
-            metrics_proto.data(), metrics_proto.size());
+        result->set_metrics(metrics_proto.data(), metrics_proto.size());
       } else {
         result->set_error(status.message());
       }
@@ -301,44 +493,27 @@
           metric_names, TraceProcessor::MetricResultFormat::kProtoText,
           &metrics_string);
       if (status.ok()) {
-        result->AppendString(
-            protos::pbzero::ComputeMetricResult::kMetricsAsPrototextFieldNumber,
-            metrics_string);
+        result->set_metrics_as_prototext(metrics_string);
       } else {
         result->set_error(status.message());
       }
       break;
     }
   }
-  return result.SerializeAsArray();
-}
-
-std::vector<uint8_t> Rpc::GetMetricDescriptors(const uint8_t*, size_t) {
-  protozero::HeapBuffered<protos::pbzero::GetMetricDescriptorsResult> result;
-  if (!trace_processor_) {
-    return result.SerializeAsArray();
-  }
-  std::vector<uint8_t> descriptor_set =
-      trace_processor_->GetMetricDescriptors();
-  result->AppendBytes(
-      protos::pbzero::GetMetricDescriptorsResult::kDescriptorSetFieldNumber,
-      descriptor_set.data(), descriptor_set.size());
-  return result.SerializeAsArray();
 }
 
 void Rpc::EnableMetatrace() {
-  if (!trace_processor_)
-    return;
   trace_processor_->EnableMetatrace();
 }
 
 std::vector<uint8_t> Rpc::DisableAndReadMetatrace() {
   protozero::HeapBuffered<protos::pbzero::DisableAndReadMetatraceResult> result;
-  if (!trace_processor_) {
-    result->set_error("Null trace processor instance");
-    return result.SerializeAsArray();
-  }
+  DisableAndReadMetatraceInternal(result.get());
+  return result.SerializeAsArray();
+}
 
+void Rpc::DisableAndReadMetatraceInternal(
+    protos::pbzero::DisableAndReadMetatraceResult* result) {
   std::vector<uint8_t> trace_proto;
   util::Status status = trace_processor_->DisableAndReadMetatrace(&trace_proto);
   if (status.ok()) {
@@ -346,7 +521,6 @@
   } else {
     result->set_error(status.message());
   }
-  return result.SerializeAsArray();
 }
 
 std::string Rpc::GetSessionId() {
diff --git a/src/trace_processor/rpc/rpc.h b/src/trace_processor/rpc/rpc.h
index 3448937..7bca56d 100644
--- a/src/trace_processor/rpc/rpc.h
+++ b/src/trace_processor/rpc/rpc.h
@@ -26,10 +26,21 @@
 
 #include "perfetto/ext/base/uuid.h"
 #include "perfetto/trace_processor/status.h"
+#include "src/protozero/proto_ring_buffer.h"
 
 namespace perfetto {
+
+namespace protos {
+namespace pbzero {
+class RawQueryResult;
+class ComputeMetricResult;
+class DisableAndReadMetatraceResult;
+}  // namespace pbzero
+}  // namespace protos
+
 namespace trace_processor {
 
+class Iterator;
 class TraceProcessor;
 
 // This class handles the binary {,un}marshalling for the Trace Processor RPC
@@ -55,6 +66,38 @@
   Rpc();
   ~Rpc();
 
+  // 1. TraceProcessor byte-pipe RPC interface.
+  // This is a bidirectional channel with a remote TraceProcessor instance. All
+  // it needs is a byte-oriented pipe (e.g., a TCP socket, a pipe(2) between two
+  // processes or a postmessage channel in the JS+Wasm case). The messages
+  // exchanged on these pipes are TraceProcessorRpc protos (defined in
+  // trace_processor.proto). This has been introduced in Perfetto v15.
+
+  // Pushes data received by the RPC channel into the parser. Inbound messages
+  // are tokenized and turned into TraceProcessor method invocations. |data|
+  // does not need to be a whole TraceProcessorRpc message. It can be a portion
+  // of it or a union of >1 messages.
+  // Responses are sent through the RpcResponseFunction (below).
+  void OnRpcRequest(const void* data, size_t len);
+
+  // The size argument is a uint32_t and not size_t to avoid ABI mismatches
+  // with Wasm, where size_t = uint32_t.
+  // (nullptr, 0) has the semantic of "close the channel" and is issued when an
+  // unrecoverable wire-protocol framing error is detected.
+  using RpcResponseFunction = void (*)(const void* /*data*/, uint32_t /*len*/);
+  void SetRpcResponseFunction(RpcResponseFunction f) { rpc_response_fn_ = f; }
+
+  // 2. TraceProcessor legacy RPC endpoints.
+  // The methods below are exposed for the old RPC interfaces, where each RPC
+  // implementation deals with the method demuxing: (i) wasm_bridge.cc has one
+  // exported C function per method (going away soon); (ii) httpd.cc has one
+  // REST endpoint per method. Over time this turned out to have too much
+  // duplicated boilerplate and we moved to the byte-pipe model above.
+  // We still keep these endpoints around, because httpd.cc still exposes the
+  // individual REST endpoints to legacy clients (TP's Python API). The
+  // maintenance cost of those is very low. Both the new byte-pipe and the
+  // old endpoints run exactly the same code. The {de,}serialization format is
+  // the same; the only difference is who does the method demuxing.
   // The methods of this class are mirrors (modulo {un,}marshalling of args) of
   // the corresponding names in trace_processor.h . See that header for docs.
 
@@ -62,7 +105,6 @@
   void NotifyEndOfFile();
   std::string GetCurrentTraceName();
   std::vector<uint8_t> ComputeMetric(const uint8_t* data, size_t len);
-  std::vector<uint8_t> GetMetricDescriptors(const uint8_t* data, size_t len);
   void EnableMetatrace();
   std::vector<uint8_t> DisableAndReadMetatrace();
 
@@ -113,10 +155,25 @@
   std::vector<uint8_t> RawQuery(const uint8_t* args, size_t len);
 
  private:
+  void ParseRpcRequest(const uint8_t* data, size_t len);
+  void ResetTraceProcessor();
   void MaybePrintProgress();
+  Iterator QueryInternal(const uint8_t* args, size_t len);
+  void RawQueryInternal(const uint8_t* args,
+                        size_t len,
+                        protos::pbzero::RawQueryResult*);
+  void ComputeMetricInternal(const uint8_t* args,
+                             size_t len,
+                             protos::pbzero::ComputeMetricResult*);
+  void DisableAndReadMetatraceInternal(
+      protos::pbzero::DisableAndReadMetatraceResult*);
 
   std::unique_ptr<TraceProcessor> trace_processor_;
-  bool eof_ = true;  // Reset when calling Parse().
+  RpcResponseFunction rpc_response_fn_;
+  protozero::ProtoRingBuffer rxbuf_;
+  int64_t tx_seq_id_ = 0;
+  int64_t rx_seq_id_ = 0;
+  bool eof_ = false;
   int64_t t_parse_started_ = 0;
   size_t bytes_last_progress_ = 0;
   size_t bytes_parsed_ = 0;
diff --git a/src/trace_processor/rpc/wasm_bridge.cc b/src/trace_processor/rpc/wasm_bridge.cc
index 79e5dae..4d2decf 100644
--- a/src/trace_processor/rpc/wasm_bridge.cc
+++ b/src/trace_processor/rpc/wasm_bridge.cc
@@ -97,14 +97,6 @@
           static_cast<uint32_t>(res.size()));
 }
 
-void EMSCRIPTEN_KEEPALIVE trace_processor_get_metric_descriptors(uint32_t);
-void trace_processor_get_metric_descriptors(uint32_t size) {
-  std::vector<uint8_t> res =
-      g_trace_processor_rpc->GetMetricDescriptors(g_req_buf, size);
-  g_reply(reinterpret_cast<const char*>(res.data()),
-          static_cast<uint32_t>(res.size()));
-}
-
 void EMSCRIPTEN_KEEPALIVE trace_processor_enable_metatrace(uint32_t);
 void trace_processor_enable_metatrace(uint32_t) {
   g_trace_processor_rpc->EnableMetatrace();
diff --git a/src/trace_processor/sqlite/span_join_operator_table.cc b/src/trace_processor/sqlite/span_join_operator_table.cc
index c6c3b78..13fea5e 100644
--- a/src/trace_processor/sqlite/span_join_operator_table.cc
+++ b/src/trace_processor/sqlite/span_join_operator_table.cc
@@ -210,7 +210,7 @@
   // Check if any column has : in its name. This often happens when SELECT *
   // is used to create a view with the same column name in two joined tables.
   for (const auto& col : cols) {
-    if (col.name().find(':') != std::string::npos) {
+    if (base::Contains(col.name(), ':')) {
       return util::ErrStatus("SPAN_JOIN: column %s has illegal character :",
                              col.name().c_str());
     }
diff --git a/src/trace_processor/tables/profiler_tables.h b/src/trace_processor/tables/profiler_tables.h
index 419d852..c51ac91 100644
--- a/src/trace_processor/tables/profiler_tables.h
+++ b/src/trace_processor/tables/profiler_tables.h
@@ -282,7 +282,9 @@
   C(int64_t, cumulative_alloc_count)                                      \
   C(int64_t, alloc_size)                                                  \
   C(int64_t, cumulative_alloc_size)                                       \
-  C(base::Optional<ExperimentalFlamegraphNodesTable::Id>, parent_id)
+  C(base::Optional<ExperimentalFlamegraphNodesTable::Id>, parent_id)      \
+  C(base::Optional<StringPool::Id>, source_file)                          \
+  C(base::Optional<uint32_t>, line_number)
 
 PERFETTO_TP_TABLE(PERFETTO_TP_EXPERIMENTAL_FLAMEGRAPH_NODES);
 
diff --git a/src/traced/probes/ftrace/atrace_wrapper.cc b/src/traced/probes/ftrace/atrace_wrapper.cc
index 2522e47..43b9862 100644
--- a/src/traced/probes/ftrace/atrace_wrapper.cc
+++ b/src/traced/probes/ftrace/atrace_wrapper.cc
@@ -25,16 +25,24 @@
 #include <sys/wait.h>
 #include <unistd.h>
 
+#include "perfetto/base/build_config.h"
 #include "perfetto/base/logging.h"
 #include "perfetto/base/time.h"
+#include "perfetto/ext/base/optional.h"
 #include "perfetto/ext/base/pipe.h"
+#include "perfetto/ext/base/string_utils.h"
 #include "perfetto/ext/base/utils.h"
 
+#if PERFETTO_BUILDFLAG(PERFETTO_OS_ANDROID)
+#include <sys/system_properties.h>
+#endif  // PERFETTO_BUILDFLAG(PERFETTO_OS_ANDROID)
+
 namespace perfetto {
 
 namespace {
 
 RunAtraceFunction g_run_atrace_for_testing = nullptr;
+base::Optional<bool> g_is_old_atrace_for_testing{};
 
 #if PERFETTO_BUILDFLAG(PERFETTO_OS_ANDROID)
 // Args should include "atrace" for argv[0].
@@ -183,4 +191,29 @@
   g_run_atrace_for_testing = f;
 }
 
+bool IsOldAtrace() {
+  if (g_is_old_atrace_for_testing.has_value())
+    return *g_is_old_atrace_for_testing;
+#if PERFETTO_BUILDFLAG(PERFETTO_OS_ANDROID) && \
+    !PERFETTO_BUILDFLAG(PERFETTO_ANDROID_BUILD)
+  // Sideloaded case. We could be sideloaded on a modern device or an older one.
+  char str_value[PROP_VALUE_MAX];
+  if (!__system_property_get("ro.build.version.sdk", str_value))
+    return false;
+  auto opt_value = base::CStringToUInt32(str_value);
+  return opt_value.has_value() && *opt_value < 28;  // 28 == Android P.
+#else
+  // In in-tree builds we know that atrace is current, no runtime checks needed.
+  return false;
+#endif
+}
+
+void SetIsOldAtraceForTesting(bool value) {
+  g_is_old_atrace_for_testing = value;
+}
+
+void ClearIsOldAtraceForTesting() {
+  g_is_old_atrace_for_testing.reset();
+}
+
 }  // namespace perfetto
diff --git a/src/traced/probes/ftrace/atrace_wrapper.h b/src/traced/probes/ftrace/atrace_wrapper.h
index 264cee0..91f02cb 100644
--- a/src/traced/probes/ftrace/atrace_wrapper.h
+++ b/src/traced/probes/ftrace/atrace_wrapper.h
@@ -26,6 +26,15 @@
 using RunAtraceFunction =
     std::add_pointer<bool(const std::vector<std::string>& /*args*/)>::type;
 
+// When we are sideloaded on an old version of Android (pre P), we cannot use
+// atrace --only_userspace because that option doesn't exist. In that case we:
+// - Just use atrace --async_start/stop, which will cause atrace to also
+//   poke at ftrace.
+// - Suppress the checks for "somebody else enabled ftrace unexpectedly".
+bool IsOldAtrace();
+void SetIsOldAtraceForTesting(bool);
+void ClearIsOldAtraceForTesting();
+
 bool RunAtrace(const std::vector<std::string>& args);
 void SetRunAtraceForTesting(RunAtraceFunction);
 
diff --git a/src/traced/probes/ftrace/ftrace_config_muxer.cc b/src/traced/probes/ftrace/ftrace_config_muxer.cc
index 24b84b5..0af1f2c 100644
--- a/src/traced/probes/ftrace/ftrace_config_muxer.cc
+++ b/src/traced/probes/ftrace/ftrace_config_muxer.cc
@@ -455,7 +455,7 @@
     PERFETTO_DCHECK(active_configs_.empty());
 
     // If someone outside of perfetto is using ftrace give up now.
-    if (is_ftrace_enabled) {
+    if (is_ftrace_enabled && !IsOldAtrace()) {
       PERFETTO_ELOG("ftrace in use by non-Perfetto.");
       return 0;
     }
@@ -466,7 +466,7 @@
     SetupBufferSize(request);
   } else {
     // Did someone turn ftrace off behind our back? If so give up.
-    if (!active_configs_.empty() && !is_ftrace_enabled) {
+    if (!active_configs_.empty() && !is_ftrace_enabled && !IsOldAtrace()) {
       PERFETTO_ELOG("ftrace disabled by non-Perfetto.");
       return 0;
     }
@@ -486,8 +486,15 @@
     }
   }
 
-  if (RequiresAtrace(request))
+  if (RequiresAtrace(request)) {
+    if (IsOldAtrace() && !ds_configs_.empty()) {
+      PERFETTO_ELOG(
+          "Concurrent atrace sessions are not supported before Android P, "
+          "bailing out.");
+      return 0;
+    }
     UpdateAtrace(request);
+  }
 
   for (const auto& group_and_name : events) {
     const Event* event = table_->GetOrCreateEvent(group_and_name);
@@ -533,7 +540,7 @@
   }
 
   if (active_configs_.empty()) {
-    if (ftrace_->IsTracingEnabled()) {
+    if (ftrace_->IsTracingEnabled() && !IsOldAtrace()) {
       // If someone outside of perfetto is using ftrace give up now.
       PERFETTO_ELOG("ftrace in use by non-Perfetto.");
       return false;
@@ -695,7 +702,8 @@
   std::vector<std::string> args;
   args.push_back("atrace");  // argv0 for exec()
   args.push_back("--async_start");
-  args.push_back("--only_userspace");
+  if (!IsOldAtrace())
+    args.push_back("--only_userspace");
 
   for (const auto& category : categories)
     args.push_back(category);
@@ -721,7 +729,10 @@
 
   PERFETTO_DLOG("Stop atrace...");
 
-  if (RunAtrace({"atrace", "--async_stop", "--only_userspace"})) {
+  std::vector<std::string> args{"atrace", "--async_stop"};
+  if (!IsOldAtrace())
+    args.push_back("--only_userspace");
+  if (RunAtrace(args)) {
     current_state_.atrace_categories.clear();
     current_state_.atrace_apps.clear();
     current_state_.atrace_on = false;
diff --git a/src/traced/probes/ftrace/ftrace_config_muxer_unittest.cc b/src/traced/probes/ftrace/ftrace_config_muxer_unittest.cc
index c3602aa..312f849 100644
--- a/src/traced/probes/ftrace/ftrace_config_muxer_unittest.cc
+++ b/src/traced/probes/ftrace/ftrace_config_muxer_unittest.cc
@@ -26,11 +26,11 @@
 
 using testing::_;
 using testing::AnyNumber;
-using testing::MatchesRegex;
 using testing::Contains;
 using testing::ElementsAreArray;
 using testing::Eq;
 using testing::IsEmpty;
+using testing::MatchesRegex;
 using testing::NiceMock;
 using testing::Not;
 using testing::Return;
@@ -93,6 +93,14 @@
 
 class FtraceConfigMuxerTest : public ::testing::Test {
  protected:
+  void SetUp() override {
+    // Don't probe for older SDK levels, that would relax the atrace-related
+    // checks on older versions of Android (But some tests here test those).
+    // We want the unittests to behave consistently (as if we were on a post P
+    // device) regardless of the Android versions they run on.
+    SetIsOldAtraceForTesting(false);
+  }
+  void TearDown() override { ClearIsOldAtraceForTesting(); }
   std::unique_ptr<MockProtoTranslationTable> GetMockTable() {
     std::vector<Field> common_fields;
     std::vector<Event> events;
diff --git a/src/tracing/core/tracing_service_impl.cc b/src/tracing/core/tracing_service_impl.cc
index 63839b0..1a5b43d 100644
--- a/src/tracing/core/tracing_service_impl.cc
+++ b/src/tracing/core/tracing_service_impl.cc
@@ -647,6 +647,21 @@
     return PERFETTO_SVC_ERR("Too many buffers configured (%d)",
                             cfg.buffers_size());
   }
+  // Check that the config specifies all buffers for its data sources. This
+  // is also checked in SetupDataSource, but it is simpler to return a proper
+  // error to the consumer from here (and there will be less state to undo).
+  for (const TraceConfig::DataSource& cfg_data_source : cfg.data_sources()) {
+    size_t num_buffers = static_cast<size_t>(cfg.buffers_size());
+    size_t target_buffer = cfg_data_source.config().target_buffer();
+    if (target_buffer >= num_buffers) {
+      MaybeLogUploadEvent(
+          cfg, PerfettoStatsdAtom::kTracedEnableTracingOobTargetBuffer);
+      return PERFETTO_SVC_ERR(
+          "Data source \"%s\" specified an out of bounds target_buffer (%zu >= "
+          "%zu)",
+          cfg_data_source.config().name().c_str(), target_buffer, num_buffers);
+    }
+  }
 
   if (!cfg.unique_session_name().empty()) {
     const std::string& name = cfg.unique_session_name();
diff --git a/test/trace_processor/profiling/heap_profile_flamegraph_system-server-native-profile.out b/test/trace_processor/profiling/heap_profile_flamegraph_system-server-native-profile.out
index 42ab65d..b521691 100644
--- a/test/trace_processor/profiling/heap_profile_flamegraph_system-server-native-profile.out
+++ b/test/trace_processor/profiling/heap_profile_flamegraph_system-server-native-profile.out
@@ -1,11 +1,11 @@
-"id","type","depth","name","map_name","count","cumulative_count","size","cumulative_size","alloc_count","cumulative_alloc_count","alloc_size","cumulative_alloc_size","parent_id"
-0,"experimental_flamegraph_nodes",0,"__start_thread","/apex/com.android.runtime/lib64/bionic/libc.so",0,8,0,84848,0,210,0,1084996,"[NULL]"
-1,"experimental_flamegraph_nodes",1,"_ZL15__pthread_startPv","/apex/com.android.runtime/lib64/bionic/libc.so",0,8,0,84848,0,210,0,1084996,0
-2,"experimental_flamegraph_nodes",2,"_ZN7android14AndroidRuntime15javaThreadShellEPv","/system/lib64/libandroid_runtime.so",0,5,0,27704,0,77,0,348050,1
-3,"experimental_flamegraph_nodes",3,"_ZN7android6Thread11_threadLoopEPv","/system/lib64/libutils.so",0,5,0,27704,0,77,0,348050,2
-4,"experimental_flamegraph_nodes",4,"_ZN7android10PoolThread10threadLoopEv","/system/lib64/libbinder.so",0,1,0,4096,0,64,0,279182,3
-5,"experimental_flamegraph_nodes",5,"_ZN7android14IPCThreadState14joinThreadPoolEb","/system/lib64/libbinder.so",0,1,0,4096,0,64,0,279182,4
-6,"experimental_flamegraph_nodes",6,"_ZN7android14IPCThreadState20getAndExecuteCommandEv","/system/lib64/libbinder.so",0,1,0,4096,0,64,0,279182,5
-7,"experimental_flamegraph_nodes",7,"_ZN7android14IPCThreadState14executeCommandEi","/system/lib64/libbinder.so",0,1,0,4096,0,64,0,279182,6
-8,"experimental_flamegraph_nodes",8,"_ZN7android7BBinder8transactEjRKNS_6ParcelEPS1_j","/system/lib64/libbinder.so",0,1,0,4096,0,64,0,279182,7
-9,"experimental_flamegraph_nodes",9,"_ZN11JavaBBinder10onTransactEjRKN7android6ParcelEPS1_j","/system/lib64/libandroid_runtime.so",0,0,0,0,0,60,0,262730,8
+"id","type","depth","name","map_name","count","cumulative_count","size","cumulative_size","alloc_count","cumulative_alloc_count","alloc_size","cumulative_alloc_size","parent_id","source_file","line_number"
+0,"experimental_flamegraph_nodes",0,"__start_thread","/apex/com.android.runtime/lib64/bionic/libc.so",0,8,0,84848,0,210,0,1084996,"[NULL]","[NULL]","[NULL]"
+1,"experimental_flamegraph_nodes",1,"_ZL15__pthread_startPv","/apex/com.android.runtime/lib64/bionic/libc.so",0,8,0,84848,0,210,0,1084996,0,"[NULL]","[NULL]"
+2,"experimental_flamegraph_nodes",2,"_ZN7android14AndroidRuntime15javaThreadShellEPv","/system/lib64/libandroid_runtime.so",0,5,0,27704,0,77,0,348050,1,"[NULL]","[NULL]"
+3,"experimental_flamegraph_nodes",3,"_ZN7android6Thread11_threadLoopEPv","/system/lib64/libutils.so",0,5,0,27704,0,77,0,348050,2,"[NULL]","[NULL]"
+4,"experimental_flamegraph_nodes",4,"_ZN7android10PoolThread10threadLoopEv","/system/lib64/libbinder.so",0,1,0,4096,0,64,0,279182,3,"[NULL]","[NULL]"
+5,"experimental_flamegraph_nodes",5,"_ZN7android14IPCThreadState14joinThreadPoolEb","/system/lib64/libbinder.so",0,1,0,4096,0,64,0,279182,4,"[NULL]","[NULL]"
+6,"experimental_flamegraph_nodes",6,"_ZN7android14IPCThreadState20getAndExecuteCommandEv","/system/lib64/libbinder.so",0,1,0,4096,0,64,0,279182,5,"[NULL]","[NULL]"
+7,"experimental_flamegraph_nodes",7,"_ZN7android14IPCThreadState14executeCommandEi","/system/lib64/libbinder.so",0,1,0,4096,0,64,0,279182,6,"[NULL]","[NULL]"
+8,"experimental_flamegraph_nodes",8,"_ZN7android7BBinder8transactEjRKNS_6ParcelEPS1_j","/system/lib64/libbinder.so",0,1,0,4096,0,64,0,279182,7,"[NULL]","[NULL]"
+9,"experimental_flamegraph_nodes",9,"_ZN11JavaBBinder10onTransactEjRKN7android6ParcelEPS1_j","/system/lib64/libandroid_runtime.so",0,0,0,0,0,60,0,262730,8,"[NULL]","[NULL]"
diff --git a/tools/heap_profile b/tools/heap_profile
index b635a12..f3ee35a 100755
--- a/tools/heap_profile
+++ b/tools/heap_profile
@@ -164,6 +164,25 @@
   ).decode('utf-8').strip()
   return codename == release
 
+ORDER = ['-n', '-p', '-i', '-o']
+
+def arg_order(action):
+  result = len(ORDER)
+  for opt in action.option_strings:
+    if opt in ORDER:
+      result = min(ORDER.index(opt), result)
+  return result, action.option_strings[0].strip('-')
+
+def print_options(parser):
+  for action in sorted(parser._actions, key=arg_order):
+    if action.help is argparse.SUPPRESS:
+      continue
+    opts = ', '.join('`' + x + '`' for x in action.option_strings)
+    metavar = '' if action.metavar is None else ' _'     + action.metavar + '_'
+    print('{}{}'.format(opts, metavar))
+    print(':    {}'.format(action.help))
+    print()
+
 def main(argv):
   parser = argparse.ArgumentParser()
   parser.add_argument(
@@ -294,8 +313,16 @@
       help="Output directory.",
       metavar="DIRECTORY",
       default=None)
+  parser.add_argument(
+      "--print-options",
+      action="store_true",
+      help=argparse.SUPPRESS
+  )
 
   args = parser.parse_args()
+  if args.print_options:
+    print_options(parser)
+    return 0
   fail = False
   if args.block_client and args.no_block_client:
     print(
diff --git a/ui/release/channels.json b/ui/release/channels.json
index b6827d6..526675f 100644
--- a/ui/release/channels.json
+++ b/ui/release/channels.json
@@ -6,7 +6,7 @@
     },
     {
       "name": "canary",
-      "rev": "dac610dd857535ee840a1cdcc2a5103e59a99388"
+      "rev": "bae8193de6c017394901163b7817157342914679"
     },
     {
       "name": "autopush",
diff --git a/ui/src/common/flamegraph_util.ts b/ui/src/common/flamegraph_util.ts
index f083428..2910952 100644
--- a/ui/src/common/flamegraph_util.ts
+++ b/ui/src/common/flamegraph_util.ts
@@ -103,7 +103,8 @@
     mapping: callsite.mapping,
     selfSize: callsite.selfSize,
     merged: callsite.merged,
-    highlighted: callsite.highlighted
+    highlighted: callsite.highlighted,
+    location: callsite.location
   };
 }
 
diff --git a/ui/src/common/state.ts b/ui/src/common/state.ts
index 347c904..e38f4b2 100644
--- a/ui/src/common/state.ts
+++ b/ui/src/common/state.ts
@@ -80,6 +80,7 @@
   mapping: string;
   merged: boolean;
   highlighted: boolean;
+  location?: string;
 }
 
 export interface TraceFileSource {
diff --git a/ui/src/controller/heap_profile_controller.ts b/ui/src/controller/heap_profile_controller.ts
index c424b8f..37af362 100644
--- a/ui/src/controller/heap_profile_controller.ts
+++ b/ui/src/controller/heap_profile_controller.ts
@@ -269,8 +269,9 @@
     const callsites = await this.args.engine.query(
         `SELECT id, IFNULL(DEMANGLE(name), name), IFNULL(parent_id, -1), depth,
         cumulative_size, cumulative_alloc_size, cumulative_count,
-        cumulative_alloc_count, map_name, size, count from ${tableName} ${
-            orderBy}`);
+        cumulative_alloc_count, map_name, size, count,
+        IFNULL(source_file, ''), IFNULL(line_number, -1)
+        from ${tableName} ${orderBy}`);
 
     const flamegraphData: CallsiteInfo[] = new Array();
     const hashToindex: Map<number, number> = new Map();
@@ -286,6 +287,17 @@
           name.toLocaleLowerCase().includes(focusRegex.toLocaleLowerCase());
       const parentId =
           hashToindex.has(+parentHash) ? hashToindex.get(+parentHash)! : -1;
+
+      let location: string|undefined;
+      if (callsites.columns[11].stringValues != null &&
+          /[a-zA-Z]/i.test(callsites.columns[11].stringValues[i])) {
+        location = callsites.columns[11].stringValues[i];
+        if (callsites.columns[12].longValues != null &&
+            callsites.columns[12].longValues[i] !== -1) {
+          location += `:${callsites.columns[12].longValues[i].toString()}`;
+        }
+      }
+
       if (depth === maxDepth - 1) {
         name += ' [tree truncated]';
       }
@@ -302,7 +314,8 @@
         selfSize,
         mapping,
         merged: false,
-        highlighted
+        highlighted,
+        location
       });
     }
     return flamegraphData;
@@ -321,7 +334,7 @@
     return this.cache.getTableName(
         `select id, name, map_name, parent_id, depth, cumulative_size,
           cumulative_alloc_size, cumulative_count, cumulative_alloc_count,
-          size, alloc_size, count, alloc_count
+          size, alloc_size, count, alloc_count, source_file, line_number
           from experimental_flamegraph(${ts}, ${upid}, '${type}') ${
             whereClause}`);
   }
diff --git a/ui/src/controller/trace_controller.ts b/ui/src/controller/trace_controller.ts
index d91a3bb..1b3580e 100644
--- a/ui/src/controller/trace_controller.ts
+++ b/ui/src/controller/trace_controller.ts
@@ -544,11 +544,13 @@
         let hasSliceName = false;
         let hasDur = false;
         let hasUpid = false;
+        let hasValue = false;
         for (let i = 0; i < slowlyCountRows(result); i++) {
           const name = result.columns[1].stringValues![i];
           hasSliceName = hasSliceName || name === 'slice_name';
           hasDur = hasDur || name === 'dur';
           hasUpid = hasUpid || name === 'upid';
+          hasValue = hasValue || name === 'value';
         }
 
         const upidColumnSelect = hasUpid ? 'upid' : '0 AS upid';
@@ -579,7 +581,6 @@
           `);
         }
 
-        const hasValue = result.columnDescriptors.some(x => x.name === 'value');
         if (hasValue) {
           const minMax = await engine.query(`
           SELECT MIN(value) as min_value, MAX(value) as max_value
diff --git a/ui/src/frontend/flamegraph.ts b/ui/src/frontend/flamegraph.ts
index 77e209a..8836c11 100644
--- a/ui/src/frontend/flamegraph.ts
+++ b/ui/src/frontend/flamegraph.ts
@@ -267,19 +267,21 @@
       const offsetPx = 4;
 
       const lines: string[] = [];
-      let lineSplitter: LineSplitter;
-      const nameText = this.getCallsiteName(this.hoveredCallsite);
-      const nameTextSize = ctx.measureText(nameText);
-      lineSplitter =
-          splitIfTooBig(nameText, width - paddingPx, nameTextSize.width);
-      let textWidth = lineSplitter.lineWidth;
-      lines.push(...lineSplitter.lines);
 
-      const mappingText = this.hoveredCallsite.mapping;
-      lineSplitter =
-          splitIfTooBig(mappingText, width, ctx.measureText(mappingText).width);
-      textWidth = Math.max(textWidth, lineSplitter.lineWidth);
-      lines.push(...lineSplitter.lines);
+      let textWidth = this.addToTooltip(
+          this.getCallsiteName(this.hoveredCallsite),
+          width - paddingPx,
+          ctx,
+          lines);
+      if (this.hoveredCallsite.location != null) {
+        textWidth = Math.max(
+            textWidth,
+            this.addToTooltip(
+                this.hoveredCallsite.location, width, ctx, lines));
+      }
+      textWidth = Math.max(
+          textWidth,
+          this.addToTooltip(this.hoveredCallsite.mapping, width, ctx, lines));
 
       if (this.nodeRendering.totalSize !== undefined) {
         const percentage =
@@ -289,10 +291,8 @@
                 this.hoveredCallsite.totalSize,
                 unit,
                 unit === 'B' ? 1024 : 1000)} (${percentage.toFixed(2)}%)`;
-        lineSplitter = splitIfTooBig(
-            totalSizeText, width, ctx.measureText(totalSizeText).width);
-        textWidth = Math.max(textWidth, lineSplitter.lineWidth);
-        lines.push(...lineSplitter.lines);
+        textWidth = Math.max(
+            textWidth, this.addToTooltip(totalSizeText, width, ctx, lines));
       }
 
       if (this.nodeRendering.selfSize !== undefined &&
@@ -304,10 +304,8 @@
                 this.hoveredCallsite.selfSize,
                 unit,
                 unit === 'B' ? 1024 : 1000)} (${selfPercentage.toFixed(2)}%)`;
-        lineSplitter = splitIfTooBig(
-            selfSizeText, width, ctx.measureText(selfSizeText).width);
-        textWidth = Math.max(textWidth, lineSplitter.lineWidth);
-        lines.push(...lineSplitter.lines);
+        textWidth = Math.max(
+            textWidth, this.addToTooltip(selfSizeText, width, ctx, lines));
       }
 
       // Compute a line height as the bounding box height + 50%:
@@ -344,6 +342,15 @@
     }
   }
 
+  private addToTooltip(
+      text: string, width: number, ctx: CanvasRenderingContext2D,
+      lines: string[]): number {
+    const lineSplitter: LineSplitter =
+        splitIfTooBig(text, width, ctx.measureText(text).width);
+    lines.push(...lineSplitter.lines);
+    return lineSplitter.lineWidth;
+  }
+
   private getCallsiteName(value: CallsiteInfo): string {
     return value.name === undefined || value.name === '' ? 'unknown' :
                                                            value.name;
diff --git a/ui/src/tracks/cpu_profile/controller.ts b/ui/src/tracks/cpu_profile/controller.ts
index 070cedb..0042888 100644
--- a/ui/src/tracks/cpu_profile/controller.ts
+++ b/ui/src/tracks/cpu_profile/controller.ts
@@ -12,7 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-import {slowlyCountRows} from '../../common/query_iterator';
+import {iter, NUM, slowlyCountRows} from '../../common/query_iterator';
 import {
   TrackController,
   trackControllerRegistry
@@ -28,9 +28,13 @@
   static readonly kind = CPU_PROFILE_TRACK_KIND;
   async onBoundsChange(start: number, end: number, resolution: number):
       Promise<Data> {
-    const query = `select id, ts, callsite_id from cpu_profile_stack_sample
-        where utid = ${this.config.utid}
-        order by ts`;
+    const query = `select
+        id,
+        ts,
+        callsite_id as callsiteId
+      from cpu_profile_stack_sample
+      where utid = ${this.config.utid}
+      order by ts`;
 
     const result = await this.query(query);
 
@@ -45,10 +49,11 @@
       callsiteId: new Uint32Array(numRows),
     };
 
-    for (let row = 0; row < numRows; row++) {
-      data.ids[row] = +result.columns[0].longValues![row];
-      data.tsStarts[row] = +result.columns[1].longValues![row];
-      data.callsiteId[row] = +result.columns[2].longValues![row];
+    const it = iter({id: NUM, ts: NUM, callsiteId: NUM}, result);
+    for (let i = 0; it.valid(); it.next(), ++i) {
+      data.ids[i] = it.row.id;
+      data.tsStarts[i] = it.row.ts;
+      data.callsiteId[i] = it.row.callsiteId;
     }
 
     return data;