Merge "ui: Fix derived events counter not shown in global track"
diff --git a/Android.bp b/Android.bp
index 70e2297..6c2a184 100644
--- a/Android.bp
+++ b/Android.bp
@@ -9741,6 +9741,7 @@
     ":perfetto_src_profiling_deobfuscator",
     ":perfetto_src_profiling_symbolizer_symbolize_database",
     ":perfetto_src_profiling_symbolizer_symbolizer",
+    ":perfetto_src_protozero_proto_ring_buffer",
     ":perfetto_src_protozero_protozero",
     ":perfetto_src_trace_processor_analysis_analysis",
     ":perfetto_src_trace_processor_containers_containers",
diff --git a/BUILD b/BUILD
index 23d5d19..db42ce5 100644
--- a/BUILD
+++ b/BUILD
@@ -3847,6 +3847,7 @@
         ":src_profiling_deobfuscator",
         ":src_profiling_symbolizer_symbolize_database",
         ":src_profiling_symbolizer_symbolizer",
+        ":src_protozero_proto_ring_buffer",
         ":src_trace_processor_analysis_analysis",
         ":src_trace_processor_db_db",
         ":src_trace_processor_export_json",
diff --git a/BUILD.gn b/BUILD.gn
index 0fab609..fe44ed2 100644
--- a/BUILD.gn
+++ b/BUILD.gn
@@ -124,7 +124,6 @@
 # compile-time checks for the CI.
 if (perfetto_build_standalone) {
   all_targets += [
-    "src/tracebox",
     "test/configs",
 
     # For syntax-checking the proto.
@@ -145,6 +144,9 @@
   if (is_linux || is_android) {
     all_targets += [ "src/tracing/consumer_api_deprecated:consumer_api_test" ]
   }
+  if (is_linux || is_android || is_mac) {
+    all_targets += [ "src/tracebox" ]
+  }
 }
 
 # The CTS code is built (but not ran) also in standalone builds. This is to
diff --git a/CHANGELOG b/CHANGELOG
index 114c03d..d1bba4c 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,6 +1,9 @@
 Unreleased:
   Tracing service and probes:
-    *
+    * Added support for building most targets (including traced, SDK and
+      trace_processor_shell) from Windows using either clang-cl or MSVC 2019.
+    * Added tracebox, a monolithic binary to capture traces with one command
+      on Linux and older versions of Android (tested on Android Oreo).
   Trace Processor:
     *
   UI:
diff --git a/buildtools/BUILD.gn b/buildtools/BUILD.gn
index 97ab40a..9d5fad4 100644
--- a/buildtools/BUILD.gn
+++ b/buildtools/BUILD.gn
@@ -724,7 +724,8 @@
       "//gn/standalone/sanitizers:sanitizer_options_link_helper",
     ]
     defines = [ "_LIBCPP_BUILDING_LIBRARY" ]
-    if ((is_linux || is_android) && (is_asan || is_tsan || is_msan)) {
+    if ((is_linux || is_android) && using_sanitizer &&
+        (is_asan || is_tsan || is_msan)) {
       # In {a,t,m}san configurations, operator new and operator delete will be
       # provided by the sanitizer runtime library.  Since libc++ defines these
       # symbols with weak linkage, and the *san runtime uses strong linkage, it
diff --git a/docs/contributing/build-instructions.md b/docs/contributing/build-instructions.md
index 0c12902..9eba8f2 100644
--- a/docs/contributing/build-instructions.md
+++ b/docs/contributing/build-instructions.md
@@ -7,64 +7,99 @@
 
 Perfetto can be built both from the Android tree (AOSP) and standalone.
 Standalone builds are meant only for local testing and are not shipped.
-Due to the reduced dependencies they are faster to iterate on and the
-suggested way to work on Perfetto.
+Due to the reduced dependencies, the standalone workflow is faster to iterate on
+and the suggested way to work on Perfetto, unless you are working on code that
+has non-NDK dependencies into Android internals. Profilers and internal HAL/AIDL
+dependencies will not be built in the standalone build.
 
-## Get the code
+If you are a chromium contributor, AOSP is still the place you should send CLs to.
+The code inside chromium's
+[third_party/perfetto](https://source.chromium.org/chromium/chromium/src/+/main:third_party/perfetto/?q=f:third_party%2Fperfetto&ss=chromium)
+is a direct mirror of the AOSP repo. The
+[AOSP->Chromium autoroller](https://autoroll.skia.org/r/perfetto-chromium-autoroll)
+takes care of keeping chromium's DEPS up to date.
 
-**Standalone checkout**:
+## Standalone builds
+
+#### Get the code
 
 ```bash
 git clone https://android.googlesource.com/platform/external/perfetto/
 ```
 
-**Android tree**:
-
-Perfetto lives in [`external/perfetto` in the AOSP tree](https://cs.android.com/android/platform/superproject/+/master:external/perfetto/).
-
-## Prerequisites
-
-**Standalone checkout**:
-
-All dependent libraries are self-hosted and pulled through:
+#### Pull dependent libraries and toolchains
 
 ```bash
 tools/install-build-deps [--android] [--ui]
 ```
 
-**Android tree**:
+`--android` will pull the Android NDK, emulator and other deps required
+to build for `target_os = "android"`.
 
-See https://source.android.com/setup
+`--ui` will pull NodeJS and all the NPM modules required to build the
+Web UI. See the [UI Development](#ui-development) section below for more.
 
-## Building
+#### Generate the build files via GN
 
-**Standalone checkout**:
-
-If you are a chromium developer and have depot_tools installed you can avoid
-the `tools/` prefix below and just use gn/ninja from depot_tools.
-
-`$ tools/gn args out/android` to generate build files and enter in the editor:
-
-```python
-target_os = "android"                 # Only when building for Android
-target_cpu = "arm" / "arm64" / "x64"
-is_debug = true / false
-cc_wrapper = "ccache"                 # Optionally speed repeated builds with ccache
-```
-
-(See the [Build Configurations](#build-configurations) section below for more)
+Perfetto uses [GN](https://gn.googlesource.com/gn/+/HEAD/docs/quick_start.md)
+as primary build system. See the [Build files](#build-files) section below for
+more.
 
 ```bash
-tools/ninja -C out/android
+tools/gn args out/android
 ```
 
-**Android tree**
+This will open an editor to customize the GN args. Enter:
 
-`mmma external/perfetto`
-or
-`m perfetto traced traced_probes`
+```python
+# Set only when building for Android, omit when building for linux, mac or win.
+target_os = "android"
+target_cpu = "arm" / "arm64" / "x64"
+
+is_debug = true / false
+cc_wrapper = "ccache"             # [Optional] speed up rebuilds with ccache.
+```
+
+See the [Build Configurations](#build-configurations) and
+[Building on Windows](#building-on-windows) sections below for more.
+
+TIP: If you are a chromium developer and have depot_tools installed you can
+avoid the `tools/` prefix below and just use gn/ninja from depot_tools.
+
+#### Build native C/C++ targets
+
+```bash
+# This will build all the targets.
+tools/ninja -C out/android
+
+# Alternatively, list targets explicitly.
+tools/ninja -C out/android \
+  traced \                 # Tracing service.
+  traced_probes \          # Ftrace interop and /proc poller.
+  perfetto \               # Cmdline client.
+  trace_processor_shell \  # Trace parsing.
+  trace_to_text            # Trace conversion.
+...
+```
+
+## Android tree builds
+
+Follow these instructions if you are an AOSP contributor.
+
+The source code lives in [`external/perfetto` in the AOSP tree](https://cs.android.com/android/platform/superproject/+/master:external/perfetto/).
+
+Follow the instructions on https://source.android.com/setup/build/building .
+
+Then:
+
+```bash
+mmma external/perfetto
+# or
+m traced traced_probes perfetto
+```
 
 This will generate artifacts `out/target/product/XXX/system/`.
+
 Executables and shared libraries are stripped by default by the Android build
 system. The unstripped artifacts are kept into `out/target/product/XXX/symbols`.
 
@@ -100,19 +135,6 @@
 source file is changed it, the script will automatically re-build it and show a
 prompt in the web page.
 
-## IDE setup
-
-Use a following command in the checkout directory in order to generate the
-compilation database file:
-
-```bash
-tools/gn gen out/default --export-compile-commands
-```
-
-After generating, it can be used in CLion (File -> Open -> Open As Project),
-Visual Studio Code with C/C++ extension and any other tool and editor that
-supports the compilation database format.
-
 ## Build files
 
 The source of truth of our build file is in the BUILD.gn files, which are based
@@ -120,6 +142,8 @@
 The Android build file ([Android.bp](/Android.bp)) is autogenerated from the GN
 files through `tools/gen_android_bp`, which needs to be invoked whenever a
 change touches GN files or introduces new ones.
+Likewise, the Bazel build file ([BUILD](/BUILD)) is autogenerated through the
+`tools/gen_bazel` script.
 
 A presubmit check checks that the Android.bp is consistent with GN files when
 submitting a CL through `git cl upload`.
@@ -142,15 +166,79 @@
 
 **Mac**
 
-- XCode 9 / clang (currently maintained best-effort).
+- XCode 9 / clang (maintained best-effort).
 
 **Windows**
 
-Windows builds are not currently supported when using the standalone checkout
-and GN. Windows is supported only for a subset of the targets (mainly
-`trace_processor` and the in-process version of the
-[Tracing SDK](/docs/instrumentation/tracing-sdk.md)) in two ways:
-(1) when building through Bazel; (2) when building as part of Chromium.
+- Windows 10 with either MSVC 2019 or clang-cl (maintained best-effort).
+
+### Building on Windows
+
+Building on Windows is possible using either the MSVC 2019 compiler (you don't
+need the full IDE, just the build tools) or the LLVM clang-cl compiler.
+
+The Windows support in standalone builds has been introduced in v16 by
+[r.android.com/1711913](https://r.android.com/1711913).
+
+clang-cl support is more stable because that build configuration is actively
+covered by the Chromium project (Perfetto rolls into chromium and underpins
+chrome://tracing). The MSVC build is maintained best-effort.
+
+The following targets are supported on Windows:
+
+- `trace_processor_shell`: the trace importer and SQL query engine.
+- `trace_to_text`: the trace conversion tool.
+- `traced` and `perfetto`: the tracing service and cmdline client. They use an
+  alternative implementation of the [inter-process tracing protocol](/docs/design-docs/api-and-abi.md#tracing-protocol-abi)
+  based on a TCP socket and named shared memory. This configuration is only for
+  testing / benchmarks and is not shipped in production.
+  Googlers: see [go/perfetto-win](http://go/perfetto-win) for details.
+- `perfetto_unittests` / `perfetto_integrationtests`: although they support only
+  the subset of code that is supported on Windows (e.g. no ftrace).
+
+It is NOT possible to build the Perfetto UI from Windows.
+
+#### Prerequisites
+
+You need all of these both for MSVC and clang-cl:
+
+- [Build Tools for Visual Studio 2019](https://visualstudio.microsoft.com/downloads/#build-tools-for-visual-studio-2019)
+- [Windows 10 SDK](https://developer.microsoft.com/en-us/windows/downloads/windows-10-sdk/)
+- [Python 3](https://www.python.org/downloads/windows/)
+
+The [`win_find_msvc.py`](/gn/standalone/toolchain/win_find_msvc.py) script will
+locate the highest version numbers available from
+`C:\Program Files (x86)\Windows Kits\10` and
+`C:\Program Files (x86)\Microsoft Visual Studio\2019`.
+
+#### Pull dependent libraries and toolchains
+
+```bash
+# This will download also the LLVM clang-cl prebuilt used by chromium.
+python3 tools/install-build-deps
+```
+
+#### Generate build files
+
+```bash
+python3 tools/gn args out/win
+```
+
+In the editor type:
+
+```bash
+is_debug = true | false
+
+is_clang = true  # Will use the hermetic clang-cl toolchain.
+# or
+is_clang = false  # Will use MSVC 2019.
+```
+
+#### Build
+
+```bash
+python3 tools/ninja -C out/win perfetto traced trace_processor_shell
+```
 
 ## Build configurations
 
@@ -304,3 +392,73 @@
 ```
 
 [gn-quickstart]: https://gn.googlesource.com/gn/+/master/docs/quick_start.md
+
+## IDE setup
+
+Use the following command in the checkout directory in order to generate the
+compilation database file:
+
+```bash
+tools/gn gen out/default --export-compile-commands
+```
+
+After generating, it can be used in CLion (File -> Open -> Open As Project),
+Visual Studio Code with C/C++ extension and any other tool and editor that
+supports the compilation database format.
+
+#### Useful extensions
+
+If you are using VS Code we suggest the following extensions:
+
+- [Clang-Format](https://marketplace.visualstudio.com/items?itemName=xaver.clang-format)
+- [C/C++](https://marketplace.visualstudio.com/items?itemName=ms-vscode.cpptools)
+- [clangd](https://marketplace.visualstudio.com/items?itemName=llvm-vs-code-extensions.vscode-clangd)
+- [Native Debug](https://marketplace.visualstudio.com/items?itemName=webfreak.debug)
+- [GNFormat](https://marketplace.visualstudio.com/items?itemName=persidskiy.vscode-gnformat)
+- [ESlint](https://marketplace.visualstudio.com/items?itemName=dbaeumer.vscode-eslint)
+- [markdownlint](https://marketplace.visualstudio.com/items?itemName=DavidAnson.vscode-markdownlint)
+
+#### Useful settings
+
+In `.vscode/settings.json`:
+
+```json
+{
+  "C_Cpp.clang_format_path": "${workspaceRoot}/buildtools/mac/clang-format",
+  "C_Cpp.clang_format_sortIncludes": true,
+  "files.exclude": {
+    "out/*/obj": true,
+    "out/*/gen": true,
+  },
+  "clangd.arguments": [
+    "--compile-commands-dir=${workspaceFolder}/out/mac_debug",
+    "--completion-style=detailed",
+    "--header-insertion=never"
+  ],
+}
+```
+
+Replace `/mac/` with `/linux64/` on Linux.
+
+### Debugging with VSCode
+
+Edit `.vscode/launch.json`:
+
+```json
+{
+  "version": "0.2.0",
+  "configurations": [
+    {
+      "request": "launch",
+      "type": "cppdbg",
+      "name": "Perfetto unittests",
+      "program": "${workspaceRoot}/out/mac_debug/perfetto_unittests",
+      "args": ["--gtest_filter=TracingServiceImplTest.StopTracingTriggerRingBuffer"],
+      "cwd": "${workspaceFolder}/out/mac_debug",
+      "MIMode": "lldb",
+    },
+  ]
+}
+```
+
+Then open the command palette `Meta`+`Shift`+`P` -> `Debug: Start debugging`.
diff --git a/docs/contributing/embedding.md b/docs/contributing/embedding.md
index ad3ac88..dbf6c56 100644
--- a/docs/contributing/embedding.md
+++ b/docs/contributing/embedding.md
@@ -42,7 +42,7 @@
 
 Metrics can also be registered at run time using the `RegisterMetric` and `ExtendMetricsProto` functions. These can subsequently be executed with `ComputeMetric`.
 
-WARNING: embedders should ensure that the path of any registered metric is consistent with the the name used to execute the metric and output view in the SQL.
+WARNING: embedders should ensure that the path of any registered metric is consistent with the name used to execute the metric and output view in the SQL.
 
 ### Annotations
 
diff --git a/docs/design-docs/api-and-abi.md b/docs/design-docs/api-and-abi.md
index 2c8a6b6..29f47a5 100644
--- a/docs/design-docs/api-and-abi.md
+++ b/docs/design-docs/api-and-abi.md
@@ -272,7 +272,7 @@
 [`InitializeConnectionRequest`][producer_port.proto] request to the
 service, which is the very first IPC sent after connection.
 By default, the service creates the SMB and passes back its file descriptor to
-the producer with the the [`InitializeConnectionResponse`][producer_port.proto]
+the producer with the [`InitializeConnectionResponse`][producer_port.proto]
 IPC reply. Recent versions of the service (Android R / 11) allow the FD to be
 created by the producer and passed down to the service in the request. When the
 service supports this, it acks the request setting
diff --git a/docs/quickstart/android-tracing.md b/docs/quickstart/android-tracing.md
index 2c65d30..3d3b37e 100644
--- a/docs/quickstart/android-tracing.md
+++ b/docs/quickstart/android-tracing.md
@@ -17,6 +17,12 @@
 adb shell setprop persist.traced.enable 1
 ```
 
+If you are running a version of Android older than P, you can still capture a
+trace with Perfetto using the `record_android_trace` script. See instructions
+below in the
+[Recording a trace through the cmdline](#recording-a-trace-through-the-cmdline)
+section.
+
 ## Recording a trace
 
 Command line tools (usage examples below in this page):
@@ -65,6 +71,8 @@
 the command line. It is the equivalent of running `adb shell perfetto` but it
 helps with getting the paths right, auto-pulling the trace once done and opening
 it on the browser.
+Furthermore, on older versions of Android it takes care of sideloading the
+`tracebox` binary to make up for the lack of tracing system services.
 
 If you are already familiar with `systrace` or `atrace`, both cmdline tools
 support a systrace-equivalent syntax:
diff --git a/docs/quickstart/linux-tracing.md b/docs/quickstart/linux-tracing.md
index 93fa823..104b02e 100644
--- a/docs/quickstart/linux-tracing.md
+++ b/docs/quickstart/linux-tracing.md
@@ -17,24 +17,42 @@
 ```bash
 tools/install-build-deps
 ```
-_If the script fails with SSL errors, try invoking it as `python3 tools/install-build-deps`, or upgrading your openssl libraries._
+_If the script fails with SSL errors, try upgrading your openssl package._
 
-3. Generate all most common GN build configurations:
+3. Generate the build configuration
 ```bash
-tools/build_all_configs.py
+tools/gn gen --args='is_debug=false' out/linux
+# Or use `tools/build_all_configs.py` to generate more build configs.
 ```
 
 4. Build the Linux tracing binaries (On Linux it uses a hermetic clang toolchain, downloaded as part of step 2):
 ```bash
-tools/ninja -C out/linux_clang_release traced traced_probes perfetto
+tools/ninja -C out/linux tracebox traced traced_probes perfetto
 ```
-_This step is optional when using the convenience `tools/tmux` script below._
 
 ## Capturing a trace
 
 Due to Perfetto's [service-based architecture](/docs/concepts/service-model.md),
 in order to capture a trace, the `traced` (session daemon) and `traced_probes`
 (probes and ftrace-interop daemon) need to be running.
+As of Perfetto v16, the `tracebox` binary bundles together all the binaries you
+need in a single executable (a bit like `toybox` or `busybox`).
+
+#### Capturing a trace with ftrace and /proc pollers, no SDK
+
+If you are interested in overall system tracing and are not interested in
+testing the SDK, you can use `tracebox` in autostart mode as follows:
+
+```bash
+out/linux/tracebox -o trace_file.perfetto-trace --txt -c test/configs/scheduling.cfg
+```
+
+#### Testing the SDK integration in out-of-process tracing mode (system mode)
+
+If you are using the Perfetto [tracing SDK](/docs/instrumentation/tracing-sdk)
+and want to capture a fused trace that contains both system trace events and
+your custom app trace events, you need to start the `traced` and `traced_probes`
+services ahead of time and then use the `perfetto` cmdline client.
 
 For a quick start, the [tools/tmux](/tools/tmux) script takes care of building,
 setting up and running everything.
@@ -44,8 +62,8 @@
 [ftrace]: https://www.kernel.org/doc/Documentation/trace/ftrace.txt
 
 1. Run the convenience script with an example tracing config (10s duration):
-```
-OUT=out/linux_clang_release CONFIG=test/configs/scheduling.cfg tools/tmux -n
+```bash
+tools/tmux -c test/configs/scheduling.cfg -C out/linux -n
 ```
 This will open a tmux window with three panes, one per the binary involved in
 tracing: `traced`, `traced_probes` and the `perfetto` client cmdline.
@@ -62,9 +80,9 @@
 
 We can now explore the captured trace visually by using a dedicated web-based UI.
 
-NOTE: The UI runs fully in-browser using JavaScript + Web Assembly. The trace
+NOTE: The UI runs in-browser using JavaScript + Web Assembly. The trace
       file is **not** uploaded anywhere by default, unless you explicitly click
-      on the 'Share' link.
+      on the 'Share' link. The 'Share' link is available only to Googlers.
 
 1. Navigate to [ui.perfetto.dev](https://ui.perfetto.dev) in a browser.
 
@@ -75,5 +93,5 @@
    process tracks (rows) into their constituent thread tracks.
    Press "?" for further navigation controls.
 
-Alternatively, you can explore the trace contents issuing SQL queries through 
+Alternatively, you can explore the trace contents issuing SQL queries through
 the [trace processor](/docs/analysis/trace-processor).
diff --git a/docs/quickstart/traceconv.md b/docs/quickstart/traceconv.md
index 226e5b2..83cf975 100644
--- a/docs/quickstart/traceconv.md
+++ b/docs/quickstart/traceconv.md
@@ -38,6 +38,6 @@
 
 If you just want to open a Perfetto trace with the legacy (Catapult) trace
 viewer, you can just navigate to [ui.perfetto.dev](https://ui.perfetto.dev),
-and use the the _"Open with legacy UI"_ link. This runs `traceconv` within
+and use the _"Open with legacy UI"_ link. This runs `traceconv` within
 the browser using WebAssembly and passes the converted trace seamlessly to
 chrome://tracing.
diff --git a/gn/BUILD.gn b/gn/BUILD.gn
index 7fc0cdd..15fa31b 100644
--- a/gn/BUILD.gn
+++ b/gn/BUILD.gn
@@ -81,6 +81,7 @@
     "PERFETTO_ZLIB=$enable_perfetto_zlib",
     "PERFETTO_TRACED_PERF=$enable_perfetto_traced_perf",
     "PERFETTO_HEAPPROFD=$enable_perfetto_heapprofd",
+    "PERFETTO_STDERR_CRASH_DUMP=$enable_perfetto_stderr_crash_dump",
   ]
 
   rel_out_path = rebase_path(gen_header_path, "$root_build_dir")
diff --git a/gn/standalone/BUILD.gn b/gn/standalone/BUILD.gn
index 31941ed..ffe5725 100644
--- a/gn/standalone/BUILD.gn
+++ b/gn/standalone/BUILD.gn
@@ -209,10 +209,10 @@
     ldflags += [ "-flto=full" ]
   }
 
-  # We support only x64 builds on Windows.
-  assert(!is_win || current_cpu == "x64")
-
-  if (current_cpu == "arm") {
+  if (is_win) {
+    # We support only x86/x64 builds on Windows.
+    assert(current_cpu == "x64" || current_cpu == "x86")
+  } else if (current_cpu == "arm") {
     cflags += [
       "-march=armv7-a",
       "-mfpu=neon",
diff --git a/gn/standalone/BUILDCONFIG.gn b/gn/standalone/BUILDCONFIG.gn
index e080700..6f32686 100644
--- a/gn/standalone/BUILDCONFIG.gn
+++ b/gn/standalone/BUILDCONFIG.gn
@@ -55,9 +55,13 @@
   current_cpu = target_cpu
 }
 
-is_cross_compiling =
-    target_cpu != host_cpu || target_os != host_os || target_triplet != ""
-
+declare_args() {
+  # the ossfuzz sanitizer overrides this to true. In that config the
+  # host/target cpu and arch are identical, but we want to build only the
+  # targets with the sanitizer/fuzzer flags
+  is_cross_compiling =
+      target_cpu != host_cpu || target_os != host_os || target_triplet != ""
+}
 default_configs = [
   "//gn/standalone:debug_symbols",
   "//gn/standalone:default",
diff --git a/gn/standalone/sanitizers/BUILD.gn b/gn/standalone/sanitizers/BUILD.gn
index d3374de..574dcfb 100644
--- a/gn/standalone/sanitizers/BUILD.gn
+++ b/gn/standalone/sanitizers/BUILD.gn
@@ -33,61 +33,60 @@
 }
 
 config("sanitizers_cflags") {
-  cflags = []
-  defines = []
   if (using_sanitizer) {
-    cflags += [ "-fno-omit-frame-pointer" ]
-  }
+    cflags = [ "-fno-omit-frame-pointer" ]
+    defines = []
 
-  if (is_asan) {
-    cflags += [ "-fsanitize=address" ]
-    defines += [ "ADDRESS_SANITIZER" ]
-  }
-  if (is_lsan) {
-    cflags += [ "-fsanitize=leak" ]
-    defines += [ "LEAK_SANITIZER" ]
-  }
-  if (is_tsan) {
-    cflags += [ "-fsanitize=thread" ]
-    defines += [
-      "THREAD_SANITIZER",
-      "DYNAMIC_ANNOTATIONS_EXTERNAL_IMPL=1",
-    ]
-  }
-  if (is_msan) {
-    cflags += [
-      "-fsanitize=memory",
-      "-fsanitize-memory-track-origins=2",
-    ]
-    defines += [ "MEMORY_SANITIZER" ]
-  }
-  if (is_ubsan) {
-    cflags += [
-      "-fsanitize=bounds",
-      "-fsanitize=float-divide-by-zero",
-      "-fsanitize=integer-divide-by-zero",
-      "-fsanitize=null",
-      "-fsanitize=object-size",
-      "-fsanitize=return",
-      "-fsanitize=returns-nonnull-attribute",
-      "-fsanitize=shift-exponent",
-      "-fsanitize=signed-integer-overflow",
-      "-fsanitize=unreachable",
-      "-fsanitize=vla-bound",
-    ]
-    defines += [ "UNDEFINED_SANITIZER" ]
-  }
-  if (is_fuzzer) {
-    # FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION is also defined by oss-fuzz,
-    # so using the same name.
-    defines += [ "FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION" ]
-    cflags += [ "-fsanitize=fuzzer-no-link" ]
     if (is_asan) {
-      cflags += [
-        "-mllvm",
-        "-asan-use-private-alias",
+      cflags += [ "-fsanitize=address" ]
+      defines += [ "ADDRESS_SANITIZER" ]
+    }
+    if (is_lsan) {
+      cflags += [ "-fsanitize=leak" ]
+      defines += [ "LEAK_SANITIZER" ]
+    }
+    if (is_tsan) {
+      cflags += [ "-fsanitize=thread" ]
+      defines += [
+        "THREAD_SANITIZER",
+        "DYNAMIC_ANNOTATIONS_EXTERNAL_IMPL=1",
       ]
     }
+    if (is_msan) {
+      cflags += [
+        "-fsanitize=memory",
+        "-fsanitize-memory-track-origins=2",
+      ]
+      defines += [ "MEMORY_SANITIZER" ]
+    }
+    if (is_ubsan) {
+      cflags += [
+        "-fsanitize=bounds",
+        "-fsanitize=float-divide-by-zero",
+        "-fsanitize=integer-divide-by-zero",
+        "-fsanitize=null",
+        "-fsanitize=object-size",
+        "-fsanitize=return",
+        "-fsanitize=returns-nonnull-attribute",
+        "-fsanitize=shift-exponent",
+        "-fsanitize=signed-integer-overflow",
+        "-fsanitize=unreachable",
+        "-fsanitize=vla-bound",
+      ]
+      defines += [ "UNDEFINED_SANITIZER" ]
+    }
+    if (is_fuzzer) {
+      # FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION is also defined by oss-fuzz,
+      # so using the same name.
+      defines += [ "FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION" ]
+      cflags += [ "-fsanitize=fuzzer-no-link" ]
+      if (is_asan) {
+        cflags += [
+          "-mllvm",
+          "-asan-use-private-alias",
+        ]
+      }
+    }
   }
 }
 
@@ -98,25 +97,27 @@
 }
 
 config("sanitizers_ldflags") {
-  visibility = [ ":deps" ]
-  ldflags = []
-  if (is_asan) {
-    ldflags += [ "-fsanitize=address" ]
+  if (using_sanitizer) {
+    visibility = [ ":deps" ]
+    ldflags = []
+    if (is_asan) {
+      ldflags += [ "-fsanitize=address" ]
+    }
+    if (is_lsan) {
+      # This is not a copy/paste mistake. The LSan runtime library has
+      # moved into asan. So in order to make LSan work one has to build
+      # .cc files with -fsanitize=leak but link with -fsanitize=address.
+      ldflags += [ "-fsanitize=address" ]
+    }
+    if (is_tsan) {
+      ldflags += [ "-fsanitize=thread" ]
+    }
+    if (is_msan) {
+      ldflags += [ "-fsanitize=memory" ]
+    }
+    if (is_ubsan) {
+      ldflags += [ "-fsanitize=undefined" ]
+    }
+    configs = [ ":sanitizer_options_link_helper" ]
   }
-  if (is_lsan) {
-    # This is not a copy/paste mistake. The LSan runtime library has
-    # moved into asan. So in order to make LSan work one has to build
-    # .cc files with -fsanitize=leak but link with -fsanitize=address.
-    ldflags += [ "-fsanitize=address" ]
-  }
-  if (is_tsan) {
-    ldflags += [ "-fsanitize=thread" ]
-  }
-  if (is_msan) {
-    ldflags += [ "-fsanitize=memory" ]
-  }
-  if (is_ubsan) {
-    ldflags += [ "-fsanitize=undefined" ]
-  }
-  configs = [ ":sanitizer_options_link_helper" ]
 }
diff --git a/gn/standalone/sanitizers/vars.gni b/gn/standalone/sanitizers/vars.gni
index 9dada78..228908f 100644
--- a/gn/standalone/sanitizers/vars.gni
+++ b/gn/standalone/sanitizers/vars.gni
@@ -47,8 +47,9 @@
 }
 
 declare_args() {
-  using_sanitizer =
-      is_asan || is_lsan || is_tsan || is_msan || is_ubsan || use_libfuzzer
+  # Don't build host artifacts with sanitizers/fuzzers, only target toolchain.
+  using_sanitizer = (is_asan || is_lsan || is_tsan || is_msan || is_ubsan ||
+                     use_libfuzzer) && current_toolchain == default_toolchain
 }
 
 assert(!using_sanitizer || is_clang || is_system_compiler,
diff --git a/gn/standalone/toolchain/BUILD.gn b/gn/standalone/toolchain/BUILD.gn
index d7f54db..ca8f247 100644
--- a/gn/standalone/toolchain/BUILD.gn
+++ b/gn/standalone/toolchain/BUILD.gn
@@ -376,10 +376,7 @@
 toolchain("msvc") {
   lib_switch = ""
   lib_dir_switch = "/LIBPATH:"
-
-  sys_lib_flags = "/LIBPATH:\"${win_sdk_lib_dir}\\ucrt\\x64\" "
-  sys_lib_flags += "/LIBPATH:\"${win_sdk_lib_dir}\\um\\x64\" "
-  sys_lib_flags += "/LIBPATH:\"${win_msvc_lib_dir}\" "
+  sys_lib_flags = string_join(" ", win_msvc_sys_lib_flags)
 
   # Note: /showIncludes below is required for ninja, to build a complete
   # dependency graph for headers. Removing it breaks incremental builds.
diff --git a/gn/standalone/toolchain/msvc.gni b/gn/standalone/toolchain/msvc.gni
index 34c0a50..7ec7313 100644
--- a/gn/standalone/toolchain/msvc.gni
+++ b/gn/standalone/toolchain/msvc.gni
@@ -36,22 +36,28 @@
 
   # These variables are required both for clang-cl.exe and MSVC (cl.exe).
   win_sdk_lib_dir = _win_sdk_base + "\\Lib\\" + _win_sdk_ver
-  win_msvc_lib_dir = _win_msvc_base + "\\lib\\x64"
+  win_msvc_lib_dir = _win_msvc_base + "\\lib\\${target_cpu}"
 
   # These variables are only required when building with MSVC.
   # Clang is clever enough to figure out the right include path by querying the
   # registry and detect the Windows SDK path (it still needs the /LIBPATH
   # though, hence the _lib_dir above).
-  win_msvc_bin_dir = _win_msvc_base + "\\bin\\Hostx64\\x64"
+  win_msvc_bin_dir = _win_msvc_base + "\\bin\\Host${host_cpu}\\${target_cpu}"
   win_msvc_inc_dirs = [
     _win_msvc_base + "\\include",
     _win_sdk_base + "\\Include\\" + _win_sdk_ver + "\\ucrt",
     _win_sdk_base + "\\Include\\" + _win_sdk_ver + "\\um",
     _win_sdk_base + "\\Include\\" + _win_sdk_ver + "\\shared",
   ]
+  win_msvc_sys_lib_flags = [
+    "/LIBPATH:\"${win_sdk_lib_dir}\\ucrt\\${target_cpu}\"",
+    "/LIBPATH:\"${win_sdk_lib_dir}\\um\\${target_cpu}\"",
+    "/LIBPATH:\"${win_msvc_lib_dir}\"",
+  ]
 } else {
   win_sdk_lib_dir = ""
   win_msvc_lib_dir = ""
   win_msvc_bin_dir = ""
   win_msvc_inc_dirs = []
+  win_msvc_sys_lib_flags = []
 }
diff --git a/include/perfetto/base/build_configs/android_tree/perfetto_build_flags.h b/include/perfetto/base/build_configs/android_tree/perfetto_build_flags.h
index d68f557..649d9ed 100644
--- a/include/perfetto/base/build_configs/android_tree/perfetto_build_flags.h
+++ b/include/perfetto/base/build_configs/android_tree/perfetto_build_flags.h
@@ -40,6 +40,7 @@
 #define PERFETTO_BUILDFLAG_DEFINE_PERFETTO_ZLIB() (1)
 #define PERFETTO_BUILDFLAG_DEFINE_PERFETTO_TRACED_PERF() (1)
 #define PERFETTO_BUILDFLAG_DEFINE_PERFETTO_HEAPPROFD() (1)
+#define PERFETTO_BUILDFLAG_DEFINE_PERFETTO_STDERR_CRASH_DUMP() (0)
 
 // clang-format on
 #endif  // GEN_BUILD_CONFIG_PERFETTO_BUILD_FLAGS_H_
diff --git a/include/perfetto/base/build_configs/bazel/perfetto_build_flags.h b/include/perfetto/base/build_configs/bazel/perfetto_build_flags.h
index dad653c..393c2b2 100644
--- a/include/perfetto/base/build_configs/bazel/perfetto_build_flags.h
+++ b/include/perfetto/base/build_configs/bazel/perfetto_build_flags.h
@@ -40,6 +40,7 @@
 #define PERFETTO_BUILDFLAG_DEFINE_PERFETTO_ZLIB() (1)
 #define PERFETTO_BUILDFLAG_DEFINE_PERFETTO_TRACED_PERF() (0)
 #define PERFETTO_BUILDFLAG_DEFINE_PERFETTO_HEAPPROFD() (0)
+#define PERFETTO_BUILDFLAG_DEFINE_PERFETTO_STDERR_CRASH_DUMP() (0)
 
 // clang-format on
 #endif  // GEN_BUILD_CONFIG_PERFETTO_BUILD_FLAGS_H_
diff --git a/include/perfetto/tracing/event_context.h b/include/perfetto/tracing/event_context.h
index 21cb788..e0c1a16 100644
--- a/include/perfetto/tracing/event_context.h
+++ b/include/perfetto/tracing/event_context.h
@@ -72,7 +72,7 @@
     static_assert(std::is_base_of<protozero::Message, MessageType>::value,
                   "TracedProto can be used only with protozero messages");
 
-    return TracedProto<MessageType>(message, *this);
+    return TracedProto<MessageType>(message, this);
   }
 
  private:
diff --git a/include/perfetto/tracing/traced_proto.h b/include/perfetto/tracing/traced_proto.h
index a52972b..4298bcc 100644
--- a/include/perfetto/tracing/traced_proto.h
+++ b/include/perfetto/tracing/traced_proto.h
@@ -60,8 +60,6 @@
 
   MessageType* message() { return message_; }
 
-  EventContext& context() const { return context_; }
-
   // Write additional untyped values into the same context, which is useful
   // when a given C++ class has a typed representation, but also either has
   // members which can only be written into an untyped context (e.g. they are
@@ -87,14 +85,47 @@
     return TracedDictionary(message_, MessageType::kDebugAnnotations, nullptr);
   }
 
+  // Write a nested message into a field according to the provided metadata.
+  template <typename FieldMetadata>
+  TracedProto<typename FieldMetadata::cpp_field_type> WriteNestedMessage() {
+    static_assert(std::is_base_of<MessageType,
+                                  typename FieldMetadata::message_type>::value,
+                  "Field should belong to the current message");
+    return TracedProto<typename FieldMetadata::cpp_field_type>(
+        message_->template BeginNestedMessage<
+            typename FieldMetadata::cpp_field_type>(FieldMetadata::kFieldId),
+        context_);
+  }
+
+  template <typename FieldMetadata>
+  TracedProto<typename FieldMetadata::cpp_field_type> WriteNestedMessage(
+      protozero::proto_utils::internal::FieldMetadataHelper<FieldMetadata>) {
+    return WriteNestedMessage<FieldMetadata>();
+  }
+
  private:
   friend class EventContext;
+  // Allow TracedProto<Foo> to create TracedProto<Bar>.
+  template <typename T>
+  friend class TracedProto;
 
-  TracedProto(MessageType* message, EventContext& context)
+  // Wraps a raw protozero message using the same context as the current object.
+  template <typename ChildMessageType>
+  TracedProto<ChildMessageType> Wrap(ChildMessageType* message) {
+    return TracedProto(message, context_);
+  }
+
+  // Context might be null here when writing a typed message which is
+  // nested into an untyped legacy trace event macro argument.
+  // TODO(altimin): Turn this into EventContext& when this case is eliminated
+  // and expose it in public API.
+  EventContext* context() const { return context_; }
+
+  TracedProto(MessageType* message, EventContext* context)
       : message_(message), context_(context) {}
 
   MessageType* const message_;
-  EventContext& context_;
+  EventContext* context_;
 };
 
 namespace internal {
@@ -157,11 +188,7 @@
       std::is_same<Check, void>::value>
   Write(TracedProto<Proto> context, ValueType&& value) {
     // TODO(altimin): support TraceFormatTraits here.
-    value.WriteIntoTrace(
-        context.context().Wrap(context.message()
-                                   ->template BeginNestedMessage<
-                                       typename FieldMetadata::cpp_field_type>(
-                                       FieldMetadata::kFieldId)));
+    value.WriteIntoTrace(context.template WriteNestedMessage<FieldMetadata>());
   }
 
   // Nested repeated non-packed field.
@@ -173,11 +200,7 @@
   Write(TracedProto<Proto> context, ValueType&& value) {
     // TODO(altimin): support TraceFormatTraits here.
     for (auto&& item : value) {
-      item.WriteIntoTrace(context.context().Wrap(
-          context.message()
-              ->template BeginNestedMessage<
-                  typename FieldMetadata::cpp_field_type>(
-                  FieldMetadata::kFieldId)));
+      item.WriteIntoTrace(context.template WriteNestedMessage<FieldMetadata>());
     }
   }
 };
diff --git a/infra/oss-fuzz/build_fuzzers b/infra/oss-fuzz/build_fuzzers
index 7f1fe48..95d9383 100755
--- a/infra/oss-fuzz/build_fuzzers
+++ b/infra/oss-fuzz/build_fuzzers
@@ -11,9 +11,9 @@
 
 GN_ARGS="is_clang=true is_debug=false is_fuzzer=true use_libfuzzer=false \
 link_fuzzer=\"-lFuzzingEngine\" is_hermetic_clang=false \
-use_custom_libcxx=false \
-extra_cflags=\"$CFLAGS -Wno-implicit-int-float-conversion\" \
-extra_cxxflags=\"$CXXFLAGS\" extra_ldflags=\"$CXXFLAGS\" \
+use_custom_libcxx=false is_cross_compiling=true \
+extra_target_cflags=\"$CFLAGS -Wno-implicit-int-float-conversion\" \
+extra_target_cxxflags=\"$CXXFLAGS\" extra_target_ldflags=\"$CXXFLAGS\" \
 is_system_compiler=true cc=\"$CC\" cxx=\"$CXX\" linker=\"gold\""
 
 if [ "$SANITIZER" = "address" ]; then
diff --git a/src/android_stats/perfetto_atoms.h b/src/android_stats/perfetto_atoms.h
index caebb2f..0a857b9 100644
--- a/src/android_stats/perfetto_atoms.h
+++ b/src/android_stats/perfetto_atoms.h
@@ -68,7 +68,8 @@
   kTracedEnableTracingOom = 34,
   kTracedEnableTracingUnknown = 35,
   kTracedStartTracingInvalidSessionState = 36,
-  kTracedEnableTracingInvalidFilter = 37,  // TODO actually add this
+  kTracedEnableTracingInvalidFilter = 47,
+  kTracedEnableTracingOobTargetBuffer = 48,
 
   // Checkpoints inside perfetto_cmd after tracing has finished.
   kOnTracingDisabled = 4,
diff --git a/src/base/debug_crash_stack_trace.cc b/src/base/debug_crash_stack_trace.cc
index 4da0fba..2a55d2b 100644
--- a/src/base/debug_crash_stack_trace.cc
+++ b/src/base/debug_crash_stack_trace.cc
@@ -107,6 +107,7 @@
 }
 
 void RestoreSignalHandlers() {
+  g_sighandler_registered = false;
   for (size_t i = 0; i < sizeof(g_signals) / sizeof(g_signals[0]); i++)
     sigaction(g_signals[i].sig_num, &g_signals[i].old_handler, nullptr);
 }
@@ -233,9 +234,13 @@
   }
 }
 
+}  // namespace
+
+namespace perfetto {
 // __attribute__((constructor)) causes a static initializer that automagically
 // early runs this function before the main().
-void __attribute__((constructor)) EnableStacktraceOnCrashForDebug();
+void PERFETTO_EXPORT __attribute__((constructor))
+EnableStacktraceOnCrashForDebug();
 
 void EnableStacktraceOnCrashForDebug() {
   if (g_sighandler_registered)
@@ -259,7 +264,6 @@
   // (ii) the output of death test is not visible.
   pthread_atfork(nullptr, nullptr, &RestoreSignalHandlers);
 }
-
-}  // namespace
+}  // namespace perfetto
 
 #pragma GCC diagnostic pop
diff --git a/src/profiling/memory/client_api_factory_standalone.cc b/src/profiling/memory/client_api_factory_standalone.cc
index b49da1f..da2a083 100644
--- a/src/profiling/memory/client_api_factory_standalone.cc
+++ b/src/profiling/memory/client_api_factory_standalone.cc
@@ -45,6 +45,8 @@
 //   service. This happens in CreateClient.
 
 namespace perfetto {
+void EnableStacktraceOnCrashForDebug();
+
 namespace profiling {
 namespace {
 
@@ -107,6 +109,11 @@
 
   daemon(/* nochdir= */ 0, /* noclose= */ 1);
 
+  // On debug builds, we want to turn on crash reporting for heapprofd.
+#if PERFETTO_BUILDFLAG(PERFETTO_STDERR_CRASH_DUMP)
+  EnableStacktraceOnCrashForDebug();
+#endif
+
   cli_sock.ReleaseFd();
 
   // Leave stderr open for logging.
diff --git a/src/profiling/memory/heapprofd_end_to_end_test.cc b/src/profiling/memory/heapprofd_end_to_end_test.cc
index 97c9876..d406ace 100644
--- a/src/profiling/memory/heapprofd_end_to_end_test.cc
+++ b/src/profiling/memory/heapprofd_end_to_end_test.cc
@@ -1752,7 +1752,7 @@
 #error "Need to start daemons for Linux test."
 #endif
 
-INSTANTIATE_TEST_CASE_P(DISABLED_Run,
+INSTANTIATE_TEST_CASE_P(Run,
                         HeapprofdEndToEnd,
                         Values(std::make_tuple(TestMode::kStatic,
                                                AllocatorMode::kCustom)),
diff --git a/src/profiling/memory/heapprofd_producer.h b/src/profiling/memory/heapprofd_producer.h
index 83ce0e5..51cc0c7 100644
--- a/src/profiling/memory/heapprofd_producer.h
+++ b/src/profiling/memory/heapprofd_producer.h
@@ -186,7 +186,8 @@
     ProcessState(GlobalCallstackTrie* c, bool d)
         : callsites(c), dump_at_max_mode(d) {}
     bool disconnected = false;
-    SharedRingBuffer::ErrorState error_state;
+    SharedRingBuffer::ErrorState error_state =
+        SharedRingBuffer::ErrorState::kNoError;
     bool buffer_corrupted = false;
 
     uint64_t heap_samples = 0;
diff --git a/src/profiling/memory/shared_ring_buffer_write_fuzzer.cc b/src/profiling/memory/shared_ring_buffer_write_fuzzer.cc
index 486033e..62b82eb 100644
--- a/src/profiling/memory/shared_ring_buffer_write_fuzzer.cc
+++ b/src/profiling/memory/shared_ring_buffer_write_fuzzer.cc
@@ -70,6 +70,7 @@
   memcpy(&header, data, sizeof(header));
   SharedRingBuffer::MetadataPage& metadata_page = header.metadata_page;
   metadata_page.spinlock.locked = false;
+  metadata_page.spinlock.poisoned = false;
 
   PERFETTO_CHECK(ftruncate(*fd, static_cast<off_t>(total_size_pages *
                                                    base::kPageSize)) == 0);
diff --git a/src/profiling/perf/event_config.cc b/src/profiling/perf/event_config.cc
index a7b6533..7b5170e 100644
--- a/src/profiling/perf/event_config.cc
+++ b/src/profiling/perf/event_config.cc
@@ -298,7 +298,7 @@
     // expected = rate * period, with a conversion of period from ms to s:
     uint64_t expected_samples_per_tick =
         1 + (sampling_frequency * read_tick_period_ms) / 1000;
-    // Double the the limit to account of actual sample rate uncertainties, as
+    // Double the limit to account for actual sample rate uncertainties, as
     // well as any other factors:
     samples_per_tick_limit = 2 * expected_samples_per_tick;
   } else {  // sampling_period
diff --git a/src/protozero/filtering/filter_bytecode_parser.h b/src/protozero/filtering/filter_bytecode_parser.h
index b2378ca..6ecbdfb 100644
--- a/src/protozero/filtering/filter_bytecode_parser.h
+++ b/src/protozero/filtering/filter_bytecode_parser.h
@@ -111,7 +111,7 @@
   std::vector<uint32_t> words_;
 
   // One entry for each message index stored in the filter plus a sentinel at
-  // the end. Maps each message index to the offset in |words_| where the the
+  // the end. Maps each message index to the offset in |words_| where the
   // Nth message start.
   // message_offset_.size() - 2 == the max message id that can be parsed.
   std::vector<uint32_t> message_offset_;
diff --git a/src/protozero/filtering/message_filter.h b/src/protozero/filtering/message_filter.h
index 37dc9b5..80ddd0e 100644
--- a/src/protozero/filtering/message_filter.h
+++ b/src/protozero/filtering/message_filter.h
@@ -130,7 +130,7 @@
   // Gets into an error state which swallows all the input and emits no output.
   void SetUnrecoverableErrorState();
 
-  // We keep track of the the nest of messages in a stack. Each StackState
+  // We keep track of the nest of messages in a stack. Each StackState
   // object corresponds to a level of nesting in the proto message structure.
   // Every time a new field of type len-delimited that has a corresponding
   // sub-message in the bytecode is encountered, a new StackState is pushed in
diff --git a/src/trace_processor/containers/bit_vector.h b/src/trace_processor/containers/bit_vector.h
index 20773dd..5f9c27a 100644
--- a/src/trace_processor/containers/bit_vector.h
+++ b/src/trace_processor/containers/bit_vector.h
@@ -491,7 +491,7 @@
       // mask: 00000000001111111
       uint64_t mask = MaskAllBitsSetUntil(idx);
 
-      // Finish up by anding the the atom with the computed msk.
+      // Finish up by and'ing the atom with the computed mask.
       return word_ & mask;
     }
 
diff --git a/src/trace_processor/importers/proto/heap_profile_tracker.cc b/src/trace_processor/importers/proto/heap_profile_tracker.cc
index 968da7d..45a5c7b 100644
--- a/src/trace_processor/importers/proto/heap_profile_tracker.cc
+++ b/src/trace_processor/importers/proto/heap_profile_tracker.cc
@@ -30,6 +30,8 @@
 struct MergedCallsite {
   StringId frame_name;
   StringId mapping_name;
+  base::Optional<StringId> source_file;
+  base::Optional<uint32_t> line_number;
   base::Optional<uint32_t> parent_idx;
   bool operator<(const MergedCallsite& o) const {
     return std::tie(frame_name, mapping_name, parent_idx) <
@@ -62,7 +64,7 @@
     base::Optional<StringId> deobfuscated_name =
         frames_tbl.deobfuscated_name()[frame_idx];
     return {{deobfuscated_name ? *deobfuscated_name : frame_name, mapping_name,
-             base::nullopt}};
+             base::nullopt, base::nullopt, base::nullopt}};
   }
 
   std::vector<MergedCallsite> result;
@@ -74,8 +76,9 @@
        i < symbols_tbl.row_count() &&
        symbols_tbl.symbol_set_id()[i] == *symbol_set_id;
        ++i) {
-    result.emplace_back(
-        MergedCallsite{symbols_tbl.name()[i], mapping_name, base::nullopt});
+    result.emplace_back(MergedCallsite{
+        symbols_tbl.name()[i], mapping_name, symbols_tbl.source_file()[i],
+        symbols_tbl.line_number()[i], base::nullopt});
   }
   std::reverse(result.begin(), result.end());
   return result;
@@ -118,6 +121,7 @@
     auto callsites = GetMergedCallsites(storage, i);
     // Loop below needs to run at least once for parent_idx to get updated.
     PERFETTO_CHECK(!callsites.empty());
+    std::map<MergedCallsite, uint32_t> callsites_to_rowid;
     for (MergedCallsite& merged_callsite : callsites) {
       merged_callsite.parent_idx = parent_idx;
       auto it = merged_callsites_to_table_idx.find(merged_callsite);
@@ -137,14 +141,35 @@
         row.map_name = merged_callsite.mapping_name;
         if (parent_idx)
           row.parent_id = tbl->id()[*parent_idx];
-
         parent_idx = tbl->Insert(std::move(row)).row;
+        callsites_to_rowid[merged_callsite] =
+            static_cast<uint32_t>(merged_callsites_to_table_idx.size());
+
         PERFETTO_CHECK(merged_callsites_to_table_idx.size() ==
                        tbl->row_count());
+      } else {
+        MergedCallsite saved_callsite = it->first;
+        callsites_to_rowid.erase(saved_callsite);
+        if (saved_callsite.source_file != merged_callsite.source_file) {
+          saved_callsite.source_file = base::nullopt;
+        }
+        if (saved_callsite.line_number != merged_callsite.line_number) {
+          saved_callsite.line_number = base::nullopt;
+        }
+        callsites_to_rowid[saved_callsite] = it->second;
       }
       parent_idx = it->second;
     }
 
+    for (const auto& it : callsites_to_rowid) {
+      if (it.first.source_file) {
+        tbl->mutable_source_file()->Set(it.second, *it.first.source_file);
+      }
+      if (it.first.line_number) {
+        tbl->mutable_line_number()->Set(it.second, *it.first.line_number);
+      }
+    }
+
     PERFETTO_CHECK(parent_idx);
     callsite_to_merged_callsite[i] = *parent_idx;
   }
diff --git a/src/trace_processor/importers/proto/profiler_util.cc b/src/trace_processor/importers/proto/profiler_util.cc
index 175942f..c28f2fc 100644
--- a/src/trace_processor/importers/proto/profiler_util.cc
+++ b/src/trace_processor/importers/proto/profiler_util.cc
@@ -104,7 +104,8 @@
     return "com.google.android.gm";
   }
 
-  if (location.find("PrebuiltGmsCore") != std::string::npos) {
+  if (location.find("PrebuiltGmsCore") != std::string::npos ||
+      location.find("com.google.android.gms") != std::string::npos) {
     return "com.google.android.gms";
   }
 
diff --git a/src/trace_processor/importers/proto/track_event_parser.cc b/src/trace_processor/importers/proto/track_event_parser.cc
index 7926bd3..6be84c5 100644
--- a/src/trace_processor/importers/proto/track_event_parser.cc
+++ b/src/trace_processor/importers/proto/track_event_parser.cc
@@ -110,6 +110,8 @@
   }
   bool AddJson(const Key& key, const protozero::ConstChars& value) final {
     auto json_value = json::ParseJsonString(value);
+    if (!json_value)
+      return false;
     return json::AddJsonValueToArgs(*json_value, base::StringView(key.flat_key),
                                     base::StringView(key.key), &storage_,
                                     &inserter_);
diff --git a/src/trace_processor/metrics/android/android_sysui_cuj.sql b/src/trace_processor/metrics/android/android_sysui_cuj.sql
index 2a0bba1..e5b4079 100644
--- a/src/trace_processor/metrics/android/android_sysui_cuj.sql
+++ b/src/trace_processor/metrics/android/android_sysui_cuj.sql
@@ -64,44 +64,44 @@
 JOIN thread_track ON slice.track_id = thread_track.id
 JOIN android_sysui_cuj_thread thread USING (utid)
 JOIN android_sysui_cuj_last_cuj last_cuj
-ON ts >= last_cuj.ts_start AND ts <= last_cuj.ts_end
+ON ts + slice.dur >= last_cuj.ts_start AND ts <= last_cuj.ts_end
 WHERE slice.dur > 0;
 
 DROP TABLE IF EXISTS android_sysui_cuj_main_thread_slices_in_cuj;
 CREATE TABLE android_sysui_cuj_main_thread_slices_in_cuj AS
 SELECT slices.* FROM android_sysui_cuj_main_thread_slices slices
 JOIN android_sysui_cuj_last_cuj last_cuj
-ON ts >= last_cuj.ts_start AND ts <= last_cuj.ts_end;
+ON ts + slices.dur >= last_cuj.ts_start AND ts <= last_cuj.ts_end;
 
 DROP TABLE IF EXISTS android_sysui_cuj_do_frame_slices_in_cuj;
 CREATE TABLE android_sysui_cuj_do_frame_slices_in_cuj AS
 SELECT slices.* FROM android_sysui_cuj_do_frame_slices slices
 JOIN android_sysui_cuj_last_cuj last_cuj
-ON ts >= last_cuj.ts_start AND ts <= last_cuj.ts_end;
+ON ts + slices.dur >= last_cuj.ts_start AND ts <= last_cuj.ts_end;
 
 DROP TABLE IF EXISTS android_sysui_cuj_render_thread_slices_in_cuj;
 CREATE TABLE android_sysui_cuj_render_thread_slices_in_cuj AS
 SELECT slices.* FROM android_sysui_cuj_render_thread_slices slices
 JOIN android_sysui_cuj_last_cuj last_cuj
-ON ts >= last_cuj.ts_start AND ts <= last_cuj.ts_end;
+ON ts + slices.dur >= last_cuj.ts_start AND ts <= last_cuj.ts_end;
 
 DROP TABLE IF EXISTS android_sysui_cuj_draw_frame_slices_in_cuj;
 CREATE TABLE android_sysui_cuj_draw_frame_slices_in_cuj AS
 SELECT slices.* FROM android_sysui_cuj_draw_frame_slices slices
 JOIN android_sysui_cuj_last_cuj last_cuj
-ON ts >= last_cuj.ts_start AND ts <= last_cuj.ts_end;
+ON ts + slices.dur >= last_cuj.ts_start AND ts <= last_cuj.ts_end;
 
 DROP TABLE IF EXISTS android_sysui_cuj_hwc_release_slices_in_cuj;
 CREATE TABLE android_sysui_cuj_hwc_release_slices_in_cuj AS
 SELECT slices.* FROM android_sysui_cuj_hwc_release_slices slices
 JOIN android_sysui_cuj_last_cuj last_cuj
-ON ts >= last_cuj.ts_start AND ts <= last_cuj.ts_end;
+ON ts + slices.dur >= last_cuj.ts_start AND ts <= last_cuj.ts_end;
 
 DROP TABLE IF EXISTS android_sysui_cuj_gpu_completion_slices_in_cuj;
 CREATE TABLE android_sysui_cuj_gpu_completion_slices_in_cuj AS
 SELECT slices.* FROM android_sysui_cuj_gpu_completion_slices slices
 JOIN android_sysui_cuj_last_cuj last_cuj
-ON ts >= last_cuj.ts_start AND ts <= last_cuj.ts_end;
+ON ts + slices.dur >= last_cuj.ts_start AND ts <= last_cuj.ts_end;
 
 DROP TABLE IF EXISTS android_sysui_cuj_jit_slices;
 CREATE TABLE android_sysui_cuj_jit_slices AS
@@ -126,8 +126,6 @@
   FROM expected_frame_timeline_slice expected
   JOIN android_sysui_cuj_last_cuj cuj
     ON expected.upid = cuj.upid
-    AND expected.ts + expected.dur > cuj.ts_start
-    AND expected.ts < cuj.ts_end
   JOIN actual_frame_timeline_slice actual
     ON expected.surface_frame_token = actual.surface_frame_token
     AND expected.upid = actual.upid
diff --git a/src/trace_processor/tables/profiler_tables.h b/src/trace_processor/tables/profiler_tables.h
index 419d852..c51ac91 100644
--- a/src/trace_processor/tables/profiler_tables.h
+++ b/src/trace_processor/tables/profiler_tables.h
@@ -282,7 +282,9 @@
   C(int64_t, cumulative_alloc_count)                                      \
   C(int64_t, alloc_size)                                                  \
   C(int64_t, cumulative_alloc_size)                                       \
-  C(base::Optional<ExperimentalFlamegraphNodesTable::Id>, parent_id)
+  C(base::Optional<ExperimentalFlamegraphNodesTable::Id>, parent_id)      \
+  C(base::Optional<StringPool::Id>, source_file)                          \
+  C(base::Optional<uint32_t>, line_number)
 
 PERFETTO_TP_TABLE(PERFETTO_TP_EXPERIMENTAL_FLAMEGRAPH_NODES);
 
diff --git a/src/traced/probes/filesystem/inode_file_data_source_unittest.cc b/src/traced/probes/filesystem/inode_file_data_source_unittest.cc
index edd208b..ea62a29 100644
--- a/src/traced/probes/filesystem/inode_file_data_source_unittest.cc
+++ b/src/traced/probes/filesystem/inode_file_data_source_unittest.cc
@@ -133,7 +133,7 @@
   EXPECT_CALL(*data_source, FillInodeEntry(_, buf.st_ino, Eq(value)));
 
   data_source->OnInodes({{buf.st_ino, buf.st_dev}});
-  // Expect that the found inode is not added the the LRU cache.
+  // Expect that the found inode is not added to the LRU cache.
   EXPECT_THAT(cache_.Get(std::make_pair(buf.st_dev, buf.st_ino)), IsNull());
 }
 
diff --git a/src/traced/probes/ftrace/atrace_wrapper.cc b/src/traced/probes/ftrace/atrace_wrapper.cc
index 2522e47..43b9862 100644
--- a/src/traced/probes/ftrace/atrace_wrapper.cc
+++ b/src/traced/probes/ftrace/atrace_wrapper.cc
@@ -25,16 +25,24 @@
 #include <sys/wait.h>
 #include <unistd.h>
 
+#include "perfetto/base/build_config.h"
 #include "perfetto/base/logging.h"
 #include "perfetto/base/time.h"
+#include "perfetto/ext/base/optional.h"
 #include "perfetto/ext/base/pipe.h"
+#include "perfetto/ext/base/string_utils.h"
 #include "perfetto/ext/base/utils.h"
 
+#if PERFETTO_BUILDFLAG(PERFETTO_OS_ANDROID)
+#include <sys/system_properties.h>
+#endif  // PERFETTO_BUILDFLAG(PERFETTO_OS_ANDROID)
+
 namespace perfetto {
 
 namespace {
 
 RunAtraceFunction g_run_atrace_for_testing = nullptr;
+base::Optional<bool> g_is_old_atrace_for_testing{};
 
 #if PERFETTO_BUILDFLAG(PERFETTO_OS_ANDROID)
 // Args should include "atrace" for argv[0].
@@ -183,4 +191,29 @@
   g_run_atrace_for_testing = f;
 }
 
+bool IsOldAtrace() {
+  if (g_is_old_atrace_for_testing.has_value())
+    return *g_is_old_atrace_for_testing;
+#if PERFETTO_BUILDFLAG(PERFETTO_OS_ANDROID) && \
+    !PERFETTO_BUILDFLAG(PERFETTO_ANDROID_BUILD)
+  // Sideloaded case. We could be sideloaded on a modern device or an older one.
+  char str_value[PROP_VALUE_MAX];
+  if (!__system_property_get("ro.build.version.sdk", str_value))
+    return false;
+  auto opt_value = base::CStringToUInt32(str_value);
+  return opt_value.has_value() && *opt_value < 28;  // 28 == Android P.
+#else
+  // In in-tree builds we know that atrace is current, no runtime checks needed.
+  return false;
+#endif
+}
+
+void SetIsOldAtraceForTesting(bool value) {
+  g_is_old_atrace_for_testing = value;
+}
+
+void ClearIsOldAtraceForTesting() {
+  g_is_old_atrace_for_testing.reset();
+}
+
 }  // namespace perfetto
diff --git a/src/traced/probes/ftrace/atrace_wrapper.h b/src/traced/probes/ftrace/atrace_wrapper.h
index 264cee0..91f02cb 100644
--- a/src/traced/probes/ftrace/atrace_wrapper.h
+++ b/src/traced/probes/ftrace/atrace_wrapper.h
@@ -26,6 +26,15 @@
 using RunAtraceFunction =
     std::add_pointer<bool(const std::vector<std::string>& /*args*/)>::type;
 
+// When we are sideloaded on an old version of Android (pre P), we cannot use
+// atrace --only_userspace because that option doesn't exist. In that case we:
+// - Just use atrace --async_start/stop, which will cause atrace to also
+//   poke at ftrace.
+// - Suppress the checks for "somebody else enabled ftrace unexpectedly".
+bool IsOldAtrace();
+void SetIsOldAtraceForTesting(bool);
+void ClearIsOldAtraceForTesting();
+
 bool RunAtrace(const std::vector<std::string>& args);
 void SetRunAtraceForTesting(RunAtraceFunction);
 
diff --git a/src/traced/probes/ftrace/ftrace_config_muxer.cc b/src/traced/probes/ftrace/ftrace_config_muxer.cc
index 24b84b5..0af1f2c 100644
--- a/src/traced/probes/ftrace/ftrace_config_muxer.cc
+++ b/src/traced/probes/ftrace/ftrace_config_muxer.cc
@@ -455,7 +455,7 @@
     PERFETTO_DCHECK(active_configs_.empty());
 
     // If someone outside of perfetto is using ftrace give up now.
-    if (is_ftrace_enabled) {
+    if (is_ftrace_enabled && !IsOldAtrace()) {
       PERFETTO_ELOG("ftrace in use by non-Perfetto.");
       return 0;
     }
@@ -466,7 +466,7 @@
     SetupBufferSize(request);
   } else {
     // Did someone turn ftrace off behind our back? If so give up.
-    if (!active_configs_.empty() && !is_ftrace_enabled) {
+    if (!active_configs_.empty() && !is_ftrace_enabled && !IsOldAtrace()) {
       PERFETTO_ELOG("ftrace disabled by non-Perfetto.");
       return 0;
     }
@@ -486,8 +486,15 @@
     }
   }
 
-  if (RequiresAtrace(request))
+  if (RequiresAtrace(request)) {
+    if (IsOldAtrace() && !ds_configs_.empty()) {
+      PERFETTO_ELOG(
+          "Concurrent atrace sessions are not supported before Android P, "
+          "bailing out.");
+      return 0;
+    }
     UpdateAtrace(request);
+  }
 
   for (const auto& group_and_name : events) {
     const Event* event = table_->GetOrCreateEvent(group_and_name);
@@ -533,7 +540,7 @@
   }
 
   if (active_configs_.empty()) {
-    if (ftrace_->IsTracingEnabled()) {
+    if (ftrace_->IsTracingEnabled() && !IsOldAtrace()) {
       // If someone outside of perfetto is using ftrace give up now.
       PERFETTO_ELOG("ftrace in use by non-Perfetto.");
       return false;
@@ -695,7 +702,8 @@
   std::vector<std::string> args;
   args.push_back("atrace");  // argv0 for exec()
   args.push_back("--async_start");
-  args.push_back("--only_userspace");
+  if (!IsOldAtrace())
+    args.push_back("--only_userspace");
 
   for (const auto& category : categories)
     args.push_back(category);
@@ -721,7 +729,10 @@
 
   PERFETTO_DLOG("Stop atrace...");
 
-  if (RunAtrace({"atrace", "--async_stop", "--only_userspace"})) {
+  std::vector<std::string> args{"atrace", "--async_stop"};
+  if (!IsOldAtrace())
+    args.push_back("--only_userspace");
+  if (RunAtrace(args)) {
     current_state_.atrace_categories.clear();
     current_state_.atrace_apps.clear();
     current_state_.atrace_on = false;
diff --git a/src/traced/probes/ftrace/ftrace_config_muxer.h b/src/traced/probes/ftrace/ftrace_config_muxer.h
index 3965371..5aee63a 100644
--- a/src/traced/probes/ftrace/ftrace_config_muxer.h
+++ b/src/traced/probes/ftrace/ftrace_config_muxer.h
@@ -53,7 +53,7 @@
   std::vector<std::string> atrace_apps;
   std::vector<std::string> atrace_categories;
 
-  // When enabled will turn on the the kallsyms symbolizer in CpuReader.
+  // When enabled will turn on the kallsyms symbolizer in CpuReader.
   const bool symbolize_ksyms;
 };
 
diff --git a/src/traced/probes/ftrace/ftrace_config_muxer_unittest.cc b/src/traced/probes/ftrace/ftrace_config_muxer_unittest.cc
index c3602aa..312f849 100644
--- a/src/traced/probes/ftrace/ftrace_config_muxer_unittest.cc
+++ b/src/traced/probes/ftrace/ftrace_config_muxer_unittest.cc
@@ -26,11 +26,11 @@
 
 using testing::_;
 using testing::AnyNumber;
-using testing::MatchesRegex;
 using testing::Contains;
 using testing::ElementsAreArray;
 using testing::Eq;
 using testing::IsEmpty;
+using testing::MatchesRegex;
 using testing::NiceMock;
 using testing::Not;
 using testing::Return;
@@ -93,6 +93,14 @@
 
 class FtraceConfigMuxerTest : public ::testing::Test {
  protected:
+  void SetUp() override {
+    // Don't probe for older SDK levels, that would relax the atrace-related
+    // checks on older versions of Android (But some tests here test those).
+    // We want the unittests to behave consistently (as if we were on a post P
+    // device) regardless of the Android versions they run on.
+    SetIsOldAtraceForTesting(false);
+  }
+  void TearDown() override { ClearIsOldAtraceForTesting(); }
   std::unique_ptr<MockProtoTranslationTable> GetMockTable() {
     std::vector<Field> common_fields;
     std::vector<Event> events;
diff --git a/src/tracing/BUILD.gn b/src/tracing/BUILD.gn
index 848751e..b2eb293 100644
--- a/src/tracing/BUILD.gn
+++ b/src/tracing/BUILD.gn
@@ -159,10 +159,15 @@
       ":client_api_without_backends",
       ":platform_impl",
     ]
-    sources += [
-      "traced_proto_unittest.cc",
-      "traced_value_unittest.cc",
-    ]
+
+    # TODO(primiano/altimin): these sources build with clang-cl but fail to
+    # build with MSVC 2019. Investigate.
+    if (!is_win || is_clang) {
+      sources += [
+        "traced_proto_unittest.cc",
+        "traced_value_unittest.cc",
+      ]
+    }
   }
 }
 
diff --git a/src/tracing/core/tracing_service_impl.cc b/src/tracing/core/tracing_service_impl.cc
index 63839b0..1a5b43d 100644
--- a/src/tracing/core/tracing_service_impl.cc
+++ b/src/tracing/core/tracing_service_impl.cc
@@ -647,6 +647,21 @@
     return PERFETTO_SVC_ERR("Too many buffers configured (%d)",
                             cfg.buffers_size());
   }
+  // Check that the config specifies all buffers for its data sources. This
+  // is also checked in SetupDataSource, but it is simpler to return a proper
+  // error to the consumer from here (and there will be less state to undo).
+  for (const TraceConfig::DataSource& cfg_data_source : cfg.data_sources()) {
+    size_t num_buffers = static_cast<size_t>(cfg.buffers_size());
+    size_t target_buffer = cfg_data_source.config().target_buffer();
+    if (target_buffer >= num_buffers) {
+      MaybeLogUploadEvent(
+          cfg, PerfettoStatsdAtom::kTracedEnableTracingOobTargetBuffer);
+      return PERFETTO_SVC_ERR(
+          "Data source \"%s\" specified an out of bounds target_buffer (%zu >= "
+          "%zu)",
+          cfg_data_source.config().name().c_str(), target_buffer, num_buffers);
+    }
+  }
 
   if (!cfg.unique_session_name().empty()) {
     const std::string& name = cfg.unique_session_name();
diff --git a/src/tracing/platform_windows.cc b/src/tracing/platform_windows.cc
index 651812a..f5db52d 100644
--- a/src/tracing/platform_windows.cc
+++ b/src/tracing/platform_windows.cc
@@ -164,7 +164,7 @@
 #else  // _WIN64
 
 #pragma data_seg(".CRT$XLP")
-PIMAGE_TLS_CALLBACK perfetto_thread_callback_base = OnThreadExit;
+PIMAGE_TLS_CALLBACK perfetto_thread_callback_base = PerfettoOnThreadExit;
 // Reset the default section.
 #pragma data_seg()
 
diff --git a/test/trace_processor/graphics/android_sysui_cuj.out b/test/trace_processor/graphics/android_sysui_cuj.out
index 1cdea90..fffda29 100644
--- a/test/trace_processor/graphics/android_sysui_cuj.out
+++ b/test/trace_processor/graphics/android_sysui_cuj.out
@@ -1,6 +1,6 @@
 android_sysui_cuj {
  cuj_name: "SHADE_ROW_EXPAND"
-  cuj_start: 0
+  cuj_start: 10
   cuj_dur: 1000000000
   process {
     name: "com.android.systemui"
diff --git a/test/trace_processor/graphics/android_sysui_cuj.py b/test/trace_processor/graphics/android_sysui_cuj.py
index dcbbf9b..14dc2b0 100644
--- a/test/trace_processor/graphics/android_sysui_cuj.py
+++ b/test/trace_processor/graphics/android_sysui_cuj.py
@@ -115,9 +115,9 @@
 trace.add_thread(
     tid=JITID, tgid=PID, cmdline="Jit thread pool", name="Jit thread pool")
 trace.add_ftrace_packet(cpu=0)
-trace.add_atrace_async_begin(ts=0, tid=PID, pid=PID, buf="J<SHADE_ROW_EXPAND>")
+trace.add_atrace_async_begin(ts=10, tid=PID, pid=PID, buf="J<SHADE_ROW_EXPAND>")
 trace.add_atrace_async_end(
-    ts=1_000_000_000, tid=PID, pid=PID, buf="J<SHADE_ROW_EXPAND>")
+    ts=1_000_000_010, tid=PID, pid=PID, buf="J<SHADE_ROW_EXPAND>")
 
 add_frame(
     trace,
diff --git a/test/trace_processor/parsing/rss_stat_mm_id_clone.py b/test/trace_processor/parsing/rss_stat_mm_id_clone.py
index 3b36d88..a12f516 100644
--- a/test/trace_processor/parsing/rss_stat_mm_id_clone.py
+++ b/test/trace_processor/parsing/rss_stat_mm_id_clone.py
@@ -81,7 +81,7 @@
 # In this packet, we check what happens to kernel threads in RSS stat.
 trace.add_ftrace_packet(1)
 
-# Emit an rss stat event for the the existing kernel thread.
+# Emit an rss stat event for the existing kernel thread.
 trace.add_rss_stat(100, tid=3, member=0, size=10, mm_id=0x2345, curr=1)
 
 # Start a new kernel thread.
diff --git a/test/trace_processor/profiling/heap_profile_flamegraph_system-server-native-profile.out b/test/trace_processor/profiling/heap_profile_flamegraph_system-server-native-profile.out
index 42ab65d..b521691 100644
--- a/test/trace_processor/profiling/heap_profile_flamegraph_system-server-native-profile.out
+++ b/test/trace_processor/profiling/heap_profile_flamegraph_system-server-native-profile.out
@@ -1,11 +1,11 @@
-"id","type","depth","name","map_name","count","cumulative_count","size","cumulative_size","alloc_count","cumulative_alloc_count","alloc_size","cumulative_alloc_size","parent_id"
-0,"experimental_flamegraph_nodes",0,"__start_thread","/apex/com.android.runtime/lib64/bionic/libc.so",0,8,0,84848,0,210,0,1084996,"[NULL]"
-1,"experimental_flamegraph_nodes",1,"_ZL15__pthread_startPv","/apex/com.android.runtime/lib64/bionic/libc.so",0,8,0,84848,0,210,0,1084996,0
-2,"experimental_flamegraph_nodes",2,"_ZN7android14AndroidRuntime15javaThreadShellEPv","/system/lib64/libandroid_runtime.so",0,5,0,27704,0,77,0,348050,1
-3,"experimental_flamegraph_nodes",3,"_ZN7android6Thread11_threadLoopEPv","/system/lib64/libutils.so",0,5,0,27704,0,77,0,348050,2
-4,"experimental_flamegraph_nodes",4,"_ZN7android10PoolThread10threadLoopEv","/system/lib64/libbinder.so",0,1,0,4096,0,64,0,279182,3
-5,"experimental_flamegraph_nodes",5,"_ZN7android14IPCThreadState14joinThreadPoolEb","/system/lib64/libbinder.so",0,1,0,4096,0,64,0,279182,4
-6,"experimental_flamegraph_nodes",6,"_ZN7android14IPCThreadState20getAndExecuteCommandEv","/system/lib64/libbinder.so",0,1,0,4096,0,64,0,279182,5
-7,"experimental_flamegraph_nodes",7,"_ZN7android14IPCThreadState14executeCommandEi","/system/lib64/libbinder.so",0,1,0,4096,0,64,0,279182,6
-8,"experimental_flamegraph_nodes",8,"_ZN7android7BBinder8transactEjRKNS_6ParcelEPS1_j","/system/lib64/libbinder.so",0,1,0,4096,0,64,0,279182,7
-9,"experimental_flamegraph_nodes",9,"_ZN11JavaBBinder10onTransactEjRKN7android6ParcelEPS1_j","/system/lib64/libandroid_runtime.so",0,0,0,0,0,60,0,262730,8
+"id","type","depth","name","map_name","count","cumulative_count","size","cumulative_size","alloc_count","cumulative_alloc_count","alloc_size","cumulative_alloc_size","parent_id","source_file","line_number"
+0,"experimental_flamegraph_nodes",0,"__start_thread","/apex/com.android.runtime/lib64/bionic/libc.so",0,8,0,84848,0,210,0,1084996,"[NULL]","[NULL]","[NULL]"
+1,"experimental_flamegraph_nodes",1,"_ZL15__pthread_startPv","/apex/com.android.runtime/lib64/bionic/libc.so",0,8,0,84848,0,210,0,1084996,0,"[NULL]","[NULL]"
+2,"experimental_flamegraph_nodes",2,"_ZN7android14AndroidRuntime15javaThreadShellEPv","/system/lib64/libandroid_runtime.so",0,5,0,27704,0,77,0,348050,1,"[NULL]","[NULL]"
+3,"experimental_flamegraph_nodes",3,"_ZN7android6Thread11_threadLoopEPv","/system/lib64/libutils.so",0,5,0,27704,0,77,0,348050,2,"[NULL]","[NULL]"
+4,"experimental_flamegraph_nodes",4,"_ZN7android10PoolThread10threadLoopEv","/system/lib64/libbinder.so",0,1,0,4096,0,64,0,279182,3,"[NULL]","[NULL]"
+5,"experimental_flamegraph_nodes",5,"_ZN7android14IPCThreadState14joinThreadPoolEb","/system/lib64/libbinder.so",0,1,0,4096,0,64,0,279182,4,"[NULL]","[NULL]"
+6,"experimental_flamegraph_nodes",6,"_ZN7android14IPCThreadState20getAndExecuteCommandEv","/system/lib64/libbinder.so",0,1,0,4096,0,64,0,279182,5,"[NULL]","[NULL]"
+7,"experimental_flamegraph_nodes",7,"_ZN7android14IPCThreadState14executeCommandEi","/system/lib64/libbinder.so",0,1,0,4096,0,64,0,279182,6,"[NULL]","[NULL]"
+8,"experimental_flamegraph_nodes",8,"_ZN7android7BBinder8transactEjRKNS_6ParcelEPS1_j","/system/lib64/libbinder.so",0,1,0,4096,0,64,0,279182,7,"[NULL]","[NULL]"
+9,"experimental_flamegraph_nodes",9,"_ZN11JavaBBinder10onTransactEjRKN7android6ParcelEPS1_j","/system/lib64/libandroid_runtime.so",0,0,0,0,0,60,0,262730,8,"[NULL]","[NULL]"
diff --git a/test/trace_processor/python/api_unittest.py b/test/trace_processor/python/api_unittest.py
index 52e2c41..9feada4 100755
--- a/test/trace_processor/python/api_unittest.py
+++ b/test/trace_processor/python/api_unittest.py
@@ -20,7 +20,7 @@
 
 
 class TestQueryResultIterator(unittest.TestCase):
-  # The numbers input into cells correspond the the CellType enum values
+  # The numbers input into cells correspond to the CellType enum values
   # defined under trace_processor.proto
   CELL_VARINT = ProtoFactory().CellsBatch().CELL_VARINT
   CELL_STRING = ProtoFactory().CellsBatch().CELL_STRING
diff --git a/tools/gen_amalgamated b/tools/gen_amalgamated
index bff4641..e2faa61 100755
--- a/tools/gen_amalgamated
+++ b/tools/gen_amalgamated
@@ -580,7 +580,7 @@
   args = parser.parse_args()
   targets = args.targets or default_targets
 
-  # The CHANGELOG mtime triggers the the perfetto_version.gen.h genrule. This is
+  # The CHANGELOG mtime triggers the perfetto_version.gen.h genrule. This is
   # to avoid emitting a stale version information in the remote case of somebody
   # running gen_amalgamated incrementally after having moved to another commit.
   changelog_path = os.path.join(project_root, 'CHANGELOG')
diff --git a/tools/record_android_trace b/tools/record_android_trace
index 5de8a4e..7fcffc6 100755
--- a/tools/record_android_trace
+++ b/tools/record_android_trace
@@ -107,7 +107,7 @@
   help = 'Force the use of the sideloaded binaries rather than system daemons'
   parser.add_argument('--sideload', action='store_true', help=help)
 
-  help = ('Sideload the the given binary rather than downloading it. ' +
+  help = ('Sideload the given binary rather than downloading it. ' +
           'Implies --sideload')
   parser.add_argument('--sideload-path', default=None, help=help)
 
diff --git a/tools/trace_to_text/BUILD.gn b/tools/trace_to_text/BUILD.gn
index 503e688..1abb0a8 100644
--- a/tools/trace_to_text/BUILD.gn
+++ b/tools/trace_to_text/BUILD.gn
@@ -137,6 +137,7 @@
     "../../gn:default_deps",
     "../../gn:protobuf_full",
     "../../protos/perfetto/trace:zero",
+    "../../src/protozero:proto_ring_buffer",
   ]
   if (enable_perfetto_zlib) {
     deps += [ "../../gn:zlib" ]
diff --git a/tools/trace_to_text/trace_to_text.cc b/tools/trace_to_text/trace_to_text.cc
index 070bb14..1199d3a 100644
--- a/tools/trace_to_text/trace_to_text.cc
+++ b/tools/trace_to_text/trace_to_text.cc
@@ -23,6 +23,7 @@
 #include "perfetto/base/logging.h"
 #include "perfetto/ext/base/file_utils.h"
 #include "perfetto/ext/base/scoped_file.h"
+#include "src/protozero/proto_ring_buffer.h"
 #include "tools/trace_to_text/proto_full_utils.h"
 #include "tools/trace_to_text/trace.descriptor.h"
 #include "tools/trace_to_text/utils.h"
@@ -167,33 +168,67 @@
   const Reflection* reflect = msg->GetReflection();
   const FieldDescriptor* compressed_desc =
       trace_descriptor->FindFieldByNumber(kCompressedPacketFieldDescriptor);
-  std::unique_ptr<Message> compressed_msg_scratch(prototype->New());
-  std::string compressed_packet_scratch;
+
+  std::unique_ptr<Message> compressed_packets_msg(prototype->New());
+  std::string compressed_packets;
 
   TextFormat::Printer printer;
   printer.SetInitialIndentLevel(1);
-  ForEachPacketBlobInTrace(
-      input, [&msg, reflect, compressed_desc, zero_copy_output_ptr,
-              &compressed_packet_scratch, &compressed_msg_scratch,
-              &printer](std::unique_ptr<char[]> buf, size_t size) {
-        if (!msg->ParseFromArray(buf.get(), static_cast<int>(size))) {
-          PERFETTO_ELOG("Skipping invalid packet");
-          return;
-        }
-        if (reflect->HasField(*msg, compressed_desc)) {
-          const auto& compressed_packets = reflect->GetStringReference(
-              *msg, compressed_desc, &compressed_packet_scratch);
-          PrintCompressedPackets(compressed_packets,
-                                 compressed_msg_scratch.get(),
-                                 zero_copy_output_ptr);
-        } else {
-          WriteToZeroCopyOutput(zero_copy_output_ptr, kPacketPrefix,
-                                sizeof(kPacketPrefix) - 1);
-          printer.Print(*msg, zero_copy_output_ptr);
-          WriteToZeroCopyOutput(zero_copy_output_ptr, kPacketSuffix,
-                                sizeof(kPacketSuffix) - 1);
-        }
-      });
+
+  static constexpr size_t kMaxMsgSize = protozero::ProtoRingBuffer::kMaxMsgSize;
+  std::unique_ptr<char> data(new char[kMaxMsgSize]);
+  protozero::ProtoRingBuffer ring_buffer;
+
+  uint32_t packet = 0;
+  size_t bytes_processed = 0;
+  while (!input->eof()) {
+    input->read(data.get(), kMaxMsgSize);
+    if (input->bad() || (input->fail() && !input->eof())) {
+      PERFETTO_ELOG("Failed while reading trace");
+      return 1;
+    }
+    ring_buffer.Append(data.get(), static_cast<size_t>(input->gcount()));
+
+    for (;;) {
+      auto token = ring_buffer.ReadMessage();
+      if (token.fatal_framing_error) {
+        PERFETTO_ELOG("Failed to tokenize trace packet");
+        return 1;
+      }
+      if (!token.valid())
+        break;
+      bytes_processed += token.len;
+
+      if (token.field_id != 1) {
+        PERFETTO_ELOG("Skipping invalid field");
+        continue;
+      }
+
+      if ((packet++ & 0x3f) == 0) {
+        fprintf(stderr, "Processing trace: %8zu KB%c", bytes_processed / 1024,
+                kProgressChar);
+        fflush(stderr);
+      }
+
+      if (!msg->ParseFromArray(token.start, static_cast<int>(token.len))) {
+        PERFETTO_ELOG("Skipping invalid packet");
+        continue;
+      }
+
+      if (reflect->HasField(*msg, compressed_desc)) {
+        compressed_packets = reflect->GetStringReference(*msg, compressed_desc,
+                                                         &compressed_packets);
+        PrintCompressedPackets(compressed_packets, compressed_packets_msg.get(),
+                               zero_copy_output_ptr);
+      } else {
+        WriteToZeroCopyOutput(zero_copy_output_ptr, kPacketPrefix,
+                              sizeof(kPacketPrefix) - 1);
+        printer.Print(*msg, zero_copy_output_ptr);
+        WriteToZeroCopyOutput(zero_copy_output_ptr, kPacketSuffix,
+                              sizeof(kPacketSuffix) - 1);
+      }
+    }
+  }
   return 0;
 }
 
diff --git a/tools/trace_to_text/utils.cc b/tools/trace_to_text/utils.cc
index ff1ae61..fb0505c 100644
--- a/tools/trace_to_text/utils.cc
+++ b/tools/trace_to_text/utils.cc
@@ -48,52 +48,6 @@
 
 }  // namespace
 
-void ForEachPacketBlobInTrace(
-    std::istream* input,
-    const std::function<void(std::unique_ptr<char[]>, size_t)>& f) {
-  size_t bytes_processed = 0;
-  // The trace stream can be very large. We cannot just pass it in one go to
-  // libprotobuf as that will refuse to parse messages > 64MB. However we know
-  // that a trace is merely a sequence of TracePackets. Here we just manually
-  // tokenize the repeated TracePacket messages and parse them individually
-  // using libprotobuf.
-  for (uint32_t i = 0;; i++) {
-    if ((i & 0x3f) == 0) {
-      fprintf(stderr, "Processing trace: %8zu KB%c", bytes_processed / 1024,
-              kProgressChar);
-      fflush(stderr);
-    }
-    // A TracePacket consists in one byte stating its field id and type ...
-    char preamble;
-    input->get(preamble);
-    if (!input->good())
-      break;
-    bytes_processed++;
-    PERFETTO_DCHECK(preamble == 0x0a);  // Field ID:1, type:length delimited.
-
-    // ... a varint stating its size ...
-    uint32_t field_size = 0;
-    uint32_t shift = 0;
-    for (;;) {
-      char c = 0;
-      input->get(c);
-      field_size |= static_cast<uint32_t>(c & 0x7f) << shift;
-      shift += 7;
-      bytes_processed++;
-      if (!(c & 0x80))
-        break;
-    }
-
-    // ... and the actual TracePacket itself.
-    std::unique_ptr<char[]> buf(new char[field_size]);
-    input->read(buf.get(), static_cast<std::streamsize>(field_size));
-    bytes_processed += field_size;
-
-    f(std::move(buf), field_size);
-  }
-}
-
-
 bool ReadTrace(trace_processor::TraceProcessor* tp, std::istream* input) {
   // 1MB chunk size seems the best tradeoff on a MacBook Pro 2013 - i7 2.8 GHz.
   constexpr size_t kChunkSize = 1024 * 1024;
diff --git a/tools/trace_to_text/utils.h b/tools/trace_to_text/utils.h
index f95c3d8..063b891 100644
--- a/tools/trace_to_text/utils.h
+++ b/tools/trace_to_text/utils.h
@@ -53,11 +53,6 @@
 constexpr char kProgressChar = '\r';
 #endif
 
-void ForEachPacketBlobInTrace(
-    std::istream* input,
-    const std::function<void(std::unique_ptr<char[]>, size_t)>&);
-
-
 bool ReadTrace(trace_processor::TraceProcessor* tp, std::istream* input);
 void IngestTraceOrDie(trace_processor::TraceProcessor* tp,
                       const std::string& trace_proto);
diff --git a/ui/src/common/flamegraph_util.ts b/ui/src/common/flamegraph_util.ts
index f083428..2910952 100644
--- a/ui/src/common/flamegraph_util.ts
+++ b/ui/src/common/flamegraph_util.ts
@@ -103,7 +103,8 @@
     mapping: callsite.mapping,
     selfSize: callsite.selfSize,
     merged: callsite.merged,
-    highlighted: callsite.highlighted
+    highlighted: callsite.highlighted,
+    location: callsite.location
   };
 }
 
diff --git a/ui/src/common/state.ts b/ui/src/common/state.ts
index 347c904..e38f4b2 100644
--- a/ui/src/common/state.ts
+++ b/ui/src/common/state.ts
@@ -80,6 +80,7 @@
   mapping: string;
   merged: boolean;
   highlighted: boolean;
+  location?: string;
 }
 
 export interface TraceFileSource {
diff --git a/ui/src/controller/heap_profile_controller.ts b/ui/src/controller/heap_profile_controller.ts
index c424b8f..37af362 100644
--- a/ui/src/controller/heap_profile_controller.ts
+++ b/ui/src/controller/heap_profile_controller.ts
@@ -269,8 +269,9 @@
     const callsites = await this.args.engine.query(
         `SELECT id, IFNULL(DEMANGLE(name), name), IFNULL(parent_id, -1), depth,
         cumulative_size, cumulative_alloc_size, cumulative_count,
-        cumulative_alloc_count, map_name, size, count from ${tableName} ${
-            orderBy}`);
+        cumulative_alloc_count, map_name, size, count,
+        IFNULL(source_file, ''), IFNULL(line_number, -1)
+        from ${tableName} ${orderBy}`);
 
     const flamegraphData: CallsiteInfo[] = new Array();
     const hashToindex: Map<number, number> = new Map();
@@ -286,6 +287,17 @@
           name.toLocaleLowerCase().includes(focusRegex.toLocaleLowerCase());
       const parentId =
           hashToindex.has(+parentHash) ? hashToindex.get(+parentHash)! : -1;
+
+      let location: string|undefined;
+      if (callsites.columns[11].stringValues != null &&
+          /[a-zA-Z]/i.test(callsites.columns[11].stringValues[i])) {
+        location = callsites.columns[11].stringValues[i];
+        if (callsites.columns[12].longValues != null &&
+            callsites.columns[12].longValues[i] !== -1) {
+          location += `:${callsites.columns[12].longValues[i].toString()}`;
+        }
+      }
+
       if (depth === maxDepth - 1) {
         name += ' [tree truncated]';
       }
@@ -302,7 +314,8 @@
         selfSize,
         mapping,
         merged: false,
-        highlighted
+        highlighted,
+        location
       });
     }
     return flamegraphData;
@@ -321,7 +334,7 @@
     return this.cache.getTableName(
         `select id, name, map_name, parent_id, depth, cumulative_size,
           cumulative_alloc_size, cumulative_count, cumulative_alloc_count,
-          size, alloc_size, count, alloc_count
+          size, alloc_size, count, alloc_count, source_file, line_number
           from experimental_flamegraph(${ts}, ${upid}, '${type}') ${
             whereClause}`);
   }
diff --git a/ui/src/controller/track_controller.ts b/ui/src/controller/track_controller.ts
index 75ee194..b3742b3 100644
--- a/ui/src/controller/track_controller.ts
+++ b/ui/src/controller/track_controller.ts
@@ -151,7 +151,7 @@
         globals.state.frontendLocalState.visibleState.resolution;
   }
 
-  // Decides, based on the the length of the trace and the number of rows
+  // Decides, based on the length of the trace and the number of rows
   // provided whether a TrackController subclass should cache its quantized
   // data. Returns the bucket size (in ns) if caching should happen and
   // undefined otherwise.
diff --git a/ui/src/frontend/flamegraph.ts b/ui/src/frontend/flamegraph.ts
index 77e209a..8836c11 100644
--- a/ui/src/frontend/flamegraph.ts
+++ b/ui/src/frontend/flamegraph.ts
@@ -267,19 +267,21 @@
       const offsetPx = 4;
 
       const lines: string[] = [];
-      let lineSplitter: LineSplitter;
-      const nameText = this.getCallsiteName(this.hoveredCallsite);
-      const nameTextSize = ctx.measureText(nameText);
-      lineSplitter =
-          splitIfTooBig(nameText, width - paddingPx, nameTextSize.width);
-      let textWidth = lineSplitter.lineWidth;
-      lines.push(...lineSplitter.lines);
 
-      const mappingText = this.hoveredCallsite.mapping;
-      lineSplitter =
-          splitIfTooBig(mappingText, width, ctx.measureText(mappingText).width);
-      textWidth = Math.max(textWidth, lineSplitter.lineWidth);
-      lines.push(...lineSplitter.lines);
+      let textWidth = this.addToTooltip(
+          this.getCallsiteName(this.hoveredCallsite),
+          width - paddingPx,
+          ctx,
+          lines);
+      if (this.hoveredCallsite.location != null) {
+        textWidth = Math.max(
+            textWidth,
+            this.addToTooltip(
+                this.hoveredCallsite.location, width, ctx, lines));
+      }
+      textWidth = Math.max(
+          textWidth,
+          this.addToTooltip(this.hoveredCallsite.mapping, width, ctx, lines));
 
       if (this.nodeRendering.totalSize !== undefined) {
         const percentage =
@@ -289,10 +291,8 @@
                 this.hoveredCallsite.totalSize,
                 unit,
                 unit === 'B' ? 1024 : 1000)} (${percentage.toFixed(2)}%)`;
-        lineSplitter = splitIfTooBig(
-            totalSizeText, width, ctx.measureText(totalSizeText).width);
-        textWidth = Math.max(textWidth, lineSplitter.lineWidth);
-        lines.push(...lineSplitter.lines);
+        textWidth = Math.max(
+            textWidth, this.addToTooltip(totalSizeText, width, ctx, lines));
       }
 
       if (this.nodeRendering.selfSize !== undefined &&
@@ -304,10 +304,8 @@
                 this.hoveredCallsite.selfSize,
                 unit,
                 unit === 'B' ? 1024 : 1000)} (${selfPercentage.toFixed(2)}%)`;
-        lineSplitter = splitIfTooBig(
-            selfSizeText, width, ctx.measureText(selfSizeText).width);
-        textWidth = Math.max(textWidth, lineSplitter.lineWidth);
-        lines.push(...lineSplitter.lines);
+        textWidth = Math.max(
+            textWidth, this.addToTooltip(selfSizeText, width, ctx, lines));
       }
 
       // Compute a line height as the bounding box height + 50%:
@@ -344,6 +342,15 @@
     }
   }
 
+  private addToTooltip(
+      text: string, width: number, ctx: CanvasRenderingContext2D,
+      lines: string[]): number {
+    const lineSplitter: LineSplitter =
+        splitIfTooBig(text, width, ctx.measureText(text).width);
+    lines.push(...lineSplitter.lines);
+    return lineSplitter.lineWidth;
+  }
+
   private getCallsiteName(value: CallsiteInfo): string {
     return value.name === undefined || value.name === '' ? 'unknown' :
                                                            value.name;
diff --git a/ui/src/tracks/cpu_profile/controller.ts b/ui/src/tracks/cpu_profile/controller.ts
index 070cedb..0042888 100644
--- a/ui/src/tracks/cpu_profile/controller.ts
+++ b/ui/src/tracks/cpu_profile/controller.ts
@@ -12,7 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-import {slowlyCountRows} from '../../common/query_iterator';
+import {iter, NUM, slowlyCountRows} from '../../common/query_iterator';
 import {
   TrackController,
   trackControllerRegistry
@@ -28,9 +28,13 @@
   static readonly kind = CPU_PROFILE_TRACK_KIND;
   async onBoundsChange(start: number, end: number, resolution: number):
       Promise<Data> {
-    const query = `select id, ts, callsite_id from cpu_profile_stack_sample
-        where utid = ${this.config.utid}
-        order by ts`;
+    const query = `select
+        id,
+        ts,
+        callsite_id as callsiteId
+      from cpu_profile_stack_sample
+      where utid = ${this.config.utid}
+      order by ts`;
 
     const result = await this.query(query);
 
@@ -45,10 +49,11 @@
       callsiteId: new Uint32Array(numRows),
     };
 
-    for (let row = 0; row < numRows; row++) {
-      data.ids[row] = +result.columns[0].longValues![row];
-      data.tsStarts[row] = +result.columns[1].longValues![row];
-      data.callsiteId[row] = +result.columns[2].longValues![row];
+    const it = iter({id: NUM, ts: NUM, callsiteId: NUM}, result);
+    for (let i = 0; it.valid(); it.next(), ++i) {
+      data.ids[i] = it.row.id;
+      data.tsStarts[i] = it.row.ts;
+      data.callsiteId[i] = it.row.callsiteId;
     }
 
     return data;