Improve metatrace, allow it to be used in production

Before this CL meta-tracing was based on serializing
events on the fly as JSON and writing them to a file.
This had a number of disadvantages: (1) perf, string
manipulation + write() isn't really nice on fast-paths;
(2) interoperability: it required an env var and
required loading a separate json file.

This CL improves meta-tracing as follows:
1. Events are stored in a statically allocated
  (.rwdata) ring-buffer in a very efficient way
  using 16 bytes per event.
2. They are serialized into protos asynchronously.
3. They can be injected into the real trace, exposed
   as a data source.

Meta-tracing is organized into three layers:
1. The core base class: it only holds event records in a
   ring-buffer and does NOT deal with serialization.
   This allows any part of the codebase to use
   meta-tracing, even base/ if we need it in the future.
2. A MetatraceWriter class that is able to write event
   records into the trace using a TraceWriter.
3. A MetatraceDataSource: hooks it up to traced_probes.

The split between 2 and 3 is to allow other components
(e.g. Chrome, heapprofd) to take advantage of meta-tracing
outside of traced_probes.

Bug: 133312949
Test: perfetto_unittests --gtest_filter=Metatrace*

Change-Id: Ife1390e599e8c2ca3f4e1039e73398619dbd1af6
diff --git a/Android.bp b/Android.bp
index 79de486..65ab786 100644
--- a/Android.bp
+++ b/Android.bp
@@ -51,6 +51,7 @@
     ":perfetto_protos_perfetto_trace_ftrace_zero_gen",
     ":perfetto_protos_perfetto_trace_interned_data_zero_gen",
     ":perfetto_protos_perfetto_trace_minimal_lite_gen",
+    ":perfetto_protos_perfetto_trace_perfetto_zero_gen",
     ":perfetto_protos_perfetto_trace_power_zero_gen",
     ":perfetto_protos_perfetto_trace_profiling_zero_gen",
     ":perfetto_protos_perfetto_trace_ps_zero_gen",
@@ -105,6 +106,7 @@
     "src/tracing/core/data_source_config.cc",
     "src/tracing/core/data_source_descriptor.cc",
     "src/tracing/core/id_allocator.cc",
+    "src/tracing/core/metatrace_writer.cc",
     "src/tracing/core/null_trace_writer.cc",
     "src/tracing/core/observable_events.cc",
     "src/tracing/core/packet_stream_validator.cc",
@@ -149,6 +151,7 @@
     "perfetto_protos_perfetto_trace_ftrace_zero_gen_headers",
     "perfetto_protos_perfetto_trace_interned_data_zero_gen_headers",
     "perfetto_protos_perfetto_trace_minimal_lite_gen_headers",
+    "perfetto_protos_perfetto_trace_perfetto_zero_gen_headers",
     "perfetto_protos_perfetto_trace_power_zero_gen_headers",
     "perfetto_protos_perfetto_trace_profiling_zero_gen_headers",
     "perfetto_protos_perfetto_trace_ps_zero_gen_headers",
@@ -244,6 +247,7 @@
     ":perfetto_protos_perfetto_trace_ftrace_zero_gen",
     ":perfetto_protos_perfetto_trace_interned_data_zero_gen",
     ":perfetto_protos_perfetto_trace_minimal_lite_gen",
+    ":perfetto_protos_perfetto_trace_perfetto_zero_gen",
     ":perfetto_protos_perfetto_trace_power_zero_gen",
     ":perfetto_protos_perfetto_trace_profiling_zero_gen",
     ":perfetto_protos_perfetto_trace_ps_zero_gen",
@@ -304,6 +308,7 @@
     "src/traced/probes/ftrace/ftrace_stats.cc",
     "src/traced/probes/ftrace/page_pool.cc",
     "src/traced/probes/ftrace/proto_translation_table.cc",
+    "src/traced/probes/metatrace/metatrace_data_source.cc",
     "src/traced/probes/packages_list/packages_list_data_source.cc",
     "src/traced/probes/power/android_power_data_source.cc",
     "src/traced/probes/probes.cc",
@@ -319,6 +324,7 @@
     "src/tracing/core/data_source_config.cc",
     "src/tracing/core/data_source_descriptor.cc",
     "src/tracing/core/id_allocator.cc",
+    "src/tracing/core/metatrace_writer.cc",
     "src/tracing/core/null_trace_writer.cc",
     "src/tracing/core/observable_events.cc",
     "src/tracing/core/packet_stream_validator.cc",
@@ -357,6 +363,7 @@
     "perfetto_protos_perfetto_trace_ftrace_zero_gen_headers",
     "perfetto_protos_perfetto_trace_interned_data_zero_gen_headers",
     "perfetto_protos_perfetto_trace_minimal_lite_gen_headers",
+    "perfetto_protos_perfetto_trace_perfetto_zero_gen_headers",
     "perfetto_protos_perfetto_trace_power_zero_gen_headers",
     "perfetto_protos_perfetto_trace_profiling_zero_gen_headers",
     "perfetto_protos_perfetto_trace_ps_zero_gen_headers",
@@ -421,6 +428,7 @@
     ":perfetto_protos_perfetto_trace_ftrace_zero_gen",
     ":perfetto_protos_perfetto_trace_interned_data_zero_gen",
     ":perfetto_protos_perfetto_trace_minimal_lite_gen",
+    ":perfetto_protos_perfetto_trace_perfetto_zero_gen",
     ":perfetto_protos_perfetto_trace_power_zero_gen",
     ":perfetto_protos_perfetto_trace_profiling_zero_gen",
     ":perfetto_protos_perfetto_trace_ps_zero_gen",
@@ -462,6 +470,7 @@
     "src/tracing/core/data_source_config.cc",
     "src/tracing/core/data_source_descriptor.cc",
     "src/tracing/core/id_allocator.cc",
+    "src/tracing/core/metatrace_writer.cc",
     "src/tracing/core/null_trace_writer.cc",
     "src/tracing/core/observable_events.cc",
     "src/tracing/core/packet_stream_validator.cc",
@@ -477,6 +486,7 @@
     "src/tracing/core/trace_stats.cc",
     "src/tracing/core/trace_writer_impl.cc",
     "src/tracing/core/tracing_service_impl.cc",
+    "src/tracing/core/tracing_service_state.cc",
     "src/tracing/core/virtual_destructors.cc",
     "src/tracing/data_source.cc",
     "src/tracing/internal/in_process_tracing_backend.cc",
@@ -509,6 +519,7 @@
     "perfetto_protos_perfetto_trace_ftrace_zero_gen_headers",
     "perfetto_protos_perfetto_trace_interned_data_zero_gen_headers",
     "perfetto_protos_perfetto_trace_minimal_lite_gen_headers",
+    "perfetto_protos_perfetto_trace_perfetto_zero_gen_headers",
     "perfetto_protos_perfetto_trace_power_zero_gen_headers",
     "perfetto_protos_perfetto_trace_profiling_zero_gen_headers",
     "perfetto_protos_perfetto_trace_ps_zero_gen_headers",
@@ -530,6 +541,7 @@
     "perfetto_protos_perfetto_trace_ftrace_zero_gen_headers",
     "perfetto_protos_perfetto_trace_interned_data_zero_gen_headers",
     "perfetto_protos_perfetto_trace_minimal_lite_gen_headers",
+    "perfetto_protos_perfetto_trace_perfetto_zero_gen_headers",
     "perfetto_protos_perfetto_trace_power_zero_gen_headers",
     "perfetto_protos_perfetto_trace_profiling_zero_gen_headers",
     "perfetto_protos_perfetto_trace_ps_zero_gen_headers",
@@ -564,6 +576,7 @@
     ":perfetto_protos_perfetto_trace_ftrace_zero_gen",
     ":perfetto_protos_perfetto_trace_interned_data_zero_gen",
     ":perfetto_protos_perfetto_trace_minimal_lite_gen",
+    ":perfetto_protos_perfetto_trace_perfetto_zero_gen",
     ":perfetto_protos_perfetto_trace_power_zero_gen",
     ":perfetto_protos_perfetto_trace_profiling_zero_gen",
     ":perfetto_protos_perfetto_trace_ps_zero_gen",
@@ -613,6 +626,7 @@
     "src/tracing/core/data_source_config.cc",
     "src/tracing/core/data_source_descriptor.cc",
     "src/tracing/core/id_allocator.cc",
+    "src/tracing/core/metatrace_writer.cc",
     "src/tracing/core/null_trace_writer.cc",
     "src/tracing/core/observable_events.cc",
     "src/tracing/core/packet_stream_validator.cc",
@@ -656,6 +670,7 @@
     "perfetto_protos_perfetto_trace_ftrace_zero_gen_headers",
     "perfetto_protos_perfetto_trace_interned_data_zero_gen_headers",
     "perfetto_protos_perfetto_trace_minimal_lite_gen_headers",
+    "perfetto_protos_perfetto_trace_perfetto_zero_gen_headers",
     "perfetto_protos_perfetto_trace_power_zero_gen_headers",
     "perfetto_protos_perfetto_trace_profiling_zero_gen_headers",
     "perfetto_protos_perfetto_trace_ps_zero_gen_headers",
@@ -722,6 +737,8 @@
     ":perfetto_protos_perfetto_trace_interned_data_zero_gen",
     ":perfetto_protos_perfetto_trace_lite_gen",
     ":perfetto_protos_perfetto_trace_minimal_lite_gen",
+    ":perfetto_protos_perfetto_trace_perfetto_lite_gen",
+    ":perfetto_protos_perfetto_trace_perfetto_zero_gen",
     ":perfetto_protos_perfetto_trace_power_lite_gen",
     ":perfetto_protos_perfetto_trace_power_zero_gen",
     ":perfetto_protos_perfetto_trace_profiling_lite_gen",
@@ -807,6 +824,7 @@
     "src/traced/probes/ftrace/page_pool.cc",
     "src/traced/probes/ftrace/proto_translation_table.cc",
     "src/traced/probes/ftrace/test/cpu_reader_support.cc",
+    "src/traced/probes/metatrace/metatrace_data_source.cc",
     "src/traced/probes/packages_list/packages_list_data_source.cc",
     "src/traced/probes/power/android_power_data_source.cc",
     "src/traced/probes/probes_data_source.cc",
@@ -819,6 +837,7 @@
     "src/tracing/core/data_source_config.cc",
     "src/tracing/core/data_source_descriptor.cc",
     "src/tracing/core/id_allocator.cc",
+    "src/tracing/core/metatrace_writer.cc",
     "src/tracing/core/null_trace_writer.cc",
     "src/tracing/core/observable_events.cc",
     "src/tracing/core/packet_stream_validator.cc",
@@ -880,6 +899,8 @@
     "perfetto_protos_perfetto_trace_interned_data_zero_gen_headers",
     "perfetto_protos_perfetto_trace_lite_gen_headers",
     "perfetto_protos_perfetto_trace_minimal_lite_gen_headers",
+    "perfetto_protos_perfetto_trace_perfetto_lite_gen_headers",
+    "perfetto_protos_perfetto_trace_perfetto_zero_gen_headers",
     "perfetto_protos_perfetto_trace_power_lite_gen_headers",
     "perfetto_protos_perfetto_trace_power_zero_gen_headers",
     "perfetto_protos_perfetto_trace_profiling_lite_gen_headers",
@@ -1988,6 +2009,74 @@
   ],
 }
 
+// GN target: //protos/perfetto/trace/perfetto:lite_gen
+genrule {
+  name: "perfetto_protos_perfetto_trace_perfetto_lite_gen",
+  srcs: [
+    "protos/perfetto/trace/perfetto/perfetto_metatrace.proto",
+  ],
+  tools: [
+    "aprotoc",
+  ],
+  cmd: "mkdir -p $(genDir)/external/perfetto/protos && $(location aprotoc) --cpp_out=$(genDir)/external/perfetto/protos --proto_path=external/perfetto/protos $(in)",
+  out: [
+    "external/perfetto/protos/perfetto/trace/perfetto/perfetto_metatrace.pb.cc",
+  ],
+}
+
+// GN target: //protos/perfetto/trace/perfetto:lite_gen
+genrule {
+  name: "perfetto_protos_perfetto_trace_perfetto_lite_gen_headers",
+  srcs: [
+    "protos/perfetto/trace/perfetto/perfetto_metatrace.proto",
+  ],
+  tools: [
+    "aprotoc",
+  ],
+  cmd: "mkdir -p $(genDir)/external/perfetto/protos && $(location aprotoc) --cpp_out=$(genDir)/external/perfetto/protos --proto_path=external/perfetto/protos $(in)",
+  out: [
+    "external/perfetto/protos/perfetto/trace/perfetto/perfetto_metatrace.pb.h",
+  ],
+  export_include_dirs: [
+    "protos",
+  ],
+}
+
+// GN target: //protos/perfetto/trace/perfetto:zero_gen
+genrule {
+  name: "perfetto_protos_perfetto_trace_perfetto_zero_gen",
+  srcs: [
+    "protos/perfetto/trace/perfetto/perfetto_metatrace.proto",
+  ],
+  tools: [
+    "aprotoc",
+    "perfetto_src_protozero_protoc_plugin_protoc_plugin___gn_standalone_toolchain_gcc_like_host_",
+  ],
+  cmd: "mkdir -p $(genDir)/external/perfetto/protos && $(location aprotoc) --cpp_out=$(genDir)/external/perfetto/protos --proto_path=external/perfetto/protos --plugin=protoc-gen-plugin=$(location perfetto_src_protozero_protoc_plugin_protoc_plugin___gn_standalone_toolchain_gcc_like_host_) --plugin_out=wrapper_namespace=pbzero:$(genDir)/external/perfetto/protos $(in)",
+  out: [
+    "external/perfetto/protos/perfetto/trace/perfetto/perfetto_metatrace.pbzero.cc",
+  ],
+}
+
+// GN target: //protos/perfetto/trace/perfetto:zero_gen
+genrule {
+  name: "perfetto_protos_perfetto_trace_perfetto_zero_gen_headers",
+  srcs: [
+    "protos/perfetto/trace/perfetto/perfetto_metatrace.proto",
+  ],
+  tools: [
+    "aprotoc",
+    "perfetto_src_protozero_protoc_plugin_protoc_plugin___gn_standalone_toolchain_gcc_like_host_",
+  ],
+  cmd: "mkdir -p $(genDir)/external/perfetto/protos && $(location aprotoc) --cpp_out=$(genDir)/external/perfetto/protos --proto_path=external/perfetto/protos --plugin=protoc-gen-plugin=$(location perfetto_src_protozero_protoc_plugin_protoc_plugin___gn_standalone_toolchain_gcc_like_host_) --plugin_out=wrapper_namespace=pbzero:$(genDir)/external/perfetto/protos $(in)",
+  out: [
+    "external/perfetto/protos/perfetto/trace/perfetto/perfetto_metatrace.pbzero.h",
+  ],
+  export_include_dirs: [
+    "protos",
+  ],
+}
+
 // GN target: //protos/perfetto/trace/power:lite_gen
 genrule {
   name: "perfetto_protos_perfetto_trace_power_lite_gen",
@@ -2862,6 +2951,7 @@
     ":perfetto_protos_perfetto_trace_ftrace_zero_gen",
     ":perfetto_protos_perfetto_trace_interned_data_zero_gen",
     ":perfetto_protos_perfetto_trace_minimal_lite_gen",
+    ":perfetto_protos_perfetto_trace_perfetto_zero_gen",
     ":perfetto_protos_perfetto_trace_power_zero_gen",
     ":perfetto_protos_perfetto_trace_profiling_zero_gen",
     ":perfetto_protos_perfetto_trace_ps_zero_gen",
@@ -2903,6 +2993,7 @@
     "src/tracing/core/data_source_config.cc",
     "src/tracing/core/data_source_descriptor.cc",
     "src/tracing/core/id_allocator.cc",
+    "src/tracing/core/metatrace_writer.cc",
     "src/tracing/core/null_trace_writer.cc",
     "src/tracing/core/observable_events.cc",
     "src/tracing/core/packet_stream_validator.cc",
@@ -2947,6 +3038,7 @@
     "perfetto_protos_perfetto_trace_ftrace_zero_gen_headers",
     "perfetto_protos_perfetto_trace_interned_data_zero_gen_headers",
     "perfetto_protos_perfetto_trace_minimal_lite_gen_headers",
+    "perfetto_protos_perfetto_trace_perfetto_zero_gen_headers",
     "perfetto_protos_perfetto_trace_power_zero_gen_headers",
     "perfetto_protos_perfetto_trace_profiling_zero_gen_headers",
     "perfetto_protos_perfetto_trace_ps_zero_gen_headers",
@@ -2968,6 +3060,7 @@
     "perfetto_protos_perfetto_trace_ftrace_zero_gen_headers",
     "perfetto_protos_perfetto_trace_interned_data_zero_gen_headers",
     "perfetto_protos_perfetto_trace_minimal_lite_gen_headers",
+    "perfetto_protos_perfetto_trace_perfetto_zero_gen_headers",
     "perfetto_protos_perfetto_trace_power_zero_gen_headers",
     "perfetto_protos_perfetto_trace_profiling_zero_gen_headers",
     "perfetto_protos_perfetto_trace_ps_zero_gen_headers",
@@ -3000,6 +3093,7 @@
     ":perfetto_protos_perfetto_trace_interned_data_lite_gen",
     ":perfetto_protos_perfetto_trace_lite_gen",
     ":perfetto_protos_perfetto_trace_minimal_lite_gen",
+    ":perfetto_protos_perfetto_trace_perfetto_lite_gen",
     ":perfetto_protos_perfetto_trace_power_lite_gen",
     ":perfetto_protos_perfetto_trace_profiling_lite_gen",
     ":perfetto_protos_perfetto_trace_ps_lite_gen",
@@ -3023,6 +3117,7 @@
     "perfetto_protos_perfetto_trace_interned_data_lite_gen_headers",
     "perfetto_protos_perfetto_trace_lite_gen_headers",
     "perfetto_protos_perfetto_trace_minimal_lite_gen_headers",
+    "perfetto_protos_perfetto_trace_perfetto_lite_gen_headers",
     "perfetto_protos_perfetto_trace_power_lite_gen_headers",
     "perfetto_protos_perfetto_trace_profiling_lite_gen_headers",
     "perfetto_protos_perfetto_trace_ps_lite_gen_headers",
@@ -3039,6 +3134,7 @@
     "perfetto_protos_perfetto_trace_interned_data_lite_gen_headers",
     "perfetto_protos_perfetto_trace_lite_gen_headers",
     "perfetto_protos_perfetto_trace_minimal_lite_gen_headers",
+    "perfetto_protos_perfetto_trace_perfetto_lite_gen_headers",
     "perfetto_protos_perfetto_trace_power_lite_gen_headers",
     "perfetto_protos_perfetto_trace_profiling_lite_gen_headers",
     "perfetto_protos_perfetto_trace_ps_lite_gen_headers",
@@ -3075,6 +3171,8 @@
     ":perfetto_protos_perfetto_trace_interned_data_zero_gen",
     ":perfetto_protos_perfetto_trace_lite_gen",
     ":perfetto_protos_perfetto_trace_minimal_lite_gen",
+    ":perfetto_protos_perfetto_trace_perfetto_lite_gen",
+    ":perfetto_protos_perfetto_trace_perfetto_zero_gen",
     ":perfetto_protos_perfetto_trace_power_lite_gen",
     ":perfetto_protos_perfetto_trace_power_zero_gen",
     ":perfetto_protos_perfetto_trace_profiling_lite_gen",
@@ -3099,6 +3197,7 @@
     "src/base/event.cc",
     "src/base/file_utils.cc",
     "src/base/metatrace.cc",
+    "src/base/metatrace_unittest.cc",
     "src/base/no_destructor_unittest.cc",
     "src/base/optional_unittest.cc",
     "src/base/paged_memory.cc",
@@ -3230,6 +3329,7 @@
     "src/traced/probes/ftrace/proto_translation_table.cc",
     "src/traced/probes/ftrace/proto_translation_table_unittest.cc",
     "src/traced/probes/ftrace/test/cpu_reader_support.cc",
+    "src/traced/probes/metatrace/metatrace_data_source.cc",
     "src/traced/probes/packages_list/packages_list_data_source.cc",
     "src/traced/probes/packages_list/packages_list_data_source_unittest.cc",
     "src/traced/probes/power/android_power_data_source.cc",
@@ -3248,6 +3348,7 @@
     "src/tracing/core/data_source_descriptor.cc",
     "src/tracing/core/id_allocator.cc",
     "src/tracing/core/id_allocator_unittest.cc",
+    "src/tracing/core/metatrace_writer.cc",
     "src/tracing/core/null_trace_writer.cc",
     "src/tracing/core/null_trace_writer_unittest.cc",
     "src/tracing/core/observable_events.cc",
@@ -3326,6 +3427,8 @@
     "perfetto_protos_perfetto_trace_interned_data_zero_gen_headers",
     "perfetto_protos_perfetto_trace_lite_gen_headers",
     "perfetto_protos_perfetto_trace_minimal_lite_gen_headers",
+    "perfetto_protos_perfetto_trace_perfetto_lite_gen_headers",
+    "perfetto_protos_perfetto_trace_perfetto_zero_gen_headers",
     "perfetto_protos_perfetto_trace_power_lite_gen_headers",
     "perfetto_protos_perfetto_trace_power_zero_gen_headers",
     "perfetto_protos_perfetto_trace_profiling_lite_gen_headers",
@@ -3383,6 +3486,8 @@
     ":perfetto_protos_perfetto_trace_interned_data_zero_gen",
     ":perfetto_protos_perfetto_trace_lite_gen",
     ":perfetto_protos_perfetto_trace_minimal_lite_gen",
+    ":perfetto_protos_perfetto_trace_perfetto_lite_gen",
+    ":perfetto_protos_perfetto_trace_perfetto_zero_gen",
     ":perfetto_protos_perfetto_trace_power_lite_gen",
     ":perfetto_protos_perfetto_trace_power_zero_gen",
     ":perfetto_protos_perfetto_trace_processor_metrics_impl_zero_gen",
@@ -3505,6 +3610,8 @@
     "perfetto_protos_perfetto_trace_interned_data_zero_gen_headers",
     "perfetto_protos_perfetto_trace_lite_gen_headers",
     "perfetto_protos_perfetto_trace_minimal_lite_gen_headers",
+    "perfetto_protos_perfetto_trace_perfetto_lite_gen_headers",
+    "perfetto_protos_perfetto_trace_perfetto_zero_gen_headers",
     "perfetto_protos_perfetto_trace_power_lite_gen_headers",
     "perfetto_protos_perfetto_trace_power_zero_gen_headers",
     "perfetto_protos_perfetto_trace_processor_metrics_impl_zero_gen_headers",
@@ -3587,6 +3694,7 @@
     ":perfetto_protos_perfetto_trace_ftrace_zero_gen",
     ":perfetto_protos_perfetto_trace_interned_data_zero_gen",
     ":perfetto_protos_perfetto_trace_minimal_lite_gen",
+    ":perfetto_protos_perfetto_trace_perfetto_zero_gen",
     ":perfetto_protos_perfetto_trace_power_zero_gen",
     ":perfetto_protos_perfetto_trace_profiling_zero_gen",
     ":perfetto_protos_perfetto_trace_ps_zero_gen",
@@ -3632,6 +3740,7 @@
     "src/tracing/core/data_source_config.cc",
     "src/tracing/core/data_source_descriptor.cc",
     "src/tracing/core/id_allocator.cc",
+    "src/tracing/core/metatrace_writer.cc",
     "src/tracing/core/null_trace_writer.cc",
     "src/tracing/core/observable_events.cc",
     "src/tracing/core/packet_stream_validator.cc",
@@ -3671,6 +3780,7 @@
     "perfetto_protos_perfetto_trace_ftrace_zero_gen_headers",
     "perfetto_protos_perfetto_trace_interned_data_zero_gen_headers",
     "perfetto_protos_perfetto_trace_minimal_lite_gen_headers",
+    "perfetto_protos_perfetto_trace_perfetto_zero_gen_headers",
     "perfetto_protos_perfetto_trace_power_zero_gen_headers",
     "perfetto_protos_perfetto_trace_profiling_zero_gen_headers",
     "perfetto_protos_perfetto_trace_ps_zero_gen_headers",
diff --git a/BUILD b/BUILD
index b93362b..3a624d7 100644
--- a/BUILD
+++ b/BUILD
@@ -332,6 +332,7 @@
         "//third_party/perfetto/protos:trace_filesystem_zero_cc_proto",
         "//third_party/perfetto/protos:trace_ftrace_zero_cc_proto",
         "//third_party/perfetto/protos:trace_interned_data_zero_cc_proto",
+        "//third_party/perfetto/protos:trace_perfetto_zero_cc_proto",
         "//third_party/perfetto/protos:trace_power_zero_cc_proto",
         "//third_party/perfetto/protos:trace_processor_metrics_impl_zero_cc_proto",
         "//third_party/perfetto/protos:trace_profiling_zero_cc_proto",
@@ -562,6 +563,7 @@
         "//third_party/perfetto/protos:trace_filesystem_zero_cc_proto",
         "//third_party/perfetto/protos:trace_ftrace_zero_cc_proto",
         "//third_party/perfetto/protos:trace_interned_data_zero_cc_proto",
+        "//third_party/perfetto/protos:trace_perfetto_zero_cc_proto",
         "//third_party/perfetto/protos:trace_power_zero_cc_proto",
         "//third_party/perfetto/protos:trace_processor_metrics_impl_zero_cc_proto",
         "//third_party/perfetto/protos:trace_profiling_zero_cc_proto",
@@ -813,6 +815,8 @@
         "//third_party/perfetto/protos:trace_interned_data_cc_proto",
         "//third_party/perfetto/protos:trace_interned_data_zero_cc_proto",
         "//third_party/perfetto/protos:trace_minimal_cc_proto",
+        "//third_party/perfetto/protos:trace_perfetto_cc_proto",
+        "//third_party/perfetto/protos:trace_perfetto_zero_cc_proto",
         "//third_party/perfetto/protos:trace_power_cc_proto",
         "//third_party/perfetto/protos:trace_power_zero_cc_proto",
         "//third_party/perfetto/protos:trace_processor_metrics_impl_zero_cc_proto",
diff --git a/include/perfetto/ext/base/metatrace.h b/include/perfetto/ext/base/metatrace.h
index c41e0ee..4345b49 100644
--- a/include/perfetto/ext/base/metatrace.h
+++ b/include/perfetto/ext/base/metatrace.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2018 The Android Open Source Project
+ * Copyright (C) 2019 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -17,53 +17,284 @@
 #ifndef INCLUDE_PERFETTO_EXT_BASE_METATRACE_H_
 #define INCLUDE_PERFETTO_EXT_BASE_METATRACE_H_
 
-#include <string.h>
-
+#include <array>
+#include <atomic>
+#include <functional>
 #include <string>
 
 #include "perfetto/base/logging.h"
+#include "perfetto/ext/base/metatrace_events.h"
+#include "perfetto/ext/base/thread_utils.h"
+#include "perfetto/ext/base/time.h"
 #include "perfetto/ext/base/utils.h"
 
+// A facility to trace execution of the perfetto codebase itself.
+// The meta-tracing framework is organized into three layers:
+//
+// 1. A static ring-buffer in base/ (this file) that supports concurrent writes
+//    and a single reader.
+//    The responsibility of this layer is to store events and counters as
+//    efficiently as possible without re-entering any tracing code.
+//    This is really a static-storage-based ring-buffer based on a POD array.
+//    This layer does NOT deal with serializing the meta-trace buffer.
+//    It posts a task when it's half full and expects something outside of
+//    base/ to drain the ring-buffer and serialize it, eventually writing it
+//    into the trace itself, before it gets 100% full.
+//
+// 2. A class in tracing/core which takes care of serializing the meta-trace
+//    buffer into the trace using a TraceWriter. See metatrace_writer.h .
+//
+// 3. A data source in traced_probes that, when enabled via the trace config,
+//    injects metatrace events into the trace. See metatrace_data_source.h .
+//
+// The available events and tags are defined in metatrace_events.h .
+
 namespace perfetto {
+
 namespace base {
+class TaskRunner;
+}  // namespace base
 
-class MetaTrace {
- public:
-  static constexpr uint32_t kMainThreadCpu = 255;
+namespace metatrace {
 
-  MetaTrace(const char* evt_name, size_t cpu) : evt_name_(evt_name), cpu_(cpu) {
-    WriteEvent('B', evt_name, cpu);
+// Meta-tracing is organized in "tags" that can be selectively enabled. This is
+// to enable meta-tracing only of one sub-system. This word has one "enabled"
+// bit for each tag. 0 -> meta-tracing off.
+extern std::atomic<uint32_t> g_enabled_tags;
+
+// Time of the Enable() call. Used as a reference for keeping delta timestamps
+// in Record.
+extern std::atomic<uint64_t> g_enabled_timestamp;
+
+// Enables meta-tracing for one or more tags. Once enabled it will discard any
+// further Enable() calls and return false until disabled.
+// |read_task| is a closure that will be enqueued on |task_runner| when the
+// meta-tracing ring buffer is half full. The task is expected to read the ring
+// buffer using RingBuffer::GetReadIterator() and serialize the contents onto a
+// file or into the trace itself.
+// Must be called on the |task_runner| passed.
+// |task_runner| must have static lifetime.
+bool Enable(std::function<void()> read_task, base::TaskRunner*, uint32_t tags);
+
+// Disables meta-tracing.
+// Must be called on the same |task_runner| as Enable().
+void Disable();
+
+inline uint64_t TraceTimeNowNs() {
+  return static_cast<uint64_t>(base::GetBootTimeNs().count());
+}
+
+// Holds the data for a metatrace event or counter.
+struct Record {
+  static constexpr uint16_t kTypeMask = 0x8000;
+  static constexpr uint16_t kTypeCounter = 0x8000;
+  static constexpr uint16_t kTypeEvent = 0;
+
+  uint64_t timestamp_ns() const {
+    auto base_ns = g_enabled_timestamp.load(std::memory_order_relaxed);
+    PERFETTO_DCHECK(base_ns);
+    return base_ns + ((static_cast<uint64_t>(timestamp_ns_high) << 32) |
+                      timestamp_ns_low);
   }
 
-  MetaTrace(const std::string& str, size_t cpu)
-      : str_copy_(str), evt_name_(str_copy_.c_str()), cpu_(cpu) {
-    WriteEvent('B', evt_name_, cpu);
+  void set_timestamp(uint64_t ts) {
+    auto t_start = g_enabled_timestamp.load(std::memory_order_relaxed);
+    uint64_t diff = ts - t_start;
+    PERFETTO_DCHECK(diff < (1ull << 48));
+    timestamp_ns_low = static_cast<uint32_t>(diff);
+    timestamp_ns_high = static_cast<uint16_t>(diff >> 32);
   }
 
-  ~MetaTrace() { WriteEvent('E', evt_name_, cpu_); }
+  // This field holds the type (counter vs event) in the MSB and event ID (as
+  // defined in metatrace_events.h) in the lowest 15 bits. It is also used
+  // as a linearization point: this is always written after all the other
+  // fields with a release-store. This is so the reader can determine whether it
+  // can safely process the other event fields after a load-acquire.
+  std::atomic<uint16_t> type_and_id;
 
- private:
-  MetaTrace(const MetaTrace&) = delete;
-  MetaTrace& operator=(const MetaTrace&) = delete;
+  // Timestamp is stored as a 48-bits value diffed against g_enabled_timestamp.
+  // This gives us 78 hours from Enabled().
+  uint16_t timestamp_ns_high;
+  uint32_t timestamp_ns_low;
 
-  void WriteEvent(char type, const char* evt_name, size_t cpu);
+  uint32_t thread_id;
 
-  std::string str_copy_;
-  const char* const evt_name_;
-  const size_t cpu_;
+  union {
+    uint32_t duration_ns;   // If type == event.
+    int32_t counter_value;  // If type == counter.
+  };
 };
 
+// Holds the meta-tracing data in a statically allocated array.
+// This class uses static storage (as opposed to being a singleton) to:
+// - Have the guarantee of always valid storage, so that meta-tracing can be
+//   safely used in any part of the codebase, including base/ itself.
+// - Avoid barriers that thread-safe static locals would require.
+class RingBuffer {
+ public:
+  static constexpr size_t kCapacity = 4096;  // 4096 * 16 bytes = 64K.
+
+  // This iterator is not idempotent and will bump the read index in the buffer
+  // at the end of the reads. There can be only one reader at any time.
+  // Usage: for (auto it = RingBuffer::GetReadIterator(); it; ++it) { it->... }
+  class ReadIterator {
+   public:
+    ReadIterator(ReadIterator&& other) {
+      PERFETTO_DCHECK(other.valid_);
+      cur_ = other.cur_;
+      end_ = other.end_;
+      valid_ = other.valid_;
+      other.valid_ = false;
+    }
+
+    ~ReadIterator() {
+      if (!valid_)
+        return;
+      PERFETTO_DCHECK(cur_ >= RingBuffer::rd_index_);
+      PERFETTO_DCHECK(cur_ <= RingBuffer::wr_index_);
+      RingBuffer::rd_index_.store(cur_, std::memory_order_release);
+    }
+
+    explicit operator bool() const { return cur_ < end_; }
+    const Record* operator->() const { return RingBuffer::At(cur_); }
+    const Record& operator*() const { return *operator->(); }
+
+    // This is for ++it. it++ is deliberately not supported.
+    ReadIterator& operator++() {
+      PERFETTO_DCHECK(cur_ < end_);
+      // Once a record has been read, mark it as free clearing its type_and_id,
+      // so if we encounter it in another read iteration while being written
+      // we know it's not fully written yet.
+      // The memory_order_relaxed below is enough because:
+      // - The reader is single-threaded and doesn't re-read the same records.
+      // - Before starting a read batch, the reader has an acquire barrier on
+      //   |rd_index_|.
+      // - After terminating a read batch, the ~ReadIterator dtor updates the
+      //   |rd_index_| with a release-store.
+      // - Reader and writer are typically kCapacity/2 apart. So unless an
+      //   overrun happens a writer won't reuse a newly released record any time
+      //   soon. If an overrun happens, everything is busted regardless.
+      At(cur_)->type_and_id.store(0, std::memory_order_relaxed);
+      ++cur_;
+      return *this;
+    }
+
+   private:
+    friend class RingBuffer;
+    ReadIterator(uint64_t begin, uint64_t end)
+        : cur_(begin), end_(end), valid_(true) {}
+    ReadIterator& operator=(const ReadIterator&) = delete;
+    ReadIterator(const ReadIterator&) = delete;
+
+    uint64_t cur_;
+    uint64_t end_;
+    bool valid_;
+  };
+
+  static Record* At(uint64_t index) {
+    // Doesn't really have to be pow2, but if not the compiler will emit
+    // arithmetic operations to compute the modulo instead of a bitwise AND.
+    static_assert(!(kCapacity & (kCapacity - 1)), "kCapacity must be pow2");
+    PERFETTO_DCHECK(index >= rd_index_);
+    PERFETTO_DCHECK(index <= wr_index_);
+    return &records_[index % kCapacity];
+  }
+
+  // Must be called on the same task runner passed to Enable()
+  static ReadIterator GetReadIterator() {
+    PERFETTO_DCHECK(RingBuffer::IsOnValidTaskRunner());
+    return ReadIterator(rd_index_.load(std::memory_order_acquire),
+                        wr_index_.load(std::memory_order_acquire));
+  }
+
+  static Record* AppendNewRecord();
+  static void Reset();
+
+  static bool has_overruns() {
+    return has_overruns_.load(std::memory_order_acquire);
+  }
+
+  // Can temporarily return a value >= kCapacity but is eventually consistent.
+  // This would happen in case of overruns until threads hit the --wr_index_
+  // in AppendNewRecord().
+  static uint64_t GetSizeForTesting() {
+    auto wr_index = wr_index_.load(std::memory_order_relaxed);
+    auto rd_index = rd_index_.load(std::memory_order_relaxed);
+    PERFETTO_DCHECK(wr_index >= rd_index);
+    return wr_index - rd_index;
+  }
+
+ private:
+  friend class ReadIterator;
+
+  // Returns true if the caller is on the task runner passed to Enable().
+  // Used only for DCHECKs.
+  static bool IsOnValidTaskRunner();
+
+  static std::array<Record, kCapacity> records_;
+  static std::atomic<bool> read_task_queued_;
+  static std::atomic<uint64_t> wr_index_;
+  static std::atomic<uint64_t> rd_index_;
+  static std::atomic<bool> has_overruns_;
+  static Record bankruptcy_record_;  // Used in case of overruns.
+};
+
+inline void TraceCounter(uint32_t tag, uint16_t id, int32_t value) {
+  // memory_order_relaxed is okay because the storage has static lifetime.
+  // It is safe to accidentally log an event soon after disabling.
+  auto enabled_tags = g_enabled_tags.load(std::memory_order_relaxed);
+  if (PERFETTO_LIKELY((enabled_tags & tag) == 0))
+    return;
+  Record* record = RingBuffer::AppendNewRecord();
+  record->thread_id = static_cast<uint32_t>(base::GetThreadId());
+  record->set_timestamp(TraceTimeNowNs());
+  record->counter_value = value;
+  record->type_and_id.store(Record::kTypeCounter | id,
+                            std::memory_order_release);
+}
+
+class ScopedEvent {
+ public:
+  ScopedEvent(uint32_t tag, uint16_t event_id) {
+    auto enabled_tags = g_enabled_tags.load(std::memory_order_relaxed);
+    if (PERFETTO_LIKELY((enabled_tags & tag) == 0))
+      return;
+    event_id_ = event_id;
+    record_ = RingBuffer::AppendNewRecord();
+    record_->thread_id = static_cast<uint32_t>(base::GetThreadId());
+    record_->set_timestamp(TraceTimeNowNs());
+  }
+
+  ~ScopedEvent() {
+    if (PERFETTO_LIKELY(!record_))
+      return;
+    auto now = TraceTimeNowNs();
+    record_->duration_ns = static_cast<uint32_t>(now - record_->timestamp_ns());
+    record_->type_and_id.store(Record::kTypeEvent | event_id_,
+                               std::memory_order_release);
+  }
+
+ private:
+  Record* record_ = nullptr;
+  uint16_t event_id_ = 0;
+  ScopedEvent(const ScopedEvent&) = delete;
+  ScopedEvent& operator=(const ScopedEvent&) = delete;
+};
+
+// Boilerplate to derive a unique variable name for the event.
 #define PERFETTO_METATRACE_UID2(a, b) a##b
 #define PERFETTO_METATRACE_UID(x) PERFETTO_METATRACE_UID2(metatrace_, x)
-#if PERFETTO_DCHECK_IS_ON() && PERFETTO_BUILDFLAG(PERFETTO_STANDALONE_BUILD)
 
-#define PERFETTO_METATRACE(...) \
-  ::perfetto::base::MetaTrace PERFETTO_METATRACE_UID(__COUNTER__)(__VA_ARGS__)
-#else
-#define PERFETTO_METATRACE(...) ::perfetto::base::ignore_result(__VA_ARGS__)
-#endif
+#define PERFETTO_METATRACE_SCOPED(TAG, ID)                                \
+  ::perfetto::metatrace::ScopedEvent PERFETTO_METATRACE_UID(__COUNTER__)( \
+      ::perfetto::metatrace::TAG, ::perfetto::metatrace::ID)
 
-}  // namespace base
+#define PERFETTO_METATRACE_COUNTER(TAG, ID, VALUE)                \
+  ::perfetto::metatrace::TraceCounter(::perfetto::metatrace::TAG, \
+                                      ::perfetto::metatrace::ID,  \
+                                      static_cast<int32_t>(VALUE))
+
+}  // namespace metatrace
 }  // namespace perfetto
 
 #endif  // INCLUDE_PERFETTO_EXT_BASE_METATRACE_H_
diff --git a/include/perfetto/ext/base/metatrace_events.h b/include/perfetto/ext/base/metatrace_events.h
new file mode 100644
index 0000000..9814e3e
--- /dev/null
+++ b/include/perfetto/ext/base/metatrace_events.h
@@ -0,0 +1,89 @@
+/*
+ * Copyright (C) 2019 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef INCLUDE_PERFETTO_EXT_BASE_METATRACE_EVENTS_H_
+#define INCLUDE_PERFETTO_EXT_BASE_METATRACE_EVENTS_H_
+
+#include <stdint.h>
+
+namespace perfetto {
+namespace metatrace {
+
+// Bitmask of meta-tracing categories, used to choose which events get
+// recorded. TAG_ANY has every bit set.
+enum Tags : uint32_t {
+  TAG_NONE = 0,
+  TAG_ANY = uint32_t(-1),
+  TAG_FTRACE = 1 << 0,
+  TAG_PROC_POLLERS = 1 << 1,
+};
+
+// Compile time list of meta-tracing events and counters.
+// The macros below generate matching enums and arrays of string literals.
+// This is to avoid maintaining string maps manually.
+
+// clang-format off
+
+// DO NOT remove or reshuffle items in this list, only append. The IDs of these
+// events are an ABI; the trace processor relies on them to open old traces.
+#define PERFETTO_METATRACE_EVENTS(F) \
+  F(EVENT_ZERO_UNUSED),\
+  F(FTRACE_CPU_READER_READ), \
+  F(FTRACE_DRAIN_CPUS), \
+  F(FTRACE_UNBLOCK_READERS), \
+  F(FTRACE_CPU_READ_NONBLOCK), \
+  F(FTRACE_CPU_READ_BLOCK), \
+  F(FTRACE_CPU_SPLICE_NONBLOCK), \
+  F(FTRACE_CPU_SPLICE_BLOCK), \
+  F(FTRACE_CPU_WAIT_CMD), \
+  F(FTRACE_CPU_RUN_CYCLE), \
+  F(FTRACE_CPU_FLUSH), \
+  F(FTRACE_CPU_DRAIN), \
+  F(READ_SYS_STATS), \
+  F(PS_WRITE_ALL_PROCESSES), \
+  F(PS_ON_PIDS), \
+  F(PS_ON_RENAME_PIDS), \
+  F(PS_WRITE_ALL_PROCESS_STATS)
+
+// Append only, see above.
+#define PERFETTO_METATRACE_COUNTERS(F) \
+  F(COUNTER_ZERO_UNUSED),\
+  F(FTRACE_PAGES_DRAINED), \
+  F(PS_PIDS_SCANNED)
+
+// clang-format on
+
+#define PERFETTO_METATRACE_IDENTITY(name) name
+#define PERFETTO_METATRACE_TOSTRING(name) #name
+
+enum Events : uint16_t {
+  PERFETTO_METATRACE_EVENTS(PERFETTO_METATRACE_IDENTITY),
+  EVENTS_MAX
+};
+constexpr char const* kEventNames[] = {
+    PERFETTO_METATRACE_EVENTS(PERFETTO_METATRACE_TOSTRING)};
+
+enum Counters : uint16_t {
+  PERFETTO_METATRACE_COUNTERS(PERFETTO_METATRACE_IDENTITY),
+  COUNTERS_MAX
+};
+constexpr char const* kCounterNames[] = {
+    PERFETTO_METATRACE_COUNTERS(PERFETTO_METATRACE_TOSTRING)};
+
+}  // namespace metatrace
+}  // namespace perfetto
+
+#endif  // INCLUDE_PERFETTO_EXT_BASE_METATRACE_EVENTS_H_
diff --git a/protos/BUILD b/protos/BUILD
index bbf632a..1adcb01 100644
--- a/protos/BUILD
+++ b/protos/BUILD
@@ -381,6 +381,7 @@
         "//third_party/perfetto/protos:trace_ftrace",
         "//third_party/perfetto/protos:trace_interned_data",
         "//third_party/perfetto/protos:trace_minimal",
+        "//third_party/perfetto/protos:trace_perfetto",
         "//third_party/perfetto/protos:trace_power",
         "//third_party/perfetto/protos:trace_profiling",
         "//third_party/perfetto/protos:trace_ps",
@@ -836,6 +837,59 @@
     ],
 )
 
+# GN target: //protos/perfetto/trace/perfetto:lite_gen
+proto_library(
+    name = "trace_perfetto",
+    srcs = [
+        "perfetto/trace/perfetto/perfetto_metatrace.proto",
+    ],
+    has_services = 1,
+    cc_api_version = 2,
+    cc_generic_services = 1,
+    visibility = [
+        "//visibility:public",
+    ],
+)
+
+# GN target: //protos/perfetto/trace/perfetto:lite_gen
+cc_proto_library(
+    name = "trace_perfetto_cc_proto",
+    visibility = [
+        "//visibility:public",
+    ],
+    deps = [
+        "//third_party/perfetto/protos:trace_perfetto",
+    ],
+)
+
+# GN target: //protos/perfetto/trace/perfetto:lite_gen
+java_proto_library(
+    name = "trace_perfetto_java_proto",
+    visibility = [
+        "//visibility:public",
+    ],
+    deps = [
+        "//third_party/perfetto/protos:trace_perfetto",
+    ],
+)
+
+# GN target: //protos/perfetto/trace/perfetto:zero_gen
+proto_library(
+    name = "trace_perfetto_zero",
+    srcs = [
+        "perfetto/trace/perfetto/perfetto_metatrace.proto",
+    ],
+)
+
+# GN target: //protos/perfetto/trace/perfetto:zero_gen
+pbzero_cc_proto_library(
+    name = "trace_perfetto_zero_cc_proto",
+    src_proto_library = "//third_party/perfetto/protos:trace_perfetto_zero",
+    deps = [
+        "//third_party/perfetto:libprotozero",
+    ],
+)
+
 # GN target: //protos/perfetto/trace/power:lite_gen
 proto_library(
     name = "trace_power",
@@ -1193,6 +1247,7 @@
         "//third_party/perfetto/protos:trace_filesystem_zero",
         "//third_party/perfetto/protos:trace_ftrace_zero",
         "//third_party/perfetto/protos:trace_interned_data_zero",
+        "//third_party/perfetto/protos:trace_perfetto_zero",
         "//third_party/perfetto/protos:trace_power_zero",
         "//third_party/perfetto/protos:trace_profiling_zero",
         "//third_party/perfetto/protos:trace_ps_zero",
diff --git a/protos/perfetto/trace/BUILD.gn b/protos/perfetto/trace/BUILD.gn
index f049930..3a91d78 100644
--- a/protos/perfetto/trace/BUILD.gn
+++ b/protos/perfetto/trace/BUILD.gn
@@ -13,8 +13,8 @@
 # limitations under the License.
 
 import("../../../gn/perfetto.gni")
-import("../../../gn/proto_library.gni")
 import("../../../gn/proto_descriptor.gni")
+import("../../../gn/proto_library.gni")
 import("../../../gn/protozero_library.gni")
 
 # Common protos used by both the ":minimal_lite" target (for the service) and
@@ -42,6 +42,7 @@
     "filesystem:zero",
     "ftrace:zero",
     "interned_data:zero",
+    "perfetto:zero",
     "power:zero",
     "profiling:zero",
     "ps:zero",
@@ -65,6 +66,7 @@
     "filesystem:lite",
     "ftrace:lite",
     "interned_data:lite",
+    "perfetto:lite",
     "power:lite",
     "profiling:lite",
     "ps:lite",
diff --git a/protos/perfetto/trace/perfetto/BUILD.gn b/protos/perfetto/trace/perfetto/BUILD.gn
new file mode 100644
index 0000000..9bd2760
--- /dev/null
+++ b/protos/perfetto/trace/perfetto/BUILD.gn
@@ -0,0 +1,33 @@
+# Copyright (C) 2019 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import("../../../../gn/perfetto.gni")
+import("../../../../gn/proto_library.gni")
+import("../../../../gn/protozero_library.gni")
+
+event_proto_names = [ "perfetto_metatrace.proto" ]
+
+proto_library("lite") {
+  generate_python = false
+  sources = event_proto_names
+  proto_in_dir = "$perfetto_root_path/protos"
+  proto_out_dir = "$perfetto_root_path/protos"
+}
+
+protozero_library("zero") {
+  sources = event_proto_names
+  proto_in_dir = "$perfetto_root_path/protos"
+  proto_out_dir = "$perfetto_root_path/protos"
+  generator_plugin_options = "wrapper_namespace=pbzero"
+}
diff --git a/protos/perfetto/trace/perfetto/perfetto_metatrace.proto b/protos/perfetto/trace/perfetto/perfetto_metatrace.proto
new file mode 100644
index 0000000..63ceacb
--- /dev/null
+++ b/protos/perfetto/trace/perfetto/perfetto_metatrace.proto
@@ -0,0 +1,42 @@
+/*
+ * Copyright (C) 2019 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+syntax = "proto2";
+option optimize_for = LITE_RUNTIME;
+
+package perfetto.protos;
+
+// Used to trace the execution of perfetto itself.
+message PerfettoMetatrace {
+  // See base/metatrace_events.h for definitions.
+  oneof record_type {
+    uint32 event_id = 1;
+    uint32 counter_id = 2;
+  }
+
+  // Only when using |event_id|.
+  optional uint32 event_duration_ns = 3;
+
+  // Only when using |counter_id|.
+  optional int32 counter_value = 4;
+
+  // ID of the thread that emitted the event.
+  optional uint32 thread_id = 5;
+
+  // If true the meta-tracing ring buffer had overruns and hence some data is
+  // missing from this point.
+  optional bool has_overruns = 6;
+}
diff --git a/protos/perfetto/trace/perfetto_trace.proto b/protos/perfetto/trace/perfetto_trace.proto
index e404c75..0459368 100644
--- a/protos/perfetto/trace/perfetto_trace.proto
+++ b/protos/perfetto/trace/perfetto_trace.proto
@@ -2542,6 +2542,32 @@
 
 // End of protos/perfetto/trace/interned_data/interned_data.proto
 
+// Begin of protos/perfetto/trace/perfetto/perfetto_metatrace.proto
+
+// Used to trace the execution of perfetto itself.
+message PerfettoMetatrace {
+  // See base/metatrace_events.h for definitions.
+  oneof record_type {
+    uint32 event_id = 1;
+    uint32 counter_id = 2;
+  }
+
+  // Only when using |event_id|.
+  optional uint32 event_duration_ns = 3;
+
+  // Only when using |counter_id|.
+  optional int32 counter_value = 4;
+
+  // ID of the thread that emitted the event.
+  optional uint32 thread_id = 5;
+
+  // If true the meta-tracing ring buffer had overruns and hence some data is
+  // missing from this point.
+  optional bool has_overruns = 6;
+}
+
+// End of protos/perfetto/trace/perfetto/perfetto_metatrace.proto
+
 // Begin of protos/perfetto/trace/power/battery_counters.proto
 
 message BatteryCounters {
@@ -2908,7 +2934,7 @@
 // TracePacket(s).
 //
 // Next reserved id: 13 (up to 15).
-// Next id: 49.
+// Next id: 50.
 message TracePacket {
   // TODO(primiano): in future we should add a timestamp_clock_domain field to
   // allow mixing timestamps from different clock domains.
@@ -2938,6 +2964,7 @@
     Trigger trigger = 46;
     PackagesList packages_list = 47;
     ChromeBenchmarkMetadata chrome_benchmark_metadata = 48;
+    PerfettoMetatrace perfetto_metatrace = 49;
 
     // Only used by TrackEvent.
     ProcessDescriptor process_descriptor = 43;
diff --git a/protos/perfetto/trace/trace_packet.proto b/protos/perfetto/trace/trace_packet.proto
index 4067297..8e9d540 100644
--- a/protos/perfetto/trace/trace_packet.proto
+++ b/protos/perfetto/trace/trace_packet.proto
@@ -28,6 +28,7 @@
 import "perfetto/trace/ftrace/ftrace_event_bundle.proto";
 import "perfetto/trace/ftrace/ftrace_stats.proto";
 import "perfetto/trace/interned_data/interned_data.proto";
+import "perfetto/trace/perfetto/perfetto_metatrace.proto";
 import "perfetto/trace/power/battery_counters.proto";
 import "perfetto/trace/power/power_rails.proto";
 import "perfetto/trace/profiling/profile_packet.proto";
@@ -47,7 +48,7 @@
 // TracePacket(s).
 //
 // Next reserved id: 13 (up to 15).
-// Next id: 49.
+// Next id: 50.
 message TracePacket {
   // TODO(primiano): in future we should add a timestamp_clock_domain field to
   // allow mixing timestamps from different clock domains.
@@ -77,6 +78,7 @@
     Trigger trigger = 46;
     PackagesList packages_list = 47;
     ChromeBenchmarkMetadata chrome_benchmark_metadata = 48;
+    PerfettoMetatrace perfetto_metatrace = 49;
 
     // Only used by TrackEvent.
     ProcessDescriptor process_descriptor = 43;
diff --git a/src/base/BUILD.gn b/src/base/BUILD.gn
index 7359374..bff0640 100644
--- a/src/base/BUILD.gn
+++ b/src/base/BUILD.gn
@@ -145,6 +145,7 @@
   }
   sources = [
     "circular_queue_unittest.cc",
+    "metatrace_unittest.cc",
     "no_destructor_unittest.cc",
     "optional_unittest.cc",
     "paged_memory_unittest.cc",
diff --git a/src/base/metatrace.cc b/src/base/metatrace.cc
index 66954bf..c4e0eea 100644
--- a/src/base/metatrace.cc
+++ b/src/base/metatrace.cc
@@ -16,48 +16,125 @@
 
 #include "perfetto/ext/base/metatrace.h"
 
-#include <fcntl.h>
-#include <stdlib.h>
-
-#include "perfetto/base/build_config.h"
+#include "perfetto/base/task_runner.h"
 #include "perfetto/ext/base/file_utils.h"
 #include "perfetto/ext/base/time.h"
 
-#if PERFETTO_BUILDFLAG(PERFETTO_OS_WIN)
-#include <corecrt_io.h>
-#endif
-
 namespace perfetto {
-namespace base {
+namespace metatrace {
+
+std::atomic<uint32_t> g_enabled_tags{0};
+std::atomic<uint64_t> g_enabled_timestamp{0};
+
+// static members
+constexpr size_t RingBuffer::kCapacity;
+std::array<Record, RingBuffer::kCapacity> RingBuffer::records_;
+std::atomic<bool> RingBuffer::read_task_queued_;
+std::atomic<uint64_t> RingBuffer::wr_index_;
+std::atomic<uint64_t> RingBuffer::rd_index_;
+std::atomic<bool> RingBuffer::has_overruns_;
+Record RingBuffer::bankruptcy_record_;
+
+constexpr uint16_t Record::kTypeMask;
+constexpr uint16_t Record::kTypeCounter;
+constexpr uint16_t Record::kTypeEvent;
 
 namespace {
-int MaybeOpenTraceFile() {
-  static const char* tracing_path = getenv("PERFETTO_METATRACE_FILE");
-  if (tracing_path == nullptr)
-    return -1;
-  static int fd = open(tracing_path, O_WRONLY | O_CREAT | O_TRUNC, 0644);
-  return fd;
-}
+
+// std::function<> is not trivially de/constructible. This struct wraps it in a
+// heap-allocated struct to avoid static initializers.
+struct Delegate {
+  static Delegate* GetInstance() {
+    static Delegate* instance = new Delegate();
+    return instance;
+  }
+
+  base::TaskRunner* task_runner = nullptr;
+  std::function<void()> read_task;
+};
+
 }  // namespace
 
-constexpr uint32_t MetaTrace::kMainThreadCpu;
+bool Enable(std::function<void()> read_task,
+            base::TaskRunner* task_runner,
+            uint32_t tags) {
+  PERFETTO_DCHECK(read_task);
+  PERFETTO_DCHECK(task_runner->RunsTasksOnCurrentThread());
+  if (g_enabled_tags.load(std::memory_order_acquire))
+    return false;
 
-void MetaTrace::WriteEvent(char type, const char* evt_name, size_t cpu) {
-  int fd = MaybeOpenTraceFile();
-  if (fd == -1)
-    return;
-
-  // The JSON event format expects both "pid" and "tid" fields to create
-  // per-process tracks. Here what we really want to achieve is having one track
-  // per cpu. So we just pretend that each CPU is its own process with
-  // pid == tid == cpu.
-  char json[256];
-  int len = sprintf(json,
-                    "{\"ts\": %f, \"cat\": \"PERF\", \"ph\": \"%c\", \"name\": "
-                    "\"%s\", \"pid\": %zu, \"tid\": %zu},\n",
-                    GetWallTimeNs().count() / 1000.0, type, evt_name, cpu, cpu);
-  ignore_result(WriteAll(fd, json, static_cast<size_t>(len)));
+  Delegate* dg = Delegate::GetInstance();
+  dg->task_runner = task_runner;
+  dg->read_task = std::move(read_task);
+  RingBuffer::Reset();
+  g_enabled_timestamp.store(TraceTimeNowNs(), std::memory_order_relaxed);
+  g_enabled_tags.store(tags, std::memory_order_release);
+  return true;
 }
 
-}  // namespace base
+void Disable() {
+  g_enabled_tags.store(0, std::memory_order_release);
+  Delegate* dg = Delegate::GetInstance();
+  PERFETTO_DCHECK(!dg->task_runner ||
+                  dg->task_runner->RunsTasksOnCurrentThread());
+  dg->task_runner = nullptr;
+  dg->read_task = nullptr;
+}
+
+// static
+void RingBuffer::Reset() {
+  static_assert(std::is_trivially_constructible<Record>::value &&
+                    std::is_trivially_destructible<Record>::value,
+                "Record must be trivial");
+  memset(&records_[0], 0, sizeof(records_));
+  memset(&bankruptcy_record_, 0, sizeof(bankruptcy_record_));
+  wr_index_ = 0;
+  rd_index_ = 0;
+  has_overruns_ = false;
+  read_task_queued_ = false;
+}
+
+// static: reserves a slot for a new record and returns it to the caller.
+Record* RingBuffer::AppendNewRecord() {
+  auto wr_index = wr_index_.fetch_add(1, std::memory_order_acq_rel);
+
+  // rd_index can only monotonically increase, we don't care if we read an
+  // older value, we'll just hit the slow-path a bit earlier if it happens.
+  auto rd_index = rd_index_.load(std::memory_order_relaxed);
+
+  PERFETTO_DCHECK(wr_index >= rd_index);
+  auto size = wr_index - rd_index;  // Records pending before this one.
+  if (PERFETTO_LIKELY(size < kCapacity / 2))  // Fast path: under half full.
+    return At(wr_index);
+
+  // Slow-path: at least half full. Queue the read task and handle overruns.
+  bool expected = false;  // Only one read task may be pending at a time.
+  if (RingBuffer::read_task_queued_.compare_exchange_strong(expected, true)) {
+    Delegate* dg = Delegate::GetInstance();
+    if (dg->task_runner) {
+      dg->task_runner->PostTask([] {
+        // Meta-tracing might have been disabled in the meantime.
+        auto read_task = Delegate::GetInstance()->read_task;
+        if (read_task)
+          read_task();
+        RingBuffer::read_task_queued_ = false;  // Allow re-posting.
+      });
+    }
+  }
+
+  if (PERFETTO_LIKELY(size < kCapacity))  // Still room: use the ring.
+    return At(wr_index);
+
+  has_overruns_.store(true, std::memory_order_release);  // Buffer is full.
+  wr_index_.fetch_sub(1, std::memory_order_acq_rel);  // Undo the reservation.
+  return &bankruptcy_record_;  // Dummy slot outside the ring.
+}
+
+// static
+bool RingBuffer::IsOnValidTaskRunner() {
+  auto* task_runner = Delegate::GetInstance()->task_runner;
+  return task_runner && task_runner->RunsTasksOnCurrentThread();
+}
+
+}  // namespace metatrace
 }  // namespace perfetto
diff --git a/src/base/metatrace_unittest.cc b/src/base/metatrace_unittest.cc
new file mode 100644
index 0000000..48038cb
--- /dev/null
+++ b/src/base/metatrace_unittest.cc
@@ -0,0 +1,248 @@
+/*
+ * Copyright (C) 2019 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gmock/gmock.h>
+#include <gtest/gtest.h>
+
+#include <array>
+#include <chrono>
+#include <deque>
+#include <thread>
+
+#include "perfetto/ext/base/metatrace.h"
+#include "src/base/test/test_task_runner.h"
+
+namespace perfetto {
+namespace {
+
+namespace m = ::perfetto::metatrace;
+using ::testing::Invoke;
+
+class MetatraceTest : public ::testing::Test {
+ public:
+  void SetUp() override { m::Disable(); }
+
+  void TearDown() override {
+    task_runner_.RunUntilIdle();
+    m::Disable();
+  }
+
+  void Enable(uint32_t tags) {
+    m::Enable([this] { ReadCallback(); }, &task_runner_, tags);
+  }
+
+  MOCK_METHOD0(ReadCallback, void());
+  base::TestTaskRunner task_runner_;
+};
+
+TEST_F(MetatraceTest, TagEnablingLogic) {
+  EXPECT_CALL(*this, ReadCallback()).Times(0);
+  for (int iteration = 0; iteration < 3; iteration++) {
+    ASSERT_EQ(m::RingBuffer::GetSizeForTesting(), 0u);
+
+    // No events should be traced before enabling.
+    m::TraceCounter(m::TAG_ANY, /*id=*/1, /*value=*/42);
+    { m::ScopedEvent evt(m::TAG_ANY, /*id=*/1); }
+    ASSERT_EQ(m::RingBuffer::GetSizeForTesting(), 0u);
+
+    // Enable tags bit 1 (=2) and 2 (=4) and verify that only those events are
+    // added.
+    auto t_start = metatrace::TraceTimeNowNs();
+    Enable(/*tags=*/2 | 4);
+    m::TraceCounter(/*tag=*/1, /*id=*/42, /*value=*/10);      // No.
+    m::TraceCounter(/*tag=*/2, /*id=*/42, /*value=*/11);      // Yes.
+    m::TraceCounter(/*tag=*/4, /*id=*/42, /*value=*/12);      // Yes.
+    m::TraceCounter(/*tag=*/1 | 2, /*id=*/42, /*value=*/13);  // Yes.
+    m::TraceCounter(/*tag=*/1 | 4, /*id=*/42, /*value=*/14);  // Yes.
+    m::TraceCounter(/*tag=*/2 | 4, /*id=*/42, /*value=*/15);  // Yes.
+    m::TraceCounter(/*tag=*/4 | 8, /*id=*/42, /*value=*/16);  // Yes.
+    m::TraceCounter(/*tag=*/1 | 8, /*id=*/42, /*value=*/17);  // No.
+    m::TraceCounter(m::TAG_ANY, /*id=*/42, /*value=*/18);     // Yes.
+    { m::ScopedEvent evt(/*tag=*/1, /*id=*/20); }             // No.
+    { m::ScopedEvent evt(/*tag=*/8, /*id=*/21); }             // No.
+    { m::ScopedEvent evt(/*tag=*/2, /*id=*/22); }             // Yes.
+    { m::ScopedEvent evt(/*tag=*/4 | 8, /*id=*/23); }         // Yes.
+    { m::ScopedEvent evt(m::TAG_ANY, /*id=*/24); }            // Yes.
+
+    {
+      auto it = m::RingBuffer::GetReadIterator();
+      ASSERT_TRUE(it);
+      ASSERT_EQ(it->counter_value, 11);
+      ASSERT_TRUE(++it);
+      ASSERT_EQ(it->counter_value, 12);
+      ASSERT_TRUE(++it);
+      ASSERT_EQ(it->counter_value, 13);
+      ASSERT_TRUE(++it);
+      ASSERT_EQ(it->counter_value, 14);
+    }
+
+    // Test that destroying and re-creating the iterator resumes reading from
+    // the right place.
+    {
+      auto it = m::RingBuffer::GetReadIterator();
+      ASSERT_TRUE(++it);
+      ASSERT_EQ(it->counter_value, 15);
+      ASSERT_TRUE(++it);
+      ASSERT_EQ(it->counter_value, 16);
+      ASSERT_TRUE(++it);
+      ASSERT_EQ(it->counter_value, 18);
+      ASSERT_TRUE(++it);
+      ASSERT_EQ(it->type_and_id, 22);
+      ASSERT_TRUE(++it);
+      ASSERT_EQ(it->type_and_id, 23);
+      ASSERT_TRUE(++it);
+      ASSERT_EQ(it->type_and_id, 24);
+      ASSERT_FALSE(++it);
+    }
+
+    // Test that we can write pids up to 32 bit TIDs (I observed up to 262144
+    // from /proc/sys/kernel/pid_max) and up to 2 days of timestamps.
+    {
+      auto* record = m::RingBuffer::AppendNewRecord();
+      record->counter_value = 42;
+      constexpr uint64_t kTwoDays = 48ULL * 3600 * 1000 * 1000 * 1000;
+      record->set_timestamp(t_start + kTwoDays);
+      record->thread_id = 0xbabaf00d;
+      record->type_and_id = m::Record::kTypeCounter;
+
+      auto it = m::RingBuffer::GetReadIterator();
+      ASSERT_TRUE(it);
+      ASSERT_EQ(it->timestamp_ns(), t_start + kTwoDays);
+      ASSERT_EQ(it->thread_id, 0xbabaf00d);
+      ASSERT_FALSE(++it);
+    }
+
+    m::Disable();
+  }
+}
+
+// Test that overruns are handled properly and that the writer re-synchronizes
+// after the reader catches up.
+TEST_F(MetatraceTest, HandleOverruns) {
+  int cnt = 0;
+  int exp_cnt = 0;
+  for (size_t iteration = 0; iteration < 3; iteration++) {
+    Enable(m::TAG_ANY);
+    std::string checkpoint_name = "ReadTask " + std::to_string(iteration);
+    auto checkpoint = task_runner_.CreateCheckpoint(checkpoint_name);
+    EXPECT_CALL(*this, ReadCallback()).WillOnce(Invoke(checkpoint));
+
+    for (size_t i = 0; i < m::RingBuffer::kCapacity; i++)
+      m::TraceCounter(/*tag=*/1, /*id=*/42, /*value=*/cnt++);
+    ASSERT_EQ(m::RingBuffer::GetSizeForTesting(), m::RingBuffer::kCapacity);
+    ASSERT_FALSE(m::RingBuffer::has_overruns());
+
+    for (int n = 0; n < 3; n++)
+      m::TraceCounter(/*tag=*/1, /*id=*/42, /*value=*/-1);  // Will overrun.
+
+    ASSERT_TRUE(m::RingBuffer::has_overruns());
+    ASSERT_EQ(m::RingBuffer::GetSizeForTesting(), m::RingBuffer::kCapacity);
+
+    for (auto it = m::RingBuffer::GetReadIterator(); it; ++it)
+      ASSERT_EQ(it->counter_value, exp_cnt++);
+
+    ASSERT_EQ(m::RingBuffer::GetSizeForTesting(), 0u);  // Fully drained.
+
+    task_runner_.RunUntilCheckpoint(checkpoint_name);
+    m::Disable();
+  }
+}
+
+// Sets up a scenario where the writer writes constantly (however, guaranteeing
+// to not overrun) and the reader catches up. Tests that all events are seen
+// consistently without gaps.
+TEST_F(MetatraceTest, InterleavedReadWrites) {
+  Enable(m::TAG_ANY);
+  constexpr int kMaxValue = m::RingBuffer::kCapacity * 10;
+
+  std::atomic<int> last_value_read{-1};
+  auto read_task = [&last_value_read] {
+    int last = last_value_read;
+    for (auto it = m::RingBuffer::GetReadIterator(); it; ++it) {
+      EXPECT_EQ(it->counter_value, last + 1);
+      last = it->counter_value;
+    }
+    // The read pointer is incremented only after destroying the iterator.
+    // Publish the last read value after the loop.
+    last_value_read = last;
+  };
+
+  EXPECT_CALL(*this, ReadCallback()).WillRepeatedly(Invoke(read_task));
+
+  // The writer will write continuously counters from 0 to kMaxValue.
+  auto writer_done = task_runner_.CreateCheckpoint("writer_done");
+  std::thread writer_thread([this, &writer_done, &last_value_read] {
+    for (int i = 0; i < kMaxValue; i++) {
+      m::TraceCounter(/*tag=*/1, /*id=*/1, i);
+      const int kCapacity = static_cast<int>(m::RingBuffer::kCapacity);
+
+      // Wait for the reader to avoid overruns.
+      while (i - last_value_read >= kCapacity - 1)
+        std::this_thread::sleep_for(std::chrono::nanoseconds(1));
+    }
+    task_runner_.PostTask(writer_done);
+  });
+
+  task_runner_.RunUntilCheckpoint("writer_done");
+  writer_thread.join();
+
+  read_task();  // Do a final read pass.
+  EXPECT_FALSE(m::RingBuffer::has_overruns());
+  EXPECT_EQ(last_value_read, kMaxValue - 1);
+}
+
+// Try to hit potential thread races:
+// - Test that the read callback is posted only once per cycle.
+// - Test that the final size of the ring buffer is sane.
+// - Test that event records are consistent within each thread's event stream.
+TEST_F(MetatraceTest, ThreadRaces) {
+  for (size_t iteration = 0; iteration < 10; iteration++) {
+    Enable(m::TAG_ANY);
+
+    std::string checkpoint_name = "ReadTask " + std::to_string(iteration);
+    auto checkpoint = task_runner_.CreateCheckpoint(checkpoint_name);
+    EXPECT_CALL(*this, ReadCallback()).WillOnce(Invoke(checkpoint));
+
+    auto thread_main = [](uint16_t thd_idx) {
+      for (size_t i = 0; i < m::RingBuffer::kCapacity + 500; i++)
+        m::TraceCounter(/*tag=*/1, thd_idx, static_cast<int>(i));
+    };
+
+    std::array<std::thread, 8> threads;
+    for (size_t thd_idx = 0; thd_idx < threads.size(); thd_idx++)
+      threads[thd_idx] = std::thread(thread_main, thd_idx);
+
+    for (auto& t : threads)
+      t.join();
+
+    task_runner_.RunUntilCheckpoint(checkpoint_name);
+    ASSERT_EQ(m::RingBuffer::GetSizeForTesting(), m::RingBuffer::kCapacity);
+
+    std::array<int, threads.size()> last_val{};  // Last value for each thread.
+    for (auto it = m::RingBuffer::GetReadIterator(); it; ++it) {
+      using Record = m::Record;
+      ASSERT_EQ(it->type_and_id & Record::kTypeMask, Record::kTypeCounter);
+      auto thd_idx = static_cast<size_t>(it->type_and_id & ~Record::kTypeMask);
+      ASSERT_EQ(it->counter_value, last_val[thd_idx]);
+      last_val[thd_idx]++;
+    }
+
+    m::Disable();
+  }
+}
+
+}  // namespace
+}  // namespace perfetto
diff --git a/src/trace_processor/BUILD.gn b/src/trace_processor/BUILD.gn
index e5d33e2..8950cee 100644
--- a/src/trace_processor/BUILD.gn
+++ b/src/trace_processor/BUILD.gn
@@ -176,6 +176,7 @@
     "../../protos/perfetto/trace/chrome:zero",
     "../../protos/perfetto/trace/ftrace:zero",
     "../../protos/perfetto/trace/interned_data:zero",
+    "../../protos/perfetto/trace/perfetto:zero",
     "../../protos/perfetto/trace/power:zero",
     "../../protos/perfetto/trace/profiling:zero",
     "../../protos/perfetto/trace/ps:zero",
diff --git a/src/trace_processor/proto_trace_parser.cc b/src/trace_processor/proto_trace_parser.cc
index 3355766..b13c41e 100644
--- a/src/trace_processor/proto_trace_parser.cc
+++ b/src/trace_processor/proto_trace_parser.cc
@@ -22,6 +22,7 @@
 #include <string>
 
 #include "perfetto/base/logging.h"
+#include "perfetto/ext/base/metatrace_events.h"
 #include "perfetto/ext/base/optional.h"
 #include "perfetto/ext/base/string_view.h"
 #include "perfetto/ext/base/utils.h"
@@ -59,6 +60,7 @@
 #include "perfetto/trace/ftrace/signal.pbzero.h"
 #include "perfetto/trace/ftrace/task.pbzero.h"
 #include "perfetto/trace/interned_data/interned_data.pbzero.h"
+#include "perfetto/trace/perfetto/perfetto_metatrace.pbzero.h"
 #include "perfetto/trace/power/battery_counters.pbzero.h"
 #include "perfetto/trace/power/power_rails.pbzero.h"
 #include "perfetto/trace/profiling/profile_packet.pbzero.h"
@@ -119,6 +121,7 @@
       ion_total_unknown_id_(context->storage->InternString("mem.ion.unknown")),
       ion_change_unknown_id_(
           context->storage->InternString("mem.ion_change.unknown")),
+      metatrace_id_(context->storage->InternString("metatrace")),
       task_file_name_args_key_id_(
           context->storage->InternString("task.posted_from.file_name")),
       task_function_name_args_key_id_(
@@ -262,6 +265,10 @@
     ParseChromeBenchmarkMetadata(packet.chrome_benchmark_metadata());
   }
 
+  if (packet.has_perfetto_metatrace()) {
+    ParseMetatraceEvent(ts, packet.perfetto_metatrace());
+  }
+
   // TODO(lalitm): maybe move this to the flush method in the trace processor
   // once we have it. This may reduce performance in the ArgsTracker though so
   // needs to be handled carefully.
@@ -1720,5 +1727,46 @@
   }
 }
 
+// Parses a PerfettoMetatrace packet. Events are pushed as scoped slices and
+// counters as counter samples, both keyed on the thread id carried by the
+// packet. IDs outside the compiled-in name tables get a generic
+// "Event N" / "Counter N" name.
+void ProtoTraceParser::ParseMetatraceEvent(int64_t ts, ConstBytes blob) {
+  protos::pbzero::PerfettoMetatrace::Decoder event(blob.data, blob.size);
+  auto utid = context_->process_tracker->GetOrCreateThread(event.thread_id());
+
+  StringId cat_id = metatrace_id_;
+  StringId name_id = 0;
+  char fallback[64];
+
+  if (event.has_event_id()) {
+    auto eid = event.event_id();
+    if (eid < metatrace::EVENTS_MAX) {
+      name_id = context_->storage->InternString(metatrace::kEventNames[eid]);
+    } else {
+      // The id is unsigned: format with %u and bound the write to |fallback|.
+      snprintf(fallback, sizeof(fallback), "Event %u",
+               static_cast<unsigned>(eid));
+      name_id = context_->storage->InternString(fallback);
+    }
+    context_->slice_tracker->Scoped(ts, utid, cat_id, name_id,
+                                    event.event_duration_ns());
+  } else if (event.has_counter_id()) {
+    auto cid = event.counter_id();
+    if (cid < metatrace::COUNTERS_MAX) {
+      name_id = context_->storage->InternString(metatrace::kCounterNames[cid]);
+    } else {
+      snprintf(fallback, sizeof(fallback), "Counter %u",
+               static_cast<unsigned>(cid));
+      name_id = context_->storage->InternString(fallback);
+    }
+    context_->event_tracker->PushCounter(ts, event.counter_value(), name_id,
+                                         utid, RefType::kRefUtid);
+  }
+
+  if (event.has_overruns())
+    context_->storage->IncrementStats(stats::metatrace_overruns);
+}
+
 }  // namespace trace_processor
 }  // namespace perfetto
diff --git a/src/trace_processor/proto_trace_parser.h b/src/trace_processor/proto_trace_parser.h
index 0fbe329..1c07e90 100644
--- a/src/trace_processor/proto_trace_parser.h
+++ b/src/trace_processor/proto_trace_parser.h
@@ -109,6 +109,7 @@
       ArgsTracker* args_tracker,
       RowId row);
   void ParseChromeBenchmarkMetadata(ConstBytes);
+  void ParseMetatraceEvent(int64_t ts, ConstBytes);
 
  private:
   TraceProcessorContext* context_;
@@ -139,6 +140,7 @@
   const StringId oom_score_adj_id_;
   const StringId ion_total_unknown_id_;
   const StringId ion_change_unknown_id_;
+  const StringId metatrace_id_;
   const StringId task_file_name_args_key_id_;
   const StringId task_function_name_args_key_id_;
   std::vector<StringId> meminfo_strs_id_;
diff --git a/src/trace_processor/stats.h b/src/trace_processor/stats.h
index 038981e..726eefb 100644
--- a/src/trace_processor/stats.h
+++ b/src/trace_processor/stats.h
@@ -102,7 +102,8 @@
   F(heapprofd_invalid_string_id,              kSingle,  kError,    kTrace),    \
   F(heapprofd_invalid_mapping_id,             kSingle,  kError,    kTrace),    \
   F(heapprofd_invalid_frame_id,               kSingle,  kError,    kTrace),    \
-  F(heapprofd_invalid_callstack_id,           kSingle,  kError,    kTrace)
+  F(heapprofd_invalid_callstack_id,           kSingle,  kError,    kTrace),    \
+  F(metatrace_overruns,                       kSingle,  kError,    kTrace)
 // clang-format on
 
 enum Type {
diff --git a/src/traced/probes/BUILD.gn b/src/traced/probes/BUILD.gn
index 537ad24..ec361fc 100644
--- a/src/traced/probes/BUILD.gn
+++ b/src/traced/probes/BUILD.gn
@@ -40,6 +40,7 @@
     "../../tracing:tracing",
     "android_log",
     "filesystem",
+    "metatrace",
     "packages_list",
     "power",
     "ps",
diff --git a/src/traced/probes/ftrace/cpu_reader.cc b/src/traced/probes/ftrace/cpu_reader.cc
index 9f0603d..dbd7a75 100644
--- a/src/traced/probes/ftrace/cpu_reader.cc
+++ b/src/traced/probes/ftrace/cpu_reader.cc
@@ -240,12 +240,15 @@
   // This lambda function reads the ftrace raw pipe using either read() or
   // splice(), either in blocking or non-blocking mode.
   // Returns the number of ftrace bytes read, or -1 in case of failure.
-  auto read_ftrace_pipe = [&sync_pipe, trace_fd, pool, cpu, header_size_len](
+  auto read_ftrace_pipe = [&sync_pipe, trace_fd, pool, header_size_len](
                               ReadMode mode, Block block) -> int {
-    static const char* const kModesStr[] = {"read-nonblock", "read-block",
-                                            "splice-nonblock", "splice-block"};
-    const char* mode_str = kModesStr[(mode == kSplice) * 2 + (block == kBlock)];
-    PERFETTO_METATRACE(mode_str, cpu);
+    auto eid = mode == kRead
+                   ? (block == kNonBlock ? metatrace::FTRACE_CPU_READ_NONBLOCK
+                                         : metatrace::FTRACE_CPU_READ_BLOCK)
+                   : (block == kNonBlock ? metatrace::FTRACE_CPU_SPLICE_NONBLOCK
+                                         : metatrace::FTRACE_CPU_SPLICE_BLOCK);
+    metatrace::ScopedEvent evt(metatrace::TAG_FTRACE, eid);
+
     uint8_t* pool_page = pool->BeginWrite();
     PERFETTO_DCHECK(pool_page);
 
@@ -322,7 +325,7 @@
     // Commands are tagged with an ID, every new command has a new |cmd_id|, so
     // we can distinguish spurious wakeups from actual cmd requests.
     {
-      PERFETTO_METATRACE("wait cmd", cpu);
+      PERFETTO_METATRACE_SCOPED(TAG_FTRACE, FTRACE_CPU_WAIT_CMD);
       std::unique_lock<std::mutex> lock(thread_sync->mutex);
       while (thread_sync->cmd_id == last_cmd_id)
         thread_sync->cond.wait(lock);
@@ -345,7 +348,7 @@
         break;
 
       case FtraceThreadSync::kRun: {
-        PERFETTO_METATRACE(cur_mode == kRead ? "read" : "splice", cpu);
+        PERFETTO_METATRACE_SCOPED(TAG_FTRACE, FTRACE_CPU_RUN_CYCLE);
 
         // Do a blocking read/splice. This can fail for a variety of reasons:
         // - FtraceController interrupts us with a signal for a new cmd
@@ -367,17 +370,19 @@
         // Do as many non-blocking read/splice as we can.
         while (read_ftrace_pipe(cur_mode, kNonBlock) > kRoughlyAPage) {
         }
-        pool->CommitWrittenPages();
+        size_t num_pages = pool->CommitWrittenPages();
+        PERFETTO_METATRACE_COUNTER(TAG_FTRACE, FTRACE_PAGES_DRAINED, num_pages);
         FtraceController::OnCpuReaderRead(cpu, generation, thread_sync);
         break;
       }
 
       case FtraceThreadSync::kFlush: {
-        PERFETTO_METATRACE("flush", cpu);
+        PERFETTO_METATRACE_SCOPED(TAG_FTRACE, FTRACE_CPU_FLUSH);
         cur_mode = kRead;
         while (read_ftrace_pipe(cur_mode, kNonBlock) > kRoughlyAPage) {
         }
-        pool->CommitWrittenPages();
+        size_t num_pages = pool->CommitWrittenPages();
+        PERFETTO_METATRACE_COUNTER(TAG_FTRACE, FTRACE_PAGES_DRAINED, num_pages);
         FtraceController::OnCpuReaderFlush(cpu, generation, thread_sync);
         break;
       }
@@ -399,8 +404,7 @@
 // first CPU wakes up from the blocking read()/splice().
 void CpuReader::Drain(const std::set<FtraceDataSource*>& data_sources) {
   PERFETTO_DCHECK_THREAD(thread_checker_);
-  PERFETTO_METATRACE("Drain(" + std::to_string(cpu_) + ")",
-                     base::MetaTrace::kMainThreadCpu);
+  PERFETTO_METATRACE_SCOPED(TAG_FTRACE, FTRACE_CPU_DRAIN);
 
   auto page_blocks = pool_.BeginRead();
   for (const auto& page_block : page_blocks) {
diff --git a/src/traced/probes/ftrace/ftrace_controller.cc b/src/traced/probes/ftrace/ftrace_controller.cc
index 9a82289..907e6f1 100644
--- a/src/traced/probes/ftrace/ftrace_controller.cc
+++ b/src/traced/probes/ftrace/ftrace_controller.cc
@@ -164,8 +164,7 @@
 void FtraceController::OnCpuReaderRead(size_t cpu,
                                        int generation,
                                        FtraceThreadSync* thread_sync) {
-  PERFETTO_METATRACE("OnCpuReaderRead()", cpu);
-
+  PERFETTO_METATRACE_SCOPED(TAG_FTRACE, FTRACE_CPU_READER_READ);
   {
     std::lock_guard<std::mutex> lock(thread_sync->mutex);
     // If this was the first CPU to wake up, schedule a drain for the next
@@ -219,7 +218,7 @@
 
 void FtraceController::DrainCPUs(int generation) {
   PERFETTO_DCHECK_THREAD(thread_checker_);
-  PERFETTO_METATRACE("DrainCPUs()", base::MetaTrace::kMainThreadCpu);
+  PERFETTO_METATRACE_SCOPED(TAG_FTRACE, FTRACE_DRAIN_CPUS);
 
   if (generation != generation_)
     return;
@@ -278,7 +277,7 @@
 }
 
 void FtraceController::UnblockReaders() {
-  PERFETTO_METATRACE("UnblockReaders()", base::MetaTrace::kMainThreadCpu);
+  PERFETTO_METATRACE_SCOPED(TAG_FTRACE, FTRACE_UNBLOCK_READERS);
 
   // If a flush or a quit is pending, do nothing.
   std::unique_lock<std::mutex> lock(thread_sync_.mutex);
diff --git a/src/traced/probes/ftrace/page_pool.h b/src/traced/probes/ftrace/page_pool.h
index b0b5fc3..de5c250 100644
--- a/src/traced/probes/ftrace/page_pool.h
+++ b/src/traced/probes/ftrace/page_pool.h
@@ -140,13 +140,16 @@
   }
 
   // Makes all written pages available to the reader.
-  void CommitWrittenPages() {
+  // Returns an upper bound on the number of pages written.
+  size_t CommitWrittenPages() {
     PERFETTO_DCHECK_THREAD(writer_thread_);
+    size_t size = write_queue_.size() * PagePool::PageBlock::kPagesPerBlock;
     std::lock_guard<std::mutex> lock(mutex_);
     read_queue_.insert(read_queue_.end(),
                        std::make_move_iterator(write_queue_.begin()),
                        std::make_move_iterator(write_queue_.end()));
     write_queue_.clear();
+    return size;
   }
 
   // Moves ownership of all the page blocks in the read queue to the caller.
diff --git a/src/traced/probes/metatrace/BUILD.gn b/src/traced/probes/metatrace/BUILD.gn
new file mode 100644
index 0000000..489b85a
--- /dev/null
+++ b/src/traced/probes/metatrace/BUILD.gn
@@ -0,0 +1,31 @@
+# Copyright (C) 2019 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+source_set("metatrace") {
+  public_deps = [
+    "../../../tracing",
+  ]
+  deps = [
+    "..:data_source",
+    "../../../../gn:default_deps",
+    "../../../../include/perfetto/ext/traced",
+    "../../../../protos/perfetto/trace/perfetto:zero",
+    "../../../base",
+    "../../../tracing",
+  ]
+  sources = [
+    "metatrace_data_source.cc",
+    "metatrace_data_source.h",
+  ]
+}
diff --git a/src/traced/probes/metatrace/metatrace_data_source.cc b/src/traced/probes/metatrace/metatrace_data_source.cc
new file mode 100644
index 0000000..2b93bc0
--- /dev/null
+++ b/src/traced/probes/metatrace/metatrace_data_source.cc
@@ -0,0 +1,73 @@
+/*
+ * Copyright (C) 2019 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "src/traced/probes/metatrace/metatrace_data_source.h"
+
+#include <vector>
+
+#include "perfetto/base/logging.h"
+#include "perfetto/base/task_runner.h"
+#include "perfetto/ext/tracing/core/trace_packet.h"
+#include "perfetto/ext/tracing/core/trace_writer.h"
+#include "src/tracing/core/metatrace_writer.h"
+
+#include "perfetto/trace/trace_packet.pbzero.h"
+
+namespace perfetto {
+
+// static
+const char* MetatraceDataSource::kDataSourceName =
+    MetatraceWriter::kDataSourceName;
+
+// Takes ownership of |writer|; it is handed over to the MetatraceWriter when
+// Start() is called. |task_runner| is stored as a raw pointer.
+MetatraceDataSource::MetatraceDataSource(base::TaskRunner* task_runner,
+                                         TracingSessionID session_id,
+                                         std::unique_ptr<TraceWriter> writer)
+    : ProbesDataSource(session_id, kTypeId),
+      task_runner_(task_runner),
+      trace_writer_(std::move(writer)) {}
+
+MetatraceDataSource::~MetatraceDataSource() {
+  // |metatrace_writer_| is created lazily in Start(), so it is still null if
+  // this data source is destroyed without ever having been started.
+  if (metatrace_writer_)
+    metatrace_writer_->Disable();
+}
+
+// Creates the MetatraceWriter and enables it for all tags, writing into the
+// TraceWriter received at construction time.
+void MetatraceDataSource::Start() {
+  metatrace_writer_.reset(new MetatraceWriter());
+  metatrace_writer_->Enable(task_runner_, std::move(trace_writer_),
+                            metatrace::TAG_ANY);
+}
+
+// Writes out the pending metatrace events and flushes the TraceWriter;
+// |callback| is forwarded to the writer.
+void MetatraceDataSource::Flush(FlushRequestID,
+                                std::function<void()> callback) {
+  if (!metatrace_writer_) {
+    // Not started yet: nothing was recorded through this instance. Ack the
+    // flush right away instead of dereferencing a null writer.
+    if (callback)
+      callback();
+    return;
+  }
+  metatrace_writer_->WriteAllAndFlushTraceWriter(std::move(callback));
+}
+
+}  // namespace perfetto
diff --git a/src/traced/probes/metatrace/metatrace_data_source.h b/src/traced/probes/metatrace/metatrace_data_source.h
new file mode 100644
index 0000000..3bd0d04
--- /dev/null
+++ b/src/traced/probes/metatrace/metatrace_data_source.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (C) 2019 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef SRC_TRACED_PROBES_METATRACE_METATRACE_DATA_SOURCE_H_
+#define SRC_TRACED_PROBES_METATRACE_METATRACE_DATA_SOURCE_H_
+
+#include <memory>
+
+#include "src/traced/probes/probes_data_source.h"
+
+namespace perfetto {
+
+class MetatraceWriter;
+class TraceWriter;
+
+namespace base {
+class TaskRunner;
+}
+
+class MetatraceDataSource : public ProbesDataSource {
+ public:
+  static constexpr int kTypeId = 8;
+  static const char* kDataSourceName;
+
+  MetatraceDataSource(base::TaskRunner*,
+                      TracingSessionID,
+                      std::unique_ptr<TraceWriter> writer);
+
+  ~MetatraceDataSource() override;
+
+  // ProbesDataSource implementation.
+  void Start() override;
+  void Flush(FlushRequestID, std::function<void()> callback) override;
+
+ private:
+  base::TaskRunner* const task_runner_;
+  std::unique_ptr<TraceWriter> trace_writer_;
+  std::unique_ptr<MetatraceWriter> metatrace_writer_;
+};
+
+}  // namespace perfetto
+
+#endif  // SRC_TRACED_PROBES_METATRACE_METATRACE_DATA_SOURCE_H_
diff --git a/src/traced/probes/probes_producer.cc b/src/traced/probes/probes_producer.cc
index 9e4596c..a8237b5 100644
--- a/src/traced/probes/probes_producer.cc
+++ b/src/traced/probes/probes_producer.cc
@@ -36,6 +36,7 @@
 #include "src/traced/probes/filesystem/inode_file_data_source.h"
 #include "src/traced/probes/ftrace/ftrace_config.h"
 #include "src/traced/probes/ftrace/ftrace_data_source.h"
+#include "src/traced/probes/metatrace/metatrace_data_source.h"
 #include "src/traced/probes/packages_list/packages_list_data_source.h"
 #include "src/traced/probes/power/android_power_data_source.h"
 #include "src/traced/probes/probes_data_source.h"
@@ -129,6 +130,12 @@
     desc.set_name(kPackagesListSourceName);
     endpoint_->RegisterDataSource(desc);
   }
+
+  {
+    DataSourceDescriptor desc;
+    desc.set_name(MetatraceDataSource::kDataSourceName);
+    endpoint_->RegisterDataSource(desc);
+  }
 }
 
 void ProbesProducer::OnDisconnect() {
@@ -183,6 +190,8 @@
     data_source = CreateAndroidLogDataSource(session_id, config);
   } else if (config.name() == kPackagesListSourceName) {
     data_source = CreatePackagesListDataSource(session_id, config);
+  } else if (config.name() == MetatraceDataSource::kDataSourceName) {
+    data_source = CreateMetatraceDataSource(session_id, config);
   }
 
   if (!data_source) {
@@ -312,6 +321,14 @@
                              endpoint_->CreateTraceWriter(buffer_id), config));
 }
 
+std::unique_ptr<ProbesDataSource> ProbesProducer::CreateMetatraceDataSource(
+    TracingSessionID session_id,
+    const DataSourceConfig& config) {
+  auto buffer_id = static_cast<BufferID>(config.target_buffer());
+  return std::unique_ptr<ProbesDataSource>(new MetatraceDataSource(
+      task_runner_, session_id, endpoint_->CreateTraceWriter(buffer_id)));
+}
+
 void ProbesProducer::StopDataSource(DataSourceInstanceID id) {
   PERFETTO_LOG("Producer stop (id=%" PRIu64 ")", id);
   auto it = data_sources_.find(id);
@@ -472,6 +489,7 @@
       case SysStatsDataSource::kTypeId:
       case AndroidLogDataSource::kTypeId:
       case PackagesListDataSource::kTypeId:
+      case MetatraceDataSource::kTypeId:
         break;
       default:
         PERFETTO_DFATAL("Invalid data source.");
diff --git a/src/traced/probes/probes_producer.h b/src/traced/probes/probes_producer.h
index 9d28c8a..4922782 100644
--- a/src/traced/probes/probes_producer.h
+++ b/src/traced/probes/probes_producer.h
@@ -84,6 +84,9 @@
   std::unique_ptr<ProbesDataSource> CreatePackagesListDataSource(
       TracingSessionID session_id,
       const DataSourceConfig& config);
+  std::unique_ptr<ProbesDataSource> CreateMetatraceDataSource(
+      TracingSessionID session_id,
+      const DataSourceConfig& config);
 
  private:
   enum State {
diff --git a/src/traced/probes/ps/process_stats_data_source.cc b/src/traced/probes/ps/process_stats_data_source.cc
index 86265ed..16af30f 100644
--- a/src/traced/probes/ps/process_stats_data_source.cc
+++ b/src/traced/probes/ps/process_stats_data_source.cc
@@ -138,7 +138,7 @@
 }
 
 void ProcessStatsDataSource::WriteAllProcesses() {
-  PERFETTO_METATRACE("WriteAllProcesses", 0);
+  PERFETTO_METATRACE_SCOPED(TAG_PROC_POLLERS, PS_WRITE_ALL_PROCESSES);
   PERFETTO_DCHECK(!cur_ps_tree_);
 
   CacheProcFsScanStartTimestamp();
@@ -171,20 +171,23 @@
 }
 
 void ProcessStatsDataSource::OnPids(const std::vector<int32_t>& pids) {
-  PERFETTO_METATRACE("OnPids", 0);
+  PERFETTO_METATRACE_SCOPED(TAG_PROC_POLLERS, PS_ON_PIDS);
   if (!enable_on_demand_dumps_)
     return;
   PERFETTO_DCHECK(!cur_ps_tree_);
+  int pids_scanned = 0;
   for (int32_t pid : pids) {
     if (seen_pids_.count(pid) || pid == 0)
       continue;
     WriteProcessOrThread(pid);
+    pids_scanned++;
   }
   FinalizeCurPacket();
+  PERFETTO_METATRACE_COUNTER(TAG_PROC_POLLERS, PS_PIDS_SCANNED, pids_scanned);
 }
 
 void ProcessStatsDataSource::OnRenamePids(const std::vector<int32_t>& pids) {
-  PERFETTO_METATRACE("OnRenamePids", 0);
+  PERFETTO_METATRACE_SCOPED(TAG_PROC_POLLERS, PS_ON_RENAME_PIDS);
   if (!enable_on_demand_dumps_)
     return;
   PERFETTO_DCHECK(!cur_ps_tree_);
@@ -367,7 +370,7 @@
   // proc files over and over. Same for non-whitelist processes (see above).
 
   CacheProcFsScanStartTimestamp();
-  PERFETTO_METATRACE("WriteAllProcessStats", 0);
+  PERFETTO_METATRACE_SCOPED(TAG_PROC_POLLERS, PS_WRITE_ALL_PROCESS_STATS);
   base::ScopedDir proc_dir = OpenProcDir();
   if (!proc_dir)
     return;
diff --git a/src/traced/probes/sys_stats/sys_stats_data_source.cc b/src/traced/probes/sys_stats/sys_stats_data_source.cc
index ee0de79..7b6645a 100644
--- a/src/traced/probes/sys_stats/sys_stats_data_source.cc
+++ b/src/traced/probes/sys_stats/sys_stats_data_source.cc
@@ -160,7 +160,7 @@
 SysStatsDataSource::~SysStatsDataSource() = default;
 
 void SysStatsDataSource::ReadSysStats() {
-  PERFETTO_METATRACE("ReadSysStats", 0);
+  PERFETTO_METATRACE_SCOPED(TAG_PROC_POLLERS, READ_SYS_STATS);
   auto packet = writer_->NewTracePacket();
 
   packet->set_timestamp(static_cast<uint64_t>(base::GetBootTimeNs().count()));
diff --git a/src/tracing/BUILD.gn b/src/tracing/BUILD.gn
index 39fbf25..57d7971 100644
--- a/src/tracing/BUILD.gn
+++ b/src/tracing/BUILD.gn
@@ -29,6 +29,7 @@
     "../../gn:default_deps",
     "../../include/perfetto/tracing",
     "../../protos/perfetto/config:lite",
+    "../../protos/perfetto/trace/perfetto:zero",  # For MetatraceWriter.
     "../base",
     "../protozero",
   ]
@@ -39,6 +40,8 @@
     "core/data_source_descriptor.cc",
     "core/id_allocator.cc",
     "core/id_allocator.h",
+    "core/metatrace_writer.cc",
+    "core/metatrace_writer.h",
     "core/null_trace_writer.cc",
     "core/null_trace_writer.h",
     "core/observable_events.cc",
diff --git a/src/tracing/core/metatrace_writer.cc b/src/tracing/core/metatrace_writer.cc
new file mode 100644
index 0000000..1a1f7ac
--- /dev/null
+++ b/src/tracing/core/metatrace_writer.cc
@@ -0,0 +1,128 @@
+/*
+ * Copyright (C) 2019 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "src/tracing/core/metatrace_writer.h"
+
+#include "perfetto/base/logging.h"
+#include "perfetto/base/task_runner.h"
+#include "perfetto/ext/tracing/core/data_source_descriptor.h"
+#include "perfetto/ext/tracing/core/trace_writer.h"
+
+#include "perfetto/trace/perfetto/perfetto_metatrace.pbzero.h"
+#include "perfetto/trace/trace_packet.pbzero.h"
+
+namespace perfetto {
+
+// static
+constexpr char MetatraceWriter::kDataSourceName[];
+
+MetatraceWriter::MetatraceWriter() : weak_ptr_factory_(this) {}
+
+MetatraceWriter::~MetatraceWriter() {
+  Disable();
+}
+
+// Turns metatracing on for |tags| and registers with metatrace::Enable() a
+// callback that drains the ring buffer into |trace_writer|. Reports an error
+// and returns if this instance was already started. If metatrace::Enable()
+// returns false the instance stays not-started but keeps |trace_writer|.
+void MetatraceWriter::Enable(base::TaskRunner* task_runner,
+                             std::unique_ptr<TraceWriter> trace_writer,
+                             uint32_t tags) {
+  PERFETTO_DCHECK_THREAD(thread_checker_);
+  if (started_) {
+    PERFETTO_DFATAL_OR_ELOG("Metatrace already started from this instance");
+    return;
+  }
+  task_runner_ = task_runner;
+  trace_writer_ = std::move(trace_writer);
+  // The callback captures only a weak pointer, so it turns into a no-op if
+  // it runs after this object has been destroyed.
+  auto weak_ptr = weak_ptr_factory_.GetWeakPtr();
+  bool enabled = metatrace::Enable(
+      [weak_ptr] {
+        if (weak_ptr)
+          weak_ptr->WriteAllAvailableEvents();
+      },
+      task_runner, tags);
+  if (!enabled)
+    return;
+  started_ = true;
+}
+
+// Stops metatracing and releases the TraceWriter. No-op if not started.
+void MetatraceWriter::Disable() {
+  PERFETTO_DCHECK_THREAD(thread_checker_);
+  if (!started_)
+    return;
+  metatrace::Disable();
+  started_ = false;
+  trace_writer_.reset();
+}
+
+// Writes every complete record currently readable from the metatrace ring
+// buffer as a TracePacket with a |perfetto_metatrace| payload.
+void MetatraceWriter::WriteAllAvailableEvents() {
+  PERFETTO_DCHECK_THREAD(thread_checker_);
+  if (!started_)
+    return;
+  for (auto it = metatrace::RingBuffer::GetReadIterator(); it; ++it) {
+    auto type_and_id = it->type_and_id.load(std::memory_order_acquire);
+    if (type_and_id == 0)
+      break;  // Stop at the first incomplete event.
+
+    auto packet = trace_writer_->NewTracePacket();
+    packet->set_timestamp(it->timestamp_ns());
+    auto* evt = packet->set_perfetto_metatrace();
+    // |type_and_id| packs the record type (the kTypeMask bits) together with
+    // the event/counter id (the remaining bits).
+    uint16_t type = type_and_id & metatrace::Record::kTypeMask;
+    uint16_t id = type_and_id & ~metatrace::Record::kTypeMask;
+    if (type == metatrace::Record::kTypeCounter) {
+      evt->set_counter_id(id);
+      evt->set_counter_value(it->counter_value);
+    } else {
+      evt->set_event_id(id);
+      evt->set_event_duration_ns(it->duration_ns);
+    }
+
+    evt->set_thread_id(static_cast<uint32_t>(it->thread_id));
+
+    if (metatrace::RingBuffer::has_overruns())
+      evt->set_has_overruns(true);
+  }
+  // The |it| destructor will automatically update the read index position in
+  // the meta-trace ring buffer.
+}
+
+// Writes all pending events and then flushes the TraceWriter, passing
+// |callback| to TraceWriter::Flush().
+void MetatraceWriter::WriteAllAndFlushTraceWriter(
+    std::function<void()> callback) {
+  PERFETTO_DCHECK_THREAD(thread_checker_);
+  if (!trace_writer_) {
+    // Enable() was never called, or Disable() already released the writer:
+    // there is nothing to flush. Ack immediately rather than dereferencing a
+    // null pointer.
+    if (callback)
+      callback();
+    return;
+  }
+  WriteAllAvailableEvents();
+  trace_writer_->Flush(std::move(callback));
+}
+
+}  // namespace perfetto
diff --git a/src/tracing/core/metatrace_writer.h b/src/tracing/core/metatrace_writer.h
new file mode 100644
index 0000000..5d2c8e7
--- /dev/null
+++ b/src/tracing/core/metatrace_writer.h
@@ -0,0 +1,65 @@
+/*
+ * Copyright (C) 2019 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef SRC_TRACING_CORE_METATRACE_WRITER_H_
+#define SRC_TRACING_CORE_METATRACE_WRITER_H_
+
+#include <functional>
+#include <memory>
+
+#include "perfetto/ext/base/metatrace.h"
+#include "perfetto/ext/base/thread_checker.h"
+#include "perfetto/ext/base/weak_ptr.h"
+
+namespace perfetto {
+
+namespace base {
+class TaskRunner;
+}
+
+class TraceWriter;
+
+// Complements the base::metatrace infrastructure.
+// It hooks a callback to metatrace::Enable() and writes metatrace events into
+// a TraceWriter whenever the metatrace ring buffer is half full.
+// It is safe to create and attempt to start multiple instances of this class,
+// however only the first one will succeed because the metatrace framework
+// doesn't support multiple instances.
+// This class is defined here (instead of directly in src/traced/probes/) so
+// it can be reused by other components (e.g. heapprofd).
+class MetatraceWriter {
+ public:
+  static constexpr char kDataSourceName[] = "perfetto.metatrace";
+
+  MetatraceWriter();
+  ~MetatraceWriter();
+  void Enable(base::TaskRunner*, std::unique_ptr<TraceWriter>, uint32_t tags);
+  void Disable();
+  void WriteAllAndFlushTraceWriter(std::function<void()> callback);
+
+ private:
+  void WriteAllAvailableEvents();
+
+  bool started_ = false;
+  base::TaskRunner* task_runner_ = nullptr;
+  std::unique_ptr<TraceWriter> trace_writer_;
+  PERFETTO_THREAD_CHECKER(thread_checker_)
+  base::WeakPtrFactory<MetatraceWriter> weak_ptr_factory_;  // Keep last.
+};
+
+}  // namespace perfetto
+
+#endif  // SRC_TRACING_CORE_METATRACE_WRITER_H_
diff --git a/test/configs/ftrace.cfg b/test/configs/ftrace.cfg
index a092af3..27b80f5 100644
--- a/test/configs/ftrace.cfg
+++ b/test/configs/ftrace.cfg
@@ -1,5 +1,5 @@
 buffers {
-  size_kb: 100024
+  size_kb: 65536
   fill_policy: RING_BUFFER
 }
 
@@ -8,8 +8,8 @@
     name: "linux.ftrace"
     target_buffer: 0
     ftrace_config {
-      buffer_size_kb: 512 # 4 (page size) * 128
-      drain_period_ms: 200
+      buffer_size_kb: 8192
+      drain_period_ms: 500
       ftrace_events: "binder_lock"
       ftrace_events: "binder_locked"
       ftrace_events: "binder_set_priority"
@@ -280,6 +280,13 @@
   }
 }
 
+data_sources {
+  config {
+    name: "perfetto.metatrace"
+    target_buffer: 0
+  }
+}
+
 producers {
   producer_name: "perfetto.traced_probes"
   shm_size_kb: 4096
diff --git a/tools/gen_merged_protos b/tools/gen_merged_protos
index 15245f7..c226713 100755
--- a/tools/gen_merged_protos
+++ b/tools/gen_merged_protos
@@ -69,6 +69,7 @@
   'protos/perfetto/trace/ftrace/task.proto',
   'protos/perfetto/trace/ftrace/vmscan.proto',
   'protos/perfetto/trace/interned_data/interned_data.proto',
+  'protos/perfetto/trace/perfetto/perfetto_metatrace.proto',
   'protos/perfetto/trace/power/battery_counters.proto',
   'protos/perfetto/trace/power/power_rails.proto',
   'protos/perfetto/trace/profiling/profile_packet.proto',
diff --git a/tools/tmux b/tools/tmux
index 4f626b7..2e365ba 100755
--- a/tools/tmux
+++ b/tools/tmux
@@ -204,7 +204,7 @@
 tmux select-pane -t 0
 tmux send-keys "PS1='[traced]$ '" Enter
 tmux send-keys "cd $DIR" Enter
-tmux send-keys "$PREFIX PERFETTO_METATRACE_FILE=mtrace ./traced_probes $POSTFIX" Enter
+tmux send-keys "$PREFIX ./traced_probes $POSTFIX" Enter
 
 tmux select-pane -t 2
 tmux send-keys "PS1='[consumer]$ '" Enter
@@ -228,8 +228,3 @@
 echo -e "\n\x1b[32mPulling trace into $TRACE.json\x1b[0m"
 $OUT/trace_to_text systrace < /tmp/trace.protobuf > $TRACE.json
 # Keep this last so it can fail.
-pull mtrace /tmp/mtrace.json
-# Add [ to beginning of file and replace trailing , with ] to turn into valid
-# JSON array.
-sed -i -e '$ s/.$/]/' /tmp/mtrace.json
-sed -i -e '1s/^/[/' /tmp/mtrace.json