Add EXPERIMENTAL_ANNOTATED_PERF_PROFILE function

Like EXPERIMENTAL_PERF_PROFILE, this function generates a pprof profile,
but it also annotates function names with Android-specific annotations.

Change-Id: I249c4fda5034b52204092e5243076884b7c523fc
diff --git a/Android.bp b/Android.bp
index 880ae1c..4f8a3c6 100644
--- a/Android.bp
+++ b/Android.bp
@@ -9610,6 +9610,7 @@
 filegroup {
     name: "perfetto_src_trace_processor_util_profile_builder",
     srcs: [
+        "src/trace_processor/util/annotated_callsites.cc",
         "src/trace_processor/util/profile_builder.cc",
     ],
 }
diff --git a/BUILD b/BUILD
index 84a96e1..ab137e9 100644
--- a/BUILD
+++ b/BUILD
@@ -1548,6 +1548,8 @@
 perfetto_filegroup(
     name = "src_trace_processor_util_profile_builder",
     srcs = [
+        "src/trace_processor/util/annotated_callsites.cc",
+        "src/trace_processor/util/annotated_callsites.h",
         "src/trace_processor/util/profile_builder.cc",
         "src/trace_processor/util/profile_builder.h",
     ],
diff --git a/src/trace_processor/dynamic/experimental_annotated_stack_generator.cc b/src/trace_processor/dynamic/experimental_annotated_stack_generator.cc
index 3cee91a..063583b 100644
--- a/src/trace_processor/dynamic/experimental_annotated_stack_generator.cc
+++ b/src/trace_processor/dynamic/experimental_annotated_stack_generator.cc
@@ -120,6 +120,8 @@
                    : base::ErrStatus("Failed to find required constraints");
 }
 
+// TODO(carlscab): Replace annotation logic with
+// src/trace_processor/util/annotated_callsites.h
 base::Status ExperimentalAnnotatedStackGenerator::ComputeTable(
     const std::vector<Constraint>& cs,
     const std::vector<Order>&,
diff --git a/src/trace_processor/sqlite/pprof_functions.cc b/src/trace_processor/sqlite/pprof_functions.cc
index aab2461..dbd713f 100644
--- a/src/trace_processor/sqlite/pprof_functions.cc
+++ b/src/trace_processor/sqlite/pprof_functions.cc
@@ -32,140 +32,156 @@
 namespace trace_processor {
 namespace {
 
-constexpr const char* kPerfProfileFunctionName = "EXPERIMENTAL_PERF_PROFILE";
-
 void SetSqliteError(sqlite3_context* ctx, const base::Status& status) {
   if (!status.ok()) {
     sqlite3_result_error(ctx, status.c_message(), -1);
   }
 }
 
-class Profile {
+class PerfProfile {
  public:
-  static void Step(sqlite3_context* ctx, int argc, sqlite3_value** argv);
-  static void Final(sqlite3_context* ctx);
+  explicit PerfProfile(const TraceProcessorContext* context, bool annotated)
+      : builder_(context, {{"samples", "count"}}, annotated) {
+    single_count_value_.Append(1);
+  }
+
+  base::Status Step(uint32_t callsite_id) {
+    builder_.AddSample(callsite_id, single_count_value_);
+    return util::OkStatus();
+  }
+
+  base::Status Final(sqlite3_context* ctx) {
+    // TODO(carlscab): A lot of copies are happening here.
+    std::string profile_proto = builder_.Build();
+
+    std::unique_ptr<uint8_t[], base::FreeDeleter> data(
+        static_cast<uint8_t*>(malloc(profile_proto.size())));
+    memcpy(data.get(), profile_proto.data(), profile_proto.size());
+    sqlite3_result_blob(ctx, data.release(),
+                        static_cast<int>(profile_proto.size()), free);
+    return util::OkStatus();
+  }
 
  private:
-  static std::unique_ptr<Profile> Release(sqlite3_context* ctx) {
-    Profile** profile =
-        reinterpret_cast<Profile**>(sqlite3_aggregate_context(ctx, 0));
+  GProfileBuilder builder_;
+  protozero::PackedVarInt single_count_value_;
+};
+
+template <typename Function>
+class ProfileFunction {
+ public:
+  static base::Status Register(sqlite3* db, TraceProcessorContext* context) {
+    int flags = SQLITE_UTF8 | SQLITE_DETERMINISTIC;
+    int ret = sqlite3_create_function_v2(db, Function::kName, 1, flags, context,
+                                         nullptr, Step, Final, nullptr);
+    if (ret != SQLITE_OK) {
+      return base::ErrStatus("Unable to register function with name %s",
+                             Function::kName);
+    }
+    return base::OkStatus();
+  }
+
+ private:
+  static std::unique_ptr<PerfProfile> ReleaseProfile(sqlite3_context* ctx) {
+    PerfProfile** profile =
+        reinterpret_cast<PerfProfile**>(sqlite3_aggregate_context(ctx, 0));
 
     if (!profile) {
       return nullptr;
     }
 
-    return std::unique_ptr<Profile>(*profile);
+    return std::unique_ptr<PerfProfile>(*profile);
   }
 
-  static Profile* GetOrCreate(sqlite3_context* ctx) {
-    Profile** profile = reinterpret_cast<Profile**>(
-        sqlite3_aggregate_context(ctx, sizeof(Profile*)));
+  static PerfProfile* GetOrCreateProfile(sqlite3_context* ctx) {
+    PerfProfile** profile = reinterpret_cast<PerfProfile**>(
+        sqlite3_aggregate_context(ctx, sizeof(PerfProfile*)));
     if (!profile) {
       return nullptr;
     }
 
     if (!*profile) {
-      *profile = new Profile(
-          reinterpret_cast<TraceProcessorContext*>(sqlite3_user_data(ctx)));
+      *profile = new PerfProfile(reinterpret_cast<const TraceProcessorContext*>(
+                                     sqlite3_user_data(ctx)),
+                                 Function::kAnnotate);
     }
 
     return *profile;
   }
 
-  explicit Profile(TraceProcessorContext* context);
+  static void Step(sqlite3_context* ctx, int argc, sqlite3_value** argv) {
+    if (argc != 1) {
+      return SetSqliteError(ctx, base::ErrStatus("%s: invalid number of args; "
+                                                 "expected 1, received %d",
+                                                 Function::kName, argc));
+    }
 
-  base::Status StepImpl(uint32_t callsite_id);
-  base::Status FinalImpl(sqlite3_context*);
+    base::Status status = TypeCheckSqliteValue(argv[0], SqlValue::kLong);
+    if (!status.ok()) {
+      return SetSqliteError(
+          ctx, base::ErrStatus("%s: argument callsite_id %s", Function::kName,
+                               status.c_message()));
+    }
+    int64_t value = sqlite3_value_int64(argv[0]);
 
-  GProfileBuilder builder_;
-  protozero::PackedVarInt single_count_value_;
+    if (value < 0 || value > std::numeric_limits<uint32_t>::max()) {
+      return SetSqliteError(ctx,
+                            base::ErrStatus("%s: invalid callsite_id %" PRId64,
+                                            Function::kName, value));
+    }
+
+    uint32_t callsite_id = static_cast<uint32_t>(value);
+
+    PerfProfile* profile = GetOrCreateProfile(ctx);
+
+    if (!profile) {
+      return SetSqliteError(
+          ctx, base::ErrStatus("%s: Failed to allocate aggregate context",
+                               Function::kName));
+    }
+
+    status = profile->Step(callsite_id);
+
+    if (!status.ok()) {
+      return SetSqliteError(ctx, status);
+    }
+  }
+
+  static void Final(sqlite3_context* ctx) {
+    std::unique_ptr<PerfProfile> profile = ReleaseProfile(ctx);
+    if (!profile) {
+      return;
+    }
+
+    base::Status status = profile->Final(ctx);
+    if (!status.ok()) {
+      return SetSqliteError(ctx, status);
+    }
+  }
 };
 
-Profile::Profile(TraceProcessorContext* context)
-    : builder_(context, {{"samples", "count"}}) {
-  single_count_value_.Append(1);
-}
+class PerfProfileFunction : public ProfileFunction<PerfProfileFunction> {
+ public:
+  static constexpr const char* kName = "EXPERIMENTAL_PERF_PROFILE";
+  static constexpr bool kAnnotate = false;
+};
 
-void Profile::Step(sqlite3_context* ctx, int argc, sqlite3_value** argv) {
-  if (argc != 1) {
-    return SetSqliteError(ctx, base::ErrStatus("%s: invalid number of args; "
-                                               "expected 1, received %d",
-                                               kPerfProfileFunctionName, argc));
-  }
-
-  base::Status status = TypeCheckSqliteValue(argv[0], SqlValue::kLong);
-  if (!status.ok()) {
-    return SetSqliteError(
-        ctx, base::ErrStatus("%s: argument callsite_id %s",
-                             kPerfProfileFunctionName, status.c_message()));
-  }
-  int64_t value = sqlite3_value_int64(argv[0]);
-
-  if (value < 0 || value > std::numeric_limits<uint32_t>::max()) {
-    return SetSqliteError(ctx,
-                          base::ErrStatus("%s: invalid callsite_id %" PRId64,
-                                          kPerfProfileFunctionName, value));
-  }
-
-  uint32_t callsite_id = static_cast<uint32_t>(value);
-
-  Profile* profile = Profile::GetOrCreate(ctx);
-
-  if (!profile) {
-    return SetSqliteError(
-        ctx, base::ErrStatus("%s: Failed to allocate aggregate context",
-                             kPerfProfileFunctionName));
-  }
-
-  status = profile->StepImpl(callsite_id);
-
-  if (!status.ok()) {
-    return SetSqliteError(ctx, status);
-  }
-}
-
-void Profile::Final(sqlite3_context* ctx) {
-  std::unique_ptr<Profile> profile = Profile::Release(ctx);
-  if (!profile) {
-    return;
-  }
-
-  base::Status status = profile->FinalImpl(ctx);
-  if (!status.ok()) {
-    return SetSqliteError(ctx, status);
-  }
-}
-
-base::Status Profile::StepImpl(uint32_t callsite_id) {
-  builder_.AddSample(callsite_id, single_count_value_);
-  return util::OkStatus();
-}
-
-base::Status Profile::FinalImpl(sqlite3_context* ctx) {
-  // TODO(carlscab): A lot of copies are happening here.
-  std::string profile_proto = builder_.Build();
-
-  std::unique_ptr<uint8_t[], base::FreeDeleter> data(
-      static_cast<uint8_t*>(malloc(profile_proto.size())));
-  memcpy(data.get(), profile_proto.data(), profile_proto.size());
-  sqlite3_result_blob(ctx, data.release(),
-                      static_cast<int>(profile_proto.size()), free);
-  return util::OkStatus();
-}
+class AnnotatedPerfProfileFunction
+    : public ProfileFunction<AnnotatedPerfProfileFunction> {
+ public:
+  static constexpr const char* kName = "EXPERIMENTAL_ANNOTATED_PERF_PROFILE";
+  static constexpr bool kAnnotate = true;
+};
 
 }  // namespace
 
 base::Status PprofFunctions::Register(sqlite3* db,
                                       TraceProcessorContext* context) {
-  int flags = SQLITE_UTF8 | SQLITE_DETERMINISTIC;
-  int ret = sqlite3_create_function_v2(db, kPerfProfileFunctionName, 1, flags,
-                                       context, nullptr, Profile::Step,
-                                       Profile::Final, nullptr);
-  if (ret != SQLITE_OK) {
-    return base::ErrStatus("Unable to register function with name %s",
-                           kPerfProfileFunctionName);
+  auto status = PerfProfileFunction::Register(db, context);
+  if (!status.ok()) {
+    return status;
   }
-  return base::OkStatus();
+  return AnnotatedPerfProfileFunction::Register(db, context);
 }
 
 }  // namespace trace_processor
diff --git a/src/trace_processor/util/BUILD.gn b/src/trace_processor/util/BUILD.gn
index bd08bd7..781f393 100644
--- a/src/trace_processor/util/BUILD.gn
+++ b/src/trace_processor/util/BUILD.gn
@@ -156,6 +156,8 @@
 
 source_set("profile_builder") {
   sources = [
+    "annotated_callsites.cc",
+    "annotated_callsites.h",
     "profile_builder.cc",
     "profile_builder.h",
   ]
diff --git a/src/trace_processor/util/annotated_callsites.cc b/src/trace_processor/util/annotated_callsites.cc
new file mode 100644
index 0000000..3428330
--- /dev/null
+++ b/src/trace_processor/util/annotated_callsites.cc
@@ -0,0 +1,165 @@
+/*
+ * Copyright (C) 2021 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "src/trace_processor/util/annotated_callsites.h"
+
+#include <iostream>
+
+#include "perfetto/ext/base/optional.h"
+#include "src/trace_processor/tables/profiler_tables.h"
+#include "src/trace_processor/types/trace_processor_context.h"
+
+namespace perfetto {
+namespace trace_processor {
+
+AnnotatedCallsites::AnnotatedCallsites(const TraceProcessorContext* context)
+    : context_(*context),
+      // String to identify trampoline frames. If the string does not exist in
+      // TraceProcessor's StringPool (nullopt) then there will be no trampoline
+      // frames in the trace so there is no point in adding it to the pool to do
+      // all comparisons, instead we initialize the member to nullopt and the
+      // string comparisons will all fail.
+      art_jni_trampoline_(
+          context->storage->string_pool().GetId("art_jni_trampoline")) {}
+
+AnnotatedCallsites::State AnnotatedCallsites::GetState(
+    base::Optional<CallsiteId> id) {
+  if (!id) {
+    return State::kInitial;
+  }
+  auto it = states_.find(*id);
+  if (it != states_.end()) {
+    return it->second;
+  }
+
+  State state =
+      Get(*context_.storage->stack_profile_callsite_table().FindById(*id))
+          .first;
+  states_.emplace(*id, state);
+  return state;
+}
+
+std::pair<AnnotatedCallsites::State, CallsiteAnnotation>
+AnnotatedCallsites::Get(
+    const tables::StackProfileCallsiteTable::ConstRowReference& callsite) {
+  State state = GetState(callsite.parent_id());
+
+  // Keep immediate callee of a JNI trampoline, but keep tagging all
+  // successive libart frames as common.
+  if (state == State::kKeepNext) {
+    return {State::kEraseLibart, CallsiteAnnotation::kNone};
+  }
+
+  // Special-case "art_jni_trampoline" frames, keeping their immediate callee
+  // even if it is in libart, as it could be a native implementation of a
+  // managed method. Example for "java.lang.reflect.Method.Invoke":
+  //   art_jni_trampoline
+  //   art::Method_invoke(_JNIEnv*, _jobject*, _jobject*, _jobjectArray*)
+  //
+  // Simpleperf also relies on this frame name, so it should be fairly stable.
+  // TODO(rsavitski): consider detecting standard JNI upcall entrypoints -
+  // _JNIEnv::Call*. These are sometimes inlined into other DSOs, so erasing
+  // only the libart frames does not clean up all of the JNI-related frames.
+  auto frame = *context_.storage->stack_profile_frame_table().FindById(
+      callsite.frame_id());
+  // art_jni_trampoline_ could be nullopt if the string does not exist in the
+  // StringPool, but that also means no frame will ever have that name.
+  if (art_jni_trampoline_.has_value() &&
+      frame.name() == art_jni_trampoline_.value()) {
+    return {State::kKeepNext, CallsiteAnnotation::kCommonFrame};
+  }
+
+  MapType map_type = GetMapType(frame.mapping());
+
+  // Annotate managed frames.
+  if (map_type == MapType::kArtInterp ||  //
+      map_type == MapType::kArtJit ||     //
+      map_type == MapType::kArtAot) {
+    // Now known to be in a managed callstack - erase subsequent ART frames.
+    if (state == State::kInitial) {
+      state = State::kEraseLibart;
+    }
+
+    if (map_type == MapType::kArtInterp)
+      return {state, CallsiteAnnotation::kArtInterpreted};
+    if (map_type == MapType::kArtJit)
+      return {state, CallsiteAnnotation::kArtJit};
+    if (map_type == MapType::kArtAot)
+      return {state, CallsiteAnnotation::kArtAot};
+  }
+
+  if (state == State::kEraseLibart && map_type == MapType::kNativeLibart) {
+    states_.emplace(callsite.id(), state);
+    return {state, CallsiteAnnotation::kCommonFrame};
+  }
+
+  return {state, CallsiteAnnotation::kNone};
+}
+
+AnnotatedCallsites::MapType AnnotatedCallsites::GetMapType(MappingId id) {
+  auto it = map_types_.find(id);
+  if (it != map_types_.end()) {
+    return it->second;
+  }
+
+  return map_types_
+      .emplace(id, ClassifyMap(context_.storage->GetString(
+                       context_.storage->stack_profile_mapping_table()
+                           .FindById(id)
+                           ->name())))
+      .first->second;
+}
+
+AnnotatedCallsites::MapType AnnotatedCallsites::ClassifyMap(
+    NullTermStringView map) {
+  if (map.empty())
+    return MapType::kOther;
+
+  // Primary mapping where modern ART puts jitted code.
+  // TODO(rsavitski): look into /memfd:jit-zygote-cache.
+  if (!strncmp(map.c_str(), "/memfd:jit-cache", 16))
+    return MapType::kArtJit;
+
+  size_t last_slash_pos = map.rfind('/');
+  if (last_slash_pos != NullTermStringView::npos) {
+    if (!strncmp(map.c_str() + last_slash_pos, "/libart.so", 10))
+      return MapType::kNativeLibart;
+    if (!strncmp(map.c_str() + last_slash_pos, "/libartd.so", 11))
+      return MapType::kNativeLibart;
+  }
+
+  size_t extension_pos = map.rfind('.');
+  if (extension_pos != NullTermStringView::npos) {
+    if (!strncmp(map.c_str() + extension_pos, ".so", 3))
+      return MapType::kNativeOther;
+    // dex with verification speedup info, produced by dex2oat
+    if (!strncmp(map.c_str() + extension_pos, ".vdex", 5))
+      return MapType::kArtInterp;
+    // possibly uncompressed dex in a jar archive
+    if (!strncmp(map.c_str() + extension_pos, ".jar", 4))
+      return MapType::kArtInterp;
+    // ahead of time compiled ELFs
+    if (!strncmp(map.c_str() + extension_pos, ".oat", 4))
+      return MapType::kArtAot;
+    // older/alternative name for .oat
+    if (!strncmp(map.c_str() + extension_pos, ".odex", 5))
+      return MapType::kArtAot;
+  }
+  return MapType::kOther;
+}
+
+}  // namespace trace_processor
+}  // namespace perfetto
diff --git a/src/trace_processor/util/annotated_callsites.h b/src/trace_processor/util/annotated_callsites.h
new file mode 100644
index 0000000..4e617f6
--- /dev/null
+++ b/src/trace_processor/util/annotated_callsites.h
@@ -0,0 +1,91 @@
+/*
+ * Copyright (C) 2021 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef SRC_TRACE_PROCESSOR_UTIL_ANNOTATED_CALLSITES_H_
+#define SRC_TRACE_PROCESSOR_UTIL_ANNOTATED_CALLSITES_H_
+
+#include <unordered_map>
+#include "perfetto/ext/base/optional.h"
+#include "src/trace_processor/containers/string_pool.h"
+#include "src/trace_processor/storage/trace_storage.h"
+
+namespace perfetto {
+namespace trace_processor {
+
+class TraceProcessorContext;
+
+enum class CallsiteAnnotation {
+  kNone,
+  kCommonFrame,
+  kArtInterpreted,
+  kArtJit,
+  kArtAot,
+};
+
+// Helper class to augment callsites with (currently Android-specific)
+// annotations. A given callsite will always have the same annotation. This
+// class will internally cache already computed annotations. An annotation
+// depends only on the current callsite and the annotations of its parent
+// callsites (going to the root).
+class AnnotatedCallsites {
+ public:
+  explicit AnnotatedCallsites(const TraceProcessorContext* context);
+
+  CallsiteAnnotation GetAnnotation(
+      const tables::StackProfileCallsiteTable::ConstRowReference& callsite) {
+    return Get(callsite).second;
+  }
+
+ private:
+  enum class MapType {
+    kArtInterp,
+    kArtJit,
+    kArtAot,
+    kNativeLibart,
+    kNativeOther,
+    kOther
+  };
+
+  // Annotation FSM states:
+  // * kInitial: default, native-only callstacks never leave this state.
+  // * kEraseLibart: we've seen a managed frame, and will now "erase" (i.e. tag
+  //                 as a common-frame) frames belonging to the ART runtime.
+  // * kKeepNext: we've seen a special JNI trampoline for managed->native
+  //              transition, keep the immediate child (even if it is in ART),
+  //              and then go back to kEraseLibart.
+  // Regardless of the state, managed frames get annotated with their execution
+  // mode, based on the mapping.
+  enum class State { kInitial, kEraseLibart, kKeepNext };
+
+  State GetState(base::Optional<CallsiteId> id);
+
+  std::pair<State, CallsiteAnnotation> Get(
+      const tables::StackProfileCallsiteTable::ConstRowReference& callsite);
+
+  MapType GetMapType(MappingId id);
+  MapType ClassifyMap(NullTermStringView map);
+
+  const TraceProcessorContext& context_;
+  const base::Optional<StringPool::Id> art_jni_trampoline_;
+
+  std::unordered_map<MappingId, MapType> map_types_;
+  std::unordered_map<CallsiteId, State> states_;
+};
+
+}  // namespace trace_processor
+}  // namespace perfetto
+
+#endif  // SRC_TRACE_PROCESSOR_UTIL_ANNOTATED_CALLSITES_H_
diff --git a/src/trace_processor/util/profile_builder.cc b/src/trace_processor/util/profile_builder.cc
index 9459644..ec25432 100644
--- a/src/trace_processor/util/profile_builder.cc
+++ b/src/trace_processor/util/profile_builder.cc
@@ -17,6 +17,7 @@
 #include "src/trace_processor/util/profile_builder.h"
 #include <algorithm>
 #include <cstdint>
+#include <iostream>
 #include <iterator>
 
 #include "perfetto/base/logging.h"
@@ -26,9 +27,29 @@
 #include "perfetto/ext/trace_processor/demangle.h"
 #include "src/trace_processor/containers/string_pool.h"
 #include "src/trace_processor/types/trace_processor_context.h"
+#include "src/trace_processor/util/annotated_callsites.h"
 
 namespace perfetto {
 namespace trace_processor {
+namespace {
+
+base::StringView ToString(CallsiteAnnotation annotation) {
+  switch (annotation) {
+    case CallsiteAnnotation::kNone:
+      return "";
+    case CallsiteAnnotation::kArtAot:
+      return "aot";
+    case CallsiteAnnotation::kArtInterpreted:
+      return "interp";
+    case CallsiteAnnotation::kArtJit:
+      return "jit";
+    case CallsiteAnnotation::kCommonFrame:
+      return "common-frame";
+  }
+  PERFETTO_FATAL("For GCC");
+}
+
+}  // namespace
 
 GProfileBuilder::StringTable::StringTable(
     protozero::HeapBuffered<third_party::perftools::profiles::pbzero::Profile>*
@@ -72,6 +93,25 @@
   return index;
 }
 
+int64_t GProfileBuilder::StringTable::GetAnnotatedString(
+    StringPool::Id str,
+    CallsiteAnnotation annotation) {
+  if (str.is_null() || annotation == CallsiteAnnotation::kNone) {
+    return InternString(str);
+  }
+  return GetAnnotatedString(string_pool_.Get(str), annotation);
+}
+
+int64_t GProfileBuilder::StringTable::GetAnnotatedString(
+    base::StringView str,
+    CallsiteAnnotation annotation) {
+  if (str.empty() || annotation == CallsiteAnnotation::kNone) {
+    return InternString(str);
+  }
+  return InternString(base::StringView(
+      str.ToStdString() + " [" + ToString(annotation).ToStdString() + "]"));
+}
+
 int64_t GProfileBuilder::StringTable::WriteString(base::StringView str) {
   result_->add_string_table(str.data(), str.size());
   return next_index_++;
@@ -80,12 +120,7 @@
 GProfileBuilder::MappingKey::MappingKey(
     const tables::StackProfileMappingTable::ConstRowReference& mapping,
     StringTable& string_table) {
-  // Round up to next 4K boundary to avoid small discrepancies.
-  constexpr uint64_t kRounding = 0x1000;
   size = static_cast<uint64_t>(mapping.end() - mapping.start());
-  size = size + kRounding - 1;
-  size = size & ~(kRounding - 1);
-
   file_offset = static_cast<uint64_t>(mapping.exact_offset());
   build_id_or_filename = string_table.InternString(mapping.build_id());
   if (build_id_or_filename == kEmptyStringIndex) {
@@ -153,11 +188,15 @@
 }
 
 GProfileBuilder::GProfileBuilder(
-    TraceProcessorContext* context,
-    const std::vector<std::pair<std::string, std::string>>& sample_types)
+    const TraceProcessorContext* context,
+    const std::vector<std::pair<std::string, std::string>>& sample_types,
+    bool annotated)
     : context_(*context),
       string_table_(&result_, &context->storage->string_pool()),
       num_sample_types_(sample_types.size()) {
+  if (annotated) {
+    annotations_.emplace(context);
+  }
   WriteSampleTypes(sample_types);
 }
 
@@ -208,6 +247,15 @@
   return result_.SerializeAsString();
 }
 
+CallsiteAnnotation GProfileBuilder::GetAnnotation(
+    const tables::StackProfileCallsiteTable::ConstRowReference& callsite) {
+  if (!annotations_) {
+    return CallsiteAnnotation::kNone;
+  }
+
+  return annotations_->GetAnnotation(callsite);
+}
+
 const protozero::PackedVarInt& GProfileBuilder::GetLocationIdsForCallsite(
     const CallsiteId& callsite_id) {
   auto it = cached_location_ids_.find(callsite_id);
@@ -225,26 +273,28 @@
     return location_ids;
   }
 
-  location_ids.Append(WriteLocationIfNeeded(start_ref->frame_id()));
+  location_ids.Append(WriteLocationIfNeeded(*start_ref));
 
   base::Optional<CallsiteId> parent_id = start_ref->parent_id();
   while (parent_id) {
     auto parent_ref = cs_table.FindById(*parent_id);
-    location_ids.Append(WriteLocationIfNeeded(parent_ref->frame_id()));
+    location_ids.Append(WriteLocationIfNeeded(*parent_ref));
     parent_id = parent_ref->parent_id();
   }
 
   return location_ids;
 }
 
-uint64_t GProfileBuilder::WriteLocationIfNeeded(const FrameId& frame_id) {
-  auto it = seen_locations_.find(frame_id);
+uint64_t GProfileBuilder::WriteLocationIfNeeded(
+    const tables::StackProfileCallsiteTable::ConstRowReference& callsite) {
+  AnnotatedFrameId key{callsite.frame_id(), GetAnnotation(callsite)};
+  auto it = seen_locations_.find(key);
   if (it != seen_locations_.end()) {
     return it->second;
   }
 
   auto& frames = context_.storage->stack_profile_frame_table();
-  auto frame = *frames.FindById(frame_id);
+  auto frame = *frames.FindById(key.frame_id);
 
   const auto& mappings = context_.storage->stack_profile_mapping_table();
   auto mapping = *mappings.FindById(frame.mapping());
@@ -252,13 +302,13 @@
 
   uint64_t& id =
       locations_[Location{mapping_id, static_cast<uint64_t>(frame.rel_pc()),
-                          GetLines(frame, mapping_id)}];
+                          GetLines(frame, key.annotation, mapping_id)}];
 
   if (id == 0) {
     id = locations_.size();
   }
 
-  seen_locations_.insert({frame_id, id});
+  seen_locations_.insert({key, id});
 
   return id;
 }
@@ -283,11 +333,12 @@
 
 std::vector<GProfileBuilder::Line> GProfileBuilder::GetLines(
     const tables::StackProfileFrameTable::ConstRowReference& frame,
+    CallsiteAnnotation annotation,
     uint64_t mapping_id) {
   std::vector<Line> lines =
-      GetLinesForSymbolSetId(frame.symbol_set_id(), mapping_id);
+      GetLinesForSymbolSetId(frame.symbol_set_id(), annotation, mapping_id);
   if (lines.empty()) {
-    uint64_t function_id = WriteFunctionIfNeeded(frame, mapping_id);
+    uint64_t function_id = WriteFunctionIfNeeded(frame, annotation, mapping_id);
     lines.push_back({function_id, 0});
   }
 
@@ -296,6 +347,7 @@
 
 std::vector<GProfileBuilder::Line> GProfileBuilder::GetLinesForSymbolSetId(
     base::Optional<uint32_t> symbol_set_id,
+    CallsiteAnnotation annotation,
     uint64_t mapping_id) {
   if (!symbol_set_id) {
     return {};
@@ -317,8 +369,8 @@
 
   std::vector<GProfileBuilder::Line> lines;
   for (const RowRef& symbol : symbol_set) {
-    lines.push_back(
-        {WriteFunctionIfNeeded(symbol, mapping_id), symbol.line_number()});
+    lines.push_back({WriteFunctionIfNeeded(symbol, annotation, mapping_id),
+                     symbol.line_number()});
   }
 
   GetMapping(mapping_id).debug_info.has_inline_frames = true;
@@ -329,8 +381,9 @@
 
 uint64_t GProfileBuilder::WriteFunctionIfNeeded(
     const tables::SymbolTable::ConstRowReference& symbol,
+    CallsiteAnnotation annotation,
     uint64_t mapping_id) {
-  int64_t name = string_table_.InternString(symbol.name());
+  int64_t name = string_table_.GetAnnotatedString(symbol.name(), annotation);
   int64_t filename = string_table_.InternString(symbol.source_file());
 
   auto ins = functions_.insert(
@@ -351,29 +404,36 @@
 
 uint64_t GProfileBuilder::WriteFunctionIfNeeded(
     const tables::StackProfileFrameTable::ConstRowReference& frame,
+    CallsiteAnnotation annotation,
     uint64_t mapping_id) {
-  auto it = seen_functions_.find(frame.id());
+  AnnotatedFrameId key{frame.id(), annotation};
+  auto it = seen_functions_.find(key);
   if (it != seen_functions_.end()) {
     return it->second;
   }
 
   int64_t system_name = string_table_.InternString(frame.name());
-  int64_t name = 0;
+  int64_t name = kEmptyStringIndex;
 
   if (frame.deobfuscated_name()) {
-    name = string_table_.InternString(*frame.deobfuscated_name());
+    name = string_table_.GetAnnotatedString(*frame.deobfuscated_name(),
+                                            annotation);
   } else if (system_name != kEmptyStringIndex) {
     std::unique_ptr<char, base::FreeDeleter> demangled =
         demangle::Demangle(context_.storage->GetString(frame.name()).c_str());
     if (demangled) {
-      name = string_table_.InternString(demangled.get());
+      name = string_table_.GetAnnotatedString(demangled.get(), annotation);
+    } else {
+      // demangling failed, expected if the name wasn't mangled. In any case
+      // reuse the system_name as this is what UI will usually display.
+      name = string_table_.GetAnnotatedString(frame.name(), annotation);
     }
   }
 
   auto ins = functions_.insert(
       {Function{name, system_name, kEmptyStringIndex}, functions_.size() + 1});
   uint64_t id = ins.first->second;
-  seen_functions_.insert({frame.id(), id});
+  seen_functions_.insert({key, id});
 
   if (ins.second &&
       (name != kEmptyStringIndex || system_name != kEmptyStringIndex)) {
diff --git a/src/trace_processor/util/profile_builder.h b/src/trace_processor/util/profile_builder.h
index 7eb1357..efec89d 100644
--- a/src/trace_processor/util/profile_builder.h
+++ b/src/trace_processor/util/profile_builder.h
@@ -25,9 +25,11 @@
 #include "src/trace_processor/containers/string_pool.h"
 #include "src/trace_processor/storage/trace_storage.h"
 #include "src/trace_processor/tables/profiler_tables.h"
+#include "src/trace_processor/util/annotated_callsites.h"
 
 #include <algorithm>
 #include <cstdint>
+#include <functional>
 #include <unordered_map>
 #include <vector>
 
@@ -40,9 +42,20 @@
 class GProfileBuilder {
  public:
   // |sample_types| A description of the values stored with each sample.
+  // |annotated| Whether to annotate callstack frames.
+  //
+  // Important: Annotations might interfere with certain aggregations, as we
+  // could have a frame that is annotated with different annotations. That
+  // will lead to multiple functions being generated (same name, line etc, but
+  // different annotation). Since there is no field in a Profile proto to track
+  // these annotations we extend the function name (my_func [annotation]), so
+  // from pprof's perspective we now have different functions. So in flame
+  // graphs, for example, you will have one separate slice for each of these
+  // same functions with different annotations.
   GProfileBuilder(
-      TraceProcessorContext* context,
-      const std::vector<std::pair<std::string, std::string>>& sample_types);
+      const TraceProcessorContext* context,
+      const std::vector<std::pair<std::string, std::string>>& sample_types,
+      bool annotated);
   ~GProfileBuilder();
   void AddSample(uint32_t callsite_id, const protozero::PackedVarInt& values);
 
@@ -79,6 +92,11 @@
     // of writing a message to the proto.
     int64_t InternString(StringPool::Id id);
 
+    int64_t GetAnnotatedString(StringPool::Id str,
+                               CallsiteAnnotation annotation);
+    int64_t GetAnnotatedString(base::StringView str,
+                               CallsiteAnnotation annotation);
+
    private:
     // Unconditionally writes the given string to the table and returns its
     // index.
@@ -95,6 +113,22 @@
     int64_t next_index_{0};
   };
 
+  struct AnnotatedFrameId {
+    struct Hash {
+      size_t operator()(const AnnotatedFrameId& id) const {
+        return static_cast<size_t>(perfetto::base::Hash::Combine(
+            id.frame_id.value, static_cast<int>(id.annotation)));
+      }
+    };
+
+    FrameId frame_id;
+    CallsiteAnnotation annotation;
+
+    bool operator==(const AnnotatedFrameId& other) const {
+      return frame_id == other.frame_id && annotation == other.annotation;
+    }
+  };
+
   struct Line {
     uint64_t function_id;
     int64_t line;
@@ -195,9 +229,8 @@
   struct Function {
     struct Hash {
       size_t operator()(const Function& func) const {
-        perfetto::base::Hash hasher;
-        hasher.UpdateAll(func.name, func.system_name, func.filename);
-        return static_cast<size_t>(hasher.digest());
+        return static_cast<size_t>(perfetto::base::Hash::Combine(
+            func.name, func.system_name, func.filename));
       }
     };
 
@@ -211,24 +244,33 @@
     }
   };
 
+  CallsiteAnnotation GetAnnotation(
+      const tables::StackProfileCallsiteTable::ConstRowReference& callsite);
+
   const protozero::PackedVarInt& GetLocationIdsForCallsite(
       const CallsiteId& callsite_id);
 
   std::vector<Line> GetLinesForSymbolSetId(
       base::Optional<uint32_t> symbol_set_id,
+      CallsiteAnnotation annotation,
       uint64_t mapping_id);
 
   std::vector<Line> GetLines(
       const tables::StackProfileFrameTable::ConstRowReference& frame,
+      CallsiteAnnotation annotation,
       uint64_t mapping_id);
 
-  uint64_t WriteLocationIfNeeded(const FrameId& frame_id);
+  uint64_t WriteLocationIfNeeded(
+      const tables::StackProfileCallsiteTable::ConstRowReference& callsite);
 
   uint64_t WriteFunctionIfNeeded(
       const tables::SymbolTable::ConstRowReference& symbol,
+      CallsiteAnnotation annotation,
+
       uint64_t mapping_id);
   uint64_t WriteFunctionIfNeeded(
       const tables::StackProfileFrameTable::ConstRowReference& frame,
+      CallsiteAnnotation annotation,
       uint64_t mapping_id);
 
   uint64_t WriteMappingIfNeeded(
@@ -255,12 +297,13 @@
   protozero::HeapBuffered<third_party::perftools::profiles::pbzero::Profile>
       result_;
 
-  TraceProcessorContext& context_;
+  const TraceProcessorContext& context_;
   StringTable string_table_;
 
   const size_t num_sample_types_;
 
   bool finalized_{false};
+  base::Optional<AnnotatedCallsites> annotations_;
 
   // Caches a CallsiteId (callstack) to the list of locations emitted to the
   // profile.
@@ -268,9 +311,11 @@
 
   // Helpers to map TraceProcessor rows to already written Profile entities
   // (their ids).
-  std::unordered_map<FrameId, uint64_t> seen_locations_;
+  std::unordered_map<AnnotatedFrameId, uint64_t, AnnotatedFrameId::Hash>
+      seen_locations_;
+  std::unordered_map<AnnotatedFrameId, uint64_t, AnnotatedFrameId::Hash>
+      seen_functions_;
   std::unordered_map<MappingId, uint64_t> seen_mappings_;
-  std::unordered_map<FrameId, uint64_t> seen_functions_;
 
   // Helpers to deduplicate entries. Map entity to its id. These also serve as a
   // staging area until written out to the profile proto during `Finalize`. Ids