Merge "tp: binder_tracker: fix performance issue due to insert/erase heavy workload + FlatHashMap" into main
diff --git a/include/perfetto/ext/base/flat_hash_map.h b/include/perfetto/ext/base/flat_hash_map.h
index 0be192d..f4046ea 100644
--- a/include/perfetto/ext/base/flat_hash_map.h
+++ b/include/perfetto/ext/base/flat_hash_map.h
@@ -23,7 +23,6 @@
 #include "perfetto/ext/base/utils.h"
 
 #include <algorithm>
-#include <functional>
 #include <limits>
 
 namespace perfetto {
@@ -45,6 +44,11 @@
 // tsl::robin_map:            931,403,397 ns    243.622M insertions/s
 // absl::flat_hash_map:       998,013,459 ns    227.379M insertions/s
 // FollyF14FastMap:         1,181,480,602 ns    192.074M insertions/s
+//
+// TODO(primiano): the table regresses for heavy insert+erase workloads since we
+// don't clean up tombstones outside of resizes. In the limit, the entire
+// table's capacity is made up of values/tombstones, so each search has to
+// exhaustively scan the full capacity.
 
 // The structs below define the probing algorithm used to probe slots upon a
 // collision. They are guaranteed to visit all slots as our table size is always
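
For illustration, a minimal sketch of the failure mode the TODO in the hunk above describes. This toy linear-probing table (not Perfetto's actual FlatHashMap, with duplicate-key checks omitted) only plants tombstones on erase; probes must treat tombstones as occupied, so an insert+erase loop leaves every slot non-free and even a lookup miss on an empty table scans the full capacity:

#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <vector>

enum class Tag : uint8_t { kFree, kFull, kTombstone };

class ToyTable {
 public:
  explicit ToyTable(size_t capacity)
      : tags_(capacity, Tag::kFree), keys_(capacity) {}

  void Insert(int key) {
    size_t idx = Home(key);
    while (tags_[idx] == Tag::kFull)  // Assumes the table never fills up.
      idx = (idx + 1) % tags_.size();
    tags_[idx] = Tag::kFull;  // Tombstoned slots get reused for storage...
    keys_[idx] = key;         // ...but existing probe chains stay long.
  }

  void Erase(int key) {
    size_t idx = Home(key);
    for (size_t n = 0; n < tags_.size() && tags_[idx] != Tag::kFree; ++n) {
      if (tags_[idx] == Tag::kFull && keys_[idx] == key) {
        tags_[idx] = Tag::kTombstone;  // Never reset back to kFree.
        return;
      }
      idx = (idx + 1) % tags_.size();
    }
  }

  // Number of slots an unsuccessful lookup of |key| has to probe.
  size_t ProbeLength(int key) const {
    size_t idx = Home(key), n = 0;
    while (n < tags_.size() && tags_[idx] != Tag::kFree) {
      ++n;  // A tombstone cannot stop the probe: the key might be further on.
      idx = (idx + 1) % tags_.size();
    }
    return n;
  }

 private:
  size_t Home(int key) const {
    return static_cast<size_t>(key) % tags_.size();
  }

  std::vector<Tag> tags_;
  std::vector<int> keys_;
};

int main() {
  ToyTable table(64);
  // Insert and immediately erase distinct keys: the live size stays zero,
  // yet every slot ends up tombstoned.
  for (int key = 0; key < 64; ++key) {
    table.Insert(key);
    table.Erase(key);
  }
  printf("probes for a miss on an empty table: %zu\n",
         table.ProbeLength(1000));  // Prints 64: the whole capacity.
}
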
diff --git a/include/perfetto/ext/base/hash.h b/include/perfetto/ext/base/hash.h
index 548003e..7809ed7 100644
--- a/include/perfetto/ext/base/hash.h
+++ b/include/perfetto/ext/base/hash.h
@@ -62,7 +62,7 @@
 
   // Allow hashing anything that has a |data| field, a |size| field,
   // and has the kHashable trait (e.g., base::StringView).
-  template <typename T, typename = std::enable_if<T::kHashable>>
+  template <typename T, typename = std::enable_if_t<T::kHashable>>
   void Update(const T& t) {
     Update(t.data(), t.size());
   }
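
The one-character fix above is load-bearing: std::enable_if<B> is itself a well-formed type for any B, so as a default template argument it never triggers SFINAE, whereas std::enable_if_t<B> names enable_if<B>::type, which does not exist when B is false. A standalone sketch of the difference (UpdateBroken/UpdateFixed are illustrative names, not the Perfetto API):

#include <type_traits>

struct Hashable {
  static constexpr bool kHashable = true;
};
struct NotHashable {
  static constexpr bool kHashable = false;
};

// Pre-fix spelling: std::enable_if<B> is a complete type even for B == false
// (it merely lacks a |type| member), so this default argument always
// substitutes successfully and the constraint is a no-op.
template <typename T, typename = std::enable_if<T::kHashable>>
void UpdateBroken(const T&) {}

// Post-fix spelling: enable_if_t<B> names enable_if<B>::type, which is absent
// for B == false, so substitution fails and the overload is removed.
template <typename T, typename = std::enable_if_t<T::kHashable>>
void UpdateFixed(const T&) {}

int main() {
  UpdateBroken(Hashable{});
  UpdateBroken(NotHashable{});  // Compiles despite kHashable being false.
  UpdateFixed(Hashable{});
  // UpdateFixed(NotHashable{});  // Would fail to compile, as intended.
}

Types with no kHashable member at all are rejected by both spellings, which is presumably why the bug went unnoticed; the two differ only for types that define kHashable as false.
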
diff --git a/src/trace_processor/importers/ftrace/binder_tracker.cc b/src/trace_processor/importers/ftrace/binder_tracker.cc
index 29d0c2c..48cdf58 100644
--- a/src/trace_processor/importers/ftrace/binder_tracker.cc
+++ b/src/trace_processor/importers/ftrace/binder_tracker.cc
@@ -296,7 +296,7 @@
   transaction.args_inserter = args_inserter;
   transaction.send_track_id = track_id;
   transaction.send_slice_id = insert_slice();
-  outstanding_transactions_.Insert(transaction_id, std::move(transaction));
+  outstanding_transactions_[transaction_id] = std::move(transaction);
   auto* frame = GetTidTopFrame(tid);
   if (frame) {
     if (frame->state == TxnFrame::kSndAfterBC_TRANSACTION) {
@@ -315,18 +315,16 @@
 void BinderTracker::TransactionReceived(int64_t ts,
                                         uint32_t pid,
                                         int32_t transaction_id) {
-  const OutstandingTransaction* opt_transaction =
-      outstanding_transactions_.Find(transaction_id);
-  if (!opt_transaction) {
+  auto it = outstanding_transactions_.find(transaction_id);
+  if (it == outstanding_transactions_.end()) {
     // If we don't know what type of transaction it is, we don't know how to
     // insert the slice.
     // TODO(lalitm): maybe we should insert a dummy slice anyway - seems like
     // a questionable idea to just ignore these completely.
     return;
   }
-
-  OutstandingTransaction transaction(std::move(*opt_transaction));
-  outstanding_transactions_.Erase(transaction_id);
+  OutstandingTransaction transaction(std::move(it->second));
+  outstanding_transactions_.erase(it);
 
   UniqueTid utid = context_->process_tracker->GetOrCreateThread(pid);
   TrackId track_id = context_->track_tracker->InternThreadTrack(utid);
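
Besides swapping the container, the rewritten lookup above saves a hash: the old Find() followed by Erase(key) hashed transaction_id twice, while find() followed by erase(it) hashes once and removes the element through the iterator. A self-contained sketch of the idiom (TakeTxn, Txn, and its payload field are hypothetical, not BinderTracker's API):

#include <cstdint>
#include <cstdio>
#include <string>
#include <unordered_map>
#include <utility>

// Illustrative stand-in for BinderTracker's OutstandingTransaction.
struct Txn {
  bool is_reply = false;
  std::string payload;
};

std::unordered_map<int32_t, Txn> g_outstanding;

// Mirrors the pattern in TransactionReceived(): one hash lookup via find(),
// move the value out, then erase by iterator, so no second lookup is needed.
bool TakeTxn(int32_t id, Txn* out) {
  auto it = g_outstanding.find(id);
  if (it == g_outstanding.end())
    return false;
  *out = std::move(it->second);  // Move out before the erase invalidates |it|.
  g_outstanding.erase(it);       // Erase by iterator: no re-hash of the key.
  return true;
}

int main() {
  g_outstanding[42] = Txn{true, "BC_REPLY"};
  Txn txn;
  if (TakeTxn(42, &txn))
    printf("took txn: %s\n", txn.payload.c_str());
}
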
diff --git a/src/trace_processor/importers/ftrace/binder_tracker.h b/src/trace_processor/importers/ftrace/binder_tracker.h
index f1d682c..5912904 100644
--- a/src/trace_processor/importers/ftrace/binder_tracker.h
+++ b/src/trace_processor/importers/ftrace/binder_tracker.h
@@ -95,6 +95,8 @@
   bool utid_stacks_empty() const { return utid_stacks_.size() == 0; }
 
  private:
+  TraceProcessorContext* const context_;
+
   struct OutstandingTransaction {
     bool is_reply = false;
     bool is_oneway = false;
@@ -102,10 +104,10 @@
     std::optional<TrackId> send_track_id;
     std::optional<SliceId> send_slice_id;
   };
+  // TODO(rsavitski): switch back to FlatHashMap once the latter's perf is fixed
+  // for insert+erase heavy workloads.
+  std::unordered_map<int32_t, OutstandingTransaction> outstanding_transactions_;
 
-  TraceProcessorContext* const context_;
-
-  base::FlatHashMap<int32_t, OutstandingTransaction> outstanding_transactions_;
   struct TxnFrame {
     // The state of this thread at this stack level.
     enum State : uint32_t;
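
As context for the "once the latter's perf is fixed" in the TODO above: one conventional remedy, sketched here under the assumption of a linear-probing layout and not implemented by this patch, is to count tombstones and rehash in place once they outnumber live entries, reclaiming dead slots without growing the table:

#include <cstddef>
#include <cstdint>
#include <vector>

enum class Tag : uint8_t { kFree, kFull, kTombstone };

class RehashingTable {
 public:
  explicit RehashingTable(size_t capacity)
      : tags_(capacity, Tag::kFree), keys_(capacity) {}

  void Insert(int key) {
    size_t idx = static_cast<size_t>(key) % tags_.size();
    while (tags_[idx] == Tag::kFull)  // Assumes the table never fills up.
      idx = (idx + 1) % tags_.size();
    if (tags_[idx] == Tag::kTombstone)
      --tombstones_;
    tags_[idx] = Tag::kFull;
    keys_[idx] = key;
    ++size_;
  }

  void Erase(int key) {
    size_t idx = static_cast<size_t>(key) % tags_.size();
    for (size_t n = 0; n < tags_.size() && tags_[idx] != Tag::kFree; ++n) {
      if (tags_[idx] == Tag::kFull && keys_[idx] == key) {
        tags_[idx] = Tag::kTombstone;
        --size_;
        if (++tombstones_ > size_)  // Heuristic: more dead slots than live.
          Rehash();
        return;
      }
      idx = (idx + 1) % tags_.size();
    }
  }

 private:
  // Re-insert the live entries into a clean same-capacity table, dropping
  // every tombstone so probe chains shrink back.
  void Rehash() {
    std::vector<Tag> old_tags = std::move(tags_);
    std::vector<int> old_keys = std::move(keys_);
    tags_.assign(old_tags.size(), Tag::kFree);
    keys_.assign(old_keys.size(), 0);
    size_ = 0;
    tombstones_ = 0;
    for (size_t i = 0; i < old_tags.size(); ++i) {
      if (old_tags[i] == Tag::kFull)
        Insert(old_keys[i]);
    }
  }

  std::vector<Tag> tags_;
  std::vector<int> keys_;
  size_t size_ = 0;
  size_t tombstones_ = 0;
};

int main() {
  RehashingTable table(64);
  // The insert+erase loop that degraded the earlier ToyTable sketch stays
  // fast here: any Erase that tips tombstones past the live count rehashes.
  for (int key = 0; key < 1000; ++key) {
    table.Insert(key);
    table.Erase(key);
  }
}
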