dbv2: improve sorting performance

Sorting without using std::vector<uint32_t> has atrocious performance; so much so that it's better to pay the cost and convert to indexvector every time.

Change-Id: Iae883abfac0e22f30873d70284aee634d772b201
diff --git a/src/trace_processor/containers/row_map.h b/src/trace_processor/containers/row_map.h
index 283455c..45eedf5 100644
--- a/src/trace_processor/containers/row_map.h
+++ b/src/trace_processor/containers/row_map.h
@@ -20,6 +20,7 @@
 #include <stdint.h>
 
 #include <memory>
+#include <numeric>
 #include <optional>
 #include <variant>
 #include <vector>
@@ -429,6 +430,27 @@
     NoVariantMatched();
   }
 
+  // Converts this RowMap to an index vector in the most efficient way
+  // possible.
+  std::vector<uint32_t> TakeAsIndexVector() const&& {
+    if (auto* range = std::get_if<Range>(&data_)) {
+      std::vector<uint32_t> rm(range->size());
+      std::iota(rm.begin(), rm.end(), range->start);
+      return rm;
+    }
+    if (auto* bv = std::get_if<BitVector>(&data_)) {
+      std::vector<uint32_t> rm(bv->CountSetBits());
+      for (auto it = bv->IterateSetBits(); it; it.Next()) {
+        rm[it.ordinal()] = it.index();
+      }
+      return rm;
+    }
+    if (auto* vec = std::get_if<IndexVector>(&data_)) {
+      return std::move(*vec);
+    }
+    NoVariantMatched();
+  }
+
   // Returns the iterator over the rows in this RowMap.
   Iterator IterateRows() const { return Iterator(this); }
 
diff --git a/src/trace_processor/db/query_executor.cc b/src/trace_processor/db/query_executor.cc
index 6478c19..94ff142 100644
--- a/src/trace_processor/db/query_executor.cc
+++ b/src/trace_processor/db/query_executor.cc
@@ -124,15 +124,14 @@
   if (rm->empty())
     return;
 
+  uint32_t rm_size = rm->size();
   uint32_t rm_first = rm->Get(0);
-  uint32_t rm_last = rm->Get(rm->size() - 1);
+  uint32_t rm_last = rm->Get(rm_size - 1);
   uint32_t range_size = rm_last - rm_first;
-  // If the range is less than 50% full and size() < 1024, choose the index
-  // algorithm.
-  // TODO(b/283763282):Use Overlay estimations.
-  if (rm->IsIndexVector() ||
-      (rm->size() < 1024 &&
-       (static_cast<double>(rm->size()) / range_size < 0.5))) {
+  // If the number of elements in the rowmap is small or the number of elements
+  // is less than 1/10th of the range, use indexed filtering.
+  // TODO(b/283763282): use Overlay estimations.
+  if (rm->IsIndexVector() || rm_size < 1024 || rm_size * 10 < range_size) {
     *rm = IndexSearch(c, col, rm);
     return;
   }
diff --git a/src/trace_processor/db/table.h b/src/trace_processor/db/table.h
index b8adb3d..652799e 100644
--- a/src/trace_processor/db/table.h
+++ b/src/trace_processor/db/table.h
@@ -25,6 +25,7 @@
 #include <vector>
 
 #include "perfetto/base/logging.h"
+#include "src/trace_processor/containers/row_map.h"
 #include "src/trace_processor/containers/string_pool.h"
 #include "src/trace_processor/db/column.h"
 #include "src/trace_processor/db/column_storage_overlay.h"
@@ -118,8 +119,12 @@
   RowMap FilterToRowMap(
       const std::vector<Constraint>& cs,
       RowMap::OptimizeFor optimize_for = RowMap::OptimizeFor::kMemory) const {
-    if (kUseFilterV2)
-      return QueryExecutor::FilterLegacy(this, cs);
+    if (kUseFilterV2) {
+      if (optimize_for == RowMap::OptimizeFor::kMemory) {
+        return QueryExecutor::FilterLegacy(this, cs);
+      }
+      return RowMap(QueryExecutor::FilterLegacy(this, cs).TakeAsIndexVector());
+    }
 
     RowMap rm(0, row_count_, optimize_for);
     for (const Constraint& c : cs) {