Merge "Add default log from level option to the protolog config" into main
diff --git a/src/trace_processor/containers/bit_vector.h b/src/trace_processor/containers/bit_vector.h
index 00cec59..e53f6d9 100644
--- a/src/trace_processor/containers/bit_vector.h
+++ b/src/trace_processor/containers/bit_vector.h
@@ -165,9 +165,7 @@
   // Returns whether the bit at |idx| is set.
   bool IsSet(uint32_t idx) const {
     PERFETTO_DCHECK(idx < size());
-
-    Address addr = IndexToAddress(idx);
-    return ConstBlockFromIndex(addr.block_idx).IsSet(addr.block_offset);
+    return ConstBitWord(&words_[WordFloor(idx)]).IsSet(idx % BitWord::kBits);
   }
 
   // Returns the number of set bits in the BitVector.
diff --git a/src/trace_processor/containers/bit_vector_benchmark.cc b/src/trace_processor/containers/bit_vector_benchmark.cc
index de4e85d..bab45c6 100644
--- a/src/trace_processor/containers/bit_vector_benchmark.cc
+++ b/src/trace_processor/containers/bit_vector_benchmark.cc
@@ -94,6 +94,28 @@
 }
 BENCHMARK(BM_BitVectorAppendFalse);
 
+// Measures BitVector::IsSet when queried at pseudo-random indices.
+static void BM_BitVectorIsSet(benchmark::State& state) {
+  static constexpr uint32_t kRandomSeed = 42;
+  static constexpr uint32_t kSize = 8192;
+  std::minstd_rand0 rnd_engine(kRandomSeed);
+  BitVector bv = BvWithSizeAndSetPercentage(kSize, 50);
+
+  // Precompute the queried rows so RNG cost stays out of the timed loop.
+  static constexpr uint32_t kPoolSize = 1024 * 1024;
+  std::vector<uint32_t> row_pool(kPoolSize);
+  for (uint32_t i = 0; i < kPoolSize; ++i) {
+    row_pool[i] = rnd_engine() % kSize;
+  }
+
+  uint32_t pool_idx = 0;
+  for (auto _ : state) {
+    benchmark::DoNotOptimize(bv.IsSet(row_pool[pool_idx]));
+    pool_idx = (pool_idx + 1) % kPoolSize;
+  }
+}
+BENCHMARK(BM_BitVectorIsSet);
+
 static void BM_BitVectorSet(benchmark::State& state) {
   static constexpr uint32_t kRandomSeed = 42;
   std::minstd_rand0 rnd_engine(kRandomSeed);
diff --git a/src/trace_processor/db/column/arrangement_overlay.cc b/src/trace_processor/db/column/arrangement_overlay.cc
index 9b4ce4d..7171d6c 100644
--- a/src/trace_processor/db/column/arrangement_overlay.cc
+++ b/src/trace_processor/db/column/arrangement_overlay.cc
@@ -35,9 +35,11 @@
 
 namespace perfetto::trace_processor::column {
 
-void ArrangementOverlay::Flatten(std::vector<Token>& tokens) {
-  for (auto& token : tokens) {
-    token.index = (*arrangement_)[token.index];
+void ArrangementOverlay::Flatten(uint32_t* start,
+                                 const uint32_t* end,
+                                 uint32_t stride) {
+  for (uint32_t* it = start; it < end; it += stride) {
+    *it = (*arrangement_)[*it];
   }
 }
 
diff --git a/src/trace_processor/db/column/arrangement_overlay.h b/src/trace_processor/db/column/arrangement_overlay.h
index 8d5ea24..96a0474 100644
--- a/src/trace_processor/db/column/arrangement_overlay.h
+++ b/src/trace_processor/db/column/arrangement_overlay.h
@@ -39,7 +39,7 @@
                      DataLayerChain::Indices::State arrangement_state);
   ~ArrangementOverlay() override;
 
-  void Flatten(std::vector<Token>&) override;
+  void Flatten(uint32_t* start, const uint32_t* end, uint32_t stride) override;
 
   std::unique_ptr<DataLayerChain> MakeChain(
       std::unique_ptr<DataLayerChain>,
diff --git a/src/trace_processor/db/column/dense_null_overlay.cc b/src/trace_processor/db/column/dense_null_overlay.cc
index 9f621a1..f1563e4 100644
--- a/src/trace_processor/db/column/dense_null_overlay.cc
+++ b/src/trace_processor/db/column/dense_null_overlay.cc
@@ -62,10 +62,12 @@
 }
 }  // namespace
 
-void DenseNullOverlay::Flatten(std::vector<Token>& tokens) {
-  for (auto& token : tokens) {
-    if (!non_null_->IsSet(token.index)) {
-      token.index = std::numeric_limits<uint32_t>::max();
+void DenseNullOverlay::Flatten(uint32_t* start,
+                               const uint32_t* end,
+                               uint32_t stride) {
+  for (uint32_t* it = start; it < end; it += stride) {
+    if (!non_null_->IsSet(*it)) {
+      *it = std::numeric_limits<uint32_t>::max();
     }
   }
 }
diff --git a/src/trace_processor/db/column/dense_null_overlay.h b/src/trace_processor/db/column/dense_null_overlay.h
index 2e93a49..64da7ca 100644
--- a/src/trace_processor/db/column/dense_null_overlay.h
+++ b/src/trace_processor/db/column/dense_null_overlay.h
@@ -21,7 +21,6 @@
 #include <memory>
 #include <optional>
 #include <string>
-#include <vector>
 
 #include "perfetto/trace_processor/basic_types.h"
 #include "src/trace_processor/containers/bit_vector.h"
@@ -39,7 +38,7 @@
   explicit DenseNullOverlay(const BitVector* non_null);
   ~DenseNullOverlay() override;
 
-  void Flatten(std::vector<Token>&) override;
+  void Flatten(uint32_t* start, const uint32_t* end, uint32_t stride) override;
 
   std::unique_ptr<DataLayerChain> MakeChain(
       std::unique_ptr<DataLayerChain>,
diff --git a/src/trace_processor/db/column/null_overlay.cc b/src/trace_processor/db/column/null_overlay.cc
index 6e8325d..66955a1 100644
--- a/src/trace_processor/db/column/null_overlay.cc
+++ b/src/trace_processor/db/column/null_overlay.cc
@@ -109,12 +109,14 @@
 
 }  // namespace
 
-void NullOverlay::Flatten(std::vector<Token>& tokens) {
-  for (auto& token : tokens) {
-    if (non_null_->IsSet(token.index)) {
-      token.index = non_null_->CountSetBits(token.index);
+void NullOverlay::Flatten(uint32_t* start,
+                          const uint32_t* end,
+                          uint32_t stride) {
+  for (uint32_t* it = start; it < end; it += stride) {
+    if (non_null_->IsSet(*it)) {
+      *it = non_null_->CountSetBits(*it);
     } else {
-      token.index = std::numeric_limits<uint32_t>::max();
+      *it = std::numeric_limits<uint32_t>::max();
     }
   }
 }
diff --git a/src/trace_processor/db/column/null_overlay.h b/src/trace_processor/db/column/null_overlay.h
index 86c566e..fb63d45 100644
--- a/src/trace_processor/db/column/null_overlay.h
+++ b/src/trace_processor/db/column/null_overlay.h
@@ -21,7 +21,6 @@
 #include <memory>
 #include <optional>
 #include <string>
-#include <vector>
 
 #include "perfetto/trace_processor/basic_types.h"
 #include "src/trace_processor/containers/bit_vector.h"
@@ -38,7 +37,7 @@
   explicit NullOverlay(const BitVector* non_null);
   ~NullOverlay() override;
 
-  void Flatten(std::vector<Token>&) override;
+  void Flatten(uint32_t* start, const uint32_t* end, uint32_t stride) override;
 
   std::unique_ptr<DataLayerChain> MakeChain(
       std::unique_ptr<DataLayerChain>,
diff --git a/src/trace_processor/db/column/overlay_layer.h b/src/trace_processor/db/column/overlay_layer.h
index e667cfe..fd96c9c 100644
--- a/src/trace_processor/db/column/overlay_layer.h
+++ b/src/trace_processor/db/column/overlay_layer.h
@@ -17,10 +17,9 @@
 #ifndef SRC_TRACE_PROCESSOR_DB_COLUMN_OVERLAY_LAYER_H_
 #define SRC_TRACE_PROCESSOR_DB_COLUMN_OVERLAY_LAYER_H_
 
-#include <vector>
+#include <cstdint>
 
 #include "src/trace_processor/db/column/data_layer.h"
-#include "src/trace_processor/db/column/types.h"
 
 namespace perfetto::trace_processor::column {
 
@@ -28,7 +27,16 @@
  public:
   ~OverlayLayer() override;
 
-  virtual void Flatten(std::vector<Token>&) = 0;
+  // Translates the indices separated by |stride| between |start| and |end|
+  // through this overlay.
+  //
+  // Implementations should do something like this:
+  //   for (auto* it = start; it < end; it += stride) {
+  //     *it = (...translate the index at *it);
+  //   }
+  virtual void Flatten(uint32_t* start,
+                       const uint32_t* end,
+                       uint32_t stride) = 0;
 
  protected:
   explicit OverlayLayer(Impl impl);
diff --git a/src/trace_processor/db/column/range_overlay.cc b/src/trace_processor/db/column/range_overlay.cc
index b60e97b..b31d1ae 100644
--- a/src/trace_processor/db/column/range_overlay.cc
+++ b/src/trace_processor/db/column/range_overlay.cc
@@ -43,8 +43,12 @@
 
 }  // namespace
 
-void RangeOverlay::Flatten(std::vector<Token>& tokens) {
-  AddOffsetToTokenIndex(tokens, range_->start);
+void RangeOverlay::Flatten(uint32_t* start,
+                           const uint32_t* end,
+                           uint32_t stride) {
+  for (uint32_t* it = start; it < end; it += stride) {
+    *it += range_->start;
+  }
 }
 
 RangeOverlay::ChainImpl::ChainImpl(std::unique_ptr<DataLayerChain> inner,
diff --git a/src/trace_processor/db/column/range_overlay.h b/src/trace_processor/db/column/range_overlay.h
index f3a054c..5340fb2 100644
--- a/src/trace_processor/db/column/range_overlay.h
+++ b/src/trace_processor/db/column/range_overlay.h
@@ -35,7 +35,7 @@
   explicit RangeOverlay(const Range*);
   ~RangeOverlay() override;
 
-  void Flatten(std::vector<Token>&) override;
+  void Flatten(uint32_t* start, const uint32_t* end, uint32_t stride) override;
 
   std::unique_ptr<DataLayerChain> MakeChain(
       std::unique_ptr<DataLayerChain>,
diff --git a/src/trace_processor/db/column/selector_overlay.cc b/src/trace_processor/db/column/selector_overlay.cc
index 9868c09..af74c50 100644
--- a/src/trace_processor/db/column/selector_overlay.cc
+++ b/src/trace_processor/db/column/selector_overlay.cc
@@ -41,14 +41,12 @@
   if (selector.size() == selector.CountSetBits()) {
     return;
   }
-
   if (tokens.size() < selector.size() / kIndexOfNthSetRatio) {
     for (auto& token : tokens) {
       token.index = selector.IndexOfNthSet(token.index);
     }
     return;
   }
-
   // TODO(mayzner): once we have a reverse index for IndexOfNthSet in
   // BitVector, this should no longer be necessary.
   std::vector<uint32_t> lookup = selector.GetSetBitIndices();
@@ -57,10 +55,34 @@
   }
 }
 
+// Replaces every |stride|-th entry in [start, end) with the index of the
+// matching set bit in |selector|. |stride| must be non-zero.
+void TranslateToInnerIndices(const BitVector& selector,
+                             uint32_t* start,
+                             const uint32_t* end,
+                             uint32_t stride) {
+  if (selector.size() == selector.CountSetBits())
+    return;
+  // Count visited entries (not raw slots) to match the Token-based overload.
+  uint32_t count = (static_cast<uint32_t>(end - start) + stride - 1) / stride;
+  if (count < selector.size() / kIndexOfNthSetRatio) {
+    for (uint32_t* it = start; it < end; it += stride)
+      *it = selector.IndexOfNthSet(*it);
+    return;
+  }
+  // TODO(mayzner): once we have a reverse index for IndexOfNthSet in
+  // BitVector, this should no longer be necessary.
+  std::vector<uint32_t> lookup = selector.GetSetBitIndices();
+  for (uint32_t* it = start; it < end; it += stride)
+    *it = lookup[*it];
+}
+
 }  // namespace
 
-void SelectorOverlay::Flatten(std::vector<Token>& tokens) {
-  TranslateToInnerIndices(*selector_, tokens);
+void SelectorOverlay::Flatten(uint32_t* start,
+                              const uint32_t* end,
+                              uint32_t stride) {
+  TranslateToInnerIndices(*selector_, start, end, stride);
 }
 
 SelectorOverlay::ChainImpl::ChainImpl(std::unique_ptr<DataLayerChain> inner,
diff --git a/src/trace_processor/db/column/selector_overlay.h b/src/trace_processor/db/column/selector_overlay.h
index 4bf0f90..692b068 100644
--- a/src/trace_processor/db/column/selector_overlay.h
+++ b/src/trace_processor/db/column/selector_overlay.h
@@ -21,7 +21,6 @@
 #include <memory>
 #include <optional>
 #include <string>
-#include <vector>
 
 #include "perfetto/trace_processor/basic_types.h"
 #include "src/trace_processor/containers/bit_vector.h"
@@ -39,7 +38,7 @@
   explicit SelectorOverlay(const BitVector*);
   ~SelectorOverlay() override;
 
-  void Flatten(std::vector<Token>&) override;
+  void Flatten(uint32_t* start, const uint32_t* end, uint32_t stride) override;
 
   std::unique_ptr<DataLayerChain> MakeChain(
       std::unique_ptr<DataLayerChain>,