Merge "tp: Add Distinct to CEngine API" into main
diff --git a/src/trace_processor/db/column/arrangement_overlay.cc b/src/trace_processor/db/column/arrangement_overlay.cc
index 17063c9..b7f3dfe 100644
--- a/src/trace_processor/db/column/arrangement_overlay.cc
+++ b/src/trace_processor/db/column/arrangement_overlay.cc
@@ -19,6 +19,7 @@
 #include <algorithm>
 #include <cstdint>
 #include <memory>
+#include <unordered_set>
 #include <utility>
 #include <vector>
 
@@ -146,6 +147,24 @@
   inner_->StableSort(start, end, direction);
 }
 
+void ArrangementOverlay::ChainImpl::Distinct(Indices& indices) const {
+  PERFETTO_TP_TRACE(metatrace::Category::DB,
+                    "ArrangementOverlay::ChainImpl::Distinct");
+  // TODO(mayzner): Utilize `does_arrangmeent_order_storage_`.
+  std::unordered_set<uint32_t> s;
+  indices.tokens.erase(
+      std::remove_if(indices.tokens.begin(), indices.tokens.end(),
+                     [this, &s](Indices::Token& idx) {
+                       if (s.insert(idx.index).second) {
+                         idx.index = (*arrangement_)[idx.index];
+                         return false;
+                       }
+                       return true;
+                     }),
+      indices.tokens.end());
+  inner_->Distinct(indices);
+}
+
 void ArrangementOverlay::ChainImpl::Serialize(StorageProto* storage) const {
   auto* arrangement_overlay = storage->set_arrangement_overlay();
   arrangement_overlay->set_values(
diff --git a/src/trace_processor/db/column/arrangement_overlay.h b/src/trace_processor/db/column/arrangement_overlay.h
index 2bc61cb..3f9e4ca 100644
--- a/src/trace_processor/db/column/arrangement_overlay.h
+++ b/src/trace_processor/db/column/arrangement_overlay.h
@@ -72,6 +72,8 @@
                     SortToken* end,
                     SortDirection) const override;
 
+    void Distinct(Indices&) const override;
+
     void Serialize(StorageProto*) const override;
 
     uint32_t size() const override {
diff --git a/src/trace_processor/db/column/arrangement_overlay_unittest.cc b/src/trace_processor/db/column/arrangement_overlay_unittest.cc
index 3a26f0c..55f310b 100644
--- a/src/trace_processor/db/column/arrangement_overlay_unittest.cc
+++ b/src/trace_processor/db/column/arrangement_overlay_unittest.cc
@@ -109,8 +109,7 @@
 
 TEST(ArrangementOverlay, OrderingSearch) {
   std::vector<uint32_t> arrangement{0, 2, 4, 1, 3};
-  auto fake = FakeStorageChain::SearchSubset(
-      5, BitVector({false, true, false, true, false}));
+  auto fake = FakeStorageChain::SearchSubset(5, BitVector({0, 1, 0, 1, 0}));
   ArrangementOverlay storage(&arrangement, Indices::State::kNonmonotonic);
   auto chain =
       storage.MakeChain(std::move(fake), DataLayer::ChainCreationArgs(true));
@@ -141,5 +140,21 @@
               ElementsAre(0, 3, 1, 4, 2));
 }
 
+TEST(ArrangementOverlay, Distinct) {
+  std::vector<uint32_t> numeric_data{10, 20, 10, 20, 30};
+  NumericStorage<uint32_t> numeric(&numeric_data, ColumnType::kUint32, false);
+
+  // 30, 30, 10, 20, 10, 20
+  std::vector<uint32_t> arrangement{4, 4, 0, 1, 2, 3};
+  ArrangementOverlay storage(&arrangement, Indices::State::kNonmonotonic);
+  auto chain = storage.MakeChain(numeric.MakeChain());
+
+  // 30, 30, 20, 20
+  auto indices = Indices::CreateWithIndexPayloadForTesting(
+      {0, 1, 3, 3}, Indices::State::kNonmonotonic);
+  chain->Distinct(indices);
+  ASSERT_THAT(utils::ExtractPayloadForTesting(indices), ElementsAre(0, 2));
+}
+
 }  // namespace
 }  // namespace perfetto::trace_processor::column
diff --git a/src/trace_processor/db/column/data_layer.h b/src/trace_processor/db/column/data_layer.h
index c8d74a8..85e93b9 100644
--- a/src/trace_processor/db/column/data_layer.h
+++ b/src/trace_processor/db/column/data_layer.h
@@ -283,6 +283,14 @@
                           SortToken* end,
                           SortDirection direction) const = 0;
 
+  // Removes all indices pointing to values that are duplicates, as a result the
+  // indices will only point to distinct (not duplicated) values.
+  //
+  // Notes for implementors:
+  // * Each layer that might introduce duplicates is responsible for removing
+  // them.
+  virtual void Distinct(Indices&) const = 0;
+
   // Serializes storage data to proto format.
   virtual void Serialize(StorageProto*) const = 0;
 
diff --git a/src/trace_processor/db/column/dense_null_overlay.cc b/src/trace_processor/db/column/dense_null_overlay.cc
index 55003fa..50a5a7e 100644
--- a/src/trace_processor/db/column/dense_null_overlay.cc
+++ b/src/trace_processor/db/column/dense_null_overlay.cc
@@ -250,6 +250,35 @@
   }
 }
 
+void DenseNullOverlay::ChainImpl::Distinct(Indices& indices) const {
+  PERFETTO_TP_TRACE(metatrace::Category::DB,
+                    "DenseNullOverlay::ChainImpl::Distinct");
+  // Find first NULL.
+  auto first_null_it = std::find_if(
+      indices.tokens.begin(), indices.tokens.end(),
+      [this](const Indices::Token& t) { return !non_null_->IsSet(t.index); });
+
+  // Save first NULL.
+  std::optional<Indices::Token> null_tok;
+  if (first_null_it != indices.tokens.end()) {
+    null_tok = *first_null_it;
+  }
+
+  // Erase all NULLs.
+  indices.tokens.erase(std::remove_if(first_null_it, indices.tokens.end(),
+                                      [this](const Indices::Token& idx) {
+                                        return !non_null_->IsSet(idx.index);
+                                      }),
+                       indices.tokens.end());
+
+  inner_->Distinct(indices);
+
+  // Add the only null as it is distinct value.
+  if (null_tok.has_value()) {
+    indices.tokens.push_back(*null_tok);
+  }
+}
+
 void DenseNullOverlay::ChainImpl::Serialize(StorageProto* storage) const {
   auto* null_overlay = storage->set_dense_null_overlay();
   non_null_->Serialize(null_overlay->set_bit_vector());
diff --git a/src/trace_processor/db/column/dense_null_overlay.h b/src/trace_processor/db/column/dense_null_overlay.h
index b27beb5..1edb84e 100644
--- a/src/trace_processor/db/column/dense_null_overlay.h
+++ b/src/trace_processor/db/column/dense_null_overlay.h
@@ -64,6 +64,8 @@
                     SortToken* end,
                     SortDirection) const override;
 
+    void Distinct(Indices&) const override;
+
     void Serialize(StorageProto*) const override;
 
     uint32_t size() const override { return non_null_->size(); }
diff --git a/src/trace_processor/db/column/dense_null_overlay_unittest.cc b/src/trace_processor/db/column/dense_null_overlay_unittest.cc
index efda157..24b1eee 100644
--- a/src/trace_processor/db/column/dense_null_overlay_unittest.cc
+++ b/src/trace_processor/db/column/dense_null_overlay_unittest.cc
@@ -35,6 +35,7 @@
 
 using testing::ElementsAre;
 using testing::IsEmpty;
+using testing::UnorderedElementsAre;
 
 using Indices = DataLayerChain::Indices;
 using OrderedIndices = DataLayerChain::OrderedIndices;
@@ -237,5 +238,22 @@
   }
 }
 
+TEST(DenseNullOverlay, Distinct) {
+  std::vector<uint32_t> numeric_data{0, 3, 0, 1, 0, 2, 4};
+  NumericStorage<uint32_t> numeric(&numeric_data, ColumnType::kUint32, false);
+
+  // NULL, 1, NULL, 1, 0, 2, 4
+  BitVector null{0, 1, 0, 1, 1, 1, 1};
+  DenseNullOverlay overlay(&null);
+  auto chain = overlay.MakeChain(numeric.MakeChain());
+
+  // NULL, 0, 1, 1
+  auto indices = Indices::CreateWithIndexPayloadForTesting(
+      {0, 1, 3, 3}, Indices::State::kNonmonotonic);
+  chain->Distinct(indices);
+  ASSERT_THAT(utils::ExtractPayloadForTesting(indices),
+              UnorderedElementsAre(0, 1, 2));
+}
+
 }  // namespace
 }  // namespace perfetto::trace_processor::column
diff --git a/src/trace_processor/db/column/dummy_storage.cc b/src/trace_processor/db/column/dummy_storage.cc
index 27b464f..70418e7 100644
--- a/src/trace_processor/db/column/dummy_storage.cc
+++ b/src/trace_processor/db/column/dummy_storage.cc
@@ -62,6 +62,10 @@
   PERFETTO_FATAL("Shouldn't be called");
 }
 
+void DummyStorage::ChainImpl::Distinct(Indices&) const {
+  PERFETTO_FATAL("Shouldn't be called");
+}
+
 uint32_t DummyStorage::ChainImpl::size() const {
   return 0;
 }
diff --git a/src/trace_processor/db/column/dummy_storage.h b/src/trace_processor/db/column/dummy_storage.h
index 80674bb..08bd319 100644
--- a/src/trace_processor/db/column/dummy_storage.h
+++ b/src/trace_processor/db/column/dummy_storage.h
@@ -53,6 +53,8 @@
                     SortToken* end,
                     SortDirection) const override;
 
+    void Distinct(Indices&) const override;
+
     void Serialize(StorageProto*) const override;
 
     uint32_t size() const override;
diff --git a/src/trace_processor/db/column/fake_storage.cc b/src/trace_processor/db/column/fake_storage.cc
index babfb7c..8a7cb48 100644
--- a/src/trace_processor/db/column/fake_storage.cc
+++ b/src/trace_processor/db/column/fake_storage.cc
@@ -149,6 +149,12 @@
   PERFETTO_FATAL("For GCC");
 }
 
+void FakeStorageChain::Distinct(Indices&) const {
+  // Fake storage shouldn't implement Distinct as it's not a binary (this index
+  // passes or not) operation on a column.
+  PERFETTO_FATAL("Not implemented");
+}
+
 void FakeStorageChain::StableSort(SortToken*, SortToken*, SortDirection) const {
   PERFETTO_FATAL("Not implemented");
 }
diff --git a/src/trace_processor/db/column/fake_storage.h b/src/trace_processor/db/column/fake_storage.h
index bc02300..50a981d 100644
--- a/src/trace_processor/db/column/fake_storage.h
+++ b/src/trace_processor/db/column/fake_storage.h
@@ -92,6 +92,8 @@
                   SortToken* end,
                   SortDirection) const override;
 
+  void Distinct(Indices&) const override;
+
   void Serialize(StorageProto*) const override;
 
   uint32_t size() const override { return size_; }
diff --git a/src/trace_processor/db/column/fake_storage_unittest.cc b/src/trace_processor/db/column/fake_storage_unittest.cc
index 0bc0211..908b7e0 100644
--- a/src/trace_processor/db/column/fake_storage_unittest.cc
+++ b/src/trace_processor/db/column/fake_storage_unittest.cc
@@ -119,6 +119,40 @@
   }
 }
 
+TEST(FakeStorage, Search) {
+  {
+    // All passes
+    auto fake = FakeStorageChain::SearchAll(5);
+    auto ret = fake->Search(FilterOp::kEq, SqlValue(), Range(1, 3));
+    ASSERT_THAT(utils::ToIndexVectorForTests(ret), ElementsAre(1, 2));
+  }
+  {
+    // None passes
+    auto fake = FakeStorageChain::SearchNone(5);
+    auto ret = fake->Search(FilterOp::kEq, SqlValue(), Range(1, 3));
+    ASSERT_THAT(utils::ToIndexVectorForTests(ret), ElementsAre());
+  }
+  {
+    // Index vector
+    auto fake =
+        FakeStorageChain::SearchSubset(5, std::vector<uint32_t>{1, 2, 4, 5});
+    auto ret = fake->Search(FilterOp::kEq, SqlValue(), Range(0, 3));
+    ASSERT_THAT(utils::ToIndexVectorForTests(ret), ElementsAre(1, 2));
+  }
+  {
+    // BitVector
+    auto fake = FakeStorageChain::SearchSubset(5, BitVector{0, 1, 0, 1, 0});
+    auto ret = fake->Search(FilterOp::kEq, SqlValue(), Range(1, 3));
+    ASSERT_THAT(utils::ToIndexVectorForTests(ret), ElementsAre(1));
+  }
+  {
+    // Range
+    auto fake = FakeStorageChain::SearchSubset(5, Range(2, 4));
+    auto ret = fake->Search(FilterOp::kEq, SqlValue(), Range(1, 3));
+    ASSERT_THAT(utils::ToIndexVectorForTests(ret), ElementsAre(2));
+  }
+}
+
 TEST(FakeStorage, IndexSearchValidated) {
   {
     // All passes
diff --git a/src/trace_processor/db/column/id_storage.cc b/src/trace_processor/db/column/id_storage.cc
index e3f4d8a..1f1ee76 100644
--- a/src/trace_processor/db/column/id_storage.cc
+++ b/src/trace_processor/db/column/id_storage.cc
@@ -22,6 +22,7 @@
 #include <iterator>
 #include <limits>
 #include <string>
+#include <unordered_set>
 #include <utility>
 
 #include "perfetto/base/logging.h"
@@ -313,6 +314,8 @@
 void IdStorage::ChainImpl::StableSort(SortToken* start,
                                       SortToken* end,
                                       SortDirection direction) const {
+  PERFETTO_TP_TRACE(metatrace::Category::DB,
+                    "IdStorage::ChainImpl::StableSort");
   switch (direction) {
     case SortDirection::kAscending:
       std::stable_sort(start, end, [](const SortToken& a, const SortToken& b) {
@@ -328,6 +331,17 @@
   PERFETTO_FATAL("For GCC");
 }
 
+void IdStorage::ChainImpl::Distinct(Indices& indices) const {
+  PERFETTO_TP_TRACE(metatrace::Category::DB, "IdStorage::ChainImpl::Distinct");
+  std::unordered_set<uint32_t> s;
+  indices.tokens.erase(
+      std::remove_if(indices.tokens.begin(), indices.tokens.end(),
+                     [&s](const Indices::Token& idx) {
+                       return !s.insert(idx.index).second;
+                     }),
+      indices.tokens.end());
+}
+
 void IdStorage::ChainImpl::Serialize(StorageProto* storage) const {
   storage->set_id_storage();
 }
diff --git a/src/trace_processor/db/column/id_storage.h b/src/trace_processor/db/column/id_storage.h
index 51b9f0d..b25f7f8 100644
--- a/src/trace_processor/db/column/id_storage.h
+++ b/src/trace_processor/db/column/id_storage.h
@@ -63,6 +63,8 @@
                     SortToken* end,
                     SortDirection) const override;
 
+    void Distinct(Indices&) const override;
+
     void Serialize(StorageProto*) const override;
 
     uint32_t size() const override {
diff --git a/src/trace_processor/db/column/id_storage_unittest.cc b/src/trace_processor/db/column/id_storage_unittest.cc
index 2d6fd44..3e00ff4 100644
--- a/src/trace_processor/db/column/id_storage_unittest.cc
+++ b/src/trace_processor/db/column/id_storage_unittest.cc
@@ -373,6 +373,16 @@
               ElementsAre(4, 3, 2, 1, 0));
 }
 
+TEST(IdStorage, Distinct) {
+  IdStorage storage;
+  auto chain = storage.MakeChain();
+
+  auto indices = Indices::CreateWithIndexPayloadForTesting(
+      {1, 1, 0, 3}, Indices::State::kNonmonotonic);
+  chain->Distinct(indices);
+  ASSERT_THAT(utils::ExtractPayloadForTesting(indices), ElementsAre(0, 2, 3));
+}
+
 }  // namespace
 }  // namespace column
 }  // namespace perfetto::trace_processor
diff --git a/src/trace_processor/db/column/null_overlay.cc b/src/trace_processor/db/column/null_overlay.cc
index b1f8f45..78b0ae8 100644
--- a/src/trace_processor/db/column/null_overlay.cc
+++ b/src/trace_processor/db/column/null_overlay.cc
@@ -20,6 +20,7 @@
 #include <cstdint>
 #include <iterator>
 #include <memory>
+#include <optional>
 #include <utility>
 #include <vector>
 
@@ -104,9 +105,9 @@
   PERFETTO_FATAL("For GCC");
 }
 
-NullOverlay::ChainImpl::ChainImpl(std::unique_ptr<DataLayerChain> innner,
+NullOverlay::ChainImpl::ChainImpl(std::unique_ptr<DataLayerChain> inner,
                                   const BitVector* non_null)
-    : inner_(std::move(innner)), non_null_(non_null) {
+    : inner_(std::move(inner)), non_null_(non_null) {
   PERFETTO_DCHECK(non_null_->CountSetBits() <= inner_->size());
 }
 
@@ -279,6 +280,8 @@
 void NullOverlay::ChainImpl::StableSort(SortToken* start,
                                         SortToken* end,
                                         SortDirection direction) const {
+  PERFETTO_TP_TRACE(metatrace::Category::DB,
+                    "NullOverlay::ChainImpl::StableSort");
   SortToken* middle = std::stable_partition(
       start, end,
       [this](const SortToken& idx) { return !non_null_->IsSet(idx.index); });
@@ -291,6 +294,40 @@
   }
 }
 
+void NullOverlay::ChainImpl::Distinct(Indices& indices) const {
+  PERFETTO_TP_TRACE(metatrace::Category::DB,
+                    "NullOverlay::ChainImpl::Distinct");
+  // Find first NULL.
+  auto first_null_it = std::find_if(
+      indices.tokens.begin(), indices.tokens.end(),
+      [this](const Indices::Token& t) { return !non_null_->IsSet(t.index); });
+
+  // Save first NULL.
+  std::optional<Indices::Token> null_tok;
+  if (first_null_it != indices.tokens.end()) {
+    null_tok = *first_null_it;
+  }
+
+  // Erase all NULLs.
+  indices.tokens.erase(std::remove_if(first_null_it, indices.tokens.end(),
+                                      [this](const Indices::Token& idx) {
+                                        return !non_null_->IsSet(idx.index);
+                                      }),
+                       indices.tokens.end());
+
+  // Update index of each token so they all point to inner.
+  for (auto& token : indices.tokens) {
+    token.index = non_null_->CountSetBits(token.index);
+  }
+
+  inner_->Distinct(indices);
+
+  // Add the only null as it is distinct value.
+  if (null_tok.has_value()) {
+    indices.tokens.push_back(*null_tok);
+  }
+}
+
 void NullOverlay::ChainImpl::Serialize(StorageProto* storage) const {
   auto* null_storage = storage->set_null_overlay();
   non_null_->Serialize(null_storage->set_bit_vector());
diff --git a/src/trace_processor/db/column/null_overlay.h b/src/trace_processor/db/column/null_overlay.h
index 625959d..5f7d341 100644
--- a/src/trace_processor/db/column/null_overlay.h
+++ b/src/trace_processor/db/column/null_overlay.h
@@ -63,6 +63,8 @@
                     SortToken* end,
                     SortDirection) const override;
 
+    void Distinct(Indices&) const override;
+
     void Serialize(StorageProto*) const override;
 
     uint32_t size() const override { return non_null_->size(); }
diff --git a/src/trace_processor/db/column/null_overlay_unittest.cc b/src/trace_processor/db/column/null_overlay_unittest.cc
index 8f15746..981c7bd 100644
--- a/src/trace_processor/db/column/null_overlay_unittest.cc
+++ b/src/trace_processor/db/column/null_overlay_unittest.cc
@@ -35,6 +35,7 @@
 
 using testing::ElementsAre;
 using testing::IsEmpty;
+using testing::UnorderedElementsAre;
 
 using Indices = DataLayerChain::Indices;
 using OrderedIndices = DataLayerChain::OrderedIndices;
@@ -276,5 +277,22 @@
   }
 }
 
+TEST(NullOverlay, Distinct) {
+  std::vector<uint32_t> numeric_data{0, 0, 1, 1, 2};
+  NumericStorage<uint32_t> numeric(&numeric_data, ColumnType::kUint32, false);
+
+  // NULL, 0, NULL, 0, 1, 1, 2
+  BitVector null{0, 1, 0, 1, 1, 1, 1};
+  NullOverlay overlay(&null);
+  auto chain = overlay.MakeChain(numeric.MakeChain());
+
+  // NULL, 0, 1, 1, 2
+  auto indices = Indices::CreateWithIndexPayloadForTesting(
+      {0, 1, 4, 5, 6}, Indices::State::kNonmonotonic);
+  chain->Distinct(indices);
+  ASSERT_THAT(utils::ExtractPayloadForTesting(indices),
+              UnorderedElementsAre(0, 1, 2, 4));
+}
+
 }  // namespace
 }  // namespace perfetto::trace_processor::column
diff --git a/src/trace_processor/db/column/numeric_storage.h b/src/trace_processor/db/column/numeric_storage.h
index 821fd40..77fbd2e 100644
--- a/src/trace_processor/db/column/numeric_storage.h
+++ b/src/trace_processor/db/column/numeric_storage.h
@@ -19,13 +19,16 @@
 #include <cstdint>
 #include <limits>
 #include <memory>
+#include <set>
 #include <string>
+#include <unordered_set>
 #include <variant>
 #include <vector>
 
 #include "perfetto/base/compiler.h"
 #include "perfetto/trace_processor/basic_types.h"
 #include "src/trace_processor/containers/bit_vector.h"
+#include "src/trace_processor/containers/row_map.h"
 #include "src/trace_processor/db/column/data_layer.h"
 #include "src/trace_processor/db/column/types.h"
 #include "src/trace_processor/db/column/utils.h"
@@ -103,6 +106,16 @@
       return utils::SingleSearchNumeric(op, (*vector_)[i], sql_val);
     }
 
+    void Distinct(Indices& indices) const override {
+      std::unordered_set<T> s;
+      indices.tokens.erase(
+          std::remove_if(indices.tokens.begin(), indices.tokens.end(),
+                         [&s, this](const Indices::Token& idx) {
+                           return !s.insert((*vector_)[idx.index]).second;
+                         }),
+          indices.tokens.end());
+    }
+
     void StableSort(SortToken* start,
                     SortToken* end,
                     SortDirection direction) const override {
diff --git a/src/trace_processor/db/column/numeric_storage_unittest.cc b/src/trace_processor/db/column/numeric_storage_unittest.cc
index 46ef920..e80c634 100644
--- a/src/trace_processor/db/column/numeric_storage_unittest.cc
+++ b/src/trace_processor/db/column/numeric_storage_unittest.cc
@@ -766,6 +766,19 @@
   }
 }
 
+TEST(NumericStorage, DistinctFromIndexVector) {
+  std::vector<int64_t> data{
+      1, 100, 2, 100, 2,
+  };
+  NumericStorage<int64_t> storage(&data, ColumnType::kInt64, false);
+  auto chain = storage.MakeChain();
+
+  auto indices = Indices::CreateWithIndexPayloadForTesting(
+      {2, 1, 0, 3}, Indices::State::kNonmonotonic);
+  chain->Distinct(indices);
+  ASSERT_THAT(utils::ExtractPayloadForTesting(indices), ElementsAre(0, 1, 2));
+}
+
 }  // namespace
 }  // namespace column
 }  // namespace perfetto::trace_processor
diff --git a/src/trace_processor/db/column/range_overlay.cc b/src/trace_processor/db/column/range_overlay.cc
index 84f19ce..65ba7cb 100644
--- a/src/trace_processor/db/column/range_overlay.cc
+++ b/src/trace_processor/db/column/range_overlay.cc
@@ -143,6 +143,14 @@
   inner_->StableSort(start, end, direction);
 }
 
+void RangeOverlay::ChainImpl::Distinct(Indices& indices) const {
+  PERFETTO_TP_TRACE(metatrace::Category::DB, "RangeOverlay::Distinct");
+  for (auto& token : indices.tokens) {
+    token.index += range_->start;
+  }
+  inner_->Distinct(indices);
+}
+
 void RangeOverlay::ChainImpl::Serialize(StorageProto*) const {
   PERFETTO_FATAL("Not implemented");
 }
diff --git a/src/trace_processor/db/column/range_overlay.h b/src/trace_processor/db/column/range_overlay.h
index 3adb319..128d94a 100644
--- a/src/trace_processor/db/column/range_overlay.h
+++ b/src/trace_processor/db/column/range_overlay.h
@@ -60,6 +60,8 @@
                     SortToken* end,
                     SortDirection) const override;
 
+    void Distinct(Indices&) const override;
+
     void Serialize(StorageProto*) const override;
 
     uint32_t size() const override { return range_->size(); }
diff --git a/src/trace_processor/db/column/range_overlay_unittest.cc b/src/trace_processor/db/column/range_overlay_unittest.cc
index 240e998..22ea1b0 100644
--- a/src/trace_processor/db/column/range_overlay_unittest.cc
+++ b/src/trace_processor/db/column/range_overlay_unittest.cc
@@ -126,5 +126,20 @@
   ASSERT_THAT(utils::ExtractPayloadForTesting(tokens), ElementsAre(1, 2, 0));
 }
 
+TEST(RangeOverlay, Distinct) {
+  std::vector<uint32_t> numeric_data{100, 99, 2, 0, 1};
+  NumericStorage<uint32_t> numeric(&numeric_data, ColumnType::kUint32, false);
+
+  // 99, 2, 0, 1
+  Range range(1, 4);
+  RangeOverlay storage(&range);
+  auto chain = storage.MakeChain(numeric.MakeChain());
+
+  auto indices = Indices::CreateWithIndexPayloadForTesting(
+      {0, 0, 0}, Indices::State::kNonmonotonic);
+  chain->Distinct(indices);
+  ASSERT_THAT(utils::ExtractPayloadForTesting(indices), ElementsAre(0));
+}
+
 }  // namespace
 }  // namespace perfetto::trace_processor::column
diff --git a/src/trace_processor/db/column/selector_overlay.cc b/src/trace_processor/db/column/selector_overlay.cc
index 999e4bb..0577ae4 100644
--- a/src/trace_processor/db/column/selector_overlay.cc
+++ b/src/trace_processor/db/column/selector_overlay.cc
@@ -123,6 +123,13 @@
   inner_->StableSort(start, end, direction);
 }
 
+void SelectorOverlay::ChainImpl::Distinct(Indices& indices) const {
+  for (auto& token : indices.tokens) {
+    token.index = selector_->IndexOfNthSet(token.index);
+  }
+  inner_->Distinct(indices);
+}
+
 void SelectorOverlay::ChainImpl::Serialize(StorageProto* storage) const {
   auto* selector_overlay = storage->set_selector_overlay();
   inner_->Serialize(selector_overlay->set_storage());
diff --git a/src/trace_processor/db/column/selector_overlay.h b/src/trace_processor/db/column/selector_overlay.h
index 82e2fe3..84c3ec3 100644
--- a/src/trace_processor/db/column/selector_overlay.h
+++ b/src/trace_processor/db/column/selector_overlay.h
@@ -64,6 +64,8 @@
                     SortToken* end,
                     SortDirection) const override;
 
+    void Distinct(Indices&) const override;
+
     void Serialize(StorageProto*) const override;
 
     uint32_t size() const override { return selector_->size(); }
diff --git a/src/trace_processor/db/column/set_id_storage.cc b/src/trace_processor/db/column/set_id_storage.cc
index ae1c167..95825ac 100644
--- a/src/trace_processor/db/column/set_id_storage.cc
+++ b/src/trace_processor/db/column/set_id_storage.cc
@@ -22,7 +22,9 @@
 #include <iterator>
 #include <limits>
 #include <memory>
+#include <set>
 #include <string>
+#include <unordered_set>
 #include <utility>
 #include <vector>
 
@@ -252,6 +254,8 @@
     FilterOp op,
     SqlValue sql_val,
     const OrderedIndices& indices) const {
+  PERFETTO_TP_TRACE(metatrace::Category::DB,
+                    "SetIdStorage::ChainImpl::OrderedIndexSearch");
   // OrderedIndices are monotonic non-contiguous values.
   auto res = SearchValidated(
       op, sql_val, Range(indices.data[0], indices.data[indices.size - 1] + 1));
@@ -303,6 +307,8 @@
 void SetIdStorage::ChainImpl::StableSort(SortToken* start,
                                          SortToken* end,
                                          SortDirection direction) const {
+  PERFETTO_TP_TRACE(metatrace::Category::DB,
+                    "SetIdStorage::ChainImpl::StableSort");
   switch (direction) {
     case SortDirection::kAscending:
       std::stable_sort(start, end,
@@ -319,6 +325,18 @@
   }
 }
 
+void SetIdStorage::ChainImpl::Distinct(Indices& indices) const {
+  PERFETTO_TP_TRACE(metatrace::Category::DB,
+                    "SetIdStorage::ChainImpl::Distinct");
+  std::unordered_set<uint32_t> s;
+  indices.tokens.erase(
+      std::remove_if(indices.tokens.begin(), indices.tokens.end(),
+                     [&s, this](const Indices::Token& idx) {
+                       return !s.insert((*values_)[idx.index]).second;
+                     }),
+      indices.tokens.end());
+}
+
 void SetIdStorage::ChainImpl::Serialize(StorageProto* msg) const {
   auto* vec_msg = msg->set_set_id_storage();
   vec_msg->set_values(reinterpret_cast<const uint8_t*>(values_->data()),
diff --git a/src/trace_processor/db/column/set_id_storage.h b/src/trace_processor/db/column/set_id_storage.h
index e5bdb76..e408f5f 100644
--- a/src/trace_processor/db/column/set_id_storage.h
+++ b/src/trace_processor/db/column/set_id_storage.h
@@ -64,6 +64,8 @@
 
     void Serialize(StorageProto*) const override;
 
+    void Distinct(Indices&) const override;
+
     uint32_t size() const override {
       return static_cast<uint32_t>(values_->size());
     }
diff --git a/src/trace_processor/db/column/set_id_storage_unittest.cc b/src/trace_processor/db/column/set_id_storage_unittest.cc
index e3c40fa..2589a3f 100644
--- a/src/trace_processor/db/column/set_id_storage_unittest.cc
+++ b/src/trace_processor/db/column/set_id_storage_unittest.cc
@@ -417,6 +417,17 @@
   }
 }
 
+TEST(SetIdStorage, Distinct) {
+  std::vector<uint32_t> storage_data{0, 0, 0, 3, 3, 3, 6, 6, 6, 9, 9, 9};
+  SetIdStorage storage(&storage_data);
+  auto chain = storage.MakeChain();
+
+  auto indices = Indices::CreateWithIndexPayloadForTesting(
+      {10, 9, 0, 1, 4}, Indices::State::kNonmonotonic);
+  chain->Distinct(indices);
+  ASSERT_THAT(utils::ExtractPayloadForTesting(indices), ElementsAre(0, 2, 4));
+}
+
 }  // namespace
 }  // namespace column
 }  // namespace perfetto::trace_processor
diff --git a/src/trace_processor/db/column/string_storage.cc b/src/trace_processor/db/column/string_storage.cc
index 9ab8c1f..aa45218 100644
--- a/src/trace_processor/db/column/string_storage.cc
+++ b/src/trace_processor/db/column/string_storage.cc
@@ -20,9 +20,9 @@
 #include <cstdint>
 #include <functional>
 #include <iterator>
-#include <memory>
 #include <optional>
 #include <string>
+#include <unordered_set>
 #include <utility>
 #include <vector>
 
@@ -522,6 +522,8 @@
     FilterOp op,
     SqlValue sql_val,
     const OrderedIndices& indices) const {
+  PERFETTO_TP_TRACE(metatrace::Category::DB,
+                    "StringStorage::ChainImpl::OrderedIndexSearch");
   StringPool::Id val =
       (op == FilterOp::kIsNull || op == FilterOp::kIsNotNull)
           ? StringPool::Id::Null()
@@ -619,6 +621,8 @@
 void StringStorage::ChainImpl::StableSort(SortToken* start,
                                           SortToken* end,
                                           SortDirection direction) const {
+  PERFETTO_TP_TRACE(metatrace::Category::DB,
+                    "StringStorage::ChainImpl::StableSort");
   switch (direction) {
     case SortDirection::kAscending: {
       std::stable_sort(start, end,
@@ -669,6 +673,18 @@
   PERFETTO_FATAL("For GCC");
 }
 
+void StringStorage::ChainImpl::Distinct(Indices& indices) const {
+  PERFETTO_TP_TRACE(metatrace::Category::DB,
+                    "StringStorage::ChainImpl::Distinct");
+  std::unordered_set<StringPool::Id> s;
+  indices.tokens.erase(
+      std::remove_if(indices.tokens.begin(), indices.tokens.end(),
+                     [&s, this](const Indices::Token& idx) {
+                       return !s.insert((*data_)[idx.index]).second;
+                     }),
+      indices.tokens.end());
+}
+
 void StringStorage::ChainImpl::Serialize(StorageProto* msg) const {
   auto* string_storage_msg = msg->set_string_storage();
   string_storage_msg->set_is_sorted(is_sorted_);
diff --git a/src/trace_processor/db/column/string_storage.h b/src/trace_processor/db/column/string_storage.h
index f037bd1..870fd68 100644
--- a/src/trace_processor/db/column/string_storage.h
+++ b/src/trace_processor/db/column/string_storage.h
@@ -65,6 +65,8 @@
                     SortToken* end,
                     SortDirection direction) const override;
 
+    void Distinct(Indices&) const override;
+
     void Serialize(StorageProto*) const override;
 
     uint32_t size() const override {
diff --git a/src/trace_processor/db/column/string_storage_unittest.cc b/src/trace_processor/db/column/string_storage_unittest.cc
index 4e06185..4474ea9 100644
--- a/src/trace_processor/db/column/string_storage_unittest.cc
+++ b/src/trace_processor/db/column/string_storage_unittest.cc
@@ -503,5 +503,22 @@
   }
 }
 
+TEST(StringStorage, DistinctFromIndexVector) {
+  std::vector<std::string> strings{"cheese", "pasta", "cheese",
+                                   "fries",  "pasta", "fries"};
+  std::vector<StringPool::Id> ids(2, StringPool::Id::Null());
+  StringPool pool;
+  for (const auto& string : strings) {
+    ids.push_back(pool.InternString(base::StringView(string)));
+  }
+  StringStorage storage(&pool, &ids);
+  auto chain = storage.MakeChain();
+
+  auto indices = Indices::CreateWithIndexPayloadForTesting(
+      {1, 0, 6, 3}, Indices::State::kNonmonotonic);
+  chain->Distinct(indices);
+  ASSERT_THAT(utils::ExtractPayloadForTesting(indices), ElementsAre(0, 2));
+}
+
 }  // namespace
 }  // namespace perfetto::trace_processor::column