diff --git a/src/trace_processor/db/column/selector_overlay.cc b/src/trace_processor/db/column/selector_overlay.cc
index 0577ae4..4749495 100644
--- a/src/trace_processor/db/column/selector_overlay.cc
+++ b/src/trace_processor/db/column/selector_overlay.cc
@@ -33,6 +33,11 @@
 #include "protos/perfetto/trace_processor/serialization.pbzero.h"
 
 namespace perfetto::trace_processor::column {
+namespace {
+
+static constexpr uint32_t kIndexOfNthSetRatio = 32;
+
+}
 
 SelectorOverlay::ChainImpl::ChainImpl(std::unique_ptr<DataLayerChain> inner,
                                       const BitVector* selector)
@@ -124,10 +129,22 @@
 }
 
 void SelectorOverlay::ChainImpl::Distinct(Indices& indices) const {
-  for (auto& token : indices.tokens) {
-    token.index = selector_->IndexOfNthSet(token.index);
+  if (selector_->size() == selector_->CountSetBits()) {
+    return inner_->Distinct(indices);
   }
-  inner_->Distinct(indices);
+  if (indices.tokens.size() < selector_->size() / kIndexOfNthSetRatio) {
+    for (auto& token : indices.tokens) {
+      token.index = selector_->IndexOfNthSet(token.index);
+    }
+  } else {
+    // TODO(mayzner): once we have a reverse index for IndexOfNthSet in
+    // BitVector, this should no longer be necessary.
+    std::vector<uint32_t> lookup = selector_->GetSetBitIndices();
+    for (auto& token : indices.tokens) {
+      token.index = lookup[token.index];
+    }
+  }
+  return inner_->Distinct(indices);
 }
 
 void SelectorOverlay::ChainImpl::Serialize(StorageProto* storage) const {
diff --git a/src/trace_processor/db/column/types.h b/src/trace_processor/db/column/types.h
index 6ec11fa..53e5b06 100644
--- a/src/trace_processor/db/column/types.h
+++ b/src/trace_processor/db/column/types.h
@@ -99,9 +99,21 @@
 // Structured data used to determine what Trace Processor will query using
 // CEngine.
 struct Query {
+  enum class OrderType {
+    // Order should only be used for sorting.
+    kSort = 0,
+    // Distinct, `orders` signify which columns are supposed to be distinct and
+    // used for sorting.
+    kDistinctAndSort = 1,
+    // Distinct and `orders` signify only columns are supposed to be distinct,
+    // don't need additional sorting.
+    kDistinct = 2
+  };
+  OrderType distinct = OrderType::kSort;
   // Query constraints.
   std::vector<Constraint> constraints;
-  // Query order bys.
+  // Query order bys. Check distinct to know whether they should be used for
+  // sorting.
   std::vector<Order> orders;
 };
 
diff --git a/src/trace_processor/db/table.cc b/src/trace_processor/db/table.cc
index 0505bf4..6b02336 100644
--- a/src/trace_processor/db/table.cc
+++ b/src/trace_processor/db/table.cc
@@ -38,6 +38,10 @@
 
 namespace perfetto::trace_processor {
 
+namespace {
+using Indices = column::DataLayerChain::Indices;
+}
+
 Table::Table(StringPool* pool,
              uint32_t row_count,
              std::vector<ColumnLegacy> columns,
@@ -88,8 +92,6 @@
 }
 
 RowMap Table::QueryToRowMap(const Query& q) const {
-  const auto& cs = q.constraints;
-  const auto& ob = q.orders;
   // We need to delay creation of the chains to this point because of Chrome
   // does not want the binary size overhead of including the chain
   // implementations. As they also don't query tables (instead just iterating)
@@ -102,9 +104,52 @@
     CreateChains();
   }
 
-  RowMap rm = QueryExecutor::FilterLegacy(this, cs);
-  if (ob.empty())
+  // Apply the query constraints.
+  RowMap rm = QueryExecutor::FilterLegacy(this, q.constraints);
+
+  const auto& ob = q.orders;
+
+  // If the `orders` is empty, there is no need to sort or run DISTINCT.
+  if (ob.empty()) {
+    PERFETTO_DCHECK(q.distinct == Query::OrderType::kSort);
     return rm;
+  }
+
+  if (q.distinct != Query::OrderType::kSort) {
+    // `q.orders` should be treated here only as information on what should we
+    // run distinct on, they should not be used for subsequent sorting.
+    // TODO(mayzner): Remove the check after we implement the multi column
+    // distinct.
+    PERFETTO_DCHECK(ob.size() == 1);
+
+    std::vector<uint32_t> table_indices = std::move(rm).TakeAsIndexVector();
+
+    auto indices = Indices::Create(table_indices, Indices::State::kMonotonic);
+    ChainForColumn(ob.front().col_idx).Distinct(indices);
+
+    PERFETTO_DCHECK(indices.tokens.size() <= table_indices.size());
+    for (uint32_t i = 0; i < indices.tokens.size(); ++i) {
+      table_indices[i] = indices.tokens[i].payload;
+    }
+    table_indices.resize(indices.tokens.size());
+
+    if (q.distinct == Query::OrderType::kDistinct) {
+      return RowMap(std::move(table_indices));
+    }
+    PERFETTO_DCHECK(q.distinct == Query::OrderType::kDistinctAndSort);
+
+    // Sorting that happens later might require indices to preserve ordering.
+    // TODO(mayzner): Needs to be changed after implementing multi column
+    // distinct.
+    std::sort(table_indices.begin(), table_indices.end());
+    rm = RowMap(std::move(table_indices));
+  }
+
+  // We don't need to sort for `kUnsorted`, as the `q.orders` were only as
+  // information about which column should we run distinct on.
+  if (q.distinct == Query::OrderType::kDistinct) {
+    return rm;
+  }
 
   // Return the RowMap directly if there is a single constraint to sort the
   // table by a column which is already sorted.
diff --git a/src/trace_processor/perfetto_sql/intrinsics/table_functions/descendant.cc b/src/trace_processor/perfetto_sql/intrinsics/table_functions/descendant.cc
index d1b965f..4d3ab12 100644
--- a/src/trace_processor/perfetto_sql/intrinsics/table_functions/descendant.cc
+++ b/src/trace_processor/perfetto_sql/intrinsics/table_functions/descendant.cc
@@ -137,8 +137,9 @@
           start_id_uint, slices, std::move(descendants));
     }
     case Type::kSliceByStack: {
-      auto sbs_cs = {slices.stack_id().eq(start_id)};
-      for (auto it = slices.FilterToIterator(Query{sbs_cs, {}}); it; ++it) {
+      Query q;
+      q.constraints = {slices.stack_id().eq(start_id)};
+      for (auto it = slices.FilterToIterator(q); it; ++it) {
         RETURN_IF_ERROR(GetDescendants(slices, it.id(), descendants));
       }
       return ExtendWithStartId<tables::DescendantSliceByStackTable>(
diff --git a/src/trace_processor/perfetto_sql/intrinsics/table_functions/flamegraph_construction_algorithms.cc b/src/trace_processor/perfetto_sql/intrinsics/table_functions/flamegraph_construction_algorithms.cc
index 4ad0b9a..d47f834 100644
--- a/src/trace_processor/perfetto_sql/intrinsics/table_functions/flamegraph_construction_algorithms.cc
+++ b/src/trace_processor/perfetto_sql/intrinsics/table_functions/flamegraph_construction_algorithms.cc
@@ -368,8 +368,9 @@
   // 2. Create set of all utids mapped to the given vector of upids
   std::unordered_set<UniqueTid> utids;
   {
-    auto it = storage->thread_table().FilterToIterator(
-        Query{{storage->thread_table().upid().is_not_null()}, {}});
+    Query q;
+    q.constraints = {storage->thread_table().upid().is_not_null()};
+    auto it = storage->thread_table().FilterToIterator(q);
     for (; it; ++it) {
       if (upids.count(*it.upid())) {
         utids.emplace(it.id().value);
@@ -393,7 +394,9 @@
   }
   std::vector<uint32_t> cs_rows;
   {
-    auto it = storage->perf_sample_table().FilterToIterator(Query{cs, {}});
+    Query q;
+    q.constraints = cs;
+    auto it = storage->perf_sample_table().FilterToIterator(q);
     for (; it; ++it) {
       if (utids.find(it.utid()) != utids.end()) {
         cs_rows.push_back(it.row_number().row_number());
diff --git a/src/trace_processor/sqlite/db_sqlite_table.cc b/src/trace_processor/sqlite/db_sqlite_table.cc
index dc64f34..9c5f73a 100644
--- a/src/trace_processor/sqlite/db_sqlite_table.cc
+++ b/src/trace_processor/sqlite/db_sqlite_table.cc
@@ -156,27 +156,29 @@
   return SQLITE_OK;
 }
 
-int UpdateConstraintsAndOrderByFromIndex(DbSqliteModule::Cursor* c,
-                                         const char* idx_str,
-                                         sqlite3_value** argv) {
+int ReadIdxStr(DbSqliteModule::Cursor* cursor,
+               const char* idx_str,
+               sqlite3_value** argv) {
   base::StringSplitter splitter(idx_str, ',');
+
   PERFETTO_CHECK(splitter.Next());
   PERFETTO_DCHECK(splitter.cur_token_size() >= 2);
   PERFETTO_DCHECK(splitter.cur_token()[0] == 'C');
 
-  uint32_t cs_count = *base::CStringToUInt32(splitter.cur_token() + 1);
+  Query q;
 
-  // We reuse this vector to reduce memory allocations on nested subqueries.
+  uint32_t cs_count = *base::CStringToUInt32(splitter.cur_token() + 1);
   uint32_t c_offset = 0;
-  c->query.constraints.resize(cs_count);
-  for (auto& cs : c->query.constraints) {
+  q.constraints.resize(cs_count);
+
+  for (auto& cs : q.constraints) {
     PERFETTO_CHECK(splitter.Next());
     cs.col_idx = *base::CStringToUInt32(splitter.cur_token());
     PERFETTO_CHECK(splitter.Next());
     cs.op = static_cast<FilterOp>(*base::CStringToUInt32(splitter.cur_token()));
 
     if (int ret = SqliteValueToSqlValueChecked(&cs.value, argv[c_offset++], cs,
-                                               c->pVtab);
+                                               cursor->pVtab);
         ret != SQLITE_OK) {
       return ret;
     }
@@ -188,14 +190,21 @@
 
   uint32_t ob_count = *base::CStringToUInt32(splitter.cur_token() + 1);
 
-  // We reuse this vector to reduce memory allocations on nested subqueries.
-  c->query.orders.resize(ob_count);
-  for (auto& ob : c->query.orders) {
+  q.orders.resize(ob_count);
+  for (auto& ob : q.orders) {
     PERFETTO_CHECK(splitter.Next());
     ob.col_idx = *base::CStringToUInt32(splitter.cur_token());
     PERFETTO_CHECK(splitter.Next());
     ob.desc = *base::CStringToUInt32(splitter.cur_token());
   }
+
+  PERFETTO_CHECK(splitter.Next());
+  PERFETTO_DCHECK(splitter.cur_token_size() == 2);
+  PERFETTO_DCHECK(splitter.cur_token()[0] == 'D');
+  q.distinct = static_cast<Query::OrderType>(
+      *base::CStringToUInt32(splitter.cur_token() + 1));
+
+  cursor->query = std::move(q);
   return SQLITE_OK;
 }
 
@@ -488,7 +497,19 @@
     ob_idxes.resize(ob_idxes.size() - static_cast<uint32_t>(pop_count));
   }
 
-  std::string cs_idx_str;
+  // Create index string. It contains information query Trace Processor will
+  // have to run. It can be split into 3 segments: C (constraints), O (orders)
+  // and D (distinct). It can be directly mapped into `Query` type. The number
+  // after C and O signifies how many constraints/orders there are. The number
+  // after D maps to the Query::Distinct enum value.
+  // "C2,0,0,2,1,O1,0,1,D1" maps to:
+  // - two constraints: kEq on first column and kNe on third column
+  // - one order by: descending on first column
+  // - kUnsorted distinct.
+
+  // Constraints:
+  std::string idx_str = "C";
+  idx_str += std::to_string(cs_idxes.size());
   for (int i : cs_idxes) {
     const auto& c = info->aConstraint[i];
     auto& o = info->aConstraintUsage[i];
@@ -498,16 +519,14 @@
     auto op = SqliteOpToFilterOp(c.op);
     PERFETTO_DCHECK(op);
 
-    cs_idx_str += ',';
-    cs_idx_str += std::to_string(c.iColumn);
-    cs_idx_str += ',';
-    cs_idx_str += std::to_string(static_cast<uint32_t>(*op));
+    idx_str += ',';
+    idx_str += std::to_string(c.iColumn);
+    idx_str += ',';
+    idx_str += std::to_string(static_cast<uint32_t>(*op));
   }
-
-  std::string idx_str = "C";
-  idx_str += std::to_string(cs_idxes.size());
-  idx_str += cs_idx_str;
   idx_str += ",";
+
+  // Orders:
   idx_str += "O";
   idx_str += std::to_string(ob_idxes.size());
   for (int i : ob_idxes) {
@@ -516,9 +535,36 @@
     idx_str += ',';
     idx_str += std::to_string(info->aOrderBy[i].desc);
   }
+  idx_str += ",";
+
+  // Distinct:
+  idx_str += "D";
+  if (ob_idxes.size() == 1) {
+    switch (sqlite3_vtab_distinct(info)) {
+      case 0:
+      case 1:
+        idx_str += std::to_string(static_cast<int>(Query::OrderType::kSort));
+        break;
+      case 2:
+        idx_str +=
+            std::to_string(static_cast<int>(Query::OrderType::kDistinct));
+        break;
+      case 3:
+        idx_str += std::to_string(
+            static_cast<int>(Query::OrderType::kDistinctAndSort));
+        break;
+      default:
+        PERFETTO_FATAL("Invalid sqlite3_vtab_distinct result");
+    }
+  } else {
+    // TODO(mayzner): Remove this if condition after implementing multicolumn
+    // distinct.
+    idx_str += std::to_string(static_cast<int>(Query::OrderType::kSort));
+  }
+
+  info->idxStr = sqlite3_mprintf("%s", idx_str.c_str());
 
   info->idxNum = t->best_index_num++;
-  info->idxStr = sqlite3_mprintf("%s", idx_str.c_str());
   info->needToFreeIdxStr = true;
 
   // We can sort on any column correctly.
@@ -581,8 +627,7 @@
       }
     }
   } else {
-    if (int r = UpdateConstraintsAndOrderByFromIndex(c, idx_str, argv + offset);
-        r != SQLITE_OK) {
+    if (int r = ReadIdxStr(c, idx_str, argv + offset); r != SQLITE_OK) {
       return r;
     }
     c->last_idx_num = idx_num;
