Revert "trace_processor: collapse sched slice table to a flat table"
This reverts commit 9dacc8523a6274cc2b29ceed93fcfc3c61e75e1d.
Reason for revert: <INSERT REASONING HERE>
Change-Id: I2935efcad9e71c8a47a25833429aa6383fb9efc9
diff --git a/src/trace_processor/sched_slice_table.cc b/src/trace_processor/sched_slice_table.cc
index ee0bff7..352d244 100644
--- a/src/trace_processor/sched_slice_table.cc
+++ b/src/trace_processor/sched_slice_table.cc
@@ -101,8 +101,12 @@
          "ts UNSIGNED BIG INT, "
          "cpu UNSIGNED INT, "
          "dur UNSIGNED BIG INT, "
+         "quantized_group UNSIGNED BIG INT, "
          "utid UNSIGNED INT, "
          "cycles UNSIGNED BIG INT, "
+         "quantum HIDDEN BIG INT, "
+         "ts_lower_bound HIDDEN BIG INT, "
+         "ts_clip HIDDEN BOOLEAN, "
          "PRIMARY KEY(cpu, ts)"
          ") WITHOUT ROWID;";
 }
@@ -116,16 +120,82 @@
   bool is_time_constrained = false;
   for (size_t i = 0; i < qc.constraints().size(); i++) {
     const auto& cs = qc.constraints()[i];
-    if (cs.iColumn == Column::kTimestamp)
+
+    // Omit SQLite constraint checks on the hidden columns, so the client can
+    // write queries of the form "quantum=x" "ts_lower_bound=x" "ts_clip=true".
+    // Disallow any other constraint on these columns.
+    if (cs.iColumn == Column::kTimestampLowerBound ||
+        cs.iColumn == Column::kQuantizedGroup ||
+        cs.iColumn == Column::kClipTimestamp) {
+      if (!IsOpEq(cs.op))
+        return SQLITE_CONSTRAINT_FUNCTION;
+      info->omit[i] = true;
+    }
+
+    if (cs.iColumn == Column::kTimestampLowerBound ||
+        cs.iColumn == Column::kTimestamp) {
       is_time_constrained = true;
+    }
   }
 
   info->estimated_cost = is_time_constrained ? 10 : 10000;
-  info->order_by_consumed = true;
+
+  bool is_quantized_group_order_desc = false;
+  bool is_duration_timestamp_order = false;
+  for (const auto& ob : qc.order_by()) {
+    switch (ob.iColumn) {
+      case Column::kQuantizedGroup:
+        if (ob.desc)
+          is_quantized_group_order_desc = true;
+        break;
+      case Column::kTimestamp:
+      case Column::kDuration:
+        is_duration_timestamp_order = true;
+        break;
+      case Column::kCpu:
+        break;
+
+      // Can't order on hidden columns.
+      case Column::kQuantum:
+      case Column::kTimestampLowerBound:
+      case Column::kClipTimestamp:
+        return SQLITE_CONSTRAINT_FUNCTION;
+    }
+  }
+
+  bool has_quantum_constraint = false;
+  for (const auto& cs : qc.constraints()) {
+    if (cs.iColumn == Column::kQuantum)
+      has_quantum_constraint = true;
+  }
+
+  // If a quantum constraint is present, we don't support native ordering by
+  // time related parameters or by quantized group in descending order.
+  bool needs_sqlite_orderby =
+      has_quantum_constraint &&
+      (is_duration_timestamp_order || is_quantized_group_order_desc);
+
+  info->order_by_consumed = !needs_sqlite_orderby;
 
   return SQLITE_OK;
 }
 
+int SchedSliceTable::FindFunction(const char* name,
+                                  FindFunctionFn fn,
+                                  void** args) {
+  // Add an identity match function to prevent throwing an exception when
+  // matching on the quantum column.
+  if (strcmp(name, "match") == 0) {
+    *fn = [](sqlite3_context* ctx, int n, sqlite3_value** v) {
+      PERFETTO_DCHECK(n == 2 && sqlite3_value_type(v[0]) == SQLITE_INTEGER);
+      sqlite3_result_int64(ctx, sqlite3_value_int64(v[0]));
+    };
+    *args = nullptr;
+    return 1;
+  }
+  return 0;
+}
+
 SchedSliceTable::Cursor::Cursor(const TraceStorage* storage)
     : storage_(storage) {}
 
@@ -136,34 +206,71 @@
 }
 
 int SchedSliceTable::Cursor::Next() {
-  filter_state_->FindNextSlice();
+  auto* state = filter_state_->StateForCpu(filter_state_->next_cpu());
+  state->FindNextSlice();
+  filter_state_->FindCpuWithNextSlice();
   return SQLITE_OK;
 }
 
 int SchedSliceTable::Cursor::Eof() {
-  return !filter_state_->IsNextRowIdIndexValid();
+  return !filter_state_->IsNextCpuValid();
 }
 
 int SchedSliceTable::Cursor::Column(sqlite3_context* context, int N) {
-  PERFETTO_DCHECK(filter_state_->IsNextRowIdIndexValid());
+  if (!filter_state_->IsNextCpuValid())
+    return SQLITE_ERROR;
 
-  size_t row = filter_state_->next_row_id();
-  const auto& slices = storage_->slices();
+  uint64_t quantum = filter_state_->quantum();
+  uint32_t cpu = filter_state_->next_cpu();
+  const auto* state = filter_state_->StateForCpu(cpu);
+  size_t row = state->next_row_id();
+  const auto& slices = storage_->SlicesForCpu(cpu);
   switch (N) {
     case Column::kTimestamp: {
-      uint64_t ts = slices.start_ns()[row];
-      sqlite3_result_int64(context, static_cast<sqlite3_int64>(ts));
+      uint64_t timestamp = state->next_timestamp();
+      timestamp = std::max(timestamp, state->ts_clip_min());
+      sqlite3_result_int64(context, static_cast<sqlite3_int64>(timestamp));
       break;
     }
     case Column::kCpu: {
-      sqlite3_result_int(context, static_cast<int>(slices.cpus()[row]));
+      sqlite3_result_int(context, static_cast<int>(cpu));
       break;
     }
     case Column::kDuration: {
-      uint64_t duration = slices.durations()[row];
+      uint64_t duration;
+      if (quantum == 0) {
+        duration = slices.durations()[row];
+        uint64_t start_ns = state->next_timestamp();
+        uint64_t end_ns = start_ns + duration;
+        uint64_t clip_trim_ns = 0;
+        if (state->ts_clip_min() > start_ns)
+          clip_trim_ns += state->ts_clip_min() - start_ns;
+        if (end_ns > state->ts_clip_max())
+          clip_trim_ns += end_ns - state->ts_clip_min();
+        duration -= std::min(clip_trim_ns, duration);
+      } else {
+        uint64_t start_quantised_group = state->next_timestamp() / quantum;
+        uint64_t end = slices.start_ns()[row] + slices.durations()[row];
+        uint64_t next_group_start = (start_quantised_group + 1) * quantum;
+
+        // Compute the minimum of the start of the next group boundary and the
+        // end of this slice.
+        uint64_t min_slice_end = std::min<uint64_t>(end, next_group_start);
+        duration = min_slice_end - state->next_timestamp();
+      }
       sqlite3_result_int64(context, static_cast<sqlite3_int64>(duration));
       break;
     }
+    case Column::kQuantizedGroup: {
+      auto group = quantum == 0 ? state->next_timestamp()
+                                : state->next_timestamp() / quantum;
+      sqlite3_result_int64(context, static_cast<sqlite3_int64>(group));
+      break;
+    }
+    case Column::kQuantum: {
+      sqlite3_result_int64(context, static_cast<sqlite3_int64>(quantum));
+      break;
+    }
     case Column::kUtid: {
       sqlite3_result_int64(context, slices.utids()[row]);
       break;
@@ -187,6 +294,8 @@
 
   uint64_t min_ts = 0;
   uint64_t max_ts = kUint64Max;
+  uint64_t ts_lower_bound = 0;
+  bool ts_clip = false;
 
   for (size_t i = 0; i < query_constraints.constraints().size(); i++) {
     const auto& cs = query_constraints.constraints()[i];
@@ -194,6 +303,15 @@
       case Column::kCpu:
         PopulateFilterBitmap(cs.op, argv[i], &cpu_filter);
         break;
+      case Column::kQuantum:
+        quantum_ = static_cast<uint64_t>(sqlite3_value_int64(argv[i]));
+        break;
+      case Column::kTimestampLowerBound:
+        ts_lower_bound = static_cast<uint64_t>(sqlite3_value_int64(argv[i]));
+        break;
+      case Column::kClipTimestamp:
+        ts_clip = sqlite3_value_int(argv[i]) ? true : false;
+        break;
       case Column::kTimestamp: {
         auto ts = static_cast<uint64_t>(sqlite3_value_int64(argv[i]));
         if (IsOpGe(cs.op) || IsOpGt(cs.op)) {
@@ -205,22 +323,92 @@
       }
     }
   }
-  SetupSortedRowIds(min_ts, max_ts);
 
-  // Filter rows on CPUs if any CPUs need to be excluded.
-  const auto& slices = storage_->slices();
-  row_filter_.resize(sorted_row_ids_.size(), true);
-  if (cpu_filter.count() < cpu_filter.size()) {
-    for (size_t i = 0; i < sorted_row_ids_.size(); i++) {
-      row_filter_[i] = cpu_filter.test(slices.cpus()[sorted_row_ids_[i]]);
-    }
+  if (ts_clip) {
+    PERFETTO_DCHECK(ts_lower_bound == 0);
+    if (ts_lower_bound)
+      PERFETTO_ELOG("Cannot use ts_lower_bound and ts_clip together");
+    ts_lower_bound = min_ts;
+    min_ts = 0;
   }
-  FindNextRowAndTimestamp();
+
+  // If the query specifies a lower bound on ts, find that bound across all
+  // CPUs involved in the query and turn that into a min_ts constraint.
+  // ts_lower_bound is defined as the largest timestamp < X, or if none, the
+  // smallest timestamp >= X.
+  if (ts_lower_bound > 0) {
+    uint64_t largest_ts_before = 0;
+    uint64_t smallest_ts_after = kUint64Max;
+    for (uint32_t cpu = 0; cpu < base::kMaxCpus; cpu++) {
+      if (!cpu_filter.test(cpu))
+        continue;
+      const auto& start_ns = storage_->SlicesForCpu(cpu).start_ns();
+      // std::lower_bound will find the first timestamp >= |ts_lower_bound|.
+      // From there we need to move one back, if possible.
+      auto it =
+          std::lower_bound(start_ns.begin(), start_ns.end(), ts_lower_bound);
+      if (std::distance(start_ns.begin(), it) > 0)
+        it--;
+      if (it == start_ns.end())
+        continue;
+      if (*it < ts_lower_bound) {
+        largest_ts_before = std::max(largest_ts_before, *it);
+      } else {
+        smallest_ts_after = std::min(smallest_ts_after, *it);
+      }
+    }
+    uint64_t lower_bound = std::min(largest_ts_before, smallest_ts_after);
+    min_ts = std::max(min_ts, lower_bound);
+  }  // if (ts_lower_bound)
+
+  // Setup CPU filtering because the trace storage is indexed by CPU.
+  for (uint32_t cpu = 0; cpu < base::kMaxCpus; cpu++) {
+    if (!cpu_filter.test(cpu))
+      continue;
+    uint64_t ts_clip_min = ts_clip ? min_ts : 0;
+    uint64_t ts_clip_max = ts_clip ? max_ts : kUint64Max;
+    StateForCpu(cpu)->Initialize(
+        cpu, storage_, quantum_, ts_clip_min, ts_clip_max,
+        CreateSortedIndexVectorForCpu(cpu, min_ts, max_ts));
+  }
+
+  // Set the cpu index to be the first item to look at.
+  FindCpuWithNextSlice();
 }
 
-void SchedSliceTable::FilterState::SetupSortedRowIds(uint64_t min_ts,
-                                                     uint64_t max_ts) {
-  const auto& slices = storage_->slices();
+void SchedSliceTable::FilterState::FindCpuWithNextSlice() {
+  next_cpu_ = base::kMaxCpus;
+
+  for (uint32_t cpu = 0; cpu < base::kMaxCpus; cpu++) {
+    const auto& cpu_state = per_cpu_state_[cpu];
+    if (!cpu_state.IsNextRowIdIndexValid())
+      continue;
+
+    // The first CPU with a valid slice can be set to the next CPU.
+    if (next_cpu_ == base::kMaxCpus) {
+      next_cpu_ = cpu;
+      continue;
+    }
+
+    // If the current CPU is ordered before the current "next" CPU, then update
+    // the cpu value.
+    int cmp = CompareCpuToNextCpu(cpu);
+    if (cmp < 0)
+      next_cpu_ = cpu;
+  }
+}
+
+int SchedSliceTable::FilterState::CompareCpuToNextCpu(uint32_t cpu) {
+  size_t next_row = per_cpu_state_[next_cpu_].next_row_id();
+  size_t row = per_cpu_state_[cpu].next_row_id();
+  return CompareSlices(cpu, row, next_cpu_, next_row);
+}
+
+std::vector<uint32_t>
+SchedSliceTable::FilterState::CreateSortedIndexVectorForCpu(uint32_t cpu,
+                                                            uint64_t min_ts,
+                                                            uint64_t max_ts) {
+  const auto& slices = storage_->SlicesForCpu(cpu);
   const auto& start_ns = slices.start_ns();
   PERFETTO_CHECK(slices.slice_count() <= std::numeric_limits<uint32_t>::max());
 
@@ -229,22 +417,26 @@
   ptrdiff_t dist = std::distance(min_it, max_it);
   PERFETTO_CHECK(dist >= 0 && static_cast<size_t>(dist) <= start_ns.size());
 
+  std::vector<uint32_t> indices(static_cast<size_t>(dist));
+
   // Fill |indices| with the consecutive row numbers affected by the filtering.
-  sorted_row_ids_.resize(static_cast<size_t>(dist));
-  std::iota(sorted_row_ids_.begin(), sorted_row_ids_.end(),
+  std::iota(indices.begin(), indices.end(),
             std::distance(start_ns.begin(), min_it));
 
-  // Sort if there is any order by constraints.
-  if (!order_by_.empty()) {
-    std::sort(
-        sorted_row_ids_.begin(), sorted_row_ids_.end(),
-        [this](uint32_t f, uint32_t s) { return CompareSlices(f, s) < 0; });
-  }
+  // In other cases, sort by the given criteria.
+  std::sort(indices.begin(), indices.end(),
+            [this, cpu](uint32_t f, uint32_t s) {
+              return CompareSlices(cpu, f, cpu, s) < 0;
+            });
+  return indices;
 }
 
-int SchedSliceTable::FilterState::CompareSlices(size_t f_idx, size_t s_idx) {
+int SchedSliceTable::FilterState::CompareSlices(uint32_t f_cpu,
+                                                size_t f_idx,
+                                                uint32_t s_cpu,
+                                                size_t s_idx) {
   for (const auto& ob : order_by_) {
-    int c = CompareSlicesOnColumn(f_idx, s_idx, ob);
+    int c = CompareSlicesOnColumn(f_cpu, f_idx, s_cpu, s_idx, ob);
     if (c != 0)
       return c;
   }
@@ -252,37 +444,85 @@
 }
 
 int SchedSliceTable::FilterState::CompareSlicesOnColumn(
+    uint32_t f_cpu,
     size_t f_idx,
+    uint32_t s_cpu,
     size_t s_idx,
     const QueryConstraints::OrderBy& ob) {
-  const auto& sl = storage_->slices();
+  const auto& f_sl = storage_->SlicesForCpu(f_cpu);
+  const auto& s_sl = storage_->SlicesForCpu(s_cpu);
   switch (ob.iColumn) {
+    case SchedSliceTable::Column::kQuantum:
+    case SchedSliceTable::Column::kTimestampLowerBound:
+      PERFETTO_CHECK(false);
     case SchedSliceTable::Column::kTimestamp:
-      return Compare(sl.start_ns()[f_idx], sl.start_ns()[s_idx], ob.desc);
+      return Compare(f_sl.start_ns()[f_idx], s_sl.start_ns()[s_idx], ob.desc);
     case SchedSliceTable::Column::kDuration:
-      return Compare(sl.durations()[f_idx], sl.durations()[s_idx], ob.desc);
+      return Compare(f_sl.durations()[f_idx], s_sl.durations()[s_idx], ob.desc);
     case SchedSliceTable::Column::kCpu:
-      return Compare(sl.cpus()[f_idx], sl.cpus()[s_idx], ob.desc);
+      return Compare(f_cpu, s_cpu, ob.desc);
     case SchedSliceTable::Column::kUtid:
-      return Compare(sl.utids()[f_idx], sl.utids()[s_idx], ob.desc);
+      return Compare(f_sl.utids()[f_idx], s_sl.utids()[s_idx], ob.desc);
     case SchedSliceTable::Column::kCycles:
-      return Compare(sl.cycles()[f_idx], sl.cycles()[s_idx], ob.desc);
+      return Compare(f_sl.cycles()[f_idx], s_sl.cycles()[s_idx], ob.desc);
+    case SchedSliceTable::Column::kQuantizedGroup: {
+      // We don't support sorting in descending order on quantized group when
+      // we have a non-zero quantum.
+      PERFETTO_CHECK(!ob.desc || quantum_ == 0);
+
+      uint64_t f_timestamp = StateForCpu(f_cpu)->next_timestamp();
+      uint64_t s_timestamp = StateForCpu(s_cpu)->next_timestamp();
+
+      uint64_t f_group = quantum_ == 0 ? f_timestamp : f_timestamp / quantum_;
+      uint64_t s_group = quantum_ == 0 ? s_timestamp : s_timestamp / quantum_;
+      return Compare(f_group, s_group, ob.desc);
+    }
   }
   PERFETTO_FATAL("Unexpected column %d", ob.iColumn);
 }
 
-void SchedSliceTable::FilterState::FindNextSlice() {
-  next_row_id_index_++;
-  FindNextRowAndTimestamp();
+void SchedSliceTable::PerCpuState::Initialize(
+    uint32_t cpu,
+    const TraceStorage* storage,
+    uint64_t quantum,
+    uint64_t ts_clip_min,
+    uint64_t ts_clip_max,
+    std::vector<uint32_t> sorted_row_ids) {
+  cpu_ = cpu;
+  storage_ = storage;
+  quantum_ = quantum;
+  ts_clip_min_ = ts_clip_min;
+  ts_clip_max_ = ts_clip_max;
+  sorted_row_ids_ = std::move(sorted_row_ids);
+  UpdateNextTimestampForNextRow();
 }
 
-void SchedSliceTable::FilterState::FindNextRowAndTimestamp() {
-  auto start =
-      row_filter_.begin() +
-      static_cast<decltype(row_filter_)::difference_type>(next_row_id_index_);
-  auto next_it = std::find(start, row_filter_.end(), true);
-  next_row_id_index_ =
-      static_cast<uint32_t>(std::distance(row_filter_.begin(), next_it));
+void SchedSliceTable::PerCpuState::FindNextSlice() {
+  PERFETTO_DCHECK(next_timestamp_ != 0);
+
+  const auto& slices = Slices();
+  if (quantum_ == 0) {
+    next_row_id_index_++;
+    UpdateNextTimestampForNextRow();
+    return;
+  }
+
+  uint64_t start_group = next_timestamp_ / quantum_;
+  uint64_t end_slice =
+      slices.start_ns()[next_row_id()] + slices.durations()[next_row_id()];
+  uint64_t next_group_start = (start_group + 1) * quantum_;
+
+  if (next_group_start >= end_slice) {
+    next_row_id_index_++;
+    UpdateNextTimestampForNextRow();
+  } else {
+    next_timestamp_ = next_group_start;
+  }
+}
+
+void SchedSliceTable::PerCpuState::UpdateNextTimestampForNextRow() {
+  next_timestamp_ =
+      IsNextRowIdIndexValid() ? Slices().start_ns()[next_row_id()] : 0;
 }
 
 }  // namespace trace_processor
diff --git a/src/trace_processor/sched_slice_table.h b/src/trace_processor/sched_slice_table.h
index 25e4ee1..1cfca45 100644
--- a/src/trace_processor/sched_slice_table.h
+++ b/src/trace_processor/sched_slice_table.h
@@ -37,8 +37,14 @@
     kTimestamp = 0,
     kCpu = 1,
     kDuration = 2,
-    kUtid = 3,
-    kCycles = 4,
+    kQuantizedGroup = 3,
+    kUtid = 4,
+    kCycles = 5,
+
+    // Hidden columns.
+    kQuantum = 6,
+    kTimestampLowerBound = 7,
+    kClipTimestamp = 8,
   };
 
   SchedSliceTable(sqlite3*, const TraceStorage* storage);
@@ -49,8 +55,60 @@
   std::string CreateTableStmt(int argc, const char* const* argv) override;
   std::unique_ptr<Table::Cursor> CreateCursor() override;
   int BestIndex(const QueryConstraints&, BestIndexInfo*) override;
+  int FindFunction(const char* name, FindFunctionFn fn, void** args) override;
 
  private:
+  // Transient filter state for each CPU of this trace.
+  class PerCpuState {
+   public:
+    void Initialize(uint32_t cpu,
+                    const TraceStorage* storage,
+                    uint64_t quantum,
+                    uint64_t ts_clip_min,
+                    uint64_t ts_clip_max,
+                    std::vector<uint32_t> sorted_row_ids);
+    void FindNextSlice();
+    bool IsNextRowIdIndexValid() const {
+      return next_row_id_index_ < sorted_row_ids_.size();
+    }
+
+    size_t next_row_id() const { return sorted_row_ids_[next_row_id_index_]; }
+    uint64_t next_timestamp() const { return next_timestamp_; }
+    uint64_t ts_clip_min() const { return ts_clip_min_; }
+    uint64_t ts_clip_max() const { return ts_clip_max_; }
+
+   private:
+    const TraceStorage::SlicesPerCpu& Slices() {
+      return storage_->SlicesForCpu(cpu_);
+    }
+
+    void UpdateNextTimestampForNextRow();
+
+    // Vector of row ids sorted by the the given order by constraints.
+    std::vector<uint32_t> sorted_row_ids_;
+
+    // An offset into |sorted_row_ids_| indicating the next row to return.
+    uint32_t next_row_id_index_ = 0;
+
+    // The timestamp of the row to index. This is either the timestamp of
+    // the slice at |next_row_id_index_| or the timestamp of the next quantized
+    // group boundary.
+    uint64_t next_timestamp_ = 0;
+
+    // The CPU this state is associated with.
+    uint32_t cpu_ = 0;
+
+    // The quantum the output slices should fall within.
+    uint64_t quantum_ = 0;
+
+    // When clipping is applied (i.e. WHERE ts_clip between X and Y), slices are
+    // cut and shrunk around the min-max boundaries to fit in the clip window.
+    uint64_t ts_clip_min_ = 0;
+    uint64_t ts_clip_max_ = std::numeric_limits<uint64_t>::max();
+
+    const TraceStorage* storage_ = nullptr;
+  };
+
   // Transient state for a filter operation on a Cursor.
   class FilterState {
    public:
@@ -58,43 +116,57 @@
                 const QueryConstraints& query_constraints,
                 sqlite3_value** argv);
 
-    void FindNextSlice();
+    // Chooses the next CPU which should be returned according to the sorting
+    // criteria specified by |order_by_|.
+    void FindCpuWithNextSlice();
 
-    inline bool IsNextRowIdIndexValid() const {
-      return next_row_id_index_ < sorted_row_ids_.size();
-    }
+    // Returns whether the next CPU to be returned by this filter operation is
+    // valid.
+    bool IsNextCpuValid() const { return next_cpu_ < per_cpu_state_.size(); }
 
-    size_t next_row_id() const { return sorted_row_ids_[next_row_id_index_]; }
+    // Returns the transient state associated with a single CPU.
+    PerCpuState* StateForCpu(uint32_t cpu) { return &per_cpu_state_[cpu]; }
+
+    uint32_t next_cpu() const { return next_cpu_; }
+    uint64_t quantum() const { return quantum_; }
 
    private:
-    // Updates |sorted_row_ids_| with the indices into the slices sorted by the
-    // order by criteria.
-    void SetupSortedRowIds(uint64_t min_ts, uint64_t max_ts);
+    // Creates a vector of indices into the slices for the given |cpu| sorted
+    // by the order by criteria.
+    std::vector<uint32_t> CreateSortedIndexVectorForCpu(uint32_t cpu,
+                                                        uint64_t min_ts,
+                                                        uint64_t max_ts);
 
-    // Compares the slice at index |f| with the slice at index |s|on all
-    // columns.
+    // Compares the next slice of the given |cpu| with the next slice of the
+    // |next_cpu_|. Return <0 if |cpu| is ordered before, >0 if ordered after,
+    // and 0 if they are equal.
+    int CompareCpuToNextCpu(uint32_t cpu);
+
+    // Compares the slice at index |f| in |f_slices| for CPU |f_cpu| with the
+    // slice at index |s| in |s_slices| for CPU |s_cpu| on all columns.
     // Returns -1 if the first slice is before the second in the ordering, 1 if
     // the first slice is after the second and 0 if they are equal.
-    int CompareSlices(size_t f, size_t s);
+    int CompareSlices(uint32_t f_cpu, size_t f, uint32_t s_cpu, size_t s);
 
-    // Compares the slice at index |f| with the slice at index |s| on the
-    // criteria in |order_by|.
+    // Compares the slice at index |f| in |f_slices| for CPU |f_cpu| with the
+    // slice at index |s| in |s_slices| for CPU |s_cpu| on the criteria in
+    // |order_by|.
     // Returns -1 if the first slice is before the second in the ordering, 1 if
     // the first slice is after the second and 0 if they are equal.
-    int CompareSlicesOnColumn(size_t f,
+    int CompareSlicesOnColumn(uint32_t f_cpu,
+                              size_t f,
+                              uint32_t s_cpu,
                               size_t s,
                               const QueryConstraints::OrderBy& order_by);
 
-    void FindNextRowAndTimestamp();
+    // One entry for each cpu which is used in filtering.
+    std::array<PerCpuState, base::kMaxCpus> per_cpu_state_;
 
-    // Vector of row ids sorted by the the given order by constraints.
-    std::vector<uint32_t> sorted_row_ids_;
+    // The next CPU which should be returned to the user.
+    uint32_t next_cpu_ = 0;
 
-    // Bitset for filtering slices.
-    std::vector<bool> row_filter_;
-
-    // An offset into |sorted_row_ids_| indicating the next row to return.
-    uint32_t next_row_id_index_ = 0;
+    // The quantum the output slices should fall within.
+    uint64_t quantum_ = 0;
 
     // The sorting criteria for this filter operation.
     std::vector<QueryConstraints::OrderBy> order_by_;
diff --git a/src/trace_processor/sched_slice_table_unittest.cc b/src/trace_processor/sched_slice_table_unittest.cc
index f5033fc..fbc84ff 100644
--- a/src/trace_processor/sched_slice_table_unittest.cc
+++ b/src/trace_processor/sched_slice_table_unittest.cc
@@ -169,6 +169,124 @@
   ASSERT_EQ(sqlite3_step(*stmt_), SQLITE_DONE);
 }
 
+TEST_F(SchedSliceTableTest, QuanitsiationCpuNativeOrder) {
+  uint32_t cpu_1 = 3;
+  uint32_t cpu_2 = 8;
+  uint64_t timestamp = 100;
+  uint32_t pid_1 = 2;
+  uint32_t prev_state = 32;
+  static const char kCommProc1[] = "process1";
+  static const char kCommProc2[] = "process2";
+  uint32_t pid_2 = 4;
+  context_.sched_tracker->PushSchedSwitch(cpu_2, timestamp, pid_1, prev_state,
+                                          kCommProc1, pid_2);
+  context_.sched_tracker->PushSchedSwitch(cpu_1, timestamp + 3, pid_2,
+                                          prev_state, kCommProc2, pid_1);
+  context_.sched_tracker->PushSchedSwitch(cpu_2, timestamp + 4, pid_1,
+                                          prev_state, kCommProc1, pid_2);
+  context_.sched_tracker->PushSchedSwitch(cpu_1, timestamp + 10, pid_2,
+                                          prev_state, kCommProc2, pid_1);
+
+  PrepareValidStatement(
+      "SELECT dur, ts, cpu FROM sched WHERE quantum = 5 ORDER BY cpu");
+
+  // Event at ts + 3 sliced off at quantum boundary (105).
+  ASSERT_EQ(sqlite3_step(*stmt_), SQLITE_ROW);
+  ASSERT_EQ(sqlite3_column_int64(*stmt_, 0), 2 /* duration */);
+  ASSERT_EQ(sqlite3_column_int64(*stmt_, 1), timestamp + 3);
+  ASSERT_EQ(sqlite3_column_int64(*stmt_, 2), cpu_1);
+
+  // Remainder of event at ts + 3 after quantum boundary (105 onwards).
+  ASSERT_EQ(sqlite3_step(*stmt_), SQLITE_ROW);
+  ASSERT_EQ(sqlite3_column_int64(*stmt_, 0), 5 /* duration */);
+  ASSERT_EQ(sqlite3_column_int64(*stmt_, 1), timestamp + 5);
+  ASSERT_EQ(sqlite3_column_int64(*stmt_, 2), cpu_1);
+
+  // Full event at ts.
+  ASSERT_EQ(sqlite3_step(*stmt_), SQLITE_ROW);
+  ASSERT_EQ(sqlite3_column_int64(*stmt_, 0), 4 /* duration */);
+  ASSERT_EQ(sqlite3_column_int64(*stmt_, 1), timestamp);
+  ASSERT_EQ(sqlite3_column_int64(*stmt_, 2), cpu_2);
+
+  ASSERT_EQ(sqlite3_step(*stmt_), SQLITE_DONE);
+}
+
+TEST_F(SchedSliceTableTest, QuantizationSqliteDurationOrder) {
+  uint32_t cpu_1 = 3;
+  uint32_t cpu_2 = 8;
+  uint64_t timestamp = 100;
+  uint32_t pid_1 = 2;
+  uint32_t prev_state = 32;
+  static const char kCommProc1[] = "process1";
+  static const char kCommProc2[] = "process2";
+  uint32_t pid_2 = 4;
+  context_.sched_tracker->PushSchedSwitch(cpu_1, timestamp, pid_1, prev_state,
+                                          kCommProc1, pid_2);
+  context_.sched_tracker->PushSchedSwitch(cpu_2, timestamp + 3, pid_2,
+                                          prev_state, kCommProc2, pid_1);
+  context_.sched_tracker->PushSchedSwitch(cpu_1, timestamp + 4, pid_1,
+                                          prev_state, kCommProc1, pid_2);
+  context_.sched_tracker->PushSchedSwitch(cpu_2, timestamp + 10, pid_2,
+                                          prev_state, kCommProc2, pid_1);
+
+  PrepareValidStatement(
+      "SELECT dur, ts, cpu FROM sched WHERE quantum = 5 ORDER BY dur");
+
+  // Event at ts + 3 sliced off at quantum boundary (105).
+  ASSERT_EQ(sqlite3_step(*stmt_), SQLITE_ROW);
+  ASSERT_EQ(sqlite3_column_int64(*stmt_, 0), 2 /* duration */);
+  ASSERT_EQ(sqlite3_column_int64(*stmt_, 1), timestamp + 3);
+  ASSERT_EQ(sqlite3_column_int64(*stmt_, 2), cpu_2);
+
+  // Full event at ts.
+  ASSERT_EQ(sqlite3_step(*stmt_), SQLITE_ROW);
+  ASSERT_EQ(sqlite3_column_int64(*stmt_, 0), 4 /* duration */);
+  ASSERT_EQ(sqlite3_column_int64(*stmt_, 1), timestamp);
+  ASSERT_EQ(sqlite3_column_int64(*stmt_, 2), cpu_1);
+
+  // Remainder of event at ts + 3 after quantum boundary (105 onwards).
+  ASSERT_EQ(sqlite3_step(*stmt_), SQLITE_ROW);
+  ASSERT_EQ(sqlite3_column_int64(*stmt_, 0), 5 /* duration */);
+  ASSERT_EQ(sqlite3_column_int64(*stmt_, 1), timestamp + 5);
+  ASSERT_EQ(sqlite3_column_int64(*stmt_, 2), cpu_2);
+
+  ASSERT_EQ(sqlite3_step(*stmt_), SQLITE_DONE);
+}
+
+TEST_F(SchedSliceTableTest, QuantizationGroupAndSum) {
+  uint32_t cpu_1 = 3;
+  uint32_t cpu_2 = 8;
+  uint64_t timestamp = 100;
+  uint32_t pid_1 = 2;
+  uint32_t prev_state = 32;
+  static const char kCommProc1[] = "process1";
+  static const char kCommProc2[] = "process2";
+  uint32_t pid_2 = 4;
+  context_.sched_tracker->PushSchedSwitch(cpu_1, timestamp, pid_1, prev_state,
+                                          kCommProc1, pid_2);
+  context_.sched_tracker->PushSchedSwitch(cpu_2, timestamp + 3, pid_2,
+                                          prev_state, kCommProc2, pid_1);
+  context_.sched_tracker->PushSchedSwitch(cpu_1, timestamp + 4, pid_1,
+                                          prev_state, kCommProc1, pid_2);
+  context_.sched_tracker->PushSchedSwitch(cpu_2, timestamp + 10, pid_2,
+                                          prev_state, kCommProc2, pid_1);
+
+  PrepareValidStatement(
+      "SELECT SUM(dur) as sum_dur "
+      "FROM sched "
+      "WHERE quantum = 5 "
+      "GROUP BY quantized_group "
+      "ORDER BY sum_dur");
+
+  ASSERT_EQ(sqlite3_step(*stmt_), SQLITE_ROW);
+  ASSERT_EQ(sqlite3_column_int64(*stmt_, 0), 5 /* SUM(duration) */);
+
+  ASSERT_EQ(sqlite3_step(*stmt_), SQLITE_ROW);
+  ASSERT_EQ(sqlite3_column_int64(*stmt_, 0), 6 /* SUM(duration) */);
+
+  ASSERT_EQ(sqlite3_step(*stmt_), SQLITE_DONE);
+}
+
 TEST_F(SchedSliceTableTest, UtidTest) {
   uint32_t cpu = 3;
   uint64_t timestamp = 100;
@@ -231,6 +349,21 @@
   ASSERT_THAT(query("ts >= 55 and ts < 52"), IsEmpty());
   ASSERT_THAT(query("ts >= 70 and ts < 71"), ElementsAre(70));
   ASSERT_THAT(query("ts >= 59 and ts < 73"), ElementsAre(59, 60, 70, 71, 72));
+
+  // Test the special ts_lower_bound column.
+  ASSERT_THAT(query("ts_lower_bound = 1 and ts < 10"), IsEmpty());
+  ASSERT_THAT(query("ts_lower_bound = 50 and ts <= 50"), ElementsAre(50));
+  ASSERT_THAT(query("ts_lower_bound = 100"), ElementsAre(80));
+  ASSERT_THAT(query("ts_lower_bound = 100 and cpu = 5"), ElementsAre(60));
+  ASSERT_THAT(query("ts_lower_bound = 100 and cpu = 7"), ElementsAre(80));
+  ASSERT_THAT(query("ts_lower_bound = 1 and ts <= 52"),
+              ElementsAre(50, 51, 52));
+  ASSERT_THAT(query("ts_lower_bound = 70 and ts <= 71"),
+              ElementsAre(60, 70, 71));
+  ASSERT_THAT(query("ts_lower_bound = 60 and ts > 58 and ts <= 71"),
+              ElementsAre(59, 60, 70, 71));
+  ASSERT_THAT(query("ts_lower_bound = 70 and ts > 70 and ts <= 71"),
+              ElementsAre(71));
 }
 
 TEST_F(SchedSliceTableTest, CyclesOrdering) {
diff --git a/src/trace_processor/sched_tracker_unittest.cc b/src/trace_processor/sched_tracker_unittest.cc
index e22d768..b50f135 100644
--- a/src/trace_processor/sched_tracker_unittest.cc
+++ b/src/trace_processor/sched_tracker_unittest.cc
@@ -49,7 +49,7 @@
   static const char kCommProc2[] = "process2";
   uint32_t pid_2 = 4;
 
-  const auto& timestamps = context.storage->slices().start_ns();
+  const auto& timestamps = context.storage->SlicesForCpu(cpu).start_ns();
   context.sched_tracker->PushSchedSwitch(cpu, timestamp, pid_1, prev_state,
                                          kCommProc1, pid_2);
   ASSERT_EQ(timestamps.size(), 0);
@@ -63,7 +63,7 @@
   ASSERT_EQ(std::string(context.storage->GetString(
                 context.storage->GetThread(1).name_id)),
             kCommProc2);
-  ASSERT_EQ(context.storage->slices().utids().front(), 1);
+  ASSERT_EQ(context.storage->SlicesForCpu(cpu).utids().front(), 1);
 }
 
 TEST_F(SchedTrackerTest, InsertThirdSched_SameThread) {
@@ -73,7 +73,7 @@
   static const char kCommProc1[] = "process1";
   static const char kCommProc2[] = "process2";
 
-  const auto& timestamps = context.storage->slices().start_ns();
+  const auto& timestamps = context.storage->SlicesForCpu(cpu).start_ns();
   context.sched_tracker->PushSchedSwitch(cpu, timestamp, /*tid=*/4, prev_state,
                                          kCommProc1,
                                          /*tid=*/2);
@@ -92,12 +92,12 @@
   ASSERT_EQ(timestamps.size(), 3ul);
   ASSERT_EQ(timestamps[0], timestamp);
   ASSERT_EQ(context.storage->GetThread(1).start_ns, timestamp);
-  ASSERT_EQ(context.storage->slices().durations().at(0), 1u);
-  ASSERT_EQ(context.storage->slices().durations().at(1), 11u - 1u);
-  ASSERT_EQ(context.storage->slices().durations().at(2), 31u - 11u);
-  ASSERT_EQ(context.storage->slices().utids().at(0),
-            context.storage->slices().utids().at(2));
-  ASSERT_EQ(context.storage->slices().cycles().at(0), 0);
+  ASSERT_EQ(context.storage->SlicesForCpu(cpu).durations().at(0), 1u);
+  ASSERT_EQ(context.storage->SlicesForCpu(cpu).durations().at(1), 11u - 1u);
+  ASSERT_EQ(context.storage->SlicesForCpu(cpu).durations().at(2), 31u - 11u);
+  ASSERT_EQ(context.storage->SlicesForCpu(cpu).utids().at(0),
+            context.storage->SlicesForCpu(cpu).utids().at(2));
+  ASSERT_EQ(context.storage->SlicesForCpu(cpu).cycles().at(0), 0);
 }
 
 TEST_F(SchedTrackerTest, TestCyclesCalculation) {
@@ -122,7 +122,7 @@
       cpu, static_cast<uint64_t>(timestamp + 3e8L), /*tid=*/4, prev_state,
       kCommProc2,
       /*tid=*/2);
-  ASSERT_EQ(context.storage->slices().cycles().at(0), 590000000);
+  ASSERT_EQ(context.storage->SlicesForCpu(cpu).cycles().at(0), 590000000);
 }
 
 }  // namespace
diff --git a/src/trace_processor/trace_storage.cc b/src/trace_processor/trace_storage.cc
index 813b24a..ce5fae1 100644
--- a/src/trace_processor/trace_storage.cc
+++ b/src/trace_processor/trace_storage.cc
@@ -41,8 +41,8 @@
                                  uint64_t duration_ns,
                                  UniqueTid utid,
                                  uint64_t cycles) {
-  slices_.AddSlice(cpu, start_ns, duration_ns, utid, cycles);
-}
+  cpu_events_[cpu].AddSlice(start_ns, duration_ns, utid, cycles);
+};
 
 StringId TraceStorage::InternString(base::StringView str) {
   auto hash = str.Hash();
diff --git a/src/trace_processor/trace_storage.h b/src/trace_processor/trace_storage.h
index f267f65..79b1492 100644
--- a/src/trace_processor/trace_storage.h
+++ b/src/trace_processor/trace_storage.h
@@ -80,14 +80,12 @@
     uint32_t tid = 0;
   };
 
-  class Slices {
+  class SlicesPerCpu {
    public:
-    inline void AddSlice(uint32_t cpu,
-                         uint64_t start_ns,
+    inline void AddSlice(uint64_t start_ns,
                          uint64_t duration_ns,
                          UniqueTid utid,
                          uint64_t cycles) {
-      cpus_.emplace_back(cpu);
       start_ns_.emplace_back(start_ns);
       durations_.emplace_back(duration_ns);
       utids_.emplace_back(utid);
@@ -96,8 +94,6 @@
 
     size_t slice_count() const { return start_ns_.size(); }
 
-    const std::deque<uint32_t>& cpus() const { return cpus_; }
-
     const std::deque<uint64_t>& start_ns() const { return start_ns_; }
 
     const std::deque<uint64_t>& durations() const { return durations_; }
@@ -109,7 +105,6 @@
    private:
     // Each deque below has the same number of entries (the number of slices
     // in the trace for the CPU).
-    std::deque<uint32_t> cpus_;
     std::deque<uint64_t> start_ns_;
     std::deque<uint64_t> durations_;
     std::deque<UniqueTid> utids_;
@@ -194,6 +189,11 @@
   }
 
   // Reading methods.
+  const SlicesPerCpu& SlicesForCpu(uint32_t cpu) const {
+    PERFETTO_DCHECK(cpu < cpu_events_.size());
+    return cpu_events_[cpu];
+  }
+
   const std::string& GetString(StringId id) const {
     PERFETTO_DCHECK(id < string_pool_.size());
     return string_pool_[id];
@@ -210,7 +210,6 @@
     return unique_threads_[utid];
   }
 
-  const Slices& slices() const { return slices_; }
   const NestableSlices& nestable_slices() const { return nestable_slices_; }
   NestableSlices* mutable_nestable_slices() { return &nestable_slices_; }
 
@@ -253,7 +252,7 @@
   Stats stats_;
 
   // One entry for each CPU in the trace.
-  Slices slices_;
+  std::array<SlicesPerCpu, base::kMaxCpus> cpu_events_;
 
   // One map containing frequencies for every CPU in the trace. The map contains
   // timestamps and the cpu frequency value at that time.