TraceProcessor: Add clipping support to sched table + minor fixes - Fix mac build by not building libunwind stack on mac - Fix cost estimation of the sched_slices table, which was always return an estimated cost of 0. - Fix heuristics for sorting. - Introduce ts_clip=true|false column in sched table. When set to true, it truncates the start and duration of slices around the ts >< operators. For instance: Given shced slices: [ A ][ B ][ C ] And WHERE ts BETWEEN x and y ^x y^ The output would be: [ A ][ B ][ C ] Change-Id: I0c9a244f8b539e1b1e509cc7709cc5a98451a40b
diff --git a/BUILD.gn b/BUILD.gn index aee8466..01c1e0d 100644 --- a/BUILD.gn +++ b/BUILD.gn
@@ -34,7 +34,7 @@ # libunwindstack requires API level 26 or newer. should_build_heapprofd = - !build_with_chromium && is_clang && + !build_with_chromium && is_clang && (is_linux || is_android) && (!is_android || android_api_level >= 26 || build_with_android) } assert(!monolithic_binaries || !build_with_android)
diff --git a/src/trace_processor/sched_slice_table.cc b/src/trace_processor/sched_slice_table.cc index eeb4bb2..6ddec12 100644 --- a/src/trace_processor/sched_slice_table.cc +++ b/src/trace_processor/sched_slice_table.cc
@@ -32,6 +32,8 @@ using namespace sqlite_utils; +constexpr uint64_t kUint64Max = std::numeric_limits<uint64_t>::max(); + template <size_t N = base::kMaxCpus> bool PopulateFilterBitmap(int op, sqlite3_value* value, @@ -93,14 +95,15 @@ void SchedSliceTable::RegisterTable(sqlite3* db, const TraceStorage* storage) { Table::Register<SchedSliceTable>(db, storage, "CREATE TABLE sched(" - "quantum HIDDEN BIG INT, " - "ts_lower_bound HIDDEN BIG INT, " "ts UNSIGNED BIG INT, " "cpu UNSIGNED INT, " "dur UNSIGNED BIG INT, " "quantized_group UNSIGNED BIG INT, " - "utid UNSIGNED INT," - "cycles UNSIGNED BIG INT," + "utid UNSIGNED INT, " + "cycles UNSIGNED BIG INT, " + "quantum HIDDEN BIG INT, " + "ts_lower_bound HIDDEN BIG INT, " + "ts_clip HIDDEN BOOLEAN, " "PRIMARY KEY(cpu, ts)" ") WITHOUT ROWID;"); } @@ -111,19 +114,29 @@ int SchedSliceTable::BestIndex(const QueryConstraints& qc, BestIndexInfo* info) { - // Omit SQLite constraint checks on the hidden columns, so the client can - // write queries of the form "quantum == x" and "ts_lower_bound == x". - // Disallow any other constraint on these columns. + bool is_time_constrained = false; for (size_t i = 0; i < qc.constraints().size(); i++) { const auto& cs = qc.constraints()[i]; + + // Omit SQLite constraint checks on the hidden columns, so the client can + // write queries of the form "quantum=x" "ts_lower_bound=x" "ts_clip=true". + // Disallow any other constraint on these columns. if (cs.iColumn == Column::kTimestampLowerBound || - cs.iColumn == Column::kQuantizedGroup) { + cs.iColumn == Column::kQuantizedGroup || + cs.iColumn == Column::kClipTimestamp) { if (!IsOpEq(cs.op)) return SQLITE_CONSTRAINT_FUNCTION; info->omit[i] = true; } + + if (cs.iColumn == Column::kTimestampLowerBound || + cs.iColumn == Column::kTimestamp) { + is_time_constrained = true; + } } + info->estimated_cost = is_time_constrained ? 10 : 10000; + bool is_quantized_group_order_desc = false; bool is_duration_timestamp_order = false; for (const auto& ob : qc.order_by()) { @@ -136,9 +149,14 @@ case Column::kDuration: is_duration_timestamp_order = true; break; - case Column::kQuantum: case Column::kCpu: break; + + // Can't order on hidden columns. + case Column::kQuantum: + case Column::kTimestampLowerBound: + case Column::kClipTimestamp: + return SQLITE_CONSTRAINT_FUNCTION; } } @@ -206,7 +224,8 @@ const auto& slices = storage_->SlicesForCpu(cpu); switch (N) { case Column::kTimestamp: { - auto timestamp = state->next_timestamp(); + uint64_t timestamp = state->next_timestamp(); + timestamp = std::max(timestamp, state->ts_clip_min()); sqlite3_result_int64(context, static_cast<sqlite3_int64>(timestamp)); break; } @@ -218,6 +237,14 @@ uint64_t duration; if (quantum == 0) { duration = slices.durations()[row]; + uint64_t start_ns = state->next_timestamp(); + uint64_t end_ns = start_ns + duration; + uint64_t clip_trim_ns = 0; + if (state->ts_clip_min() > start_ns) + clip_trim_ns += state->ts_clip_min() - start_ns; + if (end_ns > state->ts_clip_max()) + clip_trim_ns += end_ns - state->ts_clip_min(); + duration -= std::min(clip_trim_ns, duration); } else { uint64_t start_quantised_group = state->next_timestamp() / quantum; uint64_t end = slices.start_ns()[row] + slices.durations()[row]; @@ -263,8 +290,9 @@ cpu_filter.set(); uint64_t min_ts = 0; - uint64_t max_ts = std::numeric_limits<uint64_t>::max(); + uint64_t max_ts = kUint64Max; uint64_t ts_lower_bound = 0; + bool ts_clip = false; for (size_t i = 0; i < query_constraints.constraints().size(); i++) { const auto& cs = query_constraints.constraints()[i]; @@ -278,6 +306,9 @@ case Column::kTimestampLowerBound: ts_lower_bound = static_cast<uint64_t>(sqlite3_value_int64(argv[i])); break; + case Column::kClipTimestamp: + ts_clip = sqlite3_value_int(argv[i]) ? true : false; + break; case Column::kTimestamp: { auto ts = static_cast<uint64_t>(sqlite3_value_int64(argv[i])); if (IsOpGe(cs.op) || IsOpGt(cs.op)) { @@ -290,13 +321,21 @@ } } + if (ts_clip) { + PERFETTO_DCHECK(ts_lower_bound == 0); + if (ts_lower_bound) + PERFETTO_ELOG("Cannot use ts_lower_bound and ts_clip together"); + ts_lower_bound = min_ts; + min_ts = 0; + } + // If the query specifies a lower bound on ts, find that bound across all // CPUs involved in the query and turn that into a min_ts constraint. // ts_lower_bound is defined as the largest timestamp < X, or if none, the // smallest timestamp >= X. if (ts_lower_bound > 0) { uint64_t largest_ts_before = 0; - uint64_t smallest_ts_after = std::numeric_limits<uint64_t>::max(); + uint64_t smallest_ts_after = kUint64Max; for (uint32_t cpu = 0; cpu < base::kMaxCpus; cpu++) { if (!cpu_filter.test(cpu)) continue; @@ -323,8 +362,10 @@ for (uint32_t cpu = 0; cpu < base::kMaxCpus; cpu++) { if (!cpu_filter.test(cpu)) continue; + uint64_t ts_clip_min = ts_clip ? min_ts : 0; + uint64_t ts_clip_max = ts_clip ? max_ts : kUint64Max; StateForCpu(cpu)->Initialize( - cpu, storage_, quantum_, + cpu, storage_, quantum_, ts_clip_min, ts_clip_max, CreateSortedIndexVectorForCpu(cpu, min_ts, max_ts)); } @@ -441,10 +482,14 @@ uint32_t cpu, const TraceStorage* storage, uint64_t quantum, + uint64_t ts_clip_min, + uint64_t ts_clip_max, std::vector<uint32_t> sorted_row_ids) { cpu_ = cpu; storage_ = storage; quantum_ = quantum; + ts_clip_min_ = ts_clip_min; + ts_clip_max_ = ts_clip_max; sorted_row_ids_ = std::move(sorted_row_ids); UpdateNextTimestampForNextRow(); }
diff --git a/src/trace_processor/sched_slice_table.h b/src/trace_processor/sched_slice_table.h index 80f5a66..06f5799 100644 --- a/src/trace_processor/sched_slice_table.h +++ b/src/trace_processor/sched_slice_table.h
@@ -34,14 +34,17 @@ class SchedSliceTable : public Table { public: enum Column { - kQuantum = 0, - kTimestampLowerBound = 1, - kTimestamp = 2, - kCpu = 3, - kDuration = 4, - kQuantizedGroup = 5, - kUtid = 6, - kCycles = 7, + kTimestamp = 0, + kCpu = 1, + kDuration = 2, + kQuantizedGroup = 3, + kUtid = 4, + kCycles = 5, + + // Hidden columns. + kQuantum = 6, + kTimestampLowerBound = 7, + kClipTimestamp = 8, }; SchedSliceTable(const TraceStorage* storage); @@ -60,6 +63,8 @@ void Initialize(uint32_t cpu, const TraceStorage* storage, uint64_t quantum, + uint64_t ts_clip_min, + uint64_t ts_clip_max, std::vector<uint32_t> sorted_row_ids); void FindNextSlice(); bool IsNextRowIdIndexValid() const { @@ -68,6 +73,8 @@ size_t next_row_id() const { return sorted_row_ids_[next_row_id_index_]; } uint64_t next_timestamp() const { return next_timestamp_; } + uint64_t ts_clip_min() const { return ts_clip_min_; } + uint64_t ts_clip_max() const { return ts_clip_max_; } private: const TraceStorage::SlicesPerCpu& Slices() { @@ -93,6 +100,11 @@ // The quantum the output slices should fall within. uint64_t quantum_ = 0; + // When clipping is applied (i.e. WHERE ts_clip between X and Y), slices are + // cut and shrunk around the min-max boundaries to fit in the clip window. + uint64_t ts_clip_min_ = 0; + uint64_t ts_clip_max_ = std::numeric_limits<uint64_t>::max(); + const TraceStorage* storage_ = nullptr; }; @@ -104,7 +116,7 @@ sqlite3_value** argv); // Chooses the next CPU which should be returned according to the sorting - // citeria specified by |order_by_|. + // criteria specified by |order_by_|. void FindCpuWithNextSlice(); // Returns whether the next CPU to be returned by this filter operation is
diff --git a/src/trace_processor/sched_tracker.cc b/src/trace_processor/sched_tracker.cc index f2619c2..df2a689 100644 --- a/src/trace_processor/sched_tracker.cc +++ b/src/trace_processor/sched_tracker.cc
@@ -36,7 +36,11 @@ base::StringView prev_comm, uint32_t next_pid) { // At this stage all events should be globally timestamp ordered. - PERFETTO_DCHECK(prev_timestamp_ <= timestamp); + if (timestamp < prev_timestamp_) { + PERFETTO_ELOG("sched_switch event out of order by %.4f ms, skipping", + (prev_timestamp_ - timestamp) / 1e6); + return; + } prev_timestamp_ = timestamp; PERFETTO_DCHECK(cpu < base::kMaxCpus); SchedSwitchEvent* prev = &last_sched_per_cpu_[cpu];
diff --git a/src/trace_processor/trace_processor.h b/src/trace_processor/trace_processor.h index 1dfac9a..5235514 100644 --- a/src/trace_processor/trace_processor.h +++ b/src/trace_processor/trace_processor.h
@@ -40,7 +40,7 @@ public: struct Config { OptimizationMode optimization_mode = OptimizationMode::kMaxBandwidth; - uint64_t window_size_ns = 10 * 1000 * 1000 * 1000ULL; // 10 seconds. + uint64_t window_size_ns = 60 * 1000 * 1000 * 1000ULL; // 60 seconds. }; explicit TraceProcessor(const Config&); ~TraceProcessor();
diff --git a/src/trace_processor/trace_sorter.h b/src/trace_processor/trace_sorter.h index 4294f6d..484090c 100644 --- a/src/trace_processor/trace_sorter.h +++ b/src/trace_processor/trace_sorter.h
@@ -154,7 +154,7 @@ // too many events in the staging area. if (optimization_ == OptimizationMode::kMaxBandwidth && latest_timestamp_ - earliest_timestamp_ < window_size_ns_ * 10 && - events_.size() < 1e6) { + events_.size() < 5 * 1e6) { return; }
diff --git a/src/trace_processor/trace_storage.h b/src/trace_processor/trace_storage.h index f5a9dbc..925f307 100644 --- a/src/trace_processor/trace_storage.h +++ b/src/trace_processor/trace_storage.h
@@ -217,7 +217,11 @@ uint32_t cpu, uint32_t new_freq) { auto& freqs = cpu_freq_[cpu]; - PERFETTO_DCHECK(freqs.empty() || timestamp > freqs.back().first); + if (!freqs.empty() && timestamp < freqs.back().first) { + PERFETTO_ELOG("cpufreq out of order by %.4f ms, skipping", + (freqs.back().first - timestamp) / 1e6); + return; + } freqs.emplace_back(timestamp, new_freq); }