Merge "Optimise read time_in_state"
diff --git a/src/traced/probes/ps/process_stats_data_source.cc b/src/traced/probes/ps/process_stats_data_source.cc
index ddbc152..2e4f53d 100644
--- a/src/traced/probes/ps/process_stats_data_source.cc
+++ b/src/traced/probes/ps/process_stats_data_source.cc
@@ -24,6 +24,7 @@
#include "perfetto/base/task_runner.h"
#include "perfetto/base/time.h"
#include "perfetto/ext/base/file_utils.h"
+#include "perfetto/ext/base/hash.h"
#include "perfetto/ext/base/metatrace.h"
#include "perfetto/ext/base/scoped_file.h"
#include "perfetto/ext/base/string_splitter.h"
@@ -109,10 +110,6 @@
ProcessStatsConfig::Decoder cfg(ds_config.process_stats_config_raw());
record_thread_names_ = cfg.record_thread_names();
dump_all_procs_on_start_ = cfg.scan_all_processes_on_start();
- record_thread_time_in_state_ = cfg.record_thread_time_in_state();
- thread_time_in_state_cache_size_ = cfg.thread_time_in_state_cache_size();
- if (thread_time_in_state_cache_size_ == 0)
- thread_time_in_state_cache_size_ = kThreadTimeInStateCacheSize;
enable_on_demand_dumps_ = true;
for (auto quirk = cfg.quirks(); quirk; ++quirk) {
@@ -133,6 +130,12 @@
process_stats_cache_ttl_ticks_ =
std::max(proc_stats_ttl_ms / poll_period_ms_, 1u);
}
+
+ record_thread_time_in_state_ = cfg.record_thread_time_in_state();
+ thread_time_in_state_cache_size_ = cfg.thread_time_in_state_cache_size();
+ if (thread_time_in_state_cache_size_ == 0)
+ thread_time_in_state_cache_size_ = kThreadTimeInStateCacheSize;
+ thread_time_in_state_cache_.resize(thread_time_in_state_cache_size_);
}
ProcessStatsDataSource::~ProcessStatsDataSource() = default;
@@ -390,6 +393,8 @@
thiz.cache_ticks_ = 0;
thiz.process_stats_cache_.clear();
thiz.thread_time_in_state_cache_.clear();
+ thiz.thread_time_in_state_cache_.resize(
+ thiz.thread_time_in_state_cache_size_);
}
}
@@ -435,7 +440,7 @@
}
}
- if (record_thread_time_in_state_) {
+ if (record_thread_time_in_state_ && ShouldWriteThreadStats(pid)) {
if (auto task_dir = OpenProcTaskDir(pid)) {
while (int32_t tid = ReadNextNumericDir(*task_dir))
WriteThreadStats(pid, tid);
@@ -449,15 +454,6 @@
// Ensure that we write once long-term process info (e.g., name) for new pids
// that we haven't seen before.
WriteProcessTree(pids);
-
- // Ensure the cache stays within bounds by erasing some entries.
- while (thread_time_in_state_cache_.size() >
- thread_time_in_state_cache_size_) {
- auto random = thread_time_in_state_cache_.begin();
- std::advance(random, rand() % static_cast<int32_t>(
- thread_time_in_state_cache_.size()));
- thread_time_in_state_cache_.erase(random);
- }
}
// Returns true if the stats for the given |pid| have been written, false it
@@ -571,6 +567,42 @@
return proc_status_has_mem_counters;
}
+// Fast check to avoid reading information about all threads of a process.
+// If the total process cpu time has not changed, we can skip reading
+// time_in_state for all its threads.
+bool ProcessStatsDataSource::ShouldWriteThreadStats(int32_t pid) {
+ std::string stat = ReadProcPidFile(pid, "stat");
+ // /proc/pid/stat may contain an additional space inside comm. For example:
+ // 1 (comm foo) 2 3 ...
+ // We strip the prefix including comm. So the result is: 2 3 ...
+ size_t comm_end = stat.rfind(") ");
+ if (comm_end == std::string::npos)
+ return false;
+ std::string stat_after_comm = stat.substr(comm_end + 2);
+
+ // Indices of space separated fields in /proc/pid/stat offset by 2 to make
+ // up for fields removed by stripping the prefix including comm.
+ const uint32_t kStatCTimeIndex = 13 - 2;
+ const uint32_t kStatSTimeIndex = 14 - 2;
+
+ auto stat_parts = base::SplitString(stat_after_comm, " ");
+ if (stat_parts.size() <= kStatSTimeIndex)
+ return false;
+ auto maybe_ctime = base::StringToUInt64(stat_parts[kStatCTimeIndex]);
+ if (!maybe_ctime.has_value())
+ return false;
+ auto maybe_stime = base::StringToUInt64(stat_parts[kStatSTimeIndex]);
+ if (!maybe_stime.has_value())
+ return false;
+ uint64_t current = maybe_ctime.value() + maybe_stime.value();
+ uint64_t& cached = process_stats_cache_[pid].cpu_time;
+ if (current != cached) {
+ cached = current;
+ return true;
+ }
+ return false;
+}
+
void ProcessStatsDataSource::WriteThreadStats(int32_t pid, int32_t tid) {
// Reads /proc/tid/time_in_state, which looks like:
// cpu0
@@ -597,7 +629,6 @@
continue;
uint32_t freq = ToU32(key_value.cur_token());
uint32_t freq_index = cpu_freq_info_->GetCpuFreqIndex(last_cpu, freq);
- TidCpuFreqIndex key = {tid, freq_index};
if (!key_value.Next())
continue;
auto maybe_ticks = base::CStringToUInt64(key_value.cur_token());
@@ -606,15 +637,22 @@
uint64_t ticks = maybe_ticks.value();
if (ticks == 0)
continue;
- auto& cached_ticks = thread_time_in_state_cache_[key];
- if (ticks != cached_ticks) {
+ base::Hash key_hash;
+ key_hash.Update(tid);
+ key_hash.Update(freq_index);
+ size_t key = key_hash.digest() % thread_time_in_state_cache_size_;
+ PERFETTO_DCHECK(thread_time_in_state_cache_.size() ==
+ thread_time_in_state_cache_size_);
+ TimeInStateCacheEntry& cached = thread_time_in_state_cache_[key];
+ TimeInStateCacheEntry current = {tid, freq_index, ticks};
+ if (current != cached) {
+ cached = current;
if (thread == nullptr) {
thread = GetOrCreateStatsProcess(pid)->add_threads();
thread->set_tid(tid);
}
thread->add_cpu_freq_indices(freq_index);
thread->add_cpu_freq_ticks(ticks);
- cached_ticks = ticks;
}
}
}
@@ -634,6 +672,7 @@
cache_ticks_ = 0;
process_stats_cache_.clear();
thread_time_in_state_cache_.clear();
+ thread_time_in_state_cache_.resize(thread_time_in_state_cache_size_);
// Set the relevant flag in the next packet.
did_clear_incremental_state_ = true;
diff --git a/src/traced/probes/ps/process_stats_data_source.h b/src/traced/probes/ps/process_stats_data_source.h
index 19a9d4a..9d5640a 100644
--- a/src/traced/probes/ps/process_stats_data_source.h
+++ b/src/traced/probes/ps/process_stats_data_source.h
@@ -18,7 +18,6 @@
#define SRC_TRACED_PROBES_PS_PROCESS_STATS_DATA_SOURCE_H_
#include <limits>
-#include <map>
#include <memory>
#include <set>
#include <unordered_map>
@@ -87,6 +86,9 @@
uint32_t vm_locked_kb = std::numeric_limits<uint32_t>::max();
uint32_t vm_hvm_kb = std::numeric_limits<uint32_t>::max();
int oom_score_adj = std::numeric_limits<int>::max();
+
+ // ctime + stime from /proc/pid/stat
+ uint64_t cpu_time = std::numeric_limits<uint64_t>::max();
};
// Common functions.
@@ -109,6 +111,7 @@
static void Tick(base::WeakPtr<ProcessStatsDataSource>);
void WriteAllProcessStats();
bool WriteMemCounters(int32_t pid, const std::string& proc_status);
+ bool ShouldWriteThreadStats(int32_t pid);
void WriteThreadStats(int32_t pid, int32_t tid);
// Scans /proc/pid/status and writes the ProcessTree packet for input pids.
@@ -158,9 +161,13 @@
uint32_t process_stats_cache_ttl_ticks_ = 0;
std::unordered_map<int32_t, CachedProcessStats> process_stats_cache_;
- using TidCpuFreqIndex =
- std::tuple</* tid */ int32_t, /* cpu_freq_index */ uint32_t>;
- std::map<TidCpuFreqIndex, uint64_t> thread_time_in_state_cache_;
+ using TimeInStateCacheEntry = std::tuple</* tid */ int32_t,
+ /* cpu_freq_index */ uint32_t,
+ /* ticks */ uint64_t>;
+
+ // Cache for time in state. Size specificed in the config. Values are stored
+ // at index: hash(tid, cpu_freq_index) % thread_time_in_state_cache_size_.
+ std::vector<TimeInStateCacheEntry> thread_time_in_state_cache_;
uint32_t thread_time_in_state_cache_size_;
std::unique_ptr<CpuFreqInfo> cpu_freq_info_;
diff --git a/src/traced/probes/ps/process_stats_data_source_unittest.cc b/src/traced/probes/ps/process_stats_data_source_unittest.cc
index f9a2697..a9c62d3 100644
--- a/src/traced/probes/ps/process_stats_data_source_unittest.cc
+++ b/src/traced/probes/ps/process_stats_data_source_unittest.cc
@@ -486,12 +486,18 @@
auto fake_proc = base::TempDir::Create();
const int kPid = 1;
make_proc_path(fake_proc, kPid);
+ const int kIgnoredPid = 5;
+ make_proc_path(fake_proc, kIgnoredPid);
// Populate a fake /proc/1/task directory.
auto fake_proc_task = base::TempDir::Create();
const int kTids[] = {1, 2};
for (int tid : kTids)
make_proc_path(fake_proc_task, tid);
+ // Populate a fake /proc/5/task directory.
+ auto fake_ignored_proc_task = base::TempDir::Create();
+ const int kIgnoredTid = 5;
+ make_proc_path(fake_ignored_proc_task, kIgnoredTid);
auto checkpoint = task_runner_.CreateCheckpoint("all_done");
@@ -500,9 +506,16 @@
}));
EXPECT_CALL(*data_source, ReadProcPidFile(kPid, "status"))
.WillRepeatedly(
- Return("Name: pid_10\nVmSize: 100 kB\nVmRSS:\t100 kB\n"));
+ Return("Name: pid_1\nVmSize: 100 kB\nVmRSS:\t100 kB\n"));
EXPECT_CALL(*data_source, ReadProcPidFile(kPid, "oom_score_adj"))
- .WillRepeatedly(Return("900"));
+ .WillRepeatedly(Return("901"));
+ EXPECT_CALL(*data_source, ReadProcPidFile(kPid, "stat"))
+ .WillOnce(Return("1 (pid_1) S 1 1 0 0 -1 4210944 2197 2451 0 1 54 117 4"))
+ // ctime++
+ .WillOnce(Return("1 (pid_1) S 1 1 0 0 -1 4210944 2197 2451 0 1 55 117 4"))
+ // stime++
+ .WillOnce(
+ Return("1 (pid_1) S 1 1 0 0 -1 4210944 2197 2451 0 1 55 118 4"));
EXPECT_CALL(*data_source, OpenProcTaskDir(kPid))
.WillRepeatedly(Invoke([&fake_proc_task](int32_t) {
return base::ScopedDir(opendir(fake_proc_task.path().c_str()));
@@ -521,17 +534,39 @@
return "cpu0\n300000 200\n748800 0\n1324800 30\ncpu1\n300000 "
"100\n652800 60\n";
}));
+ EXPECT_CALL(*data_source, ReadProcPidFile(kIgnoredPid, "status"))
+ .WillRepeatedly(
+ Return("Name: pid_5\nVmSize: 100 kB\nVmRSS:\t100 kB\n"));
+ EXPECT_CALL(*data_source, ReadProcPidFile(kIgnoredPid, "oom_score_adj"))
+ .WillRepeatedly(Return("905"));
+ EXPECT_CALL(*data_source, OpenProcTaskDir(kIgnoredPid))
+ .WillRepeatedly(Invoke([&fake_ignored_proc_task](int32_t) {
+ return base::ScopedDir(opendir(fake_ignored_proc_task.path().c_str()));
+ }));
+ EXPECT_CALL(*data_source, ReadProcPidFile(kIgnoredPid, "stat"))
+ .WillRepeatedly(
+ Return("5 (pid_5) S 1 5 0 0 -1 4210944 2197 2451 0 1 99 99 4"));
+ EXPECT_CALL(*data_source, ReadProcPidFile(kIgnoredTid, "time_in_state"))
+ .Times(2)
+ .WillRepeatedly(
+ Return("cpu0\n300000 10\n748800 0\ncpu1\n300000 00\n652800 20\n"));
data_source->Start();
task_runner_.RunUntilCheckpoint("all_done");
data_source->Flush(1 /* FlushRequestId */, []() {});
- std::vector<protos::gen::ProcessStats::Process> processes;
+ // Collect all process packets order by their timestamp and pid.
+ using TimestampPid = std::pair</* timestamp */ uint64_t, /* pid */ int32_t>;
+ std::map<TimestampPid, protos::gen::ProcessStats::Process> processes_map;
for (const auto& packet : writer_raw_->GetAllTracePackets())
for (const auto& process : packet.process_stats().processes())
- processes.push_back(process);
+ processes_map.insert({{packet.timestamp(), process.pid()}, process});
+ std::vector<protos::gen::ProcessStats::Process> processes;
+ for (auto it : processes_map)
+ processes.push_back(it.second);
- EXPECT_EQ(processes.size(), 3u);
+ // 3 packets for pid=1, 2 packets for pid=5.
+ EXPECT_EQ(processes.size(), 5u);
auto compare_tid = [](protos::gen::ProcessStats_Thread& l,
protos::gen::ProcessStats_Thread& r) {
@@ -539,6 +574,7 @@
};
// First pull has all threads.
+ // Check pid = 1.
auto threads = processes[0].threads();
EXPECT_EQ(threads.size(), 2u);
std::sort(threads.begin(), threads.end(), compare_tid);
@@ -550,9 +586,14 @@
EXPECT_EQ(thread.tid(), 2);
EXPECT_THAT(thread.cpu_freq_indices(), ElementsAre(1u, 10u, 11u));
EXPECT_THAT(thread.cpu_freq_ticks(), ElementsAre(10, 50, 60));
+ // Check pid = 5.
+ threads = processes[1].threads();
+ EXPECT_EQ(threads.size(), 1u);
+ EXPECT_EQ(threads[0].tid(), 5);
+ EXPECT_THAT(threads[0].cpu_freq_ticks(), ElementsAre(10, 20));
// Second pull has only one thread with delta.
- threads = processes[1].threads();
+ threads = processes[2].threads();
EXPECT_EQ(threads.size(), 1u);
thread = threads[0];
EXPECT_EQ(thread.tid(), 2);
@@ -560,7 +601,8 @@
EXPECT_THAT(thread.cpu_freq_ticks(), ElementsAre(20, 30, 100));
// Third pull has all thread because cache was cleared.
- threads = processes[2].threads();
+ // Check pid = 1.
+ threads = processes[3].threads();
EXPECT_EQ(threads.size(), 2u);
std::sort(threads.begin(), threads.end(), compare_tid);
thread = threads[0];
@@ -571,6 +613,11 @@
EXPECT_EQ(thread.tid(), 2);
EXPECT_THAT(thread.cpu_freq_indices(), ElementsAre(1u, 6u, 10u, 11u));
EXPECT_THAT(thread.cpu_freq_ticks(), ElementsAre(200, 30, 100, 60));
+ // Check pid = 5.
+ threads = processes[4].threads();
+ EXPECT_EQ(threads.size(), 1u);
+ EXPECT_EQ(threads[0].tid(), 5);
+ EXPECT_THAT(threads[0].cpu_freq_ticks(), ElementsAre(10, 20));
for (const std::string& path : dirs_to_delete)
rmdir(path.c_str());