traced_probes: fix tid/pid incremental mapping and reduce overhead

This CL:
1) Fixes an architectural bug related to the way tid/pid mapping is
   established. The current proto structure, in fact, is strictly
   hierarchical and requires that a process and all its threads are
   dumped together. This has two problems: (i) it either requires to
   always dump all threads for a process or re-dump the process
   record for each thread; (ii) doesn't deal with the fact that new
   threads can be spawned after the initial dump.
2) Fixes a performance bug, reducing traced_probes cpu usage by more
   than 30% when process metadata collection is enabled.
3) Removes unused/forked code and puts everything into
   process_stats_data_source.cc.

In terms of risk this CL should be rather safe. The entire
process_stats_data_source.cc is an optional module and can be
disabled at any point through the TraceConfig.

Bug: b/78453224
Bug: b/78106582
Bug: b/77316877
Change-Id: I4592c9c0317c6f66b68357684ce9cd0577c9bdb5
diff --git a/Android.bp b/Android.bp
index 1f25dec..f36b881 100644
--- a/Android.bp
+++ b/Android.bp
@@ -54,8 +54,6 @@
     "src/ipc/service_proxy.cc",
     "src/ipc/unix_socket.cc",
     "src/ipc/virtual_destructors.cc",
-    "src/process_stats/file_utils.cc",
-    "src/process_stats/procfs_utils.cc",
     "src/protozero/message.cc",
     "src/protozero/message_handle.cc",
     "src/protozero/proto_utils.cc",
@@ -292,8 +290,6 @@
     "src/ipc/service_proxy.cc",
     "src/ipc/unix_socket.cc",
     "src/ipc/virtual_destructors.cc",
-    "src/process_stats/file_utils.cc",
-    "src/process_stats/procfs_utils.cc",
     "src/protozero/message.cc",
     "src/protozero/message_handle.cc",
     "src/protozero/proto_utils.cc",
@@ -3623,8 +3619,6 @@
     "src/perfetto_cmd/perfetto_cmd.cc",
     "src/perfetto_cmd/rate_limiter.cc",
     "src/perfetto_cmd/rate_limiter_unittest.cc",
-    "src/process_stats/file_utils.cc",
-    "src/process_stats/procfs_utils.cc",
     "src/protozero/message.cc",
     "src/protozero/message_handle.cc",
     "src/protozero/message_handle_unittest.cc",
diff --git a/protos/perfetto/trace/perfetto_trace.proto b/protos/perfetto/trace/perfetto_trace.proto
index 5e62816..de7bc95 100644
--- a/protos/perfetto/trace/perfetto_trace.proto
+++ b/protos/perfetto/trace/perfetto_trace.proto
@@ -391,11 +391,14 @@
 // Begin of protos/perfetto/trace/ps/process_tree.proto
 
 message ProcessTree {
-  // Representation of a thread
+  // Representation of a thread.
   message Thread {
     // The thread id (as per gettid())
     optional int32 tid = 1;
 
+    // Thread group id (i.e. the PID of the process, == TID of the main thread)
+    optional int32 tgid = 3;
+
     // The name of the thread.
     optional string name = 2;
   }
@@ -413,12 +416,20 @@
     // and it will contain /proc/pid/comm.
     repeated string cmdline = 3;
 
-    // The list of all threads in the process.
-    repeated Thread threads = 4;
+    // No longer used as of Apr 2018, when the dedicated |threads| field was
+    // introduced in ProcessTree.
+    repeated Thread threads_deprecated = 4 [deprecated = true];
   }
 
-  // List of all processes in the client.
+  // List of processes and threads in the client. These lists are incremental
+  // and not exhaustive. A process and its threads might show up separately in
+  // different ProcessTree messages. A thread might event not show up at all, if
+  // no sched_switch activity was detected, for instance:
+  // #0 { processes: [{pid: 10, ...}], threads: [{pid: 11, tgid: 10}] }
+  // #1 { threads: [{pid: 12, tgid: 10}] }
+  // #2 { processes: [{pid: 20, ...}], threads: [{pid: 13, tgid: 10}] }
   repeated Process processes = 1;
+  repeated Thread threads = 2;
 }
 
 // End of protos/perfetto/trace/ps/process_tree.proto
diff --git a/protos/perfetto/trace/ps/process_tree.proto b/protos/perfetto/trace/ps/process_tree.proto
index c1538fe..50579e5 100644
--- a/protos/perfetto/trace/ps/process_tree.proto
+++ b/protos/perfetto/trace/ps/process_tree.proto
@@ -19,11 +19,14 @@
 package perfetto.protos;
 
 message ProcessTree {
-  // Representation of a thread
+  // Representation of a thread.
   message Thread {
     // The thread id (as per gettid())
     optional int32 tid = 1;
 
+    // Thread group id (i.e. the PID of the process, == TID of the main thread)
+    optional int32 tgid = 3;
+
     // The name of the thread.
     optional string name = 2;
   }
@@ -41,10 +44,18 @@
     // and it will contain /proc/pid/comm.
     repeated string cmdline = 3;
 
-    // The list of all threads in the process.
-    repeated Thread threads = 4;
+    // No longer used as of Apr 2018, when the dedicated |threads| field was
+    // introduced in ProcessTree.
+    repeated Thread threads_deprecated = 4 [deprecated = true];
   }
 
-  // List of all processes in the client.
+  // List of processes and threads in the client. These lists are incremental
+  // and not exhaustive. A process and its threads might show up separately in
+  // different ProcessTree messages. A thread might event not show up at all, if
+  // no sched_switch activity was detected, for instance:
+  // #0 { processes: [{pid: 10, ...}], threads: [{pid: 11, tgid: 10}] }
+  // #1 { threads: [{pid: 12, tgid: 10}] }
+  // #2 { processes: [{pid: 20, ...}], threads: [{pid: 13, tgid: 10}] }
   repeated Process processes = 1;
+  repeated Thread threads = 2;
 }
diff --git a/src/process_stats/BUILD.gn b/src/process_stats/BUILD.gn
deleted file mode 100644
index 8743c7d..0000000
--- a/src/process_stats/BUILD.gn
+++ /dev/null
@@ -1,29 +0,0 @@
-# Copyright (C) 2018 The Android Open Source Project
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#      http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-source_set("process_stats") {
-  public_deps = []
-  deps = [
-    "../../gn:default_deps",
-    "../../protos/perfetto/config",
-    "../base",
-  ]
-  sources = [
-    "file_utils.cc",
-    "file_utils.h",
-    "process_info.h",
-    "procfs_utils.cc",
-    "procfs_utils.h",
-  ]
-}
diff --git a/src/process_stats/file_utils.cc b/src/process_stats/file_utils.cc
deleted file mode 100644
index 5c75e1d..0000000
--- a/src/process_stats/file_utils.cc
+++ /dev/null
@@ -1,109 +0,0 @@
-// Copyright 2017 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#include "src/process_stats/file_utils.h"
-
-#include <ctype.h>
-#include <errno.h>
-#include <fcntl.h>
-#include <stdio.h>
-#include <string.h>
-#include <sys/stat.h>
-#include <sys/types.h>
-
-namespace file_utils {
-bool IsNumeric(const char* str) {
-  if (!str[0])
-    return false;
-  for (const char* c = str; *c; c++) {
-    if (!isdigit(*c))
-      return false;
-  }
-  return true;
-}
-
-void ForEachPidInProcPath(const char* proc_path,
-                          const std::function<void(int)>& predicate) {
-  DIR* root_dir = opendir(proc_path);
-  ScopedDir autoclose(root_dir);
-  struct dirent* child_dir;
-  while ((child_dir = readdir(root_dir))) {
-    if (child_dir->d_type != DT_DIR || !IsNumeric(child_dir->d_name))
-      continue;
-    predicate(atoi(child_dir->d_name));
-  }
-}
-
-ssize_t ReadFile(const char* path, char* buf, size_t length) {
-  buf[0] = '\0';
-  int fd = open(path, O_RDONLY);
-  if (fd < 0 && errno == ENOENT)
-    return -1;
-  ScopedFD autoclose(fd);
-  size_t tot_read = 0;
-  do {
-    ssize_t rsize = read(fd, buf + tot_read, length - tot_read);
-    if (rsize == 0)
-      break;
-    if (rsize == -1 && errno == EINTR)
-      continue;
-    if (rsize < 0)
-      return -1;
-    tot_read += static_cast<size_t>(rsize);
-  } while (tot_read < length);
-  buf[tot_read < length ? tot_read : length - 1] = '\0';
-  return static_cast<ssize_t>(tot_read);
-}
-
-bool ReadFileTrimmed(const char* path, char* buf, size_t length) {
-  ssize_t rsize = ReadFile(path, buf, length);
-  if (rsize < 0)
-    return false;
-  for (ssize_t i = 0; i < rsize; i++) {
-    const char c = buf[i];
-    if (c == '\0' || c == '\r' || c == '\n') {
-      buf[i] = '\0';
-      break;
-    }
-    buf[i] = isprint(c) ? c : '?';
-  }
-  return true;
-}
-
-ssize_t ReadProcFile(int pid, const char* proc_file, char* buf, size_t length) {
-  char proc_path[128];
-  snprintf(proc_path, sizeof(proc_path), "/proc/%d/%s", pid, proc_file);
-  return ReadFile(proc_path, buf, length);
-}
-
-// Reads a single-line proc file, stripping out any \0, \r, \n and replacing
-// non-printable charcters with '?'.
-bool ReadProcFileTrimmed(int pid,
-                         const char* proc_file,
-                         char* buf,
-                         size_t length) {
-  char proc_path[128];
-  snprintf(proc_path, sizeof(proc_path), "/proc/%d/%s", pid, proc_file);
-  return ReadFileTrimmed(proc_path, buf, length);
-}
-
-LineReader::LineReader(char* buf, size_t size) : ptr_(buf), end_(buf + size) {}
-
-LineReader::~LineReader() {}
-
-const char* LineReader::NextLine() {
-  if (ptr_ >= end_)
-    return nullptr;
-  const char* cur = ptr_;
-  char* next = strchr(ptr_, '\n');
-  if (next) {
-    *next = '\0';
-    ptr_ = next + 1;
-  } else {
-    ptr_ = end_;
-  }
-  return cur;
-}
-
-}  // namespace file_utils
diff --git a/src/process_stats/file_utils.h b/src/process_stats/file_utils.h
deleted file mode 100644
index 9c160da..0000000
--- a/src/process_stats/file_utils.h
+++ /dev/null
@@ -1,70 +0,0 @@
-// Copyright 2017 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#ifndef SRC_PROCESS_STATS_FILE_UTILS_H_
-#define SRC_PROCESS_STATS_FILE_UTILS_H_
-
-#include <ctype.h>
-#include <dirent.h>
-#include <sys/types.h>
-#include <unistd.h>
-
-#include <functional>
-#include <map>
-#include <memory>
-
-namespace file_utils {
-
-// TODO(taylori): Migrate to using perfetto:base:ScopedFD.
-
-// RAII classes for auto-releasing fd/dirs.
-template <typename RESOURCE_TYPE, int (*CLOSE_FN)(RESOURCE_TYPE)>
-struct ScopedResource {
-  explicit ScopedResource(RESOURCE_TYPE r) : r_(r) {}
-  ~ScopedResource() { CLOSE_FN(r_); }
-  RESOURCE_TYPE r_;
-};
-
-using ScopedFD = ScopedResource<int, close>;
-using ScopedDir = ScopedResource<DIR*, closedir>;
-
-bool IsNumeric(const char* str);
-
-// Invokes predicate(pid) for each folder in |proc_path|/[0-9]+ which has
-// a numeric name (typically pids and tids).
-void ForEachPidInProcPath(const char* proc_path,
-                          const std::function<void(int)>& predicate);
-
-// Reads the contents of |path| fully into |buf| up to |length| chars.
-// |buf| is guaranteed to be null terminated.
-ssize_t ReadFile(const char* path, char* buf, size_t length);
-
-// Reads a single-line file, stripping out any \0, \r, \n and replacing
-// non-printable charcters with '?'. |buf| is guaranteed to be null terminated.
-bool ReadFileTrimmed(const char* path, char* buf, size_t length);
-
-// Convenience wrappers for /proc/|pid|/|proc_file| paths.
-ssize_t ReadProcFile(int pid, const char* proc_file, char* buf, size_t length);
-bool ReadProcFileTrimmed(int pid,
-                         const char* proc_file,
-                         char* buf,
-                         size_t length);
-
-// Takes a C string buffer and chunks it into lines without creating any
-// copies. It modifies the original buffer, by replacing \n with \0.
-class LineReader {
- public:
-  LineReader(char* buf, size_t size);
-  ~LineReader();
-
-  const char* NextLine();
-
- private:
-  char* ptr_;
-  char* end_;
-};
-
-}  // namespace file_utils
-
-#endif  // SRC_PROCESS_STATS_FILE_UTILS_H_
diff --git a/src/process_stats/process_info.h b/src/process_stats/process_info.h
deleted file mode 100644
index 2739644..0000000
--- a/src/process_stats/process_info.h
+++ /dev/null
@@ -1,26 +0,0 @@
-// Copyright 2017 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#ifndef SRC_PROCESS_STATS_PROCESS_INFO_H_
-#define SRC_PROCESS_STATS_PROCESS_INFO_H_
-
-#include <map>
-#include <vector>
-
-struct ThreadInfo {
-  int tid;
-  char name[16];
-};
-
-struct ProcessInfo {
-  int pid;
-  int ppid;
-  bool in_kernel;
-  bool is_app;
-  char exe[256];
-  std::vector<std::string> cmdline;
-  std::map<int, ThreadInfo> threads;
-};
-
-#endif  // SRC_PROCESS_STATS_PROCESS_INFO_H_
diff --git a/src/process_stats/procfs_utils.cc b/src/process_stats/procfs_utils.cc
deleted file mode 100644
index 31ea824..0000000
--- a/src/process_stats/procfs_utils.cc
+++ /dev/null
@@ -1,110 +0,0 @@
-// Copyright 2017 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#include "src/process_stats/procfs_utils.h"
-
-#include <stdio.h>
-#include <string.h>
-#include <fstream>
-
-#include "src/process_stats/file_utils.h"
-
-#include "perfetto/base/logging.h"
-#include "perfetto/base/string_splitter.h"
-
-using file_utils::ForEachPidInProcPath;
-using file_utils::ReadProcFile;
-using file_utils::ReadProcFileTrimmed;
-
-namespace procfs_utils {
-
-namespace {
-
-constexpr const char kJavaAppPrefix[] = "/system/bin/app_process";
-constexpr const char kZygotePrefix[] = "zygote";
-
-inline void ReadProcString(int pid, const char* path, char* buf, size_t size) {
-  if (!file_utils::ReadProcFileTrimmed(pid, path, buf, size))
-    buf[0] = '\0';
-}
-
-inline void ReadExePath(int pid, char* buf, size_t size) {
-  char exe_path[64];
-  sprintf(exe_path, "/proc/%d/exe", pid);
-  ssize_t res = readlink(exe_path, buf, size - 1);
-  if (res >= 0)
-    buf[res] = '\0';
-  else
-    buf[0] = '\0';
-}
-
-inline bool IsApp(const char* name, const char* exe) {
-  return strncmp(exe, kJavaAppPrefix, sizeof(kJavaAppPrefix) - 1) == 0 &&
-         strncmp(name, kZygotePrefix, sizeof(kZygotePrefix) - 1) != 0;
-}
-
-inline int ReadStatusLine(int pid, const char* status_string) {
-  char buf[512];
-  ssize_t rsize = ReadProcFile(pid, "status", buf, sizeof(buf));
-  if (rsize <= 0)
-    return -1;
-  const char* line = strstr(buf, status_string);
-  PERFETTO_DCHECK(line);
-  return atoi(line + strlen(status_string));
-}
-
-}  // namespace
-
-int ReadTgid(int pid) {
-  return ReadStatusLine(pid, "\nTgid:");
-}
-
-int ReadPpid(int pid) {
-  return ReadStatusLine(pid, "\nPPid:");
-}
-
-std::unique_ptr<ProcessInfo> ReadProcessInfo(int pid) {
-  ProcessInfo* process = new ProcessInfo();
-  process->pid = pid;
-  // It's not enough to just null terminate this since cmdline uses null as
-  // the argument seperator:
-  char cmdline_buf[256]{};
-  ReadProcString(pid, "cmdline", cmdline_buf, sizeof(cmdline_buf));
-  if (cmdline_buf[0] == 0) {
-    // Nothing in cmdline_buf so read name from /comm instead.
-    char name[256];
-    ReadProcString(pid, "comm", name, sizeof(name));
-    process->cmdline.push_back(name);
-    process->in_kernel = true;
-  } else {
-    using perfetto::base::StringSplitter;
-    for (StringSplitter ss(cmdline_buf, sizeof(cmdline_buf), '\0'); ss.Next();)
-      process->cmdline.push_back(ss.cur_token());
-    ReadExePath(pid, process->exe, sizeof(process->exe));
-    process->is_app = IsApp(process->cmdline[0].c_str(), process->exe);
-  }
-  process->ppid = ReadPpid(pid);
-  return std::unique_ptr<ProcessInfo>(process);
-}
-
-void ReadProcessThreads(ProcessInfo* process) {
-  if (process->in_kernel)
-    return;
-
-  char tasks_path[64];
-  sprintf(tasks_path, "/proc/%d/task", process->pid);
-  ForEachPidInProcPath(tasks_path, [process](int tid) {
-    if (process->threads.count(tid))
-      return;
-    ThreadInfo thread = {tid, ""};
-    char task_comm[64];
-    sprintf(task_comm, "task/%d/comm", tid);
-    ReadProcString(process->pid, task_comm, thread.name, sizeof(thread.name));
-    if (thread.name[0] == '\0' && process->is_app)
-      strcpy(thread.name, "UI Thread");
-    process->threads[tid] = thread;
-  });
-}
-
-}  // namespace procfs_utils
diff --git a/src/process_stats/procfs_utils.h b/src/process_stats/procfs_utils.h
deleted file mode 100644
index 0a20c72..0000000
--- a/src/process_stats/procfs_utils.h
+++ /dev/null
@@ -1,27 +0,0 @@
-// Copyright 2017 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#ifndef SRC_PROCESS_STATS_PROCFS_UTILS_H_
-#define SRC_PROCESS_STATS_PROCFS_UTILS_H_
-
-#include <map>
-#include <memory>
-#include <string>
-
-#include "src/process_stats/process_info.h"
-
-namespace procfs_utils {
-
-using ProcessMap = std::map<int, std::unique_ptr<ProcessInfo>>;
-
-// ProcFS doesn't necessarly distinguish PID vs. TID, but all threads of a
-// process have the same Thread Group ID which is equal to Process ID.
-int ReadTgid(int pid);
-int ReadPpid(int pid);
-std::unique_ptr<ProcessInfo> ReadProcessInfo(int pid);
-void ReadProcessThreads(ProcessInfo* process);
-
-}  // namespace procfs_utils
-
-#endif  // SRC_PROCESS_STATS_PROCFS_UTILS_H_
diff --git a/src/traced/probes/BUILD.gn b/src/traced/probes/BUILD.gn
index a823fcf..43b4d0e 100644
--- a/src/traced/probes/BUILD.gn
+++ b/src/traced/probes/BUILD.gn
@@ -33,7 +33,6 @@
   deps = [
     "../../../gn:default_deps",
     "../../base",
-    "../../process_stats:process_stats",
     "../../tracing:tracing",
     "filesystem",
   ]
@@ -51,7 +50,6 @@
     ":probes_src",
     "../../../gn:default_deps",
     "../../../gn:gtest_deps",
-    "../../process_stats:process_stats",
     "../../tracing:unittests",
   ]
   sources = [
diff --git a/src/traced/probes/process_stats_data_source.cc b/src/traced/probes/process_stats_data_source.cc
index 53e7dd3..7d410a9 100644
--- a/src/traced/probes/process_stats_data_source.cc
+++ b/src/traced/probes/process_stats_data_source.cc
@@ -16,14 +16,53 @@
 
 #include "src/traced/probes/process_stats_data_source.h"
 
+#include <stdlib.h>
+
 #include <utility>
 
+#include "perfetto/base/file_utils.h"
+#include "perfetto/base/scoped_file.h"
+#include "perfetto/base/string_splitter.h"
 #include "perfetto/trace/trace_packet.pbzero.h"
-#include "src/process_stats/file_utils.h"
-#include "src/process_stats/procfs_utils.h"
+
+// TODO(primiano): the code in this file assumes that PIDs are never recycled
+// and that processes/threads never change names. Neither is always true.
+
+// The notion of PID in the Linux kernel is a bit confusing.
+// - PID: is really the thread id (for the main thread: PID == TID).
+// - TGID (thread group ID): is the Unix Process ID (the actual PID).
+// - PID == TGID for the main thread: the TID of the main thread is also the PID
+//   of the process.
+// So, in this file, |pid| might refer to either a process id or a thread id.
 
 namespace perfetto {
 
+namespace {
+
+bool IsNumeric(const char* str) {
+  if (!str || !*str)
+    return false;
+  for (const char* c = str; *c; c++) {
+    if (!isdigit(*c))
+      return false;
+  }
+  return true;
+}
+
+int32_t ReadNextNumericDir(DIR* dirp) {
+  while (struct dirent* dir_ent = readdir(dirp)) {
+    if (dir_ent->d_type == DT_DIR && IsNumeric(dir_ent->d_name))
+      return atoi(dir_ent->d_name);
+  }
+  return 0;
+}
+
+inline int ToInt(const std::string& str) {
+  return atoi(str.c_str());
+}
+
+}  // namespace
+
 ProcessStatsDataSource::ProcessStatsDataSource(
     TracingSessionID id,
     std::unique_ptr<TraceWriter> writer,
@@ -41,36 +80,37 @@
 }
 
 void ProcessStatsDataSource::WriteAllProcesses() {
-  auto trace_packet = writer_->NewTracePacket();
+  base::ScopedDir proc_dir(opendir("/proc"));
+  if (!proc_dir) {
+    PERFETTO_PLOG("Failed to opendir(/proc)");
+    return;
+  }
+  TraceWriter::TracePacketHandle trace_packet = writer_->NewTracePacket();
   auto* process_tree = trace_packet->set_process_tree();
-  std::set<int32_t>* seen_pids = &seen_pids_;
-
-  file_utils::ForEachPidInProcPath("/proc",
-                                   [this, process_tree, seen_pids](int pid) {
-                                     // ForEachPid will list all processes and
-                                     // threads. Here we want to iterate first
-                                     // only by processes (for which pid ==
-                                     // thread group id)
-                                     if (procfs_utils::ReadTgid(pid) != pid)
-                                       return;
-
-                                     WriteProcess(pid, process_tree);
-                                     seen_pids->insert(pid);
-                                   });
+  while (int32_t pid = ReadNextNumericDir(*proc_dir)) {
+    WriteProcessOrThread(pid, process_tree);
+    char task_path[255];
+    sprintf(task_path, "/proc/%d/task", pid);
+    base::ScopedDir task_dir(opendir(task_path));
+    if (!task_dir)
+      continue;
+    while (int32_t tid = ReadNextNumericDir(*task_dir))
+      WriteProcessOrThread(tid, process_tree);
+  }
 }
 
 void ProcessStatsDataSource::OnPids(const std::vector<int32_t>& pids) {
   TraceWriter::TracePacketHandle trace_packet{};
   protos::pbzero::ProcessTree* process_tree = nullptr;
+
   for (int32_t pid : pids) {
-    auto it_and_inserted = seen_pids_.emplace(pid);
-    if (!it_and_inserted.second)
+    if (seen_pids_.count(pid))
       continue;
     if (!process_tree) {
       trace_packet = writer_->NewTracePacket();
       process_tree = trace_packet->set_process_tree();
     }
-    WriteProcess(pid, process_tree);
+    WriteProcessOrThread(pid, process_tree);
   }
 }
 
@@ -78,25 +118,75 @@
   writer_->Flush();
 }
 
-// To be overidden for testing.
-std::unique_ptr<ProcessInfo> ProcessStatsDataSource::ReadProcessInfo(int pid) {
-  return procfs_utils::ReadProcessInfo(pid);
+void ProcessStatsDataSource::WriteProcessOrThread(
+    int32_t pid,
+    protos::pbzero::ProcessTree* tree) {
+  std::string proc_status = ReadProcPidFile(pid, "status");
+  if (proc_status.empty())
+    return;
+  int tgid = ToInt(ReadProcStatusEntry(proc_status, "Tgid:"));
+  if (tgid <= 0)
+    return;
+  if (!seen_pids_.count(tgid))
+    WriteProcess(tgid, proc_status, tree);
+  if (pid != tgid) {
+    PERFETTO_DCHECK(!seen_pids_.count(pid));
+    WriteThread(pid, tgid, proc_status, tree);
+  }
 }
 
 void ProcessStatsDataSource::WriteProcess(int32_t pid,
+                                          const std::string& proc_status,
                                           protos::pbzero::ProcessTree* tree) {
-  std::unique_ptr<ProcessInfo> process = ReadProcessInfo(pid);
-  procfs_utils::ReadProcessThreads(process.get());
-  auto* process_writer = tree->add_processes();
-  process_writer->set_pid(process->pid);
-  process_writer->set_ppid(process->ppid);
-  for (const auto& field : process->cmdline)
-    process_writer->add_cmdline(field.c_str());
-  for (auto& thread : process->threads) {
-    auto* thread_writer = process_writer->add_threads();
-    thread_writer->set_tid(thread.second.tid);
-    thread_writer->set_name(thread.second.name);
+  PERFETTO_DCHECK(ToInt(ReadProcStatusEntry(proc_status, "Tgid:")) == pid);
+  auto* proc = tree->add_processes();
+  proc->set_pid(pid);
+  proc->set_ppid(ToInt(ReadProcStatusEntry(proc_status, "PPid:")));
+
+  std::string cmdline = ReadProcPidFile(pid, "cmdline");
+  if (!cmdline.empty()) {
+    using base::StringSplitter;
+    for (StringSplitter ss(&cmdline[0], cmdline.size(), '\0'); ss.Next();)
+      proc->add_cmdline(ss.cur_token());
+  } else {
+    // Nothing in cmdline so use the thread name instead (which is == "comm").
+    proc->add_cmdline(ReadProcStatusEntry(proc_status, "Name:").c_str());
   }
+  seen_pids_.emplace(pid);
+}
+
+void ProcessStatsDataSource::WriteThread(int32_t tid,
+                                         int32_t tgid,
+                                         const std::string& proc_status,
+                                         protos::pbzero::ProcessTree* tree) {
+  auto* thread = tree->add_threads();
+  thread->set_tid(tid);
+  thread->set_tgid(tgid);
+  thread->set_name(ReadProcStatusEntry(proc_status, "Name:").c_str());
+  seen_pids_.emplace(tid);
+}
+
+std::string ProcessStatsDataSource::ReadProcPidFile(int32_t pid,
+                                                    const std::string& file) {
+  std::string contents;
+  contents.reserve(4096);
+  if (!base::ReadFile("/proc/" + std::to_string(pid) + "/" + file, &contents))
+    return "";
+  return contents;
+}
+
+std::string ProcessStatsDataSource::ReadProcStatusEntry(const std::string& buf,
+                                                        const char* key) {
+  auto begin = buf.find(key);
+  if (begin == std::string::npos)
+    return "";
+  begin = buf.find_first_not_of(" \t", begin + strlen(key));
+  if (begin == std::string::npos)
+    return "";
+  auto end = buf.find('\n', begin);
+  if (end == std::string::npos || end <= begin)
+    return "";
+  return buf.substr(begin, end - begin);
 }
 
 }  // namespace perfetto
diff --git a/src/traced/probes/process_stats_data_source.h b/src/traced/probes/process_stats_data_source.h
index 32d2057..4acff60 100644
--- a/src/traced/probes/process_stats_data_source.h
+++ b/src/traced/probes/process_stats_data_source.h
@@ -26,7 +26,6 @@
 #include "perfetto/tracing/core/basic_types.h"
 #include "perfetto/tracing/core/data_source_config.h"
 #include "perfetto/tracing/core/trace_writer.h"
-#include "src/process_stats/process_info.h"
 
 namespace perfetto {
 
@@ -46,19 +45,32 @@
   void Flush();
 
   // Virtual for testing.
-  virtual std::unique_ptr<ProcessInfo> ReadProcessInfo(int pid);
+  virtual std::string ReadProcPidFile(int32_t pid, const std::string& file);
 
  private:
   ProcessStatsDataSource(const ProcessStatsDataSource&) = delete;
   ProcessStatsDataSource& operator=(const ProcessStatsDataSource&) = delete;
 
-  void WriteProcess(int32_t pid, protos::pbzero::ProcessTree*);
+  void WriteProcess(int32_t pid,
+                    const std::string& proc_status,
+                    protos::pbzero::ProcessTree*);
+  void WriteThread(int32_t tid,
+                   int32_t tgid,
+                   const std::string& proc_status,
+                   protos::pbzero::ProcessTree*);
+  void WriteProcessOrThread(int32_t pid, protos::pbzero::ProcessTree*);
+  std::string ReadProcStatusEntry(const std::string& buf, const char* key);
 
   const TracingSessionID session_id_;
   std::unique_ptr<TraceWriter> writer_;
   const DataSourceConfig config_;
+
+  // This set contains PIDs as per the Linux kernel notion of a PID (which is
+  // really a TID). In practice this set will contain all TIDs for all processes
+  // seen, not just the main thread id (aka thread group ID).
   // TODO(b/76663469): Optimization: use a bitmap.
   std::set<int32_t> seen_pids_;
+
   base::WeakPtrFactory<ProcessStatsDataSource> weak_factory_;  // Keep last.
 };
 
diff --git a/src/traced/probes/process_stats_data_source_unittest.cc b/src/traced/probes/process_stats_data_source_unittest.cc
index 1d4e040..6766ec2 100644
--- a/src/traced/probes/process_stats_data_source_unittest.cc
+++ b/src/traced/probes/process_stats_data_source_unittest.cc
@@ -15,16 +15,16 @@
  */
 
 #include "src/traced/probes/process_stats_data_source.h"
-#include "perfetto/trace/trace_packet.pb.h"
-#include "perfetto/trace/trace_packet.pbzero.h"
-
-#include "src/process_stats/process_info.h"
-#include "src/tracing/core/trace_writer_for_testing.h"
-
 #include "gmock/gmock.h"
 #include "gtest/gtest.h"
+#include "perfetto/trace/trace_packet.pb.h"
+#include "perfetto/trace/trace_packet.pbzero.h"
+#include "src/tracing/core/trace_writer_for_testing.h"
 
+using ::testing::_;
 using ::testing::Invoke;
+using ::testing::Return;
+using ::testing::ElementsAreArray;
 
 namespace perfetto {
 namespace {
@@ -36,7 +36,7 @@
                              const DataSourceConfig& config)
       : ProcessStatsDataSource(id, std::move(writer), config) {}
 
-  MOCK_METHOD1(ReadProcessInfo, std::unique_ptr<ProcessInfo>(int pid));
+  MOCK_METHOD2(ReadProcPidFile, std::string(int32_t pid, const std::string&));
 };
 
 class ProcessStatsDataSourceTest : public ::testing::Test {
@@ -53,30 +53,64 @@
     return std::unique_ptr<TestProcessStatsDataSource>(
         new TestProcessStatsDataSource(0, std::move(writer), cfg));
   }
-
-  static std::unique_ptr<ProcessInfo> GetProcessInfo(int pid) {
-    ProcessInfo* process = new ProcessInfo();
-    process->pid = pid;
-    process->in_kernel = true;  // So that it doesn't try to read threads.
-    process->ppid = 0;
-    process->cmdline.push_back("test_process");
-    return std::unique_ptr<ProcessInfo>(process);
-  }
 };
 
-TEST_F(ProcessStatsDataSourceTest, TestWriteOnDemand) {
-  DataSourceConfig config;
-  auto data_source = GetProcessStatsDataSource(config);
-  EXPECT_CALL(*data_source, ReadProcessInfo(42))
-      .WillRepeatedly(Invoke(GetProcessInfo));
+TEST_F(ProcessStatsDataSourceTest, WriteOnceProcess) {
+  auto data_source = GetProcessStatsDataSource(DataSourceConfig());
+  EXPECT_CALL(*data_source, ReadProcPidFile(42, "status"))
+      .WillOnce(Return("Name: foo\nTgid:\t42\nPid:   42\nPPid:  17\n"));
+  EXPECT_CALL(*data_source, ReadProcPidFile(42, "cmdline"))
+      .WillOnce(Return(std::string("foo\0bar\0baz\0", 12)));
+
   data_source->OnPids({42});
   std::unique_ptr<protos::TracePacket> packet = writer_raw_->ParseProto();
   ASSERT_TRUE(packet->has_process_tree());
   ASSERT_EQ(packet->process_tree().processes_size(), 1);
   auto first_process = packet->process_tree().processes(0);
   ASSERT_EQ(first_process.pid(), 42);
-  ASSERT_EQ(first_process.ppid(), 0);
-  ASSERT_EQ(first_process.cmdline(0), std::string("test_process"));
+  ASSERT_EQ(first_process.ppid(), 17);
+  EXPECT_THAT(first_process.cmdline(), ElementsAreArray({"foo", "bar", "baz"}));
+}
+
+TEST_F(ProcessStatsDataSourceTest, DontRescanCachedPIDsAndTIDs) {
+  auto data_source = GetProcessStatsDataSource(DataSourceConfig());
+  for (int p : {10, 11, 12, 20, 21, 22, 30, 31, 32}) {
+    EXPECT_CALL(*data_source, ReadProcPidFile(p, "status"))
+        .WillOnce(Invoke([](int32_t pid, const std::string&) {
+          int32_t tgid = (pid / 10) * 10;
+          return "Name: \tthread_" + std::to_string(pid) +
+                 "\nTgid:  " + std::to_string(tgid) +
+                 "\nPid:   " + std::to_string(pid) + "\nPPid:  1\n";
+        }));
+    if (p % 10 == 0) {
+      std::string proc_name = "proc_" + std::to_string(p);
+      proc_name.resize(proc_name.size() + 1);  // Add a trailing \0.
+      EXPECT_CALL(*data_source, ReadProcPidFile(p, "cmdline"))
+          .WillOnce(Return(proc_name));
+    }
+  }
+
+  data_source->OnPids({10, 11, 12, 20, 21, 22, 10, 20, 11, 21});
+  data_source->OnPids({30});
+  data_source->OnPids({10, 30, 10, 31, 32});
+
+  std::unique_ptr<protos::TracePacket> packet = writer_raw_->ParseProto();
+  ASSERT_TRUE(packet->has_process_tree());
+  const auto& proceses = packet->process_tree().processes();
+  const auto& threads = packet->process_tree().threads();
+  ASSERT_EQ(proceses.size(), 3);
+  int tid_idx = 0;
+  for (int pid_idx = 0; pid_idx < 3; pid_idx++) {
+    int pid = (pid_idx + 1) * 10;
+    std::string proc_name = "proc_" + std::to_string(pid);
+    ASSERT_EQ(proceses.Get(pid_idx).pid(), pid);
+    ASSERT_EQ(proceses.Get(pid_idx).cmdline().Get(0), proc_name);
+    for (int tid = pid + 1; tid < pid + 3; tid++, tid_idx++) {
+      ASSERT_EQ(threads.Get(tid_idx).tid(), tid);
+      ASSERT_EQ(threads.Get(tid_idx).tgid(), pid);
+      ASSERT_EQ(threads.Get(tid_idx).name(), "thread_" + std::to_string(tid));
+    }
+  }
 }
 
 }  // namespace
diff --git a/tools/trace_to_text/main.cc b/tools/trace_to_text/main.cc
index dcbc20d..30bca4b 100644
--- a/tools/trace_to_text/main.cc
+++ b/tools/trace_to_text/main.cc
@@ -449,10 +449,11 @@
           const ProcessTree& tree = packet.process_tree();
           for (Process process : tree.processes()) {
             tids_in_tree.insert(process.pid());
-            for (ProcessTree::Thread thread : process.threads()) {
+            for (ProcessTree::Thread thread : process.threads_deprecated())
               tids_in_tree.insert(thread.tid());
-            }
           }
+          for (ProcessTree::Thread thread : tree.threads())
+            tids_in_tree.insert(thread.tid());
         }
 
         if (packet.has_inode_file_map()) {