blob: b92e9cec1a1e48b26c2617ad7c34c9ff8fb62729 [file] [log] [blame]
/*
* Copyright (C) 2018 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "perfetto/ext/base/watchdog.h"
#if PERFETTO_BUILDFLAG(PERFETTO_WATCHDOG)
#include <fcntl.h>
#include <poll.h>
#include <signal.h>
#include <stdint.h>
#include <stdlib.h>
#include <sys/syscall.h>
#include <sys/timerfd.h>
#include <unistd.h>
#include <algorithm>
#include <cinttypes>
#include <fstream>
#include <thread>
#include "perfetto/base/build_config.h"
#include "perfetto/base/logging.h"
#include "perfetto/base/thread_utils.h"
#include "perfetto/base/time.h"
#include "perfetto/ext/base/crash_keys.h"
#include "perfetto/ext/base/file_utils.h"
#include "perfetto/ext/base/scoped_file.h"
#include "perfetto/ext/base/utils.h"
namespace perfetto {
namespace base {
namespace {
constexpr uint32_t kDefaultPollingInterval = 30 * 1000;
base::CrashKey g_crash_key_reason("wdog_reason");
bool IsMultipleOf(uint32_t number, uint32_t divisor) {
return number >= divisor && number % divisor == 0;
}
double MeanForArray(const uint64_t array[], size_t size) {
uint64_t total = 0;
for (size_t i = 0; i < size; i++) {
total += array[i];
}
return static_cast<double>(total / size);
}
} // namespace
bool ReadProcStat(int fd, ProcStat* out) {
char c[512];
size_t c_pos = 0;
while (c_pos < sizeof(c) - 1) {
ssize_t rd = PERFETTO_EINTR(read(fd, c + c_pos, sizeof(c) - c_pos));
if (rd < 0) {
PERFETTO_ELOG("Failed to read stat file to enforce resource limits.");
return false;
}
if (rd == 0)
break;
c_pos += static_cast<size_t>(rd);
}
PERFETTO_CHECK(c_pos < sizeof(c));
c[c_pos] = '\0';
if (sscanf(c,
"%*d %*s %*c %*d %*d %*d %*d %*d %*u %*u %*u %*u %*u %lu "
"%lu %*d %*d %*d %*d %*d %*d %*u %*u %ld",
&out->utime, &out->stime, &out->rss_pages) != 3) {
PERFETTO_ELOG("Invalid stat format: %s", c);
return false;
}
return true;
}
Watchdog::Watchdog(uint32_t polling_interval_ms)
: polling_interval_ms_(polling_interval_ms) {}
Watchdog::~Watchdog() {
if (!thread_.joinable()) {
PERFETTO_DCHECK(!enabled_);
return;
}
PERFETTO_DCHECK(enabled_);
enabled_ = false;
// Rearm the timer to 1ns from now. This will cause the watchdog thread to
// wakeup from the poll() and see |enabled_| == false.
// This code path is used only in tests. In production code the watchdog is
// a singleton and is never destroyed.
struct itimerspec ts {};
ts.it_value.tv_sec = 0;
ts.it_value.tv_nsec = 1;
timerfd_settime(*timer_fd_, /*flags=*/0, &ts, nullptr);
thread_.join();
}
Watchdog* Watchdog::GetInstance() {
static Watchdog* watchdog = new Watchdog(kDefaultPollingInterval);
return watchdog;
}
// Can be called from any thread.
Watchdog::Timer Watchdog::CreateFatalTimer(uint32_t ms,
WatchdogCrashReason crash_reason) {
if (!enabled_.load(std::memory_order_relaxed))
return Watchdog::Timer(this, 0, crash_reason);
return Watchdog::Timer(this, ms, crash_reason);
}
// Can be called from any thread.
void Watchdog::AddFatalTimer(TimerData timer) {
std::lock_guard<std::mutex> guard(mutex_);
timers_.emplace_back(std::move(timer));
RearmTimerFd_Locked();
}
// Can be called from any thread.
void Watchdog::RemoveFatalTimer(TimerData timer) {
std::lock_guard<std::mutex> guard(mutex_);
for (auto it = timers_.begin(); it != timers_.end(); it++) {
if (*it == timer) {
timers_.erase(it);
break; // Remove only one. Doesn't matter which one.
}
}
RearmTimerFd_Locked();
}
void Watchdog::RearmTimerFd_Locked() {
if (!enabled_)
return;
auto it = std::min_element(timers_.begin(), timers_.end());
// We use one timerfd to handle all the oustanding |timers_|. Keep it armed
// to the task expiring soonest.
struct itimerspec ts {};
if (it != timers_.end()) {
ts.it_value = ToPosixTimespec(it->deadline);
}
// If |timers_| is empty (it == end()) |ts.it_value| will remain
// zero-initialized and that will disarm the timer in the call below.
int res = timerfd_settime(*timer_fd_, TFD_TIMER_ABSTIME, &ts, nullptr);
PERFETTO_DCHECK(res == 0);
}
void Watchdog::Start() {
std::lock_guard<std::mutex> guard(mutex_);
if (thread_.joinable()) {
PERFETTO_DCHECK(enabled_);
} else {
PERFETTO_DCHECK(!enabled_);
#if PERFETTO_BUILDFLAG(PERFETTO_OS_LINUX) || \
PERFETTO_BUILDFLAG(PERFETTO_OS_ANDROID)
// Kick the thread to start running but only on Android or Linux.
timer_fd_.reset(
timerfd_create(CLOCK_MONOTONIC, TFD_CLOEXEC | TFD_NONBLOCK));
if (!timer_fd_) {
PERFETTO_PLOG(
"timerfd_create failed, the Perfetto watchdog is not available");
return;
}
enabled_ = true;
RearmTimerFd_Locked(); // Deal with timers created before Start().
thread_ = std::thread(&Watchdog::ThreadMain, this);
#endif
}
}
void Watchdog::SetMemoryLimit(uint64_t bytes, uint32_t window_ms) {
// Update the fields under the lock.
std::lock_guard<std::mutex> guard(mutex_);
PERFETTO_CHECK(IsMultipleOf(window_ms, polling_interval_ms_) || bytes == 0);
size_t size = bytes == 0 ? 0 : window_ms / polling_interval_ms_ + 1;
memory_window_bytes_.Reset(size);
memory_limit_bytes_ = bytes;
}
void Watchdog::SetCpuLimit(uint32_t percentage, uint32_t window_ms) {
std::lock_guard<std::mutex> guard(mutex_);
PERFETTO_CHECK(percentage <= 100);
PERFETTO_CHECK(IsMultipleOf(window_ms, polling_interval_ms_) ||
percentage == 0);
size_t size = percentage == 0 ? 0 : window_ms / polling_interval_ms_ + 1;
cpu_window_time_ticks_.Reset(size);
cpu_limit_percentage_ = percentage;
}
void Watchdog::ThreadMain() {
// Register crash keys explicitly to avoid running out of slots at crash time.
g_crash_key_reason.Register();
base::ScopedFile stat_fd(base::OpenFile("/proc/self/stat", O_RDONLY));
if (!stat_fd) {
PERFETTO_ELOG("Failed to open stat file to enforce resource limits.");
return;
}
PERFETTO_DCHECK(timer_fd_);
constexpr uint8_t kFdCount = 1;
struct pollfd fds[kFdCount]{};
fds[0].fd = *timer_fd_;
fds[0].events = POLLIN;
for (;;) {
// We use the poll() timeout to drive the periodic ticks for the cpu/memory
// checks. The only other case when the poll() unblocks is when we crash
// (or have to quit via enabled_ == false, but that happens only in tests).
auto ret = poll(fds, kFdCount, static_cast<int>(polling_interval_ms_));
if (!enabled_)
return;
if (ret < 0) {
if (errno == ENOMEM || errno == EINTR) {
// Should happen extremely rarely.
std::this_thread::sleep_for(std::chrono::milliseconds(100));
continue;
}
PERFETTO_FATAL("watchdog poll() failed");
}
// If we get here either:
// 1. poll() timed out, in which case we should process cpu/mem guardrails.
// 2. A timer expired, in which case we shall crash.
uint64_t expired = 0; // Must be exactly 8 bytes.
auto res = PERFETTO_EINTR(read(*timer_fd_, &expired, sizeof(expired)));
PERFETTO_DCHECK((res < 0 && (errno == EAGAIN)) ||
(res == sizeof(expired) && expired > 0));
const auto now = GetWallTimeMs();
// Check if any of the timers expired.
int tid_to_kill = 0;
WatchdogCrashReason crash_reason{};
std::unique_lock<std::mutex> guard(mutex_);
for (const auto& timer : timers_) {
if (now >= timer.deadline) {
tid_to_kill = timer.thread_id;
crash_reason = timer.crash_reason;
break;
}
}
guard.unlock();
if (tid_to_kill)
SerializeLogsAndKillThread(tid_to_kill, crash_reason);
// Check CPU and memory guardrails (if enabled).
lseek(stat_fd.get(), 0, SEEK_SET);
ProcStat stat;
if (!ReadProcStat(stat_fd.get(), &stat))
continue;
uint64_t cpu_time = stat.utime + stat.stime;
uint64_t rss_bytes =
static_cast<uint64_t>(stat.rss_pages) * base::GetSysPageSize();
bool threshold_exceeded = false;
guard.lock();
if (CheckMemory_Locked(rss_bytes)) {
threshold_exceeded = true;
crash_reason = WatchdogCrashReason::kMemGuardrail;
} else if (CheckCpu_Locked(cpu_time)) {
threshold_exceeded = true;
crash_reason = WatchdogCrashReason::kCpuGuardrail;
}
guard.unlock();
if (threshold_exceeded)
SerializeLogsAndKillThread(getpid(), crash_reason);
}
}
void Watchdog::SerializeLogsAndKillThread(int tid,
WatchdogCrashReason crash_reason) {
g_crash_key_reason.Set(static_cast<int>(crash_reason));
// We are about to die. Serialize the logs into the crash buffer so the
// debuggerd crash handler picks them up and attaches to the bugreport.
// In the case of a PERFETTO_CHECK/PERFETTO_FATAL this is done in logging.h.
// But in the watchdog case, we don't hit that codepath and must do ourselves.
MaybeSerializeLastLogsForCrashReporting();
// Send a SIGABRT to the thread that armed the timer. This is to see the
// callstack of the thread that is stuck in a long task rather than the
// watchdog thread.
if (syscall(__NR_tgkill, getpid(), tid, SIGABRT) < 0) {
// At this point the process must die. If for any reason the tgkill doesn't
// work (e.g. the thread has disappeared), force a crash from here.
abort();
}
if (disable_kill_failsafe_for_testing_)
return;
// The tgkill() above will take some milliseconds to cause a crash, as it
// involves the kernel to queue the SIGABRT on the target thread (often the
// main thread, which is != watchdog thread) and do a scheduling round.
// If something goes wrong though (the target thread has signals masked or
// is stuck in an uninterruptible+wakekill syscall) force quit from this
// thread.
std::this_thread::sleep_for(std::chrono::seconds(10));
abort();
}
bool Watchdog::CheckMemory_Locked(uint64_t rss_bytes) {
if (memory_limit_bytes_ == 0)
return false;
// Add the current stat value to the ring buffer and check that the mean
// remains under our threshold.
if (memory_window_bytes_.Push(rss_bytes)) {
if (memory_window_bytes_.Mean() >
static_cast<double>(memory_limit_bytes_)) {
PERFETTO_ELOG(
"Memory watchdog trigger. Memory window of %f bytes is above the "
"%" PRIu64 " bytes limit.",
memory_window_bytes_.Mean(), memory_limit_bytes_);
return true;
}
}
return false;
}
bool Watchdog::CheckCpu_Locked(uint64_t cpu_time) {
if (cpu_limit_percentage_ == 0)
return false;
// Add the cpu time to the ring buffer.
if (cpu_window_time_ticks_.Push(cpu_time)) {
// Compute the percentage over the whole window and check that it remains
// under the threshold.
uint64_t difference_ticks = cpu_window_time_ticks_.NewestWhenFull() -
cpu_window_time_ticks_.OldestWhenFull();
double window_interval_ticks =
(static_cast<double>(WindowTimeForRingBuffer(cpu_window_time_ticks_)) /
1000.0) *
static_cast<double>(sysconf(_SC_CLK_TCK));
double percentage = static_cast<double>(difference_ticks) /
static_cast<double>(window_interval_ticks) * 100;
if (percentage > cpu_limit_percentage_) {
PERFETTO_ELOG("CPU watchdog trigger. %f%% CPU use is above the %" PRIu32
"%% CPU limit.",
percentage, cpu_limit_percentage_);
return true;
}
}
return false;
}
uint32_t Watchdog::WindowTimeForRingBuffer(const WindowedInterval& window) {
return static_cast<uint32_t>(window.size() - 1) * polling_interval_ms_;
}
bool Watchdog::WindowedInterval::Push(uint64_t sample) {
// Add the sample to the current position in the ring buffer.
buffer_[position_] = sample;
// Update the position with next one circularily.
position_ = (position_ + 1) % size_;
// Set the filled flag the first time we wrap.
filled_ = filled_ || position_ == 0;
return filled_;
}
double Watchdog::WindowedInterval::Mean() const {
return MeanForArray(buffer_.get(), size_);
}
void Watchdog::WindowedInterval::Clear() {
position_ = 0;
buffer_.reset(new uint64_t[size_]());
}
void Watchdog::WindowedInterval::Reset(size_t new_size) {
position_ = 0;
size_ = new_size;
buffer_.reset(new_size == 0 ? nullptr : new uint64_t[new_size]());
}
Watchdog::Timer::Timer(Watchdog* watchdog,
uint32_t ms,
WatchdogCrashReason crash_reason)
: watchdog_(watchdog) {
if (!ms)
return; // No-op timer created when the watchdog is disabled.
timer_data_.deadline = GetWallTimeMs() + std::chrono::milliseconds(ms);
timer_data_.thread_id = GetThreadId();
timer_data_.crash_reason = crash_reason;
PERFETTO_DCHECK(watchdog_);
watchdog_->AddFatalTimer(timer_data_);
}
Watchdog::Timer::~Timer() {
if (timer_data_.deadline.count())
watchdog_->RemoveFatalTimer(timer_data_);
}
Watchdog::Timer::Timer(Timer&& other) noexcept {
watchdog_ = std::move(other.watchdog_);
other.watchdog_ = nullptr;
timer_data_ = std::move(other.timer_data_);
other.timer_data_ = TimerData();
}
} // namespace base
} // namespace perfetto
#endif // PERFETTO_BUILDFLAG(PERFETTO_WATCHDOG)