base: Reland RTFutex for android (#2865)
Bug: 443178555
diff --git a/gn/perfetto.gni b/gn/perfetto.gni
index a79d0be..2497421 100644
--- a/gn/perfetto.gni
+++ b/gn/perfetto.gni
@@ -267,16 +267,15 @@
perfetto_build_standalone && !is_perfetto_build_generator &&
defined(use_custom_libcxx) && use_custom_libcxx
- # Enables the use of priority-inheritance mutexes via PTHREAD_PRIO_INHERIT.
+ # Enables the use of priority-inheritance mutexes via PTHREAD_PRIO_INHERIT
+ # or a wrapper around PI futexes (depending on the OS).
# Note that on Android platform (non-standalone) builds this flag is ignored
- # and the Android flag "use_rt_mutex" is used instead (perfetto_flags.aconfig)
+ # and the Android flags "use_rt_mutex" and "use_rt_futex" are used instead (perfetto_flags.aconfig)
# This is disabled in chromium, because the BPF-sandbox allows PI-futex only
# when a field-trial is enabled, which is incompatible with a build time flag.
enable_perfetto_rt_mutex =
- (!is_wasm &&
- (((build_with_chromium && is_android) || perfetto_build_standalone) &&
- (current_cpu == "x64" || current_cpu == "arm64"))) ||
- is_perfetto_build_generator
+ !is_wasm && (perfetto_build_standalone || is_perfetto_build_generator) &&
+ !build_with_chromium
# This flag is used for the migration of UnixTaskRunner -> LockFreeTaskRunner.
# It determines whether MaybeLockFreeTaskRunner is backed by UnixTaskRunner
diff --git a/include/perfetto/ext/base/flags.h b/include/perfetto/ext/base/flags.h
index ca71497..ffb36b1 100644
--- a/include/perfetto/ext/base/flags.h
+++ b/include/perfetto/ext/base/flags.h
@@ -32,17 +32,19 @@
// in `perfetto_flags.aconfig`.
// The second argument is the default value of the flag in non-Android platform
// contexts.
+//
+// Note: for use_rt_mutex and use_rt_futex, the source of truth for non-Android
+// platform builds is rt_mutex.h, not the defaults listed here.
#define PERFETTO_READ_ONLY_FLAGS(X) \
X(test_read_only_flag, NonAndroidPlatformDefault_FALSE) \
X(use_murmur_hash_for_flat_hash_map, NonAndroidPlatformDefault_TRUE) \
X(ftrace_clear_offline_cpus_only, NonAndroidPlatformDefault_TRUE) \
- X(use_rt_mutex, PERFETTO_BUILDFLAG(PERFETTO_ENABLE_RT_MUTEX) \
- ? NonAndroidPlatformDefault_TRUE \
- : NonAndroidPlatformDefault_FALSE) \
X(use_lockfree_taskrunner, \
PERFETTO_BUILDFLAG(PERFETTO_ENABLE_LOCKFREE_TASKRUNNER) \
? NonAndroidPlatformDefault_TRUE \
- : NonAndroidPlatformDefault_FALSE)
+ : NonAndroidPlatformDefault_FALSE) \
+ X(use_rt_mutex, NonAndroidPlatformDefault_FALSE) \
+ X(use_rt_futex, NonAndroidPlatformDefault_FALSE)
////////////////////////////////////////////////////////////////////////////////
// //
diff --git a/include/perfetto/ext/base/rt_mutex.h b/include/perfetto/ext/base/rt_mutex.h
index fa65e50..248f6ad 100644
--- a/include/perfetto/ext/base/rt_mutex.h
+++ b/include/perfetto/ext/base/rt_mutex.h
@@ -23,8 +23,8 @@
// In the contended case RtMutex is generally slower than a std::mutex (or any
// non-RT implementation).
// Under the hoods this class does the following:
-// - Linux/Android: it uses PI futexes.
-// - MacOS/iOS: it uses pthread_mutex with PTHREAD_PRIO_INHERIT.
+// - Android: it uses PI futexes.
+// - Linux/MacOS/iOS: it uses pthread_mutex with PTHREAD_PRIO_INHERIT.
// - Other platforms: falls back on a standard std::mutex. On Windows 11+
// std::mutex has effectively PI semantics due to AutoBoost
// https://github.com/MicrosoftDocs/win32/commit/a43cb3b5039c5cfc53642bfcea174003a2f1168f
@@ -34,13 +34,49 @@
#include "perfetto/ext/base/flags.h"
#include "perfetto/public/compiler.h"
-#if PERFETTO_BUILDFLAG(PERFETTO_OS_ANDROID) || \
- PERFETTO_BUILDFLAG(PERFETTO_OS_LINUX) || \
- PERFETTO_BUILDFLAG(PERFETTO_OS_APPLE)
-#define PERFETTO_HAS_POSIX_RT_MUTEX() true
-#else
-#define PERFETTO_HAS_POSIX_RT_MUTEX() false
+#define _PERFETTO_MUTEX_MODE_STD 0
+#define _PERFETTO_MUTEX_MODE_RT_FUTEX 1
+#define _PERFETTO_MUTEX_MODE_RT_MUTEX 2
+
+// The logic below determines which mutex implementation to use.
+// For Android platform builds, the choice is controlled by aconfig flags.
+// For other builds, it's determined by OS support and GN build arguments.
+//
+// Rationale for platform-specific choices:
+// 1. `RtFutex` is enabled only on Android because it relies on `gettid()` being
+// a cheap thread-local storage access provided by Bionic. On Linux with
+// glibc, `gettid()` is a full syscall, making the pthread-based
+// implementation faster.
+// 2. The pthread-based `RtPosixMutex` is not viable on all Android versions, as
+// `pthread_mutexattr_setprotocol` was introduced in API level 28. Using
+// `dlsym` to backport it can lead to deadlocks with the loader lock if
+// tracing is initialized from a static constructor (see b/443178555).
+#if PERFETTO_BUILDFLAG(PERFETTO_OS_ANDROID) && \
+ PERFETTO_BUILDFLAG(PERFETTO_ANDROID_BUILD)
+#if PERFETTO_FLAGS_USE_RT_FUTEX
+#define _PERFETTO_MUTEX_MODE _PERFETTO_MUTEX_MODE_RT_FUTEX
+#elif PERFETTO_FLAGS_USE_RT_MUTEX
+#define _PERFETTO_MUTEX_MODE _PERFETTO_MUTEX_MODE_RT_MUTEX
#endif
+#elif PERFETTO_BUILDFLAG(PERFETTO_ENABLE_RT_MUTEX)
+#if PERFETTO_BUILDFLAG(PERFETTO_OS_ANDROID)
+#define _PERFETTO_MUTEX_MODE _PERFETTO_MUTEX_MODE_RT_FUTEX
+#elif PERFETTO_BUILDFLAG(PERFETTO_OS_LINUX) || \
+ PERFETTO_BUILDFLAG(PERFETTO_OS_APPLE)
+#define _PERFETTO_MUTEX_MODE _PERFETTO_MUTEX_MODE_RT_MUTEX
+#endif
+#endif
+
+// If no RT implementation was selected, default to std::mutex.
+#ifndef _PERFETTO_MUTEX_MODE
+#define _PERFETTO_MUTEX_MODE _PERFETTO_MUTEX_MODE_STD
+#endif
+
+// Public macros for conditional compilation based on the selected mutex type.
+#define PERFETTO_HAS_POSIX_RT_MUTEX() \
+ (_PERFETTO_MUTEX_MODE == _PERFETTO_MUTEX_MODE_RT_MUTEX)
+#define PERFETTO_HAS_RT_FUTEX() \
+ (_PERFETTO_MUTEX_MODE == _PERFETTO_MUTEX_MODE_RT_FUTEX)
#include <atomic>
#include <mutex>
@@ -50,10 +86,95 @@
#include <pthread.h>
#endif
+#if !PERFETTO_BUILDFLAG(PERFETTO_OS_WIN)
+#include <unistd.h> // For gettid().
+#endif
+
namespace perfetto::base {
namespace internal {
+#if PERFETTO_HAS_RT_FUTEX()
+// A wrapper around PI Futexes. A futex is a wrapper around an atomic integer
+// with an ABI shared with the kernel to handle the slowpath in the cases when
+// the mutex is held, or we find out that there are waiters queued when we
+// unlock. The operating principle is the following:
+// - In the no-contention case, a futex boils down to an atomic
+// compare-and-exchange, without involving the kernel.
+// - If a lock is contended at acquire time, we have to enter the kernel to
+// suspend our execution and join a wait chain.
+// - It could still happen that we acquire the mutex via the fastpath (without
+// involving the kernel) but other waiters might queue up while we hold the
+// mutex. In that case the kernel will add a bit to the atomic int. That bit
+// will cause the unlock() compare-and-exchange to fail (because it no longer
+// matches our tid) which in turn will signal us to do a syscall to notify the
+// waiters.
+class PERFETTO_LOCKABLE RtFutex {
+ public:
+ RtFutex() { PERFETTO_TSAN_MUTEX_CREATE(this, __tsan_mutex_not_static); }
+ ~RtFutex() { PERFETTO_TSAN_MUTEX_DESTROY(this, __tsan_mutex_not_static); }
+
+ // Neither copyable nor movable. Copy doesn't make sense. Move isn't feasible
+ // because the pointer to the atomic integer is the handle used by the kernel
+ // to set up the wait chain. A movable futex would require the atomic integer
+ // to be heap allocated, adding an indirection layer that is not needed in
+ // most cases. If you really need a movable RtFutex, wrap it in a unique_ptr.
+ RtFutex(const RtFutex&) = delete;
+ RtFutex& operator=(const RtFutex&) = delete;
+ RtFutex(RtFutex&&) = delete;
+ RtFutex& operator=(RtFutex&&) = delete;
+
+ inline bool TryLockFastpath() noexcept {  // Userspace-only attempt: one CAS.
+ int expected = 0;  // 0 is the unlocked value.
+ return lock_.compare_exchange_strong(expected, ::gettid(),
+ std::memory_order_acquire,
+ std::memory_order_relaxed);
+ }
+
+ bool try_lock() noexcept PERFETTO_EXCLUSIVE_TRYLOCK_FUNCTION(true) {
+ PERFETTO_TSAN_MUTEX_PRE_LOCK(this, __tsan_mutex_try_lock);
+ if (PERFETTO_LIKELY(TryLockFastpath()) || TryLockSlowpath()) {  // CAS first.
+ PERFETTO_TSAN_MUTEX_POST_LOCK(this, __tsan_mutex_try_lock, 0);
+ return true;
+ }
+ PERFETTO_TSAN_MUTEX_POST_LOCK(
+ this, __tsan_mutex_try_lock | __tsan_mutex_try_lock_failed, 0);
+ return false;
+ }
+
+ void lock() PERFETTO_EXCLUSIVE_LOCK_FUNCTION() {
+ PERFETTO_TSAN_MUTEX_PRE_LOCK(this, 0);
+ if (!PERFETTO_LIKELY(TryLockFastpath())) {
+ LockSlowpath();  // The CAS failed: acquire through the futex syscall.
+ }
+ PERFETTO_TSAN_MUTEX_POST_LOCK(this, 0, 0);
+ }
+
+ void unlock() noexcept PERFETTO_UNLOCK_FUNCTION() {
+ PERFETTO_TSAN_MUTEX_PRE_UNLOCK(this, 0);
+ int expected = ::gettid();  // What lock_ holds if we own it with no waiters.
+ // If the current value is exactly our tid there are no waiters, so a single
+ // CAS back to 0 releases the lock without a syscall.
+ if (!PERFETTO_LIKELY(lock_.compare_exchange_strong(
+ expected, 0, std::memory_order_release,
+ std::memory_order_relaxed))) {
+ // The tid doesn't match because the kernel set the FUTEX_WAITERS bit.
+ // There are waiters, tell the kernel to notify them and unlock.
+ UnlockSlowpath();
+ }
+ PERFETTO_TSAN_MUTEX_POST_UNLOCK(this, 0);
+ }
+
+ private:
+ std::atomic<int> lock_{};  // 0 = unlocked; otherwise written with the owner's tid.
+
+ void LockSlowpath();  // Slow paths are defined out-of-line in rt_mutex.cc.
+ bool TryLockSlowpath();
+ void UnlockSlowpath();
+};
+
+#endif // PERFETTO_HAS_RT_FUTEX
+
#if PERFETTO_HAS_POSIX_RT_MUTEX()
class PERFETTO_LOCKABLE RtPosixMutex {
public:
@@ -76,17 +197,16 @@
#endif // PERFETTO_HAS_POSIX_RT_MUTEX
} // namespace internal
-// Pick the best implementation for the target platform.
-// See comments in the top of the doc.
-#if PERFETTO_HAS_POSIX_RT_MUTEX()
-using RtMutex = internal::RtPosixMutex;
+// Select the best real-time mutex implementation for the target platform, or
+// fall back to std::mutex if none is available.
+#if PERFETTO_HAS_RT_FUTEX()
+using MaybeRtMutex = internal::RtFutex;
+#elif PERFETTO_HAS_POSIX_RT_MUTEX()
+using MaybeRtMutex = internal::RtPosixMutex;
#else
-using RtMutex = std::mutex;
+using MaybeRtMutex = std::mutex;
#endif
-using MaybeRtMutex =
- std::conditional_t<base::flags::use_rt_mutex, RtMutex, std::mutex>;
-
} // namespace perfetto::base
#endif // INCLUDE_PERFETTO_EXT_BASE_RT_MUTEX_H_
diff --git a/perfetto_flags.aconfig b/perfetto_flags.aconfig
index 9f0af1d..45c5e65 100644
--- a/perfetto_flags.aconfig
+++ b/perfetto_flags.aconfig
@@ -42,3 +42,10 @@
bug: "441118768"
is_fixed_read_only: true
}
+flag {
+ name: "use_rt_futex"
+ namespace: "perfetto"
+ description: "Controls whether base::MaybeRtMutex will use base::RtFutex or resolved type of base::MaybeRtMutex for android."
+ bug: "443948543"
+ is_fixed_read_only: true
+}
diff --git a/src/base/rt_mutex.cc b/src/base/rt_mutex.cc
index cc84a55..5551fb5 100644
--- a/src/base/rt_mutex.cc
+++ b/src/base/rt_mutex.cc
@@ -21,37 +21,53 @@
#include "perfetto/base/logging.h"
#include "perfetto/ext/base/utils.h"
-#if PERFETTO_BUILDFLAG(PERFETTO_OS_ANDROID)
-#include <dlfcn.h>
+#if PERFETTO_HAS_RT_FUTEX()
+#include <linux/futex.h>
+#include <sys/syscall.h>
+#include <unistd.h>
#endif
namespace perfetto::base {
namespace internal {
+#if PERFETTO_HAS_RT_FUTEX()
+
+void RtFutex::LockSlowpath() {  // Called by lock() when the fast-path CAS fails.
+ auto res = PERFETTO_EINTR(
+ syscall(SYS_futex, &lock_, FUTEX_LOCK_PI_PRIVATE, 0, nullptr));
+ PERFETTO_CHECK(res == 0);  // Any error from the futex call is treated as a bug.
+}
+
+bool RtFutex::TryLockSlowpath() {
+ // Called by try_lock() after the userspace CAS failed. Returns true iff the
+ // kernel granted the lock; never blocks waiting for the owner.
+ auto res = PERFETTO_EINTR(
+ syscall(SYS_futex, &lock_, FUTEX_TRYLOCK_PI_PRIVATE, 0, nullptr));
+ if (res == 0)
+ return true;
+ // Expected "lock not acquired" outcomes. EAGAIN (== EWOULDBLOCK on Linux) is
+ // a documented FUTEX_TRYLOCK_PI error in futex(2); it must make try_lock()
+ // return false rather than fall through to the fatal path below.
+ if (errno == EAGAIN || errno == EBUSY || errno == EDEADLK)
+ return false;
+ PERFETTO_FATAL("FUTEX_TRYLOCK_PI_PRIVATE failed");
+}
+
+void RtFutex::UnlockSlowpath() {  // Called by unlock() when the release CAS fails.
+ auto res = PERFETTO_EINTR(
+ syscall(SYS_futex, &lock_, FUTEX_UNLOCK_PI_PRIVATE, 0, nullptr));
+ PERFETTO_CHECK(res == 0);  // Any error from the futex call is treated as a bug.
+}
+
+#endif // PERFETTO_HAS_RT_FUTEX
+
#if PERFETTO_HAS_POSIX_RT_MUTEX()
RtPosixMutex::RtPosixMutex() noexcept {
- pthread_mutexattr_t at{};
- PERFETTO_CHECK(pthread_mutexattr_init(&at) == 0);
#if PERFETTO_BUILDFLAG(PERFETTO_OS_ANDROID) && __ANDROID_API__ < 28
// pthread_mutexattr_setprotocol is only available on API 28.
- using SetprotocolFuncT = int (*)(pthread_mutexattr_t*, int);
- static auto setprotocol_func = reinterpret_cast<SetprotocolFuncT>(
- dlsym(RTLD_DEFAULT, "pthread_mutexattr_setprotocol"));
- if (setprotocol_func) {
- PERFETTO_CHECK(setprotocol_func(&at, PTHREAD_PRIO_INHERIT) == 0);
- } else {
- static uint64_t log_once = 0;
- if (log_once++ == 0) {
- PERFETTO_LOG(
- "Priority-inheritance RtMutex is not available in this version of "
- "Android.");
- }
- }
-#else // Not Android (but POSIX RT)
- PERFETTO_CHECK(pthread_mutexattr_setprotocol(&at, PTHREAD_PRIO_INHERIT) == 0);
+#error \
+ "Priority-inheritance RtMutex is not available in this version of Android."
#endif
+ pthread_mutexattr_t at{};
+ PERFETTO_CHECK(pthread_mutexattr_init(&at) == 0);
+ PERFETTO_CHECK(pthread_mutexattr_setprotocol(&at, PTHREAD_PRIO_INHERIT) == 0);
PERFETTO_CHECK(pthread_mutex_init(&mutex_, &at) == 0);
}
diff --git a/src/base/rt_mutex_benchmark.cc b/src/base/rt_mutex_benchmark.cc
index 473d339..453fcbe 100644
--- a/src/base/rt_mutex_benchmark.cc
+++ b/src/base/rt_mutex_benchmark.cc
@@ -102,6 +102,12 @@
BENCHMARK_TEMPLATE(BM_RtMutex_NoContention, std::mutex)->Apply(BenchmarkArgs);
BENCHMARK_TEMPLATE(BM_RtMutex_Contention, std::mutex)->Apply(BenchmarkArgs);
+#if PERFETTO_HAS_RT_FUTEX()
+using perfetto::base::internal::RtFutex;
+BENCHMARK_TEMPLATE(BM_RtMutex_NoContention, RtFutex)->Apply(BenchmarkArgs);
+BENCHMARK_TEMPLATE(BM_RtMutex_Contention, RtFutex)->Apply(BenchmarkArgs);
+#endif
+
#if PERFETTO_HAS_POSIX_RT_MUTEX()
using perfetto::base::internal::RtPosixMutex;
BENCHMARK_TEMPLATE(BM_RtMutex_NoContention, RtPosixMutex)->Apply(BenchmarkArgs);
diff --git a/src/base/rt_mutex_unittest.cc b/src/base/rt_mutex_unittest.cc
index cd4afdb..05a8b08 100644
--- a/src/base/rt_mutex_unittest.cc
+++ b/src/base/rt_mutex_unittest.cc
@@ -15,6 +15,7 @@
*/
#include "perfetto/ext/base/rt_mutex.h"
+#include "perfetto/ext/base/flags.h"
#include "test/gtest_and_gmock.h"
@@ -35,6 +36,10 @@
,
internal::RtPosixMutex
#endif
+#if PERFETTO_HAS_RT_FUTEX()
+ ,
+ internal::RtFutex
+#endif
>;
class NameGenerator {
@@ -47,6 +52,10 @@
if constexpr (std::is_same_v<T, internal::RtPosixMutex>)
return "RtPosix";
#endif
+#if PERFETTO_HAS_RT_FUTEX()
+ if constexpr (std::is_same_v<T, internal::RtFutex>)
+ return "RtFutex";
+#endif
}
};