Merge "base: Make UUID more unique" into main
diff --git a/src/base/uuid.cc b/src/base/uuid.cc
index 1f1ca03..180a651 100644
--- a/src/base/uuid.cc
+++ b/src/base/uuid.cc
@@ -28,19 +28,36 @@
'8', '9', 'a', 'b', 'c', 'd', 'e', 'f'};
} // namespace
-// See https://www.ietf.org/rfc/rfc4122.txt
+// A globally unique 128-bit number.
+// In the early days of perfetto we were (sorta) respecting rfc4122. Later we
+// started replacing the LSB of the UUID with the statsd subscription ID in
+// other parts of the codebase (see perfetto_cmd.cc) for the convenience of
+// trace lookups, so rfc4122 made no sense as it just reduced entropy.
Uuid Uuidv4() {
- static std::minstd_rand rng(static_cast<uint32_t>(GetBootTimeNs().count()));
+ // Mix different sources of entropy to reduce the chances of collisions.
+ // Only using boot time is not enough. Under the assumption that most traces
+ // are started around the same time at boot, within a 1s window, the birthday
+ // paradox gives a chance of 90% collisions with 70k traces over a 1e9 space
+ // (Number of ns in a 1s window).
+ // &kHexmap >> 14 is used to feed use indirectly ASLR as a source of entropy.
+ // We deliberately don't use /dev/urandom as that might block for
+ // unpredictable time if the system is idle.
+ // The UUID does NOT need to be cryptographically secure, but random enough
+ // to avoid collisions across a large number of devices.
+ static std::minstd_rand rng(
+ static_cast<uint32_t>(static_cast<uint64_t>(GetBootTimeNs().count()) ^
+ static_cast<uint64_t>(GetWallTimeNs().count()) ^
+ (reinterpret_cast<uintptr_t>(&kHexmap) >> 14)));
Uuid uuid;
auto& data = *uuid.data();
- for (size_t i = 0; i < 16; ++i)
- data[i] = static_cast<uint8_t>(rng());
-
- // version:
- data[6] = (data[6] & 0x0f) | 0x40;
- // clock_seq_hi_and_reserved:
- data[8] = (data[8] & 0x3f) | 0x80;
+ for (size_t i = 0; i < sizeof(data);) {
+ // Note: the 32-th bit of rng() is always 0 as minstd_rand operates modulo
+ // 2**31. Fill in blocks of 16b rather than 32b to not lose 1b of entropy.
+ const auto rnd_data = static_cast<uint16_t>(rng());
+ memcpy(&data[i], &rnd_data, sizeof(rnd_data));
+ i += sizeof(rnd_data);
+ }
return uuid;
}
diff --git a/src/base/uuid_unittest.cc b/src/base/uuid_unittest.cc
index 6cd22c9..ad96f1c 100644
--- a/src/base/uuid_unittest.cc
+++ b/src/base/uuid_unittest.cc
@@ -16,8 +16,10 @@
#include "perfetto/ext/base/uuid.h"
+#include <array>
#include <optional>
+#include "perfetto/base/logging.h"
#include "test/gtest_and_gmock.h"
namespace perfetto {
@@ -83,6 +85,36 @@
EXPECT_TRUE(uuid);
}
+// Generate kRounds UUIDs and check that, for each bit, we see roughly as many
+// zeros as ones.
+// Marking as DISABLED as this really checks the STD implementation not our
+// code. Invoke manually only when needed.
+TEST(UuidTest, DISABLED_BitRandomDistribution) {
+ const int kRounds = 100000;
+ std::array<int64_t, 128> bit_count{};
+ for (int i = 0; i < kRounds; i++) {
+ Uuid uuid = Uuidv4();
+ for (size_t b = 0; b < 64; b++) {
+ bit_count[b] += (uint64_t(uuid.lsb()) & (1ull << b)) ? 1 : -1;
+ bit_count[64 + b] += (uint64_t(uuid.msb()) & (1ull << b)) ? 1 : -1;
+ }
+ }
+
+ // By adding +1 / -1 for each one/zero, `bit_count` contains for each bit,
+ // their embalance. In an ideal world we expect `bit_count` to be 0 at each
+ // position. In practice we accept a 2% embalance to pass the test.
+ int64_t max_diff = 0;
+ for (size_t i = 0; i < bit_count.size(); i++)
+ max_diff = std::max(max_diff, std::abs(bit_count[i]));
+
+ const double diff_pct =
+ 100.0 * static_cast<double>(max_diff) / static_cast<double>(kRounds);
+ PERFETTO_DLOG("Max bit embalance: %.2f %%", diff_pct);
+
+ // Local runs show a 1% embalance. We take a 5x margin for the test.
+ ASSERT_LT(diff_pct, 5.0);
+}
+
} // namespace
} // namespace base
} // namespace perfetto