Merge "base: Make UUID more unique" into main
diff --git a/src/base/uuid.cc b/src/base/uuid.cc
index 1f1ca03..180a651 100644
--- a/src/base/uuid.cc
+++ b/src/base/uuid.cc
@@ -28,19 +28,36 @@
                             '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'};
 }  // namespace
 
-// See https://www.ietf.org/rfc/rfc4122.txt
+// A globally unique 128-bit number.
+// In the early days of perfetto we were (sorta) respecting rfc4122. Later we
+// started replacing the LSB of the UUID with the statsd subscription ID in
+// other parts of the codebase (see perfetto_cmd.cc) for the convenience of
+// trace lookups, so rfc4122 made no sense as it just reduced entropy.
 Uuid Uuidv4() {
-  static std::minstd_rand rng(static_cast<uint32_t>(GetBootTimeNs().count()));
+  // Mix different sources of entropy to reduce the chances of collisions.
+  // Only using boot time is not enough. Under the assumption that most traces
+  // are started around the same time at boot, within a 1s window, the birthday
+  // paradox gives a chance of 90% collisions with 70k traces over a 1e9 space
+  // (Number of ns in a 1s window).
+  // &kHexmap >> 14 is used to feed use indirectly ASLR as a source of entropy.
+  // We deliberately don't use /dev/urandom as that might block for
+  // unpredictable time if the system is idle.
+  // The UUID does NOT need to be cryptographically secure, but random enough
+  // to avoid collisions across a large number of devices.
+  static std::minstd_rand rng(
+      static_cast<uint32_t>(static_cast<uint64_t>(GetBootTimeNs().count()) ^
+                            static_cast<uint64_t>(GetWallTimeNs().count()) ^
+                            (reinterpret_cast<uintptr_t>(&kHexmap) >> 14)));
   Uuid uuid;
   auto& data = *uuid.data();
 
-  for (size_t i = 0; i < 16; ++i)
-    data[i] = static_cast<uint8_t>(rng());
-
-  // version:
-  data[6] = (data[6] & 0x0f) | 0x40;
-  // clock_seq_hi_and_reserved:
-  data[8] = (data[8] & 0x3f) | 0x80;
+  for (size_t i = 0; i < sizeof(data);) {
+    // Note: the 32-th bit of rng() is always 0 as minstd_rand operates modulo
+    // 2**31. Fill in blocks of 16b rather than 32b to not lose 1b of entropy.
+    const auto rnd_data = static_cast<uint16_t>(rng());
+    memcpy(&data[i], &rnd_data, sizeof(rnd_data));
+    i += sizeof(rnd_data);
+  }
 
   return uuid;
 }
diff --git a/src/base/uuid_unittest.cc b/src/base/uuid_unittest.cc
index 6cd22c9..ad96f1c 100644
--- a/src/base/uuid_unittest.cc
+++ b/src/base/uuid_unittest.cc
@@ -16,8 +16,10 @@
 
 #include "perfetto/ext/base/uuid.h"
 
+#include <array>
 #include <optional>
 
+#include "perfetto/base/logging.h"
 #include "test/gtest_and_gmock.h"
 
 namespace perfetto {
@@ -83,6 +85,36 @@
   EXPECT_TRUE(uuid);
 }
 
+// Generate kRounds UUIDs and check that, for each bit, we see roughly as many
+// zeros as ones.
+// Marking as DISABLED as this really checks the STD implementation not our
+// code. Invoke manually only when needed.
+TEST(UuidTest, DISABLED_BitRandomDistribution) {
+  const int kRounds = 100000;
+  std::array<int64_t, 128> bit_count{};
+  for (int i = 0; i < kRounds; i++) {
+    Uuid uuid = Uuidv4();
+    for (size_t b = 0; b < 64; b++) {
+      bit_count[b] += (uint64_t(uuid.lsb()) & (1ull << b)) ? 1 : -1;
+      bit_count[64 + b] += (uint64_t(uuid.msb()) & (1ull << b)) ? 1 : -1;
+    }
+  }
+
+  // By adding +1 / -1 for each one/zero, `bit_count` contains for each bit,
+  // their embalance. In an ideal world we expect `bit_count` to be 0 at each
+  // position. In practice we accept a 2% embalance to pass the test.
+  int64_t max_diff = 0;
+  for (size_t i = 0; i < bit_count.size(); i++)
+    max_diff = std::max(max_diff, std::abs(bit_count[i]));
+
+  const double diff_pct =
+      100.0 * static_cast<double>(max_diff) / static_cast<double>(kRounds);
+  PERFETTO_DLOG("Max bit embalance: %.2f %%", diff_pct);
+
+  // Local runs show a 1% embalance. We take a 5x margin for the test.
+  ASSERT_LT(diff_pct, 5.0);
+}
+
 }  // namespace
 }  // namespace base
 }  // namespace perfetto