blob: 23fbb3a64bac99b09c6408141a06e6ed7dbe30c6 [file] [log] [blame]
// Copyright (C) 2019 The Android Open Source Project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <functional>
#include <random>
#include <unordered_map>
#include <benchmark/benchmark.h>
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>
#include "perfetto/base/logging.h"
#include "perfetto/ext/base/file_utils.h"
#include "perfetto/ext/base/flat_hash_map.h"
#include "perfetto/ext/base/hash.h"
#include "perfetto/ext/base/scoped_file.h"
#include "perfetto/ext/base/string_view.h"
// This benchmark allows to compare our FlatHashMap implementation against
// reference implementations from Absl (Google), Folly F14 (FB), and Tssil's
// reference RobinHood hashmap.
// Those libraries are not checked in into the repo. If you want to reproduce
// the benchmark you need to:
// - Manually install the three libraries using following the instructions in
// their readme (they all use cmake).
// - When running cmake, remember to pass
// -DCMAKE_BUILD_TYPE=Release -DCMAKE_CXX_FLAGS='-DNDEBUG -O3 -msse4.2 -mavx'.
// That sets cflags for a more fair comparison.
// - Set is_debug=false in the GN args.
// - Set the GN var perfetto_benchmark_3p_libs_prefix="/usr/local" (or whatever
// other directory you set as DESTDIR when running make install).
// The presence of the perfetto_benchmark_3p_libs_prefix GN variable will
// automatically define PERFETTO_HASH_MAP_COMPARE_THIRD_PARTY_LIBS.
#if defined(PERFETTO_HASH_MAP_COMPARE_THIRD_PARTY_LIBS)
// Last tested: https://github.com/abseil/abseil-cpp @ f2dbd918d.
#include <absl/container/flat_hash_map.h>
// Last tested: https://github.com/facebook/folly @ 028a9abae3.
#include <folly/container/F14Map.h>
// Last tested: https://github.com/Tessil/robin-map @ a603419b9.
#include <tsl/robin_map.h>
#endif
namespace {
using namespace perfetto;
using benchmark::Counter;
using perfetto::base::AlreadyHashed;
using perfetto::base::LinearProbe;
using perfetto::base::QuadraticHalfProbe;
using perfetto::base::QuadraticProbe;
// Our FlatHashMap doesn't have a STL-like interface, mainly because we use
// columnar-oriented storage, not array-of-tuples, so we can't easily map into
// that interface. This wrapper makes our FlatHashMap compatible with STL (just
// for what it takes to build this translation unit), at the cost of some small
// performance penalty (around 1-2%).
template <typename Key, typename Value, typename Hasher, typename Probe>
class Ours : public base::FlatHashMap<Key, Value, Hasher, Probe> {
public:
struct Iterator {
using value_type = std::pair<const Key&, Value&>;
Iterator(const Key& k, Value& v) : pair_{k, v} {}
value_type* operator->() { return &pair_; }
value_type& operator*() { return pair_; }
bool operator==(const Iterator& other) const {
return &pair_.first == &other.pair_.first;
}
bool operator!=(const Iterator& other) const { return !operator==(other); }
value_type pair_;
};
void insert(std::pair<Key, Value>&& pair) {
this->Insert(std::move(pair.first), std::move(pair.second));
}
Iterator find(const Key& key) {
const size_t idx = this->FindInternal(key);
return Iterator(this->keys_[idx], this->values_[idx]);
}
Iterator end() {
return Iterator(this->keys_[this->kNotFound],
this->values_[this->kNotFound]);
}
};
std::vector<uint64_t> LoadTraceStrings(benchmark::State& state) {
std::vector<uint64_t> str_hashes;
// This requires that the user has downloaded the file
// go/perfetto-benchmark-trace-strings into /tmp/trace_strings. The file is
// too big (2.3 GB after uncompression) and it's not worth adding it to the
// //test/data. Also it contains data from a team member's phone and cannot
// be public.
base::ScopedFstream f(fopen("/tmp/trace_strings", "re"));
if (!f) {
state.SkipWithError(
"Test strings missing. Googlers: download "
"go/perfetto-benchmark-trace-strings and save into /tmp/trace_strings");
return str_hashes;
}
char line[4096];
while (fgets(line, sizeof(line), *f)) {
base::Hasher hasher;
hasher.Update(line, strlen(line));
str_hashes.emplace_back(hasher.digest());
}
return str_hashes;
}
bool IsBenchmarkFunctionalOnly() {
return getenv("BENCHMARK_FUNCTIONAL_TEST_ONLY") != nullptr;
}
size_t num_samples() {
return IsBenchmarkFunctionalOnly() ? size_t(100) : size_t(10 * 1000 * 1000);
}
// Uses directly the base::FlatHashMap with no STL wrapper. Configures the map
// in append-only mode.
void BM_HashMap_InsertTraceStrings_AppendOnly(benchmark::State& state) {
std::vector<uint64_t> hashes = LoadTraceStrings(state);
for (auto _ : state) {
base::FlatHashMap<uint64_t, uint64_t, AlreadyHashed<uint64_t>, LinearProbe,
/*AppendOnly=*/true>
mapz;
for (uint64_t hash : hashes)
mapz.Insert(hash, 42);
benchmark::ClobberMemory();
state.counters["uniq"] = Counter(static_cast<double>(mapz.size()));
}
state.counters["tot"] = Counter(static_cast<double>(hashes.size()),
Counter::kIsIterationInvariant);
state.counters["rate"] = Counter(static_cast<double>(hashes.size()),
Counter::kIsIterationInvariantRate);
}
template <typename MapType>
void BM_HashMap_InsertTraceStrings(benchmark::State& state) {
std::vector<uint64_t> hashes = LoadTraceStrings(state);
for (auto _ : state) {
MapType mapz;
for (uint64_t hash : hashes)
mapz.insert({hash, 42});
benchmark::ClobberMemory();
state.counters["uniq"] = Counter(static_cast<double>(mapz.size()));
}
state.counters["tot"] = Counter(static_cast<double>(hashes.size()),
Counter::kIsIterationInvariant);
state.counters["rate"] = Counter(static_cast<double>(hashes.size()),
Counter::kIsIterationInvariantRate);
}
template <typename MapType>
void BM_HashMap_TraceTids(benchmark::State& state) {
std::vector<std::pair<char, int>> ops_and_tids;
{
base::ScopedFstream f(fopen("/tmp/tids", "re"));
if (!f) {
// This test requires a large (800MB) test file. It's not checked into the
// repository's //test/data because it would slow down all developers for
// a marginal benefit.
state.SkipWithError(
"Please run `curl -Lo /tmp/tids "
"https://storage.googleapis.com/perfetto/test_datalong_trace_tids.txt"
"` and try again.");
return;
}
char op;
int tid;
while (fscanf(*f, "%c %d\n", &op, &tid) == 2)
ops_and_tids.emplace_back(op, tid);
}
for (auto _ : state) {
MapType mapz;
for (const auto& ops_and_tid : ops_and_tids) {
if (ops_and_tid.first == '[') {
mapz[ops_and_tid.second]++;
} else {
mapz.insert({ops_and_tid.second, 0});
}
}
benchmark::ClobberMemory();
state.counters["uniq"] = Counter(static_cast<double>(mapz.size()));
}
state.counters["rate"] = Counter(static_cast<double>(ops_and_tids.size()),
Counter::kIsIterationInvariantRate);
}
template <typename MapType>
void BM_HashMap_InsertRandInts(benchmark::State& state) {
std::minstd_rand0 rng(0);
std::vector<size_t> keys(static_cast<size_t>(num_samples()));
std::shuffle(keys.begin(), keys.end(), rng);
for (auto _ : state) {
MapType mapz;
for (const auto key : keys)
mapz.insert({key, key});
benchmark::DoNotOptimize(mapz);
benchmark::ClobberMemory();
}
state.counters["insertions"] = Counter(static_cast<double>(keys.size()),
Counter::kIsIterationInvariantRate);
}
// This test is performs insertions on integers that are designed to create
// lot of clustering on the same small set of buckets.
// This covers the unlucky case of using a map with a poor hashing function.
template <typename MapType>
void BM_HashMap_InsertCollidingInts(benchmark::State& state) {
std::vector<size_t> keys;
const size_t kNumSamples = num_samples();
// Generates numbers that are all distinct from each other, but that are
// designed to collide on the same buckets.
constexpr size_t kShift = 8; // Collide on the same 2^8 = 256 buckets.
for (size_t i = 0; i < kNumSamples; i++) {
size_t bucket = i & ((1 << kShift) - 1); // [0, 256].
size_t multiplier = i >> kShift; // 0,0,0... 1,1,1..., 2,2,2...
size_t key = 8192 * multiplier + bucket;
keys.push_back(key);
}
for (auto _ : state) {
MapType mapz;
for (const size_t key : keys)
mapz.insert({key, key});
benchmark::DoNotOptimize(mapz);
benchmark::ClobberMemory();
}
state.counters["insertions"] = Counter(static_cast<double>(keys.size()),
Counter::kIsIterationInvariantRate);
}
// Unlike the previous benchmark, here integers don't just collide on the same
// buckets, they have a large number of duplicates with the same values.
// Most of those insertions are no-ops. This tests the ability of the hashmap
// to deal with cases where the hash function is good but the insertions contain
// lot of dupes (e.g. dealing with pids).
template <typename MapType>
void BM_HashMap_InsertDupeInts(benchmark::State& state) {
std::vector<size_t> keys;
const size_t kNumSamples = num_samples();
for (size_t i = 0; i < kNumSamples; i++)
keys.push_back(i % 16384);
for (auto _ : state) {
MapType mapz;
for (const size_t key : keys)
mapz.insert({key, key});
benchmark::DoNotOptimize(mapz);
benchmark::ClobberMemory();
}
state.counters["insertions"] = Counter(static_cast<double>(keys.size()),
Counter::kIsIterationInvariantRate);
}
template <typename MapType>
void BM_HashMap_LookupRandInts(benchmark::State& state) {
std::minstd_rand0 rng(0);
std::vector<size_t> keys(static_cast<size_t>(num_samples()));
std::shuffle(keys.begin(), keys.end(), rng);
MapType mapz;
for (const size_t key : keys)
mapz.insert({key, key});
for (auto _ : state) {
int64_t total = 0;
for (const size_t key : keys) {
auto it = mapz.find(static_cast<uint64_t>(key));
PERFETTO_CHECK(it != mapz.end());
total += it->second;
}
benchmark::DoNotOptimize(total);
benchmark::ClobberMemory();
state.counters["sum"] = Counter(static_cast<double>(total));
}
state.counters["lookups"] = Counter(static_cast<double>(keys.size()),
Counter::kIsIterationInvariantRate);
}
} // namespace
using Ours_LinearProbing =
Ours<uint64_t, uint64_t, AlreadyHashed<uint64_t>, LinearProbe>;
using Ours_QuadProbing =
Ours<uint64_t, uint64_t, AlreadyHashed<uint64_t>, QuadraticProbe>;
using Ours_QuadCompProbing =
Ours<uint64_t, uint64_t, AlreadyHashed<uint64_t>, QuadraticHalfProbe>;
using StdUnorderedMap =
std::unordered_map<uint64_t, uint64_t, AlreadyHashed<uint64_t>>;
#if defined(PERFETTO_HASH_MAP_COMPARE_THIRD_PARTY_LIBS)
using RobinMap = tsl::robin_map<uint64_t, uint64_t, AlreadyHashed<uint64_t>>;
using AbslFlatHashMap =
absl::flat_hash_map<uint64_t, uint64_t, AlreadyHashed<uint64_t>>;
using FollyF14FastMap =
folly::F14FastMap<uint64_t, uint64_t, AlreadyHashed<uint64_t>>;
#endif
BENCHMARK(BM_HashMap_InsertTraceStrings_AppendOnly);
BENCHMARK_TEMPLATE(BM_HashMap_InsertTraceStrings, Ours_LinearProbing);
BENCHMARK_TEMPLATE(BM_HashMap_InsertTraceStrings, Ours_QuadProbing);
BENCHMARK_TEMPLATE(BM_HashMap_InsertTraceStrings, StdUnorderedMap);
#if defined(PERFETTO_HASH_MAP_COMPARE_THIRD_PARTY_LIBS)
BENCHMARK_TEMPLATE(BM_HashMap_InsertTraceStrings, RobinMap);
BENCHMARK_TEMPLATE(BM_HashMap_InsertTraceStrings, AbslFlatHashMap);
BENCHMARK_TEMPLATE(BM_HashMap_InsertTraceStrings, FollyF14FastMap);
#endif
#define TID_ARGS int, uint64_t, std::hash<int>
BENCHMARK_TEMPLATE(BM_HashMap_TraceTids, Ours<TID_ARGS, LinearProbe>);
BENCHMARK_TEMPLATE(BM_HashMap_TraceTids, Ours<TID_ARGS, QuadraticProbe>);
BENCHMARK_TEMPLATE(BM_HashMap_TraceTids, Ours<TID_ARGS, QuadraticHalfProbe>);
BENCHMARK_TEMPLATE(BM_HashMap_TraceTids, std::unordered_map<TID_ARGS>);
#if defined(PERFETTO_HASH_MAP_COMPARE_THIRD_PARTY_LIBS)
BENCHMARK_TEMPLATE(BM_HashMap_TraceTids, tsl::robin_map<TID_ARGS>);
BENCHMARK_TEMPLATE(BM_HashMap_TraceTids, absl::flat_hash_map<TID_ARGS>);
BENCHMARK_TEMPLATE(BM_HashMap_TraceTids, folly::F14FastMap<TID_ARGS>);
#endif
BENCHMARK_TEMPLATE(BM_HashMap_InsertRandInts, Ours_LinearProbing);
BENCHMARK_TEMPLATE(BM_HashMap_InsertRandInts, Ours_QuadProbing);
BENCHMARK_TEMPLATE(BM_HashMap_InsertRandInts, StdUnorderedMap);
#if defined(PERFETTO_HASH_MAP_COMPARE_THIRD_PARTY_LIBS)
BENCHMARK_TEMPLATE(BM_HashMap_InsertRandInts, RobinMap);
BENCHMARK_TEMPLATE(BM_HashMap_InsertRandInts, AbslFlatHashMap);
BENCHMARK_TEMPLATE(BM_HashMap_InsertRandInts, FollyF14FastMap);
#endif
BENCHMARK_TEMPLATE(BM_HashMap_InsertCollidingInts, Ours_LinearProbing);
BENCHMARK_TEMPLATE(BM_HashMap_InsertCollidingInts, Ours_QuadProbing);
BENCHMARK_TEMPLATE(BM_HashMap_InsertCollidingInts, Ours_QuadCompProbing);
BENCHMARK_TEMPLATE(BM_HashMap_InsertCollidingInts, StdUnorderedMap);
#if defined(PERFETTO_HASH_MAP_COMPARE_THIRD_PARTY_LIBS)
BENCHMARK_TEMPLATE(BM_HashMap_InsertCollidingInts, RobinMap);
BENCHMARK_TEMPLATE(BM_HashMap_InsertCollidingInts, AbslFlatHashMap);
BENCHMARK_TEMPLATE(BM_HashMap_InsertCollidingInts, FollyF14FastMap);
#endif
BENCHMARK_TEMPLATE(BM_HashMap_InsertDupeInts, Ours_LinearProbing);
BENCHMARK_TEMPLATE(BM_HashMap_InsertDupeInts, Ours_QuadProbing);
BENCHMARK_TEMPLATE(BM_HashMap_InsertDupeInts, Ours_QuadCompProbing);
BENCHMARK_TEMPLATE(BM_HashMap_InsertDupeInts, StdUnorderedMap);
#if defined(PERFETTO_HASH_MAP_COMPARE_THIRD_PARTY_LIBS)
BENCHMARK_TEMPLATE(BM_HashMap_InsertDupeInts, RobinMap);
BENCHMARK_TEMPLATE(BM_HashMap_InsertDupeInts, AbslFlatHashMap);
BENCHMARK_TEMPLATE(BM_HashMap_InsertDupeInts, FollyF14FastMap);
#endif
BENCHMARK_TEMPLATE(BM_HashMap_LookupRandInts, Ours_LinearProbing);
BENCHMARK_TEMPLATE(BM_HashMap_LookupRandInts, Ours_QuadProbing);
BENCHMARK_TEMPLATE(BM_HashMap_LookupRandInts, StdUnorderedMap);
#if defined(PERFETTO_HASH_MAP_COMPARE_THIRD_PARTY_LIBS)
BENCHMARK_TEMPLATE(BM_HashMap_LookupRandInts, RobinMap);
BENCHMARK_TEMPLATE(BM_HashMap_LookupRandInts, AbslFlatHashMap);
BENCHMARK_TEMPLATE(BM_HashMap_LookupRandInts, FollyF14FastMap);
#endif