| // Copyright (C) 2019 The Android Open Source Project |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| |
| #include <functional> |
| #include <random> |
| #include <unordered_map> |
| |
| #include <benchmark/benchmark.h> |
| #include <stdio.h> |
| #include <sys/mman.h> |
| #include <unistd.h> |
| |
| #include "perfetto/base/logging.h" |
| #include "perfetto/ext/base/file_utils.h" |
| #include "perfetto/ext/base/flat_hash_map.h" |
| #include "perfetto/ext/base/hash.h" |
| #include "perfetto/ext/base/scoped_file.h" |
| #include "perfetto/ext/base/string_view.h" |
| |
| // This benchmark allows to compare our FlatHashMap implementation against |
| // reference implementations from Absl (Google), Folly F14 (FB), and Tssil's |
| // reference RobinHood hashmap. |
| // Those libraries are not checked in into the repo. If you want to reproduce |
| // the benchmark you need to: |
| // - Manually install the three libraries using following the instructions in |
| // their readme (they all use cmake). |
| // - When running cmake, remember to pass |
| // -DCMAKE_BUILD_TYPE=Release -DCMAKE_CXX_FLAGS='-DNDEBUG -O3 -msse4.2 -mavx'. |
| // That sets cflags for a more fair comparison. |
| // - Set is_debug=false in the GN args. |
| // - Set the GN var perfetto_benchmark_3p_libs_prefix="/usr/local" (or whatever |
| // other directory you set as DESTDIR when running make install). |
| // The presence of the perfetto_benchmark_3p_libs_prefix GN variable will |
| // automatically define PERFETTO_HASH_MAP_COMPARE_THIRD_PARTY_LIBS. |
| |
| #if defined(PERFETTO_HASH_MAP_COMPARE_THIRD_PARTY_LIBS) |
| |
| // Last tested: https://github.com/abseil/abseil-cpp @ f2dbd918d. |
| #include <absl/container/flat_hash_map.h> |
| |
| // Last tested: https://github.com/facebook/folly @ 028a9abae3. |
| #include <folly/container/F14Map.h> |
| |
| // Last tested: https://github.com/Tessil/robin-map @ a603419b9. |
| #include <tsl/robin_map.h> |
| #endif |
| |
| namespace { |
| |
| using namespace perfetto; |
| using benchmark::Counter; |
| using perfetto::base::AlreadyHashed; |
| using perfetto::base::LinearProbe; |
| using perfetto::base::QuadraticHalfProbe; |
| using perfetto::base::QuadraticProbe; |
| |
| // Our FlatHashMap doesn't have a STL-like interface, mainly because we use |
| // columnar-oriented storage, not array-of-tuples, so we can't easily map into |
| // that interface. This wrapper makes our FlatHashMap compatible with STL (just |
| // for what it takes to build this translation unit), at the cost of some small |
| // performance penalty (around 1-2%). |
| template <typename Key, typename Value, typename Hasher, typename Probe> |
| class Ours : public base::FlatHashMap<Key, Value, Hasher, Probe> { |
| public: |
| struct Iterator { |
| using value_type = std::pair<const Key&, Value&>; |
| Iterator(const Key& k, Value& v) : pair_{k, v} {} |
| value_type* operator->() { return &pair_; } |
| value_type& operator*() { return pair_; } |
| bool operator==(const Iterator& other) const { |
| return &pair_.first == &other.pair_.first; |
| } |
| bool operator!=(const Iterator& other) const { return !operator==(other); } |
| value_type pair_; |
| }; |
| |
| void insert(std::pair<Key, Value>&& pair) { |
| this->Insert(std::move(pair.first), std::move(pair.second)); |
| } |
| |
| Iterator find(const Key& key) { |
| const size_t idx = this->FindInternal(key); |
| return Iterator(this->keys_[idx], this->values_[idx]); |
| } |
| |
| Iterator end() { |
| return Iterator(this->keys_[this->kNotFound], |
| this->values_[this->kNotFound]); |
| } |
| }; |
| |
| std::vector<uint64_t> LoadTraceStrings(benchmark::State& state) { |
| std::vector<uint64_t> str_hashes; |
| // This requires that the user has downloaded the file |
| // go/perfetto-benchmark-trace-strings into /tmp/trace_strings. The file is |
| // too big (2.3 GB after uncompression) and it's not worth adding it to the |
| // //test/data. Also it contains data from a team member's phone and cannot |
| // be public. |
| base::ScopedFstream f(fopen("/tmp/trace_strings", "re")); |
| if (!f) { |
| state.SkipWithError( |
| "Test strings missing. Googlers: download " |
| "go/perfetto-benchmark-trace-strings and save into /tmp/trace_strings"); |
| return str_hashes; |
| } |
| char line[4096]; |
| while (fgets(line, sizeof(line), *f)) { |
| base::Hash hasher; |
| hasher.Update(line, strlen(line)); |
| str_hashes.emplace_back(hasher.digest()); |
| } |
| return str_hashes; |
| } |
| |
| bool IsBenchmarkFunctionalOnly() { |
| return getenv("BENCHMARK_FUNCTIONAL_TEST_ONLY") != nullptr; |
| } |
| |
| size_t num_samples() { |
| return IsBenchmarkFunctionalOnly() ? size_t(100) : size_t(10 * 1000 * 1000); |
| } |
| |
| // Uses directly the base::FlatHashMap with no STL wrapper. Configures the map |
| // in append-only mode. |
| void BM_HashMap_InsertTraceStrings_AppendOnly(benchmark::State& state) { |
| std::vector<uint64_t> hashes = LoadTraceStrings(state); |
| for (auto _ : state) { |
| base::FlatHashMap<uint64_t, uint64_t, AlreadyHashed<uint64_t>, LinearProbe, |
| /*AppendOnly=*/true> |
| mapz; |
| for (uint64_t hash : hashes) |
| mapz.Insert(hash, 42); |
| |
| benchmark::ClobberMemory(); |
| state.counters["uniq"] = Counter(static_cast<double>(mapz.size())); |
| } |
| state.counters["tot"] = Counter(static_cast<double>(hashes.size()), |
| Counter::kIsIterationInvariant); |
| state.counters["rate"] = Counter(static_cast<double>(hashes.size()), |
| Counter::kIsIterationInvariantRate); |
| } |
| |
| template <typename MapType> |
| void BM_HashMap_InsertTraceStrings(benchmark::State& state) { |
| std::vector<uint64_t> hashes = LoadTraceStrings(state); |
| for (auto _ : state) { |
| MapType mapz; |
| for (uint64_t hash : hashes) |
| mapz.insert({hash, 42}); |
| |
| benchmark::ClobberMemory(); |
| state.counters["uniq"] = Counter(static_cast<double>(mapz.size())); |
| } |
| state.counters["tot"] = Counter(static_cast<double>(hashes.size()), |
| Counter::kIsIterationInvariant); |
| state.counters["rate"] = Counter(static_cast<double>(hashes.size()), |
| Counter::kIsIterationInvariantRate); |
| } |
| |
| template <typename MapType> |
| void BM_HashMap_TraceTids(benchmark::State& state) { |
| std::vector<std::pair<char, int>> ops_and_tids; |
| { |
| base::ScopedFstream f(fopen("/tmp/tids", "re")); |
| if (!f) { |
| // This test requires a large (800MB) test file. It's not checked into the |
| // repository's //test/data because it would slow down all developers for |
| // a marginal benefit. |
| state.SkipWithError( |
| "Please run `curl -Lo /tmp/tids " |
| "https://storage.googleapis.com/perfetto/test_datalong_trace_tids.txt" |
| "` and try again."); |
| return; |
| } |
| char op; |
| int tid; |
| while (fscanf(*f, "%c %d\n", &op, &tid) == 2) |
| ops_and_tids.emplace_back(op, tid); |
| } |
| |
| for (auto _ : state) { |
| MapType mapz; |
| for (const auto& ops_and_tid : ops_and_tids) { |
| if (ops_and_tid.first == '[') { |
| mapz[ops_and_tid.second]++; |
| } else { |
| mapz.insert({ops_and_tid.second, 0}); |
| } |
| } |
| |
| benchmark::ClobberMemory(); |
| state.counters["uniq"] = Counter(static_cast<double>(mapz.size())); |
| } |
| state.counters["rate"] = Counter(static_cast<double>(ops_and_tids.size()), |
| Counter::kIsIterationInvariantRate); |
| } |
| |
| template <typename MapType> |
| void BM_HashMap_InsertRandInts(benchmark::State& state) { |
| std::minstd_rand0 rng(0); |
| std::vector<size_t> keys(static_cast<size_t>(num_samples())); |
| std::shuffle(keys.begin(), keys.end(), rng); |
| for (auto _ : state) { |
| MapType mapz; |
| for (const auto key : keys) |
| mapz.insert({key, key}); |
| benchmark::DoNotOptimize(mapz); |
| benchmark::ClobberMemory(); |
| } |
| state.counters["insertions"] = Counter(static_cast<double>(keys.size()), |
| Counter::kIsIterationInvariantRate); |
| } |
| |
| // This test is performs insertions on integers that are designed to create |
| // lot of clustering on the same small set of buckets. |
| // This covers the unlucky case of using a map with a poor hashing function. |
| template <typename MapType> |
| void BM_HashMap_InsertCollidingInts(benchmark::State& state) { |
| std::vector<size_t> keys; |
| const size_t kNumSamples = num_samples(); |
| |
| // Generates numbers that are all distinct from each other, but that are |
| // designed to collide on the same buckets. |
| constexpr size_t kShift = 8; // Collide on the same 2^8 = 256 buckets. |
| for (size_t i = 0; i < kNumSamples; i++) { |
| size_t bucket = i & ((1 << kShift) - 1); // [0, 256]. |
| size_t multiplier = i >> kShift; // 0,0,0... 1,1,1..., 2,2,2... |
| size_t key = 8192 * multiplier + bucket; |
| keys.push_back(key); |
| } |
| for (auto _ : state) { |
| MapType mapz; |
| for (const size_t key : keys) |
| mapz.insert({key, key}); |
| benchmark::DoNotOptimize(mapz); |
| benchmark::ClobberMemory(); |
| } |
| state.counters["insertions"] = Counter(static_cast<double>(keys.size()), |
| Counter::kIsIterationInvariantRate); |
| } |
| |
| // Unlike the previous benchmark, here integers don't just collide on the same |
| // buckets, they have a large number of duplicates with the same values. |
| // Most of those insertions are no-ops. This tests the ability of the hashmap |
| // to deal with cases where the hash function is good but the insertions contain |
| // lot of dupes (e.g. dealing with pids). |
| template <typename MapType> |
| void BM_HashMap_InsertDupeInts(benchmark::State& state) { |
| std::vector<size_t> keys; |
| const size_t kNumSamples = num_samples(); |
| |
| for (size_t i = 0; i < kNumSamples; i++) |
| keys.push_back(i % 16384); |
| |
| for (auto _ : state) { |
| MapType mapz; |
| for (const size_t key : keys) |
| mapz.insert({key, key}); |
| benchmark::DoNotOptimize(mapz); |
| benchmark::ClobberMemory(); |
| } |
| state.counters["insertions"] = Counter(static_cast<double>(keys.size()), |
| Counter::kIsIterationInvariantRate); |
| } |
| |
| template <typename MapType> |
| void BM_HashMap_LookupRandInts(benchmark::State& state) { |
| std::minstd_rand0 rng(0); |
| std::vector<size_t> keys(static_cast<size_t>(num_samples())); |
| std::shuffle(keys.begin(), keys.end(), rng); |
| |
| MapType mapz; |
| for (const size_t key : keys) |
| mapz.insert({key, key}); |
| |
| for (auto _ : state) { |
| int64_t total = 0; |
| for (const size_t key : keys) { |
| auto it = mapz.find(static_cast<uint64_t>(key)); |
| PERFETTO_CHECK(it != mapz.end()); |
| total += it->second; |
| } |
| benchmark::DoNotOptimize(total); |
| benchmark::ClobberMemory(); |
| state.counters["sum"] = Counter(static_cast<double>(total)); |
| } |
| state.counters["lookups"] = Counter(static_cast<double>(keys.size()), |
| Counter::kIsIterationInvariantRate); |
| } |
| |
| } // namespace |
| |
| using Ours_LinearProbing = |
| Ours<uint64_t, uint64_t, AlreadyHashed<uint64_t>, LinearProbe>; |
| using Ours_QuadProbing = |
| Ours<uint64_t, uint64_t, AlreadyHashed<uint64_t>, QuadraticProbe>; |
| using Ours_QuadCompProbing = |
| Ours<uint64_t, uint64_t, AlreadyHashed<uint64_t>, QuadraticHalfProbe>; |
| using StdUnorderedMap = |
| std::unordered_map<uint64_t, uint64_t, AlreadyHashed<uint64_t>>; |
| |
| #if defined(PERFETTO_HASH_MAP_COMPARE_THIRD_PARTY_LIBS) |
| using RobinMap = tsl::robin_map<uint64_t, uint64_t, AlreadyHashed<uint64_t>>; |
| using AbslFlatHashMap = |
| absl::flat_hash_map<uint64_t, uint64_t, AlreadyHashed<uint64_t>>; |
| using FollyF14FastMap = |
| folly::F14FastMap<uint64_t, uint64_t, AlreadyHashed<uint64_t>>; |
| #endif |
| |
| BENCHMARK(BM_HashMap_InsertTraceStrings_AppendOnly); |
| BENCHMARK_TEMPLATE(BM_HashMap_InsertTraceStrings, Ours_LinearProbing); |
| BENCHMARK_TEMPLATE(BM_HashMap_InsertTraceStrings, Ours_QuadProbing); |
| BENCHMARK_TEMPLATE(BM_HashMap_InsertTraceStrings, StdUnorderedMap); |
| #if defined(PERFETTO_HASH_MAP_COMPARE_THIRD_PARTY_LIBS) |
| BENCHMARK_TEMPLATE(BM_HashMap_InsertTraceStrings, RobinMap); |
| BENCHMARK_TEMPLATE(BM_HashMap_InsertTraceStrings, AbslFlatHashMap); |
| BENCHMARK_TEMPLATE(BM_HashMap_InsertTraceStrings, FollyF14FastMap); |
| #endif |
| |
| #define TID_ARGS int, uint64_t, std::hash<int> |
| BENCHMARK_TEMPLATE(BM_HashMap_TraceTids, Ours<TID_ARGS, LinearProbe>); |
| BENCHMARK_TEMPLATE(BM_HashMap_TraceTids, Ours<TID_ARGS, QuadraticProbe>); |
| BENCHMARK_TEMPLATE(BM_HashMap_TraceTids, Ours<TID_ARGS, QuadraticHalfProbe>); |
| BENCHMARK_TEMPLATE(BM_HashMap_TraceTids, std::unordered_map<TID_ARGS>); |
| #if defined(PERFETTO_HASH_MAP_COMPARE_THIRD_PARTY_LIBS) |
| BENCHMARK_TEMPLATE(BM_HashMap_TraceTids, tsl::robin_map<TID_ARGS>); |
| BENCHMARK_TEMPLATE(BM_HashMap_TraceTids, absl::flat_hash_map<TID_ARGS>); |
| BENCHMARK_TEMPLATE(BM_HashMap_TraceTids, folly::F14FastMap<TID_ARGS>); |
| #endif |
| |
| BENCHMARK_TEMPLATE(BM_HashMap_InsertRandInts, Ours_LinearProbing); |
| BENCHMARK_TEMPLATE(BM_HashMap_InsertRandInts, Ours_QuadProbing); |
| BENCHMARK_TEMPLATE(BM_HashMap_InsertRandInts, StdUnorderedMap); |
| #if defined(PERFETTO_HASH_MAP_COMPARE_THIRD_PARTY_LIBS) |
| BENCHMARK_TEMPLATE(BM_HashMap_InsertRandInts, RobinMap); |
| BENCHMARK_TEMPLATE(BM_HashMap_InsertRandInts, AbslFlatHashMap); |
| BENCHMARK_TEMPLATE(BM_HashMap_InsertRandInts, FollyF14FastMap); |
| #endif |
| |
| BENCHMARK_TEMPLATE(BM_HashMap_InsertCollidingInts, Ours_LinearProbing); |
| BENCHMARK_TEMPLATE(BM_HashMap_InsertCollidingInts, Ours_QuadProbing); |
| BENCHMARK_TEMPLATE(BM_HashMap_InsertCollidingInts, Ours_QuadCompProbing); |
| BENCHMARK_TEMPLATE(BM_HashMap_InsertCollidingInts, StdUnorderedMap); |
| #if defined(PERFETTO_HASH_MAP_COMPARE_THIRD_PARTY_LIBS) |
| BENCHMARK_TEMPLATE(BM_HashMap_InsertCollidingInts, RobinMap); |
| BENCHMARK_TEMPLATE(BM_HashMap_InsertCollidingInts, AbslFlatHashMap); |
| BENCHMARK_TEMPLATE(BM_HashMap_InsertCollidingInts, FollyF14FastMap); |
| #endif |
| |
| BENCHMARK_TEMPLATE(BM_HashMap_InsertDupeInts, Ours_LinearProbing); |
| BENCHMARK_TEMPLATE(BM_HashMap_InsertDupeInts, Ours_QuadProbing); |
| BENCHMARK_TEMPLATE(BM_HashMap_InsertDupeInts, Ours_QuadCompProbing); |
| BENCHMARK_TEMPLATE(BM_HashMap_InsertDupeInts, StdUnorderedMap); |
| #if defined(PERFETTO_HASH_MAP_COMPARE_THIRD_PARTY_LIBS) |
| BENCHMARK_TEMPLATE(BM_HashMap_InsertDupeInts, RobinMap); |
| BENCHMARK_TEMPLATE(BM_HashMap_InsertDupeInts, AbslFlatHashMap); |
| BENCHMARK_TEMPLATE(BM_HashMap_InsertDupeInts, FollyF14FastMap); |
| #endif |
| |
| BENCHMARK_TEMPLATE(BM_HashMap_LookupRandInts, Ours_LinearProbing); |
| BENCHMARK_TEMPLATE(BM_HashMap_LookupRandInts, Ours_QuadProbing); |
| BENCHMARK_TEMPLATE(BM_HashMap_LookupRandInts, StdUnorderedMap); |
| #if defined(PERFETTO_HASH_MAP_COMPARE_THIRD_PARTY_LIBS) |
| BENCHMARK_TEMPLATE(BM_HashMap_LookupRandInts, RobinMap); |
| BENCHMARK_TEMPLATE(BM_HashMap_LookupRandInts, AbslFlatHashMap); |
| BENCHMARK_TEMPLATE(BM_HashMap_LookupRandInts, FollyF14FastMap); |
| #endif |