Merge "tp: optimize SelectBvWithIv function in RowMap"
diff --git a/BUILD b/BUILD
index 3c90cb2..fa04d14 100644
--- a/BUILD
+++ b/BUILD
@@ -952,6 +952,7 @@
"src/trace_processor/containers/null_term_string_view.h",
"src/trace_processor/containers/nullable_vector.h",
"src/trace_processor/containers/row_map.h",
+ "src/trace_processor/containers/row_map_algorithms.h",
"src/trace_processor/containers/string_pool.h",
],
deps = [
diff --git a/src/trace_processor/containers/BUILD.gn b/src/trace_processor/containers/BUILD.gn
index 1f843e1..1e92c62 100644
--- a/src/trace_processor/containers/BUILD.gn
+++ b/src/trace_processor/containers/BUILD.gn
@@ -26,6 +26,7 @@
"null_term_string_view.h",
"nullable_vector.h",
"row_map.h",
+ "row_map_algorithms.h",
"string_pool.h",
]
sources = [
@@ -69,6 +70,7 @@
sources = [
"bit_vector_benchmark.cc",
"nullable_vector_benchmark.cc",
+ "row_map_algorithms_benchmark.cc",
"row_map_benchmark.cc",
]
}
diff --git a/src/trace_processor/containers/bit_vector.h b/src/trace_processor/containers/bit_vector.h
index 5f9c27a..06297f0 100644
--- a/src/trace_processor/containers/bit_vector.h
+++ b/src/trace_processor/containers/bit_vector.h
@@ -51,7 +51,7 @@
explicit BitVector(std::initializer_list<bool> init);
// Creates a bitvector of |count| size filled with |value|.
- BitVector(uint32_t count, bool value = false);
+ explicit BitVector(uint32_t count, bool value = false);
// Enable moving bitvectors as they have no unmovable state.
BitVector(BitVector&&) noexcept = default;
diff --git a/src/trace_processor/containers/row_map.cc b/src/trace_processor/containers/row_map.cc
index ceacf95..16c476a 100644
--- a/src/trace_processor/containers/row_map.cc
+++ b/src/trace_processor/containers/row_map.cc
@@ -16,6 +16,8 @@
#include "src/trace_processor/containers/row_map.h"
+#include "src/trace_processor/containers/row_map_algorithms.h"
+
namespace perfetto {
namespace trace_processor {
@@ -77,7 +79,13 @@
PERFETTO_DCHECK(selector_start <= selector_end);
PERFETTO_DCHECK(selector_end <= bv.GetNumBitsSet());
+ // If we're simply selecting every element in the bitvector, just
+ // return a copy of the BitVector without iterating.
BitVector ret = bv.Copy();
+ if (selector_start == 0 && selector_end == bv.GetNumBitsSet()) {
+ return RowMap(std::move(ret));
+ }
+
for (auto it = ret.IterateSetBits(); it; it.Next()) {
auto set_idx = it.ordinal();
if (set_idx < selector_start || set_idx >= selector_end)
@@ -94,12 +102,32 @@
RowMap SelectBvWithIv(const BitVector& bv,
const std::vector<uint32_t>& selector) {
- std::vector<uint32_t> iv(selector.size());
- for (uint32_t i = 0; i < selector.size(); ++i) {
- // TODO(lalitm): this is pretty inefficient.
- iv[i] = bv.IndexOfNthSet(selector[i]);
+ // The value of this constant was found by considering the benchmarks
+ // |BM_SelectBvWithIvByConvertToIv| and |BM_SelectBvWithIvByIndexOfNthSet|.
+ //
+ // We use this to find the ratio between |bv.GetNumBitsSet()| and
+ // |selector.size()| where |SelectBvWithIvByIndexOfNthSet| was found to be
+ // faster than |SelectBvWithIvByConvertToIv|.
+ //
+ // Note: as of writing this, the benchmarks do not take into account the fill
+ // ratio of the BitVector; they assume 50% rate which almost never happens in
+ // practice. In the future, we could also take this into account (by
+ // considering the ratio between bv.size() and bv.GetNumBitsSet()) but this
+ // causes an explosion in the state space for the benchmark so we're not
+ // considering this today.
+ //
+ // The current value of the constant was picked by running these benchmarks on
+ // a E5-2690 v4 and finding the crossover point using a spreadsheet.
+ constexpr uint32_t kIndexOfSetBitToSelectorRatio = 4;
+
+ // If the selector is larger than a threshold, it's more efficient to convert
+ // the entire BitVector to an index vector and use SelectIvWithIv instead.
+ if (bv.GetNumBitsSet() / kIndexOfSetBitToSelectorRatio < selector.size()) {
+ return RowMap(
+ row_map_algorithms::SelectBvWithIvByConvertToIv(bv, selector));
}
- return RowMap(std::move(iv));
+ return RowMap(
+ row_map_algorithms::SelectBvWithIvByIndexOfNthSet(bv, selector));
}
RowMap SelectIvWithRange(const std::vector<uint32_t>& iv,
@@ -132,12 +160,7 @@
RowMap SelectIvWithIv(const std::vector<uint32_t>& iv,
const std::vector<uint32_t>& selector) {
- std::vector<uint32_t> ret(selector.size());
- for (uint32_t i = 0; i < selector.size(); ++i) {
- PERFETTO_DCHECK(selector[i] < iv.size());
- ret[i] = iv[selector[i]];
- }
- return RowMap(std::move(ret));
+ return RowMap(row_map_algorithms::SelectIvWithIv(iv, selector));
}
} // namespace
diff --git a/src/trace_processor/containers/row_map.h b/src/trace_processor/containers/row_map.h
index 5ca3c9c..49bbdcb 100644
--- a/src/trace_processor/containers/row_map.h
+++ b/src/trace_processor/containers/row_map.h
@@ -238,6 +238,12 @@
// Creates a RowMap backed by an std::vector<uint32_t>.
explicit RowMap(std::vector<OutputIndex> vec);
+ RowMap(const RowMap&) noexcept = delete;
+ RowMap& operator=(const RowMap&) = delete;
+
+ RowMap(RowMap&&) noexcept = default;
+ RowMap& operator=(RowMap&&) = default;
+
// Creates a RowMap containing just |index|.
// By default this will be implemented using a range.
static RowMap SingleRow(OutputIndex index) {
@@ -651,8 +657,16 @@
void InsertIntoBitVector(uint32_t row) {
PERFETTO_DCHECK(mode_ == Mode::kBitVector);
- if (row >= bit_vector_.size())
+ // If we're adding a row to precisely the end of the BitVector, just append
+ // true instead of resizing and then setting.
+ if (row == bit_vector_.size()) {
+ bit_vector_.AppendTrue();
+ return;
+ }
+
+ if (row > bit_vector_.size()) {
bit_vector_.Resize(row + 1, false);
+ }
bit_vector_.Set(row);
}
diff --git a/src/trace_processor/containers/row_map_algorithms.h b/src/trace_processor/containers/row_map_algorithms.h
new file mode 100644
index 0000000..7d585c3
--- /dev/null
+++ b/src/trace_processor/containers/row_map_algorithms.h
@@ -0,0 +1,77 @@
+/*
+ * Copyright (C) 2022 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef SRC_TRACE_PROCESSOR_CONTAINERS_ROW_MAP_ALGORITHMS_H_
+#define SRC_TRACE_PROCESSOR_CONTAINERS_ROW_MAP_ALGORITHMS_H_
+
+#include <vector>
+
+#include "perfetto/base/logging.h"
+#include "src/trace_processor/containers/bit_vector.h"
+#include "src/trace_processor/containers/bit_vector_iterators.h"
+
+// This file contains fundamental algorithms used by RowMap.
+//
+// This file is structured in a way to make benchmarking easy. The intention is
+// to use this to decide which heurustics to use and the value of magic
+// constants in RowMap algorithms.
+
+namespace perfetto {
+namespace trace_processor {
+namespace row_map_algorithms {
+
+// Returns a vector containing elements from |iv| selected by indices from
+// |selector|.
+inline std::vector<uint32_t> SelectIvWithIv(
+ const std::vector<uint32_t>& iv,
+ const std::vector<uint32_t>& selector) {
+ std::vector<uint32_t> ret(selector.size());
+ for (uint32_t i = 0; i < selector.size(); ++i) {
+ PERFETTO_DCHECK(selector[i] < iv.size());
+ ret[i] = iv[selector[i]];
+ }
+ return ret;
+}
+
+// Returns a vector containing elements from |bv| by first converting to an
+// index vector and then selecting indices from |selector|.
+inline std::vector<uint32_t> SelectBvWithIvByConvertToIv(
+ const BitVector& bv,
+ const std::vector<uint32_t>& selector) {
+ std::vector<uint32_t> bv_conv(bv.GetNumBitsSet());
+ for (auto it = bv.IterateSetBits(); it; it.Next()) {
+ bv_conv[it.ordinal()] = it.index();
+ }
+ return SelectIvWithIv(bv_conv, selector);
+}
+
+// Returns a vector containing elements from |bv| by selecting indices from
+// |selector| using IndexOfNthSet calls.
+inline std::vector<uint32_t> SelectBvWithIvByIndexOfNthSet(
+ const BitVector& bv,
+ const std::vector<uint32_t>& selector) {
+ std::vector<uint32_t> iv(selector.size());
+ for (uint32_t i = 0; i < selector.size(); ++i) {
+ iv[i] = bv.IndexOfNthSet(selector[i]);
+ }
+ return iv;
+}
+
+} // namespace row_map_algorithms
+} // namespace trace_processor
+} // namespace perfetto
+
+#endif // SRC_TRACE_PROCESSOR_CONTAINERS_ROW_MAP_ALGORITHMS_H_
diff --git a/src/trace_processor/containers/row_map_algorithms_benchmark.cc b/src/trace_processor/containers/row_map_algorithms_benchmark.cc
new file mode 100644
index 0000000..805172c
--- /dev/null
+++ b/src/trace_processor/containers/row_map_algorithms_benchmark.cc
@@ -0,0 +1,120 @@
+/*
+ * Copyright (C) 2022 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <random>
+
+#include <benchmark/benchmark.h>
+
+#include "src/trace_processor/containers/row_map_algorithms.h"
+
+using perfetto::trace_processor::BitVector;
+
+namespace {
+
+using namespace perfetto::trace_processor::row_map_algorithms;
+
+bool IsBenchmarkFunctionalOnly() {
+ return getenv("BENCHMARK_FUNCTIONAL_TEST_ONLY") != nullptr;
+}
+
+BitVector BvWithSetBits(uint32_t bv_set_bits) {
+ static constexpr uint32_t kRandomSeed = 29;
+ std::minstd_rand0 rnd_engine(kRandomSeed);
+
+ BitVector bv;
+ for (uint32_t i = 0; i < bv_set_bits;) {
+ if (rnd_engine() % 2 == 0) {
+ bv.AppendTrue();
+ ++i;
+ } else {
+ bv.AppendFalse();
+ }
+ }
+ return bv;
+}
+
+std::vector<uint32_t> IvWithSizeAndMaxIndex(
+ uint32_t bv_set_bits,
+ uint32_t set_bit_to_selector_ratio) {
+ static constexpr uint32_t kRandomSeed = 78;
+ std::minstd_rand0 rnd_engine(kRandomSeed);
+
+ uint32_t size = bv_set_bits / set_bit_to_selector_ratio;
+ std::vector<uint32_t> indices(size);
+ for (uint32_t i = 0; i < size; ++i) {
+ indices[i] = rnd_engine() % bv_set_bits;
+ }
+ return indices;
+}
+
+void BvWithIvArgs(benchmark::internal::Benchmark* b) {
+ std::vector<int> set_bit_to_selector_ratios;
+ if (IsBenchmarkFunctionalOnly()) {
+ set_bit_to_selector_ratios = std::vector<int>{2};
+ } else {
+ set_bit_to_selector_ratios = std::vector<int>{2, 4, 6, 8, 10, 12, 16, 32};
+ }
+
+ std::vector<int> bv_set_bits;
+ if (IsBenchmarkFunctionalOnly()) {
+ bv_set_bits = std::vector<int>{1024};
+ } else {
+ bv_set_bits = std::vector<int>{1024, 4096, 1024 * 1024};
+ }
+
+ for (int bv_set_bit : bv_set_bits) {
+ for (int set_bit_to_selector_ratio : set_bit_to_selector_ratios) {
+ b->Args({bv_set_bit, set_bit_to_selector_ratio});
+ }
+ }
+}
+
+// |BM_SelectBvWithIvByConvertToIv| and |BM_SelectBvWithIvByIndexOfNthSet| are
+// used together to find the ratio between the selector size and the number of
+// set bits in the BitVector where |SelectBvWithIvByIndexOfNthSet| is faster
+// than |SelectBvWithIvByConvertToIv|.
+//
+// See the comment in SelectBvWithIv in row_map.cc for more information on this.
+
+static void BM_SelectBvWithIvByConvertToIv(benchmark::State& state) {
+ uint32_t bv_set_bit = static_cast<uint32_t>(state.range(0));
+ uint32_t set_bit_to_selector_ratio = static_cast<uint32_t>(state.range(1));
+
+ BitVector bv = BvWithSetBits(bv_set_bit);
+ std::vector<uint32_t> iv =
+ IvWithSizeAndMaxIndex(bv_set_bit, set_bit_to_selector_ratio);
+
+ for (auto _ : state) {
+ benchmark::DoNotOptimize(SelectBvWithIvByConvertToIv(bv, iv));
+ }
+}
+BENCHMARK(BM_SelectBvWithIvByConvertToIv)->Apply(BvWithIvArgs);
+
+static void BM_SelectBvWithIvByIndexOfNthSet(benchmark::State& state) {
+ uint32_t bv_set_bit = static_cast<uint32_t>(state.range(0));
+ uint32_t set_bit_to_selector_ratio = static_cast<uint32_t>(state.range(1));
+
+ BitVector bv = BvWithSetBits(bv_set_bit);
+ std::vector<uint32_t> iv =
+ IvWithSizeAndMaxIndex(bv_set_bit, set_bit_to_selector_ratio);
+
+ for (auto _ : state) {
+ benchmark::DoNotOptimize(SelectBvWithIvByIndexOfNthSet(bv, iv));
+ }
+}
+BENCHMARK(BM_SelectBvWithIvByIndexOfNthSet)->Apply(BvWithIvArgs);
+
+} // namespace