blob: a053245e58954b79c212fd2dfbb7cf82605792eb [file] [log] [blame]
/*
* Copyright (C) 2019 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef SRC_TRACE_PROCESSOR_CONTAINERS_ROW_MAP_H_
#define SRC_TRACE_PROCESSOR_CONTAINERS_ROW_MAP_H_
#include <stdint.h>
#include <memory>
#include <vector>
#include "perfetto/base/logging.h"
#include "perfetto/ext/base/optional.h"
#include "src/trace_processor/containers/bit_vector.h"
#include "src/trace_processor/containers/bit_vector_iterators.h"
namespace perfetto {
namespace trace_processor {
// Stores a list of row indicies in a space efficient manner. One or more
// columns can refer to the same RowMap. The RowMap defines the access pattern
// to iterate on rows.
//
// Implementation details:
//
// Behind the scenes, this class is impelemented using one of three backing
// data-structures:
// 1. A start and end index (internally named 'range')
// 1. BitVector
// 2. std::vector<uint32_t> (internally named IndexVector).
//
// Generally the preference for data structures is range > BitVector >
// std::vector<uint32>; this ordering is based mainly on memory efficiency as we
// expect RowMaps to be large.
//
// However, BitVector and std::vector<uint32_t> allow things which are not
// possible with the data-structures preferred to them:
// * a range (as the name suggests) can only store a compact set of indices
// with no holes. A BitVector works around this limitation by storing a 1 at an
// index where that row is part of the RowMap and 0 otherwise.
// * as soon as ordering or duplicate rows come into play, we cannot use a
// BitVector anymore as ordering/duplicate row information cannot be captured
// by a BitVector.
//
// For small, sparse RowMaps, it is possible that a std::vector<uint32_t> is
// more efficient than a BitVector; in this case, we will make a best effort
// switch to it but the cases where this happens is not precisely defined.
class RowMap {
private:
// We need to declare these iterator classes before RowMap::Iterator as it
// depends on them. However, we don't want to make these public so keep them
// under a special private section.
// Iterator for ranged mode of RowMap.
// This class should act as a drop-in replacement for
// BitVector::SetBitsIterator.
class RangeIterator {
public:
RangeIterator(const RowMap* rm) : rm_(rm), index_(rm->start_idx_) {}
void Next() { ++index_; }
operator bool() const { return index_ < rm_->end_idx_; }
uint32_t index() const { return index_; }
uint32_t ordinal() const { return index_ - rm_->start_idx_; }
private:
const RowMap* rm_ = nullptr;
uint32_t index_ = 0;
};
// Iterator for index vector mode of RowMap.
// This class should act as a drop-in replacement for
// BitVector::SetBitsIterator.
class IndexVectorIterator {
public:
IndexVectorIterator(const RowMap* rm) : rm_(rm) {}
void Next() { ++ordinal_; }
operator bool() const { return ordinal_ < rm_->index_vector_.size(); }
uint32_t index() const { return rm_->index_vector_[ordinal_]; }
uint32_t ordinal() const { return ordinal_; }
private:
const RowMap* rm_ = nullptr;
uint32_t ordinal_ = 0;
};
public:
// Allows efficient iteration over the rows of a RowMap.
//
// Note: you should usually prefer to use the methods on RowMap directly (if
// they exist for the task being attempted) to avoid the lookup for the mode
// of the RowMap on every method call.
class Iterator {
public:
Iterator(const RowMap* rm) : rm_(rm) {
switch (rm->mode_) {
case Mode::kRange:
range_it_.reset(new RangeIterator(rm));
break;
case Mode::kBitVector:
set_bits_it_.reset(
new BitVector::SetBitsIterator(rm->bit_vector_.IterateSetBits()));
break;
case Mode::kIndexVector:
iv_it_.reset(new IndexVectorIterator(rm));
break;
}
}
Iterator(Iterator&&) noexcept = default;
Iterator& operator=(Iterator&&) = default;
// Forwards the iterator to the next row of the RowMap.
void Next() {
switch (rm_->mode_) {
case Mode::kRange:
range_it_->Next();
break;
case Mode::kBitVector:
set_bits_it_->Next();
break;
case Mode::kIndexVector:
iv_it_->Next();
break;
}
}
// Returns if the iterator is still valid.
operator bool() const {
switch (rm_->mode_) {
case Mode::kRange:
return *range_it_;
case Mode::kBitVector:
return *set_bits_it_;
case Mode::kIndexVector:
return *iv_it_;
}
PERFETTO_FATAL("For GCC");
}
// Returns the row pointed to by this iterator.
uint32_t row() const {
// RowMap uses the row/index nomenclature for referring to the mapping
// from index to a row (as the name suggests). However, the data
// structures used by RowMap use the index/ordinal naming (because they
// don't have the concept of a "row"). Switch the naming here.
switch (rm_->mode_) {
case Mode::kRange:
return range_it_->index();
case Mode::kBitVector:
return set_bits_it_->index();
case Mode::kIndexVector:
return iv_it_->index();
}
PERFETTO_FATAL("For GCC");
}
// Returns the index of the row the iterator points to.
uint32_t index() const {
// RowMap uses the row/index nomenclature for referring to the mapping
// from index to a row (as the name suggests). However, the data
// structures used by RowMap use the index/ordinal naming (because they
// don't have the concept of a "row"). Switch the naming here.
switch (rm_->mode_) {
case Mode::kRange:
return range_it_->ordinal();
case Mode::kBitVector:
return set_bits_it_->ordinal();
case Mode::kIndexVector:
return iv_it_->ordinal();
}
PERFETTO_FATAL("For GCC");
}
private:
Iterator(const Iterator&) = delete;
Iterator& operator=(const Iterator&) = delete;
// Only one of the below will be non-null depending on the mode of the
// RowMap.
std::unique_ptr<RangeIterator> range_it_;
std::unique_ptr<BitVector::SetBitsIterator> set_bits_it_;
std::unique_ptr<IndexVectorIterator> iv_it_;
const RowMap* rm_ = nullptr;
};
// Creates an empty RowMap.
// By default this will be implemented using a range.
RowMap();
// Creates a RowMap containing the range of rows between |start| and |end|
// i.e. all rows between |start| (inclusive) and |end| (exclusive).
explicit RowMap(uint32_t start, uint32_t end);
// Creates a RowMap backed by a BitVector.
explicit RowMap(BitVector bit_vector);
// Creates a RowMap backed by an std::vector<uint32_t>.
explicit RowMap(std::vector<uint32_t> vec);
// Creates a RowMap containing just |row|.
// By default this will be implemented using a range.
static RowMap SingleRow(uint32_t row) { return RowMap(row, row + 1); }
// Creates a copy of the RowMap.
// We have an explicit copy function because RowMap can hold onto large chunks
// of memory and we want to be very explicit when making a copy to avoid
// accidental leaks and copies.
RowMap Copy() const;
// Returns the size of the RowMap; that is the number of rows in the RowMap.
uint32_t size() const {
switch (mode_) {
case Mode::kRange:
return end_idx_ - start_idx_;
case Mode::kBitVector:
return bit_vector_.GetNumBitsSet();
case Mode::kIndexVector:
return static_cast<uint32_t>(index_vector_.size());
}
PERFETTO_FATAL("For GCC");
}
// Returns whether this rowmap is empty.
bool empty() const { return size() == 0; }
// Returns the row at index |row|.
uint32_t Get(uint32_t idx) const {
PERFETTO_DCHECK(idx < size());
switch (mode_) {
case Mode::kRange:
return start_idx_ + idx;
case Mode::kBitVector:
return bit_vector_.IndexOfNthSet(idx);
case Mode::kIndexVector:
return index_vector_[idx];
}
PERFETTO_FATAL("For GCC");
}
// Returns whether the RowMap contains the given row.
bool Contains(uint32_t row) const {
switch (mode_) {
case Mode::kRange: {
return row >= start_idx_ && row < end_idx_;
}
case Mode::kBitVector: {
return row < bit_vector_.size() && bit_vector_.IsSet(row);
}
case Mode::kIndexVector: {
auto it = std::find(index_vector_.begin(), index_vector_.end(), row);
return it != index_vector_.end();
}
}
PERFETTO_FATAL("For GCC");
}
// Returns the first index of the given |row| in the RowMap.
base::Optional<uint32_t> IndexOf(uint32_t row) const {
switch (mode_) {
case Mode::kRange: {
if (row < start_idx_ || row >= end_idx_)
return base::nullopt;
return row - start_idx_;
}
case Mode::kBitVector: {
return row < bit_vector_.size() && bit_vector_.IsSet(row)
? base::make_optional(bit_vector_.GetNumBitsSet(row))
: base::nullopt;
}
case Mode::kIndexVector: {
auto it = std::find(index_vector_.begin(), index_vector_.end(), row);
return it != index_vector_.end()
? base::make_optional(static_cast<uint32_t>(
std::distance(index_vector_.begin(), it)))
: base::nullopt;
}
}
PERFETTO_FATAL("For GCC");
}
// Performs an ordered insert the row into the current RowMap (precondition:
// this RowMap is ordered based on the rows it contains).
//
// Example:
// this = [1, 5, 10, 11, 20]
// Insert(10) // this = [1, 5, 10, 11, 20]
// Insert(12) // this = [1, 5, 10, 11, 12, 20]
// Insert(21) // this = [1, 5, 10, 11, 12, 20, 21]
// Insert(2) // this = [1, 2, 5, 10, 11, 12, 20, 21]
//
// Speecifically, this means that it is only valid to call Insert on a RowMap
// which is sorted by the rows it contains; this is automatically true when
// the RowMap is in range or BitVector mode but is a required condition for
// IndexVector mode.
void Insert(uint32_t row) {
switch (mode_) {
case Mode::kRange:
if (row == end_idx_) {
// Fast path: if we're just appending to the end of the range, we can
// stay in range mode and just bump the end index.
end_idx_++;
} else {
// Slow path: the insert is somewhere else other than the end. This
// means we need to switch to using a BitVector instead.
bit_vector_.Resize(start_idx_, false);
bit_vector_.Resize(end_idx_, true);
*this = RowMap(std::move(bit_vector_));
InsertIntoBitVector(row);
}
break;
case Mode::kBitVector:
InsertIntoBitVector(row);
break;
case Mode::kIndexVector: {
PERFETTO_DCHECK(
std::is_sorted(index_vector_.begin(), index_vector_.end()));
auto it =
std::upper_bound(index_vector_.begin(), index_vector_.end(), row);
index_vector_.insert(it, row);
break;
}
}
}
// Updates this RowMap by 'picking' the rows at indicies given by |picker|.
// This is easiest to explain with an example; suppose we have the following
// RowMaps:
// this : [0, 1, 4, 10, 11]
// picker: [0, 3, 4, 4, 2]
//
// After calling Apply(picker), we now have the following:
// this : [0, 10, 11, 11, 4]
//
// Conceptually, we are performing the following algorithm:
// RowMap rm = Copy()
// for (idx : picker)
// rm[i++] = this[idx]
// return rm;
RowMap SelectRows(const RowMap& selector) const {
uint32_t size = selector.size();
// If the selector is empty, just return an empty RowMap.
if (size == 0u)
return RowMap();
// If the selector is just picking a single row, just return that row
// without any additional overhead.
if (size == 1u)
return RowMap::SingleRow(Get(selector.Get(0)));
// For all other cases, go into the slow-path.
return SelectRowsSlow(selector);
}
// Intersects |other| with |this| writing the result into |this|.
// By "intersect", we mean to keep only the rows present in both RowMaps. The
// order of the preserved rows will be the same as |this|.
//
// Conceptually, we are performing the following algorithm:
// for (idx : this)
// if (!other.Contains(idx))
// Remove(idx)
void Intersect(const RowMap& other) {
if (mode_ == Mode::kRange && other.mode_ == Mode::kRange) {
// If both RowMaps have ranges, we can just take the smallest intersection
// of them as the new RowMap.
// We have this as an explicit fast path as this is very common for
// constraints on id and sorted columns to satisfy this condition.
start_idx_ = std::max(start_idx_, other.start_idx_);
end_idx_ = std::max(start_idx_, std::min(end_idx_, other.end_idx_));
return;
}
// TODO(lalitm): improve efficiency of this if we end up needing it.
RemoveIf([&other](uint32_t row) { return !other.Contains(row); });
}
// Filters the current RowMap into the RowMap given by |out| based on the
// return value of |p(idx)|.
//
// Precondition: |out| should be sorted by the rows inside it (this is
// required to keep this method efficient). This is automatically true if the
// mode is out is Range or BitVector but needs to be enforced if the mode is
// IndexVector.
//
// Specifically, the setup for each of the variables is as follows:
// this: contains the RowMap indices which will be looked up and passed to
// p to filter.
// out : contains indicies into |this| and will be filtered down to only
// contain indicies where p returns true.
// p : takes an index given by |this| and returns whether the index should
// be retained in |out|.
//
// Concretely, the algorithm being invoked looks like (but more efficient
// based on the mode of |this| and |out|):
// for (idx : out)
// this_idx = (*this)[idx]
// if (!p(this_idx))
// out->Remove(idx)
template <typename Predicate>
void FilterInto(RowMap* out, Predicate p) const {
PERFETTO_DCHECK(size() >= out->size());
if (out->empty()) {
// If the output RowMap is empty, we don't need to do anything.
return;
}
if (out->size() == 1) {
// If the output RowMap has a single entry, just lookup that entry and see
// if we should keep it.
if (!p(Get(out->Get(0))))
*out = RowMap();
return;
}
// TODO(lalitm): investigate whether we should have another fast path for
// cases where |out| has only a few entries so we can scan |out| instead of
// scanning |this|.
// TODO(lalit): investigate whether we should also scan |out| if |this| is
// a range or index vector as, in those cases, it would be fast to lookup
// |this| by index.
// We choose to full scan |this| rather than |out| as the performance
// penalty of incorrectly scanning |out| is much worse than mistakely
// scanning |this|.
// This is because scans on |out| involve an indexed lookup on |this| which
// (in the case of a bitvector) can be very expensive. On the other hand,
// scanning |this| means we never have to do indexed lookups but we may
// scan many more rows than necessary (as they may have already been
// excluded in out).
FilterIntoScanSelf(out, p);
}
template <typename Comparator>
void StableSort(std::vector<uint32_t>* out, Comparator c) const {
switch (mode_) {
case Mode::kRange: {
StableSort(out, c, [this](uint32_t off) { return start_idx_ + off; });
break;
}
case Mode::kBitVector: {
StableSort(out, c, [this](uint32_t off) {
return bit_vector_.IndexOfNthSet(off);
});
break;
}
case Mode::kIndexVector: {
StableSort(out, c, [this](uint32_t off) { return index_vector_[off]; });
break;
}
}
}
// Returns the iterator over the rows in this RowMap.
Iterator IterateRows() const { return Iterator(this); }
// Returns if the RowMap is internally represented using a range.
bool IsRange() const { return mode_ == Mode::kRange; }
private:
enum class Mode {
kRange,
kBitVector,
kIndexVector,
};
// Filters the current RowMap into |out| by performing a full scan on |this|.
// See |FilterInto| for a full breakdown of the semantics of this function.
template <typename Predicate>
void FilterIntoScanSelf(RowMap* out, Predicate p) const {
switch (mode_) {
case Mode::kRange:
FilterIntoScanSelf(out, RangeIterator(this), p);
break;
case Mode::kBitVector:
FilterIntoScanSelf(out, bit_vector_.IterateSetBits(), p);
break;
case Mode::kIndexVector:
FilterIntoScanSelf(out, IndexVectorIterator(this), p);
break;
}
}
// Filters the current RowMap into |out| by performing a full scan on |this|
// using the |it|, a strongly typed iterator on |this| (a strongly typed
// iterator is used for performance reasons).
// See |FilterInto| for a full breakdown of the semantics of this function.
template <typename Iterator, typename Predicate>
void FilterIntoScanSelf(RowMap* out, Iterator it, Predicate p) const {
switch (out->mode_) {
case Mode::kRange: {
// TODO(lalitm): investigate whether we can reuse the data inside
// out->bit_vector_ at some point.
BitVector bv(out->end_idx_, false);
for (auto out_it = bv.IterateAllBits(); it; it.Next(), out_it.Next()) {
uint32_t ordinal = it.ordinal();
if (ordinal < out->start_idx_)
continue;
if (ordinal >= out->end_idx_)
break;
if (p(it.index())) {
out_it.Set();
}
}
*out = RowMap(std::move(bv));
break;
}
case Mode::kBitVector: {
auto out_it = out->bit_vector_.IterateAllBits();
for (; out_it; it.Next(), out_it.Next()) {
PERFETTO_DCHECK(it);
if (out_it.IsSet() && !p(it.index()))
out_it.Clear();
}
break;
}
case Mode::kIndexVector: {
PERFETTO_DCHECK(std::is_sorted(out->index_vector_.begin(),
out->index_vector_.end()));
auto fn = [&p, &it](uint32_t i) {
while (it.ordinal() < i) {
it.Next();
PERFETTO_DCHECK(it);
}
PERFETTO_DCHECK(it.ordinal() == i);
return !p(it.index());
};
auto iv_it = std::remove_if(out->index_vector_.begin(),
out->index_vector_.end(), fn);
out->index_vector_.erase(iv_it, out->index_vector_.end());
break;
}
}
}
void InsertIntoBitVector(uint32_t row) {
PERFETTO_DCHECK(mode_ == Mode::kBitVector);
if (row >= bit_vector_.size())
bit_vector_.Resize(row + 1, false);
bit_vector_.Set(row);
}
// Removes any row where |p(row)| returns false from this RowMap.
template <typename Predicate>
void RemoveIf(Predicate p) {
switch (mode_) {
case Mode::kRange: {
bit_vector_.Resize(start_idx_, false);
for (uint32_t i = start_idx_; i < end_idx_; ++i) {
if (p(i))
bit_vector_.AppendFalse();
else
bit_vector_.AppendTrue();
}
*this = RowMap(std::move(bit_vector_));
break;
}
case Mode::kBitVector: {
for (auto it = bit_vector_.IterateSetBits(); it; it.Next()) {
if (p(it.index()))
it.Clear();
}
break;
}
case Mode::kIndexVector: {
auto it = std::remove_if(index_vector_.begin(), index_vector_.end(), p);
index_vector_.erase(it, index_vector_.end());
break;
}
}
}
template <typename Comparator, typename Indexer>
void StableSort(std::vector<uint32_t>* out, Comparator c, Indexer i) const {
std::stable_sort(
out->begin(), out->end(),
[&c, &i](uint32_t a, uint32_t b) { return c(i(a), i(b)); });
}
RowMap SelectRowsSlow(const RowMap& selector) const;
Mode mode_ = Mode::kRange;
// Only valid when |mode_| == Mode::kRange.
uint32_t start_idx_ = 0; // This is an inclusive index.
uint32_t end_idx_ = 0; // This is an exclusive index.
// Only valid when |mode_| == Mode::kBitVector.
BitVector bit_vector_;
// Only valid when |mode_| == Mode::kIndexVector.
std::vector<uint32_t> index_vector_;
};
} // namespace trace_processor
} // namespace perfetto
#endif // SRC_TRACE_PROCESSOR_CONTAINERS_ROW_MAP_H_