blob: 44433082c786f6616cec1c10eff45a6772250bfa [file] [log] [blame]
/*
* Copyright (C) 2019 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef SRC_TRACE_PROCESSOR_CONTAINERS_ROW_MAP_H_
#define SRC_TRACE_PROCESSOR_CONTAINERS_ROW_MAP_H_
#include <algorithm>
#include <cstdint>
#include <iterator>
#include <numeric>
#include <optional>
#include <utility>
#include <variant>
#include <vector>
#include "perfetto/base/compiler.h"
#include "perfetto/base/logging.h"
#include "src/trace_processor/containers/bit_vector.h"
namespace perfetto {
namespace trace_processor {
// Stores a list of row indicies in a space efficient manner. One or more
// columns can refer to the same RowMap. The RowMap defines the access pattern
// to iterate on rows.
//
// Naming convention:
//
// As both the input and output of RowMap is a uint32_t, it can be quite
// confusing to reason about what parameters/return values of the functions
// of RowMap actually means. To help with this, we define a strict convention
// of naming.
//
// row: input - that is, rows are what are passed into operator[]; named as
// such because a "row" number in a table is converted to an index to
// lookup in the backing vectors.
// index: output - that is, indices are what are returned from operator[];
// named as such because an "index" is what's used to lookup data
// from the backing vectors.
//
// Implementation details:
//
// Behind the scenes, this class is impelemented using one of three backing
// data-structures:
// 1. A start and end index (internally named 'range')
// 1. BitVector
// 2. std::vector<uint32_t> (internally named IndexVector).
//
// Generally the preference for data structures is range > BitVector >
// std::vector<uint32>; this ordering is based mainly on memory efficiency as we
// expect RowMaps to be large.
//
// However, BitVector and std::vector<uint32_t> allow things which are not
// possible with the data-structures preferred to them:
// * a range (as the name suggests) can only store a compact set of indices
// with no holes. A BitVector works around this limitation by storing a 1 at an
// index where that row is part of the RowMap and 0 otherwise.
// * as soon as ordering or duplicate rows come into play, we cannot use a
// BitVector anymore as ordering/duplicate row information cannot be captured
// by a BitVector.
//
// For small, sparse RowMaps, it is possible that a std::vector<uint32_t> is
// more efficient than a BitVector; in this case, we will make a best effort
// switch to it but the cases where this happens is not precisely defined.
class RowMap {
public:
using InputRow = uint32_t;
using OutputIndex = uint32_t;
using IndexVector = std::vector<OutputIndex>;
struct Range {
Range(OutputIndex start_index, OutputIndex end_index)
: start(start_index), end(end_index) {
PERFETTO_DCHECK(start_index <= end_index);
}
Range() : start(0), end(0) {}
OutputIndex start; // This is an inclusive index.
OutputIndex end; // This is an exclusive index.
bool empty() const { return size() == 0; }
uint32_t size() const { return end - start; }
inline bool Contains(uint32_t val) const {
return val >= start && val < end;
}
};
// Allows efficient iteration over the rows of a RowMap.
//
// Note: you should usually prefer to use the methods on RowMap directly (if
// they exist for the task being attempted) to avoid the lookup for the mode
// of the RowMap on every method call.
class Iterator {
public:
explicit Iterator(const RowMap* rm);
Iterator(const Iterator&) = delete;
Iterator& operator=(const Iterator&) = delete;
Iterator(Iterator&&) noexcept = default;
Iterator& operator=(Iterator&&) = default;
// Forwards the iterator to the next row of the RowMap.
void Next() { ++ordinal_; }
// Returns if the iterator is still valid.
explicit operator bool() const {
if (const auto* range = std::get_if<Range>(&rm_->data_)) {
return ordinal_ < range->end;
}
if (std::get_if<BitVector>(&rm_->data_)) {
return ordinal_ < results_.size();
}
if (const auto* vec = std::get_if<IndexVector>(&rm_->data_)) {
return ordinal_ < vec->size();
}
PERFETTO_FATAL("Didn't match any variant type.");
}
// Returns the index pointed to by this iterator.
OutputIndex index() const {
if (std::get_if<Range>(&rm_->data_)) {
return ordinal_;
}
if (std::get_if<BitVector>(&rm_->data_)) {
return results_[ordinal_];
}
if (const auto* vec = std::get_if<IndexVector>(&rm_->data_)) {
return (*vec)[ordinal_];
}
PERFETTO_FATAL("Didn't match any variant type.");
}
// Returns the row of the index the iterator points to.
InputRow row() const {
if (const auto* range = std::get_if<Range>(&rm_->data_)) {
return ordinal_ - range->start;
}
if (std::get_if<BitVector>(&rm_->data_) ||
std::get_if<IndexVector>(&rm_->data_)) {
return ordinal_;
}
PERFETTO_FATAL("Didn't match any variant type.");
}
private:
// Ordinal will not be used for BitVector based RowMap.
uint32_t ordinal_ = 0;
// Not empty for BitVector based RowMap.
std::vector<uint32_t> results_;
const RowMap* rm_ = nullptr;
};
// Creates an empty RowMap.
// By default this will be implemented using a range.
RowMap();
// Creates a RowMap containing the range of indices between |start| and |end|
// i.e. all indices between |start| (inclusive) and |end| (exclusive).
RowMap(OutputIndex start, OutputIndex end);
// Creates a RowMap backed by a BitVector.
explicit RowMap(BitVector);
// Creates a RowMap backed by an std::vector<uint32_t>.
explicit RowMap(IndexVector);
RowMap(const RowMap&) noexcept = delete;
RowMap& operator=(const RowMap&) = delete;
RowMap(RowMap&&) noexcept = default;
RowMap& operator=(RowMap&&) = default;
// Creates a RowMap containing just |index|.
// By default this will be implemented using a range.
static RowMap SingleRow(OutputIndex index) {
return RowMap(index, index + 1);
}
// Creates a copy of the RowMap.
// We have an explicit copy function because RowMap can hold onto large chunks
// of memory and we want to be very explicit when making a copy to avoid
// accidental leaks and copies.
RowMap Copy() const;
// Returns the size of the RowMap; that is the number of indices in the
// RowMap.
uint32_t size() const {
if (const auto* range = std::get_if<Range>(&data_)) {
return range->size();
}
if (const auto* bv = std::get_if<BitVector>(&data_)) {
return bv->CountSetBits();
}
if (const auto* vec = std::get_if<IndexVector>(&data_)) {
return static_cast<uint32_t>(vec->size());
}
NoVariantMatched();
}
// Returns whether this rowmap is empty.
bool empty() const { return size() == 0; }
// Returns the index at the given |row|.
OutputIndex Get(InputRow row) const {
if (const auto* range = std::get_if<Range>(&data_)) {
return GetRange(*range, row);
}
if (const auto* bv = std::get_if<BitVector>(&data_)) {
return GetBitVector(*bv, row);
}
if (const auto* vec = std::get_if<IndexVector>(&data_)) {
return GetIndexVector(*vec, row);
}
NoVariantMatched();
}
// Returns the vector of all indices in the RowMap.
std::vector<OutputIndex> GetAllIndices() const {
if (const auto* range = std::get_if<Range>(&data_)) {
std::vector<uint32_t> res(range->size());
std::iota(res.begin(), res.end(), range->start);
return res;
}
if (const auto* bv = std::get_if<BitVector>(&data_)) {
return bv->GetSetBitIndices();
}
if (const auto* vec = std::get_if<IndexVector>(&data_)) {
return *vec;
}
NoVariantMatched();
}
// Returns maximum size of the output. Ie range.end or size of the BV.
OutputIndex Max() const;
// Returns whether the RowMap contains the given index.
bool Contains(OutputIndex index) const {
if (const auto* range = std::get_if<Range>(&data_)) {
return index >= range->start && index < range->end;
}
if (const auto* bv = std::get_if<BitVector>(&data_)) {
return index < bv->size() && bv->IsSet(index);
}
if (const auto* vec = std::get_if<IndexVector>(&data_)) {
return std::find(vec->begin(), vec->end(), index) != vec->end();
}
NoVariantMatched();
}
// Returns the first row of the given |index| in the RowMap.
std::optional<InputRow> RowOf(OutputIndex index) const {
if (const auto* range = std::get_if<Range>(&data_)) {
if (index < range->start || index >= range->end)
return std::nullopt;
return index - range->start;
}
if (const auto* bv = std::get_if<BitVector>(&data_)) {
return index < bv->size() && bv->IsSet(index)
? std::make_optional(bv->CountSetBits(index))
: std::nullopt;
}
if (const auto* vec = std::get_if<IndexVector>(&data_)) {
auto it = std::find(vec->begin(), vec->end(), index);
return it != vec->end() ? std::make_optional(static_cast<InputRow>(
std::distance(vec->begin(), it)))
: std::nullopt;
}
NoVariantMatched();
}
// Performs an ordered insert of the index into the current RowMap
// (precondition: this RowMap is ordered based on the indices it contains).
//
// Example:
// this = [1, 5, 10, 11, 20]
// Insert(10) // this = [1, 5, 10, 11, 20]
// Insert(12) // this = [1, 5, 10, 11, 12, 20]
// Insert(21) // this = [1, 5, 10, 11, 12, 20, 21]
// Insert(2) // this = [1, 2, 5, 10, 11, 12, 20, 21]
//
// Speecifically, this means that it is only valid to call Insert on a RowMap
// which is sorted by the indices it contains; this is automatically true when
// the RowMap is in range or BitVector mode but is a required condition for
// IndexVector mode.
void Insert(OutputIndex index) {
if (auto* range = std::get_if<Range>(&data_)) {
if (index == range->end) {
// Fast path: if we're just appending to the end
// of the range, we can stay in range mode and
// just bump the end index.
range->end++;
return;
}
// Slow path: the insert is somewhere else other
// than the end. This means we need to switch to
// using a BitVector instead.
BitVector bv;
bv.Resize(range->start, false);
bv.Resize(range->end, true);
InsertIntoBitVector(bv, index);
data_ = std::move(bv);
return;
}
if (auto* bv = std::get_if<BitVector>(&data_)) {
InsertIntoBitVector(*bv, index);
return;
}
if (auto* vec = std::get_if<IndexVector>(&data_)) {
PERFETTO_DCHECK(std::is_sorted(vec->begin(), vec->end()));
auto it = std::upper_bound(vec->begin(), vec->end(), index);
vec->insert(it, index);
return;
}
NoVariantMatched();
}
// Updates this RowMap by 'picking' the indices given by |picker|.
// This is easiest to explain with an example; suppose we have the following
// RowMaps:
// this : [0, 1, 4, 10, 11]
// picker: [0, 3, 4, 4, 2]
//
// After calling Apply(picker), we now have the following:
// this : [0, 10, 11, 11, 4]
//
// Conceptually, we are performing the following algorithm:
// RowMap rm = Copy()
// for (p : picker)
// rm[i++] = this[p]
// return rm;
RowMap SelectRows(const RowMap& selector) const {
uint32_t size = selector.size();
// If the selector is empty, just return an empty RowMap.
if (size == 0u)
return {};
// If the selector is just picking a single row, just return that row
// without any additional overhead.
if (size == 1u)
return RowMap::SingleRow(Get(selector.Get(0)));
// For all other cases, go into the slow-path.
return SelectRowsSlow(selector);
}
// Intersects |this| with |second| independent of underlying structure of both
// RowMaps. Modifies |this| to only contain indices present in |second|.
void Intersect(const RowMap& second);
// Intersects this RowMap with |index|. If this RowMap contained |index|, then
// it will *only* contain |index|. Otherwise, it will be empty.
void IntersectExact(OutputIndex index) {
if (Contains(index)) {
*this = RowMap(index, index + 1);
} else {
Clear();
}
}
// Clears this RowMap by resetting it to a newly constructed state.
void Clear() { *this = RowMap(); }
// Converts this RowMap to an index vector in the most efficient way
// possible.
std::vector<uint32_t> TakeAsIndexVector() && {
if (const auto* range = std::get_if<Range>(&data_)) {
std::vector<uint32_t> rm(range->size());
std::iota(rm.begin(), rm.end(), range->start);
return rm;
}
if (const auto* bv = std::get_if<BitVector>(&data_)) {
return bv->GetSetBitIndices();
}
if (auto* vec = std::get_if<IndexVector>(&data_)) {
return std::move(*vec);
}
NoVariantMatched();
}
// Returns the data in RowMap BitVector, nullptr if RowMap is in a different
// mode.
const BitVector* GetIfBitVector() const {
return std::get_if<BitVector>(&data_);
}
// Returns the data in RowMap IndexVector, nullptr if RowMap is in a different
// mode.
const std::vector<uint32_t>* GetIfIndexVector() const {
return std::get_if<IndexVector>(&data_);
}
// Returns the data in RowMap Range, nullptr if RowMap is in a different
// mode.
const Range* GetIfIRange() const { return std::get_if<Range>(&data_); }
// Returns the iterator over the rows in this RowMap.
Iterator IterateRows() const { return Iterator(this); }
// Returns if the RowMap is internally represented using a range.
bool IsRange() const { return std::holds_alternative<Range>(data_); }
// Returns if the RowMap is internally represented using a BitVector.
bool IsBitVector() const { return std::holds_alternative<BitVector>(data_); }
// Returns if the RowMap is internally represented using an index vector.
bool IsIndexVector() const {
return std::holds_alternative<IndexVector>(data_);
}
private:
using Variant = std::variant<Range, BitVector, IndexVector>;
explicit RowMap(Range);
explicit RowMap(Variant);
PERFETTO_ALWAYS_INLINE static OutputIndex GetRange(Range r, InputRow row) {
return r.start + row;
}
PERFETTO_ALWAYS_INLINE static OutputIndex GetBitVector(const BitVector& bv,
uint32_t row) {
return bv.IndexOfNthSet(row);
}
PERFETTO_ALWAYS_INLINE static OutputIndex GetIndexVector(
const IndexVector& vec,
uint32_t row) {
return vec[row];
}
RowMap SelectRowsSlow(const RowMap& selector) const;
static void InsertIntoBitVector(BitVector& bv, OutputIndex row) {
if (row == bv.size()) {
bv.AppendTrue();
return;
}
if (row > bv.size())
bv.Resize(row + 1, false);
bv.Set(row);
}
PERFETTO_NORETURN static void NoVariantMatched() {
PERFETTO_FATAL("Didn't match any variant type.");
}
Variant data_;
};
} // namespace trace_processor
} // namespace perfetto
#endif // SRC_TRACE_PROCESSOR_CONTAINERS_ROW_MAP_H_