Merge "tp: Add Distinct to CEngine API" into main
diff --git a/src/trace_processor/db/column/arrangement_overlay.cc b/src/trace_processor/db/column/arrangement_overlay.cc
index 17063c9..b7f3dfe 100644
--- a/src/trace_processor/db/column/arrangement_overlay.cc
+++ b/src/trace_processor/db/column/arrangement_overlay.cc
@@ -19,6 +19,7 @@
#include <algorithm>
#include <cstdint>
#include <memory>
+#include <unordered_set>
#include <utility>
#include <vector>
@@ -146,6 +147,24 @@
inner_->StableSort(start, end, direction);
}
+void ArrangementOverlay::ChainImpl::Distinct(Indices& indices) const {
+ PERFETTO_TP_TRACE(metatrace::Category::DB,
+ "ArrangementOverlay::ChainImpl::Distinct");
+ // TODO(mayzner): Utilize `does_arrangmeent_order_storage_`.
+ std::unordered_set<uint32_t> s;
+ indices.tokens.erase(
+ std::remove_if(indices.tokens.begin(), indices.tokens.end(),
+ [this, &s](Indices::Token& idx) {
+ if (s.insert(idx.index).second) {
+ idx.index = (*arrangement_)[idx.index];
+ return false;
+ }
+ return true;
+ }),
+ indices.tokens.end());
+ inner_->Distinct(indices);
+}
+
void ArrangementOverlay::ChainImpl::Serialize(StorageProto* storage) const {
auto* arrangement_overlay = storage->set_arrangement_overlay();
arrangement_overlay->set_values(
diff --git a/src/trace_processor/db/column/arrangement_overlay.h b/src/trace_processor/db/column/arrangement_overlay.h
index 2bc61cb..3f9e4ca 100644
--- a/src/trace_processor/db/column/arrangement_overlay.h
+++ b/src/trace_processor/db/column/arrangement_overlay.h
@@ -72,6 +72,8 @@
SortToken* end,
SortDirection) const override;
+ void Distinct(Indices&) const override;
+
void Serialize(StorageProto*) const override;
uint32_t size() const override {
diff --git a/src/trace_processor/db/column/arrangement_overlay_unittest.cc b/src/trace_processor/db/column/arrangement_overlay_unittest.cc
index 3a26f0c..55f310b 100644
--- a/src/trace_processor/db/column/arrangement_overlay_unittest.cc
+++ b/src/trace_processor/db/column/arrangement_overlay_unittest.cc
@@ -109,8 +109,7 @@
TEST(ArrangementOverlay, OrderingSearch) {
std::vector<uint32_t> arrangement{0, 2, 4, 1, 3};
- auto fake = FakeStorageChain::SearchSubset(
- 5, BitVector({false, true, false, true, false}));
+ auto fake = FakeStorageChain::SearchSubset(5, BitVector({0, 1, 0, 1, 0}));
ArrangementOverlay storage(&arrangement, Indices::State::kNonmonotonic);
auto chain =
storage.MakeChain(std::move(fake), DataLayer::ChainCreationArgs(true));
@@ -141,5 +140,21 @@
ElementsAre(0, 3, 1, 4, 2));
}
+TEST(ArrangementOverlay, Distinct) {
+ std::vector<uint32_t> numeric_data{10, 20, 10, 20, 30};
+ NumericStorage<uint32_t> numeric(&numeric_data, ColumnType::kUint32, false);
+
+ // 30, 30, 10, 20, 10, 20
+ std::vector<uint32_t> arrangement{4, 4, 0, 1, 2, 3};
+ ArrangementOverlay storage(&arrangement, Indices::State::kNonmonotonic);
+ auto chain = storage.MakeChain(numeric.MakeChain());
+
+ // 30, 30, 20, 20
+ auto indices = Indices::CreateWithIndexPayloadForTesting(
+ {0, 1, 3, 3}, Indices::State::kNonmonotonic);
+ chain->Distinct(indices);
+ ASSERT_THAT(utils::ExtractPayloadForTesting(indices), ElementsAre(0, 2));
+}
+
} // namespace
} // namespace perfetto::trace_processor::column
diff --git a/src/trace_processor/db/column/data_layer.h b/src/trace_processor/db/column/data_layer.h
index c8d74a8..85e93b9 100644
--- a/src/trace_processor/db/column/data_layer.h
+++ b/src/trace_processor/db/column/data_layer.h
@@ -283,6 +283,14 @@
SortToken* end,
SortDirection direction) const = 0;
+ // Removes all indices pointing to values that are duplicates, as a result the
+ // indices will only point to distinct (not duplicated) values.
+ //
+ // Notes for implementors:
+ // * Each layer that might introduce duplicates is responsible for removing
+ // them.
+ virtual void Distinct(Indices&) const = 0;
+
// Serializes storage data to proto format.
virtual void Serialize(StorageProto*) const = 0;
diff --git a/src/trace_processor/db/column/dense_null_overlay.cc b/src/trace_processor/db/column/dense_null_overlay.cc
index 55003fa..50a5a7e 100644
--- a/src/trace_processor/db/column/dense_null_overlay.cc
+++ b/src/trace_processor/db/column/dense_null_overlay.cc
@@ -250,6 +250,35 @@
}
}
+void DenseNullOverlay::ChainImpl::Distinct(Indices& indices) const {
+ PERFETTO_TP_TRACE(metatrace::Category::DB,
+ "DenseNullOverlay::ChainImpl::Distinct");
+ // Find first NULL.
+ auto first_null_it = std::find_if(
+ indices.tokens.begin(), indices.tokens.end(),
+ [this](const Indices::Token& t) { return !non_null_->IsSet(t.index); });
+
+ // Save first NULL.
+ std::optional<Indices::Token> null_tok;
+ if (first_null_it != indices.tokens.end()) {
+ null_tok = *first_null_it;
+ }
+
+ // Erase all NULLs.
+ indices.tokens.erase(std::remove_if(first_null_it, indices.tokens.end(),
+ [this](const Indices::Token& idx) {
+ return !non_null_->IsSet(idx.index);
+ }),
+ indices.tokens.end());
+
+ inner_->Distinct(indices);
+
+ // Add the only null as it is distinct value.
+ if (null_tok.has_value()) {
+ indices.tokens.push_back(*null_tok);
+ }
+}
+
void DenseNullOverlay::ChainImpl::Serialize(StorageProto* storage) const {
auto* null_overlay = storage->set_dense_null_overlay();
non_null_->Serialize(null_overlay->set_bit_vector());
diff --git a/src/trace_processor/db/column/dense_null_overlay.h b/src/trace_processor/db/column/dense_null_overlay.h
index b27beb5..1edb84e 100644
--- a/src/trace_processor/db/column/dense_null_overlay.h
+++ b/src/trace_processor/db/column/dense_null_overlay.h
@@ -64,6 +64,8 @@
SortToken* end,
SortDirection) const override;
+ void Distinct(Indices&) const override;
+
void Serialize(StorageProto*) const override;
uint32_t size() const override { return non_null_->size(); }
diff --git a/src/trace_processor/db/column/dense_null_overlay_unittest.cc b/src/trace_processor/db/column/dense_null_overlay_unittest.cc
index efda157..24b1eee 100644
--- a/src/trace_processor/db/column/dense_null_overlay_unittest.cc
+++ b/src/trace_processor/db/column/dense_null_overlay_unittest.cc
@@ -35,6 +35,7 @@
using testing::ElementsAre;
using testing::IsEmpty;
+using testing::UnorderedElementsAre;
using Indices = DataLayerChain::Indices;
using OrderedIndices = DataLayerChain::OrderedIndices;
@@ -237,5 +238,22 @@
}
}
+TEST(DenseNullOverlay, Distinct) {
+ std::vector<uint32_t> numeric_data{0, 3, 0, 1, 0, 2, 4};
+ NumericStorage<uint32_t> numeric(&numeric_data, ColumnType::kUint32, false);
+
+ // NULL, 1, NULL, 1, 0, 2, 4
+ BitVector null{0, 1, 0, 1, 1, 1, 1};
+ DenseNullOverlay overlay(&null);
+ auto chain = overlay.MakeChain(numeric.MakeChain());
+
+ // NULL, 0, 1, 1
+ auto indices = Indices::CreateWithIndexPayloadForTesting(
+ {0, 1, 3, 3}, Indices::State::kNonmonotonic);
+ chain->Distinct(indices);
+ ASSERT_THAT(utils::ExtractPayloadForTesting(indices),
+ UnorderedElementsAre(0, 1, 2));
+}
+
} // namespace
} // namespace perfetto::trace_processor::column
diff --git a/src/trace_processor/db/column/dummy_storage.cc b/src/trace_processor/db/column/dummy_storage.cc
index 27b464f..70418e7 100644
--- a/src/trace_processor/db/column/dummy_storage.cc
+++ b/src/trace_processor/db/column/dummy_storage.cc
@@ -62,6 +62,10 @@
PERFETTO_FATAL("Shouldn't be called");
}
+void DummyStorage::ChainImpl::Distinct(Indices&) const {
+ PERFETTO_FATAL("Shouldn't be called");
+}
+
uint32_t DummyStorage::ChainImpl::size() const {
return 0;
}
diff --git a/src/trace_processor/db/column/dummy_storage.h b/src/trace_processor/db/column/dummy_storage.h
index 80674bb..08bd319 100644
--- a/src/trace_processor/db/column/dummy_storage.h
+++ b/src/trace_processor/db/column/dummy_storage.h
@@ -53,6 +53,8 @@
SortToken* end,
SortDirection) const override;
+ void Distinct(Indices&) const override;
+
void Serialize(StorageProto*) const override;
uint32_t size() const override;
diff --git a/src/trace_processor/db/column/fake_storage.cc b/src/trace_processor/db/column/fake_storage.cc
index babfb7c..8a7cb48 100644
--- a/src/trace_processor/db/column/fake_storage.cc
+++ b/src/trace_processor/db/column/fake_storage.cc
@@ -149,6 +149,12 @@
PERFETTO_FATAL("For GCC");
}
+void FakeStorageChain::Distinct(Indices&) const {
+ // Fake storage shouldn't implement Distinct as it's not a binary (this index
+ // passes or not) operation on a column.
+ PERFETTO_FATAL("Not implemented");
+}
+
void FakeStorageChain::StableSort(SortToken*, SortToken*, SortDirection) const {
PERFETTO_FATAL("Not implemented");
}
diff --git a/src/trace_processor/db/column/fake_storage.h b/src/trace_processor/db/column/fake_storage.h
index bc02300..50a981d 100644
--- a/src/trace_processor/db/column/fake_storage.h
+++ b/src/trace_processor/db/column/fake_storage.h
@@ -92,6 +92,8 @@
SortToken* end,
SortDirection) const override;
+ void Distinct(Indices&) const override;
+
void Serialize(StorageProto*) const override;
uint32_t size() const override { return size_; }
diff --git a/src/trace_processor/db/column/fake_storage_unittest.cc b/src/trace_processor/db/column/fake_storage_unittest.cc
index 0bc0211..908b7e0 100644
--- a/src/trace_processor/db/column/fake_storage_unittest.cc
+++ b/src/trace_processor/db/column/fake_storage_unittest.cc
@@ -119,6 +119,40 @@
}
}
+TEST(FakeStorage, Search) {
+ {
+ // All passes
+ auto fake = FakeStorageChain::SearchAll(5);
+ auto ret = fake->Search(FilterOp::kEq, SqlValue(), Range(1, 3));
+ ASSERT_THAT(utils::ToIndexVectorForTests(ret), ElementsAre(1, 2));
+ }
+ {
+ // None passes
+ auto fake = FakeStorageChain::SearchNone(5);
+ auto ret = fake->Search(FilterOp::kEq, SqlValue(), Range(1, 3));
+ ASSERT_THAT(utils::ToIndexVectorForTests(ret), ElementsAre());
+ }
+ {
+ // Index vector
+ auto fake =
+ FakeStorageChain::SearchSubset(5, std::vector<uint32_t>{1, 2, 4, 5});
+ auto ret = fake->Search(FilterOp::kEq, SqlValue(), Range(0, 3));
+ ASSERT_THAT(utils::ToIndexVectorForTests(ret), ElementsAre(1, 2));
+ }
+ {
+ // BitVector
+ auto fake = FakeStorageChain::SearchSubset(5, BitVector{0, 1, 0, 1, 0});
+ auto ret = fake->Search(FilterOp::kEq, SqlValue(), Range(1, 3));
+ ASSERT_THAT(utils::ToIndexVectorForTests(ret), ElementsAre(1));
+ }
+ {
+ // Range
+ auto fake = FakeStorageChain::SearchSubset(5, Range(2, 4));
+ auto ret = fake->Search(FilterOp::kEq, SqlValue(), Range(1, 3));
+ ASSERT_THAT(utils::ToIndexVectorForTests(ret), ElementsAre(2));
+ }
+}
+
TEST(FakeStorage, IndexSearchValidated) {
{
// All passes
diff --git a/src/trace_processor/db/column/id_storage.cc b/src/trace_processor/db/column/id_storage.cc
index e3f4d8a..1f1ee76 100644
--- a/src/trace_processor/db/column/id_storage.cc
+++ b/src/trace_processor/db/column/id_storage.cc
@@ -22,6 +22,7 @@
#include <iterator>
#include <limits>
#include <string>
+#include <unordered_set>
#include <utility>
#include "perfetto/base/logging.h"
@@ -313,6 +314,8 @@
void IdStorage::ChainImpl::StableSort(SortToken* start,
SortToken* end,
SortDirection direction) const {
+ PERFETTO_TP_TRACE(metatrace::Category::DB,
+ "IdStorage::ChainImpl::StableSort");
switch (direction) {
case SortDirection::kAscending:
std::stable_sort(start, end, [](const SortToken& a, const SortToken& b) {
@@ -328,6 +331,17 @@
PERFETTO_FATAL("For GCC");
}
+void IdStorage::ChainImpl::Distinct(Indices& indices) const {
+ PERFETTO_TP_TRACE(metatrace::Category::DB, "IdStorage::ChainImpl::Distinct");
+ std::unordered_set<uint32_t> s;
+ indices.tokens.erase(
+ std::remove_if(indices.tokens.begin(), indices.tokens.end(),
+ [&s](const Indices::Token& idx) {
+ return !s.insert(idx.index).second;
+ }),
+ indices.tokens.end());
+}
+
void IdStorage::ChainImpl::Serialize(StorageProto* storage) const {
storage->set_id_storage();
}
diff --git a/src/trace_processor/db/column/id_storage.h b/src/trace_processor/db/column/id_storage.h
index 51b9f0d..b25f7f8 100644
--- a/src/trace_processor/db/column/id_storage.h
+++ b/src/trace_processor/db/column/id_storage.h
@@ -63,6 +63,8 @@
SortToken* end,
SortDirection) const override;
+ void Distinct(Indices&) const override;
+
void Serialize(StorageProto*) const override;
uint32_t size() const override {
diff --git a/src/trace_processor/db/column/id_storage_unittest.cc b/src/trace_processor/db/column/id_storage_unittest.cc
index 2d6fd44..3e00ff4 100644
--- a/src/trace_processor/db/column/id_storage_unittest.cc
+++ b/src/trace_processor/db/column/id_storage_unittest.cc
@@ -373,6 +373,16 @@
ElementsAre(4, 3, 2, 1, 0));
}
+TEST(IdStorage, Distinct) {
+ IdStorage storage;
+ auto chain = storage.MakeChain();
+
+ auto indices = Indices::CreateWithIndexPayloadForTesting(
+ {1, 1, 0, 3}, Indices::State::kNonmonotonic);
+ chain->Distinct(indices);
+ ASSERT_THAT(utils::ExtractPayloadForTesting(indices), ElementsAre(0, 2, 3));
+}
+
} // namespace
} // namespace column
} // namespace perfetto::trace_processor
diff --git a/src/trace_processor/db/column/null_overlay.cc b/src/trace_processor/db/column/null_overlay.cc
index b1f8f45..78b0ae8 100644
--- a/src/trace_processor/db/column/null_overlay.cc
+++ b/src/trace_processor/db/column/null_overlay.cc
@@ -20,6 +20,7 @@
#include <cstdint>
#include <iterator>
#include <memory>
+#include <optional>
#include <utility>
#include <vector>
@@ -104,9 +105,9 @@
PERFETTO_FATAL("For GCC");
}
-NullOverlay::ChainImpl::ChainImpl(std::unique_ptr<DataLayerChain> innner,
+NullOverlay::ChainImpl::ChainImpl(std::unique_ptr<DataLayerChain> inner,
const BitVector* non_null)
- : inner_(std::move(innner)), non_null_(non_null) {
+ : inner_(std::move(inner)), non_null_(non_null) {
PERFETTO_DCHECK(non_null_->CountSetBits() <= inner_->size());
}
@@ -279,6 +280,8 @@
void NullOverlay::ChainImpl::StableSort(SortToken* start,
SortToken* end,
SortDirection direction) const {
+ PERFETTO_TP_TRACE(metatrace::Category::DB,
+ "NullOverlay::ChainImpl::StableSort");
SortToken* middle = std::stable_partition(
start, end,
[this](const SortToken& idx) { return !non_null_->IsSet(idx.index); });
@@ -291,6 +294,40 @@
}
}
+void NullOverlay::ChainImpl::Distinct(Indices& indices) const {
+ PERFETTO_TP_TRACE(metatrace::Category::DB,
+ "NullOverlay::ChainImpl::Distinct");
+ // Find first NULL.
+ auto first_null_it = std::find_if(
+ indices.tokens.begin(), indices.tokens.end(),
+ [this](const Indices::Token& t) { return !non_null_->IsSet(t.index); });
+
+ // Save first NULL.
+ std::optional<Indices::Token> null_tok;
+ if (first_null_it != indices.tokens.end()) {
+ null_tok = *first_null_it;
+ }
+
+ // Erase all NULLs.
+ indices.tokens.erase(std::remove_if(first_null_it, indices.tokens.end(),
+ [this](const Indices::Token& idx) {
+ return !non_null_->IsSet(idx.index);
+ }),
+ indices.tokens.end());
+
+ // Update index of each token so they all point to inner.
+ for (auto& token : indices.tokens) {
+ token.index = non_null_->CountSetBits(token.index);
+ }
+
+ inner_->Distinct(indices);
+
+ // Add the only null as it is distinct value.
+ if (null_tok.has_value()) {
+ indices.tokens.push_back(*null_tok);
+ }
+}
+
void NullOverlay::ChainImpl::Serialize(StorageProto* storage) const {
auto* null_storage = storage->set_null_overlay();
non_null_->Serialize(null_storage->set_bit_vector());
diff --git a/src/trace_processor/db/column/null_overlay.h b/src/trace_processor/db/column/null_overlay.h
index 625959d..5f7d341 100644
--- a/src/trace_processor/db/column/null_overlay.h
+++ b/src/trace_processor/db/column/null_overlay.h
@@ -63,6 +63,8 @@
SortToken* end,
SortDirection) const override;
+ void Distinct(Indices&) const override;
+
void Serialize(StorageProto*) const override;
uint32_t size() const override { return non_null_->size(); }
diff --git a/src/trace_processor/db/column/null_overlay_unittest.cc b/src/trace_processor/db/column/null_overlay_unittest.cc
index 8f15746..981c7bd 100644
--- a/src/trace_processor/db/column/null_overlay_unittest.cc
+++ b/src/trace_processor/db/column/null_overlay_unittest.cc
@@ -35,6 +35,7 @@
using testing::ElementsAre;
using testing::IsEmpty;
+using testing::UnorderedElementsAre;
using Indices = DataLayerChain::Indices;
using OrderedIndices = DataLayerChain::OrderedIndices;
@@ -276,5 +277,22 @@
}
}
+TEST(NullOverlay, Distinct) {
+ std::vector<uint32_t> numeric_data{0, 0, 1, 1, 2};
+ NumericStorage<uint32_t> numeric(&numeric_data, ColumnType::kUint32, false);
+
+ // NULL, 0, NULL, 0, 1, 1, 2
+ BitVector null{0, 1, 0, 1, 1, 1, 1};
+ NullOverlay overlay(&null);
+ auto chain = overlay.MakeChain(numeric.MakeChain());
+
+ // NULL, 0, 1, 1, 2
+ auto indices = Indices::CreateWithIndexPayloadForTesting(
+ {0, 1, 4, 5, 6}, Indices::State::kNonmonotonic);
+ chain->Distinct(indices);
+ ASSERT_THAT(utils::ExtractPayloadForTesting(indices),
+ UnorderedElementsAre(0, 1, 2, 4));
+}
+
} // namespace
} // namespace perfetto::trace_processor::column
diff --git a/src/trace_processor/db/column/numeric_storage.h b/src/trace_processor/db/column/numeric_storage.h
index 821fd40..77fbd2e 100644
--- a/src/trace_processor/db/column/numeric_storage.h
+++ b/src/trace_processor/db/column/numeric_storage.h
@@ -19,13 +19,16 @@
#include <cstdint>
#include <limits>
#include <memory>
+#include <set>
#include <string>
+#include <unordered_set>
#include <variant>
#include <vector>
#include "perfetto/base/compiler.h"
#include "perfetto/trace_processor/basic_types.h"
#include "src/trace_processor/containers/bit_vector.h"
+#include "src/trace_processor/containers/row_map.h"
#include "src/trace_processor/db/column/data_layer.h"
#include "src/trace_processor/db/column/types.h"
#include "src/trace_processor/db/column/utils.h"
@@ -103,6 +106,16 @@
return utils::SingleSearchNumeric(op, (*vector_)[i], sql_val);
}
+ void Distinct(Indices& indices) const override {
+ std::unordered_set<T> s;
+ indices.tokens.erase(
+ std::remove_if(indices.tokens.begin(), indices.tokens.end(),
+ [&s, this](const Indices::Token& idx) {
+ return !s.insert((*vector_)[idx.index]).second;
+ }),
+ indices.tokens.end());
+ }
+
void StableSort(SortToken* start,
SortToken* end,
SortDirection direction) const override {
diff --git a/src/trace_processor/db/column/numeric_storage_unittest.cc b/src/trace_processor/db/column/numeric_storage_unittest.cc
index 46ef920..e80c634 100644
--- a/src/trace_processor/db/column/numeric_storage_unittest.cc
+++ b/src/trace_processor/db/column/numeric_storage_unittest.cc
@@ -766,6 +766,19 @@
}
}
+TEST(NumericStorage, DistinctFromIndexVector) {
+ std::vector<int64_t> data{
+ 1, 100, 2, 100, 2,
+ };
+ NumericStorage<int64_t> storage(&data, ColumnType::kInt64, false);
+ auto chain = storage.MakeChain();
+
+ auto indices = Indices::CreateWithIndexPayloadForTesting(
+ {2, 1, 0, 3}, Indices::State::kNonmonotonic);
+ chain->Distinct(indices);
+ ASSERT_THAT(utils::ExtractPayloadForTesting(indices), ElementsAre(0, 1, 2));
+}
+
} // namespace
} // namespace column
} // namespace perfetto::trace_processor
diff --git a/src/trace_processor/db/column/range_overlay.cc b/src/trace_processor/db/column/range_overlay.cc
index 84f19ce..65ba7cb 100644
--- a/src/trace_processor/db/column/range_overlay.cc
+++ b/src/trace_processor/db/column/range_overlay.cc
@@ -143,6 +143,14 @@
inner_->StableSort(start, end, direction);
}
+void RangeOverlay::ChainImpl::Distinct(Indices& indices) const {
+ PERFETTO_TP_TRACE(metatrace::Category::DB, "RangeOverlay::Distinct");
+ for (auto& token : indices.tokens) {
+ token.index += range_->start;
+ }
+ inner_->Distinct(indices);
+}
+
void RangeOverlay::ChainImpl::Serialize(StorageProto*) const {
PERFETTO_FATAL("Not implemented");
}
diff --git a/src/trace_processor/db/column/range_overlay.h b/src/trace_processor/db/column/range_overlay.h
index 3adb319..128d94a 100644
--- a/src/trace_processor/db/column/range_overlay.h
+++ b/src/trace_processor/db/column/range_overlay.h
@@ -60,6 +60,8 @@
SortToken* end,
SortDirection) const override;
+ void Distinct(Indices&) const override;
+
void Serialize(StorageProto*) const override;
uint32_t size() const override { return range_->size(); }
diff --git a/src/trace_processor/db/column/range_overlay_unittest.cc b/src/trace_processor/db/column/range_overlay_unittest.cc
index 240e998..22ea1b0 100644
--- a/src/trace_processor/db/column/range_overlay_unittest.cc
+++ b/src/trace_processor/db/column/range_overlay_unittest.cc
@@ -126,5 +126,20 @@
ASSERT_THAT(utils::ExtractPayloadForTesting(tokens), ElementsAre(1, 2, 0));
}
+TEST(RangeOverlay, Distinct) {
+ std::vector<uint32_t> numeric_data{100, 99, 2, 0, 1};
+ NumericStorage<uint32_t> numeric(&numeric_data, ColumnType::kUint32, false);
+
+ // 99, 2, 0, 1
+ Range range(1, 4);
+ RangeOverlay storage(&range);
+ auto chain = storage.MakeChain(numeric.MakeChain());
+
+ auto indices = Indices::CreateWithIndexPayloadForTesting(
+ {0, 0, 0}, Indices::State::kNonmonotonic);
+ chain->Distinct(indices);
+ ASSERT_THAT(utils::ExtractPayloadForTesting(indices), ElementsAre(0));
+}
+
} // namespace
} // namespace perfetto::trace_processor::column
diff --git a/src/trace_processor/db/column/selector_overlay.cc b/src/trace_processor/db/column/selector_overlay.cc
index 999e4bb..0577ae4 100644
--- a/src/trace_processor/db/column/selector_overlay.cc
+++ b/src/trace_processor/db/column/selector_overlay.cc
@@ -123,6 +123,13 @@
inner_->StableSort(start, end, direction);
}
+void SelectorOverlay::ChainImpl::Distinct(Indices& indices) const {
+ for (auto& token : indices.tokens) {
+ token.index = selector_->IndexOfNthSet(token.index);
+ }
+ inner_->Distinct(indices);
+}
+
void SelectorOverlay::ChainImpl::Serialize(StorageProto* storage) const {
auto* selector_overlay = storage->set_selector_overlay();
inner_->Serialize(selector_overlay->set_storage());
diff --git a/src/trace_processor/db/column/selector_overlay.h b/src/trace_processor/db/column/selector_overlay.h
index 82e2fe3..84c3ec3 100644
--- a/src/trace_processor/db/column/selector_overlay.h
+++ b/src/trace_processor/db/column/selector_overlay.h
@@ -64,6 +64,8 @@
SortToken* end,
SortDirection) const override;
+ void Distinct(Indices&) const override;
+
void Serialize(StorageProto*) const override;
uint32_t size() const override { return selector_->size(); }
diff --git a/src/trace_processor/db/column/set_id_storage.cc b/src/trace_processor/db/column/set_id_storage.cc
index ae1c167..95825ac 100644
--- a/src/trace_processor/db/column/set_id_storage.cc
+++ b/src/trace_processor/db/column/set_id_storage.cc
@@ -22,7 +22,9 @@
#include <iterator>
#include <limits>
#include <memory>
+#include <set>
#include <string>
+#include <unordered_set>
#include <utility>
#include <vector>
@@ -252,6 +254,8 @@
FilterOp op,
SqlValue sql_val,
const OrderedIndices& indices) const {
+ PERFETTO_TP_TRACE(metatrace::Category::DB,
+ "SetIdStorage::ChainImpl::OrderedIndexSearch");
// OrderedIndices are monotonic non-contiguous values.
auto res = SearchValidated(
op, sql_val, Range(indices.data[0], indices.data[indices.size - 1] + 1));
@@ -303,6 +307,8 @@
void SetIdStorage::ChainImpl::StableSort(SortToken* start,
SortToken* end,
SortDirection direction) const {
+ PERFETTO_TP_TRACE(metatrace::Category::DB,
+ "SetIdStorage::ChainImpl::StableSort");
switch (direction) {
case SortDirection::kAscending:
std::stable_sort(start, end,
@@ -319,6 +325,18 @@
}
}
+void SetIdStorage::ChainImpl::Distinct(Indices& indices) const {
+ PERFETTO_TP_TRACE(metatrace::Category::DB,
+ "SetIdStorage::ChainImpl::Distinct");
+ std::unordered_set<uint32_t> s;
+ indices.tokens.erase(
+ std::remove_if(indices.tokens.begin(), indices.tokens.end(),
+ [&s, this](const Indices::Token& idx) {
+ return !s.insert((*values_)[idx.index]).second;
+ }),
+ indices.tokens.end());
+}
+
void SetIdStorage::ChainImpl::Serialize(StorageProto* msg) const {
auto* vec_msg = msg->set_set_id_storage();
vec_msg->set_values(reinterpret_cast<const uint8_t*>(values_->data()),
diff --git a/src/trace_processor/db/column/set_id_storage.h b/src/trace_processor/db/column/set_id_storage.h
index e5bdb76..e408f5f 100644
--- a/src/trace_processor/db/column/set_id_storage.h
+++ b/src/trace_processor/db/column/set_id_storage.h
@@ -64,6 +64,8 @@
void Serialize(StorageProto*) const override;
+ void Distinct(Indices&) const override;
+
uint32_t size() const override {
return static_cast<uint32_t>(values_->size());
}
diff --git a/src/trace_processor/db/column/set_id_storage_unittest.cc b/src/trace_processor/db/column/set_id_storage_unittest.cc
index e3c40fa..2589a3f 100644
--- a/src/trace_processor/db/column/set_id_storage_unittest.cc
+++ b/src/trace_processor/db/column/set_id_storage_unittest.cc
@@ -417,6 +417,17 @@
}
}
+TEST(SetIdStorage, Distinct) {
+ std::vector<uint32_t> storage_data{0, 0, 0, 3, 3, 3, 6, 6, 6, 9, 9, 9};
+ SetIdStorage storage(&storage_data);
+ auto chain = storage.MakeChain();
+
+ auto indices = Indices::CreateWithIndexPayloadForTesting(
+ {10, 9, 0, 1, 4}, Indices::State::kNonmonotonic);
+ chain->Distinct(indices);
+ ASSERT_THAT(utils::ExtractPayloadForTesting(indices), ElementsAre(0, 2, 4));
+}
+
} // namespace
} // namespace column
} // namespace perfetto::trace_processor
diff --git a/src/trace_processor/db/column/string_storage.cc b/src/trace_processor/db/column/string_storage.cc
index 9ab8c1f..aa45218 100644
--- a/src/trace_processor/db/column/string_storage.cc
+++ b/src/trace_processor/db/column/string_storage.cc
@@ -20,9 +20,9 @@
#include <cstdint>
#include <functional>
#include <iterator>
-#include <memory>
#include <optional>
#include <string>
+#include <unordered_set>
#include <utility>
#include <vector>
@@ -522,6 +522,8 @@
FilterOp op,
SqlValue sql_val,
const OrderedIndices& indices) const {
+ PERFETTO_TP_TRACE(metatrace::Category::DB,
+ "StringStorage::ChainImpl::OrderedIndexSearch");
StringPool::Id val =
(op == FilterOp::kIsNull || op == FilterOp::kIsNotNull)
? StringPool::Id::Null()
@@ -619,6 +621,8 @@
void StringStorage::ChainImpl::StableSort(SortToken* start,
SortToken* end,
SortDirection direction) const {
+ PERFETTO_TP_TRACE(metatrace::Category::DB,
+ "StringStorage::ChainImpl::StableSort");
switch (direction) {
case SortDirection::kAscending: {
std::stable_sort(start, end,
@@ -669,6 +673,18 @@
PERFETTO_FATAL("For GCC");
}
+void StringStorage::ChainImpl::Distinct(Indices& indices) const {
+ PERFETTO_TP_TRACE(metatrace::Category::DB,
+ "StringStorage::ChainImpl::Distinct");
+ std::unordered_set<StringPool::Id> s;
+ indices.tokens.erase(
+ std::remove_if(indices.tokens.begin(), indices.tokens.end(),
+ [&s, this](const Indices::Token& idx) {
+ return !s.insert((*data_)[idx.index]).second;
+ }),
+ indices.tokens.end());
+}
+
void StringStorage::ChainImpl::Serialize(StorageProto* msg) const {
auto* string_storage_msg = msg->set_string_storage();
string_storage_msg->set_is_sorted(is_sorted_);
diff --git a/src/trace_processor/db/column/string_storage.h b/src/trace_processor/db/column/string_storage.h
index f037bd1..870fd68 100644
--- a/src/trace_processor/db/column/string_storage.h
+++ b/src/trace_processor/db/column/string_storage.h
@@ -65,6 +65,8 @@
SortToken* end,
SortDirection direction) const override;
+ void Distinct(Indices&) const override;
+
void Serialize(StorageProto*) const override;
uint32_t size() const override {
diff --git a/src/trace_processor/db/column/string_storage_unittest.cc b/src/trace_processor/db/column/string_storage_unittest.cc
index 4e06185..4474ea9 100644
--- a/src/trace_processor/db/column/string_storage_unittest.cc
+++ b/src/trace_processor/db/column/string_storage_unittest.cc
@@ -503,5 +503,22 @@
}
}
+TEST(StringStorage, DistinctFromIndexVector) {
+ std::vector<std::string> strings{"cheese", "pasta", "cheese",
+ "fries", "pasta", "fries"};
+ std::vector<StringPool::Id> ids(2, StringPool::Id::Null());
+ StringPool pool;
+ for (const auto& string : strings) {
+ ids.push_back(pool.InternString(base::StringView(string)));
+ }
+ StringStorage storage(&pool, &ids);
+ auto chain = storage.MakeChain();
+
+ auto indices = Indices::CreateWithIndexPayloadForTesting(
+ {1, 0, 6, 3}, Indices::State::kNonmonotonic);
+ chain->Distinct(indices);
+ ASSERT_THAT(utils::ExtractPayloadForTesting(indices), ElementsAre(0, 2));
+}
+
} // namespace
} // namespace perfetto::trace_processor::column