blob: 6e642b118e33d7d11c56af0c97666c73b3c78d42 [file] [log] [blame]
/*
* Copyright (C) 2023 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "src/trace_processor/db/column/arrangement_overlay.h"
#include <algorithm>
#include <cstdint>
#include <vector>
#include "protos/perfetto/trace_processor/serialization.pbzero.h"
#include "src/trace_processor/containers/bit_vector.h"
#include "src/trace_processor/tp_metatrace.h"
namespace perfetto {
namespace trace_processor {
namespace column {
namespace {
using Range = RowMap::Range;
} // namespace
ArrangementOverlay::ArrangementOverlay(std::unique_ptr<Column> inner,
const std::vector<uint32_t>* arrangement)
: inner_(std::move(inner)), arrangement_(arrangement) {
PERFETTO_DCHECK(*std::max_element(arrangement->begin(), arrangement->end()) <=
inner_->size());
}
SearchValidationResult ArrangementOverlay::ValidateSearchConstraints(
SqlValue sql_val,
FilterOp op) const {
return inner_->ValidateSearchConstraints(sql_val, op);
}
RangeOrBitVector ArrangementOverlay::Search(FilterOp op,
SqlValue sql_val,
Range in) const {
PERFETTO_TP_TRACE(metatrace::Category::DB, "ArrangementOverlay::Search");
const auto& arrangement = *arrangement_;
PERFETTO_DCHECK(in.end <= arrangement.size());
const auto [min_i, max_i] =
std::minmax_element(arrangement.begin() + static_cast<int32_t>(in.start),
arrangement.begin() + static_cast<int32_t>(in.end));
auto storage_result = inner_->Search(op, sql_val, Range(*min_i, *max_i + 1));
BitVector::Builder builder(in.end, in.start);
if (storage_result.IsRange()) {
Range storage_range = std::move(storage_result).TakeIfRange();
for (uint32_t i = in.start; i < in.end; ++i) {
builder.Append(storage_range.Contains(arrangement[i]));
}
} else {
BitVector storage_bitvector = std::move(storage_result).TakeIfBitVector();
PERFETTO_DCHECK(storage_bitvector.size() == *max_i + 1);
// After benchmarking, it turns out this complexity *is* actually worthwhile
// and has a noticable impact on the performance of this function in real
// world tables.
// Fast path: we compare as many groups of 64 elements as we can.
// This should be very easy for the compiler to auto-vectorize.
const uint32_t* arrangement_idx = arrangement.data() + in.start;
uint32_t fast_path_elements = builder.BitsInCompleteWordsUntilFull();
for (uint32_t i = 0; i < fast_path_elements; i += BitVector::kBitsInWord) {
uint64_t word = 0;
// This part should be optimised by SIMD and is expected to be fast.
for (uint32_t k = 0; k < BitVector::kBitsInWord; ++k, ++arrangement_idx) {
bool comp_result = storage_bitvector.IsSet(*arrangement_idx);
word |= static_cast<uint64_t>(comp_result) << k;
}
builder.AppendWord(word);
}
// Slow path: we compare <64 elements and append to fill the Builder.
uint32_t back_elements = builder.BitsUntilFull();
for (uint32_t i = 0; i < back_elements; ++i, ++arrangement_idx) {
builder.Append(storage_bitvector.IsSet(*arrangement_idx));
}
}
return RangeOrBitVector(std::move(builder).Build());
}
RangeOrBitVector ArrangementOverlay::IndexSearch(FilterOp op,
SqlValue sql_val,
uint32_t* indices,
uint32_t indices_size,
bool sorted) const {
PERFETTO_TP_TRACE(metatrace::Category::DB, "ArrangementOverlay::IndexSearch");
std::vector<uint32_t> storage_iv;
for (uint32_t* it = indices; it != indices + indices_size; ++it) {
storage_iv.push_back((*arrangement_)[*it]);
}
return inner_->IndexSearch(op, sql_val, storage_iv.data(),
static_cast<uint32_t>(storage_iv.size()), sorted);
}
void ArrangementOverlay::StableSort(uint32_t*, uint32_t) const {
// TODO(b/307482437): Implement.
PERFETTO_FATAL("Not implemented");
}
void ArrangementOverlay::Sort(uint32_t*, uint32_t) const {
// TODO(b/307482437): Implement.
PERFETTO_FATAL("Not implemented");
}
void ArrangementOverlay::Serialize(StorageProto* storage) const {
auto* arrangement_overlay = storage->set_arrangement_overlay();
arrangement_overlay->set_values(
reinterpret_cast<const uint8_t*>(arrangement_->data()),
sizeof(uint32_t) * arrangement_->size());
inner_->Serialize(arrangement_overlay->set_storage());
}
} // namespace column
} // namespace trace_processor
} // namespace perfetto