blob: b20f240c6fa45e17256e9369f7a40f6647f62b11 [file]
/*
* Copyright (C) 2025 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef SRC_TRACE_PROCESSOR_DATAFRAME_IMPL_QUERY_PLAN_H_
#define SRC_TRACE_PROCESSOR_DATAFRAME_IMPL_QUERY_PLAN_H_
#include <array>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <memory>
#include <optional>
#include <string>
#include <string_view>
#include <type_traits>
#include <utility>
#include <variant>
#include <vector>
#include "perfetto/base/logging.h"
#include "perfetto/base/status.h"
#include "perfetto/ext/base/base64.h"
#include "perfetto/ext/base/status_or.h"
#include "perfetto/ext/base/string_view.h"
#include "perfetto/public/compiler.h"
#include "src/trace_processor/dataframe/impl/bytecode_core.h"
#include "src/trace_processor/dataframe/impl/bytecode_instructions.h"
#include "src/trace_processor/dataframe/impl/bytecode_registers.h"
#include "src/trace_processor/dataframe/impl/slab.h"
#include "src/trace_processor/dataframe/impl/types.h"
#include "src/trace_processor/dataframe/specs.h"
#include "src/trace_processor/util/status_macros.h"
namespace perfetto::trace_processor::dataframe::impl {
static constexpr uint32_t kMaxColumns = 64;
// A QueryPlan encapsulates all the information needed to execute a query,
// including the bytecode instructions and interpreter configuration.
struct QueryPlan {
// Contains various parameters required for execution of this query plan.
struct ExecutionParams {
// The maximum number of rows it's possible for this query plan to return.
uint32_t max_row_count = 0;
// The number of rows this query plan estimates it will return.
uint32_t estimated_row_count = 0;
// An estimate for the cost of executing the query plan.
double estimated_cost = 0;
// Number of filter values used by this query.
uint32_t filter_value_count = 0;
// Register holding the final filtered indices.
bytecode::reg::ReadHandle<Span<uint32_t>> output_register;
// Maps column indices to output offsets.
std::array<uint32_t, kMaxColumns> col_to_output_offset;
// Number of output indices per row.
uint32_t output_per_row = 0;
};
static_assert(std::is_trivially_copyable_v<ExecutionParams>);
static_assert(std::is_trivially_destructible_v<ExecutionParams>);
// Serializes the query plan to a Base64-encoded string.
// This allows plans to be stored or transmitted between processes.
std::string Serialize() const {
size_t size = sizeof(size_t) +
(bytecode.size() * sizeof(bytecode::Bytecode)) +
sizeof(params);
std::string res(size, '\0');
char* p = res.data();
{
size_t bytecode_size = bytecode.size();
memcpy(p, &bytecode_size, sizeof(bytecode_size));
p += sizeof(bytecode_size);
}
{
memcpy(p, bytecode.data(), bytecode.size() * sizeof(bytecode::Bytecode));
p += bytecode.size() * sizeof(bytecode::Bytecode);
}
{
memcpy(p, &params, sizeof(params));
p += sizeof(params);
}
PERFETTO_CHECK(p == res.data() + res.size());
return base::Base64Encode(base::StringView(res));
}
// Deserializes a query plan from a Base64-encoded string.
// Returns the reconstructed QueryPlan.
static QueryPlan Deserialize(std::string_view serialized) {
QueryPlan res;
std::optional<std::string> raw_data = base::Base64Decode(
base::StringView(serialized.data(), serialized.size()));
PERFETTO_CHECK(raw_data);
const char* p = raw_data->data();
size_t bytecode_size;
{
memcpy(&bytecode_size, p, sizeof(bytecode_size));
p += sizeof(bytecode_size);
}
{
for (uint32_t i = 0; i < bytecode_size; ++i) {
res.bytecode.emplace_back();
}
memcpy(res.bytecode.data(), p,
bytecode_size * sizeof(bytecode::Bytecode));
p += bytecode_size * sizeof(bytecode::Bytecode);
}
{
memcpy(&res.params, p, sizeof(res.params));
p += sizeof(res.params);
}
PERFETTO_CHECK(p == raw_data->data() + raw_data->size());
return res;
}
ExecutionParams params;
bytecode::BytecodeVector bytecode;
};
// Builder class for creating query plans.
//
// QueryPlans contain the bytecode instructions and interpreter configuration
// needed to execute a query.
class QueryPlanBuilder {
public:
static base::StatusOr<QueryPlan> Build(
uint32_t row_count,
const std::vector<std::shared_ptr<Column>>& columns,
std::vector<FilterSpec>& specs,
const std::vector<DistinctSpec>& distinct,
const std::vector<SortSpec>& sort_specs,
const LimitSpec& limit_spec,
uint64_t cols_used) {
QueryPlanBuilder builder(row_count, columns);
RETURN_IF_ERROR(builder.Filter(specs));
builder.Distinct(distinct);
if (builder.CanUseMinMaxOptimization(sort_specs, limit_spec)) {
builder.MinMax(sort_specs[0]);
builder.Output({}, cols_used);
} else {
builder.Sort(sort_specs);
builder.Output(limit_spec, cols_used);
}
return std::move(builder).Build();
}
private:
// Represents register types for holding indices.
using IndicesReg = std::variant<bytecode::reg::RwHandle<Range>,
bytecode::reg::RwHandle<Span<uint32_t>>>;
// Indicates that the bytecode does not change the estimated or maximum number
// of rows.
struct UnchangedRowCount {};
// Indicates that the bytecode reduces the estimated number of rows by 2.
struct Div2RowCount {};
// Indicates that the bytecode reduces the estimated number of rows by 2 *
// log(row_count).
struct DoubleLog2RowCount {};
// Indicates that the bytecode produces *exactly* one row and the estimated
// and maximum should be set to 1.
struct OneRowCount {};
// Indicates that the bytecode produces *exactly* zero rows and the estimated
// and maximum should be set to 0.
struct ZeroRowCount {};
// Indicates that the bytecode produces `limit` rows starting at `offset`.
struct LimitOffsetRowCount {
uint32_t limit;
uint32_t offset;
};
using RowCountModifier = std::variant<UnchangedRowCount,
Div2RowCount,
DoubleLog2RowCount,
OneRowCount,
ZeroRowCount,
LimitOffsetRowCount>;
// State information for a column during query planning.
struct ColumnState {
std::optional<bytecode::reg::RwHandle<Slab<uint32_t>>> prefix_popcount;
};
// Constructs a builder for the given number of rows and columns.
QueryPlanBuilder(uint32_t row_count,
const std::vector<std::shared_ptr<Column>>& columns);
// Adds filter operations to the query plan based on filter specifications.
// Optimizes the order of filters for efficiency.
base::Status Filter(std::vector<FilterSpec>& specs);
// Adds distinct operations to the query plan based on distinct
// specifications. Distinct are applied after filters, in reverse order of
// specification.
void Distinct(const std::vector<DistinctSpec>& distinct_specs);
// Adds min/max operations to the query plan given a single column which
// should be sorted on.
void MinMax(const SortSpec& spec);
// Adds sort operations to the query plan based on sort specifications.
// Sorts are applied after filters and disinct.
void Sort(const std::vector<SortSpec>& sort_specs);
// Configures output handling for the filtered rows.
// |cols_used_bitmap| is a bitmap with bits set for columns that will be
// accessed.
void Output(const LimitSpec&, uint64_t cols_used_bitmap);
// Finalizes and returns the built query plan.
QueryPlan Build() &&;
// Processes non-string filter constraints.
void NonStringConstraint(
const FilterSpec& c,
const NonStringType& type,
const NonStringOp& op,
const bytecode::reg::ReadHandle<CastFilterValueResult>& result);
// Processes string filter constraints.
base::Status StringConstraint(
const FilterSpec& c,
const StringOp& op,
const bytecode::reg::ReadHandle<CastFilterValueResult>& result);
// Processes null filter constraints.
void NullConstraint(const NullOp&, FilterSpec&);
// Attempts to apply optimized filtering on sorted data.
// Returns true if the optimization was applied.
bool TrySortedConstraint(
const FilterSpec& fs,
const StorageType& ct,
const NonNullOp& op,
const bytecode::reg::RwHandle<CastFilterValueResult>& result);
// Adds overlay translation for handling special column properties like
// nullability.
bytecode::reg::RwHandle<Span<uint32_t>> MaybeAddOverlayTranslation(
const FilterSpec& c);
// Ensures indices are stored in a Slab, converting from Range if necessary.
PERFETTO_NO_INLINE bytecode::reg::RwHandle<Span<uint32_t>>
EnsureIndicesAreInSlab();
// Adds a new bytecode instruction of type T to the plan.
template <typename T>
T& AddOpcode(RowCountModifier rc) {
return AddOpcode<T>(bytecode::Index<T>(), rc, T::kCost);
}
// Adds a new bytecode instruction of type T with the given option value.
template <typename T>
T& AddOpcode(uint32_t option, RowCountModifier rc) {
return static_cast<T&>(AddRawOpcode(option, rc, T::kCost));
}
// Adds a new bytecode instruction of type T with the given option value.
template <typename T>
T& AddOpcode(uint32_t option, RowCountModifier rc, bytecode::Cost cost) {
return static_cast<T&>(AddRawOpcode(option, rc, cost));
}
PERFETTO_NO_INLINE bytecode::Bytecode& AddRawOpcode(uint32_t option,
RowCountModifier rc,
bytecode::Cost cost);
// Sets the result to an empty set. Use when a filter guarantees no matches.
void SetGuaranteedToBeEmpty();
// Returns the prefix popcount register for the given column.
bytecode::reg::ReadHandle<Slab<uint32_t>> PrefixPopcountRegisterFor(
uint32_t col);
bool CanUseMinMaxOptimization(const std::vector<SortSpec>&, const LimitSpec&);
const Column& GetColumn(uint32_t idx) { return *columns_[idx]; }
// Reference to the columns being queried.
const std::vector<std::shared_ptr<Column>>& columns_;
// The query plan being built.
QueryPlan plan_;
// State information for each column during planning.
std::vector<ColumnState> column_states_;
// Number of registers allocated so far.
uint32_t register_count_ = 0;
// Current register holding the set of matching indices.
IndicesReg indices_reg_;
};
} // namespace perfetto::trace_processor::dataframe::impl
#endif // SRC_TRACE_PROCESSOR_DATAFRAME_IMPL_QUERY_PLAN_H_