blob: 9b9492f0c02f7eec928d9eed30bfa0adb7af9469 [file]
/*
* Copyright (C) 2025 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef SRC_TRACE_PROCESSOR_UTIL_GALLOPING_SEARCH_H_
#define SRC_TRACE_PROCESSOR_UTIL_GALLOPING_SEARCH_H_
#include <cstdint>
namespace perfetto::trace_processor {
// Galloping (exponential) search optimized for sorted queries.
//
// When searching for multiple keys that are themselves sorted, galloping search
// exploits locality: each search starts from where the previous one ended,
// using exponential probing to quickly find the new range, then binary search
// within that range.
//
// Performance: O(log d) per query where d is the distance between consecutive
// keys. Much faster than repeated std::lower_bound for sorted query batches.
class GallopingSearch {
public:
GallopingSearch(const int64_t* data, uint32_t size) : data_(data), n_(size) {}
// Keys MUST be sorted in ascending order for this to work correctly.
// Results[i] will contain the lower_bound position for keys[i].
void BatchedLowerBound(const int64_t* keys,
uint32_t num_keys,
uint32_t* results) const {
if (n_ == 0) {
for (uint32_t i = 0; i < num_keys; ++i) {
results[i] = 0;
}
return;
}
uint32_t pos = LowerBound(0, n_, keys[0]);
results[0] = pos;
for (uint32_t i = 1; i < num_keys; ++i) {
pos = GallopForward(pos, keys[i]);
results[i] = pos;
}
}
private:
static constexpr uint32_t kLinearScanThreshold = 16;
uint32_t GallopForward(uint32_t pos, int64_t key) const {
if (pos >= n_ || data_[pos] >= key) {
return pos;
}
// Start at cache-line granularity (16 elements = 2 cache lines of int64_t)
// since nearby elements are already paged in when we access data[pos].
uint32_t step = kLinearScanThreshold;
uint32_t prev = pos;
while (pos + step < n_ && data_[pos + step] < key) {
prev = pos + step;
step *= 2;
}
uint32_t lo = prev + 1;
uint32_t hi = (pos + step + 1 > n_) ? n_ : pos + step + 1;
return LowerBound(lo, hi, key);
}
uint32_t LowerBound(uint32_t lo, uint32_t hi, int64_t key) const {
// Binary search until range is small enough for linear scan.
while (hi - lo > kLinearScanThreshold) {
uint32_t mid = lo + (hi - lo) / 2;
if (data_[mid] < key) {
lo = mid + 1;
} else {
hi = mid;
}
}
// Linear scan for small ranges.
while (lo < hi && data_[lo] < key) {
++lo;
}
return lo;
}
const int64_t* data_ = nullptr;
uint32_t n_ = 0;
};
} // namespace perfetto::trace_processor
#endif // SRC_TRACE_PROCESSOR_UTIL_GALLOPING_SEARCH_H_