Primiano Tucci | 0269116 | 2020-01-21 13:30:13 +0000 | [diff] [blame] | 1 | /* |
| 2 | * Copyright (C) 2019 The Android Open Source Project |
| 3 | * |
| 4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | * you may not use this file except in compliance with the License. |
| 6 | * You may obtain a copy of the License at |
| 7 | * |
| 8 | * http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | * |
| 10 | * Unless required by applicable law or agreed to in writing, software |
| 11 | * distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | * See the License for the specific language governing permissions and |
| 14 | * limitations under the License. |
| 15 | */ |
| 16 | |
Ryan Savitski | dc21888 | 2020-11-16 18:02:02 +0000 | [diff] [blame] | 17 | #include "src/kallsyms/kernel_symbol_map.h" |
Primiano Tucci | 0269116 | 2020-01-21 13:30:13 +0000 | [diff] [blame] | 18 | |
Primiano Tucci | ab293f5 | 2020-12-08 11:46:52 +0100 | [diff] [blame] | 19 | #include "perfetto/base/build_config.h" |
Primiano Tucci | 0269116 | 2020-01-21 13:30:13 +0000 | [diff] [blame] | 20 | #include "perfetto/base/logging.h" |
Primiano Tucci | ab293f5 | 2020-12-08 11:46:52 +0100 | [diff] [blame] | 21 | #include "perfetto/ext/base/file_utils.h" |
Primiano Tucci | 0269116 | 2020-01-21 13:30:13 +0000 | [diff] [blame] | 22 | #include "perfetto/ext/base/metatrace.h" |
| 23 | #include "perfetto/ext/base/paged_memory.h" |
| 24 | #include "perfetto/ext/base/scoped_file.h" |
| 25 | #include "perfetto/ext/base/string_view.h" |
| 26 | #include "perfetto/ext/base/utils.h" |
| 27 | #include "perfetto/protozero/proto_utils.h" |
| 28 | |
Primiano Tucci | 0269116 | 2020-01-21 13:30:13 +0000 | [diff] [blame] | 29 | #include <stdio.h> |
| 30 | |
| 31 | #include <algorithm> |
Primiano Tucci | 58d2dc6 | 2021-06-24 16:03:24 +0100 | [diff] [blame] | 32 | #include <cinttypes> |
Primiano Tucci | 0269116 | 2020-01-21 13:30:13 +0000 | [diff] [blame] | 33 | #include <functional> |
| 34 | #include <map> |
Daniele Di Proietto | 3c965d2 | 2021-10-01 17:17:13 +0100 | [diff] [blame] | 35 | #include <unordered_map> |
Primiano Tucci | 0269116 | 2020-01-21 13:30:13 +0000 | [diff] [blame] | 36 | #include <utility> |
| 37 | |
| 38 | namespace perfetto { |
| 39 | |
| 40 | // On a Pixel 3 this gives an avg. lookup time of 600 ns and a memory usage |
| 41 | // of 1.1 MB for 65k symbols. See go/kallsyms-parser-bench. |
| 42 | size_t KernelSymbolMap::kSymIndexSampling = 16; |
| 43 | size_t KernelSymbolMap::kTokenIndexSampling = 4; |
| 44 | |
| 45 | namespace { |
| 46 | |
| 47 | using TokenId = KernelSymbolMap::TokenTable::TokenId; |
| 48 | constexpr size_t kSymNameMaxLen = 128; |
Primiano Tucci | a1221cb | 2020-10-08 11:00:42 +0200 | [diff] [blame] | 49 | constexpr size_t kSymMaxSizeBytes = 1024 * 1024; |
Primiano Tucci | 0269116 | 2020-01-21 13:30:13 +0000 | [diff] [blame] | 50 | |
| 51 | // Reads a kallsyms file in blocks of 4 pages each and decode its lines using |
| 52 | // a simple FSM. Calls the passed lambda for each valid symbol. |
| 53 | // It skips undefined symbols and other useless stuff. |
| 54 | template <typename Lambda /* void(uint64_t, const char*) */> |
| 55 | void ForEachSym(const std::string& kallsyms_path, Lambda fn) { |
| 56 | base::ScopedFile fd = base::OpenFile(kallsyms_path.c_str(), O_RDONLY); |
| 57 | if (!fd) { |
| 58 | PERFETTO_PLOG("Cannot open %s", kallsyms_path.c_str()); |
| 59 | return; |
| 60 | } |
| 61 | |
| 62 | // /proc/kallsyms looks as follows: |
| 63 | // 0000000000026a80 A bpf_trace_sds |
| 64 | // |
| 65 | // ffffffffc03a6000 T cpufreq_gov_powersave_init<TAB> [cpufreq_powersave] |
| 66 | // ffffffffc035d000 T cpufreq_gov_userspace_init<TAB> [cpufreq_userspace] |
| 67 | // |
| 68 | // We parse it with a state machine that has four states, one for each column. |
| 69 | // We don't care about the part in the square brackets and ignore everything |
| 70 | // after the symbol name. |
| 71 | |
| 72 | static constexpr size_t kBufSize = 16 * 1024; |
| 73 | base::PagedMemory buffer = base::PagedMemory::Allocate(kBufSize); |
| 74 | enum { kSymAddr, kSymType, kSymName, kEatRestOfLine } state = kSymAddr; |
| 75 | uint64_t sym_addr = 0; |
| 76 | char sym_type = '\0'; |
| 77 | char sym_name[kSymNameMaxLen + 1]; |
| 78 | size_t sym_name_len = 0; |
| 79 | for (;;) { |
| 80 | char* buf = static_cast<char*>(buffer.Get()); |
Primiano Tucci | ab293f5 | 2020-12-08 11:46:52 +0100 | [diff] [blame] | 81 | auto rsize = base::Read(*fd, buf, kBufSize); |
Primiano Tucci | 0269116 | 2020-01-21 13:30:13 +0000 | [diff] [blame] | 82 | if (rsize < 0) { |
| 83 | PERFETTO_PLOG("read(%s) failed", kallsyms_path.c_str()); |
| 84 | return; |
| 85 | } |
| 86 | if (rsize == 0) |
| 87 | return; // EOF |
| 88 | for (size_t i = 0; i < static_cast<size_t>(rsize); i++) { |
| 89 | char c = buf[i]; |
| 90 | const bool is_space = c == ' ' || c == '\t'; |
| 91 | switch (state) { |
| 92 | case kSymAddr: |
| 93 | if (c >= '0' && c <= '9') { |
| 94 | sym_addr = (sym_addr << 4) | static_cast<uint8_t>(c - '0'); |
| 95 | } else if (c >= 'a' && c <= 'f') { |
| 96 | sym_addr = (sym_addr << 4) | static_cast<uint8_t>(c - 'a' + 10); |
| 97 | } else if (is_space) { |
| 98 | state = kSymType; |
| 99 | } else if (c == '\0') { |
| 100 | return; |
| 101 | } else { |
| 102 | PERFETTO_ELOG("kallsyms parser error: chr 0x%x @ off=%zu", c, i); |
| 103 | return; |
| 104 | } |
| 105 | break; |
| 106 | |
| 107 | case kSymType: |
| 108 | if (is_space) |
| 109 | break; // Eat leading spaces. |
| 110 | sym_type = c; |
| 111 | state = kSymName; |
| 112 | sym_name_len = 0; |
| 113 | break; |
| 114 | |
| 115 | case kSymName: |
| 116 | if (is_space && sym_name_len == 0) |
| 117 | break; // Eat leading spaces. |
| 118 | if (c && c != '\n' && !is_space && sym_name_len < kSymNameMaxLen) { |
| 119 | sym_name[sym_name_len++] = c; |
| 120 | break; |
| 121 | } |
| 122 | sym_name[sym_name_len] = '\0'; |
| 123 | fn(sym_addr, sym_type, sym_name); |
| 124 | sym_addr = 0; |
| 125 | sym_type = '\0'; |
| 126 | state = c == '\n' ? kSymAddr : kEatRestOfLine; |
| 127 | break; |
| 128 | |
| 129 | case kEatRestOfLine: |
| 130 | if (c == '\n') |
| 131 | state = kSymAddr; |
| 132 | break; |
| 133 | } // switch(state) |
| 134 | } // for (char in buf) |
| 135 | } // for (read chunk) |
| 136 | } |
| 137 | |
| 138 | // Splits a symbol name into tokens using '_' as a separator, calling the passed |
| 139 | // lambda for each token. It splits tokens in a way that allows the original |
| 140 | // string to be rebuilt as-is by re-joining using a '_' between each token. |
| 141 | // For instance: |
| 142 | // _fo_a_b -> ["", fo, a, b] |
| 143 | // __fo_a_b -> [_, fo, a, b] |
| 144 | // __fo_a_b_ -> [_, fo, a, b, ""] |
| 145 | // __fo_a_b____ -> [_, fo, a, b, ___] |
| 146 | template <typename Lambda /* void(base::StringView) */> |
| 147 | void Tokenize(const char* name, Lambda fn) { |
| 148 | const char* tok_start = name; |
| 149 | bool is_start_of_token = true; |
| 150 | bool tok_is_sep = false; |
| 151 | for (const char* ptr = name;; ptr++) { |
| 152 | const char c = *ptr; |
| 153 | if (is_start_of_token) { |
| 154 | tok_is_sep = *tok_start == '_'; // Deals with tokens made of '_'s. |
| 155 | is_start_of_token = false; |
| 156 | } |
| 157 | // Scan until either the end of string or the next character (which is a '_' |
| 158 | // in nominal cases, or anything != '_' for tokens made by 1+ '_'). |
| 159 | if (c == '\0' || (!tok_is_sep && c == '_') || (tok_is_sep && c != '_')) { |
| 160 | size_t tok_len = static_cast<size_t>(ptr - tok_start); |
| 161 | if (tok_is_sep && c != '\0') |
| 162 | --tok_len; |
| 163 | fn(base::StringView(tok_start, tok_len)); |
| 164 | if (c == '\0') |
| 165 | return; |
| 166 | tok_start = tok_is_sep ? ptr : ptr + 1; |
| 167 | is_start_of_token = true; |
| 168 | } |
| 169 | } |
| 170 | } |
| 171 | |
| 172 | } // namespace |
| 173 | |
| 174 | KernelSymbolMap::TokenTable::TokenTable() { |
| 175 | // Insert a null token as id 0. We can't just add "" because the empty string |
| 176 | // is special-cased and doesn't insert an actual token. So we push a string of |
| 177 | // size one that contains only the null character instead. |
| 178 | char null_tok = 0; |
| 179 | Add(std::string(&null_tok, 1)); |
| 180 | } |
| 181 | |
| 182 | KernelSymbolMap::TokenTable::~TokenTable() = default; |
| 183 | |
| 184 | // Adds a new token to the db. Does not dedupe identical token (with the |
| 185 | // exception of the empty string). The caller has to deal with that. |
| 186 | // Supports only ASCII characters in the range [1, 127]. |
| 187 | // The last character of the token will have the MSB set. |
| 188 | TokenId KernelSymbolMap::TokenTable::Add(const std::string& token) { |
| 189 | const size_t token_size = token.size(); |
| 190 | if (token_size == 0) |
| 191 | return 0; |
| 192 | TokenId id = num_tokens_++; |
| 193 | |
| 194 | const size_t buf_size_before_insertion = buf_.size(); |
| 195 | if (id % kTokenIndexSampling == 0) |
| 196 | index_.emplace_back(buf_size_before_insertion); |
| 197 | |
| 198 | const size_t prev_size = buf_.size(); |
| 199 | buf_.resize(prev_size + token_size); |
| 200 | char* tok_wptr = &buf_[prev_size]; |
| 201 | for (size_t i = 0; i < token_size - 1; i++) { |
| 202 | PERFETTO_DCHECK((token.at(i) & 0x80) == 0); // |token| must be ASCII only. |
| 203 | *(tok_wptr++) = token.at(i) & 0x7f; |
| 204 | } |
Primiano Tucci | e20db98 | 2020-09-23 10:05:25 +0200 | [diff] [blame] | 205 | *(tok_wptr++) = static_cast<char>(token.at(token_size - 1) | 0x80); |
Primiano Tucci | c0ec36e | 2021-05-17 16:22:25 +0100 | [diff] [blame] | 206 | PERFETTO_DCHECK(tok_wptr == buf_.data() + buf_.size()); |
Primiano Tucci | 0269116 | 2020-01-21 13:30:13 +0000 | [diff] [blame] | 207 | return id; |
| 208 | } |
| 209 | |
| 210 | // NOTE: the caller need to mask the returned chars with 0x7f. The last char of |
| 211 | // the StringView will have its MSB set (it's used as a EOF char internally). |
| 212 | base::StringView KernelSymbolMap::TokenTable::Lookup(TokenId id) { |
| 213 | if (id == 0) |
| 214 | return base::StringView(); |
| 215 | if (id > num_tokens_) |
| 216 | return base::StringView("<error>"); |
| 217 | // We don't know precisely where the id-th token starts in the buffer. We |
| 218 | // store only one position every kTokenIndexSampling. From there, the token |
| 219 | // can be found with a linear scan of at most kTokenIndexSampling steps. |
| 220 | size_t index_off = id / kTokenIndexSampling; |
| 221 | PERFETTO_DCHECK(index_off < index_.size()); |
| 222 | TokenId cur_id = static_cast<TokenId>(index_off * kTokenIndexSampling); |
| 223 | uint32_t begin = index_[index_off]; |
| 224 | PERFETTO_DCHECK(begin == 0 || buf_[begin - 1] & 0x80); |
| 225 | const size_t buf_size = buf_.size(); |
| 226 | for (uint32_t off = begin; off < buf_size; ++off) { |
| 227 | // Advance |off| until the end of the token (which has the MSB set). |
| 228 | if ((buf_[off] & 0x80) == 0) |
| 229 | continue; |
| 230 | if (cur_id == id) |
| 231 | return base::StringView(&buf_[begin], off - begin + 1); |
| 232 | ++cur_id; |
| 233 | begin = off + 1; |
| 234 | } |
| 235 | return base::StringView(); |
| 236 | } |
| 237 | |
| 238 | size_t KernelSymbolMap::Parse(const std::string& kallsyms_path) { |
Ryan Savitski | dc21888 | 2020-11-16 18:02:02 +0000 | [diff] [blame] | 239 | PERFETTO_METATRACE_SCOPED(TAG_PRODUCER, KALLSYMS_PARSE); |
Primiano Tucci | 0269116 | 2020-01-21 13:30:13 +0000 | [diff] [blame] | 240 | using SymAddr = uint64_t; |
| 241 | |
| 242 | struct TokenInfo { |
| 243 | uint32_t count = 0; |
| 244 | TokenId id = 0; |
| 245 | }; |
| 246 | |
Daniele Di Proietto | 3c965d2 | 2021-10-01 17:17:13 +0100 | [diff] [blame] | 247 | // Note if changing the container: the code below doesn't rely on stable |
| 248 | // iterators, but relies on stable pointers. |
| 249 | using TokenMap = std::unordered_map<std::string, TokenInfo>; |
Primiano Tucci | 0269116 | 2020-01-21 13:30:13 +0000 | [diff] [blame] | 250 | using TokenMapPtr = TokenMap::value_type*; |
| 251 | TokenMap tokens; |
| 252 | |
| 253 | // Keep the (ordered) list of tokens for each symbol. |
Primiano Tucci | a1221cb | 2020-10-08 11:00:42 +0200 | [diff] [blame] | 254 | struct SymAddrAndTokenPtr { |
| 255 | SymAddr addr; |
| 256 | TokenMapPtr token_map_entry; |
| 257 | |
| 258 | bool operator<(const SymAddrAndTokenPtr& other) const { |
| 259 | return addr < other.addr; |
| 260 | } |
| 261 | }; |
| 262 | std::vector<SymAddrAndTokenPtr> symbol_tokens; |
| 263 | |
| 264 | // Based on `cat /proc/kallsyms | egrep "\b[tT]\b" | wc -l`. |
| 265 | symbol_tokens.reserve(128 * 1024); |
Primiano Tucci | 0269116 | 2020-01-21 13:30:13 +0000 | [diff] [blame] | 266 | |
| 267 | ForEachSym(kallsyms_path, [&](SymAddr addr, char type, const char* name) { |
| 268 | if (addr == 0 || (type != 't' && type != 'T') || name[0] == '$') { |
| 269 | return; |
| 270 | } |
| 271 | |
| 272 | // Split each symbol name in tokens, using '_' as a separator (so that |
| 273 | // "foo_bar" -> ["foo", "bar"]). For each token hash: |
| 274 | // 1. Keep track of the frequency of each token. |
| 275 | // 2. Keep track of the list of token hashes for each symbol. |
Primiano Tucci | a1221cb | 2020-10-08 11:00:42 +0200 | [diff] [blame] | 276 | Tokenize(name, [&tokens, &symbol_tokens, addr](base::StringView token) { |
Primiano Tucci | 0269116 | 2020-01-21 13:30:13 +0000 | [diff] [blame] | 277 | // Strip the .cfi part if present. |
| 278 | if (token.substr(token.size() - 4) == ".cfi") |
| 279 | token = token.substr(0, token.size() - 4); |
| 280 | auto it_and_ins = tokens.emplace(token.ToStdString(), TokenInfo{}); |
| 281 | it_and_ins.first->second.count++; |
Primiano Tucci | a1221cb | 2020-10-08 11:00:42 +0200 | [diff] [blame] | 282 | symbol_tokens.emplace_back(SymAddrAndTokenPtr{addr, &*it_and_ins.first}); |
Primiano Tucci | 0269116 | 2020-01-21 13:30:13 +0000 | [diff] [blame] | 283 | }); |
| 284 | }); |
| 285 | |
Primiano Tucci | a1221cb | 2020-10-08 11:00:42 +0200 | [diff] [blame] | 286 | symbol_tokens.shrink_to_fit(); |
| 287 | |
| 288 | // For each symbol address, T entries are inserted into |symbol_tokens|, one |
| 289 | // for each token. These symbols are added in arbitrary address (as seen in |
| 290 | // /proc/kallsyms). Here we want to sort symbols by addresses, but at the same |
| 291 | // time preserve the order of tokens within each address. |
| 292 | // For instance, if kallsyms has: {0x41: connect_socket, 0x42: write_file}: |
| 293 | // Before sort: [(0x42, write), (0x42, file), (0x41, connect), (0x41, socket)] |
| 294 | // After sort: [(0x41, connect), (0x41, socket), (0x42, write), (0x42, file)] |
| 295 | std::stable_sort(symbol_tokens.begin(), symbol_tokens.end()); |
| 296 | |
Primiano Tucci | 0269116 | 2020-01-21 13:30:13 +0000 | [diff] [blame] | 297 | // At this point we have broken down each symbol into a set of token hashes. |
| 298 | // Now generate the token ids, putting high freq tokens first, so they use |
| 299 | // only one byte to varint encode. |
| 300 | |
| 301 | // This block limits the lifetime of |tokens_by_freq|. |
| 302 | { |
| 303 | std::vector<TokenMapPtr> tokens_by_freq; |
| 304 | tokens_by_freq.resize(tokens.size()); |
| 305 | size_t tok_idx = 0; |
| 306 | for (auto& kv : tokens) |
| 307 | tokens_by_freq[tok_idx++] = &kv; |
| 308 | |
| 309 | auto comparer = [](TokenMapPtr a, TokenMapPtr b) { |
| 310 | PERFETTO_DCHECK(a && b); |
| 311 | return b->second.count < a->second.count; |
| 312 | }; |
| 313 | std::sort(tokens_by_freq.begin(), tokens_by_freq.end(), comparer); |
| 314 | for (TokenMapPtr tinfo : tokens_by_freq) { |
| 315 | tinfo->second.id = tokens_.Add(tinfo->first); |
| 316 | } |
| 317 | } |
| 318 | tokens_.shrink_to_fit(); |
| 319 | |
| 320 | buf_.resize(2 * 1024 * 1024); // Based on real-word observations. |
Primiano Tucci | a1221cb | 2020-10-08 11:00:42 +0200 | [diff] [blame] | 321 | base_addr_ = symbol_tokens.empty() ? 0 : symbol_tokens.begin()->addr; |
Primiano Tucci | 0269116 | 2020-01-21 13:30:13 +0000 | [diff] [blame] | 322 | SymAddr prev_sym_addr = base_addr_; |
| 323 | uint8_t* wptr = buf_.data(); |
| 324 | |
Primiano Tucci | a1221cb | 2020-10-08 11:00:42 +0200 | [diff] [blame] | 325 | for (auto it = symbol_tokens.begin(); it != symbol_tokens.end();) { |
| 326 | const SymAddr sym_addr = it->addr; |
Primiano Tucci | 0269116 | 2020-01-21 13:30:13 +0000 | [diff] [blame] | 327 | |
| 328 | // Find the iterator to the first token of the next symbol (or the end). |
| 329 | auto sym_start = it; |
| 330 | auto sym_end = it; |
Primiano Tucci | a1221cb | 2020-10-08 11:00:42 +0200 | [diff] [blame] | 331 | while (sym_end != symbol_tokens.end() && sym_end->addr == sym_addr) |
Primiano Tucci | 0269116 | 2020-01-21 13:30:13 +0000 | [diff] [blame] | 332 | ++sym_end; |
| 333 | |
| 334 | // The range [sym_start, sym_end) has all the tokens for the current symbol. |
| 335 | uint32_t size_before = static_cast<uint32_t>(wptr - buf_.data()); |
| 336 | |
| 337 | // Make sure there is enough headroom to write the symbol. |
| 338 | if (buf_.size() - size_before < 1024) { |
| 339 | buf_.resize(buf_.size() + 32768); |
| 340 | wptr = buf_.data() + size_before; |
| 341 | } |
| 342 | |
| 343 | uint32_t sym_rel_addr = static_cast<uint32_t>(sym_addr - base_addr_); |
| 344 | const size_t sym_num = num_syms_++; |
| 345 | if (sym_num % kSymIndexSampling == 0) |
| 346 | index_.emplace_back(std::make_pair(sym_rel_addr, size_before)); |
| 347 | PERFETTO_DCHECK(sym_addr >= prev_sym_addr); |
| 348 | uint32_t delta = static_cast<uint32_t>(sym_addr - prev_sym_addr); |
| 349 | wptr = protozero::proto_utils::WriteVarInt(delta, wptr); |
| 350 | // Append all the token ids. |
| 351 | for (it = sym_start; it != sym_end;) { |
Primiano Tucci | a1221cb | 2020-10-08 11:00:42 +0200 | [diff] [blame] | 352 | PERFETTO_DCHECK(it->addr == sym_addr); |
| 353 | TokenMapPtr const token_map_entry = it->token_map_entry; |
| 354 | const TokenInfo& token_info = token_map_entry->second; |
| 355 | TokenId token_id = token_info.id << 1; |
Primiano Tucci | 0269116 | 2020-01-21 13:30:13 +0000 | [diff] [blame] | 356 | ++it; |
| 357 | token_id |= (it == sym_end) ? 1 : 0; // Last one has LSB set to 1. |
| 358 | wptr = protozero::proto_utils::WriteVarInt(token_id, wptr); |
| 359 | } |
| 360 | prev_sym_addr = sym_addr; |
| 361 | } // for (symbols) |
| 362 | |
| 363 | buf_.resize(static_cast<size_t>(wptr - buf_.data())); |
| 364 | buf_.shrink_to_fit(); |
Primiano Tucci | a1221cb | 2020-10-08 11:00:42 +0200 | [diff] [blame] | 365 | base::MaybeReleaseAllocatorMemToOS(); // For Scudo, b/170217718. |
Primiano Tucci | 0269116 | 2020-01-21 13:30:13 +0000 | [diff] [blame] | 366 | |
Primiano Tucci | f44e2a1 | 2020-10-22 16:15:31 +0200 | [diff] [blame] | 367 | if (num_syms_ == 0) { |
| 368 | PERFETTO_ELOG( |
| 369 | "Failed to parse kallsyms. Kernel functions will not be symbolized. On " |
| 370 | "Linux this requires either running traced_probes as root or manually " |
| 371 | "lowering /proc/sys/kernel/kptr_restrict"); |
| 372 | } else { |
| 373 | PERFETTO_DLOG( |
| 374 | "Loaded %zu kalllsyms entries. Mem usage: %zu B (addresses) + %zu B " |
| 375 | "(tokens), total: %zu B", |
| 376 | num_syms_, addr_bytes(), tokens_.size_bytes(), size_bytes()); |
| 377 | } |
Primiano Tucci | 0269116 | 2020-01-21 13:30:13 +0000 | [diff] [blame] | 378 | |
| 379 | return num_syms_; |
| 380 | } |
| 381 | |
| 382 | std::string KernelSymbolMap::Lookup(uint64_t sym_addr) { |
| 383 | if (index_.empty() || sym_addr < base_addr_) |
| 384 | return ""; |
| 385 | |
| 386 | // First find the highest symbol address <= sym_addr. |
| 387 | // Start with a binary search using the sparse index. |
| 388 | |
| 389 | const uint32_t sym_rel_addr = static_cast<uint32_t>(sym_addr - base_addr_); |
| 390 | auto it = std::upper_bound(index_.cbegin(), index_.cend(), |
| 391 | std::make_pair(sym_rel_addr, 0u)); |
| 392 | if (it != index_.cbegin()) |
| 393 | --it; |
| 394 | |
| 395 | // Then continue with a linear scan (of at most kSymIndexSampling steps). |
| 396 | uint32_t addr = it->first; |
| 397 | uint32_t off = it->second; |
| 398 | const uint8_t* rdptr = &buf_[off]; |
Primiano Tucci | c0ec36e | 2021-05-17 16:22:25 +0100 | [diff] [blame] | 399 | const uint8_t* const buf_end = buf_.data() + buf_.size(); |
Primiano Tucci | 0269116 | 2020-01-21 13:30:13 +0000 | [diff] [blame] | 400 | bool parsing_addr = true; |
| 401 | const uint8_t* next_rdptr = nullptr; |
Primiano Tucci | a1221cb | 2020-10-08 11:00:42 +0200 | [diff] [blame] | 402 | uint64_t sym_start_addr = 0; |
Primiano Tucci | 0269116 | 2020-01-21 13:30:13 +0000 | [diff] [blame] | 403 | for (bool is_first_addr = true;; is_first_addr = false) { |
| 404 | uint64_t v = 0; |
| 405 | const auto* prev_rdptr = rdptr; |
| 406 | rdptr = protozero::proto_utils::ParseVarInt(rdptr, buf_end, &v); |
| 407 | if (rdptr == prev_rdptr) |
| 408 | break; |
| 409 | if (parsing_addr) { |
| 410 | addr += is_first_addr ? 0 : static_cast<uint32_t>(v); |
| 411 | parsing_addr = false; |
| 412 | if (addr > sym_rel_addr) |
| 413 | break; |
| 414 | next_rdptr = rdptr; |
Primiano Tucci | a1221cb | 2020-10-08 11:00:42 +0200 | [diff] [blame] | 415 | sym_start_addr = addr; |
Primiano Tucci | 0269116 | 2020-01-21 13:30:13 +0000 | [diff] [blame] | 416 | } else { |
| 417 | // This is a token. Wait for the EOF maker. |
| 418 | parsing_addr = (v & 1) == 1; |
| 419 | } |
| 420 | } |
| 421 | |
| 422 | if (!next_rdptr) |
| 423 | return ""; |
| 424 | |
Primiano Tucci | a1221cb | 2020-10-08 11:00:42 +0200 | [diff] [blame] | 425 | PERFETTO_DCHECK(sym_rel_addr >= sym_start_addr); |
| 426 | |
| 427 | // If this address is too far from the start of the symbol, this is likely |
| 428 | // a pointer to something else (e.g. some vmalloc struct) and we just picked |
| 429 | // the very last symbol for a loader region. |
| 430 | if (sym_rel_addr - sym_start_addr > kSymMaxSizeBytes) |
| 431 | return ""; |
| 432 | |
Primiano Tucci | 0269116 | 2020-01-21 13:30:13 +0000 | [diff] [blame] | 433 | // The address has been found. Now rejoin the tokens to form the symbol name. |
| 434 | |
| 435 | rdptr = next_rdptr; |
| 436 | std::string sym_name; |
| 437 | sym_name.reserve(kSymNameMaxLen); |
| 438 | for (bool eof = false, is_first_token = true; !eof; is_first_token = false) { |
| 439 | uint64_t v = 0; |
| 440 | const auto* old = rdptr; |
| 441 | rdptr = protozero::proto_utils::ParseVarInt(rdptr, buf_end, &v); |
| 442 | if (rdptr == old) |
| 443 | break; |
| 444 | eof = v & 1; |
| 445 | base::StringView token = tokens_.Lookup(static_cast<TokenId>(v >> 1)); |
| 446 | if (!is_first_token) |
| 447 | sym_name.push_back('_'); |
| 448 | for (size_t i = 0; i < token.size(); i++) |
| 449 | sym_name.push_back(token.at(i) & 0x7f); |
| 450 | } |
| 451 | return sym_name; |
| 452 | } |
| 453 | |
| 454 | } // namespace perfetto |