blob: d8de2dc6aea19c22afd5846c3e567a2c433f406b [file] [log] [blame]
Ioannis Ilkosbd4ee3f2019-06-21 17:40:09 +01001/*
2 * Copyright (C) 2019 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
Ryan7ad6b7d2021-04-22 17:03:54 +010017#include "perfetto/profiling/pprof_builder.h"
18
Primiano Tucci58d2dc62021-06-24 16:03:24 +010019#include "perfetto/base/build_config.h"
20
Primiano Tuccib730b112020-12-01 14:56:11 +010021#if !PERFETTO_BUILDFLAG(PERFETTO_OS_WIN)
Ioannis Ilkosbd4ee3f2019-06-21 17:40:09 +010022#include <cxxabi.h>
Primiano Tuccib730b112020-12-01 14:56:11 +010023#endif
24
Ioannis Ilkosbd4ee3f2019-06-21 17:40:09 +010025#include <algorithm>
Primiano Tucci58d2dc62021-06-24 16:03:24 +010026#include <cinttypes>
Ioannis Ilkosbd4ee3f2019-06-21 17:40:09 +010027#include <map>
28#include <set>
Ryan7ad6b7d2021-04-22 17:03:54 +010029#include <unordered_map>
Ioannis Ilkosbd4ee3f2019-06-21 17:40:09 +010030#include <utility>
31#include <vector>
32
Ioannis Ilkosbd4ee3f2019-06-21 17:40:09 +010033#include "perfetto/base/logging.h"
Hector Dearmand09d9832022-08-04 12:31:44 +000034#include "perfetto/ext/base/hash.h"
35#include "perfetto/ext/base/string_utils.h"
Florian Mayer3ce793c2019-09-24 18:33:30 +010036#include "perfetto/ext/base/utils.h"
Primiano Tucci2d813fd2019-11-26 17:27:07 +000037#include "perfetto/protozero/packed_repeated_fields.h"
Hector Dearmand09d9832022-08-04 12:31:44 +000038#include "perfetto/protozero/scattered_heap_buffer.h"
Primiano Tucci2d813fd2019-11-26 17:27:07 +000039#include "perfetto/trace_processor/trace_processor.h"
Hector Dearmand09d9832022-08-04 12:31:44 +000040#include "src/profiling/symbolizer/symbolize_database.h"
41#include "src/profiling/symbolizer/symbolizer.h"
42#include "src/trace_processor/containers/string_pool.h"
43#include "src/traceconv/utils.h"
44
45#include "protos/perfetto/trace/trace.pbzero.h"
46#include "protos/perfetto/trace/trace_packet.pbzero.h"
47#include "protos/third_party/pprof/profile.pbzero.h"
Ioannis Ilkosbd4ee3f2019-06-21 17:40:09 +010048
// Quick hint on navigating the file:
// Conversions for both perf and heap profiles start with |TraceToPprof|.
// Non-shared logic is in the |heap_profile| and |perf_profile| namespaces.
//
// To build one or more profiles, first the callstack information is queried
// from the SQL tables, and converted into an in-memory representation by
// |PreprocessLocations|. Then an instance of |GProfileBuilder| is used to
// accumulate samples for that profile, and emit all additional information as a
// serialized proto. Only the entities referenced by that particular
// |GProfileBuilder| instance are emitted.
//
// See protos/third_party/pprof/profile.proto for the meaning of terms like
// function/location/line.
62
Hector Dearmand09d9832022-08-04 12:31:44 +000063namespace {
64using StringId = ::perfetto::trace_processor::StringPool::Id;
65
66// In-memory representation of a Profile.Function.
67struct Function {
68 StringId name_id = StringId::Null();
69 StringId system_name_id = StringId::Null();
70 StringId filename_id = StringId::Null();
71
72 Function(StringId n, StringId s, StringId f)
73 : name_id(n), system_name_id(s), filename_id(f) {}
74
75 bool operator==(const Function& other) const {
76 return std::tie(name_id, system_name_id, filename_id) ==
77 std::tie(other.name_id, other.system_name_id, other.filename_id);
78 }
79};
80
// In-memory representation of a Profile.Line: a function reference paired with
// a source line number.
struct Line {
  int64_t function_id = 0;  // LocationTracker's interned Function id
  int64_t line_no = 0;

  Line(int64_t func, int64_t line) : function_id(func), line_no(line) {}

  // Two lines are equal when both the function and the line number match.
  bool operator==(const Line& other) const {
    if (function_id != other.function_id)
      return false;
    return line_no == other.line_no;
  }
};
92
93// In-memory representation of a Profile.Location.
94struct Location {
95 int64_t mapping_id = 0; // sqlite row id
96 // Common case: location references a single function.
97 int64_t single_function_id = 0; // interned Function id
98 // Alternatively: multiple inlined functions, recovered via offline
99 // symbolisation. Leaf-first ordering.
100 std::vector<Line> inlined_functions;
101
102 Location(int64_t map, int64_t func, std::vector<Line> inlines)
103 : mapping_id(map),
104 single_function_id(func),
105 inlined_functions(std::move(inlines)) {}
106
107 bool operator==(const Location& other) const {
108 return std::tie(mapping_id, single_function_id, inlined_functions) ==
109 std::tie(other.mapping_id, other.single_function_id,
110 other.inlined_functions);
111 }
112};
113} // namespace
114
115template <>
116struct std::hash<Function> {
117 size_t operator()(const Function& loc) const {
Alexander Timinc2bb1b42022-10-17 18:46:16 +0000118 perfetto::base::Hasher hasher;
Hector Dearmand09d9832022-08-04 12:31:44 +0000119 hasher.Update(loc.name_id.raw_id());
120 hasher.Update(loc.system_name_id.raw_id());
121 hasher.Update(loc.filename_id.raw_id());
122 return static_cast<size_t>(hasher.digest());
123 }
124};
125
126template <>
127struct std::hash<Location> {
128 size_t operator()(const Location& loc) const {
Alexander Timinc2bb1b42022-10-17 18:46:16 +0000129 perfetto::base::Hasher hasher;
Hector Dearmand09d9832022-08-04 12:31:44 +0000130 hasher.Update(loc.mapping_id);
131 hasher.Update(loc.single_function_id);
132 for (auto line : loc.inlined_functions) {
133 hasher.Update(line.function_id);
134 hasher.Update(line.line_no);
135 }
136 return static_cast<size_t>(hasher.digest());
137 }
138};
Ryan7ad6b7d2021-04-22 17:03:54 +0100139
Ioannis Ilkosbd4ee3f2019-06-21 17:40:09 +0100140namespace perfetto {
141namespace trace_to_text {
Ioannis Ilkosbd4ee3f2019-06-21 17:40:09 +0100142namespace {
143
Ryan Savitski822ff952020-12-16 16:50:01 +0000144using ::perfetto::trace_processor::Iterator;
Florian Mayer3ce793c2019-09-24 18:33:30 +0100145
Hector Dearmand09d9832022-08-04 12:31:44 +0000146uint64_t ToPprofId(int64_t id) {
147 PERFETTO_DCHECK(id >= 0);
148 return static_cast<uint64_t>(id) + 1;
149}
150
// Joins |vals| into a single comma-separated string of decimal numbers, with
// no trailing separator. An empty vector yields an empty string.
//
// Takes the vector by const reference: the values are only read, so there is
// no reason to copy the container on every call.
std::string AsCsvString(const std::vector<uint64_t>& vals) {
  std::string ret;
  for (size_t i = 0; i < vals.size(); i++) {
    if (i != 0)
      ret += ',';
    ret += std::to_string(vals[i]);
  }
  return ret;
}
161
Lalit Maganti4e2303c2023-03-29 15:28:36 +0100162std::optional<int64_t> GetStatsEntry(
Ryan Savitski822ff952020-12-16 16:50:01 +0000163 trace_processor::TraceProcessor* tp,
164 const std::string& name,
Lalit Maganti4e2303c2023-03-29 15:28:36 +0100165 std::optional<uint64_t> idx = std::nullopt) {
Ryan Savitski822ff952020-12-16 16:50:01 +0000166 std::string query = "select value from stats where name == '" + name + "'";
167 if (idx.has_value())
168 query += " and idx == " + std::to_string(idx.value());
169
170 auto it = tp->ExecuteQuery(query);
171 if (!it.Next()) {
172 if (!it.Status().ok()) {
173 PERFETTO_DFATAL_OR_ELOG("Invalid iterator: %s",
174 it.Status().message().c_str());
Lalit Maganti4e2303c2023-03-29 15:28:36 +0100175 return std::nullopt;
Ryan Savitski822ff952020-12-16 16:50:01 +0000176 }
177 // some stats are not present unless non-zero
Lalit Maganti4e2303c2023-03-29 15:28:36 +0100178 return std::make_optional(0);
Ryan Savitski822ff952020-12-16 16:50:01 +0000179 }
Lalit Maganti4e2303c2023-03-29 15:28:36 +0100180 return std::make_optional(it.Get(0).AsLong());
Ryan Savitski822ff952020-12-16 16:50:01 +0000181}
182
Hector Dearmand09d9832022-08-04 12:31:44 +0000183// Interns Locations, Lines, and Functions. Interning is done by the entity's
184// contents, and has no relation to the row ids in the SQL tables.
185// Contains all data for the trace, so can be reused when emitting multiple
186// profiles.
187//
188// TODO(rsavitski): consider moving mappings into here as well. For now, they're
189// still emitted in a single scan during profile building. Mappings should be
190// unique-enough already in the SQL tables, with only incremental state clearing
191// duplicating entries.
192class LocationTracker {
193 public:
194 int64_t InternLocation(Location loc) {
195 auto it = locations_.find(loc);
196 if (it == locations_.end()) {
197 bool inserted = false;
198 std::tie(it, inserted) = locations_.emplace(
199 std::move(loc), static_cast<int64_t>(locations_.size()));
200 PERFETTO_DCHECK(inserted);
201 }
202 return it->second;
203 }
204
205 int64_t InternFunction(Function func) {
206 auto it = functions_.find(func);
207 if (it == functions_.end()) {
208 bool inserted = false;
209 std::tie(it, inserted) =
210 functions_.emplace(func, static_cast<int64_t>(functions_.size()));
211 PERFETTO_DCHECK(inserted);
212 }
213 return it->second;
214 }
215
216 bool IsCallsiteProcessed(int64_t callstack_id) const {
217 return callsite_to_locations_.find(callstack_id) !=
218 callsite_to_locations_.end();
219 }
220
221 void MaybeSetCallsiteLocations(int64_t callstack_id,
222 const std::vector<int64_t>& locs) {
223 // nop if already set
224 callsite_to_locations_.emplace(callstack_id, locs);
225 }
226
227 const std::vector<int64_t>& LocationsForCallstack(
228 int64_t callstack_id) const {
229 auto it = callsite_to_locations_.find(callstack_id);
230 PERFETTO_CHECK(callstack_id >= 0 && it != callsite_to_locations_.end());
231 return it->second;
232 }
233
234 const std::unordered_map<Location, int64_t>& AllLocations() const {
235 return locations_;
236 }
237 const std::unordered_map<Function, int64_t>& AllFunctions() const {
238 return functions_;
239 }
240
241 private:
242 // Root-first location ids for a given callsite id.
243 std::unordered_map<int64_t, std::vector<int64_t>> callsite_to_locations_;
244 std::unordered_map<Location, int64_t> locations_;
245 std::unordered_map<Function, int64_t> functions_;
246};
247
248struct PreprocessedInline {
249 // |name_id| is already demangled
250 StringId name_id = StringId::Null();
251 StringId filename_id = StringId::Null();
252 int64_t line_no = 0;
253
254 PreprocessedInline(StringId s, StringId f, int64_t line)
255 : name_id(s), filename_id(f), line_no(line) {}
256};
257
258std::unordered_map<int64_t, std::vector<PreprocessedInline>>
259PreprocessInliningInfo(trace_processor::TraceProcessor* tp,
260 trace_processor::StringPool* interner) {
261 std::unordered_map<int64_t, std::vector<PreprocessedInline>> inlines;
262
263 // Most-inlined function (leaf) has the lowest id within a symbol set. Query
264 // such that the per-set line vectors are built up leaf-first.
265 Iterator it = tp->ExecuteQuery(
266 "select symbol_set_id, name, source_file, line_number from "
267 "stack_profile_symbol order by symbol_set_id asc, id asc;");
268 while (it.Next()) {
269 int64_t symbol_set_id = it.Get(0).AsLong();
270 auto func_sysname = it.Get(1).is_null() ? "" : it.Get(1).AsString();
271 auto filename = it.Get(2).is_null() ? "" : it.Get(2).AsString();
272 int64_t line_no = it.Get(3).AsLong();
273
274 inlines[symbol_set_id].emplace_back(interner->InternString(func_sysname),
275 interner->InternString(filename),
276 line_no);
277 }
278
279 if (!it.Status().ok()) {
280 PERFETTO_DFATAL_OR_ELOG("Invalid iterator: %s",
281 it.Status().message().c_str());
282 return {};
283 }
284 return inlines;
285}
286
287// Extracts and interns the unique frames and locations (as defined by the proto
288// format) from the callstack SQL tables.
289//
290// Approach:
291// * for each callstack (callsite ids of the leaves):
292// * use experimental_annotated_callstack to build the full list of
293// constituent frames
294// * for each frame (root to leaf):
295// * intern the location and function(s)
296// * remember the mapping from callsite_id to the callstack so far (from
297// the root and including the frame being considered)
298//
299// Optionally mixes in the annotations as a frame name suffix (since there's no
300// good way to attach extra info to locations in the proto format). This relies
301// on the annotations (produced by experimental_annotated_callstack) to be
302// stable for a given callsite (equivalently: dependent only on their parents).
303LocationTracker PreprocessLocations(trace_processor::TraceProcessor* tp,
304 trace_processor::StringPool* interner,
305 bool annotate_frames) {
306 LocationTracker tracker;
307
308 // Keyed by symbol_set_id, discarded once this function converts the inlines
309 // into Line and Function entries.
310 std::unordered_map<int64_t, std::vector<PreprocessedInline>> inlining_info =
311 PreprocessInliningInfo(tp, interner);
312
313 // Higher callsite ids most likely correspond to the deepest stacks, so we'll
314 // fill more of the overall callsite->location map by visiting the callsited
315 // in decreasing id order. Since processing a callstack also fills in the data
316 // for all parent callsites.
317 Iterator cid_it = tp->ExecuteQuery(
318 "select id from stack_profile_callsite order by id desc;");
319 while (cid_it.Next()) {
320 int64_t query_cid = cid_it.Get(0).AsLong();
321
322 // If the leaf has been processed, the rest of the stack is already known.
323 if (tracker.IsCallsiteProcessed(query_cid))
324 continue;
325
326 std::string annotated_query =
327 "select sp.id, sp.annotation, spf.mapping, spf.name, "
328 "coalesce(spf.deobfuscated_name, demangle(spf.name), spf.name), "
329 "spf.symbol_set_id from "
330 "experimental_annotated_callstack(" +
331 std::to_string(query_cid) +
332 ") sp join stack_profile_frame spf on (sp.frame_id == spf.id) "
333 "order by depth asc";
334 Iterator c_it = tp->ExecuteQuery(annotated_query);
335
336 std::vector<int64_t> callstack_loc_ids;
337 while (c_it.Next()) {
338 int64_t cid = c_it.Get(0).AsLong();
339 auto annotation = c_it.Get(1).is_null() ? "" : c_it.Get(1).AsString();
340 int64_t mapping_id = c_it.Get(2).AsLong();
341 auto func_sysname = c_it.Get(3).is_null() ? "" : c_it.Get(3).AsString();
342 auto func_name = c_it.Get(4).is_null() ? "" : c_it.Get(4).AsString();
Lalit Maganti4e2303c2023-03-29 15:28:36 +0100343 std::optional<int64_t> symbol_set_id =
344 c_it.Get(5).is_null() ? std::nullopt
345 : std::make_optional(c_it.Get(5).AsLong());
Hector Dearmand09d9832022-08-04 12:31:44 +0000346
347 Location loc(mapping_id, /*single_function_id=*/-1, {});
348
349 auto intern_function = [interner, &tracker, annotate_frames](
350 StringId func_sysname_id,
351 StringId original_func_name_id,
352 StringId filename_id,
353 const std::string& anno) {
354 std::string fname = interner->Get(original_func_name_id).ToStdString();
355 if (annotate_frames && !anno.empty() && !fname.empty())
356 fname = fname + " [" + anno + "]";
357 StringId func_name_id = interner->InternString(base::StringView(fname));
358 Function func(func_name_id, func_sysname_id, filename_id);
359 return tracker.InternFunction(func);
360 };
361
362 // Inlining information available
363 if (symbol_set_id.has_value()) {
364 auto it = inlining_info.find(*symbol_set_id);
365 if (it == inlining_info.end()) {
366 PERFETTO_DFATAL_OR_ELOG(
367 "Failed to find stack_profile_symbol entry for symbol_set_id "
368 "%" PRIi64 "",
369 *symbol_set_id);
370 return {};
371 }
372
373 // N inlined functions
374 // The symbolised packets currently assume pre-demangled data (as that's
375 // the default of llvm-symbolizer), so we don't have a system name for
376 // each deinlined frame. Set the human-readable name for both fields. We
377 // can change this, but there's no demand for accurate system names in
378 // pprofs.
379 for (const auto& line : it->second) {
380 int64_t func_id = intern_function(line.name_id, line.name_id,
381 line.filename_id, annotation);
382
383 loc.inlined_functions.emplace_back(func_id, line.line_no);
384 }
385 } else {
386 // Otherwise - single function
387 int64_t func_id =
388 intern_function(interner->InternString(func_sysname),
389 interner->InternString(func_name),
390 /*filename_id=*/StringId::Null(), annotation);
391 loc.single_function_id = func_id;
392 }
393
394 int64_t loc_id = tracker.InternLocation(std::move(loc));
395
396 // Update the tracker with the locations so far (for example, at depth 2,
397 // we'll have 3 root-most locations in |callstack_loc_ids|).
398 callstack_loc_ids.push_back(loc_id);
399 tracker.MaybeSetCallsiteLocations(cid, callstack_loc_ids);
400 }
401
402 if (!c_it.Status().ok()) {
403 PERFETTO_DFATAL_OR_ELOG("Invalid iterator: %s",
404 c_it.Status().message().c_str());
405 return {};
406 }
407 }
408
409 if (!cid_it.Status().ok()) {
410 PERFETTO_DFATAL_OR_ELOG("Invalid iterator: %s",
411 cid_it.Status().message().c_str());
412 return {};
413 }
414
415 return tracker;
416}
417
418// Builds the |perftools.profiles.Profile| proto.
419class GProfileBuilder {
420 public:
421 GProfileBuilder(const LocationTracker& locations,
422 trace_processor::StringPool* interner)
423 : locations_(locations), interner_(interner) {
424 // The pprof format requires the first entry in the string table to be the
425 // empty string.
426 int64_t empty_id = ToStringTableId(StringId::Null());
427 PERFETTO_CHECK(empty_id == 0);
428 }
429
430 void WriteSampleTypes(
431 const std::vector<std::pair<std::string, std::string>>& sample_types) {
432 for (const auto& st : sample_types) {
433 auto* sample_type = result_->add_sample_type();
434 sample_type->set_type(
435 ToStringTableId(interner_->InternString(base::StringView(st.first))));
436 sample_type->set_unit(ToStringTableId(
437 interner_->InternString(base::StringView(st.second))));
438 }
439 }
440
441 bool AddSample(const protozero::PackedVarInt& values, int64_t callstack_id) {
442 const auto& location_ids = locations_.LocationsForCallstack(callstack_id);
443 if (location_ids.empty()) {
444 PERFETTO_DFATAL_OR_ELOG(
445 "Failed to find frames for callstack id %" PRIi64 "", callstack_id);
446 return false;
447 }
448
449 // LocationTracker stores location lists root-first, but the pprof format
450 // requires leaf-first.
451 protozero::PackedVarInt packed_locs;
452 for (auto it = location_ids.rbegin(); it != location_ids.rend(); ++it)
453 packed_locs.Append(ToPprofId(*it));
454
455 auto* gsample = result_->add_sample();
456 gsample->set_value(values);
457 gsample->set_location_id(packed_locs);
458
459 // Remember the locations s.t. we only serialize the referenced ones.
460 seen_locations_.insert(location_ids.cbegin(), location_ids.cend());
461 return true;
462 }
463
464 std::string CompleteProfile(trace_processor::TraceProcessor* tp) {
465 std::set<int64_t> seen_mappings;
466 std::set<int64_t> seen_functions;
467
468 if (!WriteLocations(&seen_mappings, &seen_functions))
469 return {};
470 if (!WriteFunctions(seen_functions))
471 return {};
472 if (!WriteMappings(tp, seen_mappings))
473 return {};
474
475 WriteStringTable();
476 return result_.SerializeAsString();
477 }
478
479 private:
480 // Serializes the Profile.Location entries referenced by this profile.
481 bool WriteLocations(std::set<int64_t>* seen_mappings,
482 std::set<int64_t>* seen_functions) {
483 const std::unordered_map<Location, int64_t>& locations =
484 locations_.AllLocations();
485
486 size_t written_locations = 0;
487 for (const auto& loc_and_id : locations) {
488 const auto& loc = loc_and_id.first;
489 int64_t id = loc_and_id.second;
490
491 if (seen_locations_.find(id) == seen_locations_.end())
492 continue;
493
494 written_locations += 1;
495 seen_mappings->emplace(loc.mapping_id);
496
497 auto* glocation = result_->add_location();
498 glocation->set_id(ToPprofId(id));
499 glocation->set_mapping_id(ToPprofId(loc.mapping_id));
500
501 if (!loc.inlined_functions.empty()) {
502 for (const auto& line : loc.inlined_functions) {
503 seen_functions->insert(line.function_id);
504
505 auto* gline = glocation->add_line();
506 gline->set_function_id(ToPprofId(line.function_id));
507 gline->set_line(line.line_no);
508 }
509 } else {
510 seen_functions->insert(loc.single_function_id);
511
512 glocation->add_line()->set_function_id(
513 ToPprofId(loc.single_function_id));
514 }
515 }
516
517 if (written_locations != seen_locations_.size()) {
518 PERFETTO_DFATAL_OR_ELOG(
519 "Found only %zu/%zu locations during serialization.",
520 written_locations, seen_locations_.size());
521 return false;
522 }
523 return true;
524 }
525
526 // Serializes the Profile.Function entries referenced by this profile.
527 bool WriteFunctions(const std::set<int64_t>& seen_functions) {
528 const std::unordered_map<Function, int64_t>& functions =
529 locations_.AllFunctions();
530
531 size_t written_functions = 0;
532 for (const auto& func_and_id : functions) {
533 const auto& func = func_and_id.first;
534 int64_t id = func_and_id.second;
535
536 if (seen_functions.find(id) == seen_functions.end())
537 continue;
538
539 written_functions += 1;
540
541 auto* gfunction = result_->add_function();
542 gfunction->set_id(ToPprofId(id));
543 gfunction->set_name(ToStringTableId(func.name_id));
544 gfunction->set_system_name(ToStringTableId(func.system_name_id));
545 if (!func.filename_id.is_null())
546 gfunction->set_filename(ToStringTableId(func.filename_id));
547 }
548
549 if (written_functions != seen_functions.size()) {
550 PERFETTO_DFATAL_OR_ELOG(
551 "Found only %zu/%zu functions during serialization.",
552 written_functions, seen_functions.size());
553 return false;
554 }
555 return true;
556 }
557
558 // Serializes the Profile.Mapping entries referenced by this profile.
559 bool WriteMappings(trace_processor::TraceProcessor* tp,
560 const std::set<int64_t>& seen_mappings) {
561 Iterator mapping_it = tp->ExecuteQuery(
Mark Hansen3bf1f9d2023-11-21 06:01:56 +0000562 "SELECT id, exact_offset, start, end, name, build_id "
Hector Dearmand09d9832022-08-04 12:31:44 +0000563 "FROM stack_profile_mapping;");
564 size_t mappings_no = 0;
565 while (mapping_it.Next()) {
566 int64_t id = mapping_it.Get(0).AsLong();
567 if (seen_mappings.find(id) == seen_mappings.end())
568 continue;
569 ++mappings_no;
570 auto interned_filename = ToStringTableId(
571 interner_->InternString(mapping_it.Get(4).AsString()));
Mark Hansen3bf1f9d2023-11-21 06:01:56 +0000572 auto interned_build_id = ToStringTableId(
573 interner_->InternString(mapping_it.Get(5).AsString()));
Hector Dearmand09d9832022-08-04 12:31:44 +0000574 auto* gmapping = result_->add_mapping();
575 gmapping->set_id(ToPprofId(id));
Hector Dearmand09d9832022-08-04 12:31:44 +0000576 gmapping->set_file_offset(
577 static_cast<uint64_t>(mapping_it.Get(1).AsLong()));
578 gmapping->set_memory_start(
579 static_cast<uint64_t>(mapping_it.Get(2).AsLong()));
580 gmapping->set_memory_limit(
581 static_cast<uint64_t>(mapping_it.Get(3).AsLong()));
582 gmapping->set_filename(interned_filename);
Mark Hansen3bf1f9d2023-11-21 06:01:56 +0000583 gmapping->set_build_id(interned_build_id);
Hector Dearmand09d9832022-08-04 12:31:44 +0000584 }
585 if (!mapping_it.Status().ok()) {
586 PERFETTO_DFATAL_OR_ELOG("Invalid mapping iterator: %s",
587 mapping_it.Status().message().c_str());
588 return false;
589 }
590 if (mappings_no != seen_mappings.size()) {
591 PERFETTO_DFATAL_OR_ELOG("Missing mappings.");
592 return false;
593 }
594 return true;
595 }
596
597 void WriteStringTable() {
598 for (StringId id : string_table_) {
599 trace_processor::NullTermStringView s = interner_->Get(id);
600 result_->add_string_table(s.data(), s.size());
601 }
602 }
603
604 int64_t ToStringTableId(StringId interned_id) {
605 auto it = interning_remapper_.find(interned_id);
606 if (it == interning_remapper_.end()) {
607 int64_t table_id = static_cast<int64_t>(string_table_.size());
608 string_table_.push_back(interned_id);
609 bool inserted = false;
610 std::tie(it, inserted) =
611 interning_remapper_.emplace(interned_id, table_id);
612 PERFETTO_DCHECK(inserted);
613 }
614 return it->second;
615 }
616
617 // Contains all locations, lines, functions (in memory):
618 const LocationTracker& locations_;
619
620 // String interner, strings referenced by LocationTracker are already
621 // interned. The new internings will come from mappings, and sample types.
622 trace_processor::StringPool* interner_;
623
624 // The profile format uses the repeated string_table field's index as an
625 // implicit id, so these structures remap the interned strings into sequential
626 // ids. Only the strings referenced by this GProfileBuilder instance will be
627 // added to the table.
628 std::unordered_map<StringId, int64_t> interning_remapper_;
629 std::vector<StringId> string_table_;
630
631 // Profile proto being serialized.
632 protozero::HeapBuffered<third_party::perftools::profiles::pbzero::Profile>
633 result_;
634
635 // Set of locations referenced by the added samples.
636 std::set<int64_t> seen_locations_;
637};
638
Ryan Savitski822ff952020-12-16 16:50:01 +0000639namespace heap_profile {
// Describes one value column of a heap profile: how it is labelled in the
// output and how it is computed from heap_profile_allocation.
struct View {
  // Sample type name written to the profile.
  const char* type;
  // Unit of the sample type.
  const char* unit;
  // SQL aggregate expression selected for each callsite.
  const char* aggregator;
  // Optional extra SQL condition ANDed into the WHERE clause; nullptr for none.
  const char* filter;
};
Ryan Savitski822ff952020-12-16 16:50:01 +0000646
Daniele Di Proiettodd72e9e2022-11-14 17:16:13 +0000647const View kMallocViews[] = {
648 {"Total malloc count", "count", "sum(count)", "size >= 0"},
649 {"Total malloc size", "bytes", "SUM(size)", "size >= 0"},
650 {"Unreleased malloc count", "count", "SUM(count)", nullptr},
651 {"Unreleased malloc size", "bytes", "SUM(size)", nullptr}};
652
653const View kGenericViews[] = {
654 {"Total count", "count", "sum(count)", "size >= 0"},
655 {"Total size", "bytes", "SUM(size)", "size >= 0"},
656 {"Unreleased count", "count", "SUM(count)", nullptr},
657 {"Unreleased size", "bytes", "SUM(size)", nullptr}};
658
659const View kJavaSamplesViews[] = {
660 {"Total allocation count", "count", "SUM(count)", nullptr},
661 {"Total allocation size", "bytes", "SUM(size)", nullptr}};
Ryan Savitski822ff952020-12-16 16:50:01 +0000662
663static bool VerifyPIDStats(trace_processor::TraceProcessor* tp, uint64_t pid) {
664 bool success = true;
Lalit Maganti4e2303c2023-03-29 15:28:36 +0100665 std::optional<int64_t> stat =
666 GetStatsEntry(tp, "heapprofd_buffer_corrupted", std::make_optional(pid));
Ryan Savitski822ff952020-12-16 16:50:01 +0000667 if (!stat.has_value()) {
668 PERFETTO_DFATAL_OR_ELOG("Failed to get heapprofd_buffer_corrupted stat");
669 } else if (stat.value() > 0) {
670 success = false;
671 PERFETTO_ELOG("WARNING: The profile for %" PRIu64
672 " ended early due to a buffer corruption."
673 " THIS IS ALWAYS A BUG IN HEAPPROFD OR"
674 " CLIENT MEMORY CORRUPTION.",
675 pid);
676 }
Lalit Maganti4e2303c2023-03-29 15:28:36 +0100677 stat = GetStatsEntry(tp, "heapprofd_buffer_overran", std::make_optional(pid));
Ryan Savitski822ff952020-12-16 16:50:01 +0000678 if (!stat.has_value()) {
679 PERFETTO_DFATAL_OR_ELOG("Failed to get heapprofd_buffer_overran stat");
680 } else if (stat.value() > 0) {
681 success = false;
682 PERFETTO_ELOG("WARNING: The profile for %" PRIu64
683 " ended early due to a buffer overrun.",
684 pid);
Florian Mayer3ce793c2019-09-24 18:33:30 +0100685 }
686
Ryan Savitski822ff952020-12-16 16:50:01 +0000687 stat = GetStatsEntry(tp, "heapprofd_rejected_concurrent", pid);
688 if (!stat.has_value()) {
689 PERFETTO_DFATAL_OR_ELOG("Failed to get heapprofd_rejected_concurrent stat");
690 } else if (stat.value() > 0) {
691 success = false;
692 PERFETTO_ELOG("WARNING: The profile for %" PRIu64
693 " was rejected due to a concurrent profile.",
694 pid);
695 }
696 return success;
697}
698
699static std::vector<Iterator> BuildViewIterators(
700 trace_processor::TraceProcessor* tp,
701 uint64_t upid,
702 uint64_t ts,
Daniele Di Proiettodd72e9e2022-11-14 17:16:13 +0000703 const char* heap_name,
704 const std::vector<View>& views) {
Ryan Savitski822ff952020-12-16 16:50:01 +0000705 std::vector<Iterator> view_its;
Daniele Di Proiettodd72e9e2022-11-14 17:16:13 +0000706 for (const View& v : views) {
Ryan Savitski822ff952020-12-16 16:50:01 +0000707 std::string query = "SELECT hpa.callsite_id ";
708 query +=
709 ", " + std::string(v.aggregator) + " FROM heap_profile_allocation hpa ";
710 // TODO(fmayer): Figure out where negative callsite_id comes from.
711 query += "WHERE hpa.callsite_id >= 0 ";
712 query += "AND hpa.upid = " + std::to_string(upid) + " ";
713 query += "AND hpa.ts <= " + std::to_string(ts) + " ";
714 query += "AND hpa.heap_name = '" + std::string(heap_name) + "' ";
715 if (v.filter)
716 query += "AND " + std::string(v.filter) + " ";
717 query += "GROUP BY hpa.callsite_id;";
718 view_its.emplace_back(tp->ExecuteQuery(query));
719 }
720 return view_its;
721}
722
723static bool WriteAllocations(GProfileBuilder* builder,
724 std::vector<Iterator>* view_its) {
725 for (;;) {
726 bool all_next = true;
727 bool any_next = false;
Daniele Di Proiettodd72e9e2022-11-14 17:16:13 +0000728 for (size_t i = 0; i < view_its->size(); ++i) {
Ryan Savitski822ff952020-12-16 16:50:01 +0000729 Iterator& it = (*view_its)[i];
730 bool next = it.Next();
731 if (!it.Status().ok()) {
732 PERFETTO_DFATAL_OR_ELOG("Invalid view iterator: %s",
733 it.Status().message().c_str());
734 return false;
735 }
736 all_next = all_next && next;
737 any_next = any_next || next;
738 }
739
740 if (!all_next) {
741 PERFETTO_CHECK(!any_next);
742 break;
743 }
744
745 protozero::PackedVarInt sample_values;
746 int64_t callstack_id = -1;
Daniele Di Proiettodd72e9e2022-11-14 17:16:13 +0000747 for (size_t i = 0; i < view_its->size(); ++i) {
Ryan Savitski822ff952020-12-16 16:50:01 +0000748 if (i == 0) {
749 callstack_id = (*view_its)[i].Get(0).AsLong();
750 } else if (callstack_id != (*view_its)[i].Get(0).AsLong()) {
751 PERFETTO_DFATAL_OR_ELOG("Wrong callstack.");
752 return false;
753 }
754 sample_values.Append((*view_its)[i].Get(1).AsLong());
755 }
756
757 if (!builder->AddSample(sample_values, callstack_id))
758 return false;
759 }
760 return true;
761}
762
763static bool TraceToHeapPprof(trace_processor::TraceProcessor* tp,
764 std::vector<SerializedProfile>* output,
Ryan7ad6b7d2021-04-22 17:03:54 +0100765 bool annotate_frames,
Ryan Savitski822ff952020-12-16 16:50:01 +0000766 uint64_t target_pid,
767 const std::vector<uint64_t>& target_timestamps) {
Hector Dearmand09d9832022-08-04 12:31:44 +0000768 trace_processor::StringPool interner;
769 LocationTracker locations =
770 PreprocessLocations(tp, &interner, annotate_frames);
771
Florian Mayere7e2bfc2019-10-01 14:11:36 +0100772 bool any_fail = false;
Ryan Savitski822ff952020-12-16 16:50:01 +0000773 Iterator it = tp->ExecuteQuery(
774 "select distinct hpa.upid, hpa.ts, p.pid, hpa.heap_name "
775 "from heap_profile_allocation hpa, "
776 "process p where p.upid = hpa.upid;");
Florian Mayer3ce793c2019-09-24 18:33:30 +0100777 while (it.Next()) {
Hector Dearmand09d9832022-08-04 12:31:44 +0000778 GProfileBuilder builder(locations, &interner);
Florian Mayera8e8a6b2020-01-30 16:41:15 +0000779 uint64_t upid = static_cast<uint64_t>(it.Get(0).AsLong());
780 uint64_t ts = static_cast<uint64_t>(it.Get(1).AsLong());
781 uint64_t profile_pid = static_cast<uint64_t>(it.Get(2).AsLong());
Florian Mayerfeb377f2020-06-30 15:00:11 +0100782 const char* heap_name = it.Get(3).AsString();
Ryan Savitski822ff952020-12-16 16:50:01 +0000783 if ((target_pid > 0 && profile_pid != target_pid) ||
784 (!target_timestamps.empty() &&
785 std::find(target_timestamps.begin(), target_timestamps.end(), ts) ==
786 target_timestamps.end())) {
Neda Topoljanac39288012019-09-25 15:25:01 +0100787 continue;
788 }
789
Ryan Savitski822ff952020-12-16 16:50:01 +0000790 if (!VerifyPIDStats(tp, profile_pid))
Florian Mayere7e2bfc2019-10-01 14:11:36 +0100791 any_fail = true;
792
Daniele Di Proiettodd72e9e2022-11-14 17:16:13 +0000793 std::vector<View> views;
794 if (base::StringView(heap_name) == "libc.malloc") {
795 views.assign(std::begin(kMallocViews), std::end(kMallocViews));
796 } else if (base::StringView(heap_name) == "com.android.art") {
797 views.assign(std::begin(kJavaSamplesViews), std::end(kJavaSamplesViews));
798 } else {
799 views.assign(std::begin(kGenericViews), std::end(kGenericViews));
800 }
801
Ryan Savitski822ff952020-12-16 16:50:01 +0000802 std::vector<std::pair<std::string, std::string>> sample_types;
Daniele Di Proiettodd72e9e2022-11-14 17:16:13 +0000803 for (const View& view : views) {
804 sample_types.emplace_back(view.type, view.unit);
Ryan Savitski822ff952020-12-16 16:50:01 +0000805 }
806 builder.WriteSampleTypes(sample_types);
Florian Mayer3ce793c2019-09-24 18:33:30 +0100807
Ryan Savitski822ff952020-12-16 16:50:01 +0000808 std::vector<Iterator> view_its =
Daniele Di Proiettodd72e9e2022-11-14 17:16:13 +0000809 BuildViewIterators(tp, upid, ts, heap_name, views);
Ryan Savitski822ff952020-12-16 16:50:01 +0000810 std::string profile_proto;
811 if (WriteAllocations(&builder, &view_its)) {
Hector Dearmand09d9832022-08-04 12:31:44 +0000812 profile_proto = builder.CompleteProfile(tp);
Ryan Savitski822ff952020-12-16 16:50:01 +0000813 }
Florian Mayerfeb377f2020-06-30 15:00:11 +0100814 output->emplace_back(
Ryan Savitski822ff952020-12-16 16:50:01 +0000815 SerializedProfile{ProfileType::kHeapProfile, profile_pid,
816 std::move(profile_proto), heap_name});
817 }
818
819 if (!it.Status().ok()) {
820 PERFETTO_DFATAL_OR_ELOG("Invalid iterator: %s",
821 it.Status().message().c_str());
822 return false;
Florian Mayer3ce793c2019-09-24 18:33:30 +0100823 }
Florian Mayere7e2bfc2019-10-01 14:11:36 +0100824 if (any_fail) {
825 PERFETTO_ELOG(
826 "One or more of your profiles had an issue. Please consult "
Florian Mayeraaaaa9a2020-06-24 10:59:34 +0200827 "https://perfetto.dev/docs/data-sources/"
828 "native-heap-profiler#troubleshooting");
Florian Mayere7e2bfc2019-10-01 14:11:36 +0100829 }
Ryan Savitski822ff952020-12-16 16:50:01 +0000830 return true;
831}
832} // namespace heap_profile
833
834namespace perf_profile {
// Per-process bookkeeping used when grouping perf samples into profiles.
struct ProcessInfo {
  // Pid of the process. Zero-initialized so that a default-constructed
  // ProcessInfo never carries an indeterminate value.
  uint64_t pid = 0;
  // Utids of the threads associated with this process.
  std::vector<uint64_t> utids;
};
839
840// Returns a map of upid -> {pid, utids[]} for sampled processes.
841static std::map<uint64_t, ProcessInfo> GetProcessMap(
842 trace_processor::TraceProcessor* tp) {
843 Iterator it = tp->ExecuteQuery(
844 "select distinct process.upid, process.pid, thread.utid from perf_sample "
Ryan Savitski12c6f302021-03-10 13:44:46 +0000845 "join thread using (utid) join process using (upid) where callsite_id is "
846 "not null order by process.upid asc");
Ryan Savitski822ff952020-12-16 16:50:01 +0000847 std::map<uint64_t, ProcessInfo> process_map;
848 while (it.Next()) {
849 uint64_t upid = static_cast<uint64_t>(it.Get(0).AsLong());
850 uint64_t pid = static_cast<uint64_t>(it.Get(1).AsLong());
851 uint64_t utid = static_cast<uint64_t>(it.Get(2).AsLong());
852 process_map[upid].pid = pid;
853 process_map[upid].utids.push_back(utid);
854 }
Florian Mayer3ce793c2019-09-24 18:33:30 +0100855 if (!it.Status().ok()) {
856 PERFETTO_DFATAL_OR_ELOG("Invalid iterator: %s",
857 it.Status().message().c_str());
Ryan Savitski822ff952020-12-16 16:50:01 +0000858 return {};
859 }
860 return process_map;
861}
862
863static void LogTracePerfEventIssues(trace_processor::TraceProcessor* tp) {
Lalit Maganti4e2303c2023-03-29 15:28:36 +0100864 std::optional<int64_t> stat = GetStatsEntry(tp, "perf_samples_skipped");
Ryan Savitski822ff952020-12-16 16:50:01 +0000865 if (!stat.has_value()) {
866 PERFETTO_DFATAL_OR_ELOG("Failed to look up perf_samples_skipped stat");
867 } else if (stat.value() > 0) {
868 PERFETTO_ELOG(
869 "Warning: the trace recorded %" PRIi64
870 " skipped samples, which otherwise matched the tracing config. This "
871 "would cause a process to be completely absent from the trace, but "
872 "does *not* imply data loss in any of the output profiles.",
873 stat.value());
874 }
875
876 stat = GetStatsEntry(tp, "perf_samples_skipped_dataloss");
877 if (!stat.has_value()) {
878 PERFETTO_DFATAL_OR_ELOG(
879 "Failed to look up perf_samples_skipped_dataloss stat");
880 } else if (stat.value() > 0) {
881 PERFETTO_ELOG("DATA LOSS: the trace recorded %" PRIi64
882 " lost perf samples (within traced_perf). This means that "
883 "the trace is missing information, but it is not known "
884 "which profile that affected.",
885 stat.value());
886 }
887
888 // Check if any per-cpu ringbuffers encountered dataloss (as recorded by the
889 // kernel).
890 Iterator it = tp->ExecuteQuery(
891 "select idx, value from stats where name == 'perf_cpu_lost_records' and "
892 "value > 0 order by idx asc");
893 while (it.Next()) {
894 PERFETTO_ELOG(
895 "DATA LOSS: during the trace, the per-cpu kernel ring buffer for cpu "
896 "%" PRIi64 " recorded %" PRIi64
897 " lost samples. This means that the trace is missing information, "
898 "but it is not known which profile that affected.",
899 static_cast<int64_t>(it.Get(0).AsLong()),
900 static_cast<int64_t>(it.Get(1).AsLong()));
901 }
902 if (!it.Status().ok()) {
903 PERFETTO_DFATAL_OR_ELOG("Invalid iterator: %s",
904 it.Status().message().c_str());
905 }
906}
907
908// TODO(rsavitski): decide whether errors in |AddSample| should result in an
909// empty profile (and/or whether they should make the overall conversion
910// unsuccessful). Furthermore, clarify the return value's semantics for both
911// perf and heap profiles.
912static bool TraceToPerfPprof(trace_processor::TraceProcessor* tp,
913 std::vector<SerializedProfile>* output,
Ryan7ad6b7d2021-04-22 17:03:54 +0100914 bool annotate_frames,
Ryan Savitski822ff952020-12-16 16:50:01 +0000915 uint64_t target_pid) {
Hector Dearmand09d9832022-08-04 12:31:44 +0000916 trace_processor::StringPool interner;
917 LocationTracker locations =
918 PreprocessLocations(tp, &interner, annotate_frames);
Ryan Savitski822ff952020-12-16 16:50:01 +0000919
920 LogTracePerfEventIssues(tp);
921
922 // Aggregate samples by upid when building profiles.
923 std::map<uint64_t, ProcessInfo> process_map = GetProcessMap(tp);
924 for (const auto& p : process_map) {
925 const ProcessInfo& process = p.second;
926
927 if (target_pid != 0 && process.pid != target_pid)
928 continue;
929
Hector Dearmand09d9832022-08-04 12:31:44 +0000930 GProfileBuilder builder(locations, &interner);
Ryan Savitski822ff952020-12-16 16:50:01 +0000931 builder.WriteSampleTypes({{"samples", "count"}});
932
933 std::string query = "select callsite_id from perf_sample where utid in (" +
Ryan Savitski12c6f302021-03-10 13:44:46 +0000934 AsCsvString(process.utids) +
935 ") and callsite_id is not null order by ts asc;";
Ryan Savitski822ff952020-12-16 16:50:01 +0000936
937 protozero::PackedVarInt single_count_value;
938 single_count_value.Append(1);
939
940 Iterator it = tp->ExecuteQuery(query);
941 while (it.Next()) {
942 int64_t callsite_id = static_cast<int64_t>(it.Get(0).AsLong());
943 builder.AddSample(single_count_value, callsite_id);
944 }
945 if (!it.Status().ok()) {
Ryan Savitski12c6f302021-03-10 13:44:46 +0000946 PERFETTO_DFATAL_OR_ELOG("Failed to iterate over samples: %s",
947 it.Status().c_message());
Ryan Savitski822ff952020-12-16 16:50:01 +0000948 return false;
949 }
950
Hector Dearmand09d9832022-08-04 12:31:44 +0000951 std::string profile_proto = builder.CompleteProfile(tp);
Ryan Savitski822ff952020-12-16 16:50:01 +0000952 output->emplace_back(SerializedProfile{
953 ProfileType::kPerfProfile, process.pid, std::move(profile_proto), ""});
Florian Mayer3ce793c2019-09-24 18:33:30 +0100954 }
955 return true;
Ioannis Ilkosbd4ee3f2019-06-21 17:40:09 +0100956}
Ryan Savitski822ff952020-12-16 16:50:01 +0000957} // namespace perf_profile
Ryan7ad6b7d2021-04-22 17:03:54 +0100958} // namespace
Ryan Savitski822ff952020-12-16 16:50:01 +0000959
960bool TraceToPprof(trace_processor::TraceProcessor* tp,
961 std::vector<SerializedProfile>* output,
962 ConversionMode mode,
Ryan7ad6b7d2021-04-22 17:03:54 +0100963 uint64_t flags,
Ryan Savitski822ff952020-12-16 16:50:01 +0000964 uint64_t pid,
965 const std::vector<uint64_t>& timestamps) {
Ryan7ad6b7d2021-04-22 17:03:54 +0100966 bool annotate_frames =
967 flags & static_cast<uint64_t>(ConversionFlags::kAnnotateFrames);
Ryan Savitski822ff952020-12-16 16:50:01 +0000968 switch (mode) {
969 case (ConversionMode::kHeapProfile):
Ryan7ad6b7d2021-04-22 17:03:54 +0100970 return heap_profile::TraceToHeapPprof(tp, output, annotate_frames, pid,
971 timestamps);
Ryan Savitski822ff952020-12-16 16:50:01 +0000972 case (ConversionMode::kPerfProfile):
Ryan7ad6b7d2021-04-22 17:03:54 +0100973 return perf_profile::TraceToPerfPprof(tp, output, annotate_frames, pid);
Ryan Savitski822ff952020-12-16 16:50:01 +0000974 }
975 PERFETTO_FATAL("unknown conversion option"); // for gcc
976}
Ioannis Ilkosbd4ee3f2019-06-21 17:40:09 +0100977
Ioannis Ilkosbd4ee3f2019-06-21 17:40:09 +0100978} // namespace trace_to_text
979} // namespace perfetto