Merge "processor: Fix order of async events in json export"
diff --git a/docs/trace-processor.md b/docs/trace-processor.md
index cea8846..532056b 100644
--- a/docs/trace-processor.md
+++ b/docs/trace-processor.md
@@ -8,8 +8,10 @@
through SQL queries. The trace processor is used:
* By the [Perfetto UI](https://ui.perfetto.dev/), in the form of a
Web Assembly module.
-* Standalone, using the `trace_processor_shell` target
- (`ninja -C out/xxx trace_processor_shell`).
+* Standalone:
+ * using the [prebuilt](http://get.perfetto.dev/trace_processor) binaries.
+ * using the `trace_processor_shell` target from source
+ (`ninja -C out/xxx trace_processor_shell`).
* In internal pipelines for batch processing.
Supported input formats:
diff --git a/gn/perfetto.gni b/gn/perfetto.gni
index 3d756d0..828a493 100644
--- a/gn/perfetto.gni
+++ b/gn/perfetto.gni
@@ -66,6 +66,10 @@
build_with_chromium = false
}
+if (!defined(is_nacl)) {
+ is_nacl = false
+}
+
declare_args() {
# The Android blueprint file generator set this to true (as well as
# is_perfetto_build_generator). This is just about being built in the
@@ -135,7 +139,7 @@
# system backend in the client library.
# This includes building things that rely on POSIX sockets, this places
# limitations on the supported operating systems.
- enable_perfetto_ipc = !is_win && !is_fuchsia &&
+ enable_perfetto_ipc = !is_win && !is_fuchsia && !is_nacl &&
(perfetto_build_standalone ||
perfetto_build_with_android || build_with_chromium)
diff --git a/gn/standalone/BUILDCONFIG.gn b/gn/standalone/BUILDCONFIG.gn
index d980a05..3668c0f 100644
--- a/gn/standalone/BUILDCONFIG.gn
+++ b/gn/standalone/BUILDCONFIG.gn
@@ -36,10 +36,11 @@
is_linux_host = host_os == "linux"
is_mac = current_os == "mac"
-# Building with Windows/Fuchsia is currently only supported in the Chromium
+# Building with Windows/Fuchsia/NaCl is currently only supported in the Chromium
# tree so always set this to false.
is_win = false
is_fuchsia = false
+is_nacl = false
if (target_cpu == "") {
target_cpu = host_cpu
diff --git a/src/profiling/memory/unwinding.cc b/src/profiling/memory/unwinding.cc
index 6b030c0..ad18031 100644
--- a/src/profiling/memory/unwinding.cc
+++ b/src/profiling/memory/unwinding.cc
@@ -58,6 +58,8 @@
namespace profiling {
namespace {
+constexpr base::TimeMillis kMapsReparseInterval{500};
+
constexpr size_t kMaxFrames = 1000;
// We assume average ~300us per unwind. If we handle up to 1000 unwinds, this
@@ -204,8 +206,14 @@
uint8_t error_code = 0;
for (int attempt = 0; attempt < 2; ++attempt) {
if (attempt > 0) {
+ if (metadata->last_maps_reparse_time + kMapsReparseInterval >
+ base::GetWallTimeMs()) {
+ PERFETTO_DLOG("Skipping reparse due to rate limit.");
+ break;
+ }
PERFETTO_DLOG("Reparsing maps");
metadata->ReparseMaps();
+ metadata->last_maps_reparse_time = base::GetWallTimeMs();
// Regs got invalidated by libunwindstack's speculative jump.
// Reset.
ReadFromRawData(regs.get(), alloc_metadata->register_data);
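The hunk above rate-limits map reparsing: if the previous ReparseMaps() happened within kMapsReparseInterval (500 ms), the retry is skipped rather than paying for another reparse. A minimal, self-contained sketch of the same pattern, using std::chrono in place of Perfetto's base::TimeMillis / base::GetWallTimeMs (the names below are illustrative, not the real UnwindingMetadata API):

```cpp
#include <chrono>
#include <cstdio>

// Illustrative stand-in for the unwinding metadata: remembers when the maps
// were last reparsed so retries can be rate limited.
struct FakeMetadata {
  std::chrono::steady_clock::time_point last_maps_reparse_time{};
  void ReparseMaps() { std::puts("reparsing maps"); }
};

// Reparses only if the previous reparse is older than |interval|.
bool MaybeReparseMaps(FakeMetadata& md,
                      std::chrono::milliseconds interval =
                          std::chrono::milliseconds(500)) {
  const auto now = std::chrono::steady_clock::now();
  if (md.last_maps_reparse_time + interval > now) {
    std::puts("skipping reparse due to rate limit");
    return false;
  }
  md.ReparseMaps();
  md.last_maps_reparse_time = now;
  return true;
}

int main() {
  FakeMetadata md;
  MaybeReparseMaps(md);  // reparses
  MaybeReparseMaps(md);  // skipped: still inside the 500 ms window
}
```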
diff --git a/src/profiling/memory/unwinding.h b/src/profiling/memory/unwinding.h
index 39216fe..a702c64 100644
--- a/src/profiling/memory/unwinding.h
+++ b/src/profiling/memory/unwinding.h
@@ -27,6 +27,7 @@
#include <unwindstack/JitDebug.h>
#endif
+#include "perfetto/base/time.h"
#include "perfetto/ext/base/scoped_file.h"
#include "perfetto/ext/base/thread_task_runner.h"
#include "perfetto/ext/tracing/core/basic_types.h"
@@ -129,6 +130,7 @@
// The API of libunwindstack expects shared_ptr for Memory.
std::shared_ptr<unwindstack::Memory> fd_mem;
uint64_t reparses = 0;
+ base::TimeMillis last_maps_reparse_time{0};
#if PERFETTO_BUILDFLAG(PERFETTO_ANDROID_BUILD)
std::unique_ptr<unwindstack::JitDebug> jit_debug;
std::unique_ptr<unwindstack::DexFiles> dex_files;
diff --git a/src/trace_processor/containers/bit_vector_benchmark.cc b/src/trace_processor/containers/bit_vector_benchmark.cc
index b4a0c27..206bc5d 100644
--- a/src/trace_processor/containers/bit_vector_benchmark.cc
+++ b/src/trace_processor/containers/bit_vector_benchmark.cc
@@ -17,6 +17,7 @@
#include <benchmark/benchmark.h>
#include "src/trace_processor/containers/bit_vector.h"
+#include "src/trace_processor/containers/bit_vector_iterators.h"
namespace {
@@ -27,15 +28,40 @@
}
void BitVectorArgs(benchmark::internal::Benchmark* b) {
- b->Arg(64);
+ std::vector<int> set_percentages;
+ if (IsBenchmarkFunctionalOnly()) {
+ set_percentages = std::vector<int>{50};
+ } else {
+ set_percentages = std::vector<int>{0, 1, 5, 50, 95, 99, 100};
+ }
- if (!IsBenchmarkFunctionalOnly()) {
- b->Arg(512);
- b->Arg(8192);
- b->Arg(123456);
- b->Arg(1234567);
+ for (int percentage : set_percentages) {
+ b->Args({64, percentage});
+
+ if (!IsBenchmarkFunctionalOnly()) {
+ b->Args({512, percentage});
+ b->Args({8192, percentage});
+ b->Args({123456, percentage});
+ b->Args({1234567, percentage});
+ }
}
}
+
+BitVector BvWithSizeAndSetPercentage(uint32_t size, uint32_t set_percentage) {
+ static constexpr uint32_t kRandomSeed = 29;
+ std::minstd_rand0 rnd_engine(kRandomSeed);
+
+ BitVector bv;
+ for (uint32_t i = 0; i < size; ++i) {
+ if (rnd_engine() % 100 < set_percentage) {
+ bv.AppendTrue();
+ } else {
+ bv.AppendFalse();
+ }
+ }
+ return bv;
+}
+
} // namespace
static void BM_BitVectorAppendTrue(benchmark::State& state) {
@@ -61,15 +87,9 @@
std::minstd_rand0 rnd_engine(kRandomSeed);
uint32_t size = static_cast<uint32_t>(state.range(0));
+ uint32_t set_percentage = static_cast<uint32_t>(state.range(1));
- BitVector bv;
- for (uint32_t i = 0; i < size; ++i) {
- if (rnd_engine() % 2) {
- bv.AppendTrue();
- } else {
- bv.AppendFalse();
- }
- }
+ BitVector bv = BvWithSizeAndSetPercentage(size, set_percentage);
static constexpr uint32_t kPoolSize = 1024 * 1024;
std::vector<bool> bit_pool(kPoolSize);
@@ -93,15 +113,9 @@
std::minstd_rand0 rnd_engine(kRandomSeed);
uint32_t size = static_cast<uint32_t>(state.range(0));
+ uint32_t set_percentage = static_cast<uint32_t>(state.range(1));
- BitVector bv;
- for (uint32_t i = 0; i < size; ++i) {
- if (rnd_engine() % 2) {
- bv.AppendTrue();
- } else {
- bv.AppendFalse();
- }
- }
+ BitVector bv = BvWithSizeAndSetPercentage(size, set_percentage);
static constexpr uint32_t kPoolSize = 1024 * 1024;
std::vector<uint32_t> row_pool(kPoolSize);
@@ -123,16 +137,9 @@
std::minstd_rand0 rnd_engine(kRandomSeed);
uint32_t size = static_cast<uint32_t>(state.range(0));
+ uint32_t set_percentage = static_cast<uint32_t>(state.range(1));
- BitVector bv;
- for (uint32_t i = 0; i < size; ++i) {
- if (rnd_engine() % 2) {
- bv.AppendTrue();
- } else {
- bv.AppendFalse();
- }
- }
-
+ BitVector bv = BvWithSizeAndSetPercentage(size, set_percentage);
static constexpr uint32_t kPoolSize = 1024 * 1024;
std::vector<uint32_t> row_pool(kPoolSize);
uint32_t set_bit_count = bv.GetNumBitsSet();
@@ -153,11 +160,12 @@
std::minstd_rand0 rnd_engine(kRandomSeed);
uint32_t size = static_cast<uint32_t>(state.range(0));
+ uint32_t set_percentage = static_cast<uint32_t>(state.range(1));
uint32_t count = 0;
BitVector bv;
for (uint32_t i = 0; i < size; ++i) {
- bool value = rnd_engine() % 2;
+ bool value = rnd_engine() % 100 < set_percentage;
if (value) {
bv.AppendTrue();
} else {
@@ -205,11 +213,12 @@
std::minstd_rand0 rnd_engine(kRandomSeed);
uint32_t size = static_cast<uint32_t>(state.range(0));
+ uint32_t set_percentage = static_cast<uint32_t>(state.range(1));
BitVector bv;
BitVector picker;
for (uint32_t i = 0; i < size; ++i) {
- bool value = rnd_engine() % 2;
+ bool value = rnd_engine() % 100 < set_percentage;
if (value) {
bv.AppendTrue();
@@ -234,3 +243,16 @@
}
}
BENCHMARK(BM_BitVectorUpdateSetBits)->Apply(BitVectorArgs);
+
+static void BM_BitVectorSetBitsIterator(benchmark::State& state) {
+ uint32_t size = static_cast<uint32_t>(state.range(0));
+ uint32_t set_percentage = static_cast<uint32_t>(state.range(1));
+
+ BitVector bv = BvWithSizeAndSetPercentage(size, set_percentage);
+ for (auto _ : state) {
+ for (auto it = bv.IterateSetBits(); it; it.Next()) {
+ benchmark::DoNotOptimize(it.index());
+ }
+ }
+}
+BENCHMARK(BM_BitVectorSetBitsIterator)->Apply(BitVectorArgs);
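For readers unfamiliar with the benchmark harness: each b->Args({size, percentage}) call in BitVectorArgs registers one (size, set percentage) combination, which the benchmark bodies read back via state.range(0) and state.range(1). A tiny standalone google/benchmark example of that two-argument pattern (assuming the benchmark library is linked; BM_TwoArgs is a made-up name):

```cpp
#include <benchmark/benchmark.h>

static void BM_TwoArgs(benchmark::State& state) {
  const int64_t size = state.range(0);            // first Args() element
  const int64_t set_percentage = state.range(1);  // second Args() element
  for (auto _ : state) {
    benchmark::DoNotOptimize(size * set_percentage);
  }
}
// Each Args() call adds one (size, set_percentage) variant of the benchmark.
BENCHMARK(BM_TwoArgs)->Args({64, 50})->Args({8192, 95});

BENCHMARK_MAIN();
```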
diff --git a/src/trace_processor/heap_profile_tracker.cc b/src/trace_processor/heap_profile_tracker.cc
index 4bf0756..a2aff3a 100644
--- a/src/trace_processor/heap_profile_tracker.cc
+++ b/src/trace_processor/heap_profile_tracker.cc
@@ -24,6 +24,201 @@
namespace perfetto {
namespace trace_processor {
+namespace {
+struct MergedCallsite {
+ StringId frame_name;
+ StringId mapping_name;
+ base::Optional<uint32_t> parent_idx;
+ bool operator<(const MergedCallsite& o) const {
+ return std::tie(frame_name, mapping_name, parent_idx) <
+ std::tie(o.frame_name, o.mapping_name, o.parent_idx);
+ }
+};
+
+std::vector<MergedCallsite> GetMergedCallsites(TraceStorage* storage,
+ uint32_t callstack_row) {
+ const tables::StackProfileCallsiteTable& callsites_tbl =
+ storage->stack_profile_callsite_table();
+ const tables::StackProfileFrameTable& frames_tbl =
+ storage->stack_profile_frame_table();
+ const tables::SymbolTable& symbols_tbl = storage->symbol_table();
+ const tables::StackProfileMappingTable& mapping_tbl =
+ storage->stack_profile_mapping_table();
+
+ // TODO(fmayer): Clean up types and remove the static_cast.
+ uint32_t frame_idx = *frames_tbl.id().IndexOf(
+ FrameId(static_cast<uint32_t>(callsites_tbl.frame_id()[callstack_row])));
+
+ // TODO(fmayer): Clean up types and remove the static_cast.
+ uint32_t mapping_idx = *mapping_tbl.id().IndexOf(
+ MappingId(static_cast<uint32_t>(frames_tbl.mapping()[frame_idx])));
+ StringId mapping_name = mapping_tbl.name()[mapping_idx];
+
+ base::Optional<uint32_t> symbol_set_id =
+ frames_tbl.symbol_set_id()[frame_idx];
+
+ if (!symbol_set_id) {
+ StringId frame_name = frames_tbl.name()[frame_idx];
+ return {{frame_name, mapping_name, base::nullopt}};
+ }
+
+ std::vector<MergedCallsite> result;
+ // id == symbol_set_id for the bottommost frame.
+ // TODO(lalitm): Encode this optimization in the table and remove this
+ // custom optimization.
+ uint32_t symbol_set_idx = *symbols_tbl.id().IndexOf(SymbolId(*symbol_set_id));
+ for (uint32_t i = symbol_set_idx;
+ i < symbols_tbl.row_count() &&
+ symbols_tbl.symbol_set_id()[i] == *symbol_set_id;
+ ++i) {
+ result.emplace_back(
+ MergedCallsite{symbols_tbl.name()[i], mapping_name, base::nullopt});
+ }
+ return result;
+}
+} // namespace
+
+std::unique_ptr<tables::ExperimentalFlamegraphNodesTable> BuildNativeFlamegraph(
+ TraceStorage* storage,
+ UniquePid upid,
+ int64_t timestamp) {
+ const tables::HeapProfileAllocationTable& allocation_tbl =
+ storage->heap_profile_allocation_table();
+ const tables::StackProfileCallsiteTable& callsites_tbl =
+ storage->stack_profile_callsite_table();
+
+ StringId profile_type = storage->InternString("native");
+
+ std::vector<uint32_t> callsite_to_merged_callsite(callsites_tbl.row_count(),
+ 0);
+ std::map<MergedCallsite, uint32_t> merged_callsites_to_table_idx;
+
+ std::unique_ptr<tables::ExperimentalFlamegraphNodesTable> tbl(
+ new tables::ExperimentalFlamegraphNodesTable(
+ storage->mutable_string_pool(), nullptr));
+
+ // FORWARD PASS:
+ // Aggregate callstacks by frame name / mapping name. Use symbolization data.
+ for (uint32_t i = 0; i < callsites_tbl.row_count(); ++i) {
+ base::Optional<uint32_t> parent_idx;
+ // TODO(fmayer): Clean up types and remove the conditional and static_cast.
+ if (callsites_tbl.parent_id()[i] != -1) {
+ auto parent_id = static_cast<uint32_t>(callsites_tbl.parent_id()[i]);
+ parent_idx = callsites_tbl.id().IndexOf(CallsiteId(parent_id));
+ parent_idx = callsite_to_merged_callsite[*parent_idx];
+ PERFETTO_CHECK(*parent_idx < i);
+ }
+
+ auto callsites = GetMergedCallsites(storage, i);
+ for (MergedCallsite& merged_callsite : callsites) {
+ merged_callsite.parent_idx = parent_idx;
+ auto it = merged_callsites_to_table_idx.find(merged_callsite);
+ if (it == merged_callsites_to_table_idx.end()) {
+ std::tie(it, std::ignore) = merged_callsites_to_table_idx.emplace(
+ merged_callsite, merged_callsites_to_table_idx.size());
+ tables::ExperimentalFlamegraphNodesTable::Row row{};
+ if (parent_idx) {
+ row.depth = tbl->depth()[*parent_idx] + 1;
+ } else {
+ row.depth = 0;
+ }
+ row.ts = timestamp;
+ row.upid = upid;
+ row.profile_type = profile_type;
+ row.name = merged_callsite.frame_name;
+ row.map_name = merged_callsite.mapping_name;
+ if (parent_idx)
+ row.parent_id = tbl->id()[*parent_idx].value;
+
+ parent_idx = *tbl->id().IndexOf(tbl->Insert(std::move(row)));
+ PERFETTO_CHECK(merged_callsites_to_table_idx.size() ==
+ tbl->row_count());
+ }
+ parent_idx = it->second;
+ }
+ PERFETTO_CHECK(parent_idx);
+ callsite_to_merged_callsite[i] = *parent_idx;
+ }
+
+ // PASS OVER ALLOCATIONS:
+ // Aggregate allocations into the newly built tree.
+ auto filtered = allocation_tbl.Filter(
+ {allocation_tbl.ts().eq(timestamp), allocation_tbl.upid().eq(upid)});
+
+ if (filtered.row_count() == 0) {
+ return nullptr;
+ }
+
+ for (auto it = filtered.IterateRows(); it; it.Next()) {
+ int64_t size =
+ it.Get(static_cast<uint32_t>(
+ tables::HeapProfileAllocationTable::ColumnIndex::size))
+ .long_value;
+ int64_t count =
+ it.Get(static_cast<uint32_t>(
+ tables::HeapProfileAllocationTable::ColumnIndex::count))
+ .long_value;
+ int64_t callsite_id =
+ it
+ .Get(static_cast<uint32_t>(
+ tables::HeapProfileAllocationTable::ColumnIndex::callsite_id))
+ .long_value;
+
+ PERFETTO_CHECK((size < 0 && count < 0) || (size >= 0 && count >= 0));
+ uint32_t merged_idx =
+ callsite_to_merged_callsite[*callsites_tbl.id().IndexOf(
+ CallsiteId(static_cast<uint32_t>(callsite_id)))];
+ if (size > 0) {
+ // TODO(fmayer): Clean up types and remove the static_cast.
+ tbl->mutable_alloc_size()->Set(merged_idx,
+ tbl->alloc_size()[merged_idx] + size);
+ tbl->mutable_alloc_count()->Set(merged_idx,
+ tbl->alloc_count()[merged_idx] + count);
+ }
+
+ // TODO(fmayer): Clean up types and remove the static_cast.
+ tbl->mutable_size()->Set(merged_idx, tbl->size()[merged_idx] + size);
+ tbl->mutable_count()->Set(merged_idx, tbl->count()[merged_idx] + count);
+ }
+
+ // BACKWARD PASS:
+ // Propagate sizes to parents.
+ for (int64_t i = tbl->row_count() - 1; i >= 0; --i) {
+ uint32_t idx = static_cast<uint32_t>(i);
+
+ tbl->mutable_cumulative_size()->Set(
+ idx, tbl->cumulative_size()[idx] + tbl->size()[idx]);
+ tbl->mutable_cumulative_count()->Set(
+ idx, tbl->cumulative_count()[idx] + tbl->count()[idx]);
+
+ tbl->mutable_cumulative_alloc_size()->Set(
+ idx, tbl->cumulative_alloc_size()[idx] + tbl->alloc_size()[idx]);
+ tbl->mutable_cumulative_alloc_count()->Set(
+ idx, tbl->cumulative_alloc_count()[idx] + tbl->alloc_count()[idx]);
+
+ auto parent = tbl->parent_id()[idx];
+ if (parent) {
+ uint32_t parent_idx = *tbl->id().IndexOf(
+ tables::ExperimentalFlamegraphNodesTable::Id(*parent));
+ tbl->mutable_cumulative_size()->Set(
+ parent_idx,
+ tbl->cumulative_size()[parent_idx] + tbl->cumulative_size()[idx]);
+ tbl->mutable_cumulative_count()->Set(
+ parent_idx,
+ tbl->cumulative_count()[parent_idx] + tbl->cumulative_count()[idx]);
+
+ tbl->mutable_cumulative_alloc_size()->Set(
+ parent_idx, tbl->cumulative_alloc_size()[parent_idx] +
+ tbl->cumulative_alloc_size()[idx]);
+ tbl->mutable_cumulative_alloc_count()->Set(
+ parent_idx, tbl->cumulative_alloc_count()[parent_idx] +
+ tbl->cumulative_alloc_count()[idx]);
+ }
+ }
+
+ return tbl;
+}
+
HeapProfileTracker::HeapProfileTracker(TraceProcessorContext* context)
: context_(context), empty_(context_->storage->InternString({"", 0})) {}
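BuildNativeFlamegraph above relies on an ordering invariant checked by PERFETTO_CHECK(*parent_idx < i): every node is inserted after its parent, so a single reverse walk over the table can propagate cumulative sizes from leaves to roots. A minimal sketch of that backward pass on a toy parent-indexed tree (plain C++, independent of the trace processor tables):

```cpp
#include <cstdio>
#include <vector>

// Toy flamegraph node: |parent| indexes into the same vector (-1 for roots)
// and is always smaller than the node's own index.
struct Node {
  int parent;
  long size;        // self size
  long cumulative;  // filled in by the backward pass
};

void PropagateCumulativeSizes(std::vector<Node>& nodes) {
  // Walk leaves-to-roots: when a node is visited, all of its children have
  // already folded their cumulative sizes into it.
  for (int i = static_cast<int>(nodes.size()) - 1; i >= 0; --i) {
    nodes[i].cumulative += nodes[i].size;
    if (nodes[i].parent >= 0)
      nodes[nodes[i].parent].cumulative += nodes[i].cumulative;
  }
}

int main() {
  // root(10) -> child(5) -> grandchild(2)
  std::vector<Node> nodes = {{-1, 10, 0}, {0, 5, 0}, {1, 2, 0}};
  PropagateCumulativeSizes(nodes);
  for (const Node& n : nodes)
    std::printf("self=%ld cumulative=%ld\n", n.size, n.cumulative);
  // Prints cumulative sizes 17, 7 and 2.
}
```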
diff --git a/src/trace_processor/heap_profile_tracker.h b/src/trace_processor/heap_profile_tracker.h
index 93a2440..53e7637 100644
--- a/src/trace_processor/heap_profile_tracker.h
+++ b/src/trace_processor/heap_profile_tracker.h
@@ -30,6 +30,11 @@
namespace perfetto {
namespace trace_processor {
+std::unique_ptr<tables::ExperimentalFlamegraphNodesTable> BuildNativeFlamegraph(
+ TraceStorage* storage,
+ UniquePid upid,
+ int64_t timestamp);
+
class TraceProcessorContext;
class HeapProfileTracker {
diff --git a/src/trace_processor/importers/ftrace/rss_stat_tracker.cc b/src/trace_processor/importers/ftrace/rss_stat_tracker.cc
index db444a7..f536a02 100644
--- a/src/trace_processor/importers/ftrace/rss_stat_tracker.cc
+++ b/src/trace_processor/importers/ftrace/rss_stat_tracker.cc
@@ -69,24 +69,34 @@
base::Optional<UniqueTid> RssStatTracker::FindUtidForMmId(int64_t mm_id,
bool is_curr,
uint32_t pid) {
- auto it = mm_id_to_utid_.find(mm_id);
- if (!is_curr) {
- return it == mm_id_to_utid_.end() ? base::nullopt
- : base::make_optional(it->second);
+ // If curr is true, we can just overwrite the state in the map and return
+ // the utid corresponding to |pid|.
+ if (is_curr) {
+ UniqueTid utid = context_->process_tracker->GetOrCreateThread(pid);
+ mm_id_to_utid_[mm_id] = utid;
+ return utid;
}
+ // If curr is false, try to look up the utid we previously saw for this
+ // mm id.
+ auto it = mm_id_to_utid_.find(mm_id);
+ if (it == mm_id_to_utid_.end())
+ return base::nullopt;
+
+ // If the utid in the map is the same as our current utid but curr is false,
+ // that means we are in the middle of a process changing mm structs (i.e. in
+ // the middle of a vfork + exec). Therefore, we should discard the association
+ // of this mm struct with this thread.
UniqueTid utid = context_->process_tracker->GetOrCreateThread(pid);
- if (it != mm_id_to_utid_.end() && it->second != utid) {
- // Since both of these structs have the same mm hash and both say that
- // the mm hash is for the current project, we can assume they belong to
- // the same process so we can associate them together.
- // TODO(lalitm): investigate if it's possible for mm_id to be reused
- // between different processes if we have pid reuse and get unlucky. If
- // so, we'll need to do some more careful tracking here.
- context_->process_tracker->AssociateThreads(it->second, utid);
+ if (it->second == utid) {
+ mm_id_to_utid_.erase(it);
+ return base::nullopt;
}
- mm_id_to_utid_[mm_id] = utid;
- return utid;
+
+ // This case happens when a process is changing the VM of another process and
+ // we know the utid corresponding to the target process. Just return that
+ // utid.
+ return it->second;
}
} // namespace trace_processor
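The rewritten FindUtidForMmId above is a small state machine over the mm_id → utid map: curr == true (re)binds the mm_id to the emitting thread; curr == false either returns the previously recorded owner, drops the event if the mm_id was never seen, or, if the recorded owner is the emitting thread itself, treats it as a vfork + exec in progress and erases the stale association. A condensed, self-contained model of that logic (hypothetical MmIdTracker type, utids passed in directly instead of going through ProcessTracker):

```cpp
#include <cstdint>
#include <map>
#include <optional>

using Utid = uint32_t;

// Simplified model of RssStatTracker's mm_id -> utid bookkeeping.
class MmIdTracker {
 public:
  std::optional<Utid> FindUtidForMmId(int64_t mm_id, bool is_curr, Utid utid) {
    if (is_curr) {
      // A thread touching its own mm always (re)binds the mm_id to itself.
      mm_id_to_utid_[mm_id] = utid;
      return utid;
    }
    auto it = mm_id_to_utid_.find(mm_id);
    if (it == mm_id_to_utid_.end())
      return std::nullopt;  // mm_id never seen with curr == true: drop.
    if (it->second == utid) {
      // Same thread but curr == false: vfork + exec in progress, so the old
      // association is stale.
      mm_id_to_utid_.erase(it);
      return std::nullopt;
    }
    // Another thread touching a known mm (e.g. reclaim): attribute to owner.
    return it->second;
  }

 private:
  std::map<int64_t, Utid> mm_id_to_utid_;
};

int main() {
  MmIdTracker tracker;
  tracker.FindUtidForMmId(0x1234, /*is_curr=*/true, /*utid=*/10);
  auto owner = tracker.FindUtidForMmId(0x1234, /*is_curr=*/false, /*utid=*/3);
  return owner.value_or(0) == 10 ? 0 : 1;  // attributed to the owning thread
}
```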
diff --git a/src/trace_processor/importers/proto/heap_graph_tracker.cc b/src/trace_processor/importers/proto/heap_graph_tracker.cc
index 124fab9..f96cf6e 100644
--- a/src/trace_processor/importers/proto/heap_graph_tracker.cc
+++ b/src/trace_processor/importers/proto/heap_graph_tracker.cc
@@ -234,17 +234,17 @@
// artificial root in the database.
tables::ExperimentalFlamegraphNodesTable::Row alloc_row{
- current_ts,
- current_upid,
- profile_type,
- depth,
- StringId::Raw(static_cast<uint32_t>(node.class_name)),
- java_mapping,
+ current_ts, current_upid, profile_type, depth,
+ StringId::Raw(static_cast<uint32_t>(node.class_name)), java_mapping,
static_cast<int64_t>(node.count),
static_cast<int64_t>(node_to_cumulative_count[i]),
static_cast<int64_t>(node.size),
static_cast<int64_t>(node_to_cumulative_size[i]),
- parent_id};
+ // For Java dumps, set alloc_count == count, etc.
+ static_cast<int64_t>(node.count),
+ static_cast<int64_t>(node_to_cumulative_count[i]),
+ static_cast<int64_t>(node.size),
+ static_cast<int64_t>(node_to_cumulative_size[i]), parent_id};
node_to_row_idx[i] = *tbl->id().IndexOf(tbl->Insert(alloc_row));
}
return tbl;
diff --git a/src/trace_processor/importers/proto/proto_trace_parser_unittest.cc b/src/trace_processor/importers/proto/proto_trace_parser_unittest.cc
index 5fd1d74..ff27f76 100644
--- a/src/trace_processor/importers/proto/proto_trace_parser_unittest.cc
+++ b/src/trace_processor/importers/proto/proto_trace_parser_unittest.cc
@@ -2390,11 +2390,6 @@
auto mapping = interned_data->add_mappings();
mapping->set_iid(1);
- mapping->set_build_id(1);
-
- auto build_id = interned_data->add_build_ids();
- build_id->set_iid(1);
- build_id->set_str("3BBCFBD372448A727265C3E7C4D954F91");
auto frame = interned_data->add_frames();
frame->set_iid(1);
@@ -2456,13 +2451,6 @@
EXPECT_EQ(samples.ts()[2], 68000);
EXPECT_EQ(samples.callsite_id()[2], 0);
EXPECT_EQ(samples.utid()[2], 1u);
-
- // Breakpad build_ids should not be modified/mangled.
- ASSERT_STREQ(
- context_.storage
- ->GetString(storage_->stack_profile_mapping_table().build_id()[0])
- .c_str(),
- "3BBCFBD372448A727265C3E7C4D954F91");
}
} // namespace
diff --git a/src/trace_processor/metrics/android/unmapped_java_symbols.sql b/src/trace_processor/metrics/android/unmapped_java_symbols.sql
index 7cc314f..d13d948 100644
--- a/src/trace_processor/metrics/android/unmapped_java_symbols.sql
+++ b/src/trace_processor/metrics/android/unmapped_java_symbols.sql
@@ -24,6 +24,7 @@
AND INSTR(type_name, '.') = 0
AND RTRIM(type_name, '[]') NOT IN ('byte', 'char', 'short', 'int', 'long', 'boolean', 'float', 'double')
AND type_name NOT LIKE '$Proxy%'
+ AND LENGTH(type_name) > 0
)
SELECT upid, RepeatedField(type_name) AS types
FROM distinct_unmapped_type_names GROUP BY 1;
@@ -35,6 +36,7 @@
WHERE deobfuscated_type_name IS NULL
AND field_name NOT LIKE '%.%.%'
AND field_name NOT LIKE '$Proxy%'
+ AND LENGTH(field_name) > 0
)
SELECT upid, RepeatedField(field_name) AS fields
FROM distinct_unmapped_field_names GROUP BY 1;
diff --git a/src/trace_processor/sqlite/db_sqlite_table.cc b/src/trace_processor/sqlite/db_sqlite_table.cc
index 054f387..83789ea 100644
--- a/src/trace_processor/sqlite/db_sqlite_table.cc
+++ b/src/trace_processor/sqlite/db_sqlite_table.cc
@@ -222,9 +222,12 @@
} else if (sqlite_utils::IsOpEq(c.op)) {
// If there is only a single equality constraint, we have special logic
// to sort by that column and then binary search if we see the constraint
- // set often. Model this by dividing but the log of the number of rows as
+ // set often. Model this by dividing by the log of the number of rows as
// a good approximation. Otherwise, we'll need to do a full table scan.
- filter_cost += cs.size() == 1
+ // Alternatively, if the column is sorted, we can use the same binary
+ // search logic so we have the same low cost (even better because we don't
+ // have to sort at all).
+ filter_cost += cs.size() == 1 || col.IsSorted()
? (2 * current_row_count) / log2(current_row_count)
: current_row_count;
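To make the cost model in the hunk above concrete: a single equality constraint, or an equality constraint on a sorted column, lets the table be binary searched, so the modeled filter cost drops from the row count N to roughly 2N / log2(N). A standalone sketch of that estimate (an illustration of the formula only, not the real DbSqliteTable::EstimateCost):

```cpp
#include <cmath>
#include <cstdio>

// Rough filter-cost model: binary search when a lone equality constraint (or
// an equality on a sorted column) applies, full scan otherwise.
double EstimateFilterCost(double row_count,
                          int num_constraints,
                          bool column_is_sorted) {
  if (num_constraints == 1 || column_is_sorted)
    return (2.0 * row_count) / std::log2(row_count);
  return row_count;
}

int main() {
  const double rows = 1234;
  std::printf("sorted eq:   %.1f\n", EstimateFilterCost(rows, 2, true));
  std::printf("unsorted eq: %.1f\n", EstimateFilterCost(rows, 2, false));
  // ~240 vs 1234: the sorted case is much cheaper, which is what the
  // MultiSortedEqCheaperThanMultiUnsortedEq test asserts.
}
```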
diff --git a/src/trace_processor/sqlite/db_sqlite_table_unittest.cc b/src/trace_processor/sqlite/db_sqlite_table_unittest.cc
index 4f39839..2fbc3e4 100644
--- a/src/trace_processor/sqlite/db_sqlite_table_unittest.cc
+++ b/src/trace_processor/sqlite/db_sqlite_table_unittest.cc
@@ -33,12 +33,18 @@
Column("a", &a_, Column::Flag::kNoFlag, this, 1u, 0u));
columns_.emplace_back(
Column("sorted", &sorted_, Column::Flag::kSorted, this, 2u, 0u));
+ columns_.emplace_back(
+ Column("other", &other_, Column::Flag::kNoFlag, this, 3u, 0u));
+ columns_.emplace_back(
+ Column("other2", &other_, Column::Flag::kNoFlag, this, 4u, 0u));
}
private:
StringPool pool_;
SparseVector<uint32_t> a_;
SparseVector<uint32_t> sorted_;
+ SparseVector<uint32_t> other_;
+ SparseVector<uint32_t> other2_;
};
TEST(DbSqliteTable, IdEqCheaperThanOtherEq) {
@@ -95,6 +101,27 @@
ASSERT_GT(single_cost.rows, multi_cost.rows);
}
+TEST(DbSqliteTable, MultiSortedEqCheaperThanMultiUnsortedEq) {
+ TestTable table(1234);
+
+ QueryConstraints sorted_eq;
+ sorted_eq.AddConstraint(2u, SQLITE_INDEX_CONSTRAINT_EQ, 0u);
+ sorted_eq.AddConstraint(3u, SQLITE_INDEX_CONSTRAINT_EQ, 0u);
+
+ auto sorted_cost = DbSqliteTable::EstimateCost(table, sorted_eq);
+
+ QueryConstraints unsorted_eq;
+ unsorted_eq.AddConstraint(3u, SQLITE_INDEX_CONSTRAINT_EQ, 0u);
+ unsorted_eq.AddConstraint(4u, SQLITE_INDEX_CONSTRAINT_EQ, 0u);
+
+ auto unsorted_cost = DbSqliteTable::EstimateCost(table, unsorted_eq);
+
+ // The number of rows should be the same but the cost of the sorted
+ // query should be less.
+ ASSERT_LT(sorted_cost.cost, unsorted_cost.cost);
+ ASSERT_EQ(sorted_cost.rows, unsorted_cost.rows);
+}
+
TEST(DbSqliteTable, EmptyTableCosting) {
TestTable table(0u);
diff --git a/src/trace_processor/sqlite_experimental_flamegraph_table.cc b/src/trace_processor/sqlite_experimental_flamegraph_table.cc
index 68dd50f..12a9bb6 100644
--- a/src/trace_processor/sqlite_experimental_flamegraph_table.cc
+++ b/src/trace_processor/sqlite_experimental_flamegraph_table.cc
@@ -17,6 +17,7 @@
#include "src/trace_processor/sqlite_experimental_flamegraph_table.h"
+#include "src/trace_processor/heap_profile_tracker.h"
#include "src/trace_processor/importers/proto/heap_graph_tracker.h"
#include "src/trace_processor/trace_processor_context.h"
@@ -154,11 +155,14 @@
// Get the input column values and compute the flamegraph using them.
values_ = GetInputValues(qc, argv);
- // TODO(fmayer): extend this to support native profile as well.
if (values_.profile_type == "graph") {
auto* tracker = HeapGraphTracker::GetOrCreate(context_);
table_ = tracker->BuildFlamegraph(values_.ts, values_.upid);
}
+ if (values_.profile_type == "native") {
+ table_ = BuildNativeFlamegraph(context_->storage.get(),
+ values_.upid, values_.ts);
+ }
// table_ can be nullptr precisely where the constraints passed to us don't
// make sense. Therefore, we can just return this to SQLite.
diff --git a/src/trace_processor/stack_profile_tracker.cc b/src/trace_processor/stack_profile_tracker.cc
index e9185e0..63443f4 100644
--- a/src/trace_processor/stack_profile_tracker.cc
+++ b/src/trace_processor/stack_profile_tracker.cc
@@ -68,18 +68,9 @@
context_->storage->GetString(raw_build_id);
StringId build_id = GetEmptyStringId();
if (raw_build_id_str.size() > 0) {
- // If the build_id is 33 characters long, we assume it's a Breakpad debug
- // identifier which is already in Hex and doesn't need conversion.
- // TODO(b/148109467): Remove workaround once all active Chrome versions
- // write raw bytes instead of a string as build_id.
- if (raw_build_id_str.size() == 33) {
- build_id = raw_build_id;
- } else {
- std::string hex_build_id =
- base::ToHex(raw_build_id_str.c_str(), raw_build_id_str.size());
- build_id =
- context_->storage->InternString(base::StringView(hex_build_id));
- }
+ std::string hex_build_id =
+ base::ToHex(raw_build_id_str.c_str(), raw_build_id_str.size());
+ build_id = context_->storage->InternString(base::StringView(hex_build_id));
}
tables::StackProfileMappingTable::Row row{
diff --git a/src/trace_processor/tables/profiler_tables.h b/src/trace_processor/tables/profiler_tables.h
index 5f529eb..04dae8d 100644
--- a/src/trace_processor/tables/profiler_tables.h
+++ b/src/trace_processor/tables/profiler_tables.h
@@ -100,6 +100,10 @@
C(int64_t, cumulative_count) \
C(int64_t, size) \
C(int64_t, cumulative_size) \
+ C(int64_t, alloc_count) \
+ C(int64_t, cumulative_alloc_count) \
+ C(int64_t, alloc_size) \
+ C(int64_t, cumulative_alloc_size) \
C(base::Optional<uint32_t>, parent_id)
PERFETTO_TP_TABLE(PERFETTO_TP_EXPERIMENTAL_FLAMEGRAPH_NODES);
@@ -124,7 +128,7 @@
#define PERFETTO_TP_HEAP_GRAPH_REFERENCE_DEF(NAME, PARENT, C) \
NAME(HeapGraphReferenceTable, "heap_graph_reference") \
PERFETTO_TP_ROOT_TABLE(PARENT, C) \
- C(int64_t, reference_set_id) \
+ C(int64_t, reference_set_id, Column::Flag::kSorted) \
C(int64_t, owner_id) \
C(int64_t, owned_id) \
C(StringPool::Id, field_name) \
diff --git a/src/trace_processor/trace_storage.h b/src/trace_processor/trace_storage.h
index db6ac60..67c6358 100644
--- a/src/trace_processor/trace_storage.h
+++ b/src/trace_processor/trace_storage.h
@@ -75,6 +75,8 @@
using FrameId = tables::StackProfileFrameTable::Id;
+using SymbolId = tables::SymbolTable::Id;
+
using CallsiteId = tables::StackProfileCallsiteTable::Id;
using MetadataId = tables::MetadataTable::Id;
diff --git a/test/synth_common.py b/test/synth_common.py
index f950294..09bf07e 100644
--- a/test/synth_common.py
+++ b/test/synth_common.py
@@ -19,6 +19,8 @@
from google.protobuf.pyext import _message
CLONE_THREAD = 0x00010000
+CLONE_VFORK = 0x00004000
+CLONE_VM = 0x00000100
class Trace(object):
diff --git a/test/trace_processor/heap_profile_flamegraph.sql b/test/trace_processor/heap_profile_flamegraph.sql
new file mode 100644
index 0000000..fa90b15
--- /dev/null
+++ b/test/trace_processor/heap_profile_flamegraph.sql
@@ -0,0 +1 @@
+select * from experimental_flamegraph(605908369259172, 1, 'native') limit 10;
diff --git a/test/trace_processor/heap_profile_flamegraph_system-server-native-profile.out b/test/trace_processor/heap_profile_flamegraph_system-server-native-profile.out
new file mode 100644
index 0000000..42ab65d
--- /dev/null
+++ b/test/trace_processor/heap_profile_flamegraph_system-server-native-profile.out
@@ -0,0 +1,11 @@
+"id","type","depth","name","map_name","count","cumulative_count","size","cumulative_size","alloc_count","cumulative_alloc_count","alloc_size","cumulative_alloc_size","parent_id"
+0,"experimental_flamegraph_nodes",0,"__start_thread","/apex/com.android.runtime/lib64/bionic/libc.so",0,8,0,84848,0,210,0,1084996,"[NULL]"
+1,"experimental_flamegraph_nodes",1,"_ZL15__pthread_startPv","/apex/com.android.runtime/lib64/bionic/libc.so",0,8,0,84848,0,210,0,1084996,0
+2,"experimental_flamegraph_nodes",2,"_ZN7android14AndroidRuntime15javaThreadShellEPv","/system/lib64/libandroid_runtime.so",0,5,0,27704,0,77,0,348050,1
+3,"experimental_flamegraph_nodes",3,"_ZN7android6Thread11_threadLoopEPv","/system/lib64/libutils.so",0,5,0,27704,0,77,0,348050,2
+4,"experimental_flamegraph_nodes",4,"_ZN7android10PoolThread10threadLoopEv","/system/lib64/libbinder.so",0,1,0,4096,0,64,0,279182,3
+5,"experimental_flamegraph_nodes",5,"_ZN7android14IPCThreadState14joinThreadPoolEb","/system/lib64/libbinder.so",0,1,0,4096,0,64,0,279182,4
+6,"experimental_flamegraph_nodes",6,"_ZN7android14IPCThreadState20getAndExecuteCommandEv","/system/lib64/libbinder.so",0,1,0,4096,0,64,0,279182,5
+7,"experimental_flamegraph_nodes",7,"_ZN7android14IPCThreadState14executeCommandEi","/system/lib64/libbinder.so",0,1,0,4096,0,64,0,279182,6
+8,"experimental_flamegraph_nodes",8,"_ZN7android7BBinder8transactEjRKNS_6ParcelEPS1_j","/system/lib64/libbinder.so",0,1,0,4096,0,64,0,279182,7
+9,"experimental_flamegraph_nodes",9,"_ZN11JavaBBinder10onTransactEjRKN7android6ParcelEPS1_j","/system/lib64/libandroid_runtime.so",0,0,0,0,0,60,0,262730,8
diff --git a/test/trace_processor/index b/test/trace_processor/index
index 612d838..afd8064 100644
--- a/test/trace_processor/index
+++ b/test/trace_processor/index
@@ -57,6 +57,8 @@
# Rss stats
rss_stat_mm_id.py rss_stat.sql rss_stat_mm_id.out
+rss_stat_mm_id_clone.py rss_stat.sql rss_stat_mm_id_clone.out
+rss_stat_mm_id_reuse.py rss_stat.sql rss_stat_mm_id_reuse.out
rss_stat_legacy.py rss_stat.sql rss_stat_legacy.out
# Memory counters
@@ -152,6 +154,7 @@
heap_graph_interleaved.textproto heap_graph_object.sql heap_graph_interleaved_object.out
heap_graph_interleaved.textproto heap_graph_reference.sql heap_graph_interleaved_reference.out
../data/system-server-heap-graph.pftrace heap_graph_flamegraph.sql heap_graph_flamegraph_system-server-heap-graph.out
+../data/system-server-native-profile heap_profile_flamegraph.sql heap_profile_flamegraph_system-server-native-profile.out
# TrackEvent tests.
track_event_same_tids.textproto process_tracking.sql track_event_same_tids_threads.out
diff --git a/test/trace_processor/rss_stat_mm_id copy.py b/test/trace_processor/rss_stat_mm_id copy.py
new file mode 100644
index 0000000..d2b5e5a
--- /dev/null
+++ b/test/trace_processor/rss_stat_mm_id copy.py
@@ -0,0 +1,52 @@
+#!/usr/bin/python
+# Copyright (C) 2019 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# This synthetic trace tests handling of the mm_id field in the rss_stat
+# event.
+
+from os import sys, path
+
+sys.path.append(path.dirname(path.dirname(path.abspath(__file__))))
+import synth_common
+
+trace = synth_common.create_trace()
+
+trace.add_process_tree_packet(ts=1)
+trace.add_process(10, 0, "process")
+
+trace.add_ftrace_packet(0)
+
+# Create a new child process (treated internally as a thread) of kthreadd.
+trace.add_newtask(ts=50, tid=2, new_tid=3, new_comm="kthread_child", flags=0)
+
+# Add an event on tid 3 which affects its own rss.
+trace.add_rss_stat(ts=90, tid=3, member=0, size=9, mm_id=4321, curr=True)
+
+# Try to add an event for mm_id 1234 (tid 10's mm struct). However, as we've
+# not seen an event with curr == True for this mm_id, this event will be
+# dropped.
+trace.add_rss_stat(ts=91, tid=3, member=0, size=900, mm_id=1234, curr=False)
+
+# Add an event for tid 3 from tid 10. This emulates e.g. direct reclaim
+# where a process reaches into another process' mm struct.
+trace.add_rss_stat(ts=99, tid=10, member=0, size=10, mm_id=4321, curr=False)
+
+# Add an event on tid 10 which affects its own rss.
+trace.add_rss_stat(ts=100, tid=10, member=0, size=1000, mm_id=1234, curr=True)
+
+# Add an event on tid 10 from tid 3. This emulates e.g. background reclaim
+# where kthreadd is cleaning up the mm struct of another process.
+trace.add_rss_stat(ts=101, tid=3, member=0, size=900, mm_id=1234, curr=False)
+
+print(trace.trace.SerializeToString())
diff --git a/test/trace_processor/rss_stat_mm_id_clone.out b/test/trace_processor/rss_stat_mm_id_clone.out
new file mode 100644
index 0000000..0969481
--- /dev/null
+++ b/test/trace_processor/rss_stat_mm_id_clone.out
@@ -0,0 +1,9 @@
+"ts","name","pid","name","value"
+100,"mem.rss.file",10,"parent_process",100.000000
+100,"mem.rss.file",2,"kthreadd",10.000000
+102,"mem.rss.file",2,"kthreadd",20.000000
+102,"mem.rss.file",11,"child_process",90.000000
+104,"mem.rss.file",11,"child_process",10.000000
+105,"mem.rss.file",10,"parent_process",95.000000
+107,"mem.rss.file",10,"parent_process",105.000000
+108,"mem.rss.file",10,"parent_process",110.000000
diff --git a/test/trace_processor/rss_stat_mm_id_clone.py b/test/trace_processor/rss_stat_mm_id_clone.py
new file mode 100644
index 0000000..308b6b4
--- /dev/null
+++ b/test/trace_processor/rss_stat_mm_id_clone.py
@@ -0,0 +1,98 @@
+#!/usr/bin/python
+# Copyright (C) 2019 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# This synthetic trace tests handling of the mm_id field in the rss_stat
+# event during clone events which have various flag combinations set.
+
+from os import sys, path
+
+sys.path.append(path.dirname(path.dirname(path.abspath(__file__))))
+import synth_common
+
+trace = synth_common.create_trace()
+
+trace.add_process_tree_packet(ts=1)
+trace.add_process(10, 1, "parent_process")
+trace.add_process(3, 2, "kernel_thread")
+
+# In this packet, check what happens to userspace processes with different
+# clone flags.
+trace.add_ftrace_packet(1)
+
+# Emit an rss stat event for the main thread of the process to associate it
+# with an mm_id.
+trace.add_rss_stat(100, tid=10, member=0, size=100, mm_id=0x1234, curr=1)
+
+# Create a newtask event emulating vfork/posix_spawn (i.e. CLONE_VM and
+# CLONE_VFORK set).
+trace.add_newtask(
+ 101,
+ tid=10,
+ new_tid=11,
+ new_comm="child_process",
+ flags=synth_common.CLONE_VFORK | synth_common.CLONE_VM)
+
+# The child process will now change its own (and parent's) VM space with
+# |curr| set to 1 (emulating cleaning up some memory in parent).
+trace.add_rss_stat(102, tid=11, member=0, size=90, mm_id=0x1234, curr=1)
+
+# At this point, the child process will obtain a new mm struct. From this
+# point on, all mm_ids from the child should be different from the parent's.
+
+# The child process will now change its parent's VM space with curr set to
+# 0 (emulating e.g. cleaning up its stack).
+trace.add_rss_stat(103, tid=11, member=0, size=85, mm_id=0x1234, curr=0)
+
+# Now the child process should exec another process.
+
+# The child can now change its own memory.
+trace.add_rss_stat(104, tid=11, member=0, size=10, mm_id=0x5678, curr=1)
+
+# The parent can now resume execution and may emit another rss event.
+trace.add_rss_stat(105, tid=10, member=0, size=95, mm_id=0x1234, curr=1)
+
+# The parent can now go ahead and start a new thread.
+trace.add_newtask(
+ 106,
+ tid=10,
+ new_tid=12,
+ new_comm="parent_thread",
+ flags=synth_common.CLONE_VM | synth_common.CLONE_THREAD)
+
+# Since this thread shares mm space with the parent, it should have the
+# same mm id and have curr set to 1.
+trace.add_rss_stat(107, tid=12, member=0, size=105, mm_id=0x1234, curr=1)
+
+# The parent can also emit events with the same mm struct at the same time.
+trace.add_rss_stat(108, tid=10, member=0, size=110, mm_id=0x1234, curr=1)
+
+# In this packet, we check what happens to kernel threads in RSS stat.
+trace.add_ftrace_packet(1)
+
+# Emit an rss stat event for the existing kernel thread.
+trace.add_rss_stat(100, tid=3, member=0, size=10, mm_id=0x2345, curr=1)
+
+# Start a new kernel thread.
+trace.add_newtask(
+ 101,
+ tid=2,
+ new_tid=4,
+ new_comm="kernel_thread2",
+ flags=synth_common.CLONE_VM)
+
+# Emit a rss stat for the new kernel thread.
+trace.add_rss_stat(102, tid=4, member=0, size=20, mm_id=0x2345, curr=1)
+
+print(trace.trace.SerializeToString())
diff --git a/test/trace_processor/rss_stat_mm_id_reuse.out b/test/trace_processor/rss_stat_mm_id_reuse.out
new file mode 100644
index 0000000..396f5f7
--- /dev/null
+++ b/test/trace_processor/rss_stat_mm_id_reuse.out
@@ -0,0 +1,3 @@
+"ts","name","pid","name","value"
+100,"mem.rss.file",10,"parent_process",100.000000
+103,"mem.rss.file",10,"new_process",10.000000
diff --git a/test/trace_processor/rss_stat_mm_id_reuse.py b/test/trace_processor/rss_stat_mm_id_reuse.py
new file mode 100644
index 0000000..58d7b4d
--- /dev/null
+++ b/test/trace_processor/rss_stat_mm_id_reuse.py
@@ -0,0 +1,43 @@
+#!/usr/bin/python
+# Copyright (C) 2019 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# This synthetic trace tests handling of the mm_id field in the rss_stat
+# event when mm_structs are reused on process death.
+
+from os import sys, path
+
+sys.path.append(path.dirname(path.dirname(path.abspath(__file__))))
+import synth_common
+
+trace = synth_common.create_trace()
+
+trace.add_process_tree_packet(ts=1)
+trace.add_process(10, 1, "parent_process")
+
+trace.add_ftrace_packet(1)
+
+# Emit an event for the process.
+trace.add_rss_stat(100, tid=10, member=0, size=100, mm_id=0x1234, curr=1)
+
+# Now kill the process.
+trace.add_process_free(ts=101, tid=10, comm="parent_process", prio=0)
+
+# Create a new thread which reuses the pid and mm struct.
+trace.add_newtask(102, tid=1, new_tid=10, new_comm="new_process", flags=0)
+
+# Emit an event for the new thread.
+trace.add_rss_stat(103, tid=10, member=0, size=10, mm_id=0x1234, curr=1)
+
+print(trace.trace.SerializeToString())
diff --git a/tools/install-build-deps b/tools/install-build-deps
index db5aa59..1ed1fab 100755
--- a/tools/install-build-deps
+++ b/tools/install-build-deps
@@ -146,8 +146,8 @@
# Example traces for regression tests.
(
'buildtools/test_data.zip',
- 'https://storage.googleapis.com/perfetto/test-data-20200121-101157.zip',
- 'ddfe8aa6ceca7be17f222740927dc50aa9f8ed02',
+ 'https://storage.googleapis.com/perfetto/test-data-20200122-100845.zip',
+ '56ac45b5239fda50d33cf05bfd329dcb3efb0b2a',
'all',
),
diff --git a/ui/src/controller/heap_profile_controller.ts b/ui/src/controller/heap_profile_controller.ts
index 973a92f..eadb704 100644
--- a/ui/src/controller/heap_profile_controller.ts
+++ b/ui/src/controller/heap_profile_controller.ts
@@ -177,23 +177,23 @@
let sizeIndex = 4;
switch (viewingOption) {
case SPACE_MEMORY_ALLOCATED_NOT_FREED_KEY:
- orderBy = `where size > 0 order by depth, parent_hash, size desc, name`;
+ orderBy = `where cumulative_size > 0 order by depth, parent_id,
+ cumulative_size desc, name`;
sizeIndex = 4;
break;
case ALLOC_SPACE_MEMORY_ALLOCATED_KEY:
- orderBy =
- `where alloc_size > 0 order by depth, parent_hash, alloc_size desc,
- name`;
+ orderBy = `where cumulative_alloc_size > 0 order by depth, parent_id,
+ cumulative_alloc_size desc, name`;
sizeIndex = 5;
break;
case OBJECTS_ALLOCATED_NOT_FREED_KEY:
- orderBy =
- `where count > 0 order by depth, parent_hash, count desc, name`;
+ orderBy = `where cumulative_count > 0 order by depth, parent_id,
+ cumulative_count desc, name`;
sizeIndex = 6;
break;
case OBJECTS_ALLOCATED_KEY:
- orderBy = `where alloc_count > 0 order by depth, parent_hash,
- alloc_count desc, name`;
+ orderBy = `where cumulative_alloc_count > 0 order by depth, parent_id,
+ cumulative_alloc_count desc, name`;
sizeIndex = 7;
break;
default:
@@ -201,8 +201,9 @@
}
const callsites = await this.args.engine.query(
- `SELECT hash, name, parent_hash, depth, size, alloc_size, count,
- alloc_count, map_name, self_size from ${tableName} ${orderBy}`);
+ `SELECT id, name, parent_id, depth, cumulative_size,
+ cumulative_alloc_size, cumulative_count,
+ cumulative_alloc_count, map_name, size from ${tableName} ${orderBy}`);
const flamegraphData: CallsiteInfo[] = new Array();
const hashToindex: Map<number, number> = new Map();
@@ -230,102 +231,15 @@
Promise<string> {
// Creating unique names for views so we can reuse and not delete them
// for each marker.
- const tableNameCallsiteNameSize =
- this.tableName(`callsite_with_name_and_size`);
- const tableNameCallsiteHashNameSize =
- this.tableName(`callsite_hash_name_size`);
const tableNameGroupedCallsitesForFlamegraph =
this.tableName(`grouped_callsites_for_flamegraph`);
- // Joining the callsite table with frame table then with alloc table to get
- // the size and name for each callsite.
- // TODO(taylori): Make frame name nullable in the trace processor for
- // consistency with the other columns.
- await this.args.engine.query(
- `create view if not exists ${tableNameCallsiteNameSize} as
- select id, parent_id, depth, IFNULL(DEMANGLE(name), name) as name,
- map_name, size, alloc_size, count, alloc_count from (
- select cs.id as id, parent_id, depth,
- coalesce(symbols.name,
- case when fr.name != '' then fr.name else map.name end) as name,
- map.name as map_name,
- SUM(IFNULL(size, 0)) as size,
- SUM(IFNULL(size, 0)) as size,
- SUM(case when size > 0 then size else 0 end) as alloc_size,
- SUM(IFNULL(count, 0)) as count,
- SUM(case when count > 0 then count else 0 end) as alloc_count
- from stack_profile_callsite cs
- join stack_profile_frame fr on cs.frame_id = fr.id
- join stack_profile_mapping map on fr.mapping = map.id
- left join (
- select symbol_set_id, FIRST_VALUE(name) OVER(PARTITION BY
- symbol_set_id) as name
- from stack_profile_symbol GROUP BY symbol_set_id
- ) as symbols using(symbol_set_id)
- left join heap_profile_allocation alloc on alloc.callsite_id = cs.id
- and alloc.ts <= ${ts} and alloc.upid = ${upid} group by cs.id)`);
- // Recursive query to compute the hash for each callsite based on names
- // rather than ids.
- // We get all the children of the row in question and emit a row with hash
- // equal hash(name, parent.hash). Roots without the parent will have -1 as
- // hash. Slices will be merged into a big slice.
- await this.args.engine.query(
- `create view if not exists ${tableNameCallsiteHashNameSize} as
- with recursive callsite_table_names(
- id, hash, name, map_name, size, alloc_size, count, alloc_count,
- parent_hash, depth) AS (
- select id, hash(name) as hash, name, map_name, size, alloc_size, count,
- alloc_count, -1, depth
- from ${tableNameCallsiteNameSize}
- where depth = 0
- union all
- select cs.id, hash(cs.name, ctn.hash) as hash, cs.name, cs.map_name,
- cs.size, cs.alloc_size, cs.count, cs.alloc_count, ctn.hash, cs.depth
- from callsite_table_names ctn
- inner join ${tableNameCallsiteNameSize} cs ON ctn.id = cs.parent_id
- )
- select hash, name, map_name, parent_hash, depth, SUM(size) as size,
- SUM(case when alloc_size > 0 then alloc_size else 0 end)
- as alloc_size, SUM(count) as count,
- SUM(case when alloc_count > 0 then alloc_count else 0 end)
- as alloc_count
- from callsite_table_names
- group by hash`);
-
- // Recursive query to compute the cumulative size of each callsite.
- // Base case: We get all the callsites where the size is non-zero.
- // Recursive case: We get the callsite which is the parent of the current
- // callsite(in terms of hashes) and emit a row with the size of the current
- // callsite plus all the info of the parent.
- // Grouping: For each callsite, our recursive table has n rows where n is
- // the number of descendents with a non-zero self size. We need to group on
- // the hash and sum all the sizes to get the cumulative size for each
- // callsite hash.
await this.args.engine.query(`create temp table if not exists ${
- tableNameGroupedCallsitesForFlamegraph}
- as with recursive callsite_children(
- hash, name, map_name, parent_hash, depth, size, alloc_size, count,
- alloc_count, self_size, self_alloc_size, self_count, self_alloc_count)
- as (
- select hash, name, map_name, parent_hash, depth, size, alloc_size,
- count, alloc_count, size as self_size, alloc_size as self_alloc_size,
- count as self_count, alloc_count as self_alloc_count
- from ${tableNameCallsiteHashNameSize}
- union all
- select chns.hash, chns.name, chns.map_name, chns.parent_hash,
- chns.depth, cc.size, cc.alloc_size, cc.count, cc.alloc_count,
- chns.size, chns.alloc_size, chns.count, chns.alloc_count
- from ${tableNameCallsiteHashNameSize} chns
- inner join callsite_children cc on chns.hash = cc.parent_hash
- )
- select hash, name, map_name, parent_hash, depth, SUM(size) as size,
- SUM(case when alloc_size > 0 then alloc_size else 0 end)
- as alloc_size, SUM(count) as count,
- SUM(case when alloc_count > 0 then alloc_count else 0 end) as
- alloc_count,
- self_size, self_alloc_size, self_count, self_alloc_count
- from callsite_children
- group by hash`);
+ tableNameGroupedCallsitesForFlamegraph} as
+ select id, name, map_name, parent_id, depth, cumulative_size,
+ cumulative_alloc_size, cumulative_count, cumulative_alloc_count,
+ size, alloc_size, count, alloc_count
+ from experimental_flamegraph(${ts}, ${upid}, 'native')`);
return tableNameGroupedCallsitesForFlamegraph;
}
diff --git a/ui/src/controller/trace_controller.ts b/ui/src/controller/trace_controller.ts
index f819244..d7b07b6 100644
--- a/ui/src/controller/trace_controller.ts
+++ b/ui/src/controller/trace_controller.ts
@@ -467,6 +467,30 @@
});
}
+ // Add global slice tracks.
+ const globalSliceTracks = await engine.query(`
+ select
+ track.name as track_name,
+ track.id as track_id,
+ max(depth) as max_depth
+ from track join slice on track.id = slice.track_id
+ where track.type = 'track'
+ `);
+
+ for (let i = 0; i < globalSliceTracks.numRecords; i++) {
+ const name = globalSliceTracks.columns[0].stringValues![i];
+ const trackId = +globalSliceTracks.columns[1].longValues![i];
+ const maxDepth = +globalSliceTracks.columns[2].longValues![i];
+
+ tracksToAdd.push({
+ engineId: this.engineId,
+ kind: SLICE_TRACK_KIND,
+ name: `${name}`,
+ trackGroup: SCROLLING_TRACK_GROUP,
+ config: {maxDepth, trackId},
+ });
+ }
+
interface CounterTrack {
name: string;
trackId: number;
@@ -687,8 +711,6 @@
name: `${threadName} [${tid}]`,
trackGroup: pUuid,
config: {
- upid,
- utid,
maxDepth: threadTrack.maxDepth,
trackId: threadTrack.trackId
},
diff --git a/ui/src/tracks/chrome_slices/common.ts b/ui/src/tracks/chrome_slices/common.ts
index faa40c3..41eb3c2 100644
--- a/ui/src/tracks/chrome_slices/common.ts
+++ b/ui/src/tracks/chrome_slices/common.ts
@@ -18,8 +18,6 @@
export interface Config {
maxDepth: number;
- upid: number;
- utid: number;
trackId: number;
}