| /* | 
 |  * Copyright (C) 2018 The Android Open Source Project | 
 |  * | 
 |  * Licensed under the Apache License, Version 2.0 (the "License"); | 
 |  * you may not use this file except in compliance with the License. | 
 |  * You may obtain a copy of the License at | 
 |  * | 
 |  *      http://www.apache.org/licenses/LICENSE-2.0 | 
 |  * | 
 |  * Unless required by applicable law or agreed to in writing, software | 
 |  * distributed under the License is distributed on an "AS IS" BASIS, | 
 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | 
 |  * See the License for the specific language governing permissions and | 
 |  * limitations under the License. | 
 |  */ | 
 |  | 
 | #ifndef SRC_PROFILING_MEMORY_BOOKKEEPING_H_ | 
 | #define SRC_PROFILING_MEMORY_BOOKKEEPING_H_ | 
 |  | 
 | #include <map> | 
 | #include <vector> | 
 |  | 
 | #include "perfetto/base/time.h" | 
 | #include "src/profiling/common/callstack_trie.h" | 
 | #include "src/profiling/common/interner.h" | 
 | #include "src/profiling/memory/unwound_messages.h" | 
 |  | 
 | // Below is an illustration of the bookkeeping system state where | 
 | // PID 1 does the following allocations: | 
 | // 0x123: 128 bytes at [bar main] | 
 | // 0x234: 128 bytes at [bar main] | 
 | // 0xf00: 512 bytes at [foo main] | 
 | // PID 1 allocated but previously freed 1024 bytes at [bar main] | 
 | // | 
 | // PID 2 does the following allocations: | 
 | // 0x345: 512 bytes at [foo main] | 
 | // 0x456:  32 bytes at [foo main] | 
 | // PID 2 allocated but already freed 1235 bytes at [foo main] | 
 | // PID 2 allocated and freed 2048 bytes in main. | 
 | // | 
 | // +---------------------------------+   +-------------------+ | 
 | // | +---------+    HeapTracker PID 1|   | GlobalCallstackTri| | 
 | // | |0x123 128+---+    +----------+ |   |           +---+   | | 
 | // | |         |   +---->alloc:1280+----------------->bar|   | | 
 | // | |0x234 128+---+    |free: 1024| |   |           +-^-+   | | 
 | // | |         |        +----------+ |   |   +---+     ^     | | 
 | // | |0xf00 512+---+                 | +----->foo|     |     | | 
 | // | +--------+|   |    +----------+ | | |   +-^-+     |     | | 
 | // |               +---->alloc: 512+---+ |     |       |     | | 
 | // |                    |free:    0| | | |     +--+----+     | | 
 | // |                    +----------+ | | |        |          | | 
 | // |                                 | | |      +-+--+       | | 
 | // +---------------------------------+ | |      |main|       | | 
 | //                                     | |      +--+-+       | | 
 | // +---------------------------------+ | |         ^         | | 
 | // | +---------+    HeapTracker PID 2| | +-------------------+ | 
 | // | |0x345 512+---+    +----------+ | |           | | 
 | // | |         |   +---->alloc:1779+---+           | | 
 | // | |0x456  32+---+    |free: 1235| |             | | 
 | // | +---------+        +----------+ |             | | 
 | // |                                 |             | | 
 | // |                    +----------+ |             | | 
 | // |                    |alloc:2048+---------------+ | 
 | // |                    |free: 2048| | | 
 | // |                    +----------+ | | 
 | // |                                 | | 
 | // +---------------------------------+ | 
 | //   Allocation    CallstackAllocations        Node | 
 | // | 
 | // The active allocations are on the leftmost side, modeled as the class | 
 | // HeapTracker::Allocation. | 
 | // | 
 | // The total allocated and freed bytes per callsite are in the middle, modeled | 
 | // as the HeapTracker::CallstackAllocations class. | 
 | // Note that (1280 - 1024) = 256, so alloc - free is equal to the total of the | 
 | // currently active allocations. | 
 | // Note in PID 2 there is a CallstackAllocations with 2048 allocated and 2048 | 
 | // freed bytes. This is not currently referenced by any Allocations (as it | 
 | // should, as 2048 - 2048 = 0, which would mean that the total size of the | 
 | // allocations referencing it should be 0). This is because we haven't dumped | 
 | // this state yet, so the CallstackAllocations will be kept around until the | 
 | // next dump, written to the trace, and then destroyed. | 
 | // | 
 | // On the right hand side is the GlobalCallstackTrie, with nodes representing | 
 | // distinct callstacks. They have no information about the currently allocated | 
 | // or freed bytes, they only contain a reference count to destroy them as | 
 | // soon as they are no longer referenced by a HeapTracker. | 
 |  | 
 | namespace perfetto { | 
 | namespace profiling { | 
 |  | 
 | // Snapshot for memory allocations of a particular process. Shares callsites | 
 | // with other processes. | 
 | class HeapTracker { | 
 |  public: | 
 |   struct CallstackMaxAllocations { | 
 |     uint64_t max; | 
 |     uint64_t cur; | 
 |     uint64_t max_count; | 
 |     uint64_t cur_count; | 
 |   }; | 
 |   struct CallstackTotalAllocations { | 
 |     uint64_t allocated; | 
 |     uint64_t freed; | 
 |     uint64_t allocation_count; | 
 |     uint64_t free_count; | 
 |   }; | 
 |  | 
 |   // Sum of all the allocations for a given callstack. | 
 |   struct CallstackAllocations { | 
 |     explicit CallstackAllocations(GlobalCallstackTrie::Node* n) : node(n) {} | 
 |  | 
 |     uint64_t allocs = 0; | 
 |  | 
 |     union { | 
 |       CallstackMaxAllocations retain_max; | 
 |       CallstackTotalAllocations totals; | 
 |     } value = {}; | 
 |  | 
 |     GlobalCallstackTrie::Node* const node; | 
 |  | 
 |     ~CallstackAllocations() { GlobalCallstackTrie::DecrementNode(node); } | 
 |  | 
 |     bool operator<(const CallstackAllocations& other) const { | 
 |       return node < other.node; | 
 |     } | 
 |   }; | 
 |  | 
 |   // Caller needs to ensure that callsites outlives the HeapTracker. | 
 |   explicit HeapTracker(GlobalCallstackTrie* callsites, bool dump_at_max_mode) | 
 |       : callsites_(callsites), dump_at_max_mode_(dump_at_max_mode) {} | 
 |  | 
 |   void RecordMalloc(const std::vector<unwindstack::FrameData>& callstack, | 
 |                     const std::vector<std::string>& build_ids, | 
 |                     uint64_t address, | 
 |                     uint64_t sample_size, | 
 |                     uint64_t alloc_size, | 
 |                     uint64_t sequence_number, | 
 |                     uint64_t timestamp); | 
 |  | 
 |   template <typename F> | 
 |   void GetCallstackAllocations(F fn) { | 
 |     // There are two reasons we remove the unused callstack allocations on the | 
 |     // next iteration of Dump: | 
 |     // * We need to remove them after the callstacks were dumped, which | 
 |     //   currently happens after the allocations are dumped. | 
 |     // * This way, we do not destroy and recreate callstacks as frequently. | 
 |     for (auto it_and_alloc : dead_callstack_allocations_) { | 
 |       auto& it = it_and_alloc.first; | 
 |       uint64_t allocated = it_and_alloc.second; | 
 |       const CallstackAllocations& alloc = it->second; | 
 |       // For non-dump-at-max, we need to check, even if there are still no | 
 |       // allocations referencing this callstack, whether there were any | 
 |       // allocations that happened but were freed again. If that was the case, | 
 |       // we need to keep the callsite, because the next dump will indicate a | 
 |       // different self_alloc and self_freed. | 
 |       if (alloc.allocs == 0 && | 
 |           (dump_at_max_mode_ || | 
 |            alloc.value.totals.allocation_count == allocated)) { | 
 |         // TODO(fmayer): We could probably be smarter than throw away | 
 |         // our whole frames cache. | 
 |         ClearFrameCache(); | 
 |         callstack_allocations_.erase(it); | 
 |       } | 
 |     } | 
 |     dead_callstack_allocations_.clear(); | 
 |  | 
 |     for (auto it = callstack_allocations_.begin(); | 
 |          it != callstack_allocations_.end(); ++it) { | 
 |       const CallstackAllocations& alloc = it->second; | 
 |       fn(alloc); | 
 |  | 
 |       if (alloc.allocs == 0) | 
 |         dead_callstack_allocations_.emplace_back( | 
 |             it, !dump_at_max_mode_ ? alloc.value.totals.allocation_count : 0); | 
 |     } | 
 |   } | 
 |  | 
 |   template <typename F> | 
 |   void GetAllocations(F fn) { | 
 |     for (const auto& addr_and_allocation : allocations_) { | 
 |       const Allocation& alloc = addr_and_allocation.second; | 
 |       fn(addr_and_allocation.first, alloc.sample_size, alloc.alloc_size, | 
 |          alloc.callstack_allocations()->node->id()); | 
 |     } | 
 |   } | 
 |  | 
 |   void RecordFree(uint64_t address, | 
 |                   uint64_t sequence_number, | 
 |                   uint64_t timestamp) { | 
 |     RecordOperation(sequence_number, {address, timestamp}); | 
 |   } | 
 |  | 
 |   void ClearFrameCache() { frame_cache_.clear(); } | 
 |  | 
 |   uint64_t dump_timestamp() { | 
 |     return dump_at_max_mode_ ? max_timestamp_ : committed_timestamp_; | 
 |   } | 
 |  | 
 |   uint64_t GetSizeForTesting(const std::vector<unwindstack::FrameData>& stack, | 
 |                              std::vector<std::string> build_ids); | 
 |   uint64_t GetMaxForTesting(const std::vector<unwindstack::FrameData>& stack, | 
 |                             std::vector<std::string> build_ids); | 
 |   uint64_t GetMaxCountForTesting( | 
 |       const std::vector<unwindstack::FrameData>& stack, | 
 |       std::vector<std::string> build_ids); | 
 |  | 
 |   uint64_t GetTimestampForTesting() { return committed_timestamp_; } | 
 |  | 
 |  private: | 
 |   struct Allocation { | 
 |     Allocation(uint64_t size, | 
 |                uint64_t asize, | 
 |                uint64_t seq, | 
 |                CallstackAllocations* csa) | 
 |         : sample_size(size), alloc_size(asize), sequence_number(seq) { | 
 |       SetCallstackAllocations(csa); | 
 |     } | 
 |  | 
 |     Allocation() = default; | 
 |     Allocation(const Allocation&) = delete; | 
 |     Allocation(Allocation&& other) noexcept { | 
 |       sample_size = other.sample_size; | 
 |       alloc_size = other.alloc_size; | 
 |       sequence_number = other.sequence_number; | 
 |       callstack_allocations_ = other.callstack_allocations_; | 
 |       other.callstack_allocations_ = nullptr; | 
 |     } | 
 |  | 
 |     ~Allocation() { SetCallstackAllocations(nullptr); } | 
 |  | 
 |     void SetCallstackAllocations(CallstackAllocations* callstack_allocations) { | 
 |       if (callstack_allocations_) | 
 |         callstack_allocations_->allocs--; | 
 |       callstack_allocations_ = callstack_allocations; | 
 |       if (callstack_allocations_) | 
 |         callstack_allocations_->allocs++; | 
 |     } | 
 |  | 
 |     CallstackAllocations* callstack_allocations() const { | 
 |       return callstack_allocations_; | 
 |     } | 
 |  | 
 |     uint64_t sample_size; | 
 |     uint64_t alloc_size; | 
 |     uint64_t sequence_number; | 
 |  | 
 |    private: | 
 |     CallstackAllocations* callstack_allocations_ = nullptr; | 
 |   }; | 
 |  | 
 |   struct PendingOperation { | 
 |     uint64_t allocation_address; | 
 |     uint64_t timestamp; | 
 |   }; | 
 |  | 
 |   CallstackAllocations* MaybeCreateCallstackAllocations( | 
 |       GlobalCallstackTrie::Node* node) { | 
 |     auto callstack_allocations_it = callstack_allocations_.find(node); | 
 |     if (callstack_allocations_it == callstack_allocations_.end()) { | 
 |       GlobalCallstackTrie::IncrementNode(node); | 
 |       bool inserted; | 
 |       std::tie(callstack_allocations_it, inserted) = | 
 |           callstack_allocations_.emplace(node, node); | 
 |       PERFETTO_DCHECK(inserted); | 
 |     } | 
 |     return &callstack_allocations_it->second; | 
 |   } | 
 |  | 
 |   void RecordOperation(uint64_t sequence_number, | 
 |                        const PendingOperation& operation); | 
 |  | 
 |   // Commits a malloc or free operation. | 
 |   // See comment of pending_operations_ for encoding of malloc and free | 
 |   // operations. | 
 |   // | 
 |   // Committing a malloc operation: Add the allocations size to | 
 |   // CallstackAllocation::allocated. | 
 |   // Committing a free operation: Add the allocation's size to | 
 |   // CallstackAllocation::freed and delete the allocation. | 
 |   void CommitOperation(uint64_t sequence_number, | 
 |                        const PendingOperation& operation); | 
 |  | 
 |   void AddToCallstackAllocations(uint64_t ts, const Allocation& alloc) { | 
 |     if (dump_at_max_mode_) { | 
 |       current_unfreed_ += alloc.sample_size; | 
 |       alloc.callstack_allocations()->value.retain_max.cur += alloc.sample_size; | 
 |       alloc.callstack_allocations()->value.retain_max.cur_count++; | 
 |  | 
 |       if (current_unfreed_ <= max_unfreed_) | 
 |         return; | 
 |  | 
 |       if (max_sequence_number_ == alloc.sequence_number - 1) { | 
 |         // We know the only CallstackAllocation that has max != cur is the | 
 |         // one we just updated. | 
 |         alloc.callstack_allocations()->value.retain_max.max = | 
 |             alloc.callstack_allocations()->value.retain_max.cur; | 
 |         alloc.callstack_allocations()->value.retain_max.max_count = | 
 |             alloc.callstack_allocations()->value.retain_max.cur_count; | 
 |       } else { | 
 |         for (auto& p : callstack_allocations_) { | 
 |           // We need to reset max = cur for every CallstackAllocation, as we | 
 |           // do not know which ones have changed since the last max. | 
 |           // TODO(fmayer): Add an index to speed this up | 
 |           CallstackAllocations& csa = p.second; | 
 |           csa.value.retain_max.max = csa.value.retain_max.cur; | 
 |           csa.value.retain_max.max_count = csa.value.retain_max.cur_count; | 
 |         } | 
 |       } | 
 |       max_sequence_number_ = alloc.sequence_number; | 
 |       max_unfreed_ = current_unfreed_; | 
 |       max_timestamp_ = ts; | 
 |     } else { | 
 |       alloc.callstack_allocations()->value.totals.allocated += | 
 |           alloc.sample_size; | 
 |       alloc.callstack_allocations()->value.totals.allocation_count++; | 
 |     } | 
 |   } | 
 |  | 
 |   void SubtractFromCallstackAllocations(const Allocation& alloc) { | 
 |     if (dump_at_max_mode_) { | 
 |       current_unfreed_ -= alloc.sample_size; | 
 |       alloc.callstack_allocations()->value.retain_max.cur -= alloc.sample_size; | 
 |       alloc.callstack_allocations()->value.retain_max.cur_count--; | 
 |     } else { | 
 |       alloc.callstack_allocations()->value.totals.freed += alloc.sample_size; | 
 |       alloc.callstack_allocations()->value.totals.free_count++; | 
 |     } | 
 |   } | 
 |  | 
 |   // We cannot use an interner here, because after the last allocation goes | 
 |   // away, we still need to keep the CallstackAllocations around until the next | 
 |   // dump. | 
 |   std::map<GlobalCallstackTrie::Node*, CallstackAllocations> | 
 |       callstack_allocations_; | 
 |  | 
 |   std::vector<std::pair<decltype(callstack_allocations_)::iterator, uint64_t>> | 
 |       dead_callstack_allocations_; | 
 |  | 
 |   std::map<uint64_t /* allocation address */, Allocation> allocations_; | 
 |  | 
 |   // An operation is either a commit of an allocation or freeing of an | 
 |   // allocation. An operation is a free if its seq_id is larger than | 
 |   // the sequence_number of the corresponding allocation. It is a commit if its | 
 |   // seq_id is equal to the sequence_number of the corresponding allocation. | 
 |   // | 
 |   // If its seq_id is less than the sequence_number of the corresponding | 
 |   // allocation it could be either, but is ignored either way. | 
 |   std::map<uint64_t /* seq_id */, PendingOperation /* allocation address */> | 
 |       pending_operations_; | 
 |  | 
 |   uint64_t committed_timestamp_ = 0; | 
 |   // The sequence number all mallocs and frees have been handled up to. | 
 |   uint64_t committed_sequence_number_ = 0; | 
 |   GlobalCallstackTrie* callsites_; | 
 |  | 
 |   const bool dump_at_max_mode_ = false; | 
 |   // The following members are only used if dump_at_max_mode_ == true. | 
 |   uint64_t max_sequence_number_ = 0; | 
 |   uint64_t current_unfreed_ = 0; | 
 |   uint64_t max_unfreed_ = 0; | 
 |   uint64_t max_timestamp_ = 0; | 
 |  | 
 |   // We index by abspc, which is unique as long as the maps do not change. | 
 |   // This is why we ClearFrameCache after we reparsed maps. | 
 |   std::unordered_map<uint64_t /* abs pc */, Interned<Frame>> frame_cache_; | 
 | }; | 
 |  | 
 | }  // namespace profiling | 
 | }  // namespace perfetto | 
 |  | 
 | #endif  // SRC_PROFILING_MEMORY_BOOKKEEPING_H_ |