|  | /* | 
|  | * Copyright (C) 2020 The Android Open Source Project | 
|  | * | 
|  | * Licensed under the Apache License, Version 2.0 (the "License"); | 
|  | * you may not use this file except in compliance with the License. | 
|  | * You may obtain a copy of the License at | 
|  | * | 
|  | *      http://www.apache.org/licenses/LICENSE-2.0 | 
|  | * | 
|  | * Unless required by applicable law or agreed to in writing, software | 
|  | * distributed under the License is distributed on an "AS IS" BASIS, | 
|  | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | 
|  | * See the License for the specific language governing permissions and | 
|  | * limitations under the License. | 
|  | */ | 
|  |  | 
#include "tools/trace_to_text/trace_to_hprof.h"

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <functional>
#include <limits>
#include <map>
#include <memory>
#include <ostream>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <vector>

#include "perfetto/base/logging.h"
#include "perfetto/ext/base/endian.h"
#include "perfetto/ext/base/optional.h"
#include "perfetto/ext/base/string_utils.h"
#include "tools/trace_to_text/utils.h"
|  |  | 
|  | // Spec | 
|  | // http://hg.openjdk.java.net/jdk6/jdk6/jdk/raw-file/tip/src/share/demo/jvmti/hprof/manual.html#Basic_Type | 
|  | // Parser | 
|  | // https://cs.android.com/android/platform/superproject/+/master:art/tools/ahat/src/main/com/android/ahat/heapdump/Parser.java | 
|  |  | 
|  | namespace perfetto { | 
|  | namespace trace_to_text { | 
|  |  | 
|  | namespace { | 
// Format name placed at the very start of the output. WriteHeaderAndStack()
// writes sizeof(kHeader) bytes, i.e. the text plus its terminating NUL.
constexpr char kHeader[]{"PERFETTO_JAVA_HEAP"};
// Value written into the header's "identifier size" field.
constexpr uint32_t kIdSz{8};
// Serial number shared by the placeholder stack trace record and by every
// LOAD_CLASS record that refers to it.
constexpr uint32_t kStackTraceSerialNumber{1};
|  |  | 
|  | class BigEndianBuffer { | 
|  | public: | 
|  | void WriteId(uint64_t val) { WriteU8(val); } | 
|  |  | 
|  | void WriteU8(uint64_t val) { | 
|  | val = base::HostToBE64(val); | 
|  | Write(reinterpret_cast<char*>(&val), sizeof(uint64_t)); | 
|  | } | 
|  |  | 
|  | void WriteU4(uint32_t val) { | 
|  | val = base::HostToBE32(val); | 
|  | Write(reinterpret_cast<char*>(&val), sizeof(uint32_t)); | 
|  | } | 
|  |  | 
|  | void SetU4(uint32_t val, size_t pos) { | 
|  | val = base::HostToBE32(val); | 
|  | PERFETTO_CHECK(pos + 4 <= buf_.size()); | 
|  | memcpy(buf_.data() + pos, &val, sizeof(uint32_t)); | 
|  | } | 
|  |  | 
|  | // Uncomment when needed | 
|  | // void WriteU2(uint16_t val) { | 
|  | //   val = base::HostToBE16(val); | 
|  | //   Write(reinterpret_cast<char*>(&val), sizeof(uint16_t)); | 
|  | // } | 
|  |  | 
|  | void WriteByte(uint8_t val) { buf_.emplace_back(val); } | 
|  |  | 
|  | void Write(const char* val, uint32_t sz) { | 
|  | const char* end = val + sz; | 
|  | while (val < end) { | 
|  | WriteByte(static_cast<uint8_t>(*val)); | 
|  | val++; | 
|  | } | 
|  | } | 
|  |  | 
|  | size_t written() const { return buf_.size(); } | 
|  |  | 
|  | void Flush(std::ostream* out) const { | 
|  | out->write(buf_.data(), static_cast<std::streamsize>(buf_.size())); | 
|  | } | 
|  |  | 
|  | private: | 
|  | std::vector<char> buf_; | 
|  | }; | 
|  |  | 
|  | class HprofWriter { | 
|  | public: | 
|  | HprofWriter(std::ostream* output) : output_(output) {} | 
|  |  | 
|  | void WriteBuffer(const BigEndianBuffer& buf) { buf.Flush(output_); } | 
|  |  | 
|  | void WriteRecord(const uint8_t type, | 
|  | const std::function<void(BigEndianBuffer*)>&& writer) { | 
|  | BigEndianBuffer buf; | 
|  | buf.WriteByte(type); | 
|  | // ts offset | 
|  | buf.WriteU4(0); | 
|  | // size placeholder | 
|  | buf.WriteU4(0); | 
|  | writer(&buf); | 
|  | uint32_t record_sz = static_cast<uint32_t>(buf.written() - 9); | 
|  | buf.SetU4(record_sz, 5); | 
|  | WriteBuffer(buf); | 
|  | } | 
|  |  | 
|  | private: | 
|  | std::ostream* output_; | 
|  | }; | 
|  |  | 
|  | // A Class from the heap dump. | 
|  | class ClassData { | 
|  | public: | 
|  | explicit ClassData(uint64_t class_name_string_id) | 
|  | : class_name_string_id_(class_name_string_id) {} | 
|  |  | 
|  | // Writes a HPROF LOAD_CLASS record for this Class | 
|  | void WriteHprofLoadClass(HprofWriter* writer, | 
|  | uint64_t class_object_id, | 
|  | uint32_t class_serial_number) const { | 
|  | writer->WriteRecord(0x02, [class_object_id, class_serial_number, | 
|  | this](BigEndianBuffer* buf) { | 
|  | buf->WriteU4(class_serial_number); | 
|  | buf->WriteId(class_object_id); | 
|  | buf->WriteU4(kStackTraceSerialNumber); | 
|  | buf->WriteId(class_name_string_id_); | 
|  | }); | 
|  | } | 
|  |  | 
|  | private: | 
|  | uint64_t class_name_string_id_; | 
|  | }; | 
|  |  | 
|  | // Ingested data from a Java Heap Profile for a name, location pair. | 
|  | // We need to support multiple class datas per pair as name, location is | 
|  | // not unique. Classloader should guarantee uniqueness but is not available | 
|  | // until S. | 
|  | class RawClassData { | 
|  | public: | 
|  | void AddClass(uint64_t id, base::Optional<uint64_t> superclass_id) { | 
|  | ids_.push_back(std::make_pair(id, superclass_id)); | 
|  | } | 
|  |  | 
|  | void AddTemplate(uint64_t template_id) { | 
|  | template_ids_.push_back(template_id); | 
|  | } | 
|  |  | 
|  | // Transforms the raw data into one or more ClassData and adds them to the | 
|  | // parameter map. | 
|  | void ToClassData(std::unordered_map<uint64_t, ClassData>* id_to_class, | 
|  | uint64_t class_name_string_id) const { | 
|  | // TODO(dinoderek) assert the two vectors have same length, iterate on both | 
|  | for (auto it_ids = ids_.begin(); it_ids != ids_.end(); ++it_ids) { | 
|  | // TODO(dinoderek) more data will be needed to write CLASS_DUMP | 
|  | id_to_class->emplace(it_ids->first, ClassData(class_name_string_id)); | 
|  | } | 
|  | } | 
|  |  | 
|  | private: | 
|  | // Pair contains class ID and super class ID. | 
|  | std::vector<std::pair<uint64_t, base::Optional<uint64_t>>> ids_; | 
|  | // Class id of the template | 
|  | std::vector<uint64_t> template_ids_; | 
|  | }; | 
|  |  | 
|  | // The Heap Dump data | 
|  | class HeapDump { | 
|  | public: | 
|  | explicit HeapDump(trace_processor::TraceProcessor* tp) : tp_(tp) {} | 
|  |  | 
|  | void Ingest() { IngestClasses(); } | 
|  |  | 
|  | void Write(HprofWriter* writer) { | 
|  | WriteStrings(writer); | 
|  | WriteLoadClass(writer); | 
|  | } | 
|  |  | 
|  | private: | 
|  | trace_processor::TraceProcessor* tp_; | 
|  |  | 
|  | // String IDs start from 1 as 0 appears to be reserved. | 
|  | uint64_t next_string_id_ = 1; | 
|  | // Strings to corresponding String ID | 
|  | std::unordered_map<std::string, uint64_t> string_to_id_; | 
|  | // Type ID to corresponding Class | 
|  | std::unordered_map<uint64_t, ClassData> id_to_class_; | 
|  |  | 
|  | // Ingests and processes the class data from the heap dump. | 
|  | void IngestClasses() { | 
|  | // TODO(dinoderek): heap_graph_class does not support pid or ts filtering | 
|  |  | 
|  | std::map<std::pair<uint64_t, std::string>, RawClassData> raw_classes; | 
|  |  | 
|  | auto it = tp_->ExecuteQuery(R"(SELECT | 
|  | id, | 
|  | IFNULL(deobfuscated_name, name), | 
|  | superclass_id, | 
|  | location | 
|  | FROM heap_graph_class )"); | 
|  |  | 
|  | while (it.Next()) { | 
|  | uint64_t id = static_cast<uint64_t>(it.Get(0).AsLong()); | 
|  |  | 
|  | std::string raw_dname(it.Get(1).AsString()); | 
|  | std::string dname; | 
|  | bool is_template_class = | 
|  | base::StartsWith(raw_dname, std::string("java.lang.Class<")); | 
|  | if (is_template_class) { | 
|  | dname = raw_dname.substr(17, raw_dname.size() - 18); | 
|  | } else { | 
|  | dname = raw_dname; | 
|  | } | 
|  | uint64_t name_id = IngestString(dname); | 
|  |  | 
|  | auto raw_super_id = it.Get(2); | 
|  | base::Optional<uint64_t> maybe_super_id = | 
|  | raw_super_id.is_null() | 
|  | ? base::nullopt | 
|  | : base::Optional<uint64_t>( | 
|  | static_cast<uint64_t>(raw_super_id.AsLong())); | 
|  |  | 
|  | std::string location(it.Get(3).AsString()); | 
|  |  | 
|  | auto raw_classes_it = | 
|  | raw_classes.emplace(std::make_pair(name_id, location), RawClassData()) | 
|  | .first; | 
|  | if (is_template_class) { | 
|  | raw_classes_it->second.AddTemplate(id); | 
|  | } else { | 
|  | raw_classes_it->second.AddClass(id, maybe_super_id); | 
|  | } | 
|  | } | 
|  |  | 
|  | for (const auto& raw : raw_classes) { | 
|  | auto class_name_string_id = raw.first.first; | 
|  | raw.second.ToClassData(&id_to_class_, class_name_string_id); | 
|  | } | 
|  | } | 
|  |  | 
|  | // Ingests the parameter string and returns the HPROF ID for the string. | 
|  | uint64_t IngestString(const std::string& s) { | 
|  | auto maybe_id = string_to_id_.find(s); | 
|  | if (maybe_id != string_to_id_.end()) { | 
|  | return maybe_id->second; | 
|  | } else { | 
|  | auto id = next_string_id_; | 
|  | next_string_id_ += 1; | 
|  | string_to_id_[s] = id; | 
|  | return id; | 
|  | } | 
|  | } | 
|  |  | 
|  | // Writes STRING sections to the output | 
|  | void WriteStrings(HprofWriter* writer) { | 
|  | for (const auto& it : string_to_id_) { | 
|  | writer->WriteRecord(0x01, [it](BigEndianBuffer* buf) { | 
|  | buf->WriteId(it.second); | 
|  | // TODO(dinoderek): UTF-8 encoding | 
|  | buf->Write(it.first.c_str(), static_cast<uint32_t>(it.first.length())); | 
|  | }); | 
|  | } | 
|  | } | 
|  |  | 
|  | // Writes LOAD CLASS sections to the output | 
|  | void WriteLoadClass(HprofWriter* writer) { | 
|  | uint32_t class_serial_number = 1; | 
|  | for (const auto& it : id_to_class_) { | 
|  | it.second.WriteHprofLoadClass(writer, it.first, class_serial_number); | 
|  | class_serial_number += 1; | 
|  | } | 
|  | } | 
|  | }; | 
|  |  | 
|  | void WriteHeaderAndStack(HprofWriter* writer) { | 
|  | BigEndianBuffer header; | 
|  | header.Write(kHeader, sizeof(kHeader)); | 
|  | // Identifier size | 
|  | header.WriteU4(kIdSz); | 
|  | // walltime high (unused) | 
|  | header.WriteU4(0); | 
|  | // walltime low (unused) | 
|  | header.WriteU4(0); | 
|  | writer->WriteBuffer(header); | 
|  |  | 
|  | // Add placeholder stack trace (required by the format). | 
|  | writer->WriteRecord(0x05, [](BigEndianBuffer* buf) { | 
|  | buf->WriteU4(kStackTraceSerialNumber); | 
|  | buf->WriteU4(0); | 
|  | buf->WriteU4(0); | 
|  | }); | 
|  | } | 
|  | }  // namespace | 
|  |  | 
|  | int TraceToHprof(trace_processor::TraceProcessor* tp, | 
|  | std::ostream* output, | 
|  | uint64_t pid, | 
|  | uint64_t ts) { | 
|  | PERFETTO_DCHECK(tp != nullptr && pid != 0 && ts != 0); | 
|  |  | 
|  | HprofWriter writer(output); | 
|  | HeapDump dump(tp); | 
|  |  | 
|  | dump.Ingest(); | 
|  | WriteHeaderAndStack(&writer); | 
|  | dump.Write(&writer); | 
|  |  | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | int TraceToHprof(std::istream* input, | 
|  | std::ostream* output, | 
|  | uint64_t pid, | 
|  | std::vector<uint64_t> timestamps) { | 
|  | // TODO: Simplify this for cmdline users. For example, if there is a single | 
|  | // heap graph, use this, and only fail when there is ambiguity. | 
|  | if (pid == 0) { | 
|  | PERFETTO_ELOG("Must specify pid"); | 
|  | return -1; | 
|  | } | 
|  | if (timestamps.size() != 1) { | 
|  | PERFETTO_ELOG("Must specify single timestamp"); | 
|  | return -1; | 
|  | } | 
|  | trace_processor::Config config; | 
|  | std::unique_ptr<trace_processor::TraceProcessor> tp = | 
|  | trace_processor::TraceProcessor::CreateInstance(config); | 
|  | if (!ReadTrace(tp.get(), input)) | 
|  | return false; | 
|  | tp->NotifyEndOfFile(); | 
|  | return TraceToHprof(tp.get(), output, pid, timestamps[0]); | 
|  | } | 
|  |  | 
|  | }  // namespace trace_to_text | 
|  | }  // namespace perfetto |