| /* |
| * Copyright (C) 2022 The Android Open Source Project |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| #include "src/trace_processor/util/zip_reader.h" |
| |
| #include <cstdint> |
| #include <cstring> |
| #include <ctime> |
| #include <limits> |
| #include <optional> |
| #include <string> |
| #include <utility> |
| #include <vector> |
| |
| #include "perfetto/base/build_config.h" |
| #include "perfetto/base/logging.h" |
| #include "perfetto/base/status.h" |
| #include "perfetto/base/time.h" |
| #include "perfetto/ext/base/status_or.h" |
| #include "perfetto/ext/base/string_view.h" |
| #include "perfetto/ext/base/utils.h" |
| #include "perfetto/trace_processor/trace_blob_view.h" |
| #include "src/trace_processor/util/gzip_utils.h" |
| #include "src/trace_processor/util/status_macros.h" |
| #include "src/trace_processor/util/streaming_line_reader.h" |
| |
| #if PERFETTO_BUILDFLAG(PERFETTO_ZLIB) |
| #include <zconf.h> |
| #include <zlib.h> |
| #endif |
| |
| namespace perfetto::trace_processor::util { |
| |
| namespace { |
| |
// Little-endian signatures that introduce the records of a .zip archive.
constexpr uint32_t kFileHeaderSig = 0x04034b50;        // Local file header.
constexpr uint32_t kCentralDirectorySig = 0x02014b50;  // Central directory.
constexpr uint32_t kDataDescriptorSig = 0x08074b50;    // Data descriptor.

// Size in bytes of a data descriptor record. It is made of four 32-bit
// fields: 1) signature, 2) crc, 3) compressed size, 4) uncompressed size.
constexpr uint32_t kDataDescriptorSize =
    static_cast<uint32_t>(4 * sizeof(uint32_t));
| |
// Bits of the "general purpose bit flag" field of a local file header.
// kUnknown is the mask of every bit that is not named here; headers that set
// any of those bits are rejected by the parser.
enum GeneralPurposeBitFlag : uint32_t {
  kEncrypted = 1u << 0,
  k8kSlidingDictionary = 1u << 1,
  kShannonFano = 1u << 2,
  // Set when sizes and CRC are stored in a data descriptor that follows the
  // compressed payload rather than in the local file header.
  kDataDescriptor = 1u << 3,
  kLanguageEncoding = 1u << 11,
  kUnknown = ~(kEncrypted | k8kSlidingDictionary | kShannonFano |
               kDataDescriptor | kLanguageEncoding),
};
| |
// Values of the "compression method" header field that this reader handles.
constexpr uint16_t kNoCompression = 0;  // Payload is stored as-is.
constexpr uint16_t kDeflate = 8;        // Payload is a raw deflate stream.
| |
| template <typename T> |
| T ReadAndAdvance(const uint8_t** ptr) { |
| T res{}; |
| memcpy(base::AssumeLittleEndian(&res), *ptr, sizeof(T)); |
| *ptr += sizeof(T); |
| return res; |
| } |
| |
| } // namespace |
| |
| ZipReader::ZipReader() = default; |
| ZipReader::~ZipReader() = default; |
| |
| base::Status ZipReader::Parse(TraceBlobView tbv) { |
| reader_.PushBack(std::move(tbv)); |
| |
| // .zip file sequence: |
| // [ File 1 header (30 bytes) ] |
| // [ File 1 name ] |
| // [ File 1 extra fields (optional) ] |
| // [ File 1 compressed payload ] |
| // [ File 1 data descriptor (optional) ] |
| // |
| // [ File 2 header (30 bytes) ] |
| // [ File 2 name ] |
| // [ File 2 extra fields (optional) ] |
| // [ File 2 compressed payload ] |
| // [ File 2 data descriptor (optional) ] |
| // |
| // [ Central directory (ignored) ] |
| |
| for (;;) { |
| auto state = cur_.parse_state; |
| switch (state) { |
| case FileParseState::kHeader: |
| RETURN_IF_ERROR(TryParseHeader()); |
| break; |
| case FileParseState::kFilename: |
| RETURN_IF_ERROR(TryParseFilename()); |
| break; |
| case FileParseState::kSkipBytes: |
| RETURN_IF_ERROR(TrySkipBytes()); |
| break; |
| case FileParseState::kCompressedData: |
| RETURN_IF_ERROR(TryParseCompressedData()); |
| break; |
| } |
| if (state == cur_.parse_state) { |
| return base::OkStatus(); |
| } |
| } |
| } |
| |
| base::Status ZipReader::TryParseHeader() { |
| PERFETTO_CHECK(cur_.hdr.signature == 0); |
| |
| std::optional<TraceBlobView> hdr = |
| reader_.SliceOff(reader_.start_offset(), kZipFileHdrSize); |
| if (!hdr) { |
| return base::OkStatus(); |
| } |
| PERFETTO_CHECK(reader_.PopFrontBytes(kZipFileHdrSize)); |
| |
| const uint8_t* hdr_it = hdr->data(); |
| cur_.hdr.signature = ReadAndAdvance<uint32_t>(&hdr_it); |
| if (cur_.hdr.signature == kCentralDirectorySig) { |
| // We reached the central directory at the end of file. |
| // We don't make any use here of the central directory, so we just |
| // ignore everything else after this point. |
| // Here we abuse the ZipFile class a bit. The Central Directory header |
| // has a different layout. The first 4 bytes (signature) match, the |
| // rest don't but the sizeof(central dir) is >> sizeof(file header) so |
| // we are fine. |
| // We do this rather than retuning because we could have further |
| // Parse() calls (imagine parsing bytes one by one), and we need a way |
| // to keep track of the "keep eating input without doing anything". |
| cur_.ignore_bytes_after_fname = std::numeric_limits<size_t>::max(); |
| cur_.parse_state = FileParseState::kSkipBytes; |
| return base::OkStatus(); |
| } |
| if (cur_.hdr.signature != kFileHeaderSig) { |
| return base::ErrStatus( |
| "Invalid signature found at offset 0x%zx. Actual=0x%x, " |
| "expected=0x%x", |
| reader_.start_offset(), cur_.hdr.signature, kFileHeaderSig); |
| } |
| |
| cur_.hdr.version = ReadAndAdvance<uint16_t>(&hdr_it); |
| cur_.hdr.flags = ReadAndAdvance<uint16_t>(&hdr_it); |
| cur_.hdr.compression = ReadAndAdvance<uint16_t>(&hdr_it); |
| cur_.hdr.mtime = ReadAndAdvance<uint16_t>(&hdr_it); |
| cur_.hdr.mdate = ReadAndAdvance<uint16_t>(&hdr_it); |
| cur_.hdr.checksum = ReadAndAdvance<uint32_t>(&hdr_it); |
| cur_.hdr.compressed_size = ReadAndAdvance<uint32_t>(&hdr_it); |
| cur_.hdr.uncompressed_size = ReadAndAdvance<uint32_t>(&hdr_it); |
| cur_.hdr.fname_len = ReadAndAdvance<uint16_t>(&hdr_it); |
| cur_.hdr.extra_field_len = ReadAndAdvance<uint16_t>(&hdr_it); |
| PERFETTO_DCHECK(static_cast<size_t>(hdr_it - hdr->data()) == kZipFileHdrSize); |
| |
| // We support only up to version 2.0 (20). Higher versions define |
| // more advanced features that we don't support (zip64 extensions, |
| // encryption). |
| // Disallow encryption or any flags we don't know how to handle. |
| if ((cur_.hdr.version > 20) || (cur_.hdr.flags & kEncrypted) || |
| (cur_.hdr.flags & kUnknown)) { |
| return base::ErrStatus( |
| "Unsupported zip features at offset 0x%zx. version=%x, flags=%x", |
| reader_.start_offset(), cur_.hdr.version, cur_.hdr.flags); |
| } |
| if (cur_.hdr.compression != kNoCompression && |
| cur_.hdr.compression != kDeflate) { |
| return base::ErrStatus( |
| "Unsupported compression type at offset 0x%zx. type=%x. Only " |
| "deflate and no compression are supported.", |
| reader_.start_offset(), cur_.hdr.compression); |
| } |
| if (cur_.hdr.flags & kDataDescriptor && cur_.hdr.compression != kDeflate) { |
| return base::ErrStatus( |
| "Unsupported compression type at offset 0x%zx. type=%x. Only " |
| "deflate supported for ZIPs compressed in a streaming fashion.", |
| reader_.start_offset(), cur_.hdr.compression); |
| } |
| cur_.ignore_bytes_after_fname = cur_.hdr.extra_field_len; |
| cur_.parse_state = FileParseState::kFilename; |
| return base::OkStatus(); |
| } |
| |
| base::Status ZipReader::TryParseFilename() { |
| if (cur_.hdr.fname_len == 0) { |
| cur_.parse_state = FileParseState::kSkipBytes; |
| return base::OkStatus(); |
| } |
| PERFETTO_CHECK(cur_.hdr.fname.empty()); |
| |
| std::optional<TraceBlobView> fname_tbv = |
| reader_.SliceOff(reader_.start_offset(), cur_.hdr.fname_len); |
| if (!fname_tbv) { |
| return base::OkStatus(); |
| } |
| PERFETTO_CHECK(reader_.PopFrontBytes(cur_.hdr.fname_len)); |
| cur_.hdr.fname = std::string(reinterpret_cast<const char*>(fname_tbv->data()), |
| cur_.hdr.fname_len); |
| cur_.parse_state = FileParseState::kSkipBytes; |
| return base::OkStatus(); |
| } |
| |
| base::Status ZipReader::TrySkipBytes() { |
| if (cur_.ignore_bytes_after_fname == 0) { |
| cur_.parse_state = FileParseState::kCompressedData; |
| return base::OkStatus(); |
| } |
| |
| size_t avail = reader_.avail(); |
| if (avail < cur_.ignore_bytes_after_fname) { |
| PERFETTO_CHECK(reader_.PopFrontBytes(avail)); |
| cur_.ignore_bytes_after_fname -= avail; |
| return base::OkStatus(); |
| } |
| PERFETTO_CHECK(reader_.PopFrontBytes(cur_.ignore_bytes_after_fname)); |
| cur_.ignore_bytes_after_fname = 0; |
| cur_.parse_state = FileParseState::kCompressedData; |
| return base::OkStatus(); |
| } |
| |
| base::Status ZipReader::TryParseCompressedData() { |
| // Build up the compressed payload |
| if (cur_.hdr.flags & kDataDescriptor) { |
| if (!cur_.compressed) { |
| ASSIGN_OR_RETURN(auto compressed, TryParseUnsizedCompressedData()); |
| if (!compressed) { |
| return base::OkStatus(); |
| } |
| cur_.compressed = std::move(compressed); |
| } |
| |
| std::optional<TraceBlobView> data_descriptor = |
| reader_.SliceOff(reader_.start_offset(), kDataDescriptorSize); |
| if (!data_descriptor) { |
| return base::OkStatus(); |
| } |
| PERFETTO_CHECK(reader_.PopFrontBytes(kDataDescriptorSize)); |
| |
| const auto* desc_it = data_descriptor->data(); |
| auto desc_sig = ReadAndAdvance<uint32_t>(&desc_it); |
| if (desc_sig != kDataDescriptorSig) { |
| return base::ErrStatus( |
| "Invalid signature found at offset 0x%zx. Actual=0x%x, " |
| "expected=0x%x", |
| reader_.start_offset(), desc_sig, kDataDescriptorSig); |
| } |
| cur_.hdr.checksum = ReadAndAdvance<uint32_t>(&desc_it); |
| cur_.hdr.compressed_size = ReadAndAdvance<uint32_t>(&desc_it); |
| cur_.hdr.uncompressed_size = ReadAndAdvance<uint32_t>(&desc_it); |
| } else { |
| PERFETTO_CHECK(!cur_.compressed); |
| std::optional<TraceBlobView> raw_compressed = |
| reader_.SliceOff(reader_.start_offset(), cur_.hdr.compressed_size); |
| if (!raw_compressed) { |
| return base::OkStatus(); |
| } |
| cur_.compressed = *std::move(raw_compressed); |
| PERFETTO_CHECK(reader_.PopFrontBytes(cur_.hdr.compressed_size)); |
| } |
| |
| // We have accumulated the whole header, file name and compressed payload. |
| PERFETTO_CHECK(cur_.compressed); |
| PERFETTO_CHECK(cur_.hdr.fname.size() == cur_.hdr.fname_len); |
| PERFETTO_CHECK(cur_.compressed->size() == cur_.hdr.compressed_size); |
| PERFETTO_CHECK(cur_.ignore_bytes_after_fname == 0); |
| |
| files_.emplace_back(); |
| files_.back().hdr_ = std::move(cur_.hdr); |
| files_.back().compressed_data_ = *std::move(cur_.compressed); |
| cur_ = FileParseState(); // Reset the parsing state for the next file. |
| return base::OkStatus(); |
| } // namespace perfetto::trace_processor::util |
| |
| base::StatusOr<std::optional<TraceBlobView>> |
| ZipReader::TryParseUnsizedCompressedData() { |
| PERFETTO_CHECK(cur_.hdr.compression == kDeflate); |
| |
| auto start = reader_.start_offset() + cur_.decompressor_bytes_fed; |
| auto end = reader_.end_offset(); |
| auto slice = reader_.SliceOff(start, end - start); |
| PERFETTO_CHECK(slice); |
| auto res_code = cur_.decompressor.FeedAndExtract(slice->data(), slice->size(), |
| [](const uint8_t*, size_t) { |
| // Intentionally do |
| // nothing: we are only |
| // looking for the bounds |
| // of the deflate stream, |
| // we are not actually |
| // interested in the |
| // output. |
| }); |
| switch (res_code) { |
| case GzipDecompressor::ResultCode::kNeedsMoreInput: |
| cur_.decompressor_bytes_fed += slice->size(); |
| return {std::nullopt}; |
| case GzipDecompressor::ResultCode::kError: |
| return base::ErrStatus( |
| "Failed decompressing stream in ZIP file at offset 0x%zx", |
| reader_.start_offset()); |
| case GzipDecompressor::ResultCode::kOk: |
| PERFETTO_FATAL("Unexpected result code"); |
| case GzipDecompressor::ResultCode::kEof: |
| break; |
| } |
| cur_.decompressor_bytes_fed += slice->size() - cur_.decompressor.AvailIn(); |
| auto raw_compressed = |
| reader_.SliceOff(reader_.start_offset(), cur_.decompressor_bytes_fed); |
| PERFETTO_CHECK(raw_compressed); |
| PERFETTO_CHECK(reader_.PopFrontBytes(cur_.decompressor_bytes_fed)); |
| return {std::move(raw_compressed)}; |
| } |
| |
| ZipFile* ZipReader::Find(const std::string& path) { |
| for (ZipFile& zf : files_) { |
| if (zf.name() == path) |
| return &zf; |
| } |
| return nullptr; |
| } |
| |
| ZipFile::ZipFile() = default; |
| ZipFile::~ZipFile() = default; |
| ZipFile::ZipFile(ZipFile&& other) noexcept = default; |
| ZipFile& ZipFile::operator=(ZipFile&& other) noexcept = default; |
| |
| base::Status ZipFile::Decompress(std::vector<uint8_t>* out_data) const { |
| out_data->clear(); |
| RETURN_IF_ERROR(DoDecompressionChecks()); |
| |
| if (hdr_.compression == kNoCompression) { |
| const uint8_t* data = compressed_data_.data(); |
| out_data->insert(out_data->end(), data, data + hdr_.compressed_size); |
| return base::OkStatus(); |
| } |
| |
| if (hdr_.uncompressed_size == 0) { |
| return base::OkStatus(); |
| } |
| |
| PERFETTO_DCHECK(hdr_.compression == kDeflate); |
| GzipDecompressor dec(GzipDecompressor::InputMode::kRawDeflate); |
| dec.Feed(compressed_data_.data(), hdr_.compressed_size); |
| |
| out_data->resize(hdr_.uncompressed_size); |
| auto dec_res = dec.ExtractOutput(out_data->data(), out_data->size()); |
| if (dec_res.ret != GzipDecompressor::ResultCode::kEof) { |
| return base::ErrStatus("Zip decompression error (%d) on %s (c=%u, u=%u)", |
| static_cast<int>(dec_res.ret), hdr_.fname.c_str(), |
| hdr_.compressed_size, hdr_.uncompressed_size); |
| } |
| out_data->resize(dec_res.bytes_written); |
| |
| #if PERFETTO_BUILDFLAG(PERFETTO_ZLIB) |
| const auto* crc_data = reinterpret_cast<const ::Bytef*>(out_data->data()); |
| auto crc_len = static_cast<::uInt>(out_data->size()); |
| auto actual_crc32 = static_cast<uint32_t>(::crc32(0u, crc_data, crc_len)); |
| if (actual_crc32 != hdr_.checksum) { |
| return base::ErrStatus("Zip CRC32 failure on %s (actual: %x, expected: %x)", |
| hdr_.fname.c_str(), actual_crc32, hdr_.checksum); |
| } |
| #endif |
| |
| return base::OkStatus(); |
| } |
| |
| base::Status ZipFile::DecompressLines(LinesCallback callback) const { |
| using ResultCode = GzipDecompressor::ResultCode; |
| RETURN_IF_ERROR(DoDecompressionChecks()); |
| |
| StreamingLineReader line_reader(std::move(callback)); |
| |
| if (hdr_.compression == kNoCompression) { |
| line_reader.Tokenize( |
| base::StringView(reinterpret_cast<const char*>(compressed_data_.data()), |
| hdr_.compressed_size)); |
| return base::OkStatus(); |
| } |
| |
| PERFETTO_DCHECK(hdr_.compression == kDeflate); |
| GzipDecompressor dec(GzipDecompressor::InputMode::kRawDeflate); |
| dec.Feed(compressed_data_.data(), hdr_.compressed_size); |
| |
| static constexpr size_t kChunkSize = 32768; |
| GzipDecompressor::Result dec_res; |
| do { |
| auto* wptr = reinterpret_cast<uint8_t*>(line_reader.BeginWrite(kChunkSize)); |
| dec_res = dec.ExtractOutput(wptr, kChunkSize); |
| if (dec_res.ret == ResultCode::kError || |
| dec_res.ret == ResultCode::kNeedsMoreInput) { |
| return base::ErrStatus("zlib decompression error on %s (%d)", |
| name().c_str(), static_cast<int>(dec_res.ret)); |
| } |
| PERFETTO_DCHECK(dec_res.bytes_written <= kChunkSize); |
| line_reader.EndWrite(dec_res.bytes_written); |
| } while (dec_res.ret == ResultCode::kOk); |
| return base::OkStatus(); |
| } |
| |
| // Common logic for both Decompress() and DecompressLines(). |
| base::Status ZipFile::DoDecompressionChecks() const { |
| if (hdr_.compression == kNoCompression) { |
| PERFETTO_CHECK(hdr_.compressed_size == hdr_.uncompressed_size); |
| return base::OkStatus(); |
| } |
| if (hdr_.compression != kDeflate) { |
| return base::ErrStatus("Zip compression mode not supported (%u)", |
| hdr_.compression); |
| } |
| if (!IsGzipSupported()) { |
| return base::ErrStatus( |
| "Cannot open zip file. Gzip is not enabled in the current build. " |
| "Rebuild with enable_perfetto_zlib=true"); |
| } |
| return base::OkStatus(); |
| } |
| |
| // Returns a 64-bit version of time_t, that is, the num seconds since the |
| // Epoch. |
| int64_t ZipFile::GetDatetime() const { |
| // Date: 7 bits year, 4 bits month, 5 bits day. |
| // Time: 5 bits hour, 6 bits minute, 5 bits second. |
| struct tm mdt {}; |
| // As per man 3 mktime, `tm_year` is relative to 1900 not Epoch. Go figure. |
| mdt.tm_year = 1980 + (hdr_.mdate >> (16 - 7)) - 1900; |
| |
| // As per the man page, the month ranges 0 to 11 (Jan = 0). |
| mdt.tm_mon = ((hdr_.mdate >> (16 - 7 - 4)) & 0x0f) - 1; |
| |
| // However, still according to the same man page, the day starts from 1. |
| mdt.tm_mday = hdr_.mdate & 0x1f; |
| |
| mdt.tm_hour = hdr_.mtime >> (16 - 5); |
| mdt.tm_min = (hdr_.mtime >> (16 - 5 - 6)) & 0x3f; |
| |
| // Seconds in the DOS format have only 5 bits, so they lose the last bit of |
| // resolution, hence the * 2. |
| mdt.tm_sec = (hdr_.mtime & 0x1f) * 2; |
| return base::TimeGm(&mdt); |
| } |
| |
| std::string ZipFile::GetDatetimeStr() const { |
| char buf[32]{}; |
| time_t secs = static_cast<time_t>(GetDatetime()); |
| strftime(buf, sizeof(buf), "%Y-%m-%d %H:%M:%S", gmtime(&secs)); |
| buf[sizeof(buf) - 1] = '\0'; |
| return buf; |
| } |
| |
| } // namespace perfetto::trace_processor::util |