blob: 87dbeb2726fae0465c0bbf6e764fe07870562387 [file] [log] [blame]
/*
* Copyright (C) 2022 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "src/trace_processor/util/zip_reader.h"
#include <cstdint>
#include <cstring>
#include <ctime>
#include <limits>
#include <optional>
#include <string>
#include <utility>
#include <vector>
#include "perfetto/base/build_config.h"
#include "perfetto/base/logging.h"
#include "perfetto/base/status.h"
#include "perfetto/base/time.h"
#include "perfetto/ext/base/status_or.h"
#include "perfetto/ext/base/string_view.h"
#include "perfetto/ext/base/utils.h"
#include "perfetto/trace_processor/trace_blob_view.h"
#include "src/trace_processor/util/gzip_utils.h"
#include "src/trace_processor/util/status_macros.h"
#include "src/trace_processor/util/streaming_line_reader.h"
#if PERFETTO_BUILDFLAG(PERFETTO_ZLIB)
#include <zconf.h>
#include <zlib.h>
#endif
namespace perfetto::trace_processor::util {
namespace {
// Entry signatures.
constexpr uint32_t kFileHeaderSig = 0x04034b50;
constexpr uint32_t kCentralDirectorySig = 0x02014b50;
constexpr uint32_t kDataDescriptorSig = 0x08074b50;
// 4 bytes each of: 1) signature, 2) crc, 3) compressed size 4) uncompressed
// size.
constexpr uint32_t kDataDescriptorSize = 4 * 4;
enum GeneralPurposeBitFlag : uint32_t {
kEncrypted = 1 << 0,
k8kSlidingDictionary = 1u << 1,
kShannonFaro = 1u << 2,
kDataDescriptor = 1u << 3,
kUnknown = ~((1u << 4) - 1),
};
// Compression flags.
const uint16_t kNoCompression = 0;
const uint16_t kDeflate = 8;
template <typename T>
T ReadAndAdvance(const uint8_t** ptr) {
T res{};
memcpy(base::AssumeLittleEndian(&res), *ptr, sizeof(T));
*ptr += sizeof(T);
return res;
}
} // namespace
ZipReader::ZipReader() = default;
ZipReader::~ZipReader() = default;
base::Status ZipReader::Parse(TraceBlobView tbv) {
reader_.PushBack(std::move(tbv));
// .zip file sequence:
// [ File 1 header (30 bytes) ]
// [ File 1 name ]
// [ File 1 extra fields (optional) ]
// [ File 1 compressed payload ]
// [ File 1 data descriptor (optional) ]
//
// [ File 2 header (30 bytes) ]
// [ File 2 name ]
// [ File 2 extra fields (optional) ]
// [ File 2 compressed payload ]
// [ File 2 data descriptor (optional) ]
//
// [ Central directory (ignored) ]
for (;;) {
auto state = cur_.parse_state;
switch (state) {
case FileParseState::kHeader:
RETURN_IF_ERROR(TryParseHeader());
break;
case FileParseState::kFilename:
RETURN_IF_ERROR(TryParseFilename());
break;
case FileParseState::kSkipBytes:
RETURN_IF_ERROR(TrySkipBytes());
break;
case FileParseState::kCompressedData:
RETURN_IF_ERROR(TryParseCompressedData());
break;
}
if (state == cur_.parse_state) {
return base::OkStatus();
}
}
}
base::Status ZipReader::TryParseHeader() {
PERFETTO_CHECK(cur_.hdr.signature == 0);
std::optional<TraceBlobView> hdr =
reader_.SliceOff(reader_.start_offset(), kZipFileHdrSize);
if (!hdr) {
return base::OkStatus();
}
PERFETTO_CHECK(reader_.PopFrontBytes(kZipFileHdrSize));
const uint8_t* hdr_it = hdr->data();
cur_.hdr.signature = ReadAndAdvance<uint32_t>(&hdr_it);
if (cur_.hdr.signature == kCentralDirectorySig) {
// We reached the central directory at the end of file.
// We don't make any use here of the central directory, so we just
// ignore everything else after this point.
// Here we abuse the ZipFile class a bit. The Central Directory header
// has a different layout. The first 4 bytes (signature) match, the
// rest don't but the sizeof(central dir) is >> sizeof(file header) so
// we are fine.
// We do this rather than retuning because we could have further
// Parse() calls (imagine parsing bytes one by one), and we need a way
// to keep track of the "keep eating input without doing anything".
cur_.ignore_bytes_after_fname = std::numeric_limits<size_t>::max();
cur_.parse_state = FileParseState::kSkipBytes;
return base::OkStatus();
}
if (cur_.hdr.signature != kFileHeaderSig) {
return base::ErrStatus(
"Invalid signature found at offset 0x%zx. Actual=0x%x, "
"expected=0x%x",
reader_.start_offset(), cur_.hdr.signature, kFileHeaderSig);
}
cur_.hdr.version = ReadAndAdvance<uint16_t>(&hdr_it);
cur_.hdr.flags = ReadAndAdvance<uint16_t>(&hdr_it);
cur_.hdr.compression = ReadAndAdvance<uint16_t>(&hdr_it);
cur_.hdr.mtime = ReadAndAdvance<uint16_t>(&hdr_it);
cur_.hdr.mdate = ReadAndAdvance<uint16_t>(&hdr_it);
cur_.hdr.checksum = ReadAndAdvance<uint32_t>(&hdr_it);
cur_.hdr.compressed_size = ReadAndAdvance<uint32_t>(&hdr_it);
cur_.hdr.uncompressed_size = ReadAndAdvance<uint32_t>(&hdr_it);
cur_.hdr.fname_len = ReadAndAdvance<uint16_t>(&hdr_it);
cur_.hdr.extra_field_len = ReadAndAdvance<uint16_t>(&hdr_it);
PERFETTO_DCHECK(static_cast<size_t>(hdr_it - hdr->data()) == kZipFileHdrSize);
// We support only up to version 2.0 (20). Higher versions define
// more advanced features that we don't support (zip64 extensions,
// encryption).
// Disallow encryption or any flags we don't know how to handle.
if ((cur_.hdr.version > 20) || (cur_.hdr.flags & kEncrypted) ||
(cur_.hdr.flags & kUnknown)) {
return base::ErrStatus(
"Unsupported zip features at offset 0x%zx. version=%x, flags=%x",
reader_.start_offset(), cur_.hdr.version, cur_.hdr.flags);
}
if (cur_.hdr.compression != kNoCompression &&
cur_.hdr.compression != kDeflate) {
return base::ErrStatus(
"Unsupported compression type at offset 0x%zx. type=%x. Only "
"deflate and no compression are supported.",
reader_.start_offset(), cur_.hdr.compression);
}
if (cur_.hdr.flags & kDataDescriptor && cur_.hdr.compression != kDeflate) {
return base::ErrStatus(
"Unsupported compression type at offset 0x%zx. type=%x. Only "
"deflate supported for ZIPs compressed in a streaming fashion.",
reader_.start_offset(), cur_.hdr.compression);
}
cur_.ignore_bytes_after_fname = cur_.hdr.extra_field_len;
cur_.parse_state = FileParseState::kFilename;
return base::OkStatus();
}
base::Status ZipReader::TryParseFilename() {
if (cur_.hdr.fname_len == 0) {
cur_.parse_state = FileParseState::kSkipBytes;
return base::OkStatus();
}
PERFETTO_CHECK(cur_.hdr.fname.empty());
std::optional<TraceBlobView> fname_tbv =
reader_.SliceOff(reader_.start_offset(), cur_.hdr.fname_len);
if (!fname_tbv) {
return base::OkStatus();
}
PERFETTO_CHECK(reader_.PopFrontBytes(cur_.hdr.fname_len));
cur_.hdr.fname = std::string(reinterpret_cast<const char*>(fname_tbv->data()),
cur_.hdr.fname_len);
cur_.parse_state = FileParseState::kSkipBytes;
return base::OkStatus();
}
base::Status ZipReader::TrySkipBytes() {
if (cur_.ignore_bytes_after_fname == 0) {
cur_.parse_state = FileParseState::kCompressedData;
return base::OkStatus();
}
size_t avail = reader_.avail();
if (avail < cur_.ignore_bytes_after_fname) {
PERFETTO_CHECK(reader_.PopFrontBytes(avail));
cur_.ignore_bytes_after_fname -= avail;
return base::OkStatus();
}
PERFETTO_CHECK(reader_.PopFrontBytes(cur_.ignore_bytes_after_fname));
cur_.ignore_bytes_after_fname = 0;
cur_.parse_state = FileParseState::kCompressedData;
return base::OkStatus();
}
base::Status ZipReader::TryParseCompressedData() {
// Build up the compressed payload
if (cur_.hdr.flags & kDataDescriptor) {
if (!cur_.compressed) {
ASSIGN_OR_RETURN(auto compressed, TryParseUnsizedCompressedData());
if (!compressed) {
return base::OkStatus();
}
compressed = std::move(cur_.compressed);
}
std::optional<TraceBlobView> data_descriptor =
reader_.SliceOff(reader_.start_offset(), kDataDescriptorSize);
if (!data_descriptor) {
return base::OkStatus();
}
PERFETTO_CHECK(reader_.PopFrontBytes(kDataDescriptorSize));
const auto* desc_it = data_descriptor->data();
auto desc_sig = ReadAndAdvance<uint32_t>(&desc_it);
if (desc_sig != kDataDescriptorSig) {
return base::ErrStatus(
"Invalid signature found at offset 0x%zx. Actual=0x%x, "
"expected=0x%x",
reader_.start_offset(), desc_sig, kDataDescriptorSig);
}
cur_.hdr.checksum = ReadAndAdvance<uint32_t>(&desc_it);
cur_.hdr.compressed_size = ReadAndAdvance<uint32_t>(&desc_it);
cur_.hdr.uncompressed_size = ReadAndAdvance<uint32_t>(&desc_it);
} else {
PERFETTO_CHECK(!cur_.compressed);
std::optional<TraceBlobView> raw_compressed =
reader_.SliceOff(reader_.start_offset(), cur_.hdr.compressed_size);
if (!raw_compressed) {
return base::OkStatus();
}
cur_.compressed = *std::move(raw_compressed);
PERFETTO_CHECK(reader_.PopFrontBytes(cur_.hdr.compressed_size));
}
// We have accumulated the whole header, file name and compressed payload.
PERFETTO_CHECK(cur_.compressed);
PERFETTO_CHECK(cur_.hdr.fname.size() == cur_.hdr.fname_len);
PERFETTO_CHECK(cur_.compressed->size() == cur_.hdr.compressed_size);
PERFETTO_CHECK(cur_.ignore_bytes_after_fname == 0);
files_.emplace_back();
files_.back().hdr_ = std::move(cur_.hdr);
files_.back().compressed_data_ = *std::move(cur_.compressed);
cur_ = FileParseState(); // Reset the parsing state for the next file.
return base::OkStatus();
} // namespace perfetto::trace_processor::util
base::StatusOr<std::optional<TraceBlobView>>
ZipReader::TryParseUnsizedCompressedData() {
PERFETTO_CHECK(cur_.hdr.compression == kDeflate);
auto start = reader_.start_offset() + cur_.decompressor_bytes_fed;
auto end = reader_.end_offset();
auto slice = reader_.SliceOff(start, end - start);
PERFETTO_CHECK(slice);
auto res_code = cur_.decompressor.FeedAndExtract(slice->data(), slice->size(),
[](const uint8_t*, size_t) {
// Intentionally do
// nothing: we are only
// looking for the bounds
// of the deflate stream,
// we are not actually
// interested in the
// output.
});
switch (res_code) {
case GzipDecompressor::ResultCode::kNeedsMoreInput:
cur_.decompressor_bytes_fed += slice->size();
return {std::nullopt};
case GzipDecompressor::ResultCode::kError:
return base::ErrStatus(
"Failed decompressing stream in ZIP file at offset 0x%zx",
reader_.start_offset());
case GzipDecompressor::ResultCode::kOk:
PERFETTO_FATAL("Unexpected result code");
case GzipDecompressor::ResultCode::kEof:
break;
}
cur_.decompressor_bytes_fed += slice->size() - cur_.decompressor.AvailIn();
auto raw_compressed =
reader_.SliceOff(reader_.start_offset(), cur_.decompressor_bytes_fed);
PERFETTO_CHECK(raw_compressed);
PERFETTO_CHECK(reader_.PopFrontBytes(cur_.decompressor_bytes_fed));
return {std::move(raw_compressed)};
}
ZipFile* ZipReader::Find(const std::string& path) {
for (ZipFile& zf : files_) {
if (zf.name() == path)
return &zf;
}
return nullptr;
}
ZipFile::ZipFile() = default;
ZipFile::~ZipFile() = default;
ZipFile::ZipFile(ZipFile&& other) noexcept = default;
ZipFile& ZipFile::operator=(ZipFile&& other) noexcept = default;
base::Status ZipFile::Decompress(std::vector<uint8_t>* out_data) const {
out_data->clear();
RETURN_IF_ERROR(DoDecompressionChecks());
if (hdr_.compression == kNoCompression) {
const uint8_t* data = compressed_data_.data();
out_data->insert(out_data->end(), data, data + hdr_.compressed_size);
return base::OkStatus();
}
if (hdr_.uncompressed_size == 0) {
return base::OkStatus();
}
PERFETTO_DCHECK(hdr_.compression == kDeflate);
GzipDecompressor dec(GzipDecompressor::InputMode::kRawDeflate);
dec.Feed(compressed_data_.data(), hdr_.compressed_size);
out_data->resize(hdr_.uncompressed_size);
auto dec_res = dec.ExtractOutput(out_data->data(), out_data->size());
if (dec_res.ret != GzipDecompressor::ResultCode::kEof) {
return base::ErrStatus("Zip decompression error (%d) on %s (c=%u, u=%u)",
static_cast<int>(dec_res.ret), hdr_.fname.c_str(),
hdr_.compressed_size, hdr_.uncompressed_size);
}
out_data->resize(dec_res.bytes_written);
#if PERFETTO_BUILDFLAG(PERFETTO_ZLIB)
const auto* crc_data = reinterpret_cast<const ::Bytef*>(out_data->data());
auto crc_len = static_cast<::uInt>(out_data->size());
auto actual_crc32 = static_cast<uint32_t>(::crc32(0u, crc_data, crc_len));
if (actual_crc32 != hdr_.checksum) {
return base::ErrStatus("Zip CRC32 failure on %s (actual: %x, expected: %x)",
hdr_.fname.c_str(), actual_crc32, hdr_.checksum);
}
#endif
return base::OkStatus();
}
base::Status ZipFile::DecompressLines(LinesCallback callback) const {
using ResultCode = GzipDecompressor::ResultCode;
RETURN_IF_ERROR(DoDecompressionChecks());
StreamingLineReader line_reader(std::move(callback));
if (hdr_.compression == kNoCompression) {
line_reader.Tokenize(
base::StringView(reinterpret_cast<const char*>(compressed_data_.data()),
hdr_.compressed_size));
return base::OkStatus();
}
PERFETTO_DCHECK(hdr_.compression == kDeflate);
GzipDecompressor dec(GzipDecompressor::InputMode::kRawDeflate);
dec.Feed(compressed_data_.data(), hdr_.compressed_size);
static constexpr size_t kChunkSize = 32768;
GzipDecompressor::Result dec_res;
do {
auto* wptr = reinterpret_cast<uint8_t*>(line_reader.BeginWrite(kChunkSize));
dec_res = dec.ExtractOutput(wptr, kChunkSize);
if (dec_res.ret == ResultCode::kError ||
dec_res.ret == ResultCode::kNeedsMoreInput) {
return base::ErrStatus("zlib decompression error on %s (%d)",
name().c_str(), static_cast<int>(dec_res.ret));
}
PERFETTO_DCHECK(dec_res.bytes_written <= kChunkSize);
line_reader.EndWrite(dec_res.bytes_written);
} while (dec_res.ret == ResultCode::kOk);
return base::OkStatus();
}
// Common logic for both Decompress() and DecompressLines().
base::Status ZipFile::DoDecompressionChecks() const {
if (hdr_.compression == kNoCompression) {
PERFETTO_CHECK(hdr_.compressed_size == hdr_.uncompressed_size);
return base::OkStatus();
}
if (hdr_.compression != kDeflate) {
return base::ErrStatus("Zip compression mode not supported (%u)",
hdr_.compression);
}
if (!IsGzipSupported()) {
return base::ErrStatus(
"Cannot open zip file. Gzip is not enabled in the current build. "
"Rebuild with enable_perfetto_zlib=true");
}
return base::OkStatus();
}
// Returns a 64-bit version of time_t, that is, the num seconds since the
// Epoch.
int64_t ZipFile::GetDatetime() const {
// Date: 7 bits year, 4 bits month, 5 bits day.
// Time: 5 bits hour, 6 bits minute, 5 bits second.
struct tm mdt {};
// As per man 3 mktime, `tm_year` is relative to 1900 not Epoch. Go figure.
mdt.tm_year = 1980 + (hdr_.mdate >> (16 - 7)) - 1900;
// As per the man page, the month ranges 0 to 11 (Jan = 0).
mdt.tm_mon = ((hdr_.mdate >> (16 - 7 - 4)) & 0x0f) - 1;
// However, still according to the same man page, the day starts from 1.
mdt.tm_mday = hdr_.mdate & 0x1f;
mdt.tm_hour = hdr_.mtime >> (16 - 5);
mdt.tm_min = (hdr_.mtime >> (16 - 5 - 6)) & 0x3f;
// Seconds in the DOS format have only 5 bits, so they lose the last bit of
// resolution, hence the * 2.
mdt.tm_sec = (hdr_.mtime & 0x1f) * 2;
return base::TimeGm(&mdt);
}
std::string ZipFile::GetDatetimeStr() const {
char buf[32]{};
time_t secs = static_cast<time_t>(GetDatetime());
strftime(buf, sizeof(buf), "%Y-%m-%d %H:%M:%S", gmtime(&secs));
buf[sizeof(buf) - 1] = '\0';
return buf;
}
} // namespace perfetto::trace_processor::util