|  | /* | 
|  | * Copyright (C) 2022 The Android Open Source Project | 
|  | * | 
|  | * Licensed under the Apache License, Version 2.0 (the "License"); | 
|  | * you may not use this file except in compliance with the License. | 
|  | * You may obtain a copy of the License at | 
|  | * | 
|  | *      http://www.apache.org/licenses/LICENSE-2.0 | 
|  | * | 
|  | * Unless required by applicable law or agreed to in writing, software | 
|  | * distributed under the License is distributed on an "AS IS" BASIS, | 
|  | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | 
|  | * See the License for the specific language governing permissions and | 
|  | * limitations under the License. | 
|  | */ | 
|  |  | 
|  | #ifndef SRC_TRACE_PROCESSOR_UTIL_ZIP_READER_H_ | 
|  | #define SRC_TRACE_PROCESSOR_UTIL_ZIP_READER_H_ | 
|  |  | 
|  | #include <cstddef> | 
|  | #include <cstdint> | 
|  | #include <functional> | 
|  | #include <optional> | 
|  | #include <string> | 
|  | #include <utility> | 
|  | #include <vector> | 
|  |  | 
|  | #include "perfetto/base/status.h" | 
|  | #include "perfetto/ext/base/status_or.h" | 
|  | #include "perfetto/ext/base/string_view.h" | 
|  | #include "perfetto/trace_processor/trace_blob_view.h" | 
|  | #include "src/trace_processor/util/gzip_utils.h" | 
|  | #include "src/trace_processor/util/trace_blob_view_reader.h" | 
|  |  | 
|  | // ZipReader allows to read Zip files in a streaming fashion. | 
|  | // Key features: | 
|  | // - Read-only access, there is no ZipWriter. | 
|  | // - Files can be processed as they are seen in the zip archive, without needing | 
|  | //   to see the whole .zip file first. | 
|  | // - It does not read the final zip central directory. Only the metadata in the | 
|  | //   inline file headers is exposed. | 
|  | // - Only the compressed payload is kept around in memory. | 
|  | // - Supports line-based streaming for compressed text files (e.g. logs). This | 
|  | //   enables line-based processing of compressed logs without having to | 
|  | //   decompress fully the individual text file in memory. | 
|  | // - Does NOT support zip64, encryption and other advanced zip file features. | 
|  | // - It is not suitable for security-sensitive contexts. E.g. it doesn't deal | 
|  | //   with zip path traversal attacks (the same file showing up twice with two | 
|  | //   different payloads). | 
|  | // | 
|  | // Possible future features: | 
|  | // - The user could setup a filter (a glob, or a callback) to select the | 
|  | //   interesting files (e.g. *.txt) and skip the appending of the other entries. | 
|  | //   This would avoid completely the cost of keeping in memory the compressed | 
|  | //   payload of unwanted files (e.g. dumpstate.bin in BRs). | 
|  |  | 
|  | namespace perfetto::trace_processor::util { | 
|  |  | 
|  | class ZipReader; | 
|  |  | 
|  | constexpr size_t kZipFileHdrSize = 30; | 
|  |  | 
|  | // Holds the metadata and compressed payload of a zip file and allows | 
|  | // decompression. The lifecycle of a ZipFile is completely independent of the | 
|  | // ZipReader that created it. ZipFile(s) can be std::move(d) around and even | 
|  | // outlive the ZipReader. | 
|  | class ZipFile { | 
|  | public: | 
|  | // Note: the lifetime of the lines passed in the vector argument is valid only | 
|  | // for the duration of the callback. Don't retain the StringView(s) passed. | 
|  | using LinesCallback = | 
|  | std::function<void(const std::vector<base::StringView>&)>; | 
|  |  | 
|  | ZipFile(); | 
|  | ~ZipFile(); | 
|  | ZipFile(ZipFile&&) noexcept; | 
|  | ZipFile& operator=(ZipFile&&) noexcept; | 
|  | ZipFile(const ZipFile&) = delete; | 
|  | ZipFile& operator=(const ZipFile&) = delete; | 
|  |  | 
|  | // Bulk decompression. It keeps around the compressed data internally, so | 
|  | // this can be called several times. | 
|  | base::Status Decompress(std::vector<uint8_t>*) const; | 
|  |  | 
|  | // Streaming line-based decompression for text files. | 
|  | // It decompresses the file in chunks and passes batches of lines to the | 
|  | // caller, without decompressing the whole file into memory. | 
|  | // The typical use case is processing large log files from a bugreport. | 
|  | // Like the above, this is idempotent and keeps around the compressed data. | 
|  | base::Status DecompressLines(LinesCallback) const; | 
|  |  | 
|  | // File name, including the relative path (e.g., "FS/data/misc/foobar") | 
|  | const std::string& name() const { return hdr_.fname; } | 
|  |  | 
|  | // Seconds since the Epoch. This is effectively time_t on 64 bit platforms. | 
|  | int64_t GetDatetime() const; | 
|  |  | 
|  | // Returns the modified time in the format %Y-%m-%d %H:%M:%S. | 
|  | std::string GetDatetimeStr() const; | 
|  |  | 
|  | size_t uncompressed_size() const { return hdr_.uncompressed_size; } | 
|  | size_t compressed_size() const { return hdr_.compressed_size; } | 
|  |  | 
|  | private: | 
|  | friend class ZipReader; | 
|  |  | 
|  | base::Status DoDecompressionChecks() const; | 
|  |  | 
|  | // Rationale for having this as a nested sub-struct: | 
|  | // 1. Makes the move operator easier to maintain. | 
|  | // 2. Allows the ZipReader to handle a copy of this struct for the file | 
|  | //    being parsed. ZipReade will move the hdr into a full ZipFile once it | 
|  | //    has established the file is complete and valid. | 
|  | struct Header { | 
|  | uint32_t signature = 0; | 
|  | uint16_t version = 0; | 
|  | uint16_t flags = 0; | 
|  | uint16_t compression = 0; | 
|  | uint32_t checksum = 0; | 
|  | uint16_t mtime = 0; | 
|  | uint16_t mdate = 0; | 
|  | uint32_t compressed_size = 0; | 
|  | uint32_t uncompressed_size = 0; | 
|  | uint16_t fname_len = 0; | 
|  | uint16_t extra_field_len = 0; | 
|  | std::string fname; | 
|  | }; | 
|  |  | 
|  | Header hdr_{}; | 
|  | TraceBlobView compressed_data_; | 
|  | // If adding new fields here, remember to update the move operators. | 
|  | }; | 
|  |  | 
|  | class ZipReader { | 
|  | public: | 
|  | ZipReader(); | 
|  | ~ZipReader(); | 
|  |  | 
|  | ZipReader(const ZipReader&) = delete; | 
|  | ZipReader& operator=(const ZipReader&) = delete; | 
|  | ZipReader(ZipReader&&) = delete; | 
|  | ZipReader& operator=(ZipReader&&) = delete; | 
|  |  | 
|  | // Parses data incrementally from a zip file in chunks. The chunks can be | 
|  | // arbitrarily cut. You can pass the whole file in one go, byte by byte or | 
|  | // anything in between. | 
|  | // files() is updated incrementally as soon as a new whole compressed file | 
|  | // has been processed. You don't need to get to the end of the zip file to | 
|  | // see all files. The final "central directory" at the end of the file is | 
|  | // actually ignored. | 
|  | base::Status Parse(TraceBlobView); | 
|  |  | 
|  | // Returns a list of all the files discovered so far. | 
|  | const std::vector<ZipFile>& files() const { return files_; } | 
|  |  | 
|  | // Moves ownership of the ZipFiles to the caller. The caller can use this | 
|  | // to reduce the memory working set and retain only the files they care about. | 
|  | std::vector<ZipFile> TakeFiles() { return std::move(files_); } | 
|  |  | 
|  | // Find a file by its path inside the zip archive. | 
|  | ZipFile* Find(const std::string& path); | 
|  |  | 
|  | private: | 
|  | // Keeps track of the incremental parsing state of the current zip stream. | 
|  | // When a compressed file is completely parsed, a ZipFile instance is | 
|  | // constructed and appended to `files_`. | 
|  | struct FileParseState { | 
|  | enum { | 
|  | kHeader, | 
|  | kFilename, | 
|  | kSkipBytes, | 
|  | kCompressedData, | 
|  | } parse_state = kHeader; | 
|  | size_t ignore_bytes_after_fname = 0; | 
|  | // Used to track the number of bytes fed into the decompressor when we don't | 
|  | // know the compressed size upfront. | 
|  | size_t decompressor_bytes_fed = 0; | 
|  | GzipDecompressor decompressor{GzipDecompressor::InputMode::kRawDeflate}; | 
|  | std::optional<TraceBlobView> compressed; | 
|  | ZipFile::Header hdr{}; | 
|  | }; | 
|  |  | 
|  | base::Status TryParseHeader(); | 
|  | base::Status TryParseFilename(); | 
|  | base::Status TrySkipBytes(); | 
|  | base::Status TryParseCompressedData(); | 
|  | base::StatusOr<std::optional<TraceBlobView>> TryParseUnsizedCompressedData(); | 
|  |  | 
|  | FileParseState cur_; | 
|  | std::vector<ZipFile> files_; | 
|  | util::TraceBlobViewReader reader_; | 
|  | }; | 
|  |  | 
|  | }  // namespace perfetto::trace_processor::util | 
|  |  | 
|  | #endif  // SRC_TRACE_PROCESSOR_UTIL_ZIP_READER_H_ |