| /* |
| * Copyright (C) 2022 The Android Open Source Project |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| #ifndef SRC_TRACE_PROCESSOR_UTIL_STREAMING_LINE_READER_H_ |
| #define SRC_TRACE_PROCESSOR_UTIL_STREAMING_LINE_READER_H_ |
| |
| #include <functional> |
| #include <vector> |
| |
| #include "perfetto/ext/base/string_view.h" |
| |
| namespace perfetto { |
| namespace trace_processor { |
| namespace util { |
| |
| // A streaming line tokenizer for efficiently processing large text files on a |
| // line-by-line basis. It's designed to be used in conjunction with ZipReader to |
| // stream lines out of a compressed file (think of a bugreport) without having |
| // to decompress the whole file in memory upfront. |
| // Internally it deals with the necessary buffering and line-merging across |
| // different chunks. |
| // Usage: |
| // - The caller should pass a callback into the ctor. The callback is invoked |
| // whenever a batch of lines has been tokenized. This happens after calls to |
| // either BeginWrite()+EndWrite() or Tokenize(). In order to avoid too much |
| // virtual dispatch overhead, the callback argument is a vector of lines, not |
| // a single line. |
| // - The caller can call either: |
| // - Tokenize(whole input): this exists to avoid a copy in the case of |
| // non-compressed (STORE) files in a zip archive. |
| // - A sequence of BeginWrite() + EndWrite() as follows: |
| // - BeginWrite(n) guarantees that the caller can write at least `n` chars. |
| // `n` is typically the size of the decompression buffer passed to zlib. |
| // - The caller writes at most `n` bytes through the pointer returned above. |
| // - The caller calls EndWrite(m) passing the number of bytes actually |
| // written (`m` <= `n`). |
| // NOTE: |
| // This implementation slightly diverges from base::StringSplitter as follows: |
| // 1. It does NOT skip empty lines. SS (StringSplitter) coalesces empty tokens, this doesn't. |
| // 2. It won't output the last line unless it terminates with a \n. SS doesn't |
| // tell the difference between "foo\nbar" and "foo\nbar\n". This is |
| // fundamental for streaming, where we cannot tell upfront if we got the end. |
| class StreamingLineReader { |
| public: |
| // Note: the StringView(s) in the vector argument are valid only for the |
| // duration of the callback. Don't retain them after the callback returns. |
| using LinesCallback = |
| std::function<void(const std::vector<base::StringView>&)>; |
| |
| explicit StreamingLineReader(LinesCallback); |
| ~StreamingLineReader(); |
| |
| // Tokenizes an input that is fully known upfront. This exists mostly for |
| // convenience when processing uncompressed (STORE) files in zip archives; |
| // outside of a zip context, prefer base::StringSplitter. NOTE(review): the |
| // returned size_t is presumably the number of chars consumed - confirm. |
| size_t Tokenize(base::StringView input); |
| |
| // Reserves `write_buf_size` bytes into the internal buffer and returns a |
| // pointer to them. The caller is expected to write at most `write_buf_size` |
| // bytes through the returned pointer and then call EndWrite(). |
| char* BeginWrite(size_t write_buf_size); |
| |
| // Finishes the write started by BeginWrite(). `size_written` is the number |
| // of bytes actually written and must be <= `write_buf_size`. If one or more |
| // lines can be tokenized, this invokes the LinesCallback one or more times. |
| void EndWrite(size_t size_written); |
| |
| private: |
| std::vector<char> buf_; |
| LinesCallback lines_callback_; |
| size_t size_before_write_ = 0; |
| }; |
| |
| } // namespace util |
| } // namespace trace_processor |
| } // namespace perfetto |
| |
| #endif // SRC_TRACE_PROCESSOR_UTIL_STREAMING_LINE_READER_H_ |