/*
* Copyright (C) 2018 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef SRC_TRACING_CORE_TRACE_BUFFER_H_
#define SRC_TRACING_CORE_TRACE_BUFFER_H_
#include <stdint.h>
#include <string.h>
#include <array>
#include <limits>
#include <map>
#include <memory>
#include <tuple>
#include <utility>
#include "perfetto/base/logging.h"
#include "perfetto/base/page_allocator.h"
#include "perfetto/tracing/core/basic_types.h"
#include "perfetto/tracing/core/slice.h"
namespace perfetto {
class TracePacket;
// The main buffer, owned by the tracing service, where all the trace data is
// ultimately stored. The service will own several instances of this class,
// at least one per active consumer (as defined in the |buffers| section of
// trace_config.proto) and will copy chunks from the producer's shared memory
// buffers into here when a CommitData IPC is received.
//
// Writing into the buffer
// -----------------------
// Data is copied from the SMB(s) using CopyChunkUntrusted(). The buffer will
// hence contain data coming from different producers and different writer
// sequences, more specifically:
// - The service receives data from several producers, identified by their
//   ProducerID.
// - Each producer writes several sequences, each identified by a WriterID
//   (they correspond to TraceWriter instances in the producer).
// - Each writer writes, in order, several chunks.
// - Each chunk contains zero, one, or more TracePacket(s), or even just
// fragments of packets (when they span across several chunks).
//
// So at any point in time, the buffer will contain a variable number of logical
// sequences identified by the {ProducerID, WriterID} tuple. Any given chunk
// will only contain packets (or fragments) belonging to the same sequence.
//
// The buffer operates by default as a ring buffer. Chunks are (over-)written
// in the same order as the CopyChunkUntrusted() calls. When overwriting old
// content, entire chunks are overwritten or clobbered. The buffer never leaves
// a partial chunk around. Chunks' payload is copied as-is, but their header is
// not and is repacked in order to keep the ProducerID around.
//
// Chunks are stored in the buffer next to each other. Each chunk is prefixed by
// an inline header (ChunkRecord), which contains most of the fields of the
// SharedMemoryABI ChunkHeader + the ProducerID + the size of the payload.
// It's essentially a conventional binary object stream, where each ChunkRecord
// tells where it ends and hence where to find the next one, like this:
//
// .-------------------------. 16 byte boundary
// | ChunkRecord: 16 bytes |
// | - chunk id: 4 bytes |
// | - producer id: 2 bytes |
// | - writer id: 2 bytes |
// | - #fragments: 2 bytes |
// +-----+ - record size: 2 bytes |
// | | - flags+pad: 4 bytes |
// | +-------------------------+
// | | |
// | : Chunk payload :
// | | |
// | +-------------------------+
// | | Optional padding |
// +---> +-------------------------+ 16 byte boundary
// | ChunkRecord |
// : :
// Chunks stored in the buffer are always rounded up to 16 bytes (that is
// sizeof(ChunkRecord)), in order to avoid further inner fragmentation.
// Special "padding" chunks can be put in the buffer, e.g. when we try to
// write a chunk of size N while the write pointer is < N bytes from the end
// of the buffer (and hence needs to wrap over).
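// Worked example (illustrative numbers, assuming a 4096-byte buffer): if the
// write pointer is at offset 4080 (16 bytes from the end) and the next chunk
// needs 48 bytes, a 16-byte padding record is written at offset 4080, the
// write pointer wraps to offset 0 and the chunk is written at the beginning.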
// Because of this, the buffer is self-describing: its contents can be
// reconstructed by just looking at the buffer itself (this will be quite
// useful in the future to recover the buffer from crash reports).
//
// However, in order to keep some operations (patching and reading) fast, a
// lookaside index is maintained (in |index_|), keeping each chunk in the buffer
// indexed by its {ProducerID, WriterID, ChunkID} tuple.
//
// Patching data out-of-band
// -------------------------
// This buffer also supports patching chunks' payload out-of-band, after they
// have been stored. This is to allow producers to backfill the "size" fields
// of the protos that span across several chunks, when the previous chunks are
// returned to the service. TryPatchChunkContents() deals with the fact
// that a chunk might have been lost (because of wrapping) by the time the OOB
// IPC comes.
//
// Reading from the buffer
// -----------------------
// This class supports one reader only (the consumer). Reads are NOT idempotent
// as they move the read cursors around. Reading back the buffer is the most
// conceptually complex part. The ReadNextTracePacket() method, in fact,
// operates with whole-packet granularity. Packets are returned only when all
// their fragments are available.
// This class takes care of:
// - Gluing packets within the same sequence, even if they are not stored
// adjacently in the buffer.
// - Re-ordering chunks within a sequence (using the ChunkID, which wraps).
// - Detecting holes in packet fragments (because of loss of chunks).
// Reads guarantee that packets for the same sequence are read in FIFO order
// (according to their ChunkID), but don't give any guarantee about the read
// order of packets from different sequences (see the ReadNextTracePacket()
// comments below).
//
// TODO(primiano): the name of this class is deliberately typo-ed as a temporary
// situation until we replace TraceBuffer within service_impl.cc.
class TraceBuffez {
public:
static const size_t InlineChunkHeaderSize; // For test/fake_packet.{cc,h}.
struct Stats {
size_t failed_patches = 0;
size_t succeeded_patches = 0;
size_t fragment_readahead_successes = 0;
size_t fragment_readahead_failures = 0;
size_t write_wrap_count = 0;
// TODO(primiano): add packets_{read,written}.
// TODO(primiano): add bytes_{read,written}.
// TODO(primiano): add bytes_lost_for_padding.
};
// Argument for out-of-band patches applied through TryPatchChunkContents().
struct Patch {
// From SharedMemoryABI::kPacketHeaderSize.
static constexpr size_t kSize = 4;
size_t offset_untrusted;
std::array<uint8_t, kSize> data;
};
// Can return nullptr if the memory allocation fails.
static std::unique_ptr<TraceBuffez> Create(size_t size_in_bytes);
~TraceBuffez();
// Copies a Chunk from a producer Shared Memory Buffer into the trace buffer.
// |src| points to the first packet in the SharedMemoryABI's chunk shared
// with an untrusted producer. "untrusted" here means: the producer might be
// malicious and might change |src| concurrently while we read it (internally
// this method first memcpy()-s the chunk before processing it).
// None of the arguments should be trusted, unless otherwise stated. We can
// trust that |src| points to a valid memory area, but not its contents.
void CopyChunkUntrusted(ProducerID producer_id_trusted,
uid_t producer_uid_trusted,
WriterID writer_id,
ChunkID chunk_id,
uint16_t num_fragments,
uint8_t chunk_flags,
const uint8_t* src,
size_t size);
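// A minimal usage sketch (hypothetical locals; in the service this is driven
// by the CommitData IPC handler):
//
//   std::unique_ptr<TraceBuffez> buf = TraceBuffez::Create(4096 * 1024);
//   if (!buf)
//     return;  // Create() returns nullptr if the allocation fails.
//   buf->CopyChunkUntrusted(producer_id, producer_uid, writer_id, chunk_id,
//                           num_fragments, chunk_flags, payload, payload_size);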
// Applies a batch of |patches| to the given chunk, if the given chunk is
// still in the buffer. Does nothing if the given ChunkID is gone.
// Returns true if the chunk has been found and patched, false otherwise.
// |other_patches_pending| is used to determine whether this is the only
// batch of patches for the chunk or whether more are expected.
// If |other_patches_pending| == false, the chunk is marked as ready to be
// consumed. If true, the state of the chunk is not altered.
bool TryPatchChunkContents(ProducerID,
WriterID,
ChunkID,
const Patch* patches,
size_t patches_size,
bool other_patches_pending);
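// A minimal usage sketch (hypothetical values), backfilling the 4-byte size
// field of a fragment at a producer-supplied offset:
//
//   TraceBuffez::Patch patch{};
//   patch.offset_untrusted = 42;  // Untrusted: validated against the chunk.
//   patch.data = {{0x11, 0x22, 0x33, 0x44}};
//   bool applied = buf->TryPatchChunkContents(
//       producer_id, writer_id, chunk_id, &patch, /*patches_size=*/1,
//       /*other_patches_pending=*/false);
//   // |applied| is false if the chunk has already been overwritten.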
// To read the contents of the buffer the caller needs to:
// BeginRead()
// while (ReadNextTracePacket(packet_fragments)) { ... }
// No other calls to any other method should be interleaved between
// BeginRead() and ReadNextTracePacket().
// Reads in the TraceBuffer are NOT idempotent.
void BeginRead();
// Returns the next packet in the buffer, if any, and the uid of the producer
// that wrote it (as passed in the CopyChunkUntrusted() call). Returns false
// if no packets can be read at this point.
// This function returns only complete packets. Specifically:
// When there is at least one complete packet in the buffer, this function
// returns true and populates the TracePacket argument with the boundaries of
// each fragment for one packet.
// TracePacket will have at least one slice when this function returns true.
// When there are no whole packets eligible to read (e.g. we are still missing
// fragments) this function returns false.
// This function also guarantees that packets for a given
// {ProducerID, WriterID} are read in FIFO order.
// This function does not guarantee any ordering w.r.t. packets belonging to
// different WriterID(s). For instance, given the following packets copied
// into the buffer:
// {ProducerID: 1, WriterID: 1}: P1 P2 P3
// {ProducerID: 1, WriterID: 2}: P4 P5 P6
// {ProducerID: 2, WriterID: 1}: P7 P8 P9
// The following read sequence is possible:
// P1, P4, P7, P2, P3, P5, P8, P9, P6
// But the following is guaranteed to NOT happen:
// P1, P5, P7, P4 (P4 cannot come after P5)
bool ReadNextTracePacket(TracePacket*, uid_t* producer_uid);
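// A minimal read-loop sketch (|on_packet| is a hypothetical callback):
//
//   buf->BeginRead();
//   TracePacket packet;
//   uid_t producer_uid = 0;
//   while (buf->ReadNextTracePacket(&packet, &producer_uid)) {
//     on_packet(std::move(packet), producer_uid);
//     packet = TracePacket();  // Reset before reading the next packet.
//   }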
const Stats& stats() const { return stats_; }
size_t size() const { return size_; }
private:
friend class TraceBufferTest;
// ChunkRecord is a Chunk header stored inline in the |data_| buffer, before
// the chunk payload (the packets' data). The |data_| buffer looks like this:
// +---------------+------------------++---------------+-----------------+
// | ChunkRecord 1 | Chunk payload 1 || ChunkRecord 2 | Chunk payload 2 | ...
// +---------------+------------------++---------------+-----------------+
// Most of the ChunkRecord fields are copied from SharedMemoryABI::ChunkHeader
// (the chunk header used in the shared memory buffers).
// A ChunkRecord can be a special "padding" record. In this case its payload
// should be ignored and the record should be just skipped.
//
// Full page move optimization:
// This struct has to be exactly (sizeof(PageHeader) + sizeof(ChunkHeader))
// (from shared_memory_abi.h) to allow full page move optimizations
// (TODO(primiano): not implemented yet). In the special case of moving a full
// 4k page that contains only one chunk, in fact, we can just ask the kernel
// to move the full SHM page (see SPLICE_F_{GIFT,MOVE}) and overlay the
// ChunkRecord on top of the moved SMB's header (page + chunk header).
// This special requirement is covered by static_assert(s) in the .cc file.
struct ChunkRecord {
explicit ChunkRecord(size_t sz) : flags{0}, is_padding{0} {
PERFETTO_DCHECK(sz >= sizeof(ChunkRecord) &&
sz % sizeof(ChunkRecord) == 0 && sz <= kMaxSize);
size = static_cast<decltype(size)>(sz);
}
bool is_valid() const { return size != 0; }
// Keep this structure packed and exactly 16 bytes (128 bits) big.
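// Field tally: chunk_id (4) + producer_id (2) + writer_id (2) +
// num_fragments (2) + size (2) + bitfields (flags:6 + is_padding:1 +
// unused_flag:1 = 1 byte) + unused[3] (3) = 16 bytes.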
// [32 bits] Monotonic counter within the same writer_id.
ChunkID chunk_id = 0;
// [16 bits] ID of the Producer from which the Chunk was copied.
ProducerID producer_id = 0;
// [16 bits] Unique per Producer (but not within the service).
// If writer_id == kWriterIdPadding the record should just be skipped.
WriterID writer_id = 0;
// Number of fragments contained in the chunk.
uint16_t num_fragments = 0;
// Size in bytes, including sizeof(ChunkRecord) itself.
uint16_t size;
uint8_t flags : 6; // See SharedMemoryABI::ChunkHeader::flags.
uint8_t is_padding : 1;
uint8_t unused_flag : 1;
// Not strictly needed, can be reused for more fields in the future. But
// right now helps to spot chunks in hex dumps.
char unused[3] = {'C', 'H', 'U'};
static constexpr size_t kMaxSize =
std::numeric_limits<decltype(size)>::max();
};
// Lookaside index entry. This serves three purposes:
// 1) Allow a fast lookup of ChunkRecord by their ID (the tuple
//    {ProducerID, WriterID, ChunkID}). This is used when applying out-of-band
//    patches to the contents of the chunks after they have been copied into
//    the TraceBuffer.
// 2) Keep the chunks ordered by their ID. This is used when reading back.
// 3) Keep metadata about the status of the chunk, e.g. whether the contents
//    have been read already and should be skipped in a future read pass.
// This struct should not have any field that is essential for reconstructing
// the contents of the buffer from a crash dump.
struct ChunkMeta {
// Key used for sorting in the map.
struct Key {
Key(ProducerID p, WriterID w, ChunkID c)
: producer_id{p}, writer_id{w}, chunk_id{c} {}
explicit Key(const ChunkRecord& cr)
: Key(cr.producer_id, cr.writer_id, cr.chunk_id) {}
// Note that this sorting doesn't take into account the fact that ChunkID
// will wrap over at some point. The extra logic in SequenceIterator deals
// with that.
bool operator<(const Key& other) const {
return std::tie(producer_id, writer_id, chunk_id) <
std::tie(other.producer_id, other.writer_id, other.chunk_id);
}
bool operator==(const Key& other) const {
return std::tie(producer_id, writer_id, chunk_id) ==
std::tie(other.producer_id, other.writer_id, other.chunk_id);
}
// These fields should always match the corresponding fields in
// |chunk_record|. They are copied here purely for efficiency, to avoid
// dereferencing the buffer all the time.
ProducerID producer_id;
WriterID writer_id;
ChunkID chunk_id;
};
ChunkMeta(ChunkRecord* c, uint16_t p, uint8_t f, uid_t u)
: chunk_record{c}, trusted_uid{u}, flags{f}, num_fragments{p} {}
ChunkRecord* const chunk_record; // Addr of ChunkRecord within |data_|.
const uid_t trusted_uid; // uid of the producer.
uint8_t flags = 0; // See SharedMemoryABI::flags.
const uint16_t num_fragments = 0; // Total number of packet fragments.
uint16_t num_fragments_read = 0; // Number of fragments already read.
// The start offset of the next fragment (the |num_fragments_read|-th) to be
// read. This is the offset in bytes from the beginning of the ChunkRecord's
// payload (the 1st fragment starts at |chunk_record| +
// sizeof(ChunkRecord)).
uint16_t cur_fragment_offset = 0;
};
using ChunkMap = std::map<ChunkMeta::Key, ChunkMeta>;
// Allows iterating over the sub-sequence of |index_| whose keys belong to the
// same {ProducerID, WriterID}, taking into account the wrapping of ChunkID.
// Instances are valid only as long as the |index_| is not altered (they can be
// used safely only between adjacent ReadNextTracePacket() calls).
// The iteration proceeds in the following order:
// |wrapping_id| + 1 -> |seq_end|, |seq_begin| -> |wrapping_id|.
// Practical example:
// - Assume that kMaxChunkID == 7
// - Assume that we have all 8 chunks in the range (0..7).
// - Hence, |seq_begin| == c0, |seq_end| == c7
// - Assume |wrapping_id| = 4 (c4 is the last chunk copied over
// through a CopyChunkUntrusted()).
// The resulting iteration order will be: c5, c6, c7, c0, c1, c2, c3, c4.
struct SequenceIterator {
// Points to the 1st key (the one with the numerically min ChunkID).
ChunkMap::iterator seq_begin;
// Points one past the last key (the one with the numerically max ChunkID).
ChunkMap::iterator seq_end;
// Current iterator, always >= seq_begin && <= seq_end.
ChunkMap::iterator cur;
// The latest ChunkID written. Determines the start/end of the sequence.
ChunkID wrapping_id;
bool is_valid() const { return cur != seq_end; }
ProducerID producer_id() const {
PERFETTO_DCHECK(is_valid());
return cur->first.producer_id;
}
WriterID writer_id() const {
PERFETTO_DCHECK(is_valid());
return cur->first.writer_id;
}
ChunkID chunk_id() const {
PERFETTO_DCHECK(is_valid());
return cur->first.chunk_id;
}
ChunkMeta& operator*() {
PERFETTO_DCHECK(is_valid());
return cur->second;
}
// Moves |cur| to the next chunk in the index.
// is_valid() will become false after calling this, if this was the last
// entry of the sequence.
void MoveNext();
void MoveToEnd() { cur = seq_end; }
};
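// A minimal iteration sketch over one sequence, as the read path might use it
// (|index_| must not be modified while iterating):
//
//   for (SequenceIterator it = GetReadIterForSequence(index_.begin());
//        it.is_valid(); it.MoveNext()) {
//     ChunkMeta& meta = *it;  // Chunks arrive in wrap-adjusted ChunkID order.
//   }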
enum class ReadAheadResult {
kSucceededReturnSlices,
kFailedMoveToNextSequence,
kFailedStayOnSameSequence,
};
TraceBuffez();
TraceBuffez(const TraceBuffez&) = delete;
TraceBuffez& operator=(const TraceBuffez&) = delete;
bool Initialize(size_t size);
// Returns an object that allows iterating over the chunks in the |index_|
// that have the same {ProducerID, WriterID} as
// |seq_begin.first.{producer,writer}_id|. |seq_begin| must be an iterator to
// the first entry in the |index_| that has a different {ProducerID, WriterID}
// from the previous one. It is valid for |seq_begin| to be == index_.end()
// (i.e. if the index is empty). The iteration takes care of ChunkID wrapping,
// by using |last_chunk_id_|.
SequenceIterator GetReadIterForSequence(ChunkMap::iterator seq_begin);
// Used as a last resort when a buffer corruption is detected.
void ClearContentsAndResetRWCursors();
// Adds a padding record of the given size (must be a multiple of
// sizeof(ChunkRecord)).
void AddPaddingRecord(size_t);
// Looks for contiguous fragments of the same packet starting from
// |read_iter_|.
// If a contiguous packet is found, all the fragments are pushed into
// TracePacket and the function returns kSucceededReturnSlices. If not, the
// function returns either kFailedMoveToNextSequence or
// kFailedStayOnSameSequence, telling the caller to continue looking for
// packets.
ReadAheadResult ReadAhead(TracePacket*);
// Deletes (by marking the record invalid and removing it from the index) all
// chunks from |wptr_| to |wptr_| + |bytes_to_clear|. Returns the size of the
// gap left between the next valid Chunk and the end of the deletion range, or
// 0 if such next valid chunk doesn't exist (if the buffer is still zeroed).
// Graphically, assume the initial situation is the following (|wptr_| = 10).
// |0 |10 (wptr_) |30 |40 |60
// +---------+-----------------+---------+-------------------+---------+
// | Chunk 1 | Chunk 2 | Chunk 3 | Chunk 4 | Chunk 5 |
// +---------+-----------------+---------+-------------------+---------+
// |_________Deletion range_______|~~return value~~|
//
// A call to DeleteNextChunksFor(32) will remove chunks 2,3,4 and return 18
// (60 - 42), the distance between chunk 5 and the end of the deletion range.
size_t DeleteNextChunksFor(size_t bytes_to_clear);
// Decodes the boundaries of the next packet (or fragment) pointed to by
// ChunkMeta and pushes that into |TracePacket|. It also increments the
// |num_fragments_read| counter.
// TracePacket can be nullptr, in which case the read state is still advanced.
bool ReadNextPacketInChunk(ChunkMeta*, TracePacket*);
void DcheckIsAlignedAndWithinBounds(const uint8_t* ptr) const {
PERFETTO_DCHECK(ptr >= begin() && ptr <= end() - sizeof(ChunkRecord));
PERFETTO_DCHECK(
(reinterpret_cast<uintptr_t>(ptr) & (alignof(ChunkRecord) - 1)) == 0);
}
ChunkRecord* GetChunkRecordAt(uint8_t* ptr) const {
DcheckIsAlignedAndWithinBounds(ptr);
return reinterpret_cast<ChunkRecord*>(ptr);
}
// |src| can be nullptr (in which case |size| must be ==
// record.size - sizeof(ChunkRecord)), for the case of writing a padding
// record. |wptr_| is NOT advanced by this function; the caller must do that.
void WriteChunkRecord(const ChunkRecord& record,
const uint8_t* src,
size_t size) {
// Note: |record.size| will be slightly bigger than |size| because of the
// ChunkRecord header and rounding, to ensure that all ChunkRecord(s) are
// multiple of sizeof(ChunkRecord). The invariant is:
// record.size >= |size| + sizeof(ChunkRecord) (== if no rounding).
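// Worked example (illustrative numbers): with |size| == 10, |record.size|
// is 32 (10 + 16 rounded up to a multiple of 16), so the rounding padding
// zeroed after the payload is 32 - 16 - 10 == 6 bytes.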
PERFETTO_DCHECK(size <= ChunkRecord::kMaxSize);
PERFETTO_DCHECK(record.size >= sizeof(record));
PERFETTO_DCHECK(record.size % sizeof(record) == 0);
PERFETTO_DCHECK(record.size >= size + sizeof(record));
PERFETTO_CHECK(record.size <= size_to_end());
DcheckIsAlignedAndWithinBounds(wptr_);
// Deliberately not a *D*CHECK.
PERFETTO_CHECK(wptr_ + sizeof(record) + size <= end());
memcpy(wptr_, &record, sizeof(record));
if (PERFETTO_LIKELY(src)) {
memcpy(wptr_ + sizeof(record), src, size);
} else {
PERFETTO_DCHECK(size == record.size - sizeof(record));
}
const size_t rounding_size = record.size - sizeof(record) - size;
memset(wptr_ + sizeof(record) + size, 0, rounding_size);
}
uint8_t* begin() const { return reinterpret_cast<uint8_t*>(data_.get()); }
uint8_t* end() const { return begin() + size_; }
size_t size_to_end() const { return end() - wptr_; }
base::PageAllocator::UniquePtr data_;
size_t size_ = 0; // Size in bytes of |data_|.
size_t max_chunk_size_ = 0; // Max size in bytes allowed for a chunk.
uint8_t* wptr_ = nullptr; // Write pointer.
// An index that keeps track of the positions and metadata of each
// ChunkRecord.
ChunkMap index_;
// Read iterator used for ReadNextTracePacket(). It is reset by calling
// BeginRead(). It becomes invalid after any call to methods that alter the
// |index_|.
SequenceIterator read_iter_;
// Keeps track of the last ChunkID written for a given writer.
// TODO(primiano): should clean up keys from this map. Right now this map
// grows without bounds (although realistically it is not a problem unless we
// have too many producers/writers within the same trace session).
std::map<std::pair<ProducerID, WriterID>, ChunkID> last_chunk_id_;
// Statistics about buffer usage.
Stats stats_;
#if PERFETTO_DCHECK_IS_ON()
bool changed_since_last_read_ = false;
#endif
// When true disable some DCHECKs that have been put in place to detect
// bugs in the producers. This is for tests that feed malicious inputs and
// hence mimic a buggy producer.
bool suppress_sanity_dchecks_for_testing_ = false;
};
} // namespace perfetto
#endif // SRC_TRACING_CORE_TRACE_BUFFER_H_