/*
 * Copyright (C) 2019 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "src/profiling/perf/event_reader.h"

#include <linux/perf_event.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <unistd.h>

#include "perfetto/ext/base/utils.h"
#include "src/profiling/perf/regs_parsing.h"

namespace perfetto {
namespace profiling {

namespace {

constexpr size_t kDefaultDataPagesPerRingBuffer = 256;  // 1 MB (256 x 4k pages)

template <typename T>
const char* ReadValue(T* value_out, const char* ptr) {
  memcpy(value_out, reinterpret_cast<const void*>(ptr), sizeof(T));
  return ptr + sizeof(T);
}

bool IsPowerOfTwo(size_t v) {
  return (v != 0 && ((v & (v - 1)) == 0));
}

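// Note: glibc does not provide a wrapper for perf_event_open(2), so the
// syscall is invoked directly via syscall(2).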
static int perf_event_open(perf_event_attr* attr,
                           pid_t pid,
                           int cpu,
                           int group_fd,
                           unsigned long flags) {
  return static_cast<int>(
      syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags));
}

base::ScopedFile PerfEventOpen(uint32_t cpu, const EventConfig& event_cfg) {
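  // With pid == -1 and a non-negative cpu, the kernel counts/samples all
  // processes and threads running on that cpu (see perf_event_open(2)).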
  base::ScopedFile perf_fd{
      perf_event_open(event_cfg.perf_attr(), /*pid=*/-1, static_cast<int>(cpu),
                      /*group_fd=*/-1, PERF_FLAG_FD_CLOEXEC)};
  return perf_fd;
}

}  // namespace

PerfRingBuffer::PerfRingBuffer(PerfRingBuffer&& other) noexcept
    : metadata_page_(other.metadata_page_),
      mmap_sz_(other.mmap_sz_),
      data_buf_(other.data_buf_),
      data_buf_sz_(other.data_buf_sz_) {
  other.metadata_page_ = nullptr;
  other.mmap_sz_ = 0;
  other.data_buf_ = nullptr;
  other.data_buf_sz_ = 0;
}

PerfRingBuffer& PerfRingBuffer::operator=(PerfRingBuffer&& other) noexcept {
  if (this == &other)
    return *this;

  this->~PerfRingBuffer();
  new (this) PerfRingBuffer(std::move(other));
  return *this;
}

PerfRingBuffer::~PerfRingBuffer() {
  if (!valid())
    return;

  if (munmap(reinterpret_cast<void*>(metadata_page_), mmap_sz_) != 0)
    PERFETTO_PLOG("failed munmap");
}

base::Optional<PerfRingBuffer> PerfRingBuffer::Allocate(
    int perf_fd,
    size_t data_page_count) {
  // perf_event_open requires the ring buffer to be a power of two in size.
  PERFETTO_DCHECK(IsPowerOfTwo(data_page_count));
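  // A power-of-two size also lets offsets be wrapped into the buffer with a
  // simple mask (offset & (data_buf_sz_ - 1)), see ReadRecordNonconsuming().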

  PerfRingBuffer ret;

  // mmap request is one page larger than the buffer size (for the metadata).
  ret.data_buf_sz_ = data_page_count * base::kPageSize;
  ret.mmap_sz_ = ret.data_buf_sz_ + base::kPageSize;

  // If PROT_WRITE, kernel won't overwrite unread samples.
  void* mmap_addr = mmap(nullptr, ret.mmap_sz_, PROT_READ | PROT_WRITE,
                         MAP_SHARED, perf_fd, 0);
  if (mmap_addr == MAP_FAILED) {
    PERFETTO_PLOG("failed mmap");
    return base::nullopt;
  }

  // Expected layout is [ metadata page ] [ data pages ... ]
  ret.metadata_page_ = reinterpret_cast<perf_event_mmap_page*>(mmap_addr);
  ret.data_buf_ = reinterpret_cast<char*>(mmap_addr) + base::kPageSize;
  PERFETTO_CHECK(ret.metadata_page_->data_offset == base::kPageSize);
  PERFETTO_CHECK(ret.metadata_page_->data_size == ret.data_buf_sz_);

  return base::make_optional(std::move(ret));
}

// See |perf_output_put_handle| for the necessary synchronization between the
// kernel and this userspace thread (which are using the same shared memory,
// but might be on different cores).
// TODO(rsavitski): is there false sharing between |data_tail| and |data_head|?
// Is there an argument for maintaining our own copy of |data_tail| instead of
// reloading it?
char* PerfRingBuffer::ReadRecordNonconsuming() {
  static_assert(sizeof(std::atomic<uint64_t>) == sizeof(uint64_t), "");

  PERFETTO_CHECK(valid());

  // |data_tail| is written only by this userspace thread, so we can safely
  // read it without any synchronization.
  uint64_t read_offset = metadata_page_->data_tail;

  // |data_head| is written by the kernel; perform an acquiring load such that
  // the payload reads below are ordered after this load.
  uint64_t write_offset =
      reinterpret_cast<std::atomic<uint64_t>*>(&metadata_page_->data_head)
          ->load(std::memory_order_acquire);

  PERFETTO_DCHECK(read_offset <= write_offset);
  if (write_offset == read_offset)
    return nullptr;  // no new data

  size_t read_pos = static_cast<size_t>(read_offset & (data_buf_sz_ - 1));

  // event header (64 bits) guaranteed to be contiguous
  PERFETTO_DCHECK(read_pos <= data_buf_sz_ - sizeof(perf_event_header));
  PERFETTO_DCHECK(0 == reinterpret_cast<size_t>(data_buf_ + read_pos) %
                           alignof(perf_event_header));

  perf_event_header* evt_header =
      reinterpret_cast<perf_event_header*>(data_buf_ + read_pos);
  uint16_t evt_size = evt_header->size;

  // event wrapped - reconstruct it, and return a pointer to the buffer
  if (read_pos + evt_size > data_buf_sz_) {
    PERFETTO_DCHECK(read_pos + evt_size !=
                    ((read_pos + evt_size) & (data_buf_sz_ - 1)));
    PERFETTO_DLOG("PerfRingBuffer: returning reconstructed event");

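    // perf_event_header.size is a u16, so a record is at most 64 KB; the
    // reconstruction buffer is assumed to be sized for that worst case.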
    size_t prefix_sz = data_buf_sz_ - read_pos;
    memcpy(&reconstructed_record_[0], data_buf_ + read_pos, prefix_sz);
    memcpy(&reconstructed_record_[0] + prefix_sz, data_buf_,
           evt_size - prefix_sz);
    return &reconstructed_record_[0];
  } else {
    // usual case - contiguous sample
    PERFETTO_DCHECK(read_pos + evt_size ==
                    ((read_pos + evt_size) & (data_buf_sz_ - 1)));

    return data_buf_ + read_pos;
  }
}

void PerfRingBuffer::Consume(size_t bytes) {
  PERFETTO_CHECK(valid());

  // Advance |data_tail|, which is written only by this thread. The store of
  // the updated value needs to have release semantics such that the preceding
  // payload reads are ordered before it. The reader in this case is the
  // kernel, which reads |data_tail| to calculate the available ring buffer
  // capacity before trying to store a new record.
  uint64_t updated_tail = metadata_page_->data_tail + bytes;
  reinterpret_cast<std::atomic<uint64_t>*>(&metadata_page_->data_tail)
      ->store(updated_tail, std::memory_order_release);
}

EventReader::EventReader(uint32_t cpu,
                         perf_event_attr event_attr,
                         base::ScopedFile perf_fd,
                         PerfRingBuffer ring_buffer)
    : cpu_(cpu),
      event_attr_(event_attr),
      perf_fd_(std::move(perf_fd)),
      ring_buffer_(std::move(ring_buffer)) {}

EventReader::EventReader(EventReader&& other) noexcept
    : cpu_(other.cpu_),
      event_attr_(other.event_attr_),
      perf_fd_(std::move(other.perf_fd_)),
      ring_buffer_(std::move(other.ring_buffer_)) {}

EventReader& EventReader::operator=(EventReader&& other) noexcept {
  if (this == &other)
    return *this;

  this->~EventReader();
  new (this) EventReader(std::move(other));
  return *this;
}

base::Optional<EventReader> EventReader::ConfigureEvents(
    uint32_t cpu,
    const EventConfig& event_cfg) {
  auto perf_fd = PerfEventOpen(cpu, event_cfg);
  if (!perf_fd) {
    PERFETTO_PLOG("failed perf_event_open");
    return base::nullopt;
  }

  // choose a reasonable ring buffer size
  size_t ring_buffer_pages = kDefaultDataPagesPerRingBuffer;
  size_t config_pages = event_cfg.ring_buffer_pages();
  if (config_pages) {
    if (!IsPowerOfTwo(config_pages)) {
      PERFETTO_ELOG("kernel buffer size must be a power of two pages");
      return base::nullopt;
    }
    ring_buffer_pages = config_pages;
  }

  auto ring_buffer = PerfRingBuffer::Allocate(perf_fd.get(), ring_buffer_pages);
  if (!ring_buffer.has_value()) {
    return base::nullopt;
  }

  return base::make_optional<EventReader>(cpu, *event_cfg.perf_attr(),
                                          std::move(perf_fd),
                                          std::move(ring_buffer.value()));
}

base::Optional<ParsedSample> EventReader::ReadUntilSample(
    std::function<void(uint64_t)> records_lost_callback) {
  for (;;) {
    char* event = ring_buffer_.ReadRecordNonconsuming();
    if (!event)
      return base::nullopt;  // caught up with the writer

    auto* event_hdr = reinterpret_cast<const perf_event_header*>(event);
    PERFETTO_DLOG("record header: [%zu][%zu][%zu]",
                  static_cast<size_t>(event_hdr->type),
                  static_cast<size_t>(event_hdr->misc),
                  static_cast<size_t>(event_hdr->size));

    if (event_hdr->type == PERF_RECORD_SAMPLE) {
      ParsedSample sample = ParseSampleRecord(cpu_, event);
      ring_buffer_.Consume(event_hdr->size);
      return base::make_optional(std::move(sample));
    }

    if (event_hdr->type == PERF_RECORD_LOST) {
      /*
       * struct {
       *   struct perf_event_header header;
       *   u64 id;
       *   u64 lost;
       *   struct sample_id sample_id;
       * };
       */
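      // Read the |lost| count, skipping over the header and the |id| field.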
      uint64_t records_lost = *reinterpret_cast<const uint64_t*>(
          event + sizeof(perf_event_header) + sizeof(uint64_t));

      records_lost_callback(records_lost);
      ring_buffer_.Consume(event_hdr->size);
      continue;  // keep looking for a sample
    }

    // Kernel had to throttle irqs.
    if (event_hdr->type == PERF_RECORD_THROTTLE ||
        event_hdr->type == PERF_RECORD_UNTHROTTLE) {
      ring_buffer_.Consume(event_hdr->size);
      continue;  // keep looking for a sample
    }

    PERFETTO_DFATAL_OR_ELOG("Unsupported event type [%zu]",
                            static_cast<size_t>(event_hdr->type));
    ring_buffer_.Consume(event_hdr->size);
  }
}

// Generally, samples can belong to any cpu (which can be recorded with
// PERF_SAMPLE_CPU). However, this producer uses only cpu-scoped events,
// therefore the cpu is already known.
ParsedSample EventReader::ParseSampleRecord(uint32_t cpu,
                                            const char* record_start) {
  if (event_attr_.sample_type &
      (~uint64_t(PERF_SAMPLE_TID | PERF_SAMPLE_TIME | PERF_SAMPLE_STACK_USER |
                 PERF_SAMPLE_REGS_USER))) {
    PERFETTO_FATAL("Unsupported sampling option");
  }

  auto* event_hdr = reinterpret_cast<const perf_event_header*>(record_start);
  size_t sample_size = event_hdr->size;

  ParsedSample sample = {};
  sample.cpu = cpu;
  sample.cpu_mode = event_hdr->misc & PERF_RECORD_MISC_CPUMODE_MASK;

  // Parse the payload, which consists of concatenated data for each
  // |attr.sample_type| flag.
  const char* parse_pos = record_start + sizeof(perf_event_header);
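  // For the flags accepted above, perf lays out the payload in this order
  // (see the PERF_RECORD_SAMPLE description in perf_event_open(2)):
  //   PERF_SAMPLE_TID:        u32 pid, u32 tid
  //   PERF_SAMPLE_TIME:       u64 time
  //   PERF_SAMPLE_REGS_USER:  u64 abi, then the registers (unless abi is NONE)
  //   PERF_SAMPLE_STACK_USER: u64 size, char data[size], u64 dyn_size
  //                           (dyn_size present only if size != 0)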

  if (event_attr_.sample_type & PERF_SAMPLE_TID) {
    uint32_t pid = 0;
    uint32_t tid = 0;
    parse_pos = ReadValue(&pid, parse_pos);
    parse_pos = ReadValue(&tid, parse_pos);
    sample.pid = static_cast<pid_t>(pid);
    sample.tid = static_cast<pid_t>(tid);
  }

  if (event_attr_.sample_type & PERF_SAMPLE_TIME) {
    parse_pos = ReadValue(&sample.timestamp, parse_pos);
  }

  if (event_attr_.sample_type & PERF_SAMPLE_REGS_USER) {
    // Can be empty, e.g. if we sampled a kernel thread.
    sample.regs = ReadPerfUserRegsData(&parse_pos);
  }

  if (event_attr_.sample_type & PERF_SAMPLE_STACK_USER) {
    uint64_t max_stack_size;  // the requested size
    parse_pos = ReadValue(&max_stack_size, parse_pos);
    PERFETTO_DLOG("max_stack_size: %" PRIu64 "", max_stack_size);

    const char* stack_start = parse_pos;
    parse_pos += max_stack_size;  // skip to dyn_size

    // Payload written conditionally, e.g. kernel threads don't have a
    // user stack.
    if (max_stack_size > 0) {
      uint64_t filled_stack_size;
      parse_pos = ReadValue(&filled_stack_size, parse_pos);
      PERFETTO_DLOG("filled_stack_size: %" PRIu64 "", filled_stack_size);

      // copy stack bytes into a vector
      size_t payload_sz = static_cast<size_t>(filled_stack_size);
      sample.stack.resize(payload_sz);
      memcpy(sample.stack.data(), stack_start, payload_sz);
    }
  }

  PERFETTO_CHECK(parse_pos == record_start + sample_size);
  return sample;
}

void EventReader::PauseEvents() {
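  // PERF_EVENT_IOC_DISABLE stops the kernel from producing further samples
  // for this event; records already in the ring buffer remain readable.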
  int ret = ioctl(perf_fd_.get(), PERF_EVENT_IOC_DISABLE);
  PERFETTO_CHECK(ret == 0);
}

}  // namespace profiling
}  // namespace perfetto