| /* |
| * Copyright (C) 2019 The Android Open Source Project |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| #include "src/profiling/perf/event_reader.h" |
| |
| #include <inttypes.h> |
| #include <linux/perf_event.h> |
| #include <stdint.h> |
| #include <string.h> |
| #include <sys/mman.h> |
| #include <sys/syscall.h> |
| #include <sys/types.h> |
| #include <unistd.h> |
| |
| #include <algorithm> |
| #include <new> |
| #include <utility> |
| #include <vector> |
| |
| #include "perfetto/ext/base/utils.h" |
| #include "src/profiling/perf/unwind_support.h" |
| |
| namespace perfetto { |
| namespace profiling { |
| |
| namespace { |
| |
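| // Reads a T from |ptr|, which may be unaligned (hence the memcpy), and |
| // returns the pointer advanced past the value read. |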
| template <typename T> |
| const char* ReadValue(T* value_out, const char* ptr) { |
| memcpy(value_out, reinterpret_cast<const void*>(ptr), sizeof(T)); |
| return ptr + sizeof(T); |
| } |
| |
| bool IsPowerOfTwo(size_t v) { |
| return (v != 0 && ((v & (v - 1)) == 0)); |
| } |
| |
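| // Thin wrapper around the perf_event_open syscall, for which glibc provides |
| // no wrapper. |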
| static int perf_event_open(perf_event_attr* attr, |
| pid_t pid, |
| int cpu, |
| int group_fd, |
| unsigned long flags) { |
| return static_cast<int>( |
| syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags)); |
| } |
| |
| // TODO(rsavitski): one EventConfig will correspond to N perf_event_open calls |
| // in the general case. Does it make sense to keep a single function which does |
| // the N calls, and then returns the group leader's fd? What about cases where |
| // we have >1 pid or >1 cpu to open for? Should the entire EventReader be |
| // cpu-scoped? |
| base::ScopedFile PerfEventOpen(const EventConfig& event_cfg) { |
| base::ScopedFile perf_fd{ |
| perf_event_open(event_cfg.perf_attr(), /*pid=*/-1, |
| static_cast<int>(event_cfg.target_cpu()), |
| /*group_fd=*/-1, PERF_FLAG_FD_CLOEXEC)}; |
| return perf_fd; |
| } |
| |
| } // namespace |
| |
| PerfRingBuffer::PerfRingBuffer(PerfRingBuffer&& other) noexcept |
| : metadata_page_(other.metadata_page_), |
| mmap_sz_(other.mmap_sz_), |
| data_buf_(other.data_buf_), |
| data_buf_sz_(other.data_buf_sz_) { |
| other.metadata_page_ = nullptr; |
| other.mmap_sz_ = 0; |
| other.data_buf_ = nullptr; |
| other.data_buf_sz_ = 0; |
| } |
| |
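| // Move-assignment is implemented as destroy + placement-new move-construction |
| // so that the munmap logic lives only in the destructor. |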
| PerfRingBuffer& PerfRingBuffer::operator=(PerfRingBuffer&& other) noexcept { |
| if (this == &other) |
| return *this; |
| |
| this->~PerfRingBuffer(); |
| new (this) PerfRingBuffer(std::move(other)); |
| return *this; |
| } |
| |
| PerfRingBuffer::~PerfRingBuffer() { |
| if (!valid()) |
| return; |
| |
| if (munmap(reinterpret_cast<void*>(metadata_page_), mmap_sz_) != 0) |
| PERFETTO_PLOG("failed munmap"); |
| } |
| |
| base::Optional<PerfRingBuffer> PerfRingBuffer::Allocate( |
| int perf_fd, |
| size_t data_page_count) { |
| // perf_event_open requires the data section of the ring buffer to span a |
| // power-of-two number of pages. |
| PERFETTO_CHECK(IsPowerOfTwo(data_page_count)); |
| |
| PerfRingBuffer ret; |
| |
| // mmap request is one page larger than the buffer size (for the metadata). |
| ret.data_buf_sz_ = data_page_count * base::kPageSize; |
| ret.mmap_sz_ = ret.data_buf_sz_ + base::kPageSize; |
| |
| // Mapping with PROT_WRITE puts the buffer in read-write mode: the kernel |
| // won't overwrite samples that userspace hasn't consumed yet, as it respects |
| // the data_tail that we advance after reading. |
| void* mmap_addr = mmap(nullptr, ret.mmap_sz_, PROT_READ | PROT_WRITE, |
| MAP_SHARED, perf_fd, 0); |
| if (mmap_addr == MAP_FAILED) { |
| PERFETTO_PLOG("failed mmap (check perf_event_mlock_kb in procfs)"); |
| return base::nullopt; |
| } |
| |
| // Expected layout is [ metadata page ] [ data pages ... ] |
| ret.metadata_page_ = reinterpret_cast<perf_event_mmap_page*>(mmap_addr); |
| ret.data_buf_ = reinterpret_cast<char*>(mmap_addr) + base::kPageSize; |
| PERFETTO_CHECK(ret.metadata_page_->data_offset == base::kPageSize); |
| PERFETTO_CHECK(ret.metadata_page_->data_size == ret.data_buf_sz_); |
| |
| return base::make_optional(std::move(ret)); |
| } |
| |
| // TODO(rsavitski): look into more specific barrier builtins. Copying simpleperf |
| // for now. See |perf_output_put_handle| in the kernel for the barrier |
| // requirements. |
| #pragma GCC diagnostic push |
| #if defined(__clang__) |
| #pragma GCC diagnostic ignored "-Watomic-implicit-seq-cst" |
| #endif |
| std::vector<char> PerfRingBuffer::ReadAvailable() { |
| if (!valid()) |
| return {}; |
| |
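| // The kernel advances |data_head| as it writes records; we advance |
| // |data_tail| as we consume them. Both are free-running byte counters. The |
| // barrier keeps the record reads below from being reordered before the |
| // |data_head| load. |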
| uint64_t write_offset = metadata_page_->data_head; |
| uint64_t read_offset = metadata_page_->data_tail; |
| __sync_synchronize(); // needs to be rmb() |
| |
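| // The buffer size is a power of two, so offsets can be reduced to buffer |
| // positions with a mask instead of a modulo. |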
| size_t read_pos = static_cast<size_t>(read_offset & (data_buf_sz_ - 1)); |
| size_t data_sz = static_cast<size_t>(write_offset - read_offset); |
| |
| if (data_sz == 0) { |
| return {}; |
| } |
| |
| // Copy out the new data, handling the wrap at the end of the ring. |
| std::vector<char> data(data_sz); |
| size_t copy_sz = std::min(data_sz, data_buf_sz_ - read_pos); |
| memcpy(data.data(), data_buf_ + read_pos, copy_sz); |
| if (copy_sz < data_sz) { |
| memcpy(data.data() + copy_sz, data_buf_, data_sz - copy_sz); |
| } |
| |
| // Consume the data: the barrier ensures the copies above complete before the |
| // kernel observes the new data_tail and starts reusing the space. |
| __sync_synchronize(); // needs to be mb() |
| metadata_page_->data_tail += data_sz; |
| |
| PERFETTO_LOG("WIP: consumed [%zu] bytes from ring buffer", data_sz); |
| return data; |
| } |
| #pragma GCC diagnostic pop |
| |
| EventReader::EventReader(const EventConfig& event_cfg, |
| base::ScopedFile perf_fd, |
| PerfRingBuffer ring_buffer) |
| : event_cfg_(event_cfg), |
| perf_fd_(std::move(perf_fd)), |
| ring_buffer_(std::move(ring_buffer)) {} |
| |
| EventReader::EventReader(EventReader&& other) noexcept |
| : event_cfg_(other.event_cfg_), |
| perf_fd_(std::move(other.perf_fd_)), |
| ring_buffer_(std::move(other.ring_buffer_)) {} |
| |
| EventReader& EventReader::operator=(EventReader&& other) noexcept { |
| if (this == &other) |
| return *this; |
| |
| this->~EventReader(); |
| new (this) EventReader(std::move(other)); |
| return *this; |
| } |
| |
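| // Opens the perf event described by |event_cfg| and maps its ring buffer |
| // (128 data pages plus the metadata page, i.e. 516 KB with 4 KB pages). |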
| base::Optional<EventReader> EventReader::ConfigureEvents( |
| const EventConfig& event_cfg) { |
| auto perf_fd = PerfEventOpen(event_cfg); |
| if (!perf_fd) { |
| PERFETTO_PLOG("failed perf_event_open"); |
| return base::nullopt; |
| } |
| |
| auto ring_buffer = |
| PerfRingBuffer::Allocate(perf_fd.get(), /*data_page_count=*/128); |
| if (!ring_buffer.has_value()) { |
| return base::nullopt; |
| } |
| |
| return base::make_optional<EventReader>(event_cfg, std::move(perf_fd), |
| std::move(ring_buffer.value())); |
| } |
| |
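| // Drains everything currently readable in the ring buffer and parses it one |
| // record at a time. |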
| void EventReader::ParseNextSampleBatch() { |
| std::vector<char> data = ring_buffer_.ReadAvailable(); |
| if (data.empty()) { |
| PERFETTO_LOG("no samples (work in progress)"); |
| return; |
| } |
| |
| for (const char* ptr = data.data(); ptr < data.data() + data.size();) { |
| if (!ParseSampleAndAdvance(&ptr)) |
| break; |
| } |
| } |
| |
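| // Every record in the ring buffer starts with a perf_event_header that gives |
| // the record's type and total size (header included). |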
| bool EventReader::ParseSampleAndAdvance(const char** ptr) { |
| const char* sample_start = *ptr; |
| auto* event_hdr = reinterpret_cast<const perf_event_header*>(sample_start); |
| |
| PERFETTO_LOG("WIP: event_header[%zu][%zu][%zu]", |
| static_cast<size_t>(event_hdr->type), |
| static_cast<size_t>(event_hdr->misc), |
| static_cast<size_t>(event_hdr->size)); |
| |
| // Guard against a malformed size: anything smaller than the header itself |
| // would make the caller's advance loop spin forever. |
| if (event_hdr->size < sizeof(perf_event_header)) { |
| PERFETTO_ELOG("Invalid record size, stopping parse"); |
| return false; |
| } |
| |
| if (event_hdr->type == PERF_RECORD_SAMPLE) { |
| ParsePerfRecordSample(sample_start, event_hdr->size); |
| } else { |
| PERFETTO_ELOG("Unsupported event type (work in progress)"); |
| } |
| |
| *ptr = sample_start + event_hdr->size; |
| return true; |
| } |
| |
| // TODO(rsavitski): actually handle the samples instead of logging. |
| void EventReader::ParsePerfRecordSample(const char* sample_start, |
| size_t sample_size) { |
| const perf_event_attr* cfg = event_cfg_.perf_attr(); |
| |
| if (cfg->sample_type & (~uint64_t(PERF_SAMPLE_TID | PERF_SAMPLE_STACK_USER | |
| PERF_SAMPLE_REGS_USER))) { |
| PERFETTO_ELOG("Unsupported sampling option (work in progress)"); |
| return; |
| } |
| |
| // Parse the payload, which consists of concatenated data for each flag set |
| // in |attr.sample_type|, laid out in the order the flags are defined in the |
| // perf_event_sample_format enum. |
| const char* parse_pos = sample_start + sizeof(perf_event_header); |
| |
| if (cfg->sample_type & PERF_SAMPLE_TID) { |
| uint32_t pid; |
| parse_pos = ReadValue(&pid, parse_pos); |
| PERFETTO_LOG("pid: %" PRIu32 "", pid); |
| |
| uint32_t tid; |
| parse_pos = ReadValue(&tid, parse_pos); |
| PERFETTO_LOG("tid: %" PRIu32 "", tid); |
| } |
| |
| if (cfg->sample_type & PERF_SAMPLE_REGS_USER) { |
| auto parsed_regs = ReadPerfUserRegsData(&parse_pos); |
| |
| if (parsed_regs) { |
| parsed_regs->IterateRegisters([](const char* name, uint64_t value) { |
| PERFETTO_LOG("reg[%s]: %" PRIx64 "", name, value); |
| }); |
| } |
| } |
| |
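| // PERF_SAMPLE_STACK_USER payload layout: a u64 with the requested size, that |
| // many raw stack bytes, then a u64 with the size the kernel actually filled |
| // in (omitted entirely when the requested size is zero). |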
| if (cfg->sample_type & PERF_SAMPLE_STACK_USER) { |
| uint64_t max_stack_size; // the requested size |
| parse_pos = ReadValue(&max_stack_size, parse_pos); |
| PERFETTO_LOG("max_stack_size: %" PRIu64 "", max_stack_size); |
| |
| parse_pos += max_stack_size; // skip raw data |
| |
| // The kernel omits this field when the requested stack sample size is zero. |
| if (max_stack_size > 0) { |
| uint64_t filled_stack_size; |
| parse_pos = ReadValue(&filled_stack_size, parse_pos); |
| PERFETTO_LOG("filled_stack_size: %" PRIu64 "", filled_stack_size); |
| } |
| } |
| |
| PERFETTO_CHECK(parse_pos == sample_start + sample_size); |
| } |
| |
| } // namespace profiling |
| } // namespace perfetto |