// Copyright 2013 The Flutter Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "impeller/renderer/backend/vulkan/gpu_tracer_vk.h"
#include <memory>
#include <optional>
#include <thread>
#include <utility>
#include "fml/logging.h"
#include "fml/trace_event.h"
#include "impeller/base/validation.h"
#include "impeller/renderer/backend/vulkan/context_vk.h"
#include "vulkan/vulkan.hpp"
namespace impeller {
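// Number of timestamp query slots allocated for each frame's query pool. Each
// traced command buffer consumes two slots: one start and one end timestamp.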
static constexpr uint32_t kPoolSize = 1024u;
GPUTracerVK::GPUTracerVK(const std::shared_ptr<DeviceHolder>& device_holder)
: device_holder_(device_holder) {
timestamp_period_ = device_holder_->GetPhysicalDevice()
.getProperties()
.limits.timestampPeriod;
if (timestamp_period_ <= 0) {
// The device does not support timestamp queries.
return;
}
// Disable tracing in release mode.
#ifdef IMPELLER_DEBUG
enabled_ = true;
#endif
}
bool GPUTracerVK::IsEnabled() const {
return enabled_;
}
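// Marks the beginning of a frame on the raster thread. The calling thread is
// recorded so that only command buffers encoded on it are timed.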
void GPUTracerVK::MarkFrameStart() {
FML_DCHECK(!in_frame_);
in_frame_ = true;
raster_thread_id_ = std::this_thread::get_id();
}
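// Marks the end of the current frame and advances to the next trace state,
// resetting it for reuse.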
void GPUTracerVK::MarkFrameEnd() {
if (!enabled_) {
return;
}
Lock lock(trace_state_mutex_);
current_state_ = (current_state_ + 1) % kTraceStatesSize;
auto& state = trace_states_[current_state_];
// If there are still pending buffers on the trace state we're switching to,
// that means that a cmd buffer we were relying on to signal this likely
// never finished. This shouldn't happen unless there is a bug in the
// encoder logic. We set it to zero anyway to prevent a validation error
// from becoming a memory leak.
FML_DCHECK(state.pending_buffers == 0u);
state.pending_buffers = 0;
state.current_index = 0;
in_frame_ = false;
}
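// Creates a probe tied to this tracer. The probe reports completion back via
// OnFenceComplete when it is destroyed.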
std::unique_ptr<GPUProbe> GPUTracerVK::CreateGPUProbe() {
return std::make_unique<GPUProbe>(weak_from_this());
}
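// Writes a top-of-pipe timestamp for the given command buffer, lazily creating
// and resetting the frame's query pool on the first recording of the frame.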
void GPUTracerVK::RecordCmdBufferStart(const vk::CommandBuffer& buffer,
GPUProbe& probe) {
if (!enabled_ || std::this_thread::get_id() != raster_thread_id_ ||
!in_frame_) {
return;
}
Lock lock(trace_state_mutex_);
auto& state = trace_states_[current_state_];
// Initialize the query pool for the first query on each frame.
if (state.pending_buffers == 0) {
vk::QueryPoolCreateInfo info;
info.queryCount = kPoolSize;
info.queryType = vk::QueryType::eTimestamp;
auto [status, pool] =
device_holder_->GetDevice().createQueryPoolUnique(info);
if (status != vk::Result::eSuccess) {
VALIDATION_LOG << "Failed to create query pool.";
return;
}
trace_states_[current_state_].query_pool = std::move(pool);
buffer.resetQueryPool(trace_states_[current_state_].query_pool.get(), 0,
kPoolSize);
}
// We size the query pool to kPoolSize, but Flutter applications can create an
// unbounded number of command buffers per frame. If the pool is exhausted,
// stop recording timestamps for the rest of the frame.
if (state.current_index >= kPoolSize) {
return;
}
buffer.writeTimestamp(vk::PipelineStageFlagBits::eTopOfPipe,
trace_states_[current_state_].query_pool.get(),
state.current_index);
state.current_index += 1;
if (!probe.index_.has_value()) {
state.pending_buffers += 1;
probe.index_ = current_state_;
}
}
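// Writes a bottom-of-pipe timestamp for the given command buffer, pairing with
// the timestamp written in RecordCmdBufferStart.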
void GPUTracerVK::RecordCmdBufferEnd(const vk::CommandBuffer& buffer,
GPUProbe& probe) {
if (!enabled_ || std::this_thread::get_id() != raster_thread_id_ ||
!in_frame_) {
return;
}
Lock lock(trace_state_mutex_);
GPUTraceState& state = trace_states_[current_state_];
if (state.current_index >= kPoolSize) {
return;
}
buffer.writeTimestamp(vk::PipelineStageFlagBits::eBottomOfPipe,
state.query_pool.get(), state.current_index);
state.current_index += 1;
if (!probe.index_.has_value()) {
state.pending_buffers += 1;
probe.index_ = current_state_;
}
}
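// Invoked via GPUProbe destruction once a command buffer recorded for the
// given frame has completed. When the last pending buffer for that frame
// completes, the query results are read back and reported as a trace counter.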
void GPUTracerVK::OnFenceComplete(size_t frame_index) {
if (!enabled_) {
return;
}
Lock lock(trace_state_mutex_);
GPUTraceState& state = trace_states_[frame_index];
FML_DCHECK(state.pending_buffers > 0);
state.pending_buffers -= 1;
if (state.pending_buffers == 0) {
auto buffer_count = state.current_index;
std::vector<uint64_t> bits(buffer_count);
auto result = device_holder_->GetDevice().getQueryPoolResults(
state.query_pool.get(), 0, state.current_index,
buffer_count * sizeof(uint64_t), bits.data(), sizeof(uint64_t),
vk::QueryResultFlagBits::e64);
// This may return VK_NOT_READY if the query couldn't be completed, or if
// there are queries still pending. From local testing, this happens
// occasionally on very expensive frames. It's unclear if we can do anything
// about this, because by design this should only signal after all cmd
// buffers have signaled. Adding VK_QUERY_RESULT_WAIT_BIT to the flags
// passed to getQueryPoolResults seems like it would fix this, but actually
// seems to result in more stuck query errors. Better to just drop them and
// move on.
if (result != vk::Result::eSuccess) {
return;
}
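// Approximate the frame's GPU duration as the span between the earliest and
// latest recorded timestamps, scaled by the device's timestampPeriod
// (nanoseconds per tick) and converted to milliseconds.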
uint64_t smallest_timestamp = std::numeric_limits<uint64_t>::max();
uint64_t largest_timestamp = 0;
for (auto i = 0u; i < bits.size(); i++) {
smallest_timestamp = std::min(smallest_timestamp, bits[i]);
largest_timestamp = std::max(largest_timestamp, bits[i]);
}
auto gpu_ms =
(((largest_timestamp - smallest_timestamp) * timestamp_period_) /
1000000);
FML_TRACE_COUNTER("flutter", "GPUTracer",
reinterpret_cast<int64_t>(this), // Trace Counter ID
"FrameTimeMS", gpu_ms);
}
}
GPUProbe::GPUProbe(const std::weak_ptr<GPUTracerVK>& tracer)
: tracer_(tracer) {}
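// Notifies the tracer (if still alive) that the probed command buffer has
// completed so the frame's trace state can be finalized.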
GPUProbe::~GPUProbe() {
if (!index_.has_value()) {
return;
}
auto tracer = tracer_.lock();
if (!tracer) {
return;
}
tracer->OnFenceComplete(index_.value());
}
void GPUProbe::RecordCmdBufferStart(const vk::CommandBuffer& buffer) {
auto tracer = tracer_.lock();
if (!tracer) {
return;
}
tracer->RecordCmdBufferStart(buffer, *this);
}
void GPUProbe::RecordCmdBufferEnd(const vk::CommandBuffer& buffer) {
auto tracer = tracer_.lock();
if (!tracer) {
return;
}
tracer->RecordCmdBufferEnd(buffer, *this);
}
} // namespace impeller