// Copyright 2013 The Flutter Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "impeller/renderer/backend/vulkan/compute_pass_vk.h"
#include "impeller/renderer/backend/vulkan/command_buffer_vk.h"
#include "impeller/renderer/backend/vulkan/compute_pipeline_vk.h"
#include "impeller/renderer/backend/vulkan/formats_vk.h"
#include "impeller/renderer/backend/vulkan/sampler_vk.h"
#include "impeller/renderer/backend/vulkan/texture_vk.h"
#include "vulkan/vulkan_structs.hpp"
namespace impeller {
ComputePassVK::ComputePassVK(std::shared_ptr<const Context> context,
std::shared_ptr<CommandBufferVK> command_buffer)
: ComputePass(std::move(context)),
command_buffer_(std::move(command_buffer)) {
// TODO(dnfield): This should be moved to caps. But for now keeping this
// in parallel with Metal.
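// Cache the device's maximum compute workgroup dimensions so that Compute()
// can clamp oversized grid dimensions before dispatching.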
max_wg_size_ = ContextVK::Cast(*context_)
.GetPhysicalDevice()
.getProperties()
.limits.maxComputeWorkGroupSize;
is_valid_ = true;
}
ComputePassVK::~ComputePassVK() = default;
bool ComputePassVK::IsValid() const {
return is_valid_;
}
void ComputePassVK::OnSetLabel(const std::string& label) {
if (label.empty()) {
return;
}
label_ = label;
}
// |ComputePass|
void ComputePassVK::SetCommandLabel(std::string_view label) {
#ifdef IMPELLER_DEBUG
command_buffer_->GetEncoder()->PushDebugGroup(label);
has_label_ = true;
#endif // IMPELLER_DEBUG
}
// |ComputePass|
void ComputePassVK::SetPipeline(
const std::shared_ptr<Pipeline<ComputePipelineDescriptor>>& pipeline) {
const auto& pipeline_vk = ComputePipelineVK::Cast(*pipeline);
const vk::CommandBuffer& command_buffer_vk =
command_buffer_->GetEncoder()->GetCommandBuffer();
command_buffer_vk.bindPipeline(vk::PipelineBindPoint::eCompute,
pipeline_vk.GetPipeline());
pipeline_layout_ = pipeline_vk.GetPipelineLayout();
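// Allocate a descriptor set matching this pipeline's layout from the
// encoder. The resource writes staged by BindResource are flushed into it
// later, in Compute().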
auto descriptor_result =
command_buffer_->GetEncoder()->AllocateDescriptorSets(
pipeline_vk.GetDescriptorSetLayout(), ContextVK::Cast(*context_));
if (!descriptor_result.ok()) {
return;
}
descriptor_set_ = descriptor_result.value();
pipeline_valid_ = true;
}
// |ComputePass|
fml::Status ComputePassVK::Compute(const ISize& grid_size) {
if (grid_size.IsEmpty() || !pipeline_valid_) {
bound_image_offset_ = 0u;
bound_buffer_offset_ = 0u;
descriptor_write_offset_ = 0u;
has_label_ = false;
pipeline_valid_ = false;
return fml::Status(fml::StatusCode::kCancelled,
"Invalid pipeline or empty grid.");
}
const ContextVK& context_vk = ContextVK::Cast(*context_);
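// The writes staged by BindResource leave dstSet unset, so point each one at
// the set allocated in SetPipeline before flushing them to the device in a
// single updateDescriptorSets call.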
for (auto i = 0u; i < descriptor_write_offset_; i++) {
write_workspace_[i].dstSet = descriptor_set_;
}
context_vk.GetDevice().updateDescriptorSets(descriptor_write_offset_,
write_workspace_.data(), 0u, {});
const vk::CommandBuffer& command_buffer_vk =
command_buffer_->GetEncoder()->GetCommandBuffer();
command_buffer_vk.bindDescriptorSets(
vk::PipelineBindPoint::eCompute, // bind point
pipeline_layout_, // layout
0, // first set
1, // set count
&descriptor_set_, // sets
0, // offset count
nullptr // offsets
);
int64_t width = grid_size.width;
int64_t height = grid_size.height;
// Special case for linear processing.
if (height == 1) {
command_buffer_vk.dispatch(width, 1, 1);
} else {
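// Halve each dimension until it fits within the device's reported maximum
// workgroup size, keeping this in line with the Metal backend's behavior.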
while (width > max_wg_size_[0]) {
width = std::max(static_cast<int64_t>(1), width / 2);
}
while (height > max_wg_size_[1]) {
height = std::max(static_cast<int64_t>(1), height / 2);
}
command_buffer_vk.dispatch(width, height, 1);
}
#ifdef IMPELLER_DEBUG
if (has_label_) {
command_buffer_->GetEncoder()->PopDebugGroup();
}
has_label_ = false;
#endif // IMPELLER_DEBUG
bound_image_offset_ = 0u;
bound_buffer_offset_ = 0u;
descriptor_write_offset_ = 0u;
has_label_ = false;
pipeline_valid_ = false;
return fml::Status();
}
// |ResourceBinder|
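// Compute has only one shader stage, so the stage and metadata arguments are
// not needed here; delegate to the binding-index overload below.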
bool ComputePassVK::BindResource(ShaderStage stage,
DescriptorType type,
const ShaderUniformSlot& slot,
const ShaderMetadata& metadata,
BufferView view) {
return BindResource(slot.binding, type, view);
}
// |ResourceBinder|
bool ComputePassVK::BindResource(
ShaderStage stage,
DescriptorType type,
const SampledImageSlot& slot,
const ShaderMetadata& metadata,
std::shared_ptr<const Texture> texture,
const std::unique_ptr<const Sampler>& sampler) {
if (bound_image_offset_ >= kMaxBindings) {
return false;
}
if (!texture->IsValid() || !sampler) {
return false;
}
const TextureVK& texture_vk = TextureVK::Cast(*texture);
const SamplerVK& sampler_vk = SamplerVK::Cast(*sampler);
if (!command_buffer_->GetEncoder()->Track(texture)) {
return false;
}
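// Record the sampled image into the workspace and stage a descriptor write
// for it. The write's pImageInfo points into image_workspace_, a member of
// the pass, so the pointer stays valid until the writes are flushed in
// Compute().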
vk::DescriptorImageInfo image_info;
image_info.imageLayout = vk::ImageLayout::eShaderReadOnlyOptimal;
image_info.sampler = sampler_vk.GetSampler();
image_info.imageView = texture_vk.GetImageView();
image_workspace_[bound_image_offset_++] = image_info;
vk::WriteDescriptorSet write_set;
write_set.dstBinding = slot.binding;
write_set.descriptorCount = 1u;
write_set.descriptorType = ToVKDescriptorType(type);
write_set.pImageInfo = &image_workspace_[bound_image_offset_ - 1];
write_workspace_[descriptor_write_offset_++] = write_set;
return true;
}
bool ComputePassVK::BindResource(size_t binding,
DescriptorType type,
const BufferView& view) {
if (bound_buffer_offset_ >= kMaxBindings) {
return false;
}
const std::shared_ptr<const DeviceBuffer>& device_buffer = view.buffer;
auto buffer = DeviceBufferVK::Cast(*device_buffer).GetBuffer();
if (!buffer) {
return false;
}
if (!command_buffer_->GetEncoder()->Track(device_buffer)) {
return false;
}
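// Stage the buffer range the same way; pBufferInfo points into
// buffer_workspace_, which (like the image workspace) outlives the staged
// writes until they are applied in Compute().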
uint32_t offset = view.range.offset;
vk::DescriptorBufferInfo buffer_info;
buffer_info.buffer = buffer;
buffer_info.offset = offset;
buffer_info.range = view.range.length;
buffer_workspace_[bound_buffer_offset_++] = buffer_info;
vk::WriteDescriptorSet write_set;
write_set.dstBinding = binding;
write_set.descriptorCount = 1u;
write_set.descriptorType = ToVKDescriptorType(type);
write_set.pBufferInfo = &buffer_workspace_[bound_buffer_offset_ - 1];
write_workspace_[descriptor_write_offset_++] = write_set;
return true;
}
// Note:
// https://github.com/KhronosGroup/Vulkan-Docs/wiki/Synchronization-Examples
// Seems to suggest that anything more finely grained than a global memory
// barrier is likely to be weakened into a global barrier. Confirming this on
// mobile devices will require some experimentation.
// |ComputePass|
void ComputePassVK::AddBufferMemoryBarrier() {
vk::MemoryBarrier barrier;
barrier.srcAccessMask = vk::AccessFlagBits::eShaderWrite;
barrier.dstAccessMask = vk::AccessFlagBits::eShaderRead;
command_buffer_->GetEncoder()->GetCommandBuffer().pipelineBarrier(
vk::PipelineStageFlagBits::eComputeShader,
vk::PipelineStageFlagBits::eComputeShader, {}, 1, &barrier, 0, {}, 0, {});
}
// |ComputePass|
void ComputePassVK::AddTextureMemoryBarrier() {
vk::MemoryBarrier barrier;
barrier.srcAccessMask = vk::AccessFlagBits::eShaderWrite;
barrier.dstAccessMask = vk::AccessFlagBits::eShaderRead;
command_buffer_->GetEncoder()->GetCommandBuffer().pipelineBarrier(
vk::PipelineStageFlagBits::eComputeShader,
vk::PipelineStageFlagBits::eComputeShader, {}, 1, &barrier, 0, {}, 0, {});
}
// |ComputePass|
bool ComputePassVK::EncodeCommands() const {
// Since we only use global memory barrier, we don't have to worry about
// compute to compute dependencies across cmd buffers. Instead, we pessimize
// here and assume that we wrote to a storage image or buffer and that a
// render pass will read from it. If there are ever scenarios where we end up
// with compute to compute dependencies this should be revisited.
// This does not currently handle image barriers as we do not use them
// for anything.
vk::MemoryBarrier barrier;
barrier.srcAccessMask = vk::AccessFlagBits::eShaderWrite;
barrier.dstAccessMask =
vk::AccessFlagBits::eIndexRead | vk::AccessFlagBits::eVertexAttributeRead;
command_buffer_->GetEncoder()->GetCommandBuffer().pipelineBarrier(
vk::PipelineStageFlagBits::eComputeShader,
vk::PipelineStageFlagBits::eVertexInput, {}, 1, &barrier, 0, {}, 0, {});
return true;
}
} // namespace impeller