| // Copyright 2013 The Flutter Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| #include "impeller/renderer/backend/vulkan/compute_pass_vk.h" |
| |
| #include "fml/status.h" |
| #include "impeller/renderer/backend/vulkan/command_buffer_vk.h" |
| #include "impeller/renderer/backend/vulkan/compute_pipeline_vk.h" |
| #include "impeller/renderer/backend/vulkan/formats_vk.h" |
| #include "impeller/renderer/backend/vulkan/sampler_vk.h" |
| #include "impeller/renderer/backend/vulkan/texture_vk.h" |
| #include "vulkan/vulkan_structs.hpp" |
| |
| namespace impeller { |
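| |
| // Rough usage sketch (variable names here are illustrative, not from this |
| // file): |
| // |
| //   auto pass = command_buffer->CreateComputePass(); |
| //   pass->SetPipeline(compute_pipeline); |
| //   pass->BindResource(0u, DescriptorType::kStorageBuffer, buffer_view); |
| //   pass->Compute(ISize(width, height)); |
| //   pass->EncodeCommands(); |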
| |
| ComputePassVK::ComputePassVK(std::shared_ptr<const Context> context, |
| std::shared_ptr<CommandBufferVK> command_buffer) |
| : ComputePass(std::move(context)), |
| command_buffer_(std::move(command_buffer)) { |
| // TODO(dnfield): This should be moved to caps. But for now, keeping this |
| // in parallel with Metal. |
| max_wg_size_ = ContextVK::Cast(*context_) |
| .GetPhysicalDevice() |
| .getProperties() |
| .limits.maxComputeWorkGroupSize; |
| is_valid_ = true; |
| } |
| |
| ComputePassVK::~ComputePassVK() = default; |
| |
| bool ComputePassVK::IsValid() const { |
| return is_valid_; |
| } |
| |
| void ComputePassVK::OnSetLabel(const std::string& label) { |
| if (label.empty()) { |
| return; |
| } |
| label_ = label; |
| } |
| |
| // |ComputePass| |
| void ComputePassVK::SetCommandLabel(std::string_view label) { |
| #ifdef IMPELLER_DEBUG |
| command_buffer_->GetEncoder()->PushDebugGroup(label); |
| has_label_ = true; |
| #endif // IMPELLER_DEBUG |
| } |
| |
| // |ComputePass| |
| void ComputePassVK::SetPipeline( |
| const std::shared_ptr<Pipeline<ComputePipelineDescriptor>>& pipeline) { |
| const auto& pipeline_vk = ComputePipelineVK::Cast(*pipeline); |
| const vk::CommandBuffer& command_buffer_vk = |
| command_buffer_->GetEncoder()->GetCommandBuffer(); |
| command_buffer_vk.bindPipeline(vk::PipelineBindPoint::eCompute, |
| pipeline_vk.GetPipeline()); |
| pipeline_layout_ = pipeline_vk.GetPipelineLayout(); |
| |
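| // Allocate a fresh descriptor set for this dispatch. The writes buffered by |
| // BindResource() are pointed at this set and flushed later, in Compute(). |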
| auto descriptor_result = |
| command_buffer_->GetEncoder()->AllocateDescriptorSets( |
| pipeline_vk.GetDescriptorSetLayout(), ContextVK::Cast(*context_)); |
| if (!descriptor_result.ok()) { |
| return; |
| } |
| descriptor_set_ = descriptor_result.value(); |
| pipeline_valid_ = true; |
| } |
| |
| // |ComputePass| |
| fml::Status ComputePassVK::Compute(const ISize& grid_size) { |
| if (grid_size.IsEmpty() || !pipeline_valid_) { |
| bound_image_offset_ = 0u; |
| bound_buffer_offset_ = 0u; |
| descriptor_write_offset_ = 0u; |
| has_label_ = false; |
| pipeline_valid_ = false; |
| return fml::Status(fml::StatusCode::kCancelled, |
| "Invalid pipeline or empty grid."); |
| } |
| |
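| // Point the buffered descriptor writes at the set allocated in SetPipeline() |
| // and flush them to the device in a single update. |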
| const ContextVK& context_vk = ContextVK::Cast(*context_); |
| for (auto i = 0u; i < descriptor_write_offset_; i++) { |
| write_workspace_[i].dstSet = descriptor_set_; |
| } |
| |
| context_vk.GetDevice().updateDescriptorSets(descriptor_write_offset_, |
| write_workspace_.data(), 0u, {}); |
| const vk::CommandBuffer& command_buffer_vk = |
| command_buffer_->GetEncoder()->GetCommandBuffer(); |
| |
| command_buffer_vk.bindDescriptorSets( |
| vk::PipelineBindPoint::eCompute, // bind point |
| pipeline_layout_, // layout |
| 0, // first set |
| 1, // set count |
| &descriptor_set_, // sets |
| 0, // offset count |
| nullptr // offsets |
| ); |
| |
| int64_t width = grid_size.width; |
| int64_t height = grid_size.height; |
| |
| // Special case for linear processing. |
| if (height == 1) { |
| command_buffer_vk.dispatch(width, 1, 1); |
| } else { |
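| // Halve any oversized dimension until it fits within the device's reported |
| // maxComputeWorkGroupSize limits. |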
| while (width > max_wg_size_[0]) { |
| width = std::max(static_cast<int64_t>(1), width / 2); |
| } |
| while (height > max_wg_size_[1]) { |
| height = std::max(static_cast<int64_t>(1), height / 2); |
| } |
| command_buffer_vk.dispatch(width, height, 1); |
| } |
| |
| #ifdef IMPELLER_DEBUG |
| if (has_label_) { |
| command_buffer_->GetEncoder()->PopDebugGroup(); |
| } |
| has_label_ = false; |
| #endif // IMPELLER_DEBUG |
| |
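| // Reset per-dispatch state; a subsequent Compute() requires SetPipeline() |
| // and fresh resource bindings. |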
| bound_image_offset_ = 0u; |
| bound_buffer_offset_ = 0u; |
| descriptor_write_offset_ = 0u; |
| has_label_ = false; |
| pipeline_valid_ = false; |
| |
| return fml::Status(); |
| } |
| |
| // |ResourceBinder| |
| bool ComputePassVK::BindResource(ShaderStage stage, |
| DescriptorType type, |
| const ShaderUniformSlot& slot, |
| const ShaderMetadata& metadata, |
| BufferView view) { |
| return BindResource(slot.binding, type, view); |
| } |
| |
| // |ResourceBinder| |
| bool ComputePassVK::BindResource( |
| ShaderStage stage, |
| DescriptorType type, |
| const SampledImageSlot& slot, |
| const ShaderMetadata& metadata, |
| std::shared_ptr<const Texture> texture, |
| const std::unique_ptr<const Sampler>& sampler) { |
| if (bound_image_offset_ >= kMaxBindings) { |
| return false; |
| } |
| if (!texture->IsValid() || !sampler) { |
| return false; |
| } |
| const TextureVK& texture_vk = TextureVK::Cast(*texture); |
| const SamplerVK& sampler_vk = SamplerVK::Cast(*sampler); |
| |
| if (!command_buffer_->GetEncoder()->Track(texture)) { |
| return false; |
| } |
| |
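| // Stash the image info in the fixed workspace array; the write set below |
| // stores a pointer into it, which must remain valid until the writes are |
| // flushed in Compute(). |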
| vk::DescriptorImageInfo image_info; |
| image_info.imageLayout = vk::ImageLayout::eShaderReadOnlyOptimal; |
| image_info.sampler = sampler_vk.GetSampler(); |
| image_info.imageView = texture_vk.GetImageView(); |
| image_workspace_[bound_image_offset_++] = image_info; |
| |
| vk::WriteDescriptorSet write_set; |
| write_set.dstBinding = slot.binding; |
| write_set.descriptorCount = 1u; |
| write_set.descriptorType = ToVKDescriptorType(type); |
| write_set.pImageInfo = &image_workspace_[bound_image_offset_ - 1]; |
| |
| write_workspace_[descriptor_write_offset_++] = write_set; |
| return true; |
| } |
| |
| bool ComputePassVK::BindResource(size_t binding, |
| DescriptorType type, |
| const BufferView& view) { |
| if (bound_buffer_offset_ >= kMaxBindings) { |
| return false; |
| } |
| |
| const std::shared_ptr<const DeviceBuffer>& device_buffer = view.buffer; |
| auto buffer = DeviceBufferVK::Cast(*device_buffer).GetBuffer(); |
| if (!buffer) { |
| return false; |
| } |
| |
| if (!command_buffer_->GetEncoder()->Track(device_buffer)) { |
| return false; |
| } |
| |
| uint32_t offset = view.range.offset; |
| |
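| // As with images, the buffer info lives in a workspace array so the pointer |
| // in the write set stays valid until Compute() flushes the writes. |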
| vk::DescriptorBufferInfo buffer_info; |
| buffer_info.buffer = buffer; |
| buffer_info.offset = offset; |
| buffer_info.range = view.range.length; |
| buffer_workspace_[bound_buffer_offset_++] = buffer_info; |
| |
| vk::WriteDescriptorSet write_set; |
| write_set.dstBinding = binding; |
| write_set.descriptorCount = 1u; |
| write_set.descriptorType = ToVKDescriptorType(type); |
| write_set.pBufferInfo = &buffer_workspace_[bound_buffer_offset_ - 1]; |
| |
| write_workspace_[descriptor_write_offset_++] = write_set; |
| return true; |
| } |
| |
| // Note: |
| // https://github.com/KhronosGroup/Vulkan-Docs/wiki/Synchronization-Examples |
| // seems to suggest that anything more fine-grained than a global memory |
| // barrier is likely to be weakened into a global barrier. Confirming this on |
| // mobile devices will require some experimentation. |
| |
| // |ComputePass| |
| void ComputePassVK::AddBufferMemoryBarrier() { |
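| // Make prior compute shader writes visible to subsequent compute shader |
| // reads within this pass. |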
| vk::MemoryBarrier barrier; |
| barrier.srcAccessMask = vk::AccessFlagBits::eShaderWrite; |
| barrier.dstAccessMask = vk::AccessFlagBits::eShaderRead; |
| |
| command_buffer_->GetEncoder()->GetCommandBuffer().pipelineBarrier( |
| vk::PipelineStageFlagBits::eComputeShader, |
| vk::PipelineStageFlagBits::eComputeShader, {}, 1, &barrier, 0, {}, 0, {}); |
| } |
| |
| // |ComputePass| |
| void ComputePassVK::AddTextureMemoryBarrier() { |
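| // Identical to the buffer barrier above; per the note, a global memory |
| // barrier is used rather than a per-image barrier. |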
| vk::MemoryBarrier barrier; |
| barrier.srcAccessMask = vk::AccessFlagBits::eShaderWrite; |
| barrier.dstAccessMask = vk::AccessFlagBits::eShaderRead; |
| |
| command_buffer_->GetEncoder()->GetCommandBuffer().pipelineBarrier( |
| vk::PipelineStageFlagBits::eComputeShader, |
| vk::PipelineStageFlagBits::eComputeShader, {}, 1, &barrier, 0, {}, 0, {}); |
| } |
| |
| // |ComputePass| |
| bool ComputePassVK::EncodeCommands() const { |
| // Since we only use a global memory barrier, we don't have to worry about |
| // compute-to-compute dependencies across command buffers. Instead, we |
| // pessimize here and assume that we wrote to a storage image or buffer and |
| // that a render pass will read from it. If there are ever scenarios where we |
| // end up with compute-to-compute dependencies, this should be revisited. |
| |
| // This does not currently handle image barriers as we do not use them |
| // for anything. |
| vk::MemoryBarrier barrier; |
| barrier.srcAccessMask = vk::AccessFlagBits::eShaderWrite; |
| barrier.dstAccessMask = |
| vk::AccessFlagBits::eIndexRead | vk::AccessFlagBits::eVertexAttributeRead; |
| |
| command_buffer_->GetEncoder()->GetCommandBuffer().pipelineBarrier( |
| vk::PipelineStageFlagBits::eComputeShader, |
| vk::PipelineStageFlagBits::eVertexInput, {}, 1, &barrier, 0, {}, 0, {}); |
| |
| return true; |
| } |
| |
| } // namespace impeller |