//
// Copyright 2020 The ANGLE Project Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
// CommandProcessor.cpp:
// Implements the class methods for CommandProcessor.
//
#include "libANGLE/renderer/vulkan/CommandProcessor.h"
#include "common/system_utils.h"
#include "libANGLE/renderer/vulkan/RendererVk.h"
#include "libANGLE/renderer/vulkan/SyncVk.h"
namespace rx
{
namespace vk
{
namespace
{
constexpr bool kOutputVmaStatsString = false;
// When suballocation garbage exceeds this size, we may wait for the GPU to finish and free up
// some memory for allocation.
constexpr VkDeviceSize kMaxBufferSuballocationGarbageSize = 64 * 1024 * 1024;
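// Fills out a VkSubmitInfo from an optional primary command buffer, a set of wait semaphores
// with matching stage masks, and an optional signal semaphore. The caller must pass a
// zero-initialized submitInfo.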
void InitializeSubmitInfo(VkSubmitInfo *submitInfo,
const PrimaryCommandBuffer &commandBuffer,
const std::vector<VkSemaphore> &waitSemaphores,
const std::vector<VkPipelineStageFlags> &waitSemaphoreStageMasks,
const VkSemaphore &signalSemaphore)
{
// Verify that the submitInfo has been zeroed out.
ASSERT(submitInfo->signalSemaphoreCount == 0);
ASSERT(waitSemaphores.size() == waitSemaphoreStageMasks.size());
submitInfo->sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
submitInfo->commandBufferCount = commandBuffer.valid() ? 1 : 0;
submitInfo->pCommandBuffers = commandBuffer.ptr();
submitInfo->waitSemaphoreCount = static_cast<uint32_t>(waitSemaphores.size());
submitInfo->pWaitSemaphores = waitSemaphores.empty() ? nullptr : waitSemaphores.data();
submitInfo->pWaitDstStageMask = waitSemaphoreStageMasks.data();
if (signalSemaphore != VK_NULL_HANDLE)
{
submitInfo->signalSemaphoreCount = 1;
submitInfo->pSignalSemaphores = &signalSemaphore;
}
}
} // namespace
// SharedFence implementation
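// SharedFence is a copyable, ref-counted handle to a VkFence. When the last reference is
// released, the underlying fence is returned to the FenceRecycler for reuse (release()) or
// destroyed outright (destroy()), so fences are not reallocated on every submission.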
SharedFence::SharedFence() : mRefCountedFence(nullptr), mRecycler(nullptr) {}
SharedFence::SharedFence(const SharedFence &other)
: mRefCountedFence(other.mRefCountedFence), mRecycler(other.mRecycler)
{
if (mRefCountedFence != nullptr)
{
mRefCountedFence->addRef();
}
}
SharedFence::SharedFence(SharedFence &&other)
: mRefCountedFence(other.mRefCountedFence), mRecycler(other.mRecycler)
{
other.mRecycler = nullptr;
other.mRefCountedFence = nullptr;
}
SharedFence::~SharedFence()
{
release();
}
VkResult SharedFence::init(VkDevice device, FenceRecycler *recycler)
{
ASSERT(mRecycler == nullptr && mRefCountedFence == nullptr);
Fence fence;
// First try to fetch a fence from the recycler. If that fails, create a new VkFence.
recycler->fetch(device, &fence);
if (!fence.valid())
{
VkFenceCreateInfo fenceCreateInfo = {};
fenceCreateInfo.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;
fenceCreateInfo.flags = 0;
VkResult result = fence.init(device, fenceCreateInfo);
if (result != VK_SUCCESS)
{
return result;
}
}
// Create a new refcounted object to hold onto the VkFence.
mRefCountedFence = new RefCounted<Fence>(std::move(fence));
mRefCountedFence->addRef();
mRecycler = recycler;
return VK_SUCCESS;
}
SharedFence &SharedFence::operator=(const SharedFence &other)
{
if (this == &other)
{
return *this;
}
release();
mRecycler = other.mRecycler;
if (other.mRefCountedFence != nullptr)
{
mRefCountedFence = other.mRefCountedFence;
mRefCountedFence->addRef();
}
return *this;
}
SharedFence &SharedFence::operator=(SharedFence &&other)
{
if (this == &other)
{
return *this;
}
release();
mRecycler = other.mRecycler;
mRefCountedFence = other.mRefCountedFence;
other.mRecycler = nullptr;
other.mRefCountedFence = nullptr;
return *this;
}
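// Unlike release(), destroy() destroys the underlying VkFence instead of returning it to the
// recycler. It is used on paths such as device loss where recycling the fence is not safe.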
void SharedFence::destroy(VkDevice device)
{
if (mRefCountedFence != nullptr)
{
mRefCountedFence->releaseRef();
if (!mRefCountedFence->isReferenced())
{
mRefCountedFence->get().destroy(device);
SafeDelete(mRefCountedFence);
}
else
{
mRefCountedFence = nullptr;
}
mRecycler = nullptr;
}
}
void SharedFence::release()
{
if (mRefCountedFence != nullptr)
{
mRefCountedFence->releaseRef();
if (!mRefCountedFence->isReferenced())
{
mRecycler->recycle(std::move(mRefCountedFence->get()));
ASSERT(!mRefCountedFence->get().valid());
SafeDelete(mRefCountedFence);
}
else
{
mRefCountedFence = nullptr;
}
mRecycler = nullptr;
}
}
SharedFence::operator bool() const
{
ASSERT(mRefCountedFence == nullptr || mRefCountedFence->isReferenced());
return mRefCountedFence != nullptr;
}
VkResult SharedFence::getStatus(VkDevice device) const
{
if (mRefCountedFence != nullptr)
{
return mRefCountedFence->get().getStatus(device);
}
return VK_SUCCESS;
}
VkResult SharedFence::wait(VkDevice device, uint64_t timeout) const
{
if (mRefCountedFence != nullptr)
{
ANGLE_TRACE_EVENT0("gpu.angle", "SharedFence::wait");
return mRefCountedFence->get().wait(device, timeout);
}
return VK_SUCCESS;
}
// FenceRecycler implementation
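// FenceRecycler maintains a mutex-guarded pool of VkFences. fetch() hands out a recycled
// fence (reset so it is ready for reuse) when one is available; recycle() returns a fence to
// the pool.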
void FenceRecycler::destroy(Context *context)
{
std::lock_guard<std::mutex> lock(mMutex);
mRecyler.destroy(context->getDevice());
}
void FenceRecycler::fetch(VkDevice device, Fence *fenceOut)
{
ASSERT(fenceOut != nullptr && !fenceOut->valid());
std::lock_guard<std::mutex> lock(mMutex);
if (!mRecyler.empty())
{
mRecyler.fetch(fenceOut);
fenceOut->reset(device);
}
}
void FenceRecycler::recycle(Fence &&fence)
{
std::lock_guard<std::mutex> lock(mMutex);
mRecyler.recycle(std::move(fence));
}
// CommandProcessorTask implementation
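// CommandProcessorTask describes one unit of work (a flush, submit, one-off submission, or
// present) that is queued to the CommandProcessor's worker thread.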
void CommandProcessorTask::initTask()
{
mTask = CustomTask::Invalid;
mOutsideRenderPassCommandBuffer = nullptr;
mRenderPassCommandBuffer = nullptr;
mRenderPass = nullptr;
mSemaphore = VK_NULL_HANDLE;
mOneOffWaitSemaphore = VK_NULL_HANDLE;
mOneOffWaitSemaphoreStageMask = 0;
mPresentInfo = {};
mPresentInfo.pResults = nullptr;
mPresentInfo.pSwapchains = nullptr;
mPresentInfo.pImageIndices = nullptr;
mPresentInfo.pNext = nullptr;
mPresentInfo.pWaitSemaphores = nullptr;
mPresentFence = VK_NULL_HANDLE;
mSwapchainStatus = nullptr;
mOneOffCommandBuffer = VK_NULL_HANDLE;
mPriority = egl::ContextPriority::Medium;
mProtectionType = ProtectionType::InvalidEnum;
}
void CommandProcessorTask::initFlushWaitSemaphores(
ProtectionType protectionType,
egl::ContextPriority priority,
std::vector<VkSemaphore> &&waitSemaphores,
std::vector<VkPipelineStageFlags> &&waitSemaphoreStageMasks)
{
mTask = CustomTask::FlushWaitSemaphores;
mPriority = priority;
mProtectionType = protectionType;
mWaitSemaphores = std::move(waitSemaphores);
mWaitSemaphoreStageMasks = std::move(waitSemaphoreStageMasks);
}
void CommandProcessorTask::initOutsideRenderPassProcessCommands(
ProtectionType protectionType,
egl::ContextPriority priority,
OutsideRenderPassCommandBufferHelper *commandBuffer)
{
mTask = CustomTask::ProcessOutsideRenderPassCommands;
mOutsideRenderPassCommandBuffer = commandBuffer;
mPriority = priority;
mProtectionType = protectionType;
}
void CommandProcessorTask::initRenderPassProcessCommands(
ProtectionType protectionType,
egl::ContextPriority priority,
RenderPassCommandBufferHelper *commandBuffer,
const RenderPass *renderPass)
{
mTask = CustomTask::ProcessRenderPassCommands;
mRenderPassCommandBuffer = commandBuffer;
mRenderPass = renderPass;
mPriority = priority;
mProtectionType = protectionType;
}
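// Deep-copies a VkPresentInfoKHR (and the pNext structures the processor understands) into
// task-owned storage. The present executes asynchronously on the worker thread, after the
// caller's structures have gone out of scope, so pointers into the original chain cannot be
// kept.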
void CommandProcessorTask::copyPresentInfo(const VkPresentInfoKHR &other)
{
if (other.sType == 0)
{
return;
}
mPresentInfo.sType = other.sType;
mPresentInfo.pNext = nullptr;
if (other.swapchainCount > 0)
{
ASSERT(other.swapchainCount == 1);
mPresentInfo.swapchainCount = 1;
mSwapchain = other.pSwapchains[0];
mPresentInfo.pSwapchains = &mSwapchain;
mImageIndex = other.pImageIndices[0];
mPresentInfo.pImageIndices = &mImageIndex;
}
if (other.waitSemaphoreCount > 0)
{
ASSERT(other.waitSemaphoreCount == 1);
mPresentInfo.waitSemaphoreCount = 1;
mWaitSemaphore = other.pWaitSemaphores[0];
mPresentInfo.pWaitSemaphores = &mWaitSemaphore;
}
mPresentInfo.pResults = other.pResults;
void *pNext = const_cast<void *>(other.pNext);
while (pNext != nullptr)
{
VkStructureType sType = *reinterpret_cast<VkStructureType *>(pNext);
switch (sType)
{
case VK_STRUCTURE_TYPE_PRESENT_REGIONS_KHR:
{
const VkPresentRegionsKHR *presentRegions =
reinterpret_cast<VkPresentRegionsKHR *>(pNext);
mPresentRegion = *presentRegions->pRegions;
mRects.resize(mPresentRegion.rectangleCount);
for (uint32_t i = 0; i < mPresentRegion.rectangleCount; i++)
{
mRects[i] = presentRegions->pRegions->pRectangles[i];
}
mPresentRegion.pRectangles = mRects.data();
mPresentRegions.sType = VK_STRUCTURE_TYPE_PRESENT_REGIONS_KHR;
mPresentRegions.pNext = nullptr;
mPresentRegions.swapchainCount = 1;
mPresentRegions.pRegions = &mPresentRegion;
AddToPNextChain(&mPresentInfo, &mPresentRegions);
pNext = const_cast<void *>(presentRegions->pNext);
break;
}
case VK_STRUCTURE_TYPE_SWAPCHAIN_PRESENT_FENCE_INFO_EXT:
{
const VkSwapchainPresentFenceInfoEXT *presentFenceInfo =
reinterpret_cast<VkSwapchainPresentFenceInfoEXT *>(pNext);
ASSERT(presentFenceInfo->swapchainCount == 1);
mPresentFence = presentFenceInfo->pFences[0];
mPresentFenceInfo.sType = VK_STRUCTURE_TYPE_SWAPCHAIN_PRESENT_FENCE_INFO_EXT;
mPresentFenceInfo.pNext = nullptr;
mPresentFenceInfo.swapchainCount = 1;
mPresentFenceInfo.pFences = &mPresentFence;
AddToPNextChain(&mPresentInfo, &mPresentFenceInfo);
pNext = const_cast<void *>(presentFenceInfo->pNext);
break;
}
case VK_STRUCTURE_TYPE_SWAPCHAIN_PRESENT_MODE_INFO_EXT:
{
const VkSwapchainPresentModeInfoEXT *presentModeInfo =
reinterpret_cast<VkSwapchainPresentModeInfoEXT *>(pNext);
ASSERT(presentModeInfo->swapchainCount == 1);
mPresentMode = presentModeInfo->pPresentModes[0];
mPresentModeInfo.sType = VK_STRUCTURE_TYPE_SWAPCHAIN_PRESENT_MODE_INFO_EXT;
mPresentModeInfo.pNext = nullptr;
mPresentModeInfo.swapchainCount = 1;
mPresentModeInfo.pPresentModes = &mPresentMode;
AddToPNextChain(&mPresentInfo, &mPresentModeInfo);
pNext = const_cast<void *>(presentModeInfo->pNext);
break;
}
default:
ERR() << "Unknown sType: " << sType << " in VkPresentInfoKHR.pNext chain";
UNREACHABLE();
break;
}
}
}
void CommandProcessorTask::initPresent(egl::ContextPriority priority,
const VkPresentInfoKHR &presentInfo,
SwapchainStatus *swapchainStatus)
{
mTask = CustomTask::Present;
mPriority = priority;
mSwapchainStatus = swapchainStatus;
copyPresentInfo(presentInfo);
}
void CommandProcessorTask::initFlushAndQueueSubmit(VkSemaphore semaphore,
SharedExternalFence &&externalFence,
ProtectionType protectionType,
egl::ContextPriority priority,
const QueueSerial &submitQueueSerial)
{
mTask = CustomTask::FlushAndQueueSubmit;
mSemaphore = semaphore;
mExternalFence = std::move(externalFence);
mPriority = priority;
mProtectionType = protectionType;
mSubmitQueueSerial = submitQueueSerial;
}
void CommandProcessorTask::initOneOffQueueSubmit(VkCommandBuffer commandBufferHandle,
ProtectionType protectionType,
egl::ContextPriority priority,
VkSemaphore waitSemaphore,
VkPipelineStageFlags waitSemaphoreStageMask,
const QueueSerial &submitQueueSerial)
{
mTask = CustomTask::OneOffQueueSubmit;
mOneOffCommandBuffer = commandBufferHandle;
mOneOffWaitSemaphore = waitSemaphore;
mOneOffWaitSemaphoreStageMask = waitSemaphoreStageMask;
mPriority = priority;
mProtectionType = protectionType;
mSubmitQueueSerial = submitQueueSerial;
}
CommandProcessorTask &CommandProcessorTask::operator=(CommandProcessorTask &&rhs)
{
if (this == &rhs)
{
return *this;
}
std::swap(mRenderPass, rhs.mRenderPass);
std::swap(mOutsideRenderPassCommandBuffer, rhs.mOutsideRenderPassCommandBuffer);
std::swap(mRenderPassCommandBuffer, rhs.mRenderPassCommandBuffer);
std::swap(mTask, rhs.mTask);
std::swap(mWaitSemaphores, rhs.mWaitSemaphores);
std::swap(mWaitSemaphoreStageMasks, rhs.mWaitSemaphoreStageMasks);
std::swap(mSemaphore, rhs.mSemaphore);
std::swap(mExternalFence, rhs.mExternalFence);
std::swap(mOneOffWaitSemaphore, rhs.mOneOffWaitSemaphore);
std::swap(mOneOffWaitSemaphoreStageMask, rhs.mOneOffWaitSemaphoreStageMask);
std::swap(mSubmitQueueSerial, rhs.mSubmitQueueSerial);
std::swap(mPriority, rhs.mPriority);
std::swap(mProtectionType, rhs.mProtectionType);
std::swap(mOneOffCommandBuffer, rhs.mOneOffCommandBuffer);
copyPresentInfo(rhs.mPresentInfo);
std::swap(mSwapchainStatus, rhs.mSwapchainStatus);
// clear rhs now that everything has moved.
rhs.initTask();
return *this;
}
// CommandBatch implementation.
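// CommandBatch bundles everything associated with one queue submission: the primary command
// buffer, its secondary command buffers, the fence (internal or external) used to track
// completion, and the queue serial assigned to the submission.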
CommandBatch::CommandBatch() : protectionType(ProtectionType::InvalidEnum) {}
CommandBatch::~CommandBatch() = default;
CommandBatch::CommandBatch(CommandBatch &&other) : CommandBatch()
{
*this = std::move(other);
}
CommandBatch &CommandBatch::operator=(CommandBatch &&other)
{
std::swap(primaryCommands, other.primaryCommands);
std::swap(secondaryCommands, other.secondaryCommands);
std::swap(fence, other.fence);
std::swap(externalFence, other.externalFence);
std::swap(queueSerial, other.queueSerial);
std::swap(protectionType, other.protectionType);
return *this;
}
void CommandBatch::destroy(VkDevice device)
{
primaryCommands.destroy(device);
secondaryCommands.retireCommandBuffers();
destroyFence(device);
protectionType = ProtectionType::InvalidEnum;
}
bool CommandBatch::hasFence() const
{
ASSERT(!externalFence || !fence);
return fence || externalFence;
}
void CommandBatch::releaseFence()
{
fence.release();
externalFence.reset();
}
void CommandBatch::destroyFence(VkDevice device)
{
fence.destroy(device);
externalFence.reset();
}
VkFence CommandBatch::getFenceHandle() const
{
ASSERT(hasFence());
return fence ? fence.get().getHandle() : externalFence->getHandle();
}
VkResult CommandBatch::getFenceStatus(VkDevice device) const
{
ASSERT(hasFence());
return fence ? fence.getStatus(device) : externalFence->getStatus(device);
}
VkResult CommandBatch::waitFence(VkDevice device, uint64_t timeout) const
{
ASSERT(hasFence());
return fence ? fence.wait(device, timeout) : externalFence->wait(device, timeout);
}
VkResult CommandBatch::waitFenceUnlocked(VkDevice device,
uint64_t timeout,
std::unique_lock<std::mutex> *lock) const
{
ASSERT(hasFence());
VkResult status;
// Only a local copy of the fence may be used without holding the lock.
// Do not access "this" after unlock(), because the object might be deleted by another thread.
if (fence)
{
const SharedFence localFenceToWaitOn = fence;
lock->unlock();
status = localFenceToWaitOn.wait(device, timeout);
lock->lock();
}
else
{
const SharedExternalFence localFenceToWaitOn = externalFence;
lock->unlock();
status = localFenceToWaitOn->wait(device, timeout);
lock->lock();
}
return status;
}
// CommandProcessor implementation.
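// CommandProcessor owns a worker thread that drains mTaskQueue. Producers enqueue under
// mTaskEnqueueMutex; tasks are processed under mTaskDequeueMutex so that submission order
// matches enqueue order even when a caller drains the queue inline.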
void CommandProcessor::handleError(VkResult errorCode,
const char *file,
const char *function,
unsigned int line)
{
ASSERT(errorCode != VK_SUCCESS);
std::stringstream errorStream;
errorStream << "Internal Vulkan error (" << errorCode << "): " << VulkanResultString(errorCode)
<< ".";
if (errorCode == VK_ERROR_DEVICE_LOST)
{
WARN() << errorStream.str();
handleDeviceLost(mRenderer);
}
std::lock_guard<std::mutex> queueLock(mErrorMutex);
Error error = {errorCode, file, function, line};
mErrors.emplace(error);
}
CommandProcessor::CommandProcessor(RendererVk *renderer, CommandQueue *commandQueue)
: Context(renderer),
mCommandQueue(commandQueue),
mTaskThreadShouldExit(false),
mNeedCommandsAndGarbageCleanup(false)
{
std::lock_guard<std::mutex> queueLock(mErrorMutex);
while (!mErrors.empty())
{
mErrors.pop();
}
}
CommandProcessor::~CommandProcessor() = default;
angle::Result CommandProcessor::checkAndPopPendingError(Context *errorHandlingContext)
{
std::lock_guard<std::mutex> queueLock(mErrorMutex);
if (mErrors.empty())
{
return angle::Result::Continue;
}
while (!mErrors.empty())
{
Error err = mErrors.front();
mErrors.pop();
errorHandlingContext->handleError(err.errorCode, err.file, err.function, err.line);
}
return angle::Result::Stop;
}
angle::Result CommandProcessor::queueCommand(CommandProcessorTask &&task)
{
ANGLE_TRACE_EVENT0("gpu.angle", "CommandProcessor::queueCommand");
// Take mTaskEnqueueMutex lock. If task queue is full, try to drain one.
std::unique_lock<std::mutex> enqueueLock(mTaskEnqueueMutex);
if (mTaskQueue.full())
{
std::lock_guard<std::mutex> dequeueLock(mTaskDequeueMutex);
// Check the task queue again in case another thread just drained it.
if (mTaskQueue.full())
{
CommandProcessorTask frontTask(std::move(mTaskQueue.front()));
mTaskQueue.pop();
ANGLE_TRY(processTask(&frontTask));
}
}
mTaskQueue.push(std::move(task));
mWorkAvailableCondition.notify_one();
return angle::Result::Continue;
}
void CommandProcessor::requestCommandsAndGarbageCleanup()
{
if (!mNeedCommandsAndGarbageCleanup.exchange(true))
{
// Request cleanup in the async thread.
std::unique_lock<std::mutex> enqueueLock(mTaskEnqueueMutex);
mWorkAvailableCondition.notify_one();
}
}
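// Worker thread entry point: loops in processTasksImpl() until a controlled exit is requested.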
void CommandProcessor::processTasks()
{
while (true)
{
bool exitThread = false;
angle::Result result = processTasksImpl(&exitThread);
if (exitThread)
{
// We are doing a controlled exit of the thread, break out of the while loop.
break;
}
if (result != angle::Result::Continue)
{
// TODO: https://issuetracker.google.com/issues/170311829 - follow-up on error handling.
// ContextVk::commandProcessorSyncErrorsAndQueueCommand and WindowSurfaceVk::destroy already
// do error processing; nothing appears to be required here beyond keeping the worker thread
// running until it is told to exit.
UNREACHABLE();
}
}
}
angle::Result CommandProcessor::processTasksImpl(bool *exitThread)
{
while (true)
{
std::unique_lock<std::mutex> enqueueLock(mTaskEnqueueMutex);
if (mTaskQueue.empty())
{
if (mTaskThreadShouldExit)
{
break;
}
// Sleep until there are new tasks, a cleanup request, or the thread is asked to exit.
mWorkAvailableCondition.wait(enqueueLock, [this] {
return !mTaskQueue.empty() || mTaskThreadShouldExit ||
mNeedCommandsAndGarbageCleanup;
});
}
// Do submission with mTaskEnqueueMutex unlocked so that enqueueing is still allowed while we
// process work.
enqueueLock.unlock();
// Take the dequeue lock to ensure submissions happen in the same order as they were received.
std::lock_guard<std::mutex> dequeueLock(mTaskDequeueMutex);
if (!mTaskQueue.empty())
{
CommandProcessorTask task(std::move(mTaskQueue.front()));
mTaskQueue.pop();
// Artificially make the task take longer to catch threading issues.
if (getFeatures().slowAsyncCommandQueueForTesting.enabled)
{
constexpr double kSlowdownTime = 0.005;
double startTime = angle::GetCurrentSystemTime();
while (angle::GetCurrentSystemTime() - startTime < kSlowdownTime)
{
// Busy waiting
}
}
ANGLE_TRY(processTask(&task));
}
if (mNeedCommandsAndGarbageCleanup.exchange(false))
{
// Always check completed commands again in case anything new has been finished.
ANGLE_TRY(mCommandQueue->checkCompletedCommands(this));
// Reset command buffer and clean up garbage
if (mRenderer->isAsyncCommandBufferResetEnabled() &&
mCommandQueue->hasFinishedCommands())
{
ANGLE_TRY(mCommandQueue->retireFinishedCommands(this));
}
mRenderer->cleanupGarbage();
}
}
*exitThread = true;
return angle::Result::Continue;
}
angle::Result CommandProcessor::processTask(CommandProcessorTask *task)
{
switch (task->getTaskCommand())
{
case CustomTask::FlushAndQueueSubmit:
{
ANGLE_TRACE_EVENT0("gpu.angle", "processTask::FlushAndQueueSubmit");
// End command buffer
// Call submitCommands()
ANGLE_TRY(mCommandQueue->submitCommands(
this, task->getProtectionType(), task->getPriority(), task->getSemaphore(),
std::move(task->getExternalFence()), task->getSubmitQueueSerial()));
mNeedCommandsAndGarbageCleanup = true;
break;
}
case CustomTask::OneOffQueueSubmit:
{
ANGLE_TRACE_EVENT0("gpu.angle", "processTask::OneOffQueueSubmit");
ANGLE_TRY(mCommandQueue->queueSubmitOneOff(
this, task->getProtectionType(), task->getPriority(),
task->getOneOffCommandBuffer(), task->getOneOffWaitSemaphore(),
task->getOneOffWaitSemaphoreStageMask(), SubmitPolicy::EnsureSubmitted,
task->getSubmitQueueSerial()));
mNeedCommandsAndGarbageCleanup = true;
break;
}
case CustomTask::Present:
{
// Do not access task->getSwapchainStatus() after this call because it is marked as no
// longer pending, and so may get deleted or clobbered by another thread.
VkResult result =
present(task->getPriority(), task->getPresentInfo(), task->getSwapchainStatus());
// We get to ignore these as they are not fatal
if (result != VK_ERROR_OUT_OF_DATE_KHR && result != VK_SUBOPTIMAL_KHR &&
result != VK_SUCCESS)
{
// Save the error so that we can handle it.
// Don't leave processing loop, don't consider errors from present to be fatal.
// TODO: https://issuetracker.google.com/issues/170329600 - This needs to improve to
// properly parallelize present
handleError(result, __FILE__, __FUNCTION__, __LINE__);
}
break;
}
case CustomTask::FlushWaitSemaphores:
{
mCommandQueue->flushWaitSemaphores(task->getProtectionType(), task->getPriority(),
std::move(task->getWaitSemaphores()),
std::move(task->getWaitSemaphoreStageMasks()));
break;
}
case CustomTask::ProcessOutsideRenderPassCommands:
{
OutsideRenderPassCommandBufferHelper *commandBuffer =
task->getOutsideRenderPassCommandBuffer();
ANGLE_TRY(mCommandQueue->flushOutsideRPCommands(this, task->getProtectionType(),
task->getPriority(), &commandBuffer));
OutsideRenderPassCommandBufferHelper *originalCommandBuffer =
task->getOutsideRenderPassCommandBuffer();
mRenderer->recycleOutsideRenderPassCommandBufferHelper(&originalCommandBuffer);
break;
}
case CustomTask::ProcessRenderPassCommands:
{
RenderPassCommandBufferHelper *commandBuffer = task->getRenderPassCommandBuffer();
ANGLE_TRY(mCommandQueue->flushRenderPassCommands(
this, task->getProtectionType(), task->getPriority(), *task->getRenderPass(),
&commandBuffer));
RenderPassCommandBufferHelper *originalCommandBuffer =
task->getRenderPassCommandBuffer();
mRenderer->recycleRenderPassCommandBufferHelper(&originalCommandBuffer);
break;
}
default:
UNREACHABLE();
break;
}
return angle::Result::Continue;
}
angle::Result CommandProcessor::waitForAllWorkToBeSubmitted(Context *context)
{
ANGLE_TRACE_EVENT0("gpu.angle", "CommandProcessor::waitForAllWorkToBeSubmitted");
// Take the mTaskEnqueueMutex and mTaskDequeueMutex locks so that no one can enqueue more work
// while we drain the queue and handle device lost.
std::lock_guard<std::mutex> enqueueLock(mTaskEnqueueMutex);
std::lock_guard<std::mutex> dequeueLock(mTaskDequeueMutex);
// Sync any errors to the context
// Do this inside the mutex to prevent new errors adding to the list.
ANGLE_TRY(checkAndPopPendingError(context));
while (!mTaskQueue.empty())
{
CommandProcessorTask task(std::move(mTaskQueue.front()));
mTaskQueue.pop();
ANGLE_TRY(processTask(&task));
}
if (mRenderer->isAsyncCommandBufferResetEnabled())
{
ANGLE_TRY(mCommandQueue->retireFinishedCommands(context));
}
context->getRenderer()->cleanupGarbage();
mNeedCommandsAndGarbageCleanup = false;
return angle::Result::Continue;
}
angle::Result CommandProcessor::init()
{
mTaskThread = std::thread(&CommandProcessor::processTasks, this);
return angle::Result::Continue;
}
void CommandProcessor::destroy(Context *context)
{
{
// Request to terminate the worker thread
std::lock_guard<std::mutex> enqueueLock(mTaskEnqueueMutex);
mTaskThreadShouldExit = true;
mWorkAvailableCondition.notify_one();
}
(void)waitForAllWorkToBeSubmitted(context);
if (mTaskThread.joinable())
{
mTaskThread.join();
}
}
void CommandProcessor::handleDeviceLost(RendererVk *renderer)
{
ANGLE_TRACE_EVENT0("gpu.angle", "CommandProcessor::handleDeviceLost");
// waitForAllWorkToBeSubmitted() takes both mTaskEnqueueMutex and mTaskDequeueMutex, so no one
// can add more work to the queue while we drain it and handle device lost. Do not take
// mTaskEnqueueMutex here: std::mutex is not recursive, and locking it again in the call below
// would deadlock.
(void)waitForAllWorkToBeSubmitted(this);
// Worker thread is idle and command queue is empty so good to continue
mCommandQueue->handleDeviceLost(renderer);
}
VkResult CommandProcessor::present(egl::ContextPriority priority,
const VkPresentInfoKHR &presentInfo,
SwapchainStatus *swapchainStatus)
{
ANGLE_TRACE_EVENT0("gpu.angle", "vkQueuePresentKHR");
// Verify that we are presenting one and only one swapchain
ASSERT(presentInfo.swapchainCount == 1);
ASSERT(presentInfo.pResults == nullptr);
mCommandQueue->queuePresent(priority, presentInfo, swapchainStatus);
const VkResult result = swapchainStatus->lastPresentResult;
// Only update isPending after lastPresentResult has been recorded.
// swapchainStatus cannot be accessed after this assignment: once it is no longer pending, it
// may get deleted or clobbered by another thread.
ASSERT(swapchainStatus->isPending);
swapchainStatus->isPending = false;
return result;
}
angle::Result CommandProcessor::enqueueSubmitCommands(Context *context,
ProtectionType protectionType,
egl::ContextPriority priority,
VkSemaphore signalSemaphore,
SharedExternalFence &&externalFence,
const QueueSerial &submitQueueSerial)
{
ANGLE_TRY(checkAndPopPendingError(context));
CommandProcessorTask task;
task.initFlushAndQueueSubmit(signalSemaphore, std::move(externalFence), protectionType,
priority, submitQueueSerial);
ANGLE_TRY(queueCommand(std::move(task)));
mLastEnqueuedSerials.setQueueSerial(submitQueueSerial);
return angle::Result::Continue;
}
angle::Result CommandProcessor::enqueueSubmitOneOffCommands(
Context *context,
ProtectionType protectionType,
egl::ContextPriority contextPriority,
VkCommandBuffer commandBufferHandle,
VkSemaphore waitSemaphore,
VkPipelineStageFlags waitSemaphoreStageMask,
SubmitPolicy submitPolicy,
const QueueSerial &submitQueueSerial)
{
ANGLE_TRY(checkAndPopPendingError(context));
CommandProcessorTask task;
task.initOneOffQueueSubmit(commandBufferHandle, protectionType, contextPriority, waitSemaphore,
waitSemaphoreStageMask, submitQueueSerial);
ANGLE_TRY(queueCommand(std::move(task)));
mLastEnqueuedSerials.setQueueSerial(submitQueueSerial);
if (submitPolicy == SubmitPolicy::EnsureSubmitted)
{
// Caller has synchronization requirement to have work in GPU pipe when returning from this
// function.
ANGLE_TRY(waitForQueueSerialToBeSubmitted(context, submitQueueSerial));
}
return angle::Result::Continue;
}
void CommandProcessor::enqueuePresent(egl::ContextPriority contextPriority,
const VkPresentInfoKHR &presentInfo,
SwapchainStatus *swapchainStatus)
{
ASSERT(!swapchainStatus->isPending);
swapchainStatus->isPending = true;
// Always return with VK_SUCCESS initially. When we call acquireNextImage we'll check the
// return code again. This allows the app to continue working until we really need to know
// the return code from present.
swapchainStatus->lastPresentResult = VK_SUCCESS;
CommandProcessorTask task;
task.initPresent(contextPriority, presentInfo, swapchainStatus);
(void)queueCommand(std::move(task));
}
angle::Result CommandProcessor::enqueueFlushWaitSemaphores(
ProtectionType protectionType,
egl::ContextPriority priority,
std::vector<VkSemaphore> &&waitSemaphores,
std::vector<VkPipelineStageFlags> &&waitSemaphoreStageMasks)
{
CommandProcessorTask task;
task.initFlushWaitSemaphores(protectionType, priority, std::move(waitSemaphores),
std::move(waitSemaphoreStageMasks));
ANGLE_TRY(queueCommand(std::move(task)));
return angle::Result::Continue;
}
angle::Result CommandProcessor::enqueueFlushOutsideRPCommands(
Context *context,
ProtectionType protectionType,
egl::ContextPriority priority,
OutsideRenderPassCommandBufferHelper **outsideRPCommands)
{
ANGLE_TRY(checkAndPopPendingError(context));
(*outsideRPCommands)->markClosed();
SecondaryCommandPool *commandPool = nullptr;
ANGLE_TRY((*outsideRPCommands)->detachCommandPool(context, &commandPool));
// Detach functions are only used for ring buffer allocators.
SecondaryCommandMemoryAllocator *allocator = (*outsideRPCommands)->detachAllocator();
CommandProcessorTask task;
task.initOutsideRenderPassProcessCommands(protectionType, priority, *outsideRPCommands);
ANGLE_TRY(queueCommand(std::move(task)));
ANGLE_TRY(mRenderer->getOutsideRenderPassCommandBufferHelper(context, commandPool, allocator,
outsideRPCommands));
return angle::Result::Continue;
}
angle::Result CommandProcessor::enqueueFlushRenderPassCommands(
Context *context,
ProtectionType protectionType,
egl::ContextPriority priority,
const RenderPass &renderPass,
RenderPassCommandBufferHelper **renderPassCommands)
{
ANGLE_TRY(checkAndPopPendingError(context));
(*renderPassCommands)->markClosed();
SecondaryCommandPool *commandPool = nullptr;
(*renderPassCommands)->detachCommandPool(&commandPool);
// Detach functions are only used for ring buffer allocators.
SecondaryCommandMemoryAllocator *allocator = (*renderPassCommands)->detachAllocator();
CommandProcessorTask task;
task.initRenderPassProcessCommands(protectionType, priority, *renderPassCommands, &renderPass);
ANGLE_TRY(queueCommand(std::move(task)));
ANGLE_TRY(mRenderer->getRenderPassCommandBufferHelper(context, commandPool, allocator,
renderPassCommands));
return angle::Result::Continue;
}
angle::Result CommandProcessor::waitForResourceUseToBeSubmitted(Context *context,
const ResourceUse &use)
{
if (mCommandQueue->hasResourceUseSubmitted(use))
{
ANGLE_TRY(checkAndPopPendingError(context));
}
else
{
// We do not hold the mTaskEnqueueMutex lock, so other contexts can still enqueue work while we
// process tasks here.
std::lock_guard<std::mutex> dequeueLock(mTaskDequeueMutex);
// Do this inside the mutex to prevent new errors adding to the list.
ANGLE_TRY(checkAndPopPendingError(context));
size_t maxTaskCount = mTaskQueue.size();
size_t taskCount = 0;
while (taskCount < maxTaskCount && !mCommandQueue->hasResourceUseSubmitted(use))
{
CommandProcessorTask task(std::move(mTaskQueue.front()));
mTaskQueue.pop();
ANGLE_TRY(processTask(&task));
taskCount++;
}
}
return angle::Result::Continue;
}
angle::Result CommandProcessor::waitForPresentToBeSubmitted(SwapchainStatus *swapchainStatus)
{
if (!swapchainStatus->isPending)
{
return angle::Result::Continue;
}
std::lock_guard<std::mutex> dequeueLock(mTaskDequeueMutex);
size_t maxTaskCount = mTaskQueue.size();
size_t taskCount = 0;
while (taskCount < maxTaskCount && swapchainStatus->isPending)
{
CommandProcessorTask task(std::move(mTaskQueue.front()));
mTaskQueue.pop();
ANGLE_TRY(processTask(&task));
taskCount++;
}
ASSERT(!swapchainStatus->isPending);
return angle::Result::Continue;
}
// CommandQueue public API implementation. These must be thread safe and never called from
// CommandQueue class itself.
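// CommandQueue uses two mutexes: mMutex guards the command and batch state, while
// mQueueSubmitMutex serializes the actual vkQueueSubmit/vkQueuePresentKHR calls so that
// submissions reach the VkQueue in order.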
CommandQueue::CommandQueue() : mPerfCounters{} {}
CommandQueue::~CommandQueue() = default;
void CommandQueue::destroy(Context *context)
{
std::lock_guard<std::mutex> lock(mMutex);
std::lock_guard<std::mutex> enqueuelock(mQueueSubmitMutex);
// Force all commands to finish by flushing all queues.
for (VkQueue queue : mQueueMap)
{
if (queue != VK_NULL_HANDLE)
{
vkQueueWaitIdle(queue);
}
}
RendererVk *renderer = context->getRenderer();
// Assign an infinite "last completed" serial to force all garbage to be deleted.
mLastCompletedSerials.fill(Serial::Infinite());
for (auto &protectionMap : mCommandsStateMap)
{
for (CommandsState &state : protectionMap)
{
state.waitSemaphores.clear();
state.waitSemaphoreStageMasks.clear();
state.primaryCommands.destroy(renderer->getDevice());
state.secondaryCommands.retireCommandBuffers();
}
}
for (PersistentCommandPool &commandPool : mPrimaryCommandPoolMap)
{
commandPool.destroy(renderer->getDevice());
}
mFenceRecycler.destroy(context);
ASSERT(mInFlightCommands.empty());
ASSERT(mFinishedCommandBatches.empty());
}
angle::Result CommandQueue::init(Context *context, const DeviceQueueMap &queueMap)
{
std::lock_guard<std::mutex> lock(mMutex);
// In case RendererVk gets re-initialized, we can't rely on the constructor to do
// initialization for us.
mLastSubmittedSerials.fill(kZeroSerial);
mLastCompletedSerials.fill(kZeroSerial);
// Assign before initializing the command pools in order to get the queue family index.
mQueueMap = queueMap;
ANGLE_TRY(initCommandPool(context, ProtectionType::Unprotected));
if (queueMap.isProtected())
{
ANGLE_TRY(initCommandPool(context, ProtectionType::Protected));
}
return angle::Result::Continue;
}
void CommandQueue::handleDeviceLost(RendererVk *renderer)
{
ANGLE_TRACE_EVENT0("gpu.angle", "CommandQueue::handleDeviceLost");
VkDevice device = renderer->getDevice();
// Hold both locks while clean up mInFlightCommands.
std::lock_guard<std::mutex> dequeuelock(mMutex);
std::lock_guard<std::mutex> enqueuelock(mQueueSubmitMutex);
while (!mInFlightCommands.empty())
{
CommandBatch &batch = mInFlightCommands.front();
// On device loss we need to wait for the fence to be signaled before destroying it.
if (batch.hasFence())
{
VkResult status = batch.waitFence(device, renderer->getMaxFenceWaitTimeNs());
// If the wait times out, it is probably not possible to recover from lost device
ASSERT(status == VK_SUCCESS || status == VK_ERROR_DEVICE_LOST);
batch.destroyFence(device);
}
// On device loss, simply destroy the CommandBuffer here; it will be fully cleaned up later by
// CommandPool::destroy.
if (batch.primaryCommands.valid())
{
batch.primaryCommands.destroy(device);
}
batch.secondaryCommands.retireCommandBuffers();
mLastCompletedSerials.setQueueSerial(batch.queueSerial);
mInFlightCommands.pop();
}
}
angle::Result CommandQueue::postSubmitCheck(Context *context)
{
RendererVk *renderer = context->getRenderer();
// Update mLastCompletedQueueSerial immediately in case any command has been finished.
ANGLE_TRY(checkAndCleanupCompletedCommands(context));
VkDeviceSize suballocationGarbageSize = renderer->getSuballocationGarbageSize();
if (suballocationGarbageSize > kMaxBufferSuballocationGarbageSize)
{
// The CPU should be throttled to avoid accumulating too much memory garbage waiting to be
// destroyed. This is important to keep peak memory usage in check when a game launches and a
// lot of staging buffers are used for texture uploads and then released. But if there is only
// one command buffer in flight, we do not wait here, to ensure the GPU stays busy.
std::unique_lock<std::mutex> lock(mMutex);
while (suballocationGarbageSize > kMaxBufferSuballocationGarbageSize &&
mInFlightCommands.size() > 1)
{
ANGLE_TRY(
finishOneCommandBatchAndCleanupImpl(context, renderer->getMaxFenceWaitTimeNs()));
suballocationGarbageSize = renderer->getSuballocationGarbageSize();
}
}
if (kOutputVmaStatsString)
{
renderer->outputVmaStatString();
}
return angle::Result::Continue;
}
angle::Result CommandQueue::finishResourceUse(Context *context,
const ResourceUse &use,
uint64_t timeout)
{
VkDevice device = context->getDevice();
{
std::unique_lock<std::mutex> lock(mMutex);
while (!mInFlightCommands.empty() && !hasResourceUseFinished(use))
{
bool finished;
ANGLE_TRY(checkOneCommandBatch(context, &finished));
if (!finished)
{
ANGLE_VK_TRY(context,
mInFlightCommands.front().waitFenceUnlocked(device, timeout, &lock));
}
}
// Check the rest of the commands in case they are also finished.
ANGLE_TRY(checkCompletedCommandsLocked(context));
}
ASSERT(hasResourceUseFinished(use));
if (!mFinishedCommandBatches.empty())
{
ANGLE_TRY(retireFinishedCommandsAndCleanupGarbage(context));
}
return angle::Result::Continue;
}
angle::Result CommandQueue::finishQueueSerial(Context *context,
const QueueSerial &queueSerial,
uint64_t timeout)
{
vk::ResourceUse use(queueSerial);
return finishResourceUse(context, use, timeout);
}
angle::Result CommandQueue::waitIdle(Context *context, uint64_t timeout)
{
// Fill the local variable while holding the lock.
vk::ResourceUse use;
{
std::lock_guard<std::mutex> lock(mMutex);
if (mInFlightCommands.empty())
{
return angle::Result::Continue;
}
use.setQueueSerial(mInFlightCommands.back().queueSerial);
}
return finishResourceUse(context, use, timeout);
}
angle::Result CommandQueue::waitForResourceUseToFinishWithUserTimeout(Context *context,
const ResourceUse &use,
uint64_t timeout,
VkResult *result)
{
// The serial is not yet submitted. Waiting on it is undefined behavior, so we can do anything.
if (!hasResourceUseSubmitted(use))
{
WARN() << "Waiting on an unsubmitted serial.";
*result = VK_TIMEOUT;
return angle::Result::Continue;
}
VkDevice device = context->getDevice();
size_t finishedCount = 0;
{
std::unique_lock<std::mutex> lock(mMutex);
while (!mInFlightCommands.empty() && !hasResourceUseFinished(use))
{
bool finished;
ANGLE_TRY(checkOneCommandBatch(context, &finished));
if (!finished)
{
*result = mInFlightCommands.front().waitFenceUnlocked(device, timeout, &lock);
// Don't trigger an error on timeout.
if (*result == VK_TIMEOUT)
{
break;
}
else
{
ANGLE_VK_TRY(context, *result);
}
}
}
// Do one more check in case more commands also finished.
ANGLE_TRY(checkCompletedCommandsLocked(context));
finishedCount = mFinishedCommandBatches.size();
}
if (finishedCount > 0)
{
ANGLE_TRY(retireFinishedCommandsAndCleanupGarbage(context));
}
return angle::Result::Continue;
}
bool CommandQueue::isBusy(RendererVk *renderer) const
{
// No lock is needed here since we are accessing atomic variables only.
size_t maxIndex = renderer->getLargestQueueSerialIndexEverAllocated();
for (SerialIndex i = 0; i <= maxIndex; ++i)
{
if (mLastSubmittedSerials[i] > mLastCompletedSerials[i])
{
return true;
}
}
return false;
}
void CommandQueue::flushWaitSemaphores(ProtectionType protectionType,
egl::ContextPriority priority,
std::vector<VkSemaphore> &&waitSemaphores,
std::vector<VkPipelineStageFlags> &&waitSemaphoreStageMasks)
{
ASSERT(!waitSemaphores.empty());
ASSERT(waitSemaphores.size() == waitSemaphoreStageMasks.size());
std::lock_guard<std::mutex> lock(mMutex);
CommandsState &state = mCommandsStateMap[priority][protectionType];
state.waitSemaphores.insert(state.waitSemaphores.end(), waitSemaphores.begin(),
waitSemaphores.end());
state.waitSemaphoreStageMasks.insert(state.waitSemaphoreStageMasks.end(),
waitSemaphoreStageMasks.begin(),
waitSemaphoreStageMasks.end());
waitSemaphores.clear();
waitSemaphoreStageMasks.clear();
}
angle::Result CommandQueue::flushOutsideRPCommands(
Context *context,
ProtectionType protectionType,
egl::ContextPriority priority,
OutsideRenderPassCommandBufferHelper **outsideRPCommands)
{
std::lock_guard<std::mutex> lock(mMutex);
ANGLE_TRY(ensurePrimaryCommandBufferValid(context, protectionType, priority));
CommandsState &state = mCommandsStateMap[priority][protectionType];
return (*outsideRPCommands)->flushToPrimary(context, &state);
}
angle::Result CommandQueue::flushRenderPassCommands(
Context *context,
ProtectionType protectionType,
egl::ContextPriority priority,
const RenderPass &renderPass,
RenderPassCommandBufferHelper **renderPassCommands)
{
std::lock_guard<std::mutex> lock(mMutex);
ANGLE_TRY(ensurePrimaryCommandBufferValid(context, protectionType, priority));
CommandsState &state = mCommandsStateMap[priority][protectionType];
return (*renderPassCommands)->flushToPrimary(context, &state, &renderPass);
}
angle::Result CommandQueue::submitCommands(Context *context,
ProtectionType protectionType,
egl::ContextPriority priority,
VkSemaphore signalSemaphore,
SharedExternalFence &&externalFence,
const QueueSerial &submitQueueSerial)
{
ANGLE_TRACE_EVENT0("gpu.angle", "CommandQueue::submitCommands");
std::unique_lock<std::mutex> lock(mMutex);
RendererVk *renderer = context->getRenderer();
VkDevice device = renderer->getDevice();
++mPerfCounters.commandQueueSubmitCallsTotal;
++mPerfCounters.commandQueueSubmitCallsPerFrame;
DeviceScoped<CommandBatch> scopedBatch(device);
CommandBatch &batch = scopedBatch.get();
batch.queueSerial = submitQueueSerial;
batch.protectionType = protectionType;
CommandsState &state = mCommandsStateMap[priority][protectionType];
// Store the primary CommandBuffer in the in-flight list.
batch.primaryCommands = std::move(state.primaryCommands);
// Store secondary Command Buffers.
batch.secondaryCommands = std::move(state.secondaryCommands);
ASSERT(batch.primaryCommands.valid() || batch.secondaryCommands.empty());
// Move to local copies of the vectors, since queueSubmit will release the lock.
std::vector<VkSemaphore> waitSemaphores = std::move(state.waitSemaphores);
std::vector<VkPipelineStageFlags> waitSemaphoreStageMasks =
std::move(state.waitSemaphoreStageMasks);
mPerfCounters.commandQueueWaitSemaphoresTotal += waitSemaphores.size();
// Don't make a submission if there is nothing to submit.
const bool needsQueueSubmit = batch.primaryCommands.valid() ||
signalSemaphore != VK_NULL_HANDLE || externalFence ||
!waitSemaphores.empty();
VkSubmitInfo submitInfo = {};
VkProtectedSubmitInfo protectedSubmitInfo = {};
if (needsQueueSubmit)
{
if (batch.primaryCommands.valid())
{
ANGLE_VK_TRY(context, batch.primaryCommands.end());
}
InitializeSubmitInfo(&submitInfo, batch.primaryCommands, waitSemaphores,
waitSemaphoreStageMasks, signalSemaphore);
// No need for a protected submission if there are no commands to submit.
if (protectionType == ProtectionType::Protected && batch.primaryCommands.valid())
{
protectedSubmitInfo.sType = VK_STRUCTURE_TYPE_PROTECTED_SUBMIT_INFO;
protectedSubmitInfo.pNext = nullptr;
protectedSubmitInfo.protectedSubmit = true;
submitInfo.pNext = &protectedSubmitInfo;
}
if (!externalFence)
{
ANGLE_VK_TRY(context, batch.fence.init(context->getDevice(), &mFenceRecycler));
}
else
{
batch.externalFence = std::move(externalFence);
}
++mPerfCounters.vkQueueSubmitCallsTotal;
++mPerfCounters.vkQueueSubmitCallsPerFrame;
}
// Note queueSubmit will release the lock.
ANGLE_TRY(queueSubmit(context, std::move(lock), priority, submitInfo, scopedBatch,
submitQueueSerial));
// Clear local vector without lock.
waitSemaphores.clear();
waitSemaphoreStageMasks.clear();
return angle::Result::Continue;
}
angle::Result CommandQueue::queueSubmitOneOff(Context *context,
ProtectionType protectionType,
egl::ContextPriority contextPriority,
VkCommandBuffer commandBufferHandle,
VkSemaphore waitSemaphore,
VkPipelineStageFlags waitSemaphoreStageMask,
SubmitPolicy submitPolicy,
const QueueSerial &submitQueueSerial)
{
std::unique_lock<std::mutex> lock(mMutex);
DeviceScoped<CommandBatch> scopedBatch(context->getDevice());
CommandBatch &batch = scopedBatch.get();
batch.queueSerial = submitQueueSerial;
batch.protectionType = protectionType;
ANGLE_VK_TRY(context, batch.fence.init(context->getDevice(), &mFenceRecycler));
VkSubmitInfo submitInfo = {};
submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
VkProtectedSubmitInfo protectedSubmitInfo = {};
ASSERT(protectionType == ProtectionType::Unprotected ||
protectionType == ProtectionType::Protected);
if (protectionType == ProtectionType::Protected)
{
protectedSubmitInfo.sType = VK_STRUCTURE_TYPE_PROTECTED_SUBMIT_INFO;
protectedSubmitInfo.pNext = nullptr;
protectedSubmitInfo.protectedSubmit = true;
submitInfo.pNext = &protectedSubmitInfo;
}
if (commandBufferHandle != VK_NULL_HANDLE)
{
submitInfo.commandBufferCount = 1;
submitInfo.pCommandBuffers = &commandBufferHandle;
}
if (waitSemaphore != VK_NULL_HANDLE)
{
submitInfo.waitSemaphoreCount = 1;
submitInfo.pWaitSemaphores = &waitSemaphore;
submitInfo.pWaitDstStageMask = &waitSemaphoreStageMask;
}
++mPerfCounters.vkQueueSubmitCallsTotal;
++mPerfCounters.vkQueueSubmitCallsPerFrame;
// Note queueSubmit will release the lock.
return queueSubmit(context, std::move(lock), contextPriority, submitInfo, scopedBatch,
submitQueueSerial);
}
angle::Result CommandQueue::queueSubmit(Context *context,
std::unique_lock<std::mutex> &&dequeueLock,
egl::ContextPriority contextPriority,
const VkSubmitInfo &submitInfo,
DeviceScoped<CommandBatch> &commandBatch,
const QueueSerial &submitQueueSerial)
{
ANGLE_TRACE_EVENT0("gpu.angle", "CommandQueue::queueSubmit");
RendererVk *renderer = context->getRenderer();
// Lock relay to ensure that the ordering of submissions strictly follows the context's
// submission order. The relay (first take mMutex, then mQueueSubmitMutex, then release mMutex)
// ensures we always hold a lock covering the entire call, which guarantees the strict
// submission order.
std::lock_guard<std::mutex> queueSubmitLock(mQueueSubmitMutex);
// The CPU should be throttled to prevent mInFlightCommands from growing too fast. This is
// important for off-screen scenarios.
if (mInFlightCommands.full())
{
ANGLE_TRY(finishOneCommandBatchAndCleanupImpl(context, renderer->getMaxFenceWaitTimeNs()));
}
// Release the dequeue lock while doing the potentially lengthy vkQueueSubmit call.
// Note: after this point, you cannot reference anything that requires the mMutex lock.
dequeueLock.unlock();
if (submitInfo.sType == VK_STRUCTURE_TYPE_SUBMIT_INFO)
{
CommandBatch &batch = commandBatch.get();
VkQueue queue = getQueue(contextPriority);
VkFence fence = batch.getFenceHandle();
ASSERT(fence != VK_NULL_HANDLE);
ANGLE_VK_TRY(context, vkQueueSubmit(queue, 1, &submitInfo, fence));
if (batch.externalFence)
{
// exportFd exports a VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT_KHR handle, which obeys copy
// semantics. This means the fence must already be signaled, or the work that signals it must
// already be in the graphics pipeline, at the time we export the fd. In other words,
// exportFd() must be called after a successful vkQueueSubmit() call.
ExternalFence &externalFence = *batch.externalFence;
VkFenceGetFdInfoKHR fenceGetFdInfo = {};
fenceGetFdInfo.sType = VK_STRUCTURE_TYPE_FENCE_GET_FD_INFO_KHR;
fenceGetFdInfo.fence = externalFence.getHandle();
fenceGetFdInfo.handleType = VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT_KHR;
externalFence.exportFd(renderer->getDevice(), fenceGetFdInfo);
}
}
mInFlightCommands.push(commandBatch.release());
// This must be set last, so that once this submission appears submitted, it has actually been
// submitted and enqueued to mInFlightCommands.
mLastSubmittedSerials.setQueueSerial(submitQueueSerial);
return angle::Result::Continue;
}
void CommandQueue::queuePresent(egl::ContextPriority contextPriority,
const VkPresentInfoKHR &presentInfo,
SwapchainStatus *swapchainStatus)
{
std::lock_guard<std::mutex> queueSubmitLock(mQueueSubmitMutex);
VkQueue queue = getQueue(contextPriority);
swapchainStatus->lastPresentResult = vkQueuePresentKHR(queue, &presentInfo);
}
const angle::VulkanPerfCounters CommandQueue::getPerfCounters() const
{
std::lock_guard<std::mutex> lock(mMutex);
return mPerfCounters;
}
void CommandQueue::resetPerFramePerfCounters()
{
std::lock_guard<std::mutex> lock(mMutex);
mPerfCounters.commandQueueSubmitCallsPerFrame = 0;
mPerfCounters.vkQueueSubmitCallsPerFrame = 0;
}
angle::Result CommandQueue::retireFinishedCommandsAndCleanupGarbage(Context *context)
{
RendererVk *renderer = context->getRenderer();
if (!renderer->isAsyncCommandBufferResetEnabled())
{
// Do immediate command buffer reset
ANGLE_TRY(retireFinishedCommands(context));
}
renderer->requestAsyncCommandsAndGarbageCleanup(context);
return angle::Result::Continue;
}
// CommandQueue private API implementation. These are called by public API, so lock already held.
angle::Result CommandQueue::checkOneCommandBatch(Context *context, bool *finished)
{
ASSERT(!mInFlightCommands.empty());
CommandBatch &batch = mInFlightCommands.front();
*finished = false;
if (batch.hasFence())
{
VkResult status = batch.getFenceStatus(context->getDevice());
if (status == VK_NOT_READY)
{
return angle::Result::Continue;
}
ANGLE_VK_TRY(context, status);
}
// Finished.
mLastCompletedSerials.setQueueSerial(batch.queueSerial);
// Move command batch to mFinishedCommandBatches.
if (mFinishedCommandBatches.full())
{
ANGLE_TRY(retireFinishedCommandsLocked(context));
}
mFinishedCommandBatches.push(std::move(batch));
mInFlightCommands.pop();
*finished = true;
return angle::Result::Continue;
}
angle::Result CommandQueue::finishOneCommandBatchAndCleanup(Context *context,
uint64_t timeout,
bool *anyFinished)
{
std::lock_guard<std::mutex> lock(mMutex);
// If there are in-flight submissions in the queue, they can be finished.
*anyFinished = false;
if (!mInFlightCommands.empty())
{
ANGLE_TRY(finishOneCommandBatchAndCleanupImpl(context, timeout));
*anyFinished = true;
}
return angle::Result::Continue;
}
angle::Result CommandQueue::finishOneCommandBatchAndCleanupImpl(Context *context, uint64_t timeout)
{
ASSERT(!mInFlightCommands.empty());
CommandBatch &batch = mInFlightCommands.front();
if (batch.hasFence())
{
VkResult status = batch.waitFence(context->getDevice(), timeout);
ANGLE_VK_TRY(context, status);
}
mLastCompletedSerials.setQueueSerial(batch.queueSerial);
// Move command batch to mFinishedCommandBatches.
if (mFinishedCommandBatches.full())
{
ANGLE_TRY(retireFinishedCommandsLocked(context));
}
mFinishedCommandBatches.push(std::move(batch));
mInFlightCommands.pop();
// Immediately clean up finished batches.
ANGLE_TRY(retireFinishedCommandsLocked(context));
context->getRenderer()->cleanupGarbage();
return angle::Result::Continue;
}
angle::Result CommandQueue::retireFinishedCommandsLocked(Context *context)
{
ANGLE_TRACE_EVENT0("gpu.angle", "retireFinishedCommandsLocked");
while (!mFinishedCommandBatches.empty())
{
CommandBatch &batch = mFinishedCommandBatches.front();
ASSERT(batch.queueSerial <= mLastCompletedSerials);
batch.releaseFence();
if (batch.primaryCommands.valid())
{
PersistentCommandPool &commandPool = mPrimaryCommandPoolMap[batch.protectionType];
ANGLE_TRY(commandPool.collect(context, std::move(batch.primaryCommands)));
}
batch.secondaryCommands.retireCommandBuffers();
mFinishedCommandBatches.pop();
}
return angle::Result::Continue;
}
angle::Result CommandQueue::checkCompletedCommandsLocked(Context *context)
{
while (!mInFlightCommands.empty())
{
bool finished;
ANGLE_TRY(checkOneCommandBatch(context, &finished));
if (!finished)
{
break;
}
}
return angle::Result::Continue;
}
angle::Result CommandQueue::ensurePrimaryCommandBufferValid(Context *context,
ProtectionType protectionType,
egl::ContextPriority priority)
{
CommandsState &state = mCommandsStateMap[priority][protectionType];
if (state.primaryCommands.valid())
{
return angle::Result::Continue;
}
ANGLE_TRY(mPrimaryCommandPoolMap[protectionType].allocate(context, &state.primaryCommands));
VkCommandBufferBeginInfo beginInfo = {};
beginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
beginInfo.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
beginInfo.pInheritanceInfo = nullptr;
ANGLE_VK_TRY(context, state.primaryCommands.begin(beginInfo));
return angle::Result::Continue;
}
// QueuePriorities:
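// Vulkan queue priorities are normalized floats in [0.0, 1.0], where higher values ask the
// implementation to give the queue relatively more processing time.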
constexpr float kVulkanQueuePriorityLow = 0.0;
constexpr float kVulkanQueuePriorityMedium = 0.4;
constexpr float kVulkanQueuePriorityHigh = 1.0;
const float QueueFamily::kQueuePriorities[static_cast<uint32_t>(egl::ContextPriority::EnumCount)] =
{kVulkanQueuePriorityMedium, kVulkanQueuePriorityHigh, kVulkanQueuePriorityLow};
egl::ContextPriority DeviceQueueMap::getDevicePriority(egl::ContextPriority priority) const
{
return mPriorities[priority];
}
DeviceQueueMap::~DeviceQueueMap() {}
DeviceQueueMap &DeviceQueueMap::operator=(const DeviceQueueMap &other)
{
ASSERT(this != &other);
if ((this != &other) && other.valid())
{
mIndex = other.mIndex;
mIsProtected = other.mIsProtected;
mPriorities[egl::ContextPriority::Low] = other.mPriorities[egl::ContextPriority::Low];
mPriorities[egl::ContextPriority::Medium] = other.mPriorities[egl::ContextPriority::Medium];
mPriorities[egl::ContextPriority::High] = other.mPriorities[egl::ContextPriority::High];
*static_cast<angle::PackedEnumMap<egl::ContextPriority, VkQueue> *>(this) = other;
}
return *this;
}
void QueueFamily::getDeviceQueue(VkDevice device,
bool makeProtected,
uint32_t queueIndex,
VkQueue *queue)
{
if (makeProtected)
{
VkDeviceQueueInfo2 queueInfo2 = {};
queueInfo2.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_INFO_2;
queueInfo2.flags = VK_DEVICE_QUEUE_CREATE_PROTECTED_BIT;
queueInfo2.queueFamilyIndex = mIndex;
queueInfo2.queueIndex = queueIndex;
vkGetDeviceQueue2(device, &queueInfo2, queue);
}
else
{
vkGetDeviceQueue(device, mIndex, queueIndex, queue);
}
}
DeviceQueueMap QueueFamily::initializeQueueMap(VkDevice device,
bool makeProtected,
uint32_t queueIndex,
uint32_t queueCount)
{
// QueueIndexing:
constexpr uint32_t kQueueIndexMedium = 0;
constexpr uint32_t kQueueIndexHigh = 1;
constexpr uint32_t kQueueIndexLow = 2;
ASSERT(queueCount);
ASSERT((queueIndex + queueCount) <= mProperties.queueCount);
DeviceQueueMap queueMap(mIndex, makeProtected);
getDeviceQueue(device, makeProtected, queueIndex + kQueueIndexMedium,
&queueMap[egl::ContextPriority::Medium]);
queueMap.mPriorities[egl::ContextPriority::Medium] = egl::ContextPriority::Medium;
// If at least 2 queues, High has its own queue
if (queueCount > 1)
{
getDeviceQueue(device, makeProtected, queueIndex + kQueueIndexHigh,
&queueMap[egl::ContextPriority::High]);
queueMap.mPriorities[egl::ContextPriority::High] = egl::ContextPriority::High;
}
else
{
queueMap[egl::ContextPriority::High] = queueMap[egl::ContextPriority::Medium];
queueMap.mPriorities[egl::ContextPriority::High] = egl::ContextPriority::Medium;
}
// If at least 3 queues, Low has its own queue. Adjust Low priority.
if (queueCount > 2)
{
getDeviceQueue(device, makeProtected, queueIndex + kQueueIndexLow,
&queueMap[egl::ContextPriority::Low]);
queueMap.mPriorities[egl::ContextPriority::Low] = egl::ContextPriority::Low;
}
else
{
queueMap[egl::ContextPriority::Low] = queueMap[egl::ContextPriority::Medium];
queueMap.mPriorities[egl::ContextPriority::Low] = egl::ContextPriority::Medium;
}
return queueMap;
}
void QueueFamily::initialize(const VkQueueFamilyProperties &queueFamilyProperties, uint32_t index)
{
mProperties = queueFamilyProperties;
mIndex = index;
}
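// Returns the index of the (matchNumber + 1)-th queue family whose flags contain all the
// requested flags, or kInvalidIndex if there is none. If matchCount is non-null, it receives
// the total number of matching families.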
uint32_t QueueFamily::FindIndex(const std::vector<VkQueueFamilyProperties> &queueFamilyProperties,
VkQueueFlags flags,
int32_t matchNumber,
uint32_t *matchCount)
{
uint32_t index = QueueFamily::kInvalidIndex;
uint32_t count = 0;
for (uint32_t familyIndex = 0; familyIndex < queueFamilyProperties.size(); ++familyIndex)
{
const auto &queueInfo = queueFamilyProperties[familyIndex];
if ((queueInfo.queueFlags & flags) == flags)
{
ASSERT(queueInfo.queueCount > 0);
count++;
if ((index == QueueFamily::kInvalidIndex) && (matchNumber-- == 0))
{
index = familyIndex;
}
}
}
if (matchCount)
{
*matchCount = count;
}
return index;
}
} // namespace vk
} // namespace rx