//
// Copyright 2020 The ANGLE Project Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
// CommandProcessor.cpp:
// Implements the class methods for CommandProcessor.
//
#include "libANGLE/renderer/vulkan/CommandProcessor.h"
#include "common/system_utils.h"
#include "libANGLE/renderer/vulkan/RendererVk.h"
#include "libANGLE/renderer/vulkan/SyncVk.h"
namespace rx
{
namespace vk
{
namespace
{
constexpr bool kOutputVmaStatsString = false;
// When suballocation garbage exceeds this size, we may wait for the GPU to finish and free up
// some memory for allocation.
constexpr VkDeviceSize kMaxBufferSuballocationGarbageSize = 64 * 1024 * 1024;
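// Fills out a VkSubmitInfo from an optional primary command buffer, a set of wait semaphores
// with matching stage masks, and an optional signal semaphore. The caller must pass a
// zero-initialized submitInfo.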
void InitializeSubmitInfo(VkSubmitInfo *submitInfo,
const PrimaryCommandBuffer &commandBuffer,
const std::vector<VkSemaphore> &waitSemaphores,
const std::vector<VkPipelineStageFlags> &waitSemaphoreStageMasks,
const VkSemaphore &signalSemaphore)
{
// Verify that the submitInfo has been zeroed out.
ASSERT(submitInfo->signalSemaphoreCount == 0);
ASSERT(waitSemaphores.size() == waitSemaphoreStageMasks.size());
submitInfo->sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
submitInfo->commandBufferCount = commandBuffer.valid() ? 1 : 0;
submitInfo->pCommandBuffers = commandBuffer.ptr();
submitInfo->waitSemaphoreCount = static_cast<uint32_t>(waitSemaphores.size());
submitInfo->pWaitSemaphores = waitSemaphores.empty() ? nullptr : waitSemaphores.data();
submitInfo->pWaitDstStageMask = waitSemaphoreStageMasks.data();
if (signalSemaphore != VK_NULL_HANDLE)
{
submitInfo->signalSemaphoreCount = 1;
submitInfo->pSignalSemaphores = &signalSemaphore;
}
}
} // namespace
// SharedFence implementation
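// SharedFence is a copyable, ref-counted handle to a VkFence. When the last reference is
// released, the underlying fence is returned to the FenceRecycler for reuse (release()) or
// destroyed outright (destroy()), so fences are not reallocated on every submission.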
SharedFence::SharedFence() : mRefCountedFence(nullptr), mRecycler(nullptr) {}
SharedFence::SharedFence(const SharedFence &other)
: mRefCountedFence(other.mRefCountedFence), mRecycler(other.mRecycler)
{
if (mRefCountedFence != nullptr)
{
mRefCountedFence->addRef();
}
}
SharedFence::SharedFence(SharedFence &&other)
: mRefCountedFence(other.mRefCountedFence), mRecycler(other.mRecycler)
{
other.mRecycler = nullptr;
other.mRefCountedFence = nullptr;
}
SharedFence::~SharedFence()
{
release();
}
VkResult SharedFence::init(VkDevice device, FenceRecycler *recycler)
{
ASSERT(mRecycler == nullptr && mRefCountedFence == nullptr);
Fence fence;
// First try to fetch a fence from the recycler. If that fails, create a new VkFence.
recycler->fetch(device, &fence);
if (!fence.valid())
{
VkFenceCreateInfo fenceCreateInfo = {};
fenceCreateInfo.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;
fenceCreateInfo.flags = 0;
VkResult result = fence.init(device, fenceCreateInfo);
if (result != VK_SUCCESS)
{
return result;
}
}
// Create a new refcounted object to hold onto the VkFence.
mRefCountedFence = new RefCounted<Fence>(std::move(fence));
mRefCountedFence->addRef();
mRecycler = recycler;
return VK_SUCCESS;
}
SharedFence &SharedFence::operator=(const SharedFence &other)
{
if (this == &other)
{
return *this;
}
release();
mRecycler = other.mRecycler;
if (other.mRefCountedFence != nullptr)
{
mRefCountedFence = other.mRefCountedFence;
mRefCountedFence->addRef();
}
return *this;
}
SharedFence &SharedFence::operator=(SharedFence &&other)
{
if (this == &other)
{
return *this;
}
release();
mRecycler = other.mRecycler;
mRefCountedFence = other.mRefCountedFence;
other.mRecycler = nullptr;
other.mRefCountedFence = nullptr;
return *this;
}
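// Unlike release(), destroy() destroys the underlying VkFence instead of returning it to the
// recycler. It is used on paths such as device loss where recycling the fence is not safe.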
void SharedFence::destroy(VkDevice device)
{
if (mRefCountedFence != nullptr)
{
mRefCountedFence->releaseRef();
if (!mRefCountedFence->isReferenced())
{
mRefCountedFence->get().destroy(device);
SafeDelete(mRefCountedFence);
}
else
{
mRefCountedFence = nullptr;
}
mRecycler = nullptr;
}
}
void SharedFence::release()
{
if (mRefCountedFence != nullptr)
{
mRefCountedFence->releaseRef();
if (!mRefCountedFence->isReferenced())
{
mRecycler->recycle(std::move(mRefCountedFence->get()));
ASSERT(!mRefCountedFence->get().valid());
SafeDelete(mRefCountedFence);
}
else
{
mRefCountedFence = nullptr;
}
mRecycler = nullptr;
}
}
SharedFence::operator bool() const
{
ASSERT(mRefCountedFence == nullptr || mRefCountedFence->isReferenced());
return mRefCountedFence != nullptr;
}
VkResult SharedFence::getStatus(VkDevice device) const
{
if (mRefCountedFence != nullptr)
{
return mRefCountedFence->get().getStatus(device);
}
return VK_SUCCESS;
}
VkResult SharedFence::wait(VkDevice device, uint64_t timeout) const
{
if (mRefCountedFence != nullptr)
{
ANGLE_TRACE_EVENT0("gpu.angle", "SharedFence::wait");
return mRefCountedFence->get().wait(device, timeout);
}
return VK_SUCCESS;
}
// FenceRecycler implementation
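// FenceRecycler maintains a mutex-guarded pool of VkFences. fetch() hands out a recycled
// fence (reset so it is ready for reuse) when one is available; recycle() returns a fence to
// the pool.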
void FenceRecycler::destroy(Context *context)
{
std::lock_guard<std::mutex> lock(mMutex);
mRecyler.destroy(context->getDevice());
}
void FenceRecycler::fetch(VkDevice device, Fence *fenceOut)
{
ASSERT(fenceOut != nullptr && !fenceOut->valid());
std::lock_guard<std::mutex> lock(mMutex);
if (!mRecyler.empty())
{
mRecyler.fetch(fenceOut);
fenceOut->reset(device);
}
}
void FenceRecycler::recycle(Fence &&fence)
{
std::lock_guard<std::mutex> lock(mMutex);
mRecyler.recycle(std::move(fence));
}
// CommandProcessorTask implementation
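// CommandProcessorTask describes one unit of work (a flush, submit, one-off submission, or
// present) that is queued to the CommandProcessor's worker thread.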
void CommandProcessorTask::initTask()
{
mTask = CustomTask::Invalid;
mOutsideRenderPassCommandBuffer = nullptr;
mRenderPassCommandBuffer = nullptr;
mRenderPass = nullptr;
mSemaphore = VK_NULL_HANDLE;
mOneOffWaitSemaphore = VK_NULL_HANDLE;
mOneOffWaitSemaphoreStageMask = 0;
mPresentInfo = {};
mPresentInfo.pResults = nullptr;
mPresentInfo.pSwapchains = nullptr;
mPresentInfo.pImageIndices = nullptr;
mPresentInfo.pNext = nullptr;
mPresentInfo.pWaitSemaphores = nullptr;
mPresentFence = VK_NULL_HANDLE;
mSwapchainStatus = nullptr;
mOneOffCommandBuffer = VK_NULL_HANDLE;
mPriority = egl::ContextPriority::Medium;
mProtectionType = ProtectionType::InvalidEnum;
}
void CommandProcessorTask::initFlushWaitSemaphores(
ProtectionType protectionType,
egl::ContextPriority priority,
std::vector<VkSemaphore> &&waitSemaphores,
std::vector<VkPipelineStageFlags> &&waitSemaphoreStageMasks)
{
mTask = CustomTask::FlushWaitSemaphores;
mPriority = priority;
mProtectionType = protectionType;
mWaitSemaphores = std::move(waitSemaphores);
mWaitSemaphoreStageMasks = std::move(waitSemaphoreStageMasks);
}
void CommandProcessorTask::initOutsideRenderPassProcessCommands(
ProtectionType protectionType,
egl::ContextPriority priority,
OutsideRenderPassCommandBufferHelper *commandBuffer)
{
mTask = CustomTask::ProcessOutsideRenderPassCommands;
mOutsideRenderPassCommandBuffer = commandBuffer;
mPriority = priority;
mProtectionType = protectionType;
}
void CommandProcessorTask::initRenderPassProcessCommands(
ProtectionType protectionType,
egl::ContextPriority priority,
RenderPassCommandBufferHelper *commandBuffer,
const RenderPass *renderPass)
{
mTask = CustomTask::ProcessRenderPassCommands;
mRenderPassCommandBuffer = commandBuffer;
mRenderPass = renderPass;
mPriority = priority;
mProtectionType = protectionType;
}
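// Deep-copies a VkPresentInfoKHR (and the pNext structures the processor understands) into
// task-owned storage. The present executes asynchronously on the worker thread, after the
// caller's structures have gone out of scope, so pointers into the original chain cannot be
// kept.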
void CommandProcessorTask::copyPresentInfo(const VkPresentInfoKHR &other)
{
if (other.sType == 0)
{
return;
}
mPresentInfo.sType = other.sType;
mPresentInfo.pNext = nullptr;
if (other.swapchainCount > 0)
{
ASSERT(other.swapchainCount == 1);
mPresentInfo.swapchainCount = 1;
mSwapchain = other.pSwapchains[0];
mPresentInfo.pSwapchains = &mSwapchain;
mImageIndex = other.pImageIndices[0];
mPresentInfo.pImageIndices = &mImageIndex;
}
if (other.waitSemaphoreCount > 0)
{
ASSERT(other.waitSemaphoreCount == 1);
mPresentInfo.waitSemaphoreCount = 1;
mWaitSemaphore = other.pWaitSemaphores[0];
mPresentInfo.pWaitSemaphores = &mWaitSemaphore;
}
mPresentInfo.pResults = other.pResults;
void *pNext = const_cast<void *>(other.pNext);
while (pNext != nullptr)
{
VkStructureType sType = *reinterpret_cast<VkStructureType *>(pNext);
switch (sType)
{
case VK_STRUCTURE_TYPE_PRESENT_REGIONS_KHR:
{
const VkPresentRegionsKHR *presentRegions =
reinterpret_cast<VkPresentRegionsKHR *>(pNext);
mPresentRegion = *presentRegions->pRegions;
mRects.resize(mPresentRegion.rectangleCount);
for (uint32_t i = 0; i < mPresentRegion.rectangleCount; i++)
{
mRects[i] = presentRegions->pRegions->pRectangles[i];
}
mPresentRegion.pRectangles = mRects.data();
mPresentRegions.sType = VK_STRUCTURE_TYPE_PRESENT_REGIONS_KHR;
mPresentRegions.pNext = nullptr;
mPresentRegions.swapchainCount = 1;
mPresentRegions.pRegions = &mPresentRegion;
AddToPNextChain(&mPresentInfo, &mPresentRegions);
pNext = const_cast<void *>(presentRegions->pNext);
break;
}
case VK_STRUCTURE_TYPE_SWAPCHAIN_PRESENT_FENCE_INFO_EXT:
{
const VkSwapchainPresentFenceInfoEXT *presentFenceInfo =
reinterpret_cast<VkSwapchainPresentFenceInfoEXT *>(pNext);
ASSERT(presentFenceInfo->swapchainCount == 1);
mPresentFence = presentFenceInfo->pFences[0];
mPresentFenceInfo.sType = VK_STRUCTURE_TYPE_SWAPCHAIN_PRESENT_FENCE_INFO_EXT;
mPresentFenceInfo.pNext = nullptr;
mPresentFenceInfo.swapchainCount = 1;
mPresentFenceInfo.pFences = &mPresentFence;
AddToPNextChain(&mPresentInfo, &mPresentFenceInfo);
pNext = const_cast<void *>(presentFenceInfo->pNext);
break;
}
case VK_STRUCTURE_TYPE_SWAPCHAIN_PRESENT_MODE_INFO_EXT:
{
const VkSwapchainPresentModeInfoEXT *presentModeInfo =
reinterpret_cast<VkSwapchainPresentModeInfoEXT *>(pNext);
ASSERT(presentModeInfo->swapchainCount == 1);
mPresentMode = presentModeInfo->pPresentModes[0];
mPresentModeInfo.sType = VK_STRUCTURE_TYPE_SWAPCHAIN_PRESENT_MODE_INFO_EXT;
mPresentModeInfo.pNext = nullptr;
mPresentModeInfo.swapchainCount = 1;
mPresentModeInfo.pPresentModes = &mPresentMode;
AddToPNextChain(&mPresentInfo, &mPresentModeInfo);
pNext = const_cast<void *>(presentModeInfo->pNext);
break;
}
default:
ERR() << "Unknown sType: " << sType << " in VkPresentInfoKHR.pNext chain";
UNREACHABLE();
break;
}
}
}
void CommandProcessorTask::initPresent(egl::ContextPriority priority,
const VkPresentInfoKHR &presentInfo,
SwapchainStatus *swapchainStatus)
{
mTask = CustomTask::Present;
mPriority = priority;
mSwapchainStatus = swapchainStatus;
copyPresentInfo(presentInfo);
}
void CommandProcessorTask::initFlushAndQueueSubmit(VkSemaphore semaphore,
SharedExternalFence &&externalFence,
ProtectionType protectionType,
egl::ContextPriority priority,
const QueueSerial &submitQueueSerial)
{
mTask = CustomTask::FlushAndQueueSubmit;
mSemaphore = semaphore;
mExternalFence = std::move(externalFence);
mPriority = priority;
mProtectionType = protectionType;
mSubmitQueueSerial = submitQueueSerial;
}
void CommandProcessorTask::initOneOffQueueSubmit(VkCommandBuffer commandBufferHandle,
ProtectionType protectionType,
egl::ContextPriority priority,
VkSemaphore waitSemaphore,
VkPipelineStageFlags waitSemaphoreStageMask,
const QueueSerial &submitQueueSerial)
{
mTask = CustomTask::OneOffQueueSubmit;
mOneOffCommandBuffer = commandBufferHandle;
mOneOffWaitSemaphore = waitSemaphore;
mOneOffWaitSemaphoreStageMask = waitSemaphoreStageMask;
mPriority = priority;
mProtectionType = protectionType;
mSubmitQueueSerial = submitQueueSerial;
}
CommandProcessorTask &CommandProcessorTask::operator=(CommandProcessorTask &&rhs)
{
if (this == &rhs)
{
return *this;
}
std::swap(mRenderPass, rhs.mRenderPass);
std::swap(mOutsideRenderPassCommandBuffer, rhs.mOutsideRenderPassCommandBuffer);
std::swap(mRenderPassCommandBuffer, rhs.mRenderPassCommandBuffer);
std::swap(mTask, rhs.mTask);
std::swap(mWaitSemaphores, rhs.mWaitSemaphores);
std::swap(mWaitSemaphoreStageMasks, rhs.mWaitSemaphoreStageMasks);
std::swap(mSemaphore, rhs.mSemaphore);
std::swap(mExternalFence, rhs.mExternalFence);
std::swap(mOneOffWaitSemaphore, rhs.mOneOffWaitSemaphore);
std::swap(mOneOffWaitSemaphoreStageMask, rhs.mOneOffWaitSemaphoreStageMask);
std::swap(mSubmitQueueSerial, rhs.mSubmitQueueSerial);
std::swap(mPriority, rhs.mPriority);
std::swap(mProtectionType, rhs.mProtectionType);
std::swap(mOneOffCommandBuffer, rhs.mOneOffCommandBuffer);
copyPresentInfo(rhs.mPresentInfo);
std::swap(mSwapchainStatus, rhs.mSwapchainStatus);
// clear rhs now that everything has moved.
rhs.initTask();
return *this;
}
// CommandBatch implementation.
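// CommandBatch bundles everything associated with one queue submission: the primary command
// buffer, its secondary command buffers, the fence (internal or external) used to track
// completion, and the queue serial assigned to the submission.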
CommandBatch::CommandBatch() : protectionType(ProtectionType::InvalidEnum) {}
CommandBatch::~CommandBatch() = default;
CommandBatch::CommandBatch(CommandBatch &&other) : CommandBatch()
{
*this = std::move(other);
}
CommandBatch &CommandBatch::operator=(CommandBatch &&other)
{
std::swap(primaryCommands, other.primaryCommands);
std::swap(secondaryCommands, other.secondaryCommands);
std::swap(fence, other.fence);
std::swap(externalFence, other.externalFence);
std::swap(queueSerial, other.queueSerial);
std::swap(protectionType, other.protectionType);
return *this;
}
void CommandBatch::destroy(VkDevice device)
{
primaryCommands.destroy(device);
secondaryCommands.retireCommandBuffers();
destroyFence(device);
protectionType = ProtectionType::InvalidEnum;
}
bool CommandBatch::hasFence() const
{
ASSERT(!externalFence || !fence);
return fence || externalFence;
}
void CommandBatch::releaseFence()
{
fence.release();
externalFence.reset();
}
void CommandBatch::destroyFence(VkDevice device)
{
fence.destroy(device);
externalFence.reset();
}
VkFence CommandBatch::getFenceHandle() const
{
ASSERT(hasFence());
return fence ? fence.get().getHandle() : externalFence->getHandle();
}
VkResult CommandBatch::getFenceStatus(VkDevice device) const
{
ASSERT(hasFence());
return fence ? fence.getStatus(device) : externalFence->getStatus(device);
}
VkResult CommandBatch::waitFence(VkDevice device, uint64_t timeout) const
{
ASSERT(hasFence());
return fence ? fence.wait(device, timeout) : externalFence->wait(device, timeout);
}
VkResult CommandBatch::waitFenceUnlocked(VkDevice device,
uint64_t timeout,
std::unique_lock<std::mutex> *lock) const
{
ASSERT(hasFence());
VkResult status;
// Only a local copy of the fence may be used without holding the lock.
// Do not access "this" after unlock(), because the object might be deleted by another thread.
if (fence)
{
const SharedFence localFenceToWaitOn = fence;
lock->unlock();
status = localFenceToWaitOn.wait(device, timeout);
lock->lock();
}
else
{
const SharedExternalFence localFenceToWaitOn = externalFence;
lock->unlock();
status = localFenceToWaitOn->wait(device, timeout);
lock->lock();
}
return status;
}
// CommandProcessor implementation.
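// CommandProcessor owns a worker thread that drains mTaskQueue. Producers enqueue under
// mTaskEnqueueMutex; tasks are processed under mTaskDequeueMutex so that submission order
// matches enqueue order even when a caller drains the queue inline.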
void CommandProcessor::handleError(VkResult errorCode,
const char *file,
const char *function,
unsigned int line)
{
ASSERT(errorCode != VK_SUCCESS);
std::stringstream errorStream;
errorStream << "Internal Vulkan error (" << errorCode << "): " << VulkanResultString(errorCode)
<< ".";
if (errorCode == VK_ERROR_DEVICE_LOST)
{
WARN() << errorStream.str();
handleDeviceLost(mRenderer);
}
std::lock_guard<std::mutex> queueLock(mErrorMutex);
Error error = {errorCode, file, function, line};
mErrors.emplace(error);
}
CommandProcessor::CommandProcessor(RendererVk *renderer, CommandQueue *commandQueue)
: Context(renderer),
mCommandQueue(commandQueue),
mTaskThreadShouldExit(false),
mNeedCommandsAndGarbageCleanup(false)
{
std::lock_guard<std::mutex> queueLock(mErrorMutex);
while (!mErrors.empty())
{
mErrors.pop();
}
}
CommandProcessor::~CommandProcessor() = default;
angle::Result CommandProcessor::checkAndPopPendingError(Context *errorHandlingContext)
{
std::lock_guard<std::mutex> queueLock(mErrorMutex);
if (mErrors.empty())
{
return angle::Result::Continue;
}
while (!mErrors.empty())
{
Error err = mErrors.front();
mErrors.pop();
errorHandlingContext->handleError(err.errorCode, err.file, err.function, err.line);
}
return angle::Result::Stop;
}
angle::Result CommandProcessor::queueCommand(CommandProcessorTask &&task)
{
ANGLE_TRACE_EVENT0("gpu.angle", "CommandProcessor::queueCommand");
// Take mTaskEnqueueMutex lock. If task queue is full, try to drain one.
std::unique_lock<std::mutex> enqueueLock(mTaskEnqueueMutex);
if (mTaskQueue.full())
{
std::lock_guard<std::mutex> dequeueLock(mTaskDequeueMutex);
// Check the task queue again in case another thread just drained it.
if (mTaskQueue.full())
{
CommandProcessorTask frontTask(std::move(mTaskQueue.front()));
mTaskQueue.pop();
ANGLE_TRY(processTask(&frontTask));
}
}
mTaskQueue.push(std::move(task));
mWorkAvailableCondition.notify_one();
return angle::Result::Continue;
}
void CommandProcessor::requestCommandsAndGarbageCleanup()
{
if (!mNeedCommandsAndGarbageCleanup.exchange(true))
{
// Request cleanup in the async thread.
std::unique_lock<std::mutex> enqueueLock(mTaskEnqueueMutex);
mWorkAvailableCondition.notify_one();
}
}
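// Worker thread entry point: loops in processTasksImpl() until a controlled exit is requested.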
void CommandProcessor::processTasks()
{
while (true)
{
bool exitThread = false;
angle::Result result = processTasksImpl(&exitThread);
if (exitThread)
{
// We are doing a controlled exit of the thread, break out of the while loop.
break;
}
if (result != angle::Result::Continue)
{
// TODO: https://issuetracker.google.com/issues/170311829 - follow-up on error handling.
// ContextVk::commandProcessorSyncErrorsAndQueueCommand and WindowSurfaceVk::destroy already
// do error processing; nothing appears to be required here beyond keeping the worker thread
// running until it is told to exit.
UNREACHABLE();
}
}
}
angle::Result CommandProcessor::processTasksImpl(bool *exitThread)
{
while (true)
{
std::unique_lock<std::mutex> enqueueLock(mTaskEnqueueMutex);
if (mTaskQueue.empty())
{
if (mTaskThreadShouldExit)
{
break;
}
// Sleep until there are new tasks, a cleanup request, or the thread is asked to exit.
mWorkAvailableCondition.wait(enqueueLock, [this] {
return !mTaskQueue.empty() || mTaskThreadShouldExit ||
mNeedCommandsAndGarbageCleanup;
});
}
// Do submission with mTaskEnqueueMutex unlocked so that enqueueing is still allowed while we
// process work.
enqueueLock.unlock();
// Take the dequeue lock to ensure submissions happen in the same order as they were received.
std::lock_guard<std::mutex> dequeueLock(mTaskDequeueMutex);
if (!mTaskQueue.empty())
{
CommandProcessorTask task(std::move(mTaskQueue.front()));
mTaskQueue.pop();
// Artificially make the task take longer to catch threading issues.
if (getFeatures().slowAsyncCommandQueueForTesting.enabled)
{
constexpr double kSlowdownTime = 0.005;
double startTime = angle::GetCurrentSystemTime();
while (angle::GetCurrentSystemTime() - startTime < kSlowdownTime)
{
// Busy waiting
}
}
ANGLE_TRY(processTask(&task));
}
if (mNeedCommandsAndGarbageCleanup.exchange(false))
{
// Always check completed commands again in case anything new has been finished.
ANGLE_TRY(mCommandQueue->checkCompletedCommands(this));
// Reset command buffer and clean up garbage
if (mRenderer->isAsyncCommandBufferResetEnabled() &&
mCommandQueue->hasFinishedCommands())
{
ANGLE_TRY(mCommandQueue->retireFinishedCommands(this));
}
mRenderer->cleanupGarbage();
}
}
*exitThread = true;
return angle::Result::Continue;
}
angle::Result CommandProcessor::processTask(CommandProcessorTask *task)
{
switch (task->getTaskCommand())
{
case CustomTask::FlushAndQueueSubmit:
{
ANGLE_TRACE_EVENT0("gpu.angle", "processTask::FlushAndQueueSubmit");
// End command buffer
// Call submitCommands()
ANGLE_TRY(mCommandQueue->submitCommands(
this, task->getProtectionType(), task->getPriority(), task->getSemaphore(),
std::move(task->getExternalFence()), task->getSubmitQueueSerial()));
mNeedCommandsAndGarbageCleanup = true;
break;
}
case CustomTask::OneOffQueueSubmit:
{
ANGLE_TRACE_EVENT0("gpu.angle", "processTask::OneOffQueueSubmit");
ANGLE_TRY(mCommandQueue->queueSubmitOneOff(
this, task->getProtectionType(), task->getPriority(),
task->getOneOffCommandBuffer(), task->getOneOffWaitSemaphore(),
task->getOneOffWaitSemaphoreStageMask(), SubmitPolicy::EnsureSubmitted,
task->getSubmitQueueSerial()));
mNeedCommandsAndGarbageCleanup = true;
break;
}
case CustomTask::Present:
{
// Do not access task->getSwapchainStatus() after this call because it is marked as no
// longer pending, and so may get deleted or clobbered by another thread.
VkResult result =
present(task->getPriority(), task->getPresentInfo(), task->getSwapchainStatus());
// We get to ignore these as they are not fatal
if (result != VK_ERROR_OUT_OF_DATE_KHR && result != VK_SUBOPTIMAL_KHR &&
result != VK_SUCCESS)
{
// Save the error so that we can handle it.
// Don't leave processing loop, don't consider errors from present to be fatal.
// TODO: https://issuetracker.google.com/issues/170329600 - This needs to improve to
// properly parallelize present
handleError(result, __FILE__, __FUNCTION__, __LINE__);
}
break;
}
case CustomTask::FlushWaitSemaphores:
{
mCommandQueue->flushWaitSemaphores(task->getProtectionType(), task->getPriority(),
std::move(task->getWaitSemaphores()),
std::move(task->getWaitSemaphoreStageMasks()));
break;
}
case CustomTask::ProcessOutsideRenderPassCommands:
{
OutsideRenderPassCommandBufferHelper *commandBuffer =
task->getOutsideRenderPassCommandBuffer();
ANGLE_TRY(mCommandQueue->flushOutsideRPCommands(this, task->getProtectionType(),
task->getPriority(), &commandBuffer));
OutsideRenderPassCommandBufferHelper *originalCommandBuffer =
task->getOutsideRenderPassCommandBuffer();
mRenderer->recycleOutsideRenderPassCommandBufferHelper(&originalCommandBuffer);
break;
}
case CustomTask::ProcessRenderPassCommands:
{
RenderPassCommandBufferHelper *commandBuffer = task->getRenderPassCommandBuffer();
ANGLE_TRY(mCommandQueue->flushRenderPassCommands(
this, task->getProtectionType(), task->getPriority(), *task->getRenderPass(),
&commandBuffer));
RenderPassCommandBufferHelper *originalCommandBuffer =
task->getRenderPassCommandBuffer();
mRenderer->recycleRenderPassCommandBufferHelper(&originalCommandBuffer);
break;
}
default:
UNREACHABLE();
break;
}
return angle::Result::Continue;
}
angle::Result CommandProcessor::waitForAllWorkToBeSubmitted(Context *context)
{
ANGLE_TRACE_EVENT0("gpu.angle", "CommandProcessor::waitForAllWorkToBeSubmitted");
// Take the mTaskEnqueueMutex and mTaskDequeueMutex locks so that no one can enqueue more work
// while we drain the queue and handle device lost.
std::lock_guard<std::mutex> enqueueLock(mTaskEnqueueMutex);
std::lock_guard<std::mutex> dequeueLock(mTaskDequeueMutex);
// Sync any errors to the context
// Do this inside the mutex to prevent new errors adding to the list.
ANGLE_TRY(checkAndPopPendingError(context));
while (!mTaskQueue.empty())
{
CommandProcessorTask task(std::move(mTaskQueue.front()));
mTaskQueue.pop();
ANGLE_TRY(processTask(&task));
}
if (mRenderer->isAsyncCommandBufferResetEnabled())
{
ANGLE_TRY(mCommandQueue->retireFinishedCommands(context));
}
context->getRenderer()->cleanupGarbage();
mNeedCommandsAndGarbageCleanup = false;
return angle::Result::Continue;
}
angle::Result CommandProcessor::init()
{
mTaskThread = std::thread(&CommandProcessor::processTasks, this);
return angle::Result::Continue;
}
void CommandProcessor::destroy(Context *context)
{
{
// Request to terminate the worker thread
std::lock_guard<std::mutex> enqueueLock(mTaskEnqueueMutex);
mTaskThreadShouldExit = true;
mWorkAvailableCondition.notify_one();
}
(void)waitForAllWorkToBeSubmitted(context);
if (mTaskThread.joinable())
{
mTaskThread.join();
}
}
void CommandProcessor::handleDeviceLost(RendererVk *renderer)
{
ANGLE_TRACE_EVENT0("gpu.angle", "CommandProcessor::handleDeviceLost");
// waitForAllWorkToBeSubmitted() takes both mTaskEnqueueMutex and mTaskDequeueMutex, so no one
// can add more work to the queue while we drain it and handle device lost. Do not take
// mTaskEnqueueMutex here: std::mutex is not recursive, and locking it again in the call below
// would deadlock.
(void)waitForAllWorkToBeSubmitted(this);
// Worker thread is idle and command queue is empty so good to continue
mCommandQueue->handleDeviceLost(renderer);
}
VkResult CommandProcessor::present(egl::ContextPriority priority,
const VkPresentInfoKHR &presentInfo,
SwapchainStatus *swapchainStatus)
{
ANGLE_TRACE_EVENT0("gpu.angle", "vkQueuePresentKHR");
// Verify that we are presenting one and only one swapchain
ASSERT(presentInfo.swapchainCount == 1);
ASSERT(presentInfo.pResults == nullptr);
mCommandQueue->queuePresent(priority, presentInfo, swapchainStatus);
const VkResult result = swapchainStatus->lastPresentResult;
// Only update isPending after lastPresentResult has been recorded.
// swapchainStatus cannot be accessed after this assignment: once it is no longer pending, it
// may get deleted or clobbered by another thread.
ASSERT(swapchainStatus->isPending);
swapchainStatus->isPending = false;
return result;
}
angle::Result CommandProcessor::enqueueSubmitCommands(Context *context,
ProtectionType protectionType,
egl::ContextPriority priority,
VkSemaphore signalSemaphore,
SharedExternalFence &&externalFence,
const QueueSerial &submitQueueSerial)
{
ANGLE_TRY(checkAndPopPendingError(context));
CommandProcessorTask task;
task.initFlushAndQueueSubmit(signalSemaphore, std::move(externalFence), protectionType,
priority, submitQueueSerial);
ANGLE_TRY(queueCommand(std::move(task)));
mLastEnqueuedSerials.setQueueSerial(submitQueueSerial);
return angle::Result::Continue;
}
angle::Result CommandProcessor::enqueueSubmitOneOffCommands(
Context *context,
ProtectionType protectionType,
egl::ContextPriority contextPriority,
VkCommandBuffer commandBufferHandle,
VkSemaphore waitSemaphore,
VkPipelineStageFlags waitSemaphoreStageMask,
SubmitPolicy submitPolicy,
const QueueSerial &submitQueueSerial)
{
ANGLE_TRY(checkAndPopPendingError(context));
CommandProcessorTask task;
task.initOneOffQueueSubmit(commandBufferHandle, protectionType, contextPriority, waitSemaphore,
waitSemaphoreStageMask, submitQueueSerial);
ANGLE_TRY(queueCommand(std::move(task)));
mLastEnqueuedSerials.setQueueSerial(submitQueueSerial);
if (submitPolicy == SubmitPolicy::EnsureSubmitted)
{
// Caller has synchronization requirement to have work in GPU pipe when returning from this
// function.
ANGLE_TRY(waitForQueueSerialToBeSubmitted(context, submitQueueSerial));
}
return angle::Result::Continue;
}
void CommandProcessor::enqueuePresent(egl::ContextPriority contextPriority,
const VkPresentInfoKHR &presentInfo,
SwapchainStatus *swapchainStatus)
{
ASSERT(!swapchainStatus->isPending);
swapchainStatus->isPending = true;
// Always return with VK_SUCCESS initially. When we call acquireNextImage we'll check the
// return code again. This allows the app to continue working until we really need to know
// the return code from present.
swapchainStatus->lastPresentResult = VK_SUCCESS;
CommandProcessorTask task;
task.initPresent(contextPriority, presentInfo, swapchainStatus);
(void)queueCommand(std::move(task));
}
angle::Result CommandProcessor::enqueueFlushWaitSemaphores(
ProtectionType protectionType,
egl::ContextPriority priority,
std::vector<VkSemaphore> &&waitSemaphores,
std::vector<VkPipelineStageFlags> &&waitSemaphoreStageMasks)
{
CommandProcessorTask task;
task.initFlushWaitSemaphores(protectionType, priority, std::move(waitSemaphores),
std::move(waitSemaphoreStageMasks));
ANGLE_TRY(queueCommand(std::move(task)));
return angle::Result::Continue;
}
angle::Result CommandProcessor::enqueueFlushOutsideRPCommands(
Context *context,
ProtectionType protectionType,
egl::ContextPriority priority,
OutsideRenderPassCommandBufferHelper **outsideRPCommands)
{
ANGLE_TRY(checkAndPopPendingError(context));
(*outsideRPCommands)->markClosed();
SecondaryCommandPool *commandPool = nullptr;
ANGLE_TRY((*outsideRPCommands)->detachCommandPool(context, &commandPool));
// Detach functions are only used for ring buffer allocators.
SecondaryCommandMemoryAllocator *allocator = (*outsideRPCommands)->detachAllocator();
CommandProcessorTask task;
task.initOutsideRenderPassProcessCommands(protectionType, priority, *outsideRPCommands);
ANGLE_TRY(queueCommand(std::move(task)));
ANGLE_TRY(mRenderer->getOutsideRenderPassCommandBufferHelper(context, commandPool, allocator,
outsideRPCommands));
return angle::Result::Continue;
}
angle::Result CommandProcessor::enqueueFlushRenderPassCommands(
Context *context,
ProtectionType protectionType,
egl::ContextPriority priority,
const RenderPass &renderPass,
RenderPassCommandBufferHelper **renderPassCommands)
{
ANGLE_TRY(checkAndPopPendingError(context));
(*renderPassCommands)->markClosed();
SecondaryCommandPool *commandPool = nullptr;
(*renderPassCommands)->detachCommandPool(&commandPool);
// Detach functions are only used for ring buffer allocators.
SecondaryCommandMemoryAllocator *allocator = (*renderPassCommands)->detachAllocator();
CommandProcessorTask task;
task.initRenderPassProcessCommands(protectionType, priority, *renderPassCommands, &renderPass);
ANGLE_TRY(queueCommand(std::move(task)));
ANGLE_TRY(mRenderer->getRenderPassCommandBufferHelper(context, commandPool, allocator,
renderPassCommands));
return angle::Result::Continue;
}
angle::Result CommandProcessor::waitForResourceUseToBeSubmitted(Context *context,
const ResourceUse &use)
{
if (mCommandQueue->hasResourceUseSubmitted(use))
{
ANGLE_TRY(checkAndPopPendingError(context));
}
else
{
// We do not hold the mTaskEnqueueMutex lock, so other contexts can still enqueue work while we
// process tasks here.
std::lock_guard<std::mutex> dequeueLock(mTaskDequeueMutex);
// Do this inside the mutex to prevent new errors adding to the list.
ANGLE_TRY(checkAndPopPendingError(context));
size_t maxTaskCount = mTaskQueue.size();
size_t taskCount = 0;
while (taskCount < maxTaskCount && !mCommandQueue->hasResourceUseSubmitted(use))
{
CommandProcessorTask task(std::move(mTaskQueue.front()));
mTaskQueue.pop();
ANGLE_TRY(processTask(&task));
taskCount++;
}
}
return angle::Result::Continue;
}
angle::Result CommandProcessor::waitForPresentToBeSubmitted(SwapchainStatus *swapchainStatus)
{
if (!swapchainStatus->isPending)
{
return angle::Result::Continue;
}
std::lock_guard<std::mutex> dequeueLock(mTaskDequeueMutex);
size_t maxTaskCount = mTaskQueue.size();
size_t taskCount = 0;
while (taskCount < maxTaskCount && swapchainStatus->isPending)
{
CommandProcessorTask task(std::move(mTaskQueue.front()));
mTaskQueue.pop();
ANGLE_TRY(processTask(&task));
taskCount++;
}
ASSERT(!swapchainStatus->isPending);
return angle::Result::Continue;
}
// CommandQueue public API implementation. These must be thread safe and never called from
// CommandQueue class itself.
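// CommandQueue uses two mutexes: mMutex guards the command and batch state, while
// mQueueSubmitMutex serializes the actual vkQueueSubmit/vkQueuePresentKHR calls so that
// submissions reach the VkQueue in order.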
CommandQueue::CommandQueue() : mPerfCounters{} {}
CommandQueue::~CommandQueue() = default;
void CommandQueue::destroy(Context *context)
{
std::lock_guard<std::mutex> lock(mMutex);
std::lock_guard<std::mutex> enqueuelock(mQueueSubmitMutex);
// Force all commands to finish by flushing all queues.
for (VkQueue queue : mQueueMap)
{
if (queue != VK_NULL_HANDLE)
{
vkQueueWaitIdle(queue);
}
}
RendererVk *renderer = context->getRenderer();
// Assign an infinite "last completed" serial to force all garbage to be deleted.
mLastCompletedSerials.fill(Serial::Infinite());
for (auto &protectionMap : mCommandsStateMap)
{
for (CommandsState &state : protectionMap)
{
state.waitSemaphores.clear();
state.waitSemaphoreStageMasks.clear();
state.primaryCommands.destroy(renderer->getDevice());
state.secondaryCommands.retireCommandBuffers();
}
}
for (PersistentCommandPool &commandPool : mPrimaryCommandPoolMap)
{
commandPool.destroy(renderer->getDevice());
}
mFenceRecycler.destroy(context);
ASSERT(mInFlightCommands.empty());
ASSERT(mFinishedCommandBatches.empty());
}
angle::Result CommandQueue::init(Context *context, const DeviceQueueMap &queueMap)
{
std::lock_guard<std::mutex> lock(mMutex);
// In case RendererVk gets re-initialized, we can't rely on the constructor to do
// initialization for us.
mLastSubmittedSerials.fill(kZeroSerial);
mLastCompletedSerials.fill(kZeroSerial);
// Assign before initializing the command pools in order to get the queue family index.
mQueueMap = queueMap;
ANGLE_TRY(initCommandPool(context, ProtectionType::Unprotected));
if (queueMap.isProtected())
{
ANGLE_TRY(initCommandPool(context, ProtectionType::Protected));
}
return angle::Result::Continue;
}
void CommandQueue::handleDeviceLost(RendererVk *renderer)
{
ANGLE_TRACE_EVENT0("gpu.angle", "CommandQueue::handleDeviceLost");
VkDevice device = renderer->getDevice();
// Hold both locks while clean up mInFlightCommands.
std::lock_guard<std::mutex> dequeuelock(mMutex);
std::lock_guard<std::mutex> enqueuelock(mQueueSubmitMutex);
while (!mInFlightCommands.empty())
{
CommandBatch &batch = mInFlightCommands.front();
// On device loss we need to wait for the fence to be signaled before destroying it.
if (batch.hasFence())
{
VkResult status = batch.waitFence(device, renderer->getMaxFenceWaitTimeNs());
// If the wait times out, it is probably not possible to recover from lost device
ASSERT(status == VK_SUCCESS || status == VK_ERROR_DEVICE_LOST);
batch.destroyFence(device);
}
// On device loss, simply destroy the CommandBuffer here; it will be fully cleaned up later by
// CommandPool::destroy.
if (batch.primaryCommands.valid())
{
batch.primaryCommands.destroy(device);
}
batch.secondaryCommands.retireCommandBuffers();
mLastCompletedSerials.setQueueSerial(batch.queueSerial);
mInFlightCommands.pop();
}
}
angle::Result CommandQueue::postSubmitCheck(Context *context)
{
RendererVk *renderer = context->getRenderer();
// Update mLastCompletedQueueSerial immediately in case any command has been finished.
ANGLE_TRY(checkAndCleanupCompletedCommands(context));
VkDeviceSize suballocationGarbageSize = renderer->getSuballocationGarbageSize();
if (suballocationGarbageSize > kMaxBufferSuballocationGarbageSize)
{
// The CPU should be throttled to avoid accumulating too much memory garbage waiting to be
// destroyed. This is important to keep peak memory usage in check when a game launches and a
// lot of staging buffers are used for texture uploads and then released. But if there is only
// one command buffer in flight, we do not wait here, to ensure the GPU stays busy.
std::unique_lock<std::mutex> lock(mMutex);
while (suballocationGarbageSize > kMaxBufferSuballocationGarbageSize &&
mInFlightCommands.size() > 1)
{
ANGLE_TRY(
finishOneCommandBatchAndCleanupImpl(context, renderer->getMaxFenceWaitTimeNs()));
suballocationGarbageSize = renderer->getSuballocationGarbageSize();
}
}
if (kOutputVmaStatsString)
{
renderer->outputVmaStatString();
}
return angle::Result::Continue;
}
angle::Result CommandQueue::finishResourceUse(Context *context,
const ResourceUse &use,
uint64_t timeout)
{
VkDevice device = context->getDevice();
{
std::unique_lock<std::mutex> lock(mMutex);
while (!mInFlightCommands.empty() && !hasResourceUseFinished(use))
{
bool finished;
ANGLE_TRY(checkOneCommandBatch(context, &finished));
if (!finished)
{
ANGLE_VK_TRY(context,
mInFlightCommands.front().waitFenceUnlocked(device, timeout, &lock));
}
}
// Check the rest of the commands in case they are also finished.
ANGLE_TRY(checkCompletedCommandsLocked(context));
}
ASSERT(hasResourceUseFinished(use));
if (!mFinishedCommandBatches.empty())
{
ANGLE_TRY(retireFinishedCommandsAndCleanupGarbage(context));
}
return angle::Result::Continue;
}
angle::Result CommandQueue::finishQueueSerial(Context *context,
const QueueSerial &queueSerial,
uint64_t timeout)
{
vk::ResourceUse use(queueSerial);
return finishResourceUse(context, use, timeout);
}
angle::Result CommandQueue::waitIdle(Context *context, uint64_t timeout)
{
// Fill the local variable while holding the lock.
vk::ResourceUse use;
{
std::lock_guard<std::mutex> lock(mMutex);
if (mInFlightCommands.empty())
{
return angle::Result::Continue;
}
use.setQueueSerial(mInFlightCommands.back().queueSerial);
}
return finishResourceUse(context, use, timeout);
}
angle::Result CommandQueue::waitForResourceUseToFinishWithUserTimeout(Context *context,
const ResourceUse &use,
uint64_t timeout,
VkResult *result)
{
// The serial is not yet submitted. Waiting on it is undefined behavior, so we can do anything.
if (!hasResourceUseSubmitted(use))
{
WARN() << "Waiting on an unsubmitted serial.";
*result = VK_TIMEOUT;
return angle::Result::Continue;
}
VkDevice device = context->getDevice();
size_t finishedCount = 0;
{
std::unique_lock<std::mutex> lock(mMutex);
while (!mInFlightCommands.empty() && !hasResourceUseFinished(use))
{
bool finished;
ANGLE_TRY(checkOneCommandBatch(context, &finished));
if (!finished)
{
*result = mInFlightCommands.front().waitFenceUnlocked(device, timeout, &lock);
// Don't trigger an error on timeout.
if (*result == VK_TIMEOUT)
{
break;
}
else
{
ANGLE_VK_TRY(context, *result);
}
}
}
// Do one more check in case more commands also finished.
ANGLE_TRY(checkCompletedCommandsLocked(context));
finishedCount = mFinishedCommandBatches.size();
}
if (finishedCount > 0)
{
ANGLE_TRY(retireFinishedCommandsAndCleanupGarbage(context));
}
return angle::Result::Continue;
}
bool CommandQueue::isBusy(RendererVk *renderer) const
{
// No lock is needed here since we are accessing atomic variables only.
size_t maxIndex = renderer->getLargestQueueSerialIndexEverAllocated();
for (SerialIndex i = 0; i <= maxIndex; ++i)
{
if (mLastSubmittedSerials[i] > mLastCompletedSerials[i])
{
return true;
}
}
return false;
}
void CommandQueue::flushWaitSemaphores(ProtectionType protectionType,
egl::ContextPriority priority,
std::vector<VkSemaphore> &&waitSemaphores,
std::vector<VkPipelineStageFlags> &&waitSemaphoreStageMasks)
{
ASSERT(!waitSemaphores.empty());
ASSERT(waitSemaphores.size() == waitSemaphoreStageMasks.size());
std::lock_guard<std::mutex> lock(mMutex);
CommandsState &state = mCommandsStateMap[priority][protectionType];
state.waitSemaphores.insert(state.waitSemaphores.end(), waitSemaphores.begin(),
waitSemaphores.end());
state.waitSemaphoreStageMasks.insert(state.waitSemaphoreStageMasks.end(),
waitSemaphoreStageMasks.begin(),
waitSemaphoreStageMasks.end());
waitSemaphores.clear();
waitSemaphoreStageMasks.clear();
}
angle::Result CommandQueue::flushOutsideRPCommands(
Context *context,
ProtectionType protectionType,
egl::ContextPriority priority,
OutsideRenderPassCommandBufferHelper **outsideRPCommands)
{
std::lock_guard<std::mutex> lock(mMutex);
ANGLE_TRY(ensurePrimaryCommandBufferValid(context, protectionType, priority));
CommandsState &state = mCommandsStateMap[priority][protectionType];
return (*outsideRPCommands)->flushToPrimary(context, &state);
}
angle::Result CommandQueue::flushRenderPassCommands(
Context *context,
ProtectionType protectionType,
egl::ContextPriority priority,
const RenderPass &renderPass,
RenderPassCommandBufferHelper **renderPassCommands)
{
std::lock_guard<std::mutex> lock(mMutex);
ANGLE_TRY(ensurePrimaryCommandBufferValid(context, protectionType, priority));
CommandsState &state = mCommandsStateMap[priority][protectionType];
return (*renderPassCommands)->flushToPrimary(context, &state, &renderPass);
}
angle::Result CommandQueue::submitCommands(Context *context,
ProtectionType protectionType,
egl::ContextPriority priority,
VkSemaphore signalSemaphore,
SharedExternalFence &&externalFence,
const QueueSerial &submitQueueSerial)
{
ANGLE_TRACE_EVENT0("gpu.angle", "CommandQueue::submitCommands");
std::unique_lock<std::mutex> lock(mMutex);
RendererVk *renderer = context->getRenderer();
VkDevice device = renderer->getDevice();
++mPerfCounters.commandQueueSubmitCallsTotal;
++mPerfCounters.commandQueueSubmitCallsPerFrame;
DeviceScoped<CommandBatch> scopedBatch(device);
CommandBatch &batch = scopedBatch.get();
batch.queueSerial = submitQueueSerial;
batch.protectionType = protectionType;
CommandsState &state = mCommandsStateMap[priority][protectionType];
// Store the primary CommandBuffer in the in-flight list.
batch.primaryCommands = std::move(state.primaryCommands);
// Store secondary Command Buffers.
batch.secondaryCommands = std::move(state.secondaryCommands);
ASSERT(batch.primaryCommands.valid() || batch.secondaryCommands.empty());
// Move to local copies of the vectors, since queueSubmit will release the lock.
std::vector<VkSemaphore> waitSemaphores = std::move(state.waitSemaphores);
std::vector<VkPipelineStageFlags> waitSemaphoreStageMasks =
std::move(state.waitSemaphoreStageMasks);
mPerfCounters.commandQueueWaitSemaphoresTotal += waitSemaphores.size();
// Don't make a submission if there is nothing to submit.
const bool needsQueueSubmit = batch.primaryCommands.valid() ||
signalSemaphore != VK_NULL_HANDLE || externalFence ||
!waitSemaphores.empty();
VkSubmitInfo submitInfo = {};
VkProtectedSubmitInfo protectedSubmitInfo = {};
if (needsQueueSubmit)
{
if (batch.primaryCommands.valid())
{
ANGLE_VK_TRY(context, batch.primaryCommands.end());
}
InitializeSubmitInfo(&submitInfo, batch.primaryCommands, waitSemaphores,
waitSemaphoreStageMasks, signalSemaphore);
// No need for a protected submission if there are no commands to submit.
if (protectionType == ProtectionType::Protected && batch.primaryCommands.valid())
{
protectedSubmitInfo.sType = VK_STRUCTURE_TYPE_PROTECTED_SUBMIT_INFO;
protectedSubmitInfo.pNext = nullptr;
protectedSubmitInfo.protectedSubmit = true;
submitInfo.pNext = &protectedSubmitInfo;
}
if (!externalFence)
{
ANGLE_VK_TRY(context, batch.fence.init(context->getDevice(), &mFenceRecycler));
}
else
{
batch.externalFence = std::move(externalFence);
}
++mPerfCounters.vkQueueSubmitCallsTotal;
++mPerfCounters.vkQueueSubmitCallsPerFrame;
}
// Note queueSubmit will release the lock.
ANGLE_TRY(queueSubmit(context, std::move(lock), priority, submitInfo, scopedBatch,
submitQueueSerial));
// Clear local vector without lock.
waitSemaphores.clear();
waitSemaphoreStageMasks.clear();
return angle::Result::Continue;
}
angle::Result CommandQueue::queueSubmitOneOff(Context *context,
ProtectionType protectionType,
egl::ContextPriority contextPriority,
VkCommandBuffer commandBufferHandle,
VkSemaphore waitSemaphore,
VkPipelineStageFlags waitSemaphoreStageMask,
SubmitPolicy submitPolicy,
const QueueSerial &submitQueueSerial)
{
std::unique_lock<std::mutex> lock(mMutex);
DeviceScoped<CommandBatch> scopedBatch(context->getDevice());
CommandBatch &batch = scopedBatch.get();
batch.queueSerial = submitQueueSerial;
batch.protectionType = protectionType;
ANGLE_VK_TRY(context, batch.fence.init(context->getDevice(), &mFenceRecycler));
VkSubmitInfo submitInfo = {};
submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
VkProtectedSubmitInfo protectedSubmitInfo = {};
ASSERT(protectionType == ProtectionType::Unprotected ||
protectionType == ProtectionType::Protected);
if (protectionType == ProtectionType::Protected)
{
protectedSubmitInfo.sType = VK_STRUCTURE_TYPE_PROTECTED_SUBMIT_INFO;
protectedSubmitInfo.pNext = nullptr;
protectedSubmitInfo.protectedSubmit = true;
submitInfo.pNext = &protectedSubmitInfo;
}
if (commandBufferHandle != VK_NULL_HANDLE)
{
submitInfo.commandBufferCount = 1;
submitInfo.pCommandBuffers = &commandBufferHandle;
}
if (waitSemaphore != VK_NULL_HANDLE)
{
submitInfo.waitSemaphoreCount = 1;
submitInfo.pWaitSemaphores = &waitSemaphore;
submitInfo.pWaitDstStageMask = &waitSemaphoreStageMask;
}
++mPerfCounters.vkQueueSubmitCallsTotal;
++mPerfCounters.vkQueueSubmitCallsPerFrame;
// Note queueSubmit will release the lock.
return queueSubmit(context, std::move(lock), contextPriority, submitInfo, scopedBatch,
submitQueueSerial);
}
angle::Result CommandQueue::queueSubmit(Context *context,
std::unique_lock<std::mutex> &&dequeueLock,
egl::ContextPriority contextPriority,
const VkSubmitInfo &submitInfo,
DeviceScoped<CommandBatch> &commandBatch,
const QueueSerial &submitQueueSerial)
{
ANGLE_TRACE_EVENT0("gpu.angle", "CommandQueue::queueSubmit");
RendererVk *renderer = context->getRenderer();
// Lock relay to ensure that the ordering of submissions strictly follows the context's
// submission order. The relay (first take mMutex, then mQueueSubmitMutex, then release mMutex)
// ensures we always hold a lock covering the entire call, which guarantees the strict
// submission order.
std::lock_guard<std::mutex> queueSubmitLock(mQueueSubmitMutex);
// The CPU should be throttled to prevent mInFlightCommands from growing too fast. This is
// important for off-screen scenarios.
if (mInFlightCommands.full())
{
ANGLE_TRY(finishOneCommandBatchAndCleanupImpl(context, renderer->getMaxFenceWaitTimeNs()));
}
// Release the dequeue lock while doing the potentially lengthy vkQueueSubmit call.
// Note: after this point, you cannot reference anything that requires the mMutex lock.
dequeueLock.unlock();
if (submitInfo.sType == VK_STRUCTURE_TYPE_SUBMIT_INFO)
{
CommandBatch &batch = commandBatch.get();
VkQueue queue = getQueue(contextPriority);
VkFence fence = batch.getFenceHandle();
ASSERT(fence != VK_NULL_HANDLE);
ANGLE_VK_TRY(context, vkQueueSubmit(queue, 1, &submitInfo, fence));
if (batch.externalFence)
{
// exportFd exports a VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT_KHR handle, which obeys copy
// semantics. This means the fence must already be signaled, or the work that signals it must
// already be in the graphics pipeline, at the time we export the fd. In other words,
// exportFd() must be called after a successful vkQueueSubmit() call.
ExternalFence &externalFence = *batch.externalFence;
VkFenceGetFdInfoKHR fenceGetFdInfo = {};
fenceGetFdInfo.sType = VK_STRUCTURE_TYPE_FENCE_GET_FD_INFO_KHR;
fenceGetFdInfo.fence = externalFence.getHandle();
fenceGetFdInfo.handleType = VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT_KHR;
externalFence.exportFd(renderer->getDevice(), fenceGetFdInfo);
}
}
mInFlightCommands.push(commandBatch.release());
// This must be set last, so that once this submission appears submitted, it has actually been
// submitted and enqueued to mInFlightCommands.
mLastSubmittedSerials.setQueueSerial(submitQueueSerial);
return angle::Result::Continue;
}
void CommandQueue::queuePresent(egl::ContextPriority contextPriority,
const VkPresentInfoKHR &presentInfo,
SwapchainStatus *swapchainStatus)
{
std::lock_guard<std::mutex> queueSubmitLock(mQueueSubmitMutex);
VkQueue queue = getQueue(contextPriority);
swapchainStatus->lastPresentResult = vkQueuePresentKHR(queue, &presentInfo);
}
const angle::VulkanPerfCounters CommandQueue::getPerfCounters() const
{
std::lock_guard<std::mutex> lock(mMutex);
return mPerfCounters;
}
void CommandQueue::resetPerFramePerfCounters()
{
std::lock_guard<std::mutex> lock(mMutex);
mPerfCounters.commandQueueSubmitCallsPerFrame = 0;
mPerfCounters.vkQueueSubmitCallsPerFrame = 0;
}
angle::Result CommandQueue::retireFinishedCommandsAndCleanupGarbage(Context *context)
{
RendererVk *renderer = context->getRenderer();
if (!renderer->isAsyncCommandBufferResetEnabled())
{
// Do immediate command buffer reset
ANGLE_TRY(retireFinishedCommands(context));
}
renderer->requestAsyncCommandsAndGarbageCleanup(context);
return angle::Result::Continue;
}
// CommandQueue private API implementation. These are called by public API, so lock already held.
angle::Result CommandQueue::checkOneCommandBatch(Context *context, bool *finished)
{
ASSERT(!mInFlightCommands.empty());
CommandBatch &batch = mInFlightCommands.front();
*finished = false;
if (batch.hasFence())
{
VkResult status = batch.getFenceStatus(context->getDevice());
if (status == VK_NOT_READY)
{
return angle::Result::Continue;
}
ANGLE_VK_TRY(context, status);
}
// Finished.
mLastCompletedSerials.setQueueSerial(batch.queueSerial);
// Move command batch to mFinishedCommandBatches.
if (mFinishedCommandBatches.full())
{
ANGLE_TRY(retireFinishedCommandsLocked(context));
}
mFinishedCommandBatches.push(std::move(batch));
mInFlightCommands.pop();
*finished = true;
return angle::Result::Continue;
}
angle::Result CommandQueue::finishOneCommandBatchAndCleanup(Context *context,
uint64_t timeout,
bool *anyFinished)
{
std::lock_guard<std::mutex> lock(mMutex);
// If there are in-flight submissions in the queue, they can be finished.
*anyFinished = false;
if (!mInFlightCommands.empty())
{
ANGLE_TRY(finishOneCommandBatchAndCleanupImpl(context, timeout));
*anyFinished = true;
}
return angle::Result::Continue;
}
angle::Result CommandQueue::finishOneCommandBatchAndCleanupImpl(Context *context, uint64_t timeout)
{
ASSERT(!mInFlightCommands.empty());
CommandBatch &batch = mInFlightCommands.front();
if (batch.hasFence())
{
VkResult status = batch.waitFence(context->getDevice(), timeout);
ANGLE_VK_TRY(context, status);
}
mLastCompletedSerials.setQueueSerial(batch.queueSerial);
// Move command batch to mFinishedCommandBatches.
if (mFinishedCommandBatches.full())
{
ANGLE_TRY(retireFinishedCommandsLocked(context));
}
mFinishedCommandBatches.push(std::move(batch));
mInFlightCommands.pop();
// Immediately clean up finished batches.
ANGLE_TRY(retireFinishedCommandsLocked(context));
context->getRenderer()->cleanupGarbage();
return angle::Result::Continue;
}
angle::Result CommandQueue::retireFinishedCommandsLocked(Context *context)
{
ANGLE_TRACE_EVENT0("gpu.angle", "retireFinishedCommandsLocked");
while (!mFinishedCommandBatches.empty())
{
CommandBatch &batch = mFinishedCommandBatches.front();
ASSERT(batch.queueSerial <= mLastCompletedSerials);
batch.releaseFence();
if (batch.primaryCommands.valid())
{
PersistentCommandPool &commandPool = mPrimaryCommandPoolMap[batch.protectionType];
ANGLE_TRY(commandPool.collect(context, std::move(batch.primaryCommands)));
}
batch.secondaryCommands.retireCommandBuffers();
mFinishedCommandBatches.pop();
}
return angle::Result::Continue;
}
angle::Result CommandQueue::checkCompletedCommandsLocked(Context *context)
{
while (!mInFlightCommands.empty())
{
bool finished;
ANGLE_TRY(checkOneCommandBatch(context, &finished));
if (!finished)
{
break;
}
}
return angle::Result::Continue;
}
angle::Result CommandQueue::ensurePrimaryCommandBufferValid(Context *context,
ProtectionType protectionType,
egl::ContextPriority priority)
{
CommandsState &state = mCommandsStateMap[priority][protectionType];
if (state.primaryCommands.valid())
{
return angle::Result::Continue;
}
ANGLE_TRY(mPrimaryCommandPoolMap[protectionType].allocate(context, &state.primaryCommands));
VkCommandBufferBeginInfo beginInfo = {};
beginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
beginInfo.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
beginInfo.pInheritanceInfo = nullptr;
ANGLE_VK_TRY(context, state.primaryCommands.begin(beginInfo));
return angle::Result::Continue;
}
// QueuePriorities:
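// Vulkan queue priorities are normalized floats in [0.0, 1.0], where higher values ask the
// implementation to give the queue relatively more processing time.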
constexpr float kVulkanQueuePriorityLow = 0.0;
constexpr float kVulkanQueuePriorityMedium = 0.4;
constexpr float kVulkanQueuePriorityHigh = 1.0;
const float QueueFamily::kQueuePriorities[static_cast<uint32_t>(egl::ContextPriority::EnumCount)] =
{kVulkanQueuePriorityMedium, kVulkanQueuePriorityHigh, kVulkanQueuePriorityLow};
egl::ContextPriority DeviceQueueMap::getDevicePriority(egl::ContextPriority priority) const
{
return mPriorities[priority];
}
DeviceQueueMap::~DeviceQueueMap() {}
DeviceQueueMap &DeviceQueueMap::operator=(const DeviceQueueMap &other)
{
ASSERT(this != &other);
if ((this != &other) && other.valid())
{
mIndex = other.mIndex;
mIsProtected = other.mIsProtected;
mPriorities[egl::ContextPriority::Low] = other.mPriorities[egl::ContextPriority::Low];
mPriorities[egl::ContextPriority::Medium] = other.mPriorities[egl::ContextPriority::Medium];
mPriorities[egl::ContextPriority::High] = other.mPriorities[egl::ContextPriority::High];
*static_cast<angle::PackedEnumMap<egl::ContextPriority, VkQueue> *>(this) = other;
}
return *this;
}
void QueueFamily::getDeviceQueue(VkDevice device,
bool makeProtected,
uint32_t queueIndex,
VkQueue *queue)
{
if (makeProtected)
{
VkDeviceQueueInfo2 queueInfo2 = {};
queueInfo2.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_INFO_2;
queueInfo2.flags = VK_DEVICE_QUEUE_CREATE_PROTECTED_BIT;
queueInfo2.queueFamilyIndex = mIndex;
queueInfo2.queueIndex = queueIndex;
vkGetDeviceQueue2(device, &queueInfo2, queue);
}
else
{
vkGetDeviceQueue(device, mIndex, queueIndex, queue);
}
}
DeviceQueueMap QueueFamily::initializeQueueMap(VkDevice device,
bool makeProtected,
uint32_t queueIndex,
uint32_t queueCount)
{
// QueueIndexing:
constexpr uint32_t kQueueIndexMedium = 0;
constexpr uint32_t kQueueIndexHigh = 1;
constexpr uint32_t kQueueIndexLow = 2;
ASSERT(queueCount);
ASSERT((queueIndex + queueCount) <= mProperties.queueCount);
DeviceQueueMap queueMap(mIndex, makeProtected);
getDeviceQueue(device, makeProtected, queueIndex + kQueueIndexMedium,
&queueMap[egl::ContextPriority::Medium]);
queueMap.mPriorities[egl::ContextPriority::Medium] = egl::ContextPriority::Medium;
// If at least 2 queues, High has its own queue
if (queueCount > 1)
{
getDeviceQueue(device, makeProtected, queueIndex + kQueueIndexHigh,
&queueMap[egl::ContextPriority::High]);
queueMap.mPriorities[egl::ContextPriority::High] = egl::ContextPriority::High;
}
else
{
queueMap[egl::ContextPriority::High] = queueMap[egl::ContextPriority::Medium];
queueMap.mPriorities[egl::ContextPriority::High] = egl::ContextPriority::Medium;
}
// If at least 3 queues, Low has its own queue. Adjust Low priority.
if (queueCount > 2)
{
getDeviceQueue(device, makeProtected, queueIndex + kQueueIndexLow,
&queueMap[egl::ContextPriority::Low]);
queueMap.mPriorities[egl::ContextPriority::Low] = egl::ContextPriority::Low;
}
else
{
queueMap[egl::ContextPriority::Low] = queueMap[egl::ContextPriority::Medium];
queueMap.mPriorities[egl::ContextPriority::Low] = egl::ContextPriority::Medium;
}
return queueMap;
}
void QueueFamily::initialize(const VkQueueFamilyProperties &queueFamilyProperties, uint32_t index)
{
mProperties = queueFamilyProperties;
mIndex = index;
}
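// Returns the index of the (matchNumber + 1)-th queue family whose flags contain all the
// requested flags, or kInvalidIndex if there is none. If matchCount is non-null, it receives
// the total number of matching families.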
uint32_t QueueFamily::FindIndex(const std::vector<VkQueueFamilyProperties> &queueFamilyProperties,
VkQueueFlags flags,
int32_t matchNumber,
uint32_t *matchCount)
{
uint32_t index = QueueFamily::kInvalidIndex;
uint32_t count = 0;
for (uint32_t familyIndex = 0; familyIndex < queueFamilyProperties.size(); ++familyIndex)
{
const auto &queueInfo = queueFamilyProperties[familyIndex];
if ((queueInfo.queueFlags & flags) == flags)
{
ASSERT(queueInfo.queueCount > 0);
count++;
if ((index == QueueFamily::kInvalidIndex) && (matchNumber-- == 0))
{
index = familyIndex;
}
}
}
if (matchCount)
{
*matchCount = count;
}
return index;
}
} // namespace vk
} // namespace rx