Vulkan: Faster state transitions. Implements a transition table from Pipeline Cache entry to state change neighbouring Pipeline Cache entries. We use a 64-bit mask to do a quick scan over the pipeline desc. This ends up being a lot faster than doing a full hash and memcmp over the pipeline description. Note that there could be future optimizations to this design. We might keep a hash map of the pipeline transitions instead of a list. Or use a sorted list. This could speed up the search when there are many transitions for cache entries. Also we could skip the transition table and opt to do a full hash when there are more than a configurable number of dirty states. This might be a bit faster in some cases. Likely this will be something we can add performance tests for in the future. Documentation is also added in a README file for the Vulkan back end. This will be extended over time. Improves performance about 30-35% on the VBO state change test. Bug: angleproject:3013 Change-Id: I793f9e3efd8887acf00ad60e4ac2502a54c95dee Reviewed-on: https://chromium-review.googlesource.com/c/1369287 Commit-Queue: Jamie Madill <jmadill@chromium.org> Reviewed-by: Yuly Novikov <ynovikov@chromium.org>
diff --git a/src/common/mathutil.h b/src/common/mathutil.h index 3dbf658..edf63dd 100644 --- a/src/common/mathutil.h +++ b/src/common/mathutil.h
@@ -1271,6 +1271,11 @@ # define ANGLE_ROTR16(x, y) ::rx::RotR16(x, y) #endif // namespace rx + +constexpr unsigned int Log2(unsigned int bytes) +{ + return bytes == 1 ? 0 : (1 + Log2(bytes / 2)); +} } // namespace rx #endif // COMMON_MATHUTIL_H_
diff --git a/src/libANGLE/formatutils.cpp b/src/libANGLE/formatutils.cpp index e0e361a..1cbca8c 100644 --- a/src/libANGLE/formatutils.cpp +++ b/src/libANGLE/formatutils.cpp
@@ -40,16 +40,11 @@ } } -constexpr GLuint Log2(GLuint bytes) -{ - return bytes == 1 ? 0 : (1 + Log2(bytes / 2)); -} - constexpr uint32_t PackTypeInfo(GLuint bytes, bool specialized) { // static_assert within constexpr requires c++17 // static_assert(isPow2(bytes)); - return bytes | (Log2(bytes) << 8) | (specialized << 16); + return bytes | (rx::Log2(bytes) << 8) | (specialized << 16); } } // anonymous namespace
diff --git a/src/libANGLE/renderer/vulkan/ContextVk.cpp b/src/libANGLE/renderer/vulkan/ContextVk.cpp index bfd3b18..2f3899c 100644 --- a/src/libANGLE/renderer/vulkan/ContextVk.cpp +++ b/src/libANGLE/renderer/vulkan/ContextVk.cpp
@@ -109,6 +109,7 @@ ContextVk::ContextVk(const gl::State &state, gl::ErrorSet *errorSet, RendererVk *renderer) : ContextImpl(state, errorSet), vk::Context(renderer), + mCurrentPipeline(nullptr), mCurrentDrawMode(gl::PrimitiveMode::InvalidEnum), mVertexArray(nullptr), mDrawFramebuffer(nullptr), @@ -253,7 +254,7 @@ { invalidateCurrentPipeline(); mCurrentDrawMode = mode; - mGraphicsPipelineDesc->updateTopology(mCurrentDrawMode); + mGraphicsPipelineDesc->updateTopology(&mGraphicsPipelineTransition, mCurrentDrawMode); } if (!mDrawFramebuffer->appendToStartedRenderPass(mRenderer, commandBufferOut)) @@ -365,13 +366,34 @@ { if (!mCurrentPipeline) { + const vk::GraphicsPipelineDesc *descPtr; + // Draw call shader patching, shader compilation, and pipeline cache query. ANGLE_TRY(mProgram->getGraphicsPipeline(this, mCurrentDrawMode, *mGraphicsPipelineDesc, mProgram->getState().getActiveAttribLocationsMask(), - &mCurrentPipeline)); + &descPtr, &mCurrentPipeline)); + mGraphicsPipelineTransition.reset(); + } + else if (mGraphicsPipelineTransition.any()) + { + if (!mCurrentPipeline->findTransition(mGraphicsPipelineTransition, *mGraphicsPipelineDesc, + &mCurrentPipeline)) + { + vk::PipelineHelper *oldPipeline = mCurrentPipeline; + + const vk::GraphicsPipelineDesc *descPtr; + + ANGLE_TRY(mProgram->getGraphicsPipeline( + this, mCurrentDrawMode, *mGraphicsPipelineDesc, + mProgram->getState().getActiveAttribLocationsMask(), &descPtr, &mCurrentPipeline)); + + oldPipeline->addTransition(mGraphicsPipelineTransition, descPtr, mCurrentPipeline); + } + + mGraphicsPipelineTransition.reset(); } - commandBuffer->bindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, mCurrentPipeline->get()); + commandBuffer->bindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, mCurrentPipeline->getPipeline()); // Update the queue serial for the pipeline object. 
ASSERT(mCurrentPipeline && mCurrentPipeline->valid()); @@ -623,7 +645,7 @@ blendState.colorMaskBlue, blendState.colorMaskAlpha); FramebufferVk *framebufferVk = vk::GetImpl(mState.getDrawFramebuffer()); - mGraphicsPipelineDesc->updateColorWriteMask(mClearColorMask, + mGraphicsPipelineDesc->updateColorWriteMask(&mGraphicsPipelineTransition, mClearColorMask, framebufferVk->getEmulatedAlphaAttachmentMask()); } @@ -689,16 +711,20 @@ updateDepthRange(glState.getNearPlane(), glState.getFarPlane()); break; case gl::State::DIRTY_BIT_BLEND_ENABLED: - mGraphicsPipelineDesc->updateBlendEnabled(glState.isBlendEnabled()); + mGraphicsPipelineDesc->updateBlendEnabled(&mGraphicsPipelineTransition, + glState.isBlendEnabled()); break; case gl::State::DIRTY_BIT_BLEND_COLOR: - mGraphicsPipelineDesc->updateBlendColor(glState.getBlendColor()); + mGraphicsPipelineDesc->updateBlendColor(&mGraphicsPipelineTransition, + glState.getBlendColor()); break; case gl::State::DIRTY_BIT_BLEND_FUNCS: - mGraphicsPipelineDesc->updateBlendFuncs(glState.getBlendState()); + mGraphicsPipelineDesc->updateBlendFuncs(&mGraphicsPipelineTransition, + glState.getBlendState()); break; case gl::State::DIRTY_BIT_BLEND_EQUATIONS: - mGraphicsPipelineDesc->updateBlendEquations(glState.getBlendState()); + mGraphicsPipelineDesc->updateBlendEquations(&mGraphicsPipelineTransition, + glState.getBlendState()); break; case gl::State::DIRTY_BIT_COLOR_MASK: updateColorMask(glState.getBlendState()); @@ -714,61 +740,75 @@ case gl::State::DIRTY_BIT_SAMPLE_MASK: break; case gl::State::DIRTY_BIT_DEPTH_TEST_ENABLED: - mGraphicsPipelineDesc->updateDepthTestEnabled(glState.getDepthStencilState(), + mGraphicsPipelineDesc->updateDepthTestEnabled(&mGraphicsPipelineTransition, + glState.getDepthStencilState(), glState.getDrawFramebuffer()); break; case gl::State::DIRTY_BIT_DEPTH_FUNC: - mGraphicsPipelineDesc->updateDepthFunc(glState.getDepthStencilState()); + mGraphicsPipelineDesc->updateDepthFunc(&mGraphicsPipelineTransition, + 
glState.getDepthStencilState()); break; case gl::State::DIRTY_BIT_DEPTH_MASK: - mGraphicsPipelineDesc->updateDepthWriteEnabled(glState.getDepthStencilState(), + mGraphicsPipelineDesc->updateDepthWriteEnabled(&mGraphicsPipelineTransition, + glState.getDepthStencilState(), glState.getDrawFramebuffer()); break; case gl::State::DIRTY_BIT_STENCIL_TEST_ENABLED: - mGraphicsPipelineDesc->updateStencilTestEnabled(glState.getDepthStencilState(), + mGraphicsPipelineDesc->updateStencilTestEnabled(&mGraphicsPipelineTransition, + glState.getDepthStencilState(), glState.getDrawFramebuffer()); break; case gl::State::DIRTY_BIT_STENCIL_FUNCS_FRONT: - mGraphicsPipelineDesc->updateStencilFrontFuncs(glState.getStencilRef(), + mGraphicsPipelineDesc->updateStencilFrontFuncs(&mGraphicsPipelineTransition, + glState.getStencilRef(), glState.getDepthStencilState()); break; case gl::State::DIRTY_BIT_STENCIL_FUNCS_BACK: - mGraphicsPipelineDesc->updateStencilBackFuncs(glState.getStencilBackRef(), + mGraphicsPipelineDesc->updateStencilBackFuncs(&mGraphicsPipelineTransition, + glState.getStencilBackRef(), glState.getDepthStencilState()); break; case gl::State::DIRTY_BIT_STENCIL_OPS_FRONT: - mGraphicsPipelineDesc->updateStencilFrontOps(glState.getDepthStencilState()); + mGraphicsPipelineDesc->updateStencilFrontOps(&mGraphicsPipelineTransition, + glState.getDepthStencilState()); break; case gl::State::DIRTY_BIT_STENCIL_OPS_BACK: - mGraphicsPipelineDesc->updateStencilBackOps(glState.getDepthStencilState()); + mGraphicsPipelineDesc->updateStencilBackOps(&mGraphicsPipelineTransition, + glState.getDepthStencilState()); break; case gl::State::DIRTY_BIT_STENCIL_WRITEMASK_FRONT: - mGraphicsPipelineDesc->updateStencilFrontWriteMask(glState.getDepthStencilState(), + mGraphicsPipelineDesc->updateStencilFrontWriteMask(&mGraphicsPipelineTransition, + glState.getDepthStencilState(), glState.getDrawFramebuffer()); break; case gl::State::DIRTY_BIT_STENCIL_WRITEMASK_BACK: - 
mGraphicsPipelineDesc->updateStencilBackWriteMask(glState.getDepthStencilState(), + mGraphicsPipelineDesc->updateStencilBackWriteMask(&mGraphicsPipelineTransition, + glState.getDepthStencilState(), glState.getDrawFramebuffer()); break; case gl::State::DIRTY_BIT_CULL_FACE_ENABLED: case gl::State::DIRTY_BIT_CULL_FACE: - mGraphicsPipelineDesc->updateCullMode(glState.getRasterizerState()); + mGraphicsPipelineDesc->updateCullMode(&mGraphicsPipelineTransition, + glState.getRasterizerState()); break; case gl::State::DIRTY_BIT_FRONT_FACE: - mGraphicsPipelineDesc->updateFrontFace(glState.getRasterizerState(), + mGraphicsPipelineDesc->updateFrontFace(&mGraphicsPipelineTransition, + glState.getRasterizerState(), isViewportFlipEnabledForDrawFBO()); break; case gl::State::DIRTY_BIT_POLYGON_OFFSET_FILL_ENABLED: mGraphicsPipelineDesc->updatePolygonOffsetFillEnabled( - glState.isPolygonOffsetFillEnabled()); + &mGraphicsPipelineTransition, glState.isPolygonOffsetFillEnabled()); break; case gl::State::DIRTY_BIT_POLYGON_OFFSET: - mGraphicsPipelineDesc->updatePolygonOffset(glState.getRasterizerState()); + mGraphicsPipelineDesc->updatePolygonOffset(&mGraphicsPipelineTransition, + glState.getRasterizerState()); break; case gl::State::DIRTY_BIT_RASTERIZER_DISCARD_ENABLED: break; case gl::State::DIRTY_BIT_LINE_WIDTH: - mGraphicsPipelineDesc->updateLineWidth(glState.getLineWidth()); + mGraphicsPipelineDesc->updateLineWidth(&mGraphicsPipelineTransition, + glState.getLineWidth()); break; case gl::State::DIRTY_BIT_PRIMITIVE_RESTART_ENABLED: break; @@ -813,19 +853,26 @@ updateViewport(mDrawFramebuffer, glState.getViewport(), glState.getNearPlane(), glState.getFarPlane(), isViewportFlipEnabledForDrawFBO()); updateColorMask(glState.getBlendState()); - mGraphicsPipelineDesc->updateCullMode(glState.getRasterizerState()); + mGraphicsPipelineDesc->updateCullMode(&mGraphicsPipelineTransition, + glState.getRasterizerState()); updateScissor(glState); - 
mGraphicsPipelineDesc->updateDepthTestEnabled(glState.getDepthStencilState(), + mGraphicsPipelineDesc->updateDepthTestEnabled(&mGraphicsPipelineTransition, + glState.getDepthStencilState(), glState.getDrawFramebuffer()); - mGraphicsPipelineDesc->updateDepthWriteEnabled(glState.getDepthStencilState(), + mGraphicsPipelineDesc->updateDepthWriteEnabled(&mGraphicsPipelineTransition, + glState.getDepthStencilState(), glState.getDrawFramebuffer()); - mGraphicsPipelineDesc->updateStencilTestEnabled(glState.getDepthStencilState(), + mGraphicsPipelineDesc->updateStencilTestEnabled(&mGraphicsPipelineTransition, + glState.getDepthStencilState(), glState.getDrawFramebuffer()); - mGraphicsPipelineDesc->updateStencilFrontWriteMask(glState.getDepthStencilState(), + mGraphicsPipelineDesc->updateStencilFrontWriteMask(&mGraphicsPipelineTransition, + glState.getDepthStencilState(), glState.getDrawFramebuffer()); - mGraphicsPipelineDesc->updateStencilBackWriteMask(glState.getDepthStencilState(), + mGraphicsPipelineDesc->updateStencilBackWriteMask(&mGraphicsPipelineTransition, + glState.getDepthStencilState(), glState.getDrawFramebuffer()); - mGraphicsPipelineDesc->updateRenderPassDesc(mDrawFramebuffer->getRenderPassDesc()); + mGraphicsPipelineDesc->updateRenderPassDesc(&mGraphicsPipelineTransition, + mDrawFramebuffer->getRenderPassDesc()); break; } case gl::State::DIRTY_BIT_RENDERBUFFER_BINDING: @@ -851,6 +898,8 @@ bool useVertexBuffer = (mProgram->getState().getMaxActiveAttribLocation()); mNonIndexedDirtyBitsMask.set(DIRTY_BIT_VERTEX_BUFFERS, useVertexBuffer); mIndexedDirtyBitsMask.set(DIRTY_BIT_VERTEX_BUFFERS, useVertexBuffer); + mCurrentPipeline = nullptr; + mGraphicsPipelineTransition.reset(); break; } case gl::State::DIRTY_BIT_TEXTURE_BINDINGS: @@ -1054,7 +1103,7 @@ { // Ensure that the RenderPass description is updated. 
invalidateCurrentPipeline(); - mGraphicsPipelineDesc->updateRenderPassDesc(renderPassDesc); + mGraphicsPipelineDesc->updateRenderPassDesc(&mGraphicsPipelineTransition, renderPassDesc); } angle::Result ContextVk::dispatchCompute(const gl::Context *context, @@ -1279,4 +1328,5 @@ static_cast<uint32_t>(offset)); return angle::Result::Continue; } + } // namespace rx
diff --git a/src/libANGLE/renderer/vulkan/ContextVk.h b/src/libANGLE/renderer/vulkan/ContextVk.h index 453e5d2..ac350fd 100644 --- a/src/libANGLE/renderer/vulkan/ContextVk.h +++ b/src/libANGLE/renderer/vulkan/ContextVk.h
@@ -179,7 +179,8 @@ GLuint relativeOffset) { invalidateVertexAndIndexBuffers(); - mGraphicsPipelineDesc->updateVertexInput(static_cast<uint32_t>(attribIndex), stride, + mGraphicsPipelineDesc->updateVertexInput(&mGraphicsPipelineTransition, + static_cast<uint32_t>(attribIndex), stride, divisor, format, relativeOffset); } @@ -270,7 +271,6 @@ mDirtyBits.set(DIRTY_BIT_PIPELINE); mDirtyBits.set(DIRTY_BIT_VIEWPORT); mDirtyBits.set(DIRTY_BIT_SCISSOR); - mCurrentPipeline = nullptr; } void invalidateCurrentTextures(); @@ -291,12 +291,13 @@ angle::Result handleDirtyViewport(const gl::Context *context, vk::CommandBuffer *commandBuffer); angle::Result handleDirtyScissor(const gl::Context *context, vk::CommandBuffer *commandBuffer); - vk::PipelineAndSerial *mCurrentPipeline; + vk::PipelineHelper *mCurrentPipeline; gl::PrimitiveMode mCurrentDrawMode; // Keep a cached pipeline description structure that can be used to query the pipeline cache. // Kept in a pointer so allocations can be aligned, and structs can be portably packed. std::unique_ptr<vk::GraphicsPipelineDesc> mGraphicsPipelineDesc; + vk::GraphicsPipelineTransitionBits mGraphicsPipelineTransition; // The descriptor pools are externally sychronized, so cannot be accessed from different // threads simultaneously. Hence, we keep them in the ContextVk instead of the RendererVk.
diff --git a/src/libANGLE/renderer/vulkan/ProgramVk.h b/src/libANGLE/renderer/vulkan/ProgramVk.h index 5b6147f..70b6ad0 100644 --- a/src/libANGLE/renderer/vulkan/ProgramVk.h +++ b/src/libANGLE/renderer/vulkan/ProgramVk.h
@@ -123,7 +123,8 @@ gl::PrimitiveMode mode, const vk::GraphicsPipelineDesc &desc, const gl::AttributesMask &activeAttribLocations, - vk::PipelineAndSerial **pipelineOut) + const vk::GraphicsPipelineDesc **descPtrOut, + vk::PipelineHelper **pipelineOut) { vk::ShaderProgramHelper *shaderProgram; ANGLE_TRY(initShaders(contextVk, mode, &shaderProgram)); @@ -132,7 +133,7 @@ return shaderProgram->getGraphicsPipeline( contextVk, &renderer->getRenderPassCache(), renderer->getPipelineCache(), renderer->getCurrentQueueSerial(), mPipelineLayout.get(), desc, activeAttribLocations, - pipelineOut); + descPtrOut, pipelineOut); } private:
diff --git a/src/libANGLE/renderer/vulkan/README.md b/src/libANGLE/renderer/vulkan/README.md new file mode 100644 index 0000000..dba3eec --- /dev/null +++ b/src/libANGLE/renderer/vulkan/README.md
@@ -0,0 +1,65 @@ +# ANGLE: Vulkan Back-end + +ANGLE's Vulkan back-end implementation lives in this folder. + +[Vulkan](https://www.khronos.org/vulkan/) is an explicit graphics API. It has a lot in common with +other explicit APIs such as Microsoft's +[D3D12](https://docs.microsoft.com/en-us/windows/desktop/direct3d12/directx-12-programming-guide) +and Apple's [Metal](https://developer.apple.com/metal/). Compared to APIs like OpenGL or D3D11 +explicit APIs can offer a number of significant benefits: + + * Lower API call CPU overhead. + * A smaller API surface with more direct hardware control. + * Better support for multi-core programming. + * Vulkan in particular has open-source tooling and tests. + +## Back-end Design + +The [RendererVk](RendererVk.cpp) is a singleton. RendererVk owns shared global resources like the +[VkDevice](https://www.khronos.org/registry/vulkan/specs/1.1-extensions/man/html/VkDevice.html), +[VkQueue](https://www.khronos.org/registry/vulkan/specs/1.1-extensions/man/html/VkQueue.html), the +[Vulkan format tables](vk_format_utils.h) and [internal Vulkan shaders](shaders). The back-end +creates a new [ContextVk](ContextVk.cpp) instance to manage each allocated OpenGL Context. ContextVk +processes state changes and handles action commands like `glDrawArrays` and `glDrawElements`. + +### Fast OpenGL State Transitions + +Typical OpenGL programs issue a few small state change commands between draw call commands. We want +the typical app's use case to be as fast as possible so this leads to unique performance challenges. + +Vulkan is quite different from OpenGL because it requires a separate compiled +[VkPipeline](https://www.khronos.org/registry/vulkan/specs/1.1-extensions/man/html/VkPipeline.html) +for each state vector. Compiling VkPipelines is multiple orders of magnitude slower than enabling or +disabling an OpenGL render state. To speed this up we use three levels of caching when transitioning +states in the Vulkan back-end.
+ +The first level is the driver's +[VkPipelineCache](https://www.khronos.org/registry/vulkan/specs/1.1-extensions/man/html/VkPipelineCache.html). The driver cache reduces pipeline recompilation time +significantly. But even cached pipeline recompilations are orders of magnitude slower than OpenGL +state changes. + +The second level cache is an ANGLE-owned hash map from OpenGL state vectors to compiled pipelines. +See +[GraphicsPipelineCache](https://chromium.googlesource.com/angle/angle/+/225f08bf85a368f905362cdd1366e4795680452c/src/libANGLE/renderer/vulkan/vk_cache_utils.h#498) +in [vk_cache_utils.h](vk_cache_utils.h). ANGLE's +[GraphicsPipelineDesc](https://chromium.googlesource.com/angle/angle/+/225f08bf85a368f905362cdd1366e4795680452c/src/libANGLE/renderer/vulkan/vk_cache_utils.h#244) +class is a tightly packed 256-byte description of the current OpenGL rendering state. We +also use a [xxHash](https://github.com/Cyan4973/xxHash) for the fastest possible hash computation. +The hash map speeds up state changes considerably. But it is still significantly slower than OpenGL +implementations. + +To get best performance we use a transition table from each OpenGL state vector to neighbouring +state vectors. The transition table points from GraphicsPipelineCache entries directly to +neighbouring VkPipeline objects. When the application changes state the state change bits are +recorded into a compact bit mask that covers the GraphicsPipelineDesc state vector. Then on the next +draw call we scan the transition bit mask and compare the GraphicsPipelineDesc of the current state +vector and the state vector of the cached transition. With the hash map we compute a hash over the +entire state vector and then do a 256-byte `memcmp` to guard against hash collisions. With the +transition table we will only compare as many bytes as were changed in the transition bit mask.
By +skipping the expensive hashing and `memcmp` we can get performance as good as or faster than native +OpenGL drivers. + +Note that the current design of the transition table stores transitions in an unsorted list. If +applications map from one state to many this will slow down the transition time. This could be +improved in the future using a faster lookup. For instance we could keep a sorted transition table +or use a small hash map for transitions.
diff --git a/src/libANGLE/renderer/vulkan/UtilsVk.cpp b/src/libANGLE/renderer/vulkan/UtilsVk.cpp index a59c2cf..7c2ba26 100644 --- a/src/libANGLE/renderer/vulkan/UtilsVk.cpp +++ b/src/libANGLE/renderer/vulkan/UtilsVk.cpp
@@ -325,25 +325,30 @@ Serial serial = renderer->getCurrentQueueSerial(); - vk::PipelineAndSerial *pipelineAndSerial; if (isCompute) { + vk::PipelineAndSerial *pipelineAndSerial; program->setShader(gl::ShaderType::Compute, fsCsShader); ANGLE_TRY(program->getComputePipeline(context, pipelineLayout.get(), &pipelineAndSerial)); + pipelineAndSerial->updateSerial(serial); + commandBuffer->bindPipeline(bindPoint, pipelineAndSerial->get()); } else { program->setShader(gl::ShaderType::Vertex, vsShader); program->setShader(gl::ShaderType::Fragment, fsCsShader); + // This value is not used but is passed to getGraphicsPipeline to avoid a nullptr check. + const vk::GraphicsPipelineDesc *descPtr; + vk::PipelineHelper *helper; + ANGLE_TRY(program->getGraphicsPipeline( context, &renderer->getRenderPassCache(), renderer->getPipelineCache(), serial, - pipelineLayout.get(), *pipelineDesc, gl::AttributesMask(), &pipelineAndSerial)); + pipelineLayout.get(), *pipelineDesc, gl::AttributesMask(), &descPtr, &helper)); + helper->updateSerial(serial); + commandBuffer->bindPipeline(bindPoint, helper->getPipeline()); } - commandBuffer->bindPipeline(bindPoint, pipelineAndSerial->get()); - pipelineAndSerial->updateSerial(serial); - if (descriptorSet != VK_NULL_HANDLE) { commandBuffer->bindDescriptorSets(bindPoint, pipelineLayout.get(), 0, 1, &descriptorSet, 0, @@ -619,8 +624,8 @@ vk::GraphicsPipelineDesc pipelineDesc; pipelineDesc.initDefaults(); - pipelineDesc.updateColorWriteMask(params.colorMaskFlags, *params.alphaMask); - pipelineDesc.updateRenderPassDesc(*params.renderPassDesc); + pipelineDesc.setColorWriteMask(params.colorMaskFlags, *params.alphaMask); + pipelineDesc.setRenderPassDesc(*params.renderPassDesc); vk::ShaderLibrary &shaderLibrary = renderer->getShaderLibrary(); vk::RefCounted<vk::ShaderAndSerial> *vertexShader = nullptr; @@ -707,7 +712,7 @@ vk::GraphicsPipelineDesc pipelineDesc; pipelineDesc.initDefaults(); - pipelineDesc.updateRenderPassDesc(renderPassDesc); + 
pipelineDesc.setRenderPassDesc(renderPassDesc); gl::Rectangle renderArea; renderArea.x = params.destOffset[0];
diff --git a/src/libANGLE/renderer/vulkan/vk_cache_utils.cpp b/src/libANGLE/renderer/vulkan/vk_cache_utils.cpp index ac0d7f7..f33829e 100644 --- a/src/libANGLE/renderer/vulkan/vk_cache_utils.cpp +++ b/src/libANGLE/renderer/vulkan/vk_cache_utils.cpp
@@ -158,10 +158,10 @@ uint8_t stencilReference, VkStencilOpState *stateOut) { - stateOut->failOp = static_cast<VkStencilOp>(packedState.failOp); - stateOut->passOp = static_cast<VkStencilOp>(packedState.passOp); - stateOut->depthFailOp = static_cast<VkStencilOp>(packedState.depthFailOp); - stateOut->compareOp = static_cast<VkCompareOp>(packedState.compareOp); + stateOut->failOp = static_cast<VkStencilOp>(packedState.ops.fail); + stateOut->passOp = static_cast<VkStencilOp>(packedState.ops.pass); + stateOut->depthFailOp = static_cast<VkStencilOp>(packedState.ops.depthFail); + stateOut->compareOp = static_cast<VkCompareOp>(packedState.ops.compare); stateOut->compareMask = packedState.compareMask; stateOut->writeMask = packedState.writeMask; stateOut->reference = stencilReference; @@ -281,6 +281,54 @@ #define SetBitField(lhs, rhs) \ lhs = static_cast<typename std::decay<decltype(lhs)>::type>(rhs); \ ASSERT(static_cast<decltype(rhs)>(lhs) == rhs); + +// When converting a byte number to a transition bit index we can shift instead of divide. +constexpr size_t kTransitionByteShift = Log2(kGraphicsPipelineDirtyBitBytes); + +// When converting a number of bits offset to a transition bit index we can also shift. +constexpr size_t kBitsPerByte = 8; +constexpr size_t kTransitionBitShift = kTransitionByteShift + Log2(kBitsPerByte); + +// Helper macro to map from a PipelineDesc struct and field to a dirty bit index. +// Uses the 'offsetof' macro to compute the offset 'Member' within the PipelineDesc +// and the offset of 'Field' within 'Member'. We can optimize the dirty bit setting by computing +// the shifted dirty bit at compile time instead of calling "set". +#define ANGLE_GET_TRANSITION_BIT(Member, Field) \ + ((offsetof(GraphicsPipelineDesc, Member) + offsetof(decltype(Member), Field)) >> \ + kTransitionByteShift) + +// Indexed dirty bits cannot be entirely computed at compile time since the index is passed to +// the update function. 
+#define ANGLE_GET_INDEXED_TRANSITION_BIT(Member, Field, Index, BitWidth) \ + (((BitWidth * Index) >> kTransitionBitShift) + ANGLE_GET_TRANSITION_BIT(Member, Field)) + +bool GraphicsPipelineTransitionMatch(GraphicsPipelineTransitionBits bitsA, + GraphicsPipelineTransitionBits bitsB, + const GraphicsPipelineDesc &descA, + const GraphicsPipelineDesc &descB) +{ + if (bitsA != bitsB) + return false; + + // We currently mask over 4 bytes of the pipeline description with each dirty bit. + // We could consider using 8 bytes and a mask of 32 bits. This would make some parts + // of the code faster. The for loop below would scan over twice as many bits per iteration. + // But there may be more collisions between the same dirty bit masks leading to different + // transitions. Thus there may be additional cost when applications use many transitions. + // We should revisit this in the future and investigate using different bit widths. + static_assert(sizeof(uint32_t) == kGraphicsPipelineDirtyBitBytes, "Size mismatch"); + + const uint32_t *rawPtrA = descA.getPtr<uint32_t>(); + const uint32_t *rawPtrB = descB.getPtr<uint32_t>(); + + for (size_t dirtyBit : bitsA) + { + if (rawPtrA[dirtyBit] != rawPtrB[dirtyBit]) + return false; + } + + return true; +} } // anonymous namespace // RenderPassDesc implementation. 
@@ -391,53 +439,52 @@ void GraphicsPipelineDesc::initDefaults() { - mRasterizationAndMultisampleStateInfo.depthClampEnable = 0; - mRasterizationAndMultisampleStateInfo.rasterizationDiscardEnable = 0; - SetBitField(mRasterizationAndMultisampleStateInfo.polygonMode, VK_POLYGON_MODE_FILL); - SetBitField(mRasterizationAndMultisampleStateInfo.cullMode, VK_CULL_MODE_NONE); - SetBitField(mRasterizationAndMultisampleStateInfo.frontFace, VK_FRONT_FACE_CLOCKWISE); - mRasterizationAndMultisampleStateInfo.depthBiasEnable = 0; + mRasterizationAndMultisampleStateInfo.bits.depthClampEnable = 0; + mRasterizationAndMultisampleStateInfo.bits.rasterizationDiscardEnable = 0; + SetBitField(mRasterizationAndMultisampleStateInfo.bits.polygonMode, VK_POLYGON_MODE_FILL); + SetBitField(mRasterizationAndMultisampleStateInfo.bits.cullMode, VK_CULL_MODE_NONE); + SetBitField(mRasterizationAndMultisampleStateInfo.bits.frontFace, VK_FRONT_FACE_CLOCKWISE); + mRasterizationAndMultisampleStateInfo.bits.depthBiasEnable = 0; mRasterizationAndMultisampleStateInfo.depthBiasConstantFactor = 0.0f; mRasterizationAndMultisampleStateInfo.depthBiasClamp = 0.0f; mRasterizationAndMultisampleStateInfo.depthBiasSlopeFactor = 0.0f; mRasterizationAndMultisampleStateInfo.lineWidth = 1.0f; - mRasterizationAndMultisampleStateInfo.rasterizationSamples = 1; - mRasterizationAndMultisampleStateInfo.sampleShadingEnable = 0; - mRasterizationAndMultisampleStateInfo.minSampleShading = 0.0f; + mRasterizationAndMultisampleStateInfo.bits.rasterizationSamples = 1; + mRasterizationAndMultisampleStateInfo.bits.sampleShadingEnable = 0; + mRasterizationAndMultisampleStateInfo.minSampleShading = 0.0f; for (uint32_t &sampleMask : mRasterizationAndMultisampleStateInfo.sampleMask) { sampleMask = 0; } - mRasterizationAndMultisampleStateInfo.alphaToCoverageEnable = 0; - mRasterizationAndMultisampleStateInfo.alphaToOneEnable = 0; + mRasterizationAndMultisampleStateInfo.bits.alphaToCoverageEnable = 0; + 
mRasterizationAndMultisampleStateInfo.bits.alphaToOneEnable = 0; - mDepthStencilStateInfo.depthTestEnable = 0; - mDepthStencilStateInfo.depthWriteEnable = 1; + mDepthStencilStateInfo.enable.depthTest = 0; + mDepthStencilStateInfo.enable.depthWrite = 1; SetBitField(mDepthStencilStateInfo.depthCompareOp, VK_COMPARE_OP_LESS); - mDepthStencilStateInfo.depthBoundsTestEnable = 0; - mDepthStencilStateInfo.stencilTestEnable = 0; - mDepthStencilStateInfo.minDepthBounds = 0.0f; - mDepthStencilStateInfo.maxDepthBounds = 0.0f; - SetBitField(mDepthStencilStateInfo.front.failOp, VK_STENCIL_OP_KEEP); - SetBitField(mDepthStencilStateInfo.front.passOp, VK_STENCIL_OP_KEEP); - SetBitField(mDepthStencilStateInfo.front.depthFailOp, VK_STENCIL_OP_KEEP); - SetBitField(mDepthStencilStateInfo.front.compareOp, VK_COMPARE_OP_ALWAYS); + mDepthStencilStateInfo.enable.depthBoundsTest = 0; + mDepthStencilStateInfo.enable.stencilTest = 0; + mDepthStencilStateInfo.minDepthBounds = 0.0f; + mDepthStencilStateInfo.maxDepthBounds = 0.0f; + SetBitField(mDepthStencilStateInfo.front.ops.fail, VK_STENCIL_OP_KEEP); + SetBitField(mDepthStencilStateInfo.front.ops.pass, VK_STENCIL_OP_KEEP); + SetBitField(mDepthStencilStateInfo.front.ops.depthFail, VK_STENCIL_OP_KEEP); + SetBitField(mDepthStencilStateInfo.front.ops.compare, VK_COMPARE_OP_ALWAYS); SetBitField(mDepthStencilStateInfo.front.compareMask, 0xFF); SetBitField(mDepthStencilStateInfo.front.writeMask, 0xFF); mDepthStencilStateInfo.frontStencilReference = 0; - SetBitField(mDepthStencilStateInfo.back.failOp, VK_STENCIL_OP_KEEP); - SetBitField(mDepthStencilStateInfo.back.passOp, VK_STENCIL_OP_KEEP); - SetBitField(mDepthStencilStateInfo.back.depthFailOp, VK_STENCIL_OP_KEEP); - SetBitField(mDepthStencilStateInfo.back.compareOp, VK_COMPARE_OP_ALWAYS); + SetBitField(mDepthStencilStateInfo.back.ops.fail, VK_STENCIL_OP_KEEP); + SetBitField(mDepthStencilStateInfo.back.ops.pass, VK_STENCIL_OP_KEEP); + SetBitField(mDepthStencilStateInfo.back.ops.depthFail, 
VK_STENCIL_OP_KEEP); + SetBitField(mDepthStencilStateInfo.back.ops.compare, VK_COMPARE_OP_ALWAYS); SetBitField(mDepthStencilStateInfo.back.compareMask, 0xFF); SetBitField(mDepthStencilStateInfo.back.writeMask, 0xFF); mDepthStencilStateInfo.backStencilReference = 0; - PackedInputAssemblyAndColorBlendStateInfo &inputAndBlend = - mInputAssembltyAndColorBlendStateInfo; - inputAndBlend.logicOpEnable = 0; - inputAndBlend.logicOp = static_cast<uint32_t>(VK_LOGIC_OP_CLEAR); + PackedInputAssemblyAndColorBlendStateInfo &inputAndBlend = mInputAssemblyAndColorBlendStateInfo; + inputAndBlend.logic.opEnable = 0; + inputAndBlend.logic.op = static_cast<uint32_t>(VK_LOGIC_OP_CLEAR); inputAndBlend.blendEnableMask = 0; inputAndBlend.blendConstants[0] = 0.0f; inputAndBlend.blendConstants[1] = 0.0f; @@ -464,8 +511,8 @@ &inputAndBlend.attachments[gl::IMPLEMENTATION_MAX_DRAW_BUFFERS], blendAttachmentState); - inputAndBlend.topology = static_cast<uint16_t>(VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST); - inputAndBlend.primitiveRestartEnable = 0; + inputAndBlend.primitive.topology = static_cast<uint16_t>(VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST); + inputAndBlend.primitive.restartEnable = 0; } angle::Result GraphicsPipelineDesc::initializePipeline( @@ -550,9 +597,9 @@ inputAssemblyState.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO; inputAssemblyState.flags = 0; inputAssemblyState.topology = - static_cast<VkPrimitiveTopology>(mInputAssembltyAndColorBlendStateInfo.topology); + static_cast<VkPrimitiveTopology>(mInputAssemblyAndColorBlendStateInfo.primitive.topology); inputAssemblyState.primitiveRestartEnable = - static_cast<VkBool32>(mInputAssembltyAndColorBlendStateInfo.primitiveRestartEnable); + static_cast<VkBool32>(mInputAssemblyAndColorBlendStateInfo.primitive.restartEnable); // Set initial viewport and scissor state. @@ -569,13 +616,13 @@ // Rasterizer state. 
rasterState.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO; rasterState.flags = 0; - rasterState.depthClampEnable = static_cast<VkBool32>(rasterAndMS.depthClampEnable); + rasterState.depthClampEnable = static_cast<VkBool32>(rasterAndMS.bits.depthClampEnable); rasterState.rasterizerDiscardEnable = - static_cast<VkBool32>(rasterAndMS.rasterizationDiscardEnable); - rasterState.polygonMode = static_cast<VkPolygonMode>(rasterAndMS.polygonMode); - rasterState.cullMode = static_cast<VkCullModeFlags>(rasterAndMS.cullMode); - rasterState.frontFace = static_cast<VkFrontFace>(rasterAndMS.frontFace); - rasterState.depthBiasEnable = static_cast<VkBool32>(rasterAndMS.depthBiasEnable); + static_cast<VkBool32>(rasterAndMS.bits.rasterizationDiscardEnable); + rasterState.polygonMode = static_cast<VkPolygonMode>(rasterAndMS.bits.polygonMode); + rasterState.cullMode = static_cast<VkCullModeFlags>(rasterAndMS.bits.cullMode); + rasterState.frontFace = static_cast<VkFrontFace>(rasterAndMS.bits.frontFace); + rasterState.depthBiasEnable = static_cast<VkBool32>(rasterAndMS.bits.depthBiasEnable); rasterState.depthBiasConstantFactor = rasterAndMS.depthBiasConstantFactor; rasterState.depthBiasClamp = rasterAndMS.depthBiasClamp; rasterState.depthBiasSlopeFactor = rasterAndMS.depthBiasSlopeFactor; @@ -584,28 +631,30 @@ // Multisample state. 
multisampleState.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO; multisampleState.flags = 0; - multisampleState.rasterizationSamples = gl_vk::GetSamples(rasterAndMS.rasterizationSamples); - multisampleState.sampleShadingEnable = static_cast<VkBool32>(rasterAndMS.sampleShadingEnable); - multisampleState.minSampleShading = rasterAndMS.minSampleShading; + multisampleState.rasterizationSamples = + gl_vk::GetSamples(rasterAndMS.bits.rasterizationSamples); + multisampleState.sampleShadingEnable = + static_cast<VkBool32>(rasterAndMS.bits.sampleShadingEnable); + multisampleState.minSampleShading = rasterAndMS.minSampleShading; // TODO(jmadill): sample masks multisampleState.pSampleMask = nullptr; multisampleState.alphaToCoverageEnable = - static_cast<VkBool32>(rasterAndMS.alphaToCoverageEnable); - multisampleState.alphaToOneEnable = static_cast<VkBool32>(rasterAndMS.alphaToOneEnable); + static_cast<VkBool32>(rasterAndMS.bits.alphaToCoverageEnable); + multisampleState.alphaToOneEnable = static_cast<VkBool32>(rasterAndMS.bits.alphaToOneEnable); // Depth/stencil state. 
depthStencilState.sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO; depthStencilState.flags = 0; depthStencilState.depthTestEnable = - static_cast<VkBool32>(mDepthStencilStateInfo.depthTestEnable); + static_cast<VkBool32>(mDepthStencilStateInfo.enable.depthTest); depthStencilState.depthWriteEnable = - static_cast<VkBool32>(mDepthStencilStateInfo.depthWriteEnable); + static_cast<VkBool32>(mDepthStencilStateInfo.enable.depthWrite); depthStencilState.depthCompareOp = static_cast<VkCompareOp>(mDepthStencilStateInfo.depthCompareOp); depthStencilState.depthBoundsTestEnable = - static_cast<VkBool32>(mDepthStencilStateInfo.depthBoundsTestEnable); + static_cast<VkBool32>(mDepthStencilStateInfo.enable.depthBoundsTest); depthStencilState.stencilTestEnable = - static_cast<VkBool32>(mDepthStencilStateInfo.stencilTestEnable); + static_cast<VkBool32>(mDepthStencilStateInfo.enable.stencilTest); UnpackStencilState(mDepthStencilStateInfo.front, mDepthStencilStateInfo.frontStencilReference, &depthStencilState.front); UnpackStencilState(mDepthStencilStateInfo.back, mDepthStencilStateInfo.backStencilReference, @@ -614,12 +663,12 @@ depthStencilState.maxDepthBounds = mDepthStencilStateInfo.maxDepthBounds; const PackedInputAssemblyAndColorBlendStateInfo &inputAndBlend = - mInputAssembltyAndColorBlendStateInfo; + mInputAssemblyAndColorBlendStateInfo; blendState.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO; blendState.flags = 0; - blendState.logicOpEnable = static_cast<VkBool32>(inputAndBlend.logicOpEnable); - blendState.logicOp = static_cast<VkLogicOp>(inputAndBlend.logicOp); + blendState.logicOpEnable = static_cast<VkBool32>(inputAndBlend.logic.opEnable); + blendState.logicOp = static_cast<VkLogicOp>(inputAndBlend.logic.op); blendState.attachmentCount = mRenderPassDesc.colorAttachmentCount(); blendState.pAttachments = blendAttachmentState.data(); @@ -672,7 +721,8 @@ return angle::Result::Continue; } -void 
GraphicsPipelineDesc::updateVertexInput(uint32_t attribIndex, +void GraphicsPipelineDesc::updateVertexInput(GraphicsPipelineTransitionBits *transition, + uint32_t attribIndex, GLuint stride, GLuint divisor, VkFormat format, @@ -682,90 +732,124 @@ bindingDesc.stride = static_cast<uint16_t>(stride); bindingDesc.inputRate = static_cast<uint16_t>(divisor > 0 ? VK_VERTEX_INPUT_RATE_INSTANCE : VK_VERTEX_INPUT_RATE_VERTEX); + constexpr size_t kBindingBaseBit = + offsetof(GraphicsPipelineDesc, mVertexInputBindings) >> kTransitionByteShift; + transition->set(kBindingBaseBit + attribIndex); + static_assert(kVertexInputBindingSize == 4, "Size mismatch"); - ASSERT(format <= std::numeric_limits<uint16_t>::max()); if (format == VK_FORMAT_UNDEFINED) { UNIMPLEMENTED(); } - mVertexInputAttribs.formats[attribIndex] = static_cast<uint8_t>(format); - mVertexInputAttribs.offsets[attribIndex] = static_cast<uint16_t>(relativeOffset); + SetBitField(mVertexInputAttribs.formats[attribIndex], format); + SetBitField(mVertexInputAttribs.offsets[attribIndex], relativeOffset); + transition->set(ANGLE_GET_INDEXED_TRANSITION_BIT(mVertexInputAttribs, formats, attribIndex, 8)); + transition->set( + ANGLE_GET_INDEXED_TRANSITION_BIT(mVertexInputAttribs, offsets, attribIndex, 16)); } -void GraphicsPipelineDesc::updateTopology(gl::PrimitiveMode drawMode) +void GraphicsPipelineDesc::updateTopology(GraphicsPipelineTransitionBits *transition, + gl::PrimitiveMode drawMode) { - mInputAssembltyAndColorBlendStateInfo.topology = - static_cast<uint32_t>(gl_vk::GetPrimitiveTopology(drawMode)); + VkPrimitiveTopology vkTopology = gl_vk::GetPrimitiveTopology(drawMode); + SetBitField(mInputAssemblyAndColorBlendStateInfo.primitive.topology, vkTopology); + + transition->set(ANGLE_GET_TRANSITION_BIT(mInputAssemblyAndColorBlendStateInfo, primitive)); } -void GraphicsPipelineDesc::updateCullMode(const gl::RasterizerState &rasterState) +void GraphicsPipelineDesc::updateCullMode(GraphicsPipelineTransitionBits *transition, + 
const gl::RasterizerState &rasterState) { - mRasterizationAndMultisampleStateInfo.cullMode = + mRasterizationAndMultisampleStateInfo.bits.cullMode = static_cast<uint16_t>(gl_vk::GetCullMode(rasterState)); + transition->set(ANGLE_GET_TRANSITION_BIT(mRasterizationAndMultisampleStateInfo, bits)); } -void GraphicsPipelineDesc::updateFrontFace(const gl::RasterizerState &rasterState, +void GraphicsPipelineDesc::updateFrontFace(GraphicsPipelineTransitionBits *transition, + const gl::RasterizerState &rasterState, bool invertFrontFace) { - mRasterizationAndMultisampleStateInfo.frontFace = + mRasterizationAndMultisampleStateInfo.bits.frontFace = static_cast<uint16_t>(gl_vk::GetFrontFace(rasterState.frontFace, invertFrontFace)); + transition->set(ANGLE_GET_TRANSITION_BIT(mRasterizationAndMultisampleStateInfo, bits)); } -void GraphicsPipelineDesc::updateLineWidth(float lineWidth) +void GraphicsPipelineDesc::updateLineWidth(GraphicsPipelineTransitionBits *transition, + float lineWidth) { mRasterizationAndMultisampleStateInfo.lineWidth = lineWidth; + transition->set(ANGLE_GET_TRANSITION_BIT(mRasterizationAndMultisampleStateInfo, lineWidth)); } -const RenderPassDesc &GraphicsPipelineDesc::getRenderPassDesc() const +void GraphicsPipelineDesc::updateBlendColor(GraphicsPipelineTransitionBits *transition, + const gl::ColorF &color) { - return mRenderPassDesc; + mInputAssemblyAndColorBlendStateInfo.blendConstants[0] = color.red; + mInputAssemblyAndColorBlendStateInfo.blendConstants[1] = color.green; + mInputAssemblyAndColorBlendStateInfo.blendConstants[2] = color.blue; + mInputAssemblyAndColorBlendStateInfo.blendConstants[3] = color.alpha; + constexpr size_t kSize = sizeof(mInputAssemblyAndColorBlendStateInfo.blendConstants[0]) * 8; + + for (int index = 0; index < 4; ++index) + { + const size_t kBit = ANGLE_GET_INDEXED_TRANSITION_BIT(mInputAssemblyAndColorBlendStateInfo, + blendConstants, index, kSize); + transition->set(kBit); + } } -void GraphicsPipelineDesc::updateBlendColor(const 
gl::ColorF &color) -{ - mInputAssembltyAndColorBlendStateInfo.blendConstants[0] = color.red; - mInputAssembltyAndColorBlendStateInfo.blendConstants[1] = color.green; - mInputAssembltyAndColorBlendStateInfo.blendConstants[2] = color.blue; - mInputAssembltyAndColorBlendStateInfo.blendConstants[3] = color.alpha; -} - -void GraphicsPipelineDesc::updateBlendEnabled(bool isBlendEnabled) +void GraphicsPipelineDesc::updateBlendEnabled(GraphicsPipelineTransitionBits *transition, + bool isBlendEnabled) { gl::DrawBufferMask blendEnabled; if (isBlendEnabled) blendEnabled.set(); - mInputAssembltyAndColorBlendStateInfo.blendEnableMask = + mInputAssemblyAndColorBlendStateInfo.blendEnableMask = static_cast<uint8_t>(blendEnabled.bits()); + transition->set( + ANGLE_GET_TRANSITION_BIT(mInputAssemblyAndColorBlendStateInfo, blendEnableMask)); } -void GraphicsPipelineDesc::updateBlendEquations(const gl::BlendState &blendState) +void GraphicsPipelineDesc::updateBlendEquations(GraphicsPipelineTransitionBits *transition, + const gl::BlendState &blendState) { - for (PackedColorBlendAttachmentState &blendAttachmentState : - mInputAssembltyAndColorBlendStateInfo.attachments) + constexpr size_t kSize = sizeof(PackedColorBlendAttachmentState) * 8; + + for (size_t attachmentIndex = 0; attachmentIndex < gl::IMPLEMENTATION_MAX_DRAW_BUFFERS; + ++attachmentIndex) { + PackedColorBlendAttachmentState &blendAttachmentState = + mInputAssemblyAndColorBlendStateInfo.attachments[attachmentIndex]; blendAttachmentState.colorBlendOp = PackGLBlendOp(blendState.blendEquationRGB); blendAttachmentState.alphaBlendOp = PackGLBlendOp(blendState.blendEquationAlpha); + transition->set(ANGLE_GET_INDEXED_TRANSITION_BIT(mInputAssemblyAndColorBlendStateInfo, + attachments, attachmentIndex, kSize)); } } -void GraphicsPipelineDesc::updateBlendFuncs(const gl::BlendState &blendState) +void GraphicsPipelineDesc::updateBlendFuncs(GraphicsPipelineTransitionBits *transition, + const gl::BlendState &blendState) { - for 
(PackedColorBlendAttachmentState &blendAttachmentState : - mInputAssembltyAndColorBlendStateInfo.attachments) + constexpr size_t kSize = sizeof(PackedColorBlendAttachmentState) * 8; + for (size_t attachmentIndex = 0; attachmentIndex < gl::IMPLEMENTATION_MAX_DRAW_BUFFERS; + ++attachmentIndex) { + PackedColorBlendAttachmentState &blendAttachmentState = + mInputAssemblyAndColorBlendStateInfo.attachments[attachmentIndex]; blendAttachmentState.srcColorBlendFactor = PackGLBlendFactor(blendState.sourceBlendRGB); blendAttachmentState.dstColorBlendFactor = PackGLBlendFactor(blendState.destBlendRGB); blendAttachmentState.srcAlphaBlendFactor = PackGLBlendFactor(blendState.sourceBlendAlpha); blendAttachmentState.dstAlphaBlendFactor = PackGLBlendFactor(blendState.destBlendAlpha); + transition->set(ANGLE_GET_INDEXED_TRANSITION_BIT(mInputAssemblyAndColorBlendStateInfo, + attachments, attachmentIndex, kSize)); } } -void GraphicsPipelineDesc::updateColorWriteMask(VkColorComponentFlags colorComponentFlags, - const gl::DrawBufferMask &alphaMask) +void GraphicsPipelineDesc::setColorWriteMask(VkColorComponentFlags colorComponentFlags, + const gl::DrawBufferMask &alphaMask) { - PackedInputAssemblyAndColorBlendStateInfo &inputAndBlend = - mInputAssembltyAndColorBlendStateInfo; + PackedInputAssemblyAndColorBlendStateInfo &inputAndBlend = mInputAssemblyAndColorBlendStateInfo; uint8_t colorMask = static_cast<uint8_t>(colorComponentFlags); for (size_t colorIndex = 0; colorIndex < gl::IMPLEMENTATION_MAX_DRAW_BUFFERS; colorIndex++) @@ -775,105 +859,163 @@ } } -void GraphicsPipelineDesc::updateDepthTestEnabled(const gl::DepthStencilState &depthStencilState, +void GraphicsPipelineDesc::updateColorWriteMask(GraphicsPipelineTransitionBits *transition, + VkColorComponentFlags colorComponentFlags, + const gl::DrawBufferMask &alphaMask) +{ + setColorWriteMask(colorComponentFlags, alphaMask); + + for (size_t colorIndex = 0; colorIndex < gl::IMPLEMENTATION_MAX_DRAW_BUFFERS; colorIndex++) + { + 
transition->set(ANGLE_GET_INDEXED_TRANSITION_BIT(mInputAssemblyAndColorBlendStateInfo, + colorWriteMaskBits, colorIndex, 4)); + } +} + +void GraphicsPipelineDesc::updateDepthTestEnabled(GraphicsPipelineTransitionBits *transition, + const gl::DepthStencilState &depthStencilState, const gl::Framebuffer *drawFramebuffer) { // Only enable the depth test if the draw framebuffer has a depth buffer. It's possible that // we're emulating a stencil-only buffer with a depth-stencil buffer - mDepthStencilStateInfo.depthTestEnable = + mDepthStencilStateInfo.enable.depthTest = static_cast<uint8_t>(depthStencilState.depthTest && drawFramebuffer->hasDepth()); + transition->set(ANGLE_GET_TRANSITION_BIT(mDepthStencilStateInfo, enable)); } -void GraphicsPipelineDesc::updateDepthFunc(const gl::DepthStencilState &depthStencilState) +void GraphicsPipelineDesc::updateDepthFunc(GraphicsPipelineTransitionBits *transition, + const gl::DepthStencilState &depthStencilState) { mDepthStencilStateInfo.depthCompareOp = PackGLCompareFunc(depthStencilState.depthFunc); + transition->set(ANGLE_GET_TRANSITION_BIT(mDepthStencilStateInfo, depthCompareOp)); } -void GraphicsPipelineDesc::updateDepthWriteEnabled(const gl::DepthStencilState &depthStencilState, +void GraphicsPipelineDesc::updateDepthWriteEnabled(GraphicsPipelineTransitionBits *transition, + const gl::DepthStencilState &depthStencilState, const gl::Framebuffer *drawFramebuffer) { // Don't write to depth buffers that should not exist - mDepthStencilStateInfo.depthWriteEnable = + mDepthStencilStateInfo.enable.depthWrite = static_cast<uint8_t>(drawFramebuffer->hasDepth() ? 
depthStencilState.depthMask : 0); + transition->set(ANGLE_GET_TRANSITION_BIT(mDepthStencilStateInfo, enable)); } -void GraphicsPipelineDesc::updateStencilTestEnabled(const gl::DepthStencilState &depthStencilState, +void GraphicsPipelineDesc::updateStencilTestEnabled(GraphicsPipelineTransitionBits *transition, + const gl::DepthStencilState &depthStencilState, const gl::Framebuffer *drawFramebuffer) { // Only enable the stencil test if the draw framebuffer has a stencil buffer. It's possible // that we're emulating a depth-only buffer with a depth-stencil buffer - mDepthStencilStateInfo.stencilTestEnable = + mDepthStencilStateInfo.enable.stencilTest = static_cast<uint8_t>(depthStencilState.stencilTest && drawFramebuffer->hasStencil()); + transition->set(ANGLE_GET_TRANSITION_BIT(mDepthStencilStateInfo, enable)); } -void GraphicsPipelineDesc::updateStencilFrontFuncs(GLint ref, +void GraphicsPipelineDesc::updateStencilFrontFuncs(GraphicsPipelineTransitionBits *transition, + GLint ref, const gl::DepthStencilState &depthStencilState) { mDepthStencilStateInfo.frontStencilReference = static_cast<uint8_t>(ref); - mDepthStencilStateInfo.front.compareOp = PackGLCompareFunc(depthStencilState.stencilFunc); + mDepthStencilStateInfo.front.ops.compare = PackGLCompareFunc(depthStencilState.stencilFunc); mDepthStencilStateInfo.front.compareMask = static_cast<uint8_t>(depthStencilState.stencilMask); + transition->set(ANGLE_GET_TRANSITION_BIT(mDepthStencilStateInfo, front)); + transition->set(ANGLE_GET_TRANSITION_BIT(mDepthStencilStateInfo, frontStencilReference)); } -void GraphicsPipelineDesc::updateStencilBackFuncs(GLint ref, +void GraphicsPipelineDesc::updateStencilBackFuncs(GraphicsPipelineTransitionBits *transition, + GLint ref, const gl::DepthStencilState &depthStencilState) { mDepthStencilStateInfo.backStencilReference = static_cast<uint8_t>(ref); - mDepthStencilStateInfo.back.compareOp = PackGLCompareFunc(depthStencilState.stencilBackFunc); + 
mDepthStencilStateInfo.back.ops.compare = PackGLCompareFunc(depthStencilState.stencilBackFunc); mDepthStencilStateInfo.back.compareMask = static_cast<uint8_t>(depthStencilState.stencilBackMask); + transition->set(ANGLE_GET_TRANSITION_BIT(mDepthStencilStateInfo, back)); + transition->set(ANGLE_GET_TRANSITION_BIT(mDepthStencilStateInfo, backStencilReference)); } -void GraphicsPipelineDesc::updateStencilFrontOps(const gl::DepthStencilState &depthStencilState) +void GraphicsPipelineDesc::updateStencilFrontOps(GraphicsPipelineTransitionBits *transition, + const gl::DepthStencilState &depthStencilState) { - mDepthStencilStateInfo.front.passOp = PackGLStencilOp(depthStencilState.stencilPassDepthPass); - mDepthStencilStateInfo.front.failOp = PackGLStencilOp(depthStencilState.stencilFail); - mDepthStencilStateInfo.front.depthFailOp = + mDepthStencilStateInfo.front.ops.pass = PackGLStencilOp(depthStencilState.stencilPassDepthPass); + mDepthStencilStateInfo.front.ops.fail = PackGLStencilOp(depthStencilState.stencilFail); + mDepthStencilStateInfo.front.ops.depthFail = PackGLStencilOp(depthStencilState.stencilPassDepthFail); + transition->set(ANGLE_GET_TRANSITION_BIT(mDepthStencilStateInfo, front)); } -void GraphicsPipelineDesc::updateStencilBackOps(const gl::DepthStencilState &depthStencilState) +void GraphicsPipelineDesc::updateStencilBackOps(GraphicsPipelineTransitionBits *transition, + const gl::DepthStencilState &depthStencilState) { - mDepthStencilStateInfo.back.passOp = + mDepthStencilStateInfo.back.ops.pass = PackGLStencilOp(depthStencilState.stencilBackPassDepthPass); - mDepthStencilStateInfo.back.failOp = PackGLStencilOp(depthStencilState.stencilBackFail); - mDepthStencilStateInfo.back.depthFailOp = + mDepthStencilStateInfo.back.ops.fail = PackGLStencilOp(depthStencilState.stencilBackFail); + mDepthStencilStateInfo.back.ops.depthFail = PackGLStencilOp(depthStencilState.stencilBackPassDepthFail); + transition->set(ANGLE_GET_TRANSITION_BIT(mDepthStencilStateInfo, 
back)); } void GraphicsPipelineDesc::updateStencilFrontWriteMask( + GraphicsPipelineTransitionBits *transition, const gl::DepthStencilState &depthStencilState, const gl::Framebuffer *drawFramebuffer) { // Don't write to stencil buffers that should not exist mDepthStencilStateInfo.front.writeMask = static_cast<uint8_t>( drawFramebuffer->hasStencil() ? depthStencilState.stencilWritemask : 0); + transition->set(ANGLE_GET_TRANSITION_BIT(mDepthStencilStateInfo, front)); } void GraphicsPipelineDesc::updateStencilBackWriteMask( + GraphicsPipelineTransitionBits *transition, const gl::DepthStencilState &depthStencilState, const gl::Framebuffer *drawFramebuffer) { // Don't write to stencil buffers that should not exist mDepthStencilStateInfo.back.writeMask = static_cast<uint8_t>( drawFramebuffer->hasStencil() ? depthStencilState.stencilBackWritemask : 0); + transition->set(ANGLE_GET_TRANSITION_BIT(mDepthStencilStateInfo, back)); } -void GraphicsPipelineDesc::updatePolygonOffsetFillEnabled(bool enabled) +void GraphicsPipelineDesc::updatePolygonOffsetFillEnabled( + GraphicsPipelineTransitionBits *transition, + bool enabled) { - mRasterizationAndMultisampleStateInfo.depthBiasEnable = enabled; + mRasterizationAndMultisampleStateInfo.bits.depthBiasEnable = enabled; + transition->set(ANGLE_GET_TRANSITION_BIT(mRasterizationAndMultisampleStateInfo, bits)); } -void GraphicsPipelineDesc::updatePolygonOffset(const gl::RasterizerState &rasterState) +void GraphicsPipelineDesc::updatePolygonOffset(GraphicsPipelineTransitionBits *transition, + const gl::RasterizerState &rasterState) { mRasterizationAndMultisampleStateInfo.depthBiasSlopeFactor = rasterState.polygonOffsetFactor; mRasterizationAndMultisampleStateInfo.depthBiasConstantFactor = rasterState.polygonOffsetUnits; + transition->set( + ANGLE_GET_TRANSITION_BIT(mRasterizationAndMultisampleStateInfo, depthBiasSlopeFactor)); + transition->set( + ANGLE_GET_TRANSITION_BIT(mRasterizationAndMultisampleStateInfo, depthBiasConstantFactor)); } 
-void GraphicsPipelineDesc::updateRenderPassDesc(const RenderPassDesc &renderPassDesc) +void GraphicsPipelineDesc::setRenderPassDesc(const RenderPassDesc &renderPassDesc) { mRenderPassDesc = renderPassDesc; } +void GraphicsPipelineDesc::updateRenderPassDesc(GraphicsPipelineTransitionBits *transition, + const RenderPassDesc &renderPassDesc) +{ + setRenderPassDesc(renderPassDesc); + + // The RenderPass is a special case where it spans multiple bits but has no member. + constexpr size_t kFirstBit = + offsetof(GraphicsPipelineDesc, mRenderPassDesc) >> kTransitionByteShift; + constexpr size_t kBitCount = kRenderPassDescSize >> kTransitionByteShift; + for (size_t bit = 0; bit < kBitCount; ++bit) + { + transition->set(kFirstBit + bit); + } +} + // AttachmentOpsArray implementation. AttachmentOpsArray::AttachmentOpsArray() { @@ -1026,6 +1168,41 @@ { return mPushConstantRanges; } + +// PipelineHelper implementation. +PipelineHelper::PipelineHelper() = default; + +PipelineHelper::~PipelineHelper() = default; + +void PipelineHelper::destroy(VkDevice device) +{ + mPipeline.destroy(device); +} + +bool PipelineHelper::findTransition(GraphicsPipelineTransitionBits bits, + const GraphicsPipelineDesc &desc, + PipelineHelper **pipelineOut) const +{ + // Search could be improved using sorting or hashing. + for (const GraphicsPipelineTransition &transition : mTransitions) + { + if (GraphicsPipelineTransitionMatch(transition.bits, bits, *transition.desc, desc)) + { + *pipelineOut = transition.target; + return true; + } + } + + return false; +} + +void PipelineHelper::addTransition(GraphicsPipelineTransitionBits bits, + const GraphicsPipelineDesc *desc, + PipelineHelper *pipeline) +{ + GraphicsPipelineTransition transition = {bits, desc, pipeline}; + mTransitions.push_back(transition); +} } // namespace vk // RenderPassCache implementation. 
@@ -1129,8 +1306,8 @@ { for (auto &item : mPayload) { - vk::PipelineAndSerial &pipeline = item.second; - pipeline.get().destroy(device); + vk::PipelineHelper &pipeline = item.second; + pipeline.destroy(device); } mPayload.clear(); @@ -1140,8 +1317,8 @@ { for (auto &item : mPayload) { - vk::PipelineAndSerial &pipeline = item.second; - renderer->releaseObject(pipeline.getSerial(), &pipeline.get()); + vk::PipelineHelper &pipeline = item.second; + renderer->releaseObject(pipeline.getSerial(), &pipeline.getPipeline()); } mPayload.clear(); @@ -1156,7 +1333,8 @@ const vk::ShaderModule &vertexModule, const vk::ShaderModule &fragmentModule, const vk::GraphicsPipelineDesc &desc, - vk::PipelineAndSerial **pipelineOut) + const vk::GraphicsPipelineDesc **descPtrOut, + vk::PipelineHelper **pipelineOut) { vk::Pipeline newPipeline; @@ -1169,9 +1347,9 @@ } // The Serial will be updated outside of this query. - auto insertedItem = - mPayload.emplace(desc, vk::PipelineAndSerial(std::move(newPipeline), Serial())); - *pipelineOut = &insertedItem.first->second; + auto insertedItem = mPayload.emplace(desc, std::move(newPipeline)); + *descPtrOut = &insertedItem.first->first; + *pipelineOut = &insertedItem.first->second; return angle::Result::Continue; } @@ -1184,7 +1362,7 @@ return; } - mPayload.emplace(desc, vk::PipelineAndSerial(std::move(pipeline), Serial())); + mPayload.emplace(desc, std::move(pipeline)); } // DescriptorSetLayoutCache implementation.
diff --git a/src/libANGLE/renderer/vulkan/vk_cache_utils.h b/src/libANGLE/renderer/vulkan/vk_cache_utils.h index 110268f..5c497a0 100644 --- a/src/libANGLE/renderer/vulkan/vk_cache_utils.h +++ b/src/libANGLE/renderer/vulkan/vk_cache_utils.h
@@ -124,15 +124,6 @@ static_assert(sizeof(AttachmentOpsArray) == 80, "Size check failed"); -struct PackedShaderStageInfo final -{ - uint32_t stage; - uint32_t moduleSerial; - // TODO(jmadill): Do we want specialization constants? -}; - -static_assert(sizeof(PackedShaderStageInfo) == 8, "Size check failed"); - struct PackedVertexInputBindingDesc final { // Although techncially stride can be any value in ES 2.0, in practice supporting stride @@ -142,11 +133,22 @@ uint16_t inputRate; }; -static_assert(sizeof(PackedVertexInputBindingDesc) == 4, "Size check failed"); +constexpr size_t kVertexInputBindingSize = sizeof(PackedVertexInputBindingDesc); +static_assert(kVertexInputBindingSize == 4, "Size check failed"); -struct PackedRasterizationAndMultisampleStateInfo final +using VertexInputBindings = gl::AttribArray<PackedVertexInputBindingDesc>; +constexpr size_t kVertexInputBindingsSize = sizeof(VertexInputBindings); + +struct VertexInputAttributes final { - // Padded to ensure there's no gaps in this structure or those that use it. + uint8_t formats[gl::MAX_VERTEX_ATTRIBS]; + uint16_t offsets[gl::MAX_VERTEX_ATTRIBS]; // can only take 11 bits on NV +}; + +constexpr size_t kVertexInputAttributesSize = sizeof(VertexInputAttributes); + +struct RasterizationStateBits final +{ uint32_t depthClampEnable : 4; uint32_t rasterizationDiscardEnable : 4; uint32_t polygonMode : 4; @@ -157,42 +159,66 @@ uint32_t sampleShadingEnable : 1; uint32_t alphaToCoverageEnable : 1; uint32_t alphaToOneEnable : 2; +}; + +constexpr size_t kRasterizationStateBitsSize = sizeof(RasterizationStateBits); +static_assert(kRasterizationStateBitsSize == 4, "Size check failed"); + +struct PackedRasterizationAndMultisampleStateInfo final +{ + RasterizationStateBits bits; + // Padded to ensure there's no gaps in this structure or those that use it. 
float minSampleShading; uint32_t sampleMask[gl::MAX_SAMPLE_MASK_WORDS]; - float depthBiasConstantFactor; // Note: depth bias clamp is only exposed in a 3.1 extension, but left here for completeness. float depthBiasClamp; + float depthBiasConstantFactor; float depthBiasSlopeFactor; float lineWidth; }; -static constexpr size_t kPackedRasterizationAndMultisampleStateSize = +constexpr size_t kPackedRasterizationAndMultisampleStateSize = sizeof(PackedRasterizationAndMultisampleStateInfo); static_assert(kPackedRasterizationAndMultisampleStateSize == 32, "Size check failed"); +struct StencilOps final +{ + uint8_t fail : 4; + uint8_t pass : 4; + uint8_t depthFail : 4; + uint8_t compare : 4; +}; + +constexpr size_t kStencilOpsSize = sizeof(StencilOps); +static_assert(kStencilOpsSize == 2, "Size check failed"); + struct PackedStencilOpState final { - uint8_t failOp : 4; - uint8_t passOp : 4; - uint8_t depthFailOp : 4; - uint8_t compareOp : 4; + StencilOps ops; uint8_t compareMask; uint8_t writeMask; }; -static constexpr size_t kPackedStencilOpSize = sizeof(PackedStencilOpState); -static_assert(sizeof(PackedStencilOpState) == 4, "Size check failed"); +constexpr size_t kPackedStencilOpSize = sizeof(PackedStencilOpState); +static_assert(kPackedStencilOpSize == 4, "Size check failed"); + +struct DepthStencilEnableFlags final +{ + uint8_t depthTest : 2; // these only need one bit each. the extra is used as padding. 
+ uint8_t depthWrite : 2; + uint8_t depthBoundsTest : 2; + uint8_t stencilTest : 2; +}; + +constexpr size_t kDepthStencilEnableFlagsSize = sizeof(DepthStencilEnableFlags); +static_assert(kDepthStencilEnableFlagsSize == 1, "Size check failed"); struct PackedDepthStencilStateInfo final { - uint8_t depthTestEnable : 1; - uint8_t depthWriteEnable : 1; - uint8_t depthCompareOp : 4; - uint8_t depthBoundsTestEnable : 1; - uint8_t stencilTestEnable : 1; + DepthStencilEnableFlags enable; uint8_t frontStencilReference; uint8_t backStencilReference; - uint8_t padding; + uint8_t depthCompareOp; // only needs 4 bits. extra used as padding. float minDepthBounds; float maxDepthBounds; PackedStencilOpState front; @@ -202,6 +228,15 @@ constexpr size_t kPackedDepthStencilStateSize = sizeof(PackedDepthStencilStateInfo); static_assert(kPackedDepthStencilStateSize == 20, "Size check failed"); +struct LogicOpState final +{ + uint8_t opEnable : 1; + uint8_t op : 7; +}; + +constexpr size_t kLogicOpStateSize = sizeof(LogicOpState); +static_assert(kLogicOpStateSize == 1, "Size check failed"); + struct PackedColorBlendAttachmentState final { uint16_t srcColorBlendFactor : 5; @@ -212,35 +247,50 @@ uint16_t alphaBlendOp : 6; }; -static_assert(sizeof(PackedColorBlendAttachmentState) == 4, "Size check failed"); +constexpr size_t kPackedColorBlendAttachmentStateSize = sizeof(PackedColorBlendAttachmentState); +static_assert(kPackedColorBlendAttachmentStateSize == 4, "Size check failed"); + +struct PrimitiveState final +{ + uint16_t topology : 15; + uint16_t restartEnable : 1; +}; + +constexpr size_t kPrimitiveStateSize = sizeof(PrimitiveState); +static_assert(kPrimitiveStateSize == 2, "Size check failed"); struct PackedInputAssemblyAndColorBlendStateInfo final { - uint8_t logicOpEnable : 1; - uint8_t logicOp : 7; - uint8_t blendEnableMask; uint8_t colorWriteMaskBits[gl::IMPLEMENTATION_MAX_DRAW_BUFFERS / 2]; PackedColorBlendAttachmentState attachments[gl::IMPLEMENTATION_MAX_DRAW_BUFFERS]; - 
uint16_t topology : 15; - uint16_t primitiveRestartEnable : 1; float blendConstants[4]; + LogicOpState logic; + uint8_t blendEnableMask; + PrimitiveState primitive; }; constexpr size_t kPackedInputAssemblyAndColorBlendStateSize = sizeof(PackedInputAssemblyAndColorBlendStateInfo); static_assert(kPackedInputAssemblyAndColorBlendStateSize == 56, "Size check failed"); -using VertexInputBindings = gl::AttribArray<PackedVertexInputBindingDesc>; +constexpr size_t kGraphicsPipelineDescSumOfSizes = + kVertexInputBindingsSize + kVertexInputAttributesSize + + kPackedInputAssemblyAndColorBlendStateSize + kPackedRasterizationAndMultisampleStateSize + + kPackedDepthStencilStateSize + kRenderPassDescSize; -struct VertexInputAttributes final -{ - uint8_t formats[gl::MAX_VERTEX_ATTRIBS]; - uint16_t offsets[gl::MAX_VERTEX_ATTRIBS]; // can only take 11 bits on NV -}; +// Number of dirty bits in the dirty bit set. +constexpr size_t kGraphicsPipelineDirtyBitBytes = 4; +constexpr static size_t kNumGraphicsPipelineDirtyBits = + kGraphicsPipelineDescSumOfSizes / kGraphicsPipelineDirtyBitBytes; +static_assert(kNumGraphicsPipelineDirtyBits <= 64, "Too many pipeline dirty bits"); -constexpr size_t kVertexInputBindingsSize = sizeof(VertexInputBindings); -constexpr size_t kVertexInputAttributesSize = sizeof(VertexInputAttributes); +// Set of dirty bits. Each bit represents kGraphicsPipelineDirtyBitBytes in the desc. +using GraphicsPipelineTransitionBits = angle::BitSet<kNumGraphicsPipelineDirtyBits>; +// State changes are applied through the update methods. Each update method can also have a +// sibling method that applies the update without marking a state transition. The non-transition +// update methods are used for internal shader pipelines. Not every non-transition update method +// is implemented yet as not every state is used in internal shaders. class GraphicsPipelineDesc final { public: @@ -258,6 +308,13 @@ void initDefaults(); + // For custom comparisons. 
+ template <typename T> + const T *getPtr() const + { + return reinterpret_cast<const T *>(this); + } + angle::Result initializePipeline(vk::Context *context, const vk::PipelineCache &pipelineCacheVk, const RenderPass &compatibleRenderPass, @@ -267,53 +324,78 @@ const ShaderModule &fragmentModule, Pipeline *pipelineOut) const; - // Vertex input state - void updateVertexInput(uint32_t attribIndex, + // Vertex input state. For ES 3.1 this should be separated into binding and attribute. + void updateVertexInput(GraphicsPipelineTransitionBits *transition, + uint32_t attribIndex, GLuint stride, GLuint divisor, VkFormat format, GLuint relativeOffset); // Input assembly info - void updateTopology(gl::PrimitiveMode drawMode); + void updateTopology(GraphicsPipelineTransitionBits *transition, gl::PrimitiveMode drawMode); // Raster states - void updateCullMode(const gl::RasterizerState &rasterState); - void updateFrontFace(const gl::RasterizerState &rasterState, bool invertFrontFace); - void updateLineWidth(float lineWidth); + void updateCullMode(GraphicsPipelineTransitionBits *transition, + const gl::RasterizerState &rasterState); + void updateFrontFace(GraphicsPipelineTransitionBits *transition, + const gl::RasterizerState &rasterState, + bool invertFrontFace); + void updateLineWidth(GraphicsPipelineTransitionBits *transition, float lineWidth); // RenderPass description. 
- const RenderPassDesc &getRenderPassDesc() const; - void updateRenderPassDesc(const RenderPassDesc &renderPassDesc); + const RenderPassDesc &getRenderPassDesc() const { return mRenderPassDesc; } + + void setRenderPassDesc(const RenderPassDesc &renderPassDesc); + void updateRenderPassDesc(GraphicsPipelineTransitionBits *transition, + const RenderPassDesc &renderPassDesc); // Blend states - void updateBlendEnabled(bool isBlendEnabled); - void updateBlendColor(const gl::ColorF &color); - void updateBlendFuncs(const gl::BlendState &blendState); - void updateBlendEquations(const gl::BlendState &blendState); - void updateColorWriteMask(VkColorComponentFlags colorComponentFlags, + void updateBlendEnabled(GraphicsPipelineTransitionBits *transition, bool isBlendEnabled); + void updateBlendColor(GraphicsPipelineTransitionBits *transition, const gl::ColorF &color); + void updateBlendFuncs(GraphicsPipelineTransitionBits *transition, + const gl::BlendState &blendState); + void updateBlendEquations(GraphicsPipelineTransitionBits *transition, + const gl::BlendState &blendState); + void setColorWriteMask(VkColorComponentFlags colorComponentFlags, + const gl::DrawBufferMask &alphaMask); + void updateColorWriteMask(GraphicsPipelineTransitionBits *transition, + VkColorComponentFlags colorComponentFlags, const gl::DrawBufferMask &alphaMask); // Depth/stencil states. 
- void updateDepthTestEnabled(const gl::DepthStencilState &depthStencilState, + void updateDepthTestEnabled(GraphicsPipelineTransitionBits *transition, + const gl::DepthStencilState &depthStencilState, const gl::Framebuffer *drawFramebuffer); - void updateDepthFunc(const gl::DepthStencilState &depthStencilState); - void updateDepthWriteEnabled(const gl::DepthStencilState &depthStencilState, + void updateDepthFunc(GraphicsPipelineTransitionBits *transition, + const gl::DepthStencilState &depthStencilState); + void updateDepthWriteEnabled(GraphicsPipelineTransitionBits *transition, + const gl::DepthStencilState &depthStencilState, const gl::Framebuffer *drawFramebuffer); - void updateStencilTestEnabled(const gl::DepthStencilState &depthStencilState, + void updateStencilTestEnabled(GraphicsPipelineTransitionBits *transition, + const gl::DepthStencilState &depthStencilState, const gl::Framebuffer *drawFramebuffer); - void updateStencilFrontFuncs(GLint ref, const gl::DepthStencilState &depthStencilState); - void updateStencilBackFuncs(GLint ref, const gl::DepthStencilState &depthStencilState); - void updateStencilFrontOps(const gl::DepthStencilState &depthStencilState); - void updateStencilBackOps(const gl::DepthStencilState &depthStencilState); - void updateStencilFrontWriteMask(const gl::DepthStencilState &depthStencilState, + void updateStencilFrontFuncs(GraphicsPipelineTransitionBits *transition, + GLint ref, + const gl::DepthStencilState &depthStencilState); + void updateStencilBackFuncs(GraphicsPipelineTransitionBits *transition, + GLint ref, + const gl::DepthStencilState &depthStencilState); + void updateStencilFrontOps(GraphicsPipelineTransitionBits *transition, + const gl::DepthStencilState &depthStencilState); + void updateStencilBackOps(GraphicsPipelineTransitionBits *transition, + const gl::DepthStencilState &depthStencilState); + void updateStencilFrontWriteMask(GraphicsPipelineTransitionBits *transition, + const gl::DepthStencilState &depthStencilState, 
const gl::Framebuffer *drawFramebuffer); - void updateStencilBackWriteMask(const gl::DepthStencilState &depthStencilState, + void updateStencilBackWriteMask(GraphicsPipelineTransitionBits *transition, + const gl::DepthStencilState &depthStencilState, const gl::Framebuffer *drawFramebuffer); // Depth offset. - void updatePolygonOffsetFillEnabled(bool enabled); - void updatePolygonOffset(const gl::RasterizerState &rasterState); + void updatePolygonOffsetFillEnabled(GraphicsPipelineTransitionBits *transition, bool enabled); + void updatePolygonOffset(GraphicsPipelineTransitionBits *transition, + const gl::RasterizerState &rasterState); private: VertexInputBindings mVertexInputBindings; @@ -321,7 +403,7 @@ RenderPassDesc mRenderPassDesc; PackedRasterizationAndMultisampleStateInfo mRasterizationAndMultisampleStateInfo; PackedDepthStencilStateInfo mDepthStencilStateInfo; - PackedInputAssemblyAndColorBlendStateInfo mInputAssembltyAndColorBlendStateInfo; + PackedInputAssemblyAndColorBlendStateInfo mInputAssemblyAndColorBlendStateInfo; // Viewport and scissor are applied as dynamic state. }; @@ -329,12 +411,7 @@ // This is not guaranteed by the spec, but is validated by a compile-time check. // No gaps or padding at the end ensures that hashing and memcmp checks will not run // into uninitialized memory regions. -constexpr size_t kGraphicsPipelineDescSumOfSizes = - kVertexInputBindingsSize + kVertexInputAttributesSize + - kPackedInputAssemblyAndColorBlendStateSize + kPackedRasterizationAndMultisampleStateSize + - kPackedDepthStencilStateSize + kRenderPassDescSize; - -static constexpr size_t kGraphicsPipelineDescSize = sizeof(GraphicsPipelineDesc); +constexpr size_t kGraphicsPipelineDescSize = sizeof(GraphicsPipelineDesc); static_assert(kGraphicsPipelineDescSize == kGraphicsPipelineDescSumOfSizes, "Size mismatch"); constexpr uint32_t kMaxDescriptorSetLayoutBindings = gl::IMPLEMENTATION_MAX_ACTIVE_TEXTURES; @@ -430,6 +507,43 @@ // Disable warnings about struct padding. 
ANGLE_DISABLE_STRUCT_PADDING_WARNINGS + +class PipelineHelper; + +struct GraphicsPipelineTransition +{ + GraphicsPipelineTransitionBits bits; + const GraphicsPipelineDesc *desc; + PipelineHelper *target; +}; + +class PipelineHelper final : angle::NonCopyable +{ + public: + PipelineHelper(); + ~PipelineHelper(); + explicit PipelineHelper(Pipeline &&pipeline) : mPipeline(std::move(pipeline)) {} + + void destroy(VkDevice device); + + void updateSerial(Serial serial) { mSerial = serial; } + bool valid() const { return mPipeline.valid(); } + Serial getSerial() const { return mSerial; } + Pipeline &getPipeline() { return mPipeline; } + + bool findTransition(GraphicsPipelineTransitionBits bits, + const GraphicsPipelineDesc &desc, + PipelineHelper **pipelineOut) const; + void addTransition(GraphicsPipelineTransitionBits bits, + const GraphicsPipelineDesc *desc, + PipelineHelper *pipeline); + + private: + std::vector<GraphicsPipelineTransition> mTransitions; + Serial mSerial; + Pipeline mPipeline; +}; + } // namespace vk } // namespace rx @@ -538,18 +652,20 @@ const vk::ShaderModule &vertexModule, const vk::ShaderModule &fragmentModule, const vk::GraphicsPipelineDesc &desc, - vk::PipelineAndSerial **pipelineOut) + const vk::GraphicsPipelineDesc **descPtrOut, + vk::PipelineHelper **pipelineOut) { auto item = mPayload.find(desc); if (item != mPayload.end()) { + *descPtrOut = &item->first; *pipelineOut = &item->second; return angle::Result::Continue; } return insertPipeline(context, pipelineCacheVk, compatibleRenderPass, pipelineLayout, activeAttribLocationsMask, vertexModule, fragmentModule, desc, - pipelineOut); + descPtrOut, pipelineOut); } private: @@ -561,9 +677,10 @@ const vk::ShaderModule &vertexModule, const vk::ShaderModule &fragmentModule, const vk::GraphicsPipelineDesc &desc, - vk::PipelineAndSerial **pipelineOut); + const vk::GraphicsPipelineDesc **descPtrOut, + vk::PipelineHelper **pipelineOut); - std::unordered_map<vk::GraphicsPipelineDesc, vk::PipelineAndSerial> 
mPayload; + std::unordered_map<vk::GraphicsPipelineDesc, vk::PipelineHelper> mPayload; }; class DescriptorSetLayoutCache final : angle::NonCopyable
diff --git a/src/libANGLE/renderer/vulkan/vk_helpers.h b/src/libANGLE/renderer/vulkan/vk_helpers.h index 389b386..e6228a1 100644 --- a/src/libANGLE/renderer/vulkan/vk_helpers.h +++ b/src/libANGLE/renderer/vulkan/vk_helpers.h
@@ -641,7 +641,8 @@ const PipelineLayout &pipelineLayout, const GraphicsPipelineDesc &pipelineDesc, const gl::AttributesMask &activeAttribLocationsMask, - PipelineAndSerial **pipelineOut) + const vk::GraphicsPipelineDesc **descPtrOut, + PipelineHelper **pipelineOut) { // Pull in a compatible RenderPass. vk::RenderPass *compatibleRenderPass = nullptr; @@ -651,7 +652,7 @@ return mGraphicsPipelines.getPipeline( context, pipelineCache, *compatibleRenderPass, pipelineLayout, activeAttribLocationsMask, mShaders[gl::ShaderType::Vertex].get().get(), - mShaders[gl::ShaderType::Fragment].get().get(), pipelineDesc, pipelineOut); + mShaders[gl::ShaderType::Fragment].get().get(), pipelineDesc, descPtrOut, pipelineOut); } angle::Result getComputePipeline(Context *context, @@ -661,6 +662,8 @@ private: gl::ShaderMap<BindingPointer<ShaderAndSerial>> mShaders; GraphicsPipelineCache mGraphicsPipelines; + + // We should probably use PipelineHelper here so we can remove PipelineAndSerial. PipelineAndSerial mComputePipeline; }; } // namespace vk
diff --git a/src/tests/perf_tests/VulkanPipelineCachePerf.cpp b/src/tests/perf_tests/VulkanPipelineCachePerf.cpp index 10c60a0..2ca98a6 100644 --- a/src/tests/perf_tests/VulkanPipelineCachePerf.cpp +++ b/src/tests/perf_tests/VulkanPipelineCachePerf.cpp
@@ -83,14 +83,15 @@ vk::PipelineLayout pl; vk::PipelineCache pc; vk::ShaderModule sm; - vk::PipelineAndSerial *result = nullptr; + const vk::GraphicsPipelineDesc *desc = nullptr; + vk::PipelineHelper *result = nullptr; gl::AttributesMask am; for (unsigned int iteration = 0; iteration < kIterationsPerStep; ++iteration) { for (const auto &hit : mCacheHits) { - (void)mCache.getPipeline(VK_NULL_HANDLE, pc, rp, pl, am, sm, sm, hit, &result); + (void)mCache.getPipeline(VK_NULL_HANDLE, pc, rp, pl, am, sm, sm, hit, &desc, &result); } } @@ -98,7 +99,7 @@ ++missCount, ++mMissIndex) { const auto &miss = mCacheMisses[mMissIndex]; - (void)mCache.getPipeline(VK_NULL_HANDLE, pc, rp, pl, am, sm, sm, miss, &result); + (void)mCache.getPipeline(VK_NULL_HANDLE, pc, rp, pl, am, sm, sm, miss, &desc, &result); } }