diff --git a/impeller/entity/entity_unittests.cc b/impeller/entity/entity_unittests.cc
index e949755a9080c..f3d40b009cf98 100644
--- a/impeller/entity/entity_unittests.cc
+++ b/impeller/entity/entity_unittests.cc
@@ -2545,11 +2545,6 @@ TEST_P(EntityTest, PointFieldGeometryCoverage) {
             Rect::MakeLTRB(35, 15, 135, 205));
 }
 
-TEST_P(EntityTest, PointFieldCanUseCompute) {
-  EXPECT_EQ(PointFieldGeometry::CanUseCompute(*GetContentContext()),
-            GetContext()->GetBackendType() == Context::BackendType::kMetal);
-}
-
 TEST_P(EntityTest, ColorFilterContentsWithLargeGeometry) {
   Entity entity;
   entity.SetTransform(Matrix::MakeScale(GetContentScale()));
diff --git a/impeller/entity/geometry/point_field_geometry.cc b/impeller/entity/geometry/point_field_geometry.cc
index 0e7bf69476ce8..5b5cd8caf7813 100644
--- a/impeller/entity/geometry/point_field_geometry.cc
+++ b/impeller/entity/geometry/point_field_geometry.cc
@@ -17,7 +17,7 @@ GeometryResult PointFieldGeometry::GetPositionBuffer(
     const ContentContext& renderer,
     const Entity& entity,
     RenderPass& pass) const {
-  if (CanUseCompute(renderer)) {
+  if (renderer.GetDeviceCapabilities().SupportsCompute()) {
     return GetPositionBufferGPU(renderer, entity, pass);
   }
   auto vtx_builder = GetPositionBufferCPU(renderer, entity, pass);
@@ -40,7 +40,7 @@ GeometryResult PointFieldGeometry::GetPositionUVBuffer(
     const ContentContext& renderer,
     const Entity& entity,
     RenderPass& pass) const {
-  if (CanUseCompute(renderer)) {
+  if (renderer.GetDeviceCapabilities().SupportsCompute()) {
     return GetPositionBufferGPU(renderer, entity, pass, texture_coverage,
                                 effect_transform);
   }
@@ -200,6 +200,7 @@ GeometryResult PointFieldGeometry::GetPositionBufferGPU(
 
     using UV = UvComputeShader;
 
+    compute_pass->AddBufferMemoryBarrier();
     compute_pass->SetCommandLabel("UV Geometry");
     compute_pass->SetPipeline(renderer.GetUvComputePipeline());
 
@@ -264,14 +265,6 @@ GeometryVertexType PointFieldGeometry::GetVertexType() const {
   return GeometryVertexType::kPosition;
 }
 
-// Compute is disabled for Vulkan because the barriers are incorrect, see
-// also: https://github.com/flutter/flutter/issues/140798 .
-bool PointFieldGeometry::CanUseCompute(const ContentContext& renderer) {
-  return renderer.GetDeviceCapabilities().SupportsCompute() &&
-         renderer.GetContext()->GetBackendType() ==
-             Context::BackendType::kMetal;
-}
-
 // |Geometry|
 std::optional<Rect> PointFieldGeometry::GetCoverage(
     const Matrix& transform) const {
diff --git a/impeller/entity/geometry/point_field_geometry.h b/impeller/entity/geometry/point_field_geometry.h
index a8c296adfc3c6..9d43f07cb9fbe 100644
--- a/impeller/entity/geometry/point_field_geometry.h
+++ b/impeller/entity/geometry/point_field_geometry.h
@@ -17,9 +17,6 @@ class PointFieldGeometry final : public Geometry {
 
   static size_t ComputeCircleDivisions(Scalar scaled_radius, bool round);
 
-  /// If the platform can use compute safely.
-  static bool CanUseCompute(const ContentContext& renderer);
-
  private:
   // |Geometry|
   GeometryResult GetPositionBuffer(const ContentContext& renderer,
diff --git a/impeller/renderer/backend/metal/compute_pass_mtl.h b/impeller/renderer/backend/metal/compute_pass_mtl.h
index 4ca07189d6be6..ff9669a4105ca 100644
--- a/impeller/renderer/backend/metal/compute_pass_mtl.h
+++ b/impeller/renderer/backend/metal/compute_pass_mtl.h
@@ -66,6 +66,12 @@ class ComputePassMTL final : public ComputePass {
   // |ComputePass|
   bool EncodeCommands() const override;
 
+  // |ComputePass|
+  void AddBufferMemoryBarrier() override;
+
+  // |ComputePass|
+  void AddTextureMemoryBarrier() override;
+
   ComputePassMTL(const ComputePassMTL&) = delete;
 
   ComputePassMTL& operator=(const ComputePassMTL&) = delete;
diff --git a/impeller/renderer/backend/metal/compute_pass_mtl.mm b/impeller/renderer/backend/metal/compute_pass_mtl.mm
index c4106e87bc054..f05825c139ec3 100644
--- a/impeller/renderer/backend/metal/compute_pass_mtl.mm
+++ b/impeller/renderer/backend/metal/compute_pass_mtl.mm
@@ -32,7 +32,8 @@
   if (!buffer_) {
     return;
   }
-  encoder_ = [buffer_ computeCommandEncoder];
+  encoder_ = [buffer_ computeCommandEncoderWithDispatchType:
+                          MTLDispatchType::MTLDispatchTypeConcurrent];
   if (!encoder_) {
     return;
   }
@@ -67,6 +68,16 @@
       ComputePipelineMTL::Cast(*pipeline).GetMTLComputePipelineState());
 }
 
+// |ComputePass|
+void ComputePassMTL::AddBufferMemoryBarrier() {
+  [encoder_ memoryBarrierWithScope:MTLBarrierScopeBuffers];
+}
+
+// |ComputePass|
+void ComputePassMTL::AddTextureMemoryBarrier() {
+  [encoder_ memoryBarrierWithScope:MTLBarrierScopeTextures];
+}
+
 // |ComputePass|
 bool ComputePassMTL::BindResource(ShaderStage stage,
                                   DescriptorType type,
diff --git a/impeller/renderer/backend/vulkan/compute_pass_vk.cc b/impeller/renderer/backend/vulkan/compute_pass_vk.cc
index 71c3815b6f2cd..2b5ac2eece620 100644
--- a/impeller/renderer/backend/vulkan/compute_pass_vk.cc
+++ b/impeller/renderer/backend/vulkan/compute_pass_vk.cc
@@ -104,11 +104,7 @@ fml::Status ComputePassVK::Compute(const ISize& grid_size) {
 
   // Special case for linear processing.
   if (height == 1) {
-    int64_t minimum = 1;
-    int64_t threadGroups = std::max(
-        static_cast<int64_t>(std::ceil(width * 1.0 / max_wg_size_[0] * 1.0)),
-        minimum);
-    command_buffer_vk.dispatch(threadGroups, 1, 1);
+    command_buffer_vk.dispatch(width, 1, 1);
   } else {
     while (width > max_wg_size_[0]) {
       width = std::max(static_cast<int64_t>(1), width / 2);
@@ -216,8 +212,53 @@ bool ComputePassVK::BindResource(size_t binding,
   return true;
 }
 
+// Note:
+// https://github.com/KhronosGroup/Vulkan-Docs/wiki/Synchronization-Examples
+// Seems to suggest that anything more finely grained than a global memory
+// barrier is likely to be weakened into a global barrier. Confirming this on
+// mobile devices will require some experimentation.
+
+// |ComputePass|
+void ComputePassVK::AddBufferMemoryBarrier() {
+  vk::MemoryBarrier barrier;
+  barrier.srcAccessMask = vk::AccessFlagBits::eShaderWrite;
+  barrier.dstAccessMask = vk::AccessFlagBits::eShaderRead;
+
+  command_buffer_->GetEncoder()->GetCommandBuffer().pipelineBarrier(
+      vk::PipelineStageFlagBits::eComputeShader,
+      vk::PipelineStageFlagBits::eComputeShader, {}, 1, &barrier, 0, {}, 0, {});
+}
+
+// |ComputePass|
+void ComputePassVK::AddTextureMemoryBarrier() {
+  vk::MemoryBarrier barrier;
+  barrier.srcAccessMask = vk::AccessFlagBits::eShaderWrite;
+  barrier.dstAccessMask = vk::AccessFlagBits::eShaderRead;
+
+  command_buffer_->GetEncoder()->GetCommandBuffer().pipelineBarrier(
+      vk::PipelineStageFlagBits::eComputeShader,
+      vk::PipelineStageFlagBits::eComputeShader, {}, 1, &barrier, 0, {}, 0, {});
+}
+
 // |ComputePass|
 bool ComputePassVK::EncodeCommands() const {
+  // Since we only use global memory barriers, we don't have to worry about
+  // compute to compute dependencies across cmd buffers. Instead, we pessimize
+  // here and assume that we wrote to a storage image or buffer and that a
+  // render pass will read from it. If there are ever scenarios where we end up
+  // with compute to compute dependencies, this should be revisited.
+
+  // This does not currently handle image barriers as we do not use them
+  // for anything.
+  vk::MemoryBarrier barrier;
+  barrier.srcAccessMask = vk::AccessFlagBits::eShaderWrite;
+  barrier.dstAccessMask =
+      vk::AccessFlagBits::eIndexRead | vk::AccessFlagBits::eVertexAttributeRead;
+
+  command_buffer_->GetEncoder()->GetCommandBuffer().pipelineBarrier(
+      vk::PipelineStageFlagBits::eComputeShader,
+      vk::PipelineStageFlagBits::eVertexInput, {}, 1, &barrier, 0, {}, 0, {});
+
   return true;
 }
diff --git a/impeller/renderer/backend/vulkan/compute_pass_vk.h b/impeller/renderer/backend/vulkan/compute_pass_vk.h
index b13e005fa5496..adc2720d6272a 100644
--- a/impeller/renderer/backend/vulkan/compute_pass_vk.h
+++ b/impeller/renderer/backend/vulkan/compute_pass_vk.h
@@ -57,6 +57,12 @@ class ComputePassVK final : public ComputePass {
   void SetPipeline(const std::shared_ptr<Pipeline<ComputePipelineDescriptor>>&
                        pipeline) override;
 
+  // |ComputePass|
+  void AddBufferMemoryBarrier() override;
+
+  // |ComputePass|
+  void AddTextureMemoryBarrier() override;
+
   // |ComputePass|
   fml::Status Compute(const ISize& grid_size) override;
 
diff --git a/impeller/renderer/compute_pass.h b/impeller/renderer/compute_pass.h
index 40c313ecf203c..50b65c432e86b 100644
--- a/impeller/renderer/compute_pass.h
+++ b/impeller/renderer/compute_pass.h
@@ -35,6 +35,22 @@ class ComputePass : public ResourceBinder {
 
   virtual fml::Status Compute(const ISize& grid_size) = 0;
 
+  /// @brief Ensures all previously encoded compute commands' buffer writes are
+  ///        visible to any subsequent compute commands.
+  ///
+  /// On Vulkan, it does not matter if the compute command is in a
+  /// different command buffer, only that it is executed later in queue
+  /// order.
+  virtual void AddBufferMemoryBarrier() = 0;
+
+  /// @brief Ensures all previously encoded compute commands' texture writes are
+  ///        visible to any subsequent compute commands.
+  ///
+  /// On Vulkan, it does not matter if the compute command is in a
+  /// different command buffer, only that it is executed later in queue
+  /// order.
+  virtual void AddTextureMemoryBarrier() = 0;
+
   //----------------------------------------------------------------------------
   /// @brief      Encode the recorded commands to the underlying command buffer.
   ///
@@ -43,6 +59,8 @@ class ComputePass : public ResourceBinder {
   ///
   virtual bool EncodeCommands() const = 0;
 
+  const Context& GetContext() const { return *context_; }
+
  protected:
   const std::shared_ptr<const Context> context_;
 
diff --git a/impeller/renderer/compute_unittests.cc b/impeller/renderer/compute_unittests.cc
index d8fd55d875c0f..8a3cb8769412b 100644
--- a/impeller/renderer/compute_unittests.cc
+++ b/impeller/renderer/compute_unittests.cc
@@ -308,6 +308,7 @@ TEST_P(ComputeTest, MultiStageInputAndOutput) {
     CS1::BindOutput(*pass, DeviceBuffer::AsBufferView(output_buffer_1));
 
     ASSERT_TRUE(pass->Compute(ISize(512, 1)).ok());
+    pass->AddBufferMemoryBarrier();
   }
 
   {
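
For reviewers, a minimal usage sketch of the new barrier API (not part of the patch). It mirrors the pattern the change introduces in point_field_geometry.cc and compute_unittests.cc: dispatch a stage that writes a buffer, call AddBufferMemoryBarrier(), then dispatch a stage that reads it. The function name, pipeline parameters, and element_count are hypothetical; resource binding is elided; only the ComputePass calls (SetCommandLabel, SetPipeline, Compute, AddBufferMemoryBarrier, EncodeCommands) come from the interface touched above.

// Sketch only, assuming pipelines are built elsewhere and bindings are elided.
#include <cstddef>
#include <memory>

#include "impeller/renderer/compute_pass.h"

namespace impeller {

bool DispatchDependentStages(
    ComputePass& pass,
    const std::shared_ptr<Pipeline<ComputePipelineDescriptor>>& stage_one,
    const std::shared_ptr<Pipeline<ComputePipelineDescriptor>>& stage_two,
    size_t element_count) {
  // Stage one: writes its output storage buffer.
  pass.SetCommandLabel("Stage One");
  pass.SetPipeline(stage_one);
  // ... bind stage one inputs/outputs here ...
  if (!pass.Compute(ISize(element_count, 1)).ok()) {
    return false;
  }

  // Make stage one's buffer writes visible to stage two. Without this, the
  // Metal encoder (now MTLDispatchTypeConcurrent) and the Vulkan pass (which
  // only emits a compute->vertex barrier in EncodeCommands()) give no
  // ordering guarantee between the two dispatches.
  pass.AddBufferMemoryBarrier();

  // Stage two: reads stage one's output.
  pass.SetCommandLabel("Stage Two");
  pass.SetPipeline(stage_two);
  // ... bind stage two inputs/outputs here ...
  if (!pass.Compute(ISize(element_count, 1)).ok()) {
    return false;
  }

  return pass.EncodeCommands();
}

}  // namespace impeller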