[Impeller] Optionally support SamplerAddressMode::kDecal on the OpenGLES backend (#46650)

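Fixes https://github.com/flutter/flutter/issues/129358

The OpenGLES backend now reports decal support when one of the
GL_EXT/GL_NV/GL_OES_texture_border_clamp extensions is present, and maps
SamplerAddressMode::kDecal to GL_CLAMP_TO_BORDER in that case. The blend and
morphology shaders receive a supports_decal_sampler_address_mode uniform and
fall back to IPHalfSampleDecal when the extension is unavailable.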
diff --git a/impeller/entity/contents/atlas_contents.cc b/impeller/entity/contents/atlas_contents.cc
index f92c3db..34030ac 100644
--- a/impeller/entity/contents/atlas_contents.cc
+++ b/impeller/entity/contents/atlas_contents.cc
@@ -262,6 +262,8 @@
       dst_sampler_descriptor.width_address_mode = SamplerAddressMode::kDecal;
       dst_sampler_descriptor.height_address_mode = SamplerAddressMode::kDecal;
     }
+    frag_info.supports_decal_sampler_address_mode =
+        renderer.GetDeviceCapabilities().SupportsDecalSamplerAddressMode();
     auto dst_sampler = renderer.GetContext()->GetSamplerLibrary()->GetSampler(
         dst_sampler_descriptor);
     FS::BindTextureSamplerDst(cmd, texture_, dst_sampler);
diff --git a/impeller/entity/contents/filters/blend_filter_contents.cc b/impeller/entity/contents/filters/blend_filter_contents.cc
index 7525bda..44c33c5 100644
--- a/impeller/entity/contents/filters/blend_filter_contents.cc
+++ b/impeller/entity/contents/filters/blend_filter_contents.cc
@@ -181,6 +181,8 @@
       dst_sampler_descriptor.width_address_mode = SamplerAddressMode::kDecal;
       dst_sampler_descriptor.height_address_mode = SamplerAddressMode::kDecal;
     }
+    blend_info.supports_decal_sampler_address_mode =
+        renderer.GetDeviceCapabilities().SupportsDecalSamplerAddressMode();
     auto dst_sampler = renderer.GetContext()->GetSamplerLibrary()->GetSampler(
         dst_sampler_descriptor);
     FS::BindTextureSamplerDst(cmd, dst_snapshot->texture, dst_sampler);
@@ -355,6 +357,8 @@
       dst_sampler_descriptor.width_address_mode = SamplerAddressMode::kDecal;
       dst_sampler_descriptor.height_address_mode = SamplerAddressMode::kDecal;
     }
+    blend_info.supports_decal_sampler_address_mode =
+        renderer.GetDeviceCapabilities().SupportsDecalSamplerAddressMode();
     auto dst_sampler = renderer.GetContext()->GetSamplerLibrary()->GetSampler(
         dst_sampler_descriptor);
     FS::BindTextureSamplerDst(cmd, dst_snapshot->texture, dst_sampler);
@@ -479,6 +483,8 @@
       dst_sampler_descriptor.width_address_mode = SamplerAddressMode::kDecal;
       dst_sampler_descriptor.height_address_mode = SamplerAddressMode::kDecal;
     }
+    frag_info.supports_decal_sampler_address_mode =
+        renderer.GetDeviceCapabilities().SupportsDecalSamplerAddressMode();
     auto dst_sampler = renderer.GetContext()->GetSamplerLibrary()->GetSampler(
         dst_sampler_descriptor);
     FS::BindTextureSamplerDst(cmd, dst_snapshot->texture, dst_sampler);
diff --git a/impeller/entity/contents/filters/morphology_filter_contents.cc b/impeller/entity/contents/filters/morphology_filter_contents.cc
index bf7a69e..d8f087a 100644
--- a/impeller/entity/contents/filters/morphology_filter_contents.cc
+++ b/impeller/entity/contents/filters/morphology_filter_contents.cc
@@ -127,6 +127,8 @@
       sampler_descriptor.width_address_mode = SamplerAddressMode::kDecal;
       sampler_descriptor.height_address_mode = SamplerAddressMode::kDecal;
     }
+    frag_info.supports_decal_sampler_address_mode =
+        renderer.GetDeviceCapabilities().SupportsDecalSamplerAddressMode();
 
     FS::BindTextureSampler(
         cmd, input_snapshot->texture,
diff --git a/impeller/entity/shaders/blending/advanced_blend.glsl b/impeller/entity/shaders/blending/advanced_blend.glsl
index c2e26d5..9a11dce 100644
--- a/impeller/entity/shaders/blending/advanced_blend.glsl
+++ b/impeller/entity/shaders/blending/advanced_blend.glsl
@@ -12,6 +12,7 @@
   float16_t src_input_alpha;
   float16_t color_factor;
   f16vec4 color;  // This color input is expected to be unpremultiplied.
+  float supports_decal_sampler_address_mode;
 }
 blend_info;
 
@@ -24,9 +25,12 @@
 out f16vec4 frag_color;
 
 f16vec4 Sample(f16sampler2D texture_sampler, vec2 texture_coords) {
-// gles 2.0 is the only backend without native decal support.
 #ifdef IMPELLER_TARGET_OPENGLES
-  return IPSampleDecal(texture_sampler, texture_coords);
+  if (blend_info.supports_decal_sampler_address_mode > 0.0) {
+    return texture(texture_sampler, texture_coords);
+  } else {
+    return IPHalfSampleDecal(texture_sampler, texture_coords);
+  }
 #else
   return texture(texture_sampler, texture_coords);
 #endif
diff --git a/impeller/entity/shaders/blending/porter_duff_blend.frag b/impeller/entity/shaders/blending/porter_duff_blend.frag
index a24b523..7e3498f 100644
--- a/impeller/entity/shaders/blending/porter_duff_blend.frag
+++ b/impeller/entity/shaders/blending/porter_duff_blend.frag
@@ -19,6 +19,7 @@
   float16_t dst_coeff_src_color;
   float16_t input_alpha;
   float16_t output_alpha;
+  float supports_decal_sampler_address_mode;
 }
 frag_info;
 
@@ -28,9 +29,12 @@
 out f16vec4 frag_color;
 
 f16vec4 Sample(f16sampler2D texture_sampler, vec2 texture_coords) {
-// gles 2.0 is the only backend without native decal support.
 #ifdef IMPELLER_TARGET_OPENGLES
-  return IPSampleDecal(texture_sampler, texture_coords);
+  if (frag_info.supports_decal_sampler_address_mode > 0.0) {
+    return texture(texture_sampler, texture_coords);
+  } else {
+    return IPHalfSampleDecal(texture_sampler, texture_coords);
+  }
 #else
   return texture(texture_sampler, texture_coords);
 #endif
diff --git a/impeller/entity/shaders/morphology_filter.frag b/impeller/entity/shaders/morphology_filter.frag
index 8f2d01f..29428d0 100644
--- a/impeller/entity/shaders/morphology_filter.frag
+++ b/impeller/entity/shaders/morphology_filter.frag
@@ -19,6 +19,7 @@
   f16vec2 uv_offset;
   float16_t radius;
   float16_t morph_type;
+  float supports_decal_sampler_address_mode;
 }
 frag_info;
 
@@ -32,11 +33,15 @@
   for (float16_t i = -frag_info.radius; i <= frag_info.radius; i++) {
     vec2 texture_coords = v_texture_coords + frag_info.uv_offset * i;
 
-// gles 2.0 is the only backend without native decal support.
+    f16vec4 color;
 #ifdef IMPELLER_TARGET_OPENGLES
-    f16vec4 color = IPHalfSampleDecal(texture_sampler, texture_coords);
+    if (frag_info.supports_decal_sampler_address_mode > 0.0) {
+      color = texture(texture_sampler, texture_coords);
+    } else {
+      color = IPHalfSampleDecal(texture_sampler, texture_coords);
+    }
 #else
-    f16vec4 color = texture(texture_sampler, texture_coords);
+    color = texture(texture_sampler, texture_coords);
 #endif
 
     if (frag_info.morph_type == kMorphTypeDilate) {
diff --git a/impeller/renderer/backend/gles/capabilities_gles.cc b/impeller/renderer/backend/gles/capabilities_gles.cc
index e639b14..45ad42e 100644
--- a/impeller/renderer/backend/gles/capabilities_gles.cc
+++ b/impeller/renderer/backend/gles/capabilities_gles.cc
@@ -86,6 +86,12 @@
     gl.GetIntegerv(GL_NUM_SHADER_BINARY_FORMATS, &value);
     num_shader_binary_formats = value;
   }
+
+  if (gl.GetDescription()->HasExtension("GL_EXT_texture_border_clamp") ||
+      gl.GetDescription()->HasExtension("GL_NV_texture_border_clamp") ||
+      gl.GetDescription()->HasExtension("GL_OES_texture_border_clamp")) {
+    supports_decal_sampler_address_mode_ = true;
+  }
 }
 
 size_t CapabilitiesGLES::GetMaxTextureUnits(ShaderStage stage) const {
@@ -140,7 +146,7 @@
 }
 
 bool CapabilitiesGLES::SupportsDecalSamplerAddressMode() const {
-  return false;
+  return supports_decal_sampler_address_mode_;
 }
 
 bool CapabilitiesGLES::SupportsDeviceTransientTextures() const {
diff --git a/impeller/renderer/backend/gles/capabilities_gles.h b/impeller/renderer/backend/gles/capabilities_gles.h
index b6ba085..20685f1 100644
--- a/impeller/renderer/backend/gles/capabilities_gles.h
+++ b/impeller/renderer/backend/gles/capabilities_gles.h
@@ -114,6 +114,9 @@
 
   // |Capabilities|
   PixelFormat GetDefaultDepthStencilFormat() const override;
+
+ private:
+  bool supports_decal_sampler_address_mode_ = false;
 };
 
 }  // namespace impeller
diff --git a/impeller/renderer/backend/gles/context_gles.cc b/impeller/renderer/backend/gles/context_gles.cc
index 8d98d41..6896d66 100644
--- a/impeller/renderer/backend/gles/context_gles.cc
+++ b/impeller/renderer/backend/gles/context_gles.cc
@@ -53,13 +53,15 @@
     }
   }
 
+  device_capabilities_ = reactor_->GetProcTable().GetCapabilities();
+
   // Create the sampler library.
   {
     sampler_library_ =
-        std::shared_ptr<SamplerLibraryGLES>(new SamplerLibraryGLES());
+        std::shared_ptr<SamplerLibraryGLES>(new SamplerLibraryGLES(
+            device_capabilities_->SupportsDecalSamplerAddressMode()));
   }
 
-  device_capabilities_ = reactor_->GetProcTable().GetCapabilities();
   is_valid_ = true;
 }
 
diff --git a/impeller/renderer/backend/gles/gles.h b/impeller/renderer/backend/gles/gles.h
index 9bacfc8..e14116e 100644
--- a/impeller/renderer/backend/gles/gles.h
+++ b/impeller/renderer/backend/gles/gles.h
@@ -6,6 +6,7 @@
 
 // IWYU pragma: begin_exports
 #include "GLES3/gl3.h"
+#define GL_CLAMP_TO_BORDER 0x812D
 #define GL_GLEXT_PROTOTYPES
 #include "GLES2/gl2ext.h"
 // IWYU pragma: end_exports
diff --git a/impeller/renderer/backend/gles/sampler_gles.cc b/impeller/renderer/backend/gles/sampler_gles.cc
index f1a54de..19d4e0f 100644
--- a/impeller/renderer/backend/gles/sampler_gles.cc
+++ b/impeller/renderer/backend/gles/sampler_gles.cc
@@ -53,7 +53,8 @@
   FML_UNREACHABLE();
 }
 
-static GLint ToAddressMode(SamplerAddressMode mode) {
+static GLint ToAddressMode(SamplerAddressMode mode,
+                           bool supports_decal_sampler_address_mode) {
   switch (mode) {
     case SamplerAddressMode::kClampToEdge:
       return GL_CLAMP_TO_EDGE;
@@ -62,7 +63,10 @@
     case SamplerAddressMode::kMirror:
       return GL_MIRRORED_REPEAT;
     case SamplerAddressMode::kDecal:
-      break;  // Unsupported.
+      if (supports_decal_sampler_address_mode) {
+        return GL_CLAMP_TO_BORDER;
+      }
+      break;
   }
   FML_UNREACHABLE();
 }
@@ -96,10 +100,14 @@
                    ToParam(desc.min_filter, mip_filter));
   gl.TexParameteri(target.value(), GL_TEXTURE_MAG_FILTER,
                    ToParam(desc.mag_filter));
-  gl.TexParameteri(target.value(), GL_TEXTURE_WRAP_S,
-                   ToAddressMode(desc.width_address_mode));
-  gl.TexParameteri(target.value(), GL_TEXTURE_WRAP_T,
-                   ToAddressMode(desc.height_address_mode));
+  gl.TexParameteri(
+      target.value(), GL_TEXTURE_WRAP_S,
+      ToAddressMode(desc.width_address_mode,
+                    gl.GetCapabilities()->SupportsDecalSamplerAddressMode()));
+  gl.TexParameteri(
+      target.value(), GL_TEXTURE_WRAP_T,
+      ToAddressMode(desc.height_address_mode,
+                    gl.GetCapabilities()->SupportsDecalSamplerAddressMode()));
   return true;
 }
 
diff --git a/impeller/renderer/backend/gles/sampler_library_gles.cc b/impeller/renderer/backend/gles/sampler_library_gles.cc
index 7f4cd3d..6012203 100644
--- a/impeller/renderer/backend/gles/sampler_library_gles.cc
+++ b/impeller/renderer/backend/gles/sampler_library_gles.cc
@@ -11,7 +11,9 @@
 
 namespace impeller {
 
-SamplerLibraryGLES::SamplerLibraryGLES() = default;
+SamplerLibraryGLES::SamplerLibraryGLES(bool supports_decal_sampler_address_mode)
+    : supports_decal_sampler_address_mode_(
+          supports_decal_sampler_address_mode) {}
 
 // |SamplerLibrary|
 SamplerLibraryGLES::~SamplerLibraryGLES() = default;
@@ -19,14 +21,12 @@
 // |SamplerLibrary|
 std::shared_ptr<const Sampler> SamplerLibraryGLES::GetSampler(
     SamplerDescriptor descriptor) {
-  // TODO(bdero): Change this validation once optional support for kDecal is
-  //              added to the OpenGLES backend:
-  //              https://github.com/flutter/flutter/issues/129358
-  if (descriptor.width_address_mode == SamplerAddressMode::kDecal ||
-      descriptor.height_address_mode == SamplerAddressMode::kDecal ||
-      descriptor.depth_address_mode == SamplerAddressMode::kDecal) {
+  if (!supports_decal_sampler_address_mode_ &&
+      (descriptor.width_address_mode == SamplerAddressMode::kDecal ||
+       descriptor.height_address_mode == SamplerAddressMode::kDecal ||
+       descriptor.depth_address_mode == SamplerAddressMode::kDecal)) {
     VALIDATION_LOG << "SamplerAddressMode::kDecal is not supported by the "
-                      "OpenGLES backend.";
+                      "current OpenGLES backend.";
     return nullptr;
   }
 
diff --git a/impeller/renderer/backend/gles/sampler_library_gles.h b/impeller/renderer/backend/gles/sampler_library_gles.h
index 4379442..e23af54 100644
--- a/impeller/renderer/backend/gles/sampler_library_gles.h
+++ b/impeller/renderer/backend/gles/sampler_library_gles.h
@@ -12,6 +12,7 @@
 
 class SamplerLibraryGLES final : public SamplerLibrary {
  public:
+  explicit SamplerLibraryGLES(bool supports_decal_sampler_address_mode);
   // |SamplerLibrary|
   ~SamplerLibraryGLES() override;
 
@@ -26,6 +27,8 @@
   std::shared_ptr<const Sampler> GetSampler(
       SamplerDescriptor descriptor) override;
 
+  bool supports_decal_sampler_address_mode_ = false;
+
   FML_DISALLOW_COPY_AND_ASSIGN(SamplerLibraryGLES);
 };
 
diff --git a/impeller/renderer/backend/gles/test/capabilities_unittests.cc b/impeller/renderer/backend/gles/test/capabilities_unittests.cc
index 0bbc1b9..189f54c 100644
--- a/impeller/renderer/backend/gles/test/capabilities_unittests.cc
+++ b/impeller/renderer/backend/gles/test/capabilities_unittests.cc
@@ -34,5 +34,15 @@
             PixelFormat::kD24UnormS8Uint);
 }
 
+TEST(CapabilitiesGLES, SupportsDecalSamplerAddressMode) {
+  auto const extensions = std::vector<const unsigned char*>{
+      reinterpret_cast<const unsigned char*>("GL_KHR_debug"),                 //
+      reinterpret_cast<const unsigned char*>("GL_EXT_texture_border_clamp"),  //
+  };
+  auto mock_gles = MockGLES::Init(extensions);
+  auto capabilities = mock_gles->GetProcTable().GetCapabilities();
+  EXPECT_TRUE(capabilities->SupportsDecalSamplerAddressMode());
+}
+
 }  // namespace testing
 }  // namespace impeller
diff --git a/impeller/renderer/backend/gles/test/mock_gles.cc b/impeller/renderer/backend/gles/test/mock_gles.cc
index 3d4f980..1abf7e8 100644
--- a/impeller/renderer/backend/gles/test/mock_gles.cc
+++ b/impeller/renderer/backend/gles/test/mock_gles.cc
@@ -19,6 +19,8 @@
 
 static std::weak_ptr<MockGLES> g_mock_gles;
 
+static std::vector<const unsigned char*> g_extensions;
+
 // Has friend visibility into MockGLES to record calls.
 void RecordGLCall(const char* name) {
   if (auto mock_gles = g_mock_gles.lock()) {
@@ -37,7 +39,7 @@
 
 auto const kMockVendor = (unsigned char*)"MockGLES";
 auto const kMockVersion = (unsigned char*)"3.0";
-auto const kExtensions = std::vector<unsigned char*>{
+auto const kExtensions = std::vector<const unsigned char*>{
     (unsigned char*)"GL_KHR_debug"  //
 };
 
@@ -60,7 +62,7 @@
 const unsigned char* mockGetStringi(GLenum name, GLuint index) {
   switch (name) {
     case GL_EXTENSIONS:
-      return kExtensions[index];
+      return g_extensions[index];
     default:
       return (unsigned char*)"";
   }
@@ -72,7 +74,7 @@
 void mockGetIntegerv(GLenum name, int* value) {
   switch (name) {
     case GL_NUM_EXTENSIONS: {
-      *value = kExtensions.size();
+      *value = g_extensions.size();
     } break;
     case GL_MAX_COMBINED_TEXTURE_IMAGE_UNITS:
       *value = 8;
@@ -110,10 +112,12 @@
 static_assert(CheckSameSignature<decltype(mockPushDebugGroupKHR),  //
                                  decltype(glPushDebugGroupKHR)>::value);
 
-std::shared_ptr<MockGLES> MockGLES::Init() {
+std::shared_ptr<MockGLES> MockGLES::Init(
+    const std::optional<std::vector<const unsigned char*>>& extensions) {
   // If we cannot obtain a lock, MockGLES is already being used elsewhere.
   FML_CHECK(g_test_lock.try_lock())
       << "MockGLES is already being used by another test.";
+  g_extensions = extensions.value_or(kExtensions);
   auto mock_gles = std::shared_ptr<MockGLES>(new MockGLES());
   g_mock_gles = mock_gles;
   return mock_gles;
diff --git a/impeller/renderer/backend/gles/test/mock_gles.h b/impeller/renderer/backend/gles/test/mock_gles.h
index ec548ed..fb29715 100644
--- a/impeller/renderer/backend/gles/test/mock_gles.h
+++ b/impeller/renderer/backend/gles/test/mock_gles.h
@@ -3,6 +3,7 @@
 // found in the LICENSE file.
 
 #include <memory>
+#include <optional>
 #include "fml/macros.h"
 #include "impeller/renderer/backend/gles/proc_table_gles.h"
 
@@ -24,7 +25,9 @@
   /// This method overwrites mocked global GLES function pointers to record
   /// invocations on this instance of |MockGLES|. As such, it should only be
   /// called once per test.
-  static std::shared_ptr<MockGLES> Init();
+  static std::shared_ptr<MockGLES> Init(
+      const std::optional<std::vector<const unsigned char*>>& extensions =
+          std::nullopt);
 
   /// @brief      Returns a configured |ProcTableGLES| instance.
   const ProcTableGLES& GetProcTable() const { return proc_table_; }
diff --git a/impeller/tools/malioc.json b/impeller/tools/malioc.json
index 3924493..3f20eb4 100644
--- a/impeller/tools/malioc.json
+++ b/impeller/tools/malioc.json
@@ -3820,7 +3820,7 @@
             "longest_path_cycles": [
               0.609375,
               0.609375,
-              0.4375,
+              0.46875,
               0.5,
               0.0,
               0.5,
@@ -3842,24 +3842,23 @@
             "shortest_path_cycles": [
               0.34375,
               0.34375,
-              0.3125,
-              0.1875,
+              0.265625,
+              0.0,
               0.0,
               0.25,
-              0.0
+              0.25
             ],
             "total_bound_pipelines": [
-              "arith_total",
-              "arith_fma"
+              "texture"
             ],
             "total_cycles": [
               0.609375,
               0.609375,
-              0.484375,
+              0.578125,
               0.5,
               0.0,
               0.5,
-              0.5
+              1.0
             ]
           },
           "stack_spill_bytes": 0,
@@ -3882,7 +3881,7 @@
               "arithmetic"
             ],
             "longest_path_cycles": [
-              8.90999984741211,
+              9.569999694824219,
               2.0,
               2.0
             ],
@@ -3895,7 +3894,7 @@
               "arithmetic"
             ],
             "shortest_path_cycles": [
-              5.28000020980835,
+              4.949999809265137,
               1.0,
               0.0
             ],
@@ -3903,13 +3902,13 @@
               "arithmetic"
             ],
             "total_cycles": [
-              9.666666984558105,
+              11.0,
               2.0,
-              2.0
+              4.0
             ]
           },
           "thread_occupancy": 100,
-          "uniform_registers_used": 1,
+          "uniform_registers_used": 2,
           "work_registers_used": 3
         }
       }
@@ -3936,9 +3935,9 @@
               "arith_cvt"
             ],
             "longest_path_cycles": [
-              0.578125,
+              0.637499988079071,
               0.28125,
-              0.578125,
+              0.637499988079071,
               0.5625,
               0.0,
               0.5,
@@ -3958,26 +3957,25 @@
               "arith_cvt"
             ],
             "shortest_path_cycles": [
-              0.453125,
+              0.40625,
               0.25,
-              0.453125,
-              0.375,
+              0.40625,
+              0.1875,
               0.0,
               0.25,
-              0.0
+              0.25
             ],
             "total_bound_pipelines": [
-              "arith_total",
-              "arith_cvt"
+              "texture"
             ],
             "total_cycles": [
-              0.625,
+              0.75,
               0.28125,
-              0.625,
+              0.75,
               0.5625,
               0.0,
               0.5,
-              0.5
+              1.0
             ]
           },
           "stack_spill_bytes": 0,
@@ -4000,7 +3998,7 @@
               "arithmetic"
             ],
             "longest_path_cycles": [
-              9.569999694824219,
+              10.229999542236328,
               2.0,
               2.0
             ],
@@ -4013,7 +4011,7 @@
               "arithmetic"
             ],
             "shortest_path_cycles": [
-              7.920000076293945,
+              7.590000152587891,
               1.0,
               0.0
             ],
@@ -4021,13 +4019,13 @@
               "arithmetic"
             ],
             "total_cycles": [
-              10.333333015441895,
+              11.666666984558105,
               2.0,
-              2.0
+              4.0
             ]
           },
           "thread_occupancy": 100,
-          "uniform_registers_used": 1,
+          "uniform_registers_used": 2,
           "work_registers_used": 2
         }
       }
@@ -4051,13 +4049,12 @@
           "performance": {
             "longest_path_bound_pipelines": [
               "arith_total",
-              "arith_cvt",
-              "arith_sfu"
+              "arith_cvt"
             ],
             "longest_path_cycles": [
-              0.5625,
+              0.625,
               0.25,
-              0.5625,
+              0.625,
               0.5625,
               0.0,
               0.5,
@@ -4077,26 +4074,25 @@
               "arith_cvt"
             ],
             "shortest_path_cycles": [
-              0.4375,
+              0.390625,
               0.21875,
-              0.4375,
-              0.375,
+              0.390625,
+              0.1875,
               0.0,
               0.25,
-              0.0
+              0.25
             ],
             "total_bound_pipelines": [
-              "arith_total",
-              "arith_cvt"
+              "texture"
             ],
             "total_cycles": [
-              0.609375,
+              0.737500011920929,
               0.25,
-              0.609375,
+              0.737500011920929,
               0.5625,
               0.0,
               0.5,
-              0.5
+              1.0
             ]
           },
           "stack_spill_bytes": 0,
@@ -4119,7 +4115,7 @@
               "arithmetic"
             ],
             "longest_path_cycles": [
-              9.569999694824219,
+              10.229999542236328,
               2.0,
               2.0
             ],
@@ -4132,7 +4128,7 @@
               "arithmetic"
             ],
             "shortest_path_cycles": [
-              7.920000076293945,
+              7.590000152587891,
               1.0,
               0.0
             ],
@@ -4140,13 +4136,13 @@
               "arithmetic"
             ],
             "total_cycles": [
-              10.333333015441895,
+              11.666666984558105,
               2.0,
-              2.0
+              4.0
             ]
           },
           "thread_occupancy": 100,
-          "uniform_registers_used": 1,
+          "uniform_registers_used": 2,
           "work_registers_used": 2
         }
       }
@@ -4173,9 +4169,9 @@
               "texture"
             ],
             "longest_path_cycles": [
-              0.375,
+              0.40625,
               0.1875,
-              0.34375,
+              0.40625,
               0.375,
               0.0,
               0.5,
@@ -4191,29 +4187,29 @@
               "texture"
             ],
             "shortest_path_bound_pipelines": [
-              "varying"
-            ],
-            "shortest_path_cycles": [
-              0.21875,
-              0.15625,
-              0.21875,
-              0.1875,
-              0.0,
-              0.25,
-              0.0
-            ],
-            "total_bound_pipelines": [
               "varying",
               "texture"
             ],
+            "shortest_path_cycles": [
+              0.171875,
+              0.15625,
+              0.171875,
+              0.0,
+              0.0,
+              0.25,
+              0.25
+            ],
+            "total_bound_pipelines": [
+              "texture"
+            ],
             "total_cycles": [
-              0.390625,
+              0.515625,
               0.1875,
-              0.390625,
+              0.515625,
               0.375,
               0.0,
               0.5,
-              0.5
+              1.0
             ]
           },
           "stack_spill_bytes": 0,
@@ -4236,7 +4232,7 @@
               "arithmetic"
             ],
             "longest_path_cycles": [
-              4.619999885559082,
+              5.28000020980835,
               2.0,
               2.0
             ],
@@ -4249,7 +4245,7 @@
               "arithmetic"
             ],
             "shortest_path_cycles": [
-              2.640000104904175,
+              2.309999942779541,
               1.0,
               0.0
             ],
@@ -4257,13 +4253,13 @@
               "arithmetic"
             ],
             "total_cycles": [
-              5.0,
+              6.333333492279053,
               2.0,
-              2.0
+              4.0
             ]
           },
           "thread_occupancy": 100,
-          "uniform_registers_used": 1,
+          "uniform_registers_used": 2,
           "work_registers_used": 2
         }
       }
@@ -4292,7 +4288,7 @@
             "longest_path_cycles": [
               0.375,
               0.234375,
-              0.3125,
+              0.375,
               0.375,
               0.0,
               0.5,
@@ -4308,29 +4304,29 @@
               "texture"
             ],
             "shortest_path_bound_pipelines": [
-              "varying"
+              "varying",
+              "texture"
             ],
             "shortest_path_cycles": [
               0.203125,
               0.203125,
-              0.1875,
-              0.1875,
+              0.140625,
+              0.0,
               0.0,
               0.25,
-              0.0
+              0.25
             ],
             "total_bound_pipelines": [
-              "varying",
               "texture"
             ],
             "total_cycles": [
-              0.375,
+              0.484375,
               0.234375,
-              0.359375,
+              0.484375,
               0.375,
               0.0,
               0.5,
-              0.5
+              1.0
             ]
           },
           "stack_spill_bytes": 0,
@@ -4353,7 +4349,7 @@
               "arithmetic"
             ],
             "longest_path_cycles": [
-              4.949999809265137,
+              5.610000133514404,
               2.0,
               2.0
             ],
@@ -4366,7 +4362,7 @@
               "arithmetic"
             ],
             "shortest_path_cycles": [
-              2.9700000286102295,
+              2.640000104904175,
               1.0,
               0.0
             ],
@@ -4374,13 +4370,13 @@
               "arithmetic"
             ],
             "total_cycles": [
-              5.333333492279053,
+              6.666666507720947,
               2.0,
-              2.0
+              4.0
             ]
           },
           "thread_occupancy": 100,
-          "uniform_registers_used": 1,
+          "uniform_registers_used": 2,
           "work_registers_used": 2
         }
       }
@@ -4409,7 +4405,7 @@
             "longest_path_cycles": [
               0.375,
               0.28125,
-              0.3125,
+              0.375,
               0.375,
               0.0,
               0.5,
@@ -4427,29 +4423,29 @@
             "shortest_path_bound_pipelines": [
               "arith_total",
               "arith_fma",
-              "varying"
+              "varying",
+              "texture"
             ],
             "shortest_path_cycles": [
               0.25,
               0.25,
-              0.1875,
-              0.1875,
+              0.140625,
+              0.0,
               0.0,
               0.25,
-              0.0
+              0.25
             ],
             "total_bound_pipelines": [
-              "varying",
               "texture"
             ],
             "total_cycles": [
-              0.375,
+              0.484375,
               0.28125,
-              0.359375,
+              0.484375,
               0.375,
               0.0,
               0.5,
-              0.5
+              1.0
             ]
           },
           "stack_spill_bytes": 0,
@@ -4472,7 +4468,7 @@
               "arithmetic"
             ],
             "longest_path_cycles": [
-              4.949999809265137,
+              5.610000133514404,
               2.0,
               2.0
             ],
@@ -4485,7 +4481,7 @@
               "arithmetic"
             ],
             "shortest_path_cycles": [
-              2.9700000286102295,
+              2.640000104904175,
               1.0,
               0.0
             ],
@@ -4493,13 +4489,13 @@
               "arithmetic"
             ],
             "total_cycles": [
-              5.333333492279053,
+              6.666666507720947,
               2.0,
-              2.0
+              4.0
             ]
           },
           "thread_occupancy": 100,
-          "uniform_registers_used": 1,
+          "uniform_registers_used": 2,
           "work_registers_used": 2
         }
       }
@@ -4528,7 +4524,7 @@
             "longest_path_cycles": [
               0.453125,
               0.453125,
-              0.359375,
+              0.421875,
               0.375,
               0.0,
               0.5,
@@ -4550,24 +4546,23 @@
             "shortest_path_cycles": [
               0.421875,
               0.421875,
-              0.234375,
               0.1875,
               0.0,
+              0.0,
               0.25,
-              0.0
+              0.25
             ],
             "total_bound_pipelines": [
-              "varying",
               "texture"
             ],
             "total_cycles": [
+              0.53125,
               0.453125,
-              0.453125,
-              0.40625,
+              0.53125,
               0.375,
               0.0,
               0.5,
-              0.5
+              1.0
             ]
           },
           "stack_spill_bytes": 0,
@@ -4590,7 +4585,7 @@
               "arithmetic"
             ],
             "longest_path_cycles": [
-              5.940000057220459,
+              6.599999904632568,
               2.0,
               2.0
             ],
@@ -4603,7 +4598,7 @@
               "arithmetic"
             ],
             "shortest_path_cycles": [
-              4.289999961853027,
+              3.9600000381469727,
               1.0,
               0.0
             ],
@@ -4611,13 +4606,13 @@
               "arithmetic"
             ],
             "total_cycles": [
-              6.666666507720947,
+              8.0,
               2.0,
-              2.0
+              4.0
             ]
           },
           "thread_occupancy": 100,
-          "uniform_registers_used": 1,
+          "uniform_registers_used": 2,
           "work_registers_used": 4
         }
       }
@@ -4646,7 +4641,7 @@
             "longest_path_cycles": [
               0.71875,
               0.71875,
-              0.59375,
+              0.625,
               0.5625,
               0.0,
               0.5,
@@ -4666,26 +4661,25 @@
               "arith_cvt"
             ],
             "shortest_path_cycles": [
-              0.453125,
+              0.40625,
               0.34375,
-              0.453125,
-              0.1875,
+              0.40625,
+              0.0,
               0.0,
               0.25,
-              0.0
+              0.25
             ],
             "total_bound_pipelines": [
-              "arith_total",
-              "arith_fma"
+              "texture"
             ],
             "total_cycles": [
+              0.78125,
               0.71875,
-              0.71875,
-              0.6875,
+              0.78125,
               0.5625,
               0.0,
               0.5,
-              0.5
+              1.0
             ]
           },
           "stack_spill_bytes": 0,
@@ -4708,7 +4702,7 @@
               "arithmetic"
             ],
             "longest_path_cycles": [
-              10.5600004196167,
+              11.220000267028809,
               2.0,
               2.0
             ],
@@ -4721,7 +4715,7 @@
               "arithmetic"
             ],
             "shortest_path_cycles": [
-              5.940000057220459,
+              5.610000133514404,
               1.0,
               0.0
             ],
@@ -4729,13 +4723,13 @@
               "arithmetic"
             ],
             "total_cycles": [
-              11.666666984558105,
+              13.0,
               2.0,
-              2.0
+              4.0
             ]
           },
           "thread_occupancy": 100,
-          "uniform_registers_used": 1,
+          "uniform_registers_used": 2,
           "work_registers_used": 3
         }
       }
@@ -4762,9 +4756,9 @@
               "texture"
             ],
             "longest_path_cycles": [
-              0.375,
+              0.40625,
               0.1875,
-              0.34375,
+              0.40625,
               0.375,
               0.0,
               0.5,
@@ -4780,29 +4774,29 @@
               "texture"
             ],
             "shortest_path_bound_pipelines": [
-              "varying"
-            ],
-            "shortest_path_cycles": [
-              0.21875,
-              0.15625,
-              0.21875,
-              0.1875,
-              0.0,
-              0.25,
-              0.0
-            ],
-            "total_bound_pipelines": [
               "varying",
               "texture"
             ],
+            "shortest_path_cycles": [
+              0.171875,
+              0.15625,
+              0.171875,
+              0.0,
+              0.0,
+              0.25,
+              0.25
+            ],
+            "total_bound_pipelines": [
+              "texture"
+            ],
             "total_cycles": [
-              0.390625,
+              0.515625,
               0.1875,
-              0.390625,
+              0.515625,
               0.375,
               0.0,
               0.5,
-              0.5
+              1.0
             ]
           },
           "stack_spill_bytes": 0,
@@ -4825,7 +4819,7 @@
               "arithmetic"
             ],
             "longest_path_cycles": [
-              4.619999885559082,
+              5.28000020980835,
               2.0,
               2.0
             ],
@@ -4838,7 +4832,7 @@
               "arithmetic"
             ],
             "shortest_path_cycles": [
-              2.640000104904175,
+              2.309999942779541,
               1.0,
               0.0
             ],
@@ -4846,13 +4840,13 @@
               "arithmetic"
             ],
             "total_cycles": [
-              5.0,
+              6.333333492279053,
               2.0,
-              2.0
+              4.0
             ]
           },
           "thread_occupancy": 100,
-          "uniform_registers_used": 1,
+          "uniform_registers_used": 2,
           "work_registers_used": 2
         }
       }
@@ -4881,7 +4875,7 @@
             "longest_path_cycles": [
               0.609375,
               0.609375,
-              0.4375,
+              0.46875,
               0.5,
               0.0,
               0.5,
@@ -4903,24 +4897,23 @@
             "shortest_path_cycles": [
               0.34375,
               0.34375,
-              0.3125,
-              0.1875,
+              0.265625,
+              0.0,
               0.0,
               0.25,
-              0.0
+              0.25
             ],
             "total_bound_pipelines": [
-              "arith_total",
-              "arith_fma"
+              "texture"
             ],
             "total_cycles": [
               0.609375,
               0.609375,
-              0.484375,
+              0.578125,
               0.5,
               0.0,
               0.5,
-              0.5
+              1.0
             ]
           },
           "stack_spill_bytes": 0,
@@ -4943,7 +4936,7 @@
               "arithmetic"
             ],
             "longest_path_cycles": [
-              8.90999984741211,
+              9.569999694824219,
               2.0,
               2.0
             ],
@@ -4956,7 +4949,7 @@
               "arithmetic"
             ],
             "shortest_path_cycles": [
-              5.28000020980835,
+              4.949999809265137,
               1.0,
               0.0
             ],
@@ -4964,13 +4957,13 @@
               "arithmetic"
             ],
             "total_cycles": [
-              9.666666984558105,
+              11.0,
               2.0,
-              2.0
+              4.0
             ]
           },
           "thread_occupancy": 100,
-          "uniform_registers_used": 1,
+          "uniform_registers_used": 2,
           "work_registers_used": 3
         }
       }
@@ -4999,7 +4992,7 @@
             "longest_path_cycles": [
               0.375,
               0.203125,
-              0.3125,
+              0.375,
               0.375,
               0.0,
               0.5,
@@ -5015,29 +5008,29 @@
               "texture"
             ],
             "shortest_path_bound_pipelines": [
-              "varying"
-            ],
-            "shortest_path_cycles": [
-              0.1875,
-              0.171875,
-              0.1875,
-              0.1875,
-              0.0,
-              0.25,
-              0.0
-            ],
-            "total_bound_pipelines": [
               "varying",
               "texture"
             ],
+            "shortest_path_cycles": [
+              0.171875,
+              0.171875,
+              0.140625,
+              0.0,
+              0.0,
+              0.25,
+              0.25
+            ],
+            "total_bound_pipelines": [
+              "texture"
+            ],
             "total_cycles": [
-              0.375,
+              0.484375,
               0.203125,
-              0.359375,
+              0.484375,
               0.375,
               0.0,
               0.5,
-              0.5
+              1.0
             ]
           },
           "stack_spill_bytes": 0,
@@ -5060,7 +5053,7 @@
               "arithmetic"
             ],
             "longest_path_cycles": [
-              4.949999809265137,
+              5.610000133514404,
               2.0,
               2.0
             ],
@@ -5073,7 +5066,7 @@
               "arithmetic"
             ],
             "shortest_path_cycles": [
-              2.9700000286102295,
+              2.640000104904175,
               1.0,
               0.0
             ],
@@ -5081,13 +5074,13 @@
               "arithmetic"
             ],
             "total_cycles": [
-              5.333333492279053,
+              6.666666507720947,
               2.0,
-              2.0
+              4.0
             ]
           },
           "thread_occupancy": 100,
-          "uniform_registers_used": 1,
+          "uniform_registers_used": 2,
           "work_registers_used": 2
         }
       }
@@ -5116,7 +5109,7 @@
             "longest_path_cycles": [
               0.453125,
               0.453125,
-              0.375,
+              0.4375,
               0.375,
               0.0,
               0.5,
@@ -5138,24 +5131,23 @@
             "shortest_path_cycles": [
               0.421875,
               0.421875,
-              0.25,
-              0.1875,
+              0.203125,
+              0.0,
               0.0,
               0.25,
-              0.0
+              0.25
             ],
             "total_bound_pipelines": [
-              "varying",
               "texture"
             ],
             "total_cycles": [
+              0.546875,
               0.453125,
-              0.453125,
-              0.421875,
+              0.546875,
               0.375,
               0.0,
               0.5,
-              0.5
+              1.0
             ]
           },
           "stack_spill_bytes": 0,
@@ -5178,7 +5170,7 @@
               "arithmetic"
             ],
             "longest_path_cycles": [
-              6.269999980926514,
+              6.929999828338623,
               2.0,
               2.0
             ],
@@ -5191,7 +5183,7 @@
               "arithmetic"
             ],
             "shortest_path_cycles": [
-              4.289999961853027,
+              3.9600000381469727,
               1.0,
               0.0
             ],
@@ -5199,13 +5191,13 @@
               "arithmetic"
             ],
             "total_cycles": [
-              6.666666507720947,
+              8.0,
               2.0,
-              2.0
+              4.0
             ]
           },
           "thread_occupancy": 100,
-          "uniform_registers_used": 1,
+          "uniform_registers_used": 2,
           "work_registers_used": 4
         }
       }
@@ -5234,7 +5226,7 @@
             "longest_path_cycles": [
               0.71875,
               0.71875,
-              0.59375,
+              0.625,
               0.5625,
               0.0,
               0.5,
@@ -5254,26 +5246,25 @@
               "arith_cvt"
             ],
             "shortest_path_cycles": [
-              0.453125,
+              0.40625,
               0.34375,
-              0.453125,
-              0.1875,
+              0.40625,
+              0.0,
               0.0,
               0.25,
-              0.0
+              0.25
             ],
             "total_bound_pipelines": [
-              "arith_total",
-              "arith_fma"
+              "texture"
             ],
             "total_cycles": [
+              0.78125,
               0.71875,
-              0.71875,
-              0.6875,
+              0.78125,
               0.5625,
               0.0,
               0.5,
-              0.5
+              1.0
             ]
           },
           "stack_spill_bytes": 0,
@@ -5296,7 +5287,7 @@
               "arithmetic"
             ],
             "longest_path_cycles": [
-              10.890000343322754,
+              11.550000190734863,
               2.0,
               2.0
             ],
@@ -5309,7 +5300,7 @@
               "arithmetic"
             ],
             "shortest_path_cycles": [
-              5.940000057220459,
+              5.610000133514404,
               1.0,
               0.0
             ],
@@ -5317,13 +5308,13 @@
               "arithmetic"
             ],
             "total_cycles": [
-              11.666666984558105,
+              13.0,
               2.0,
-              2.0
+              4.0
             ]
           },
           "thread_occupancy": 100,
-          "uniform_registers_used": 1,
+          "uniform_registers_used": 2,
           "work_registers_used": 3
         }
       }
@@ -5352,7 +5343,7 @@
             "longest_path_cycles": [
               0.375,
               0.25,
-              0.3125,
+              0.375,
               0.375,
               0.0,
               0.5,
@@ -5368,29 +5359,29 @@
               "texture"
             ],
             "shortest_path_bound_pipelines": [
-              "varying"
+              "varying",
+              "texture"
             ],
             "shortest_path_cycles": [
               0.21875,
               0.21875,
-              0.1875,
-              0.1875,
+              0.140625,
+              0.0,
               0.0,
               0.25,
-              0.0
+              0.25
             ],
             "total_bound_pipelines": [
-              "varying",
               "texture"
             ],
             "total_cycles": [
-              0.375,
+              0.484375,
               0.25,
-              0.359375,
+              0.484375,
               0.375,
               0.0,
               0.5,
-              0.5
+              1.0
             ]
           },
           "stack_spill_bytes": 0,
@@ -5413,7 +5404,7 @@
               "arithmetic"
             ],
             "longest_path_cycles": [
-              4.949999809265137,
+              5.610000133514404,
               2.0,
               2.0
             ],
@@ -5426,7 +5417,7 @@
               "arithmetic"
             ],
             "shortest_path_cycles": [
-              2.9700000286102295,
+              2.640000104904175,
               1.0,
               0.0
             ],
@@ -5434,13 +5425,13 @@
               "arithmetic"
             ],
             "total_cycles": [
-              5.333333492279053,
+              6.666666507720947,
               2.0,
-              2.0
+              4.0
             ]
           },
           "thread_occupancy": 100,
-          "uniform_registers_used": 1,
+          "uniform_registers_used": 2,
           "work_registers_used": 2
         }
       }
@@ -5469,7 +5460,7 @@
             "longest_path_cycles": [
               0.75,
               0.75,
-              0.515625,
+              0.578125,
               0.5625,
               0.0,
               0.5,
@@ -5491,24 +5482,23 @@
             "shortest_path_cycles": [
               0.71875,
               0.71875,
-              0.390625,
-              0.375,
+              0.34375,
+              0.1875,
               0.0,
               0.25,
-              0.0
+              0.25
             ],
             "total_bound_pipelines": [
-              "arith_total",
-              "arith_fma"
+              "texture"
             ],
             "total_cycles": [
               0.75,
               0.75,
-              0.5625,
+              0.6875,
               0.5625,
               0.0,
               0.5,
-              0.5
+              1.0
             ]
           },
           "stack_spill_bytes": 0,
@@ -5531,7 +5521,7 @@
               "arithmetic"
             ],
             "longest_path_cycles": [
-              8.90999984741211,
+              9.569999694824219,
               2.0,
               2.0
             ],
@@ -5544,7 +5534,7 @@
               "arithmetic"
             ],
             "shortest_path_cycles": [
-              6.929999828338623,
+              6.599999904632568,
               1.0,
               0.0
             ],
@@ -5552,13 +5542,13 @@
               "arithmetic"
             ],
             "total_cycles": [
-              9.333333015441895,
+              10.666666984558105,
               2.0,
-              2.0
+              4.0
             ]
           },
           "thread_occupancy": 100,
-          "uniform_registers_used": 1,
+          "uniform_registers_used": 2,
           "work_registers_used": 4
         }
       }
@@ -8354,17 +8344,16 @@
               0.0
             ],
             "total_bound_pipelines": [
-              "arith_total",
-              "arith_cvt"
+              "texture"
             ],
             "total_cycles": [
-              0.359375,
+              0.390625,
               0.078125,
-              0.359375,
+              0.390625,
               0.0625,
               0.0,
               0.25,
-              0.25
+              0.5
             ]
           },
           "stack_spill_bytes": 0,
@@ -8409,14 +8398,14 @@
               "arithmetic"
             ],
             "total_cycles": [
-              4.0,
+              4.666666507720947,
               1.0,
-              1.0
+              2.0
             ]
           },
           "thread_occupancy": 100,
           "uniform_registers_used": 1,
-          "work_registers_used": 4
+          "work_registers_used": 3
         }
       }
     }