[Impeller] offload all text computation into vertex shader (#42417)
TextContents::Render occasionally shows up among the functions with the highest CPU usage. We can actually offload most of this computation into the vertex shader.
diff --git a/impeller/entity/contents/text_contents.cc b/impeller/entity/contents/text_contents.cc
index 09b6264..c4cd636 100644
--- a/impeller/entity/contents/text_contents.cc
+++ b/impeller/entity/contents/text_contents.cc
@@ -78,27 +78,32 @@
return bounds->TransformBounds(entity.GetTransformation());
}
-static bool CommonRender(
- const ContentContext& renderer,
- const Entity& entity,
- RenderPass& pass,
- const Color& color,
- const TextFrame& frame,
- Vector2 offset,
- std::shared_ptr<GlyphAtlas>
- atlas, // NOLINT(performance-unnecessary-value-param)
- Command& cmd) {
+static bool CommonRender(const ContentContext& renderer,
+ const Entity& entity,
+ RenderPass& pass,
+ const Color& color,
+ const TextFrame& frame,
+ Vector2 offset,
+ const std::shared_ptr<GlyphAtlas>& atlas,
+ Command& cmd) {
using VS = GlyphAtlasPipeline::VertexShader;
using FS = GlyphAtlasPipeline::FragmentShader;
// Common vertex uniforms for all glyphs.
VS::FrameInfo frame_info;
-
frame_info.mvp = Matrix::MakeOrthographic(pass.GetRenderTargetSize());
+ frame_info.atlas_size =
+ Vector2{static_cast<Scalar>(atlas->GetTexture()->GetSize().width),
+ static_cast<Scalar>(atlas->GetTexture()->GetSize().height)};
+ frame_info.offset = offset;
+ frame_info.is_translation_scale =
+ entity.GetTransformation().IsTranslationScaleOnly();
+ frame_info.entity_transform = entity.GetTransformation();
+
VS::BindFrameInfo(cmd, pass.GetTransientsBuffer().EmplaceUniform(frame_info));
SamplerDescriptor sampler_desc;
- if (entity.GetTransformation().IsTranslationScaleOnly()) {
+ if (frame_info.is_translation_scale) {
sampler_desc.min_filter = MinMagFilter::kNearest;
sampler_desc.mag_filter = MinMagFilter::kNearest;
} else {
@@ -152,12 +157,6 @@
index_offset += 4;
}
- auto atlas_size =
- Point{static_cast<Scalar>(atlas->GetTexture()->GetSize().width),
- static_cast<Scalar>(atlas->GetTexture()->GetSize().height)};
-
- Vector2 screen_offset = (entity.GetTransformation() * offset).Round();
-
for (const auto& run : frame.GetRuns()) {
const Font& font = run.GetFont();
@@ -168,40 +167,22 @@
VALIDATION_LOG << "Could not find glyph position in the atlas.";
return false;
}
-
- // For each glyph, we compute two rectangles. One for the vertex positions
- // and one for the texture coordinates (UVs).
-
- auto uv_origin =
- (atlas_glyph_bounds->origin - Point(0.5, 0.5)) / atlas_size;
- auto uv_size = (atlas_glyph_bounds->size + Size(1, 1)) / atlas_size;
-
- // Rounding here prevents most jitter between glyphs in the run when
- // nearest sampling.
- auto screen_glyph_position =
- screen_offset +
- (entity.GetTransformation().Basis() *
- (glyph_position.position + glyph_position.glyph.bounds.origin))
- .Round();
+ Vector4 atlas_glyph_bounds_vec = Vector4(
+ atlas_glyph_bounds->origin.x, atlas_glyph_bounds->origin.y,
+ atlas_glyph_bounds->size.width, atlas_glyph_bounds->size.height);
+ Vector4 glyph_bounds_vec =
+ Vector4(glyph_position.glyph.bounds.origin.x,
+ glyph_position.glyph.bounds.origin.y,
+ glyph_position.glyph.bounds.size.width,
+ glyph_position.glyph.bounds.size.height);
for (const auto& point : unit_points) {
- VS::PerVertexData vtx;
-
- if (entity.GetTransformation().IsTranslationScaleOnly()) {
- // Rouding up here prevents the bounds from becoming 1 pixel too small
- // when nearest sampling. This path breaks down for projections.
- vtx.position =
- screen_glyph_position + (entity.GetTransformation().Basis() *
- point * glyph_position.glyph.bounds.size)
- .Ceil();
- } else {
- vtx.position = entity.GetTransformation() *
- Vector4(offset + glyph_position.position +
- glyph_position.glyph.bounds.origin +
- point * glyph_position.glyph.bounds.size);
- }
- vtx.uv = uv_origin + point * uv_size;
- vertex_builder.AppendVertex(vtx);
+ vertex_builder.AppendVertex(VS::PerVertexData{
+ .atlas_glyph_bounds = atlas_glyph_bounds_vec,
+ .glyph_bounds = glyph_bounds_vec,
+ .unit_position = point,
+ .glyph_position = glyph_position.position,
+ });
}
}
}
@@ -209,11 +190,7 @@
vertex_builder.CreateVertexBuffer(pass.GetTransientsBuffer());
cmd.BindVertices(vertex_buffer);
- if (!pass.AddCommand(cmd)) {
- return false;
- }
-
- return true;
+ return pass.AddCommand(cmd);
}
bool TextContents::Render(const ContentContext& renderer,
diff --git a/impeller/entity/shaders/glyph_atlas.vert b/impeller/entity/shaders/glyph_atlas.vert
index 3cf2ce7..e2a7e04 100644
--- a/impeller/entity/shaders/glyph_atlas.vert
+++ b/impeller/entity/shaders/glyph_atlas.vert
@@ -7,15 +7,75 @@
uniform FrameInfo {
mat4 mvp;
+ mat4 entity_transform;
+ vec2 atlas_size;
+ vec2 offset;
+ float is_translation_scale;
}
frame_info;
-in highp vec4 position;
-in vec2 uv;
+// XYWH.
+in vec4 atlas_glyph_bounds;
+// XYWH.
+in vec4 glyph_bounds;
+
+in vec2 unit_position;
+in vec2 glyph_position;
out vec2 v_uv;
+mat4 basis(mat4 m) {
+ return mat4(m[0][0], m[0][1], m[0][2], 0.0, //
+ m[1][0], m[1][1], m[1][2], 0.0, //
+ m[2][0], m[2][1], m[2][2], 0.0, //
+ 0.0, 0.0, 0.0, 1.0 //
+ );
+}
+
+vec2 project(mat4 m, vec2 v) {
+ float w = v.x * m[0][3] + v.y * m[1][3] + m[3][3];
+ vec2 result = vec2(v.x * m[0][0] + v.y * m[1][0] + m[3][0],
+ v.x * m[0][1] + v.y * m[1][1] + m[3][1]);
+
+ // This is Skia's behavior, but it may be reasonable to allow UB for the w=0
+ // case.
+ if (w != 0) {
+ w = 1 / w;
+ }
+ return result * w;
+}
+
void main() {
+ vec2 screen_offset =
+ round(project(frame_info.entity_transform, frame_info.offset));
+
+ // For each glyph, we compute two rectangles. One for the vertex positions
+ // and one for the texture coordinates (UVs).
+ vec2 uv_origin = (atlas_glyph_bounds.xy - vec2(0.5)) / frame_info.atlas_size;
+ vec2 uv_size = (atlas_glyph_bounds.zw + vec2(1)) / frame_info.atlas_size;
+
+ // Rounding here prevents most jitter between glyphs in the run when
+ // nearest sampling.
+ mat4 basis_transform = basis(frame_info.entity_transform);
+ vec2 screen_glyph_position =
+ screen_offset +
+ round(project(basis_transform, (glyph_position + glyph_bounds.xy)));
+
+ vec4 position;
+ if (frame_info.is_translation_scale == 1.0) {
+ // Rounding up here prevents the bounds from becoming 1 pixel too small
+ // when nearest sampling. This path breaks down for projections.
+ position = vec4(
+ screen_glyph_position +
+ ceil(project(basis_transform, unit_position * glyph_bounds.zw)),
+ 0.0, 1.0);
+ } else {
+ position = frame_info.entity_transform *
+ vec4(frame_info.offset + glyph_position + glyph_bounds.xy +
+ unit_position * glyph_bounds.zw,
+ 0.0, 1.0);
+ }
+
gl_Position = frame_info.mvp * position;
- v_uv = uv;
+ v_uv = uv_origin + unit_position * uv_size;
}
diff --git a/impeller/tools/malioc.json b/impeller/tools/malioc.json
index 3697d3a..4d232a0 100644
--- a/impeller/tools/malioc.json
+++ b/impeller/tools/malioc.json
@@ -7552,22 +7552,22 @@
"Mali-G78": {
"core": "Mali-G78",
"filename": "flutter/impeller/entity/gles/glyph_atlas.vert.gles",
- "has_uniform_computation": false,
+ "has_uniform_computation": true,
"type": "Vertex",
"variants": {
"Position": {
- "fp16_arithmetic": 0,
+ "fp16_arithmetic": 96,
"has_stack_spilling": false,
"performance": {
"longest_path_bound_pipelines": [
"load_store"
],
"longest_path_cycles": [
- 0.265625,
- 0.265625,
+ 0.34375,
+ 0.34375,
+ 0.171875,
0.0,
- 0.0,
- 2.0,
+ 4.0,
0.0
],
"pipelines": [
@@ -7582,43 +7582,43 @@
"load_store"
],
"shortest_path_cycles": [
- 0.265625,
- 0.265625,
+ 0.25,
+ 0.25,
+ 0.078125,
0.0,
- 0.0,
- 2.0,
+ 4.0,
0.0
],
"total_bound_pipelines": [
"load_store"
],
"total_cycles": [
- 0.265625,
- 0.265625,
+ 0.453125,
+ 0.453125,
+ 0.1875,
0.0,
- 0.0,
- 2.0,
+ 4.0,
0.0
]
},
"stack_spill_bytes": 0,
"thread_occupancy": 100,
- "uniform_registers_used": 16,
+ "uniform_registers_used": 26,
"work_registers_used": 32
},
"Varying": {
- "fp16_arithmetic": null,
+ "fp16_arithmetic": 100,
"has_stack_spilling": false,
"performance": {
"longest_path_bound_pipelines": [
"load_store"
],
"longest_path_cycles": [
+ 0.078125,
+ 0.078125,
0.0,
0.0,
- 0.0,
- 0.0,
- 3.0,
+ 4.0,
0.0
],
"pipelines": [
@@ -7633,36 +7633,36 @@
"load_store"
],
"shortest_path_cycles": [
+ 0.078125,
+ 0.078125,
0.0,
0.0,
- 0.0,
- 0.0,
- 3.0,
+ 4.0,
0.0
],
"total_bound_pipelines": [
"load_store"
],
"total_cycles": [
+ 0.078125,
+ 0.078125,
0.0,
0.0,
- 0.0,
- 0.0,
- 3.0,
+ 4.0,
0.0
]
},
"stack_spill_bytes": 0,
"thread_occupancy": 100,
- "uniform_registers_used": 8,
- "work_registers_used": 6
+ "uniform_registers_used": 12,
+ "work_registers_used": 9
}
}
},
"Mali-T880": {
"core": "Mali-T880",
"filename": "flutter/impeller/entity/gles/glyph_atlas.vert.gles",
- "has_uniform_computation": false,
+ "has_uniform_computation": true,
"type": "Vertex",
"variants": {
"Main": {
@@ -7672,8 +7672,8 @@
"load_store"
],
"longest_path_cycles": [
- 2.9700000286102295,
- 5.0,
+ 6.929999828338623,
+ 7.0,
0.0
],
"pipelines": [
@@ -7685,21 +7685,21 @@
"load_store"
],
"shortest_path_cycles": [
- 2.9700000286102295,
- 5.0,
+ 5.940000057220459,
+ 7.0,
0.0
],
"total_bound_pipelines": [
- "load_store"
+ "arithmetic"
],
"total_cycles": [
- 3.0,
- 5.0,
+ 9.0,
+ 7.0,
0.0
]
},
"thread_occupancy": 100,
- "uniform_registers_used": 4,
+ "uniform_registers_used": 7,
"work_registers_used": 2
}
}
@@ -10963,18 +10963,18 @@
"type": "Vertex",
"variants": {
"Position": {
- "fp16_arithmetic": 0,
+ "fp16_arithmetic": 100,
"has_stack_spilling": false,
"performance": {
"longest_path_bound_pipelines": [
"load_store"
],
"longest_path_cycles": [
- 0.25,
- 0.25,
+ 0.3125,
+ 0.3125,
+ 0.15625,
0.0,
- 0.0,
- 2.0,
+ 4.0,
0.0
],
"pipelines": [
@@ -10991,41 +10991,41 @@
"shortest_path_cycles": [
0.25,
0.25,
+ 0.09375,
0.0,
- 0.0,
- 2.0,
+ 4.0,
0.0
],
"total_bound_pipelines": [
"load_store"
],
"total_cycles": [
- 0.25,
- 0.25,
+ 0.4375,
+ 0.4375,
+ 0.171875,
0.0,
- 0.0,
- 2.0,
+ 4.0,
0.0
]
},
"stack_spill_bytes": 0,
"thread_occupancy": 100,
- "uniform_registers_used": 24,
+ "uniform_registers_used": 34,
"work_registers_used": 32
},
"Varying": {
- "fp16_arithmetic": null,
+ "fp16_arithmetic": 100,
"has_stack_spilling": false,
"performance": {
"longest_path_bound_pipelines": [
"load_store"
],
"longest_path_cycles": [
+ 0.09375,
+ 0.09375,
0.0,
0.0,
- 0.0,
- 0.0,
- 3.0,
+ 4.0,
0.0
],
"pipelines": [
@@ -11040,29 +11040,29 @@
"load_store"
],
"shortest_path_cycles": [
+ 0.09375,
+ 0.09375,
0.0,
0.0,
- 0.0,
- 0.0,
- 3.0,
+ 4.0,
0.0
],
"total_bound_pipelines": [
"load_store"
],
"total_cycles": [
+ 0.09375,
+ 0.09375,
0.0,
0.0,
- 0.0,
- 0.0,
- 3.0,
+ 4.0,
0.0
]
},
"stack_spill_bytes": 0,
"thread_occupancy": 100,
- "uniform_registers_used": 24,
- "work_registers_used": 6
+ "uniform_registers_used": 30,
+ "work_registers_used": 9
}
}
}