Call focus on input after detecting a tap (#37863)

* fix tap detection and call focus

* addressed comments
diff --git a/lib/web_ui/lib/src/engine/semantics/text_field.dart b/lib/web_ui/lib/src/engine/semantics/text_field.dart
index a71f55b..3e36ffb 100644
--- a/lib/web_ui/lib/src/engine/semantics/text_field.dart
+++ b/lib/web_ui/lib/src/engine/semantics/text_field.dart
@@ -288,24 +288,24 @@
       _initializeForBlink();
       return;
     }
-    num? lastTouchStartOffsetX;
-    num? lastTouchStartOffsetY;
+    num? lastPointerDownOffsetX;
+    num? lastPointerDownOffsetY;
 
-    editableElement.addEventListener('touchstart',
+    editableElement.addEventListener('pointerdown',
         allowInterop((DomEvent event) {
-          final DomTouchEvent touchEvent = event as DomTouchEvent;
-          lastTouchStartOffsetX = touchEvent.changedTouches!.last.clientX;
-          lastTouchStartOffsetY = touchEvent.changedTouches!.last.clientY;
+          final DomPointerEvent pointerEvent = event as DomPointerEvent;
+          lastPointerDownOffsetX = pointerEvent.clientX;
+          lastPointerDownOffsetY = pointerEvent.clientY;
         }), true);
 
     editableElement.addEventListener(
-        'touchend', allowInterop((DomEvent event) {
-      final DomTouchEvent touchEvent = event as DomTouchEvent;
+        'pointerup', allowInterop((DomEvent event) {
+      final DomPointerEvent pointerEvent = event as DomPointerEvent;
 
-      if (lastTouchStartOffsetX != null) {
-        assert(lastTouchStartOffsetY != null);
-        final num offsetX = touchEvent.changedTouches!.last.clientX;
-        final num offsetY = touchEvent.changedTouches!.last.clientY;
+      if (lastPointerDownOffsetX != null) {
+        assert(lastPointerDownOffsetY != null);
+        final num deltaX = pointerEvent.clientX - lastPointerDownOffsetX!;
+        final num deltaY = pointerEvent.clientY - lastPointerDownOffsetY!;
 
         // This should match the similar constant defined in:
         //
@@ -314,17 +314,30 @@
         // The value is pre-squared so we have to do less math at runtime.
         const double kTouchSlop = 18.0 * 18.0; // Logical pixels squared
 
-        if (offsetX * offsetX + offsetY * offsetY < kTouchSlop) {
+        if (deltaX * deltaX + deltaY * deltaY < kTouchSlop) {
           // Recognize it as a tap that requires a keyboard.
           EnginePlatformDispatcher.instance.invokeOnSemanticsAction(
               semanticsObject.id, ui.SemanticsAction.tap, null);
+
+          // We need to call focus for the following scenario:
+          // 1. The virtual keyboard in iOS gets dismissed by the 'Done' button
+          // located at the top right of the keyboard.
+          // 2. The user tries to focus on the input field again, either by
+          // VoiceOver or manually, but the keyboard does not show up.
+          //
+          // In this scenario, the Flutter framework does not send a semantic update,
+          // so we need to call focus after detecting a tap to make sure that the
+          // virtual keyboard will show.
+          if (semanticsObject.hasFocus) {
+            editableElement.focus();
+          }
         }
       } else {
-        assert(lastTouchStartOffsetY == null);
+        assert(lastPointerDownOffsetY == null);
       }
 
-      lastTouchStartOffsetX = null;
-      lastTouchStartOffsetY = null;
+      lastPointerDownOffsetX = null;
+      lastPointerDownOffsetY = null;
     }), true);
   }
 
diff --git a/lib/web_ui/test/engine/semantics/text_field_test.dart b/lib/web_ui/test/engine/semantics/text_field_test.dart
index 879a3e2..91129dc 100644
--- a/lib/web_ui/test/engine/semantics/text_field_test.dart
+++ b/lib/web_ui/test/engine/semantics/text_field_test.dart
@@ -63,6 +63,43 @@
     semantics().semanticsEnabled = false;
   });
 
+  test('tap detection works', () async {
+    debugBrowserEngineOverride = BrowserEngine.webkit;
+    debugOperatingSystemOverride = OperatingSystem.iOs;
+    // The logger's idLog/actionLog are read below to check what was dispatched.
+    final SemanticsActionLogger logger = SemanticsActionLogger();
+    semantics()
+      ..debugOverrideTimestampFunction(() => _testTime)
+      ..semanticsEnabled = true;
+    // Create the text field under test via the createTextFieldSemantics helper.
+    createTextFieldSemantics(value: 'hello');
+    // Look up the <input> element carrying the text-field semantics role.
+    final DomElement textField = appHostNode
+        .querySelector('input[data-semantics-role="text-field"]')!;
+    // Simulate a tap: pointerdown and pointerup only 1px apart horizontally.
+    textField.dispatchEvent(createDomPointerEvent(
+        'pointerdown',
+        <Object?, Object?>{
+          'clientX': 25,
+          'clientY': 48,
+        },
+      ));
+    textField.dispatchEvent(createDomPointerEvent(
+        'pointerup',
+        <Object?, Object?>{
+          'clientX': 26,
+          'clientY': 48,
+        },
+      ));
+    // Expect the first logged id to be 0 and the first logged action to be tap.
+    expect(await logger.idLog.first, 0);
+    expect(await logger.actionLog.first, ui.SemanticsAction.tap);
+    // Reset global state; NOTE(review): skipped if an expect above throws.
+    semantics().semanticsEnabled = false;
+    debugBrowserEngineOverride = null;
+    debugOperatingSystemOverride = null;
+  });
+
   // TODO(yjbanov): this test will need to be adjusted for Safari when we add
   //                Safari testing.
   test('sends a tap action when browser requests focus', () async {