Record & Query PrCheckRuns in luci when scheduling (#4054)

Reduces the need to search for PRs by check suite / check runs in a monorepo
diff --git a/app_dart/lib/src/model/firestore/pr_check_runs.dart b/app_dart/lib/src/model/firestore/pr_check_runs.dart
index 64b81dd..c423305 100644
--- a/app_dart/lib/src/model/firestore/pr_check_runs.dart
+++ b/app_dart/lib/src/model/firestore/pr_check_runs.dart
@@ -95,12 +95,13 @@
   }
 
   /// Retrieve the [PullRequest] for a given [checkRun] or throw an error.
-  static Future<PullRequest> findDocumentFor(
+  static Future<PullRequest> findPullRequestFor(
     FirestoreService firestoreService,
-    CheckRun checkRun,
+    int checkRunId,
+    String checkRunName,
   ) async {
     final filterMap = <String, Object>{
-      '${checkRun.name!} =': '${checkRun.id}',
+      '$checkRunName =': '$checkRunId',
     };
     log.info('findDocumentFor($filterMap): finding prCheckRuns document');
     final docs = await firestoreService.query(kCollectionId, filterMap);
diff --git a/app_dart/lib/src/service/luci_build_service.dart b/app_dart/lib/src/service/luci_build_service.dart
index 59e4513..9d56556 100644
--- a/app_dart/lib/src/service/luci_build_service.dart
+++ b/app_dart/lib/src/service/luci_build_service.dart
@@ -7,12 +7,15 @@
 import 'dart:typed_data';
 
 import 'package:cocoon_service/cocoon_service.dart';
+import 'package:cocoon_service/src/model/firestore/pr_check_runs.dart';
 import 'package:collection/collection.dart';
 import 'package:fixnum/fixnum.dart';
 import 'package:github/github.dart' as github;
 import 'package:github/hooks.dart';
 import 'package:googleapis/firestore/v1.dart' hide Status;
 import 'package:buildbucket/buildbucket_pb.dart' as bbv2;
+import 'package:github/github.dart';
+import 'package:meta/meta.dart';
 
 import '../foundation/github_checks_util.dart';
 import '../model/appengine/commit.dart';
@@ -38,6 +41,8 @@
     GithubChecksUtil? githubChecksUtil,
     GerritService? gerritService,
     this.pubsub = const PubSub(),
+    @visibleForTesting this.initializePrCheckRuns = PrCheckRuns.initializeDocument,
+    @visibleForTesting this.findPullRequestFor = PrCheckRuns.findPullRequestFor,
   })  : githubChecksUtil = githubChecksUtil ?? const GithubChecksUtil(),
         gerritService = gerritService ?? GerritService(config: config);
 
@@ -49,6 +54,18 @@
 
   final PubSub pubsub;
 
+  final Future<Document> Function({
+    required FirestoreService firestoreService,
+    required PullRequest pullRequest,
+    required List<CheckRun> checks,
+  }) initializePrCheckRuns;
+
+  final Future<PullRequest> Function(
+    FirestoreService firestoreService,
+    int checkRunId,
+    String checkRunName,
+  ) findPullRequestFor;
+
   static const Set<bbv2.Status> failStatusSet = <bbv2.Status>{
     bbv2.Status.CANCELED,
     bbv2.Status.FAILURE,
@@ -233,13 +250,15 @@
     String cipdVersion = 'refs/heads/${pullRequest.base!.ref!}';
     cipdVersion = branches.contains(cipdVersion) ? cipdVersion : config.defaultRecipeBundleRef;
 
+    final checkRuns = <github.CheckRun>[];
     for (Target target in targets) {
-      final github.CheckRun checkRun = await githubChecksUtil.createCheckRun(
+      final checkRun = await githubChecksUtil.createCheckRun(
         config,
         target.slug,
         sha,
         target.value.name,
       );
+      checkRuns.add(checkRun);
 
       final github.RepositorySlug slug = pullRequest.base!.repo!.slug();
 
@@ -296,6 +315,22 @@
       );
     }
 
+    // All check runs created, now record them in firestore so we can
+    // figure out which PR started what check run later (e.g. check_run completed).
+    try {
+      final firestore = await config.createFirestoreService();
+      final doc = await initializePrCheckRuns(
+        firestoreService: firestore,
+        pullRequest: pullRequest,
+        checks: checkRuns,
+      );
+      log.info('scheduleTryBuilds: created PrCheckRuns doc ${doc.name}');
+    } catch (e, s) {
+      // We are not going to block on this error. If we cannot find this document
+      // later, we'll fall back to the old github query method.
+      log.warning('scheduleTryBuilds: error creating PrCheckRuns doc', e, s);
+    }
+
     final Iterable<List<bbv2.BatchRequest_Request>> requestPartitions = await shard(
       requests: batchRequestList,
       maxShardSize: config.schedulingShardSize,
@@ -492,6 +527,26 @@
       githubCheckRun,
       detailsUrl: buildUrl,
     );
+
+    // Check run created, now record it in firestore so we can figure out which
+    // PR started what check run later (e.g. check_run completed).
+    try {
+      final firestore = await config.createFirestoreService();
+      // Find the original pull request.
+      final pullRequest = await findPullRequestFor(firestore, checkRunEvent.checkRun!.id!, checkName);
+
+      final doc = await initializePrCheckRuns(
+        firestoreService: firestore,
+        pullRequest: pullRequest,
+        checks: [githubCheckRun],
+      );
+      log.info('reschedulePresubmitBuildUsingCheckRunEvent: created PrCheckRuns doc ${doc.name}');
+    } catch (e, s) {
+      // We are not going to block on this error. If we cannot find this document
+      // later, we'll fall back to the old github query method.
+      log.warning('reschedulePresubmitBuildUsingCheckRunEvent: error creating PrCheckRuns doc', e, s);
+    }
+
     return scheduleBuild;
   }
 
@@ -928,6 +983,8 @@
     Target target,
     Map<String, dynamic> rawUserData,
   ) async {
+    // We are not tracking this check run in the PrCheckRuns firestore doc because
+    // there is no PR to track here.
     final github.CheckRun checkRun = await githubChecksUtil.createCheckRun(
       config,
       target.slug,
diff --git a/app_dart/lib/src/service/scheduler.dart b/app_dart/lib/src/service/scheduler.dart
index 28904eb..6d83efe 100644
--- a/app_dart/lib/src/service/scheduler.dart
+++ b/app_dart/lib/src/service/scheduler.dart
@@ -8,6 +8,7 @@
 
 import 'package:buildbucket/buildbucket_pb.dart' as bbv2;
 import 'package:cocoon_service/src/model/firestore/ci_staging.dart';
+import 'package:cocoon_service/src/model/firestore/pr_check_runs.dart';
 import 'package:cocoon_service/src/service/exceptions.dart';
 import 'package:cocoon_service/src/service/build_status_provider.dart';
 import 'package:cocoon_service/src/service/scheduler/policy.dart';
@@ -59,6 +60,7 @@
     this.buildStatusProvider = BuildStatusService.defaultProvider,
     @visibleForTesting this.markCheckRunConclusion = CiStaging.markConclusion,
     @visibleForTesting this.initializeCiStagingDocument = CiStaging.initializeDocument,
+    @visibleForTesting this.findPullRequestFor = PrCheckRuns.findPullRequestFor,
   });
 
   final BuildStatusServiceProvider buildStatusProvider;
@@ -90,6 +92,12 @@
     required String checkRunGuard,
   }) initializeCiStagingDocument;
 
+  final Future<PullRequest> Function(
+    FirestoreService firestoreService,
+    int checkRunId,
+    String checkRunName,
+  ) findPullRequestFor;
+
   /// Name of the subcache to store scheduler related values in redis.
   static const String subcacheName = 'scheduler';
 
@@ -888,13 +896,29 @@
 
     final checkRunGuard = CheckRun.fromJson(json.decode(conclusion.checkRunGuard!));
 
-    // We're in a pull request and the engine is fully built. We need to reverse look up the PR from the check suite,
-    // which sadly is not available in the check_run data. This could be cached at check_run creation time to avoid
-    // this cost.
-    final int checkSuiteId = checkRunEvent.checkRun!.checkSuite!.id!;
-    final PullRequest? pullRequest = await githubChecksService.findMatchingPullRequest(slug, sha, checkSuiteId);
+    // Look up the PR in our cache first. This reduces github quota and requires less calls.
+    PullRequest? pullRequest;
+    final id = checkRunEvent.checkRun!.id!;
+    final name = checkRunEvent.checkRun!.name!;
+    try {
+      pullRequest = await findPullRequestFor(
+        firestoreService,
+        id,
+        name,
+      );
+    } catch (e, s) {
+      log.warning('$logCrumb: unable to find PR in PrCheckRuns', e, s);
+    }
+
+    // We'va failed to find the pull request; try a reverse look it from the check suite.
     if (pullRequest == null) {
-      throw 'No PR found matching this check_run';
+      final int checkSuiteId = checkRunEvent.checkRun!.checkSuite!.id!;
+      pullRequest = await githubChecksService.findMatchingPullRequest(slug, sha, checkSuiteId);
+    }
+
+    // We cannot make any forward progress. Abandon all hope, Check runs who enter here.
+    if (pullRequest == null) {
+      throw 'No PR found matching this check_run($id, $name)';
     }
 
     Object? exception;
diff --git a/app_dart/test/model/firestore/pr_check_runs_test.dart b/app_dart/test/model/firestore/pr_check_runs_test.dart
index 77cc445..b806d1d 100644
--- a/app_dart/test/model/firestore/pr_check_runs_test.dart
+++ b/app_dart/test/model/firestore/pr_check_runs_test.dart
@@ -88,7 +88,7 @@
           ),
         ],
       );
-      final pr = await PrCheckRuns.findDocumentFor(firestoreService, generateCheckRun(1, name: 'testing tesing'));
+      final pr = await PrCheckRuns.findPullRequestFor(firestoreService, 1, 'testing tesing');
 
       final captured = verify(firestoreService.query(PrCheckRuns.kCollectionId, captureAny)).captured;
       expect(captured, [
diff --git a/app_dart/test/service/luci_build_service_test.dart b/app_dart/test/service/luci_build_service_test.dart
index 0ca72c3..0106169 100644
--- a/app_dart/test/service/luci_build_service_test.dart
+++ b/app_dart/test/service/luci_build_service_test.dart
@@ -38,7 +38,7 @@
   FakeGithubService githubService;
   late MockBuildBucketClient mockBuildBucketClient;
   late LuciBuildService service;
-  late MockGithubChecksUtil mockGithubChecksUtil = MockGithubChecksUtil();
+  MockGithubChecksUtil mockGithubChecksUtil = MockGithubChecksUtil();
   late FakePubSub pubsub;
 
   final List<Target> targets = <Target>[
@@ -310,10 +310,15 @@
   });
 
   group('scheduleBuilds', () {
+    late MockFirestoreService firestoreService;
+    late MockCallbacks callbacks;
+
     setUp(() {
+      firestoreService = MockFirestoreService();
+      callbacks = MockCallbacks();
       cache = CacheService(inMemory: true);
       githubService = FakeGithubService();
-      config = FakeConfig(githubService: githubService);
+      config = FakeConfig(githubService: githubService, firestoreService: firestoreService);
       mockBuildBucketClient = MockBuildBucketClient();
       mockGithubChecksUtil = MockGithubChecksUtil();
       pubsub = FakePubSub();
@@ -324,10 +329,20 @@
         githubChecksUtil: mockGithubChecksUtil,
         gerritService: FakeGerritService(branchesValue: <String>['master']),
         pubsub: pubsub,
+        initializePrCheckRuns: callbacks.initializePrCheckRuns,
       );
     });
 
     test('schedule try builds successfully', () async {
+      when(
+        callbacks.initializePrCheckRuns(
+          firestoreService: anyNamed('firestoreService'),
+          pullRequest: anyNamed('pullRequest'),
+          checks: anyNamed('checks'),
+        ),
+      ).thenAnswer((inv) async {
+        return Document(name: '1234-56-7890', fields: {});
+      });
       final PullRequest pullRequest = generatePullRequest();
       when(mockBuildBucketClient.batch(any)).thenAnswer((_) async {
         return bbv2.BatchResponse(
@@ -345,6 +360,18 @@
         targets: targets,
       );
 
+      final result = verify(
+        callbacks.initializePrCheckRuns(
+          firestoreService: anyNamed('firestoreService'),
+          pullRequest: argThat(equals(pullRequest), named: 'pullRequest'),
+          checks: captureAnyNamed('checks'),
+        ),
+      )..called(1);
+      final checkRuns = result.captured.first as List<CheckRun>;
+      expect(checkRuns, hasLength(1));
+      expect(checkRuns.first.id, 1);
+      expect(checkRuns.first.name, 'Linux 1');
+
       final Iterable<String> scheduledTargetNames = scheduledTargets.map((Target target) => target.value.name);
       expect(scheduledTargetNames, <String>['Linux 1']);
 
diff --git a/app_dart/test/service/scheduler_test.dart b/app_dart/test/service/scheduler_test.dart
index ac5e87e..d148dbf 100644
--- a/app_dart/test/service/scheduler_test.dart
+++ b/app_dart/test/service/scheduler_test.dart
@@ -1049,6 +1049,7 @@
                 generateCheckSuite(668083231),
               ],
             );
+
             httpClient = MockClient((http.Request request) async {
               if (request.url.path.endsWith('engine/src/flutter/.ci.yaml')) {
                 return http.Response(fusionCiYaml, 200);
@@ -1063,12 +1064,18 @@
               return [];
             });
 
+            final gitHubChecksService = MockGithubChecksService();
+            when(gitHubChecksService.githubChecksUtil).thenReturn(mockGithubChecksUtil);
+            when(gitHubChecksService.findMatchingPullRequest(any, any, any)).thenAnswer((inv) async {
+              return pullRequest;
+            });
+
             scheduler = Scheduler(
               cache: cache,
               config: config,
               datastoreProvider: (DatastoreDB db) => DatastoreService(db, 2),
               buildStatusProvider: (_, __) => buildStatusService,
-              githubChecksService: GithubChecksService(config, githubChecksUtil: mockGithubChecksUtil),
+              githubChecksService: gitHubChecksService,
               httpClientProvider: () => httpClient,
               luciBuildService: luci,
               fusionTester: fakeFusion,
@@ -1096,11 +1103,137 @@
             expect(
               await scheduler.processCheckRunCompletion(
                 cocoon_checks.CheckRunEvent.fromJson(
+                  json.decode(checkRunEventFor(test: 'Bar bar', sha: 'testSha')),
+                ),
+              ),
+              isTrue,
+            );
+
+            verify(gitHubChecksService.findMatchingPullRequest(Config.flauxSlug, 'testSha', 668083231)).called(1);
+
+            verify(
+              callbacks.markCheckRunConclusion(
+                firestoreService: argThat(isNotNull, named: 'firestoreService'),
+                slug: argThat(equals(Config.flauxSlug), named: 'slug'),
+                sha: 'testSha',
+                stage: argThat(equals(CiStage.fusionEngineBuild), named: 'stage'),
+                checkRun: argThat(equals('Bar bar'), named: 'checkRun'),
+                conclusion: argThat(equals('success'), named: 'conclusion'),
+              ),
+            ).called(1);
+
+            verify(
+              mockGithubChecksUtil.updateCheckRun(
+                any,
+                argThat(equals(RepositorySlug('flutter', 'flaux'))),
+                argThat(
+                  predicate<CheckRun>((arg) {
+                    expect(arg.name, 'GUARD TEST');
+                    return true;
+                  }),
+                ),
+                status: argThat(equals(CheckRunStatus.completed), named: 'status'),
+                conclusion: argThat(equals(CheckRunConclusion.success), named: 'conclusion'),
+                output: anyNamed('output'),
+              ),
+            ).called(1);
+
+            final result = verify(
+              luci.scheduleTryBuilds(
+                targets: captureAnyNamed('targets'),
+                pullRequest: captureAnyNamed('pullRequest'),
+              ),
+            );
+            expect(result.callCount, 1);
+            final captured = result.captured;
+            expect(captured[0], hasLength(2));
+            // see the blend of fusionCiYaml and singleCiYaml
+            expect(captured[0][0].getTestName, 'A');
+            expect(captured[0][1].getTestName, 'Z');
+            expect(captured[1], pullRequest);
+          });
+
+          test('schedules tests after engine stage - with pr caching', () async {
+            final githubService = config.githubService = MockGithubService();
+            final githubClient = MockGitHub();
+            when(githubService.github).thenReturn(githubClient);
+            when(githubService.searchIssuesAndPRs(any, any, sort: anyNamed('sort'), pages: anyNamed('pages')))
+                .thenAnswer((_) async => [generateIssue(42)]);
+
+            final pullRequest = generatePullRequest();
+            when(githubService.getPullRequest(any, any)).thenAnswer((_) async => pullRequest);
+            when(githubService.listFiles(any)).thenAnswer((_) async => ['abc/def']);
+            when(mockGithubChecksUtil.listCheckSuitesForRef(any, any, ref: anyNamed('ref'))).thenAnswer(
+              (_) async => [
+                // From check_run.check_suite.id in [checkRunString].
+                generateCheckSuite(668083231),
+              ],
+            );
+
+            when(callbacks.findPullRequestFor(any, any, any)).thenAnswer((inv) async {
+              return pullRequest;
+            });
+
+            httpClient = MockClient((http.Request request) async {
+              if (request.url.path.endsWith('engine/src/flutter/.ci.yaml')) {
+                return http.Response(fusionCiYaml, 200);
+              } else if (request.url.path.endsWith('.ci.yaml')) {
+                return http.Response(singleCiYaml, 200);
+              }
+              throw Exception('Failed to find ${request.url.path}');
+            });
+            final luci = MockLuciBuildService();
+            when(luci.scheduleTryBuilds(targets: anyNamed('targets'), pullRequest: anyNamed('pullRequest')))
+                .thenAnswer((inv) async {
+              return [];
+            });
+
+            final gitHubChecksService = MockGithubChecksService();
+            when(gitHubChecksService.githubChecksUtil).thenReturn(mockGithubChecksUtil);
+
+            scheduler = Scheduler(
+              cache: cache,
+              config: config,
+              datastoreProvider: (DatastoreDB db) => DatastoreService(db, 2),
+              buildStatusProvider: (_, __) => buildStatusService,
+              githubChecksService: gitHubChecksService,
+              httpClientProvider: () => httpClient,
+              luciBuildService: luci,
+              fusionTester: fakeFusion,
+              markCheckRunConclusion: callbacks.markCheckRunConclusion,
+              findPullRequestFor: callbacks.findPullRequestFor,
+            );
+
+            when(
+              callbacks.markCheckRunConclusion(
+                firestoreService: anyNamed('firestoreService'),
+                slug: anyNamed('slug'),
+                sha: anyNamed('sha'),
+                stage: anyNamed('stage'),
+                checkRun: anyNamed('checkRun'),
+                conclusion: anyNamed('conclusion'),
+              ),
+            ).thenAnswer((inv) async {
+              return StagingConclusion(
+                valid: true,
+                remaining: 0,
+                checkRunGuard: checkRunFor(name: 'GUARD TEST'),
+                failed: 0,
+              );
+            });
+
+            expect(
+              await scheduler.processCheckRunCompletion(
+                cocoon_checks.CheckRunEvent.fromJson(
                   json.decode(checkRunEventFor(test: 'Bar bar')),
                 ),
               ),
               isTrue,
             );
+
+            verify(callbacks.findPullRequestFor(mockFirestoreService, 1, 'Bar bar')).called(1);
+            verifyNever(gitHubChecksService.findMatchingPullRequest(any, any, any));
+
             verify(
               callbacks.markCheckRunConclusion(
                 firestoreService: argThat(isNotNull, named: 'firestoreService'),
diff --git a/app_dart/test/src/utilities/mocks.dart b/app_dart/test/src/utilities/mocks.dart
index 31bd947..6f7a16a 100644
--- a/app_dart/test/src/utilities/mocks.dart
+++ b/app_dart/test/src/utilities/mocks.dart
@@ -128,4 +128,18 @@
     required List<String> tasks,
     required String checkRunGuard,
   });
+
+  /// See [PrCheckRuns.initializeDocument]
+  Future<Document> initializePrCheckRuns({
+    required FirestoreService firestoreService,
+    required PullRequest pullRequest,
+    required List<CheckRun> checks,
+  });
+
+  /// See [PrCheckRuns.findPullRequestFor]
+  Future<PullRequest> findPullRequestFor(
+    FirestoreService firestoreService,
+    int checkRunId,
+    String checkRunName,
+  );
 }
diff --git a/app_dart/test/src/utilities/mocks.mocks.dart b/app_dart/test/src/utilities/mocks.mocks.dart
index 59e7592..bc4c124 100644
--- a/app_dart/test/src/utilities/mocks.mocks.dart
+++ b/app_dart/test/src/utilities/mocks.mocks.dart
@@ -3922,6 +3922,22 @@
       ) as _i20.Future<List<_i13.RepositoryCommit>>);
 
   @override
+  _i20.Future<bool> deleteBranch(
+    _i13.RepositorySlug? slug,
+    String? branchName,
+  ) =>
+      (super.noSuchMethod(
+        Invocation.method(
+          #deleteBranch,
+          [
+            slug,
+            branchName,
+          ],
+        ),
+        returnValue: _i20.Future<bool>.value(false),
+      ) as _i20.Future<bool>);
+
+  @override
   _i20.Future<List<_i13.PullRequest>> listPullRequests(
     _i13.RepositorySlug? slug,
     String? branch,
@@ -6564,6 +6580,28 @@
       ) as _i15.PubSub);
 
   @override
+  _i20.Future<_i21.Document> Function({
+    required List<_i13.CheckRun> checks,
+    required _i15.FirestoreService firestoreService,
+    required _i13.PullRequest pullRequest,
+  }) get initializePrCheckRuns => (super.noSuchMethod(
+        Invocation.getter(#initializePrCheckRuns),
+        returnValue: ({
+          required List<_i13.CheckRun> checks,
+          required _i15.FirestoreService firestoreService,
+          required _i13.PullRequest pullRequest,
+        }) =>
+            _i20.Future<_i21.Document>.value(_FakeDocument_25(
+          this,
+          Invocation.getter(#initializePrCheckRuns),
+        )),
+      ) as _i20.Future<_i21.Document> Function({
+        required List<_i13.CheckRun> checks,
+        required _i15.FirestoreService firestoreService,
+        required _i13.PullRequest pullRequest,
+      }));
+
+  @override
   _i20.Future<List<List<_i8.BatchRequest_Request>>> shard({
     required List<_i8.BatchRequest_Request>? requests,
     required int? maxShardSize,
@@ -9944,6 +9982,64 @@
           ),
         )),
       ) as _i20.Future<_i21.Document>);
+
+  @override
+  _i20.Future<_i21.Document> initializePrCheckRuns({
+    required _i15.FirestoreService? firestoreService,
+    required _i13.PullRequest? pullRequest,
+    required List<_i13.CheckRun>? checks,
+  }) =>
+      (super.noSuchMethod(
+        Invocation.method(
+          #initializePrCheckRuns,
+          [],
+          {
+            #firestoreService: firestoreService,
+            #pullRequest: pullRequest,
+            #checks: checks,
+          },
+        ),
+        returnValue: _i20.Future<_i21.Document>.value(_FakeDocument_25(
+          this,
+          Invocation.method(
+            #initializePrCheckRuns,
+            [],
+            {
+              #firestoreService: firestoreService,
+              #pullRequest: pullRequest,
+              #checks: checks,
+            },
+          ),
+        )),
+      ) as _i20.Future<_i21.Document>);
+
+  @override
+  _i20.Future<_i13.PullRequest> findPullRequestFor(
+    _i15.FirestoreService? firestoreService,
+    int? checkRunId,
+    String? checkRunName,
+  ) =>
+      (super.noSuchMethod(
+        Invocation.method(
+          #findPullRequestFor,
+          [
+            firestoreService,
+            checkRunId,
+            checkRunName,
+          ],
+        ),
+        returnValue: _i20.Future<_i13.PullRequest>.value(_FakePullRequest_41(
+          this,
+          Invocation.method(
+            #findPullRequestFor,
+            [
+              firestoreService,
+              checkRunId,
+              checkRunName,
+            ],
+          ),
+        )),
+      ) as _i20.Future<_i13.PullRequest>);
 }
 
 /// A class which mocks [Cache].