| # Copyright 2021 The Chromium Authors. All rights reserved. |
| # Use of this source code is governed by a BSD-style license that can be |
| # found in the LICENSE file. |
| |
import attr
import collections
import json
| |
| from google.protobuf import duration_pb2 |
| from recipe_engine import recipe_api |
| from recipe_engine import engine_types |
| from PB.go.chromium.org.luci.buildbucket.proto import common as common_pb2 |
| from PB.go.chromium.org.luci.buildbucket.proto import build as build_pb2 |
| from RECIPE_MODULES.fuchsia.utils import pluralize |
| |
| # Builder names use full platform name instead of short names. We need to |
| # map short names to full platform names to be able to identify the drone |
| # used to run the subshards. |
| PLATFORM_TO_NAME = {'os=win': 'Windows', 'os=lin': 'Linux', 'os=mac': 'Mac'} |
| |
# Number of characters to use from the build configuration's os dimension.
| OS_DIM_CHAR_SIZE = 6 |
| |
# Monorepo builder names use short platform names.
| NAME_TO_PLATFORM = {'Windows': 'win', 'Linux': 'linux', 'Mac': 'mac'} |
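# For example (illustrative), a drone dimension 'os=Windows-10' is truncated
# to 'os=Windows-10'[:OS_DIM_CHAR_SIZE].lower() == 'os=win', which maps to
# 'Windows' in PLATFORM_TO_NAME and back to 'win' in NAME_TO_PLATFORM.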
| |
# Internal properties that must be removed before the parent build's
# properties are passed on to subbuilds.
| PROPERTIES_TO_REMOVE = [ |
| '$recipe_engine/buildbucket', 'buildername', '$recipe_engine/runtime', |
| 'is_experimental' |
| ] |
| |
# Maps a bucket name to the environment prefix used in drone builder names.
| ENVIRONMENTS_MAP = { |
| 'try': '', 'staging': 'Staging ', 'flutter': 'Production ', |
| 'prod': 'Production ' |
| } |
| |
| |
| @attr.s |
| class SubbuildResult(object): |
| """Subbuild result metadata.""" |
  # Task name for led, and "<Platform> <Environment>Engine Drone" for
  # buildbucket.
| builder = attr.ib(type=str) |
| build_id = attr.ib(type=str) |
| # Task name for both led and buildbucket. |
| build_name = attr.ib(type=str) |
| url = attr.ib(type=str, default=None) |
| build_proto = attr.ib(type=build_pb2.Build, default=None) |
| |
| |
| class ShardUtilApi(recipe_api.RecipeApi): |
| """Utilities to shard tasks.""" |
| |
| def unfreeze_dict(self, dictionary): |
| """Creates a mutable dictionary out of a FrozenDict. |
| |
| FrozenDict example: |
| FrozenDict([('dependency', 'open_jdk'), ('version', 'version:11')]) |
| , which is not a default python type. |
| |
| This refactors it to regular dict: |
| {'dependency': 'open_jdk', 'version': 'version:11'} |
| """ |
| result = collections.OrderedDict() |
| for k, v in sorted(dictionary.items()): |
| if isinstance(v, engine_types.FrozenDict): |
| result[k] = self.unfreeze_dict(v) |
| elif isinstance(v, (list, tuple)): |
| result[k] = [ |
| self.unfreeze_dict(i) |
| if isinstance(i, engine_types.FrozenDict) else i for i in v |
| ] |
| else: |
| result[k] = v |
| return result |
| |
| def get_base_bucket_name(self): |
| """Gets the base bucket name current build is running against. |
| |
| Subbuilds triggered by an orchestrator build are appending `.shadow` to the |
| bucket. Trim the suffix to make the bucket name consistent for LED runs. |
| """ |
| bucket = self.m.buildbucket.build.builder.bucket |
| return bucket.replace('.shadow', '') |
| |
| def pre_process_properties(self, target): |
| """Converts json properties to dicts or lists. |
| |
| Dict or lists in ci_yaml are passed as json string to recipes and they |
| need to be converted back to dict or lists before passing them to subbuilds. |
| |
| Args: |
| target: A target dictionary as read from the yaml file. |
| |
| Returns: |
| A copy of the original dictionary with the json properties decoded. |
| """ |
| if target.get('properties'): |
| properties = target.get('properties') |
| new_props = {} |
| for k, v in properties.items(): |
| if isinstance(v, str) and (v.startswith('[') or v.startswith('{')): |
| new_props[k] = json.loads(v) |
| else: |
| new_props[k] = v |
| target['properties'] = new_props |
| return target |
| |
| def struct_to_dict(self, struct): |
| """Transforms a proto structure to a dictionary. |
| |
| Args: |
| struct: A proto structure. |
| Returns: |
| A dictionary representation of the proto structure. |
| |
| This is because the proto structures can not be passed to the BuildBucket or led |
| requests. |
| """ |
| return collections.OrderedDict((k, v) for k, v in struct.items()) |
| |
| def schedule_builds(self, builds, presentation, branch='main'): |
| """Schedule builds using the builds configurations. |
| |
| Args: |
| builds(dict): The build configurations to be passed to BuildBucket or led. |
| presentation(StepPresentation): The step object used to add links and/or logs. |
| branch(String): The current branch name. |
| Returns: |
| A dictionary with a long build_id as key and SubbuildResult as value. |
| """ |
| # Update build with default recipe. |
| updated_builds = [] |
| for b in builds: |
| build = self.unfreeze_dict(b) |
| build['recipe'] = build.get('recipe') or 'engine_v2/builder' |
| updated_builds.append(build) |
| return self.schedule(updated_builds, presentation, branch=branch) |
| |
| def schedule_tests(self, tests, build_results, presentation): |
| """Schedule tests using build_results for dependencies. |
| |
| Args: |
| tests(dict): The test configurations to be passed to BuildBucket or led. |
| build_results: A dictionary with a long build_id as key and SubbuildResult as value. |
| presentation(StepPresentation): The step object used to add links and/or logs. |
| Returns: |
| A dictionary with a long build_id as key and SubbuildResult as value. |
| """ |
    # Map builder name to build result so that test dependencies can be
    # resolved from the archives of the builds that produced them.
    results_map = {b.build_name: b for b in build_results.values()}
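    # For example (illustrative), a test with dependencies=['android_debug']
    # gets the 'cas_output_hash' output property of the subbuild named
    # 'android_debug' appended to its resolved_deps.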
| updated_tests = [] |
| for t in tests: |
| test = self.unfreeze_dict(t) |
| test['resolved_deps'] = [] |
| test['recipe'] = test.get('recipe') or 'engine_v2/tester' |
| for dep in test.get('dependencies', []): |
| dep_dict = self.struct_to_dict( |
| results_map[dep].build_proto.output.properties['cas_output_hash'] |
| ) |
| test['resolved_deps'].append(dep_dict) |
| updated_tests.append(test) |
| return self.schedule(updated_tests, presentation) |
| |
| def schedule(self, builds, presentation, branch='main'): |
| """Schedules one subbuild per build configuration. |
| |
| Args: |
| builds(dict): The build/test configurations to be passed to BuildBucket or led. |
| presentation(StepPresentation): The step object used to add links and/or logs. |
| branch(String): The current branch name. |
| Returns: |
| A dictionary with a long build_id as key and SubbuildResult as value. |
| """ |
| build_list = [self.unfreeze_dict(b) for b in builds] |
| if self.m.led.launched_by_led: |
| builds = self._schedule_with_led(build_list) |
| else: |
| builds = self._schedule_with_bb(build_list, branch=branch) |
| return builds |
| |
| def _schedule_with_led(self, builds): |
| """Schedules one subbuild per build using led. |
| |
| Args: |
| builds(dict): The build/test configurations to be passed to BuildBucket or led. |
| Returns: |
| A dictionary with a long build_id as key and SubbuildResult as value. |
| """ |
| # Dependencies get here as a frozen dict we need to force them back |
| # to list of dicts. |
| results = {} |
| for build in builds: |
| task_name = build.get('name') |
| drone_properties = self.m.properties.thaw() |
| # Do not propagate main build deps. |
| drone_properties.pop('dependencies', None) |
      drone_properties.update(build.get('properties', {}))
| drone_properties['build'] = build |
| drone_properties['gclient_variables'] = build.get('gclient_variables', {}) |
| drone_properties['task_name'] = task_name |
| # Delete builds property if it exists. |
| drone_properties.pop('builds', None) |
      # Dimensions provided via the build configuration's drone_dimensions.
| drone_dimensions = build.get('drone_dimensions', []) |
| # ci.yaml provided dimensions. |
| ci_yaml_dimensions = build.get('dimensions', {}) |
| |
      # Buildbucket properties are not propagated to sub-builds when running
      # with led. Copy the buildbucket gitiles_commit into the git_url and
      # git_ref properties if they are not already set. Try jobs from a
      # Gerrit CL have an empty gitiles_commit; they are handled in a later
      # 'led edit-cr-cl' step.
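      # For example (illustrative), a gitiles_commit with
      # host='flutter.googlesource.com' and project='mirrors/engine' yields
      # git_url='https://flutter.googlesource.com/mirrors/engine'.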
| if not (drone_properties.get('git_ref') or |
| drone_properties.get('git_url')): |
| host = self.m.buildbucket.gitiles_commit.host |
| project = self.m.buildbucket.gitiles_commit.project |
| if host: |
| drone_properties['git_url'] = f'https://{host}/{project}' |
| drone_properties['git_ref'] = self.m.buildbucket.gitiles_commit.id |
| |
| # Override recipe. |
| drone_properties['recipe'] = build['recipe'] |
      # Pass the try build identifier to subbuilds.
      if self.m.monorepo.is_monorepo_try_build:
        drone_properties['try_build_identifier'] = (
            self.m.monorepo.try_build_identifier
        )
| builder_name, bucket = self._drone_name(build) |
| parent = self.m.buildbucket.build |
| led_data = self.m.led( |
| 'get-builder', |
| '-real-build', |
| '%s/%s/%s' % (parent.builder.project, bucket, builder_name), |
| ) |
| edit_args = [] |
| for k, v in sorted(drone_properties.items()): |
| if k in PROPERTIES_TO_REMOVE: |
| continue |
| edit_args.extend(['-p', '%s=%s' % (k, self.m.json.dumps(v))]) |
      # led reduces the priority of tasks by 10 from their values in
      # buildbucket, which we do not want.
      # TODO(crbug.com/1138533): Add an option to led to handle this.
      led_data.result.buildbucket.bbagent_args.build.infra.backend.config[
          'priority'] -= 20
| led_data = led_data.then('edit', *edit_args) |
| led_data = led_data.then('edit', '-name', task_name) |
| led_data = led_data.then('edit', '-r', build['recipe']) |
| # Create a single dict of dimensions giving priority to drone dimensions |
| # and removing duplicates. |
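      # For example (illustrative), ci.yaml dimensions
      # {'device_type': 'none'} combined with drone_dimensions
      # ['os=Windows-10'] yield {'device_type': 'none', 'os': 'Windows-10'};
      # drone_dimensions win any key collisions because they are applied
      # last.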
| final_dimensions = {} |
| for k, v in ci_yaml_dimensions.items(): |
| final_dimensions[k] = v |
| for d in drone_dimensions: |
        k, v = d.split('=', 1)
| final_dimensions[k] = v |
| for k, v in final_dimensions.items(): |
| led_data = led_data.then('edit', '-d', '%s=%s' % (k, v)) |
| led_data = self.m.led.inject_input_recipes(led_data) |
| if parent.input.gerrit_changes: |
| change = parent.input.gerrit_changes[0] |
| project_url = f'https://{change.host}/c/{change.project}' |
| change_url = f'{project_url}/+/{change.change}/{change.patchset}' |
| led_data = led_data.then('edit-cr-cl', change_url) |
| launch_res = led_data.then('launch', '-modernize', '-real-build') |
      # When '-real-build' is used, only build_id is populated; otherwise led
      # populates task_id instead.
| task_id = launch_res.launch_result.task_id or launch_res.launch_result.build_id |
| build_url_swarming = 'https://ci.chromium.org/swarming/task/%s?server=%s' % ( |
| task_id, |
| launch_res.launch_result.swarming_hostname, |
| ) |
| build_url_bb = 'https://%s/build/%s' % ( |
| launch_res.launch_result.buildbucket_hostname, task_id |
| ) |
| build_url = build_url_swarming if launch_res.launch_result.task_id else build_url_bb |
| results[task_name] = SubbuildResult( |
| builder=task_name, |
| build_id=task_id, |
| url=build_url, |
| build_name=task_name |
| ) |
| return results |
| |
| def _drone_name(self, build): |
| """Calculates the drone name to use for a build. |
| |
| Args: |
| build: A build configuration dictionary. |
| """ |
| dimensions = build.get('drone_dimensions', []) |
| task_name = build.get('name') |
| platform_name = '' |
| for d in dimensions: |
| if d.startswith('os='): |
| platform_name = PLATFORM_TO_NAME.get((d[:OS_DIM_CHAR_SIZE]).lower()) |
| break |
| platform_name = build.get('platform') or platform_name |
| platform = NAME_TO_PLATFORM.get(platform_name) |
| |
| if self.m.monorepo.is_monorepo_ci_build: |
| bucket = 'ci.sandbox' |
| builder_name = f'flutter-{platform}-{task_name}' |
| elif self.m.monorepo.is_monorepo_try_build: |
| bucket = 'try.monorepo' |
| builder_name = f'flutter-{platform}-{task_name}-try' |
| else: |
      # If this is an led real build, then the bucket will be the shadow
      # bucket, and getting a builder from the shadow bucket doesn't work, so
      # get the builder from the shadowed bucket instead.
| bucket = ( |
| self.m.led.shadowed_bucket or self.m.buildbucket.build.builder.bucket |
| ) |
| environment = ENVIRONMENTS_MAP.get(bucket, '') |
| builder_name = f'{platform_name} {environment}Engine Drone' |
| return builder_name, bucket |
| |
| def _schedule_with_bb(self, builds, branch='main'): |
| """Schedules builds using builbbucket. |
| |
| Args: |
| builds(dict): The build/test configurations to be passed to BuildBucket or led. |
| branch(String): The current branch name. |
| Returns: |
| A dictionary with a long build_id as key and SubbuildResult as value. |
| """ |
| swarming_parent_run_id = self.m.swarming.task_id |
| reqs = [] |
| task_names = [] |
| for build in builds: |
| task_name = build.get('name') |
| drone_properties = self.m.properties.thaw() |
| # Do not propagate main build deps. |
| drone_properties.pop('dependencies', None) |
      drone_properties.update(build.get('properties', {}))
| drone_properties['build'] = build |
| drone_properties['gclient_variables'] = build.get('gclient_variables', {}) |
      # Dimensions provided via the build configuration's drone_dimensions.
| drone_dimensions = build.get('drone_dimensions', []) |
| # ci.yaml provided dimensions. |
| ci_yaml_dimensions = build.get('dimensions', {}) |
| task_dimensions = [] |
| builder_name, bucket = self._drone_name(build) |
| # Delete builds property if it exists. |
| drone_properties.pop('builds', None) |
| |
| # Create a single dict of dimensions giving priority to drone dimensions |
| # and removing duplicates. |
| final_dimensions = {} |
| for k, v in ci_yaml_dimensions.items(): |
| final_dimensions[k] = v |
| for d in drone_dimensions: |
        k, v = d.split('=', 1)
| final_dimensions[k] = v |
| for k, v in final_dimensions.items(): |
| task_dimensions.append(common_pb2.RequestedDimension(key=k, value=v)) |
| # Override recipe. |
| drone_properties['recipe'] = build['recipe'] |
      # Pass the try build identifier to subbuilds.
      if self.m.monorepo.is_monorepo_try_build:
        drone_properties['try_build_identifier'] = (
            self.m.monorepo.try_build_identifier
        )
| properties = collections.OrderedDict( |
| (key, val) |
| for key, val in sorted(drone_properties.items()) |
| if key not in PROPERTIES_TO_REMOVE |
| ) |
| task_names.append(task_name) |
| req = self.m.buildbucket.schedule_request( |
          swarming_parent_run_id=swarming_parent_run_id,
| bucket=bucket, |
| builder=builder_name, |
| properties=properties, |
| dimensions=task_dimensions or None, |
          # Having the main build and subbuilds at the same priority can lead
          # to a deadlock when resources are limited. For example, if we have
          # only 7 mac bots and more than 7 new build requests arrive within
          # minutes of each other, then all 7 bots will be used by main tasks
          # and they will all time out waiting for resources to run
          # subbuilds. Increasing the subbuild priority won't fix the problem
          # entirely, but it will make the deadlock less likely.
          # https://github.com/flutter/flutter/issues/59169.
          #
          # Set priority to be the same as the main build temporarily to help
          # triage https://github.com/flutter/flutter/issues/124155.
          priority=30,
| exe_cipd_version=self.m.properties.get( |
| 'exe_cipd_version', 'refs/heads/%s' % branch |
| ) |
| ) |
| # Increase timeout if no_goma, since the runtime is going to |
| # be much longer. |
| if drone_properties.get("no_goma", False): |
| req.execution_timeout.FromSeconds(60 * 60 * 4) |
| reqs.append(req) |
| scheduled_builds = self.m.buildbucket.schedule(reqs, step_name="schedule") |
| results = {} |
| for build, task_name in zip(scheduled_builds, task_names): |
| build_url = "https://ci.chromium.org/b/%s" % build.id |
| results[build.id] = SubbuildResult( |
| builder=build.builder.builder, |
| build_id=build.id, |
| url=build_url, |
| build_name=task_name |
| ) |
| return results |
| |
| def collect(self, tasks): |
| """Collects builds from build bucket services using the provided tasks. |
| |
| Args: |
| tasks (dict(int, SubbuildResult)): A dictionary with the subbuild |
| results and the build id as key. |
| |
| Returns: A list of SubBuildResult, one per task. |
| """ |
| build_ids = [build.build_id for build in tasks.values()] |
| build_id_to_name = { |
| int(build.build_id): build.build_name for build in tasks.values() |
| } |
| bb_fields = self.m.buildbucket.DEFAULT_FIELDS.union({ |
| "summary_markdown", |
| }) |
| # As of 2019-11-18, timeout defaults to something too short. |
| # We never want this step to time out. We'd rather the whole build time out. |
| builds = self.m.buildbucket.collect_builds( |
| [int(build_id) for build_id in build_ids], |
| interval=20, # Lower from default of 60 b/c we're impatient. |
| timeout=24 * 60 * 60, |
| step_name="collect", |
| fields=bb_fields, |
        # Setting mirror_status to False defers error processing to the
        # subbuild presentation step.
| mirror_status=False, |
| ) |
| failed_builds = [ |
| b for b in builds.values() if b.status != common_pb2.SUCCESS |
| ] |
| if failed_builds: |
| task_ids = [ |
| b.infra.swarming.task_id |
| if b.infra.swarming.task_id else b.infra.backend.task.id.id |
| for b in failed_builds |
| ] |
| # Make sure task IDs are non-empty. |
| assert all(task_ids), task_ids |
| |
| # Wait for the underlying Swarming tasks to complete. The Swarming |
| # task for a Buildbucket build can take significantly longer to |
| # complete than the build itself due to post-processing outside the |
| # scope of the build's recipe (e.g. cache pruning). If the parent |
| # build and its Swarming task both complete before the subbuild's |
| # Swarming task finishes post-processing, then the subbuild's |
| # Swarming task will be killed by Swarming due to the parent being |
| # complete. |
| # |
| # That is actually working as intended. However, it's confusing for |
| # a subbuild to be marked as killed when the recipe actually exited |
| # normally; "killed" usually only happens for CQ builds, when a |
| # build is canceled by CQ because a new patchset of the triggering |
| # CL is uploaded. So it's convenient to have dashboards and queries |
| # ignore "killed" tasks. We use this workaround to ensure that |
| # failed subbuilds with long post-processing steps have time to |
| # complete and exit cleanly with a plain old "COMPLETED (FAILURE)" |
| # status. |
| # |
| # We only do this if the subbuild failed as a latency optimization. |
| # If all subbuilds passed, the parent will go on to do some more |
| # steps using the results of the subbuilds, leaving time for the |
| # subbuilds' tasks to complete asynchronously, so we don't want to |
| # block here while the tasks complete. |
| self.m.swarming.collect( |
| "wait for %s to complete" % pluralize("task", task_ids), task_ids |
| ) |
| for build_id, build in sorted(builds.items()): |
| builds[build_id] = SubbuildResult( |
| builder=build.builder.builder, |
| build_id=build_id, |
| build_proto=build, |
| build_name=build_id_to_name[int(build_id)], |
| url=self.m.buildbucket.build_url(build_id=build_id) |
| ) |
| return builds |
| |
| def download_full_builds(self, build_results, out_build_paths): |
| """Downloads intermediate builds from CAS. |
| |
| Args: |
| build_results (dict(int, SubbuildResult)): A dictionary with the subbuild |
| result and the build id as key. |
| |
| Mac and fuchsia use artifacts from different sub-builds to generate the final artifacts. |
| Calls to this API will happen most likely after all the subbuilds have been completed and |
| only if global generators will be executed. |
| """ |
| for build_id in build_results: |
| build_props = build_results[build_id].build_proto.output.properties |
| if 'cas_output_hash' in build_props: |
| cas_out_dict = build_props['cas_output_hash'] |
| build_name = build_results[build_id].build_name |
| if 'full_build' in cas_out_dict: |
| self.m.cas.download( |
| 'Download for build %s and cas key %s' % (build_id, build_name), |
| cas_out_dict['full_build'], out_build_paths |
| ) |
| |
| def archive_full_build(self, build_dir, target): |
| """Archives a full build in cas. |
| |
| Args: |
| build_dir: The path to the build output folder. |
| target(str): The name of the build we are archiving. |
| |
| Returns: |
| A string with the hash of the cas archive. |
| """ |
| cas_dir = self.m.path.mkdtemp('out-cas-directory') |
| cas_engine = cas_dir.join(target) |
| self.m.file.copytree('Copy %s' % target, build_dir, cas_engine) |
| |
| # pylint: disable=unused-argument |
| def _upload(timeout=None): |
| return self.m.cas_util.upload( |
| cas_dir, step_name='Archive full build for %s' % target |
| ) |
| |
    # Windows CAS uploads are flaky: hashes can be calculated before files
    # are fully synced to disk, so wrap the upload in a retry.
| return self.m.retry.basic_wrap( |
| _upload, |
| step_name='Archive full build', |
| sleep=10.0, |
| backoff_factor=5, |
| max_attempts=3 |
| ) |