| # Lint as: python2, python3 |
| # Copyright 2016 The Chromium OS Authors. All rights reserved. |
| # Use of this source code is governed by a BSD-style license that can be |
| # found in the LICENSE file. |
| |
| """ |
| Repair actions and verifiers relating to CrOS firmware. |
| |
| This contains the repair actions and verifiers need to find problems |
| with the firmware installed on Chrome OS DUTs, and when necessary, to |
| fix problems by updating or re-installing the firmware. |
| |
| The operations in the module support two distinct use cases: |
| * DUTs used for FAFT tests can in some cases have problems with |
| corrupted firmware. The module supplies `FirmwareStatusVerifier` |
| to check for corruption, and supplies `FaftFirmwareRepair` to |
| re-install firmware of current faft stable_version via servo |
| when needed. |
| * DUTs used for general testing normally should be running a |
| designated "stable" firmware version. This module supplies |
| `FirmwareVersionVerifier` to detect and automatically update |
| firmware that is out-of-date from the designated version. This model |
| also supplys `GeneralFirmwareRepair` to re-install firmware that |
| tied with current stable_version image via servo when needed. |
| |
| For purposes of the operations in the module, we distinguish three kinds |
| of DUT, based on pool assignments: |
| * DUTs used for general testing. These DUTs automatically check for |
| and install the stable firmware using `FirmwareVersionVerifier`. |
| * DUTs in pools used for FAFT testing. These check for bad firmware |
| builds with `FirmwareStatusVerifier`, and will fix problems using |
| `FirmwareRepair`. These DUTs don't check for or install the |
| stable firmware. |
| * DUTs not in general pools, and not used for FAFT. These DUTs |
| are expected to be managed by separate processes and are excluded |
| from all of the verification and repair code in this module. |
| """ |
| |
| # pylint: disable=missing-docstring |
| |
| from __future__ import absolute_import |
| from __future__ import division |
| from __future__ import print_function |
| |
| import json |
| import logging |
| |
| import common |
| from autotest_lib.client.common_lib import global_config |
| from autotest_lib.client.common_lib import hosts |
| from autotest_lib.server import afe_utils |
| from autotest_lib.server.hosts import repair_utils |
| from autotest_lib.server.hosts import cros_constants |
| |
| from chromite.lib import timeout_util |
| import six |
| |
| |
| # _FIRMWARE_REPAIR_POOLS - The set of pools that should be |
| # managed by `FirmwareStatusVerifier` and `FirmwareRepair`. |
| # |
| _FIRMWARE_REPAIR_POOLS = set( |
| global_config.global_config.get_config_value( |
| 'CROS', |
| 'pools_support_firmware_repair', |
| type=str).split(',')) |
| |
| |
| def _is_firmware_testing_device(host): |
| """ |
| check if a host is dedicated for firmware testing. |
| |
| When this function returns true, the DUT should be managed by |
| `FirmwareStatusVerifier` and `FaftFirmwareRepair`, but not |
| `FirmwareVersionVerifier` and `GeneralFirmwareRepair. |
| |
| @return A true value if the host should use `FirmwareStatusVerifier` |
| and `FaftFirmwareRepair`; a false value otherwise. |
| """ |
| info = host.host_info_store.get() |
| return bool(info.pools & _FIRMWARE_REPAIR_POOLS) |
| |
| |
| def _is_firmware_update_supported(host): |
| """ |
| Return whether a DUT should be running the standard firmware. |
| |
| In the test lab, DUTs used for general testing, (e.g. the `bvt` |
| pool) need their firmware kept up-to-date with |
| `FirmwareVersionVerifier`. However, some pools have alternative |
| policies for firmware management. This returns whether a given DUT |
| should be updated via the standard stable version update, or |
| managed by some other procedure. |
| |
| @param host The host to be checked for update policy. |
| @return A true value if the host should use |
| `FirmwareVersionVerifier`; a false value otherwise. |
| """ |
| return not _is_firmware_testing_device(host) |
| |
| |
| def _get_available_firmware(host, model): |
| """Get the available RW firmware version given the model. |
| |
| @param host The host to get available firmware for. |
| @param model The model name to get corresponding firmware version. |
| @return The available RW firmware version if found, else, None. |
| """ |
| result = host.run('chromeos-firmwareupdate --manifest', ignore_status=True) |
| |
| if result.exit_status != 0: |
| return None |
| |
| # The manifest is a JSON in .model.host.versions.rw |
| data = json.loads(result.stdout) or {} |
| key = model if len(data) > 1 else next(six.iterkeys(data), '') |
| key += '.host.versions.rw' |
| for k in key.split('.'): |
| data = data.get(k, {}) |
| return data or None |
| |
| |
| class FirmwareStatusVerifier(hosts.Verifier): |
| """ |
| Verify that a host's firmware is in a good state. |
| |
| For DUTs that run firmware tests, it's possible that the firmware |
| on the DUT can get corrupted. This verifier checks whether it |
| appears that firmware should be re-flashed using servo. |
| """ |
| |
| @timeout_util.TimeoutDecorator(cros_constants.VERIFY_TIMEOUT_SEC) |
| def verify(self, host): |
| if not _is_firmware_testing_device(host): |
| return |
| try: |
| # Read the AP firmware and dump the sections that we're |
| # interested in. |
| cmd = ('mkdir /tmp/verify_firmware; ' |
| 'cd /tmp/verify_firmware; ' |
| 'for section in VBLOCK_A VBLOCK_B FW_MAIN_A FW_MAIN_B; ' |
| 'do flashrom -p host -r -i $section:$section; ' |
| 'done') |
| host.run(cmd) |
| |
| # Verify the firmware blocks A and B. |
| cmd = ('vbutil_firmware --verify /tmp/verify_firmware/VBLOCK_%c' |
| ' --signpubkey /usr/share/vboot/devkeys/root_key.vbpubk' |
| ' --fv /tmp/verify_firmware/FW_MAIN_%c') |
| for c in ('A', 'B'): |
| rv = host.run(cmd % (c, c), ignore_status=True) |
| if rv.exit_status: |
| raise hosts.AutoservVerifyError( |
| 'Firmware %c is in a bad state.' % c) |
| finally: |
| # Remove the temporary files. |
| host.run('rm -rf /tmp/verify_firmware') |
| |
| @property |
| def description(self): |
| return 'Firmware on this DUT is clean' |
| |
| |
| class FirmwareRepair(hosts.RepairAction): |
| """ |
| Reinstall the firmware image using servo. |
| |
| This repair function attempts to use servo to install the DUT's |
| designated "stable firmware version". |
| |
| This repair method only applies to DUTs used for FAFT. |
| """ |
| |
| def _get_faft_stable_build(self, host): |
| info = host.host_info_store.get() |
| return afe_utils.get_stable_faft_version_v2(info) |
| |
| def _get_os_stable_build(self, host): |
| # Use firmware in current stable os build. |
| return host.get_cros_repair_image_name() |
| |
| def _run_faft_repair(self, host, build): |
| host.firmware_install(build) |
| |
| def _run_general_repair(self, host, build): |
| # As GeneralFirmwareRepair is the last repair action, we expect |
| # stable_version os image is loaded on usbkey during other repair |
| # action runs. And there is also no point to repeat and waste time if |
| # download image to usbkey failed in other repair actions. |
| if host._servo_host.validate_image_usbkey() != build: |
| raise hosts.AutoservRepairError('%s is expected to be preloaded,' |
| 'however it\'s not found on the usbkey' % build, |
| 'image not loaded on usbkey') |
| ec_image, bios_image = host._servo_host.prepare_repair_firmware_image() |
| |
| # For EVT device with signed variant exists we skip this repair |
| # as it's hard to decide which image to use if DUT do not boot. |
| info = host.host_info_store.get() |
| phase = info.get_label_value('phase') |
| if 'signed' in bios_image and phase.lower() in ('evt', 'dvt', ''): |
| raise hosts.AutoservRepairError( |
| 'Could not determine which firmware image to use' |
| ' due to signed firmware image variant exists but' |
| ' DUT phase is earlier than PVT or missing; Phase' |
| ' from inventory: %s' % phase, |
| 'Can not determine variant for EVT device') |
| |
| # Before flash firmware we want update the build into health profile. |
| if host.health_profile: |
| host.health_profile.set_firmware_stable_version(build) |
| |
| if ec_image: |
| logging.info('Attempting to flash ec firmware...') |
| host.servo.program_ec(ec_image, copy_image=False) |
| if bios_image: |
| logging.info('Attempting to flash bios firmware...') |
| host._servo_host.flash_ap_firmware_via_servo(bios_image) |
| |
| logging.info('Cold resetting DUT through servo...') |
| host.servo.get_power_state_controller().reset() |
| host.wait_up(timeout=host.BOOT_TIMEOUT) |
| # flash firmware via servo will turn DUT into dev mode, so disable |
| # dev mode and reset gbb flag here. |
| host.run('/usr/share/vboot/bin/set_gbb_flags.sh 0', ignore_status=True) |
| host.run('crossystem disable_dev_request=1', ignore_status=True) |
| host.reboot() |
| |
| |
| class FaftFirmwareRepair(FirmwareRepair): |
| """ |
| Reinstall the firmware for DUTs in faft related pool. |
| """ |
| |
| def repair(self, host): |
| repair_utils.require_servo(host, ignore_state=True) |
| build = self._get_faft_stable_build(host) |
| if build: |
| self._run_faft_repair(host, build) |
| else: |
| logging.info('Cannot find faft stable_version, falling back to' |
| ' use firmware on OS stable_version.') |
| build = self._get_os_stable_build(host) |
| if not build: |
| raise hosts.AutoservRepairError( |
| 'Failed to find stable_version from host_info.', |
| 'cannot find stable_version') |
| self._run_general_repair(host, build) |
| |
| def _is_applicable(self, host): |
| return _is_firmware_testing_device(host) |
| |
| @property |
| def description(self): |
| return 'Re-install the stable firmware(faft) via servo' |
| |
| |
| class GeneralFirmwareRepair(FirmwareRepair): |
| """Reinstall the firmware for non-faft DUTs. |
| We need different RepairAction for non firmware testing DUT because |
| we want only try re-install firmware if all other RepairAction could |
| not restore ssh capability to the DUT. |
| """ |
| |
| def repair(self, host): |
| repair_utils.require_servo(host, ignore_state=True) |
| build = self._get_os_stable_build(host) |
| if not build: |
| raise hosts.AutoservRepairError( |
| 'Failed to find stable_version from host_info.', |
| 'cannot find stable_version') |
| self._run_general_repair(host, build) |
| |
| def _is_applicable(self, host): |
| if _is_firmware_testing_device(host): |
| return False |
| if not host.servo: |
| logging.info( |
| 'The current servo state of %s is not met the' |
| ' minimum requirement to flash firmware.', host.hostname) |
| # Flash firmware via servo is consider an expansive opertation, so we |
| # want to check repair data from previous repairs to determine if |
| # firmware repair is need. |
| dhp = host.health_profile |
| if not dhp: |
| logging.info('Device health profile is not available, cannot' |
| ' determine if firmware repair is needed.') |
| return False |
| repair_fail_count = dhp.get_repair_fail_count() |
| if repair_fail_count < 2: |
| # We want to start with a more conservative strategy, so only try |
| # this action on DUTs that failed repair at least twice. |
| # @TODO(xianuowang@) adjust or remove this threshold. |
| logging.info( |
| 'Firmware repair will only applies to DUT that' |
| ' failed at least two AdminRepair, current fail' |
| ' count: %s', repair_fail_count) |
| return False |
| flashed_build = dhp.get_firmware_stable_version() |
| candidate_build = self._get_os_stable_build(host) |
| # If we had an success firmware flash in this repair loop, |
| # there is no need to retry flash the same firmware build. |
| if (dhp.get_succeed_repair_action(self.tag) > 0 |
| and flashed_build == candidate_build): |
| logging.info( |
| 'Firmware from %s has been already installed on %s,' |
| ' no need to retry.', flashed_build, host.hostname) |
| return False |
| if (dhp.get_failed_repair_action(self.tag) > 2 |
| and flashed_build == candidate_build): |
| logging.info( |
| 'Firmware from %s has been attempted and failed 3 ' |
| 'times, no need to retry.', flashed_build) |
| return False |
| return True |
| |
| @property |
| def description(self): |
| return 'Re-install the stable firmware(non-faft) via servo' |
| |
| |
| class FirmwareVersionVerifier(hosts.Verifier): |
| """ |
| Check for a firmware update, and apply it if appropriate. |
| |
| This verifier checks to ensure that either the firmware on the DUT |
| is up-to-date, or that the target firmware can be installed from the |
| currently running build. |
| |
| Failure occurs when all of the following apply: |
| 1. The DUT is not excluded from updates. For example, DUTs used |
| for FAFT testing use `FirmwareRepair` instead. |
| 2. The DUT's board has an assigned stable firmware version. |
| 3. The DUT is not running the assigned stable firmware. |
| 4. The firmware supplied in the running OS build is not the |
| assigned stable firmware. |
| |
| If the DUT needs an upgrade and the currently running OS build |
| supplies the necessary firmware, the verifier installs the new |
| firmware using `chromeos-firmwareupdate`. Failure to install will |
| cause the verifier to fail. |
| |
| This verifier nominally breaks the rule that "verifiers must succeed |
| quickly", since it can invoke `reboot()` during the success code |
| path. We're doing it anyway for two reasons: |
| * The time between updates will typically be measured in months, |
| so the amortized cost is low. |
| * The reason we distinguish repair from verify is to allow |
| rescheduling work immediately while the expensive repair happens |
| out-of-band. But a firmware update will likely hit all DUTs at |
| once, so it's pointless to pass the buck to repair. |
| |
| N.B. This verifier is a trigger for all repair actions that install |
| the stable repair image. If the firmware is out-of-date, but the |
| stable repair image does *not* contain the proper firmware version, |
| _the target DUT will fail repair, and will be unable to fix itself_. |
| """ |
| |
| @staticmethod |
| def _get_rw_firmware(host): |
| result = host.run('crossystem fwid', ignore_status=True) |
| if result.exit_status == 0: |
| return result.stdout |
| else: |
| return None |
| |
| @staticmethod |
| def _check_hardware_match(version_a, version_b): |
| """ |
| Check that two firmware versions identify the same hardware. |
| |
| Firmware version strings look like this: |
| Google_Gnawty.5216.239.34 |
| The part before the numbers identifies the hardware for which |
| the firmware was built. This function checks that the hardware |
| identified by `version_a` and `version_b` is the same. |
| |
| This is a sanity check to protect us from installing the wrong |
| firmware on a DUT when a board label has somehow gone astray. |
| |
| @param version_a First firmware version for the comparison. |
| @param version_b Second firmware version for the comparison. |
| """ |
| hardware_a = version_a.split('.')[0] |
| hardware_b = version_b.split('.')[0] |
| if hardware_a != hardware_b: |
| message = 'Hardware/Firmware mismatch updating %s to %s' |
| raise hosts.AutoservVerifyError( |
| message % (version_a, version_b)) |
| |
| @timeout_util.TimeoutDecorator(cros_constants.VERIFY_TIMEOUT_SEC) |
| def verify(self, host): |
| # Test 1 - The DUT is not excluded from updates. |
| if not _is_firmware_update_supported(host): |
| return |
| # Test 2 - The DUT has an assigned stable firmware version. |
| info = host.host_info_store.get() |
| if info.model is None: |
| raise hosts.AutoservVerifyError( |
| 'Can not verify firmware version. ' |
| 'No model label value found') |
| |
| stable_firmware = None |
| try: |
| stable_firmware = afe_utils.get_stable_firmware_version_v2(info) |
| except Exception as e: |
| logging.exception('Failed lookup to AFE for stable fw version ' |
| ' with exception: %s', e) |
| |
| if stable_firmware is None: |
| # This DUT doesn't have a firmware update target |
| return |
| |
| # For tests 3 and 4: If the output from `crossystem` or |
| # `chromeos-firmwareupdate` isn't what we expect, we log an |
| # error, but don't fail: We don't want DUTs unable to test a |
| # build merely because of a bug or change in either of those |
| # commands. |
| |
| # Test 3 - The DUT is not running the target stable firmware. |
| current_firmware = self._get_rw_firmware(host) |
| if current_firmware is None: |
| logging.error('DUT firmware version can\'t be determined.') |
| return |
| if current_firmware == stable_firmware: |
| return |
| # Test 4 - The firmware supplied in the running OS build is not |
| # the assigned stable firmware. |
| available_firmware = _get_available_firmware(host, info.model) |
| if available_firmware is None: |
| logging.error('Supplied firmware version in OS can\'t be ' |
| 'determined.') |
| return |
| if available_firmware != stable_firmware: |
| raise hosts.AutoservVerifyError( |
| 'DUT firmware requires update from %s to %s' % |
| (current_firmware, stable_firmware)) |
| # Time to update the firmware. |
| logging.info('Updating firmware from %s to %s', |
| current_firmware, stable_firmware) |
| self._check_hardware_match(current_firmware, stable_firmware) |
| try: |
| host.run('chromeos-firmwareupdate --mode=autoupdate') |
| host.reboot() |
| except Exception as e: |
| message = ('chromeos-firmwareupdate failed: from ' |
| '%s to %s') |
| logging.exception(message, current_firmware, stable_firmware) |
| raise hosts.AutoservVerifyError( |
| message % (current_firmware, stable_firmware)) |
| final_firmware = self._get_rw_firmware(host) |
| if final_firmware != stable_firmware: |
| message = ('chromeos-firmwareupdate failed: tried upgrade ' |
| 'to %s, now running %s instead') |
| raise hosts.AutoservVerifyError( |
| message % (stable_firmware, final_firmware)) |
| |
| @property |
| def description(self): |
| return 'The firmware on this DUT is up-to-date' |