[autotest] Remove the concept of 'protection level' from repair.

In the original Autotest, hosts have a 'protection level' that is
passed to repair jobs to limit the specific kinds of repair that can
be done.  For the CrOS lab, this concept is largely not useful; we
want a uniform repair procedure determined by the kind of DUT being
repaired, not by a database configuration.

This removes the concept of 'protection level' from the repair flow;
repair steps are now determined only by the class of Host that
handles the operation.  The protection level remains in the
database, but the only meaningful value is 'Do not verify'.  That
setting on a host will prevent it from running any verify, cleanup,
or repair tasks.  This is done for the sake of non-DUT hosts that
exist in the CrOS lab AFE database.

BUG=None
TEST=unit tests; ran repair in a local instance

Change-Id: I1d512eb2b1fe604b70fec00b3a290f5f6b9d9beb
Reviewed-on: https://chromium-review.googlesource.com/312979
Commit-Ready: Richard Barnette <[email protected]>
Tested-by: Richard Barnette <[email protected]>
Reviewed-by: Dan Shi <[email protected]>
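
A minimal sketch of the repair flow this change leaves in place, using
simplified stand-in classes (ExampleDUTHost and its log messages are
illustrative only; the real implementations live in
client/common_lib/hosts/base_classes.py and server/hosts/):

    import logging


    class Host(object):
        """Base class: repair now means 'make verify() pass'."""

        def verify(self):
            raise NotImplementedError()

        def repair(self):
            """Try and get the host to pass `self.verify()`.

            The base implementation just verifies; subclasses override
            this with class-specific repair logic.  There is no longer
            a protection_level argument limiting what repair may do.
            """
            self.verify()


    class ExampleDUTHost(Host):
        """Hypothetical host class with its own repair strategy."""

        def verify(self):
            logging.info('verifying the DUT')

        def repair(self):
            # The repair strategy is chosen by the host class, not by
            # a database 'protection level'.
            logging.info('running DUT-specific repair steps')
            self.verify()


    # The repair control segment now simply calls:
    #     host.repair()
    # instead of host.repair_with_protection(level).
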
diff --git a/client/common_lib/error.py b/client/common_lib/error.py
index 16ac5ac..9f1a6ca 100644
--- a/client/common_lib/error.py
+++ b/client/common_lib/error.py
@@ -482,23 +482,6 @@
     pass
 
 
-class AutoservHardwareRepairRequestedError(AutoservError):
-    """
-    Exception class raised from Host.repair_full() (or overrides) when software
-    repair fails but it successfully managed to request a hardware repair (by
-    notifying the staff, sending mail, etc)
-    """
-    pass
-
-
-class AutoservHardwareRepairRequiredError(AutoservError):
-    """
-    Exception class raised during repairs to indicate that a hardware repair
-    is going to be necessary.
-    """
-    pass
-
-
 class AutoservInstallError(AutoservError):
     """Error occured while installing autotest on a host"""
     pass
diff --git a/client/common_lib/hosts/base_classes.py b/client/common_lib/hosts/base_classes.py
index 12d8eef..c66a670 100644
--- a/client/common_lib/hosts/base_classes.py
+++ b/client/common_lib/hosts/base_classes.py
@@ -18,7 +18,6 @@
 import cPickle, cStringIO, logging, os, re, time
 
 from autotest_lib.client.common_lib import global_config, error, utils
-from autotest_lib.client.common_lib import host_protections
 from autotest_lib.client.common_lib.cros.graphite import autotest_stats
 from autotest_lib.client.bin import partition
 
@@ -72,8 +71,7 @@
 
 
     def _initialize(self, *args, **dargs):
-        self._already_repaired = []
-        self._removed_files = False
+        pass
 
 
     def close(self):
@@ -152,12 +150,7 @@
 
     def is_shutting_down(self):
         """ Indicates is a machine is currently shutting down. """
-        # runlevel() may not be available, so wrap it in try block.
-        try:
-            runlevel = int(self.run("runlevel").stdout.strip().split()[1])
-            return runlevel in (0, 6)
-        except:
-            return False
+        return False
 
 
     def get_wait_up_processes(self):
@@ -199,7 +192,7 @@
         raise NotImplementedError('Wait down not implemented!')
 
 
-    def construct_host_metadata(self, type_str):
+    def _construct_host_metadata(self, type_str):
         """Returns dict of metadata with type_str, hostname, time_recorded.
 
         @param type_str: String representing _type field in es db.
@@ -222,9 +215,9 @@
         key_string = 'Reboot.%s' % dargs.get('board')
 
         total_reboot_timer = autotest_stats.Timer('%s.total' % key_string,
-                metadata=self.construct_host_metadata('reboot_total'))
+                metadata=self._construct_host_metadata('reboot_total'))
         wait_down_timer = autotest_stats.Timer('%s.wait_down' % key_string,
-                metadata=self.construct_host_metadata('reboot_down'))
+                metadata=self._construct_host_metadata('reboot_down'))
 
         total_reboot_timer.start()
         wait_down_timer.start()
@@ -236,7 +229,7 @@
             raise error.AutoservShutdownError("Host did not shut down")
         wait_down_timer.stop()
         wait_up_timer = autotest_stats.Timer('%s.wait_up' % key_string,
-                metadata=self.construct_host_metadata('reboot_up'))
+                metadata=self._construct_host_metadata('reboot_up'))
         wait_up_timer.start()
         if self.wait_up(timeout):
             self.record("GOOD", None, "reboot.verify")
@@ -315,211 +308,15 @@
                          path, self.hostname)
 
 
-    def get_open_func(self, use_cache=True):
-        """
-        Defines and returns a function that may be used instead of built-in
-        open() to open and read files. The returned function is implemented
-        by using self.run('cat <file>') and may cache the results for the same
-        filename.
-
-        @param use_cache Cache results of self.run('cat <filename>') for the
-            same filename
-
-        @return a function that can be used instead of built-in open()
-        """
-        cached_files = {}
-
-        def open_func(filename):
-            if not use_cache or filename not in cached_files:
-                output = self.run('cat \'%s\'' % filename,
-                                  stdout_tee=open('/dev/null', 'w')).stdout
-                fd = cStringIO.StringIO(output)
-
-                if not use_cache:
-                    return fd
-
-                cached_files[filename] = fd
-            else:
-                cached_files[filename].seek(0)
-
-            return cached_files[filename]
-
-        return open_func
-
-
-    def check_partitions(self, root_part, filter_func=None):
-        """ Compare the contents of /proc/partitions with those of
-        /proc/mounts and raise exception in case unmounted partitions are found
-
-        root_part: in Linux /proc/mounts will never directly mention the root
-        partition as being mounted on / instead it will say that /dev/root is
-        mounted on /. Thus require this argument to filter out the root_part
-        from the ones checked to be mounted
-
-        filter_func: unnary predicate for additional filtering out of
-        partitions required to be mounted
-
-        Raise: error.AutoservHostError if unfiltered unmounted partition found
-        """
-
-        print 'Checking if non-swap partitions are mounted...'
-
-        unmounted = partition.get_unmounted_partition_list(root_part,
-            filter_func=filter_func, open_func=self.get_open_func())
-        if unmounted:
-            raise error.AutoservNotMountedHostError(
-                'Found unmounted partitions: %s' %
-                [part.device for part in unmounted])
-
-
-    def _repair_wait_for_reboot(self):
-        TIMEOUT = int(self.HOURS_TO_WAIT_FOR_RECOVERY * 3600)
-        if self.is_shutting_down():
-            logging.info('Host is shutting down, waiting for a restart')
-            self.wait_for_restart(TIMEOUT)
-        else:
-            self.wait_up(TIMEOUT)
-
-
-    def _get_mountpoint(self, path):
-        """Given a "path" get the mount point of the filesystem containing
-        that path."""
-        code = ('import os\n'
-                # sanitize the path and resolve symlinks
-                'path = os.path.realpath(%r)\n'
-                "while path != '/' and not os.path.ismount(path):\n"
-                '    path, _ = os.path.split(path)\n'
-                'print path\n') % path
-        return self.run('python -c "%s"' % code,
-                        stdout_tee=open(os.devnull, 'w')).stdout.rstrip()
-
-
     def erase_dir_contents(self, path, ignore_status=True, timeout=3600):
         """Empty a given directory path contents."""
         rm_cmd = 'find "%s" -mindepth 1 -maxdepth 1 -print0 | xargs -0 rm -rf'
         self.run(rm_cmd % path, ignore_status=ignore_status, timeout=timeout)
-        self._removed_files = True
 
 
-    def repair_full_disk(self, mountpoint):
-        # it's safe to remove /tmp and /var/tmp, site specific overrides may
-        # want to remove some other places too
-        if mountpoint == self._get_mountpoint('/tmp'):
-            self.erase_dir_contents('/tmp')
-
-        if mountpoint == self._get_mountpoint('/var/tmp'):
-            self.erase_dir_contents('/var/tmp')
-
-
-    def _call_repair_func(self, err, func, *args, **dargs):
-        for old_call in self._already_repaired:
-            if old_call == (func, args, dargs):
-                # re-raising the original exception because surrounding
-                # error handling may want to try other ways to fix it
-                logging.warning('Already done this (%s) repair procedure, '
-                             're-raising the original exception.', func)
-                raise err
-
-        try:
-            func(*args, **dargs)
-        except (error.AutoservHardwareRepairRequestedError,
-                error.AutoservHardwareRepairRequiredError):
-            # let these special exceptions propagate
-            raise
-        except error.AutoservError:
-            logging.exception('Repair failed but continuing in case it managed'
-                              ' to repair enough')
-
-        self._already_repaired.append((func, args, dargs))
-
-
-    def repair_filesystem_only(self):
-        """perform file system repairs only"""
-        while True:
-            # try to repair specific problems
-            try:
-                logging.info('Running verify to find failures to repair...')
-                self.verify()
-                if self._removed_files:
-                    logging.info('Removed files, rebooting to release the'
-                                 ' inodes')
-                    self.reboot()
-                return # verify succeeded, then repair succeeded
-            except error.AutoservHostIsShuttingDownError, err:
-                logging.exception('verify failed')
-                self._call_repair_func(err, self._repair_wait_for_reboot)
-            except error.AutoservDiskFullHostError, err:
-                logging.exception('verify failed')
-                self._call_repair_func(err, self.repair_full_disk,
-                                       self._get_mountpoint(err.path))
-
-
-    def repair_software_only(self):
-        """perform software repairs only"""
-        while True:
-            try:
-                self.repair_filesystem_only()
-                break
-            except (error.AutoservSshPingHostError, error.AutoservSSHTimeout,
-                    error.AutoservSshPermissionDeniedError,
-                    error.AutoservDiskFullHostError), err:
-                logging.exception('verify failed')
-                logging.info('Trying to reinstall the machine')
-                self._call_repair_func(err, self.machine_install)
-
-
-    def repair_full(self):
-        hardware_repair_requests = 0
-        while True:
-            try:
-                self.repair_software_only()
-                break
-            except error.AutoservHardwareRepairRequiredError, err:
-                logging.exception('software repair failed, '
-                                  'hardware repair requested')
-                hardware_repair_requests += 1
-                try_hardware_repair = (hardware_repair_requests >=
-                                       self.HARDWARE_REPAIR_REQUEST_THRESHOLD)
-                if try_hardware_repair:
-                    logging.info('hardware repair requested %d times, '
-                                 'trying hardware repair',
-                                 hardware_repair_requests)
-                    self._call_repair_func(err, self.request_hardware_repair)
-                else:
-                    logging.info('hardware repair requested %d times, '
-                                 'trying software repair again',
-                                 hardware_repair_requests)
-            except error.AutoservHardwareHostError, err:
-                logging.exception('verify failed')
-                # software repair failed, try hardware repair
-                logging.info('Hardware problem found, '
-                             'requesting hardware repairs')
-                self._call_repair_func(err, self.request_hardware_repair)
-
-
-    def repair_with_protection(self, protection_level):
-        """Perform the maximal amount of repair within the specified
-        protection level.
-
-        @param protection_level: the protection level to use for limiting
-                                 repairs, a host_protections.Protection
-        """
-        protection = host_protections.Protection
-        if protection_level == protection.DO_NOT_REPAIR:
-            logging.info('Protection is "Do not repair" so just verifying')
-            self.verify()
-        elif protection_level == protection.REPAIR_FILESYSTEM_ONLY:
-            logging.info('Attempting filesystem-only repair')
-            self.repair_filesystem_only()
-        elif protection_level == protection.REPAIR_SOFTWARE_ONLY:
-            logging.info('Attempting software repair only')
-            self.repair_software_only()
-        elif protection_level == protection.NO_PROTECTION:
-            logging.info('Attempting full repair')
-            self.repair_full()
-        else:
-            raise NotImplementedError('Unknown host protection level %s'
-                                      % protection_level)
+    def repair(self):
+        """Try and get the host to pass `self.verify()`."""
+        self.verify()
 
 
     def disable_ipfilters(self):
@@ -656,15 +453,6 @@
             op_func()
 
 
-    def request_hardware_repair(self):
-        """ Should somehow request (send a mail?) for hardware repairs on
-        this machine. The implementation can either return by raising the
-        special error.AutoservHardwareRepairRequestedError exception or can
-        try to wait until the machine is repaired and then return normally.
-        """
-        raise NotImplementedError("request_hardware_repair not implemented")
-
-
     def list_files_glob(self, glob):
         """
         Get a list of files on a remote host given a glob pattern path.
diff --git a/server/autoserv b/server/autoserv
index 476d86e..0c77475 100755
--- a/server/autoserv
+++ b/server/autoserv
@@ -414,7 +414,6 @@
     execution_tag = parser.options.execution_tag
     if not execution_tag:
         execution_tag = parse_job
-    host_protection = parser.options.host_protection
     ssh_user = parser.options.ssh_user
     ssh_port = parser.options.ssh_port
     ssh_pass = parser.options.ssh_pass
@@ -483,7 +482,7 @@
             if repair:
                 if auto_start_servod and len(machines) == 1:
                     _start_servod(machines[0])
-                job.repair(host_protection, job_labels)
+                job.repair(job_labels)
             elif verify:
                 job.verify(job_labels)
             elif provision:
diff --git a/server/autoserv_parser.py b/server/autoserv_parser.py
index 4d6beaf..0f5c3ea 100644
--- a/server/autoserv_parser.py
+++ b/server/autoserv_parser.py
@@ -4,8 +4,6 @@
 import shlex
 import sys
 
-from autotest_lib.client.common_lib import host_protections
-
 
 class autoserv_parser(object):
     """Custom command-line options parser for autoserv.
@@ -120,13 +118,6 @@
                                        ' an existing results directory'))
         self.parser.add_argument('-a', '--args', dest='args',
                                  help='additional args to pass to control file')
-        protection_levels = [host_protections.Protection.get_attr_name(s)
-                             for _, s in host_protections.choices]
-        self.parser.add_argument('--host-protection', action='store',
-                                 type=str, dest='host_protection',
-                                 default=host_protections.default,
-                                 choices=protection_levels,
-                                 help='level of host protection during repair')
         self.parser.add_argument('--ssh-user', action='store',
                                  type=str, dest='ssh_user', default='root',
                                  help='specify the user for ssh connections')
diff --git a/server/control_segments/repair b/server/control_segments/repair
index 665cfca..3d740a2 100644
--- a/server/control_segments/repair
+++ b/server/control_segments/repair
@@ -1,7 +1,6 @@
 import logging
 import os
 
-from autotest_lib.client.common_lib import host_protections
 from autotest_lib.server import crashcollect
 from autotest_lib.server.cros import provision
 
@@ -11,20 +10,6 @@
 labels_list = [l.strip() for l in job_labels.split(',') if l]
 
 
-def _call_repair(host):
-    protection = host_protections.Protection
-
-    try:
-        level = protection.get_value(protection_level)
-    except ValueError:
-        raise NotImplementedError("Unknown host protection level %s" %
-                                  protection_level)
-
-    logging.debug('Protection level of host %s is %s: %s',
-                  host.hostname, protection_level, level)
-    host.repair_with_protection(level)
-
-
 def repair(machine):
     try:
         job.record('START', None, 'repair')
@@ -36,7 +21,7 @@
         # Collect crash info.
         crashcollect.get_crashinfo(host, None)
 
-        _call_repair(host)
+        host.repair()
         logging.debug('Repair with labels list %s', labels_list)
         provision.run_special_task_actions(job, host, labels_list,
                                            provision.Repair)
diff --git a/server/hosts/cros_host.py b/server/hosts/cros_host.py
index b0ffa6e..cbabadf 100644
--- a/server/hosts/cros_host.py
+++ b/server/hosts/cros_host.py
@@ -1257,8 +1257,8 @@
             return False
 
 
-    def repair_full(self):
-        """Repair a host for repair level NO_PROTECTION.
+    def repair(self):
+        """Attempt to get the DUT to pass `self.verify()`.
 
         This overrides the base class function for repair; it does
         not call back to the parent class, but instead offers a
diff --git a/server/hosts/moblab_host.py b/server/hosts/moblab_host.py
index a210cbe..010f683 100644
--- a/server/hosts/moblab_host.py
+++ b/server/hosts/moblab_host.py
@@ -277,8 +277,8 @@
         super(MoblabHost, self).check_device()
 
 
-    def repair_full(self):
-        """Moblab specific repair_full.
+    def repair(self):
+        """Moblab specific repair.
 
         Preserves the dhcp lease file prior to repairing the device.
         """
@@ -288,7 +288,7 @@
             self._dhcpd_leasefile = temp
         except error.AutoservRunError:
             logging.debug('Failed to retrieve dhcpd lease file from host.')
-        super(MoblabHost, self).repair_full()
+        super(MoblabHost, self).repair()
 
 
     def get_platform(self):
diff --git a/server/hosts/servo_host.py b/server/hosts/servo_host.py
index c4f1eb0..79d05ec 100644
--- a/server/hosts/servo_host.py
+++ b/server/hosts/servo_host.py
@@ -140,7 +140,7 @@
                 if not self.is_in_lab():
                     raise
                 else:
-                    self.repair_full()
+                    self.repair()
 
 
     def is_in_lab(self):
@@ -591,7 +591,7 @@
         time.sleep(self.REBOOT_DELAY_SECS)
 
 
-    def repair_full(self):
+    def repair(self):
         """Attempt to repair servo host.
 
         This overrides the base class function for repair.
diff --git a/server/server_job.py b/server/server_job.py
index 45ff890..5dc20ba 100644
--- a/server/server_job.py
+++ b/server/server_job.py
@@ -385,7 +385,7 @@
         return namespace
 
 
-    def cleanup(self, labels=''):
+    def cleanup(self, labels):
         """Cleanup machines.
 
         @param labels: Comma separated job labels, will be used to
@@ -401,7 +401,7 @@
         self._execute_code(CLEANUP_CONTROL_FILE, namespace, protect=False)
 
 
-    def verify(self, labels=''):
+    def verify(self, labels):
         """Verify machines are all ssh-able.
 
         @param labels: Comma separated job labels, will be used to
@@ -417,7 +417,7 @@
         self._execute_code(VERIFY_CONTROL_FILE, namespace, protect=False)
 
 
-    def reset(self, labels=''):
+    def reset(self, labels):
         """Reset machines by first cleanup then verify each machine.
 
         @param labels: Comma separated job labels, will be used to
@@ -433,10 +433,9 @@
         self._execute_code(RESET_CONTROL_FILE, namespace, protect=False)
 
 
-    def repair(self, host_protection, labels=''):
+    def repair(self, labels):
         """Repair machines.
 
-        @param host_protection: level of host protection.
         @param labels: Comma separated job labels, will be used to
                        determine special task actions.
         """
@@ -446,8 +445,7 @@
             os.chdir(self.resultdir)
 
         namespace = self._make_namespace()
-        namespace.update({'protection_level' : host_protection,
-                          'job_labels': labels, 'args': ''})
+        namespace.update({'job_labels': labels, 'args': ''})
         self._execute_code(REPAIR_CONTROL_FILE, namespace, protect=False)