Created a common_lib host object hierarchy (with site specific override
support) and moved *some* non server specific code from server/hosts to
the common_lib version. Created a client.bin LocalHost host object
inheriting from the common_lib Host and implemented a run() method based
on utils.run() (i.e. a host object for the local machine). Modified
client/bin/autotest to receive a new --hostname argument to tell it the
hostname to use for the LocalHost.hostname instance attribute and
updated server/autotest.py to send this argument. Modified
client.bin.job to set a self.host instance attribute with an instance of
LocalHost and updated the unittest. Added an AutotestHostRunError class
(raised on LocalHost.run() failures).
Risk: high (there are modifications in the core server side support code
and some core client code).
Tested with verify/repair and client/server sleeptest jobs.
For SVN to preserve the code history (most of the new
client/common_lib/hosts/base_classes.py is based on the old
server/hosts/base_classes.py), the following steps are probably needed
to apply this patch:
$ svn mkdir client/common_lib/hosts
$ svn copy server/hosts/base_classes.py client/common_lib/hosts
$ patch -p1 ...
$ svn add client/bin/local_host.py client/bin/local_host_unittest.py client/common_lib/hosts/__init__.py client/common_lib/hosts/base_classes_unittest.py
Signed-off-by: Mihai Rusu <[email protected]>
git-svn-id: http://test.kernel.org/svn/autotest/trunk@3594 592f7852-d20e-0410-864c-8624ca9c26a4
diff --git a/server/hosts/base_classes.py b/server/hosts/base_classes.py
index 896c035..0759d5f 100644
--- a/server/hosts/base_classes.py
+++ b/server/hosts/base_classes.py
@@ -2,7 +2,7 @@
# Copyright 2007 Google Inc. Released under the GPL v2
"""
-This module defines the base classes for the Host hierarchy.
+This module defines the base classes for the server Host hierarchy.
Implementation details:
You should import the "hosts" package instead of importing each type of host.
@@ -17,16 +17,14 @@
[email protected] (Ryan Stutsman)
"""
-import os, re, time, cStringIO, sys, logging
+import os
-from autotest_lib.client.common_lib import global_config, error
-from autotest_lib.client.common_lib import host_protections
-from autotest_lib.client.bin import partition
+from autotest_lib.client.common_lib import hosts
from autotest_lib.server import utils
from autotest_lib.server.hosts import bootloader
-class Host(object):
+class Host(hosts.Host):
"""
This class represents a machine on which you can run programs.
@@ -54,15 +52,11 @@
"""
bootloader = None
- job = None
- DEFAULT_REBOOT_TIMEOUT = 1800
- WAIT_DOWN_REBOOT_TIMEOUT = 840
- WAIT_DOWN_REBOOT_WARNING = 540
- HOURS_TO_WAIT_FOR_RECOVERY = 2.5
def __init__(self, *args, **dargs):
- self._initialize(*args, **dargs)
+ super(Host, self).__init__(*args, **dargs)
+
self.start_loggers()
if self.job:
self.job.hosts.add(self)
@@ -70,409 +64,17 @@
def _initialize(self, target_file_owner=None,
*args, **dargs):
+ super(Host, self)._initialize(*args, **dargs)
+
self.serverdir = utils.get_server_dir()
self.monitordir = os.path.join(os.path.dirname(__file__), "monitors")
self.bootloader = bootloader.Bootloader(self)
self.env = {}
self.target_file_owner = target_file_owner
- self._already_repaired = []
- self._removed_files = False
-
def close(self):
+ super(Host, self).close()
+
if self.job:
self.job.hosts.discard(self)
-
-
- def setup(self):
- pass
-
-
- def run(self, command):
- raise NotImplementedError('Run not implemented!')
-
-
- def run_output(self, command, *args, **dargs):
- return self.run(command, *args, **dargs).stdout.rstrip()
-
-
- def reboot(self):
- raise NotImplementedError('Reboot not implemented!')
-
-
- def sysrq_reboot(self):
- raise NotImplementedError('Sysrq reboot not implemented!')
-
-
- def reboot_setup(self, *args, **dargs):
- pass
-
-
- def reboot_followup(self, *args, **dargs):
- pass
-
-
- def get_file(self, source, dest, delete_dest=False):
- raise NotImplementedError('Get file not implemented!')
-
-
- def send_file(self, source, dest, delete_dest=False):
- raise NotImplementedError('Send file not implemented!')
-
-
- def get_tmp_dir(self):
- raise NotImplementedError('Get temp dir not implemented!')
-
-
- def is_up(self):
- raise NotImplementedError('Is up not implemented!')
-
-
- def is_shutting_down(self):
- """ Indicates is a machine is currently shutting down. """
- runlevel = int(self.run("runlevel").stdout.strip().split()[1])
- return runlevel in (0, 6)
-
-
- def get_wait_up_processes(self):
- """ Gets the list of local processes to wait for in wait_up. """
- get_config = global_config.global_config.get_config_value
- proc_list = get_config("HOSTS", "wait_up_processes",
- default="").strip()
- processes = set(p.strip() for p in proc_list.split(","))
- processes.discard("")
- return processes
-
-
- def wait_up(self, timeout=None):
- raise NotImplementedError('Wait up not implemented!')
-
-
- def wait_down(self, timeout=None, warning_timer=None):
- raise NotImplementedError('Wait down not implemented!')
-
-
- def wait_for_restart(self, timeout=DEFAULT_REBOOT_TIMEOUT, **dargs):
- """ Wait for the host to come back from a reboot. This is a generic
- implementation based entirely on wait_up and wait_down. """
- if not self.wait_down(timeout=self.WAIT_DOWN_REBOOT_TIMEOUT,
- warning_timer=self.WAIT_DOWN_REBOOT_WARNING):
- self.record("ABORT", None, "reboot.verify", "shut down failed")
- raise error.AutoservShutdownError("Host did not shut down")
-
- self.wait_up(timeout)
- time.sleep(2) # this is needed for complete reliability
- if self.wait_up(timeout):
- self.record("GOOD", None, "reboot.verify")
- self.reboot_followup(**dargs)
- else:
- self.record("ABORT", None, "reboot.verify",
- "Host did not return from reboot")
- raise error.AutoservRebootError("Host did not return from reboot")
-
-
- def verify(self):
- pass
-
-
- def verify_hardware(self):
- pass
-
-
- def verify_software(self):
- pass
-
-
- def check_diskspace(self, path, gb):
- logging.info('Checking for >= %s GB of space under %s on machine %s',
- gb, path, self.hostname)
- df = self.run('df -mP %s | tail -1' % path).stdout.split()
- free_space_gb = int(df[3])/1000.0
- if free_space_gb < gb:
- raise error.AutoservDiskFullHostError(path, gb, free_space_gb)
- else:
- logging.info('Found %s GB >= %s GB of space under %s on machine %s',
- free_space_gb, gb, path, self.hostname)
-
-
- def get_open_func(self, use_cache=True):
- """
- Defines and returns a function that may be used instead of built-in
- open() to open and read files. The returned function is implemented
- by using self.run('cat <file>') and may cache the results for the same
- filename.
-
- @param use_cache Cache results of self.run('cat <filename>') for the
- same filename
-
- @return a function that can be used instead of built-in open()
- """
- cached_files = {}
-
- def open_func(filename):
- if not use_cache or filename not in cached_files:
- output = self.run('cat \'%s\'' % filename,
- stdout_tee=open('/dev/null', 'w')).stdout
- fd = cStringIO.StringIO(output)
-
- if not use_cache:
- return fd
-
- cached_files[filename] = fd
- else:
- cached_files[filename].seek(0)
-
- return cached_files[filename]
-
- return open_func
-
-
- def check_partitions(self, root_part, filter_func=None):
- """ Compare the contents of /proc/partitions with those of
- /proc/mounts and raise exception in case unmounted partitions are found
-
- root_part: in Linux /proc/mounts will never directly mention the root
- partition as being mounted on / instead it will say that /dev/root is
- mounted on /. Thus require this argument to filter out the root_part
- from the ones checked to be mounted
-
- filter_func: unnary predicate for additional filtering out of
- partitions required to be mounted
-
- Raise: error.AutoservHostError if unfiltered unmounted partition found
- """
-
- print 'Checking if non-swap partitions are mounted...'
-
- unmounted = partition.get_unmounted_partition_list(root_part,
- filter_func=filter_func, open_func=self.get_open_func())
- if unmounted:
- raise error.AutoservNotMountedHostError(
- 'Found unmounted partitions: %s' %
- [part.device for part in unmounted])
-
-
- def _repair_wait_for_reboot(self):
- TIMEOUT = int(self.HOURS_TO_WAIT_FOR_RECOVERY * 3600)
- if self.is_shutting_down():
- logging.info('Host is shutting down, waiting for a restart')
- self.wait_for_restart(TIMEOUT)
- else:
- self.wait_up(TIMEOUT)
-
-
- def _get_mountpoint(self, path):
- """Given a "path" get the mount point of the filesystem containing
- that path."""
- code = ('import os\n'
- # sanitize the path and resolve symlinks
- 'path = os.path.realpath(%r)\n'
- "while path != '/' and not os.path.ismount(path):\n"
- ' path, _ = os.path.split(path)\n'
- 'print path\n') % path
- return self.run('python2.4 -c "%s"' % code,
- stdout_tee=open(os.devnull, 'w')).stdout.rstrip()
-
-
- def erase_dir_contents(self, path, ignore_status=True, timeout=3600):
- """Empty a given directory path contents."""
- rm_cmd = 'find "%s" -mindepth 1 -maxdepth 1 -print0 | xargs -0 rm -rf'
- self.run(rm_cmd % path, ignore_status=ignore_status, timeout=timeout)
- self._removed_files = True
-
-
- def repair_full_disk(self, mountpoint):
- # it's safe to remove /tmp and /var/tmp, site specific overrides may
- # want to remove some other places too
- if mountpoint == self._get_mountpoint('/tmp'):
- self.erase_dir_contents('/tmp')
-
- if mountpoint == self._get_mountpoint('/var/tmp'):
- self.erase_dir_contents('/var/tmp')
-
-
- def _call_repair_func(self, err, func, *args, **dargs):
- for old_call in self._already_repaired:
- if old_call == (func, args, dargs):
- # re-raising the original exception because surrounding
- # error handling may want to try other ways to fix it
- logging.warn('Already done this (%s) repair procedure, '
- 're-raising the original exception.', func)
- raise err
-
- try:
- func(*args, **dargs)
- except error.AutoservHardwareRepairRequestedError:
- # let this special exception propagate
- raise
- except error.AutoservError:
- logging.exception('Repair failed but continuing in case it managed'
- ' to repair enough')
-
- self._already_repaired.append((func, args, dargs))
-
-
- def repair_filesystem_only(self):
- """perform file system repairs only"""
- while True:
- # try to repair specific problems
- try:
- logging.info('Running verify to find failures to repair...')
- self.verify()
- if self._removed_files:
- logging.info('Removed files, rebooting to release the'
- ' inodes')
- self.reboot()
- return # verify succeeded, then repair succeeded
- except error.AutoservHostIsShuttingDownError, err:
- logging.exception('verify failed')
- self._call_repair_func(err, self._repair_wait_for_reboot)
- except error.AutoservDiskFullHostError, err:
- logging.exception('verify failed')
- self._call_repair_func(err, self.repair_full_disk,
- self._get_mountpoint(err.path))
-
-
- def repair_software_only(self):
- """perform software repairs only"""
- while True:
- try:
- self.repair_filesystem_only()
- break
- except (error.AutoservSshPingHostError, error.AutoservSSHTimeout,
- error.AutoservSshPermissionDeniedError,
- error.AutoservDiskFullHostError), err:
- logging.exception('verify failed')
- logging.info('Trying to reinstall the machine')
- self._call_repair_func(err, self.machine_install)
-
-
- def repair_full(self):
- while True:
- try:
- self.repair_software_only()
- break
- except error.AutoservHardwareHostError, err:
- logging.exception('verify failed')
- # software repair failed, try hardware repair
- logging.info('Hardware problem found, '
- 'requesting hardware repairs')
- self._call_repair_func(err, self.request_hardware_repair)
-
-
- def cleanup(self):
- pass
-
-
- def machine_install(self):
- raise NotImplementedError('Machine install not implemented!')
-
-
- def install(self, installableObject):
- installableObject.install(self)
-
-
- def get_autodir(self):
- raise NotImplementedError('Get autodir not implemented!')
-
-
- def set_autodir(self):
- raise NotImplementedError('Set autodir not implemented!')
-
-
- def start_loggers(self):
- """ Called to start continuous host logging. """
- pass
-
-
- def stop_loggers(self):
- """ Called to stop continuous host logging. """
- pass
-
-
- # some extra methods simplify the retrieval of information about the
- # Host machine, with generic implementations based on run(). subclasses
- # should feel free to override these if they can provide better
- # implementations for their specific Host types
-
- def get_num_cpu(self):
- """ Get the number of CPUs in the host according to /proc/cpuinfo. """
-
- proc_cpuinfo = self.run("cat /proc/cpuinfo",
- stdout_tee=open('/dev/null', 'w')).stdout
- cpus = 0
- for line in proc_cpuinfo.splitlines():
- if line.startswith('processor'):
- cpus += 1
- return cpus
-
-
- def get_arch(self):
- """ Get the hardware architecture of the remote machine. """
- arch = self.run('/bin/uname -m').stdout.rstrip()
- if re.match(r'i\d86$', arch):
- arch = 'i386'
- return arch
-
-
- def get_kernel_ver(self):
- """ Get the kernel version of the remote machine. """
- return self.run('/bin/uname -r').stdout.rstrip()
-
-
- def get_cmdline(self):
- """ Get the kernel command line of the remote machine. """
- return self.run('cat /proc/cmdline').stdout.rstrip()
-
-
- def path_exists(self, path):
- """ Determine if path exists on the remote machine. """
- result = self.run('ls "%s" > /dev/null' % utils.sh_escape(path),
- ignore_status=True)
- return result.exit_status == 0
-
-
- # some extra helpers for doing job-related operations
-
- def record(self, *args, **dargs):
- """ Helper method for recording status logs against Host.job that
- silently becomes a NOP if Host.job is not available. The args and
- dargs are passed on to Host.job.record unchanged. """
- if self.job:
- self.job.record(*args, **dargs)
-
-
- def log_kernel(self):
- """ Helper method for logging kernel information into the status logs.
- Intended for cases where the "current" kernel is not really defined
- and we want to explicitly log it. Does nothing if this host isn't
- actually associated with a job. """
- if self.job:
- kernel = self.get_kernel_ver()
- self.job.record("INFO", None, None,
- optional_fields={"kernel": kernel})
-
-
- def log_reboot(self, reboot_func):
- """ Decorator for wrapping a reboot in a group for status
- logging purposes. The reboot_func parameter should be an actual
- function that carries out the reboot.
- """
- if self.job and not hasattr(self, "RUNNING_LOG_REBOOT"):
- self.RUNNING_LOG_REBOOT = True
- try:
- self.job.run_reboot(reboot_func, self.get_kernel_ver)
- finally:
- del self.RUNNING_LOG_REBOOT
- else:
- reboot_func()
-
-
- def request_hardware_repair(self):
- """ Should somehow request (send a mail?) for hardware repairs on
- this machine. The implementation can either return by raising the
- special error.AutoservHardwareRepairRequestedError exception or can
- try to wait until the machine is repaired and then return normally.
- """
- raise NotImplementedError("request_hardware_repair not implemented")