Add code to SSHHost.get_crashdumps to collect some basic data: - /var/log/messages - /var/log/monitor-ssh-reboot - dmesg This modifies the job execution code to only call the crashdump collection if the job fails (i.e. is aborted somehow). The dumps will all be put in a crashdumps.hostname directory. Risk: Medium Visibility: On job aborts we now wait a while for the host to come up and then collect crash logs. Signed-off-by: John Admanski <[email protected]> git-svn-id: http://test.kernel.org/svn/autotest/trunk@2176 592f7852-d20e-0410-864c-8624ca9c26a4

commit: cdd0c40560c8d514a56834b31063bcb5ec50eccf [log] [tgz]
author: jadmanski <jadmanski@592f7852-d20e-0410-864c-8624ca9c26a4> Fri Sep 19 21:21:31 2008 +0000
committer: jadmanski <jadmanski@592f7852-d20e-0410-864c-8624ca9c26a4> Fri Sep 19 21:21:31 2008 +0000
tree: 392045f1673a9b74fa5452a6bfdad0a920f63f13
parent: 6d858fa16693074e41b51b8068aa0d5a670c6e56 [diff] [blame]
diff --git a/server/server_job.py b/server/server_job.py
index 497eaa5..2d32d8b 100755
--- a/server/server_job.py
+++ b/server/server_job.py

@@ -76,6 +76,20 @@
 job.parallel_simple(crashdumps, machines, log=False)
 """
 
+
+crashinfo = """
+def crashinfo(machine):
+    hostname, user, passwd, port = parse_machine(machine, ssh_user,
+                                                 ssh_port, ssh_pass)
+
+    host = hosts.create_host(hostname, user=user, port=port,
+                             initialize=False, password=passwd)
+    host.get_crashinfo(test_start_time)
+
+job.parallel_simple(crashinfo, machines, log=False)
+"""
+
+
 reboot_segment="""\
 def reboot(machine):
     hostname, user, passwd, port = parse_machine(machine, ssh_user,
@@ -258,11 +272,11 @@
             raise error.AutoservError(
                 'No machines specified to verify')
         try:
-            namespace = {'machines' : self.machines, 'job' : self, \
-                                     'ssh_user' : self.ssh_user, \
-                                     'ssh_port' : self.ssh_port, \
-                                     'ssh_pass' : self.ssh_pass}
-            self._execute_code(preamble + verify, namespace, namespace)
+            namespace = {'machines' : self.machines, 'job' : self,
+                         'ssh_user' : self.ssh_user,
+                         'ssh_port' : self.ssh_port,
+                         'ssh_pass' : self.ssh_pass}
+            self._execute_code(preamble + verify, namespace)
         except Exception, e:
             msg = ('Verify failed\n' + str(e) + '\n'
                     + traceback.format_exc())
@@ -279,7 +293,7 @@
                      'protection_level': host_protection}
         # no matter what happens during repair, go on to try to reverify
         try:
-            self._execute_code(preamble + repair, namespace, namespace)
+            self._execute_code(preamble + repair, namespace)
         except Exception, exc:
             print 'Exception occured during repair'
             traceback.print_exc()
@@ -371,9 +385,10 @@
 
         self.enable_external_logging()
         status_log = os.path.join(self.resultdir, 'status.log')
+        collect_crashinfo = True
         try:
             if install_before and machines:
-                self._execute_code(preamble + install, namespace, namespace)
+                self._execute_code(preamble + install, namespace)
             if self.client:
                 namespace['control'] = self.control
                 open('control', 'w').write(self.control)
@@ -382,20 +397,23 @@
             else:
                 open('control.srv', 'w').write(self.control)
                 server_control = self.control
-            self._execute_code(preamble + server_control, namespace,
-                                   namespace)
+            self._execute_code(preamble + server_control, namespace)
 
+            # disable crashinfo collection if we get this far without error
+            collect_crashinfo = False
         finally:
-            if machines and collect_crashdumps:
+            if machines and (collect_crashdumps or collect_crashinfo):
                 namespace['test_start_time'] = test_start_time
-                self._execute_code(preamble + crashdumps, namespace,
-                                       namespace)
+                if collect_crashinfo:
+                    script = crashinfo # includes crashdumps
+                else:
+                    script = crashdumps
+                self._execute_code(preamble + script, namespace)
             self.disable_external_logging()
             if reboot and machines:
-                self._execute_code(preamble + reboot_segment,namespace,
-                                       namespace)
+                self._execute_code(preamble + reboot_segment, namespace)
             if install_after and machines:
-                self._execute_code(preamble + install, namespace, namespace)
+                self._execute_code(preamble + install, namespace)
 
 
     def run_test(self, url, *args, **dargs):
@@ -662,8 +680,8 @@
         self.__parse_status(lines)
 
 
-    def _execute_code(self, code, global_scope, local_scope):
-        exec(code, global_scope, local_scope)
+    def _execute_code(self, code, scope):
+        exec(code, scope, scope)
 
 
     def _record(self, status_code, subdir, operation, status='',
commit	cdd0c40560c8d514a56834b31063bcb5ec50eccf	[log] [tgz]
author	jadmanski <jadmanski@592f7852-d20e-0410-864c-8624ca9c26a4>	Fri Sep 19 21:21:31 2008 +0000
committer	jadmanski <jadmanski@592f7852-d20e-0410-864c-8624ca9c26a4>	Fri Sep 19 21:21:31 2008 +0000
tree	392045f1673a9b74fa5452a6bfdad0a920f63f13
parent	6d858fa16693074e41b51b8068aa0d5a670c6e56 [diff] [blame]