[autotest] only retry tests if they fail with TestFailRetry For tests that have known flaky failure modes, it is still desirable for the test to fail (without retries) when an unknown flaky failure occurs. Therefore, this CL changes the behavior of test retries, so that retries of a test are only attempted if the test fails with a TestFailRetry exception. Any other test failure will be treated as a failure regardless of retries. This means it is up to the tests in question to catch errors / exceptions caused by known flaky failure modes, and rethrow those as TestFailRetry. For the purposes of testing this change, some changes have been made to the dummyflake suite. CQ-DEPEND=Ibaa84f42beac52881cd34351e92474ef1457b15b BUG=chromium:224372 TEST=added new unit test for test.py run_remote_tests.sh --remote=<ip> suite:dummyflake; Suite runs as expected. In particular -- - client/.../dummy_Fail/control.retry_alwaysfail fails immediately even though the control file uses retries, because the failure is of type TestFail and hence not retry-able - The same applies to server/.../dummy_Fail/control.retry_failfast - client/.../dummy_Fail/control.retry_alwaysflake fails and gets retried 5 times, failing with a TestFailRetry each time such that it can be retried Change-Id: I4d1354cb410856c9de8b720c9f8310cb10d03156 Reviewed-on: https://gerrit.chromium.org/gerrit/46696 Commit-Queue: Aviv Keshet <[email protected]> Reviewed-by: Aviv Keshet <[email protected]> Tested-by: Aviv Keshet <[email protected]>

commit: 39164cad07a790017c84482bb5f68768b27313cf [log] [tgz]
author: Aviv Keshet <[email protected]> Wed Mar 27 15:08:33 2013 -0700
committer: ChromeBot <[email protected]> Mon Apr 01 17:21:12 2013 -0700
tree: 2ee1aaa38c6af55ca88011548b54254c502a9f8f
parent: 7f8a79f6eb3f334f016eadea565dd5e70c418c9b [diff]
diff --git a/client/common_lib/error.py b/client/common_lib/error.py
index 2432e2b..3a0985f 100644
--- a/client/common_lib/error.py
+++ b/client/common_lib/error.py

@@ -1,3 +1,5 @@
+#pylint: disable-msg=C0111
+
 """
 Internal global error types
 """
@@ -223,6 +225,12 @@
     exit_status = "WARN"
 
 
+class TestFailRetry(TestFail):
+    """Indicates that the test failed, but in a manner that may be retried
+    if test retries are enabled for this test."""
+    exit_status = "FAIL"
+
+
 class UnhandledTestError(TestError):
     """Indicates an unhandled error in a test."""
     def __init__(self, unhandled_exception):

diff --git a/client/common_lib/test.py b/client/common_lib/test.py
index d57d6a3..29b8480 100644
--- a/client/common_lib/test.py
+++ b/client/common_lib/test.py

@@ -16,6 +16,8 @@
 #       src             eg. tests/<test>/src
 #       tmpdir          eg. tmp/<tempname>_<testname.tag>
 
+#pylint: disable-msg=C0111
+
 import fcntl, os, re, sys, shutil, tempfile, time, traceback
 import logging
 
@@ -212,7 +214,7 @@
                 self._call_run_once(constraints, profile_only,
                                     postprocess_profiled_run, args, dargs)
                 break
-            except Exception as err:
+            except error.TestFailRetry as err:
                 if retry_run == max_runs:
                     raise
                 self.job.record('INFO', None, None, 'Run %s failed with %s' % (

diff --git a/client/common_lib/test_unittest.py b/client/common_lib/test_unittest.py
index 65b5785..c735184 100755
--- a/client/common_lib/test_unittest.py
+++ b/client/common_lib/test_unittest.py

@@ -1,5 +1,5 @@
 #!/usr/bin/python
-
+#pylint: disable-msg=C0111
 """Unit Tests for autotest.client.common_lib.test"""
 
 __author__ = '[email protected] (Gregory P. Smith)'
@@ -8,6 +8,7 @@
 import common
 from autotest_lib.client.common_lib import test
 from autotest_lib.client.common_lib.test_utils import mock
+from autotest_lib.client.common_lib import error as common_lib_error
 
 class TestTestCase(unittest.TestCase):
     class _neutered_base_test(test.base_test):
@@ -109,7 +110,8 @@
                                                           error)
             info_str = 'Run %s failed with %s' % (run, error)
             # On the final run we do not emit this message.
-            if run != self.test.job.test_retry:
+            if run != self.test.job.test_retry and isinstance(error,
+                                               common_lib_error.TestFailRetry):
                 self.test.job.record.expect_call('INFO', None, None, info_str)
 
 
@@ -124,7 +126,7 @@
         after_hook = self.god.create_mock_function('after_hook')
         self.test.register_before_iteration_hook(before_hook)
         self.test.register_after_iteration_hook(after_hook)
-        error = Exception('fail')
+        error = common_lib_error.TestFailRetry('fail')
         self._setup_failed_test_calls(self.test.job.test_retry+1, error)
         try:
             self.test._call_run_once_with_retry([], False, None, (1, 2),
@@ -135,6 +137,29 @@
         self.god.check_playback()
 
 
+    def test_call_run_once_with_retry_exception_unretryable(self):
+        """
+        Test call_run_once_with_retry duplicating a test that will always fail
+        with a non-retryable exception.
+        """
+        self.test.job.test_retry = 5
+        self.god.stub_function(self.test, 'drop_caches_between_iterations')
+        self.god.stub_function(self.test, 'run_once')
+        before_hook = self.god.create_mock_function('before_hook')
+        after_hook = self.god.create_mock_function('after_hook')
+        self.test.register_before_iteration_hook(before_hook)
+        self.test.register_after_iteration_hook(after_hook)
+        error = common_lib_error.TestFail('fail')
+        self._setup_failed_test_calls(1, error)
+        try:
+            self.test._call_run_once_with_retry([], False, None, (1, 2),
+                                                {'arg': 'val'})
+        except Exception as err:
+            if err != error:
+                raise
+        self.god.check_playback()
+
+
     def test_call_run_once_with_retry_exception_and_pass(self):
         """
         Test call_run_once_with_retry duplicating a test that fails at first
@@ -155,7 +180,7 @@
         self.test.register_after_iteration_hook(after_hook)
         self.god.stub_function(self.test.job, 'record')
         # tests the test._call_run_once implementation
-        error = Exception('fail')
+        error = common_lib_error.TestFailRetry('fail')
         self._setup_failed_test_calls(num_to_fail, error)
         # Passing call
         self.test._call_run_once.expect_call([], False, None, (1, 2),
commit	39164cad07a790017c84482bb5f68768b27313cf	[log] [tgz]
author	Aviv Keshet <[email protected]>	Wed Mar 27 15:08:33 2013 -0700
committer	ChromeBot <[email protected]>	Mon Apr 01 17:21:12 2013 -0700
tree	2ee1aaa38c6af55ca88011548b54254c502a9f8f
parent	7f8a79f6eb3f334f016eadea565dd5e70c418c9b [diff]