[autotest] only retry tests if they fail with TestFailRetry

For tests that have known flaky failure modes, it is still desirable for
the test to fail (without retries) when an unknown flaky failure occurs.
Therefore, this CL changes the behavior of test retries, so that retries
of a test are only attempted if the test fails with a TestFailRetry
exception. Any other test failure will be treated as a failure
regardless of retries.

This means it is up to the tests in question to catch errors /
exceptions caused by known flaky failure modes, and rethrow those as
TestFailRetry.

For the purposes of testing this change, some changes have been made to
the dummyflake suite.

CQ-DEPEND=Ibaa84f42beac52881cd34351e92474ef1457b15b

BUG=chromium:224372
TEST=added new unit test for test.py
run_remote_tests.sh --remote=<ip> suite:dummyflake;
Suite runs as expected. In particular --
 - client/.../dummy_Fail/control.retry_alwaysfail fails immediately even
   though the control file uses retries, because the failure is of type
   TestFail and hence not retry-able
 - The same applies to server/.../dummy_Fail/control.retry_failfast
 - client/.../dummy_Fail/control.retry_alwaysflake fails and gets
   retried 5 times, failing with a TestFailRetry each time such that it
   can be retried

Change-Id: I4d1354cb410856c9de8b720c9f8310cb10d03156
Reviewed-on: https://gerrit.chromium.org/gerrit/46696
Commit-Queue: Aviv Keshet <[email protected]>
Reviewed-by: Aviv Keshet <[email protected]>
Tested-by: Aviv Keshet <[email protected]>
diff --git a/client/common_lib/error.py b/client/common_lib/error.py
index 2432e2b..3a0985f 100644
--- a/client/common_lib/error.py
+++ b/client/common_lib/error.py
@@ -1,3 +1,5 @@
+#pylint: disable-msg=C0111
+
 """
 Internal global error types
 """
@@ -223,6 +225,12 @@
     exit_status = "WARN"
 
 
+class TestFailRetry(TestFail):
+    """Indicates that the test failed, but in a manner that may be retried
+    if test retries are enabled for this test."""
+    exit_status = "FAIL"
+
+
 class UnhandledTestError(TestError):
     """Indicates an unhandled error in a test."""
     def __init__(self, unhandled_exception):
diff --git a/client/common_lib/test.py b/client/common_lib/test.py
index d57d6a3..29b8480 100644
--- a/client/common_lib/test.py
+++ b/client/common_lib/test.py
@@ -16,6 +16,8 @@
 #       src             eg. tests/<test>/src
 #       tmpdir          eg. tmp/<tempname>_<testname.tag>
 
+#pylint: disable-msg=C0111
+
 import fcntl, os, re, sys, shutil, tempfile, time, traceback
 import logging
 
@@ -212,7 +214,7 @@
                 self._call_run_once(constraints, profile_only,
                                     postprocess_profiled_run, args, dargs)
                 break
-            except Exception as err:
+            except error.TestFailRetry as err:
                 if retry_run == max_runs:
                     raise
                 self.job.record('INFO', None, None, 'Run %s failed with %s' % (
diff --git a/client/common_lib/test_unittest.py b/client/common_lib/test_unittest.py
index 65b5785..c735184 100755
--- a/client/common_lib/test_unittest.py
+++ b/client/common_lib/test_unittest.py
@@ -1,5 +1,5 @@
 #!/usr/bin/python
-
+#pylint: disable-msg=C0111
 """Unit Tests for autotest.client.common_lib.test"""
 
 __author__ = '[email protected] (Gregory P. Smith)'
@@ -8,6 +8,7 @@
 import common
 from autotest_lib.client.common_lib import test
 from autotest_lib.client.common_lib.test_utils import mock
+from autotest_lib.client.common_lib import error as common_lib_error
 
 class TestTestCase(unittest.TestCase):
     class _neutered_base_test(test.base_test):
@@ -109,7 +110,8 @@
                                                           error)
             info_str = 'Run %s failed with %s' % (run, error)
             # On the final run we do not emit this message.
-            if run != self.test.job.test_retry:
+            if run != self.test.job.test_retry and isinstance(error,
+                                               common_lib_error.TestFailRetry):
                 self.test.job.record.expect_call('INFO', None, None, info_str)
 
 
@@ -124,7 +126,7 @@
         after_hook = self.god.create_mock_function('after_hook')
         self.test.register_before_iteration_hook(before_hook)
         self.test.register_after_iteration_hook(after_hook)
-        error = Exception('fail')
+        error = common_lib_error.TestFailRetry('fail')
         self._setup_failed_test_calls(self.test.job.test_retry+1, error)
         try:
             self.test._call_run_once_with_retry([], False, None, (1, 2),
@@ -135,6 +137,29 @@
         self.god.check_playback()
 
 
+    def test_call_run_once_with_retry_exception_unretryable(self):
+        """
+        Test call_run_once_with_retry duplicating a test that will always fail
+        with a non-retryable exception.
+        """
+        self.test.job.test_retry = 5
+        self.god.stub_function(self.test, 'drop_caches_between_iterations')
+        self.god.stub_function(self.test, 'run_once')
+        before_hook = self.god.create_mock_function('before_hook')
+        after_hook = self.god.create_mock_function('after_hook')
+        self.test.register_before_iteration_hook(before_hook)
+        self.test.register_after_iteration_hook(after_hook)
+        error = common_lib_error.TestFail('fail')
+        self._setup_failed_test_calls(1, error)
+        try:
+            self.test._call_run_once_with_retry([], False, None, (1, 2),
+                                                {'arg': 'val'})
+        except Exception as err:
+            if err != error:
+                raise
+        self.god.check_playback()
+
+
     def test_call_run_once_with_retry_exception_and_pass(self):
         """
         Test call_run_once_with_retry duplicating a test that fails at first
@@ -155,7 +180,7 @@
         self.test.register_after_iteration_hook(after_hook)
         self.god.stub_function(self.test.job, 'record')
         # tests the test._call_run_once implementation
-        error = Exception('fail')
+        error = common_lib_error.TestFailRetry('fail')
         self._setup_failed_test_calls(num_to_fail, error)
         # Passing call
         self.test._call_run_once.expect_call([], False, None, (1, 2),