Autotest: Increase run timeout granularity

This CL modifies all references to max_runtime_hrs to instead be max_runtime_mins. This includes the Django models, RPC interfaces, the cleanup timeout code, and the frontend Java views. The frontend Java code will need to be recompiled once this commits to prevent the frontend from breaking.

The cleanup timeout pathway has been adjusted to find all timed-out jobs by minute, and has been changed to run every 5 minutes instead of every hour as before.

BUG=chromium-os:36067
TEST=Ran on my local AFE, ensured that jobs can still be created correctly, and that jobs with short timeouts do indeed get aborted when expected.

Change-Id: Idfdeb3f1d4947d6b2e6b48127a31db535704e972
Reviewed-on: https://gerrit.chromium.org/gerrit/37827
Tested-by: Simran Basi <[email protected]>
Reviewed-by: Scott Zawalski <[email protected]>
Reviewed-by: Simran Basi <[email protected]>
Commit-Ready: Simran Basi <[email protected]>

commit: 34217022229b755bc1ee52f83665acba76bd5044 [log] [tgz]
author: Simran Basi <[email protected]> Tue Nov 06 13:43:15 2012 -0800
committer: Gerrit <[email protected]> Mon Nov 12 10:08:47 2012 -0800
tree: 9f6feb2227ed06bd6fe7a95d214a17d093d7e0eb
parent: 95ee35197987dde4874a3e7044112674549d602e [diff]
diff --git a/cli/job.py b/cli/job.py
index 51ceb92..243051b 100644
--- a/cli/job.py
+++ b/cli/job.py

@@ -423,7 +423,7 @@
         self.parser.add_option('-o', '--timeout', help='Job timeout in hours.',
                                metavar='TIMEOUT')
         self.parser.add_option('--max_runtime',
-                               help='Job maximum runtime in hours')
+                               help='Job maximum runtime in minutes')
 
         self.parser.add_option('-i', '--image',
                                help='OS image to install before running the '
@@ -504,7 +504,7 @@
         if options.timeout:
             self.data['timeout'] = options.timeout
         if options.max_runtime:
-            self.data['max_runtime_hrs'] = options.max_runtime
+            self.data['max_runtime_mins'] = options.max_runtime
 
         if options.atomic_group:
             self.data['atomic_group_name'] = options.atomic_group

diff --git a/cli/job_unittest.py b/cli/job_unittest.py
index 484be47..0be8138 100755
--- a/cli/job_unittest.py
+++ b/cli/job_unittest.py

@@ -1062,11 +1062,11 @@
 
     def test_execute_create_job_with_max_runtime(self):
         data = self.data.copy()
-        data['max_runtime_hrs'] = '222'
+        data['max_runtime_mins'] = '13320'
         file_temp = cli_mock.create_file(self.ctrl_file)
         self.run_cmd(argv=['atest', 'job', 'create', '-f', file_temp.name,
-                           'test_job0', '-m', 'host0', '--max_runtime', '222',
-                           '--ignore_site_file'],
+                           'test_job0', '-m', 'host0', '--max_runtime',
+                           '13320', '--ignore_site_file'],
                      rpcs=[('create_job', data, True, 42)],
                      out_words_ok=['test_job0', 'Created'],)
         file_temp.clean()
@@ -1276,7 +1276,7 @@
                     'control_type': u'Server',
                     'dependencies': [],
                     'email_list': u'',
-                    'max_runtime_hrs': 480,
+                    'max_runtime_mins': 28800,
                     'parse_failed_repair': True,
                     'priority': u'Medium',
                     'reboot_after': u'Always',

diff --git a/database/schema_051.sql b/database/schema_051.sql
index 604651d..2b88086 100644
--- a/database/schema_051.sql
+++ b/database/schema_051.sql

@@ -297,6 +297,7 @@
   `reboot_after` smallint(6) NOT NULL,
   `parse_failed_repair` tinyint(1) NOT NULL default '1',
   `max_runtime_hrs` int(11) NOT NULL,
+  `max_runtime_mins` int(11) NOT NULL,
   PRIMARY KEY  (`id`)
 ) ENGINE=InnoDB DEFAULT CHARSET=latin1;
 SET character_set_client = @saved_cs_client;

diff --git a/frontend/afe/doctests/001_rpc_test.txt b/frontend/afe/doctests/001_rpc_test.txt
index 39ab599..20b5fe7 100644
--- a/frontend/afe/doctests/001_rpc_test.txt
+++ b/frontend/afe/doctests/001_rpc_test.txt

@@ -538,7 +538,7 @@
 ...         'priority': 'Low',
 ...         'synch_count': 1,
 ...         'timeout': 72,
-...         'max_runtime_hrs': 72,
+...         'max_runtime_mins': 72*60,
 ...         'run_verify': 1,
 ...         'email_list': '',
 ...         'reboot_before': 'If dirty',

diff --git a/frontend/afe/models.py b/frontend/afe/models.py
index f560837..4193353 100644
--- a/frontend/afe/models.py
+++ b/frontend/afe/models.py

@@ -893,7 +893,9 @@
     synch_count: how many hosts should be used per autoserv execution
     run_verify: Whether or not to run the verify phase
     timeout: hours from queuing time until job times out
-    max_runtime_hrs: hours from job starting time until job times out
+    max_runtime_hrs: DEPRECATED - hours from job starting time until job
+                     times out
+    max_runtime_mins: minutes from job starting time until job times out
     email_list: list of people to email on completion delimited by any of:
                 white space, ',', ':', ';'
     dependency_labels: many-to-many relationship with labels corresponding to
@@ -906,8 +908,12 @@
     """
     DEFAULT_TIMEOUT = global_config.global_config.get_config_value(
         'AUTOTEST_WEB', 'job_timeout_default', default=240)
+    # MAX_RUNTIME_HRS is deprecated. Will be removed after switch to mins is
+    # completed.
     DEFAULT_MAX_RUNTIME_HRS = global_config.global_config.get_config_value(
         'AUTOTEST_WEB', 'job_max_runtime_hrs_default', default=72)
+    DEFAULT_MAX_RUNTIME_MINS = global_config.global_config.get_config_value(
+        'AUTOTEST_WEB', 'job_max_runtime_mins_default', default=72*60)
     DEFAULT_PARSE_FAILED_REPAIR = global_config.global_config.get_config_value(
         'AUTOTEST_WEB', 'parse_failed_repair_default', type=bool,
         default=False)
@@ -940,7 +946,10 @@
         default=DEFAULT_REBOOT_AFTER)
     parse_failed_repair = dbmodels.BooleanField(
         default=DEFAULT_PARSE_FAILED_REPAIR)
+    # max_runtime_hrs is deprecated. Will be removed after switch to mins is
+    # completed.
     max_runtime_hrs = dbmodels.IntegerField(default=DEFAULT_MAX_RUNTIME_HRS)
+    max_runtime_mins = dbmodels.IntegerField(default=DEFAULT_MAX_RUNTIME_MINS)
     drone_set = dbmodels.ForeignKey(DroneSet, null=True, blank=True)
 
     parameterized_job = dbmodels.ForeignKey(ParameterizedJob, null=True,
@@ -1018,7 +1027,7 @@
             control_type=options['control_type'],
             synch_count=options.get('synch_count'),
             timeout=options.get('timeout'),
-            max_runtime_hrs=options.get('max_runtime_hrs'),
+            max_runtime_mins=options.get('max_runtime_mins'),
             run_verify=options.get('run_verify'),
             email_list=options.get('email_list'),
             reboot_before=options.get('reboot_before'),

diff --git a/frontend/afe/resources.py b/frontend/afe/resources.py
index b783115..83a658b 100644
--- a/frontend/afe/resources.py
+++ b/frontend/afe/resources.py

@@ -436,7 +436,7 @@
             'machines_per_execution': 1,
             'run_verify': bool(_job_fields['run_verify'].default),
             'timeout_hrs': _job_fields['timeout'].default,
-            'maximum_runtime_hrs': _job_fields['max_runtime_hrs'].default,
+            'maximum_runtime_mins': _job_fields['max_runtime_mins'].default,
             'cleanup_before_job':
                 model_attributes.RebootBefore.get_string(
                     models.DEFAULT_REBOOT_BEFORE),
@@ -476,7 +476,7 @@
                 'machines_per_execution': job.synch_count,
                 'run_verify': bool(job.run_verify),
                 'timeout_hrs': job.timeout,
-                'maximum_runtime_hrs': job.max_runtime_hrs,
+                'maximum_runtime_mins': job.max_runtime_mins,
                 'cleanup_before_job':
                     model_attributes.RebootBefore.get_string(job.reboot_before),
                 'cleanup_after_job':
@@ -681,7 +681,7 @@
                 control_type=control_type,
                 is_template=input_dict.get('is_template', None),
                 timeout=execution_info.get('timeout_hrs'),
-                max_runtime_hrs=execution_info.get('maximum_runtime_hrs'),
+                max_runtime_mins=execution_info.get('maximum_runtime_mins'),
                 synch_count=execution_info.get('machines_per_execution'),
                 run_verify=execution_info.get('run_verify'),
                 email_list=input_dict.get('email_list', None),

diff --git a/frontend/afe/rpc_interface.py b/frontend/afe/rpc_interface.py
index 9c01d2c..2ddb72d 100644
--- a/frontend/afe/rpc_interface.py
+++ b/frontend/afe/rpc_interface.py

@@ -414,7 +414,7 @@
                              meta_hosts=(), one_time_hosts=(),
                              atomic_group_name=None, synch_count=None,
                              is_template=False, timeout=None,
-                             max_runtime_hrs=None, run_verify=True,
+                             max_runtime_mins=None, run_verify=True,
                              email_list='', dependencies=(), reboot_before=None,
                              reboot_after=None, parse_failed_repair=None,
                              hostless=False, keyvals=None, drone_set=None):
@@ -496,7 +496,7 @@
 def create_job(name, priority, control_file, control_type,
                hosts=(), meta_hosts=(), one_time_hosts=(),
                atomic_group_name=None, synch_count=None, is_template=False,
-               timeout=None, max_runtime_hrs=None, run_verify=True,
+               timeout=None, max_runtime_mins=None, run_verify=True,
                email_list='', dependencies=(), reboot_before=None,
                reboot_after=None, parse_failed_repair=None, hostless=False,
                keyvals=None, drone_set=None, image=None):
@@ -512,7 +512,7 @@
     given this value is treated as a minimum.
     @param is_template If true then create a template job.
     @param timeout Hours after this call returns until the job times out.
-    @param max_runtime_hrs Hours from job starting time until job times out
+    @param max_runtime_mins Minutes from job starting time until job times out
     @param run_verify Should the host be verified before running the test?
     @param email_list String containing emails to mail when the job is done
     @param dependencies List of label names on which this job depends
@@ -860,7 +860,8 @@
     result['host_statuses'] = sorted(models.Host.Status.names)
     result['job_statuses'] = sorted(models.HostQueueEntry.Status.names)
     result['job_timeout_default'] = models.Job.DEFAULT_TIMEOUT
-    result['job_max_runtime_hrs_default'] = models.Job.DEFAULT_MAX_RUNTIME_HRS
+    result['job_max_runtime_mins_default'] = (
+        models.Job.DEFAULT_MAX_RUNTIME_MINS)
     result['parse_failed_repair_default'] = bool(
         models.Job.DEFAULT_PARSE_FAILED_REPAIR)
     result['reboot_before_options'] = model_attributes.RebootBefore.names

diff --git a/frontend/afe/rpc_utils.py b/frontend/afe/rpc_utils.py
index 7250d92..a2781bd 100644
--- a/frontend/afe/rpc_utils.py
+++ b/frontend/afe/rpc_utils.py

@@ -638,7 +638,7 @@
 def create_job_common(name, priority, control_type, control_file=None,
                       hosts=(), meta_hosts=(), one_time_hosts=(),
                       atomic_group_name=None, synch_count=None,
-                      is_template=False, timeout=None, max_runtime_hrs=None,
+                      is_template=False, timeout=None, max_runtime_mins=None,
                       run_verify=True, email_list='', dependencies=(),
                       reboot_before=None, reboot_after=None,
                       parse_failed_repair=None, hostless=False, keyvals=None,
@@ -735,7 +735,7 @@
                    control_type=control_type,
                    is_template=is_template,
                    timeout=timeout,
-                   max_runtime_hrs=max_runtime_hrs,
+                   max_runtime_mins=max_runtime_mins,
                    synch_count=synch_count,
                    run_verify=run_verify,
                    email_list=email_list,

diff --git a/frontend/client/src/autotest/afe/AfeUtils.java b/frontend/client/src/autotest/afe/AfeUtils.java
index f435a0a..e78b0af 100644
--- a/frontend/client/src/autotest/afe/AfeUtils.java
+++ b/frontend/client/src/autotest/afe/AfeUtils.java

@@ -218,7 +218,7 @@
         args.put("control_type", new JSONString(TestSelector.SERVER_TYPE));
         args.put("synch_count", controlInfo.get("synch_count"));
         args.put("timeout", staticData.getData("job_timeout_default"));
-        args.put("max_runtime_hrs", staticData.getData("job_max_runtime_hrs_default"));
+        args.put("max_runtime_mins", staticData.getData("job_max_runtime_mins_default"));
         args.put("run_verify", JSONBoolean.getInstance(false));
         args.put("parse_failed_repair", JSONBoolean.getInstance(true));
         args.put("reboot_before", rebootBefore);

diff --git a/frontend/client/src/autotest/afe/JobDetailView.java b/frontend/client/src/autotest/afe/JobDetailView.java
index 36bc5ff..362de44 100644
--- a/frontend/client/src/autotest/afe/JobDetailView.java
+++ b/frontend/client/src/autotest/afe/JobDetailView.java

@@ -129,7 +129,7 @@
                     imageUrlString = Utils.jsonToString(jobObject.get("image")).trim();
                 }
                 showText(imageUrlString, "view_image_url");
-                showField(jobObject, "max_runtime_hrs", "view_max_runtime");
+                showField(jobObject, "max_runtime_mins", "view_max_runtime");
                 showField(jobObject, "email_list", "view_email_list");
                 showText(runVerify, "view_run_verify");
                 showField(jobObject, "reboot_before", "view_reboot_before");

diff --git a/frontend/client/src/autotest/afe/create/CreateJobViewPresenter.java b/frontend/client/src/autotest/afe/create/CreateJobViewPresenter.java
index 3c9c476..e0cf25c 100644
--- a/frontend/client/src/autotest/afe/create/CreateJobViewPresenter.java
+++ b/frontend/client/src/autotest/afe/create/CreateJobViewPresenter.java

@@ -144,7 +144,7 @@
         display.getPriorityList().selectByName(priority);
 
         display.getTimeout().setText(Utils.jsonToString(jobObject.get("timeout")));
-        display.getMaxRuntime().setText(Utils.jsonToString(jobObject.get("max_runtime_hrs")));
+        display.getMaxRuntime().setText(Utils.jsonToString(jobObject.get("max_runtime_mins")));
         display.getEmailList().setText(
                 jobObject.get("email_list").isString().stringValue());
 
@@ -527,7 +527,7 @@
         display.getImageUrl().setText("");
         display.getTimeout().setText(Utils.jsonToString(repository.getData("job_timeout_default")));
         display.getMaxRuntime().setText(
-                Utils.jsonToString(repository.getData("job_max_runtime_hrs_default")));
+                Utils.jsonToString(repository.getData("job_max_runtime_mins_default")));
         display.getEmailList().setText("");
         testSelector.reset();
         display.getSkipVerify().setValue(false);
@@ -579,7 +579,7 @@
                          new JSONString(controlTypeSelect.getControlType()));
                 args.put("synch_count", synchCount);
                 args.put("timeout", new JSONNumber(timeoutValue));
-                args.put("max_runtime_hrs", new JSONNumber(maxRuntimeValue));
+                args.put("max_runtime_mins", new JSONNumber(maxRuntimeValue));
                 args.put("email_list", new JSONString(display.getEmailList().getText()));
                 args.put("run_verify", JSONBoolean.getInstance(
                         !display.getSkipVerify().getValue()));

diff --git a/frontend/client/src/autotest/public/AfeClient.html b/frontend/client/src/autotest/public/AfeClient.html
index d1cb538..b3a3e53 100644
--- a/frontend/client/src/autotest/public/AfeClient.html
+++ b/frontend/client/src/autotest/public/AfeClient.html

@@ -63,7 +63,7 @@
           <span class="field-name" style="color:red">Image URL:</span>
           <span id="view_image_url"></span><br>
           <span class="field-name">Max runtime:</span>
-          <span id="view_max_runtime"></span> hours<br>
+          <span id="view_max_runtime"></span> minutes<br>
           <span class="field-name">Email List:</span>
           <span id="view_email_list"></span><br>
           <span class="field-name">Run verify:</span>
@@ -125,7 +125,7 @@
               <td id="create_image_url"></td><td></td></tr>
           <tr><td class="field-name">Timeout (hours):</td>
               <td id="create_timeout"></td><td></td></tr>
-          <tr><td class="field-name">Max runtime (hours):</td>
+          <tr><td class="field-name">Max runtime (minutes):</td>
               <td id="create_max_runtime"></td><td></td></tr>
           <tr><td class="field-name">Email List:</td>
               <td id="create_email_list"></td><td></td></tr>

diff --git a/global_config.ini b/global_config.ini
index 30b5d1d..a3b09c9 100644
--- a/global_config.ini
+++ b/global_config.ini

@@ -5,7 +5,7 @@
 user: chromeosqa-admin
 password: USE SHADOW PASSWORD
 job_timeout_default: 24
-job_max_runtime_hrs_default: 24
+job_max_runtime_mins_default: 1440
 parse_failed_repair_default: 0
 # Only set this if your server is not 'http://[SERVER] hostname/afe/'
 #base_url: http://your_autotest_server/afe/
@@ -72,7 +72,7 @@
 max_parse_processes: 100
 max_transfer_processes: 50
 tick_pause_sec: 5
-clean_interval_minutes: 60
+clean_interval_minutes: 5
 drones: atlantis2
 atlantis2_max_processes: 600
 drone_installation_directory: /usr/local/autotest

diff --git a/scheduler/monitor_db_cleanup.py b/scheduler/monitor_db_cleanup.py
index ada1edd..47cbb36 100644
--- a/scheduler/monitor_db_cleanup.py
+++ b/scheduler/monitor_db_cleanup.py

@@ -78,7 +78,8 @@
             FROM afe_host_queue_entries AS hqe
             INNER JOIN afe_jobs ON (hqe.job_id = afe_jobs.id)
             WHERE NOT hqe.complete AND NOT hqe.aborted AND
-            hqe.started_on + INTERVAL afe_jobs.max_runtime_hrs HOUR < NOW()""")
+            hqe.started_on + INTERVAL afe_jobs.max_runtime_mins MINUTE <
+            NOW()""")
         query = models.HostQueueEntry.objects.filter(
             id__in=[row[0] for row in rows])
         for queue_entry in query.distinct():

diff --git a/server/frontend.py b/server/frontend.py
index 83d7a8e..b2b27c2 100644
--- a/server/frontend.py
+++ b/server/frontend.py

@@ -355,7 +355,7 @@
     def run_test_suites(self, pairings, kernel, kernel_label=None,
                         priority='Medium', wait=True, poll_interval=10,
                         email_from=None, email_to=None, timeout=168,
-                        max_runtime_hrs=168, kernel_cmdline=None):
+                        max_runtime_mins=10080, kernel_cmdline=None):
         """
         Run a list of test suites on a particular kernel.
 
@@ -379,7 +379,7 @@
                 new_job = self.invoke_test(pairing, kernel, kernel_label,
                                            priority, timeout=timeout,
                                            kernel_cmdline=kernel_cmdline,
-                                           max_runtime_hrs=max_runtime_hrs)
+                                           max_runtime_mins=max_runtime_mins)
                 if not new_job:
                     continue
                 jobs.append(new_job)
commit	34217022229b755bc1ee52f83665acba76bd5044	[log] [tgz]
author	Simran Basi <[email protected]>	Tue Nov 06 13:43:15 2012 -0800
committer	Gerrit <[email protected]>	Mon Nov 12 10:08:47 2012 -0800
tree	9f6feb2227ed06bd6fe7a95d214a17d093d7e0eb
parent	95ee35197987dde4874a3e7044112674549d602e [diff]