Autotest: Increase run timeout granularity
This CL changes all references to max_runtime_hrs to max_runtime_mins
instead. This includes the Django models, RPC interfaces, the cleanup
timeout code, and the frontend Java views.
The frontend Java code will need to be recompiled once this change is
committed, to prevent the frontend from breaking.
The cleanup timeout pathway has been adjusted to find all timed-out jobs
by minute, and has been changed to run every 5 minutes instead of every
hour as before.
BUG=chromium-os:36067
TEST=Ran on my local AFE; ensured that jobs can still be created correctly
and that jobs with short timeouts do indeed get aborted when expected.
Change-Id: Idfdeb3f1d4947d6b2e6b48127a31db535704e972
Reviewed-on: https://gerrit.chromium.org/gerrit/37827
Tested-by: Simran Basi <[email protected]>
Reviewed-by: Scott Zawalski <[email protected]>
Reviewed-by: Simran Basi <[email protected]>
Commit-Ready: Simran Basi <[email protected]>
diff --git a/cli/job.py b/cli/job.py
index 51ceb92..243051b 100644
--- a/cli/job.py
+++ b/cli/job.py
@@ -423,7 +423,7 @@
self.parser.add_option('-o', '--timeout', help='Job timeout in hours.',
metavar='TIMEOUT')
self.parser.add_option('--max_runtime',
- help='Job maximum runtime in hours')
+ help='Job maximum runtime in minutes')
self.parser.add_option('-i', '--image',
help='OS image to install before running the '
@@ -504,7 +504,7 @@
if options.timeout:
self.data['timeout'] = options.timeout
if options.max_runtime:
- self.data['max_runtime_hrs'] = options.max_runtime
+ self.data['max_runtime_mins'] = options.max_runtime
if options.atomic_group:
self.data['atomic_group_name'] = options.atomic_group
diff --git a/cli/job_unittest.py b/cli/job_unittest.py
index 484be47..0be8138 100755
--- a/cli/job_unittest.py
+++ b/cli/job_unittest.py
@@ -1062,11 +1062,11 @@
def test_execute_create_job_with_max_runtime(self):
data = self.data.copy()
- data['max_runtime_hrs'] = '222'
+ data['max_runtime_mins'] = '13320'
file_temp = cli_mock.create_file(self.ctrl_file)
self.run_cmd(argv=['atest', 'job', 'create', '-f', file_temp.name,
- 'test_job0', '-m', 'host0', '--max_runtime', '222',
- '--ignore_site_file'],
+ 'test_job0', '-m', 'host0', '--max_runtime',
+ '13320', '--ignore_site_file'],
rpcs=[('create_job', data, True, 42)],
out_words_ok=['test_job0', 'Created'],)
file_temp.clean()
@@ -1276,7 +1276,7 @@
'control_type': u'Server',
'dependencies': [],
'email_list': u'',
- 'max_runtime_hrs': 480,
+ 'max_runtime_mins': 28800,
'parse_failed_repair': True,
'priority': u'Medium',
'reboot_after': u'Always',
diff --git a/database/schema_051.sql b/database/schema_051.sql
index 604651d..2b88086 100644
--- a/database/schema_051.sql
+++ b/database/schema_051.sql
@@ -297,6 +297,7 @@
`reboot_after` smallint(6) NOT NULL,
`parse_failed_repair` tinyint(1) NOT NULL default '1',
`max_runtime_hrs` int(11) NOT NULL,
+ `max_runtime_mins` int(11) NOT NULL,
PRIMARY KEY (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=latin1;
SET character_set_client = @saved_cs_client;
diff --git a/frontend/afe/doctests/001_rpc_test.txt b/frontend/afe/doctests/001_rpc_test.txt
index 39ab599..20b5fe7 100644
--- a/frontend/afe/doctests/001_rpc_test.txt
+++ b/frontend/afe/doctests/001_rpc_test.txt
@@ -538,7 +538,7 @@
... 'priority': 'Low',
... 'synch_count': 1,
... 'timeout': 72,
-... 'max_runtime_hrs': 72,
+... 'max_runtime_mins': 72*60,
... 'run_verify': 1,
... 'email_list': '',
... 'reboot_before': 'If dirty',
diff --git a/frontend/afe/models.py b/frontend/afe/models.py
index f560837..4193353 100644
--- a/frontend/afe/models.py
+++ b/frontend/afe/models.py
@@ -893,7 +893,9 @@
synch_count: how many hosts should be used per autoserv execution
run_verify: Whether or not to run the verify phase
timeout: hours from queuing time until job times out
- max_runtime_hrs: hours from job starting time until job times out
+ max_runtime_hrs: DEPRECATED - hours from job starting time until job
+ times out
+ max_runtime_mins: minutes from job starting time until job times out
email_list: list of people to email on completion delimited by any of:
white space, ',', ':', ';'
dependency_labels: many-to-many relationship with labels corresponding to
@@ -906,8 +908,12 @@
"""
DEFAULT_TIMEOUT = global_config.global_config.get_config_value(
'AUTOTEST_WEB', 'job_timeout_default', default=240)
+ # MAX_RUNTIME_HRS is deprecated. Will be removed after switch to mins is
+ # completed.
DEFAULT_MAX_RUNTIME_HRS = global_config.global_config.get_config_value(
'AUTOTEST_WEB', 'job_max_runtime_hrs_default', default=72)
+ DEFAULT_MAX_RUNTIME_MINS = global_config.global_config.get_config_value(
+ 'AUTOTEST_WEB', 'job_max_runtime_mins_default', default=72*60)
DEFAULT_PARSE_FAILED_REPAIR = global_config.global_config.get_config_value(
'AUTOTEST_WEB', 'parse_failed_repair_default', type=bool,
default=False)
@@ -940,7 +946,10 @@
default=DEFAULT_REBOOT_AFTER)
parse_failed_repair = dbmodels.BooleanField(
default=DEFAULT_PARSE_FAILED_REPAIR)
+ # max_runtime_hrs is deprecated. Will be removed after switch to mins is
+ # completed.
max_runtime_hrs = dbmodels.IntegerField(default=DEFAULT_MAX_RUNTIME_HRS)
+ max_runtime_mins = dbmodels.IntegerField(default=DEFAULT_MAX_RUNTIME_MINS)
drone_set = dbmodels.ForeignKey(DroneSet, null=True, blank=True)
parameterized_job = dbmodels.ForeignKey(ParameterizedJob, null=True,
@@ -1018,7 +1027,7 @@
control_type=options['control_type'],
synch_count=options.get('synch_count'),
timeout=options.get('timeout'),
- max_runtime_hrs=options.get('max_runtime_hrs'),
+ max_runtime_mins=options.get('max_runtime_mins'),
run_verify=options.get('run_verify'),
email_list=options.get('email_list'),
reboot_before=options.get('reboot_before'),
diff --git a/frontend/afe/resources.py b/frontend/afe/resources.py
index b783115..83a658b 100644
--- a/frontend/afe/resources.py
+++ b/frontend/afe/resources.py
@@ -436,7 +436,7 @@
'machines_per_execution': 1,
'run_verify': bool(_job_fields['run_verify'].default),
'timeout_hrs': _job_fields['timeout'].default,
- 'maximum_runtime_hrs': _job_fields['max_runtime_hrs'].default,
+ 'maximum_runtime_mins': _job_fields['max_runtime_mins'].default,
'cleanup_before_job':
model_attributes.RebootBefore.get_string(
models.DEFAULT_REBOOT_BEFORE),
@@ -476,7 +476,7 @@
'machines_per_execution': job.synch_count,
'run_verify': bool(job.run_verify),
'timeout_hrs': job.timeout,
- 'maximum_runtime_hrs': job.max_runtime_hrs,
+ 'maximum_runtime_mins': job.max_runtime_mins,
'cleanup_before_job':
model_attributes.RebootBefore.get_string(job.reboot_before),
'cleanup_after_job':
@@ -681,7 +681,7 @@
control_type=control_type,
is_template=input_dict.get('is_template', None),
timeout=execution_info.get('timeout_hrs'),
- max_runtime_hrs=execution_info.get('maximum_runtime_hrs'),
+ max_runtime_mins=execution_info.get('maximum_runtime_mins'),
synch_count=execution_info.get('machines_per_execution'),
run_verify=execution_info.get('run_verify'),
email_list=input_dict.get('email_list', None),
diff --git a/frontend/afe/rpc_interface.py b/frontend/afe/rpc_interface.py
index 9c01d2c..2ddb72d 100644
--- a/frontend/afe/rpc_interface.py
+++ b/frontend/afe/rpc_interface.py
@@ -414,7 +414,7 @@
meta_hosts=(), one_time_hosts=(),
atomic_group_name=None, synch_count=None,
is_template=False, timeout=None,
- max_runtime_hrs=None, run_verify=True,
+ max_runtime_mins=None, run_verify=True,
email_list='', dependencies=(), reboot_before=None,
reboot_after=None, parse_failed_repair=None,
hostless=False, keyvals=None, drone_set=None):
@@ -496,7 +496,7 @@
def create_job(name, priority, control_file, control_type,
hosts=(), meta_hosts=(), one_time_hosts=(),
atomic_group_name=None, synch_count=None, is_template=False,
- timeout=None, max_runtime_hrs=None, run_verify=True,
+ timeout=None, max_runtime_mins=None, run_verify=True,
email_list='', dependencies=(), reboot_before=None,
reboot_after=None, parse_failed_repair=None, hostless=False,
keyvals=None, drone_set=None, image=None):
@@ -512,7 +512,7 @@
given this value is treated as a minimum.
@param is_template If true then create a template job.
@param timeout Hours after this call returns until the job times out.
- @param max_runtime_hrs Hours from job starting time until job times out
+ @param max_runtime_mins Minutes from job starting time until job times out
@param run_verify Should the host be verified before running the test?
@param email_list String containing emails to mail when the job is done
@param dependencies List of label names on which this job depends
@@ -860,7 +860,8 @@
result['host_statuses'] = sorted(models.Host.Status.names)
result['job_statuses'] = sorted(models.HostQueueEntry.Status.names)
result['job_timeout_default'] = models.Job.DEFAULT_TIMEOUT
- result['job_max_runtime_hrs_default'] = models.Job.DEFAULT_MAX_RUNTIME_HRS
+ result['job_max_runtime_mins_default'] = (
+ models.Job.DEFAULT_MAX_RUNTIME_MINS)
result['parse_failed_repair_default'] = bool(
models.Job.DEFAULT_PARSE_FAILED_REPAIR)
result['reboot_before_options'] = model_attributes.RebootBefore.names
diff --git a/frontend/afe/rpc_utils.py b/frontend/afe/rpc_utils.py
index 7250d92..a2781bd 100644
--- a/frontend/afe/rpc_utils.py
+++ b/frontend/afe/rpc_utils.py
@@ -638,7 +638,7 @@
def create_job_common(name, priority, control_type, control_file=None,
hosts=(), meta_hosts=(), one_time_hosts=(),
atomic_group_name=None, synch_count=None,
- is_template=False, timeout=None, max_runtime_hrs=None,
+ is_template=False, timeout=None, max_runtime_mins=None,
run_verify=True, email_list='', dependencies=(),
reboot_before=None, reboot_after=None,
parse_failed_repair=None, hostless=False, keyvals=None,
@@ -735,7 +735,7 @@
control_type=control_type,
is_template=is_template,
timeout=timeout,
- max_runtime_hrs=max_runtime_hrs,
+ max_runtime_mins=max_runtime_mins,
synch_count=synch_count,
run_verify=run_verify,
email_list=email_list,
diff --git a/frontend/client/src/autotest/afe/AfeUtils.java b/frontend/client/src/autotest/afe/AfeUtils.java
index f435a0a..e78b0af 100644
--- a/frontend/client/src/autotest/afe/AfeUtils.java
+++ b/frontend/client/src/autotest/afe/AfeUtils.java
@@ -218,7 +218,7 @@
args.put("control_type", new JSONString(TestSelector.SERVER_TYPE));
args.put("synch_count", controlInfo.get("synch_count"));
args.put("timeout", staticData.getData("job_timeout_default"));
- args.put("max_runtime_hrs", staticData.getData("job_max_runtime_hrs_default"));
+ args.put("max_runtime_mins", staticData.getData("job_max_runtime_mins_default"));
args.put("run_verify", JSONBoolean.getInstance(false));
args.put("parse_failed_repair", JSONBoolean.getInstance(true));
args.put("reboot_before", rebootBefore);
diff --git a/frontend/client/src/autotest/afe/JobDetailView.java b/frontend/client/src/autotest/afe/JobDetailView.java
index 36bc5ff..362de44 100644
--- a/frontend/client/src/autotest/afe/JobDetailView.java
+++ b/frontend/client/src/autotest/afe/JobDetailView.java
@@ -129,7 +129,7 @@
imageUrlString = Utils.jsonToString(jobObject.get("image")).trim();
}
showText(imageUrlString, "view_image_url");
- showField(jobObject, "max_runtime_hrs", "view_max_runtime");
+ showField(jobObject, "max_runtime_mins", "view_max_runtime");
showField(jobObject, "email_list", "view_email_list");
showText(runVerify, "view_run_verify");
showField(jobObject, "reboot_before", "view_reboot_before");
diff --git a/frontend/client/src/autotest/afe/create/CreateJobViewPresenter.java b/frontend/client/src/autotest/afe/create/CreateJobViewPresenter.java
index 3c9c476..e0cf25c 100644
--- a/frontend/client/src/autotest/afe/create/CreateJobViewPresenter.java
+++ b/frontend/client/src/autotest/afe/create/CreateJobViewPresenter.java
@@ -144,7 +144,7 @@
display.getPriorityList().selectByName(priority);
display.getTimeout().setText(Utils.jsonToString(jobObject.get("timeout")));
- display.getMaxRuntime().setText(Utils.jsonToString(jobObject.get("max_runtime_hrs")));
+ display.getMaxRuntime().setText(Utils.jsonToString(jobObject.get("max_runtime_mins")));
display.getEmailList().setText(
jobObject.get("email_list").isString().stringValue());
@@ -527,7 +527,7 @@
display.getImageUrl().setText("");
display.getTimeout().setText(Utils.jsonToString(repository.getData("job_timeout_default")));
display.getMaxRuntime().setText(
- Utils.jsonToString(repository.getData("job_max_runtime_hrs_default")));
+ Utils.jsonToString(repository.getData("job_max_runtime_mins_default")));
display.getEmailList().setText("");
testSelector.reset();
display.getSkipVerify().setValue(false);
@@ -579,7 +579,7 @@
new JSONString(controlTypeSelect.getControlType()));
args.put("synch_count", synchCount);
args.put("timeout", new JSONNumber(timeoutValue));
- args.put("max_runtime_hrs", new JSONNumber(maxRuntimeValue));
+ args.put("max_runtime_mins", new JSONNumber(maxRuntimeValue));
args.put("email_list", new JSONString(display.getEmailList().getText()));
args.put("run_verify", JSONBoolean.getInstance(
!display.getSkipVerify().getValue()));
diff --git a/frontend/client/src/autotest/public/AfeClient.html b/frontend/client/src/autotest/public/AfeClient.html
index d1cb538..b3a3e53 100644
--- a/frontend/client/src/autotest/public/AfeClient.html
+++ b/frontend/client/src/autotest/public/AfeClient.html
@@ -63,7 +63,7 @@
<span class="field-name" style="color:red">Image URL:</span>
<span id="view_image_url"></span><br>
<span class="field-name">Max runtime:</span>
- <span id="view_max_runtime"></span> hours<br>
+ <span id="view_max_runtime"></span> minutes<br>
<span class="field-name">Email List:</span>
<span id="view_email_list"></span><br>
<span class="field-name">Run verify:</span>
@@ -125,7 +125,7 @@
<td id="create_image_url"></td><td></td></tr>
<tr><td class="field-name">Timeout (hours):</td>
<td id="create_timeout"></td><td></td></tr>
- <tr><td class="field-name">Max runtime (hours):</td>
+ <tr><td class="field-name">Max runtime (minutes):</td>
<td id="create_max_runtime"></td><td></td></tr>
<tr><td class="field-name">Email List:</td>
<td id="create_email_list"></td><td></td></tr>
diff --git a/global_config.ini b/global_config.ini
index 30b5d1d..a3b09c9 100644
--- a/global_config.ini
+++ b/global_config.ini
@@ -5,7 +5,7 @@
user: chromeosqa-admin
password: USE SHADOW PASSWORD
job_timeout_default: 24
-job_max_runtime_hrs_default: 24
+job_max_runtime_mins_default: 1440
parse_failed_repair_default: 0
# Only set this if your server is not 'http://[SERVER] hostname/afe/'
#base_url: http://your_autotest_server/afe/
@@ -72,7 +72,7 @@
max_parse_processes: 100
max_transfer_processes: 50
tick_pause_sec: 5
-clean_interval_minutes: 60
+clean_interval_minutes: 5
drones: atlantis2
atlantis2_max_processes: 600
drone_installation_directory: /usr/local/autotest
diff --git a/scheduler/monitor_db_cleanup.py b/scheduler/monitor_db_cleanup.py
index ada1edd..47cbb36 100644
--- a/scheduler/monitor_db_cleanup.py
+++ b/scheduler/monitor_db_cleanup.py
@@ -78,7 +78,8 @@
FROM afe_host_queue_entries AS hqe
INNER JOIN afe_jobs ON (hqe.job_id = afe_jobs.id)
WHERE NOT hqe.complete AND NOT hqe.aborted AND
- hqe.started_on + INTERVAL afe_jobs.max_runtime_hrs HOUR < NOW()""")
+ hqe.started_on + INTERVAL afe_jobs.max_runtime_mins MINUTE <
+ NOW()""")
query = models.HostQueueEntry.objects.filter(
id__in=[row[0] for row in rows])
for queue_entry in query.distinct():
diff --git a/server/frontend.py b/server/frontend.py
index 83d7a8e..b2b27c2 100644
--- a/server/frontend.py
+++ b/server/frontend.py
@@ -355,7 +355,7 @@
def run_test_suites(self, pairings, kernel, kernel_label=None,
priority='Medium', wait=True, poll_interval=10,
email_from=None, email_to=None, timeout=168,
- max_runtime_hrs=168, kernel_cmdline=None):
+ max_runtime_mins=10080, kernel_cmdline=None):
"""
Run a list of test suites on a particular kernel.
@@ -379,7 +379,7 @@
new_job = self.invoke_test(pairing, kernel, kernel_label,
priority, timeout=timeout,
kernel_cmdline=kernel_cmdline,
- max_runtime_hrs=max_runtime_hrs)
+ max_runtime_mins=max_runtime_mins)
if not new_job:
continue
jobs.append(new_job)