[autotest] Add actions to take when adding/removing a role from a server This CL adds the actions needed to make adding/removing a role from a server effective. For example, when the server database is enabled and a new drone is added, the scheduler needs to be restarted. BUG=chromium:424778 CQ-DEPEND=CL:232003 TEST=unittest, setup local server database, manually test the following cases (CL 232525 is needed for drone test) add scheduler: Confirm scheduler service is started in the server. remove scheduler: Confirm scheduler service is stopped in the server. add drone: Confirm scheduler service is restarted. remove drone: Confirm scheduler service is restarted. Change-Id: I14d6bb15d68a9b94fa3ab5b0bcc202469b253c89 Reviewed-on: https://chromium-review.googlesource.com/233181 Tested-by: Dan Shi <[email protected]> Trybot-Ready: Dan Shi <[email protected]> Reviewed-by: Fang Deng <[email protected]> Commit-Queue: Dan Shi <[email protected]>
diff --git a/site_utils/server_manager.py b/site_utils/server_manager.py index 8a66fd0..a75c4cb 100644 --- a/site_utils/server_manager.py +++ b/site_utils/server_manager.py
@@ -3,7 +3,7 @@ # found in the LICENSE file. """This module provides functions to manage servers in server database -(defined in global config section AUTOTEST_SERVER_database). +(defined in global config section AUTOTEST_SERVER_DB). create(hostname, role=None, note=None) Create a server with given role, with status backup. @@ -30,97 +30,122 @@ """ -# TODO(dshi): crbug.com/424778 This module currently doesn't have any logic to -# do action server operations, e.g., restart scheduler to enable a drone. All it -# does is to update database. This helps the CL to be smaller for review. Next -# CL will include actual server action logic. import datetime import common -import django.core.exceptions -from autotest_lib.client.common_lib.global_config import global_config -from autotest_lib.frontend import setup_django_environment from autotest_lib.frontend.server import models as server_models +from autotest_lib.site_utils import server_manager_actions +from autotest_lib.site_utils import server_manager_utils -class ServerActionError(Exception): - """Exception raised when action on server failed. - """ - - -def _add_role(server, role): +def _add_role(server, role, action): """Add a role to the server. @param server: An object of server_models.Server. @param role: Role to be added to the server. + @param action: Execute actions after role or status is changed. Default to + False. @raise ServerActionError: If role is failed to be added. """ server_models.validate(role=role) - if server_models.ServerRole.objects.filter(server=server, role=role): - raise ServerActionError('Server %s already has role %s.' % - (server.hostname, role)) + if role in server.get_role_names(): + raise server_manager_utils.ServerActionError( + 'Server %s already has role %s.' % (server.hostname, role)) + + # Verify server + if not server_manager_utils.check_server(server.hostname, role): + raise server_manager_utils.ServerActionError( + 'Server %s is not ready for role %s.' 
% (server.hostname, role)) if (role in server_models.ServerRole.ROLES_REQUIRE_UNIQUE_INSTANCE and server.status == server_models.Server.STATUS.PRIMARY): servers = server_models.Server.objects.filter( roles__role=role, status=server_models.Server.STATUS.PRIMARY) if len(servers) >= 1: - raise ServerActionError('Role %s must be unique. Server %s ' - 'already has role %s.' % - (role, servers[0].hostname, role)) + raise server_manager_utils.ServerActionError( + 'Role %s must be unique. Server %s already has role %s.' % + (role, servers[0].hostname, role)) + server_models.ServerRole.objects.create(server=server, role=role) + # If needed, apply actions to enable the role for the server. + server_manager_actions.try_execute(server, [role], enable=True, + post_change=True, do_action=action) + print 'Role %s is added to server %s.' % (role, server.hostname) -def _delete_role(server, role): +def _delete_role(server, role, action): """Delete a role from the server. @param server: An object of server_models.Server. @param role: Role to be deleted from the server. + @param action: Execute actions after role or status is changed. Default to + False. @raise ServerActionError: If role is failed to be deleted. """ server_models.validate(role=role) - server_roles = server_models.ServerRole.objects.filter(server=server, - role=role) - if not server_roles: - raise ServerActionError('Server %s does not have role %s.' % - (server.hostname, role)) + if role not in server.get_role_names(): + raise server_manager_utils.ServerActionError( + 'Server %s does not have role %s.' % (server.hostname, role)) if server.status == server_models.Server.STATUS.PRIMARY: - servers = server_models.Server.objects.filter( - roles__role=role, status=server_models.Server.STATUS.PRIMARY) - if len(servers) == 1: - print ('Role %s is required in an Autotest instance. Please ' - 'add the role to another server.' % role) - # Role should be deleted after all action is completed. 
- server_roles[0].delete() + server_manager_utils.warn_missing_role(role, server) + + # Apply actions to disable the role for the server before the role is + # removed from the server. + server_manager_actions.try_execute(server, [role], enable=False, + post_change=False, do_action=action) + + print 'Deleting role %s from server %s...' % (role, server.hostname) + server.roles.get(role=role).delete() + + # Apply actions to disable the role for the server after the role is + # removed from the server. + server_manager_actions.try_execute(server, [role], enable=False, + post_change=True, do_action=action) + + # If the server is in status primary and has no role, change its status to + # backup. + if (not server.get_role_names() and + server.status == server_models.Server.STATUS.PRIMARY): + print ('Server %s has no role, change its status from primary to backup' + % server.hostname) + server.status = server_models.Server.STATUS.BACKUP + server.save() print 'Role %s is deleted from server %s.' % (role, server.hostname) -def _change_status(server, status): +def _change_status(server, status, action): """Change the status of the server. @param server: An object of server_models.Server. @param status: New status of the server. + @param action: Execute actions after role or status is changed. Default to + False. @raise ServerActionError: If status is failed to be changed. """ server_models.validate(status=status) if server.status == status: - raise ServerActionError('Server %s already has status of %s.' % - (server.hostname, status)) + raise server_manager_utils.ServerActionError( + 'Server %s already has status of %s.' % + (server.hostname, status)) if (not server.roles.all() and - status == server_models.Server.STATUS.PRIMARY): - raise ServerActionError('Server %s has no role associated. Server ' - 'must have a role to be in status primary.' 
- % server.hostname) + status == server_models.Server.STATUS.PRIMARY): + raise server_manager_utils.ServerActionError( + 'Server %s has no role associated. Server must have a role to ' + 'be in status primary.' % server.hostname) + # Abort the action if the server's status will be changed to primary and + # the Autotest instance already has another server running an unique role. + # For example, a scheduler server is already running, and a backup server + # with role scheduler should not be changed to status primary. unique_roles = server.roles.filter( role__in=server_models.ServerRole.ROLES_REQUIRE_UNIQUE_INSTANCE) if unique_roles and status == server_models.Server.STATUS.PRIMARY: @@ -129,215 +154,39 @@ roles__role=role.role, status=server_models.Server.STATUS.PRIMARY) if len(servers) == 1: - raise ServerActionError('Role %s must be unique. Server %s ' - 'already has the role.' % - (role.role, servers[0].hostname)) - old_status = server.status + raise server_manager_utils.ServerActionError( + 'Role %s must be unique. Server %s already has the ' + 'role.' % (role.role, servers[0].hostname)) + + # Post a warning if the server's status will be changed from primary to + # other value and the server is running a unique role across database, e.g. + # scheduler. + if server.status == server_models.Server.STATUS.PRIMARY: + for role in server.get_role_names(): + server_manager_utils.warn_missing_role(role, server) + + enable = status == server_models.Server.STATUS.PRIMARY + server_manager_actions.try_execute(server, server.get_role_names(), + enable=enable, post_change=False, + do_action=action) + + prev_status = server.status server.status = status server.save() + # Apply actions to enable/disable roles of the server after the status is + # changed. + server_manager_actions.try_execute(server, server.get_role_names(), + enable=enable, post_change=True, + prev_status=prev_status, + do_action=action) + print ('Status of server %s is changed from %s to %s. 
Affected roles: %s' % - (server.hostname, old_status, status, - ', '.join([r.role for r in server.roles.all()]))) + (server.hostname, prev_status, status, + ', '.join(server.get_role_names()))) -def _delete_attribute(server, attribute): - """Delete the attribute from the host. - - @param server: An object of server_models.Server. - @param attribute: Name of an attribute of the server. - """ - attributes = server.attributes.filter(attribute=attribute) - if not attributes: - raise ServerActionError('Server %s does not have attribute %s' % - (server.hostname, attribute)) - attributes[0].delete() - print 'Attribute %s is deleted from server %s.' % (attribute, - server.hostname) - - -def _change_attribute(server, attribute, value): - """Change the value of an attribute of the server. - - @param server: An object of server_models.Server. - @param attribute: Name of an attribute of the server. - @param value: Value of the attribute of the server. - - @raise ServerActionError: If the attribute already exists and has the - given value. - """ - attributes = server_models.ServerAttribute.objects.filter( - server=server, attribute=attribute) - if attributes and attributes[0].value == value: - raise ServerActionError('Attribute %s for Server %s already has ' - 'value of %s.' % - (attribute, server.hostname, value)) - if attributes: - old_value = attributes[0].value - attributes[0].value = value - attributes[0].save() - print ('Attribute `%s` of server %s is changed from %s to %s.' % - (attribute, server.hostname, old_value, value)) - else: - server_models.ServerAttribute.objects.create( - server=server, attribute=attribute, value=value) - print ('Attribute `%s` of server %s is set to %s.' % - (attribute, server.hostname, value)) - - -def use_server_db(): - """Check if use_server_db is enabled in configuration. - - @return: True if use_server_db is set to True in global config. 
- """ - return global_config.get_config_value( - 'SERVER', 'use_server_db', default=False, type=bool) - - -def get_servers(hostname=None, role=None, status=None): - """Find servers with given role and status. - - @param hostname: hostname of the server. - @param role: Role of server, default to None. - @param status: Status of server, default to None. - - @return: A list of server objects with given role and status. - """ - filters = {} - if hostname: - filters['hostname'] = hostname - if role: - filters['roles__role'] = role - if status: - filters['status'] = status - return server_models.Server.objects.filter(**filters) - - -def get_server_details(servers, table=False, summary=False): - """Get a string of given servers' details. - - The method can return a string of server information in 3 different formats: - A detail view: - Hostname : server2 - Status : primary - Roles : drone - Attributes : {'max_processes':300} - Date Created : 2014-11-25 12:00:00 - Date Modified: None - Note : Drone in lab1 - A table view: - Hostname | Status | Roles | Date Created | Date Modified | Note - server1 | backup | scheduler | 2014-11-25 23:45:19 | | - server2 | primary | drone | 2014-11-25 12:00:00 | | Drone - A summary view: - scheduler : server1(backup), server3(primary), - host_scheduler : - drone : server2(primary), - devserver : - database : - suite_scheduler: - crash_server : - No Role : - - The method returns detail view of each server and a summary view by default. - If `table` is set to True, only table view will be returned. - If `summary` is set to True, only summary view will be returned. - - @param servers: A list of servers to get details. - @param table: True to return a table view instead of a detail view, - default is set to False. - @param summary: True to only show the summary of roles and status of - given servers. - - @return: A string of the information of given servers. - """ - # Format string to display a table view. 
- # Hostname, Status, Roles, Date Created, Date Modified, Note - TABLEVIEW_FORMAT = ('%(hostname)-30s | %(status)-7s | %(roles)-20s | ' - '%(date_created)-19s | %(date_modified)-19s | %(note)s') - - result = '' - if not table and not summary: - for server in servers: - result += '\n' + str(server) - elif table: - result += (TABLEVIEW_FORMAT % - {'hostname':'Hostname', 'status':'Status', - 'roles':'Roles', 'date_created':'Date Created', - 'date_modified':'Date Modified', 'note':'Note'}) - for server in servers: - roles = ','.join([r.role for r in server.roles.all()]) - result += '\n' + (TABLEVIEW_FORMAT % - {'hostname':server.hostname, - 'status': server.status or '', - 'roles': roles, - 'date_created': server.date_created, - 'date_modified': server.date_modified or '', - 'note': server.note or ''}) - elif summary: - result += 'Roles and status of servers:\n\n' - for role, _ in server_models.ServerRole.ROLE.choices(): - servers_of_role = [s for s in servers if role in - [r.role for r in s.roles.all()]] - result += '%-15s: ' % role - for server in servers_of_role: - result += '%s(%s), ' % (server.hostname, server.status) - result += '\n' - servers_without_role = [s.hostname for s in servers - if not s.roles.all()] - result += '%-15s: %s' % ('No Role', ', '.join(servers_without_role)) - - return result - - -def verify_server(exist=True): - """Decorator to check if server with given hostname exists in the database. - - @param exist: Set to True to confirm server exists in the database, raise - exception if not. If it's set to False, raise exception if - server exists in database. Default is True. - - @raise ServerActionError: If `exist` is True and server does not exist in - the database, or `exist` is False and server exists - in the database. - """ - def deco_verify(func): - """Wrapper for the decorator. - - @param func: Function to be called. - """ - def func_verify(*args, **kwargs): - """Decorator to check if server exists. 
- - If exist is set to True, raise ServerActionError is server with - given hostname is not found in server database. - If exist is set to False, raise ServerActionError is server with - given hostname is found in server database. - - @param func: function to be called. - @param args: arguments for function to be called. - @param kwargs: keyword arguments for function to be called. - """ - hostname = kwargs['hostname'] - try: - server = server_models.Server.objects.get(hostname=hostname) - except django.core.exceptions.ObjectDoesNotExist: - server = None - - if not exist and server: - raise ServerActionError('Server %s already exists.' % - hostname) - if exist and not server: - raise ServerActionError('Server %s does not exist in the ' - 'database.' % hostname) - if server: - kwargs['server'] = server - return func(*args, **kwargs) - return func_verify - return deco_verify - - -@verify_server(exist=False) +@server_manager_utils.verify_server(exist=False) def create(hostname, role=None, note=None): """Create a new server. @@ -359,7 +208,7 @@ return server -@verify_server() +@server_manager_utils.verify_server() def delete(hostname, server=None): """Delete given server from server database. @@ -372,8 +221,8 @@ """ print 'Deleting server %s from server database.' % hostname - if (use_server_db() and - server.status == server_models.Server.STATUS.PRIMARY): + if (server_manager_utils.use_server_db() and + server.status == server_models.Server.STATUS.PRIMARY): print ('Server %s is in status primary, need to disable its ' 'current roles first.' % hostname) for role in server.roles.all(): @@ -383,9 +232,9 @@ print 'Server %s is deleted from server database.' % hostname -@verify_server() +@server_manager_utils.verify_server() def modify(hostname, role=None, status=None, delete=False, note=None, - attribute=None, value=None, server=None): + attribute=None, value=None, action=False, server=None): """Modify given server with specified actions. 
@param hostname: hostname of the server to be modified. @@ -395,6 +244,8 @@ @param note: Note of the server. @param attribute: Name of an attribute of the server. @param value: Value of an attribute of the server. + @param action: Execute actions after role or status is changed. Default to + False. @param server: Server object from database query, this argument should be injected by the verify_server_exists decorator. @@ -404,30 +255,20 @@ """ if role: if not delete: - _add_role(server, role) + _add_role(server, role, action) else: - _delete_role(server, role) + _delete_role(server, role, action) if status: - _change_status(server, status) + _change_status(server, status, action) if note is not None: server.note = note server.save() if attribute and value: - _change_attribute(server, attribute, value) + server_manager_utils.change_attribute(server, attribute, value) elif attribute and delete: - _delete_attribute(server, attribute) + server_manager_utils.delete_attribute(server, attribute) return server - - -def get_drones(): - """Get a list of drones in status primary. - - @return: A list of drones in status primary. - """ - servers = get_servers(role=server_models.ServerRole.ROLE.DRONE, - status=server_models.Server.STATUS.PRIMARY) - return [s.hostname for s in servers]
diff --git a/site_utils/server_manager_actions.py b/site_utils/server_manager_actions.py new file mode 100644 index 0000000..13c0d31 --- /dev/null +++ b/site_utils/server_manager_actions.py
@@ -0,0 +1,156 @@ +# Copyright 2014 The Chromium OS Authors. All rights reserved. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. + +"""This module provides utility functions to help managing servers in server +database (defined in global config section AUTOTEST_SERVER_DB). + +After a role is added or removed from a server, certain services may need to +be restarted. For example, scheduler needs to be restarted after a drone is +added to a primary server. This module includes functions to check if actions +are required to be executed and what actions to executed on which servers. +""" + +import subprocess +import sys + +import common + +from autotest_lib.frontend.server import models as server_models +from autotest_lib.site_utils import server_manager_utils +from autotest_lib.site_utils.lib import infra + + +# Actions that must be executed for server management action to be effective. +# Each action is a tuple: +# (the role of which the command should be executed, the command) +RESTART_SCHEDULER = (server_models.ServerRole.ROLE.SCHEDULER, + 'sudo service scheduler restart') +RESTART_HOST_SCHEDULER = (server_models.ServerRole.ROLE.HOST_SCHEDULER, + 'sudo service host-scheduler restart') +RESTART_SUITE_SCHEDULER = (server_models.ServerRole.ROLE.SUITE_SCHEDULER, + 'sudo service suite_scheduler restart') +RELOAD_APACHE = (server_models.ServerRole.ROLE.SCHEDULER, + 'sudo service apache reload') + +STOP_SCHEDULER = (server_models.ServerRole.ROLE.SCHEDULER, + 'sudo service scheduler stop') +STOP_HOST_SCHEDULER = (server_models.ServerRole.ROLE.HOST_SCHEDULER, + 'sudo service host-scheduler stop') +STOP_SUITE_SCHEDULER = (server_models.ServerRole.ROLE.SUITE_SCHEDULER, + 'sudo service suite_scheduler stop') + +# Dictionary of actions needed for a role to be enabled. Key is the role, and +# value is a list of action. 
All these actions should be applied after the role +# is added to the server, or the server's status is changed to primary. +ACTIONS_AFTER_ROLE_APPLIED = { + server_models.ServerRole.ROLE.SCHEDULER: [RESTART_SCHEDULER], + server_models.ServerRole.ROLE.HOST_SCHEDULER: [RESTART_HOST_SCHEDULER], + server_models.ServerRole.ROLE.SUITE_SCHEDULER: + [RESTART_SUITE_SCHEDULER], + server_models.ServerRole.ROLE.DRONE: [RESTART_SCHEDULER], + server_models.ServerRole.ROLE.DATABASE: + [RESTART_SCHEDULER, RESTART_HOST_SCHEDULER, RELOAD_APACHE], + server_models.ServerRole.ROLE.DEVSERVER: [RESTART_SCHEDULER], + } + +# Dictionary of actions needed for a role to be disabled. Key is the role, and +# value is a list of action. +# Action should be taken before role is deleted from a server, or the server's +# status is changed to primary. +ACTIONS_BEFORE_ROLE_REMOVED = { + server_models.ServerRole.ROLE.SCHEDULER: [STOP_SCHEDULER], + server_models.ServerRole.ROLE.HOST_SCHEDULER: [STOP_HOST_SCHEDULER], + server_models.ServerRole.ROLE.SUITE_SCHEDULER: [STOP_SUITE_SCHEDULER], + server_models.ServerRole.ROLE.DATABASE: + [STOP_SCHEDULER, STOP_HOST_SCHEDULER], + } +# Action should be taken after role is deleted from a server, or the server's +# status is changed to primary. +ACTIONS_AFTER_ROLE_REMOVED = { + server_models.ServerRole.ROLE.DRONE: [RESTART_SCHEDULER], + server_models.ServerRole.ROLE.DEVSERVER: [RESTART_SCHEDULER], + } + + +def apply(action): + """Apply an given action. + + It usually involves ssh to the server with specific role and run the + command, e.g., ssh to scheduler server and restart scheduler. + + @param action: A tuple of (the role of which the command should be executed, + the command) + @raise ServerActionError: If the action can't be applied due to database + issue. + @param subprocess.CalledProcessError: If command is failed to be + executed. 
+ """ + role = action[0] + command = action[1] + # Find the servers with role + servers = server_manager_utils.get_servers( + role=role, status=server_models.Server.STATUS.PRIMARY) + if not servers: + print >> sys.stderr, ('WARNING! Action %s failed to be applied. No ' + 'server with given role %s was found.' % + (action, role)) + return + + for server in servers: + print 'Run command `%s` on server %s' % (command, server.hostname) + try: + infra.execute_command(server.hostname, command) + except subprocess.CalledProcessError as e: + print >> sys.stderr, ('Failed to check server %s, error: %s' % + (server.hostname, e)) + + +def try_execute(server, roles, enable, post_change, + prev_status=server_models.Server.STATUS.BACKUP, + do_action=False): + """Try to execute actions for given role changes of the server. + + @param server: Server that has the role changes. + @param roles: A list of roles changed. + @param enable: Set to True if the roles are enabled, i.e., added to server. + If it's False, the roles are removed from the server. + @param post_change: Set to True if to apply actions should be applied after + the role changes, otherwise, set to False. + @param prev_status: The previous status after the status change if any. This + is to help to decide if actions should be executed, + since actions should be applied if the server's status + is changed from primary to other status. Default to + backup. + @param do_action: Set to True to execute actions, otherwise, post a warning. + """ + if not server_manager_utils.use_server_db(): + return + # This check is to prevent actions to be applied to server not in primary + # role or server database is not enabled. Note that no action is needed + # before a server is changed to primary status. If that assumption is + # no longer valid, this method needs to be updated accordingly. 
+ if (server.status != server_models.Server.STATUS.PRIMARY and + prev_status != server_models.Server.STATUS.PRIMARY): + return + + if enable: + if post_change: + possible_actions = ACTIONS_AFTER_ROLE_APPLIED + else: + if post_change: + possible_actions = ACTIONS_AFTER_ROLE_REMOVED + else: + possible_actions = ACTIONS_BEFORE_ROLE_REMOVED + + all_actions = [] + for role in roles: + all_actions.extend(possible_actions.get(role, [])) + for action in set(all_actions): + if do_action: + apply(action) + else: + message = ('WARNING! Action %s is skipped. Please manually ' + 'execute the action to make your change effective.' % + str(action)) + print >> sys.stderr, message
diff --git a/site_utils/server_manager_unittest.py b/site_utils/server_manager_unittest.py index 4549396..8d1c178 100644 --- a/site_utils/server_manager_unittest.py +++ b/site_utils/server_manager_unittest.py
@@ -12,6 +12,8 @@ from autotest_lib.frontend import setup_django_environment from autotest_lib.frontend.server import models as server_models from autotest_lib.site_utils import server_manager +from autotest_lib.site_utils import server_manager_utils +from autotest_lib.site_utils.lib import infra class QueriableList(list): @@ -24,6 +26,12 @@ raise NotImplementedError() + def get(self, **kwargs): + """Mock the get call in django model. + """ + raise NotImplementedError() + + def all(self): """Return all items in the list. @@ -75,16 +83,21 @@ 'roles': QueriableList([self.SCHEDULER_ROLE]), 'attributes': QueriableList([])}) - self.mox.StubOutWithMock(server_manager, 'use_server_db') + self.mox.StubOutWithMock(server_manager_utils, 'check_server') + self.mox.StubOutWithMock(server_manager_utils, 'warn_missing_role') + self.mox.StubOutWithMock(server_manager_utils, 'use_server_db') + self.mox.StubOutWithMock(server_models.Server, 'get_role_names') self.mox.StubOutWithMock(server_models.Server.objects, 'create') self.mox.StubOutWithMock(server_models.Server.objects, 'filter') self.mox.StubOutWithMock(server_models.Server.objects, 'get') + self.mox.StubOutWithMock(server_models.ServerRole, 'delete') self.mox.StubOutWithMock(server_models.ServerRole.objects, 'create') self.mox.StubOutWithMock(server_models.ServerRole.objects, 'filter') self.mox.StubOutWithMock(server_models.ServerAttribute.objects, 'create') self.mox.StubOutWithMock(server_models.ServerAttribute.objects, 'filter') + self.mox.StubOutWithMock(infra, 'execute_command') self.mox.StubOutWithMock(utils, 'normalize_hostname') @@ -114,16 +127,21 @@ restart scheduler to activate a new devserver. 
""" server_models.validate(role=server_models.ServerRole.ROLE.DEVSERVER) - server_models.ServerRole.objects.filter( - server=self.BACKUP_DRONE, - role=server_models.ServerRole.ROLE.DEVSERVER).AndReturn(None) + server_manager_utils.check_server(mox.IgnoreArg(), + mox.IgnoreArg()).AndReturn(True) + server_manager_utils.use_server_db().MultipleTimes( + ).AndReturn(True) + self.mox.StubOutWithMock(self.BACKUP_DRONE, 'get_role_names') + self.BACKUP_DRONE.get_role_names().AndReturn( + [server_models.ServerRole.ROLE.DRONE]) server_models.ServerRole.objects.create( server=mox.IgnoreArg(), role=server_models.ServerRole.ROLE.DEVSERVER ).AndReturn(self.DRONE_ROLE) self.mox.ReplayAll() server_manager._add_role(server=self.BACKUP_DRONE, - role=server_models.ServerRole.ROLE.DEVSERVER) + role=server_models.ServerRole.ROLE.DEVSERVER, + action=True) def testAddRoleToBackupFail_RoleAlreadyExists(self): @@ -131,15 +149,15 @@ has the given role. """ server_models.validate(role=server_models.ServerRole.ROLE.DRONE) - server_models.ServerRole.objects.filter( - server=self.BACKUP_DRONE, - role=server_models.ServerRole.ROLE.DRONE - ).AndReturn([self.DRONE_ROLE]) + self.mox.StubOutWithMock(self.BACKUP_DRONE, 'get_role_names') + self.BACKUP_DRONE.get_role_names().AndReturn( + [server_models.ServerRole.ROLE.DRONE]) self.mox.ReplayAll() - self.assertRaises(server_manager.ServerActionError, + self.assertRaises(server_manager_utils.ServerActionError, server_manager._add_role, server=self.BACKUP_DRONE, - role=server_models.ServerRole.ROLE.DRONE) + role=server_models.ServerRole.ROLE.DRONE, + action=True) def testDeleteRoleFromBackupSuccess(self): @@ -149,13 +167,19 @@ restart scheduler to delete an existing devserver. 
""" server_models.validate(role=server_models.ServerRole.ROLE.DRONE) - server_models.ServerRole.objects.filter( - server=self.BACKUP_DRONE, + server_manager_utils.use_server_db().MultipleTimes( + ).AndReturn(True) + self.mox.StubOutWithMock(self.BACKUP_DRONE, 'get_role_names') + self.BACKUP_DRONE.get_role_names().MultipleTimes().AndReturn( + [server_models.ServerRole.ROLE.DRONE]) + self.mox.StubOutWithMock(self.BACKUP_DRONE.roles, 'get') + self.BACKUP_DRONE.roles.get( role=server_models.ServerRole.ROLE.DRONE - ).AndReturn([self.DRONE_ROLE]) + ).AndReturn(self.DRONE_ROLE) self.mox.ReplayAll() server_manager._delete_role(server=self.BACKUP_DRONE, - role=server_models.ServerRole.ROLE.DRONE) + role=server_models.ServerRole.ROLE.DRONE, + action=True) def testDeleteRoleFromBackupFail_RoleNotExist(self): @@ -163,31 +187,39 @@ server does not have the given role. """ server_models.validate(role=server_models.ServerRole.ROLE.DEVSERVER) - server_models.ServerRole.objects.filter( - server=self.BACKUP_DRONE, - role=server_models.ServerRole.ROLE.DEVSERVER - ).AndReturn(None) + self.mox.StubOutWithMock(self.BACKUP_DRONE, 'get_role_names') + self.BACKUP_DRONE.get_role_names().AndReturn( + [server_models.ServerRole.ROLE.DRONE]) self.mox.ReplayAll() - self.assertRaises(server_manager.ServerActionError, + self.assertRaises(server_manager_utils.ServerActionError, server_manager._delete_role, server=self.BACKUP_DRONE, - role=server_models.ServerRole.ROLE.DEVSERVER) + role=server_models.ServerRole.ROLE.DEVSERVER, + action=True) def testChangeStatusSuccess_BackupToPrimary(self): """Test manager can change the status of a backup server to primary. """ - # TODO(dshi): After _apply_action is implemented, this unittest needs - # to be updated to verify various actions being taken to put a server - # in primary status, e.g., start scheduler for scheduler server. 
server_models.validate(status=server_models.Server.STATUS.PRIMARY) + server_manager_utils.use_server_db().MultipleTimes( + ).AndReturn(True) + self.mox.StubOutWithMock(self.BACKUP_DRONE, 'get_role_names') + self.BACKUP_DRONE.get_role_names().MultipleTimes().AndReturn( + [server_models.ServerRole.ROLE.DRONE]) self.mox.StubOutWithMock(self.BACKUP_DRONE.roles, 'filter') self.BACKUP_DRONE.roles.filter( role__in=server_models.ServerRole.ROLES_REQUIRE_UNIQUE_INSTANCE ).AndReturn(None) + server_models.Server.objects.filter( + roles__role=server_models.ServerRole.ROLE.SCHEDULER, + status=server_models.Server.STATUS.PRIMARY + ).AndReturn([self.PRIMARY_SCHEDULER]) + infra.execute_command(mox.IgnoreArg(), mox.IgnoreArg()) self.mox.ReplayAll() server_manager._change_status( server=self.BACKUP_DRONE, - status=server_models.Server.STATUS.PRIMARY) + status=server_models.Server.STATUS.PRIMARY, + action=True) def testChangeStatusSuccess_PrimaryToBackup(self): @@ -195,13 +227,25 @@ """ server_models.validate(status=server_models.Server.STATUS.BACKUP) self.mox.StubOutWithMock(self.PRIMARY_DRONE.roles, 'filter') + self.mox.StubOutWithMock(self.PRIMARY_DRONE, 'get_role_names') + self.PRIMARY_DRONE.get_role_names().MultipleTimes().AndReturn( + [server_models.ServerRole.ROLE.DRONE]) self.PRIMARY_DRONE.roles.filter( role__in=server_models.ServerRole.ROLES_REQUIRE_UNIQUE_INSTANCE ).AndReturn(None) + server_manager_utils.use_server_db().MultipleTimes().AndReturn(True) + server_manager_utils.warn_missing_role( + server_models.ServerRole.ROLE.DRONE, self.PRIMARY_DRONE) + server_models.Server.objects.filter( + roles__role=server_models.ServerRole.ROLE.SCHEDULER, + status=server_models.Server.STATUS.PRIMARY + ).AndReturn([self.PRIMARY_SCHEDULER]) + infra.execute_command(mox.IgnoreArg(), mox.IgnoreArg()) self.mox.ReplayAll() server_manager._change_status( server=self.PRIMARY_DRONE, - status=server_models.Server.STATUS.BACKUP) + status=server_models.Server.STATUS.BACKUP, + action=True) def 
testChangeStatusFail_StatusNoChange(self): @@ -210,10 +254,11 @@ """ server_models.validate(status=server_models.Server.STATUS.BACKUP) self.mox.ReplayAll() - self.assertRaises(server_manager.ServerActionError, + self.assertRaises(server_manager_utils.ServerActionError, server_manager._change_status, server=self.BACKUP_DRONE, - status=server_models.Server.STATUS.BACKUP) + status=server_models.Server.STATUS.BACKUP, + action=True) def testChangeStatusFail_UniqueInstance(self): @@ -231,10 +276,111 @@ status=server_models.Server.STATUS.PRIMARY ).AndReturn(QueriableList([self.PRIMARY_SCHEDULER])) self.mox.ReplayAll() - self.assertRaises(server_manager.ServerActionError, + self.assertRaises(server_manager_utils.ServerActionError, server_manager._change_status, server=self.BACKUP_SCHEDULER, - status=server_models.Server.STATUS.PRIMARY) + status=server_models.Server.STATUS.PRIMARY, + action=True) + + + def testAddRoleToBackupFail_CheckServerFail(self): + """Test manager fails to add a role to a backup server if check_server + is failed. + """ + server_manager_utils.check_server(mox.IgnoreArg(), + mox.IgnoreArg()).AndReturn(False) + server_models.validate(role=server_models.ServerRole.ROLE.DRONE) + self.mox.StubOutWithMock(self.BACKUP_DRONE, 'get_role_names') + self.BACKUP_DRONE.get_role_names().MultipleTimes().AndReturn( + [server_models.ServerRole.ROLE.DRONE]) + self.mox.ReplayAll() + self.assertRaises(server_manager_utils.ServerActionError, + server_manager._add_role, server=self.BACKUP_DRONE, + role=server_models.ServerRole.ROLE.SCHEDULER, + action=True) + + + def testAddRoleToPrimarySuccess(self): + """Test manager can add a role to a primary server successfully. + + Confirm that actions needs to be taken, e.g., restart scheduler for + new drone to be added. 
+ """ + server_models.validate(role=server_models.ServerRole.ROLE.DRONE) + server_manager_utils.check_server(mox.IgnoreArg(), + mox.IgnoreArg()).AndReturn(True) + server_manager_utils.use_server_db().MultipleTimes().AndReturn(True) + self.mox.StubOutWithMock(self.PRIMARY_SCHEDULER, 'get_role_names') + self.PRIMARY_SCHEDULER.get_role_names().AndReturn( + [server_models.ServerRole.ROLE.SCHEDULER]) + server_models.ServerRole.objects.create( + server=self.PRIMARY_SCHEDULER, + role=server_models.ServerRole.ROLE.DRONE + ).AndReturn(self.DRONE_ROLE) + server_models.Server.objects.filter( + roles__role=server_models.ServerRole.ROLE.SCHEDULER, + status=server_models.Server.STATUS.PRIMARY + ).AndReturn([self.PRIMARY_SCHEDULER]) + infra.execute_command(mox.IgnoreArg(), mox.IgnoreArg()) + self.mox.ReplayAll() + server_manager._add_role(self.PRIMARY_SCHEDULER, + server_models.ServerRole.ROLE.DRONE, + action=True) + + + def testDeleteRoleFromPrimarySuccess(self): + """Test manager can delete a role from a primary server successfully. + + Confirm that database call is made, and actions are taken, e.g., + restart scheduler to delete an existing drone. 
+ """ + server_manager_utils.use_server_db().MultipleTimes().AndReturn(True) + server_models.validate(role=server_models.ServerRole.ROLE.DRONE) + self.mox.StubOutWithMock(self.PRIMARY_DRONE, 'get_role_names') + self.PRIMARY_DRONE.get_role_names().MultipleTimes().AndReturn( + [server_models.ServerRole.ROLE.DRONE]) + + self.mox.StubOutWithMock(self.PRIMARY_DRONE.roles, 'get') + self.PRIMARY_DRONE.roles.get( + role=server_models.ServerRole.ROLE.DRONE + ).AndReturn(self.DRONE_ROLE) + + server_models.Server.objects.filter( + roles__role=server_models.ServerRole.ROLE.SCHEDULER, + status=server_models.Server.STATUS.PRIMARY + ).AndReturn([self.PRIMARY_SCHEDULER]) + server_manager.server_manager_utils.warn_missing_role( + server_models.ServerRole.ROLE.DRONE, self.PRIMARY_DRONE) + infra.execute_command(mox.IgnoreArg(), mox.IgnoreArg()) + self.mox.ReplayAll() + server_manager._delete_role(self.PRIMARY_DRONE, + server_models.ServerRole.ROLE.DRONE, + action=True) + + + def testDeleteRoleFromPrimarySuccess_NoAction(self): + """Test manager can delete a role from a primary server successfully. + + Confirm that database call is made, and no action is taken as action + is set to False. + """ + server_manager_utils.use_server_db().MultipleTimes().AndReturn(True) + server_models.validate(role=server_models.ServerRole.ROLE.DRONE) + self.mox.StubOutWithMock(self.PRIMARY_DRONE, 'get_role_names') + self.PRIMARY_DRONE.get_role_names().MultipleTimes().AndReturn( + [server_models.ServerRole.ROLE.DRONE]) + + self.mox.StubOutWithMock(self.PRIMARY_DRONE.roles, 'get') + self.PRIMARY_DRONE.roles.get( + role=server_models.ServerRole.ROLE.DRONE + ).AndReturn(self.DRONE_ROLE) + + server_manager.server_manager_utils.warn_missing_role( + server_models.ServerRole.ROLE.DRONE, self.PRIMARY_DRONE) + self.mox.ReplayAll() + server_manager._delete_role(self.PRIMARY_DRONE, + server_models.ServerRole.ROLE.DRONE, + action=False) if '__main__':
diff --git a/site_utils/server_manager_utils.py b/site_utils/server_manager_utils.py new file mode 100644 index 0000000..21c1088 --- /dev/null +++ b/site_utils/server_manager_utils.py
@@ -0,0 +1,271 @@ +# Copyright 2014 The Chromium OS Authors. All rights reserved. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. + +"""This module provides utility functions to help managing servers in server +database (defined in global config section AUTOTEST_SERVER_DB). + +""" + +import subprocess +import sys + +import common + +import django.core.exceptions +from autotest_lib.client.common_lib.global_config import global_config +from autotest_lib.frontend.server import models as server_models +from autotest_lib.site_utils.lib import infra + + +class ServerActionError(Exception): + """Exception raised when action on server failed. + """ + + +def use_server_db(): + """Check if use_server_db is enabled in configuration. + + @return: True if use_server_db is set to True in global config. + """ + return global_config.get_config_value( + 'SERVER', 'use_server_db', default=False, type=bool) + + +def warn_missing_role(role, exclude_server): + """Post a warning if Autotest instance has no other primary server with + given role. + + @param role: Name of the role. + @param exclude_server: Server to be excluded from search for role. + """ + servers = server_models.Server.objects.filter( + roles__role=role, + status=server_models.Server.STATUS.PRIMARY).exclude( + hostname=exclude_server.hostname) + if not servers: + message = ('WARNING! There will be no server with role %s after it\'s ' + 'removed from server %s. Autotest will not function ' + 'normally without any server in role %s.' % + (role, exclude_server.hostname, role)) + print >> sys.stderr, message + + +def get_servers(hostname=None, role=None, status=None): + """Find servers with given role and status. + + @param hostname: hostname of the server. + @param role: Role of server, default to None. + @param status: Status of server, default to None. + + @return: A list of server objects with given role and status. 
+ """ + filters = {} + if hostname: + filters['hostname'] = hostname + if role: + filters['roles__role'] = role + if status: + filters['status'] = status + return list(server_models.Server.objects.filter(**filters)) + + +def get_server_details(servers, table=False, summary=False): + """Get a string of given servers' details. + + The method can return a string of server information in 3 different formats: + A detail view: + Hostname : server2 + Status : primary + Roles : drone + Attributes : {'max_processes':300} + Date Created : 2014-11-25 12:00:00 + Date Modified: None + Note : Drone in lab1 + A table view: + Hostname | Status | Roles | Date Created | Date Modified | Note + server1 | backup | scheduler | 2014-11-25 23:45:19 | | + server2 | primary | drone | 2014-11-25 12:00:00 | | Drone + A summary view: + scheduler : server1(backup), server3(primary), + host_scheduler : + drone : server2(primary), + devserver : + database : + suite_scheduler: + crash_server : + No Role : + + The method returns detail view of each server and a summary view by default. + If `table` is set to True, only table view will be returned. + If `summary` is set to True, only summary view will be returned. + + @param servers: A list of servers to get details. + @param table: True to return a table view instead of a detail view, + default is set to False. + @param summary: True to only show the summary of roles and status of + given servers. + + @return: A string of the information of given servers. + """ + # Format string to display a table view. 
+ # Hostname, Status, Roles, Date Created, Date Modified, Note + TABLEVIEW_FORMAT = ('%(hostname)-30s | %(status)-7s | %(roles)-20s | ' + '%(date_created)-19s | %(date_modified)-19s | %(note)s') + + result = '' + if not table and not summary: + for server in servers: + result += '\n' + str(server) + elif table: + result += (TABLEVIEW_FORMAT % + {'hostname':'Hostname', 'status':'Status', + 'roles':'Roles', 'date_created':'Date Created', + 'date_modified':'Date Modified', 'note':'Note'}) + for server in servers: + roles = ','.join(server.get_role_names()) + result += '\n' + (TABLEVIEW_FORMAT % + {'hostname':server.hostname, + 'status': server.status or '', + 'roles': roles, + 'date_created': server.date_created, + 'date_modified': server.date_modified or '', + 'note': server.note or ''}) + elif summary: + result += 'Roles and status of servers:\n\n' + for role, _ in server_models.ServerRole.ROLE.choices(): + servers_of_role = [s for s in servers if role in + [r.role for r in s.roles.all()]] + result += '%-15s: ' % role + for server in servers_of_role: + result += '%s(%s), ' % (server.hostname, server.status) + result += '\n' + servers_without_role = [s.hostname for s in servers + if not s.roles.all()] + result += '%-15s: %s' % ('No Role', ', '.join(servers_without_role)) + + return result + + +def check_server(hostname, role): + """Confirm server with given hostname is ready to be primary of given role. + + If the server is a backup and failed to be verified for the role, remove + the role from its roles list. If it has no other role, set its status to + repair_required. + + @param hostname: hostname of the server. + @param role: Role to be checked. + @return: True if server can be verified for the given role, otherwise + return False. + """ + # TODO(dshi): Add more logic to confirm server is ready for the role. + # For now, the function just checks if server is ssh-able. 
+ try: + infra.execute_command(hostname, 'true') + return True + except subprocess.CalledProcessError as e: + print >> sys.stderr, ('Failed to check server %s, error: %s' % + (hostname, e)) + return False + + +def verify_server(exist=True): + """Decorator to check if server with given hostname exists in the database. + + @param exist: Set to True to confirm server exists in the database, raise + exception if not. If it's set to False, raise exception if + server exists in database. Default is True. + + @raise ServerActionError: If `exist` is True and server does not exist in + the database, or `exist` is False and server exists + in the database. + """ + def deco_verify(func): + """Wrapper for the decorator. + + @param func: Function to be called. + """ + def func_verify(*args, **kwargs): + """Decorator to check if server exists. + + If exist is set to True, raise ServerActionError is server with + given hostname is not found in server database. + If exist is set to False, raise ServerActionError is server with + given hostname is found in server database. + + @param func: function to be called. + @param args: arguments for function to be called. + @param kwargs: keyword arguments for function to be called. + """ + hostname = kwargs['hostname'] + try: + server = server_models.Server.objects.get(hostname=hostname) + except django.core.exceptions.ObjectDoesNotExist: + server = None + + if not exist and server: + raise ServerActionError('Server %s already exists.' % + hostname) + if exist and not server: + raise ServerActionError('Server %s does not exist in the ' + 'database.' % hostname) + if server: + kwargs['server'] = server + return func(*args, **kwargs) + return func_verify + return deco_verify + + +def get_drones(): + """Get a list of drones in status primary. + + @return: A list of drones in status primary. 
+ """ + servers = get_servers(role=server_models.ServerRole.ROLE.DRONE, + status=server_models.Server.STATUS.PRIMARY) + return [s.hostname for s in servers] + + +def delete_attribute(server, attribute): + """Delete the attribute from the host. + + @param server: An object of server_models.Server. + @param attribute: Name of an attribute of the server. + """ + attributes = server.attributes.filter(attribute=attribute) + if not attributes: + raise ServerActionError('Server %s does not have attribute %s' % + (server.hostname, attribute)) + attributes[0].delete() + print 'Attribute %s is deleted from server %s.' % (attribute, + server.hostname) + + +def change_attribute(server, attribute, value): + """Change the value of an attribute of the server. + + @param server: An object of server_models.Server. + @param attribute: Name of an attribute of the server. + @param value: Value of the attribute of the server. + + @raise ServerActionError: If the attribute already exists and has the + given value. + """ + attributes = server_models.ServerAttribute.objects.filter( + server=server, attribute=attribute) + if attributes and attributes[0].value == value: + raise ServerActionError('Attribute %s for Server %s already has ' + 'value of %s.' % + (attribute, server.hostname, value)) + if attributes: + old_value = attributes[0].value + attributes[0].value = value + attributes[0].save() + print ('Attribute `%s` of server %s is changed from %s to %s.' % + (attribute, server.hostname, old_value, value)) + else: + server_models.ServerAttribute.objects.create( + server=server, attribute=attribute, value=value) + print ('Attribute `%s` of server %s is set to %s.' % + (attribute, server.hostname, value))