barrier cleanups:
* renames barrier.py to base_barrier.py and adds a barrier.py
  stub to import from base_barrier and override with site_barrier
  if found.  barrier_unittest.py is renamed to match.
* Move BarrierAbortError to the error module with everything else.
* Add a rendezvous_servers abort=True call to the server test case.
* Moved get_sync_control_file() from common_lib.utils to
  server.base_utils where it belongs to avoid a circular
  import of utils importing barrier.

Signed-off-by: Gregory Smith <[email protected]>



git-svn-id: http://test.kernel.org/svn/autotest/trunk@4444 592f7852-d20e-0410-864c-8624ca9c26a4
diff --git a/server/base_utils.py b/server/base_utils.py
index 7953a6b..6d683b2 100644
--- a/server/base_utils.py
+++ b/server/base_utils.py
@@ -10,7 +10,7 @@
 
 import atexit, os, re, shutil, textwrap, sys, tempfile, types
 
-from autotest_lib.client.common_lib import utils
+from autotest_lib.client.common_lib import barrier, utils
 from autotest_lib.server import subcommand
 
 
@@ -323,3 +323,106 @@
     public_key.close()
 
     return public_key_str
+
+
+def get_sync_control_file(control, host_name, host_num,
+                          instance, num_jobs, port_base=63100):
+    """
+    This function is used when there is a need to run more than one
+    job simultaneously starting exactly at the same time. It basically returns
+    a modified control file (containing the synchronization code prepended)
+    whenever it is ready to run the control file. The synchronization
+    is done using barriers to make sure that the jobs start at the same time.
+
+    Here is how the synchronization is done to make sure that the tests
+    start at exactly the same time on the client.
+    sc_bar is a server barrier and s_bar, c_bar are the normal barriers
+
+                      Job1              Job2         ......      JobN
+     Server:   |                        sc_bar
+     Server:   |                        s_bar        ......      s_bar
+     Server:   |      at.run()         at.run()      ......      at.run()
+     ----------|------------------------------------------------------
+     Client    |      sc_bar
+     Client    |      c_bar             c_bar        ......      c_bar
+     Client    |    <run test>         <run test>    ......     <run test>
+
+    @param control: The control file which to which the above synchronization
+            code will be prepended.
+    @param host_name: The host name on which the job is going to run.
+    @param host_num: (non negative) A number to identify the machine so that
+            we have different sets of s_bar_ports for each of the machines.
+    @param instance: The number of the job
+    @param num_jobs: Total number of jobs that are going to run in parallel
+            with this job starting at the same time.
+    @param port_base: Port number that is used to derive the actual barrier
+            ports.
+
+    @returns The modified control file.
+    """
+    sc_bar_port = port_base
+    c_bar_port = port_base
+    if host_num < 0:
+        print "Please provide a non negative number for the host"
+        return None
+    s_bar_port = port_base + 1 + host_num # The set of s_bar_ports are
+                                          # the same for a given machine
+
+    sc_bar_timeout = 180
+    s_bar_timeout = c_bar_timeout = 120
+
+    # The barrier code snippet is prepended into the control file
+    # dynamically before at.run() is called finally.
+    control_new = []
+
+    # jobid is the unique name used to identify the processes
+    # trying to reach the barriers
+    jobid = "%s#%d" % (host_name, instance)
+
+    rendv = []
+    # rendvstr is a temp holder for the rendezvous list of the processes
+    for n in range(num_jobs):
+        rendv.append("'%s#%d'" % (host_name, n))
+    rendvstr = ",".join(rendv)
+
+    if instance == 0:
+        # Do the setup and wait at the server barrier
+        # Clean up the tmp and the control dirs for the first instance
+        control_new.append('if os.path.exists(job.tmpdir):')
+        control_new.append("\t system('umount -f %s > /dev/null"
+                           " 2> /dev/null' % job.tmpdir,"
+                           "ignore_status=True)")
+        control_new.append("\t system('rm -rf ' + job.tmpdir)")
+        control_new.append(
+            'b0 = job.barrier("%s", "sc_bar", %d, port=%d)'
+            % (jobid, sc_bar_timeout, sc_bar_port))
+        control_new.append(
+        'b0.rendezvous_servers("PARALLEL_MASTER", "%s")'
+         % jobid)
+
+    elif instance == 1:
+        # Wait at the server barrier to wait for instance=0
+        # process to complete setup
+        b0 = barrier.barrier("PARALLEL_MASTER", "sc_bar", sc_bar_timeout,
+                     port=sc_bar_port)
+        b0.rendezvous_servers("PARALLEL_MASTER", jobid)
+
+        if(num_jobs > 2):
+            b1 = barrier.barrier(jobid, "s_bar", s_bar_timeout,
+                         port=s_bar_port)
+            b1.rendezvous(rendvstr)
+
+    else:
+        # For the rest of the clients
+        b2 = barrier.barrier(jobid, "s_bar", s_bar_timeout, port=s_bar_port)
+        b2.rendezvous(rendvstr)
+
+    # Client side barrier for all the tests to start at the same time
+    control_new.append('b1 = job.barrier("%s", "c_bar", %d, port=%d)'
+                    % (jobid, c_bar_timeout, c_bar_port))
+    control_new.append("b1.rendezvous(%s)" % rendvstr)
+
+    # Stick in the rest of the control file
+    control_new.append(control)
+
+    return "\n".join(control_new)