(Submitting for dalecurtis. For some reason, Patchwork doesn't want to pick
up his patches...)

All,

First off, sorry for the spam, hopefully this will be last message and
Patchwork will finally pick up the patch. Reposted with modifications from
prior discussion per James Ren. Credit for suggestion to split on commas
goes to Jongki Suwandi.

The "host list", "host create", and "job create" command line options for
the atest CLI module do not support labels with commas. In our deployment we
have several labels which contain commas, leading to some bad parsing when
trying to use the CLI tools. I've added escaped comma support to the
topic_common.item_parse_info.get_values.__get_items(...) function to fix
this.

For example, atest host list --label "label0,comma\,label" will now parse
the labels as 'label0' and 'comma,label'.

Any option using topic_common.item_parse_info gets this behavior for free.
Specifically for our needs, I've changed the "atest host list" and "atest
job create" methods from using <string>.split(',') to
topic_common.item_parse_info for labels.

Implementation wise, I modified the
topic_common.item_parse_info.get_values.__get_items(...) regex so that it
would only split commas not preceded by slashes. In order to handle labels
that may end in slashes, I replace double slashes with a null character
before splitting and then put a single slash back afterwards. The new regex
is r'(?<!\),|\s' if space splitting is enabled and r'(?<!\),' otherwise.

Testing was done through 11 new unit tests, exercising escaped commas with
and without preceding escaped slashes. As well as manual testing on our own
deployment.

All unit tests pass and existing deployments should not be affected. A minor
quibble is consistency from a user point of view. I've only added support
for escaped commas to the topic_common.item_parse_info class, but there are
a couple other areas which are still using <string>.split(',') to parse
their options. Specifically status names, host names, test names, and
dependencies in the host.py and job.py files. I've left these alone for now,
but if anyone objects I can roll them in.

I've attached the patch file for the changes, but if you prefer a more GUI
driven experience, you can see the change list here:

http://codereview.chromium.org/2740001/show

Risk: Low-Medium (CLI infrastructure and tests modified)
Visibility: CLI users
Signed-off-by: Dale Curtis

Regards,

Dale Curtis
Software Engineer in Test @ Google



git-svn-id: http://test.kernel.org/svn/autotest/trunk@4718 592f7852-d20e-0410-864c-8624ca9c26a4
diff --git a/cli/host.py b/cli/host.py
index e5bfd10..4601371 100644
--- a/cli/host.py
+++ b/cli/host.py
@@ -128,8 +128,11 @@
 
     def parse(self):
         """Consume the specific options"""
-        (options, leftover) = super(host_list, self).parse()
-        self.labels = options.label
+        label_info = topic_common.item_parse_info(attribute_name='labels',
+                                                  inline_option='label')
+
+        (options, leftover) = super(host_list, self).parse([label_info])
+
         self.status = options.status
         self.acl = options.acl
         self.user = options.user
@@ -151,15 +154,12 @@
             check_results['hostname__in'] = 'hostname'
 
         if self.labels:
-            labels = self.labels.split(',')
-            labels = [label.strip() for label in labels if label.strip()]
-
-            if len(labels) == 1:
+            if len(self.labels) == 1:
                 # This is needed for labels with wildcards (x86*)
-                filters['labels__name__in'] = labels
+                filters['labels__name__in'] = self.labels
                 check_results['labels__name__in'] = None
             else:
-                filters['multiple_labels'] = labels
+                filters['multiple_labels'] = self.labels
                 check_results['multiple_labels'] = None
 
         if self.status:
diff --git a/cli/host_unittest.py b/cli/host_unittest.py
index 197e9f3..911de1a 100755
--- a/cli/host_unittest.py
+++ b/cli/host_unittest.py
@@ -70,7 +70,7 @@
         hl = host.host_list()
         sys.argv = ['atest', '--label', 'label0']
         (options, leftover) = hl.parse()
-        self.assertEqual('label0', hl.labels)
+        self.assertEqual(['label0'], hl.labels)
         self.assertEqual(leftover, [])
 
 
@@ -78,7 +78,23 @@
         hl = host.host_list()
         sys.argv = ['atest', '--label', 'label0,label2']
         (options, leftover) = hl.parse()
-        self.assertEqual('label0,label2', hl.labels)
+        self.assertEqualNoOrder(['label0', 'label2'], hl.labels)
+        self.assertEqual(leftover, [])
+
+
+    def test_parse_with_escaped_commas_label(self):
+        hl = host.host_list()
+        sys.argv = ['atest', '--label', 'label\\,0']
+        (options, leftover) = hl.parse()
+        self.assertEqual(['label,0'], hl.labels)
+        self.assertEqual(leftover, [])
+
+
+    def test_parse_with_escaped_commas_multi_labels(self):
+        hl = host.host_list()
+        sys.argv = ['atest', '--label', 'label\\,0,label\\,2']
+        (options, leftover) = hl.parse()
+        self.assertEqualNoOrder(['label,0', 'label,2'], hl.labels)
         self.assertEqual(leftover, [])
 
 
@@ -90,7 +106,7 @@
         (options, leftover) = hl.parse()
         self.assertEqualNoOrder(['host0', 'host1','host3', 'host4'],
                                 hl.hosts)
-        self.assertEqual('label0', hl.labels)
+        self.assertEqual(['label0'], hl.labels)
         self.assertEqual(leftover, [])
         mfile.clean()
 
@@ -277,8 +293,8 @@
     def test_execute_list_filter_multi_labels(self):
         self.run_cmd(argv=['atest', 'host', 'list',
                            '-b', 'label3,label2', '--ignore_site_file'],
-                     rpcs=[('get_hosts', {'multiple_labels': ['label3',
-                                                              'label2']},
+                     rpcs=[('get_hosts', {'multiple_labels': ['label2',
+                                                              'label3']},
                             True,
                             [{u'status': u'Ready',
                               u'hostname': u'host1',
@@ -309,8 +325,8 @@
         self.run_cmd(argv=['atest', 'host', 'list',
                            '-b', 'label3,label2, label4',
                            '--ignore_site_file'],
-                     rpcs=[('get_hosts', {'multiple_labels': ['label3',
-                                                              'label2',
+                     rpcs=[('get_hosts', {'multiple_labels': ['label2',
+                                                              'label3',
                                                               'label4']},
                             True,
                             [{u'status': u'Ready',
@@ -355,8 +371,8 @@
     def test_execute_list_filter_multi_labels_no_results(self):
         self.run_cmd(argv=['atest', 'host', 'list',
                            '-b', 'label3,label2, ', '--ignore_site_file'],
-                     rpcs=[('get_hosts', {'multiple_labels': ['label3',
-                                                              'label2']},
+                     rpcs=[('get_hosts', {'multiple_labels': ['label2',
+                                                              'label3']},
                             True,
                             [])],
                      out_words_ok=[],
@@ -1339,5 +1355,61 @@
                      out_words_ok=['host0', 'host1'])
 
 
+    def test_execute_create_muliple_hosts_label_escaped_quotes(self):
+        self.run_cmd(argv=['atest', 'host', 'create',
+                           '-b', 'label0,label\\,1,label\\,2',
+                           '--acls', 'acl0', 'host0', 'host1',
+                           '--ignore_site_file'],
+                     rpcs=[('get_labels', {'name': 'label0'},
+                            True,
+                            [{u'id': 4,
+                              u'platform': 0,
+                              u'name': u'label0',
+                              u'invalid': False,
+                              u'kernel_config': u''}]),
+                           ('get_labels', {'name': 'label,1'},
+                            True,
+                            [{u'id': 4,
+                              u'platform': 0,
+                              u'name': u'label,1',
+                              u'invalid': False,
+                              u'kernel_config': u''}]),
+                           ('get_labels', {'name': 'label,2'},
+                            True,
+                            [{u'id': 4,
+                              u'platform': 0,
+                              u'name': u'label,2',
+                              u'invalid': False,
+                              u'kernel_config': u''}]),
+                           ('get_acl_groups', {'name': 'acl0'},
+                            True, []),
+                           ('add_acl_group', {'name': 'acl0'},
+                            True, 5),
+                           ('add_host', {'hostname': 'host1',
+                                         'status': 'Ready',
+                                         'locked': True},
+                            True, 42),
+                           ('host_add_labels', {'id': 'host1',
+                                                'labels': ['label0', 'label,1',
+                                                           'label,2']},
+                            True, None),
+                           ('add_host', {'hostname': 'host0',
+                                         'status': 'Ready',
+                                         'locked': True},
+                            True, 42),
+                           ('host_add_labels', {'id': 'host0',
+                                                'labels': ['label0', 'label,1',
+                                                           'label,2']},
+                            True, None),
+                           ('acl_group_add_hosts',
+                            {'id': 'acl0', 'hosts': ['host1', 'host0']},
+                            True, None),
+                           ('modify_host', {'id': 'host1', 'locked': False},
+                            True, None),
+                           ('modify_host', {'id': 'host0', 'locked': False},
+                            True, None)],
+                     out_words_ok=['host0', 'host1'])
+
+
 if __name__ == '__main__':
     unittest.main()
diff --git a/cli/job.py b/cli/job.py
index 276f9b1..a5122be 100644
--- a/cli/job.py
+++ b/cli/job.py
@@ -307,10 +307,12 @@
                                                 use_leftover=True)
         oth_info = topic_common.item_parse_info(attribute_name='one_time_hosts',
                                                 inline_option='one_time_hosts')
+        label_info = topic_common.item_parse_info(attribute_name='labels',
+                                                  inline_option='labels')
 
         options, leftover = super(job_create_or_clone,
-                                  self).parse([host_info, job_info, oth_info],
-                                              req_items='jobname')
+                                  self).parse([host_info, job_info, oth_info,
+                                               label_info], req_items='jobname')
         self.data = {}
         if len(self.jobname) > 1:
             self.invalid_syntax('Too many arguments specified, only expected '
@@ -323,11 +325,9 @@
         if self.one_time_hosts:
             self.data['one_time_hosts'] = self.one_time_hosts
 
-        if options.labels:
-            labels = options.labels.split(',')
-            labels = [label.strip() for label in labels if label.strip()]
+        if self.labels:
             label_hosts = self.execute_rpc(op='get_hosts',
-                                           multiple_labels=labels)
+                                           multiple_labels=self.labels)
             for host in label_hosts:
                 self.hosts.append(host['hostname'])
 
diff --git a/cli/job_unittest.py b/cli/job_unittest.py
index 15b820e..d0240e5 100755
--- a/cli/job_unittest.py
+++ b/cli/job_unittest.py
@@ -1163,6 +1163,50 @@
         file_temp.clean()
 
 
+    def test_execute_create_job_with_label_commas_and_duplicate_hosts(self):
+        data = self.data.copy()
+        data['hosts'] = ['host1', 'host0']
+        file_temp = cli_mock.create_file(self.ctrl_file)
+        self.run_cmd(argv=['atest', 'job', 'create', '-f', file_temp.name,
+                           'test_job0', '-m', 'host0,host1', '-b',
+                           'label1,label\\,2', '--ignore_site_file'],
+                     rpcs=[('get_hosts', {'multiple_labels': ['label1',
+                            'label,2']}, True,
+                            [{u'status': u'Running', u'lock_time': None,
+                              u'hostname': u'host1', u'locked': False,
+                              u'locked_by': None, u'invalid': False, u'id': 42,
+                              u'labels': [u'label1', u'label,2'], u'platform':
+                              u'Warp18_Diskfull', u'protection':
+                              u'Repair software only', u'dirty':
+                              True, u'synch_id': None}]),
+                            ('create_job', data, True, 42)],
+                     out_words_ok=['test_job0', 'Created'],
+                     out_words_no=['Uploading', 'Done'])
+        file_temp.clean()
+
+
+    def test_execute_create_job_with_label_escaping_and_duplicate_hosts(self):
+        data = self.data.copy()
+        data['hosts'] = ['host1', 'host0']
+        file_temp = cli_mock.create_file(self.ctrl_file)
+        self.run_cmd(argv=['atest', 'job', 'create', '-f', file_temp.name,
+                           'test_job0', '-m', 'host0,host1', '-b',
+                           'label1,label\\,2\\\\,label3', '--ignore_site_file'],
+                     rpcs=[('get_hosts', {'multiple_labels': ['label,2\\',
+                            'label1', 'label3']}, True,
+                            [{u'status': u'Running', u'lock_time': None,
+                              u'hostname': u'host1', u'locked': False,
+                              u'locked_by': None, u'invalid': False, u'id': 42,
+                              u'labels': [u'label1', u'label,2\\', u'label3'],
+                              u'platform': u'Warp18_Diskfull', u'protection':
+                              u'Repair software only', u'dirty':
+                              True, u'synch_id': None}]),
+                            ('create_job', data, True, 42)],
+                     out_words_ok=['test_job0', 'Created'],
+                     out_words_no=['Uploading', 'Done'])
+        file_temp.clean()
+
+
     def _test_parse_hosts(self, args, exp_hosts=[], exp_meta_hosts=[]):
         testjob = job.job_create_or_clone()
         (hosts, meta_hosts) = testjob._parse_hosts(args)
diff --git a/cli/topic_common.py b/cli/topic_common.py
index 958a0b9..d2fd891 100644
--- a/cli/topic_common.py
+++ b/cli/topic_common.py
@@ -56,7 +56,7 @@
 """
 
 import getpass, optparse, os, pwd, re, socket, sys, textwrap, traceback
-import socket, urllib2
+import socket, string, urllib2
 from autotest_lib.cli import rpc
 from autotest_lib.frontend.afe.json_rpc import proxy
 from autotest_lib.client.common_lib.test_utils import mock
@@ -176,9 +176,27 @@
         """Returns the value for that attribute by accumualting all
         the values found through the inline option, the parsing of the
         file and the leftover"""
-        def __get_items(string, split_re='[\s,]\s*'):
-            return (item.strip() for item in re.split(split_re, string)
-                    if item)
+
+        def __get_items(input, split_spaces=True):
+            """Splits a string of comma separated items. Escaped commas will not
+            be split. I.e. Splitting 'a, b\,c, d' will yield ['a', 'b,c', 'd'].
+            If split_spaces is set to False spaces will not be split. I.e.
+            Splitting 'a b, c\,d, e' will yield ['a b', 'c,d', 'e']"""
+
+            # Replace escaped slashes with null characters so we don't misparse
+            # proceeding commas.
+            input = input.replace(r'\\', '\0')
+
+            # Split on commas which are not preceded by a slash.
+            if not split_spaces:
+                split = re.split(r'(?<!\\),', input)
+            else:
+                split = re.split(r'(?<!\\),|\s', input)
+
+            # Convert null characters to single slashes and escaped commas to
+            # just plain commas.
+            return (item.strip().replace('\0', '\\').replace(r'\,', ',') for
+                    item in split if item.strip())
 
         if self.use_leftover:
             add_on = leftover
@@ -191,7 +209,7 @@
         for items in add_on:
             # Don't split on space here because the add-on
             # may have some spaces (like the job name)
-            result.update(__get_items(items, split_re='[,]'))
+            result.update(__get_items(items, split_spaces=False))
 
         # Process the inline_option, if any
         try:
diff --git a/cli/topic_common_unittest.py b/cli/topic_common_unittest.py
index 5f4284e..d3bdacb 100755
--- a/cli/topic_common_unittest.py
+++ b/cli/topic_common_unittest.py
@@ -161,6 +161,20 @@
         self.__test_parsing_flist_good(opt(), ['a', 'b', 'c'])
 
 
+    def test_file_list_escaped_commas(self):
+        class opt(object):
+            flist_obj = cli_mock.create_file('a\nb\\,c\\,d\nef\\,g')
+            flist = flist_obj.name
+        self.__test_parsing_flist_good(opt(), ['a', 'b,c,d', 'ef,g'])
+
+
+    def test_file_list_escaped_commas_slashes(self):
+        class opt(object):
+            flist_obj = cli_mock.create_file('a\nb\\\\\\,c\\,d\nef\\\\,g')
+            flist = flist_obj.name
+        self.__test_parsing_flist_good(opt(), ['a', 'b\\,c,d', 'ef\\', 'g'])
+
+
     def test_file_list_opt_list_one(self):
         class opt(object):
             inline = 'a'
@@ -191,6 +205,18 @@
         self.__test_parsing_inline_good(opt(), ['a', 'b', 'c', 'd', 'e'])
 
 
+    def test_file_list_opt_list_escaped_commas(self):
+        class opt(object):
+            inline = 'a\\,b,c, d'
+        self.__test_parsing_inline_good(opt(), ['a,b', 'c', 'd'])
+
+
+    def test_file_list_opt_list_escaped_commas_slashes(self):
+        class opt(object):
+            inline = 'a\\,b\\\\\\,c,d,e'
+        self.__test_parsing_inline_good(opt(), ['a,b\\,c', 'd', 'e'])
+
+
     def test_file_list_add_on_space(self):
         self.__test_parsing_leftover_good(['a','c','b'],
                                           ['a', 'b', 'c'])
@@ -211,6 +237,16 @@
                                           ['a', 'b', 'c', 'd'])
 
 
+    def test_file_list_add_on_escaped_commas(self):
+        self.__test_parsing_leftover_good(['a', 'c', 'b,', 'd\\,e\\,f'],
+                                          ['a', 'b', 'c', 'd,e,f'])
+
+
+    def test_file_list_add_on_escaped_commas_slashes(self):
+        self.__test_parsing_leftover_good(['a', 'c', 'b,', 'd\\\\\\,e,f'],
+                                          ['a', 'b', 'c', 'd\\,e', 'f'])
+
+
     def test_file_list_all_opt(self):
         class opt(object):
             flist_obj = cli_mock.create_file('f\ng\nh\n')
@@ -258,6 +294,26 @@
                                       'f', 'g', 'h', 'i', 'j'])
 
 
+    def test_file_list_all_opt_in_common_escaped_commas(self):
+        class opt(object):
+            flist_obj = cli_mock.create_file('a\\,b\\,c\nd,e\nf\ng')
+            flist = flist_obj.name
+            inline = 'a\\,b\\,c,d h'
+        self.__test_parsing_all_good(opt(), ['i','j,d'],
+                                     ['a,b,c', 'd', 'e', 'f', 'g', 'h',
+                                      'i', 'j'])
+
+
+    def test_file_list_all_opt_in_common_escaped_commas_slashes(self):
+        class opt(object):
+            flist_obj = cli_mock.create_file('a\\,b\\\\\\,c\nd,e\nf,ghi, ,, j,')
+            flist = flist_obj.name
+            inline = 'a\\,b\\\\\\,c,d h,ijk'
+        self.__test_parsing_all_good(opt(), ['i','j,d'],
+                                     ['a,b\\,c', 'd', 'e', 'f', 'ghi', 'h',
+                                      'i', 'j', 'ijk'])
+
+
 class atest_unittest(cli_mock.cli_unittest):
     def setUp(self):
         super(atest_unittest, self).setUp()