blob: 0e3b36deabc2fe0bcb94dc07bc2470be5e2aa2eb [file] [log] [blame]
Justin Bognerae96b7f2017-10-18 02:20:31 +00001#!/usr/bin/env python
2
3"""Updates FileCheck checks in MIR tests.
4
5This script is a utility to update MIR based tests with new FileCheck
6patterns.
7
8The checks added by this script will cover the entire body of each
9function it handles. Virtual registers used are given names via
10FileCheck patterns, so if you do want to check a subset of the body it
11should be straightforward to trim out the irrelevant parts. None of
12the YAML metadata will be checked, other than function names.
13
14If there are multiple llc commands in a test, the full set of checks
15will be repeated for each different check pattern. Checks for patterns
16that are common between different commands will be left as-is by
17default, or removed if the --remove-common-prefixes flag is provided.
18"""
19
20from __future__ import print_function
21
22import argparse
23import collections
24import os
25import re
26import subprocess
27import sys
28
# A lit RUN line behind a ';' or '#' comment marker; group 1 is the command.
RUN_LINE_RE = re.compile(r'^\s*[;#]\s*RUN:\s*(.*)$')
# Target selection options on an llc command line; group 1 is the value.
TRIPLE_ARG_RE = re.compile(r'-mtriple[= ]([^ ]+)')
MARCH_ARG_RE = re.compile(r'-march[= ]([^ ]+)')
# A 'target triple = "..."' line; group 1 is the quoted triple.
TRIPLE_IR_RE = re.compile(r'^\s*target\s+triple\s*=\s*"([^"]+)"$')
# A FileCheck -check-prefix / --check-prefixes option; group 1 is its
# (possibly comma separated) value.
CHECK_PREFIX_RE = re.compile(r'--?check-prefix(?:es)?[= ](\S+)')
# An existing check line; group 1 is the prefix with any -NEXT, -NOT, -DAG
# or -LABEL suffix removed.
CHECK_RE = re.compile(r'^\s*[;#]\s*([^:]+?)(?:-NEXT|-NOT|-DAG|-LABEL)?:')

# A 'name:' line of a MIR function; the name is captured as 'func'.
MIR_FUNC_NAME_RE = re.compile(r' *name: *(?P<func>[A-Za-z0-9_.-]+)')
# The 'body: |' line that opens a MIR function body.
MIR_BODY_BEGIN_RE = re.compile(r' *body: *\|')
# A 'bb.N...:' basic block label line.
MIR_BASIC_BLOCK_RE = re.compile(r' *bb\.[0-9]+.*:$')
# A virtual register (group 1) with optional ':class' and '(type)' suffixes.
VREG_RE = re.compile(r'(%[0-9]+)(?::[a-z0-9_]+)?(?:\([<>a-z0-9 ]+\))?')
# A line defining one or more comma separated virtual registers ('vregs')
# followed by '= OPCODE' ('opcode').
VREG_DEF_RE = re.compile(
    r'^ *(?P<vregs>{0}(?:, {0})*) '
    r'= (?P<opcode>[A-Zt][A-Za-z0-9_]+)'.format(VREG_RE.pattern))
# Lines that may precede the first instruction of a MIR body: comments,
# block labels, lowercase 'key:' lines and blank lines.
MIR_PREFIX_DATA_RE = re.compile(r'^ *(;|bb.[0-9].*: *$|[a-z]+:( |$)|$)')
# One '- { id: N, class: C' entry of a 'registers:' list.
VREG_CLASS_RE = re.compile(r'^ *- *{ id: ([0-9]+), class: ([a-z0-9_]+)', re.M)

# An IR 'define' line; the function name is captured as 'func'.
IR_FUNC_NAME_RE = re.compile(
    r'^\s*define\s+(?:internal\s+)?[^@]*@(?P<func>[A-Za-z0-9_.]+)\s*\(')
# Lines that may precede the first instruction of an IR function body:
# comments and blank lines.
IR_PREFIX_DATA_RE = re.compile(r'^ *(;|$)')

# A whole MIR document from '---' to '...'. Captures the function name
# ('func'), the optional 'registers:' list ('vregs') and the text after
# 'body: |' ('body').
MIR_FUNC_RE = re.compile(
    r'^---$'
    r'\n'
    r'^ *name: *(?P<func>[A-Za-z0-9_.-]+)$'
    r'(?:.*?(?P<vregs>^ *registers: *(?:\n *- {[^\n]+$)*))?'
    r'.*?'
    r'^ *body: *\|\n'
    r'(?P<body>.*?)\n'
    r'^\.\.\.$',
    flags=(re.M | re.S))
60
class LLC:
    """Callable wrapper that runs an llc binary on a test file.

    Instances are called as ``llc(args, ir)`` and return the tool's
    standard output as text.
    """

    def __init__(self, bin):
        # The parameter name shadows the builtin 'bin'; it is kept so that
        # existing keyword callers continue to work.
        self.bin = bin

    def __call__(self, args, ir):
        """Run the binary with ``args``, feeding the file ``ir`` on stdin.

        Args:
            args: Command line arguments as a single shell string.
            ir: Path of the input file. If it ends in '.mir', '-x mir' is
                appended to the arguments.

        Returns:
            The captured stdout as a str with '\\r\\n' replaced by '\\n'.

        Raises:
            subprocess.CalledProcessError: if the command exits non-zero.
        """
        if ir.endswith('.mir'):
            args = '{} -x mir'.format(args)
        with open(ir) as ir_file:
            # The arguments are one shell-style string, so the command is
            # run through the shell.
            stdout = subprocess.check_output('{} {}'.format(self.bin, args),
                                             shell=True, stdin=ir_file)
        # check_output returns bytes on Python 3; decode so that the text
        # operations below (and in callers) work on both Python 2 and 3.
        if not isinstance(stdout, str):
            stdout = stdout.decode('utf-8')
        # Fix line endings to unix LF style.
        return stdout.replace('\r\n', '\n')
74
75
class Run:
    """One RUN line: its check prefixes, llc arguments and triple.

    Indexing yields prefixes, cmd_args and triple for indices 0, 1 and 2,
    which lets an instance be unpacked like a 3-tuple.
    """

    def __init__(self, prefixes, cmd_args, triple):
        self.prefixes, self.cmd_args, self.triple = prefixes, cmd_args, triple

    def __getitem__(self, index):
        # Built on every access so it reflects the current attribute values.
        fields = [self.prefixes, self.cmd_args, self.triple]
        return fields[index]
84
85
def log(msg, verbose=True):
    """Print ``msg`` to stderr; do nothing when ``verbose`` is false."""
    if not verbose:
        return
    print(msg, file=sys.stderr)
89
90
def warn(msg, test_file=None):
    """Print a 'WARNING: ' message to stderr.

    When ``test_file`` is given, the message is prefixed with it.
    """
    text = '{}: {}'.format(test_file, msg) if test_file else msg
    print('WARNING: {}'.format(text), file=sys.stderr)
95
96
def find_triple_in_ir(lines, verbose=False):
    """Return the triple from the first 'target triple' line, or None.

    ``verbose`` is accepted but not used.
    """
    matches = (TRIPLE_IR_RE.match(line) for line in lines)
    return next((found.group(1) for found in matches if found), None)
103
104
def find_run_lines(test, lines, verbose=False):
    """Collect the RUN commands in ``lines``, joining continued ones.

    A RUN line ending in a backslash is merged with the next RUN line,
    separated by a single space. ``test`` is accepted but not used.
    """
    run_lines = []
    for line in lines:
        found = RUN_LINE_RE.match(line)
        if not found:
            continue
        command = found.group(1)
        if run_lines and run_lines[-1].endswith('\\'):
            # Continuation of the previous RUN line.
            run_lines[-1] = run_lines[-1].rstrip('\\') + ' ' + command
        else:
            run_lines.append(command)
    if verbose:
        log('Found {} RUN lines:'.format(len(run_lines)))
        for command in run_lines:
            log(' RUN: {}'.format(command))
    return run_lines
119
120
def build_run_list(test, run_lines, verbose=False):
    """Turn RUN lines into Run objects.

    Only lines of the form 'llc ... | FileCheck ...' are used; others are
    skipped with a warning. Prefixes that occur more than once across the
    accepted lines are removed from every Run and returned separately.

    Returns:
        A (run_list, common_prefixes) pair, where common_prefixes is a set.
    """
    run_list = []
    seen_prefixes = set()
    common_prefixes = set()
    for run_line in run_lines:
        llc_part, _, filecheck_part = run_line.partition('|')
        llc_cmd = llc_part.strip()
        filecheck_cmd = filecheck_part.strip()

        if not llc_cmd.startswith('llc '):
            warn('Skipping non-llc RUN line: {}'.format(run_line),
                 test_file=test)
            continue
        if not filecheck_cmd.startswith('FileCheck '):
            warn('Skipping non-FileChecked RUN line: {}'.format(run_line),
                 test_file=test)
            continue

        # Prefer -mtriple; if we find -march but not -mtriple, use that.
        triple_match = TRIPLE_ARG_RE.search(llc_cmd)
        march_match = MARCH_ARG_RE.search(llc_cmd)
        if triple_match:
            triple = triple_match.group(1)
        elif march_match:
            triple = '{}--'.format(march_match.group(1))
        else:
            triple = None

        cmd_args = llc_cmd[len('llc'):].strip()
        cmd_args = cmd_args.replace('< %s', '').replace('%s', '').strip()

        check_prefixes = []
        for found in CHECK_PREFIX_RE.finditer(filecheck_cmd):
            check_prefixes.extend(found.group(1).split(','))
        if not check_prefixes:
            check_prefixes = ['CHECK']

        # A prefix seen a second time is common.
        for prefix in check_prefixes:
            if prefix in seen_prefixes:
                common_prefixes.add(prefix)
            else:
                seen_prefixes.add(prefix)

        run_list.append(Run(check_prefixes, cmd_args, triple))

    # Remove any common prefixes. We'll just leave those entirely alone.
    for run in run_list:
        run.prefixes = [p for p in run.prefixes if p not in common_prefixes]

    return run_list, common_prefixes
164
165
def find_functions_with_one_bb(lines, verbose=False):
    """Return the names of functions with exactly one basic block label.

    Names come from 'name:' lines; basic block lines are counted against
    the most recent name. ``verbose`` is accepted but not used.
    """
    # One [name, block_count] entry per 'name:' line, in order. The first
    # entry covers any block labels that appear before the first name.
    current = [None, 0]
    tallies = [current]
    for line in lines:
        name_match = MIR_FUNC_NAME_RE.match(line)
        if name_match:
            current = [name_match.group('func'), 0]
            tallies.append(current)
        if MIR_BASIC_BLOCK_RE.match(line):
            current[1] += 1
    return [name for name, blocks in tallies if blocks == 1]
183
184
def build_function_body_dictionary(test, raw_tool_output, triple, prefixes,
                                   func_dict, verbose):
    """Record each function found in the tool output under every prefix.

    For each MIR function in ``raw_tool_output``, the body is stored at
    ``func_dict[prefix][func]`` and its 'registers:' text (or None) at
    ``func_dict[prefix]['<func>:vregs']``. A warning is printed when a
    prefix already holds a different body for the function. ``triple`` is
    accepted but not used.
    """
    for found in MIR_FUNC_RE.finditer(raw_tool_output):
        func, body, vregs = found.group('func', 'body', 'vregs')
        if verbose:
            log('Processing function: {}'.format(func))
            for body_line in body.splitlines():
                log(' {}'.format(body_line))
        vregs_key = '{}:vregs'.format(func)
        for prefix in prefixes:
            per_prefix = func_dict[prefix]
            # An absent entry defaults to the new body, so it never conflicts.
            if per_prefix.get(func, body) != body:
                warn('Found conflicting asm for prefix: {}'.format(prefix),
                     test_file=test)
            per_prefix[func] = body
            per_prefix[vregs_key] = vregs
Justin Bognerae96b7f2017-10-18 02:20:31 +0000200
201
def add_checks_for_function(test, output_lines, run_list, func_dict, func_name,
                            add_vreg_checks, single_bb, verbose=False):
    """Append check lines for ``func_name`` to ``output_lines``.

    For each run, the first of its prefixes that has not been printed yet
    and has a recorded body for the function is used.

    Args:
        test: Test file name, passed through for warnings.
        output_lines: List that the check lines are appended to.
        run_list: Objects with a ``prefixes`` attribute.
        func_dict: Maps prefix -> {function name -> body text}, plus
            '<function name>:vregs' entries.
        func_name: Function to add checks for.
        add_vreg_checks: Whether to also check the 'registers:' block.
        single_bb: Passed through to add_check_lines.
        verbose: Whether to log progress.

    Returns:
        ``output_lines`` (the same list object).
    """
    printed_prefixes = set()
    for run in run_list:
        for prefix in run.prefixes:
            if prefix in printed_prefixes:
                continue
            # The tool output may not contain this function for this
            # prefix; skip it in that case instead of raising KeyError.
            body = func_dict[prefix].get(func_name)
            if not body:
                continue
            printed_prefixes.add(prefix)
            log('Adding {} lines for {}'.format(prefix, func_name), verbose)
            vregs = None
            if add_vreg_checks:
                vregs = func_dict[prefix].get('{}:vregs'.format(func_name))
            add_check_lines(test, output_lines, prefix, func_name, single_bb,
                            body.splitlines(), vregs)
            # Only one prefix is emitted per run.
            break
    return output_lines
223
224
def add_check_lines(test, output_lines, prefix, func_name, single_bb,
                    func_body, vreg_data):
    """Append the check lines for one function body to ``output_lines``.

    Emits a '-LABEL' line for the function name, optional 'registers:'
    checks built from ``vreg_data``, then one check line per non-blank
    body line. Virtual registers are replaced by FileCheck variables: a
    definition becomes '[[NAME:%[0-9]+]]' and later uses become
    '[[NAME]]'. When ``single_bb`` is true the first body line is dropped
    from ``func_body`` (the list is modified in place).
    """
    if single_bb:
        # Don't bother checking the basic block label for a single BB
        del func_body[0]

    if not func_body:
        warn('Function has no instructions to check: {}'.format(func_name),
             test_file=test)
        return

    leading = func_body[0]
    indent = len(leading) - len(leading.lstrip(' '))
    # A check comment, indented the appropriate amount
    check = '{}; {}'.format(' ' * indent, prefix)

    output_lines.append('{}-LABEL: name: {}'.format(check, func_name))

    if vreg_data:
        output_lines.append('{}: registers:'.format(check))
        output_lines.extend(
            '{}-NEXT: id: {}, class: {}'.format(check, entry.group(1),
                                                entry.group(2))
            for entry in VREG_CLASS_RE.finditer(vreg_data))

    vreg_map = {}
    for func_line in func_body:
        if not func_line.strip():
            continue
        definition = VREG_DEF_RE.match(func_line)
        if definition:
            opcode = definition.group('opcode')
            for vreg in VREG_RE.finditer(definition.group('vregs')):
                number = vreg.group(1)
                name = mangle_vreg(opcode, vreg_map.values())
                vreg_map[number] = name
                # Only the first occurrence is the definition itself.
                func_line = func_line.replace(
                    number, '[[{}:%[0-9]+]]'.format(name), 1)
        for number, name in vreg_map.items():
            func_line = re.sub(r'{}\b'.format(number), '[[{}]]'.format(name),
                               func_line)
        output_lines.append(
            '{}: {}'.format(check, func_line[indent:]).rstrip())
265
266
def mangle_vreg(opcode, current_names):
    """Derive a FileCheck variable name for a register defined by ``opcode``.

    The name is the opcode with a leading 'G_' and a trailing '_PSEUDO'
    removed, with some long opcodes abbreviated. If the result ends in a
    digit, '_' is appended so a numeric suffix stays unambiguous. When
    names with the same base already exist, their count is appended.

    Args:
        opcode: Opcode of the defining instruction.
        current_names: Iterable of names already in use.

    Returns:
        The new name as a string.
    """
    base = opcode
    # Simplify some common prefixes and suffixes
    if base.startswith('G_'):
        base = base[len('G_'):]
    # Strip the suffix (a positive slice bound would keep only the first
    # len('_PSEUDO') characters). Testing the stripped name keeps the
    # prefix and suffix from overlapping on an opcode like 'G_PSEUDO'.
    if base.endswith('_PSEUDO'):
        base = base[:-len('_PSEUDO')]
    # Shorten some common opcodes with long-ish names
    base = dict(IMPLICIT_DEF='DEF',
                GLOBAL_VALUE='GV',
                CONSTANT='C',
                FCONSTANT='C',
                MERGE_VALUES='MV',
                UNMERGE_VALUES='UV',
                INTRINSIC='INT',
                INTRINSIC_W_SIDE_EFFECTS='INT',
                INSERT_VECTOR_ELT='IVEC',
                EXTRACT_VECTOR_ELT='EVEC',
                SHUFFLE_VECTOR='SHUF').get(base, base)
    # Avoid ambiguity when opcodes end in numbers
    if len(base.rstrip('0123456789')) < len(base):
        base += '_'

    # Count the existing names that share this base, ignoring their
    # numeric suffix.
    taken = sum(1 for name in current_names
                if name.rstrip('0123456789') == base)
    if taken:
        return '{}{}'.format(base, taken)
    return base
297
298
def should_add_line_to_output(input_line, prefix_set):
    """Return False for check lines whose prefix is in ``prefix_set``.

    Any other line, including check lines for other prefixes, yields True.
    """
    # Skip any check lines that we're handling.
    found = CHECK_RE.match(input_line)
    return not (found and found.group(1) in prefix_set)
305
306
def update_test_file(llc, test, remove_common_prefixes=False,
                     add_vreg_checks=False, verbose=False):
    """Regenerate the FileCheck lines of one test file in place.

    Runs ``llc`` once per usable RUN line, records the function bodies it
    prints, then rewrites ``test``: existing check lines for the handled
    prefixes are dropped and fresh ones are inserted for each function.

    Args:
        llc: Callable taking (args, test path) and returning tool output.
        test: Path of the test file to update.
        remove_common_prefixes: Also drop existing check lines whose
            prefix is shared between several RUN lines.
        add_vreg_checks: Also emit checks for the 'registers:' block.
        verbose: Whether to log progress to stderr.

    Returns:
        None. If no triple is found in the command or in the file, a
        warning is printed and the file is left untouched.
    """
    log('Scanning for RUN lines in test file: {}'.format(test), verbose)
    with open(test) as fd:
        input_lines = [l.rstrip() for l in fd]

    triple_in_ir = find_triple_in_ir(input_lines, verbose)
    run_lines = find_run_lines(test, input_lines, verbose)
    run_list, common_prefixes = build_run_list(test, run_lines, verbose)

    simple_functions = find_functions_with_one_bb(input_lines, verbose)

    func_dict = {}
    for run in run_list:
        for prefix in run.prefixes:
            func_dict.update({prefix: dict()})
    for prefixes, llc_args, triple_in_cmd in run_list:
        log('Extracted LLC cmd: llc {}'.format(llc_args), verbose)
        log('Extracted FileCheck prefixes: {}'.format(prefixes), verbose)

        raw_tool_output = llc(llc_args, test)
        if not triple_in_cmd and not triple_in_ir:
            warn('No triple found: skipping file', test_file=test)
            return

        build_function_body_dictionary(test, raw_tool_output,
                                       triple_in_cmd or triple_in_ir,
                                       prefixes, func_dict, verbose)

    state = 'toplevel'
    func_name = None
    prefix_set = set([prefix for run in run_list for prefix in run.prefixes])
    log('Rewriting FileCheck prefixes: {}'.format(prefix_set), verbose)

    if remove_common_prefixes:
        prefix_set.update(common_prefixes)
    elif common_prefixes:
        warn('Ignoring common prefixes: {}'.format(common_prefixes),
             test_file=test)

    comment_char = '#' if test.endswith('.mir') else ';'
    autogenerated_note = ('{} NOTE: Assertions have been autogenerated by '
                          'utils/{}'.format(comment_char,
                                            os.path.basename(__file__)))
    output_lines = []
    output_lines.append(autogenerated_note)

    # Line-by-line state machine over the test file. The states are:
    # 'toplevel', 'document', 'mir function metadata',
    # 'mir function prefix', 'mir function body', 'ir function prefix'
    # and 'ir function body'.
    for input_line in input_lines:
        # The note is re-added at the top, so drop any existing copy.
        if input_line == autogenerated_note:
            continue

        if state == 'toplevel':
            m = IR_FUNC_NAME_RE.match(input_line)
            if m:
                state = 'ir function prefix'
                func_name = m.group('func')
            if input_line.rstrip('| \r\n') == '---':
                state = 'document'
            output_lines.append(input_line)
        elif state == 'document':
            m = MIR_FUNC_NAME_RE.match(input_line)
            if m:
                state = 'mir function metadata'
                func_name = m.group('func')
            if input_line.strip() == '...':
                state = 'toplevel'
                func_name = None
            if should_add_line_to_output(input_line, prefix_set):
                output_lines.append(input_line)
        elif state == 'mir function metadata':
            if should_add_line_to_output(input_line, prefix_set):
                output_lines.append(input_line)
            m = MIR_BODY_BEGIN_RE.match(input_line)
            if m:
                if func_name in simple_functions:
                    # If there's only one block, put the checks inside it
                    state = 'mir function prefix'
                    continue
                state = 'mir function body'
                add_checks_for_function(test, output_lines, run_list,
                                        func_dict, func_name, add_vreg_checks,
                                        single_bb=False, verbose=verbose)
        elif state == 'mir function prefix':
            m = MIR_PREFIX_DATA_RE.match(input_line)
            if not m:
                # First line that is not prefix data: the checks go
                # directly before it.
                state = 'mir function body'
                add_checks_for_function(test, output_lines, run_list,
                                        func_dict, func_name, add_vreg_checks,
                                        single_bb=True, verbose=verbose)

            if should_add_line_to_output(input_line, prefix_set):
                output_lines.append(input_line)
        elif state == 'mir function body':
            if input_line.strip() == '...':
                state = 'toplevel'
                func_name = None
            if should_add_line_to_output(input_line, prefix_set):
                output_lines.append(input_line)
        elif state == 'ir function prefix':
            m = IR_PREFIX_DATA_RE.match(input_line)
            if not m:
                state = 'ir function body'
                add_checks_for_function(test, output_lines, run_list,
                                        func_dict, func_name, add_vreg_checks,
                                        single_bb=False, verbose=verbose)

            if should_add_line_to_output(input_line, prefix_set):
                output_lines.append(input_line)
        elif state == 'ir function body':
            if input_line.strip() == '}':
                state = 'toplevel'
                func_name = None
            if should_add_line_to_output(input_line, prefix_set):
                output_lines.append(input_line)

    log('Writing {} lines to {}...'.format(len(output_lines), test), verbose)

    # The file is written in binary mode, which requires bytes. On Python 2
    # str already is bytes; on Python 3 the text has to be encoded first.
    output = ''.join(line + '\n' for line in output_lines)
    if not isinstance(output, bytes):
        output = output.encode('utf-8')
    with open(test, 'wb') as fd:
        fd.write(output)
427
428
def main():
    """Parse the command line and update every test file named on it.

    If processing a file raises, a warning naming the file is printed and
    the exception is re-raised.
    """
    parser = argparse.ArgumentParser(
        description=__doc__, formatter_class=argparse.RawTextHelpFormatter)
    parser.add_argument(
        '-v', '--verbose', action='store_true', help='Show verbose output')
    parser.add_argument(
        '--llc-binary', dest='llc', default='llc', type=LLC,
        help='The "llc" binary to generate the test case with')
    parser.add_argument(
        '--remove-common-prefixes', action='store_true',
        help=('Remove existing check lines whose prefixes are '
              'shared between multiple commands'))
    parser.add_argument(
        '--add-vreg-checks', action='store_true',
        help='Add checks for the "registers:" block')
    parser.add_argument('tests', nargs='+')
    options = parser.parse_args()

    for test_path in options.tests:
        try:
            update_test_file(options.llc, test_path,
                             options.remove_common_prefixes,
                             options.add_vreg_checks,
                             verbose=options.verbose)
        except Exception:
            # Name the offending file, then let the error propagate.
            warn('Error processing file', test_file=test_path)
            raise


if __name__ == '__main__':
    main()