Blame - python/src/Tools/scripts/reindent.py - toolchain/benchmark

blob: 3e9affb9c511b924f117244ee21a11db417eb7b7 [file] [log] [blame]

Jing Yu	c794900	2011-02-22 19:57:05 -0800	[diff] [blame^]	1	#! /usr/bin/env python
				2
				3	# Released to the public domain, by Tim Peters, 03 October 2000.
				4
				5	"""reindent [-d][-r][-n][-v] [ path ... ]
				6
				7	-d (--dryrun) Dry run. Analyze, but don't make any changes to, files.
				8	-r (--recurse) Recurse. Search for all .py files in subdirectories too.
				9	-n (--nobackup) No backup. Does not make a ".bak" file before reindenting.
				10	-v (--verbose) Verbose. Print informative msgs; else no output.
				11	-h (--help) Help. Print this usage information and exit.
				12
				13	Change Python (.py) files to use 4-space indents and no hard tab characters.
				14	Also trim excess spaces and tabs from ends of lines, and remove empty lines
				15	at the end of files. Also ensure the last line ends with a newline.
				16
				17	If no paths are given on the command line, reindent operates as a filter,
				18	reading a single source file from standard input and writing the transformed
				19	source to standard output. In this case, the -d, -r and -v flags are
				20	ignored.
				21
				22	You can pass one or more file and/or directory paths. When a directory
				23	path, all .py files within the directory will be examined, and, if the -r
				24	option is given, likewise recursively for subdirectories.
				25
				26	If output is not to standard output, reindent overwrites files in place,
				27	renaming the originals with a .bak extension. If it finds nothing to
				28	change, the file is left alone. If reindent does change a file, the changed
				29	file is a fixed-point for future runs (i.e., running reindent on the
				30	resulting .py file won't change it again).
				31
				32	The hard part of reindenting is figuring out what to do with comment
				33	lines. So long as the input files get a clean bill of health from
				34	tabnanny.py, reindent should do a good job.
				35
				36	The backup file is a copy of the one that is being reindented. The ".bak"
				37	file is generated with shutil.copyfile(), but some corner cases regarding
				38	user/group and permissions could leave the backup file more readable than
				39	you'd prefer. You can always use the --nobackup option to prevent this.
				40	"""
				41
				42	__version__ = "1"
				43
				44	import tokenize
				45	import os, shutil
				46	import sys
				47
# Command-line option state.  main() updates these from the parsed flags
# and check() reads them while processing each path.
verbose = 0  # incremented once per -v/--verbose; nonzero enables progress messages
recurse = 0  # incremented once per -r/--recurse; nonzero descends into subdirectories
dryrun = 0  # incremented once per -d/--dryrun; nonzero means files are never rewritten
makebackup = True  # set to False by -n/--nobackup; controls creation of the ".bak" copy
				52
				53	def usage(msg=None):
				54	if msg is not None:
				55	print >> sys.stderr, msg
				56	print >> sys.stderr, __doc__
				57
				58	def errprint(*args):
				59	sep = ""
				60	for arg in args:
				61	sys.stderr.write(sep + str(arg))
				62	sep = " "
				63	sys.stderr.write("\n")
				64
				65	def main():
				66	import getopt
				67	global verbose, recurse, dryrun, makebackup
				68	try:
				69	opts, args = getopt.getopt(sys.argv[1:], "drnvh",
				70	["dryrun", "recurse", "nobackup", "verbose", "help"])
				71	except getopt.error, msg:
				72	usage(msg)
				73	return
				74	for o, a in opts:
				75	if o in ('-d', '--dryrun'):
				76	dryrun += 1
				77	elif o in ('-r', '--recurse'):
				78	recurse += 1
				79	elif o in ('-n', '--nobackup'):
				80	makebackup = False
				81	elif o in ('-v', '--verbose'):
				82	verbose += 1
				83	elif o in ('-h', '--help'):
				84	usage()
				85	return
				86	if not args:
				87	r = Reindenter(sys.stdin)
				88	r.run()
				89	r.write(sys.stdout)
				90	return
				91	for arg in args:
				92	check(arg)
				93
				94	def check(file):
				95	if os.path.isdir(file) and not os.path.islink(file):
				96	if verbose:
				97	print "listing directory", file
				98	names = os.listdir(file)
				99	for name in names:
				100	fullname = os.path.join(file, name)
				101	if ((recurse and os.path.isdir(fullname) and
				102	not os.path.islink(fullname) and
				103	not os.path.split(fullname)[1].startswith("."))
				104	or name.lower().endswith(".py")):
				105	check(fullname)
				106	return
				107
				108	if verbose:
				109	print "checking", file, "...",
				110	try:
				111	f = open(file)
				112	except IOError, msg:
				113	errprint("%s: I/O Error: %s" % (file, str(msg)))
				114	return
				115
				116	r = Reindenter(f)
				117	f.close()
				118	if r.run():
				119	if verbose:
				120	print "changed."
				121	if dryrun:
				122	print "But this is a dry run, so leaving it alone."
				123	if not dryrun:
				124	bak = file + ".bak"
				125	if makebackup:
				126	shutil.copyfile(file, bak)
				127	if verbose:
				128	print "backed up", file, "to", bak
				129	f = open(file, "w")
				130	r.write(f)
				131	f.close()
				132	if verbose:
				133	print "wrote new", file
				134	return True
				135	else:
				136	if verbose:
				137	print "unchanged."
				138	return False
				139
				140	def _rstrip(line, JUNK='\n \t'):
				141	"""Return line stripped of trailing spaces, tabs, newlines.
				142
				143	Note that line.rstrip() instead also strips sundry control characters,
				144	but at least one known Emacs user expects to keep junk like that, not
				145	mentioning Barry by name or anything <wink>.
				146	"""
				147
				148	i = len(line)
				149	while i > 0 and line[i-1] in JUNK:
				150	i -= 1
				151	return line[:i]
				152
				153	class Reindenter:
				154
				155	def __init__(self, f):
				156	self.find_stmt = 1 # next token begins a fresh stmt?
				157	self.level = 0 # current indent level
				158
				159	# Raw file lines.
				160	self.raw = f.readlines()
				161
				162	# File lines, rstripped & tab-expanded. Dummy at start is so
				163	# that we can use tokenize's 1-based line numbering easily.
				164	# Note that a line is all-blank iff it's "\n".
				165	self.lines = [_rstrip(line).expandtabs() + "\n"
				166	for line in self.raw]
				167	self.lines.insert(0, None)
				168	self.index = 1 # index into self.lines of next line
				169
				170	# List of (lineno, indentlevel) pairs, one for each stmt and
				171	# comment line. indentlevel is -1 for comment lines, as a
				172	# signal that tokenize doesn't know what to do about them;
				173	# indeed, they're our headache!
				174	self.stats = []
				175
				176	def run(self):
				177	tokenize.tokenize(self.getline, self.tokeneater)
				178	# Remove trailing empty lines.
				179	lines = self.lines
				180	while lines and lines[-1] == "\n":
				181	lines.pop()
				182	# Sentinel.
				183	stats = self.stats
				184	stats.append((len(lines), 0))
				185	# Map count of leading spaces to # we want.
				186	have2want = {}
				187	# Program after transformation.
				188	after = self.after = []
				189	# Copy over initial empty lines -- there's nothing to do until
				190	# we see a line with something on it.
				191	i = stats[0][0]
				192	after.extend(lines[1:i])
				193	for i in range(len(stats)-1):
				194	thisstmt, thislevel = stats[i]
				195	nextstmt = stats[i+1][0]
				196	have = getlspace(lines[thisstmt])
				197	want = thislevel * 4
				198	if want < 0:
				199	# A comment line.
				200	if have:
				201	# An indented comment line. If we saw the same
				202	# indentation before, reuse what it most recently
				203	# mapped to.
				204	want = have2want.get(have, -1)
				205	if want < 0:
				206	# Then it probably belongs to the next real stmt.
				207	for j in xrange(i+1, len(stats)-1):
				208	jline, jlevel = stats[j]
				209	if jlevel >= 0:
				210	if have == getlspace(lines[jline]):
				211	want = jlevel * 4
				212	break
				213	if want < 0: # Maybe it's a hanging
				214	# comment like this one,
				215	# in which case we should shift it like its base
				216	# line got shifted.
				217	for j in xrange(i-1, -1, -1):
				218	jline, jlevel = stats[j]
				219	if jlevel >= 0:
				220	want = have + getlspace(after[jline-1]) - \
				221	getlspace(lines[jline])
				222	break
				223	if want < 0:
				224	# Still no luck -- leave it alone.
				225	want = have
				226	else:
				227	want = 0
				228	assert want >= 0
				229	have2want[have] = want
				230	diff = want - have
				231	if diff == 0 or have == 0:
				232	after.extend(lines[thisstmt:nextstmt])
				233	else:
				234	for line in lines[thisstmt:nextstmt]:
				235	if diff > 0:
				236	if line == "\n":
				237	after.append(line)
				238	else:
				239	after.append(" " * diff + line)
				240	else:
				241	remove = min(getlspace(line), -diff)
				242	after.append(line[remove:])
				243	return self.raw != self.after
				244
				245	def write(self, f):
				246	f.writelines(self.after)
				247
				248	# Line-getter for tokenize.
				249	def getline(self):
				250	if self.index >= len(self.lines):
				251	line = ""
				252	else:
				253	line = self.lines[self.index]
				254	self.index += 1
				255	return line
				256
				257	# Line-eater for tokenize.
				258	def tokeneater(self, type, token, (sline, scol), end, line,
				259	INDENT=tokenize.INDENT,
				260	DEDENT=tokenize.DEDENT,
				261	NEWLINE=tokenize.NEWLINE,
				262	COMMENT=tokenize.COMMENT,
				263	NL=tokenize.NL):
				264
				265	if type == NEWLINE:
				266	# A program statement, or ENDMARKER, will eventually follow,
				267	# after some (possibly empty) run of tokens of the form
				268	# (NL \| COMMENT)* (INDENT \| DEDENT+)?
				269	self.find_stmt = 1
				270
				271	elif type == INDENT:
				272	self.find_stmt = 1
				273	self.level += 1
				274
				275	elif type == DEDENT:
				276	self.find_stmt = 1
				277	self.level -= 1
				278
				279	elif type == COMMENT:
				280	if self.find_stmt:
				281	self.stats.append((sline, -1))
				282	# but we're still looking for a new stmt, so leave
				283	# find_stmt alone
				284
				285	elif type == NL:
				286	pass
				287
				288	elif self.find_stmt:
				289	# This is the first "real token" following a NEWLINE, so it
				290	# must be the first token of the next program statement, or an
				291	# ENDMARKER.
				292	self.find_stmt = 0
				293	if line: # not endmarker
				294	self.stats.append((sline, self.level))
				295
				296	# Count number of leading blanks.
				297	def getlspace(line):
				298	i, n = 0, len(line)
				299	while i < n and line[i] == " ":
				300	i += 1
				301	return i
				302
# Run the command-line driver only when executed as a script, not on import.
if __name__ == '__main__':
    main()