#! /usr/bin/env python
# Provenance (repository-viewer header this copy was extracted with):
#   blob 3e9affb9c511b924f117244ee21a11db417eb7b7
#   first line last touched by Jing Yu, c794900, 2011-02-22 19:57:05 -0800

# Released to the public domain, by Tim Peters, 03 October 2000.

"""reindent [-d][-r][-n][-v][-h] [ path ... ]

-d (--dryrun)   Dry run.   Analyze, but don't make any changes to, files.
-r (--recurse)  Recurse.   Search for all .py files in subdirectories too.
-n (--nobackup) No backup. Does not make a ".bak" file before reindenting.
-v (--verbose)  Verbose.   Print informative msgs; else no output.
-h (--help)     Help.      Print this usage information and exit.

Change Python (.py) files to use 4-space indents and no hard tab characters.
Also trim excess spaces and tabs from ends of lines, and remove empty lines
at the end of files. Also ensure the last line ends with a newline.

If no paths are given on the command line, reindent operates as a filter,
reading a single source file from standard input and writing the transformed
source to standard output. In this case, the -d, -r and -v flags are
ignored.

You can pass one or more file and/or directory paths. When a path is a
directory, all .py files within it will be examined, and, if the -r
option is given, likewise recursively for subdirectories.

If output is not to standard output, reindent overwrites files in place,
first saving a copy of each original with a .bak extension (unless -n is
given). If it finds nothing to change, the file is left alone. If reindent
does change a file, the changed file is a fixed-point for future runs
(i.e., running reindent on the resulting .py file won't change it again).

The hard part of reindenting is figuring out what to do with comment
lines. So long as the input files get a clean bill of health from
tabnanny.py, reindent should do a good job.

The backup file is a copy of the one that is being reindented. The ".bak"
file is generated with shutil.copyfile(), but some corner cases regarding
user/group and permissions could leave the backup file more readable than
you'd prefer. You can always use the --nobackup option to prevent this.
"""

42__version__ = "1"
43
import os
import shutil
import sys
import tokenize

# Command-line option state.  main() overwrites these from the parsed
# options; check() reads them.
verbose = 0        # -v: when nonzero, progress messages go to stdout
recurse = 0        # -r: when nonzero, descend into subdirectories
dryrun = 0         # -d: when nonzero, analyze only; never write files
makebackup = True  # cleared by -n: copy to "<file>.bak" before rewriting

def usage(msg=None):
    """Write the usage text to stderr, preceded by *msg* when one is given.

    msg -- optional object (typically a getopt error) written on its own
           line before the module docstring.
    """
    # sys.stderr.write() is used instead of the ``print >>`` statement so
    # the function is valid on both Python 2 and Python 3.
    if msg is not None:
        sys.stderr.write("%s\n" % (msg,))
    sys.stderr.write("%s\n" % (__doc__,))

def errprint(*args):
    """Write str() of each argument to stderr, space-separated, then "\\n".

    With no arguments only the newline is written.
    """
    # One write per message keeps the line intact if other output is
    # interleaved on stderr.
    sys.stderr.write(" ".join(str(arg) for arg in args) + "\n")

def main():
    """Parse the command line and reindent stdin or each named path.

    Recognized options set the module-level flags ``verbose``, ``recurse``,
    ``dryrun`` and ``makebackup``.  An unrecognized option prints the usage
    text (with the getopt message) and returns; ``-h`` prints the usage
    text and returns.  With no path arguments the script acts as a filter
    from stdin to stdout; otherwise every argument is handed to check().
    """
    import getopt
    global verbose, recurse, dryrun, makebackup
    try:
        opts, args = getopt.getopt(
            sys.argv[1:], "drnvh",
            ["dryrun", "recurse", "nobackup", "verbose", "help"])
    except getopt.error as msg:  # "as" form: valid on Python 2.6+ and 3
        usage(msg)
        return
    for o, _ in opts:  # none of the options takes a value
        if o in ('-d', '--dryrun'):
            dryrun += 1
        elif o in ('-r', '--recurse'):
            recurse += 1
        elif o in ('-n', '--nobackup'):
            makebackup = False
        elif o in ('-v', '--verbose'):
            verbose += 1
        elif o in ('-h', '--help'):
            usage()
            return
    if not args:
        # Filter mode: the result is written even when nothing changed.
        r = Reindenter(sys.stdin)
        r.run()
        r.write(sys.stdout)
        return
    for arg in args:
        check(arg)

def check(file):
    """Reindent the file *file*, or the .py files inside directory *file*.

    A directory (that is not a symlink) is listed and check() is called on
    every entry whose name ends in ".py" (case-insensitively) and, when the
    module-level ``recurse`` flag is set, on every non-symlink subdirectory
    whose name does not start with ".".

    For a file: returns True if reindenting would change (or did change)
    it, False if it is already clean.  Returns None for a directory or
    when the file cannot be opened (the error is reported via errprint()).
    Honors the module-level ``verbose``, ``dryrun`` and ``makebackup``.
    """
    if os.path.isdir(file) and not os.path.islink(file):
        if verbose:
            sys.stdout.write("listing directory %s\n" % (file,))
        names = os.listdir(file)
        for name in names:
            fullname = os.path.join(file, name)
            if ((recurse and os.path.isdir(fullname) and
                 not os.path.islink(fullname) and
                 not os.path.split(fullname)[1].startswith("."))
                    or name.lower().endswith(".py")):
                check(fullname)
        return

    if verbose:
        # No newline yet: the verdict below completes this line.
        sys.stdout.write("checking %s ..." % (file,))
    try:
        f = open(file)
    except IOError as msg:
        errprint("%s: I/O Error: %s" % (file, str(msg)))
        return

    # Close the input even if reading it fails.
    with f:
        r = Reindenter(f)
    if r.run():
        if verbose:
            sys.stdout.write(" changed.\n")
            if dryrun:
                sys.stdout.write(
                    "But this is a dry run, so leaving it alone.\n")
        if not dryrun:
            bak = file + ".bak"
            if makebackup:
                shutil.copyfile(file, bak)
                if verbose:
                    sys.stdout.write("backed up %s to %s\n" % (file, bak))
            # Close the output even if writing fails part-way.
            with open(file, "w") as f:
                r.write(f)
            if verbose:
                sys.stdout.write("wrote new %s\n" % (file,))
        return True
    else:
        if verbose:
            sys.stdout.write(" unchanged.\n")
        return False

140def _rstrip(line, JUNK='\n \t'):
141 """Return line stripped of trailing spaces, tabs, newlines.
142
143 Note that line.rstrip() instead also strips sundry control characters,
144 but at least one known Emacs user expects to keep junk like that, not
145 mentioning Barry by name or anything <wink>.
146 """
147
148 i = len(line)
149 while i > 0 and line[i-1] in JUNK:
150 i -= 1
151 return line[:i]
152
class Reindenter:
    """Recompute the indentation of one Python source file.

    Usage: construct with an open text file (any object with readlines()),
    call run() to build the reindented lines, then write() to emit them.
    """

    def __init__(self, f):
        """Read all lines from *f* and set up the tokenizer bookkeeping."""
        self.find_stmt = 1  # next token begins a fresh stmt?
        self.level = 0      # current indent level

        # Raw file lines.
        self.raw = f.readlines()

        # File lines, rstripped & tab-expanded.  Dummy at start is so
        # that we can use tokenize's 1-based line numbering easily.
        # Note that a line is all-blank iff it's "\n".
        self.lines = [_rstrip(line).expandtabs() + "\n"
                      for line in self.raw]
        self.lines.insert(0, None)
        self.index = 1  # index into self.lines of next line

        # List of (lineno, indentlevel) pairs, one for each stmt and
        # comment line.  indentlevel is -1 for comment lines, as a
        # signal that tokenize doesn't know what to do about them;
        # indeed, they're our headache!
        self.stats = []

    def run(self):
        """Build self.after, the reindented lines; return True if changed.

        "Changed" means the result differs from the raw input lines.
        """
        # generate_tokens() exists on both Python 2 and 3, unlike the
        # callback form tokenize.tokenize(readline, tokeneater).
        for tok in tokenize.generate_tokens(self.getline):
            self.tokeneater(*tok)
        # Remove trailing empty lines.
        lines = self.lines
        while lines and lines[-1] == "\n":
            lines.pop()
        # Sentinel.
        stats = self.stats
        stats.append((len(lines), 0))
        # Map count of leading spaces to # we want.
        have2want = {}
        # Program after transformation.
        after = self.after = []
        # Copy over initial empty lines -- there's nothing to do until
        # we see a line with *something* on it.
        i = stats[0][0]
        after.extend(lines[1:i])
        for i in range(len(stats) - 1):
            thisstmt, thislevel = stats[i]
            nextstmt = stats[i + 1][0]
            have = getlspace(lines[thisstmt])
            want = thislevel * 4
            if want < 0:
                # A comment line.
                if have:
                    # An indented comment line.  If we saw the same
                    # indentation before, reuse what it most recently
                    # mapped to.
                    want = have2want.get(have, -1)
                    if want < 0:
                        # Then it probably belongs to the next real stmt.
                        for j in range(i + 1, len(stats) - 1):
                            jline, jlevel = stats[j]
                            if jlevel >= 0:
                                if have == getlspace(lines[jline]):
                                    want = jlevel * 4
                                break
                    if want < 0:
                        # Maybe it's a hanging comment, in which case we
                        # should shift it like its base line got shifted.
                        for j in range(i - 1, -1, -1):
                            jline, jlevel = stats[j]
                            if jlevel >= 0:
                                want = have + (getlspace(after[jline - 1]) -
                                               getlspace(lines[jline]))
                                break
                    if want < 0:
                        # Still no luck -- leave it alone.
                        want = have
                else:
                    want = 0
            assert want >= 0
            have2want[have] = want
            diff = want - have
            if diff == 0 or have == 0:
                after.extend(lines[thisstmt:nextstmt])
            else:
                # Shift the statement and its continuation lines together.
                for line in lines[thisstmt:nextstmt]:
                    if diff > 0:
                        if line == "\n":
                            # Keep blank lines free of trailing spaces.
                            after.append(line)
                        else:
                            after.append(" " * diff + line)
                    else:
                        # Never remove more than the line's own indent.
                        remove = min(getlspace(line), -diff)
                        after.append(line[remove:])
        return self.raw != self.after

    def write(self, f):
        """Write the lines computed by run() to the open file *f*."""
        f.writelines(self.after)

    # Line-getter for tokenize.
    def getline(self):
        """Return the next cleaned-up line, or "" once all are consumed."""
        if self.index >= len(self.lines):
            line = ""
        else:
            line = self.lines[self.index]
            self.index += 1
        return line

    # Line-eater for tokenize.
    def tokeneater(self, type, token, slinecol, end, line,
                   INDENT=tokenize.INDENT,
                   DEDENT=tokenize.DEDENT,
                   NEWLINE=tokenize.NEWLINE,
                   COMMENT=tokenize.COMMENT,
                   NL=tokenize.NL):
        """Consume one token; record where statements and comments start.

        The positional arguments are the five fields tokenize produces for
        a token; *slinecol* is the (row, column) pair where it starts.
        Appends (row, level) to self.stats for the first token of each
        statement and (row, -1) for each comment seen while looking for
        the next statement.
        """
        # A plain parameter replaces the tuple parameter "(sline, scol)",
        # which Python 3 no longer accepts; only the row is needed.
        sline = slinecol[0]

        if type == NEWLINE:
            # A program statement, or ENDMARKER, will eventually follow,
            # after some (possibly empty) run of tokens of the form
            #     (NL | COMMENT)* (INDENT | DEDENT+)?
            self.find_stmt = 1

        elif type == INDENT:
            self.find_stmt = 1
            self.level += 1

        elif type == DEDENT:
            self.find_stmt = 1
            self.level -= 1

        elif type == COMMENT:
            if self.find_stmt:
                self.stats.append((sline, -1))
                # but we're still looking for a new stmt, so leave
                # find_stmt alone

        elif type == NL:
            pass

        elif self.find_stmt:
            # This is the first "real token" following a NEWLINE, so it
            # must be the first token of the next program statement, or an
            # ENDMARKER.
            self.find_stmt = 0
            if line:  # not endmarker
                self.stats.append((sline, self.level))

# Count number of leading blanks.
def getlspace(line):
    """Return how many space characters *line* starts with.

    Only " " counts; a leading tab or any other character ends the run.
    """
    return len(line) - len(line.lstrip(" "))

# Run as a script: parse sys.argv and reindent the requested inputs.
if __name__ == '__main__':
    main()