Prevent os.walk() scanning excluded dirs. (#898)
diff --git a/CHANGELOG b/CHANGELOG
index b8906d1..a681c6b 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -9,6 +9,10 @@
a custom number of blank lines between top-level imports and variable
definitions.
- Ignore end of line `# copybara:` directives when checking line length.
+### Changed
+- Do not scan excluded directories. Prior versions would scan an excluded
+ folder then exclude its contents on a file-by-file basis. Preventing the
+ folder from being scanned is faster.
### Fixed
- Exclude directories on Windows.
diff --git a/yapf/yapflib/file_resources.py b/yapf/yapflib/file_resources.py
index 4c6bf38..c524c50 100644
--- a/yapf/yapflib/file_resources.py
+++ b/yapf/yapflib/file_resources.py
@@ -167,7 +167,7 @@
# TODO(morbo): Look into a version of os.walk that can handle recursion.
excluded_dirs = []
- for dirpath, _, filelist in os.walk(filename):
+ for dirpath, dirnames, filelist in os.walk(filename):
if dirpath != '.' and exclude and IsIgnored(dirpath, exclude):
excluded_dirs.append(dirpath)
continue
@@ -179,6 +179,19 @@
continue
if IsPythonFile(filepath):
python_files.append(filepath)
+ # To avoid scanning the contents of excluded folders, os.walk() lets
+ # you amend its list of child dirs `dirnames`. These edits must be
+ # made in-place instead of creating a modified copy of `dirnames`.
+ # list.remove() is slow and list.pop() is a headache. Instead clear
+ # `dirnames` then repopulate it.
+ dirnames_ = [dirnames.pop(0) for i in range(len(dirnames))]
+ for dirname in dirnames_:
+ dir_ = os.path.join(dirpath, dirname)
+ if IsIgnored(dir_, exclude):
+ excluded_dirs.append(dir_)
+ else:
+ dirnames.append(dirname)
+
elif os.path.isfile(filename):
python_files.append(filename)
@@ -187,6 +200,8 @@
def IsIgnored(path, exclude):
"""Return True if filename matches any patterns in exclude."""
+ if exclude is None:
+ return False
path = path.lstrip(os.path.sep)
while path.startswith('.' + os.path.sep):
path = path[2:]