blob: 5db00656b5c20cca9d376180655bf1e0578376a1 [file] [log] [blame] [edit]
# Copyright 2020 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import glob
import os
import re
from typing import Dict, List
# Line patterns used to locate snippet region tags inside sample files.
#
# Matches a line containing an opening tag such as "[START my_snippet]";
# group 1 captures the snippet name (lowercase letters, digits, underscores).
OPEN_SNIPPET_REGEX = r".*\[START ([a-z0-9_]+)\].*$"
# Matches a line containing a closing tag such as "[END my_snippet]";
# group 1 captures the snippet name.
CLOSE_SNIPPET_REGEX = r".*\[END ([a-z0-9_]+)\].*$"
# Matches a line containing the literal "[START_EXCLUDE]" marker.
OPEN_EXCLUDE_REGEX = r".*\[START_EXCLUDE\].*$"
# Matches a line containing the literal "[END_EXCLUDE]" marker.
CLOSE_EXCLUDE_REGEX = r".*\[END_EXCLUDE\].*$"
def _trim_leading_whitespace(lines: List[str]) -> List[str]:
"""Trims leading, plain spaces from the snippet content. Finds the minimum
number of leading spaces, ignoring empty lines, and removes that number of
spaces from each line.
Args:
lines (List[str]): Lines of content. These lines are newline terminated.
Returns:
List of trimmed lines.
"""
def number_of_leading_spaces(input: str) -> int:
return len(input) - len(input.lstrip(" "))
def is_empty_line(input: str) -> bool:
if re.match(r"^\s*$", input):
return True
return False
leading_spaces = [
number_of_leading_spaces(line) for line in lines if not is_empty_line(line)
]
max_leading_spaces = min(leading_spaces)
return [
"\n" if is_empty_line(line) else line[max_leading_spaces:] for line in lines
]
def all_snippets_from_file(sample_file: str) -> Dict[str, str]:
    """Reads in a sample file and parses out all contained snippets.

    A snippet is the content between a line matching OPEN_SNIPPET_REGEX and
    a line matching CLOSE_SNIPPET_REGEX for the same name. Snippets may be
    nested or interleaved. Content between lines matching OPEN_EXCLUDE_REGEX
    and CLOSE_EXCLUDE_REGEX is left out of every snippet. Tag lines
    themselves are never part of the snippet content. A snippet opened more
    than once accumulates the content of all its regions, and a snippet that
    is never closed captures content through the end of the file.

    Args:
        sample_file (str): Sample file to parse.

    Returns:
        Dictionary of snippet name to snippet code, with common leading
        indentation removed. Empty if ``sample_file`` is not an existing
        regular file.
    """
    # isfile() rather than exists(): recursive globs can yield directories,
    # and open() on a directory raises instead of producing no snippets.
    if not os.path.isfile(sample_file):
        return {}

    snippet_lines: Dict[str, List[str]] = {}
    open_snippets = set()
    excluding = False

    # Sample sources are read as UTF-8 explicitly so that parsing does not
    # depend on the locale's default encoding.
    with open(sample_file, encoding="utf-8") as f:
        # Iterate over each line:
        # - If the line matches an opening snippet tag, add that snippet tag
        #   to the set of open tags.
        # - If the line matches a closing snippet tag, remove that snippet
        #   tag from the set of open tags.
        # - If the line matches an opening exclude tag, record that we are
        #   excluding content.
        # - If the line matches a closing exclude tag, record that we are
        #   capturing content again.
        # - Otherwise, if we are not excluding content, add the line to each
        #   of the open snippets.
        #
        # Snippet tags found while excluding are ignored. This allows us to
        # handle parsing nested or interleaved snippets and to ignore blocks
        # of code inside the snippets.
        for line in f:
            open_match = re.match(pattern=OPEN_SNIPPET_REGEX, string=line)
            close_match = re.match(pattern=CLOSE_SNIPPET_REGEX, string=line)
            open_exclude_match = re.match(pattern=OPEN_EXCLUDE_REGEX, string=line)
            close_exclude_match = re.match(pattern=CLOSE_EXCLUDE_REGEX, string=line)

            if open_match and not excluding:
                name = open_match[1]
                open_snippets.add(name)
                # Keep any lines already captured if the snippet is reopened.
                snippet_lines.setdefault(name, [])
            elif close_match and not excluding:
                # discard(): a stray closing tag with no opener is harmless.
                open_snippets.discard(close_match[1])
            elif open_exclude_match:
                excluding = True
            elif close_exclude_match:
                excluding = False
            elif not excluding:
                for snippet in open_snippets:
                    snippet_lines[snippet].append(line)

    return {
        snippet: "".join(_trim_leading_whitespace(lines))
        for snippet, lines in snippet_lines.items()
    }
def all_snippets(snippet_globs: List[str]) -> Dict[str, str]:
    """Expands each glob and gathers the snippets from every matching path.

    Globs are expanded recursively (``**`` is honoured). When the same
    snippet name is found in more than one file, the one parsed last wins.

    Args:
        snippet_globs (List[str]): List of path globs to expand.

    Returns:
        Dictionary of snippet name to snippet code.
    """
    collected = {}
    for pattern in snippet_globs:
        for path in glob.glob(pattern, recursive=True):
            # update() overwrites earlier entries that share a snippet name.
            collected.update(all_snippets_from_file(path))
    return collected