tools/parser.py - platform/external/minijail - Git at Google

 # Copyright 2020 The ChromiumOS Authors
 # Use of this source code is governed by a BSD-style license that can be
 # found in the LICENSE file.

 """A parser for the Minijail policy file."""

 import collections
 import itertools
 import os.path
 import re


 try:
     import bpf
 except ImportError:
     from minijail import bpf


 # Representations of numbers with different radix (base) in C.
 HEX_REGEX = r"-?0[xX][0-9a-fA-F]+"
 OCTAL_REGEX = r"-?0[0-7]+"
 DECIMAL_REGEX = r"-?[0-9]+"


 Token = collections.namedtuple(
     "Token", ["type", "value", "filename", "line", "line_number", "column"]
 )

 # A regex that can tokenize a Minijail policy file line.
 _TOKEN_SPECIFICATION = (
     ("COMMENT", r"#.*$"),
     ("WHITESPACE", r"\s+"),
     ("CONTINUATION", r"\\$"),
     ("DEFAULT", r"@default\b"),
     ("INCLUDE", r"@include\b"),
     ("FREQUENCY", r"@frequency\b"),
     ("DENYLIST", r"@denylist$"),
     ("PATH", r"(?:\.)?/\S+"),
     ("NUMERIC_CONSTANT", f"{HEX_REGEX}|{OCTAL_REGEX}|{DECIMAL_REGEX}"),
     ("COLON", r":"),
     ("SEMICOLON", r";"),
     ("COMMA", r","),
     ("BITWISE_COMPLEMENT", r"~"),
     ("LPAREN", r"\("),
     ("RPAREN", r"\)"),
     ("LBRACE", r"\{"),
     ("RBRACE", r"\}"),
     ("RBRACKET", r"\]"),
     ("LBRACKET", r"\["),
     ("OR", r"\|\|"),
     ("AND", r"&&"),
     ("BITWISE_OR", r"\|"),
     ("OP", r"&|\bin\b|==|!=|<=|<|>=|>"),
     ("EQUAL", r"="),
     ("ARGUMENT", r"\barg[0-9]+\b"),
     ("RETURN", r"\breturn\b"),
     (
         "ACTION",
         r"\ballow\b|\bkill-process\b|\bkill-thread\b|\bkill\b|\btrap\b|"
         r"\btrace\b|\blog\b|\buser-notify\b",
     ),
     ("IDENTIFIER", r"[a-zA-Z_][a-zA-Z_0-9-@]*"),
 )
 _TOKEN_RE = re.compile(
     "|".join(r"(?P<%s>%s)" % pair for pair in _TOKEN_SPECIFICATION)
 )


 class ParseException(Exception):
     """An exception that is raised when parsing fails."""

     # pylint: disable=too-many-arguments
     def __init__(
         self, message, filename, *, line="", line_number=1, token=None
     ):
         if token:
             line = token.line
             line_number = token.line_number
             column = token.column
             length = len(token.value)
         else:
             column = len(line)
             length = 1

         message = ("%s(%d:%d): %s") % (
             filename,
             line_number,
             column + 1,
             message,
         )
         message += "\n    %s" % line
         message += "\n    %s%s" % (" " * column, "^" * length)
         super().__init__(message)


 class ParserState:
     """Stores the state of the Parser to provide better diagnostics."""

     def __init__(self, filename):
         self._filename = filename
         self._line = ""
         self._line_number = 0

     @property
     def filename(self):
         """Return the name of the file being processed."""
         return self._filename

     @property
     def line(self):
         """Return the current line being processed."""
         return self._line

     @property
     def line_number(self):
         """Return the current line number being processed."""
         return self._line_number

     def error(self, message, token=None):
         """Raise a ParserException with the provided message."""
         raise ParseException(
             message,
             self.filename,
             line=self._line,
             line_number=self._line_number,
             token=token,
         )

     def tokenize(self, lines):
         """Return a list of tokens for the current line."""
         tokens = []

         for line_number, line in enumerate(lines):
             self._line_number = line_number + 1
             self._line = line.rstrip("\r\n")

             last_end = 0
             for token in _TOKEN_RE.finditer(self._line):
                 if token.start() != last_end:
                     self.error(
                         "invalid token",
                         token=Token(
                             "INVALID",
                             self._line[last_end : token.start()],
                             self.filename,
                             self._line,
                             self._line_number,
                             last_end,
                         ),
                     )
                 last_end = token.end()

                 # Omit whitespace and comments now to avoid sprinkling this
                 # logic elsewhere.
                 if token.lastgroup in ("WHITESPACE", "COMMENT", "CONTINUATION"):
                     continue
                 tokens.append(
                     Token(
                         token.lastgroup,
                         token.group(),
                         self.filename,
                         self._line,
                         self._line_number,
                         token.start(),
                     )
                 )
             if last_end != len(self._line):
                 self.error(
                     "invalid token",
                     token=Token(
                         "INVALID",
                         self._line[last_end:],
                         self.filename,
                         self._line,
                         self._line_number,
                         last_end,
                     ),
                 )

             if self._line.endswith("\\"):
                 # This line is not finished yet.
                 continue

             if tokens:
                 # Return a copy of the token list so that the caller can be free
                 # to modify it.
                 yield tokens[::]
             tokens.clear()


 Atom = collections.namedtuple("Atom", ["argument_index", "op", "value"])
 """A single boolean comparison within a filter expression."""

 Filter = collections.namedtuple("Filter", ["expression", "action"])
 """The result of parsing a DNF filter expression, with its action.

 Since the expression is in Disjunctive Normal Form, it is composed of two levels
 of lists, one for disjunctions and the inner one for conjunctions. The elements
 of the inner list are Atoms.
 """

 Syscall = collections.namedtuple("Syscall", ["name", "number"])
 """A system call."""

 ParsedFilterStatement = collections.namedtuple(
     "ParsedFilterStatement", ["syscalls", "filters", "token"]
 )
 """The result of parsing a filter statement.

 Statements have a list of syscalls, and an associated list of filters that will
 be evaluated sequentially when any of the syscalls is invoked.
 """

 FilterStatement = collections.namedtuple(
     "FilterStatement", ["syscall", "frequency", "filters"]
 )
 """The filter list for a particular syscall.

 This is a mapping from one syscall to a list of filters that are evaluated
 sequentially. The last filter is always an unconditional action.
 """

 ParsedPolicy = collections.namedtuple(
     "ParsedPolicy", ["default_action", "filter_statements"]
 )
 """The result of parsing a minijail .policy file."""


 # pylint: disable=too-few-public-methods
 class PolicyParser:
     """A parser for the Minijail seccomp policy file format."""

     def __init__(
         self,
         arch,
         *,
         kill_action,
         include_depth_limit=10,
         override_default_action=None,
         denylist=False,
         ret_log=False,
     ):
         self._parser_states = [ParserState("<memory>")]
         self._kill_action = kill_action
         self._include_depth_limit = include_depth_limit
         if denylist:
             self._default_action = bpf.Allow()
         else:
             self._default_action = self._kill_action
         self._override_default_action = override_default_action
         self._frequency_mapping = collections.defaultdict(int)
         self._arch = arch
         self._denylist = denylist
         self._ret_log = ret_log

     @property
     def _parser_state(self):
         return self._parser_states[-1]

     # single-constant = identifier
     #                 | numeric-constant
     #                 ;
     def _parse_single_constant(self, token):
         if token.type == "IDENTIFIER":
             if token.value not in self._arch.constants:
                 self._parser_state.error("invalid constant", token=token)
             single_constant = self._arch.constants[token.value]
         elif token.type == "NUMERIC_CONSTANT":
             # As `int(_, 0)` in Python != `strtol(_, _, 0)` in C, to make sure
             # the number parsing behaves exactly in C, instead of using `int()`
             # directly, we list out all the possible formats for octal, decimal
             # and hex numbers, and determine the corresponding base by regex.
             try:
                 if re.match(HEX_REGEX, token.value):
                     base = 16
                 elif re.match(OCTAL_REGEX, token.value):
                     base = 8
                 elif re.match(DECIMAL_REGEX, token.value):
                     base = 10
                 else:
                     # This should never happen.
                     raise ValueError
                 single_constant = int(token.value, base=base)
             except ValueError:
                 self._parser_state.error("invalid constant", token=token)
         else:
             self._parser_state.error("invalid constant", token=token)
         if single_constant > self._arch.max_unsigned:
             self._parser_state.error("unsigned overflow", token=token)
         elif single_constant < self._arch.min_signed:
             self._parser_state.error("signed underflow", token=token)
         elif single_constant < 0:
             # This converts the constant to an unsigned representation of the
             # same value, since BPF only uses unsigned values.
             single_constant = self._arch.truncate_word(single_constant)
         return single_constant

     # constant = [ '~' ] , '(' , value , ')'
     #          | [ '~' ] , single-constant
     #          ;
     def _parse_constant(self, tokens):
         negate = False
         if tokens[0].type == "BITWISE_COMPLEMENT":
             negate = True
             tokens.pop(0)
             if not tokens:
                 self._parser_state.error("empty complement")
             if tokens[0].type == "BITWISE_COMPLEMENT":
                 self._parser_state.error(
                     "invalid double complement", token=tokens[0]
                 )
         if tokens[0].type == "LPAREN":
             last_open_paren = tokens.pop(0)
             single_value = self.parse_value(tokens)
             if not tokens or tokens[0].type != "RPAREN":
                 self._parser_state.error(
                     "unclosed parenthesis", token=last_open_paren
                 )
         else:
             single_value = self._parse_single_constant(tokens[0])
         tokens.pop(0)
         if negate:
             single_value = self._arch.truncate_word(~single_value)
         return single_value

     # value = constant , [ { '|' , constant } ]
     #       ;
     def parse_value(self, tokens):
         """Parse constants separated bitwise OR operator |.

         Constants can be:

         - A number that can be parsed with strtol() in C.
         - A named constant expression.
         - A parenthesized, valid constant expression.
         - A valid constant expression prefixed with the unary bitwise
           complement operator ~.
         - A series of valid constant expressions separated by bitwise
           OR operator |.

         If there is an error parsing any of the constants, the whole process
         fails.
         """

         value = 0
         while tokens:
             value |= self._parse_constant(tokens)
             if not tokens or tokens[0].type != "BITWISE_OR":
                 break
             tokens.pop(0)
         else:
             self._parser_state.error("empty constant")
         return value

     # atom = argument , op , value
     #      ;
     def _parse_atom(self, tokens):
         if not tokens:
             self._parser_state.error("missing argument")
         argument = tokens.pop(0)
         if argument.type != "ARGUMENT":
             self._parser_state.error("invalid argument", token=argument)

         if not tokens:
             self._parser_state.error("missing operator")
         operator = tokens.pop(0)
         if operator.type != "OP":
             self._parser_state.error("invalid operator", token=operator)

         value = self.parse_value(tokens)
         argument_index = int(argument.value[3:])
         if not 0 <= argument_index < bpf.MAX_SYSCALL_ARGUMENTS:
             self._parser_state.error("invalid argument", token=argument)
         return Atom(argument_index, operator.value, value)

     # clause = atom , [ { '&&' , atom } ]
     #        ;
     def _parse_clause(self, tokens):
         atoms = []
         while tokens:
             atoms.append(self._parse_atom(tokens))
             if not tokens or tokens[0].type != "AND":
                 break
             tokens.pop(0)
         else:
             self._parser_state.error("empty clause")
         return atoms

     # argument-expression = clause , [ { '||' , clause } ]
     #                   ;
     def parse_argument_expression(self, tokens):
         """Parse a argument expression in Disjunctive Normal Form.

         Since BPF disallows back jumps, we build the basic blocks in reverse
         order so that all the jump targets are known by the time we need to
         reference them.
         """

         clauses = []
         while tokens:
             clauses.append(self._parse_clause(tokens))
             if not tokens or tokens[0].type != "OR":
                 break
             tokens.pop(0)
         else:
             self._parser_state.error("empty argument expression")
         return clauses

     # default-action = 'kill-process'
     #                | 'kill-thread'
     #                | 'kill'
     #                | 'trap'
     #                | 'user-notify'
     #                ;
     def _parse_default_action(self, tokens):
         if not tokens:
             self._parser_state.error("missing default action")
         action_token = tokens.pop(0)
         if action_token.type != "ACTION":
             return self._parser_state.error(
                 "invalid default action", token=action_token
             )
         if action_token.value == "kill-process":
             return bpf.KillProcess()
         if action_token.value == "kill-thread":
             return bpf.KillThread()
         if action_token.value == "kill":
             return self._kill_action
         if action_token.value == "trap":
             return bpf.Trap()
         if action_token.value == "user-notify":
             return bpf.UserNotify()
         return self._parser_state.error(
             "invalid permissive default action", token=action_token
         )

     # action = 'allow' | '1'
     #        | 'kill-process'
     #        | 'kill-thread'
     #        | 'kill'
     #        | 'trap'
     #        | 'trace'
     #        | 'log'
     #        | 'user-notify'
     #        | 'return' , single-constant
     #        ;
     def parse_action(self, tokens):
         if not tokens:
             self._parser_state.error("missing action")
         action_token = tokens.pop(0)
         # denylist policies must specify a return for every line.
         if self._denylist:
             if action_token.type != "RETURN":
                 self._parser_state.error("invalid denylist policy")

         if action_token.type == "ACTION":
             if action_token.value == "allow":
                 return bpf.Allow()
             if action_token.value == "kill":
                 return self._kill_action
             if action_token.value == "kill-process":
                 return bpf.KillProcess()
             if action_token.value == "kill-thread":
                 return bpf.KillThread()
             if action_token.value == "trap":
                 return bpf.Trap()
             if action_token.value == "trace":
                 return bpf.Trace()
             if action_token.value == "user-notify":
                 return bpf.UserNotify()
             if action_token.value == "log":
                 return bpf.Log()
         elif action_token.type == "NUMERIC_CONSTANT":
             constant = self._parse_single_constant(action_token)
             if constant == 1:
                 return bpf.Allow()
         elif action_token.type == "RETURN":
             if not tokens:
                 self._parser_state.error("missing return value")
             if self._ret_log:
                 tokens.pop(0)
                 return bpf.Log()
             else:
                 return bpf.ReturnErrno(
                     self._parse_single_constant(tokens.pop(0))
                 )
         return self._parser_state.error("invalid action", token=action_token)

     # single-filter = action
     #               | argument-expression , [ ';' , action ]
     #               | '!','(', argument-expression, [ ';', action ], ')'
     #               ;
     def _parse_single_filter(self, tokens):
         if not tokens:
             self._parser_state.error("missing filter")
         if tokens[0].type == "ARGUMENT":
             # Only argument expressions can start with an ARGUMENT token.
             argument_expression = self.parse_argument_expression(tokens)
             if tokens and tokens[0].type == "SEMICOLON":
                 tokens.pop(0)
                 action = self.parse_action(tokens)
             else:
                 action = bpf.Allow()
             return Filter(argument_expression, action)
         else:
             return Filter(None, self.parse_action(tokens))

     # filter = '{' , single-filter , [ { ',' , single-filter } ] , '}'
     #        | single-filter
     #        ;
     def parse_filter(self, tokens):
         """Parse a filter and return a list of Filter objects."""
         if not tokens:
             self._parser_state.error("missing filter")
         filters = []
         if tokens[0].type == "LBRACE":
             opening_brace = tokens.pop(0)
             while tokens:
                 filters.append(self._parse_single_filter(tokens))
                 if not tokens or tokens[0].type != "COMMA":
                     break
                 tokens.pop(0)
             if not tokens or tokens[0].type != "RBRACE":
                 self._parser_state.error("unclosed brace", token=opening_brace)
             tokens.pop(0)
         else:
             filters.append(self._parse_single_filter(tokens))
         return filters

     # key-value-pair = identifier , '=', identifier , [ { ',' , identifier } ]
     #                ;
     def _parse_key_value_pair(self, tokens):
         if not tokens:
             self._parser_state.error("missing key")
         key = tokens.pop(0)
         if key.type != "IDENTIFIER":
             self._parser_state.error("invalid key", token=key)
         if not tokens:
             self._parser_state.error("missing equal")
         if tokens[0].type != "EQUAL":
             self._parser_state.error("invalid equal", token=tokens[0])
         tokens.pop(0)
         value_list = []
         while tokens:
             value = tokens.pop(0)
             if value.type != "IDENTIFIER":
                 self._parser_state.error("invalid value", token=value)
             value_list.append(value.value)
             if not tokens or tokens[0].type != "COMMA":
                 break
             tokens.pop(0)
         else:
             self._parser_state.error("empty value")
         return (key.value, value_list)

     # metadata = '[' , key-value-pair , [ { ';' , key-value-pair } ] , ']'
     #          ;
     def _parse_metadata(self, tokens):
         if not tokens:
             self._parser_state.error("missing opening bracket")
         opening_bracket = tokens.pop(0)
         if opening_bracket.type != "LBRACKET":
             self._parser_state.error(
                 "invalid opening bracket", token=opening_bracket
             )
         metadata = {}
         while tokens:
             first_token = tokens[0]
             key, value = self._parse_key_value_pair(tokens)
             if key in metadata:
                 self._parser_state.error(
                     'duplicate metadata key: "%s"' % key, token=first_token
                 )
             metadata[key] = value
             if not tokens or tokens[0].type != "SEMICOLON":
                 break
             tokens.pop(0)
         if not tokens or tokens[0].type != "RBRACKET":
             self._parser_state.error("unclosed bracket", token=opening_bracket)
         tokens.pop(0)
         return metadata

     # syscall-descriptor = syscall-name , [ metadata ]
     #                    | syscall-group-name , [ metadata ]
     #                    ;
     def _parse_syscall_descriptor(self, tokens):
         if not tokens:
             self._parser_state.error("missing syscall descriptor")
         syscall_descriptor = tokens.pop(0)
         # `kill` as a syscall name is a special case since kill is also a valid
         # action and actions have precendence over identifiers.
         if (
             syscall_descriptor.type != "IDENTIFIER"
             and syscall_descriptor.value != "kill"
         ):
             self._parser_state.error(
                 "invalid syscall descriptor", token=syscall_descriptor
             )
         if tokens and tokens[0].type == "LBRACKET":
             metadata = self._parse_metadata(tokens)
             if (
                 "arch" in metadata
                 and self._arch.arch_name not in metadata["arch"]
             ):
                 return ()
         if "@" in syscall_descriptor.value:
             # This is a syscall group.
             subtokens = syscall_descriptor.value.split("@")
             if len(subtokens) != 2:
                 self._parser_state.error(
                     "invalid syscall group name", token=syscall_descriptor
                 )
             syscall_group_name, syscall_namespace_name = subtokens
             if syscall_namespace_name not in self._arch.syscall_groups:
                 self._parser_state.error(
                     "nonexistent syscall group namespace",
                     token=syscall_descriptor,
                 )
             syscall_namespace = self._arch.syscall_groups[
                 syscall_namespace_name
             ]
             if syscall_group_name not in syscall_namespace:
                 self._parser_state.error(
                     "nonexistent syscall group", token=syscall_descriptor
                 )
             return (
                 Syscall(name, self._arch.syscalls[name])
                 for name in syscall_namespace[syscall_group_name]
             )
         if syscall_descriptor.value not in self._arch.syscalls:
             self._parser_state.error(
                 "nonexistent syscall", token=syscall_descriptor
             )
         return (
             Syscall(
                 syscall_descriptor.value,
                 self._arch.syscalls[syscall_descriptor.value],
             ),
         )

     # filter-statement =
     #     '{' , syscall-descriptor , [ { ',', syscall-descriptor } ] , '}' ,
     #         ':' , filter
     #   | syscall-descriptor , ':' , filter
     #   ;
     def parse_filter_statement(self, tokens):
         """Parse a filter statement and return a ParsedFilterStatement."""
         if not tokens:
             self._parser_state.error("empty filter statement")
         syscall_descriptors = []
         if tokens[0].type == "LBRACE":
             opening_brace = tokens.pop(0)
             while tokens:
                 syscall_descriptors.extend(
                     self._parse_syscall_descriptor(tokens)
                 )
                 if not tokens or tokens[0].type != "COMMA":
                     break
                 tokens.pop(0)
             if not tokens or tokens[0].type != "RBRACE":
                 self._parser_state.error("unclosed brace", token=opening_brace)
             tokens.pop(0)
         else:
             syscall_descriptors.extend(self._parse_syscall_descriptor(tokens))
         if not tokens:
             self._parser_state.error("missing colon")
         if tokens[0].type != "COLON":
             self._parser_state.error("invalid colon", token=tokens[0])
         # Given that there can be multiple syscalls and filters in a single
         # filter statement, use the colon token as the anchor for error location
         # purposes.
         colon_token = tokens.pop(0)
         parsed_filter = self.parse_filter(tokens)
         if not syscall_descriptors:
             return None
         return ParsedFilterStatement(
             tuple(syscall_descriptors), parsed_filter, colon_token
         )

     # include-statement = '@include' , posix-path
     #                   ;
     def _parse_include_statement(self, tokens):
         if not tokens:
             self._parser_state.error("empty filter statement")
         if tokens[0].type != "INCLUDE":
             self._parser_state.error("invalid include", token=tokens[0])
         tokens.pop(0)
         if not tokens:
             self._parser_state.error("empty include path")
         include_path = tokens.pop(0)
         if include_path.type != "PATH":
             self._parser_state.error("invalid include path", token=include_path)
         if len(self._parser_states) == self._include_depth_limit:
             self._parser_state.error("@include statement nested too deep")
         include_filename = os.path.normpath(
             os.path.join(
                 os.path.dirname(self._parser_state.filename), include_path.value
             )
         )
         if not os.path.isfile(include_filename):
             self._parser_state.error(
                 "Could not @include %s" % include_filename, token=include_path
             )
         return self._parse_policy_file(include_filename)

     def _parse_frequency_file(self, filename):
         self._parser_states.append(ParserState(filename))
         try:
             frequency_mapping = collections.defaultdict(int)
             with open(filename, encoding="utf-8") as frequency_file:
                 for tokens in self._parser_state.tokenize(frequency_file):
                     syscall_numbers = self._parse_syscall_descriptor(tokens)
                     if not tokens:
                         self._parser_state.error("missing colon")
                     if tokens[0].type != "COLON":
                         self._parser_state.error(
                             "invalid colon", token=tokens[0]
                         )
                     tokens.pop(0)

                     if not tokens:
                         self._parser_state.error("missing number")
                     number = tokens.pop(0)
                     if number.type != "NUMERIC_CONSTANT":
                         self._parser_state.error("invalid number", token=number)
                     number_value = int(number.value, base=0)
                     if number_value < 0:
                         self._parser_state.error("invalid number", token=number)

                     for syscall_number in syscall_numbers:
                         frequency_mapping[syscall_number] += number_value
             return frequency_mapping
         finally:
             self._parser_states.pop()

     # frequency-statement = '@frequency' , posix-path
     #                      ;
     def _parse_frequency_statement(self, tokens):
         if not tokens:
             self._parser_state.error("empty frequency statement")
         if tokens[0].type != "FREQUENCY":
             self._parser_state.error("invalid frequency", token=tokens[0])
         tokens.pop(0)
         if not tokens:
             self._parser_state.error("empty frequency path")
         frequency_path = tokens.pop(0)
         if frequency_path.type != "PATH":
             self._parser_state.error(
                 "invalid frequency path", token=frequency_path
             )
         frequency_filename = os.path.normpath(
             os.path.join(
                 os.path.dirname(self._parser_state.filename),
                 frequency_path.value,
             )
         )
         if not os.path.isfile(frequency_filename):
             self._parser_state.error(
                 "Could not open frequency file %s" % frequency_filename,
                 token=frequency_path,
             )
         return self._parse_frequency_file(frequency_filename)

     # default-statement = '@default' , default-action
     #                   ;
     def _parse_default_statement(self, tokens):
         if not tokens:
             self._parser_state.error("empty default statement")
         if tokens[0].type != "DEFAULT":
             self._parser_state.error("invalid default", token=tokens[0])
         tokens.pop(0)
         if not tokens:
             self._parser_state.error("empty action")
         return self._parse_default_action(tokens)

     def _parse_policy_file(self, filename):
         self._parser_states.append(ParserState(filename))
         try:
             statements = []
             denylist_header = False
             with open(filename, encoding="utf-8") as policy_file:
                 for tokens in self._parser_state.tokenize(policy_file):
                     if tokens[0].type == "INCLUDE":
                         statements.extend(self._parse_include_statement(tokens))
                     elif tokens[0].type == "FREQUENCY":
                         for (
                             syscall_number,
                             frequency,
                         ) in self._parse_frequency_statement(tokens).items():
                             self._frequency_mapping[syscall_number] += frequency
                     elif tokens[0].type == "DEFAULT":
                         self._default_action = self._parse_default_statement(
                             tokens
                         )
                     elif tokens[0].type == "DENYLIST":
                         tokens.pop()
                         if not self._denylist:
                             self._parser_state.error(
                                 "policy is denylist, but "
                                 "flag --denylist not "
                                 "passed in."
                             )
                         else:
                             denylist_header = True
                     else:
                         statement = self.parse_filter_statement(tokens)
                         if statement is None:
                             # If all the syscalls in the statement are for
                             # another arch, skip the whole statement.
                             continue
                         statements.append(statement)

                     if tokens:
                         self._parser_state.error(
                             "extra tokens", token=tokens[0]
                         )
             if self._denylist and not denylist_header:
                 self._parser_state.error(
                     "policy must contain @denylist flag to"
                     " be compiled with --denylist flag."
                 )
             return statements
         finally:
             self._parser_states.pop()

     def parse_file(self, filename):
         """Parse a file and return the list of FilterStatements."""
         self._frequency_mapping = collections.defaultdict(int)
         try:
             statements = self._parse_policy_file(filename)
         except RecursionError:
             raise ParseException(
                 "recursion limit exceeded",
                 filename,
                 line=self._parser_states[-1].line,
             )

         # Collapse statements into a single syscall-to-filter-list, remembering
         # the token for each filter for better diagnostics.
         syscall_filter_mapping = {}
         syscall_filter_definitions = {}
         filter_statements = []
         for syscalls, filters, token in statements:
             for syscall in syscalls:
                 if syscall not in syscall_filter_mapping:
                     filter_statements.append(
                         FilterStatement(
                             syscall, self._frequency_mapping.get(syscall, 1), []
                         )
                     )
                     syscall_filter_mapping[syscall] = filter_statements[-1]
                     syscall_filter_definitions[syscall] = []
                 for filt in filters:
                     syscall_filter_mapping[syscall].filters.append(filt)
                     syscall_filter_definitions[syscall].append(token)
         default_action = self._override_default_action or self._default_action
         for filter_statement in filter_statements:
             unconditional_actions_suffix = list(
                 itertools.dropwhile(
                     lambda filt: filt.expression is not None,
                     filter_statement.filters,
                 )
             )
             if len(unconditional_actions_suffix) == 1:
                 # The last filter already has an unconditional action, no need
                 # to add another one.
                 continue
             if len(unconditional_actions_suffix) > 1:
                 previous_definition_token = syscall_filter_definitions[
                     filter_statement.syscall
                 ][-len(unconditional_actions_suffix)]
                 current_definition_token = syscall_filter_definitions[
                     filter_statement.syscall
                 ][-len(unconditional_actions_suffix) + 1]
                 raise ParseException(
                     (
                         "Syscall %s (number %d) already had "
                         "an unconditional action applied"
                     )
                     % (
                         filter_statement.syscall.name,
                         filter_statement.syscall.number,
                     ),
                     filename=current_definition_token.filename,
                     token=current_definition_token,
                 ) from ParseException(
                     "Previous definition",
                     filename=previous_definition_token.filename,
                     token=previous_definition_token,
                 )
             assert not unconditional_actions_suffix
             filter_statement.filters.append(
                 Filter(expression=None, action=default_action)
             )
         return ParsedPolicy(default_action, filter_statements)