tools/compile_seccomp_policy: Add the beginning of a new parser am: d4ce449ed0 am: 93bb1d5d6d
am: 44ca0f3fdd
Change-Id: Id7aedd87e940789046b090caba2700602aa563ac
diff --git a/.gitignore b/.gitignore
index 2414029..fed0adb 100644
--- a/.gitignore
+++ b/.gitignore
@@ -15,6 +15,10 @@
# libseccomp.
/libseccomp/
+# Python-related files.
+/tools/__pycache__/
+*.pyc
+
# Shared libraries when compiling in-tree.
*.so
diff --git a/tools/arch.py b/tools/arch.py
new file mode 100644
index 0000000..6f2dfb2
--- /dev/null
+++ b/tools/arch.py
@@ -0,0 +1,54 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2018 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Architecture-specific information."""
+
+import collections
+import json
+
+
+class Arch(
+        collections.namedtuple(
+            'Arch',
+            ['arch_nr', 'arch_name', 'bits', 'syscalls', 'constants'])):
+    """Holds architecture-specific information.
+
+    Every field is filled from the same-named key of the JSON description
+    read by load_from_json(). `bits` is the width of a word and drives the
+    range helpers below; the other fields are stored exactly as loaded.
+    """
+
+    def truncate_word(self, value):
+        """Return the value truncated to fit in a word.
+
+        Masking with max_unsigned also maps a negative Python int onto the
+        unsigned value that has the same low `bits` bits.
+        """
+        return value & self.max_unsigned
+
+    @property
+    def min_signed(self):
+        """The smallest signed value that can be represented in a word."""
+        return -(1 << (self.bits - 1))
+
+    @property
+    def max_unsigned(self):
+        """The largest unsigned value that can be represented in a word."""
+        return (1 << self.bits) - 1
+
+    @classmethod
+    def load_from_json(cls, json_path):
+        """Return an Arch from a .json file.
+
+        Args:
+            json_path: Path of a JSON document holding the keys 'arch_nr',
+                'arch_name', 'bits', 'syscalls' and 'constants'.
+
+        Raises:
+            OSError: The file cannot be opened.
+            ValueError: The file does not contain valid JSON.
+            KeyError: One of the required keys is missing.
+        """
+        # JSON documents are UTF-8; decoding them with the locale's default
+        # encoding would make the result depend on the host configuration.
+        with open(json_path, 'r', encoding='utf-8') as json_file:
+            constants = json.load(json_file)
+        # Build through `cls` so that subclasses get instances of themselves.
+        return cls(
+            arch_nr=constants['arch_nr'],
+            arch_name=constants['arch_name'],
+            bits=constants['bits'],
+            syscalls=constants['syscalls'],
+            constants=constants['constants'],
+        )
diff --git a/tools/parser.py b/tools/parser.py
new file mode 100644
index 0000000..05b6628
--- /dev/null
+++ b/tools/parser.py
@@ -0,0 +1,230 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2018 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""A parser for the Minijail policy file."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import collections
+import re
+
+# A lexical token: its kind (one of the group names below), the matched text
+# and where it was found. The typename matches the bound name so that repr()
+# output and pickling refer to `Token`.
+Token = collections.namedtuple('Token',
+                               ['type', 'value', 'filename', 'line', 'column'])
+
+# A regex that can tokenize a Minijail policy file line.
+#
+# The alternatives are tried in the order listed, so multi-character
+# operators ('||', '&&', '==') must come before their one-character prefixes.
+# Keyword-like tokens are anchored with \b so that they are only recognised
+# as whole words; without it an identifier such as `inotify_init` would be
+# split into OP 'in' followed by IDENTIFIER 'otify_init'.
+_TOKEN_SPECIFICATION = (
+    ('COMMENT', r'#.*$'),
+    ('WHITESPACE', r'\s+'),
+    ('INCLUDE', r'@include\b'),
+    ('PATH', r'(?:\.)?/\S+'),
+    ('NUMERIC_CONSTANT', r'-?0[xX][0-9a-fA-F]+|-?0[Oo][0-7]+|-?[0-9]+'),
+    ('COLON', r':'),
+    ('SEMICOLON', r';'),
+    ('COMMA', r','),
+    ('BITWISE_COMPLEMENT', r'~'),
+    ('LPAREN', r'\('),
+    ('RPAREN', r'\)'),
+    ('LBRACE', r'\{'),
+    ('RBRACE', r'\}'),
+    ('RBRACKET', r'\]'),
+    ('LBRACKET', r'\['),
+    ('OR', r'\|\|'),
+    ('AND', r'&&'),
+    ('BITWISE_OR', r'\|'),
+    ('OP', r'&|\bin\b|==|!=|<=|<|>=|>'),
+    ('EQUAL', r'='),
+    ('ARGUMENT', r'\barg[0-9]+\b'),
+    ('RETURN', r'\breturn\b'),
+    ('ACTION', r'\b(?:allow|kill-process|kill-thread|kill|trap|trace|log)\b'),
+    ('IDENTIFIER', r'[a-zA-Z_][a-zA-Z_0-9@]*'),
+)
+# One named group per token kind; `match.lastgroup` then names the kind.
+_TOKEN_RE = re.compile('|'.join(
+    r'(?P<%s>%s)' % pair for pair in _TOKEN_SPECIFICATION))
+
+
+class ParseException(Exception):
+    """Raised when a policy line cannot be tokenized or parsed.
+
+    The exception text has three parts: a 'filename(line:column): message'
+    header (the column is reported 1-based), the offending line, and a row of
+    carets underneath the offending span.
+    """
+
+    # pylint: disable=too-many-arguments
+    def __init__(self, message, filename, line, line_number=1, token=None):
+        if not token:
+            # No token to blame: point one caret just past the end of the line.
+            column, length = len(line), 1
+        else:
+            column, length = token.column, len(token.value)
+
+        header = ('%s(%d:%d): %s') % (filename, line_number, column + 1,
+                                      message)
+        source = '\n %s' % line
+        marker = '\n %s%s' % (' ' * column, '^' * length)
+        super().__init__(header + source + marker)
+
+
+class ParserState:
+    """Tracks the file and line being parsed so errors can point at them."""
+
+    def __init__(self, filename):
+        self._filename = filename
+        self._line_number = 0
+        self._line = ''
+
+    @property
+    def filename(self):
+        """Name of the file this state was created for."""
+        return self._filename
+
+    @property
+    def line(self):
+        """Text of the line most recently passed to set_line()."""
+        return self._line
+
+    @property
+    def line_number(self):
+        """Number of lines seen so far; 0 before the first set_line()."""
+        return self._line_number
+
+    def set_line(self, line):
+        """Make `line` the current line and advance the line counter by one."""
+        self._line_number += 1
+        self._line = line
+
+    def error(self, message, token=None):
+        """Raise a ParseException for the current line.
+
+        When `token` is given the exception points at it; otherwise it points
+        past the end of the current line.
+        """
+        raise ParseException(message, self.filename, self.line,
+                             self.line_number, token)
+
+    def tokenize(self):
+        """Return the tokens of the current line, in order.
+
+        Whitespace and comments are dropped. Any stretch of the line that no
+        token pattern matches is reported through error() as 'invalid token'.
+        """
+        text = self.line
+        found = []
+        cursor = 0
+
+        for match in _TOKEN_RE.finditer(text):
+            start, end = match.span()
+            if start != cursor:
+                # finditer() silently skips unmatched text; a gap between two
+                # consecutive matches is therefore an unrecognised span.
+                self.error(
+                    'invalid token',
+                    token=Token('INVALID', text[cursor:start], self.filename,
+                                self.line_number, cursor))
+            cursor = end
+
+            kind = match.lastgroup
+            # Filter trivia here so that the parser never has to.
+            if kind not in ('WHITESPACE', 'COMMENT'):
+                found.append(
+                    Token(kind, match.group(), self.filename,
+                          self.line_number, start))
+
+        if cursor != len(text):
+            # Unmatched text after the last token.
+            self.error(
+                'invalid token',
+                token=Token('INVALID', text[cursor:], self.filename,
+                            self.line_number, cursor))
+        return found
+
+
+# pylint: disable=too-few-public-methods
+class PolicyParser:
+    """A parser for the Minijail seccomp policy file format."""
+
+    def __init__(self, arch):
+        self._arch = arch
+        # Stack of parser states; the innermost one is the last element.
+        self._parser_states = [ParserState("<memory>")]
+
+    @property
+    def _parser_state(self):
+        return self._parser_states[-1]
+
+    # single-constant = identifier
+    #                 | numeric-constant
+    #                 ;
+    def _parse_single_constant(self, token):
+        """Return the unsigned word value denoted by one token.
+
+        Identifiers are looked up in the architecture's constants; numeric
+        literals are converted with int(..., base=0). Anything else, an
+        unknown name, or a value that does not fit in a word is an error.
+        """
+        kind = token.type
+        if kind == 'NUMERIC_CONSTANT':
+            try:
+                number = int(token.value, base=0)
+            except ValueError:
+                self._parser_state.error('invalid constant', token=token)
+        elif kind == 'IDENTIFIER':
+            known_constants = self._arch.constants
+            if token.value not in known_constants:
+                self._parser_state.error('invalid constant', token=token)
+            number = known_constants[token.value]
+        else:
+            self._parser_state.error('invalid constant', token=token)
+
+        if number > self._arch.max_unsigned:
+            self._parser_state.error('unsigned overflow', token=token)
+        elif number < self._arch.min_signed:
+            self._parser_state.error('signed underflow', token=token)
+        elif number < 0:
+            # This converts the constant to an unsigned representation of the
+            # same value, since BPF only uses unsigned values.
+            return self._arch.truncate_word(number)
+        return number
+
+    # constant = [ '~' ] , '(' , value , ')'
+    #          | [ '~' ] , single-constant
+    #          ;
+    def _parse_constant(self, tokens):
+        """Consume one (optionally complemented) constant from `tokens`.
+
+        `tokens` must be non-empty; the tokens that make up the constant are
+        removed from its front and the resulting value is returned.
+        """
+        complemented = tokens[0].type == 'BITWISE_COMPLEMENT'
+        if complemented:
+            tokens.pop(0)
+            if not tokens:
+                self._parser_state.error('empty complement')
+            if tokens[0].type == 'BITWISE_COMPLEMENT':
+                self._parser_state.error(
+                    'invalid double complement', token=tokens[0])
+
+        if tokens[0].type != 'LPAREN':
+            result = self._parse_single_constant(tokens[0])
+        else:
+            opening = tokens.pop(0)
+            result = self.parse_value(tokens)
+            if not (tokens and tokens[0].type == 'RPAREN'):
+                self._parser_state.error(
+                    'unclosed parenthesis', token=opening)
+        # Drops either the single constant or the closing parenthesis.
+        tokens.pop(0)
+
+        if complemented:
+            return self._arch.truncate_word(~result)
+        return result
+
+    # value = constant , [ { '|' , constant } ]
+    #       ;
+    def parse_value(self, tokens):
+        """Parse constants separated bitwise OR operator |.
+
+        Constants can be:
+
+        - A number that can be parsed with int(..., base=0)
+        - A named constant expression.
+        - A parenthesized, valid constant expression.
+        - A valid constant expression prefixed with the unary bitwise
+          complement operator ~.
+        - A series of valid constant expressions separated by bitwise
+          OR operator |.
+
+        The consumed tokens are removed from the front of `tokens`; whatever
+        follows the value is left in place for the caller.
+
+        If there is an error parsing any of the constants, the whole process
+        fails.
+        """
+
+        value = 0
+        while tokens:
+            value |= self._parse_constant(tokens)
+            continues = bool(tokens) and tokens[0].type == 'BITWISE_OR'
+            if not continues:
+                return value
+            tokens.pop(0)
+        # Reached with nothing to parse at all, or right after a trailing '|'.
+        self._parser_state.error('empty constant')
+        return value
diff --git a/tools/parser_unittest.py b/tools/parser_unittest.py
new file mode 100755
index 0000000..d40ab42
--- /dev/null
+++ b/tools/parser_unittest.py
@@ -0,0 +1,237 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2018 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Unittests for the parser module."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+import unittest
+
+import arch
+import parser # pylint: disable=wrong-import-order
+
+# Architecture description shared by the tests below. It is loaded once, at
+# import time, from testdata/arch_64.json located next to this module.
+ARCH_64 = arch.Arch.load_from_json(
+ os.path.join(
+ os.path.dirname(os.path.abspath(__file__)), 'testdata/arch_64.json'))
+
+
+class TokenizerTests(unittest.TestCase):
+    """Tests for ParserState.tokenize."""
+
+    @staticmethod
+    def _tokenize(line):
+        """Tokenize `line` with a fresh ParserState named '<memory>'."""
+        state = parser.ParserState('<memory>')
+        state.set_line(line)
+        return state.tokenize()
+
+    def _check_tokens(self, line, expected):
+        """Assert that `line` tokenizes to the (type, value) pairs given."""
+        pairs = [(tok.type, tok.value)
+                 for tok in TokenizerTests._tokenize(line)]
+        self.assertEqual(pairs, expected)
+
+    def test_tokenize(self):
+        """Accept valid tokens."""
+        self._check_tokens('@include /minijail.policy', [
+            ('INCLUDE', '@include'),
+            ('PATH', '/minijail.policy'),
+        ])
+        self._check_tokens('@include ./minijail.policy', [
+            ('INCLUDE', '@include'),
+            ('PATH', './minijail.policy'),
+        ])
+        # The trailing comment and all whitespace must not show up as tokens.
+        self._check_tokens(
+            'read: arg0 in ~0xffff || arg0 & (1|2) && arg0 == 0o755; '
+            'return ENOSYS # ignored', [
+                ('IDENTIFIER', 'read'),
+                ('COLON', ':'),
+                ('ARGUMENT', 'arg0'),
+                ('OP', 'in'),
+                ('BITWISE_COMPLEMENT', '~'),
+                ('NUMERIC_CONSTANT', '0xffff'),
+                ('OR', '||'),
+                ('ARGUMENT', 'arg0'),
+                ('OP', '&'),
+                ('LPAREN', '('),
+                ('NUMERIC_CONSTANT', '1'),
+                ('BITWISE_OR', '|'),
+                ('NUMERIC_CONSTANT', '2'),
+                ('RPAREN', ')'),
+                ('AND', '&&'),
+                ('ARGUMENT', 'arg0'),
+                ('OP', '=='),
+                ('NUMERIC_CONSTANT', '0o755'),
+                ('SEMICOLON', ';'),
+                ('RETURN', 'return'),
+                ('IDENTIFIER', 'ENOSYS'),
+            ])
+
+    def test_tokenize_invalid_token(self):
+        """Reject tokenizer errors."""
+        expected_message = (r'<memory>\(1:1\): invalid token\n'
+                            r' %invalid-token%\n'
+                            r' \^')
+        with self.assertRaisesRegex(parser.ParseException, expected_message):
+            TokenizerTests._tokenize('%invalid-token%')
+
+
+class ParseConstantTests(unittest.TestCase):
+    """Tests for PolicyParser.parse_value."""
+
+    def setUp(self):
+        self.arch = ARCH_64
+        self.parser = parser.PolicyParser(self.arch)
+
+    def _tokenize(self, line):
+        """Feed `line` to the parser's own state and return its tokens."""
+        # pylint: disable=protected-access
+        self.parser._parser_state.set_line(line)
+        return self.parser._parser_state.tokenize()
+
+    def test_parse_constant_unsigned(self):
+        """Accept reasonably-sized unsigned constants."""
+        self.assertEqual(
+            self.parser.parse_value(self._tokenize('0x80000000')), 0x80000000)
+        if self.arch.bits == 64:
+            self.assertEqual(
+                self.parser.parse_value(self._tokenize('0x8000000000000000')),
+                0x8000000000000000)
+
+    def test_parse_constant_unsigned_too_big(self):
+        """Reject unreasonably-sized unsigned constants."""
+        if self.arch.bits == 32:
+            with self.assertRaisesRegex(parser.ParseException,
+                                        'unsigned overflow'):
+                self.parser.parse_value(self._tokenize('0x100000000'))
+        with self.assertRaisesRegex(parser.ParseException,
+                                    'unsigned overflow'):
+            self.parser.parse_value(self._tokenize('0x10000000000000000'))
+
+    def test_parse_constant_signed(self):
+        """Accept reasonably-sized signed constants."""
+        self.assertEqual(
+            self.parser.parse_value(self._tokenize('-1')),
+            self.arch.max_unsigned)
+
+    def test_parse_constant_signed_too_negative(self):
+        """Reject unreasonably-sized signed constants."""
+        if self.arch.bits == 32:
+            # One below the smallest 32-bit signed value, mirroring the
+            # 64-bit boundary probed below.
+            with self.assertRaisesRegex(parser.ParseException,
+                                        'signed underflow'):
+                self.parser.parse_value(self._tokenize('-0x80000001'))
+        with self.assertRaisesRegex(parser.ParseException, 'signed underflow'):
+            self.parser.parse_value(self._tokenize('-0x8000000000000001'))
+
+    def test_parse_mask(self):
+        """Accept parsing a mask value."""
+        self.assertEqual(
+            self.parser.parse_value(self._tokenize('0x1|0x2|0x4|0x8')), 0xf)
+
+    def test_parse_parenthesized_expressions(self):
+        """Accept parsing parenthesized expressions."""
+        bad_expressions = [
+            '(1',
+            '|(1)',
+            '(1)|',
+            '()',
+            '(',
+            '((',
+            '(()',
+            '(()1',
+        ]
+        for expression in bad_expressions:
+            with self.assertRaises(parser.ParseException, msg=expression):
+                self.parser.parse_value(self._tokenize(expression))
+
+        # These start with a valid value; parse_value must stop after it and
+        # leave the remaining tokens for the caller.
+        bad_partial_expressions = [
+            '1)',
+            '(1)1',
+            '1(0)',
+        ]
+        for expression in bad_partial_expressions:
+            tokens = self._tokenize(expression)
+            self.parser.parse_value(tokens)
+            self.assertNotEqual(tokens, [], msg=expression)
+
+        good_expressions = [
+            '(3)',
+            '(1)|2',
+            '1|(2)',
+            '(1)|(2)',
+            '((3))',
+            '0|(1|2)',
+            '(0|1|2)',
+        ]
+        for expression in good_expressions:
+            self.assertEqual(
+                self.parser.parse_value(self._tokenize(expression)), 3,
+                msg=expression)
+
+    def test_parse_constant_complements(self):
+        """Accept complementing constants."""
+        self.assertEqual(
+            self.parser.parse_value(self._tokenize('~0')),
+            self.arch.max_unsigned)
+        self.assertEqual(
+            self.parser.parse_value(self._tokenize('~0|~0')),
+            self.arch.max_unsigned)
+        if self.arch.bits == 32:
+            self.assertEqual(
+                self.parser.parse_value(
+                    self._tokenize('~0x005AF0FF|~0xFFA50FFF')), 0xFFFFFF00)
+            self.assertEqual(
+                self.parser.parse_value(
+                    self._tokenize('0x0F|~(0x005AF000|0x00A50FFF)|0xF0')),
+                0xFF0000FF)
+        else:
+            self.assertEqual(
+                self.parser.parse_value(
+                    self._tokenize('~0x00005A5AF0F0FFFF|~0xFFFFA5A50F0FFFFF')),
+                0xFFFFFFFFFFFF0000)
+            self.assertEqual(
+                self.parser.parse_value(
+                    self._tokenize(
+                        '0x00FF|~(0x00005A5AF0F00000|0x0000A5A50F0FFFFF)|0xFF00'
+                    )), 0xFFFF00000000FFFF)
+
+    def test_parse_double_complement(self):
+        """Reject double-complementing constants."""
+        with self.assertRaisesRegex(parser.ParseException,
+                                    'double complement'):
+            self.parser.parse_value(self._tokenize('~~0'))
+
+    def test_parse_empty_complement(self):
+        """Reject complementing nothing."""
+        with self.assertRaisesRegex(parser.ParseException, 'empty complement'):
+            self.parser.parse_value(self._tokenize('0|~'))
+
+    def test_parse_named_constant(self):
+        """Accept parsing a named constant."""
+        self.assertEqual(
+            self.parser.parse_value(self._tokenize('O_RDONLY')), 0)
+
+    def test_parse_empty_constant(self):
+        """Reject parsing nothing."""
+        with self.assertRaisesRegex(parser.ParseException, 'empty constant'):
+            self.parser.parse_value(self._tokenize(''))
+        with self.assertRaisesRegex(parser.ParseException, 'empty constant'):
+            self.parser.parse_value(self._tokenize('0|'))
+
+
+# Run the test cases through unittest's command-line entry point when this
+# file is executed as a script rather than imported.
+if __name__ == '__main__':
+ unittest.main()
diff --git a/tools/testdata/arch_64.json b/tools/testdata/arch_64.json
new file mode 100644
index 0000000..c23f988
--- /dev/null
+++ b/tools/testdata/arch_64.json
@@ -0,0 +1,14 @@
+{
+ "arch_nr": 3735928559,
+ "arch_name": "test",
+ "bits": 64,
+ "syscalls": {
+ "read": 0,
+ "write": 1
+ },
+ "constants": {
+ "O_RDONLY": 0,
+ "PROT_WRITE": 2,
+ "PROT_EXEC": 4
+ }
+}