blob: f239740eb344f772716b8bd29aa0be3d206f21e0 [file] [log] [blame]
Luis Hector Chavez05392b82018-10-28 21:40:10 -07001#!/usr/bin/env python3
2# -*- coding: utf-8 -*-
3#
4# Copyright (C) 2018 The Android Open Source Project
5#
6# Licensed under the Apache License, Version 2.0 (the "License");
7# you may not use this file except in compliance with the License.
8# You may obtain a copy of the License at
9#
10# http://www.apache.org/licenses/LICENSE-2.0
11#
12# Unless required by applicable law or agreed to in writing, software
13# distributed under the License is distributed on an "AS IS" BASIS,
14# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15# See the License for the specific language governing permissions and
16# limitations under the License.
17"""A BPF compiler for the Minijail policy file."""
18
19from __future__ import print_function
20
Luis Hector Chavez7a21ffe2018-12-05 16:54:30 -080021import enum
22
Matt Delcoa12687b2020-02-07 17:12:47 -080023try:
24 import bpf
25 import parser # pylint: disable=wrong-import-order
26except ImportError:
27 from minijail import bpf
28 from minijail import parser # pylint: disable=wrong-import-order
Luis Hector Chavez05392b82018-10-28 21:40:10 -070029
30
Luis Hector Chavez7a21ffe2018-12-05 16:54:30 -080031class OptimizationStrategy(enum.Enum):
32 """The available optimization strategies."""
33
34 # Generate a linear chain of syscall number checks. Works best for policies
35 # with very few syscalls.
36 LINEAR = 'linear'
37
38 # Generate a binary search tree for the syscalls. Works best for policies
39 # with a lot of syscalls, where no one syscall dominates.
40 BST = 'bst'
41
42 def __str__(self):
43 return self.value
44
45
Luis Hector Chavez05392b82018-10-28 21:40:10 -070046class SyscallPolicyEntry:
47 """The parsed version of a seccomp policy line."""
48
49 def __init__(self, name, number, frequency):
50 self.name = name
51 self.number = number
52 self.frequency = frequency
53 self.accumulated = 0
54 self.filter = None
55
56 def __repr__(self):
57 return ('SyscallPolicyEntry<name: %s, number: %d, '
Luis Hector Chavez891d3552019-03-09 18:46:53 -080058 'frequency: %d, filter: %r>') % (
59 self.name, self.number, self.frequency,
60 self.filter.instructions if self.filter else None)
Luis Hector Chavez05392b82018-10-28 21:40:10 -070061
62 def simulate(self, arch, syscall_number, *args):
63 """Simulate the policy with the given arguments."""
64 if not self.filter:
65 return (0, 'ALLOW')
66 return bpf.simulate(self.filter.instructions, arch, syscall_number,
67 *args)
68
69
Luis Hector Chavezb21da7a2018-11-01 16:50:36 -070070class SyscallPolicyRange:
71 """A contiguous range of SyscallPolicyEntries that have the same action."""
72
73 def __init__(self, *entries):
74 self.numbers = (entries[0].number, entries[-1].number + 1)
75 self.frequency = sum(e.frequency for e in entries)
76 self.accumulated = 0
77 self.filter = entries[0].filter
78
79 def __repr__(self):
80 return 'SyscallPolicyRange<numbers: %r, frequency: %d, filter: %r>' % (
Luis Hector Chavez891d3552019-03-09 18:46:53 -080081 self.numbers, self.frequency,
82 self.filter.instructions if self.filter else None)
Luis Hector Chavezb21da7a2018-11-01 16:50:36 -070083
84 def simulate(self, arch, syscall_number, *args):
85 """Simulate the policy with the given arguments."""
86 if not self.filter:
87 return (0, 'ALLOW')
88 return self.filter.simulate(arch, syscall_number, *args)
89
90
91def _convert_to_ranges(entries):
92 entries = list(sorted(entries, key=lambda r: r.number))
93 lower = 0
94 while lower < len(entries):
95 upper = lower + 1
96 while upper < len(entries):
97 if entries[upper - 1].filter != entries[upper].filter:
98 break
99 if entries[upper - 1].number + 1 != entries[upper].number:
100 break
101 upper += 1
102 yield SyscallPolicyRange(*entries[lower:upper])
103 lower = upper
104
105
106def _compile_single_range(entry,
107 accept_action,
108 reject_action,
Luis Hector Chavezb21da7a2018-11-01 16:50:36 -0700109 lower_bound=0,
110 upper_bound=1e99):
111 action = accept_action
112 if entry.filter:
Luis Hector Chavezb21da7a2018-11-01 16:50:36 -0700113 action = entry.filter
114 if entry.numbers[1] - entry.numbers[0] == 1:
115 # Single syscall.
116 # Accept if |X == nr|.
Luis Hector Chaveza54812b2018-11-01 20:02:22 -0700117 return (1,
118 bpf.SyscallEntry(
119 entry.numbers[0], action, reject_action, op=bpf.BPF_JEQ))
Luis Hector Chavezb21da7a2018-11-01 16:50:36 -0700120 elif entry.numbers[0] == lower_bound:
121 # Syscall range aligned with the lower bound.
122 # Accept if |X < nr[1]|.
Luis Hector Chaveza54812b2018-11-01 20:02:22 -0700123 return (1,
124 bpf.SyscallEntry(
125 entry.numbers[1], reject_action, action, op=bpf.BPF_JGE))
Luis Hector Chavezb21da7a2018-11-01 16:50:36 -0700126 elif entry.numbers[1] == upper_bound:
127 # Syscall range aligned with the upper bound.
128 # Accept if |X >= nr[0]|.
Luis Hector Chaveza54812b2018-11-01 20:02:22 -0700129 return (1,
130 bpf.SyscallEntry(
131 entry.numbers[0], action, reject_action, op=bpf.BPF_JGE))
Luis Hector Chavezb21da7a2018-11-01 16:50:36 -0700132 # Syscall range in the middle.
133 # Accept if |nr[0] <= X < nr[1]|.
134 upper_entry = bpf.SyscallEntry(
135 entry.numbers[1], reject_action, action, op=bpf.BPF_JGE)
Luis Hector Chaveza54812b2018-11-01 20:02:22 -0700136 return (2,
137 bpf.SyscallEntry(
138 entry.numbers[0], upper_entry, reject_action, op=bpf.BPF_JGE))
Luis Hector Chavezb21da7a2018-11-01 16:50:36 -0700139
140
Luis Hector Chaveza54812b2018-11-01 20:02:22 -0700141def _compile_ranges_linear(ranges, accept_action, reject_action):
142 # Compiles the list of ranges into a simple linear list of comparisons. In
Luis Hector Chavez7a21ffe2018-12-05 16:54:30 -0800143 # order to make the generated code a bit more efficient, we sort the
Luis Hector Chaveza54812b2018-11-01 20:02:22 -0700144 # ranges by frequency, so that the most frequently-called syscalls appear
Luis Hector Chavez7a21ffe2018-12-05 16:54:30 -0800145 # earlier in the chain.
Luis Hector Chaveza54812b2018-11-01 20:02:22 -0700146 cost = 0
147 accumulated_frequencies = 0
Luis Hector Chavez7a21ffe2018-12-05 16:54:30 -0800148 next_action = reject_action
Luis Hector Chaveza54812b2018-11-01 20:02:22 -0700149 for entry in sorted(ranges, key=lambda r: r.frequency):
150 current_cost, next_action = _compile_single_range(
151 entry, accept_action, next_action)
152 accumulated_frequencies += entry.frequency
153 cost += accumulated_frequencies * current_cost
154 return (cost, next_action)
Luis Hector Chavez7a21ffe2018-12-05 16:54:30 -0800155
156
Luis Hector Chaveza54812b2018-11-01 20:02:22 -0700157def _compile_entries_linear(entries, accept_action, reject_action):
158 return _compile_ranges_linear(
159 _convert_to_ranges(entries), accept_action, reject_action)[1]
160
161
162def _compile_entries_bst(entries, accept_action, reject_action):
Luis Hector Chavez7a21ffe2018-12-05 16:54:30 -0800163 # Instead of generating a linear list of comparisons, this method generates
Luis Hector Chaveza54812b2018-11-01 20:02:22 -0700164 # a binary search tree, where some of the leaves can be linear chains of
165 # comparisons.
Luis Hector Chavez7a21ffe2018-12-05 16:54:30 -0800166 #
167 # Even though we are going to perform a binary search over the syscall
168 # number, we would still like to rotate some of the internal nodes of the
169 # binary search tree so that more frequently-used syscalls can be accessed
170 # more cheaply (i.e. fewer internal nodes need to be traversed to reach
171 # them).
172 #
Luis Hector Chaveza54812b2018-11-01 20:02:22 -0700173 # This uses Dynamic Programming to generate all possible BSTs efficiently
174 # (in O(n^3)) so that we can get the absolute minimum-cost tree that matches
175 # all syscall entries. It does so by considering all of the O(n^2) possible
176 # sub-intervals, and for each one of those try all of the O(n) partitions of
177 # that sub-interval. At each step, it considers putting the remaining
178 # entries in a linear comparison chain as well as another BST, and chooses
179 # the option that minimizes the total overall cost.
Luis Hector Chavez7a21ffe2018-12-05 16:54:30 -0800180 #
Luis Hector Chaveza54812b2018-11-01 20:02:22 -0700181 # Between every pair of non-contiguous allowed syscalls, there are two
182 # locally optimal options as to where to set the partition for the
183 # subsequent ranges: aligned to the end of the left subrange or to the
184 # beginning of the right subrange. The fact that these two options have
185 # slightly different costs, combined with the possibility of a subtree to
186 # use the linear chain strategy (which has a completely different cost
187 # model), causes the target cost function that we are trying to optimize to
188 # not be unimodal / convex. This unfortunately means that more clever
189 # techniques like using ternary search (which would reduce the overall
190 # complexity to O(n^2 log n)) do not work in all cases.
Luis Hector Chavezb21da7a2018-11-01 16:50:36 -0700191 ranges = list(_convert_to_ranges(entries))
192
Luis Hector Chavez7a21ffe2018-12-05 16:54:30 -0800193 accumulated = 0
Luis Hector Chavezb21da7a2018-11-01 16:50:36 -0700194 for entry in ranges:
Luis Hector Chavez7a21ffe2018-12-05 16:54:30 -0800195 accumulated += entry.frequency
196 entry.accumulated = accumulated
197
Luis Hector Chaveza54812b2018-11-01 20:02:22 -0700198 # Memoization cache to build the DP table top-down, which is easier to
199 # understand.
200 memoized_costs = {}
Luis Hector Chavez7a21ffe2018-12-05 16:54:30 -0800201
Luis Hector Chaveza54812b2018-11-01 20:02:22 -0700202 def _generate_syscall_bst(ranges, indices, bounds=(0, 2**64 - 1)):
203 assert bounds[0] <= ranges[indices[0]].numbers[0], (indices, bounds)
204 assert ranges[indices[1] - 1].numbers[1] <= bounds[1], (indices,
205 bounds)
Luis Hector Chavez7a21ffe2018-12-05 16:54:30 -0800206
Luis Hector Chaveza54812b2018-11-01 20:02:22 -0700207 if bounds in memoized_costs:
208 return memoized_costs[bounds]
209 if indices[1] - indices[0] == 1:
210 if bounds == ranges[indices[0]].numbers:
211 # If bounds are tight around the syscall, it costs nothing.
212 memoized_costs[bounds] = (0, ranges[indices[0]].filter
213 or accept_action)
214 return memoized_costs[bounds]
215 result = _compile_single_range(ranges[indices[0]], accept_action,
216 reject_action)
217 memoized_costs[bounds] = (result[0] * ranges[indices[0]].frequency,
218 result[1])
219 return memoized_costs[bounds]
Luis Hector Chavezb21da7a2018-11-01 16:50:36 -0700220
Luis Hector Chaveza54812b2018-11-01 20:02:22 -0700221 # Try the linear model first and use that as the best estimate so far.
222 best_cost = _compile_ranges_linear(ranges[slice(*indices)],
223 accept_action, reject_action)
Luis Hector Chavez7a21ffe2018-12-05 16:54:30 -0800224
Luis Hector Chaveza54812b2018-11-01 20:02:22 -0700225 # Now recursively go through all possible partitions of the interval
226 # currently being considered.
Luis Hector Chavez891d3552019-03-09 18:46:53 -0800227 previous_accumulated = ranges[indices[0]].accumulated - ranges[
228 indices[0]].frequency
Luis Hector Chaveza54812b2018-11-01 20:02:22 -0700229 bst_comparison_cost = (
230 ranges[indices[1] - 1].accumulated - previous_accumulated)
231 for i, entry in enumerate(ranges[slice(*indices)]):
232 candidates = [entry.numbers[0]]
233 if i:
234 candidates.append(ranges[i - 1 + indices[0]].numbers[1])
235 for cutoff_bound in candidates:
236 if not bounds[0] < cutoff_bound < bounds[1]:
237 continue
238 if not indices[0] < i + indices[0] < indices[1]:
239 continue
240 left_subtree = _generate_syscall_bst(
241 ranges, (indices[0], i + indices[0]),
242 (bounds[0], cutoff_bound))
243 right_subtree = _generate_syscall_bst(
244 ranges, (i + indices[0], indices[1]),
245 (cutoff_bound, bounds[1]))
246 best_cost = min(
247 best_cost,
248 (bst_comparison_cost + left_subtree[0] + right_subtree[0],
249 bpf.SyscallEntry(
250 cutoff_bound,
251 right_subtree[1],
252 left_subtree[1],
253 op=bpf.BPF_JGE)))
Luis Hector Chavez7a21ffe2018-12-05 16:54:30 -0800254
Luis Hector Chaveza54812b2018-11-01 20:02:22 -0700255 memoized_costs[bounds] = best_cost
256 return memoized_costs[bounds]
Luis Hector Chavez7a21ffe2018-12-05 16:54:30 -0800257
Luis Hector Chaveza54812b2018-11-01 20:02:22 -0700258 return _generate_syscall_bst(ranges, (0, len(ranges)))[1]
Luis Hector Chavez7a21ffe2018-12-05 16:54:30 -0800259
260
Luis Hector Chavez05392b82018-10-28 21:40:10 -0700261class PolicyCompiler:
262 """A parser for the Minijail seccomp policy file format."""
263
264 def __init__(self, arch):
265 self._arch = arch
266
Luis Hector Chavez7a21ffe2018-12-05 16:54:30 -0800267 def compile_file(self,
268 policy_filename,
269 *,
270 optimization_strategy,
271 kill_action,
Luis Hector Chavez891d3552019-03-09 18:46:53 -0800272 include_depth_limit=10,
Nicole Anderson-Auceb80002021-05-27 21:43:46 +0000273 override_default_action=None,
Nicole Anderson-Au60f60e22021-09-14 19:56:45 +0000274 denylist=False,
275 ret_log=False):
Luis Hector Chavez7a21ffe2018-12-05 16:54:30 -0800276 """Return a compiled BPF program from the provided policy file."""
277 policy_parser = parser.PolicyParser(
278 self._arch,
279 kill_action=kill_action,
Luis Hector Chavez891d3552019-03-09 18:46:53 -0800280 include_depth_limit=include_depth_limit,
Nicole Anderson-Au4820a382021-06-18 20:16:14 +0000281 override_default_action=override_default_action,
Nicole Anderson-Au60f60e22021-09-14 19:56:45 +0000282 denylist=denylist,
283 ret_log=ret_log)
Luis Hector Chavez7a21ffe2018-12-05 16:54:30 -0800284 parsed_policy = policy_parser.parse_file(policy_filename)
285 entries = [
286 self.compile_filter_statement(
Nicole Anderson-Au4820a382021-06-18 20:16:14 +0000287 filter_statement, kill_action=kill_action, denylist=denylist)
Luis Hector Chavez7a21ffe2018-12-05 16:54:30 -0800288 for filter_statement in parsed_policy.filter_statements
289 ]
290
291 visitor = bpf.FlatteningVisitor(
292 arch=self._arch, kill_action=kill_action)
Nicole Anderson-Auceb80002021-05-27 21:43:46 +0000293 if denylist:
Nicole Anderson-Au60f60e22021-09-14 19:56:45 +0000294 accept_action = kill_action
Nicole Anderson-Auceb80002021-05-27 21:43:46 +0000295 reject_action = bpf.Allow()
296 else:
297 accept_action = bpf.Allow()
298 reject_action = parsed_policy.default_action
Luis Hector Chavez7a21ffe2018-12-05 16:54:30 -0800299 if entries:
300 if optimization_strategy == OptimizationStrategy.BST:
301 next_action = _compile_entries_bst(entries, accept_action,
Luis Hector Chaveza54812b2018-11-01 20:02:22 -0700302 reject_action)
Luis Hector Chavez7a21ffe2018-12-05 16:54:30 -0800303 else:
304 next_action = _compile_entries_linear(entries, accept_action,
Luis Hector Chaveza54812b2018-11-01 20:02:22 -0700305 reject_action)
306 next_action.accept(bpf.ArgFilterForwardingVisitor(visitor))
Luis Hector Chavez7a21ffe2018-12-05 16:54:30 -0800307 reject_action.accept(visitor)
308 accept_action.accept(visitor)
309 bpf.ValidateArch(next_action).accept(visitor)
310 else:
311 reject_action.accept(visitor)
312 bpf.ValidateArch(reject_action).accept(visitor)
313 return visitor.result
314
Nicole Anderson-Au4820a382021-06-18 20:16:14 +0000315 def compile_filter_statement(self,
316 filter_statement,
317 *,
318 kill_action,
319 denylist=False):
Luis Hector Chavez05392b82018-10-28 21:40:10 -0700320 """Compile one parser.FilterStatement into BPF."""
321 policy_entry = SyscallPolicyEntry(filter_statement.syscall.name,
322 filter_statement.syscall.number,
323 filter_statement.frequency)
324 # In each step of the way, the false action is the one that is taken if
325 # the immediate boolean condition does not match. This means that the
326 # false action taken here is the one that applies if the whole
327 # expression fails to match.
328 false_action = filter_statement.filters[-1].action
Nicole Anderson-Au4820a382021-06-18 20:16:14 +0000329 if not denylist and false_action == bpf.Allow():
Luis Hector Chavez05392b82018-10-28 21:40:10 -0700330 return policy_entry
331 # We then traverse the list of filters backwards since we want
332 # the root of the DAG to be the very first boolean operation in
333 # the filter chain.
334 for filt in filter_statement.filters[:-1][::-1]:
335 for disjunction in filt.expression:
336 # This is the jump target of the very last comparison in the
337 # conjunction. Given that any conjunction that succeeds should
338 # make the whole expression succeed, make the very last
339 # comparison jump to the accept action if it succeeds.
340 true_action = filt.action
341 for atom in disjunction:
342 block = bpf.Atom(atom.argument_index, atom.op, atom.value,
343 true_action, false_action)
344 true_action = block
345 false_action = true_action
346 policy_filter = false_action
347
348 # Lower all Atoms into WideAtoms.
349 lowering_visitor = bpf.LoweringVisitor(arch=self._arch)
350 policy_filter = lowering_visitor.process(policy_filter)
351
352 # Flatten the IR DAG into a single BasicBlock.
353 flattening_visitor = bpf.FlatteningVisitor(
354 arch=self._arch, kill_action=kill_action)
355 policy_filter.accept(flattening_visitor)
356 policy_entry.filter = flattening_visitor.result
357 return policy_entry