Integrate BPF seccomp_filters into Minijail.
BUG=chromium-os:25429
BUG=chromium-os:27878
TEST=security_Minijail_seccomp
CQ-DEPEND=I13a9b22ac8d55f02d5a77b5beedb955386b63723
Change-Id: I5fa8f40b9a539a61d69439cad778c926fc934cb1
Reviewed-on: https://gerrit.chromium.org/gerrit/19527
Tested-by: Jorge Lucangeli Obes <[email protected]>
Reviewed-by: Kees Cook <[email protected]>
Commit-Ready: Jorge Lucangeli Obes <[email protected]>
diff --git a/Makefile b/Makefile
index 8cc4da1..e2ba1e1 100644
--- a/Makefile
+++ b/Makefile
@@ -11,10 +11,10 @@
tests : libminijail_unittest.wrapper syscall_filter_unittest
-minijail0 : libsyscalls.gen.o libminijail.o minijail0.c
+minijail0 : libsyscalls.gen.o libminijail.o syscall_filter.o bpf.o minijail0.c
$(CC) $(CFLAGS) -o $@ $^ -lcap
-libminijail.so : libminijail.o libsyscalls.gen.o
+libminijail.so : libminijail.o syscall_filter.o bpf.o libsyscalls.gen.o
$(CC) $(CFLAGS) -shared -o $@ $^ -lcap
# Allow unittests to access what are normally internal symbols.
@@ -24,10 +24,12 @@
$(MAKE) $(MAKEARGS) test-clean
libminijail_unittest : CFLAGS := $(filter-out -fvisibility=%,$(CFLAGS))
-libminijail_unittest : libminijail_unittest.o libminijail.o libsyscalls.gen.o
+libminijail_unittest : libminijail_unittest.o libminijail.o \
+ syscall_filter.o bpf.o libsyscalls.gen.o
$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(filter-out $(CFLAGS_FILE),$^) -lcap
-libminijailpreload.so : libminijailpreload.c libsyscalls.gen.o libminijail.o
+libminijailpreload.so : libminijailpreload.c libminijail.o libsyscalls.gen.o \
+ syscall_filter.o bpf.o
$(CC) $(CFLAGS) -shared -o $@ $^ -ldl -lcap
libminijail.o : libminijail.c libminijail.h
@@ -44,9 +46,9 @@
syscall_filter_unittest.o : syscall_filter_unittest.c test_harness.h
$(CC) $(CFLAGS) -c -o $@ $<
-syscall_filter.o : syscall_filter.c
+syscall_filter.o : syscall_filter.c syscall_filter.h
-bpf.o : bpf.c
+bpf.o : bpf.c bpf.h
# sed expression which extracts system calls that are
# defined via asm/unistd.h. It converts them from:
diff --git a/libminijail-private.h b/libminijail-private.h
index 62d2625..eafcdf5 100644
--- a/libminijail-private.h
+++ b/libminijail-private.h
@@ -18,8 +18,6 @@
static const char *kFdEnvVar = "__MINIJAIL_FD";
static const char *kLdPreloadEnvVar = "LD_PRELOAD";
-#define MINIJAIL_MAX_SECCOMP_FILTER_LINE 512
-
struct minijail;
/* minijail_size: returns the size (in bytes) of @j if marshalled
diff --git a/libminijail.c b/libminijail.c
index 0080c49..78cfbc1 100644
--- a/libminijail.c
+++ b/libminijail.c
@@ -6,6 +6,7 @@
#define _BSD_SOURCE
#define _GNU_SOURCE
+#include <asm/unistd.h>
#include <ctype.h>
#include <errno.h>
#include <grp.h>
@@ -17,6 +18,7 @@
#include <sched.h>
#include <signal.h>
#include <stdarg.h>
+#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
@@ -25,6 +27,7 @@
#include <sys/mount.h>
#include <sys/param.h>
#include <sys/prctl.h>
+#include <sys/user.h>
#include <sys/wait.h>
#include <syslog.h>
#include <unistd.h>
@@ -33,13 +36,19 @@
#include "libsyscalls.h"
#include "libminijail-private.h"
+#include "syscall_filter.h"
+
/* Until these are reliably available in linux/prctl.h */
-#ifndef PR_SET_SECCOMP_FILTER
-# define PR_SECCOMP_FILTER_SYSCALL 0
-# define PR_SECCOMP_FILTER_EVENT 1
-# define PR_GET_SECCOMP_FILTER 35
-# define PR_SET_SECCOMP_FILTER 36
-# define PR_CLEAR_SECCOMP_FILTER 37
+#ifndef PR_SET_SECCOMP
+# define PR_SET_SECCOMP 22
+#endif
+
+/* For seccomp_filter using BPF. */
+#ifndef PR_SET_NO_NEW_PRIVS
+# define PR_SET_NO_NEW_PRIVS 38
+#endif
+#ifndef SECCOMP_MODE_FILTER
+# define SECCOMP_MODE_FILTER 2 /* uses user-supplied filter. */
#endif
#define die(_msg, ...) do { \
@@ -53,12 +62,6 @@
#define warn(_msg, ...) \
syslog(LOG_WARNING, "libminijail: " _msg, ## __VA_ARGS__)
-struct seccomp_filter {
- int nr;
- char *filter;
- struct seccomp_filter *next, *prev;
-};
-
struct binding {
char *src;
char *dest;
@@ -86,10 +89,10 @@
char *user;
uint64_t caps;
pid_t initpid;
- int filter_count;
+ int filter_len;
int binding_count;
char *chrootdir;
- struct seccomp_filter *filters;
+ struct sock_fprog *filter_prog;
struct binding *bindings_head;
struct binding *bindings_tail;
};
@@ -279,106 +282,21 @@
return -ENOMEM;
}
-int API minijail_add_seccomp_filter(struct minijail *j, int nr,
- const char *filter)
-{
- struct seccomp_filter *sf;
- if (!filter || nr < 0)
- return -EINVAL;
-
- sf = malloc(sizeof(*sf));
- if (!sf)
- return -ENOMEM;
- sf->nr = nr;
- sf->filter = strndup(filter, MINIJAIL_MAX_SECCOMP_FILTER_LINE);
- if (!sf->filter) {
- free(sf);
- return -ENOMEM;
- }
-
- j->filter_count++;
-
- if (!j->filters) {
- j->filters = sf;
- sf->next = sf;
- sf->prev = sf;
- return 0;
- }
- sf->next = j->filters;
- sf->prev = j->filters->prev;
- sf->prev->next = sf;
- j->filters->prev = sf;
- return 0;
-}
-
-int API minijail_lookup_syscall(const char *name)
-{
- const struct syscall_entry *entry = syscall_table;
- for (; entry->name && entry->nr >= 0; ++entry)
- if (!strcmp(entry->name, name))
- return entry->nr;
- return -1;
-}
-
-char *strip(char *s)
-{
- char *end;
- while (*s && isblank(*s))
- s++;
- end = s + strlen(s) - 1;
- while (*end && (isblank(*end) || *end == '\n'))
- end--;
- *(end + 1) = '\0';
- return s;
-}
-
void API minijail_parse_seccomp_filters(struct minijail *j, const char *path)
{
FILE *file = fopen(path, "r");
- char line[MINIJAIL_MAX_SECCOMP_FILTER_LINE];
- int count = 0;
- if (!file)
- pdie("failed to open seccomp filters file");
-
- /*
- * Format is simple:
- * syscall_name<COLON><FILTER STRING>[\n|EOF]
- * #...comment...
- * <empty line?
- */
- while (fgets(line, sizeof(line), file)) {
- char *filter = line;
- char *name = strsep(&filter, ":");
- char *name_end = NULL;
- int nr = -1;
- count++;
-
- /* Allow comment lines */
- if (*name == '#')
- continue;
-
- name = strip(name);
-
- if (!filter) {
- if (strlen(name))
- die("invalid filter on line %d", count);
- /* Allow empty lines */
- continue;
- }
-
- filter = strip(filter);
-
- /* Take direct syscall numbers */
- nr = strtol(name, &name_end, 0);
- /* Or fail-over to using names */
- if (*name_end != '\0')
- nr = minijail_lookup_syscall(name);
- if (nr < 0)
- die("syscall '%s' unknown", name);
-
- if (minijail_add_seccomp_filter(j, nr, filter))
- pdie("failed to add filter for syscall '%s'", name);
+ if (!file) {
+ pdie("failed to open seccomp filters file '%s'", path);
}
+
+ struct sock_fprog *fprog = malloc(sizeof(struct sock_fprog));
+ if (compile_filter(file, fprog)) {
+ die("failed to compile seccomp filters BPF program in '%s'", path);
+ }
+
+ j->filter_len = fprog->len;
+ j->filter_prog = fprog;
+
fclose(file);
}
@@ -420,13 +338,10 @@
marshal_append(state, j->user, strlen(j->user) + 1);
if (j->chrootdir)
marshal_append(state, j->chrootdir, strlen(j->chrootdir) + 1);
- if (j->flags.seccomp_filter && j->filters) {
- struct seccomp_filter *f = j->filters;
- do {
- marshal_append(state, (char *)&f->nr, sizeof(f->nr));
- marshal_append(state, f->filter, strlen(f->filter) + 1);
- f = f->next;
- } while (f != j->filters);
+ if (j->flags.seccomp_filter && j->filter_prog) {
+ struct sock_fprog *fp = j->filter_prog;
+ marshal_append(state, (char *)fp->filter,
+ fp->len * sizeof(struct sock_filter));
}
for (b = j->bindings_head; b; b = b->next) {
marshal_append(state, b->src, strlen(b->src) + 1);
@@ -496,7 +411,7 @@
/* Potentially stale pointers not used as signals. */
j->bindings_head = NULL;
j->bindings_tail = NULL;
- j->filters = NULL;
+ j->filter_prog = NULL;
if (j->user) { /* stale pointer */
char *user = consumestr(&serialized, &length);
@@ -516,22 +431,21 @@
goto bad_chrootdir;
}
- if (j->flags.seccomp_filter && j->filter_count) {
- count = j->filter_count;
- /* Let add_seccomp_filter recompute the value. */
- j->filter_count = 0;
- for (; count > 0; --count) {
- int *nr = (int *)consumebytes(sizeof(*nr), &serialized,
- &length);
- char *filter;
- if (!nr)
- goto bad_filters;
- filter = consumestr(&serialized, &length);
- if (!filter)
- goto bad_filters;
- if (minijail_add_seccomp_filter(j, *nr, filter))
- goto bad_filters;
- }
+ if (j->flags.seccomp_filter && j->filter_len > 0) {
+ size_t ninstrs = j->filter_len;
+ if (ninstrs > (SIZE_MAX / sizeof(struct sock_filter)) ||
+ ninstrs > USHRT_MAX)
+ goto bad_filters;
+
+ size_t program_len = ninstrs * sizeof(struct sock_filter);
+ void *program = consumebytes(program_len, &serialized, &length);
+ if (!program)
+ goto bad_filters;
+
+ j->filter_prog = malloc(sizeof(struct sock_fprog));
+ j->filter_prog->len = ninstrs;
+ j->filter_prog->filter = malloc(program_len);
+ memcpy(j->filter_prog->filter, program, program_len);
}
count = j->binding_count;
@@ -555,6 +469,10 @@
return 0;
bad_bindings:
+ if (j->flags.seccomp_filter && j->filter_len > 0) {
+ free(j->filter_prog->filter);
+ free(j->filter_prog);
+ }
bad_filters:
if (j->chrootdir)
free(j->chrootdir);
@@ -686,62 +604,12 @@
}
}
-int setup_seccomp_filters(const struct minijail *j)
-{
- const struct seccomp_filter *sf = j->filters;
- int ret = 0;
- int broaden = 0;
-
- /* No filters installed isn't necessarily an error. */
- if (!sf)
- return ret;
-
- do {
- errno = 0;
- ret = prctl(PR_SET_SECCOMP_FILTER, PR_SECCOMP_FILTER_SYSCALL,
- sf->nr, broaden ? "1" : sf->filter);
- if (ret) {
- switch (errno) {
- case ENOSYS:
- /* TODO(wad) make this a config option */
- if (broaden)
- die("CONFIG_SECCOMP_FILTER is not"
- "supported by your kernel");
- warn("missing CONFIG_FTRACE_SYSCALLS; relaxing"
- "the filter for %d", sf->nr);
- broaden = 1;
- continue;
- case E2BIG:
- warn("seccomp filter too long: %d", sf->nr);
- pdie("filter too long");
- case ENOSPC:
- pdie("too many seccomp filters");
- case EPERM:
- warn("syscall filter disallowed for %d",
- sf->nr);
- pdie("failed to install seccomp filter");
- case EINVAL:
- warn("seccomp filter or call method is"
- " invalid. %d:'%s'", sf->nr, sf->filter);
- default:
- pdie("failed to install seccomp filter");
- }
- }
- sf = sf->next;
- broaden = 0;
- } while (sf != j->filters);
- return ret;
-}
-
void API minijail_enter(const struct minijail *j)
{
if (j->flags.pids)
die("tried to enter a pid-namespaced jail;"
"try minijail_run()?");
- if (j->flags.seccomp_filter && setup_seccomp_filters(j))
- pdie("failed to configure seccomp filters");
-
if (j->flags.usergroups && !j->user)
die("usergroup inheritance without username");
@@ -796,8 +664,15 @@
* seccomp has to come last since it cuts off all the other
* privilege-dropping syscalls :)
*/
- if (j->flags.seccomp_filter && prctl(PR_SET_SECCOMP, 13))
- pdie("prctl(PR_SET_SECCOMP, 13)");
+ if (j->flags.seccomp_filter) {
+ /* TODO(jorgelo): document call to PR_SET_NO_NEW_PRIVS. */
+ if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
+ pdie("prctl(PR_SET_NO_NEW_PRIVS)");
+ }
+ if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, j->filter_prog)) {
+ pdie("prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER)");
+ }
+ }
if (j->flags.seccomp && prctl(PR_SET_SECCOMP, 1))
pdie("prctl(PR_SET_SECCOMP)");
@@ -1028,15 +903,9 @@
void API minijail_destroy(struct minijail *j)
{
- struct seccomp_filter *f = j->filters;
- /* Unlink the tail and head */
- if (f)
- f->prev->next = NULL;
- while (f) {
- struct seccomp_filter *next = f->next;
- free(f->filter);
- free(f);
- f = next;
+ if (j->flags.seccomp_filter && j->filter_prog) {
+ free(j->filter_prog->filter);
+ free(j->filter_prog);
}
while (j->bindings_head) {
struct binding *b = j->bindings_head;
diff --git a/libminijail.h b/libminijail.h
index 5753196..2f4ed7e 100644
--- a/libminijail.h
+++ b/libminijail.h
@@ -48,6 +48,8 @@
void minijail_parse_seccomp_filters(struct minijail *j, const char *path);
int minijail_add_seccomp_filter(struct minijail *j, int nr,
const char *filter);
+void minijail_use_seccomp_bpf(struct minijail *j);
+void minijail_parse_seccomp_bpf(struct minijail *j, const char *path);
void minijail_use_caps(struct minijail *j, uint64_t capmask);
void minijail_namespace_vfs(struct minijail *j);
/* Implies namespace_vfs and remount_readonly */
diff --git a/libsyscalls.h b/libsyscalls.h
index d31ec8f..a2eb43c 100644
--- a/libsyscalls.h
+++ b/libsyscalls.h
@@ -12,4 +12,5 @@
};
extern const struct syscall_entry syscall_table[];
+
#endif /* MINIJAIL_LIBSYSCALLS_H_ */
diff --git a/syscall_filter.h b/syscall_filter.h
index fb68018..18ea415 100644
--- a/syscall_filter.h
+++ b/syscall_filter.h
@@ -22,6 +22,8 @@
size_t total_len;
};
+struct bpf_labels;
+
struct filter_block *compile_section(int nr, const char *policy_line,
unsigned int label_id, struct bpf_labels *labels);
int compile_filter(FILE *policy, struct sock_fprog *prog);