Add minijail_run_fd_env_pid_pipes with support for fexecve.

Bug: 193885716
Test: cros_workon_make --board=${BOARD} --test chromeos-base/minijail
Change-Id: I9943431c2d90545c7f699540b42b786fa629b73f
diff --git a/libminijail.c b/libminijail.c
index ab400de..b935dfd 100644
--- a/libminijail.c
+++ b/libminijail.c
@@ -2624,7 +2624,7 @@
 	      "If true, ensure_no_fd_conflict will always find an unused fd.");
 
 /* If parent_fd will be used by a child fd, move it to an unused fd. */
-static int ensure_no_fd_conflict(const fd_set* child_fds,
+static int ensure_no_fd_conflict(const fd_set *child_fds,
 				 int child_fd, int *parent_fd)
 {
 	if (!FD_ISSET(*parent_fd, child_fds)) {
@@ -2663,6 +2663,32 @@
 }
 
 /*
+ * Populate child_fds_out with the set of file descriptors that will be replaced
+ * by redirect_fds().
+ *
+ * NOTE: This creates temporaries for parent file descriptors that would
+ * otherwise be overwritten during redirect_fds().
+ */
+static int get_child_fds(struct minijail *j, fd_set *child_fds_out) {
+	/* Relocate parent_fds that would be replaced by a child_fd. */
+	for (size_t i = 0; i < j->preserved_fd_count; i++) {
+		int child_fd = j->preserved_fds[i].child_fd;
+		if (FD_ISSET(child_fd, child_fds_out)) {
+			die("fd %d is mapped more than once", child_fd);
+		}
+
+		int *parent_fd = &j->preserved_fds[i].parent_fd;
+		if (ensure_no_fd_conflict(child_fds_out, child_fd,
+					  parent_fd) == -1) {
+			return -1;
+		}
+
+		FD_SET(child_fd, child_fds_out);
+	}
+	return 0;
+}
+
+/*
  * Structure holding resources and state created when running a minijail.
  */
 struct minijail_run_state {
@@ -2675,28 +2701,11 @@
 	char **child_env;
 };
 
-static int redirect_fds(struct minijail *j, struct minijail_run_state *state)
-{
-	fd_set child_fds;
-	FD_ZERO(&child_fds);
-
-	/* Relocate parent_fds that would be replaced by a child_fd. */
-	for (size_t i = 0; i < j->preserved_fd_count; i++) {
-		int child_fd = j->preserved_fds[i].child_fd;
-		if (FD_ISSET(child_fd, &child_fds)) {
-			die("fd %d is mapped more than once", child_fd);
-		}
-
-		int *parent_fd = &j->preserved_fds[i].parent_fd;
-		if (ensure_no_fd_conflict(&child_fds, child_fd,
-					  parent_fd) == -1) {
-			return -1;
-		}
-
-		FD_SET(child_fd, &child_fds);
-	}
-
-	/* Move pipe_fds if they conflict with a child_fd. */
+/*
+ * Move pipe_fds if they conflict with a child_fd.
+ */
+static int avoid_pipe_conflicts(struct minijail_run_state *state,
+				fd_set *child_fds_out) {
 	int *pipe_fds[] = {
 	    state->pipe_fds,   state->child_sync_pipe_fds,
 	    state->stdin_fds,  state->stdout_fds,
@@ -2704,18 +2713,26 @@
 	};
 	for (size_t i = 0; i < ARRAY_SIZE(pipe_fds); ++i) {
 		if (pipe_fds[i][0] != -1 &&
-		    ensure_no_fd_conflict(&child_fds, -1,
+		    ensure_no_fd_conflict(child_fds_out, -1,
 					  &pipe_fds[i][0]) == -1) {
 			return -1;
 		}
 		if (pipe_fds[i][1] != -1 &&
-		    ensure_no_fd_conflict(&child_fds, -1,
+		    ensure_no_fd_conflict(child_fds_out, -1,
 					  &pipe_fds[i][1]) == -1) {
 			return -1;
 		}
 	}
+	return 0;
+}
 
-	/* Perform redirection. */
+/*
+ * Redirect j->preserved_fds from the parent_fd to the child_fd.
+ *
+ * NOTE: This will clear FD_CLOEXEC since otherwise the child_fd would not be
+ * inherited after the exec call.
+ */
+static int redirect_fds(struct minijail *j, fd_set *child_fds) {
 	for (size_t i = 0; i < j->preserved_fd_count; i++) {
 		if (j->preserved_fds[i].parent_fd ==
 		    j->preserved_fds[i].child_fd) {
@@ -2750,7 +2767,7 @@
 	 */
 	for (size_t i = 0; i < j->preserved_fd_count; i++) {
 		int parent_fd = j->preserved_fds[i].parent_fd;
-		if (!FD_ISSET(parent_fd, &child_fds)) {
+		if (!FD_ISSET(parent_fd, child_fds)) {
 			close(parent_fd);
 		}
 	}
@@ -2821,7 +2838,9 @@
 /*
  * Structure that specifies how to start a minijail.
  *
- * filename - The program to exec in the child. Required if |exec_in_child| = 1.
+ * filename - The program to exec in the child. Should be NULL if elf_fd is set.
+ * elf_fd - A fd to be used with fexecve. Should be -1 if filename is set.
+ *   NOTE: either filename or elf_fd is required if |exec_in_child| = 1.
  * argv - Arguments for the child program. Required if |exec_in_child| = 1.
  * envp - Environment for the child program. Available if |exec_in_child| = 1.
  * use_preload - If true use LD_PRELOAD.
@@ -2834,6 +2853,7 @@
  */
 struct minijail_run_config {
 	const char *filename;
+	int elf_fd;
 	char *const *argv;
 	char *const *envp;
 	int use_preload;
@@ -2853,6 +2873,7 @@
 {
 	struct minijail_run_config config = {
 	    .filename = filename,
+	    .elf_fd = -1,
 	    .argv = argv,
 	    .envp = NULL,
 	    .use_preload = true,
@@ -2866,6 +2887,7 @@
 {
 	struct minijail_run_config config = {
 	    .filename = filename,
+	    .elf_fd = -1,
 	    .argv = argv,
 	    .envp = NULL,
 	    .use_preload = true,
@@ -2880,6 +2902,7 @@
 {
 	struct minijail_run_config config = {
 	    .filename = filename,
+	    .elf_fd = -1,
 	    .argv = argv,
 	    .envp = NULL,
 	    .use_preload = true,
@@ -2895,6 +2918,7 @@
 {
 	struct minijail_run_config config = {
 	    .filename = filename,
+	    .elf_fd = -1,
 	    .argv = argv,
 	    .envp = NULL,
 	    .use_preload = true,
@@ -2914,6 +2938,27 @@
 {
 	struct minijail_run_config config = {
 	    .filename = filename,
+	    .elf_fd = -1,
+	    .argv = argv,
+	    .envp = envp,
+	    .use_preload = true,
+	    .exec_in_child = true,
+	    .pstdin_fd = pstdin_fd,
+	    .pstdout_fd = pstdout_fd,
+	    .pstderr_fd = pstderr_fd,
+	    .pchild_pid = pchild_pid,
+	};
+	return minijail_run_config_internal(j, &config);
+}
+
+int API minijail_run_fd_env_pid_pipes(struct minijail *j, int elf_fd,
+				      char *const argv[], char *const envp[],
+				      pid_t *pchild_pid, int *pstdin_fd,
+				      int *pstdout_fd, int *pstderr_fd)
+{
+	struct minijail_run_config config = {
+	    .filename = NULL,
+	    .elf_fd = elf_fd,
 	    .argv = argv,
 	    .envp = envp,
 	    .use_preload = true,
@@ -2931,6 +2976,7 @@
 {
 	struct minijail_run_config config = {
 	    .filename = filename,
+	    .elf_fd = -1,
 	    .argv = argv,
 	    .envp = NULL,
 	    .use_preload = false,
@@ -2949,6 +2995,7 @@
 {
 	struct minijail_run_config config = {
 	    .filename = filename,
+	    .elf_fd = -1,
 	    .argv = argv,
 	    .envp = NULL,
 	    .use_preload = false,
@@ -2970,6 +3017,7 @@
 {
 	struct minijail_run_config config = {
 	    .filename = filename,
+	    .elf_fd = -1,
 	    .argv = argv,
 	    .envp = envp,
 	    .use_preload = false,
@@ -2984,7 +3032,9 @@
 
 pid_t API minijail_fork(struct minijail *j)
 {
-	struct minijail_run_config config = {};
+	struct minijail_run_config config = {
+	    .elf_fd = -1,
+	};
 	return minijail_run_config_internal(j, &config);
 }
 
@@ -3003,6 +3053,10 @@
 	int do_init = j->flags.do_init && !j->flags.run_as_init;
 	int use_preload = config->use_preload;
 
+	if (config->filename != NULL && config->elf_fd != -1) {
+		die("filename and elf_fd cannot be set at the same time");
+	}
+
 	if (use_preload) {
 		if (j->hooks_head != NULL)
 			die("Minijail hooks are not supported with LD_PRELOAD");
@@ -3225,7 +3279,7 @@
 	}
 
 	if (j->flags.close_open_fds) {
-		const size_t kMaxInheritableFdsSize = 10 + MAX_PRESERVED_FDS;
+		const size_t kMaxInheritableFdsSize = 11 + MAX_PRESERVED_FDS;
 		int inheritable_fds[kMaxInheritableFdsSize];
 		size_t size = 0;
 
@@ -3262,11 +3316,29 @@
 			inheritable_fds[size++] = j->preserved_fds[i].parent_fd;
 		}
 
+		if (config->elf_fd > -1) {
+			inheritable_fds[size++] = config->elf_fd;
+		}
+
 		if (close_open_fds(inheritable_fds, size) < 0)
 			die("failed to close open file descriptors");
 	}
 
-	if (redirect_fds(j, state_out))
+	/* The set of fds will be replaced. */
+	fd_set child_fds;
+	FD_ZERO(&child_fds);
+	if (get_child_fds(j, &child_fds))
+		die("failed to set up fd redirections");
+
+	if (avoid_pipe_conflicts(state_out, &child_fds))
+		die("failed to redirect conflicting pipes");
+
+	/* The elf_fd needs to be mutable so use a stack copy from now on. */
+	int elf_fd = config->elf_fd;
+	if (elf_fd != -1 && ensure_no_fd_conflict(&child_fds, -1, &elf_fd))
+		die("failed to redirect elf_fd");
+
+	if (redirect_fds(j, &child_fds))
 		die("failed to set up fd redirections");
 
 	if (sync_child)
@@ -3356,10 +3428,15 @@
 	 */
 	if (!child_env)
 		child_env = config->envp ? config->envp : environ;
-	execve(config->filename, config->argv, child_env);
+	if (elf_fd > -1) {
+		fexecve(elf_fd, config->argv, child_env);
+		pwarn("fexecve(%d) failed", config->elf_fd);
+	} else {
+		execve(config->filename, config->argv, child_env);
+		pwarn("execve(%s) failed", config->filename);
+	}
 
 	ret = (errno == ENOENT ? MINIJAIL_ERR_NO_COMMAND : MINIJAIL_ERR_NO_ACCESS);
-	pwarn("execve(%s) failed", config->filename);
 	_exit(ret);
 }
 
diff --git a/libminijail.h b/libminijail.h
index cfd42d2..bcfd995 100644
--- a/libminijail.h
+++ b/libminijail.h
@@ -404,6 +404,23 @@
 			       int *pstdout_fd, int *pstderr_fd);
 
 /*
+ * Execute the specified file descriptor in the given minijail,
+ * fexecve(3)-style.
+ * Pass |envp| as the full environment for the child or NULL to inherit.
+ * Update |*pchild_pid| with the pid of the child.
+ * Update |*pstdin_fd| with a fd that allows writing to the child's
+ * standard input.
+ * Update |*pstdout_fd| with a fd that allows reading from the child's
+ * standard output.
+ * Update |*pstderr_fd| with a fd that allows reading from the child's
+ * standard error.
+ */
+int minijail_run_fd_env_pid_pipes(struct minijail *j, int elf_fd,
+			          char *const argv[], char *const envp[],
+				  pid_t *pchild_pid, int *pstdin_fd,
+				  int *pstdout_fd, int *pstderr_fd);
+
+/*
  * Run the specified command in the given minijail, execve(2)-style.
  * Update |*pchild_pid| with the pid of the child.
  * Update |*pstdin_fd| with a fd that allows writing to the child's
diff --git a/libminijail_unittest.cc b/libminijail_unittest.cc
index 496738b..fda4dd4 100644
--- a/libminijail_unittest.cc
+++ b/libminijail_unittest.cc
@@ -9,6 +9,7 @@
 
 #include <dirent.h>
 #include <fcntl.h>
+#include <sys/mman.h>
 #include <sys/mount.h>
 #include <sys/stat.h>
 #include <sys/types.h>
@@ -624,6 +625,98 @@
   EXPECT_EQ(WEXITSTATUS(status), 0);
 }
 
+TEST(Test, minijail_run_fd_env_pid_pipes) {
+  // TODO(crbug.com/895875): The preload library interferes with ASan since they
+  // both need to use LD_PRELOAD.
+  if (running_with_asan())
+    GTEST_SKIP();
+
+  ScopedMinijail j(minijail_new());
+  minijail_set_preload_path(j.get(), kPreloadPath);
+
+  char *argv[4];
+  argv[0] = const_cast<char*>(kCatPath);
+  argv[1] = nullptr;
+
+  pid_t pid;
+  int child_stdin, child_stdout;
+  int mj_run_ret = minijail_run_pid_pipes(
+      j.get(), argv[0], argv, &pid, &child_stdin, &child_stdout, NULL);
+  EXPECT_EQ(mj_run_ret, 0);
+
+  char teststr[] = "test\n";
+  const size_t teststr_len = strlen(teststr);
+  ssize_t write_ret = write(child_stdin, teststr, teststr_len);
+  EXPECT_EQ(write_ret, static_cast<ssize_t>(teststr_len));
+
+  char buf[kBufferSize] = {};
+  ssize_t read_ret = read(child_stdout, buf, sizeof(buf) - 1);
+  EXPECT_EQ(read_ret, static_cast<ssize_t>(teststr_len));
+  EXPECT_STREQ(buf, teststr);
+
+  int status;
+  EXPECT_EQ(kill(pid, SIGTERM), 0);
+  EXPECT_EQ(waitpid(pid, &status, 0), pid);
+  ASSERT_TRUE(WIFSIGNALED(status));
+  EXPECT_EQ(WTERMSIG(status), SIGTERM);
+
+  argv[0] = const_cast<char*>(kShellPath);
+  argv[1] = "-c";
+  argv[2] = "#!/bin/sh\necho \"${TEST_PARENT+set}|${TEST_VAR}\" >&2\n";
+  argv[3] = nullptr;
+
+  char *envp[2];
+  envp[0] = "TEST_VAR=test";
+  envp[1] = nullptr;
+
+  // Set a canary env var in the parent that should not be present in the child.
+  ASSERT_EQ(setenv("TEST_PARENT", "test", 1 /*overwrite*/), 0);
+
+  // MFD_CLOEXEC cannot be used because the test fd begins with #!.
+  int elf_fd = memfd_create("test", MFD_ALLOW_SEALING);
+  if (elf_fd < 0) {
+    pdie("memfd_create(elf_fd):");
+  }
+
+  // This is technically not an ELF, but it works for the purposes of the test.
+  ssize_t script_len = strlen(argv[2]);
+  if (write(elf_fd, argv[2], script_len) != script_len) {
+    pdie("write(elf_fd):");
+  }
+  if (ftruncate(elf_fd, script_len) == -1) {
+    pdie("ftruncate(elf_fd):");
+  }
+  if (lseek(elf_fd, 0, SEEK_SET) == -1) {
+    pdie("lseek(elf_fd):");
+  }
+  if (fcntl(elf_fd, F_ADD_SEALS,
+            F_SEAL_WRITE | F_SEAL_GROW | F_SEAL_SHRINK | F_SEAL_SEAL) == -1) {
+    pdie("fcntl(elf_fd):");
+  }
+
+  int dev_null = open("/dev/null", O_RDONLY);
+  ASSERT_NE(dev_null, -1);
+  // Create a mapping to dev_null that would clobber elf_fd if it is not
+  // relocated.
+  minijail_preserve_fd(j.get(), dev_null, elf_fd);
+
+  int child_stderr;
+  mj_run_ret =
+      minijail_run_fd_env_pid_pipes(j.get(), elf_fd, argv, envp, &pid,
+                                    &child_stdin, &child_stdout, &child_stderr);
+  EXPECT_EQ(mj_run_ret, 0);
+  close(dev_null);
+
+  memset(buf, 0, sizeof(buf));
+  read_ret = read(child_stderr, buf, sizeof(buf) - 1);
+  EXPECT_GE(read_ret, 0);
+  EXPECT_STREQ(buf, "|test\n");
+
+  EXPECT_EQ(waitpid(pid, &status, 0), pid);
+  ASSERT_TRUE(WIFEXITED(status));
+  EXPECT_EQ(WEXITSTATUS(status), 0);
+}
+
 TEST(Test, minijail_run_env_pid_pipes_with_local_preload) {
   // TODO(crbug.com/895875): The preload library interferes with ASan since they
   // both need to use LD_PRELOAD.
@@ -1230,7 +1323,7 @@
   argv[0] = const_cast<char*>(kShellPath);
   argv[1] = "-c";
   argv[2] = "grep -E 'shared:|master:|propagate_from:|unbindable:' "
-	    "/proc/self/mountinfo";
+            "/proc/self/mountinfo";
   argv[3] = NULL;
   mj_run_ret = minijail_run_pid_pipes_no_preload(
       j, argv[0], argv, &pid, NULL, &child_stdout, NULL);