| // Copyright 2019 Google LLC |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // https://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| |
| // Implementation file for the sandbox2::Client class. |
| |
| #include "sandboxed_api/sandbox2/client.h" |
| |
| #include <fcntl.h> |
| #include <linux/bpf_common.h> |
| #include <linux/filter.h> |
| #include <linux/seccomp.h> |
| #include <sys/prctl.h> |
| #include <syscall.h> |
| #include <unistd.h> |
| |
| #include <atomic> |
| #include <cerrno> |
| #include <cinttypes> |
| #include <cstdint> |
| #include <cstdlib> |
| #include <limits> |
| #include <memory> |
| #include <string> |
| #include <thread> // NOLINT(build/c++11) |
| #include <utility> |
| #include <vector> |
| |
| #include "absl/base/attributes.h" |
| #include "absl/base/macros.h" |
| #include "absl/container/flat_hash_map.h" |
| #include "absl/status/status.h" |
| #include "absl/strings/numbers.h" |
| #include "absl/strings/str_cat.h" |
| #include "absl/strings/str_join.h" |
| #include "absl/strings/str_split.h" |
| #include "absl/strings/string_view.h" |
| #include "sandboxed_api/sandbox2/comms.h" |
| #include "sandboxed_api/sandbox2/policy.h" |
| #include "sandboxed_api/sandbox2/sanitizer.h" |
| #include "sandboxed_api/sandbox2/syscall.h" |
| #include "sandboxed_api/sandbox2/util/bpf_helper.h" |
| #include "sandboxed_api/util/raw_logging.h" |
| |
| #ifndef SECCOMP_FILTER_FLAG_NEW_LISTENER |
| #define SECCOMP_FILTER_FLAG_NEW_LISTENER (1UL << 3) |
| #endif |
| |
| namespace sandbox2 { |
| namespace { |
| |
| void InitSeccompUnotify(sock_fprog prog, Comms* comms) { |
| // The policy might not allow sending the notify FD. |
| // Create a separate thread that won't get the seccomp policy to send the FD. |
| // Synchronize with it using plain atomics + seccomp TSYNC, so we don't need |
| // any additional syscalls. |
| std::atomic<int> fd(-1); |
| std::atomic<int> tid(-1); |
| |
| std::thread th([comms, &fd, &tid]() { |
| int notify_fd = -1; |
| while (notify_fd == -1) { |
| notify_fd = fd.load(std::memory_order_seq_cst); |
| } |
| SAPI_RAW_CHECK(comms->SendFD(notify_fd), "sending unotify fd"); |
| SAPI_RAW_CHECK(close(notify_fd) == 0, "closing unotify fd"); |
| sock_filter filter = ALLOW; |
| struct sock_fprog allow_prog = { |
| .len = 1, |
| .filter = &filter, |
| }; |
| int result = syscall(__NR_seccomp, SECCOMP_SET_MODE_FILTER, 0, |
| reinterpret_cast<uintptr_t>(&allow_prog)); |
| SAPI_RAW_PCHECK(result != -1, "setting seccomp filter"); |
| tid.store(syscall(__NR_gettid), std::memory_order_seq_cst); |
| }); |
| th.detach(); |
| int result = syscall(__NR_seccomp, SECCOMP_SET_MODE_FILTER, |
| SECCOMP_FILTER_FLAG_NEW_LISTENER, |
| reinterpret_cast<uintptr_t>(&prog)); |
| SAPI_RAW_PCHECK(result != -1, "setting seccomp filter"); |
| fd.store(result, std::memory_order_seq_cst); |
| pid_t child = -1; |
| while (child == -1) { |
| child = tid.load(std::memory_order_seq_cst); |
| } |
| // Apply seccomp. |
| struct sock_filter code[] = { |
| LOAD_ARCH, |
| JNE32(sandbox2::Syscall::GetHostAuditArch(), ALLOW), |
| LOAD_SYSCALL_NR, |
| BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, __NR_seccomp, 0, 3), |
| ARG_32(3), |
| BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, internal::kExecveMagic, 0, 1), |
| DENY, |
| ALLOW, |
| }; |
| prog.len = ABSL_ARRAYSIZE(code); |
| prog.filter = code; |
| do { |
| result = syscall( |
| __NR_seccomp, SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, |
| reinterpret_cast<uintptr_t>(&prog), internal::kExecveMagic); |
| } while (result == child); |
| SAPI_RAW_CHECK(result == 0, "Enabling seccomp filter"); |
| } |
| |
| void InitSeccompRegular(sock_fprog prog) { |
| int result = |
| syscall(__NR_seccomp, SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, |
| reinterpret_cast<uintptr_t>(&prog)); |
| SAPI_RAW_PCHECK(result != -1, "setting seccomp filter"); |
| SAPI_RAW_PCHECK(result == 0, |
| "synchronizing threads using SECCOMP_FILTER_FLAG_TSYNC flag " |
| "for thread=%d", |
| result); |
| } |
| |
| } // namespace |
| |
| Client::Client(Comms* comms) : comms_(comms) { |
| char* fdmap_envvar = getenv(kFDMapEnvVar); |
| if (!fdmap_envvar) { |
| return; |
| } |
| absl::flat_hash_map<absl::string_view, absl::string_view> vars = |
| absl::StrSplit(fdmap_envvar, ',', absl::SkipEmpty()); |
| for (const auto& [name, mapped_fd] : vars) { |
| int fd; |
| SAPI_RAW_CHECK(absl::SimpleAtoi(mapped_fd, &fd), "failed to parse fd map"); |
| SAPI_RAW_CHECK(fd_map_.emplace(std::string(name), fd).second, |
| "could not insert mapping into fd map (duplicate)"); |
| } |
| unsetenv(kFDMapEnvVar); |
| } |
| |
| std::string Client::GetFdMapEnvVar() const { |
| return absl::StrCat(kFDMapEnvVar, "=", |
| absl::StrJoin(fd_map_, ",", absl::PairFormatter(","))); |
| } |
| |
| void Client::PrepareEnvironment(int* preserved_fd) { |
| SetUpIPC(preserved_fd); |
| SetUpCwd(); |
| } |
| |
| void Client::EnableSandbox() { |
| ReceivePolicy(); |
| ApplyPolicyAndBecomeTracee(); |
| } |
| |
| void Client::SandboxMeHere() { |
| PrepareEnvironment(); |
| EnableSandbox(); |
| } |
| |
| void Client::SetUpCwd() { |
| { |
| // Get the current working directory to check if we are in a mount |
| // namespace. |
| // Note: glibc 2.27 no longer returns a relative path in that case, but |
| // fails with ENOENT and returns a nullptr instead. The code still |
| // needs to run on lower version for the time being. |
| char cwd_buf[PATH_MAX + 1] = {0}; |
| char* cwd = getcwd(cwd_buf, ABSL_ARRAYSIZE(cwd_buf)); |
| SAPI_RAW_PCHECK(cwd != nullptr || errno == ENOENT, |
| "no current working directory"); |
| |
| // Outside of the mount namespace, the path is of the form |
| // '(unreachable)/...'. Only check for the slash, since Linux might make up |
| // other prefixes in the future. |
| if (errno == ENOENT || cwd_buf[0] != '/') { |
| SAPI_RAW_VLOG(1, "chdir into mount namespace, cwd was '%s'", cwd_buf); |
| // If we are in a mount namespace but fail to chdir, then it can lead to a |
| // sandbox escape -- we need to fail with FATAL if the chdir fails. |
| SAPI_RAW_PCHECK(chdir("/") != -1, "corrective chdir"); |
| } |
| } |
| |
| // Receive the user-supplied current working directory and change into it. |
| std::string cwd; |
| SAPI_RAW_CHECK(comms_->RecvString(&cwd), "receiving working directory"); |
| if (!cwd.empty()) { |
| // On the other hand this chdir can fail without a sandbox escape. It will |
| // probably not have the intended behavior though. |
| if (chdir(cwd.c_str()) == -1 && SAPI_RAW_VLOG_IS_ON(1)) { |
| SAPI_RAW_PLOG( |
| INFO, |
| "chdir(%s) failed, falling back to previous cwd or / (with " |
| "namespaces). Use Executor::SetCwd() to set a working directory", |
| cwd.c_str()); |
| } |
| } |
| } |
| |
| void Client::SetUpIPC(int* preserved_fd) { |
| uint32_t num_of_fd_pairs; |
| SAPI_RAW_CHECK(comms_->RecvUint32(&num_of_fd_pairs), |
| "receiving number of fd pairs"); |
| SAPI_RAW_CHECK(fd_map_.empty(), "fd map not empty"); |
| |
| SAPI_RAW_VLOG(1, "Will receive %d file descriptor pairs", num_of_fd_pairs); |
| |
| absl::flat_hash_map<int, int*> preserve_fds_map; |
| if (preserved_fd) { |
| preserve_fds_map.emplace(*preserved_fd, preserved_fd); |
| } |
| |
| for (uint32_t i = 0; i < num_of_fd_pairs; ++i) { |
| int32_t requested_fd; |
| int32_t fd; |
| std::string name; |
| |
| SAPI_RAW_CHECK(comms_->RecvInt32(&requested_fd), "receiving requested fd"); |
| SAPI_RAW_CHECK(comms_->RecvFD(&fd), "receiving current fd"); |
| SAPI_RAW_CHECK(comms_->RecvString(&name), "receiving name string"); |
| |
| if (auto it = preserve_fds_map.find(requested_fd); |
| it != preserve_fds_map.end()) { |
| int old_fd = it->first; |
| int new_fd = dup(old_fd); |
| SAPI_RAW_PCHECK(new_fd != -1, "Failed to duplicate preserved fd=%d", |
| old_fd); |
| SAPI_RAW_LOG(INFO, "Moved preserved fd=%d to %d", old_fd, new_fd); |
| close(old_fd); |
| int* pfd = it->second; |
| *pfd = new_fd; |
| preserve_fds_map.erase(it); |
| preserve_fds_map.emplace(new_fd, pfd); |
| } |
| |
| if (requested_fd == comms_->GetConnectionFD()) { |
| comms_->MoveToAnotherFd(); |
| SAPI_RAW_LOG(INFO, |
| "Trying to map over comms fd (%d). Remapped comms to %d", |
| requested_fd, comms_->GetConnectionFD()); |
| } |
| |
| if (requested_fd != -1 && fd != requested_fd) { |
| if (requested_fd > STDERR_FILENO && fcntl(requested_fd, F_GETFD) != -1) { |
| // Dup2 will silently close the FD if one is already at requested_fd. |
| // If someone is using the deferred sandbox entry, ie. SandboxMeHere, |
| // the application might have something actually using that fd. |
| // Therefore let's log a big warning if that FD is already in use. |
| // Note: this check doesn't happen for STDIN,STDOUT,STDERR. |
| SAPI_RAW_LOG( |
| WARNING, |
| "Cloning received fd %d over %d which is already open and will " |
| "be silently closed. This may lead to unexpected behavior!", |
| fd, requested_fd); |
| } |
| |
| SAPI_RAW_VLOG(1, "Cloning received fd=%d onto fd=%d", fd, requested_fd); |
| SAPI_RAW_PCHECK(dup2(fd, requested_fd) != -1, ""); |
| |
| // Close the newly received FD if it differs from the new one. |
| close(fd); |
| fd = requested_fd; |
| } |
| |
| if (!name.empty()) { |
| SAPI_RAW_CHECK(fd_map_.emplace(name, fd).second, "duplicate fd mapping"); |
| } |
| } |
| } |
| |
| void Client::ReceivePolicy() { |
| std::vector<uint8_t> bytes; |
| SAPI_RAW_CHECK(comms_->RecvBytes(&bytes), "receive bytes"); |
| policy_ = std::move(bytes); |
| } |
| |
| void Client::ApplyPolicyAndBecomeTracee() { |
| // When running under *SAN, we need to notify *SANs background thread that we |
| // want it to exit and wait for it to be done. When not running under *SAN, |
| // this function does nothing. |
| sanitizer::WaitForSanitizer(); |
| |
| // Creds can be received w/o synchronization, once the connection is |
| // established. |
| pid_t cred_pid; |
| uid_t cred_uid ABSL_ATTRIBUTE_UNUSED; |
| gid_t cred_gid ABSL_ATTRIBUTE_UNUSED; |
| SAPI_RAW_CHECK(comms_->RecvCreds(&cred_pid, &cred_uid, &cred_gid), |
| "receiving credentials"); |
| |
| SAPI_RAW_CHECK(prctl(PR_SET_DUMPABLE, 1) == 0, |
| "setting PR_SET_DUMPABLE flag"); |
| if (prctl(PR_SET_PTRACER, cred_pid) == -1) { |
| SAPI_RAW_VLOG(1, "No YAMA on this system. Continuing"); |
| } |
| |
| SAPI_RAW_CHECK(prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) == 0, |
| "setting PR_SET_NO_NEW_PRIVS flag"); |
| SAPI_RAW_CHECK(prctl(PR_SET_KEEPCAPS, 0) == 0, |
| "setting PR_SET_KEEPCAPS flag"); |
| |
| sock_fprog prog; |
| SAPI_RAW_CHECK(policy_.size() / sizeof(sock_filter) <= |
| std::numeric_limits<uint16_t>::max(), |
| "seccomp policy too long"); |
| prog.len = static_cast<uint16_t>(policy_.size() / sizeof(sock_filter)); |
| prog.filter = reinterpret_cast<sock_filter*>(&policy_.front()); |
| |
| SAPI_RAW_VLOG(1, |
| "Applying policy in PID %zd, sock_fprog.len: %" PRId16 |
| " entries (%" PRIuPTR " bytes)", |
| syscall(__NR_gettid), prog.len, policy_.size()); |
| |
| // Signal executor we are ready to have limits applied on us and be ptraced. |
| // We want limits at the last moment to avoid triggering them too early and we |
| // want ptrace at the last moment to avoid synchronization deadlocks. |
| SAPI_RAW_CHECK(comms_->SendUint32(kClient2SandboxReady), |
| "receiving ready signal from executor"); |
| uint32_t ret; // wait for confirmation |
| SAPI_RAW_CHECK(comms_->RecvUint32(&ret), |
| "receving confirmation from executor"); |
| if (ret == kSandbox2ClientUnotify) { |
| InitSeccompUnotify(prog, comms_); |
| } else { |
| SAPI_RAW_CHECK(ret == kSandbox2ClientDone, |
| "invalid confirmation from executor"); |
| InitSeccompRegular(prog); |
| } |
| } |
| |
| int Client::GetMappedFD(const std::string& name) { |
| auto it = fd_map_.find(name); |
| SAPI_RAW_CHECK(it != fd_map_.end(), |
| "mapped fd not found (function called twice?)"); |
| int fd = it->second; |
| fd_map_.erase(it); |
| return fd; |
| } |
| |
| bool Client::HasMappedFD(const std::string& name) { |
| return fd_map_.find(name) != fd_map_.end(); |
| } |
| |
| void Client::SendLogsToSupervisor() { |
| // This LogSink will register itself and send all logs to the executor until |
| // the object is destroyed. |
| logsink_ = std::make_unique<LogSink>(GetMappedFD(LogSink::kLogFDName)); |
| } |
| |
| NetworkProxyClient* Client::GetNetworkProxyClient() { |
| if (proxy_client_ == nullptr) { |
| proxy_client_ = std::make_unique<NetworkProxyClient>( |
| GetMappedFD(NetworkProxyClient::kFDName)); |
| } |
| return proxy_client_.get(); |
| } |
| |
| absl::Status Client::InstallNetworkProxyHandler() { |
| if (fd_map_.find(NetworkProxyClient::kFDName) == fd_map_.end()) { |
| return absl::FailedPreconditionError( |
| "InstallNetworkProxyHandler() must be called at most once after the " |
| "sandbox is installed. Also, the NetworkProxyServer needs to be " |
| "enabled."); |
| } |
| return NetworkProxyHandler::InstallNetworkProxyHandler( |
| GetNetworkProxyClient()); |
| } |
| |
| } // namespace sandbox2 |