| /* |
| * Copyright (C) 2018 The Android Open Source Project |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
#include "host/commands/run_cvd/process_monitor.h"

#include <sys/prctl.h>
#include <sys/types.h>
#include <sys/wait.h>

#include <assert.h>
#include <errno.h>
#include <signal.h>
#include <stdio.h>
#include <string.h>

#include <algorithm>
#include <atomic>
#include <future>
#include <thread>

#include <android-base/logging.h>

#include "common/libs/fs/shared_buf.h"
#include "common/libs/fs/shared_select.h"
| |
| namespace cuttlefish { |
| |
// Fixed-size message written by the parent process to the monitor process
// over the monitor socket. It is transferred as raw bytes, so it must stay
// trivially copyable.
struct ParentToChildMessage {
  // True when the parent asks the monitor to stop. Defaults to false so a
  // default-constructed message never carries an indeterminate value.
  bool stop = false;
};
| |
| ProcessMonitor::Properties& ProcessMonitor::Properties::RestartSubprocesses( |
| bool r) & { |
| restart_subprocesses_ = r; |
| return *this; |
| } |
| |
| ProcessMonitor::Properties ProcessMonitor::Properties::RestartSubprocesses( |
| bool r) && { |
| restart_subprocesses_ = r; |
| return std::move(*this); |
| } |
| |
| ProcessMonitor::Properties& ProcessMonitor::Properties::AddCommand( |
| Command cmd) & { |
| auto& entry = entries_.emplace_back(); |
| entry.cmd.reset(new Command(std::move(cmd))); |
| return *this; |
| } |
| |
| ProcessMonitor::Properties ProcessMonitor::Properties::AddCommand( |
| Command cmd) && { |
| auto& entry = entries_.emplace_back(); |
| entry.cmd.reset(new Command(std::move(cmd))); |
| return std::move(*this); |
| } |
| |
| ProcessMonitor::ProcessMonitor(ProcessMonitor::Properties&& properties) |
| : properties_(std::move(properties)), monitor_(-1) {} |
| |
| Result<void> ProcessMonitor::StopMonitoredProcesses() { |
| CF_EXPECT(monitor_ != -1, "The monitor process has already exited."); |
| CF_EXPECT(monitor_socket_->IsOpen(), "The monitor socket is already closed"); |
| ParentToChildMessage message; |
| message.stop = true; |
| CF_EXPECT(WriteAllBinary(monitor_socket_, &message) == sizeof(message), |
| "Failed to communicate with monitor socket: " |
| << monitor_socket_->StrError()); |
| |
| pid_t last_monitor = monitor_; |
| monitor_ = -1; |
| monitor_socket_->Close(); |
| int wstatus; |
| CF_EXPECT(waitpid(last_monitor, &wstatus, 0) == last_monitor, |
| "Failed to wait for monitor process"); |
| CF_EXPECT(!WIFSIGNALED(wstatus), "Monitor process exited due to a signal"); |
| CF_EXPECT(WIFEXITED(wstatus), "Monitor process exited for unknown reasons"); |
| CF_EXPECT(WEXITSTATUS(wstatus) == 0, |
| "Monitor process exited with code " << WEXITSTATUS(wstatus)); |
| return {}; |
| } |
| |
| Result<void> ProcessMonitor::StartAndMonitorProcesses() { |
| CF_EXPECT(monitor_ == -1, "The monitor process was already started"); |
| CF_EXPECT(!monitor_socket_->IsOpen(), "Monitor socket was already opened"); |
| |
| SharedFD client_pipe, host_pipe; |
| CF_EXPECT(SharedFD::Pipe(&client_pipe, &host_pipe), |
| "Could not create the monitor socket."); |
| monitor_ = fork(); |
| if (monitor_ == 0) { |
| monitor_socket_ = client_pipe; |
| host_pipe->Close(); |
| auto monitor = MonitorRoutine(); |
| if (!monitor.ok()) { |
| LOG(ERROR) << "Monitoring processes failed:\n" << monitor.error(); |
| } |
| std::exit(monitor.ok() ? 0 : 1); |
| } else { |
| client_pipe->Close(); |
| monitor_socket_ = host_pipe; |
| return {}; |
| } |
| } |
| |
| static void LogSubprocessExit(const std::string& name, pid_t pid, int wstatus) { |
| LOG(INFO) << "Detected unexpected exit of monitored subprocess " << name; |
| if (WIFEXITED(wstatus)) { |
| LOG(INFO) << "Subprocess " << name << " (" << pid |
| << ") has exited with exit code " << WEXITSTATUS(wstatus); |
| } else if (WIFSIGNALED(wstatus)) { |
| LOG(ERROR) << "Subprocess " << name << " (" << pid |
| << ") was interrupted by a signal: " << WTERMSIG(wstatus); |
| } else { |
| LOG(INFO) << "subprocess " << name << " (" << pid |
| << ") has exited for unknown reasons"; |
| } |
| } |
| |
| static void LogSubprocessExit(const std::string& name, const siginfo_t& infop) { |
| LOG(INFO) << "Detected unexpected exit of monitored subprocess " << name; |
| if (infop.si_code == CLD_EXITED) { |
| LOG(INFO) << "Subprocess " << name << " (" << infop.si_pid |
| << ") has exited with exit code " << infop.si_status; |
| } else if (infop.si_code == CLD_KILLED) { |
| LOG(ERROR) << "Subprocess " << name << " (" << infop.si_pid |
| << ") was interrupted by a signal: " << infop.si_status; |
| } else { |
| LOG(INFO) << "subprocess " << name << " (" << infop.si_pid |
| << ") has exited for unknown reasons (code = " << infop.si_code |
| << ", status = " << infop.si_status << ")"; |
| } |
| } |
| |
| Result<void> ProcessMonitor::MonitorRoutine() { |
| // Make this process a subreaper to reliably catch subprocess exits. |
| // See https://man7.org/linux/man-pages/man2/prctl.2.html |
| prctl(PR_SET_CHILD_SUBREAPER, 1); |
| prctl(PR_SET_PDEATHSIG, SIGHUP); // Die when parent dies |
| |
| LOG(DEBUG) << "Starting monitoring subprocesses"; |
| for (auto& monitored : properties_.entries_) { |
| LOG(INFO) << monitored.cmd->GetShortName(); |
| auto options = SubprocessOptions().InGroup(true); |
| monitored.proc.reset(new Subprocess(monitored.cmd->Start(options))); |
| CF_EXPECT(monitored.proc->Started(), "Failed to start process"); |
| } |
| |
| bool running = true; |
| auto policy = std::launch::async; |
| auto parent_comms = std::async(policy, [&running, this]() -> Result<void> { |
| LOG(DEBUG) << "Waiting for a `stop` message from the parent."; |
| while (running) { |
| ParentToChildMessage message; |
| CF_EXPECT(ReadExactBinary(monitor_socket_, &message) == sizeof(message), |
| "Could not read message from parent."); |
| if (message.stop) { |
| running = false; |
| // Wake up the wait() loop by giving it an exited child process |
| if (fork() == 0) { |
| std::exit(0); |
| } |
| } |
| } |
| return {}; |
| }); |
| |
| auto& monitored = properties_.entries_; |
| |
| LOG(DEBUG) << "Monitoring subprocesses"; |
| while(running) { |
| int wstatus; |
| pid_t pid = wait(&wstatus); |
| int error_num = errno; |
| CF_EXPECT(pid != -1, "Wait failed: " << strerror(error_num)); |
| if (!WIFSIGNALED(wstatus) && !WIFEXITED(wstatus)) { |
| LOG(DEBUG) << "Unexpected status from wait: " << wstatus |
| << " for pid " << pid; |
| continue; |
| } |
| if (!running) { // Avoid extra restarts near the end |
| break; |
| } |
| auto matches = [pid](const auto& it) { return it.proc->pid() == pid; }; |
| auto it = std::find_if(monitored.begin(), monitored.end(), matches); |
| if (it == monitored.end()) { |
| LogSubprocessExit("(unknown)", pid, wstatus); |
| } else { |
| LogSubprocessExit(it->cmd->GetShortName(), it->proc->pid(), wstatus); |
| if (properties_.restart_subprocesses_) { |
| auto options = SubprocessOptions().InGroup(true); |
| it->proc.reset(new Subprocess(it->cmd->Start(options))); |
| } else { |
| properties_.entries_.erase(it); |
| } |
| } |
| } |
| |
| CF_EXPECT(parent_comms.get()); // Should have exited if `running` is false |
| auto stop = [](const auto& it) { |
| auto stop_result = it.proc->Stop(); |
| if (stop_result == StopperResult::kStopFailure) { |
| LOG(WARNING) << "Error in stopping \"" << it.cmd->GetShortName() << "\""; |
| return false; |
| } |
| siginfo_t infop; |
| auto success = it.proc->Wait(&infop, WEXITED); |
| if (success < 0) { |
| LOG(WARNING) << "Failed to wait for process " << it.cmd->GetShortName(); |
| return false; |
| } |
| if (stop_result == StopperResult::kStopCrash) { |
| LogSubprocessExit(it.cmd->GetShortName(), infop); |
| } |
| return true; |
| }; |
| // Processes were started in the order they appear in the vector, stop them in |
| // reverse order for symmetry. |
| size_t stopped = std::count_if(monitored.rbegin(), monitored.rend(), stop); |
| LOG(DEBUG) << "Done monitoring subprocesses"; |
| CF_EXPECT(stopped == monitored.size(), "Didn't stop all subprocesses"); |
| return {}; |
| } |
| |
| } // namespace cuttlefish |