| /* |
| * Copyright (C) 2008 The Android Open Source Project |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| package com.android.server; |
| |
| import android.app.IActivityController; |
| import android.content.BroadcastReceiver; |
| import android.content.Context; |
| import android.content.Intent; |
| import android.content.IntentFilter; |
| import android.hidl.manager.V1_0.IServiceManager; |
| import android.os.Binder; |
| import android.os.Build; |
| import android.os.Debug; |
| import android.os.Handler; |
| import android.os.IPowerManager; |
| import android.os.Looper; |
| import android.os.Process; |
| import android.os.RemoteException; |
| import android.os.ServiceManager; |
| import android.os.SystemClock; |
| import android.system.ErrnoException; |
| import android.system.Os; |
| import android.system.OsConstants; |
| import android.system.StructRlimit; |
| import android.util.EventLog; |
| import android.util.Log; |
| import android.util.Slog; |
| import android.util.SparseArray; |
| |
| import com.android.internal.os.ProcessCpuTracker; |
| import com.android.internal.os.ZygoteConnectionConstants; |
| import com.android.internal.util.FrameworkStatsLog; |
| import com.android.server.am.ActivityManagerService; |
| import com.android.server.wm.SurfaceAnimationThread; |
| |
| import java.io.File; |
| import java.io.FileWriter; |
| import java.io.IOException; |
| import java.io.StringWriter; |
| import java.nio.charset.StandardCharsets; |
| import java.nio.file.Files; |
| import java.nio.file.Path; |
| import java.nio.file.Paths; |
| import java.util.ArrayList; |
| import java.util.Arrays; |
| import java.util.Collections; |
| import java.util.HashSet; |
| import java.util.List; |
| |
| /** This class calls its monitor every minute. Killing this process if they don't return **/ |
| public class Watchdog extends Thread { |
| static final String TAG = "Watchdog"; |
| |
| /** Debug flag. */ |
| public static final boolean DEBUG = false; |
| |
| // Set this to true to use debug default values. |
| private static final boolean DB = false; |
| |
| // Note 1: Do not lower this value below thirty seconds without tightening the invoke-with |
| // timeout in com.android.internal.os.ZygoteConnection, or wrapped applications |
| // can trigger the watchdog. |
| // Note 2: The debug value is already below the wait time in ZygoteConnection. Wrapped |
| // applications may not work with a debug build. CTS will fail. |
| private static final long DEFAULT_TIMEOUT = DB ? 10 * 1000 : 60 * 1000; |
| private static final long CHECK_INTERVAL = DEFAULT_TIMEOUT / 2; |
| |
| // These are temporally ordered: larger values as lateness increases |
| private static final int COMPLETED = 0; |
| private static final int WAITING = 1; |
| private static final int WAITED_HALF = 2; |
| private static final int OVERDUE = 3; |
| |
| // Which native processes to dump into dropbox's stack traces |
| public static final String[] NATIVE_STACKS_OF_INTEREST = new String[] { |
| "/system/bin/audioserver", |
| "/system/bin/cameraserver", |
| "/system/bin/drmserver", |
| "/system/bin/mediadrmserver", |
| "/system/bin/mediaserver", |
| "/system/bin/netd", |
| "/system/bin/sdcard", |
| "/system/bin/surfaceflinger", |
| "/system/bin/vold", |
| "media.extractor", // system/bin/mediaextractor |
| "media.metrics", // system/bin/mediametrics |
| "media.codec", // vendor/bin/hw/[email protected] |
| "media.swcodec", // /apex/com.android.media.swcodec/bin/mediaswcodec |
| "com.android.bluetooth", // Bluetooth service |
| "/apex/com.android.os.statsd/bin/statsd", // Stats daemon |
| }; |
| |
| public static final List<String> HAL_INTERFACES_OF_INTEREST = Arrays.asList( |
| "[email protected]::IDevicesFactory", |
| "[email protected]::IDevicesFactory", |
| "[email protected]::IDevicesFactory", |
| "[email protected]::IDevicesFactory", |
| "[email protected]::IBiometricsFace", |
| "[email protected]::IBiometricsFingerprint", |
| "[email protected]::IBluetoothHci", |
| "[email protected]::ICameraProvider", |
| "[email protected]::IGnss", |
| "[email protected]::IAllocator", |
| "[email protected]::IComposer", |
| "[email protected]::IHealth", |
| "[email protected]::ILight", |
| "[email protected]::IComponentStore", |
| "[email protected]::IOmx", |
| "[email protected]::IOmxStore", |
| "[email protected]::IDevice", |
| "[email protected]::IPowerStats", |
| "[email protected]::ISensors", |
| "[email protected]::ISensors", |
| "[email protected]::ISensors", |
| "[email protected]::IVr", |
| "[email protected]::ISystemSuspend" |
| ); |
| |
| private static Watchdog sWatchdog; |
| |
| /* This handler will be used to post message back onto the main thread */ |
| private final ArrayList<HandlerChecker> mHandlerCheckers = new ArrayList<>(); |
| private final HandlerChecker mMonitorChecker; |
| private ActivityManagerService mActivity; |
| |
| private IActivityController mController; |
| private boolean mAllowRestart = true; |
| private final OpenFdMonitor mOpenFdMonitor; |
| private final List<Integer> mInterestingJavaPids = new ArrayList<>(); |
| |
| /** |
| * Used for checking status of handle threads and scheduling monitor callbacks. |
| */ |
| public final class HandlerChecker implements Runnable { |
| private final Handler mHandler; |
| private final String mName; |
| private final long mWaitMax; |
| private final ArrayList<Monitor> mMonitors = new ArrayList<Monitor>(); |
| private final ArrayList<Monitor> mMonitorQueue = new ArrayList<Monitor>(); |
| private boolean mCompleted; |
| private Monitor mCurrentMonitor; |
| private long mStartTime; |
| private int mPauseCount; |
| |
| HandlerChecker(Handler handler, String name, long waitMaxMillis) { |
| mHandler = handler; |
| mName = name; |
| mWaitMax = waitMaxMillis; |
| mCompleted = true; |
| } |
| |
| void addMonitorLocked(Monitor monitor) { |
| // We don't want to update mMonitors when the Handler is in the middle of checking |
| // all monitors. We will update mMonitors on the next schedule if it is safe |
| mMonitorQueue.add(monitor); |
| } |
| |
| public void scheduleCheckLocked() { |
| if (mCompleted) { |
| // Safe to update monitors in queue, Handler is not in the middle of work |
| mMonitors.addAll(mMonitorQueue); |
| mMonitorQueue.clear(); |
| } |
| if ((mMonitors.size() == 0 && mHandler.getLooper().getQueue().isPolling()) |
| || (mPauseCount > 0)) { |
| // Don't schedule until after resume OR |
| // If the target looper has recently been polling, then |
| // there is no reason to enqueue our checker on it since that |
| // is as good as it not being deadlocked. This avoid having |
| // to do a context switch to check the thread. Note that we |
| // only do this if we have no monitors since those would need to |
| // be executed at this point. |
| mCompleted = true; |
| return; |
| } |
| if (!mCompleted) { |
| // we already have a check in flight, so no need |
| return; |
| } |
| |
| mCompleted = false; |
| mCurrentMonitor = null; |
| mStartTime = SystemClock.uptimeMillis(); |
| mHandler.postAtFrontOfQueue(this); |
| } |
| |
| boolean isOverdueLocked() { |
| return (!mCompleted) && (SystemClock.uptimeMillis() > mStartTime + mWaitMax); |
| } |
| |
| public int getCompletionStateLocked() { |
| if (mCompleted) { |
| return COMPLETED; |
| } else { |
| long latency = SystemClock.uptimeMillis() - mStartTime; |
| if (latency < mWaitMax/2) { |
| return WAITING; |
| } else if (latency < mWaitMax) { |
| return WAITED_HALF; |
| } |
| } |
| return OVERDUE; |
| } |
| |
| public Thread getThread() { |
| return mHandler.getLooper().getThread(); |
| } |
| |
| public String getName() { |
| return mName; |
| } |
| |
| String describeBlockedStateLocked() { |
| if (mCurrentMonitor == null) { |
| return "Blocked in handler on " + mName + " (" + getThread().getName() + ")"; |
| } else { |
| return "Blocked in monitor " + mCurrentMonitor.getClass().getName() |
| + " on " + mName + " (" + getThread().getName() + ")"; |
| } |
| } |
| |
| @Override |
| public void run() { |
| // Once we get here, we ensure that mMonitors does not change even if we call |
| // #addMonitorLocked because we first add the new monitors to mMonitorQueue and |
| // move them to mMonitors on the next schedule when mCompleted is true, at which |
| // point we have completed execution of this method. |
| final int size = mMonitors.size(); |
| for (int i = 0 ; i < size ; i++) { |
| synchronized (Watchdog.this) { |
| mCurrentMonitor = mMonitors.get(i); |
| } |
| mCurrentMonitor.monitor(); |
| } |
| |
| synchronized (Watchdog.this) { |
| mCompleted = true; |
| mCurrentMonitor = null; |
| } |
| } |
| |
| /** Pause the HandlerChecker. */ |
| public void pauseLocked(String reason) { |
| mPauseCount++; |
| // Mark as completed, because there's a chance we called this after the watchog |
| // thread loop called Object#wait after 'WAITED_HALF'. In that case we want to ensure |
| // the next call to #getCompletionStateLocked for this checker returns 'COMPLETED' |
| mCompleted = true; |
| Slog.i(TAG, "Pausing HandlerChecker: " + mName + " for reason: " |
| + reason + ". Pause count: " + mPauseCount); |
| } |
| |
| /** Resume the HandlerChecker from the last {@link #pauseLocked}. */ |
| public void resumeLocked(String reason) { |
| if (mPauseCount > 0) { |
| mPauseCount--; |
| Slog.i(TAG, "Resuming HandlerChecker: " + mName + " for reason: " |
| + reason + ". Pause count: " + mPauseCount); |
| } else { |
| Slog.wtf(TAG, "Already resumed HandlerChecker: " + mName); |
| } |
| } |
| } |
| |
| final class RebootRequestReceiver extends BroadcastReceiver { |
| @Override |
| public void onReceive(Context c, Intent intent) { |
| if (intent.getIntExtra("nowait", 0) != 0) { |
| rebootSystem("Received ACTION_REBOOT broadcast"); |
| return; |
| } |
| Slog.w(TAG, "Unsupported ACTION_REBOOT broadcast: " + intent); |
| } |
| } |
| |
| /** Monitor for checking the availability of binder threads. The monitor will block until |
| * there is a binder thread available to process in coming IPCs to make sure other processes |
| * can still communicate with the service. |
| */ |
| private static final class BinderThreadMonitor implements Watchdog.Monitor { |
| @Override |
| public void monitor() { |
| Binder.blockUntilThreadAvailable(); |
| } |
| } |
| |
| public interface Monitor { |
| void monitor(); |
| } |
| |
| public static Watchdog getInstance() { |
| if (sWatchdog == null) { |
| sWatchdog = new Watchdog(); |
| } |
| |
| return sWatchdog; |
| } |
| |
| private Watchdog() { |
| super("watchdog"); |
| // Initialize handler checkers for each common thread we want to check. Note |
| // that we are not currently checking the background thread, since it can |
| // potentially hold longer running operations with no guarantees about the timeliness |
| // of operations there. |
| |
| // The shared foreground thread is the main checker. It is where we |
| // will also dispatch monitor checks and do other work. |
| mMonitorChecker = new HandlerChecker(FgThread.getHandler(), |
| "foreground thread", DEFAULT_TIMEOUT); |
| mHandlerCheckers.add(mMonitorChecker); |
| // Add checker for main thread. We only do a quick check since there |
| // can be UI running on the thread. |
| mHandlerCheckers.add(new HandlerChecker(new Handler(Looper.getMainLooper()), |
| "main thread", DEFAULT_TIMEOUT)); |
| // Add checker for shared UI thread. |
| mHandlerCheckers.add(new HandlerChecker(UiThread.getHandler(), |
| "ui thread", DEFAULT_TIMEOUT)); |
| // And also check IO thread. |
| mHandlerCheckers.add(new HandlerChecker(IoThread.getHandler(), |
| "i/o thread", DEFAULT_TIMEOUT)); |
| // And the display thread. |
| mHandlerCheckers.add(new HandlerChecker(DisplayThread.getHandler(), |
| "display thread", DEFAULT_TIMEOUT)); |
| // And the animation thread. |
| mHandlerCheckers.add(new HandlerChecker(AnimationThread.getHandler(), |
| "animation thread", DEFAULT_TIMEOUT)); |
| // And the surface animation thread. |
| mHandlerCheckers.add(new HandlerChecker(SurfaceAnimationThread.getHandler(), |
| "surface animation thread", DEFAULT_TIMEOUT)); |
| |
| // Initialize monitor for Binder threads. |
| addMonitor(new BinderThreadMonitor()); |
| |
| mOpenFdMonitor = OpenFdMonitor.create(); |
| |
| mInterestingJavaPids.add(Process.myPid()); |
| |
| // See the notes on DEFAULT_TIMEOUT. |
| assert DB || |
| DEFAULT_TIMEOUT > ZygoteConnectionConstants.WRAPPED_PID_TIMEOUT_MILLIS; |
| } |
| |
| /** |
| * Registers a {@link BroadcastReceiver} to listen to reboot broadcasts and trigger reboot. |
| * Should be called during boot after the ActivityManagerService is up and registered |
| * as a system service so it can handle registration of a {@link BroadcastReceiver}. |
| */ |
| public void init(Context context, ActivityManagerService activity) { |
| mActivity = activity; |
| context.registerReceiver(new RebootRequestReceiver(), |
| new IntentFilter(Intent.ACTION_REBOOT), |
| android.Manifest.permission.REBOOT, null); |
| } |
| |
| private static boolean isInterestingJavaProcess(String processName) { |
| return processName.equals(StorageManagerService.sMediaStoreAuthorityProcessName) |
| || processName.equals("com.android.phone"); |
| } |
| |
| /** |
| * Notifies the watchdog when a Java process with {@code pid} is started. |
| * This process may have its stack trace dumped during an ANR. |
| */ |
| public void processStarted(String processName, int pid) { |
| if (isInterestingJavaProcess(processName)) { |
| Slog.i(TAG, "Interesting Java process " + processName + " started. Pid " + pid); |
| synchronized (this) { |
| mInterestingJavaPids.add(pid); |
| } |
| } |
| } |
| |
| /** |
| * Notifies the watchdog when a Java process with {@code pid} dies. |
| */ |
| public void processDied(String processName, int pid) { |
| if (isInterestingJavaProcess(processName)) { |
| Slog.i(TAG, "Interesting Java process " + processName + " died. Pid " + pid); |
| synchronized (this) { |
| mInterestingJavaPids.remove(Integer.valueOf(pid)); |
| } |
| } |
| } |
| |
| public void setActivityController(IActivityController controller) { |
| synchronized (this) { |
| mController = controller; |
| } |
| } |
| |
| public void setAllowRestart(boolean allowRestart) { |
| synchronized (this) { |
| mAllowRestart = allowRestart; |
| } |
| } |
| |
| public void addMonitor(Monitor monitor) { |
| synchronized (this) { |
| mMonitorChecker.addMonitorLocked(monitor); |
| } |
| } |
| |
| public void addThread(Handler thread) { |
| addThread(thread, DEFAULT_TIMEOUT); |
| } |
| |
| public void addThread(Handler thread, long timeoutMillis) { |
| synchronized (this) { |
| final String name = thread.getLooper().getThread().getName(); |
| mHandlerCheckers.add(new HandlerChecker(thread, name, timeoutMillis)); |
| } |
| } |
| |
| /** |
| * Pauses Watchdog action for the currently running thread. Useful before executing long running |
| * operations that could falsely trigger the watchdog. Each call to this will require a matching |
| * call to {@link #resumeWatchingCurrentThread}. |
| * |
| * <p>If the current thread has not been added to the Watchdog, this call is a no-op. |
| * |
| * <p>If the Watchdog is already paused for the current thread, this call adds |
| * adds another pause and will require an additional {@link #resumeCurrentThread} to resume. |
| * |
| * <p>Note: Use with care, as any deadlocks on the current thread will be undetected until all |
| * pauses have been resumed. |
| */ |
| public void pauseWatchingCurrentThread(String reason) { |
| synchronized (this) { |
| for (HandlerChecker hc : mHandlerCheckers) { |
| if (Thread.currentThread().equals(hc.getThread())) { |
| hc.pauseLocked(reason); |
| } |
| } |
| } |
| } |
| |
| /** |
| * Resumes the last pause from {@link #pauseWatchingCurrentThread} for the currently running |
| * thread. |
| * |
| * <p>If the current thread has not been added to the Watchdog, this call is a no-op. |
| * |
| * <p>If the Watchdog action for the current thread is already resumed, this call logs a wtf. |
| * |
| * <p>If all pauses have been resumed, the Watchdog action is finally resumed, otherwise, |
| * the Watchdog action for the current thread remains paused until resume is called at least |
| * as many times as the calls to pause. |
| */ |
| public void resumeWatchingCurrentThread(String reason) { |
| synchronized (this) { |
| for (HandlerChecker hc : mHandlerCheckers) { |
| if (Thread.currentThread().equals(hc.getThread())) { |
| hc.resumeLocked(reason); |
| } |
| } |
| } |
| } |
| |
| /** |
| * Perform a full reboot of the system. |
| */ |
| void rebootSystem(String reason) { |
| Slog.i(TAG, "Rebooting system because: " + reason); |
| IPowerManager pms = (IPowerManager)ServiceManager.getService(Context.POWER_SERVICE); |
| try { |
| pms.reboot(false, reason, false); |
| } catch (RemoteException ex) { |
| } |
| } |
| |
| private int evaluateCheckerCompletionLocked() { |
| int state = COMPLETED; |
| for (int i=0; i<mHandlerCheckers.size(); i++) { |
| HandlerChecker hc = mHandlerCheckers.get(i); |
| state = Math.max(state, hc.getCompletionStateLocked()); |
| } |
| return state; |
| } |
| |
| private ArrayList<HandlerChecker> getBlockedCheckersLocked() { |
| ArrayList<HandlerChecker> checkers = new ArrayList<HandlerChecker>(); |
| for (int i=0; i<mHandlerCheckers.size(); i++) { |
| HandlerChecker hc = mHandlerCheckers.get(i); |
| if (hc.isOverdueLocked()) { |
| checkers.add(hc); |
| } |
| } |
| return checkers; |
| } |
| |
| private String describeCheckersLocked(List<HandlerChecker> checkers) { |
| StringBuilder builder = new StringBuilder(128); |
| for (int i=0; i<checkers.size(); i++) { |
| if (builder.length() > 0) { |
| builder.append(", "); |
| } |
| builder.append(checkers.get(i).describeBlockedStateLocked()); |
| } |
| return builder.toString(); |
| } |
| |
| private static ArrayList<Integer> getInterestingHalPids() { |
| try { |
| IServiceManager serviceManager = IServiceManager.getService(); |
| ArrayList<IServiceManager.InstanceDebugInfo> dump = |
| serviceManager.debugDump(); |
| HashSet<Integer> pids = new HashSet<>(); |
| for (IServiceManager.InstanceDebugInfo info : dump) { |
| if (info.pid == IServiceManager.PidConstant.NO_PID) { |
| continue; |
| } |
| |
| if (!HAL_INTERFACES_OF_INTEREST.contains(info.interfaceName)) { |
| continue; |
| } |
| |
| pids.add(info.pid); |
| } |
| return new ArrayList<Integer>(pids); |
| } catch (RemoteException e) { |
| return new ArrayList<Integer>(); |
| } |
| } |
| |
| static ArrayList<Integer> getInterestingNativePids() { |
| ArrayList<Integer> pids = getInterestingHalPids(); |
| |
| int[] nativePids = Process.getPidsForCommands(NATIVE_STACKS_OF_INTEREST); |
| if (nativePids != null) { |
| pids.ensureCapacity(pids.size() + nativePids.length); |
| for (int i : nativePids) { |
| pids.add(i); |
| } |
| } |
| |
| return pids; |
| } |
| |
| @Override |
| public void run() { |
| boolean waitedHalf = false; |
| while (true) { |
| final List<HandlerChecker> blockedCheckers; |
| final String subject; |
| final boolean allowRestart; |
| int debuggerWasConnected = 0; |
| synchronized (this) { |
| long timeout = CHECK_INTERVAL; |
| // Make sure we (re)spin the checkers that have become idle within |
| // this wait-and-check interval |
| for (int i=0; i<mHandlerCheckers.size(); i++) { |
| HandlerChecker hc = mHandlerCheckers.get(i); |
| hc.scheduleCheckLocked(); |
| } |
| |
| if (debuggerWasConnected > 0) { |
| debuggerWasConnected--; |
| } |
| |
| // NOTE: We use uptimeMillis() here because we do not want to increment the time we |
| // wait while asleep. If the device is asleep then the thing that we are waiting |
| // to timeout on is asleep as well and won't have a chance to run, causing a false |
| // positive on when to kill things. |
| long start = SystemClock.uptimeMillis(); |
| while (timeout > 0) { |
| if (Debug.isDebuggerConnected()) { |
| debuggerWasConnected = 2; |
| } |
| try { |
| wait(timeout); |
| // Note: mHandlerCheckers and mMonitorChecker may have changed after waiting |
| } catch (InterruptedException e) { |
| Log.wtf(TAG, e); |
| } |
| if (Debug.isDebuggerConnected()) { |
| debuggerWasConnected = 2; |
| } |
| timeout = CHECK_INTERVAL - (SystemClock.uptimeMillis() - start); |
| } |
| |
| boolean fdLimitTriggered = false; |
| if (mOpenFdMonitor != null) { |
| fdLimitTriggered = mOpenFdMonitor.monitor(); |
| } |
| |
| if (!fdLimitTriggered) { |
| final int waitState = evaluateCheckerCompletionLocked(); |
| if (waitState == COMPLETED) { |
| // The monitors have returned; reset |
| waitedHalf = false; |
| continue; |
| } else if (waitState == WAITING) { |
| // still waiting but within their configured intervals; back off and recheck |
| continue; |
| } else if (waitState == WAITED_HALF) { |
| if (!waitedHalf) { |
| Slog.i(TAG, "WAITED_HALF"); |
| // We've waited half the deadlock-detection interval. Pull a stack |
| // trace and wait another half. |
| ArrayList<Integer> pids = new ArrayList<>(mInterestingJavaPids); |
| ActivityManagerService.dumpStackTraces(pids, null, null, |
| getInterestingNativePids(), null); |
| waitedHalf = true; |
| } |
| continue; |
| } |
| |
| // something is overdue! |
| blockedCheckers = getBlockedCheckersLocked(); |
| subject = describeCheckersLocked(blockedCheckers); |
| } else { |
| blockedCheckers = Collections.emptyList(); |
| subject = "Open FD high water mark reached"; |
| } |
| allowRestart = mAllowRestart; |
| } |
| |
| // If we got here, that means that the system is most likely hung. |
| // First collect stack traces from all threads of the system process. |
| // Then kill this process so that the system will restart. |
| EventLog.writeEvent(EventLogTags.WATCHDOG, subject); |
| |
| ArrayList<Integer> pids = new ArrayList<>(mInterestingJavaPids); |
| |
| long anrTime = SystemClock.uptimeMillis(); |
| StringBuilder report = new StringBuilder(); |
| report.append(MemoryPressureUtil.currentPsiState()); |
| ProcessCpuTracker processCpuTracker = new ProcessCpuTracker(false); |
| StringWriter tracesFileException = new StringWriter(); |
| final File stack = ActivityManagerService.dumpStackTraces( |
| pids, processCpuTracker, new SparseArray<>(), getInterestingNativePids(), |
| tracesFileException); |
| |
| // Give some extra time to make sure the stack traces get written. |
| // The system's been hanging for a minute, another second or two won't hurt much. |
| SystemClock.sleep(5000); |
| |
| processCpuTracker.update(); |
| report.append(processCpuTracker.printCurrentState(anrTime)); |
| report.append(tracesFileException.getBuffer()); |
| |
| // Trigger the kernel to dump all blocked threads, and backtraces on all CPUs to the kernel log |
| doSysRq('w'); |
| doSysRq('l'); |
| |
| // Try to add the error to the dropbox, but assuming that the ActivityManager |
| // itself may be deadlocked. (which has happened, causing this statement to |
| // deadlock and the watchdog as a whole to be ineffective) |
| Thread dropboxThread = new Thread("watchdogWriteToDropbox") { |
| public void run() { |
| // If a watched thread hangs before init() is called, we don't have a |
| // valid mActivity. So we can't log the error to dropbox. |
| if (mActivity != null) { |
| mActivity.addErrorToDropBox( |
| "watchdog", null, "system_server", null, null, null, |
| subject, report.toString(), stack, null); |
| } |
| FrameworkStatsLog.write(FrameworkStatsLog.SYSTEM_SERVER_WATCHDOG_OCCURRED, |
| subject); |
| } |
| }; |
| dropboxThread.start(); |
| try { |
| dropboxThread.join(2000); // wait up to 2 seconds for it to return. |
| } catch (InterruptedException ignored) {} |
| |
| IActivityController controller; |
| synchronized (this) { |
| controller = mController; |
| } |
| if (controller != null) { |
| Slog.i(TAG, "Reporting stuck state to activity controller"); |
| try { |
| Binder.setDumpDisabled("Service dumps disabled due to hung system process."); |
| // 1 = keep waiting, -1 = kill system |
| int res = controller.systemNotResponding(subject); |
| if (res >= 0) { |
| Slog.i(TAG, "Activity controller requested to coninue to wait"); |
| waitedHalf = false; |
| continue; |
| } |
| } catch (RemoteException e) { |
| } |
| } |
| |
| // Only kill the process if the debugger is not attached. |
| if (Debug.isDebuggerConnected()) { |
| debuggerWasConnected = 2; |
| } |
| if (debuggerWasConnected >= 2) { |
| Slog.w(TAG, "Debugger connected: Watchdog is *not* killing the system process"); |
| } else if (debuggerWasConnected > 0) { |
| Slog.w(TAG, "Debugger was connected: Watchdog is *not* killing the system process"); |
| } else if (!allowRestart) { |
| Slog.w(TAG, "Restart not allowed: Watchdog is *not* killing the system process"); |
| } else { |
| Slog.w(TAG, "*** WATCHDOG KILLING SYSTEM PROCESS: " + subject); |
| WatchdogDiagnostics.diagnoseCheckers(blockedCheckers); |
| Slog.w(TAG, "*** GOODBYE!"); |
| Process.killProcess(Process.myPid()); |
| System.exit(10); |
| } |
| |
| waitedHalf = false; |
| } |
| } |
| |
| private void doSysRq(char c) { |
| try { |
| FileWriter sysrq_trigger = new FileWriter("/proc/sysrq-trigger"); |
| sysrq_trigger.write(c); |
| sysrq_trigger.close(); |
| } catch (IOException e) { |
| Slog.w(TAG, "Failed to write to /proc/sysrq-trigger", e); |
| } |
| } |
| |
| public static final class OpenFdMonitor { |
| /** |
| * Number of FDs below the soft limit that we trigger a runtime restart at. This was |
| * chosen arbitrarily, but will need to be at least 6 in order to have a sufficient number |
| * of FDs in reserve to complete a dump. |
| */ |
| private static final int FD_HIGH_WATER_MARK = 12; |
| |
| private final File mDumpDir; |
| private final File mFdHighWaterMark; |
| |
| public static OpenFdMonitor create() { |
| // Only run the FD monitor on debuggable builds (such as userdebug and eng builds). |
| if (!Build.IS_DEBUGGABLE) { |
| return null; |
| } |
| |
| final StructRlimit rlimit; |
| try { |
| rlimit = android.system.Os.getrlimit(OsConstants.RLIMIT_NOFILE); |
| } catch (ErrnoException errno) { |
| Slog.w(TAG, "Error thrown from getrlimit(RLIMIT_NOFILE)", errno); |
| return null; |
| } |
| |
| // The assumption we're making here is that FD numbers are allocated (more or less) |
| // sequentially, which is currently (and historically) true since open is currently |
| // specified to always return the lowest-numbered non-open file descriptor for the |
| // current process. |
| // |
| // We do this to avoid having to enumerate the contents of /proc/self/fd in order to |
| // count the number of descriptors open in the process. |
| final File fdThreshold = new File("/proc/self/fd/" + (rlimit.rlim_cur - FD_HIGH_WATER_MARK)); |
| return new OpenFdMonitor(new File("/data/anr"), fdThreshold); |
| } |
| |
| OpenFdMonitor(File dumpDir, File fdThreshold) { |
| mDumpDir = dumpDir; |
| mFdHighWaterMark = fdThreshold; |
| } |
| |
| /** |
| * Dumps open file descriptors and their full paths to a temporary file in {@code mDumpDir}. |
| */ |
| private void dumpOpenDescriptors() { |
| // We cannot exec lsof to get more info about open file descriptors because a newly |
| // forked process will not have the permissions to readlink. Instead list all open |
| // descriptors from /proc/pid/fd and resolve them. |
| List<String> dumpInfo = new ArrayList<>(); |
| String fdDirPath = String.format("/proc/%d/fd/", Process.myPid()); |
| File[] fds = new File(fdDirPath).listFiles(); |
| if (fds == null) { |
| dumpInfo.add("Unable to list " + fdDirPath); |
| } else { |
| for (File f : fds) { |
| String fdSymLink = f.getAbsolutePath(); |
| String resolvedPath = ""; |
| try { |
| resolvedPath = Os.readlink(fdSymLink); |
| } catch (ErrnoException ex) { |
| resolvedPath = ex.getMessage(); |
| } |
| dumpInfo.add(fdSymLink + "\t" + resolvedPath); |
| } |
| } |
| |
| // Dump the fds & paths to a temp file. |
| try { |
| File dumpFile = File.createTempFile("anr_fd_", "", mDumpDir); |
| Path out = Paths.get(dumpFile.getAbsolutePath()); |
| Files.write(out, dumpInfo, StandardCharsets.UTF_8); |
| } catch (IOException ex) { |
| Slog.w(TAG, "Unable to write open descriptors to file: " + ex); |
| } |
| } |
| |
| /** |
| * @return {@code true} if the high water mark was breached and a dump was written, |
| * {@code false} otherwise. |
| */ |
| public boolean monitor() { |
| if (mFdHighWaterMark.exists()) { |
| dumpOpenDescriptors(); |
| return true; |
| } |
| |
| return false; |
| } |
| } |
| } |