metrics: Emit kAbnormalTermination if last update attempt failed.
This allows us to track how many update attempts terminates
abnormally, for example if the device is rebooted in the middle of an
update or if update_engine crashes. We use a marker state variable
(aka a "prefs" file) to keep track of this.
Note that we don't currently report any of the other metrics in the
UpdateEngine.Attempt.* namespace. If necessary, this can be changed in
the future - I left a TODO item in the code with more details.
BUG=chromium:357676
TEST=New unit tests + Unit tests pass + Manual tests.
Change-Id: I83fe284c7c46917c0c55b92314c58098e2fd1789
Reviewed-on: https://chromium-review.googlesource.com/197175
Reviewed-by: Alex Deymo <[email protected]>
Tested-by: David Zeuthen <[email protected]>
Commit-Queue: David Zeuthen <[email protected]>
diff --git a/constants.cc b/constants.cc
index f094016..eec9144 100644
--- a/constants.cc
+++ b/constants.cc
@@ -21,6 +21,7 @@
const char kSystemRebootedMarkerFile[] = "/tmp/update_engine_update_recorded";
// Constants defining keys for the persisted state of update engine.
+const char kPrefsAttemptInProgress[] = "attempt-in-progress";
const char kPrefsBackoffExpiryTime[] = "backoff-expiry-time";
const char kPrefsCertificateReportToSendDownload[] =
"certificate-report-to-send-download";
diff --git a/constants.h b/constants.h
index 59e3737..3a74156 100644
--- a/constants.h
+++ b/constants.h
@@ -27,6 +27,7 @@
extern const char kStatefulPartition[];
// Constants related to preferences.
+extern const char kPrefsAttemptInProgress[];
extern const char kPrefsBackoffExpiryTime[];
extern const char kPrefsCertificateReportToSendDownload[];
extern const char kPrefsCertificateReportToSendUpdate[];
diff --git a/metrics.cc b/metrics.cc
index 94685e6..f8a316aa 100644
--- a/metrics.cc
+++ b/metrics.cc
@@ -168,6 +168,20 @@
}
}
+void ReportAbnormallyTerminatedUpdateAttemptMetrics(
+ SystemState *system_state) {
+
+ string metric = metrics::kMetricAttemptResult;
+ AttemptResult attempt_result = AttemptResult::kAbnormalTermination;
+
+ LOG(INFO) << "Uploading " << static_cast<int>(attempt_result)
+ << " for metric " << metric;
+ system_state->metrics_lib()->SendEnumToUMA(
+ metric,
+ static_cast<int>(attempt_result),
+ static_cast<int>(AttemptResult::kNumConstants));
+}
+
void ReportUpdateAttemptMetrics(
SystemState *system_state,
int attempt_number,
diff --git a/metrics.h b/metrics.h
index 895e816..ba5e6c3 100644
--- a/metrics.h
+++ b/metrics.h
@@ -254,6 +254,11 @@
DownloadErrorCode payload_download_error_code,
ConnectionType connection_type);
+// Reports the |kAbnormalTermination| for the |kMetricAttemptResult|
+// metric. No other metrics in the UpdateEngine.Attempt.* namespace
+// will be reported.
+void ReportAbnormallyTerminatedUpdateAttemptMetrics(SystemState *system_state);
+
// Helper function to report the after the completion of a successful
// update attempt. The following metrics are reported:
//
diff --git a/payload_state.cc b/payload_state.cc
index c176238..975ba15 100644
--- a/payload_state.cc
+++ b/payload_state.cc
@@ -162,6 +162,9 @@
}
void PayloadState::AttemptStarted(AttemptType attempt_type) {
+ // Flush previous state from abnormal attempt failure, if any.
+ ReportAndClearPersistedAttemptMetrics();
+
attempt_type_ = attempt_type;
ClockInterface *clock = system_state_->clock();
@@ -183,6 +186,9 @@
type = utils::GetConnectionType(network_connection_type, tethering);
}
attempt_connection_type_ = type;
+
+ if (attempt_type == AttemptType::kUpdate)
+ PersistAttemptMetrics();
}
void PayloadState::UpdateResumed() {
@@ -207,6 +213,7 @@
case AttemptType::kUpdate:
CollectAndReportAttemptMetrics(kErrorCodeSuccess);
CollectAndReportSuccessfulUpdateMetrics();
+ ClearPersistedAttemptMetrics();
break;
case AttemptType::kRollback:
@@ -238,6 +245,7 @@
switch (attempt_type_) {
case AttemptType::kUpdate:
CollectAndReportAttemptMetrics(base_error);
+ ClearPersistedAttemptMetrics();
break;
case AttemptType::kRollback:
@@ -629,6 +637,33 @@
attempt_connection_type_);
}
+void PayloadState::PersistAttemptMetrics() {
+ // TODO(zeuthen): For now we only persist whether an attempt was in
+ // progress and not values/metrics related to the attempt. This
+ // means that when this happens, of all the UpdateEngine.Attempt.*
+ // metrics, only UpdateEngine.Attempt.Result is reported (with the
+ // value |kAbnormalTermination|). In the future we might want to
+ // persist more data so we can report other metrics in the
+ // UpdateEngine.Attempt.* namespace when this happens.
+ prefs_->SetBoolean(kPrefsAttemptInProgress, true);
+}
+
+void PayloadState::ClearPersistedAttemptMetrics() {
+ prefs_->Delete(kPrefsAttemptInProgress);
+}
+
+void PayloadState::ReportAndClearPersistedAttemptMetrics() {
+ bool attempt_in_progress = false;
+ if (!prefs_->GetBoolean(kPrefsAttemptInProgress, &attempt_in_progress))
+ return;
+ if (!attempt_in_progress)
+ return;
+
+ metrics::ReportAbnormallyTerminatedUpdateAttemptMetrics(system_state_);
+
+ ClearPersistedAttemptMetrics();
+}
+
void PayloadState::CollectAndReportSuccessfulUpdateMetrics() {
string metric;
@@ -1316,6 +1351,9 @@
}
void PayloadState::UpdateEngineStarted() {
+ // Flush previous state from abnormal attempt failure, if any.
+ ReportAndClearPersistedAttemptMetrics();
+
// Avoid the UpdateEngineStarted actions if this is not the first time we
// run the update engine since reboot.
if (!system_state_->system_rebooted())
diff --git a/payload_state.h b/payload_state.h
index 9ad4377..f8cc678 100644
--- a/payload_state.h
+++ b/payload_state.h
@@ -165,6 +165,18 @@
// Collects and reports the various metrics related to an update attempt.
void CollectAndReportAttemptMetrics(ErrorCode code);
+ // Persists values related to the UpdateEngine.Attempt.* metrics so
+ // we can identify later if an update attempt ends abnormally.
+ void PersistAttemptMetrics();
+
+ // Clears persistent state previously set using AttemptMetricsPersist().
+ void ClearPersistedAttemptMetrics();
+
+ // Checks if persistent state previously set using AttemptMetricsPersist()
+ // exists and, if so, emits it with |attempt_result| set to
+ // metrics::AttemptResult::kAbnormalTermination.
+ void ReportAndClearPersistedAttemptMetrics();
+
// Collects and reports the various metrics related to a successful update.
void CollectAndReportSuccessfulUpdateMetrics();
diff --git a/payload_state_unittest.cc b/payload_state_unittest.cc
index 24aa070..3071a6b 100644
--- a/payload_state_unittest.cc
+++ b/payload_state_unittest.cc
@@ -13,6 +13,7 @@
#include "update_engine/constants.h"
#include "update_engine/fake_clock.h"
#include "update_engine/fake_hardware.h"
+#include "update_engine/fake_prefs.h"
#include "update_engine/fake_system_state.h"
#include "update_engine/omaha_request_action.h"
#include "update_engine/payload_state.h"
@@ -1221,7 +1222,7 @@
EXPECT_TRUE(payload_state.Initialize(&fake_system_state));
- // No prefs should be used after a crash.
+ // Only the |kPrefsAttemptInProgress| state variable should be read.
EXPECT_CALL(*prefs, Exists(_)).Times(0);
EXPECT_CALL(*prefs, SetString(_, _)).Times(0);
EXPECT_CALL(*prefs, SetInt64(_, _)).Times(0);
@@ -1229,6 +1230,7 @@
EXPECT_CALL(*prefs, GetString(_, _)).Times(0);
EXPECT_CALL(*prefs, GetInt64(_, _)).Times(0);
EXPECT_CALL(*prefs, GetBoolean(_, _)).Times(0);
+ EXPECT_CALL(*prefs, GetBoolean(kPrefsAttemptInProgress, _));
// No metrics are reported after a crash.
EXPECT_CALL(*fake_system_state.mock_metrics_lib(),
@@ -1240,6 +1242,77 @@
payload_state.UpdateEngineStarted();
}
+TEST(PayloadStateTest, AbnormalTerminationAttemptMetricsNoReporting) {
+ PayloadState payload_state;
+ FakeSystemState fake_system_state;
+
+ // If there's no marker at startup, ensure we don't report a metric.
+ EXPECT_TRUE(payload_state.Initialize(&fake_system_state));
+ EXPECT_CALL(*fake_system_state.mock_metrics_lib(),
+ SendEnumToUMA(
+ metrics::kMetricAttemptResult,
+ static_cast<int>(metrics::AttemptResult::kAbnormalTermination),
+ _)).Times(0);
+ payload_state.UpdateEngineStarted();
+}
+
+TEST(PayloadStateTest, AbnormalTerminationAttemptMetricsReported) {
+ PayloadState payload_state;
+ FakeSystemState fake_system_state;
+ FakePrefs prefs;
+
+ // If we have a marker at startup, ensure it's reported and the
+ // marker is then cleared.
+ fake_system_state.set_prefs(&prefs);
+ prefs.SetBoolean(kPrefsAttemptInProgress, true);
+
+ EXPECT_TRUE(payload_state.Initialize(&fake_system_state));
+
+ EXPECT_CALL(*fake_system_state.mock_metrics_lib(),
+ SendEnumToUMA(
+ metrics::kMetricAttemptResult,
+ static_cast<int>(metrics::AttemptResult::kAbnormalTermination),
+ _)).Times(1);
+ payload_state.UpdateEngineStarted();
+
+ EXPECT_FALSE(prefs.Exists(kPrefsAttemptInProgress));
+}
+
+TEST(PayloadStateTest, AbnormalTerminationAttemptMetricsClearedOnSucceess) {
+ PayloadState payload_state;
+ FakeSystemState fake_system_state;
+ FakePrefs prefs;
+
+ // Make sure the marker is written and cleared during an attempt and
+ // also that we DO NOT emit the metric (since the attempt didn't end
+ // abnormally).
+ fake_system_state.set_prefs(&prefs);
+ EXPECT_TRUE(payload_state.Initialize(&fake_system_state));
+
+ EXPECT_CALL(*fake_system_state.mock_metrics_lib(), SendToUMA(_, _, _, _, _))
+ .Times(AnyNumber());
+ EXPECT_CALL(*fake_system_state.mock_metrics_lib(), SendEnumToUMA(_, _, _))
+ .Times(AnyNumber());
+ EXPECT_CALL(*fake_system_state.mock_metrics_lib(),
+ SendEnumToUMA(
+ metrics::kMetricAttemptResult,
+ static_cast<int>(metrics::AttemptResult::kAbnormalTermination),
+ _)).Times(0);
+
+ // Attempt not in progress, should be clear.
+ EXPECT_FALSE(prefs.Exists(kPrefsAttemptInProgress));
+
+ payload_state.UpdateRestarted();
+
+ // Attempt not in progress, should be set.
+ EXPECT_TRUE(prefs.Exists(kPrefsAttemptInProgress));
+
+ payload_state.UpdateSucceeded();
+
+ // Attempt not in progress, should be clear.
+ EXPECT_FALSE(prefs.Exists(kPrefsAttemptInProgress));
+}
+
TEST(PayloadStateTest, CandidateUrlsComputedCorrectly) {
OmahaResponse response;
FakeSystemState fake_system_state;