update_engine: Add PayloadState Exclusion Logic

|PayloadState| will exclude payloads based on specific update failures.
This is to prevent critical platform updates from being blocked by less
critical updates (e.g. DLCs). A layer of robustness is added in
protecting CrOS devices from falling off the update train.

Some important changes to mention:
 - Only during updates will update_engine exclude non-critical payloads
 - |OmahaRequestAction|, the current precursor |Action| to
 |OmahaResponseHandlerAction|, during a update will exclude
 faulty/excluded payloads prior to setting the |OmahaResponse| as an
 output object for suqsequent bonded |Action| to consume
 - When all payloads are excluded for an update, the |ErrorCode| will
 be indicated as |OmahaResponseInvalid| as this case is not a valid
 Omaha response update_engine should ever run into because non-critical
 updates must tag alongside a critical update

BUG=chromium:928805
TEST=FEATURES=test emerge-$B update_engine update_engine-client

Change-Id: I0551a228d0b84defb4d59966e8ed46a5d9278d60
Reviewed-on: https://chromium-review.googlesource.com/c/aosp/platform/system/update_engine/+/2190237
Tested-by: Jae Hoon Kim <[email protected]>
Auto-Submit: Jae Hoon Kim <[email protected]>
Reviewed-by: Amin Hassani <[email protected]>
Commit-Queue: Jae Hoon Kim <[email protected]>
diff --git a/common/utils.cc b/common/utils.cc
index 3a234cb..644493d 100644
--- a/common/utils.cc
+++ b/common/utils.cc
@@ -959,6 +959,10 @@
   }
 }
 
+string GetExclusionName(const string& str_to_convert) {
+  return base::NumberToString(base::StringPieceHash()(str_to_convert));
+}
+
 }  // namespace utils
 
 }  // namespace chromeos_update_engine
diff --git a/common/utils.h b/common/utils.h
index d949a3e..ee2dce0 100644
--- a/common/utils.h
+++ b/common/utils.h
@@ -313,6 +313,10 @@
                              uint16_t* high_version,
                              uint16_t* low_version);
 
+// Returns the string format of the hashed |str_to_convert| that can be used
+// with |Excluder| as the exclusion name.
+std::string GetExclusionName(const std::string& str_to_convert);
+
 }  // namespace utils
 
 // Utility class to close a file descriptor
diff --git a/omaha_request_action.cc b/omaha_request_action.cc
index 86d4b93..3a0b91c 100644
--- a/omaha_request_action.cc
+++ b/omaha_request_action.cc
@@ -55,6 +55,7 @@
 #include "update_engine/omaha_request_params.h"
 #include "update_engine/p2p_manager.h"
 #include "update_engine/payload_state_interface.h"
+#include "update_engine/update_attempter.h"
 
 using base::Optional;
 using base::Time;
@@ -534,6 +535,7 @@
 // False otherwise, in which case it sets any error code using |completer|.
 bool ParsePackage(OmahaParserData::App* app,
                   OmahaResponse* output_object,
+                  bool can_exclude,
                   ScopedActionCompleter* completer) {
   if (app->updatecheck_status.empty() ||
       app->updatecheck_status == kValNoUpdate) {
@@ -580,6 +582,7 @@
     LOG(INFO) << "Found package " << package.name;
 
     OmahaResponse::Package out_package;
+    out_package.can_exclude = can_exclude;
     for (const string& codebase : app->url_codebase) {
       if (codebase.empty()) {
         LOG(ERROR) << "Omaha Response URL has empty codebase";
@@ -625,6 +628,42 @@
   return true;
 }
 
+// Removes the candidate URLs which are excluded within packages, if all the
+// candidate URLs are excluded within a package, the package will be excluded.
+void ProcessExclusions(OmahaResponse* output_object,
+                       ExcluderInterface* excluder) {
+  for (auto package_it = output_object->packages.begin();
+       package_it != output_object->packages.end();
+       /* Increment logic in loop */) {
+    // If package cannot be excluded, quickly continue.
+    if (!package_it->can_exclude) {
+      ++package_it;
+      continue;
+    }
+    // Remove the excluded payload URLs.
+    for (auto payload_url_it = package_it->payload_urls.begin();
+         payload_url_it != package_it->payload_urls.end();
+         /* Increment logic in loop */) {
+      auto exclusion_name = utils::GetExclusionName(*payload_url_it);
+      // If payload URL is not excluded, quickly continue.
+      if (!excluder->IsExcluded(exclusion_name)) {
+        ++payload_url_it;
+        continue;
+      }
+      LOG(INFO) << "Excluding payload URL=" << *payload_url_it
+                << " for payload hash=" << package_it->hash;
+      payload_url_it = package_it->payload_urls.erase(payload_url_it);
+    }
+    // If there are no candidate payload URLs, remove the package.
+    if (package_it->payload_urls.empty()) {
+      LOG(INFO) << "Excluding payload hash=" << package_it->hash;
+      package_it = output_object->packages.erase(package_it);
+      continue;
+    }
+    ++package_it;
+  }
+}
+
 // Parses the 2 key version strings kernel_version and firmware_version. If the
 // field is not present, or cannot be parsed the values default to 0xffff.
 void ParseRollbackVersions(int allowed_milestones,
@@ -751,9 +790,15 @@
 
   // Package has to be parsed after Params now because ParseParams need to make
   // sure that postinstall action exists.
-  for (auto& app : parser_data->apps)
-    if (!ParsePackage(&app, output_object, completer))
+  for (auto& app : parser_data->apps) {
+    // Only allow exclusions for a non-critical package during an update. For
+    // non-critical package installations, let the errors propagate instead
+    // of being handled inside update_engine as installations are a dlcservice
+    // specific feature.
+    bool can_exclude = !params_->is_install() && params_->IsDlcAppId(app.id);
+    if (!ParsePackage(&app, output_object, can_exclude, completer))
       return false;
+  }
 
   return true;
 }
@@ -977,6 +1022,8 @@
   OmahaResponse output_object;
   if (!ParseResponse(&parser_data, &output_object, &completer))
     return;
+  ProcessExclusions(&output_object,
+                    system_state_->update_attempter()->GetExcluder());
   output_object.update_exists = true;
   SetOutputObject(output_object);
 
@@ -1469,6 +1516,14 @@
     return true;
   }
 
+  // Currently non-critical updates always update alongside the platform update
+  // (a critical update) so this case should never actually be hit if the
+  // request to Omaha for updates are correct. In other words, stop the update
+  // from happening as there are no packages in the response to process.
+  if (response.packages.empty()) {
+    LOG(ERROR) << "All packages were excluded.";
+  }
+
   // Note: We could technically delete the UpdateFirstSeenAt state when we
   // return true. If we do, it'll mean a device has to restart the
   // UpdateFirstSeenAt and thus help scattering take effect when the AU is
diff --git a/omaha_request_action_unittest.cc b/omaha_request_action_unittest.cc
index e530de4..6a0c213 100644
--- a/omaha_request_action_unittest.cc
+++ b/omaha_request_action_unittest.cc
@@ -44,6 +44,7 @@
 #include "update_engine/common/constants.h"
 #include "update_engine/common/fake_prefs.h"
 #include "update_engine/common/hash_calculator.h"
+#include "update_engine/common/mock_excluder.h"
 #include "update_engine/common/mock_http_fetcher.h"
 #include "update_engine/common/platform_constants.h"
 #include "update_engine/common/prefs.h"
@@ -75,6 +76,7 @@
 using testing::ReturnRef;
 using testing::SaveArg;
 using testing::SetArgPointee;
+using testing::StrictMock;
 
 namespace {
 
@@ -204,7 +206,8 @@
                 ? "<app appid=\"" + request_params.GetDlcAppId(kDlcId1) +
                       "\" status=\"ok\">"
                       "<updatecheck status=\"ok\"><urls><url codebase=\"" +
-                      codebase + "\"/></urls><manifest version=\"" + version +
+                      codebase + "\"/><url codebase=\"" + codebase2 +
+                      "\"/></urls><manifest version=\"" + version +
                       "\"><packages><package name=\"package3\" size=\"333\" "
                       "hash_sha256=\"hash3\"/></packages><actions>"
                       "<action event=\"install\" run=\".signed\"/>"
@@ -389,6 +392,9 @@
         .expected_check_reaction = metrics::CheckReaction::kUpdating,
         .expected_download_error_code = metrics::DownloadErrorCode::kUnset,
     };
+
+    ON_CALL(*fake_system_state_.mock_update_attempter(), GetExcluder())
+        .WillByDefault(Return(&mock_excluder_));
   }
 
   // This function uses the parameters in |tuc_params_| to do an update check.
@@ -429,6 +435,7 @@
                bool expected_allow_p2p_for_sharing,
                const string& expected_p2p_url);
 
+  StrictMock<MockExcluder> mock_excluder_;
   FakeSystemState fake_system_state_;
   FakeUpdateResponse fake_update_response_;
   // Used by all tests.
@@ -2759,8 +2766,44 @@
       {{request_params_.GetDlcAppId(kDlcId1), {.name = kDlcId1}}});
   fake_update_response_.dlc_app_update = true;
   tuc_params_.http_response = fake_update_response_.GetUpdateResponse();
+  EXPECT_CALL(mock_excluder_, IsExcluded(_)).WillRepeatedly(Return(false));
   ASSERT_TRUE(TestUpdateCheck());
 
+  EXPECT_EQ(response.packages.size(), 2u);
+  // Two candidate URLs.
+  EXPECT_EQ(response.packages[1].payload_urls.size(), 2u);
+  EXPECT_TRUE(response.update_exists);
+}
+
+TEST_F(OmahaRequestActionTest, UpdateWithPartiallyExcludedDlcTest) {
+  request_params_.set_dlc_apps_params(
+      {{request_params_.GetDlcAppId(kDlcId1), {.name = kDlcId1}}});
+  fake_update_response_.dlc_app_update = true;
+  tuc_params_.http_response = fake_update_response_.GetUpdateResponse();
+  // The first DLC candidate URL is excluded.
+  EXPECT_CALL(mock_excluder_, IsExcluded(_))
+      .WillOnce(Return(true))
+      .WillOnce(Return(false));
+  ASSERT_TRUE(TestUpdateCheck());
+
+  EXPECT_EQ(response.packages.size(), 2u);
+  // One candidate URL.
+  EXPECT_EQ(response.packages[1].payload_urls.size(), 1u);
+  EXPECT_TRUE(response.update_exists);
+}
+
+TEST_F(OmahaRequestActionTest, UpdateWithExcludedDlcTest) {
+  request_params_.set_dlc_apps_params(
+      {{request_params_.GetDlcAppId(kDlcId1), {.name = kDlcId1}}});
+  fake_update_response_.dlc_app_update = true;
+  tuc_params_.http_response = fake_update_response_.GetUpdateResponse();
+  // Both DLC candidate URLs are excluded.
+  EXPECT_CALL(mock_excluder_, IsExcluded(_))
+      .WillOnce(Return(true))
+      .WillOnce(Return(true));
+  ASSERT_TRUE(TestUpdateCheck());
+
+  EXPECT_EQ(response.packages.size(), 1u);
   EXPECT_TRUE(response.update_exists);
 }
 
@@ -2769,6 +2812,7 @@
       {{request_params_.GetDlcAppId(kDlcId2), {.name = kDlcId2}}});
   fake_update_response_.dlc_app_no_update = true;
   tuc_params_.http_response = fake_update_response_.GetUpdateResponse();
+  EXPECT_CALL(mock_excluder_, IsExcluded(_)).WillRepeatedly(Return(false));
   ASSERT_TRUE(TestUpdateCheck());
 
   EXPECT_TRUE(response.update_exists);
@@ -2781,6 +2825,7 @@
   fake_update_response_.dlc_app_update = true;
   fake_update_response_.dlc_app_no_update = true;
   tuc_params_.http_response = fake_update_response_.GetUpdateResponse();
+  EXPECT_CALL(mock_excluder_, IsExcluded(_)).WillRepeatedly(Return(false));
   ASSERT_TRUE(TestUpdateCheck());
 
   EXPECT_TRUE(response.update_exists);
@@ -2991,4 +3036,37 @@
   EXPECT_EQ(temp_str, "4763");
 }
 
+TEST_F(OmahaRequestActionTest, OmahaResponseUpdateCanExcludeCheck) {
+  request_params_.set_dlc_apps_params(
+      {{request_params_.GetDlcAppId(kDlcId1), {.name = kDlcId1}}});
+  fake_update_response_.dlc_app_update = true;
+  tuc_params_.http_response = fake_update_response_.GetUpdateResponse();
+
+  EXPECT_CALL(mock_excluder_, IsExcluded(_)).WillRepeatedly(Return(false));
+  ASSERT_TRUE(TestUpdateCheck());
+  ASSERT_TRUE(delegate_.omaha_response_);
+  const auto& packages = delegate_.omaha_response_->packages;
+  ASSERT_EQ(packages.size(), 2);
+
+  EXPECT_FALSE(packages[0].can_exclude);
+  EXPECT_TRUE(packages[1].can_exclude);
+}
+
+TEST_F(OmahaRequestActionTest, OmahaResponseInstallCannotExcludeCheck) {
+  request_params_.set_is_install(true);
+  request_params_.set_dlc_apps_params(
+      {{request_params_.GetDlcAppId(kDlcId1), {.name = kDlcId1}}});
+  fake_update_response_.dlc_app_update = true;
+  tuc_params_.http_response = fake_update_response_.GetUpdateResponse();
+
+  EXPECT_CALL(mock_excluder_, IsExcluded(_)).WillRepeatedly(Return(false));
+  ASSERT_TRUE(TestUpdateCheck());
+  ASSERT_TRUE(delegate_.omaha_response_);
+  const auto& packages = delegate_.omaha_response_->packages;
+  ASSERT_EQ(packages.size(), 2);
+
+  EXPECT_FALSE(packages[0].can_exclude);
+  EXPECT_FALSE(packages[1].can_exclude);
+}
+
 }  // namespace chromeos_update_engine
diff --git a/omaha_response.h b/omaha_response.h
index ab253a1..2b86fe7 100644
--- a/omaha_response.h
+++ b/omaha_response.h
@@ -51,6 +51,9 @@
     // True if the payload described in this response is a delta payload.
     // False if it's a full payload.
     bool is_delta = false;
+    // True if the payload can be excluded from updating if consistently faulty.
+    // False if the payload is critical to update.
+    bool can_exclude = false;
   };
   std::vector<Package> packages;
 
diff --git a/omaha_response_handler_action.cc b/omaha_response_handler_action.cc
index 915e839..040f8e7 100644
--- a/omaha_response_handler_action.cc
+++ b/omaha_response_handler_action.cc
@@ -70,6 +70,9 @@
   }
 
   // This is the url to the first package, not all packages.
+  // (For updates): All |Action|s prior to this must pass in non-excluded URLs
+  // within the |OmahaResponse|, reference exlusion logic in
+  // |OmahaRequestAction| and keep the enforcement of exclusions for updates.
   install_plan_.download_url = current_url;
   install_plan_.version = response.version;
   install_plan_.system_version = response.system_version;
diff --git a/payload_state.cc b/payload_state.cc
index 2e07ad9..cf3aab9 100644
--- a/payload_state.cc
+++ b/payload_state.cc
@@ -312,6 +312,7 @@
     case ErrorCode::kUnsupportedMinorPayloadVersion:
     case ErrorCode::kPayloadTimestampError:
     case ErrorCode::kVerityCalculationError:
+      ExcludeCurrentPayload();
       IncrementUrlIndex();
       break;
 
@@ -502,10 +503,29 @@
   } else {
     LOG(INFO) << "Reached max number of failures for Url" << GetUrlIndex()
               << ". Trying next available URL";
+    ExcludeCurrentPayload();
     IncrementUrlIndex();
   }
 }
 
+void PayloadState::ExcludeCurrentPayload() {
+  const auto& package = response_.packages[payload_index_];
+  if (!package.can_exclude) {
+    LOG(INFO) << "Not excluding as marked non-excludable for package hash="
+              << package.hash;
+    return;
+  }
+  auto exclusion_name = utils::GetExclusionName(GetCurrentUrl());
+  if (!excluder_->Exclude(exclusion_name))
+    LOG(WARNING) << "Failed to exclude "
+                 << " Package Hash=" << package.hash
+                 << " CurrentUrl=" << GetCurrentUrl();
+  else
+    LOG(INFO) << "Excluded "
+              << " Package Hash=" << package.hash
+              << " CurrentUrl=" << GetCurrentUrl();
+}
+
 void PayloadState::UpdateBackoffExpiryTime() {
   if (response_.disable_payload_backoff) {
     LOG(INFO) << "Resetting backoff expiry time as payload backoff is disabled";
diff --git a/payload_state.h b/payload_state.h
index bc4bf0d..d13c642 100644
--- a/payload_state.h
+++ b/payload_state.h
@@ -158,6 +158,9 @@
   FRIEND_TEST(PayloadStateTest, RollbackVersion);
   FRIEND_TEST(PayloadStateTest, UpdateSuccessWithWipedPrefs);
   FRIEND_TEST(PayloadStateTest, NextPayloadResetsUrlIndex);
+  FRIEND_TEST(PayloadStateTest, ExcludeNoopForNonExcludables);
+  FRIEND_TEST(PayloadStateTest, ExcludeOnlyCanExcludables);
+  FRIEND_TEST(PayloadStateTest, IncrementFailureExclusionTest);
 
   // Helper called when an attempt has begun, is called by
   // UpdateResumed(), UpdateRestarted() and Rollback().
@@ -182,6 +185,12 @@
   // to the next URL and resets the failure count for that URL.
   void IncrementFailureCount();
 
+  // Excludes the current payload + current candidate URL from being part of
+  // future updates/retries. Whenever |SetResponse()| or |NextPayload()| decide
+  // on the initial current URL index and the next payload respectively, it will
+  // advanced based on exclusions.
+  void ExcludeCurrentPayload();
+
   // Updates the backoff expiry time exponentially based on the current
   // payload attempt number.
   void UpdateBackoffExpiryTime();
diff --git a/payload_state_unittest.cc b/payload_state_unittest.cc
index 4a0afcf..bf9aed4 100644
--- a/payload_state_unittest.cc
+++ b/payload_state_unittest.cc
@@ -23,9 +23,11 @@
 #include <gtest/gtest.h>
 
 #include "update_engine/common/constants.h"
+#include "update_engine/common/excluder_interface.h"
 #include "update_engine/common/fake_clock.h"
 #include "update_engine/common/fake_hardware.h"
 #include "update_engine/common/fake_prefs.h"
+#include "update_engine/common/mock_excluder.h"
 #include "update_engine/common/mock_prefs.h"
 #include "update_engine/common/prefs.h"
 #include "update_engine/common/test_utils.h"
@@ -44,6 +46,7 @@
 using testing::NiceMock;
 using testing::Return;
 using testing::SetArgPointee;
+using testing::StrictMock;
 
 namespace chromeos_update_engine {
 
@@ -1012,10 +1015,6 @@
 
   NiceMock<MockPrefs>* mock_powerwash_safe_prefs =
       fake_system_state.mock_powerwash_safe_prefs();
-  EXPECT_TRUE(payload_state.Initialize(&fake_system_state));
-
-  // Verify pre-conditions are good.
-  EXPECT_TRUE(payload_state.GetRollbackVersion().empty());
 
   // Mock out the os version and make sure it's blacklisted correctly.
   string rollback_version = "2345.0.0";
@@ -1023,6 +1022,11 @@
   params.Init(rollback_version, "", false);
   fake_system_state.set_request_params(&params);
 
+  EXPECT_TRUE(payload_state.Initialize(&fake_system_state));
+
+  // Verify pre-conditions are good.
+  EXPECT_TRUE(payload_state.GetRollbackVersion().empty());
+
   EXPECT_CALL(*mock_powerwash_safe_prefs,
               SetString(kPrefsRollbackVersion, rollback_version));
   payload_state.Rollback();
@@ -1353,15 +1357,15 @@
   PayloadState payload_state;
   FakeSystemState fake_system_state;
 
-  EXPECT_TRUE(payload_state.Initialize(&fake_system_state));
-  SetupPayloadStateWith2Urls(
-      "Hash6437", true, false, &payload_state, &response);
-
   // Mock the request to a request where the delta was disabled.
   OmahaRequestParams params(&fake_system_state);
   params.set_delta_okay(false);
   fake_system_state.set_request_params(&params);
 
+  EXPECT_TRUE(payload_state.Initialize(&fake_system_state));
+  SetupPayloadStateWith2Urls(
+      "Hash6437", true, false, &payload_state, &response);
+
   // Simulate a successful download and update.
   payload_state.DownloadComplete();
 
@@ -1658,6 +1662,9 @@
 TEST(PayloadStateTest, NextPayloadResetsUrlIndex) {
   PayloadState payload_state;
   FakeSystemState fake_system_state;
+  StrictMock<MockExcluder> mock_excluder;
+  EXPECT_CALL(*fake_system_state.mock_update_attempter(), GetExcluder())
+      .WillOnce(Return(&mock_excluder));
   EXPECT_TRUE(payload_state.Initialize(&fake_system_state));
 
   OmahaResponse response;
@@ -1682,4 +1689,93 @@
   EXPECT_EQ(payload_state.GetCurrentUrl(), "http://test1b");
 }
 
+TEST(PayloadStateTest, ExcludeNoopForNonExcludables) {
+  PayloadState payload_state;
+  FakeSystemState fake_system_state;
+  StrictMock<MockExcluder> mock_excluder;
+  EXPECT_CALL(*fake_system_state.mock_update_attempter(), GetExcluder())
+      .WillOnce(Return(&mock_excluder));
+  EXPECT_TRUE(payload_state.Initialize(&fake_system_state));
+
+  OmahaResponse response;
+  response.packages.push_back(
+      {.payload_urls = {"http://test1a", "http://test2a"},
+       .size = 123456789,
+       .metadata_size = 58123,
+       .metadata_signature = "msign",
+       .hash = "hash",
+       .can_exclude = false});
+  payload_state.SetResponse(response);
+
+  EXPECT_CALL(mock_excluder, Exclude(_)).Times(0);
+  payload_state.ExcludeCurrentPayload();
+}
+
+TEST(PayloadStateTest, ExcludeOnlyCanExcludables) {
+  PayloadState payload_state;
+  FakeSystemState fake_system_state;
+  StrictMock<MockExcluder> mock_excluder;
+  EXPECT_CALL(*fake_system_state.mock_update_attempter(), GetExcluder())
+      .WillOnce(Return(&mock_excluder));
+  EXPECT_TRUE(payload_state.Initialize(&fake_system_state));
+
+  OmahaResponse response;
+  response.packages.push_back(
+      {.payload_urls = {"http://test1a", "http://test2a"},
+       .size = 123456789,
+       .metadata_size = 58123,
+       .metadata_signature = "msign",
+       .hash = "hash",
+       .can_exclude = true});
+  payload_state.SetResponse(response);
+
+  EXPECT_CALL(mock_excluder, Exclude(utils::GetExclusionName("http://test1a")))
+      .WillOnce(Return(true));
+  payload_state.ExcludeCurrentPayload();
+}
+
+TEST(PayloadStateTest, IncrementFailureExclusionTest) {
+  PayloadState payload_state;
+  FakeSystemState fake_system_state;
+  StrictMock<MockExcluder> mock_excluder;
+  EXPECT_CALL(*fake_system_state.mock_update_attempter(), GetExcluder())
+      .WillOnce(Return(&mock_excluder));
+  EXPECT_TRUE(payload_state.Initialize(&fake_system_state));
+
+  OmahaResponse response;
+  // Critical package.
+  response.packages.push_back(
+      {.payload_urls = {"http://crit-test1a", "http://crit-test2a"},
+       .size = 123456789,
+       .metadata_size = 58123,
+       .metadata_signature = "msign",
+       .hash = "hash",
+       .can_exclude = false});
+  // Non-critical package.
+  response.packages.push_back(
+      {.payload_urls = {"http://test1a", "http://test2a"},
+       .size = 123456789,
+       .metadata_size = 58123,
+       .metadata_signature = "msign",
+       .hash = "hash",
+       .can_exclude = true});
+  response.max_failure_count_per_url = 2;
+  payload_state.SetResponse(response);
+
+  // Critical package won't be excluded.
+  // Increment twice as failure count allowed per URL is set to 2.
+  payload_state.IncrementFailureCount();
+  payload_state.IncrementFailureCount();
+
+  EXPECT_TRUE(payload_state.NextPayload());
+
+  // First increment failure should not exclude.
+  payload_state.IncrementFailureCount();
+
+  // Second increment failure should exclude.
+  EXPECT_CALL(mock_excluder, Exclude(utils::GetExclusionName("http://test1a")))
+      .WillOnce(Return(true));
+  payload_state.IncrementFailureCount();
+}
+
 }  // namespace chromeos_update_engine