Preliminary support for lz4diff + EROFS parsing

Most of the changes in this CL are to propagate compressed file
information from FilesystemInterface to delta_diff_utils.
The delta_diff_utils functions already take many parameters, so
instead of adding two more, we just pass the FilesystemInterface::File
object.

Test: th
Bug: 206729162

Change-Id: Iaf3df2acda294d2d23dd5ac4f4b3d9a708c46393
diff --git a/payload_consumer/bzip_extent_writer_unittest.cc b/payload_consumer/bzip_extent_writer_unittest.cc
index c93545a..9493f74 100644
--- a/payload_consumer/bzip_extent_writer_unittest.cc
+++ b/payload_consumer/bzip_extent_writer_unittest.cc
@@ -28,6 +28,7 @@
 #include "update_engine/common/test_utils.h"
 #include "update_engine/common/utils.h"
 #include "update_engine/payload_generator/extent_ranges.h"
+#include "update_engine/payload_generator/delta_diff_generator.h"
 
 using std::min;
 using std::string;
@@ -35,10 +36,6 @@
 
 namespace chromeos_update_engine {
 
-namespace {
-const uint32_t kBlockSize = 4096;
-}
-
 class BzipExtentWriterTest : public ::testing::Test {
  protected:
   void SetUp() override {
diff --git a/payload_consumer/delta_performer.cc b/payload_consumer/delta_performer.cc
index 2ac783a..19877db 100644
--- a/payload_consumer/delta_performer.cc
+++ b/payload_consumer/delta_performer.cc
@@ -571,6 +571,8 @@
       case InstallOperation::BROTLI_BSDIFF:
       case InstallOperation::PUFFDIFF:
       case InstallOperation::ZUCCHINI:
+      case InstallOperation::LZ4DIFF_PUFFDIFF:
+      case InstallOperation::LZ4DIFF_BSDIFF:
         op_result = PerformDiffOperation(op, error);
         OP_DURATION_HISTOGRAM(op_name, op_start_time);
         break;
diff --git a/payload_consumer/install_operation_executor.cc b/payload_consumer/install_operation_executor.cc
index e95130b..5318cc3 100644
--- a/payload_consumer/install_operation_executor.cc
+++ b/payload_consumer/install_operation_executor.cc
@@ -33,6 +33,8 @@
 #include <zucchini/zucchini.h>
 
 #include "update_engine/common/utils.h"
+#include "update_engine/lz4diff/lz4patch.h"
+#include "update_engine/lz4diff/lz4diff_compress.h"
 #include "update_engine/payload_consumer/bzip_extent_writer.h"
 #include "update_engine/payload_consumer/cached_file_descriptor.h"
 #include "update_engine/payload_consumer/extent_reader.h"
@@ -247,13 +249,33 @@
     case InstallOperation::ZUCCHINI:
       return ExecuteZucchiniOperation(
           operation, std::move(writer), source_fd, data, count);
+    case InstallOperation::LZ4DIFF_BSDIFF:
+    case InstallOperation::LZ4DIFF_PUFFDIFF:
+      return ExecuteLz4diffOperation(
+          operation, std::move(writer), source_fd, data, count);
     default:
       LOG(ERROR) << "Unexpected operation type when executing diff ops "
-                 << operation.type();
+                 << operation.Type_Name(operation.type());
       return false;
   }
 }
 
+bool InstallOperationExecutor::ExecuteLz4diffOperation(
+    const InstallOperation& operation,
+    std::unique_ptr<ExtentWriter> writer,
+    FileDescriptorPtr source_fd,
+    const void* data,
+    size_t count) {
+  brillo::Blob src_data;
+
+  brillo::Blob dst_data;
+  TEST_AND_RETURN_FALSE(utils::ReadExtents(
+      source_fd, operation.src_extents(), &src_data, block_size_));
+  TEST_AND_RETURN_FALSE(
+      Lz4Patch(ToStringView(src_data), ToStringView(data, count), &dst_data));
+  return writer->Write(dst_data.data(), dst_data.size());
+}
+
 bool InstallOperationExecutor::ExecuteSourceBsdiffOperation(
     const InstallOperation& operation,
     std::unique_ptr<ExtentWriter> writer,
diff --git a/payload_consumer/install_operation_executor.h b/payload_consumer/install_operation_executor.h
index f64318a..6c84faf 100644
--- a/payload_consumer/install_operation_executor.h
+++ b/payload_consumer/install_operation_executor.h
@@ -62,6 +62,11 @@
                                 FileDescriptorPtr source_fd,
                                 const void* data,
                                 size_t count);
+  bool ExecuteLz4diffOperation(const InstallOperation& operation,
+                               std::unique_ptr<ExtentWriter> writer,
+                               FileDescriptorPtr source_fd,
+                               const void* data,
+                               size_t count);
 
   size_t block_size_;
 };
diff --git a/payload_generator/deflate_utils.cc b/payload_generator/deflate_utils.cc
index 6791b3d..30e3f67 100644
--- a/payload_generator/deflate_utils.cc
+++ b/payload_generator/deflate_utils.cc
@@ -116,11 +116,20 @@
 
 }  // namespace
 
-bool IsFileExtensions(const string& name,
-                      const std::initializer_list<string>& extensions) {
-  return any_of(extensions.begin(), extensions.end(), [&name](const auto& ext) {
-    return base::EndsWith(name, ext, base::CompareCase::INSENSITIVE_ASCII);
-  });
+constexpr base::StringPiece ToStringPiece(std::string_view s) {
+  return base::StringPiece(s.data(), s.length());
+}
+
+bool IsFileExtensions(
+    const std::string_view name,
+    const std::initializer_list<std::string_view>& extensions) {
+  return any_of(extensions.begin(),
+                extensions.end(),
+                [name = ToStringPiece(name)](const auto& ext) {
+                  return base::EndsWith(name,
+                                        ToStringPiece(ext),
+                                        base::CompareCase::INSENSITIVE_ASCII);
+                });
 }
 
 ByteExtent ExpandToByteExtent(const BitExtent& extent) {
@@ -180,8 +189,8 @@
   // This check is needed to make sure the number of bytes in |over_extents|
   // does not exceed |base_extents|.
   auto last_extent = ExpandToByteExtent(over_extents->back());
-  TEST_AND_RETURN_FALSE(last_extent.offset + last_extent.length <=
-                        utils::BlocksInExtents(base_extents) * kBlockSize);
+  TEST_LE(last_extent.offset + last_extent.length,
+          utils::BlocksInExtents(base_extents) * kBlockSize);
 
   for (auto o_ext = over_extents->begin(); o_ext != over_extents->end();) {
     size_t gap_blocks = base_extents[0].start_block();
@@ -266,6 +275,28 @@
   return true;
 }
 
+bool DeflatePreprocessFileData(const std::string_view filename,
+                               const brillo::Blob& data,
+                               vector<puffin::BitExtent>* deflates) {
+  bool is_zip = IsFileExtensions(
+      filename, {".apk", ".zip", ".jar", ".zvoice", ".apex", "capex"});
+  bool is_gzip = IsFileExtensions(filename, {".gz", ".gzip", ".tgz"});
+  if (is_zip) {
+    if (!puffin::LocateDeflatesInZipArchive(data, deflates)) {
+      LOG(ERROR) << "Failed to locate deflates in zip file " << filename;
+      deflates->clear();
+      return false;
+    }
+  } else if (is_gzip) {
+    if (!puffin::LocateDeflatesInGzip(data, deflates)) {
+      LOG(ERROR) << "Failed to locate deflates in gzip file " << filename;
+      deflates->clear();
+      return false;
+    }
+  }
+  return true;
+}
+
 bool PreprocessPartitionFiles(const PartitionConfig& part,
                               vector<FilesystemInterface::File>* result_files,
                               bool extract_deflates) {
@@ -331,16 +362,10 @@
           data.resize(file.file_stat.st_size);
         }
         vector<puffin::BitExtent> deflates;
-        if (is_zip) {
-          if (!puffin::LocateDeflatesInZipArchive(data, &deflates)) {
-            LOG(ERROR) << "Failed to process deflate in zip " << file.name;
-            return false;
-          }
-        } else if (is_gzip) {
-          if (!puffin::LocateDeflatesInGzip(data, &deflates)) {
-            LOG(ERROR) << "Failed to process deflate in gzip " << file.name;
-            return false;
-          }
+        if (!DeflatePreprocessFileData(file.name, data, &deflates)) {
+          LOG(ERROR) << "Failed to preprocess deflate data in partition "
+                     << part.name;
+          return false;
         }
         // Shift the deflate's extent to the offset starting from the beginning
         // of the current partition; and the delta processor will align the
diff --git a/payload_generator/deflate_utils.h b/payload_generator/deflate_utils.h
index d69a551..517fc4e 100644
--- a/payload_generator/deflate_utils.h
+++ b/payload_generator/deflate_utils.h
@@ -95,8 +95,13 @@
 // Expands a BitExtents to a ByteExtent.
 puffin::ByteExtent ExpandToByteExtent(const puffin::BitExtent& extent);
 
-bool IsFileExtensions(const std::string& name,
-                      const std::initializer_list<std::string>& extensions);
+bool IsFileExtensions(
+    const std::string_view name,
+    const std::initializer_list<std::string_view>& extensions);
+
+bool DeflatePreprocessFileData(const std::string_view filename,
+                               const brillo::Blob& data,
+                               std::vector<puffin::BitExtent>* deflates);
 
 }  // namespace deflate_utils
 }  // namespace chromeos_update_engine
diff --git a/payload_generator/delta_diff_utils.cc b/payload_generator/delta_diff_utils.cc
index b0f3469..92cfe9e 100644
--- a/payload_generator/delta_diff_utils.cc
+++ b/payload_generator/delta_diff_utils.cc
@@ -17,6 +17,7 @@
 #include "update_engine/payload_generator/delta_diff_utils.h"
 
 #include <endian.h>
+#include <sys/user.h>
 #if defined(__clang__)
 // TODO(*): Remove these pragmas when b/35721782 is fixed.
 #pragma clang diagnostic push
@@ -69,6 +70,7 @@
 #include "update_engine/payload_generator/merge_sequence_generator.h"
 #include "update_engine/payload_generator/squashfs_filesystem.h"
 #include "update_engine/payload_generator/xz.h"
+#include "update_engine/lz4diff/lz4diff.h"
 
 using std::list;
 using std::map;
@@ -214,6 +216,25 @@
     brillo::Blob* data_blob) {
   CHECK(aop);
   CHECK(data_blob);
+  if (!old_block_info_.blocks.empty() && !new_block_info_.blocks.empty() &&
+      config_.enable_lz4diff &&
+      config_.version.OperationAllowed(InstallOperation::LZ4DIFF_BSDIFF) &&
+      config_.version.OperationAllowed(InstallOperation::LZ4DIFF_PUFFDIFF)) {
+    brillo::Blob patch;
+    InstallOperation::Type op_type;
+    if (Lz4Diff(old_data_,
+                new_data_,
+                old_block_info_,
+                new_block_info_,
+                &patch,
+                &op_type)) {
+      aop->op.set_type(op_type);
+      // LZ4DIFF is likely significantly better than BSDIFF/PUFFDIFF when
+      // working with EROFS. So no need to even try other diffing algorithms.
+      *data_blob = std::move(patch);
+      return true;
+    }
+  }
 
   const auto& version = config_.version;
   const uint64_t input_bytes = std::max(utils::BlocksInExtents(src_extents_),
@@ -227,8 +248,8 @@
 
     // Disable the specific diff algorithm when the data is too big.
     if (input_bytes > limit) {
-      LOG(INFO) << op_type << " ignored, data too big: " << input_bytes
-                << " bytes";
+      LOG(INFO) << op_type << " ignored, file " << aop->name
+                << " too big: " << input_bytes << " bytes";
       continue;
     }
 
@@ -290,6 +311,7 @@
                             data_blob->size(),
                             bsdiff_delta.size(),
                             src_extents_.size())) {
+    // VABC XOR won't work with compressed files just yet.
     if (config_.enable_vabc_xor) {
       StoreExtents(src_extents_, operation.mutable_src_extents());
       diff_utils::PopulateXorOps(aop, bsdiff_delta);
@@ -381,10 +403,8 @@
   FileDeltaProcessor(const string& old_part,
                      const string& new_part,
                      const PayloadGenerationConfig& config,
-                     const vector<Extent>& old_extents,
-                     const vector<Extent>& new_extents,
-                     const vector<puffin::BitExtent>& old_deflates,
-                     const vector<puffin::BitExtent>& new_deflates,
+                     const File& old_extents,
+                     const File& new_extents,
                      const string& name,
                      ssize_t chunk_blocks,
                      BlobFileWriter* blob_file)
@@ -393,9 +413,7 @@
         config_(config),
         old_extents_(old_extents),
         new_extents_(new_extents),
-        new_extents_blocks_(utils::BlocksInExtents(new_extents)),
-        old_deflates_(old_deflates),
-        new_deflates_(new_deflates),
+        new_extents_blocks_(utils::BlocksInExtents(new_extents.extents)),
         name_(name),
         chunk_blocks_(chunk_blocks),
         blob_file_(blob_file) {}
@@ -420,11 +438,9 @@
   const PayloadGenerationConfig& config_;
 
   // The block ranges of the old/new file within the src/tgt image
-  const vector<Extent> old_extents_;
-  const vector<Extent> new_extents_;
+  const File old_extents_;
+  const File new_extents_;
   const size_t new_extents_blocks_;
-  const vector<puffin::BitExtent> old_deflates_;
-  const vector<puffin::BitExtent> new_deflates_;
   const string name_;
   // Block limit of one aop.
   const ssize_t chunk_blocks_;
@@ -447,9 +463,6 @@
                      new_part_,
                      old_extents_,
                      new_extents_,
-                     old_deflates_,
-                     new_deflates_,
-                     name_,
                      chunk_blocks_,
                      config_,
                      blob_file_)) {
@@ -609,13 +622,13 @@
     // etc.
     // 2. dst extent is completely filtered, no duplicate blocks or zero blocks
     // whatsoever.
+    auto filtered_new_file = new_file;
+    filtered_new_file.extents = RemoveDuplicateBlocks(new_file_extents);
     file_delta_processors.emplace_back(old_part.path,
                                        new_part.path,
                                        config,
-                                       std::move(old_file.extents),
-                                       RemoveDuplicateBlocks(new_file_extents),
-                                       old_file.deflates,
-                                       new_file.deflates,
+                                       std::move(old_file),
+                                       std::move(filtered_new_file),
                                        new_file.name,  // operation name
                                        hard_chunk_blocks,
                                        blob_file);
@@ -638,17 +651,18 @@
     // We use the soft_chunk_blocks limit for the <non-file-data> as we don't
     // really know the structure of this data and we should not expect it to
     // have redundancy between partitions.
-    file_delta_processors.emplace_back(
-        old_part.path,
-        new_part.path,
-        config,
-        std::move(old_unvisited),
-        RemoveDuplicateBlocks(new_unvisited),
-        vector<puffin::BitExtent>{},  // old_deflates,
-        vector<puffin::BitExtent>{},  // new_deflates
-        "<non-file-data>",            // operation name
-        soft_chunk_blocks,
-        blob_file);
+    File old_file;
+    old_file.extents = old_unvisited;
+    File new_file;
+    new_file.extents = RemoveDuplicateBlocks(new_unvisited);
+    file_delta_processors.emplace_back(old_part.path,
+                                       new_part.path,
+                                       config,
+                                       old_file,
+                                       new_file,
+                                       "<non-file-data>",  // operation name
+                                       soft_chunk_blocks,
+                                       blob_file);
   }
 
   size_t max_threads = GetMaxThreads();
@@ -764,14 +778,15 @@
         aops->push_back({.name = "<zeros>", .op = operation});
       }
     } else {
+      File old_file;
+      File new_file;
+      new_file.name = "<zeros>";
+      new_file.extents = {extent};
       TEST_AND_RETURN_FALSE(DeltaReadFile(aops,
                                           "",
                                           new_part,
-                                          {},        // old_extents
-                                          {extent},  // new_extents
-                                          {},        // old_deflates
-                                          {},        // new_deflates
-                                          "<zeros>",
+                                          old_file,  // old_extents
+                                          new_file,  // new_extents
                                           chunk_blocks,
                                           config,
                                           blob_file));
@@ -823,17 +838,18 @@
   return true;
 }
 
-bool DeltaReadFile(vector<AnnotatedOperation>* aops,
-                   const string& old_part,
-                   const string& new_part,
-                   const vector<Extent>& old_extents,
-                   const vector<Extent>& new_extents,
-                   const vector<puffin::BitExtent>& old_deflates,
-                   const vector<puffin::BitExtent>& new_deflates,
-                   const string& name,
+bool DeltaReadFile(std::vector<AnnotatedOperation>* aops,
+                   const std::string& old_part,
+                   const std::string& new_part,
+                   const File& old_file,
+                   const File& new_file,
                    ssize_t chunk_blocks,
                    const PayloadGenerationConfig& config,
                    BlobFileWriter* blob_file) {
+  const auto& old_extents = old_file.extents;
+  const auto& new_extents = new_file.extents;
+  const auto& name = new_file.name;
+
   brillo::Blob data;
 
   uint64_t total_blocks = utils::BlocksInExtents(new_extents);
@@ -860,13 +876,13 @@
 
     // Now, insert into the list of operations.
     AnnotatedOperation aop;
-    aop.name = name;
+    aop.name = new_file.name;
     TEST_AND_RETURN_FALSE(ReadExtentsToDiff(old_part,
                                             new_part,
                                             old_extents_chunk,
                                             new_extents_chunk,
-                                            old_deflates,
-                                            new_deflates,
+                                            old_file,
+                                            new_file,
                                             config,
                                             &data,
                                             &aop));
@@ -1011,10 +1027,10 @@
 
 bool ReadExtentsToDiff(const string& old_part,
                        const string& new_part,
-                       const vector<Extent>& old_extents,
-                       const vector<Extent>& new_extents,
-                       const vector<puffin::BitExtent>& old_deflates,
-                       const vector<puffin::BitExtent>& new_deflates,
+                       const vector<Extent>& src_extents,
+                       const vector<Extent>& dst_extents,
+                       const File& old_file,
+                       const File& new_file,
                        const PayloadGenerationConfig& config,
                        brillo::Blob* out_data,
                        AnnotatedOperation* out_op) {
@@ -1023,18 +1039,16 @@
   InstallOperation& operation = aop.op;
 
   // We read blocks from old_extents and write blocks to new_extents.
-  uint64_t blocks_to_read = utils::BlocksInExtents(old_extents);
-  uint64_t blocks_to_write = utils::BlocksInExtents(new_extents);
+  const uint64_t blocks_to_read = utils::BlocksInExtents(src_extents);
+  const uint64_t blocks_to_write = utils::BlocksInExtents(dst_extents);
 
-  const vector<Extent>& src_extents = old_extents;
-  const vector<Extent>& dst_extents = new_extents;
   // All operations have dst_extents.
   StoreExtents(dst_extents, operation.mutable_dst_extents());
 
   // Read in bytes from new data.
   brillo::Blob new_data;
   TEST_AND_RETURN_FALSE(utils::ReadExtents(new_part,
-                                           new_extents,
+                                           dst_extents,
                                            &new_data,
                                            kBlockSize * blocks_to_write,
                                            kBlockSize));
@@ -1050,8 +1064,8 @@
       GenerateBestFullOperation(new_data, version, &data_blob, &op_type));
   operation.set_type(op_type);
 
-  brillo::Blob old_data;
   if (blocks_to_read > 0) {
+    brillo::Blob old_data;
     // Read old data.
     TEST_AND_RETURN_FALSE(utils::ReadExtents(old_part,
                                              src_extents,
@@ -1067,36 +1081,17 @@
       // No point in trying diff if zero blob size diff operation is
       // still worse than replace.
 
-      // Find all deflate positions inside the given extents and then put all
-      // deflates together because we have already read all the extents into
-      // one buffer.
-      vector<puffin::BitExtent> src_deflates;
-      TEST_AND_RETURN_FALSE(deflate_utils::FindAndCompactDeflates(
-          src_extents, old_deflates, &src_deflates));
-
-      vector<puffin::BitExtent> dst_deflates;
-      TEST_AND_RETURN_FALSE(deflate_utils::FindAndCompactDeflates(
-          dst_extents, new_deflates, &dst_deflates));
-
-      puffin::RemoveEqualBitExtents(
-          old_data, new_data, &src_deflates, &dst_deflates);
-      // See crbug.com/915559.
-      if (config.version.minor <= kPuffdiffMinorPayloadVersion) {
-        TEST_AND_RETURN_FALSE(puffin::RemoveDeflatesWithBadDistanceCaches(
-            old_data, &src_deflates));
-
-        TEST_AND_RETURN_FALSE(puffin::RemoveDeflatesWithBadDistanceCaches(
-            new_data, &dst_deflates));
-      }
       BestDiffGenerator best_diff_generator(old_data,
                                             new_data,
                                             src_extents,
                                             dst_extents,
-                                            std::move(src_deflates),
-                                            std::move(dst_deflates),
+                                            old_file,
+                                            new_file,
                                             config);
-      TEST_AND_RETURN_FALSE(
-          best_diff_generator.GenerateBestDiffOperation(&aop, &data_blob));
+      if (!best_diff_generator.GenerateBestDiffOperation(&aop, &data_blob)) {
+        LOG(INFO) << "Failed to generate diff for " << new_file.name;
+        return false;
+      }
     }
   }
 
@@ -1108,8 +1103,8 @@
   // parameters for those minor versions, the delta payloads will be invalid.
   if (operation.type() == InstallOperation::SOURCE_BSDIFF &&
       version.minor <= kOpSrcHashMinorPayloadVersion) {
-    operation.set_src_length(old_data.size());
-    operation.set_dst_length(new_data.size());
+    operation.set_src_length(blocks_to_read * kBlockSize);
+    operation.set_dst_length(blocks_to_write * kBlockSize);
   }
 
   // Embed extents in the operation. Replace (all variants), zero and discard
diff --git a/payload_generator/delta_diff_utils.h b/payload_generator/delta_diff_utils.h
index 8d1dca6..1fd1f46 100644
--- a/payload_generator/delta_diff_utils.h
+++ b/payload_generator/delta_diff_utils.h
@@ -14,8 +14,8 @@
 // limitations under the License.
 //
 
-#ifndef UPDATE_ENGINE_PAYLOAD_GENERATOR_DELTA_DIFF_UTILS_H_
-#define UPDATE_ENGINE_PAYLOAD_GENERATOR_DELTA_DIFF_UTILS_H_
+#ifndef PAYLOAD_GENERATOR_DELTA_DIFF_UTILS_H_
+#define PAYLOAD_GENERATOR_DELTA_DIFF_UTILS_H_
 
 #include <map>
 #include <string>
@@ -25,6 +25,7 @@
 #include <brillo/secure_blob.h>
 #include <puffin/puffdiff.h>
 
+#include "payload_generator/deflate_utils.h"
 #include "update_engine/payload_generator/annotated_operation.h"
 #include "update_engine/payload_generator/extent_ranges.h"
 #include "update_engine/payload_generator/payload_generation_config.h"
@@ -33,6 +34,7 @@
 namespace chromeos_update_engine {
 
 namespace diff_utils {
+using File = FilesystemInterface::File;
 
 // Create operations in |aops| to produce all the blocks in the |new_part|
 // partition using the filesystem opened in that PartitionConfig.
@@ -85,11 +87,8 @@
 bool DeltaReadFile(std::vector<AnnotatedOperation>* aops,
                    const std::string& old_part,
                    const std::string& new_part,
-                   const std::vector<Extent>& old_extents,
-                   const std::vector<Extent>& new_extents,
-                   const std::vector<puffin::BitExtent>& old_deflates,
-                   const std::vector<puffin::BitExtent>& new_deflates,
-                   const std::string& name,
+                   const File& old_file,
+                   const File& new_file,
                    ssize_t chunk_blocks,
                    const PayloadGenerationConfig& config,
                    BlobFileWriter* blob_file);
@@ -108,8 +107,8 @@
                        const std::string& new_part,
                        const std::vector<Extent>& old_extents,
                        const std::vector<Extent>& new_extents,
-                       const std::vector<puffin::BitExtent>& old_deflates,
-                       const std::vector<puffin::BitExtent>& new_deflates,
+                       const File& old_file,
+                       const File& new_file,
                        const PayloadGenerationConfig& config,
                        brillo::Blob* out_data,
                        AnnotatedOperation* out_op);
@@ -162,22 +161,49 @@
 
 // A utility class that tries different algorithms and pick the patch with the
 // smallest size.
+
 class BestDiffGenerator {
  public:
   BestDiffGenerator(const brillo::Blob& old_data,
                     const brillo::Blob& new_data,
                     const std::vector<Extent>& src_extents,
                     const std::vector<Extent>& dst_extents,
-                    const std::vector<puffin::BitExtent>& old_deflates,
-                    const std::vector<puffin::BitExtent>& new_deflates,
+                    const File& old_file,
+                    const File& new_file,
                     const PayloadGenerationConfig& config)
       : old_data_(old_data),
         new_data_(new_data),
         src_extents_(src_extents),
         dst_extents_(dst_extents),
-        old_deflates_(old_deflates),
-        new_deflates_(new_deflates),
-        config_(config) {}
+        old_deflates_(old_file.deflates),
+        new_deflates_(new_file.deflates),
+        old_block_info_(old_file.compressed_file_info),
+        new_block_info_(new_file.compressed_file_info),
+        config_(config) {
+    using std::vector;
+    // Find all deflate positions inside the given extents and then put all
+    // deflates together because we have already read all the extents into
+    // one buffer.
+    vector<puffin::BitExtent> src_deflates;
+    TEST_AND_RETURN(deflate_utils::FindAndCompactDeflates(
+        src_extents_, old_deflates_, &src_deflates));
+
+    vector<puffin::BitExtent> dst_deflates;
+    TEST_AND_RETURN(deflate_utils::FindAndCompactDeflates(
+        dst_extents_, new_deflates_, &dst_deflates));
+    puffin::RemoveEqualBitExtents(
+        old_data_, new_data_, &src_deflates, &dst_deflates);
+    // See crbug.com/915559.
+    if (config.version.minor <= kPuffdiffMinorPayloadVersion) {
+      CHECK(
+          puffin::RemoveDeflatesWithBadDistanceCaches(old_data, &src_deflates));
+
+      CHECK(
+          puffin::RemoveDeflatesWithBadDistanceCaches(new_data, &dst_deflates));
+    }
+    old_deflates_ = std::move(src_deflates);
+    new_deflates_ = std::move(dst_deflates);
+  }
 
   // Tries different algorithms and compares their patch sizes with the
   // compressed full operation data in |data_blob|. If the size is smaller,
@@ -201,12 +227,14 @@
   bool TryZucchiniAndUpdateOperation(AnnotatedOperation* aop,
                                      brillo::Blob* data_blob);
 
-  const brillo::Blob& old_data_;
-  const brillo::Blob& new_data_;
+  brillo::Blob old_data_;
+  brillo::Blob new_data_;
   const std::vector<Extent>& src_extents_;
   const std::vector<Extent>& dst_extents_;
-  const std::vector<puffin::BitExtent>& old_deflates_;
-  const std::vector<puffin::BitExtent>& new_deflates_;
+  std::vector<puffin::BitExtent> old_deflates_;
+  std::vector<puffin::BitExtent> new_deflates_;
+  const CompressedFile& old_block_info_;
+  const CompressedFile& new_block_info_;
   const PayloadGenerationConfig& config_;
 };
 
@@ -214,4 +242,4 @@
 
 }  // namespace chromeos_update_engine
 
-#endif  // UPDATE_ENGINE_PAYLOAD_GENERATOR_DELTA_DIFF_UTILS_H_
+#endif  // PAYLOAD_GENERATOR_DELTA_DIFF_UTILS_H_
diff --git a/payload_generator/delta_diff_utils_unittest.cc b/payload_generator/delta_diff_utils_unittest.cc
index 489081b..7348d74 100644
--- a/payload_generator/delta_diff_utils_unittest.cc
+++ b/payload_generator/delta_diff_utils_unittest.cc
@@ -27,6 +27,7 @@
 #include <bsdiff/patch_writer.h>
 #include <gtest/gtest.h>
 
+#include "payload_generator/filesystem_interface.h"
 #include "update_engine/common/test_utils.h"
 #include "update_engine/common/utils.h"
 #include "update_engine/payload_generator/delta_diff_generator.h"
@@ -215,8 +216,8 @@
         new_part_.path,
         old_extents,
         new_extents,
-        {},  // old_deflates
-        {},  // new_deflates
+        {},  // old_file
+        {},  // new_file
         {.version = PayloadVersion(kBrilloMajorPayloadVersion,
                                    kSourceMinorPayloadVersion)},
         &data,
@@ -329,16 +330,15 @@
   PayloadGenerationConfig config{
       .version = PayloadVersion(kBrilloMajorPayloadVersion,
                                 kBrotliBsdiffMinorPayloadVersion)};
-  ASSERT_TRUE(diff_utils::ReadExtentsToDiff(
-      old_part_.path,
-      new_part_.path,
-      old_extents,
-      new_extents,
-      empty,  // old_deflates
-      empty,  // new_deflates
-      config,
-      &data,
-      &aop));
+  ASSERT_TRUE(diff_utils::ReadExtentsToDiff(old_part_.path,
+                                            new_part_.path,
+                                            old_extents,
+                                            new_extents,
+                                            {},  // old_file
+                                            {},  // new_file
+                                            config,
+                                            &data,
+                                            &aop));
   auto& op = aop.op;
   ASSERT_FALSE(data.empty());
   ASSERT_TRUE(op.has_type());
@@ -369,18 +369,17 @@
   // Zucchini is only enabled on files with certain extensions
   aop.name = "data.so";
 
-  std::vector<puffin::BitExtent> empty;
+  const FilesystemInterface::File empty;
   PayloadGenerationConfig config{
       .version = PayloadVersion(kBrilloMajorPayloadVersion,
                                 kZucchiniMinorPayloadVersion)};
-  diff_utils::BestDiffGenerator best_diff_generator(
-      src_data_blob,
-      dst_data_blob,
-      old_extents,
-      new_extents,
-      empty,
-      empty,
-      config);
+  diff_utils::BestDiffGenerator best_diff_generator(src_data_blob,
+                                                    dst_data_blob,
+                                                    old_extents,
+                                                    new_extents,
+                                                    empty,
+                                                    empty,
+                                                    config);
   ASSERT_TRUE(best_diff_generator.GenerateBestDiffOperation(
       {{InstallOperation::ZUCCHINI, 1024 * 1024}}, &aop, &data));
 
@@ -414,18 +413,17 @@
   AnnotatedOperation aop;
   aop.op.set_type(InstallOperation::REPLACE_XZ);
 
-  std::vector<puffin::BitExtent> empty;
+  const FilesystemInterface::File empty;
   PayloadGenerationConfig config{
       .version = PayloadVersion(kBrilloMajorPayloadVersion,
                                 kZucchiniMinorPayloadVersion)};
-  diff_utils::BestDiffGenerator best_diff_generator(
-      src_data_blob,
-      dst_data_blob,
-      old_extents,
-      new_extents,
-      empty,
-      empty,
-      config);
+  diff_utils::BestDiffGenerator best_diff_generator(src_data_blob,
+                                                    dst_data_blob,
+                                                    old_extents,
+                                                    new_extents,
+                                                    empty,
+                                                    empty,
+                                                    config);
   ASSERT_TRUE(best_diff_generator.GenerateBestDiffOperation(
       {{InstallOperation::ZUCCHINI, 1024 * 1024}}, &aop, &data));
 
@@ -450,20 +448,19 @@
   brillo::Blob data;
   AnnotatedOperation aop;
 
-  std::vector<puffin::BitExtent> empty;
+  const FilesystemInterface::File empty;
   PayloadGenerationConfig config{
       .version = PayloadVersion(kMaxSupportedMajorPayloadVersion,
                                 kMaxSupportedMinorPayloadVersion)};
-  ASSERT_TRUE(diff_utils::ReadExtentsToDiff(
-      old_part_.path,
-      new_part_.path,
-      extents,
-      extents,
-      empty,  // old_deflates
-      empty,  // new_deflates
-      config,
-      &data,
-      &aop));
+  ASSERT_TRUE(diff_utils::ReadExtentsToDiff(old_part_.path,
+                                            new_part_.path,
+                                            extents,
+                                            extents,
+                                            empty,  // old_file
+                                            empty,  // new_file
+                                            config,
+                                            &data,
+                                            &aop));
   auto& op = aop.op;
   ASSERT_FALSE(data.empty());
   ASSERT_TRUE(op.has_type());
diff --git a/payload_generator/filesystem_interface.h b/payload_generator/filesystem_interface.h
index 2d2846d..0c2f05e 100644
--- a/payload_generator/filesystem_interface.h
+++ b/payload_generator/filesystem_interface.h
@@ -35,6 +35,7 @@
 #include <puffin/utils.h>
 
 #include "update_engine/lz4diff/lz4diff_format.h"
+#include "update_engine/lz4diff/lz4diff.h"
 #include "update_engine/update_metadata.pb.h"
 
 namespace chromeos_update_engine {
@@ -44,12 +45,13 @@
   // This represents a file or pseudo-file in the filesystem. It can include
   // all sort of files, like symlinks, hardlinks, directories and even a file
   // entry representing the metadata, free space, journaling data, etc.
+
   struct File {
     File() { memset(&file_stat, 0, sizeof(file_stat)); }
 
     // The stat struct for the file. This is invalid (inode 0) for some
     // pseudo-files.
-    struct stat file_stat;
+    struct stat file_stat = {};
 
     // The absolute path to the file inside the filesystem, for example,
     // "/usr/bin/bash". For pseudo-files, like blocks associated to internal
diff --git a/payload_generator/generate_delta_main.cc b/payload_generator/generate_delta_main.cc
index 4ede12b..1ca5b9d 100644
--- a/payload_generator/generate_delta_main.cc
+++ b/payload_generator/generate_delta_main.cc
@@ -434,6 +434,10 @@
                 "bz2:brotli",
                 "Colon ':' separated list of compressors. Allowed valures are "
                 "bz2 and brotli.");
+  DEFINE_bool(
+      enable_lz4diff,
+      false,
+      "Whether to enable LZ4diff feature when processing EROFS images.");
 
   brillo::FlagHelper::Init(
       argc,
@@ -551,6 +555,7 @@
   }
 
   payload_config.enable_vabc_xor = FLAGS_enable_vabc_xor;
+  payload_config.enable_lz4diff = FLAGS_enable_lz4diff;
   payload_config.ParseCompressorTypes(FLAGS_compressor_types);
 
   if (!FLAGS_new_partitions.empty()) {
diff --git a/payload_generator/payload_generation_config.h b/payload_generator/payload_generation_config.h
index 36ec676..b3f3c74 100644
--- a/payload_generator/payload_generation_config.h
+++ b/payload_generator/payload_generation_config.h
@@ -240,6 +240,9 @@
   // Whether to enable VABC xor op
   bool enable_vabc_xor = false;
 
+  // Whether to enable LZ4diff ops for EROFS images. Off by default.
+  bool enable_lz4diff = false;
+
   std::vector<bsdiff::CompressorType> compressors{
       bsdiff::CompressorType::kBZ2, bsdiff::CompressorType::kBrotli};
 };