update_engine: Introduce FilesystemInterface abstraction.

The interaction with the filesystem in the payload generation process
is hard-coded in several places, making it hard to mock out or use a
different filesystem like squashfs for delta generation. For example,
the metadata, regular file data and non-file data are handled by three
different functions in a similar way, but with different code.

This patch introduces a filesystem abstraction to map files or
pseudo-files (like the metadata, free-space, etc) into the same interface.
The interface includes three implementations: for parsing ext2 filesystems
using ext2fs (already used by the metadata parsing but not by the file
data processing), a raw one for monolithic partitions like the kernel
and a fake one used for testing without requiring to build/parse a real
ext2 filesystem.

BUG=chromium:331965
TEST=FEATURES=test emerge-link update_engine

Change-Id: I1e14cf8f3883c8e9a1d471c8193c8da60776aa7c
Reviewed-on: https://chromium-review.googlesource.com/275803
Reviewed-by: Don Garrett <[email protected]>
Tested-by: Alex Deymo <[email protected]>
Commit-Queue: Alex Deymo <[email protected]>
diff --git a/generate_image.gypi b/generate_image.gypi
new file mode 100644
index 0000000..025e925
--- /dev/null
+++ b/generate_image.gypi
@@ -0,0 +1,27 @@
+{
+  'variables': {
+    'out_dir': '<(SHARED_INTERMEDIATE_DIR)/<(image_out_dir)',
+    'generator': 'sample_images/generate_image.sh',
+  },
+  'rules': [
+    {
+      'rule_name': 'generate_image',
+      'extension': 'txt',
+      'inputs': [
+        '<(generator)',
+        '<(RULE_INPUT_PATH)',
+      ],
+      'outputs': [
+        '<(out_dir)/<(RULE_INPUT_ROOT).img',
+      ],
+      'action': [
+        '<(generator)',
+        '<(RULE_INPUT_PATH)',
+        '<(out_dir)',
+      ],
+      'msvs_cygwin_shell': 0,
+      'message': 'Generating image from <(RULE_INPUT_PATH)',
+      'process_outputs_as_sources': 1,
+    },
+  ],
+}
diff --git a/payload_generator/ext2_filesystem.cc b/payload_generator/ext2_filesystem.cc
new file mode 100644
index 0000000..d3e0930
--- /dev/null
+++ b/payload_generator/ext2_filesystem.cc
@@ -0,0 +1,306 @@
+// Copyright 2015 The Chromium OS Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "update_engine/payload_generator/ext2_filesystem.h"
+
+#include <et/com_err.h>
+#include <ext2fs/ext2_io.h>
+#include <ext2fs/ext2fs.h>
+
+#include <map>
+#include <set>
+
+#include <base/logging.h>
+#include <base/strings/stringprintf.h>
+
+#include "update_engine/extent_ranges.h"
+#include "update_engine/payload_generator/extent_utils.h"
+#include "update_engine/update_metadata.pb.h"
+#include "update_engine/utils.h"
+
+using std::set;
+using std::string;
+using std::unique_ptr;
+using std::vector;
+
+namespace chromeos_update_engine {
+
+namespace {
+// Processes all blocks belonging to an inode and adds them to the extent list.
+// This function should match the prototype expected by ext2fs_block_iterate2().
+int ProcessInodeAllBlocks(ext2_filsys fs,
+                          blk_t* blocknr,
+                          e2_blkcnt_t blockcnt,
+                          blk_t ref_blk,
+                          int ref_offset,
+                          void* priv) {
+  vector<Extent>* extents = static_cast<vector<Extent>*>(priv);
+  AppendBlockToExtents(extents, *blocknr);
+  return 0;
+}
+
+// Processes only indirect, double indirect or triple indirect metadata
+// blocks belonging to an inode. This function should match the prototype of
+// ext2fs_block_iterate2().
+int AddMetadataBlocks(ext2_filsys fs,
+                      blk_t* blocknr,
+                      e2_blkcnt_t blockcnt,
+                      blk_t ref_blk,
+                      int ref_offset,
+                      void* priv) {
+  set<uint64_t>* blocks = static_cast<set<uint64_t>*>(priv);
+  // If |blockcnt| is non-negative, |blocknr| points to the physical block
+  // number.
+  // If |blockcnt| is negative, it is one of the values: BLOCK_COUNT_IND,
+  // BLOCK_COUNT_DIND, BLOCK_COUNT_TIND or BLOCK_COUNT_TRANSLATOR and
+  // |blocknr| points to a block in the first three cases. The last case is
+  // only used by GNU Hurd, so we shouldn't see those cases here.
+  if (blockcnt == BLOCK_COUNT_IND || blockcnt == BLOCK_COUNT_DIND ||
+      blockcnt == BLOCK_COUNT_TIND) {
+    blocks->insert(*blocknr);
+  }
+  return 0;
+}
+
+struct UpdateFileAndAppendState {
+  std::map<ext2_ino_t, FilesystemInterface::File>* inodes = nullptr;
+  set<ext2_ino_t>* used_inodes = nullptr;
+  vector<FilesystemInterface::File>* files = nullptr;
+  ext2_filsys filsys;
+};
+
+int UpdateFileAndAppend(ext2_ino_t dir,
+                        int entry,
+                        struct ext2_dir_entry *dirent,
+                        int offset,
+                        int blocksize,
+                        char *buf,
+                        void *priv_data) {
+  UpdateFileAndAppendState* state =
+      static_cast<UpdateFileAndAppendState*>(priv_data);
+  uint32_t file_type = dirent->name_len >> 8;
+  // Directories can't have hard links, and they are added from the outer loop.
+  if (file_type == EXT2_FT_DIR)
+    return 0;
+
+  auto ino_file = state->inodes->find(dirent->inode);
+  if (ino_file == state->inodes->end())
+    return 0;
+  auto dir_file = state->inodes->find(dir);
+  if (dir_file == state->inodes->end())
+    return 0;
+  string basename(dirent->name, dirent->name_len & 0xff);
+  ino_file->second.name = dir_file->second.name;
+  if (dir_file->second.name != "/")
+    ino_file->second.name += "/";
+  ino_file->second.name += basename;
+
+  // Append this file to the output. If the file has a hard link, it will be
+  // added twice to the output, but with different names, which is ok. That will
+  // help identify all the versions of the same file.
+  state->files->push_back(ino_file->second);
+  state->used_inodes->insert(dirent->inode);
+  return 0;
+}
+
+}  // namespace
+
+unique_ptr<Ext2Filesystem> Ext2Filesystem::CreateFromFile(
+    const string& filename) {
+  if (filename.empty())
+    return nullptr;
+  unique_ptr<Ext2Filesystem> result(new Ext2Filesystem());
+
+  errcode_t err = ext2fs_open(filename.c_str(),
+                              0,  // flags (read only)
+                              0,  // superblock block number
+                              0,  // block_size (autodetect)
+                              unix_io_manager,
+                              &result->filsys_);
+  if (err) {
+    LOG(ERROR) << "Opening ext2fs " << filename << " (error " << err << ")";
+    return nullptr;
+  }
+  return result;
+}
+
+Ext2Filesystem::~Ext2Filesystem() {
+  ext2fs_free(filsys_);
+}
+
+size_t Ext2Filesystem::GetBlockSize() const {
+  return filsys_->blocksize;
+}
+
+size_t Ext2Filesystem::GetBlockCount() const {
+  return ext2fs_blocks_count(filsys_->super);
+}
+
+bool Ext2Filesystem::GetFiles(vector<File>* files) const {
+  TEST_AND_RETURN_FALSE_ERRCODE(ext2fs_read_inode_bitmap(filsys_));
+
+  ext2_inode_scan iscan;
+  TEST_AND_RETURN_FALSE_ERRCODE(
+      ext2fs_open_inode_scan(filsys_, 0 /* buffer_blocks */, &iscan));
+
+  std::map<ext2_ino_t, File> inodes;
+
+  // List of directories. We need to first parse all the files in a directory
+  // to later fix the absolute paths.
+  vector<ext2_ino_t> directories;
+
+  set<uint64_t> inode_blocks;
+
+  // Inode number and inode contents filled in by each step of the inode scan.
+  ext2_ino_t it_ino;
+  ext2_inode it_inode;
+
+  bool ok = true;
+  while (true) {
+    errcode_t error = ext2fs_get_next_inode(iscan, &it_ino, &it_inode);
+    if (error) {
+      LOG(ERROR) << "Failed to retrieve next inode (" << error << ")";
+      ok = false;
+      break;
+    }
+    if (it_ino == 0)
+      break;
+
+    // Skip inodes that are not in use.
+    if (!ext2fs_test_inode_bitmap(filsys_->inode_map, it_ino))
+      continue;
+
+    File& file = inodes[it_ino];
+    if (it_ino == EXT2_RESIZE_INO) {
+      file.name = "<group-descriptors>";
+    } else {
+      file.name = base::StringPrintf("<inode-%u>", it_ino);
+    }
+
+    memset(&file.file_stat, 0, sizeof(file.file_stat));
+    file.file_stat.st_ino = it_ino;
+    file.file_stat.st_mode = it_inode.i_mode;
+    file.file_stat.st_nlink = it_inode.i_links_count;
+    file.file_stat.st_uid = it_inode.i_uid;
+    file.file_stat.st_gid = it_inode.i_gid;
+    file.file_stat.st_size = it_inode.i_size;
+    file.file_stat.st_blksize = filsys_->blocksize;
+    file.file_stat.st_blocks = it_inode.i_blocks;
+    file.file_stat.st_atime = it_inode.i_atime;
+    file.file_stat.st_mtime = it_inode.i_mtime;
+    file.file_stat.st_ctime = it_inode.i_ctime;
+
+    bool is_dir = (ext2fs_check_directory(filsys_, it_ino) == 0);
+    if (is_dir)
+      directories.push_back(it_ino);
+
+    if (!ext2fs_inode_has_valid_blocks(&it_inode))
+      continue;
+
+    // Process the inode data and metadata blocks.
+    // For normal files, inode blocks are indirect, double indirect
+    // and triple indirect blocks (no data blocks). For directories and
+    // the journal, all blocks are considered metadata blocks.
+    int flags = it_ino < EXT2_GOOD_OLD_FIRST_INO ? 0 : BLOCK_FLAG_DATA_ONLY;
+    error = ext2fs_block_iterate2(filsys_, it_ino, flags,
+                                  nullptr,  // block_buf
+                                  ProcessInodeAllBlocks,
+                                  &file.extents);
+
+    if (error) {
+      LOG(ERROR) << "Failed to enumerate inode " << it_ino
+                << " blocks (" << error << ")";
+      continue;
+    }
+    if (it_ino >= EXT2_GOOD_OLD_FIRST_INO) {
+      ext2fs_block_iterate2(filsys_, it_ino, 0, nullptr,
+                            AddMetadataBlocks,
+                            &inode_blocks);
+    }
+  }
+  ext2fs_close_inode_scan(iscan);
+  if (!ok)
+    return false;
+
+  // The set of inodes already added to the output. There can be fewer elements
+  // here than in |files| since the latter can contain repeated inodes due to
+  // hard-linked files.
+  set<ext2_ino_t> used_inodes;
+
+  UpdateFileAndAppendState priv_data;
+  priv_data.inodes = &inodes;
+  priv_data.used_inodes = &used_inodes;
+  priv_data.files = files;
+  priv_data.filsys = filsys_;
+
+  files->clear();
+  // Iterate over all the files of each directory to update the name and add it.
+  for (ext2_ino_t dir_ino : directories) {
+    char* dir_name = nullptr;
+    errcode_t error = ext2fs_get_pathname(filsys_, dir_ino, 0, &dir_name);
+    if (error) {
+      // Failing to read a directory name is not fatal; it is just skipped.
+      LOG(WARNING) << "Reading directory name on inode " << dir_ino
+                   << " (error " << error << ")";
+      inodes[dir_ino].name = base::StringPrintf("<dir-%u>", dir_ino);
+    } else {
+      inodes[dir_ino].name = dir_name;
+      files->push_back(inodes[dir_ino]);
+      used_inodes.insert(dir_ino);
+    }
+    ext2fs_free_mem(&dir_name);  // Allocated by ext2fs_get_pathname().
+
+    error = ext2fs_dir_iterate2(
+        filsys_, dir_ino, 0, nullptr /* block_buf */,
+        UpdateFileAndAppend, &priv_data);
+    if (error) {
+      LOG(WARNING) << "Failed to enumerate files in directory "
+                   << inodes[dir_ino].name << " (error " << error << ")";
+    }
+  }
+
+  // Add <inode-blocks> file with the indirect blocks collected from the inodes.
+  File inode_file;
+  inode_file.name = "<inode-blocks>";
+  for (uint64_t block : inode_blocks) {
+    AppendBlockToExtents(&inode_file.extents, block);
+  }
+  files->push_back(inode_file);
+
+  // Add <free-space> blocks.
+  errcode_t error = ext2fs_read_block_bitmap(filsys_);
+  if (error) {
+    LOG(ERROR) << "Reading the blocks bitmap (error " << error << ")";
+  } else {
+    File free_space;
+    free_space.name = "<free-space>";
+    blk64_t blk_start = ext2fs_get_block_bitmap_start2(filsys_->block_map);
+    blk64_t blk_end = ext2fs_get_block_bitmap_end2(filsys_->block_map);
+    for (blk64_t block = blk_start; block < blk_end; block++) {
+      if (!ext2fs_test_block_bitmap2(filsys_->block_map, block))
+        AppendBlockToExtents(&free_space.extents, block);
+    }
+    files->push_back(free_space);
+  }
+
+  // Add all the unreachable files plus the pseudo-files with an inode. Since
+  // these inodes aren't files in the filesystem, ignore the empty ones.
+  for (const auto& ino_file : inodes) {
+    if (used_inodes.find(ino_file.first) != used_inodes.end())
+      continue;
+    if (ino_file.second.extents.empty())
+      continue;
+
+    File file = ino_file.second;
+    ExtentRanges ranges;
+    ranges.AddExtents(file.extents);
+    file.extents = ranges.GetExtentsForBlockCount(ranges.blocks());
+
+    files->push_back(file);
+  }
+
+  return true;
+}
+
+}  // namespace chromeos_update_engine
diff --git a/payload_generator/ext2_filesystem.h b/payload_generator/ext2_filesystem.h
new file mode 100644
index 0000000..c87849c
--- /dev/null
+++ b/payload_generator/ext2_filesystem.h
@@ -0,0 +1,54 @@
+// Copyright 2015 The Chromium OS Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef UPDATE_ENGINE_PAYLOAD_GENERATOR_EXT2_FILESYSTEM_H_
+#define UPDATE_ENGINE_PAYLOAD_GENERATOR_EXT2_FILESYSTEM_H_
+
+#include "update_engine/payload_generator/filesystem_interface.h"
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include <ext2fs/ext2fs.h>
+
+namespace chromeos_update_engine {
+
+class Ext2Filesystem : public FilesystemInterface {
+ public:
+  // Creates an Ext2Filesystem from a ext2 formatted filesystem stored in a
+  // file. The file doesn't need to be loop-back mounted.
+  static std::unique_ptr<Ext2Filesystem> CreateFromFile(
+      const std::string& filename);
+  virtual ~Ext2Filesystem();
+
+  // FilesystemInterface overrides.
+  size_t GetBlockSize() const override;
+  size_t GetBlockCount() const override;
+
+  // GetFiles will return one FilesystemInterface::File for every file and every
+  // directory in the filesystem. Hard-linked files will appear in the list
+  // several times with the same list of blocks.
+  // In addition to actual files, it also returns these pseudo-files:
+  //  <free-space>: With all the unallocated data-blocks.
+  //  <inode-blocks>: With all the data-blocks for second and third level inodes
+  //    of all the files.
+  //  <group-descriptors>: With the block group descriptor and their reserved
+  //    space.
+  //  <metadata>: With the rest of ext2 metadata blocks, such as superblocks
+  //    and bitmap tables.
+  bool GetFiles(std::vector<File>* files) const override;
+
+ private:
+  Ext2Filesystem() = default;
+
+  // The ext2 main data structure holding the filesystem.
+  ext2_filsys filsys_ = nullptr;
+
+  DISALLOW_COPY_AND_ASSIGN(Ext2Filesystem);
+};
+
+}  // namespace chromeos_update_engine
+
+#endif  // UPDATE_ENGINE_PAYLOAD_GENERATOR_EXT2_FILESYSTEM_H_
diff --git a/payload_generator/ext2_filesystem_unittest.cc b/payload_generator/ext2_filesystem_unittest.cc
new file mode 100644
index 0000000..122ba02
--- /dev/null
+++ b/payload_generator/ext2_filesystem_unittest.cc
@@ -0,0 +1,180 @@
+// Copyright 2015 The Chromium OS Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "update_engine/payload_generator/ext2_filesystem.h"
+
+#include <unistd.h>
+
+#include <map>
+#include <set>
+#include <string>
+#include <vector>
+
+#include <base/format_macros.h>
+#include <base/logging.h>
+#include <base/strings/stringprintf.h>
+#include <base/strings/string_number_conversions.h>
+#include <base/strings/string_util.h>
+#include <gtest/gtest.h>
+
+#include "update_engine/payload_generator/extent_utils.h"
+#include "update_engine/test_utils.h"
+#include "update_engine/utils.h"
+
+using chromeos_update_engine::test_utils::System;
+using std::map;
+using std::set;
+using std::string;
+using std::unique_ptr;
+using std::vector;
+
+namespace chromeos_update_engine {
+
+namespace {
+
+uint64_t kDefaultFilesystemSize = 4 * 1024 * 1024;
+size_t kDefaultFilesystemBlockCount = 1024;
+size_t kDefaultFilesystemBlockSize = 4096;
+
+// Checks that all the blocks in |extents| are in the range [0, total_blocks).
+void ExpectBlocksInRange(const vector<Extent>& extents, uint64_t total_blocks) {
+  for (const Extent& extent : extents) {
+    EXPECT_LE(0, extent.start_block());
+    EXPECT_LE(extent.start_block() + extent.num_blocks(), total_blocks);
+  }
+}
+
+}  // namespace
+
+
+class Ext2FilesystemTest : public ::testing::Test {
+ protected:
+  void SetUp() override {
+    ASSERT_TRUE(utils::MakeTempFile("Ext2FilesystemTest-XXXXXX",
+                                    &fs_filename_, nullptr));
+    ASSERT_EQ(0, truncate(fs_filename_.c_str(), kDefaultFilesystemSize));
+  }
+
+  void TearDown() override {
+    unlink(fs_filename_.c_str());
+  }
+
+  string fs_filename_;
+};
+
+TEST_F(Ext2FilesystemTest, InvalidFilesystem) {
+  unique_ptr<Ext2Filesystem> fs = Ext2Filesystem::CreateFromFile(fs_filename_);
+  ASSERT_EQ(nullptr, fs.get());
+
+  fs = Ext2Filesystem::CreateFromFile("/path/to/invalid/file");
+  ASSERT_EQ(nullptr, fs.get());
+}
+
+TEST_F(Ext2FilesystemTest, EmptyFilesystem) {
+  EXPECT_EQ(0, System(base::StringPrintf(
+      "/sbin/mkfs.ext2 -q -b %" PRIuS " -F %s",
+      kDefaultFilesystemBlockSize, fs_filename_.c_str())));
+  unique_ptr<Ext2Filesystem> fs = Ext2Filesystem::CreateFromFile(fs_filename_);
+
+  ASSERT_NE(nullptr, fs.get());
+  EXPECT_EQ(kDefaultFilesystemBlockCount, fs->GetBlockCount());
+  EXPECT_EQ(kDefaultFilesystemBlockSize, fs->GetBlockSize());
+
+  vector<FilesystemInterface::File> files;
+  EXPECT_TRUE(fs->GetFiles(&files));
+
+  map<string, FilesystemInterface::File> map_files;
+  for (const auto& file : files) {
+    EXPECT_EQ(map_files.end(), map_files.find(file.name))
+        << "File " << file.name << " repeated in the list.";
+    map_files[file.name] = file;
+    ExpectBlocksInRange(file.extents, fs->GetBlockCount());
+  }
+  EXPECT_EQ(2, map_files["/"].file_stat.st_ino);
+  EXPECT_FALSE(map_files["<free-space>"].extents.empty());
+}
+
+// This test parses the sample images generated during build time with the
+// "generate_image.sh" script. The expected conditions of each file in these
+// images is encoded in the file name, as defined in the mentioned script.
+TEST_F(Ext2FilesystemTest, ParseGeneratedImages) {
+  const vector<string> kGeneratedImages = {
+      "disk_ext2_1k.img",
+      "disk_ext2_4k.img" };
+  base::FilePath build_path = test_utils::GetBuildArtifactsPath().Append("gen");
+  for (const string& fs_name : kGeneratedImages) {
+    LOG(INFO) << "Testing " << fs_name;
+    unique_ptr<Ext2Filesystem> fs = Ext2Filesystem::CreateFromFile(
+        build_path.Append(fs_name).value());
+    ASSERT_NE(nullptr, fs.get());
+
+    vector<FilesystemInterface::File> files;
+    map<string, FilesystemInterface::File> map_files;
+    set<string> filenames;
+    EXPECT_TRUE(fs->GetFiles(&files));
+    for (const auto& file : files) {
+      // Check no repeated files. We should parse hard-links with two different
+      // names.
+      EXPECT_EQ(map_files.end(), map_files.find(file.name))
+          << "File " << file.name << " repeated in the list.";
+      map_files[file.name] = file;
+      filenames.insert(file.name);
+      ExpectBlocksInRange(file.extents, fs->GetBlockCount());
+    }
+
+    // Check that all the files are parsed, and the /removed file should not
+    // be included in the list.
+    set<string> kExpectedFiles = {
+        "/",
+        "/dir1",
+        "/dir1/file",
+        "/dir1/dir2",
+        "/dir1/dir2/file",
+        "/dir1/dir2/dir1",
+        "/empty-file",
+        "/link-hard-regular-16k",
+        "/link-long_symlink",
+        "/link-short_symlink",
+        "/lost+found",
+        "/regular-small",
+        "/regular-16k",
+        "/regular-32k-zeros",
+        "/regular-with_net_cap",
+        "/sparse_empty-10k",
+        "/sparse_empty-2blocks",
+        "/sparse-10000blocks",
+        "/sparse-16k-last_block",
+        "/sparse-16k-first_block",
+        "/sparse-16k-holes",
+        "<inode-blocks>",
+        "<free-space>",
+        "<group-descriptors>",
+    };
+    EXPECT_EQ(kExpectedFiles, filenames);
+
+    FilesystemInterface::File file;
+
+    // Small symlinks don't actually have data blocks.
+    EXPECT_TRUE(map_files["/link-short_symlink"].extents.empty());
+    EXPECT_EQ(1, BlocksInExtents(map_files["/link-long_symlink"].extents));
+
+    // Hard-links report the same list of blocks.
+    EXPECT_EQ(map_files["/link-hard-regular-16k"].extents,
+              map_files["/regular-16k"].extents);
+    EXPECT_FALSE(map_files["/regular-16k"].extents.empty());
+
+    // The number of blocks in these files doesn't depend on the
+    // block size.
+    EXPECT_TRUE(map_files["/empty-file"].extents.empty());
+    EXPECT_EQ(1, BlocksInExtents(map_files["/regular-small"].extents));
+    EXPECT_EQ(1, BlocksInExtents(map_files["/regular-with_net_cap"].extents));
+    EXPECT_TRUE(map_files["/sparse_empty-10k"].extents.empty());
+    EXPECT_TRUE(map_files["/sparse_empty-2blocks"].extents.empty());
+    EXPECT_EQ(1, BlocksInExtents(map_files["/sparse-16k-last_block"].extents));
+    EXPECT_EQ(1, BlocksInExtents(map_files["/sparse-16k-first_block"].extents));
+    EXPECT_EQ(2, BlocksInExtents(map_files["/sparse-16k-holes"].extents));
+  }
+}
+
+}  // namespace chromeos_update_engine
diff --git a/payload_generator/fake_filesystem.cc b/payload_generator/fake_filesystem.cc
new file mode 100644
index 0000000..b474b26
--- /dev/null
+++ b/payload_generator/fake_filesystem.cc
@@ -0,0 +1,41 @@
+// Copyright 2015 The Chromium OS Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "update_engine/payload_generator/fake_filesystem.h"
+
+#include <gtest/gtest.h>
+
+namespace chromeos_update_engine {
+
+FakeFilesystem::FakeFilesystem(uint64_t block_size, uint64_t block_count) :
+    block_size_(block_size),
+    block_count_(block_count) {
+}
+
+size_t FakeFilesystem::GetBlockSize() const {
+  return block_size_;
+}
+
+size_t FakeFilesystem::GetBlockCount() const {
+  return block_count_;
+}
+
+bool FakeFilesystem::GetFiles(std::vector<File>* files) const {
+  *files = files_;
+  return true;
+}
+
+void FakeFilesystem::AddFile(const std::string& filename,
+                             const std::vector<Extent> extents) {
+  File file;
+  file.name = filename;
+  file.extents = extents;
+  for (const Extent& extent : extents) {
+    EXPECT_LE(0, extent.start_block());
+    EXPECT_LE(extent.start_block() + extent.num_blocks(), block_count_);
+  }
+  files_.push_back(file);
+}
+
+}  // namespace chromeos_update_engine
diff --git a/payload_generator/fake_filesystem.h b/payload_generator/fake_filesystem.h
new file mode 100644
index 0000000..10b6683
--- /dev/null
+++ b/payload_generator/fake_filesystem.h
@@ -0,0 +1,48 @@
+// Copyright 2015 The Chromium OS Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef UPDATE_ENGINE_PAYLOAD_GENERATOR_FAKE_FILESYSTEM_H_
+#define UPDATE_ENGINE_PAYLOAD_GENERATOR_FAKE_FILESYSTEM_H_
+
+// A fake filesystem interface implementation allowing the user to add arbitrary
+// files/metadata.
+
+#include "update_engine/payload_generator/filesystem_interface.h"
+
+#include <string>
+#include <vector>
+
+#include "update_engine/update_metadata.pb.h"
+
+namespace chromeos_update_engine {
+
+class FakeFilesystem : public FilesystemInterface {
+ public:
+  FakeFilesystem(uint64_t block_size, uint64_t block_count);
+  virtual ~FakeFilesystem() = default;
+
+  // FilesystemInterface overrides.
+  size_t GetBlockSize() const override;
+  size_t GetBlockCount() const override;
+  bool GetFiles(std::vector<File>* files) const override;
+
+  // Fake methods.
+
+  // Add a file to the list of fake files.
+  void AddFile(const std::string& filename, const std::vector<Extent> extents);
+
+ private:
+  FakeFilesystem() = default;
+
+  uint64_t block_size_;
+  uint64_t block_count_;
+
+  std::vector<File> files_;
+
+  DISALLOW_COPY_AND_ASSIGN(FakeFilesystem);
+};
+
+}  // namespace chromeos_update_engine
+
+#endif  // UPDATE_ENGINE_PAYLOAD_GENERATOR_FAKE_FILESYSTEM_H_
diff --git a/payload_generator/filesystem_interface.h b/payload_generator/filesystem_interface.h
new file mode 100644
index 0000000..501a6be
--- /dev/null
+++ b/payload_generator/filesystem_interface.h
@@ -0,0 +1,78 @@
+// Copyright 2015 The Chromium OS Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef UPDATE_ENGINE_PAYLOAD_GENERATOR_FILESYSTEM_INTERFACE_H_
+#define UPDATE_ENGINE_PAYLOAD_GENERATOR_FILESYSTEM_INTERFACE_H_
+
+// This class is used to abstract a filesystem and iterate the blocks
+// associated with the files and filesystem structures.
+// For the purposes of the update payload generation, a filesystem is a
+// formatted partition composed of fixed-size blocks, since that's the
+// interface used in the update payload.
+
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include <base/macros.h>
+
+#include "update_engine/update_metadata.pb.h"
+
+namespace chromeos_update_engine {
+
+class FilesystemInterface {
+ public:
+  // This represents a file or pseudo-file in the filesystem. It can include
+  // all sort of files, like symlinks, hardlinks, directories and even a file
+  // entry representing the metadata, free space, journaling data, etc.
+  struct File {
+    File() {
+      memset(&file_stat, 0, sizeof(file_stat));
+    }
+
+    // The stat struct for the file. This is invalid (inode 0) for some
+    // pseudo-files.
+    struct stat file_stat;
+
+    // The absolute path to the file inside the filesystem, for example,
+    // "/usr/bin/bash". For pseudo-files, like blocks associated to internal
+    // filesystem tables or free space, the path doesn't start with a /.
+    std::string name;
+
+    // The list of all physical blocks holding the data of this file in
+    // the same order as the logical data. All the block numbers shall be
+    // between 0 and GetBlockCount() - 1. The blocks are encoded in extents,
+    // indicating the starting block, and the number of consecutive blocks.
+    std::vector<Extent> extents;
+  };
+
+  virtual ~FilesystemInterface() = default;
+
+  // Returns the size of a block in the filesystem.
+  virtual size_t GetBlockSize() const = 0;
+
+  // Returns the number of blocks in the filesystem.
+  virtual size_t GetBlockCount() const = 0;
+
+  // Stores in |files| the list of files and pseudo-files in the filesystem. See
+  // FilesystemInterface::File for details. The paths returned by this method
+  // shall not be repeated, but the same block could be present in more than one
+  // file, as happens for example with hard-linked files (though not limited to
+  // those cases). Returns whether the function succeeded.
+  virtual bool GetFiles(std::vector<File>* files) const = 0;
+
+ protected:
+  FilesystemInterface() = default;
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(FilesystemInterface);
+};
+
+}  // namespace chromeos_update_engine
+
+#endif  // UPDATE_ENGINE_PAYLOAD_GENERATOR_FILESYSTEM_INTERFACE_H_
diff --git a/payload_generator/raw_filesystem.cc b/payload_generator/raw_filesystem.cc
new file mode 100644
index 0000000..ee4423e
--- /dev/null
+++ b/payload_generator/raw_filesystem.cc
@@ -0,0 +1,41 @@
+// Copyright 2015 The Chromium OS Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "update_engine/payload_generator/raw_filesystem.h"
+
+#include "update_engine/extent_ranges.h"
+#include "update_engine/update_metadata.pb.h"
+#include "update_engine/utils.h"
+
+using std::unique_ptr;
+
+namespace chromeos_update_engine {
+
+unique_ptr<RawFilesystem> RawFilesystem::Create(
+      const std::string& filename, uint64_t block_size, uint64_t block_count) {
+  unique_ptr<RawFilesystem> result(new RawFilesystem());
+  result->filename_ = filename;
+  result->block_size_ = block_size;
+  result->block_count_ = block_count;
+  return result;
+}
+
+size_t RawFilesystem::GetBlockSize() const {
+  return block_size_;
+}
+
+size_t RawFilesystem::GetBlockCount() const {
+  return block_count_;
+}
+
+bool RawFilesystem::GetFiles(std::vector<File>* files) const {
+  files->clear();
+  File file;
+  file.name = filename_;
+  file.extents = { ExtentForRange(0, block_count_) };
+  files->push_back(file);
+  return true;
+}
+
+}  // namespace chromeos_update_engine
diff --git a/payload_generator/raw_filesystem.h b/payload_generator/raw_filesystem.h
new file mode 100644
index 0000000..d8e1fe5
--- /dev/null
+++ b/payload_generator/raw_filesystem.h
@@ -0,0 +1,44 @@
+// Copyright 2015 The Chromium OS Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef UPDATE_ENGINE_PAYLOAD_GENERATOR_RAW_FILESYSTEM_H_
+#define UPDATE_ENGINE_PAYLOAD_GENERATOR_RAW_FILESYSTEM_H_
+
+// A simple filesystem interface implementation used for unknown filesystem
+// format such as the kernel.
+
+#include "update_engine/payload_generator/filesystem_interface.h"
+
+#include <string>
+#include <vector>
+
+namespace chromeos_update_engine {
+
+class RawFilesystem : public FilesystemInterface {
+ public:
+  static std::unique_ptr<RawFilesystem> Create(
+      const std::string& filename, uint64_t block_size, uint64_t block_count);
+  virtual ~RawFilesystem() = default;
+
+  // FilesystemInterface overrides.
+  size_t GetBlockSize() const override;
+  size_t GetBlockCount() const override;
+
+  // GetFiles will return only one file with all the blocks of the filesystem
+  // with the name passed during construction.
+  bool GetFiles(std::vector<File>* files) const override;
+
+ private:
+  RawFilesystem() = default;
+
+  std::string filename_;
+  uint64_t block_count_;
+  uint64_t block_size_;
+
+  DISALLOW_COPY_AND_ASSIGN(RawFilesystem);
+};
+
+}  // namespace chromeos_update_engine
+
+#endif  // UPDATE_ENGINE_PAYLOAD_GENERATOR_RAW_FILESYSTEM_H_
diff --git a/sample_images/disk_ext2_1k.txt b/sample_images/disk_ext2_1k.txt
new file mode 100644
index 0000000..f13c080
--- /dev/null
+++ b/sample_images/disk_ext2_1k.txt
@@ -0,0 +1 @@
+16777216 1024
diff --git a/sample_images/disk_ext2_4k.txt b/sample_images/disk_ext2_4k.txt
new file mode 100644
index 0000000..d01d105
--- /dev/null
+++ b/sample_images/disk_ext2_4k.txt
@@ -0,0 +1 @@
+16777216 4096
diff --git a/sample_images/generate_image.sh b/sample_images/generate_image.sh
new file mode 100755
index 0000000..0f0c384
--- /dev/null
+++ b/sample_images/generate_image.sh
@@ -0,0 +1,111 @@
+#!/bin/bash
+
+# Copyright 2015 The Chromium OS Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+set -e
+
+# cleanup <path>
+# Unmounts <path> and removes the mountpoint directory <path>.
+cleanup() {
+  local mnt="$1"
+  # Retry after a sync if the quiet umount failed and <path> is still mounted.
+  if ! sudo umount "${mnt}" 2>/dev/null && mountpoint -q "${mnt}"; then
+    sync && sudo umount "${mnt}"
+  fi
+  rmdir "${mnt}"
+}
+
+# generate_fs <filename> <size> [block_size] [block_groups]
+# Builds an ext2 image at <filename>, loop-mounts it and adds the test files.
+generate_fs() {
+  local filename="$1"
+  local size="$2"
+  local block_size="${3:-4096}"
+  local block_groups="${4:-}"
+
+  local mkfs_opts=( -q -F -b "${block_size}" -L "ROOT-TEST" -t ext2 )
+  if [[ -n "${block_groups}" ]]; then
+    mkfs_opts+=( -G "${block_groups}" )
+  fi
+
+  local mntdir  # Assigned separately so set -e catches a mktemp failure.
+  mntdir=$(mktemp --tmpdir -d generate_ext2.XXXXXX)
+  trap 'cleanup "${mntdir}"; rm -f "${filename}"' INT TERM EXIT
+
+  # Cleanup old image; "rm -f" succeeds even when the file does not exist.
+  rm -f "${filename}"
+  truncate --size="${size}" "${filename}"
+
+  mkfs.ext2 "${mkfs_opts[@]}" "${filename}"
+  sudo mount "${filename}" "${mntdir}" -o loop
+
+  ### Generate the files used in unittest with descriptive names.
+  sudo touch "${mntdir}"/empty-file
+
+  # regular: Regular files. NOTE(review): regular-32k-zeros is only 16 KiB.
+  echo "small file" | sudo dd of="${mntdir}"/regular-small status=none
+  dd if=/dev/zero bs=1024 count=16 status=none | tr '\0' '\141' |
+    sudo dd of="${mntdir}"/regular-16k status=none
+  sudo dd if=/dev/zero of="${mntdir}"/regular-32k-zeros bs=1024 count=16 \
+    status=none
+
+  echo "with net_cap" | sudo dd of="${mntdir}"/regular-with_net_cap status=none
+  sudo setcap cap_net_raw=ep "${mntdir}"/regular-with_net_cap
+
+  # sparse_empty: Files with no data blocks at all (only sparse holes).
+  sudo truncate --size=10240 "${mntdir}"/sparse_empty-10k
+  sudo truncate --size=$(( block_size * 2 )) "${mntdir}"/sparse_empty-2blocks
+
+  # sparse: Files with some data blocks but also sparse holes.
+  echo -n "foo" |
+    sudo dd of="${mntdir}"/sparse-16k-last_block bs=1 \
+      seek=$(( 16 * 1024 - 3)) status=none
+
+  # ext2 inodes have 12 direct blocks, one indirect, one double indirect and
+  # one triple indirect. 10000 should be enough to have an indirect and double
+  # indirect block.
+  echo -n "foo" |
+    sudo dd of="${mntdir}"/sparse-10000blocks bs=1 \
+      seek=$(( block_size * 10000 )) status=none
+
+  sudo truncate --size=16384 "${mntdir}"/sparse-16k-first_block
+  echo "first block" | sudo dd of="${mntdir}"/sparse-16k-first_block status=none
+
+  sudo truncate --size=16384 "${mntdir}"/sparse-16k-holes
+  echo "a" | sudo dd of="${mntdir}"/sparse-16k-holes bs=1 seek=100 status=none
+  echo "b" | sudo dd of="${mntdir}"/sparse-16k-holes bs=1 seek=10000 status=none
+
+  # link: symlinks and hardlinks. The long symlink target is 256 'a' characters.
+  sudo ln -s "broken-link" "${mntdir}"/link-short_symlink
+  sudo ln -s "$(dd if=/dev/zero bs=256 count=1 status=none | tr '\0' '\141')" \
+    "${mntdir}"/link-long_symlink
+  sudo ln "${mntdir}"/regular-16k "${mntdir}"/link-hard-regular-16k
+
+  # Directories.
+  sudo mkdir -p "${mntdir}"/dir1/dir2/dir1
+  echo "foo" | sudo tee "${mntdir}"/dir1/dir2/file >/dev/null
+  echo "bar" | sudo tee "${mntdir}"/dir1/file >/dev/null
+
+  # removed: removed files that should not be listed.
+  echo "We will remove this file so it's contents will be somewhere in the " \
+    "empty space data but it won't be all zeros." |
+    sudo dd of="${mntdir}"/removed conv=fsync status=none
+  sudo rm "${mntdir}"/removed
+
+  cleanup "${mntdir}"
+  trap - INT TERM EXIT
+}
+
+image_desc="${1:-}"
+output_dir="${2:-}"
+
+if [[ ! -e "${image_desc}" || ! -d "${output_dir}" ]]; then
+  echo "Use: $0 <image_description.txt> <output_dir>" >&2
+  exit 1
+fi
+# Description file: "<size> [block_size] [block_groups]", whitespace-split.
+args=( $(cat "${image_desc}") )
+dest_image="${output_dir}/$(basename "${image_desc}" .txt).img"
+generate_fs "${dest_image}" "${args[@]}"
diff --git a/test_utils.cc b/test_utils.cc
index 712df64..66db978 100644
--- a/test_utils.cc
+++ b/test_utils.cc
@@ -16,9 +16,10 @@
 #include <string>
 #include <vector>
 
+#include <base/files/file_util.h>
 #include <base/logging.h>
-#include <base/strings/string_util.h>
 #include <base/strings/stringprintf.h>
+#include <base/strings/string_util.h>
 
 #include "update_engine/file_writer.h"
 #include "update_engine/payload_generator/filesystem_iterator.h"
@@ -389,5 +390,11 @@
   g_free(gval);
 }
 
+base::FilePath GetBuildArtifactsPath() {
+  base::FilePath exe_path;  // Filled with the target of /proc/self/exe.
+  CHECK(base::ReadSymbolicLink(base::FilePath("/proc/self/exe"), &exe_path));
+  return exe_path.DirName();  // Directory containing the running executable.
+}
+
 }  // namespace test_utils
 }  // namespace chromeos_update_engine
diff --git a/test_utils.h b/test_utils.h
index dddd563..5453d27 100644
--- a/test_utils.h
+++ b/test_utils.h
@@ -17,6 +17,7 @@
 #include <vector>
 
 #include <base/callback.h>
+#include <base/files/file_path.h>
 #include <glib-object.h>
 #include <gtest/gtest.h>
 
@@ -209,8 +210,11 @@
 // Frees a GValue object and its allocated resources.
 void GValueFree(gpointer arg);
 
-}  // namespace test_utils
+// Returns the path where the build artifacts are stored. This is the directory
+// that contains the currently running unittest executable.
+base::FilePath GetBuildArtifactsPath();
 
+}  // namespace test_utils
 
 // Useful actions for test. These need to be defined in the
 // chromeos_update_engine namespace.
diff --git a/update_engine.gyp b/update_engine.gyp
index b1ee122..1731757 100644
--- a/update_engine.gyp
+++ b/update_engine.gyp
@@ -279,6 +279,7 @@
         'payload_generator/annotated_operation.cc',
         'payload_generator/cycle_breaker.cc',
         'payload_generator/delta_diff_generator.cc',
+        'payload_generator/ext2_filesystem.cc',
         'payload_generator/extent_mapper.cc',
         'payload_generator/extent_utils.cc',
         'payload_generator/filesystem_iterator.cc',
@@ -289,6 +290,7 @@
         'payload_generator/metadata.cc',
         'payload_generator/payload_generation_config.cc',
         'payload_generator/payload_signer.cc',
+        'payload_generator/raw_filesystem.cc',
         'payload_generator/tarjan.cc',
         'payload_generator/topological_sort.cc',
         'payload_generator/verity_utils.cc',
@@ -329,6 +331,19 @@
           ],
           'includes': ['../common-mk/openssl_pem.gypi'],
         },
+        # Sample images used for testing.
+        {
+          'target_name': 'update_engine-test_images',
+          'type': 'none',
+          'variables': {
+            'image_out_dir': '.',
+          },
+          'sources': [
+            'sample_images/disk_ext2_1k.txt',
+            'sample_images/disk_ext2_4k.txt',
+          ],
+          'includes': ['generate_image.gypi'],
+        },
         # Test HTTP Server.
         {
           'target_name': 'test_http_server',
@@ -373,8 +388,10 @@
             'p2p_manager_unittest.cc',
             'payload_generator/cycle_breaker_unittest.cc',
             'payload_generator/delta_diff_generator_unittest.cc',
+            'payload_generator/ext2_filesystem_unittest.cc',
             'payload_generator/extent_mapper_unittest.cc',
             'payload_generator/extent_utils_unittest.cc',
+            'payload_generator/fake_filesystem.cc',
             'payload_generator/filesystem_iterator_unittest.cc',
             'payload_generator/full_update_generator_unittest.cc',
             'payload_generator/graph_utils_unittest.cc',