Implement XzExtentWriter.

The new XzExtentWriter class is an ExtentWriter that uses xz-embedded
to decompress an xz stream and write it to the underlying extent
writer. This will be used to implement REPLACE_XZ in a follow-up CL.

Bug: 23604708
Test: Added unittests.

Change-Id: Ifd2ba0c917b66ab33e5cff1d6069da833ad54882
diff --git a/Android.mk b/Android.mk
index c12fc16..abbae26 100644
--- a/Android.mk
+++ b/Android.mk
@@ -106,7 +106,8 @@
     update_engine-dbus-libcros-client \
     update_engine_client-dbus-proxies \
     libbz \
-    libfs_mgr
+    libfs_mgr \
+    libxz
 LOCAL_SHARED_LIBRARIES += \
     libprotobuf-cpp-lite-rtti \
     libdbus \
@@ -176,7 +177,8 @@
     update_manager/real_updater_provider.cc \
     update_manager/state_factory.cc \
     update_manager/update_manager.cc \
-    utils.cc
+    utils.cc \
+    xz_extent_writer.cc
 $(eval $(update_engine_common))
 include $(BUILD_STATIC_LIBRARY)
 
@@ -190,6 +192,7 @@
     libupdate_engine \
     libbz \
     libfs_mgr \
+    libxz \
     update_metadata-protos \
     update_engine-dbus-adaptor \
     update_engine-dbus-libcros-client \
@@ -237,6 +240,7 @@
     libupdate_engine \
     libbz \
     libfs_mgr \
+    libxz \
     update_metadata-protos \
     update_engine-dbus-adaptor \
     update_engine-dbus-libcros-client \
@@ -287,6 +291,7 @@
     libupdate_engine \
     libbz \
     libfs_mgr \
+    libxz \
     update_metadata-protos \
     update_engine-dbus-adaptor \
     update_engine-dbus-libcros-client \
diff --git a/fake_extent_writer.h b/fake_extent_writer.h
new file mode 100644
index 0000000..a876893
--- /dev/null
+++ b/fake_extent_writer.h
@@ -0,0 +1,71 @@
+//
+// Copyright (C) 2015 The Android Open Source Project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+#ifndef UPDATE_ENGINE_FAKE_EXTENT_WRITER_H_
+#define UPDATE_ENGINE_FAKE_EXTENT_WRITER_H_
+
+#include <memory>
+#include <vector>
+
+#include <chromeos/secure_blob.h>
+
+#include "update_engine/extent_writer.h"
+
+namespace chromeos_update_engine {
+
+// FakeExtentWriter is a concrete ExtentWriter subclass that keeps track of all
+// the written data, useful for testing.
+class FakeExtentWriter : public ExtentWriter {
+ public:
+  FakeExtentWriter() = default;
+  ~FakeExtentWriter() override = default;
+
+  // ExtentWriter overrides. Init() ignores its arguments and always succeeds.
+  bool Init(FileDescriptorPtr /* fd */,
+            const std::vector<Extent>& /* extents */,
+            uint32_t /* block_size */) override {
+    init_called_ = true;
+    return true;
+  }
+  bool Write(const void* bytes, size_t count) override {
+    if (!init_called_ || end_called_)  // Before Init() or after EndImpl().
+      return false;
+    written_data_.insert(written_data_.end(),
+                         reinterpret_cast<const uint8_t*>(bytes),
+                         reinterpret_cast<const uint8_t*>(bytes) + count);
+    return true;
+  }
+  bool EndImpl() override {
+    end_called_ = true;
+    return true;
+  }
+
+  // Fake methods: read-only inspection of what the writer has recorded.
+  bool InitCalled() const { return init_called_; }
+  bool EndCalled() const { return end_called_; }
+  chromeos::Blob WrittenData() const { return written_data_; }
+
+ private:
+  bool init_called_{false};
+  bool end_called_{false};
+  chromeos::Blob written_data_;
+
+  DISALLOW_COPY_AND_ASSIGN(FakeExtentWriter);
+};
+
+}  // namespace chromeos_update_engine
+
+#endif  // UPDATE_ENGINE_FAKE_EXTENT_WRITER_H_
diff --git a/main.cc b/main.cc
index 516c72c..f56d447 100644
--- a/main.cc
+++ b/main.cc
@@ -14,7 +14,10 @@
 // limitations under the License.
 //
 
+#include <sys/stat.h>
+#include <sys/types.h>
 #include <unistd.h>
+#include <xz.h>
 
 #include <string>
 
@@ -27,8 +30,6 @@
 #include <chromeos/flag_helper.h>
 #include <chromeos/message_loops/base_message_loop.h>
 #include <metrics/metrics_library.h>
-#include <sys/stat.h>
-#include <sys/types.h>
 
 #include "update_engine/daemon.h"
 #include "update_engine/terminator.h"
@@ -113,6 +114,9 @@
 
   LOG(INFO) << "Chrome OS Update Engine starting";
 
+  // xz-embedded requires its CRC-32 table to be initialized once on startup.
+  xz_crc32_init();
+
   // Ensure that all written files have safe permissions.
   // This is a mask, so we _block_ all permissions for the group owner and other
   // users but allow all permissions for the user owner. We allow execution
diff --git a/testrunner.cc b/testrunner.cc
index 325d1af..35295a9 100644
--- a/testrunner.cc
+++ b/testrunner.cc
@@ -16,6 +16,8 @@
 
 // based on pam_google_testrunner.cc
 
+#include <xz.h>
+
 #include <base/at_exit.h>
 #include <base/command_line.h>
 #include <chromeos/test_helpers.h>
@@ -26,6 +28,8 @@
 int main(int argc, char **argv) {
   LOG(INFO) << "started";
   base::AtExitManager exit_manager;
+  // xz-embedded requires its CRC-32 table to be initialized once on startup.
+  xz_crc32_init();
   // TODO(garnold) temporarily cause the unittest binary to exit with status
   // code 2 upon catching a SIGTERM. This will help diagnose why the unittest
   // binary is perceived as failing by the buildbot.  We should revert it to use
diff --git a/update_engine.gyp b/update_engine.gyp
index 05bb6b4..2a1768e 100644
--- a/update_engine.gyp
+++ b/update_engine.gyp
@@ -130,6 +130,7 @@
           'libshill-client',
           'libssl',
           'expat',
+          'xz-embedded',
         ],
         'deps': ['<@(exported_deps)'],
       },
@@ -213,6 +214,7 @@
         'update_manager/state_factory.cc',
         'update_manager/update_manager.cc',
         'utils.cc',
+        'xz_extent_writer.cc',
       ],
       'conditions': [
         ['USE_mtd == 1', {
@@ -447,6 +449,7 @@
             'update_manager/update_manager_unittest.cc',
             'update_manager/variable_unittest.cc',
             'utils_unittest.cc',
+            'xz_extent_writer_unittest.cc',
             'zip_unittest.cc',
             # Main entry point for runnning tests.
             'testrunner.cc',
diff --git a/xz_extent_writer.cc b/xz_extent_writer.cc
new file mode 100644
index 0000000..99eb023
--- /dev/null
+++ b/xz_extent_writer.cc
@@ -0,0 +1,118 @@
+//
+// Copyright (C) 2015 The Android Open Source Project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+#include "update_engine/xz_extent_writer.h"
+
+using std::vector;
+
+namespace chromeos_update_engine {
+
+namespace {
+const chromeos::Blob::size_type kOutputBufferLength = 16 * 1024;
+
+// xz uses a variable dictionary size which impacts the compression ratio
+// and must be reconstructed in RAM during decompression. While we control
+// the required memory from the compressor side, the decompressor lets us set
+// a limit on this dictionary size, rejecting compressed streams that require
+// more than that. "xz -9" requires up to 64 MiB, so a 64 MiB limit will allow
+// compressed streams up to -9, the maximum compression setting.
+const uint32_t kXzMaxDictSize = 64 * 1024 * 1024;
+
+const char* XzErrorString(enum xz_ret error) {
+  #define __XZ_ERROR_STRING_CASE(code) case code: return #code;
+  switch (error) {
+    __XZ_ERROR_STRING_CASE(XZ_OK)
+    __XZ_ERROR_STRING_CASE(XZ_STREAM_END)
+    __XZ_ERROR_STRING_CASE(XZ_UNSUPPORTED_CHECK)
+    __XZ_ERROR_STRING_CASE(XZ_MEM_ERROR)
+    __XZ_ERROR_STRING_CASE(XZ_MEMLIMIT_ERROR)
+    __XZ_ERROR_STRING_CASE(XZ_FORMAT_ERROR)
+    __XZ_ERROR_STRING_CASE(XZ_OPTIONS_ERROR)
+    __XZ_ERROR_STRING_CASE(XZ_DATA_ERROR)
+    __XZ_ERROR_STRING_CASE(XZ_BUF_ERROR)
+    default:
+      return "<unknown xz error>";
+  }
+  #undef __XZ_ERROR_STRING_CASE
+};
+}  // namespace
+
+XzExtentWriter::~XzExtentWriter() {
+  xz_dec_end(stream_);  // |stream_| is still nullptr if Init() never ran.
+}
+
+bool XzExtentWriter::Init(FileDescriptorPtr fd,
+                          const vector<Extent>& extents,
+                          uint32_t block_size) {
+  stream_ = xz_dec_init(XZ_DYNALLOC, kXzMaxDictSize);  // New decoder state.
+  TEST_AND_RETURN_FALSE(stream_ != nullptr);  // No decoder was created.
+  return underlying_writer_->Init(fd, extents, block_size);  // Same arguments.
+}
+
+// Decompresses |count| bytes and passes the output to |underlying_writer_|.
+bool XzExtentWriter::Write(const void* bytes, size_t count) {
+  // Copy the input data into |input_buffer_| only if |input_buffer_| already
+  // contains unconsumed data. Otherwise, process the data directly from the
+  // source.
+  const uint8_t* input = reinterpret_cast<const uint8_t*>(bytes);
+  if (!input_buffer_.empty()) {
+    input_buffer_.insert(input_buffer_.end(), input, input + count);
+    input = input_buffer_.data();
+    count = input_buffer_.size();
+  }
+
+  xz_buf request;
+  request.in = input;
+  request.in_pos = 0;
+  request.in_size = count;
+
+  chromeos::Blob output_buffer(kOutputBufferLength);
+  request.out = output_buffer.data();
+  request.out_size = output_buffer.size();
+  for (;;) {
+    request.out_pos = 0;
+
+    xz_ret ret = xz_dec_run(stream_, &request);
+    if (ret != XZ_OK && ret != XZ_STREAM_END) {
+      LOG(ERROR) << "xz_dec_run returned " << XzErrorString(ret);
+      return false;
+    }
+
+    if (request.out_pos == 0)  // Nothing decompressed in this pass.
+      break;
+
+    TEST_AND_RETURN_FALSE(
+        underlying_writer_->Write(output_buffer.data(), request.out_pos));
+    if (ret == XZ_STREAM_END)  // Trailing bytes after the stream are an error.
+      TEST_AND_RETURN_FALSE(request.in_size == request.in_pos);
+    if (request.in_size == request.in_pos)
+      break;  // No more input to process.
+  }
+
+  // Store unconsumed data (if any) in |input_buffer_|. Since |input| can point
+  // to the existing |input_buffer_| we create a new one before assigning it.
+  chromeos::Blob new_input_buffer(request.in + request.in_pos,
+                                  request.in + request.in_size);
+  input_buffer_ = std::move(new_input_buffer);
+  return true;
+}
+
+bool XzExtentWriter::EndImpl() {
+  TEST_AND_RETURN_FALSE(input_buffer_.empty());  // Fail on leftover input.
+  return underlying_writer_->End();
+}
+
+}  // namespace chromeos_update_engine
diff --git a/xz_extent_writer.h b/xz_extent_writer.h
new file mode 100644
index 0000000..0b9ee17
--- /dev/null
+++ b/xz_extent_writer.h
@@ -0,0 +1,60 @@
+//
+// Copyright (C) 2015 The Android Open Source Project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+#ifndef UPDATE_ENGINE_XZ_EXTENT_WRITER_H_
+#define UPDATE_ENGINE_XZ_EXTENT_WRITER_H_
+
+#include <xz.h>
+
+#include <memory>
+#include <vector>
+
+#include <chromeos/secure_blob.h>
+
+#include "update_engine/extent_writer.h"
+
+// XzExtentWriter is a concrete ExtentWriter subclass that xz-decompresses
+// what it's given in Write using xz-embedded. Note that xz-embedded only
+// supports files with either no CRC or CRC-32. It passes the decompressed data
+// to an underlying ExtentWriter.
+
+namespace chromeos_update_engine {
+
+class XzExtentWriter : public ExtentWriter {
+ public:
+  explicit XzExtentWriter(std::unique_ptr<ExtentWriter> underlying_writer)
+      : underlying_writer_(std::move(underlying_writer)) {}  // Takes ownership.
+  ~XzExtentWriter() override;
+
+  bool Init(FileDescriptorPtr fd,
+            const std::vector<Extent>& extents,
+            uint32_t block_size) override;
+  bool Write(const void* bytes, size_t count) override;  // Input is xz data.
+  bool EndImpl() override;  // Fails if compressed input is left unconsumed.
+
+ private:
+  // The underlying ExtentWriter that receives the decompressed data.
+  std::unique_ptr<ExtentWriter> underlying_writer_;
+  // The opaque xz decompressor struct; created in Init().
+  xz_dec* stream_{nullptr};
+  chromeos::Blob input_buffer_;  // Input not yet consumed by the decoder.
+
+  DISALLOW_COPY_AND_ASSIGN(XzExtentWriter);
+};
+
+}  // namespace chromeos_update_engine
+
+#endif  // UPDATE_ENGINE_XZ_EXTENT_WRITER_H_
diff --git a/xz_extent_writer_unittest.cc b/xz_extent_writer_unittest.cc
new file mode 100644
index 0000000..2fd580d
--- /dev/null
+++ b/xz_extent_writer_unittest.cc
@@ -0,0 +1,165 @@
+//
+// Copyright (C) 2015 The Android Open Source Project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+#include "update_engine/xz_extent_writer.h"
+
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include <algorithm>
+#include <string>
+#include <vector>
+
+#include <chromeos/make_unique_ptr.h>
+#include <gtest/gtest.h>
+
+#include "update_engine/fake_extent_writer.h"
+#include "update_engine/test_utils.h"
+#include "update_engine/utils.h"
+
+using std::string;
+using std::vector;
+
+namespace chromeos_update_engine {
+
+namespace {
+
+const char kSampleData[] = "Redundaaaaaaaaaaaaaant\n";
+
+// Compressed data with CRC-32 check, generated with:
+// echo "Redundaaaaaaaaaaaaaant" | xz -9 --check=crc32 |
+// hexdump -v -e '"    " 12/1 "0x%02x, " "\n"'
+const uint8_t kCompressedDataCRC32[] = {
+    0xfd, 0x37, 0x7a, 0x58, 0x5a, 0x00, 0x00, 0x01, 0x69, 0x22, 0xde, 0x36,
+    0x02, 0x00, 0x21, 0x01, 0x1c, 0x00, 0x00, 0x00, 0x10, 0xcf, 0x58, 0xcc,
+    0xe0, 0x00, 0x16, 0x00, 0x10, 0x5d, 0x00, 0x29, 0x19, 0x48, 0x87, 0x88,
+    0xec, 0x49, 0x88, 0x73, 0x8b, 0x5d, 0xa6, 0x46, 0xb4, 0x00, 0x00, 0x00,
+    0x68, 0xfc, 0x7b, 0x25, 0x00, 0x01, 0x28, 0x17, 0x46, 0x9e, 0x08, 0xfe,
+    0x90, 0x42, 0x99, 0x0d, 0x01, 0x00, 0x00, 0x00, 0x00, 0x01, 0x59, 0x5a,
+};
+
+// Compressed data without checksum, generated with:
+// echo "Redundaaaaaaaaaaaaaant" | xz -9 --check=none |
+// hexdump -v -e '"    " 12/1 "0x%02x, " "\n"'
+const uint8_t kCompressedDataNoCheck[] = {
+    0xfd, 0x37, 0x7a, 0x58, 0x5a, 0x00, 0x00, 0x00, 0xff, 0x12, 0xd9, 0x41,
+    0x02, 0x00, 0x21, 0x01, 0x1c, 0x00, 0x00, 0x00, 0x10, 0xcf, 0x58, 0xcc,
+    0xe0, 0x00, 0x16, 0x00, 0x10, 0x5d, 0x00, 0x29, 0x19, 0x48, 0x87, 0x88,
+    0xec, 0x49, 0x88, 0x73, 0x8b, 0x5d, 0xa6, 0x46, 0xb4, 0x00, 0x00, 0x00,
+    0x00, 0x01, 0x24, 0x17, 0x4a, 0xd1, 0xbd, 0x52, 0x06, 0x72, 0x9e, 0x7a,
+    0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x59, 0x5a,
+};
+
+// Highly redundant data bigger than the internal buffer, generated with:
+// dd if=/dev/zero bs=30K count=1 | tr '\0' 'a' | xz -9 --check=crc32 |
+// hexdump -v -e '"    " 12/1 "0x%02x, " "\n"'
+const uint8_t kCompressed30KiBofA[] = {
+    0xfd, 0x37, 0x7a, 0x58, 0x5a, 0x00, 0x00, 0x01, 0x69, 0x22, 0xde, 0x36,
+    0x02, 0x00, 0x21, 0x01, 0x1c, 0x00, 0x00, 0x00, 0x10, 0xcf, 0x58, 0xcc,
+    0xe0, 0x77, 0xff, 0x00, 0x41, 0x5d, 0x00, 0x30, 0xef, 0xfb, 0xbf, 0xfe,
+    0xa3, 0xb1, 0x5e, 0xe5, 0xf8, 0x3f, 0xb2, 0xaa, 0x26, 0x55, 0xf8, 0x68,
+    0x70, 0x41, 0x70, 0x15, 0x0f, 0x8d, 0xfd, 0x1e, 0x4c, 0x1b, 0x8a, 0x42,
+    0xb7, 0x19, 0xf4, 0x69, 0x18, 0x71, 0xae, 0x66, 0x23, 0x8a, 0x8a, 0x4d,
+    0x2f, 0xa3, 0x0d, 0xd9, 0x7f, 0xa6, 0xe3, 0x8c, 0x23, 0x11, 0x53, 0xe0,
+    0x59, 0x18, 0xc5, 0x75, 0x8a, 0xe2, 0x76, 0x4c, 0xee, 0x30, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0xf9, 0x47, 0xb5, 0xee, 0x00, 0x01, 0x59, 0x80,
+    0xf0, 0x01, 0x00, 0x00, 0xe0, 0x41, 0x96, 0xde, 0x3e, 0x30, 0x0d, 0x8b,
+    0x02, 0x00, 0x00, 0x00, 0x00, 0x01, 0x59, 0x5a,
+};
+
+}  // namespace
+
+class XzExtentWriterTest : public ::testing::Test {
+ protected:
+  void SetUp() override {
+    fake_extent_writer_ = new FakeExtentWriter();
+    xz_writer_.reset(
+        new XzExtentWriter(chromeos::make_unique_ptr(fake_extent_writer_)));
+  }
+
+  // Runs Init(), one Write() of |compressed| and End(); all must succeed.
+  void WriteAll(const chromeos::Blob& compressed) {
+    EXPECT_TRUE(xz_writer_->Init(fd_, {}, 1024));
+    EXPECT_TRUE(xz_writer_->Write(compressed.data(), compressed.size()));
+    EXPECT_TRUE(xz_writer_->End());
+
+    EXPECT_TRUE(fake_extent_writer_->InitCalled());
+    EXPECT_TRUE(fake_extent_writer_->EndCalled());
+  }
+
+  // Owned by |xz_writer_|; invalidated once |xz_writer_| is deleted.
+  FakeExtentWriter* fake_extent_writer_{nullptr};
+  std::unique_ptr<XzExtentWriter> xz_writer_;
+
+  // |kSampleData| without its terminating NUL byte.
+  const chromeos::Blob sample_data_{std::begin(kSampleData),
+                                    std::end(kSampleData) - 1};
+  FileDescriptorPtr fd_;
+};
+
+TEST_F(XzExtentWriterTest, CreateAndDestroy) {
+  // Destroying a writer without calling Init() or End() must not crash.
+  EXPECT_FALSE(fake_extent_writer_->InitCalled());
+  EXPECT_FALSE(fake_extent_writer_->EndCalled());
+}
+
+TEST_F(XzExtentWriterTest, CompressedSampleData) {
+  WriteAll(chromeos::Blob(std::begin(kCompressedDataNoCheck),  // No checksum.
+                          std::end(kCompressedDataNoCheck)));
+  EXPECT_EQ(sample_data_, fake_extent_writer_->WrittenData());
+}
+
+TEST_F(XzExtentWriterTest, CompressedSampleDataWithCrc) {
+  WriteAll(chromeos::Blob(std::begin(kCompressedDataCRC32),  // CRC-32 check.
+                          std::end(kCompressedDataCRC32)));
+  EXPECT_EQ(sample_data_, fake_extent_writer_->WrittenData());
+}
+
+TEST_F(XzExtentWriterTest, CompressedDataBiggerThanTheBuffer) {
+  // Test that all the data is written even when the output (30 KiB) is bigger
+  // than the writer's internal output buffer.
+  WriteAll(chromeos::Blob(std::begin(kCompressed30KiBofA),
+                          std::end(kCompressed30KiBofA)));
+  chromeos::Blob expected_data(30 * 1024, 'a');
+  EXPECT_EQ(expected_data, fake_extent_writer_->WrittenData());
+}
+
+TEST_F(XzExtentWriterTest, GarbageDataRejected) {
+  EXPECT_TRUE(xz_writer_->Init(fd_, {}, 1024));
+  // |sample_data_| is an uncompressed string, so Write() must reject it.
+  EXPECT_FALSE(xz_writer_->Write(sample_data_.data(), sample_data_.size()));
+  EXPECT_TRUE(xz_writer_->End());  // End() is still expected to succeed.
+
+  EXPECT_TRUE(fake_extent_writer_->EndCalled());
+}
+
+TEST_F(XzExtentWriterTest, PartialDataIsKept) {
+  chromeos::Blob compressed(std::begin(kCompressed30KiBofA),
+                            std::end(kCompressed30KiBofA));
+  EXPECT_TRUE(xz_writer_->Init(fd_, {}, 1024));
+  for (uint8_t byte : compressed) {  // Feed the stream one byte per Write().
+    EXPECT_TRUE(xz_writer_->Write(&byte, 1));
+  }
+  EXPECT_TRUE(xz_writer_->End());
+
+  // The stream fed byte by byte must still decompress to 30 KiB of 'a'.
+  chromeos::Blob expected_data(30 * 1024, 'a');
+  EXPECT_EQ(expected_data, fake_extent_writer_->WrittenData());
+}
+
+}  // namespace chromeos_update_engine