| /* |
| * Copyright (C) 2020 The Android Open Source Project |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| #include "snapuserd.h" |
| |
| #include <csignal> |
| #include <optional> |
| #include <set> |
| |
| #include <libsnapshot/snapuserd_client.h> |
| |
| namespace android { |
| namespace snapshot { |
| |
| using namespace android; |
| using namespace android::dm; |
| using android::base::unique_fd; |
| |
| #define SNAP_LOG(level) LOG(level) << misc_name_ << ": " |
| #define SNAP_PLOG(level) PLOG(level) << misc_name_ << ": " |
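| // These wrappers prefix every log line with misc_name_ so that messages |
| // from different dm-user devices can be told apart. |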
| |
| Snapuserd::Snapuserd(const std::string& misc_name, const std::string& cow_device, |
| const std::string& backing_device) { |
| misc_name_ = misc_name; |
| cow_device_ = cow_device; |
| backing_store_device_ = backing_device; |
| control_device_ = "/dev/dm-user/" + misc_name; |
| } |
| |
| bool Snapuserd::InitializeWorkers() { |
| for (int i = 0; i < NUM_THREADS_PER_PARTITION; i++) { |
| std::unique_ptr<WorkerThread> wt = std::make_unique<WorkerThread>( |
| cow_device_, backing_store_device_, control_device_, misc_name_, GetSharedPtr()); |
| |
| worker_threads_.push_back(std::move(wt)); |
| } |
| |
| read_ahead_thread_ = std::make_unique<ReadAheadThread>(cow_device_, backing_store_device_, |
| misc_name_, GetSharedPtr()); |
| return true; |
| } |
| |
| bool Snapuserd::CommitMerge(int num_merge_ops) { |
| struct CowHeader* ch = reinterpret_cast<struct CowHeader*>(mapped_addr_); |
| ch->num_merge_ops += num_merge_ops; |
| |
| if (read_ahead_feature_ && !read_ahead_ops_.empty()) { |
| struct BufferState* ra_state = GetBufferState(); |
| ra_state->read_ahead_state = kCowReadAheadInProgress; |
| } |
| |
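| // Flush the first 4k page: it holds the CowHeader and, assuming the header |
| // fits well within one page, also the BufferState updated above when the |
| // read-ahead feature is in use. |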
| int ret = msync(mapped_addr_, BLOCK_SZ, MS_SYNC); |
| if (ret < 0) { |
| SNAP_PLOG(ERROR) << "msync header failed"; |
| return false; |
| } |
| |
| merge_initiated_ = true; |
| |
| return true; |
| } |
| |
| void Snapuserd::PrepareReadAhead() { |
| if (!read_ahead_feature_) { |
| return; |
| } |
| |
| struct BufferState* ra_state = GetBufferState(); |
| // Check if the data has to be re-constructed from the COW device |
| populate_data_from_cow_ = (ra_state->read_ahead_state == kCowReadAheadDone); |
| |
| StartReadAhead(); |
| } |
| |
| bool Snapuserd::GetRABuffer(std::unique_lock<std::mutex>* lock, uint64_t block, void* buffer) { |
| if (!lock->owns_lock()) { |
| SNAP_LOG(ERROR) << "GetRABuffer - Lock not held"; |
| return false; |
| } |
| std::unordered_map<uint64_t, void*>::iterator it = read_ahead_buffer_map_.find(block); |
| |
| // This will be true only for IOs generated as part of reading the root |
| // filesystem. IOs related to a merge should always be in the read-ahead cache. |
| if (it == read_ahead_buffer_map_.end()) { |
| return false; |
| } |
| |
| // Theoretically, we could send the data back from the read-ahead buffer |
| // all the way to the kernel without a memcpy. However, if the IO is |
| // unaligned, the wrapper function would need to touch the read-ahead |
| // buffers and the transitions would be a bit more complicated. |
| memcpy(buffer, it->second, BLOCK_SZ); |
| return true; |
| } |
| |
| // ========== State transition functions for read-ahead operations =========== |
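| // |
| // A rough summary of the READ_AHEAD_IO_TRANSITION state machine, as |
| // inferred from the functions below: |
| // |
| //   StartReadAhead():         -> READ_AHEAD_BEGIN (at init and from worker threads) |
| //   WaitForMergeToComplete(): READ_AHEAD_BEGIN -> READ_AHEAD_IN_PROGRESS (read-ahead thread) |
| //   ReadAheadIOCompleted():   -> IO_IN_PROGRESS (read-ahead thread; wakes the workers) |
| //   ReadAheadIOFailed():      -> READ_AHEAD_FAILURE (read-ahead thread; wakes everyone) |
| //   MergeCompleted():         -> IO_TERMINATED (shuts down the read-ahead thread) |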
| |
| bool Snapuserd::GetReadAheadPopulatedBuffer(uint64_t block, void* buffer) { |
| if (!read_ahead_feature_) { |
| return false; |
| } |
| |
| { |
| std::unique_lock<std::mutex> lock(lock_); |
| if (io_state_ == READ_AHEAD_IO_TRANSITION::READ_AHEAD_FAILURE) { |
| return false; |
| } |
| |
| if (io_state_ == READ_AHEAD_IO_TRANSITION::IO_IN_PROGRESS) { |
| return GetRABuffer(&lock, block, buffer); |
| } |
| } |
| |
| { |
| // Read-ahead thread IO is still in progress. Wait for it to complete. |
| std::unique_lock<std::mutex> lock(lock_); |
| while (!(io_state_ == READ_AHEAD_IO_TRANSITION::READ_AHEAD_FAILURE || |
| io_state_ == READ_AHEAD_IO_TRANSITION::IO_IN_PROGRESS)) { |
| cv.wait(lock); |
| } |
| |
| return GetRABuffer(&lock, block, buffer); |
| } |
| } |
| |
| // This is invoked by the read-ahead thread, waiting for merge IO |
| // to complete. |
| bool Snapuserd::WaitForMergeToComplete() { |
| { |
| std::unique_lock<std::mutex> lock(lock_); |
| while (!(io_state_ == READ_AHEAD_IO_TRANSITION::READ_AHEAD_BEGIN || |
| io_state_ == READ_AHEAD_IO_TRANSITION::IO_TERMINATED)) { |
| cv.wait(lock); |
| } |
| |
| if (io_state_ == READ_AHEAD_IO_TRANSITION::IO_TERMINATED) { |
| return false; |
| } |
| |
| io_state_ = READ_AHEAD_IO_TRANSITION::READ_AHEAD_IN_PROGRESS; |
| return true; |
| } |
| } |
| |
| // This is invoked during the launch of worker threads. We wait |
| // for the read-ahead thread to be fully up before the worker threads |
| // are launched; otherwise we will have a race between the worker threads |
| // and the read-ahead thread, specifically during re-construction. |
| bool Snapuserd::WaitForReadAheadToStart() { |
| { |
| std::unique_lock<std::mutex> lock(lock_); |
| while (!(io_state_ == READ_AHEAD_IO_TRANSITION::IO_IN_PROGRESS || |
| io_state_ == READ_AHEAD_IO_TRANSITION::READ_AHEAD_FAILURE)) { |
| cv.wait(lock); |
| } |
| |
| if (io_state_ == READ_AHEAD_IO_TRANSITION::READ_AHEAD_FAILURE) { |
| return false; |
| } |
| |
| return true; |
| } |
| } |
| |
| // Invoked by worker threads when a sequence of merge operations |
| // is complete, notifying the read-ahead thread to make forward |
| // progress. |
| void Snapuserd::StartReadAhead() { |
| { |
| std::lock_guard<std::mutex> lock(lock_); |
| io_state_ = READ_AHEAD_IO_TRANSITION::READ_AHEAD_BEGIN; |
| } |
| |
| cv.notify_one(); |
| } |
| |
| void Snapuserd::MergeCompleted() { |
| { |
| std::lock_guard<std::mutex> lock(lock_); |
| io_state_ = READ_AHEAD_IO_TRANSITION::IO_TERMINATED; |
| } |
| |
| cv.notify_one(); |
| } |
| |
| bool Snapuserd::ReadAheadIOCompleted(bool sync) { |
| if (sync) { |
| // Flush the entire buffer region |
| int ret = msync(mapped_addr_, total_mapped_addr_length_, MS_SYNC); |
| if (ret < 0) { |
| SNAP_PLOG(ERROR) << "msync failed after ReadAheadIOCompleted"; |
| return false; |
| } |
| |
| // Metadata and data are synced. Now, update the state. |
| // We need to update the state only after flushing the data; if there is |
| // a crash while read-ahead IO is in progress, the state of the data in |
| // the COW file is unknown. kCowReadAheadDone acts as a checkpoint: the |
| // data in the scratch space is good, and on the next reboot the |
| // read-ahead thread can safely re-construct the data from it. |
| struct BufferState* ra_state = GetBufferState(); |
| ra_state->read_ahead_state = kCowReadAheadDone; |
| |
| ret = msync(mapped_addr_, BLOCK_SZ, MS_SYNC); |
| if (ret < 0) { |
| SNAP_PLOG(ERROR) << "msync failed to flush the read-ahead completion state"; |
| return false; |
| } |
| } |
| |
| // Notify the worker threads |
| { |
| std::lock_guard<std::mutex> lock(lock_); |
| io_state_ = READ_AHEAD_IO_TRANSITION::IO_IN_PROGRESS; |
| } |
| |
| cv.notify_all(); |
| return true; |
| } |
| |
| void Snapuserd::ReadAheadIOFailed() { |
| { |
| std::lock_guard<std::mutex> lock(lock_); |
| io_state_ = READ_AHEAD_IO_TRANSITION::READ_AHEAD_FAILURE; |
| } |
| |
| cv.notify_all(); |
| } |
| |
| //========== End of state transition functions ==================== |
| |
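| // Worked example, assuming exceptions_per_area_ == 256 and |
| // NUM_SNAPSHOT_HDR_CHUNKS == 1 (as the comments in ReadMetadata() suggest): |
| // stride == 257, so chunk-ids 1, 258, 515, ... (i.e. chunk % 257 == 1) fall |
| // on metadata pages, and GetNextAllocatableChunkId(257) skips 258 and |
| // returns 259. |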
| bool Snapuserd::IsChunkIdMetadata(chunk_t chunk) { |
| uint32_t stride = exceptions_per_area_ + 1; |
| lldiv_t divresult = lldiv(chunk, stride); |
| |
| return (divresult.rem == NUM_SNAPSHOT_HDR_CHUNKS); |
| } |
| |
| // Find the next free chunk-id to be assigned. Check if the next free |
| // chunk-id represents a metadata page. If so, skip it. |
| chunk_t Snapuserd::GetNextAllocatableChunkId(chunk_t chunk) { |
| chunk_t next_chunk = chunk + 1; |
| |
| if (IsChunkIdMetadata(next_chunk)) { |
| next_chunk += 1; |
| } |
| return next_chunk; |
| } |
| |
| void Snapuserd::CheckMergeCompletionStatus() { |
| if (!merge_initiated_) { |
| SNAP_LOG(INFO) << "Merge was not initiated. Total-data-ops: " << reader_->total_data_ops(); |
| return; |
| } |
| |
| struct CowHeader* ch = reinterpret_cast<struct CowHeader*>(mapped_addr_); |
| |
| SNAP_LOG(INFO) << "Merge-status: Total-Merged-ops: " << ch->num_merge_ops |
| << " Total-data-ops: " << reader_->total_data_ops(); |
| } |
| |
| /* |
| * Read the metadata from the COW device and |
| * construct the metadata as required by the kernel. |
| * |
| * Please see the design of the kernel COW format. |
| * |
| * 1: Read the metadata from the internal COW device. |
| * 2: There are 3 COW operations: |
| * a: Replace op |
| * b: Copy op |
| * c: Zero op |
| * 3: For each of the 3 operations, op->new_block |
| * represents the block number in the base device |
| * to which one of the 3 operations has to be applied. |
| * This represents the old_chunk in the kernel COW format. |
| * 4: We need to assign a new_chunk for each corresponding old_chunk. |
| * 5: The algorithm is similar to how the kernel assigns chunk numbers |
| * while creating exceptions. However, there are a few cases |
| * which need to be addressed here: |
| * a: When a merge is initiated, the kernel scans the metadata |
| * pages backwards. Since we need to make sure that the merge |
| * ordering follows our COW format, we read the COW operations |
| * backwards and populate the metadata so that when the kernel |
| * starts merging from the back, those ops correspond to the |
| * beginning of our COW format. |
| * b: The kernel can batch-merge successive operations if the two |
| * chunk IDs are contiguous. This can be problematic when there |
| * is a crash during a merge; specifically when a merge operation |
| * has dependencies. These dependencies can only happen during |
| * copy operations. |
| * |
| * To avoid this problem, we make sure that overlapping copy |
| * operations are not batch-merged. |
| * 6: Use a monotonically increasing chunk number to assign the |
| * new_chunk. |
| * 7: Each chunk-id represents either |
| * a: a metadata page or |
| * b: a data page. |
| * 8: A chunk-id representing a data page is stored in a map. |
| * 9: A chunk-id representing a metadata page is converted into a vector |
| * index. We store these in a vector as the kernel requests the metadata |
| * during two stages: |
| * a: When the initial dm-snapshot device is created, the kernel |
| * requests all the metadata and stores it in its internal |
| * data structures. |
| * b: During the merge, the kernel requests the same metadata |
| * once again. |
| * In both cases, a quick lookup based on chunk-id is done. |
| * 10: When the chunk number is incremented, we need to check whether |
| * it falls on a metadata page and, if so, skip it. |
| * 11: Each 4k page contains 256 disk exceptions. We call this |
| * exceptions_per_area_. |
| * 12: The kernel stops issuing metadata IO requests when the new-chunk ID is 0. |
| */ |
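| // A small worked example of the chunk-id assignment below, assuming |
| // exceptions_per_area_ == 256 (so chunk-ids 1, 258, 515, ... are metadata): |
| // if the first op in reverse order is a replace op with new_block == 10, it |
| // yields disk_exception { old_chunk = 10, new_chunk = 2 }; the next op gets |
| // new_chunk 3, and so on, skipping metadata chunk-ids along the way. |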
| bool Snapuserd::ReadMetadata() { |
| reader_ = std::make_unique<CowReader>(); |
| CowHeader header; |
| CowOptions options; |
| bool metadata_found = false; |
| int replace_ops = 0, zero_ops = 0, copy_ops = 0; |
| |
| SNAP_LOG(DEBUG) << "ReadMetadata: Parsing cow file"; |
| |
| if (!reader_->Parse(cow_fd_)) { |
| SNAP_LOG(ERROR) << "Failed to parse"; |
| return false; |
| } |
| |
| if (!reader_->GetHeader(&header)) { |
| SNAP_LOG(ERROR) << "Failed to get header"; |
| return false; |
| } |
| |
| if (header.block_size != BLOCK_SZ) { |
| SNAP_LOG(ERROR) << "Invalid header block size found: " << header.block_size; |
| return false; |
| } |
| |
| reader_->InitializeMerge(); |
| SNAP_LOG(DEBUG) << "Merge-ops: " << header.num_merge_ops; |
| |
| if (!MmapMetadata()) { |
| SNAP_LOG(ERROR) << "mmap failed"; |
| return false; |
| } |
| |
| // Initialize the iterator for reading metadata |
| cowop_riter_ = reader_->GetRevOpIter(); |
| |
| exceptions_per_area_ = (CHUNK_SIZE << SECTOR_SHIFT) / sizeof(struct disk_exception); |
| |
| // Start from chunk number 2. Chunk 0 represents the header and chunk 1 |
| // represents the first metadata page. |
| chunk_t data_chunk_id = NUM_SNAPSHOT_HDR_CHUNKS + 1; |
| size_t num_ops = 0; |
| |
| loff_t offset = 0; |
| std::unique_ptr<uint8_t[]> de_ptr = |
| std::make_unique<uint8_t[]>(exceptions_per_area_ * sizeof(struct disk_exception)); |
| |
| // This memset is important. The kernel will stop issuing IO when the |
| // new-chunk ID is 0. When an area is not filled completely with all 256 |
| // exceptions, this memset ensures that the metadata read terminates. |
| memset(de_ptr.get(), 0, (exceptions_per_area_ * sizeof(struct disk_exception))); |
| |
| while (!cowop_riter_->Done()) { |
| const CowOperation* cow_op = &cowop_riter_->Get(); |
| struct disk_exception* de = |
| reinterpret_cast<struct disk_exception*>((char*)de_ptr.get() + offset); |
| |
| if (IsMetadataOp(*cow_op)) { |
| cowop_riter_->Next(); |
| continue; |
| } |
| |
| metadata_found = true; |
| // This loop handles all the replace and zero ops. |
| // We will handle the copy ops later, as they require special |
| // handling when assigning chunk-ids. Furthermore, we make |
| // sure that replace/zero and copy ops are not batch-merged; hence |
| // the bump in data_chunk_id before breaking out of this loop. |
| if (cow_op->type == kCowCopyOp) { |
| data_chunk_id = GetNextAllocatableChunkId(data_chunk_id); |
| break; |
| } |
| |
| if (cow_op->type == kCowReplaceOp) { |
| replace_ops++; |
| } else if (cow_op->type == kCowZeroOp) { |
| zero_ops++; |
| } |
| |
| // Construct the disk-exception |
| de->old_chunk = cow_op->new_block; |
| de->new_chunk = data_chunk_id; |
| |
| // Store operation pointer. |
| chunk_vec_.push_back(std::make_pair(ChunkToSector(data_chunk_id), cow_op)); |
| num_ops += 1; |
| offset += sizeof(struct disk_exception); |
| cowop_riter_->Next(); |
| |
| SNAP_LOG(DEBUG) << num_ops << ":" |
| << " Old-chunk: " << de->old_chunk << " New-chunk: " << de->new_chunk; |
| |
| if (num_ops == exceptions_per_area_) { |
| // Store it in vector at the right index. This maps the chunk-id to |
| // vector index. |
| vec_.push_back(std::move(de_ptr)); |
| offset = 0; |
| num_ops = 0; |
| |
| // Create buffer for next area |
| de_ptr = std::make_unique<uint8_t[]>(exceptions_per_area_ * |
| sizeof(struct disk_exception)); |
| memset(de_ptr.get(), 0, (exceptions_per_area_ * sizeof(struct disk_exception))); |
| |
| if (cowop_riter_->Done()) { |
| vec_.push_back(std::move(de_ptr)); |
| } |
| } |
| |
| data_chunk_id = GetNextAllocatableChunkId(data_chunk_id); |
| } |
| |
| int num_ra_ops_per_iter = GetBufferDataSize() / BLOCK_SZ; |
| std::optional<chunk_t> prev_id = {}; |
| std::vector<const CowOperation*> vec; |
| std::set<uint64_t> dest_blocks; |
| std::set<uint64_t> source_blocks; |
| size_t pending_copy_ops = exceptions_per_area_ - num_ops; |
| uint64_t total_copy_ops = reader_->total_copy_ops(); |
| |
| SNAP_LOG(DEBUG) << " Processing copy-ops at Area: " << vec_.size() |
| << " Number of replace/zero ops completed in this area: " << num_ops |
| << " Pending copy ops for this area: " << pending_copy_ops; |
| while (!cowop_riter_->Done()) { |
| do { |
| const CowOperation* cow_op = &cowop_riter_->Get(); |
| if (IsMetadataOp(*cow_op)) { |
| cowop_riter_->Next(); |
| continue; |
| } |
| |
| // We have two specific cases: |
| // |
| // ===================================================== |
| // Case 1: Overlapping copy regions |
| // |
| // Ex: |
| // |
| // Source -> Destination |
| // |
| // 1: 15 -> 18 |
| // 2: 16 -> 19 |
| // 3: 17 -> 20 |
| // 4: 18 -> 21 |
| // 5: 19 -> 22 |
| // 6: 20 -> 23 |
| // |
| // We have 6 copy operations to be executed in the OTA and there is an |
| // overlap. Update-engine will write them to the COW file as follows: |
| // |
| // Op-1: 20 -> 23 |
| // Op-2: 19 -> 22 |
| // Op-3: 18 -> 21 |
| // Op-4: 17 -> 20 |
| // Op-5: 16 -> 19 |
| // Op-6: 15 -> 18 |
| // |
| // Note that the block numbers are contiguous. Hence, all 6 copy |
| // operations can be batch-merged. However, that is problematic |
| // if we have a crash, as blocks 20, 19 and 18 would have been |
| // overwritten; subsequent recovery may then end up with silent |
| // data corruption when op-1, op-2 and op-3 are re-executed. |
| // |
| // To address the above problem, the read-ahead thread |
| // reads all 6 source blocks and caches them in the scratch |
| // space of the COW file. During the merge, the read-ahead |
| // thread serves the blocks from the read-ahead cache. |
| // If there is a crash during the merge, then on subsequent |
| // reboot the read-ahead thread recovers the data from the |
| // scratch space and re-constructs it; thereby there |
| // is no loss of data. |
| // |
| // Note that we follow the same order of COW operations |
| // as present in the COW file. This makes sure that |
| // operations are merged based on the order of ops present |
| // in the file. |
| //=========================================================== |
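| // Stop batching if the current op depends on an op already in the batch: |
| // i.e., its destination (new_block) matches an earlier op's source, or its |
| // source matches an earlier op's destination. (Note that, as written, |
| // dest_blocks records the source blocks of earlier ops and source_blocks |
| // their destination blocks.) |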
| if (prev_id.has_value()) { |
| if (dest_blocks.count(cow_op->new_block) || source_blocks.count(cow_op->source)) { |
| break; |
| } |
| } |
| metadata_found = true; |
| pending_copy_ops -= 1; |
| vec.push_back(cow_op); |
| dest_blocks.insert(cow_op->source); |
| source_blocks.insert(cow_op->new_block); |
| prev_id = cow_op->new_block; |
| cowop_riter_->Next(); |
| } while (!cowop_riter_->Done() && pending_copy_ops); |
| |
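| // Bump the chunk-id between batches; the resulting gap keeps chunk IDs |
| // non-contiguous so the kernel cannot batch-merge across the dependency |
| // boundary established above. |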
| data_chunk_id = GetNextAllocatableChunkId(data_chunk_id); |
| SNAP_LOG(DEBUG) << "Batch Merge copy-ops of size: " << vec.size() |
| << " Area: " << vec_.size() << " Area offset: " << offset |
| << " Pending-copy-ops in this area: " << pending_copy_ops; |
| |
| for (size_t i = 0; i < vec.size(); i++) { |
| struct disk_exception* de = |
| reinterpret_cast<struct disk_exception*>((char*)de_ptr.get() + offset); |
| const CowOperation* cow_op = vec[i]; |
| |
| de->old_chunk = cow_op->new_block; |
| de->new_chunk = data_chunk_id; |
| |
| // Store operation pointer. |
| chunk_vec_.push_back(std::make_pair(ChunkToSector(data_chunk_id), cow_op)); |
| offset += sizeof(struct disk_exception); |
| num_ops += 1; |
| copy_ops++; |
| if (read_ahead_feature_) { |
| read_ahead_ops_.push_back(cow_op); |
| } |
| |
| SNAP_LOG(DEBUG) << num_ops << ":" |
| << " Copy-op: " |
| << " Old-chunk: " << de->old_chunk << " New-chunk: " << de->new_chunk; |
| |
| if (num_ops == exceptions_per_area_) { |
| // Store it in vector at the right index. This maps the chunk-id to |
| // vector index. |
| vec_.push_back(std::move(de_ptr)); |
| num_ops = 0; |
| offset = 0; |
| |
| // Create buffer for next area |
| de_ptr = std::make_unique<uint8_t[]>(exceptions_per_area_ * |
| sizeof(struct disk_exception)); |
| memset(de_ptr.get(), 0, (exceptions_per_area_ * sizeof(struct disk_exception))); |
| |
| if (cowop_riter_->Done()) { |
| vec_.push_back(std::move(de_ptr)); |
| SNAP_LOG(DEBUG) << "ReadMetadata() completed; Number of Areas: " << vec_.size(); |
| } |
| |
| if (pending_copy_ops != 0) { |
| SNAP_LOG(ERROR) |
| << "Invalid pending_copy_ops: expected: 0 found: " << pending_copy_ops; |
| return false; |
| } |
| pending_copy_ops = exceptions_per_area_; |
| } |
| |
| data_chunk_id = GetNextAllocatableChunkId(data_chunk_id); |
| total_copy_ops -= 1; |
| /* |
| * Split the number of ops based on the size of the read-ahead buffer |
| * region. We need to ensure that the kernel doesn't issue IO on blocks |
| * which have not been read by the read-ahead thread. |
| */ |
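| // For example, with the default 2 MB buffer region, GetBufferDataSize() is |
| // 2088960 bytes, so num_ra_ops_per_iter == 2088960 / 4096 == 510 and a |
| // chunk-id gap is inserted after every 510 copy ops. |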
| if (read_ahead_feature_ && (total_copy_ops % num_ra_ops_per_iter == 0)) { |
| data_chunk_id = GetNextAllocatableChunkId(data_chunk_id); |
| } |
| } |
| vec.clear(); |
| dest_blocks.clear(); |
| source_blocks.clear(); |
| prev_id.reset(); |
| } |
| |
| // Partially filled area, or no metadata at all. If there is no |
| // metadata, fill the area with zeroes so that the kernel |
| // knows the merge is completed. |
| if (num_ops || !metadata_found) { |
| vec_.push_back(std::move(de_ptr)); |
| SNAP_LOG(DEBUG) << "ReadMetadata() completed. Partially filled area num_ops: " << num_ops |
| << " Areas: " << vec_.size(); |
| } |
| |
| chunk_vec_.shrink_to_fit(); |
| vec_.shrink_to_fit(); |
| read_ahead_ops_.shrink_to_fit(); |
| |
| // Sort the vector based on sectors, as we need this during unaligned access |
| std::sort(chunk_vec_.begin(), chunk_vec_.end(), compare); |
| |
| SNAP_LOG(INFO) << "ReadMetadata completed. Final-chunk-id: " << data_chunk_id |
| << " Num Sector: " << ChunkToSector(data_chunk_id) |
| << " Replace-ops: " << replace_ops << " Zero-ops: " << zero_ops |
| << " Copy-ops: " << copy_ops << " Areas: " << vec_.size() |
| << " Num-ops-merged: " << header.num_merge_ops |
| << " Total-data-ops: " << reader_->total_data_ops(); |
| |
| // Total number of sectors required for creating the dm-user device |
| num_sectors_ = ChunkToSector(data_chunk_id); |
| merge_initiated_ = false; |
| PrepareReadAhead(); |
| |
| return true; |
| } |
| |
| bool Snapuserd::MmapMetadata() { |
| CowHeader header; |
| reader_->GetHeader(&header); |
| |
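| // COW format v2 (or newer) images that carry a scratch buffer support the |
| // read-ahead feature; map the header plus the whole buffer region. Older |
| // images only need the first 4k page. |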
| if (header.major_version >= 2 && header.buffer_size > 0) { |
| total_mapped_addr_length_ = header.header_size + BUFFER_REGION_DEFAULT_SIZE; |
| read_ahead_feature_ = true; |
| } else { |
| // mmap the first 4k page - older COW format |
| total_mapped_addr_length_ = BLOCK_SZ; |
| read_ahead_feature_ = false; |
| } |
| |
| mapped_addr_ = mmap(NULL, total_mapped_addr_length_, PROT_READ | PROT_WRITE, MAP_SHARED, |
| cow_fd_.get(), 0); |
| if (mapped_addr_ == MAP_FAILED) { |
| SNAP_LOG(ERROR) << "mmap metadata failed"; |
| return false; |
| } |
| |
| return true; |
| } |
| |
| void Snapuserd::UnmapBufferRegion() { |
| int ret = munmap(mapped_addr_, total_mapped_addr_length_); |
| if (ret < 0) { |
| SNAP_PLOG(ERROR) << "munmap failed"; |
| } |
| } |
| |
| void MyLogger(android::base::LogId, android::base::LogSeverity severity, const char*, const char*, |
| unsigned int, const char* message) { |
| if (severity == android::base::ERROR) { |
| fprintf(stderr, "%s\n", message); |
| } else { |
| fprintf(stdout, "%s\n", message); |
| } |
| } |
| |
| bool Snapuserd::InitCowDevice() { |
| cow_fd_.reset(open(cow_device_.c_str(), O_RDWR)); |
| if (cow_fd_ < 0) { |
| SNAP_PLOG(ERROR) << "Open Failed: " << cow_device_; |
| return false; |
| } |
| |
| return ReadMetadata(); |
| } |
| |
| /* |
| * Entry point to launch threads |
| */ |
| bool Snapuserd::Start() { |
| std::vector<std::future<bool>> threads; |
| std::future<bool> ra_thread; |
| bool rathread = (read_ahead_feature_ && !read_ahead_ops_.empty()); |
| |
| // Start the read-ahead thread and wait |
| // for it to come up, as the data has to be re-constructed |
| // from the COW device. |
| if (rathread) { |
| ra_thread = std::async(std::launch::async, &ReadAheadThread::RunThread, |
| read_ahead_thread_.get()); |
| if (!WaitForReadAheadToStart()) { |
| SNAP_LOG(ERROR) << "Failed to start Read-ahead thread..."; |
| return false; |
| } |
| |
| SNAP_LOG(INFO) << "Read-ahead thread started..."; |
| } |
| |
| // Launch worker threads |
| for (size_t i = 0; i < worker_threads_.size(); i++) { |
| threads.emplace_back( |
| std::async(std::launch::async, &WorkerThread::RunThread, worker_threads_[i].get())); |
| } |
| |
| bool ret = true; |
| for (auto& t : threads) { |
| ret = t.get() && ret; |
| } |
| |
| if (rathread) { |
| // Notify the read-ahead thread that all worker threads |
| // are done. We need this explicit notification when |
| // there is an IO failure or there was a switch |
| // of dm-user table; thus, forcing the read-ahead |
| // thread to wake up. |
| MergeCompleted(); |
| ret = ret && ra_thread.get(); |
| } |
| |
| return ret; |
| } |
| |
| uint64_t Snapuserd::GetBufferMetadataOffset() { |
| CowHeader header; |
| reader_->GetHeader(&header); |
| |
| size_t size = header.header_size + sizeof(BufferState); |
| return size; |
| } |
| |
| /* |
| * Metadata for read-ahead is 16 bytes per block. For a 2 MB region, we |
| * end up with 8k (2 pages) worth of metadata. Thus, a 2 MB buffer |
| * region is split into: |
| * |
| * 1: 8k of metadata |
| * 2: (2 MB - 8k) of data (see GetBufferDataSize() below) |
| * |
| */ |
| size_t Snapuserd::GetBufferMetadataSize() { |
| CowHeader header; |
| reader_->GetHeader(&header); |
| |
| size_t metadata_bytes = (header.buffer_size * sizeof(struct ScratchMetadata)) / BLOCK_SZ; |
| return metadata_bytes; |
| } |
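| // Worked example: with a 2 MB (2097152-byte) buffer region and 16-byte |
| // ScratchMetadata entries, (2097152 * 16) / 4096 == 8192 bytes (8k) of |
| // metadata, matching the comment above. |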
| |
| size_t Snapuserd::GetBufferDataOffset() { |
| CowHeader header; |
| reader_->GetHeader(&header); |
| |
| return (header.header_size + GetBufferMetadataSize()); |
| } |
| |
| /* |
| * (2MB - 8K = 2088960 bytes) will be the buffer region to hold the data. |
| */ |
| size_t Snapuserd::GetBufferDataSize() { |
| CowHeader header; |
| reader_->GetHeader(&header); |
| |
| size_t size = header.buffer_size - GetBufferMetadataSize(); |
| return size; |
| } |
| |
| struct BufferState* Snapuserd::GetBufferState() { |
| CowHeader header; |
| reader_->GetHeader(&header); |
| |
| struct BufferState* ra_state = |
| reinterpret_cast<struct BufferState*>((char*)mapped_addr_ + header.header_size); |
| return ra_state; |
| } |
| |
| } // namespace snapshot |
| } // namespace android |