| // Copyright 2017 The Chromium OS Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| #include "puffin/src/huffman_table.h" |
| |
| #include <algorithm> |
| #include <vector> |
| |
| #include "puffin/src/logging.h" |
| |
| using std::string; |
| using std::vector; |
| |
| namespace puffin { |
| |
| // Permutations of input Huffman code lengths (used only to read code lengths |
| // necessary for reading Huffman table.) |
| const uint8_t kPermutations[19] = {16, 17, 18, 0, 8, 7, 9, 6, 10, 5, |
| 11, 4, 12, 3, 13, 2, 14, 1, 15}; |
| |
| // The bases of each alphabet which is added to the integer value of extra |
| // bits that comes after the Huffman code in the input to create the given |
| // length value. The last element is a guard. |
| const uint16_t kLengthBases[30] = { |
| 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, |
| 31, 35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258, 0xFFFF}; |
| |
| // Number of extra bits that comes after the associating Huffman code. |
| const uint8_t kLengthExtraBits[29] = {0, 0, 0, 0, 0, 0, 0, 0, 1, 1, |
| 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, |
| 4, 4, 4, 4, 5, 5, 5, 5, 0}; |
| |
| // Same as |kLengthBases| but for the distances instead of lengths. The last |
| // element is a guard. |
| const uint16_t kDistanceBases[31] = { |
| 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, |
| 49, 65, 97, 129, 193, 257, 385, 513, 769, 1025, 1537, |
| 2049, 3073, 4097, 6145, 8193, 12289, 16385, 24577, 0xFFFF}; |
| |
| // Same as |kLengthExtraBits| but for distances instead of lengths. |
| const uint8_t kDistanceExtraBits[30] = {0, 0, 0, 0, 1, 1, 2, 2, 3, 3, |
| 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, |
| 9, 9, 10, 10, 11, 11, 12, 12, 13, 13}; |
| |
| // 288 is the maximum number of needed huffman codes for an alphabet. Fixed |
| // huffman table needs 288 and dynamic huffman table needs maximum 286. |
| // 286 = 256 (coding a byte) + |
| // 1 (coding the end of block symbole) + |
| // 29 (coding the lengths) |
| HuffmanTable::HuffmanTable() : codeindexpairs_(288), initialized_(false) {} |
| |
| bool HuffmanTable::InitHuffmanCodes(const Buffer& lens, size_t* max_bits) { |
| // Temporary buffers used in |InitHuffmanCodes|. |
| uint16_t len_count_[kMaxHuffmanBits + 1] = {0}; |
| uint16_t next_code_[kMaxHuffmanBits + 1] = {0}; |
| |
| // 1. Count the number of codes for each length; |
| for (auto len : lens) { |
| len_count_[len]++; |
| } |
| |
| for (*max_bits = kMaxHuffmanBits; *max_bits >= 1; (*max_bits)--) { |
| if (len_count_[*max_bits] != 0) { |
| break; |
| } |
| } |
| |
| // Check for oversubscribed code lengths. (A code with length 'L' cannot have |
| // more than 2^L items. |
| for (size_t idx = 1; idx <= *max_bits; idx++) { |
| if (len_count_[idx] > (1 << idx)) { |
| LOG(ERROR) << "Oversubscribed code lengths error!"; |
| return false; |
| } |
| } |
| |
| // 2. Compute the coding of the first element for each length. |
| uint16_t code = 0; |
| len_count_[0] = 0; |
| for (size_t bits = 1; bits <= kMaxHuffmanBits; bits++) { |
| code = (code + len_count_[bits - 1]) << 1; |
| next_code_[bits] = code; |
| } |
| |
| codeindexpairs_.clear(); |
| // 3. Calculate all the code values. |
| for (size_t idx = 0; idx < lens.size(); idx++) { |
| auto len = lens[idx]; |
| if (len == 0) { |
| continue; |
| } |
| |
| // Reverse the code |
| CodeIndexPair cip; |
| cip.code = 0; |
| auto tmp_code = next_code_[len]; |
| for (size_t r = 0; r < len; r++) { |
| cip.code <<= 1; |
| cip.code |= tmp_code & 1U; |
| tmp_code >>= 1; |
| } |
| cip.index = idx; |
| codeindexpairs_.push_back(cip); |
| next_code_[len]++; |
| } |
| return true; |
| } |
| |
| bool HuffmanTable::BuildHuffmanCodes(const Buffer& lens, |
| vector<uint16_t>* hcodes, |
| size_t* max_bits) { |
| TEST_AND_RETURN_FALSE(InitHuffmanCodes(lens, max_bits)); |
| // Sort descending based on the bit-length of the code. |
| std::sort(codeindexpairs_.begin(), codeindexpairs_.end(), |
| [&lens](const CodeIndexPair& a, const CodeIndexPair& b) { |
| return lens[a.index] > lens[b.index]; |
| }); |
| |
| // Only zero out the part of hcodes which is valuable. |
| memset(hcodes->data(), 0, (1 << *max_bits) * sizeof(uint16_t)); |
| for (const auto& cip : codeindexpairs_) { |
| // The MSB bit of the code in hcodes is set if it is a valid code and its |
| // code exists in the input Huffman table. |
| (*hcodes)[cip.code] = cip.index | 0x8000; |
| auto fill_bits = *max_bits - lens[cip.index]; |
| for (auto idx = 1; idx < (1 << fill_bits); idx++) { |
| auto location = (idx << lens[cip.index]) | cip.code; |
| if (!((*hcodes)[location] & 0x8000)) { |
| (*hcodes)[location] = cip.index | 0x8000; |
| } |
| } |
| } |
| return true; |
| } |
| |
| bool HuffmanTable::BuildHuffmanReverseCodes(const Buffer& lens, |
| vector<uint16_t>* rcodes, |
| size_t* max_bits) { |
| TEST_AND_RETURN_FALSE(InitHuffmanCodes(lens, max_bits)); |
| // Sort ascending based on the index. |
| std::sort(codeindexpairs_.begin(), codeindexpairs_.end(), |
| [](const CodeIndexPair& a, const CodeIndexPair& b) -> bool { |
| return a.index < b.index; |
| }); |
| |
| size_t index = 0; |
| for (size_t idx = 0; idx < rcodes->size(); idx++) { |
| if (index < codeindexpairs_.size() && idx == codeindexpairs_[index].index) { |
| (*rcodes)[idx] = codeindexpairs_[index].code; |
| index++; |
| } else { |
| (*rcodes)[idx] = 0; |
| } |
| } |
| return true; |
| } |
| |
| bool HuffmanTable::BuildFixedHuffmanTable() { |
| if (!initialized_) { |
| // For all the vectors used in this class, we set the size in the |
| // constructor and we do not change the size later. This is for optimization |
| // purposes. The total size of data in this class is approximately |
| // 2KB. Because it is a constructor return values cannot be checked. |
| lit_len_lens_.resize(288); |
| lit_len_rcodes_.resize(288); |
| lit_len_hcodes_.resize(1 << 9); |
| |
| distance_lens_.resize(30); |
| distance_rcodes_.resize(30); |
| distance_hcodes_.resize(1 << 5); |
| |
| size_t i = 0; |
| while (i < 144) { |
| lit_len_lens_[i++] = 8; |
| } |
| while (i < 256) { |
| lit_len_lens_[i++] = 9; |
| } |
| while (i < 280) { |
| lit_len_lens_[i++] = 7; |
| } |
| while (i < 288) { |
| lit_len_lens_[i++] = 8; |
| } |
| |
| i = 0; |
| while (i < 30) { |
| distance_lens_[i++] = 5; |
| } |
| |
| TEST_AND_RETURN_FALSE( |
| BuildHuffmanCodes(lit_len_lens_, &lit_len_hcodes_, &lit_len_max_bits_)); |
| |
| TEST_AND_RETURN_FALSE(BuildHuffmanCodes(distance_lens_, &distance_hcodes_, |
| &distance_max_bits_)); |
| |
| TEST_AND_RETURN_FALSE(BuildHuffmanReverseCodes( |
| lit_len_lens_, &lit_len_rcodes_, &lit_len_max_bits_)); |
| |
| TEST_AND_RETURN_FALSE(BuildHuffmanReverseCodes( |
| distance_lens_, &distance_rcodes_, &distance_max_bits_)); |
| |
| initialized_ = true; |
| } |
| return true; |
| } |
| |
| bool HuffmanTable::BuildDynamicHuffmanTable(BitReaderInterface* br, |
| uint8_t* buffer, |
| size_t* length) { |
| // Initilize only once and reuse. |
| if (!initialized_) { |
| // Only resizing the arrays needed. |
| code_lens_.resize(19); |
| code_hcodes_.resize(1 << 7); |
| |
| lit_len_lens_.resize(286); |
| lit_len_hcodes_.resize(1 << 15); |
| |
| distance_lens_.resize(30); |
| distance_hcodes_.resize(1 << 15); |
| |
| // 286: Maximum number of literal/lengths symbols. |
| // 30: Maximum number of distance symbols. |
| // The reason we reserve this to the sum of both maximum sizes is that we |
| // need to calculate both huffman codes contiguously. See b/72815313. |
| tmp_lens_.resize(286 + 30); |
| initialized_ = true; |
| } |
| |
| // Read the header. Reads the first portion of the Huffman data from input and |
| // writes it into the puff |buffer|. The first portion includes the size |
| // (|num_lit_len|) of the literals/lengths Huffman code length array |
| // (|dynamic_lit_len_lens_|), the size (|num_distance|) of distance Huffman |
| // code length array (|dynamic_distance_lens_|), and the size (|num_codes|) of |
| // Huffman code length array (dynamic_code_lens_) for reading |
| // |dynamic_lit_len_lens_| and |dynamic_distance_lens_|. Then it follows by |
| // reading |dynamic_code_lens_|. |
| |
| TEST_AND_RETURN_FALSE(*length >= 3); |
| size_t index = 0; |
| TEST_AND_RETURN_FALSE(br->CacheBits(14)); |
| buffer[index++] = br->ReadBits(5); // HLIST |
| auto num_lit_len = br->ReadBits(5) + 257; |
| br->DropBits(5); |
| |
| buffer[index++] = br->ReadBits(5); // HDIST |
| auto num_distance = br->ReadBits(5) + 1; |
| br->DropBits(5); |
| |
| buffer[index++] = br->ReadBits(4); // HCLEN |
| auto num_codes = br->ReadBits(4) + 4; |
| br->DropBits(4); |
| |
| TEST_AND_RETURN_FALSE( |
| CheckHuffmanArrayLengths(num_lit_len, num_distance, num_codes)); |
| |
| bool checked = false; |
| size_t idx = 0; |
| TEST_AND_RETURN_FALSE(*length - index >= (num_codes + 1) / 2); |
| // Two codes per byte |
| for (; idx < num_codes; idx++) { |
| TEST_AND_RETURN_FALSE(br->CacheBits(3)); |
| code_lens_[kPermutations[idx]] = br->ReadBits(3); |
| if (checked) { |
| buffer[index++] |= br->ReadBits(3); |
| } else { |
| buffer[index] = br->ReadBits(3) << 4; |
| } |
| checked = !checked; |
| br->DropBits(3); |
| } |
| // Pad the last byte if odd number of codes. |
| if (checked) { |
| index++; |
| } |
| for (; idx < 19; idx++) { |
| code_lens_[kPermutations[idx]] = 0; |
| } |
| |
| TEST_AND_RETURN_FALSE( |
| BuildHuffmanCodes(code_lens_, &code_hcodes_, &code_max_bits_)); |
| |
| // Build literals/lengths and distance Huffman code length arrays. |
| auto bytes_available = (*length - index); |
| tmp_lens_.clear(); |
| TEST_AND_RETURN_FALSE(BuildHuffmanCodeLengths( |
| br, buffer + index, &bytes_available, code_max_bits_, |
| num_lit_len + num_distance, &tmp_lens_)); |
| index += bytes_available; |
| |
| // TODO(ahassani): Optimize this so the memcpy is not needed anymore. |
| lit_len_lens_.clear(); |
| lit_len_lens_.insert(lit_len_lens_.begin(), tmp_lens_.begin(), |
| tmp_lens_.begin() + num_lit_len); |
| |
| distance_lens_.clear(); |
| distance_lens_.insert(distance_lens_.begin(), tmp_lens_.begin() + num_lit_len, |
| tmp_lens_.end()); |
| |
| TEST_AND_RETURN_FALSE( |
| BuildHuffmanCodes(lit_len_lens_, &lit_len_hcodes_, &lit_len_max_bits_)); |
| |
| // Build distance Huffman codes. |
| TEST_AND_RETURN_FALSE(BuildHuffmanCodes(distance_lens_, &distance_hcodes_, |
| &distance_max_bits_)); |
| |
| *length = index; |
| return true; |
| } |
| |
| bool HuffmanTable::BuildHuffmanCodeLengths(BitReaderInterface* br, |
| uint8_t* buffer, |
| size_t* length, |
| size_t max_bits, |
| size_t num_codes, |
| Buffer* lens) { |
| size_t index = 0; |
| lens->clear(); |
| for (size_t idx = 0; idx < num_codes;) { |
| TEST_AND_RETURN_FALSE(br->CacheBits(max_bits)); |
| auto bits = br->ReadBits(max_bits); |
| uint16_t code; |
| size_t nbits; |
| TEST_AND_RETURN_FALSE(CodeAlphabet(bits, &code, &nbits)); |
| TEST_AND_RETURN_FALSE(index < *length); |
| br->DropBits(nbits); |
| if (code < 16) { |
| buffer[index++] = code; |
| lens->push_back(code); |
| idx++; |
| } else { |
| TEST_AND_RETURN_FALSE(code < 19); |
| size_t copy_num = 0; |
| uint8_t copy_val; |
| switch (code) { |
| case 16: |
| TEST_AND_RETURN_FALSE(idx != 0); |
| TEST_AND_RETURN_FALSE(br->CacheBits(2)); |
| copy_num = 3 + br->ReadBits(2); |
| buffer[index++] = 16 + br->ReadBits(2); // 3 - 6 times |
| copy_val = (*lens)[idx - 1]; |
| br->DropBits(2); |
| break; |
| |
| case 17: |
| TEST_AND_RETURN_FALSE(br->CacheBits(3)); |
| copy_num = 3 + br->ReadBits(3); |
| buffer[index++] = 20 + br->ReadBits(3); // 3 - 10 times |
| copy_val = 0; |
| br->DropBits(3); |
| break; |
| |
| case 18: |
| TEST_AND_RETURN_FALSE(br->CacheBits(7)); |
| copy_num = 11 + br->ReadBits(7); |
| buffer[index++] = 28 + br->ReadBits(7); // 11 - 138 times |
| copy_val = 0; |
| br->DropBits(7); |
| break; |
| |
| default: |
| LOG(ERROR) << "Invalid code!"; |
| return false; |
| } |
| idx += copy_num; |
| while (copy_num--) { |
| lens->push_back(copy_val); |
| } |
| } |
| } |
| TEST_AND_RETURN_FALSE(lens->size() == num_codes); |
| *length = index; |
| return true; |
| } |
| |
| bool HuffmanTable::BuildDynamicHuffmanTable(const uint8_t* buffer, |
| size_t length, |
| BitWriterInterface* bw) { |
| if (!initialized_) { |
| // Only resizing the arrays needed. |
| code_lens_.resize(19); |
| code_rcodes_.resize(19); |
| |
| lit_len_lens_.resize(286); |
| lit_len_rcodes_.resize(286); |
| |
| distance_lens_.resize(30); |
| distance_rcodes_.resize(30); |
| |
| tmp_lens_.resize(286 + 30); |
| |
| initialized_ = true; |
| } |
| |
| TEST_AND_RETURN_FALSE(length >= 3); |
| size_t index = 0; |
| // Write the header. |
| size_t num_lit_len = buffer[index] + 257; |
| TEST_AND_RETURN_FALSE(bw->WriteBits(5, buffer[index++])); |
| |
| size_t num_distance = buffer[index] + 1; |
| TEST_AND_RETURN_FALSE(bw->WriteBits(5, buffer[index++])); |
| |
| size_t num_codes = buffer[index] + 4; |
| TEST_AND_RETURN_FALSE(bw->WriteBits(4, buffer[index++])); |
| |
| TEST_AND_RETURN_FALSE( |
| CheckHuffmanArrayLengths(num_lit_len, num_distance, num_codes)); |
| |
| TEST_AND_RETURN_FALSE(length - index >= (num_codes + 1) / 2); |
| bool checked = false; |
| size_t idx = 0; |
| for (; idx < num_codes; idx++) { |
| uint8_t len; |
| if (checked) { |
| len = buffer[index++] & 0x0F; |
| } else { |
| len = buffer[index] >> 4; |
| } |
| checked = !checked; |
| code_lens_[kPermutations[idx]] = len; |
| TEST_AND_RETURN_FALSE(bw->WriteBits(3, len)); |
| } |
| if (checked) { |
| index++; |
| } |
| for (; idx < 19; idx++) { |
| code_lens_[kPermutations[idx]] = 0; |
| } |
| |
| TEST_AND_RETURN_FALSE( |
| BuildHuffmanReverseCodes(code_lens_, &code_rcodes_, &code_max_bits_)); |
| |
| // Build literal/lengths and distance Huffman code length arrays. |
| auto bytes_available = length - index; |
| TEST_AND_RETURN_FALSE( |
| BuildHuffmanCodeLengths(buffer + index, &bytes_available, bw, |
| num_lit_len + num_distance, &tmp_lens_)); |
| index += bytes_available; |
| |
| lit_len_lens_.clear(); |
| lit_len_lens_.insert(lit_len_lens_.begin(), tmp_lens_.begin(), |
| tmp_lens_.begin() + num_lit_len); |
| |
| distance_lens_.clear(); |
| distance_lens_.insert(distance_lens_.begin(), tmp_lens_.begin() + num_lit_len, |
| tmp_lens_.end()); |
| |
| // Build literal/lengths Huffman reverse codes. |
| TEST_AND_RETURN_FALSE(BuildHuffmanReverseCodes( |
| lit_len_lens_, &lit_len_rcodes_, &lit_len_max_bits_)); |
| |
| // Build distance Huffman reverse codes. |
| TEST_AND_RETURN_FALSE(BuildHuffmanReverseCodes( |
| distance_lens_, &distance_rcodes_, &distance_max_bits_)); |
| |
| TEST_AND_RETURN_FALSE(length == index); |
| |
| return true; |
| } |
| |
| bool HuffmanTable::BuildHuffmanCodeLengths(const uint8_t* buffer, |
| size_t* length, |
| BitWriterInterface* bw, |
| size_t num_codes, |
| Buffer* lens) { |
| lens->clear(); |
| uint16_t hcode; |
| size_t nbits; |
| size_t index = 0; |
| for (size_t idx = 0; idx < num_codes;) { |
| TEST_AND_RETURN_FALSE(index < *length); |
| auto pcode = buffer[index++]; |
| TEST_AND_RETURN_FALSE(pcode <= 155); |
| |
| auto code = pcode < 16 ? pcode : pcode < 20 ? 16 : pcode < 28 ? 17 : 18; |
| TEST_AND_RETURN_FALSE(CodeHuffman(code, &hcode, &nbits)); |
| TEST_AND_RETURN_FALSE(bw->WriteBits(nbits, hcode)); |
| if (code < 16) { |
| lens->push_back(code); |
| idx++; |
| } else { |
| size_t copy_num = 0; |
| uint8_t copy_val; |
| switch (code) { |
| case 16: |
| // Cannot repeat a non-existent last code if idx == 0. |
| TEST_AND_RETURN_FALSE(idx != 0); |
| TEST_AND_RETURN_FALSE(bw->WriteBits(2, pcode - 16)); |
| copy_num = 3 + pcode - 16; |
| copy_val = (*lens)[idx - 1]; |
| break; |
| |
| case 17: |
| TEST_AND_RETURN_FALSE(bw->WriteBits(3, pcode - 20)); |
| copy_num = 3 + pcode - 20; |
| copy_val = 0; |
| break; |
| |
| case 18: |
| TEST_AND_RETURN_FALSE(bw->WriteBits(7, pcode - 28)); |
| copy_num = 11 + pcode - 28; |
| copy_val = 0; |
| break; |
| |
| default: |
| break; |
| } |
| idx += copy_num; |
| while (copy_num--) { |
| lens->push_back(copy_val); |
| } |
| } |
| } |
| TEST_AND_RETURN_FALSE(lens->size() == num_codes); |
| *length = index; |
| return true; |
| } |
| |
| string BlockTypeToString(BlockType type) { |
| switch (type) { |
| case BlockType::kUncompressed: |
| return "Uncompressed"; |
| |
| case BlockType::kFixed: |
| return "Fixed"; |
| |
| case BlockType::kDynamic: |
| return "Dynamic"; |
| |
| default: |
| return "Unknown"; |
| } |
| } |
| |
| } // namespace puffin |