| // Copyright 2019 The Android Open Source Project |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| |
| // For implementation details, please refer to: |
| // https://www.khronos.org/registry/OpenGL/extensions/KHR/KHR_texture_compression_astc_hdr.txt |
| |
| // Please refer to this document for operator precedence (slightly different from C): |
| // https://www.khronos.org/registry/OpenGL/specs/gl/GLSLangSpec.4.60.html#operators |
| |
| #version 450 |
| #include "AstcUnquantMap.comp" |
| #include "Common.comp" |
| |
// Default precision for int is highp.
precision highp int;

// Work-group size: 8 x 8 x 1 invocations. main() decodes one compressed block per invocation.
layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in;

// Per-dispatch parameters supplied through push constants.
layout(push_constant) uniform ImageFormatBlock {
    uvec2 blockSize;  // block footprint in texels (x = width, y = height)
    uint baseLayer;   // added to the z coordinate of every invocation
    uint smallBlock;  // non-zero: texel coordinates are doubled before partition selection
} u_pushConstant;

// Binding 0 is read with imageLoad (one 128-bit compressed block per texel); binding 1 receives
// the decoded 8-bit RGBA texels. WITH_TYPE is not defined in this file (presumably it comes
// from Common.comp -- confirm).
layout(binding = 0, rgba32ui) readonly uniform WITH_TYPE(uimage) u_image0;
layout(binding = 1, rgba8ui) writeonly uniform WITH_TYPE(uimage) u_image1;
| |
// Flags indexed by color endpoint mode (CEM) 0..15; true marks the HDR modes
// 2, 3, 7, 11, 14 and 15.
const bool kHDRCEM[16] = {
    false, false, true,  true,   // CEM  0 ..  3
    false, false, false, true,   // CEM  4 ..  7
    false, false, false, true,   // CEM  8 .. 11
    false, false, true,  true,   // CEM 12 .. 15
};
| |
// Trit unpacking table for the integer sequence encoding (spec section C.2.12).
//
// Indexed by the 8-bit packed value shared by a group of five values (see decode_trit):
// kTritEncodings[packed][i] is the trit (0..2) of the i-th value of the group.
const uint kTritEncodings[256][5] = {
    {0, 0, 0, 0, 0}, {1, 0, 0, 0, 0}, {2, 0, 0, 0, 0}, {0, 0, 2, 0, 0}, {0, 1, 0, 0, 0},
    {1, 1, 0, 0, 0}, {2, 1, 0, 0, 0}, {1, 0, 2, 0, 0}, {0, 2, 0, 0, 0}, {1, 2, 0, 0, 0},
    {2, 2, 0, 0, 0}, {2, 0, 2, 0, 0}, {0, 2, 2, 0, 0}, {1, 2, 2, 0, 0}, {2, 2, 2, 0, 0},
    {2, 0, 2, 0, 0}, {0, 0, 1, 0, 0}, {1, 0, 1, 0, 0}, {2, 0, 1, 0, 0}, {0, 1, 2, 0, 0},
    {0, 1, 1, 0, 0}, {1, 1, 1, 0, 0}, {2, 1, 1, 0, 0}, {1, 1, 2, 0, 0}, {0, 2, 1, 0, 0},
    {1, 2, 1, 0, 0}, {2, 2, 1, 0, 0}, {2, 1, 2, 0, 0}, {0, 0, 0, 2, 2}, {1, 0, 0, 2, 2},
    {2, 0, 0, 2, 2}, {0, 0, 2, 2, 2}, {0, 0, 0, 1, 0}, {1, 0, 0, 1, 0}, {2, 0, 0, 1, 0},
    {0, 0, 2, 1, 0}, {0, 1, 0, 1, 0}, {1, 1, 0, 1, 0}, {2, 1, 0, 1, 0}, {1, 0, 2, 1, 0},
    {0, 2, 0, 1, 0}, {1, 2, 0, 1, 0}, {2, 2, 0, 1, 0}, {2, 0, 2, 1, 0}, {0, 2, 2, 1, 0},
    {1, 2, 2, 1, 0}, {2, 2, 2, 1, 0}, {2, 0, 2, 1, 0}, {0, 0, 1, 1, 0}, {1, 0, 1, 1, 0},
    {2, 0, 1, 1, 0}, {0, 1, 2, 1, 0}, {0, 1, 1, 1, 0}, {1, 1, 1, 1, 0}, {2, 1, 1, 1, 0},
    {1, 1, 2, 1, 0}, {0, 2, 1, 1, 0}, {1, 2, 1, 1, 0}, {2, 2, 1, 1, 0}, {2, 1, 2, 1, 0},
    {0, 1, 0, 2, 2}, {1, 1, 0, 2, 2}, {2, 1, 0, 2, 2}, {1, 0, 2, 2, 2}, {0, 0, 0, 2, 0},
    {1, 0, 0, 2, 0}, {2, 0, 0, 2, 0}, {0, 0, 2, 2, 0}, {0, 1, 0, 2, 0}, {1, 1, 0, 2, 0},
    {2, 1, 0, 2, 0}, {1, 0, 2, 2, 0}, {0, 2, 0, 2, 0}, {1, 2, 0, 2, 0}, {2, 2, 0, 2, 0},
    {2, 0, 2, 2, 0}, {0, 2, 2, 2, 0}, {1, 2, 2, 2, 0}, {2, 2, 2, 2, 0}, {2, 0, 2, 2, 0},
    {0, 0, 1, 2, 0}, {1, 0, 1, 2, 0}, {2, 0, 1, 2, 0}, {0, 1, 2, 2, 0}, {0, 1, 1, 2, 0},
    {1, 1, 1, 2, 0}, {2, 1, 1, 2, 0}, {1, 1, 2, 2, 0}, {0, 2, 1, 2, 0}, {1, 2, 1, 2, 0},
    {2, 2, 1, 2, 0}, {2, 1, 2, 2, 0}, {0, 2, 0, 2, 2}, {1, 2, 0, 2, 2}, {2, 2, 0, 2, 2},
    {2, 0, 2, 2, 2}, {0, 0, 0, 0, 2}, {1, 0, 0, 0, 2}, {2, 0, 0, 0, 2}, {0, 0, 2, 0, 2},
    {0, 1, 0, 0, 2}, {1, 1, 0, 0, 2}, {2, 1, 0, 0, 2}, {1, 0, 2, 0, 2}, {0, 2, 0, 0, 2},
    {1, 2, 0, 0, 2}, {2, 2, 0, 0, 2}, {2, 0, 2, 0, 2}, {0, 2, 2, 0, 2}, {1, 2, 2, 0, 2},
    {2, 2, 2, 0, 2}, {2, 0, 2, 0, 2}, {0, 0, 1, 0, 2}, {1, 0, 1, 0, 2}, {2, 0, 1, 0, 2},
    {0, 1, 2, 0, 2}, {0, 1, 1, 0, 2}, {1, 1, 1, 0, 2}, {2, 1, 1, 0, 2}, {1, 1, 2, 0, 2},
    {0, 2, 1, 0, 2}, {1, 2, 1, 0, 2}, {2, 2, 1, 0, 2}, {2, 1, 2, 0, 2}, {0, 2, 2, 2, 2},
    {1, 2, 2, 2, 2}, {2, 2, 2, 2, 2}, {2, 0, 2, 2, 2}, {0, 0, 0, 0, 1}, {1, 0, 0, 0, 1},
    {2, 0, 0, 0, 1}, {0, 0, 2, 0, 1}, {0, 1, 0, 0, 1}, {1, 1, 0, 0, 1}, {2, 1, 0, 0, 1},
    {1, 0, 2, 0, 1}, {0, 2, 0, 0, 1}, {1, 2, 0, 0, 1}, {2, 2, 0, 0, 1}, {2, 0, 2, 0, 1},
    {0, 2, 2, 0, 1}, {1, 2, 2, 0, 1}, {2, 2, 2, 0, 1}, {2, 0, 2, 0, 1}, {0, 0, 1, 0, 1},
    {1, 0, 1, 0, 1}, {2, 0, 1, 0, 1}, {0, 1, 2, 0, 1}, {0, 1, 1, 0, 1}, {1, 1, 1, 0, 1},
    {2, 1, 1, 0, 1}, {1, 1, 2, 0, 1}, {0, 2, 1, 0, 1}, {1, 2, 1, 0, 1}, {2, 2, 1, 0, 1},
    {2, 1, 2, 0, 1}, {0, 0, 1, 2, 2}, {1, 0, 1, 2, 2}, {2, 0, 1, 2, 2}, {0, 1, 2, 2, 2},
    {0, 0, 0, 1, 1}, {1, 0, 0, 1, 1}, {2, 0, 0, 1, 1}, {0, 0, 2, 1, 1}, {0, 1, 0, 1, 1},
    {1, 1, 0, 1, 1}, {2, 1, 0, 1, 1}, {1, 0, 2, 1, 1}, {0, 2, 0, 1, 1}, {1, 2, 0, 1, 1},
    {2, 2, 0, 1, 1}, {2, 0, 2, 1, 1}, {0, 2, 2, 1, 1}, {1, 2, 2, 1, 1}, {2, 2, 2, 1, 1},
    {2, 0, 2, 1, 1}, {0, 0, 1, 1, 1}, {1, 0, 1, 1, 1}, {2, 0, 1, 1, 1}, {0, 1, 2, 1, 1},
    {0, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {2, 1, 1, 1, 1}, {1, 1, 2, 1, 1}, {0, 2, 1, 1, 1},
    {1, 2, 1, 1, 1}, {2, 2, 1, 1, 1}, {2, 1, 2, 1, 1}, {0, 1, 1, 2, 2}, {1, 1, 1, 2, 2},
    {2, 1, 1, 2, 2}, {1, 1, 2, 2, 2}, {0, 0, 0, 2, 1}, {1, 0, 0, 2, 1}, {2, 0, 0, 2, 1},
    {0, 0, 2, 2, 1}, {0, 1, 0, 2, 1}, {1, 1, 0, 2, 1}, {2, 1, 0, 2, 1}, {1, 0, 2, 2, 1},
    {0, 2, 0, 2, 1}, {1, 2, 0, 2, 1}, {2, 2, 0, 2, 1}, {2, 0, 2, 2, 1}, {0, 2, 2, 2, 1},
    {1, 2, 2, 2, 1}, {2, 2, 2, 2, 1}, {2, 0, 2, 2, 1}, {0, 0, 1, 2, 1}, {1, 0, 1, 2, 1},
    {2, 0, 1, 2, 1}, {0, 1, 2, 2, 1}, {0, 1, 1, 2, 1}, {1, 1, 1, 2, 1}, {2, 1, 1, 2, 1},
    {1, 1, 2, 2, 1}, {0, 2, 1, 2, 1}, {1, 2, 1, 2, 1}, {2, 2, 1, 2, 1}, {2, 1, 2, 2, 1},
    {0, 2, 1, 2, 2}, {1, 2, 1, 2, 2}, {2, 2, 1, 2, 2}, {2, 1, 2, 2, 2}, {0, 0, 0, 1, 2},
    {1, 0, 0, 1, 2}, {2, 0, 0, 1, 2}, {0, 0, 2, 1, 2}, {0, 1, 0, 1, 2}, {1, 1, 0, 1, 2},
    {2, 1, 0, 1, 2}, {1, 0, 2, 1, 2}, {0, 2, 0, 1, 2}, {1, 2, 0, 1, 2}, {2, 2, 0, 1, 2},
    {2, 0, 2, 1, 2}, {0, 2, 2, 1, 2}, {1, 2, 2, 1, 2}, {2, 2, 2, 1, 2}, {2, 0, 2, 1, 2},
    {0, 0, 1, 1, 2}, {1, 0, 1, 1, 2}, {2, 0, 1, 1, 2}, {0, 1, 2, 1, 2}, {0, 1, 1, 1, 2},
    {1, 1, 1, 1, 2}, {2, 1, 1, 1, 2}, {1, 1, 2, 1, 2}, {0, 2, 1, 1, 2}, {1, 2, 1, 1, 2},
    {2, 2, 1, 1, 2}, {2, 1, 2, 1, 2}, {0, 2, 2, 2, 2}, {1, 2, 2, 2, 2}, {2, 2, 2, 2, 2},
    {2, 1, 2, 2, 2},
};
| |
// Quint unpacking table for the integer sequence encoding.
//
// Indexed by the 7-bit packed value shared by a group of three values (see decode_quint):
// kQuintEncodings[packed][i] is the quint (0..4) of the i-th value of the group.
const uint kQuintEncodings[128][3] = {
    {0, 0, 0}, {1, 0, 0}, {2, 0, 0}, {3, 0, 0}, {4, 0, 0}, {0, 4, 0}, {4, 4, 0}, {4, 4, 4},
    {0, 1, 0}, {1, 1, 0}, {2, 1, 0}, {3, 1, 0}, {4, 1, 0}, {1, 4, 0}, {4, 4, 1}, {4, 4, 4},
    {0, 2, 0}, {1, 2, 0}, {2, 2, 0}, {3, 2, 0}, {4, 2, 0}, {2, 4, 0}, {4, 4, 2}, {4, 4, 4},
    {0, 3, 0}, {1, 3, 0}, {2, 3, 0}, {3, 3, 0}, {4, 3, 0}, {3, 4, 0}, {4, 4, 3}, {4, 4, 4},
    {0, 0, 1}, {1, 0, 1}, {2, 0, 1}, {3, 0, 1}, {4, 0, 1}, {0, 4, 1}, {4, 0, 4}, {0, 4, 4},
    {0, 1, 1}, {1, 1, 1}, {2, 1, 1}, {3, 1, 1}, {4, 1, 1}, {1, 4, 1}, {4, 1, 4}, {1, 4, 4},
    {0, 2, 1}, {1, 2, 1}, {2, 2, 1}, {3, 2, 1}, {4, 2, 1}, {2, 4, 1}, {4, 2, 4}, {2, 4, 4},
    {0, 3, 1}, {1, 3, 1}, {2, 3, 1}, {3, 3, 1}, {4, 3, 1}, {3, 4, 1}, {4, 3, 4}, {3, 4, 4},
    {0, 0, 2}, {1, 0, 2}, {2, 0, 2}, {3, 0, 2}, {4, 0, 2}, {0, 4, 2}, {2, 0, 4}, {3, 0, 4},
    {0, 1, 2}, {1, 1, 2}, {2, 1, 2}, {3, 1, 2}, {4, 1, 2}, {1, 4, 2}, {2, 1, 4}, {3, 1, 4},
    {0, 2, 2}, {1, 2, 2}, {2, 2, 2}, {3, 2, 2}, {4, 2, 2}, {2, 4, 2}, {2, 2, 4}, {3, 2, 4},
    {0, 3, 2}, {1, 3, 2}, {2, 3, 2}, {3, 3, 2}, {4, 3, 2}, {3, 4, 2}, {2, 3, 4}, {3, 3, 4},
    {0, 0, 3}, {1, 0, 3}, {2, 0, 3}, {3, 0, 3}, {4, 0, 3}, {0, 4, 3}, {0, 0, 4}, {1, 0, 4},
    {0, 1, 3}, {1, 1, 3}, {2, 1, 3}, {3, 1, 3}, {4, 1, 3}, {1, 4, 3}, {0, 1, 4}, {1, 1, 4},
    {0, 2, 3}, {1, 2, 3}, {2, 2, 3}, {3, 2, 3}, {4, 2, 3}, {2, 4, 3}, {0, 2, 4}, {1, 2, 4},
    {0, 3, 3}, {1, 3, 3}, {2, 3, 3}, {3, 3, 3}, {4, 3, 3}, {3, 4, 3}, {0, 3, 4}, {1, 3, 4},
};
| |
// Number of rows in kRQuantParamTable.
const int kRQuantParamTableLength = 19;

// Endpoint quantization ranges from Table C.2.16 of the spec, widest range first.
// Each row is {trits, quints, bits}; the trailing comment is the largest value of the range.
// The quint-only range (0..4) and the trit-only range (0..2) are not listed.
const uint kRQuantParamTable[kRQuantParamTableLength][3] = {
    {0, 0, 8},  // 255
    {1, 0, 6},  // 191
    {0, 1, 5},  // 159
    {0, 0, 7},  // 127
    {1, 0, 5},  // 95
    {0, 1, 4},  // 79
    {0, 0, 6},  // 63
    {1, 0, 4},  // 47
    {0, 1, 3},  // 39
    {0, 0, 5},  // 31
    {1, 0, 3},  // 23
    {0, 1, 2},  // 19
    {0, 0, 4},  // 15
    {1, 0, 2},  // 11
    {0, 1, 1},  // 9
    {0, 0, 3},  // 7
    {1, 0, 1},  // 5
    {0, 0, 2},  // 3
    {0, 0, 1},  // 1
};
| |
// Returns bit number `bit` of `u` (bit 0 is the least significant) as 0 or 1.
uint bit(uint u, int bit) {
    const uint shifted = u >> bit;
    return shifted & 1u;
}
| |
// Extracts `bitCount` bits (0..32) starting at bit `bitStart` of a 128-bit value.
//
// The value is stored most-significant word first: u[3] holds bits 0..31 and u[0] holds
// bits 96..127. The field may cross one 32-bit word boundary. Callers must keep
// bitStart + bitCount <= 128.
uint bits128(uvec4 u, uint bitStart, uint bitCount) {
    // A zero-width read is always 0. Returning early also keeps the code below away from
    // bitStart + bitCount - 1 wrapping around and from shifts by 32, whose result is
    // undefined in GLSL.
    if (bitCount == 0u) {
        return 0u;
    }
    const uint firstIdx = bitStart / 32u;
    const uint firstOffset = bitStart % 32u;
    // 1 << 32 is undefined, so the full-width mask is spelled out.
    const uint bitMask = (bitCount >= 32u) ? 0xFFFFFFFFu : ((1u << bitCount) - 1u);
    const uint lastIdx = (bitStart + bitCount - 1u) / 32u;
    if (firstIdx == lastIdx) {
        // The whole field lives in one word.
        return (u[3u - firstIdx] >> firstOffset) & bitMask;
    }
    // The field continues in the next word. With bitCount <= 32 this only happens when
    // firstOffset is in 1..31, so both shifts below are well defined.
    const uint firstCount = 32u - firstOffset;
    const uint lowPart = (u[3u - firstIdx] >> firstOffset) & ((1u << firstCount) - 1u);
    const uint highPart = u[3u - firstIdx - 1u] << firstCount;
    return (highPart | lowPart) & bitMask;
}
| |
// Like bits128(), but the readable data ends at bit `bitEnd` (exclusive): bits at or past
// `bitEnd` read as zero. Returns up to `bitCount` bits starting at `bitStart`.
uint bits128fillZeros(uvec4 u, uint bitStart, uint bitEnd, uint bitCount) {
    // Nothing left to read.
    if (bitStart >= bitEnd) {
        return 0;
    }
    const uint remaining = bitEnd - bitStart;
    const uint readable = min(remaining, bitCount);
    return bits128(u, bitStart, readable);
}
| |
// Number of bits used by an integer sequence of `num_vals` values, each made of `trits`
// trits, `quints` quints and `bits` plain bits. The callers in this file pass 0 or 1 for
// `trits` and `quints`. See section C.2.22 of the spec for the formula.
uint get_bit_count(uint num_vals, uint trits, uint quints, uint bits) {
    // Five trits share 8 bits and three quints share 7 bits; both divisions round up.
    const uint from_trits = (num_vals * 8 * trits + 4) / 5;
    const uint from_quints = (num_vals * 7 * quints + 2) / 3;
    const uint from_bits = num_vals * bits;
    return from_trits + from_quints + from_bits;
}
| |
// Describes one packed group of an integer sequence.
//
// pack        receives the number of values per group: 5 with trits, 3 with quints, else 1
// packedSize  receives the size of a complete group in bits
// `trits` is checked before `quints`.
void get_pack_size(uint trits, uint quints, uint bits, out uint pack, out uint packedSize) {
    // Plain binary values unless a trit or quint is present.
    pack = 1;
    packedSize = bits;
    if (trits == 1) {
        pack = 5;
        packedSize = 8 + 5 * bits;
        return;
    }
    if (quints == 1) {
        pack = 3;
        packedSize = 7 + 3 * bits;
    }
}
| |
// Decodes one group of five trit-encoded values from `data`.
//
// start  first bit of the group
// end    end of the readable data (exclusive); bits past it read as zero
// n      number of plain bits per value
// Each result is (trit << n) | plain bits.
uint[5] decode_trit(uvec4 data, uint start, uint end, uint n) {
    const int kGroupSize = 5;
    // Number of packed-trit bits stored after the plain bits of each value.
    const int kTritBitsAfterValue[5] = {2, 2, 1, 2, 1};

    // Walk the group, separating the plain bits from the interleaved packed-trit bits.
    uint plainBits[kGroupSize];
    uint packedTrits = 0;
    uint packedTritsLength = 0;
    uint cursor = start;
    for (int i = 0; i < kGroupSize; ++i) {
        plainBits[i] = bits128fillZeros(data, cursor, end, n);
        cursor += n;

        const uint chunk = bits128fillZeros(data, cursor, end, kTritBitsAfterValue[i]);
        cursor += kTritBitsAfterValue[i];
        packedTrits |= chunk << packedTritsLength;
        packedTritsLength += kTritBitsAfterValue[i];
    }

    // Look up the five trits and put each one above its value's plain bits.
    uint[kGroupSize] result;
    for (int i = 0; i < kGroupSize; ++i) {
        result[i] = (kTritEncodings[packedTrits][i] << n) | plainBits[i];
    }
    return result;
}
| |
// Decodes one group of three quint-encoded values from `data`.
//
// start  first bit of the group
// end    end of the readable data (exclusive); bits past it read as zero
// n      number of plain bits per value
// Each result is (quint << n) | plain bits.
uint[3] decode_quint(uvec4 data, uint start, uint end, uint n) {
    // A quint group holds three values.
    const int kNumVals = 3;
    // Number of packed-quint bits stored after the plain bits of each value.
    const int kInterleavedBits[3] = {3, 2, 2};

    // Decode the block
    uint m[kNumVals];
    uint encoded = 0;
    uint encoded_bits_read = 0;
    for (int i = 0; i < kNumVals; ++i) {
        m[i] = bits128fillZeros(data, start, end, n);
        start += n;

        uint encoded_bits = bits128fillZeros(data, start, end, kInterleavedBits[i]);
        start += kInterleavedBits[i];
        encoded |= encoded_bits << encoded_bits_read;
        encoded_bits_read += kInterleavedBits[i];
    }

    // Look up the three quints and put each one above its value's plain bits.
    uint[kNumVals] result;
    for (int i = 0; i < kNumVals; ++i) {
        result[i] = kQuintEncodings[encoded][i] << n | m[i];
    }
    return result;
}
| |
// Number of endpoint values used by color endpoint mode `cem`:
// 2 for modes 0..3, 4 for 4..7, 6 for 8..11 and 8 for 12..15.
uint get_v_count(uint cem) {
    const uint cem_class = cem / 4;
    return 2 * (cem_class + 1);
}
| |
// Color endpoint mode (CEM) identifiers, used as case labels in decode_ldr_for_mode.

// Two endpoint values each
const uint kLDRLumaDirect = 0;
const uint kLDRLumaBaseOffset = 1;
const uint kHDRLumaLargeRange = 2;
const uint kHDRLumaSmallRange = 3;

// Four endpoint values each
const uint kLDRLumaAlphaDirect = 4;
const uint kLDRLumaAlphaBaseOffset = 5;
const uint kLDRRGBBaseScale = 6;
const uint kHDRRGBBaseScale = 7;

// Six endpoint values each
const uint kLDRRGBDirect = 8;
const uint kLDRRGBBaseOffset = 9;
const uint kLDRRGBBaseScaleTwoA = 10;
const uint kHDRRGBDirect = 11;

// Eight endpoint values each
const uint kLDRRGBADirect = 12;
const uint kLDRRGBABaseOffset = 13;
const uint kHDRRGBDirectLDRAlpha = 14;
const uint kHDRRGBDirectHDRAlpha = 15;
| |
// Exchanges the contents of v1 and v2.
void swap(inout ivec4 v1, inout ivec4 v2) {
    const ivec4 previous_v1 = v1;
    v1 = v2;
    v2 = previous_v1;
}
| |
// Moves bit 7 of `a` into bit 7 of `b` (after halving `b`), then turns the remaining bits
// 1..6 of `a` into a signed 6-bit offset in -32..31.
void bit_transfer_signed(inout int a, inout int b) {
    b = (b >> 1) | (a & 0x80);
    a = (a >> 1) & 0x3F;
    // Sign-extend from bit 5.
    if ((a & 0x20) != 0) {
        a -= 0x40;
    }
}
| |
// Replaces red and green with their averages with blue; blue and alpha are untouched.
void blue_contract(inout ivec4 val) {
    val.rg = (val.rg + val.bb) / 2;
}
| |
// Computes the two RGBA endpoint colors of one partition for the LDR color endpoint modes.
//
// vals       unquantized endpoint values
// start_idx  index in `vals` of the first value to use
// mode       color endpoint mode (one of the k* mode constants)
// c1, c2     receive the low and the high endpoint
//
// Modes that have no case below (the HDR modes) produce (0, 0, 0, 0) for both endpoints.
// Eight values are always fetched from `vals`, whatever the mode actually consumes.
void decode_ldr_for_mode(const uint[40] vals, uint start_idx, uint mode, out uvec4 c1,
                         out uvec4 c2) {
    int v0 = int(vals[start_idx]);
    int v1 = int(vals[start_idx + 1]);
    int v2 = int(vals[start_idx + 2]);
    int v3 = int(vals[start_idx + 3]);
    int v4 = int(vals[start_idx + 4]);
    int v5 = int(vals[start_idx + 5]);
    int v6 = int(vals[start_idx + 6]);
    int v7 = int(vals[start_idx + 7]);

    ivec4 lo;
    ivec4 hi;
    switch (mode) {
        case kLDRLumaDirect:
            lo = ivec4(ivec3(v0), 255);
            hi = ivec4(ivec3(v1), 255);
            break;

        case kLDRLumaBaseOffset: {
            // The base luma takes its top two bits from v1; the low six bits of v1 are the
            // offset, and the high luma saturates at 255.
            const int base = (v0 >> 2) | (v1 & 0xC0);
            const int top = min(base + (v1 & 0x3F), 0xFF);
            lo = ivec4(ivec3(base), 255);
            hi = ivec4(ivec3(top), 255);
            break;
        }

        case kLDRLumaAlphaDirect:
            lo = ivec4(ivec3(v0), v2);
            hi = ivec4(ivec3(v1), v3);
            break;

        case kLDRLumaAlphaBaseOffset:
            bit_transfer_signed(v1, v0);
            bit_transfer_signed(v3, v2);
            lo = clamp(ivec4(ivec3(v0), v2), 0, 255);
            hi = clamp(ivec4(ivec3(v0 + v1), v2 + v3), 0, 255);
            break;

        case kLDRRGBBaseScale:
            // The low endpoint is the high endpoint's RGB scaled by v3 / 256.
            hi = ivec4(v0, v1, v2, 255);
            lo = ivec4((hi.rgb * v3) >> 8, 255);
            break;

        case kLDRRGBDirect:
            lo = ivec4(v0, v2, v4, 255);
            hi = ivec4(v1, v3, v5, 255);
            // A smaller sum on the "high" side selects the swapped, blue-contracted form.
            if (v1 + v3 + v5 < v0 + v2 + v4) {
                swap(lo, hi);
                blue_contract(lo);
                blue_contract(hi);
            }
            break;

        case kLDRRGBBaseOffset:
            bit_transfer_signed(v1, v0);
            bit_transfer_signed(v3, v2);
            bit_transfer_signed(v5, v4);
            lo = ivec4(v0, v2, v4, 255);
            hi = ivec4(v0 + v1, v2 + v3, v4 + v5, 255);
            // A negative total offset selects the swapped, blue-contracted form.
            if (v1 + v3 + v5 < 0) {
                swap(lo, hi);
                blue_contract(lo);
                blue_contract(hi);
            }
            lo = clamp(lo, 0, 255);
            hi = clamp(hi, 0, 255);
            break;

        case kLDRRGBBaseScaleTwoA:
            // Same as base + scale, with an explicit alpha for each endpoint.
            hi = ivec4(v0, v1, v2, v5);
            lo = ivec4((ivec3(v0, v1, v2) * v3) >> 8, v4);
            break;

        case kLDRRGBADirect:
            lo = ivec4(v0, v2, v4, v6);
            hi = ivec4(v1, v3, v5, v7);
            // The sums are compared as unsigned values in this mode.
            if (uint(v1 + v3 + v5) < uint(v0 + v2 + v4)) {
                swap(lo, hi);
                blue_contract(lo);
                blue_contract(hi);
            }
            break;

        case kLDRRGBABaseOffset:
            bit_transfer_signed(v1, v0);
            bit_transfer_signed(v3, v2);
            bit_transfer_signed(v5, v4);
            bit_transfer_signed(v7, v6);
            lo = ivec4(v0, v2, v4, v6);
            hi = ivec4(v0 + v1, v2 + v3, v4 + v5, v6 + v7);
            if (v1 + v3 + v5 < 0) {
                swap(lo, hi);
                blue_contract(lo);
                blue_contract(hi);
            }
            lo = clamp(lo, 0, 255);
            hi = clamp(hi, 0, 255);
            break;

        default:
            // Unimplemented color encoding.
            // TODO(google): Is this the correct error handling?
            lo = ivec4(0, 0, 0, 0);
            hi = lo;
    }
    c1 = uvec4(lo);
    c2 = uvec4(hi);
}
| |
// 32-bit integer hash used by select_partition to scramble the partition seed.
// All arithmetic wraps modulo 2^32.
uint hash52(uint p) {
    p = p ^ (p >> 15);
    p = p - (p << 17);
    p = p + (p << 7);
    p = p + (p << 4);
    p = p ^ (p >> 5);
    p = p + (p << 16);
    p = p ^ (p >> 7);
    p = p ^ (p >> 3);
    p = p ^ (p << 6);
    return p ^ (p >> 17);
}
| |
// Returns the partition index (0..3) of the texel at (x, y) inside its block.
//
// seed            partition seed taken from the block
// partitioncount  number of partitions; 1 always yields partition 0
// When u_pushConstant.smallBlock is non-zero the texel coordinates are doubled first.
uint select_partition(uint seed, uint x, uint y, uint partitioncount) {
    if (partitioncount == 1) {
        return 0;
    }
    // No third coordinate is passed in, so the z terms below are always zero.
    const uint z = 0;
    if (u_pushConstant.smallBlock != 0) {
        x <<= 1;
        y <<= 1;
    }
    seed += (partitioncount - 1) * 1024;
    const uint rnum = hash52(seed);

    // Twelve 4-bit fields of the hash, grouped by the coordinate they multiply.
    // Component k of each vector feeds the score of partition k.
    uvec4 xScale = uvec4(rnum, rnum >> 8, rnum >> 16, rnum >> 24) & 0xFu;
    uvec4 yScale = uvec4(rnum >> 4, rnum >> 12, rnum >> 20, rnum >> 28) & 0xFu;
    uvec4 zScale =
        uvec4(rnum >> 26, (rnum >> 30) | (rnum << 2), rnum >> 18, rnum >> 22) & 0xFu;

    // Square every field.
    xScale *= xScale;
    yScale *= yScale;
    zScale *= zScale;

    // Pick the down-shifts from the low seed bits and the partition count.
    const uint shiftFromSeed = ((seed & 2) != 0) ? 4u : 5u;
    const uint shiftFromCount = (partitioncount == 3) ? 6u : 5u;
    const bool oddSeed = (seed & 1) != 0;
    const uint sh1 = oddSeed ? shiftFromSeed : shiftFromCount;
    const uint sh2 = oddSeed ? shiftFromCount : shiftFromSeed;
    const uint sh3 = ((seed & 0x10) != 0) ? sh1 : sh2;

    xScale >>= sh1;
    yScale >>= sh2;
    zScale >>= sh3;

    // One 6-bit score per candidate partition.
    uvec4 score = xScale * x + yScale * y + zScale * z +
                  uvec4(rnum >> 14, rnum >> 10, rnum >> 6, rnum >> 2);
    score &= 0x3Fu;

    // Partitions that do not exist cannot win.
    if (partitioncount < 4) score.w = 0u;
    if (partitioncount < 3) score.z = 0u;

    // Highest score wins; ties go to the lower partition index.
    if (score.x >= score.y && score.x >= score.z && score.x >= score.w) {
        return 0;
    }
    if (score.y >= score.z && score.y >= score.w) {
        return 1;
    }
    if (score.z >= score.w) {
        return 2;
    }
    return 3;
}
| |
// Builds a decoded block in which every texel is `color`.
// Only the first blockSize.x * blockSize.y entries (row-major) are written.
uvec4[144] single_color_block(uvec4 color) {
    uvec4 texels[144];
    const uint rowLength = u_pushConstant.blockSize.x;
    for (uint row = 0; row < u_pushConstant.blockSize.y; row++) {
        const uint rowStart = row * rowLength;
        for (uint col = 0; col < rowLength; col++) {
            texels[rowStart + col] = color;
        }
    }
    return texels;
}
| |
// Decoded block returned for blocks that cannot be decoded: opaque magenta everywhere.
uvec4[144] error_color_block() {
    const uvec4 kErrorColor = uvec4(0xff, 0, 0xff, 0xff);
    return single_color_block(kErrorColor);
}
| |
// Decodes one 128-bit ASTC block into RGBA texels.
//
// `u` holds the block with u[3] = bits 0..31 up to u[0] = bits 96..127. The result is
// indexed as [y * blockSize.x + x]; only the first blockSize.x * blockSize.y entries are
// written.
//
// - Void-extent blocks give a constant color (the high byte of each 16-bit channel).
// - Reserved block modes and illegal encodings give the error color (magenta).
// - HDR color endpoint modes are not implemented: texels whose partition uses one are
//   written as magenta.
uvec4[144] astc_decode_block(const uvec4 u) {
    uint d;    // 1 when the block uses two weight planes
    uint hdr;  // bit 9 of the block mode; selects the second weight-range table below
    uint b;
    uint a;
    uint r;  // weight range selector
    uint width;   // weight grid width
    uint height;  // weight grid height
    uvec4 cem;    // color endpoint mode of each partition
    uint weightGrid[120];
    const uint u3 = u[3];
    const uint b87 = u3 >> 7 & 3;
    const uint b65 = u3 >> 5 & 3;
    const uint b32 = u3 >> 2 & 3;
    a = b65;
    b = b87;
    d = bit(u3, 10);
    hdr = bit(u3, 9);

    // Decode the block mode: weight grid size and weight range selector.
    if ((u3 & 3) == 0) {
        r = b32 << 1 | bit(u3, 4);
        if (b87 == 0) {
            width = 12;
            height = a + 2;
        } else if (b87 == 1) {
            width = a + 2;
            height = 12;
        } else if (b87 == 3) {
            if (b65 == 0) {
                width = 6;
                height = 10;
            } else if (b65 == 1) {
                width = 10;
                height = 6;
            } else if ((u3 & 0xDFF) == 0xDFC) {
                // Void-extent
                // In void extent, the last 12 bits should be
                // 1 1 D 1 1 1 1 1 1 1 0 0
                // Where D is the HDR bit

                uvec4 color =
                    uvec4(u[1] >> 8 & 0xff, u[1] >> 24 & 0xff, u[0] >> 8 & 0xff, u[0] >> 24 & 0xff);
                return single_color_block(color);
            } else {  // reserved
                return error_color_block();
            }
        } else {  // b87 == 2
            b = u3 >> 9 & 3;
            width = a + 6;
            height = b + 6;
            d = 0;
            hdr = 0;
        }
    } else {
        r = (u3 & 3) << 1 | bit(u3, 4);
        if (b32 == 0) {
            width = b + 4;
            height = a + 2;
        } else if (b32 == 1) {
            width = b + 8;
            height = a + 2;
        } else if (b32 == 2) {
            width = a + 2;
            height = b + 8;
        } else if (bit(u3, 8) == 0) {
            width = a + 2;
            height = (b & 1) + 6;
        } else {
            width = (b & 1) + 2;
            height = a + 2;
        }
    }

    // The weight grid may not be larger than the block footprint.
    if (width > u_pushConstant.blockSize.x || height > u_pushConstant.blockSize.y) {
        return error_color_block();
    }
    // Decode weight
    uint trits = 0;
    uint quints = 0;
    uint bits = 0;
    const uint weightCounts = height * width * (d + 1);
    const int kMaxNumWeights = 64;
    if (kMaxNumWeights < weightCounts) {
        return error_color_block();
    }
    {
        // Map the range selector to (trits, quints, bits); selectors 0 and 1 are reserved.
        if (hdr == 0) {
            switch (r) {
                case 2:
                    bits = 1;
                    break;
                case 3:
                    trits = 1;
                    break;
                case 4:
                    bits = 2;
                    break;
                case 5:
                    quints = 1;
                    break;
                case 6:
                    trits = 1;
                    bits = 1;
                    break;
                case 7:
                    bits = 3;
                    break;
                default:
                    return error_color_block();
            }
        } else {
            switch (r) {
                case 2:
                    bits = 1;
                    quints = 1;
                    break;
                case 3:
                    trits = 1;
                    bits = 2;
                    break;
                case 4:
                    bits = 4;
                    break;
                case 5:
                    quints = 1;
                    bits = 2;
                    break;
                case 6:
                    trits = 1;
                    bits = 3;
                    break;
                case 7:
                    bits = 5;
                    break;
                default:
                    return error_color_block();
            }
        }
        uint packedSize = 0;
        uint pack = 0;
        get_pack_size(trits, quints, bits, pack, packedSize);
        uint srcIdx = 0;
        uint dstIdx = 0;
        uvec4 uReversed = bitfieldReverse(u);
        const uint weightBitCount = get_bit_count(weightCounts, trits, quints, bits);
        const int kWeightGridMinBitLength = 24;
        const int kWeightGridMaxBitLength = 96;
        if (weightBitCount < kWeightGridMinBitLength || weightBitCount > kWeightGridMaxBitLength) {
            return error_color_block();
        }
        // Weights are stored from bit 127 downwards in reversed bit order. After reversing
        // the bits of every word and the order of the words, the weight stream starts at
        // bit 0 of uReversed.
        uReversed = uvec4(uReversed[3], uReversed[2], uReversed[1], uReversed[0]);
        // Multiplier / shift pairs that expand an n-bit weight to 0..63, indexed by n - 1.
        const uint kUnquantBinMulTable[] = {0x3f, 0x15, 0x9, 0x4, 0x2, 0x1};
        const uint kUnquantBinMovTable[] = {0x8, 0x8, 0x8, 0x2, 0x4, 0x8};
        while (dstIdx < weightCounts) {
            if (trits == 1) {
                uint decoded[5] = decode_trit(uReversed, srcIdx, weightBitCount, bits);
                for (int i = 0; i < 5; i++) {
                    weightGrid[dstIdx] =
                        kUnquantTritWeightMap[kUnquantTritWeightMapBitIdx[bits] + decoded[i]];
                    // Stretch 0..63 to 0..64.
                    if (weightGrid[dstIdx] > 32) {
                        weightGrid[dstIdx] += 1;
                    }
                    dstIdx++;
                    if (dstIdx >= weightCounts) {
                        break;
                    }
                }
            } else if (quints == 1) {
                uint decoded[3] = decode_quint(uReversed, srcIdx, weightBitCount, bits);
                for (int i = 0; i < 3; i++) {
                    // TODO: handle overflow in the last
                    weightGrid[dstIdx] =
                        kUnquantQuintWeightMap[kUnquantQuintWeightMapBitIdx[bits] + decoded[i]];
                    if (weightGrid[dstIdx] > 32) {
                        weightGrid[dstIdx] += 1;
                    }
                    dstIdx++;
                    if (dstIdx >= weightCounts) {
                        break;
                    }
                }
            } else {
                uint decodedRaw = bits128(uReversed, srcIdx, packedSize);
                uint decoded = decodedRaw * kUnquantBinMulTable[bits - 1] |
                               decodedRaw >> kUnquantBinMovTable[bits - 1];
                weightGrid[dstIdx] = decoded;
                if (weightGrid[dstIdx] > 32) {
                    weightGrid[dstIdx] += 1;
                }
                dstIdx++;
            }
            srcIdx += packedSize;
        }
    }
    uint partitionCount = (u3 >> 11 & 3) + 1;
    // Two weight planes cannot be combined with four partitions.
    if (d == 1 && partitionCount == 4) {
        return error_color_block();
    }
    const uint weightStart = 128 - get_bit_count(weightCounts, trits, quints, bits);
    uint dualPlaneStart = 0;
    // Decode cem mode
    if (partitionCount == 1) {
        // Single-partition mode
        cem[0] = u3 >> 13 & 0xf;
        dualPlaneStart = weightStart - d * 2;
    } else {
        // Multi-partition mode
        // Calculate CEM for all 4 partitions, even when partitionCount < 4
        uint partMode = u3 >> 23 & 3;
        const uint kExtraMBitsTable[4] = {0, 2, 5, 8};
        const uint extraMBitCount = (partMode == 0) ? 0 : kExtraMBitsTable[partitionCount - 1];
        const uint extraMStart = weightStart - extraMBitCount;
        dualPlaneStart = extraMStart - d * 2;

        if (partMode == 0) {
            // All partitions share one mode.
            uint cem_all = u3 >> 25 & 0xf;
            cem = uvec4(cem_all, cem_all, cem_all, cem_all);
        } else {
            // Per partition: one class-select bit on top of the base class, plus two mode
            // bits. Mode bits that do not fit next to the class bits sit just below the
            // weights.
            uint cemBase = partMode - 1;
            uvec4 cemHigh = cemBase + uvec4(bit(u3, 25), bit(u3, 26), bit(u3, 27), bit(u3, 28));
            const uint extraM = bits128(u, extraMStart, extraMBitCount);
            const uint kMainMBitsTable[4] = {0, 2, 1, 0};
            const uint mainMBitCount = kMainMBitsTable[partitionCount - 1];
            const uint m = extraM << mainMBitCount | ((u3 >> 27 & 3) >> (2 - mainMBitCount));
            cem = cemHigh << 2 | uvec4(m & 3, m >> 2 & 3, m >> 4 & 3, m >> 6 & 3);
        }
    }
    // Decode end points
    uvec4 endPoints[4][2];
    {
        uint totalV = 0;
        for (uint part = 0; part < partitionCount; part++) {
            totalV += get_v_count(cem[part]);
        }
        // Illegal encoding: the endpoint modes need more than 18 values.
        const uint kMaxEndpointValues = 18;
        if (totalV > kMaxEndpointValues) {
            return error_color_block();
        }
        const uint epStart = (partitionCount == 1) ? 17 : 29;
        const uint totalAvailBits = dualPlaneStart - epStart;
        if (totalAvailBits >= 128) {
            // overflowed
            return error_color_block();
        }
        // Illegal encoding: fewer than ceil(13 * values / 5) bits are left for endpoints.
        if (totalAvailBits < (13 * totalV + 4) / 5) {
            return error_color_block();
        }
        // Pick the widest quantization range whose encoding fits in the available bits.
        uint epQuints = 0;
        uint epTrits = 0;
        uint epBits = 0;
        uint i;
        for (i = 0; i < kRQuantParamTableLength; i++) {
            epTrits = kRQuantParamTable[i][0];
            epQuints = kRQuantParamTable[i][1];
            epBits = kRQuantParamTable[i][2];
            if (get_bit_count(totalV, epTrits, epQuints, epBits) <= totalAvailBits) {
                break;
            }
        }
        if (i >= kRQuantParamTableLength) {
            return error_color_block();
        }

        const uint epBitCount = get_bit_count(totalV, epTrits, epQuints, epBits);
        const uint epEnd = epStart + epBitCount;
        uint packedSize = 0;
        uint pack = 0;
        get_pack_size(epTrits, epQuints, epBits, pack, packedSize);

        // Decode end point parameters into buffer
        uint vBuffer[40];
        uint srcIdx = epStart;
        uint dstIdx = 0;
        // Multiplier / shift pairs that expand an n-bit value to 0..255, indexed by n - 1.
        const uint kUnquantBinMulTable[8] = {0xff, 0x55, 0x24, 0x11, 0x8, 0x4, 0x2, 0x1};
        const uint kUnquantBinMovTable[8] = {8, 8, 1, 8, 2, 4, 6, 8};
        while (dstIdx < totalV) {
            if (epTrits == 1) {
                uint decoded[5] = decode_trit(u, srcIdx, epEnd, epBits);
                for (int i = 0; i < 5; i++) {
                    vBuffer[dstIdx] =
                        kUnquantTritColorMap[kUnquantTritColorMapBitIdx[epBits] + decoded[i]];
                    dstIdx++;
                    if (dstIdx >= totalV) {
                        break;
                    }
                }
            } else if (epQuints == 1) {
                uint decoded[3] = decode_quint(u, srcIdx, epEnd, epBits);
                for (int i = 0; i < 3; i++) {
                    vBuffer[dstIdx] =
                        kUnquantQuintColorMap[kUnquantQuintColorMapBitIdx[epBits] + decoded[i]];
                    dstIdx++;
                    if (dstIdx >= totalV) {
                        break;
                    }
                }
            } else {
                uint src = bits128(u, srcIdx, packedSize);
                uint decoded =
                    src * kUnquantBinMulTable[epBits - 1] | src >> kUnquantBinMovTable[epBits - 1];
                vBuffer[dstIdx] = decoded;
                dstIdx++;
            }
            srcIdx += packedSize;
        }
        uint bufferIdx = 0;
        for (uint part = 0; part < partitionCount; part++) {
            // TODO: HDR support
            decode_ldr_for_mode(vBuffer, bufferIdx, cem[part], endPoints[part][0],
                                endPoints[part][1]);
            bufferIdx += get_v_count(cem[part]);
        }
    }
    uvec4 ret[144];
    {
        // Scale factor that maps texel coordinates onto the weight grid.
        uvec2 dst = (1024 + u_pushConstant.blockSize / 2) / (u_pushConstant.blockSize - 1);
        // With two planes the weights of both planes are interleaved.
        uint dd = d + 1;
        for (uint h = 0; h < u_pushConstant.blockSize.y; h++) {
            for (uint w = 0; w < u_pushConstant.blockSize.x; w++) {
                uint part = select_partition(u3 >> 13 & 1023, w, h, partitionCount);
                if (kHDRCEM[cem[part]]) {
                    // HDR not supported
                    ret[h * u_pushConstant.blockSize.x + w] = uvec4(0xff, 0, 0xff, 0xff);
                    continue;
                }
                // Calculate weight
                uvec2 st = uvec2(w, h);
                uvec2 cst = dst * st;
                uvec2 gst = (cst * (uvec2(width, height) - 1) + 32) >> 6;
                uvec2 jst = gst >> 4;   // integer grid position
                uvec2 fst = gst & 0xf;  // fractional position in 1/16 steps
                uint v0 = jst.x + jst.y * width;
                // The four surrounding grid weights; .x is plane 0, .y is plane 1.
                uvec2 p00 = uvec2(weightGrid[v0 * dd], weightGrid[v0 * dd + 1]);
                uvec2 p01 = uvec2(weightGrid[(v0 + 1) * dd], weightGrid[(v0 + 1) * dd + 1]);
                uvec2 p10 = uvec2(weightGrid[(v0 + width) * dd], weightGrid[(v0 + width) * dd + 1]);
                uvec2 p11 =
                    uvec2(weightGrid[(v0 + width + 1) * dd], weightGrid[(v0 + width + 1) * dd + 1]);
                // Bilinear factors; they add up to 16.
                uint w11 = (fst.x * fst.y + 8) >> 4;
                uint w10 = fst.y - w11;
                uint w01 = fst.x - w11;
                uint w00 = 16 - fst.x - fst.y + w11;
                uvec2 i = (p00 * w00 + p01 * w01 + p10 * w10 + p11 * w11 + 8) >> 4;

                // Blend the two endpoints of the texel's partition.
                uvec4 c0 = endPoints[part][0];
                uvec4 c1 = endPoints[part][1];
                uvec4 c = (c0 * (64 - i[0]) + c1 * i[0] + 32) / 64;
                if (d == 1) {
                    // The channel selected by the two bits at dualPlaneStart uses plane 1.
                    uint ccs = bits128(u, dualPlaneStart, 2);
                    c[ccs] = (c0[ccs] * (64 - i[1]) + c1[ccs] * i[1] + 32) / 64;
                }
                ret[h * u_pushConstant.blockSize.x + w] = c;
            }
        }
    }
    return ret;
}
| |
// Rows of a decoded block that main() writes out for 1D array images: always one.
uint block_y_size_1DArray() {
    const uint kRows = 1;
    return kRows;
}
| |
// Rows of a decoded block that main() writes out for 2D array images: the block height.
uint block_y_size_2DArray() {
    const uint rows = u_pushConstant.blockSize.y;
    return rows;
}
| |
// Rows of a decoded block that main() writes out for 3D images: the block height.
uint block_y_size_3D() {
    const uint rows = u_pushConstant.blockSize.y;
    return rows;
}
| |
// Reverses the byte order of each 32-bit component of `a`.
uvec4 flip32(uvec4 a) {
    const uvec4 byte0ToTop = (a & 0xffu) << 24;
    const uvec4 byte1Up = (a & 0xff00u) << 8;
    const uvec4 byte2Down = (a & 0xff0000u) >> 8;
    const uvec4 byte3ToBottom = (a & 0xff000000u) >> 24;
    return byte0ToTop | byte1Up | byte2Down | byte3ToBottom;
}
| |
// Entry point: each invocation loads one compressed block from u_image0, decodes it and
// stores the resulting texels into u_image1.
void main(void) {
    // Block coordinate; the layer is offset by the push-constant base layer.
    const ivec3 blockPos =
        ivec3(gl_GlobalInvocationID.xy,
              int(gl_GlobalInvocationID.z) + int(u_pushConstant.baseLayer));

    // astc_decode_block expects the four words in the opposite order.
    const uvec4 raw = uvec4(imageLoad(u_image0, WITH_TYPE(getPos)(blockPos)));
    const uvec4[144] texels = astc_decode_block(raw.wzyx);

    const uvec2 blockSize = u_pushConstant.blockSize;
    const uint rows = WITH_TYPE(block_y_size_)();
    for (uint y = 0; y < rows; y++) {
        for (uint x = 0; x < blockSize.x; x++) {
            const ivec3 texelPos = ivec3(blockPos.xy * blockSize + ivec2(x, y), blockPos.z);
            imageStore(u_image1, WITH_TYPE(getPos)(texelPos), texels[y * blockSize.x + x]);
        }
    }
}