| /* |
| * Copyright (C) 2022 The Android Open Source Project |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| package com.android.modules.utils; |
| |
| import java.io.UTFDataFormatException; |
| |
/**
 * Static helpers for measuring, encoding and decoding <i>modified UTF-8</i>, the variant in
 * which U+0000 is written as two bytes and every other char takes one, two or three bytes.
 */
public class ModifiedUtf8 {
    /**
     * Converts a run of <i>modified UTF-8</i> bytes into a string.
     *
     * <p>Exactly {@code utfSize} bytes of {@code in} are consumed, beginning at {@code offset}.
     * The decoded chars are accumulated in {@code out}, starting at index 0, so {@code out}
     * needs one slot per decoded char (never more than {@code utfSize}).
     *
     * <p>A literal zero byte (which the encoding is not supposed to produce) is accepted and
     * decoded to U+0000, matching what the RI does.
     *
     * @param in source bytes
     * @param out caller-supplied scratch buffer that receives the decoded chars
     * @param offset index in {@code in} of the first byte to decode
     * @param utfSize number of bytes to decode
     * @return a new string built from the decoded chars
     * @throws UTFDataFormatException if a lead byte is not a valid one-, two- or three-byte
     *         prefix, if a continuation byte is malformed, or if a multi-byte sequence runs
     *         past {@code utfSize}; positions in the message are relative to {@code offset}
     */
    public static String decode(byte[] in, char[] out, int offset, int utfSize)
            throws UTFDataFormatException {
        int consumed = 0;
        int written = 0;

        while (consumed < utfSize) {
            // The raw lead byte is parked in the output slot first. For a single-byte char
            // that is already the final value; otherwise the slot is overwritten below.
            // Bytes >= 0x80 are sign-extended by the cast, which the masks below ignore.
            final char lead = (char) in[offset + consumed++];
            out[written] = lead;

            if (lead < '\u0080') {
                // 0xxxxxxx: plain seven-bit char.
                written++;
                continue;
            }

            final int first = lead;

            if ((first & 0xe0) == 0xc0) {
                // 110xxxxx 10xxxxxx
                if (consumed >= utfSize) {
                    throw new UTFDataFormatException("bad second byte at " + consumed);
                }
                final int second = in[offset + consumed];
                consumed++;
                if ((second & 0xC0) != 0x80) {
                    throw new UTFDataFormatException("bad second byte at " + (consumed - 1));
                }
                out[written] = (char) (((first & 0x1F) << 6) | (second & 0x3F));
                written++;
                continue;
            }

            if ((first & 0xf0) == 0xe0) {
                // 1110xxxx 10xxxxxx 10xxxxxx
                if (consumed + 1 >= utfSize) {
                    throw new UTFDataFormatException("bad third byte at " + (consumed + 1));
                }
                final int second = in[offset + consumed];
                final int third = in[offset + consumed + 1];
                consumed += 2;
                final boolean secondOk = (second & 0xC0) == 0x80;
                final boolean thirdOk = (third & 0xC0) == 0x80;
                if (!(secondOk && thirdOk)) {
                    throw new UTFDataFormatException(
                            "bad second or third byte at " + (consumed - 2));
                }
                out[written] =
                        (char) (((first & 0x0F) << 12) | ((second & 0x3F) << 6) | (third & 0x3F));
                written++;
                continue;
            }

            // Neither a one-, two- nor three-byte lead: stray continuation byte or 1111xxxx.
            throw new UTFDataFormatException("bad byte at " + (consumed - 1));
        }

        return new String(out, 0, written);
    }

    /**
     * Computes how many bytes the modified UTF-8 form of {@code s} occupies.
     *
     * <p>Only the payload bytes are counted. The length prefix is excluded because callers
     * store it in different widths (two, four, or even eight bytes).
     *
     * @param s string to measure
     * @param shortLength when true, fail as soon as the running total no longer fits in an
     *        unsigned 16-bit length
     * @return the number of encoded bytes
     * @throws UTFDataFormatException if {@code shortLength} is set and the encoded size
     *         exceeds 65535 bytes
     */
    public static long countBytes(String s, boolean shortLength) throws UTFDataFormatException {
        long total = 0;
        for (int index = 0, end = s.length(); index < end; index++) {
            final char c = s.charAt(index);

            final int width;
            if (c == 0 || c > 127) {
                // U+0000 deliberately takes the two-byte form.
                width = (c <= 2047) ? 2 : 3;
            } else {
                width = 1;
            }
            total += width;

            if (shortLength && total > 65535) {
                throw new UTFDataFormatException("String more than 65535 UTF bytes long");
            }
        }
        return total;
    }

    /**
     * Writes the <i>modified UTF-8</i> form of {@code s} into {@code dst}.
     *
     * <p>No length prefix is written and no capacity check is made up front; {@code dst} must
     * have room for every encoded byte from {@code offset} onwards.
     *
     * @param dst destination byte array
     * @param offset index in {@code dst} at which the first encoded byte is stored
     * @param s string to encode
     */
    public static void encode(byte[] dst, int offset, String s) {
        final int charCount = s.length();
        int pos = offset;
        int index = 0;
        while (index < charCount) {
            final char c = s.charAt(index);
            index++;

            if (c != 0 && c <= 127) {
                // Single byte; U+0000 is excluded because it uses the two-byte form.
                dst[pos] = (byte) c;
                pos += 1;
            } else if (c <= 2047) {
                dst[pos] = (byte) (0xc0 | (0x1f & (c >> 6)));
                dst[pos + 1] = (byte) (0x80 | (0x3f & c));
                pos += 2;
            } else {
                dst[pos] = (byte) (0xe0 | (0x0f & (c >> 12)));
                dst[pos + 1] = (byte) (0x80 | (0x3f & (c >> 6)));
                dst[pos + 2] = (byte) (0x80 | (0x3f & c));
                pos += 3;
            }
        }
    }

    /** Not instantiable: this class only hosts static helpers. */
    private ModifiedUtf8() {
    }
}