| /* |
| * Copyright (c) 2008, Oracle and/or its affiliates. All rights reserved. |
| * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
| * |
| * This code is free software; you can redistribute it and/or modify it |
| * under the terms of the GNU General Public License version 2 only, as |
| * published by the Free Software Foundation. |
| * |
| * This code is distributed in the hope that it will be useful, but WITHOUT |
| * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
| * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
| * version 2 for more details (a copy is included in the LICENSE file that |
| * accompanied this code). |
| * |
| * You should have received a copy of the GNU General Public License version |
| * 2 along with this work; if not, write to the Free Software Foundation, |
| * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
| * |
| * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
| * or visit www.oracle.com if you need additional information or have any |
| * questions. |
| */ |
| |
| /* |
| * @test |
| * @bug 4831163 5053096 5056440 |
| * @summary NIO charset basic verification of JISAutodetect decoder |
| * @author Martin Buchholz |
| */ |
| |
| import java.io.*; |
| import java.nio.ByteBuffer; |
| import java.nio.CharBuffer; |
| import java.nio.charset.Charset; |
| import java.nio.charset.CharsetDecoder; |
| import java.nio.charset.CoderResult; |
| import static java.lang.System.*; |
| |
| public class NIOJISAutoDetectTest { |
| private static int failures = 0; |
| |
| private static void fail(String failureMsg) { |
| System.out.println(failureMsg); |
| failures++; |
| } |
| |
| private static void check(boolean cond, String msg) { |
| if (!cond) { |
| fail("test failed: " + msg); |
| new Exception().printStackTrace(); |
| } |
| } |
| |
| private static String SJISName() throws Exception { |
| return detectingCharset(new byte[] {(byte)0xbb, (byte)0xdd, |
| (byte)0xcf, (byte)0xb2}); |
| } |
| |
| private static String EUCJName() throws Exception { |
| return detectingCharset(new byte[] {(byte)0xa4, (byte)0xd2, |
| (byte)0xa4, (byte)0xe9}); |
| } |
| |
| private static String detectingCharset(byte[] bytes) throws Exception { |
| //---------------------------------------------------------------- |
| // Test special public methods of CharsetDecoder while we're here |
| //---------------------------------------------------------------- |
| CharsetDecoder cd = Charset.forName("JISAutodetect").newDecoder(); |
| check(cd.isAutoDetecting(), "isAutodecting()"); |
| check(! cd.isCharsetDetected(), "isCharsetDetected"); |
| cd.decode(ByteBuffer.wrap(new byte[] {(byte)'A'})); |
| check(! cd.isCharsetDetected(), "isCharsetDetected"); |
| try { |
| cd.detectedCharset(); |
| fail("no IllegalStateException"); |
| } catch (IllegalStateException e) {} |
| cd.decode(ByteBuffer.wrap(bytes)); |
| check(cd.isCharsetDetected(), "isCharsetDetected"); |
| Charset cs = cd.detectedCharset(); |
| check(cs != null, "cs != null"); |
| check(! cs.newDecoder().isAutoDetecting(), "isAutodetecting()"); |
| return cs.name(); |
| } |
| |
| public static void main(String[] argv) throws Exception { |
| //---------------------------------------------------------------- |
| // Used to throw BufferOverflowException |
| //---------------------------------------------------------------- |
| out.println(new String(new byte[] {0x61}, "JISAutoDetect")); |
| |
| //---------------------------------------------------------------- |
| // InputStreamReader(...JISAutoDetect) used to infloop |
| //---------------------------------------------------------------- |
| { |
| byte[] bytes = "ABCD\n".getBytes(); |
| ByteArrayInputStream bais = new ByteArrayInputStream(bytes); |
| InputStreamReader isr = new InputStreamReader(bais, "JISAutoDetect"); |
| BufferedReader reader = new BufferedReader(isr); |
| check (reader.readLine().equals("ABCD"), "first read gets text"); |
| // used to return "ABCD" on second and subsequent reads |
| check (reader.readLine() == null, "second read gets null"); |
| } |
| |
| //---------------------------------------------------------------- |
| // Check all Japanese chars for sanity |
| //---------------------------------------------------------------- |
| String SJIS = SJISName(); |
| String EUCJ = EUCJName(); |
| out.printf("SJIS charset is %s%n", SJIS); |
| out.printf("EUCJ charset is %s%n", EUCJ); |
| |
| int cnt2022 = 0; |
| int cnteucj = 0; |
| int cntsjis = 0; |
| int cntBAD = 0; |
| for (char c = '\u0000'; c < '\uffff'; c++) { |
| if (c == '\u001b' || // ESC |
| c == '\u2014') // Em-Dash? |
| continue; |
| String s = new String (new char[] {c}); |
| |
| //---------------------------------------------------------------- |
| // JISAutoDetect can handle all chars that EUC-JP can, |
| // unless there is an ambiguity with SJIS. |
| //---------------------------------------------------------------- |
| byte[] beucj = s.getBytes(EUCJ); |
| String seucj = new String(beucj, EUCJ); |
| if (seucj.equals(s)) { |
| cnteucj++; |
| String sauto = new String(beucj, "JISAutoDetect"); |
| |
| if (! sauto.equals(seucj)) { |
| cntBAD++; |
| String ssjis = new String(beucj, SJIS); |
| if (! sauto.equals(ssjis)) { |
| fail("Autodetection agrees with neither EUC nor SJIS"); |
| } |
| } |
| } else |
| continue; // Optimization |
| |
| //---------------------------------------------------------------- |
| // JISAutoDetect can handle all chars that ISO-2022-JP can. |
| //---------------------------------------------------------------- |
| byte[] b2022 = s.getBytes("ISO-2022-JP"); |
| if (new String(b2022, "ISO-2022-JP").equals(s)) { |
| cnt2022++; |
| check(new String(b2022,"JISAutoDetect").equals(s), |
| "ISO2022 autodetection"); |
| } |
| |
| //---------------------------------------------------------------- |
| // JISAutoDetect can handle almost all chars that SJIS can. |
| //---------------------------------------------------------------- |
| byte[] bsjis = s.getBytes(SJIS); |
| if (new String(bsjis, SJIS).equals(s)) { |
| cntsjis++; |
| check(new String(bsjis,"JISAutoDetect").equals(s), |
| "SJIS autodetection"); |
| } |
| } |
| out.printf("There are %d ISO-2022-JP-encodable characters.%n", cnt2022); |
| out.printf("There are %d SJIS-encodable characters.%n", cntsjis); |
| out.printf("There are %d EUC-JP-encodable characters.%n", cnteucj); |
| out.printf("There are %d characters that are " + |
| "misdetected as SJIS after being EUC-encoded.%n", cntBAD); |
| |
| |
| //---------------------------------------------------------------- |
| // tests for specific byte sequences |
| //---------------------------------------------------------------- |
| test("ISO-2022-JP", new byte[] {'A', 'B', 'C'}); |
| test("EUC-JP", new byte[] {'A', 'B', 'C'}); |
| test("SJIS", new byte[] {'A', 'B', 'C'}); |
| |
| test("SJIS", |
| new byte[] { 'C', 'o', 'p', 'y', 'r', 'i', 'g', 'h', 't', |
| ' ', (byte)0xa9, ' ', '1', '9', '9', '8' }); |
| |
| test("SJIS", |
| new byte[] { (byte)0xbb, (byte)0xdd, (byte)0xcf, (byte)0xb2, |
| (byte)0xb8, (byte)0xdb, (byte)0xbc, (byte)0xbd, |
| (byte)0xc3, (byte)0xd1, (byte)0xbd, (byte)0xde, |
| (byte)0x82, (byte)0xc5, (byte)0x82, (byte)0xb7 }); |
| |
| test("EUC-JP", |
| new byte[] { (byte)0xa4, (byte)0xd2, (byte)0xa4, (byte)0xe9, |
| (byte)0xa4, (byte)0xac, (byte)0xa4, (byte)0xca }); |
| |
| test("SJIS", |
| new byte[] { (byte)0xbb, (byte)0xdd, (byte)0xcf, (byte)0xb2, |
| (byte)0xb8, (byte)0xdb, (byte)0xbc, (byte)0xbd, |
| (byte)0xc3, (byte)0xd1, (byte)0xbd, (byte)0xde}); |
| |
| test("SJIS", |
| new byte[] { (byte)0xbb, (byte)0xdd, (byte)0xcf, (byte)0xb2, |
| (byte)0xb8, (byte)0xdb, (byte)0xbc, (byte)0xbd, |
| (byte)0xc3, (byte)0xd1, (byte)0xbd }); |
| |
| test("SJIS", |
| new byte[] { (byte)0x8f, (byte)0xa1, (byte)0xaa }); |
| |
| test("EUC-JP", |
| new byte[] { (byte)0x8f, (byte)0xc5, (byte)0xe0, (byte)0x20}); |
| |
| test("EUC-JP", |
| new byte[] { (byte)0xbb, (byte)0xdd, (byte)0xcf, (byte)0xb2, |
| (byte)0xb8, (byte)0xdb, (byte)0xbc, (byte)0xbd, |
| (byte)0xc3, (byte)0xd1, (byte)0xbd, (byte)0xde, |
| (byte)0xa4, (byte)0xc7, (byte)0xa4, (byte)0xb9 }); |
| |
| test("ISO-2022-JP", |
| new byte[] { 0x1b, '$', 'B', '#', '4', '$', '5', 0x1b, '(', 'B' }); |
| |
| |
| //---------------------------------------------------------------- |
| // Check handling of ambiguous end-of-input in middle of first char |
| //---------------------------------------------------------------- |
| { |
| CharsetDecoder dc = Charset.forName("x-JISAutoDetect").newDecoder(); |
| ByteBuffer bb = ByteBuffer.allocate(128); |
| CharBuffer cb = CharBuffer.allocate(128); |
| bb.put((byte)'A').put((byte)0x8f); |
| bb.flip(); |
| CoderResult res = dc.decode(bb,cb,false); |
| check(res.isUnderflow(), "isUnderflow"); |
| check(bb.position() == 1, "bb.position()"); |
| check(cb.position() == 1, "cb.position()"); |
| res = dc.decode(bb,cb,false); |
| check(res.isUnderflow(), "isUnderflow"); |
| check(bb.position() == 1, "bb.position()"); |
| check(cb.position() == 1, "cb.position()"); |
| bb.compact(); |
| bb.put((byte)0xa1); |
| bb.flip(); |
| res = dc.decode(bb,cb,true); |
| check(res.isUnderflow(), "isUnderflow"); |
| check(bb.position() == 2, "bb.position()"); |
| check(cb.position() == 2, "cb.position()"); |
| } |
| |
| |
| if (failures > 0) |
| throw new RuntimeException(failures + " tests failed"); |
| } |
| |
| static void checkCoderResult(CoderResult result) { |
| check(result.isUnderflow(), |
| "Unexpected coder result: " + result); |
| } |
| |
| static void test(String expectedCharset, byte[] input) throws Exception { |
| Charset cs = Charset.forName("x-JISAutoDetect"); |
| CharsetDecoder autoDetect = cs.newDecoder(); |
| |
| Charset cs2 = Charset.forName(expectedCharset); |
| CharsetDecoder decoder = cs2.newDecoder(); |
| |
| ByteBuffer bb = ByteBuffer.allocate(128); |
| CharBuffer charOutput = CharBuffer.allocate(128); |
| CharBuffer charExpected = CharBuffer.allocate(128); |
| |
| bb.put(input); |
| bb.flip(); |
| bb.mark(); |
| |
| CoderResult result = autoDetect.decode(bb, charOutput, true); |
| checkCoderResult(result); |
| charOutput.flip(); |
| String actual = charOutput.toString(); |
| |
| bb.reset(); |
| |
| result = decoder.decode(bb, charExpected, true); |
| checkCoderResult(result); |
| charExpected.flip(); |
| String expected = charExpected.toString(); |
| |
| check(actual.equals(expected), |
| String.format("actual=%s expected=%s", actual, expected)); |
| } |
| } |