EUC_JP_OLD.java revision 6073:cea72c2bf071
1/*
2 * Copyright (c) 2002, 2012, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.  Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25
26/*
27 */
28
29import java.nio.ByteBuffer;
30import java.nio.CharBuffer;
31import java.nio.charset.Charset;
32import java.nio.charset.CharsetDecoder;
33import java.nio.charset.CharsetEncoder;
34import java.nio.charset.CoderResult;
35import sun.nio.cs.HistoricallyNamedCharset;
36import sun.nio.cs.Surrogate;
37
38public class EUC_JP_OLD
39    extends Charset
40    implements HistoricallyNamedCharset
41{
42    public EUC_JP_OLD() {
43        super("EUC-JP_OLD", null);
44    }
45
46    public String historicalName() {
47        return "EUC_JP";
48    }
49
50    public boolean contains(Charset cs) {
51        return ((cs.name().equals("US-ASCII"))
52                || (cs instanceof JIS_X_0201_OLD)
53                || (cs instanceof JIS_X_0208_OLD)
54                || (cs instanceof JIS_X_0212_OLD)
55                || (cs instanceof EUC_JP_OLD));
56    }
57
58    public CharsetDecoder newDecoder() {
59        return new Decoder(this);
60    }
61
62    public CharsetEncoder newEncoder() {
63
64        // Need to force the replacement byte to 0x3f
65        // because JIS_X_0208_Encoder defines its own
66        // alternative 2 byte substitution to permit it
67        // to exist as a self-standing Encoder
68
69        byte[] replacementBytes = { (byte)0x3f };
70        return new Encoder(this).replaceWith(replacementBytes);
71    }
72
73
74    static class Decoder extends JIS_X_0208_Decoder {
75
76        JIS_X_0201_OLD.Decoder decoderJ0201;
77        JIS_X_0212_Decoder decoderJ0212;
78
79        private static final short[] j0208Index1 =
80          JIS_X_0208_Decoder.getIndex1();
81        private static final String[] j0208Index2 =
82          JIS_X_0208_Decoder.getIndex2();
83
84        protected Decoder(Charset cs) {
85            super(cs);
86            decoderJ0201 = new JIS_X_0201_OLD.Decoder(cs);
87            decoderJ0212 = new JIS_X_0212_Decoder(cs);
88            start = 0xa1;
89            end = 0xfe;
90        }
91        protected char decode0212(int byte1, int byte2) {
92             return decoderJ0212.decodeDouble(byte1, byte2);
93        }
94
95        protected char decodeDouble(int byte1, int byte2) {
96            if (byte1 == 0x8e) {
97                return decoderJ0201.decode(byte2 - 256);
98            }
99            // Fix for bug 4121358 - similar fix for bug 4117820 put
100            // into ByteToCharDoubleByte.getUnicode()
101            if (((byte1 < 0) || (byte1 > getIndex1().length))
102                || ((byte2 < start) || (byte2 > end)))
103                return REPLACE_CHAR;
104
105            int n = (j0208Index1[byte1 - 0x80] & 0xf) * (end - start + 1)
106                    + (byte2 - start);
107            return j0208Index2[j0208Index1[byte1 - 0x80] >> 4].charAt(n);
108        }
109
110        private CoderResult decodeArrayLoop(ByteBuffer src,
111                                            CharBuffer dst)
112        {
113            byte[] sa = src.array();
114            int sp = src.arrayOffset() + src.position();
115            int sl = src.arrayOffset() + src.limit();
116            assert (sp <= sl);
117            sp = (sp <= sl ? sp : sl);
118
119            char[] da = dst.array();
120            int dp = dst.arrayOffset() + dst.position();
121            int dl = dst.arrayOffset() + dst.limit();
122            assert (dp <= dl);
123            dp = (dp <= dl ? dp : dl);
124
125            int b1 = 0, b2 = 0;
126            int inputSize = 0;
127            char outputChar = REPLACE_CHAR; // U+FFFD;
128
129            try {
130                while (sp < sl) {
131                    b1 = sa[sp] & 0xff;
132                    inputSize = 1;
133
134                    if ((b1 & 0x80) == 0) {
135                        outputChar = (char)b1;
136                    }
137                    else {      // Multibyte char
138                        if ((b1 & 0xff) == 0x8f) {   // JIS0212
139                            if (sp + 3 > sl)
140                               return CoderResult.UNDERFLOW;
141                            b1 = sa[sp + 1] & 0xff;
142                            b2 = sa[sp + 2] & 0xff;
143                            inputSize += 2;
144                            outputChar = decode0212(b1-0x80, b2-0x80);
145                        } else {
146                          // JIS0208
147                            if (sp + 2 > sl)
148                               return CoderResult.UNDERFLOW;
149                            b2 = sa[sp + 1] & 0xff;
150                            inputSize++;
151                            outputChar = decodeDouble(b1, b2);
152                        }
153                    }
154                    if (outputChar == REPLACE_CHAR) { // can't be decoded
155                        return CoderResult.unmappableForLength(inputSize);
156                    }
157                    if (dp + 1 > dl)
158                        return CoderResult.OVERFLOW;
159                    da[dp++] = outputChar;
160                    sp += inputSize;
161                }
162                return CoderResult.UNDERFLOW;
163            } finally {
164                src.position(sp - src.arrayOffset());
165                dst.position(dp - dst.arrayOffset());
166            }
167        }
168
169        private CoderResult decodeBufferLoop(ByteBuffer src,
170                                             CharBuffer dst)
171        {
172            int mark = src.position();
173            int b1 = 0, b2 = 0;
174            int inputSize = 0;
175
176            char outputChar = REPLACE_CHAR; // U+FFFD;
177
178            try {
179                while (src.hasRemaining()) {
180                    b1 = src.get() & 0xff;
181                    inputSize = 1;
182
183                    if ((b1 & 0x80) == 0) {
184                        outputChar = (char)b1;
185                    } else {    // Multibyte char
186                        if ((b1 & 0xff) == 0x8f) {   // JIS0212
187                            if (src.remaining() < 2)
188                               return CoderResult.UNDERFLOW;
189                            b1 = src.get() & 0xff;
190                            b2 = src.get() & 0xff;
191                            inputSize += 2;
192                            outputChar = decode0212(b1-0x80, b2-0x80);
193                        } else {
194                          // JIS0208
195                            if (src.remaining() < 1)
196                               return CoderResult.UNDERFLOW;
197                            b2 = src.get() & 0xff;
198                            inputSize++;
199                            outputChar = decodeDouble(b1, b2);
200                        }
201                    }
202
203                    if (outputChar == REPLACE_CHAR) {
204                        return CoderResult.unmappableForLength(inputSize);
205                    }
206                if (dst.remaining() < 1)
207                    return CoderResult.OVERFLOW;
208                dst.put(outputChar);
209                mark += inputSize;
210                }
211                return CoderResult.UNDERFLOW;
212            } finally {
213                src.position(mark);
214            }
215        }
216
217        // Make some protected methods public for use by JISAutoDetect
218        public CoderResult decodeLoop(ByteBuffer src, CharBuffer dst) {
219            if (src.hasArray() && dst.hasArray())
220                return decodeArrayLoop(src, dst);
221            else
222                return decodeBufferLoop(src, dst);
223        }
224        public void implReset() {
225            super.implReset();
226        }
227        public CoderResult implFlush(CharBuffer out) {
228            return super.implFlush(out);
229        }
230    }
231
232
233    static class Encoder extends JIS_X_0208_Encoder {
234
235        JIS_X_0201_OLD.Encoder encoderJ0201;
236        JIS_X_0212_Encoder encoderJ0212;
237
238        private static final short[] j0208Index1 =
239          JIS_X_0208_Encoder.getIndex1();
240        private static final String[] j0208Index2 =
241          JIS_X_0208_Encoder.getIndex2();
242
243        private final Surrogate.Parser sgp = new Surrogate.Parser();
244
245        protected Encoder(Charset cs) {
246            super(cs, 3.0f, 3.0f);
247            encoderJ0201 = new JIS_X_0201_OLD.Encoder(cs);
248            encoderJ0212 = new JIS_X_0212_Encoder(cs);
249        }
250
251        public boolean canEncode(char c) {
252            byte[]  encodedBytes = new byte[3];
253
254            if (encodeSingle(c, encodedBytes) == 0) { //doublebyte
255                if (encodeDouble(c) == 0)
256                    return false;
257            }
258            return true;
259        }
260
261        protected int encodeSingle(char inputChar, byte[] outputByte) {
262            byte b;
263
264            if (inputChar == 0) {
265                outputByte[0] = (byte)0;
266                return 1;
267            }
268
269            if ((b = encoderJ0201.encode(inputChar)) == 0)
270                return 0;
271
272            if (b > 0 && b < 128) {
273                outputByte[0] = b;
274                return 1;
275            }
276
277            outputByte[0] = (byte)0x8e;
278            outputByte[1] = b;
279            return 2;
280        }
281
282        protected int encodeDouble(char ch) {
283            int offset = j0208Index1[((ch & 0xff00) >> 8 )] << 8;
284            int r = j0208Index2[offset >> 12].charAt((offset & 0xfff) +
285                    (ch & 0xff));
286            if (r != 0)
287                return r + 0x8080;
288            r = encoderJ0212.encodeDouble(ch);
289            if (r == 0)
290                return r;
291            return r + 0x8F8080;
292        }
293
294        private CoderResult encodeArrayLoop(CharBuffer src,
295                                            ByteBuffer dst)
296        {
297            char[] sa = src.array();
298            int sp = src.arrayOffset() + src.position();
299            int sl = src.arrayOffset() + src.limit();
300            assert (sp <= sl);
301            sp = (sp <= sl ? sp : sl);
302            byte[] da = dst.array();
303            int dp = dst.arrayOffset() + dst.position();
304            int dl = dst.arrayOffset() + dst.limit();
305            assert (dp <= dl);
306            dp = (dp <= dl ? dp : dl);
307
308            int outputSize = 0;
309            byte[]  outputByte;
310            int     inputSize = 0;                 // Size of input
311            byte[]  tmpBuf = new byte[3];
312
313            try {
314                while (sp < sl) {
315                    outputByte = tmpBuf;
316                    char c = sa[sp];
317
318                    if (Character.isSurrogate(c)) {
319                        if (sgp.parse(c, sa, sp, sl) < 0)
320                            return sgp.error();
321                        return sgp.unmappableResult();
322                    }
323
324                    outputSize = encodeSingle(c, outputByte);
325
326                    if (outputSize == 0) { // DoubleByte
327                        int ncode = encodeDouble(c);
328                        if (ncode != 0 ) {
329                            if ((ncode & 0xFF0000) == 0) {
330                                outputByte[0] = (byte) ((ncode & 0xff00) >> 8);
331                                outputByte[1] = (byte) (ncode & 0xff);
332                                outputSize = 2;
333                            } else {
334                                outputByte[0] = (byte) 0x8f;
335                                outputByte[1] = (byte) ((ncode & 0xff00) >> 8);
336                                outputByte[2] = (byte) (ncode & 0xff);
337                                outputSize = 3;
338                            }
339                        } else {
340                                return CoderResult.unmappableForLength(1);
341                        }
342                    }
343                    if (dl - dp < outputSize)
344                        return CoderResult.OVERFLOW;
345                    // Put the byte in the output buffer
346                    for (int i = 0; i < outputSize; i++) {
347                        da[dp++] = outputByte[i];
348                    }
349                    sp++;
350                }
351                return CoderResult.UNDERFLOW;
352            } finally {
353                src.position(sp - src.arrayOffset());
354                dst.position(dp - dst.arrayOffset());
355            }
356        }
357
358        private CoderResult encodeBufferLoop(CharBuffer src,
359                                             ByteBuffer dst)
360        {
361            int outputSize = 0;
362            byte[]  outputByte;
363            int     inputSize = 0;                 // Size of input
364            byte[]  tmpBuf = new byte[3];
365
366            int mark = src.position();
367
368            try {
369                while (src.hasRemaining()) {
370                    outputByte = tmpBuf;
371                    char c = src.get();
372                    if (Character.isSurrogate(c)) {
373                        if (sgp.parse(c, src) < 0)
374                            return sgp.error();
375                        return sgp.unmappableResult();
376                    }
377
378                    outputSize = encodeSingle(c, outputByte);
379                    if (outputSize == 0) { // DoubleByte
380                        int ncode = encodeDouble(c);
381                        if (ncode != 0 ) {
382                            if ((ncode & 0xFF0000) == 0) {
383                                outputByte[0] = (byte) ((ncode & 0xff00) >> 8);
384                                outputByte[1] = (byte) (ncode & 0xff);
385                                outputSize = 2;
386                            } else {
387                                outputByte[0] = (byte) 0x8f;
388                                outputByte[1] = (byte) ((ncode & 0xff00) >> 8);
389                                outputByte[2] = (byte) (ncode & 0xff);
390                                outputSize = 3;
391                            }
392                        } else {
393                                return CoderResult.unmappableForLength(1);
394                        }
395                    }
396
397                    if (dst.remaining() < outputSize)
398                        return CoderResult.OVERFLOW;
399                    // Put the byte in the output buffer
400                    for (int i = 0; i < outputSize; i++) {
401                        dst.put(outputByte[i]);
402                    }
403                    mark++;
404                }
405                return CoderResult.UNDERFLOW;
406            } finally {
407                src.position(mark);
408            }
409        }
410
411        protected CoderResult encodeLoop(CharBuffer src,
412                                         ByteBuffer dst)
413        {
414            if (src.hasArray() && dst.hasArray())
415                return encodeArrayLoop(src, dst);
416            else
417                return encodeBufferLoop(src, dst);
418        }
419    }
420}
421