1/*
2 * Copyright (c) 2003, 2006, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.  Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25
26/*
27 */
28
29package sun.nio.cs.ext;
30
31import java.nio.ByteBuffer;
32import java.nio.CharBuffer;
33import java.nio.charset.Charset;
34import java.nio.charset.CharsetDecoder;
35import java.nio.charset.CharsetEncoder;
36import java.nio.charset.CoderResult;
37import java.nio.charset.CharacterCodingException;
38import sun.nio.cs.DoubleByte;
39import sun.nio.cs.HistoricallyNamedCharset;
40import sun.nio.cs.US_ASCII;
41import sun.nio.cs.*;
42
43public class ISO2022_CN
44    extends Charset
45    implements HistoricallyNamedCharset
46{
47    private static final byte ISO_ESC = 0x1b;
48    private static final byte ISO_SI = 0x0f;
49    private static final byte ISO_SO = 0x0e;
50    private static final byte ISO_SS2_7 = 0x4e;
51    private static final byte ISO_SS3_7 = 0x4f;
52    private static final byte MSB = (byte)0x80;
53    private static final char REPLACE_CHAR = '\uFFFD';
54
55    private static final byte SODesigGB = 0;
56    private static final byte SODesigCNS = 1;
57
58    public ISO2022_CN() {
59        super("ISO-2022-CN", ExtendedCharsets.aliasesFor("ISO-2022-CN"));
60    }
61
62    public String historicalName() {
63        return "ISO2022CN";
64    }
65
66    public boolean contains(Charset cs) {
67        return ((cs instanceof EUC_CN)     // GB2312-80 repertoire
68                || (cs instanceof US_ASCII)
69                || (cs instanceof EUC_TW)  // CNS11643 repertoire
70                || (cs instanceof ISO2022_CN));
71    }
72
73    public CharsetDecoder newDecoder() {
74        return new Decoder(this);
75    }
76
77    public CharsetEncoder newEncoder() {
78        throw new UnsupportedOperationException();
79    }
80
81    public boolean canEncode() {
82        return false;
83    }
84
85    static class Decoder extends CharsetDecoder {
86        private boolean shiftOut;
87        private byte currentSODesig;
88
89        private static final Charset gb2312 = new EUC_CN();
90        private static final Charset cns = new EUC_TW();
91        private final DoubleByte.Decoder gb2312Decoder;
92        private final EUC_TW.Decoder cnsDecoder;
93
94        Decoder(Charset cs) {
95            super(cs, 1.0f, 1.0f);
96            shiftOut = false;
97            currentSODesig = SODesigGB;
98            gb2312Decoder = (DoubleByte.Decoder)gb2312.newDecoder();
99            cnsDecoder = (EUC_TW.Decoder)cns.newDecoder();
100        }
101
102        protected void implReset() {
103            shiftOut= false;
104            currentSODesig = SODesigGB;
105        }
106
107        private char cnsDecode(byte byte1, byte byte2, byte SS) {
108            byte1 |= MSB;
109            byte2 |= MSB;
110            int p = 0;
111            if (SS == ISO_SS2_7)
112                p = 1;    //plane 2, index -- 1
113            else if (SS == ISO_SS3_7)
114                p = 2;    //plane 3, index -- 2
115            else
116                return REPLACE_CHAR;  //never happen.
117            char[] ret = cnsDecoder.toUnicode(byte1 & 0xff,
118                                              byte2 & 0xff,
119                                              p);
120            if (ret == null || ret.length == 2)
121                return REPLACE_CHAR;
122            return ret[0];
123        }
124
125        private char SODecode(byte byte1, byte byte2, byte SOD) {
126            byte1 |= MSB;
127            byte2 |= MSB;
128            if (SOD == SODesigGB) {
129                return gb2312Decoder.decodeDouble(byte1 & 0xff,
130                                                  byte2 & 0xff);
131            } else {    // SOD == SODesigCNS
132                char[] ret = cnsDecoder.toUnicode(byte1 & 0xff,
133                                                  byte2 & 0xff,
134                                                  0);
135                if (ret == null)
136                    return REPLACE_CHAR;
137                return ret[0];
138            }
139        }
140
141        private CoderResult decodeBufferLoop(ByteBuffer src,
142                                             CharBuffer dst)
143        {
144            int mark = src.position();
145            byte b1 = 0, b2 = 0, b3 = 0, b4 = 0;
146            int inputSize = 0;
147            char c = REPLACE_CHAR;
148            try {
149                while (src.hasRemaining()) {
150                    b1 = src.get();
151                    inputSize = 1;
152
153                    while (b1 == ISO_ESC ||
154                           b1 == ISO_SO ||
155                           b1 == ISO_SI) {
156                        if (b1 == ISO_ESC) {  // ESC
157                            currentSODesig = SODesigGB;
158
159                            if (src.remaining() < 1)
160                                return CoderResult.UNDERFLOW;
161
162                            b2 = src.get();
163                            inputSize++;
164
165                            if ((b2 & (byte)0x80) != 0)
166                                return CoderResult.malformedForLength(inputSize);
167
168                            if (b2 == (byte)0x24) {
169                                if (src.remaining() < 1)
170                                    return CoderResult.UNDERFLOW;
171
172                                b3 = src.get();
173                                inputSize++;
174
175                                if ((b3 & (byte)0x80) != 0)
176                                    return CoderResult.malformedForLength(inputSize);
177                                if (b3 == 'A'){              // "$A"
178                                    currentSODesig = SODesigGB;
179                                } else if (b3 == ')') {
180                                    if (src.remaining() < 1)
181                                        return CoderResult.UNDERFLOW;
182                                    b4 = src.get();
183                                    inputSize++;
184                                    if (b4 == 'A'){          // "$)A"
185                                        currentSODesig = SODesigGB;
186                                    } else if (b4 == 'G'){   // "$)G"
187                                        currentSODesig = SODesigCNS;
188                                    } else {
189                                        return CoderResult.malformedForLength(inputSize);
190                                    }
191                                } else if (b3 == '*') {
192                                    if (src.remaining() < 1)
193                                        return CoderResult.UNDERFLOW;
194                                    b4 = src.get();
195                                    inputSize++;
196                                    if (b4 != 'H') {         // "$*H"
197                                        //SS2Desig -> CNS-P1
198                                        return CoderResult.malformedForLength(inputSize);
199                                    }
200                                } else if (b3 == '+') {
201                                    if (src.remaining() < 1)
202                                        return CoderResult.UNDERFLOW;
203                                    b4 = src.get();
204                                    inputSize++;
205                                    if (b4 != 'I'){          // "$+I"
206                                        //SS3Desig -> CNS-P2.
207                                        return CoderResult.malformedForLength(inputSize);
208                                    }
209                                } else {
210                                        return CoderResult.malformedForLength(inputSize);
211                                }
212                            } else if (b2 == ISO_SS2_7 || b2 == ISO_SS3_7) {
213                                if (src.remaining() < 2)
214                                    return CoderResult.UNDERFLOW;
215                                b3 = src.get();
216                                b4 = src.get();
217                                inputSize += 2;
218                                if (dst.remaining() < 1)
219                                    return CoderResult.OVERFLOW;
220                                //SS2->CNS-P2, SS3->CNS-P3
221                                c = cnsDecode(b3, b4, b2);
222                                if (c == REPLACE_CHAR)
223                                    return CoderResult.unmappableForLength(inputSize);
224                                dst.put(c);
225                            } else {
226                                return CoderResult.malformedForLength(inputSize);
227                            }
228                        } else if (b1 == ISO_SO) {
229                            shiftOut = true;
230                        } else if (b1 == ISO_SI) { // shift back in
231                            shiftOut = false;
232                        }
233                        mark += inputSize;
234                        if (src.remaining() < 1)
235                            return CoderResult.UNDERFLOW;
236                        b1 = src.get();
237                        inputSize = 1;
238                    }
239
240                    if (dst.remaining() < 1)
241                        return CoderResult.OVERFLOW;
242
243                    if (!shiftOut) {
244                        dst.put((char)(b1 & 0xff));  //clear the upper byte
245                        mark += inputSize;
246                    } else {
247                        if (src.remaining() < 1)
248                            return CoderResult.UNDERFLOW;
249                        b2 = src.get();
250                        inputSize++;
251                        c = SODecode(b1, b2, currentSODesig);
252                        if (c == REPLACE_CHAR)
253                            return CoderResult.unmappableForLength(inputSize);
254                        dst.put(c);
255                        mark += inputSize;
256                    }
257                }
258                return CoderResult.UNDERFLOW;
259            } finally {
260                src.position(mark);
261            }
262        }
263
264        private CoderResult decodeArrayLoop(ByteBuffer src,
265                                            CharBuffer dst)
266        {
267            int inputSize = 0;
268            byte b1 = 0, b2 = 0, b3 = 0, b4 = 0;
269            char c = REPLACE_CHAR;
270
271            byte[] sa = src.array();
272            int sp = src.arrayOffset() + src.position();
273            int sl = src.arrayOffset() + src.limit();
274            assert (sp <= sl);
275            sp = (sp <= sl ? sp : sl);
276
277            char[] da = dst.array();
278            int dp = dst.arrayOffset() + dst.position();
279            int dl = dst.arrayOffset() + dst.limit();
280            assert (dp <= dl);
281            dp = (dp <= dl ? dp : dl);
282
283            try {
284                while (sp < sl) {
285                    b1 = sa[sp];
286                    inputSize = 1;
287
288                    while (b1 == ISO_ESC || b1 == ISO_SO || b1 == ISO_SI) {
289                        if (b1 == ISO_ESC) {  // ESC
290                            currentSODesig = SODesigGB;
291
292                            if (sp + 2 > sl)
293                                return CoderResult.UNDERFLOW;
294
295                            b2 = sa[sp + 1];
296                            inputSize++;
297
298                            if ((b2 & (byte)0x80) != 0)
299                                return CoderResult.malformedForLength(inputSize);
300                            if (b2 == (byte)0x24) {
301                                if (sp + 3 > sl)
302                                    return CoderResult.UNDERFLOW;
303
304                                b3 = sa[sp + 2];
305                                inputSize++;
306
307                                if ((b3 & (byte)0x80) != 0)
308                                    return CoderResult.malformedForLength(inputSize);
309                                if (b3 == 'A'){              // "$A"
310                                    /* <ESC>$A is not a legal designator sequence for
311                                       ISO2022_CN, it is listed as an escape sequence
312                                       for GB2312 in ISO2022-JP-2. Keep it here just for
313                                       the sake of "compatibility".
314                                     */
315                                    currentSODesig = SODesigGB;
316                                } else if (b3 == ')') {
317                                    if (sp + 4 > sl)
318                                        return CoderResult.UNDERFLOW;
319                                    b4 = sa[sp + 3];
320                                    inputSize++;
321
322                                    if (b4 == 'A'){          // "$)A"
323                                        currentSODesig = SODesigGB;
324                                    } else if (b4 == 'G'){   // "$)G"
325                                        currentSODesig = SODesigCNS;
326                                    } else {
327                                        return CoderResult.malformedForLength(inputSize);
328                                    }
329                                } else if (b3 == '*') {
330                                    if (sp + 4 > sl)
331                                        return CoderResult.UNDERFLOW;
332                                    b4 = sa[sp + 3];
333                                    inputSize++;
334                                    if (b4 != 'H'){          // "$*H"
335                                        return CoderResult.malformedForLength(inputSize);
336                                    }
337                                } else if (b3 == '+') {
338                                    if (sp + 4 > sl)
339                                        return CoderResult.UNDERFLOW;
340                                    b4 = sa[sp + 3];
341                                    inputSize++;
342                                    if (b4 != 'I'){          // "$+I"
343                                        return CoderResult.malformedForLength(inputSize);
344                                    }
345                                } else {
346                                        return CoderResult.malformedForLength(inputSize);
347                                }
348                            } else if (b2 == ISO_SS2_7 || b2 == ISO_SS3_7) {
349                                if (sp + 4 > sl) {
350                                    return CoderResult.UNDERFLOW;
351                                }
352                                b3 = sa[sp + 2];
353                                b4 = sa[sp + 3];
354                                if (dl - dp < 1)  {
355                                    return CoderResult.OVERFLOW;
356                                }
357                                inputSize += 2;
358                                c = cnsDecode(b3, b4, b2);
359                                if (c == REPLACE_CHAR)
360                                    return CoderResult.unmappableForLength(inputSize);
361                                da[dp++] = c;
362                            } else {
363                                return CoderResult.malformedForLength(inputSize);
364                            }
365                        } else if (b1 == ISO_SO) {
366                            shiftOut = true;
367                        } else if (b1 == ISO_SI) { // shift back in
368                            shiftOut = false;
369                        }
370                        sp += inputSize;
371                        if (sp + 1 > sl)
372                            return CoderResult.UNDERFLOW;
373                        b1 = sa[sp];
374                        inputSize = 1;
375                    }
376
377                    if (dl - dp < 1) {
378                        return CoderResult.OVERFLOW;
379                    }
380
381                    if (!shiftOut) {
382                        da[dp++] = (char)(b1 & 0xff);  //clear the upper byte
383                    } else {
384                        if (sp + 2 > sl)
385                            return CoderResult.UNDERFLOW;
386                        b2 = sa[sp + 1];
387                        inputSize++;
388                        c = SODecode(b1, b2, currentSODesig);
389                        if (c == REPLACE_CHAR)
390                            return CoderResult.unmappableForLength(inputSize);
391                        da[dp++] = c;
392                    }
393                    sp += inputSize;
394                }
395                return CoderResult.UNDERFLOW;
396            } finally {
397                src.position(sp - src.arrayOffset());
398                dst.position(dp - dst.arrayOffset());
399            }
400        }
401
402        protected CoderResult decodeLoop(ByteBuffer src,
403                                         CharBuffer dst)
404        {
405            if (src.hasArray() && dst.hasArray())
406                return decodeArrayLoop(src, dst);
407            else
408                return decodeBufferLoop(src, dst);
409        }
410    }
411}
412