1/*
2 * Copyright (c) 2008, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 */
23
24/* @test
25 * @bug 4328178
26 * @summary Performs baseline and regression test on the ISCII91 charset
27 * @modules jdk.charsets
28 */
29
30import java.io.*;
31
/**
 * Baseline and regression test for the ISCII91 charset.
 *
 * <p>Decodes individual bytes and selected two-byte sequences with the
 * "ISCII91" charset and compares the result with the expected Unicode
 * characters. Any mismatch, or the absence of the charset, is reported by
 * throwing an exception out of {@link #main(String[])}.
 */
public class ISCIITest {

    /** Message used for generic failures of this test. */
    private static final String FAILURE_MESSAGE =
        "Failed ISCII91 Regression Test";

    /**
     * Checks that every byte in {@code [start, end]} decodes to the char
     * with the same numeric value and encodes back to the same single byte.
     *
     * @param start    first byte value to check (inclusive)
     * @param end      last byte value to check (inclusive)
     * @param testName label included in failure diagnostics
     * @throws Exception if a byte does not round-trip, or (as
     *         {@link UnsupportedEncodingException}) if the charset is missing
     */
    private static void mapEquiv(int start,
                                 int end,
                                 String testName)
    throws Exception
    {
        byte[] singleByte = new byte[1];

        for (int i = start; i <= end; i++) {
            singleByte[0] = (byte) i;

            // An UnsupportedEncodingException is deliberately NOT caught
            // here: swallowing it would let the test pass without having
            // checked anything.
            String unicodeStr = new String(singleByte, "ISCII91");

            if (i != (int) unicodeStr.charAt(0)) {
                String message = FAILURE_MESSAGE + " (" + testName
                        + "): input byte is " + i;
                System.err.println(message);
                throw new Exception(message);
            }

            byte[] encoded = unicodeStr.getBytes("ISCII91");
            if (encoded.length != 1 || encoded[0] != singleByte[0]) {
                System.err.println("Encoding error " + testName);
                throw new Exception(FAILURE_MESSAGE + " (" + testName
                        + "): encoding error for input byte " + i);
            }
        }
    }

    /**
     * Checks that every byte in {@code [start, end]} decodes to the Unicode
     * replacement character U+FFFD.
     *
     * @param start    first byte value to check (inclusive)
     * @param end      last byte value to check (inclusive)
     * @param testName label included in failure diagnostics
     * @throws Exception if a byte decodes to anything else, or (as
     *         {@link UnsupportedEncodingException}) if the charset is missing
     */
    private static void checkUnmapped(int start,
                                      int end,
                                      String testName)
    throws Exception {

        byte[] singleByte = new byte[1];

        for (int i = start; i <= end; i++) {
            singleByte[0] = (byte) i;
            String unicodeStr = new String(singleByte, "ISCII91");

            if (unicodeStr.charAt(0) != '\uFFFD') {
                String message = FAILURE_MESSAGE + " (" + testName
                        + "): input byte is " + i;
                System.err.println(message);
                throw new Exception(message);
            }
        }
    }

    /**
     * Checks that the bytes {@code start..end} decode, one at a time, to the
     * corresponding entries of {@code expectChars}.
     *
     * @param start       first byte value to check (inclusive)
     * @param end         last byte value to check (inclusive)
     * @param expectChars expected chars; entry {@code k} belongs to byte
     *                    {@code start + k}
     * @param testName    label included in failure diagnostics
     * @throws Exception if a decoded char differs from the expectation, or
     *         (as {@link UnsupportedEncodingException}) if the charset name
     *         or its "ISCII" alias is not recognized
     */
    private static void checkRange(int start, int end,
                                   char[] expectChars,
                                   String testName)
                                   throws Exception {
        if (expectChars.length != end - start + 1) {
            throw new IllegalArgumentException(testName + ": expected "
                    + (end - start + 1) + " chars but table has "
                    + expectChars.length);
        }

        byte[] singleByte = new byte[1];

        for (int i = start; i <= end; i++) {
            singleByte[0] = (byte) i;
            String unicodeStr = new String(singleByte, "ISCII91");
            if (unicodeStr.charAt(0) != expectChars[i - start]) {
                throw new Exception(FAILURE_MESSAGE + " (" + testName
                        + "): input byte is 0x" + Integer.toHexString(i));
            }
            // Encode through the "ISCII" alias; this throws
            // UnsupportedEncodingException if the alias is not registered.
            // The resulting bytes are not compared with the input.
            unicodeStr.getBytes("ISCII");
        }
    }

    /*
     * Tests the ISCII91 Indic character encoding
     * as per IS 13194:1991 Bureau of Indian Standards.
     */

    private static void test () throws Exception {

        try {

            // ISCII91 is an 8-bit encoding which retains the ASCII
            // mappings in the lower half.

            mapEquiv(0, 0x7f, "7 bit ASCII range");

            // Checks a range of characters which are unmappable according
            // to the standards.

            checkUnmapped(0x81, 0x9f, "UNMAPPED");

            // Vowel Modifier chars can be used to modify the vowel
            // sound of the preceding consonant, vowel or matra character.

            char[] vowelModChars = {
                '\u0901', // Vowel modifier Chandrabindu
                '\u0902', // Vowel modifier Anuswar
                '\u0903'  // Vowel modifier Visarg
            };

            checkRange(0xa1, 0xa3, vowelModChars, "INDIC VOWEL MODIFIER CHARS");

            char[] expectChars = {
                '\u0905', // a4 -- Vowel A
                '\u0906', // a5 -- Vowel AA
                '\u0907', // a6 -- Vowel I
                '\u0908', // a7 -- Vowel II
                '\u0909', // a8 -- Vowel U
                '\u090a', // a9 -- Vowel UU
                '\u090b', // aa -- Vowel RI
                '\u090e', // ab -- Vowel E ( Southern Scripts )
                '\u090f', // ac -- Vowel EY
                '\u0910', // ad -- Vowel AI
                '\u090d', // ae -- Vowel AYE ( Devanagari Script )
                '\u0912', // af -- Vowel O ( Southern Scripts )
                '\u0913', // b0 -- Vowel OW
                '\u0914', // b1 -- Vowel AU
                '\u0911', // b2 -- Vowel AWE ( Devanagari Script )
            };

            checkRange(0xa4, 0xb2, expectChars, "INDIC VOWELS");

            char[] expectConsChars =
            {
                '\u0915', // b3 -- Consonant KA
                '\u0916', // b4 -- Consonant KHA
                '\u0917', // b5 -- Consonant GA
                '\u0918', // b6 -- Consonant GHA
                '\u0919', // b7 -- Consonant NGA
                '\u091a', // b8 -- Consonant CHA
                '\u091b', // b9 -- Consonant CHHA
                '\u091c', // ba -- Consonant JA
                '\u091d', // bb -- Consonant JHA
                '\u091e', // bc -- Consonant JNA
                '\u091f', // bd -- Consonant Hard TA
                '\u0920', // be -- Consonant Hard THA
                '\u0921', // bf -- Consonant Hard DA
                '\u0922', // c0 -- Consonant Hard DHA
                '\u0923', // c1 -- Consonant Hard NA
                '\u0924', // c2 -- Consonant Soft TA
                '\u0925', // c3 -- Consonant Soft THA
                '\u0926', // c4 -- Consonant Soft DA
                '\u0927', // c5 -- Consonant Soft DHA
                '\u0928', // c6 -- Consonant Soft NA
                '\u0929', // c7 -- Consonant NA ( Tamil )
                '\u092a', // c8 -- Consonant PA
                '\u092b', // c9 -- Consonant PHA
                '\u092c', // ca -- Consonant BA
                '\u092d', // cb -- Consonant BHA
                '\u092e', // cc -- Consonant MA
                '\u092f', // cd -- Consonant YA
                '\u095f', // ce -- Consonant JYA ( Bengali, Assamese & Oriya )
                '\u0930', // cf -- Consonant RA
                '\u0931', // d0 -- Consonant Hard RA ( Southern Scripts )
                '\u0932', // d1 -- Consonant LA
                '\u0933', // d2 -- Consonant Hard LA
                '\u0934', // d3 -- Consonant ZHA ( Tamil & Malayalam )
                '\u0935', // d4 -- Consonant VA
                '\u0936', // d5 -- Consonant SHA
                '\u0937', // d6 -- Consonant Hard SHA
                '\u0938', // d7 -- Consonant SA
                '\u0939', // d8 -- Consonant HA
            };

            checkRange(0xb3, 0xd8, expectConsChars, "INDIC CONSONANTS");

            char[] matraChars = {
                '\u093e', // da -- Vowel Sign AA
                '\u093f', // db -- Vowel Sign I
                '\u0940', // dc -- Vowel Sign II
                '\u0941', // dd -- Vowel Sign U
                '\u0942', // de -- Vowel Sign UU
                '\u0943', // df -- Vowel Sign RI
                '\u0946', // e0 -- Vowel Sign E ( Southern Scripts )
                '\u0947', // e1 -- Vowel Sign EY
                '\u0948', // e2 -- Vowel Sign AI
                '\u0945', // e3 -- Vowel Sign AYE ( Devanagari Script )
                '\u094a', // e4 -- Vowel Sign O ( Southern Scripts )
                '\u094b', // e5 -- Vowel Sign OW
                '\u094c', // e6 -- Vowel Sign AU
                '\u0949' // e7 -- Vowel Sign AWE ( Devanagari Script )
            };

            // Matras or Vowel signs alter the implicit
            // vowel sound associated with an Indic consonant.

            checkRange(0xda, 0xe7, matraChars, "INDIC MATRAS");

            char[] loneContextModifierChars = {
            '\u094d', // e8 -- Vowel Omission Sign ( Halant )
            '\u093c', // e9 -- Diacritic Sign ( Nukta )
            '\u0964' // ea -- Full Stop ( Viram, Northern Scripts )
            };

            checkRange(0xe8, 0xea,
                       loneContextModifierChars, "LONE INDIC CONTEXT CHARS");


            // Test Indic script numeral chars
            // (as opposed to international numerals)

            char[] expectNumeralChars =
            {
                '\u0966', // f1 -- Digit 0
                '\u0967', // f2 -- Digit 1
                '\u0968', // f3 -- Digit 2
                '\u0969', // f4 -- Digit 3
                '\u096a', // f5 -- Digit 4
                '\u096b', // f6 -- Digit 5
                '\u096c', // f7 -- Digit 6
                '\u096d', // f8 -- Digit 7
                '\u096e', // f9 -- Digit 8
                '\u096f'  // fa -- Digit 9
            };

            checkRange(0xf1, 0xfa,
                       expectNumeralChars, "NUMERAL/DIGIT CHARACTERS");

            char[] expectNuktaSub = {
                '\u0950',
                '\u090c',
                '\u0961',
                '\u0960',
                '\u0962',
                '\u0963',
                '\u0944',
                '\u093d'
            };

            /*
             * ISCII uses a number of code extension techniques
             * to access a number of lesser used characters.
             * The Nukta character which ordinarily signifies
             * a diacritic is used in combination with existing
             * characters to escape them to a different character
             * value.
             */

            byte[] codeExtensionBytes = {
                (byte)0xa1 , (byte)0xe9, // Chandrabindu + Nukta
                                         // =>DEVANAGARI OM SIGN
                (byte)0xa6 , (byte)0xe9, // Vowel I + Nukta
                                         // => DEVANAGARI VOCALIC L
                (byte)0xa7 , (byte)0xe9, // Vowel II + Nukta
                                         // => DEVANAGARI VOCALIC LL
                (byte)0xaa , (byte)0xe9, // Vowel RI + Nukta
                                         // => DEVANAGARI VOCALIC RR
                (byte)0xdb , (byte)0xe9, //  Vowel sign I + Nukta
                                         // => DEVANAGARI VOWEL SIGN VOCALIC L
                (byte)0xdc , (byte)0xe9, // Vowel sign II + Nukta
                                         // => DEVANAGARI VOWEL SIGN VOCALIC LL

                (byte)0xdf , (byte)0xe9, // Vowel sign Vocalic R + Nukta
                                         // => DEVANAGARI VOWEL SIGN VOCALIC RR
                (byte)0xea , (byte)0xe9  // Full stop/Phrase separator + Nukta
                                         // => DEVANAGARI SIGN AVAGRAHA
            };

            byte[] bytePair = new byte[2];

            // Pair i occupies codeExtensionBytes[2*i] and [2*i + 1] and is
            // expected to decode to expectNuktaSub[i].
            for (int i = 0; i < codeExtensionBytes.length / 2; i++) {
                bytePair[0] = codeExtensionBytes[2 * i];
                bytePair[1] = codeExtensionBytes[2 * i + 1];

                String unicodeStr = new String(bytePair, "ISCII91");
                if (unicodeStr.charAt(0) != expectNuktaSub[i]) {
                    throw new Exception("Failed Nukta Sub");
                }
            }

            byte[] comboBytes = {
                (byte)0xe8 , (byte)0xe8, //HALANT + HALANT
                (byte)0xe8 , (byte)0xe9  //HALANT + NUKTA    aka. Soft Halant
            };
            char[] expectCombChars = {
                '\u094d',
                '\u200c',
                '\u094d',
                '\u200d'
            };

            // Each byte pair must decode to the two chars at the same
            // positions in expectCombChars. Either char being wrong is a
            // failure; requiring both to be wrong would hide a partial
            // mismatch.
            for (int i = 0; i < comboBytes.length / 2; i++) {
                bytePair[0] = comboBytes[2 * i];
                bytePair[1] = comboBytes[2 * i + 1];
                String unicodeStr = new String(bytePair, "ISCII91");
                if (unicodeStr.length() < 2
                    || unicodeStr.charAt(0) != expectCombChars[2 * i]
                    || unicodeStr.charAt(1) != expectCombChars[2 * i + 1]) {
                    throw new Exception(FAILURE_MESSAGE);
                }
            }

        } catch (UnsupportedEncodingException e) {
             System.err.println ("ISCII91 encoding not supported");
             // Keep the original exception as the cause for diagnosis.
             throw new Exception (FAILURE_MESSAGE, e);
        }
    }

    /**
     * Test entry point.
     *
     * @param args ignored
     * @throws Exception if any ISCII91 check fails
     */
    public static void main (String[] args) throws Exception {
        test();
    }
}
352