/*
 * Copyright (c) 2008, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

/* @test
 * @bug 4328178
 * @summary Performs baseline and regression test on the ISCII91 charset
 * @modules jdk.charsets
 */

import java.io.*;

/**
 * Baseline and regression test for the ISCII91 charset.
 *
 * Every check throws an {@code Exception} on the first mismatch, which is
 * how the failure is reported to the test harness.
 */
public class ISCIITest {

    /** Charset name used for every decode and encode in this test. */
    private static final String CHARSET = "ISCII91";

    /** Message prefix shared by all failure exceptions. */
    private static final String FAILURE_MESSAGE = "Failed ISCII91 Regression Test";

    /** Prints the generic failure banner to standard error. */
    private static void failureReport() {
        System.err.println(FAILURE_MESSAGE);
    }

    /** Formats a byte value (given as an int) as a hex literal for messages. */
    private static String hex(int value) {
        return "0x" + Integer.toHexString(value & 0xff);
    }

    /**
     * Checks that every byte in {@code [start, end]} decodes to the char with
     * the same numeric value and encodes back to the same single byte.
     *
     * @param start    first byte value to check (inclusive)
     * @param end      last byte value to check (inclusive)
     * @param testName label used in failure messages
     * @throws UnsupportedEncodingException if the charset is unavailable;
     *         deliberately not swallowed so the test cannot pass vacuously
     * @throws Exception on the first mapping mismatch
     */
    private static void mapEquiv(int start,
                                 int end,
                                 String testName)
        throws Exception
    {
        byte[] singleByte = new byte[1];

        for (int i = start; i <= end; i++) {
            singleByte[0] = (byte) i;
            String unicodeStr = new String(singleByte, CHARSET);

            if (unicodeStr.length() != 1 || unicodeStr.charAt(0) != i) {
                System.err.println("FAILED ISCII91 Regression test "
                                   + testName + ", input byte is " + i);
                throw new Exception(FAILURE_MESSAGE + ": " + testName
                                    + ": decoding error for byte " + hex(i));
            }

            byte[] encoded = unicodeStr.getBytes(CHARSET);
            if (encoded.length != 1 || encoded[0] != singleByte[0]) {
                System.err.println("Encoding error " + testName);
                throw new Exception(FAILURE_MESSAGE + ": " + testName
                                    + ": encoding error for byte " + hex(i));
            }
        }
    }

    /**
     * Checks that every byte in {@code [start, end]} decodes to the
     * replacement character U+FFFD.
     *
     * @param start    first byte value to check (inclusive)
     * @param end      last byte value to check (inclusive)
     * @param testName label used in failure messages
     * @throws UnsupportedEncodingException if the charset is unavailable;
     *         deliberately not swallowed so the test cannot pass vacuously
     * @throws Exception if a byte decodes to anything else
     */
    private static void checkUnmapped(int start,
                                      int end,
                                      String testName)
        throws Exception {

        byte[] singleByte = new byte[1];

        for (int i = start; i <= end; i++) {
            singleByte[0] = (byte) i;
            String unicodeStr = new String(singleByte, CHARSET);

            if (unicodeStr.isEmpty() || unicodeStr.charAt(0) != '\uFFFD') {
                System.err.println("FAILED " + testName
                                   + ", input byte is " + i);
                throw new Exception(FAILURE_MESSAGE + ": " + testName
                                    + ": byte " + hex(i)
                                    + " did not decode to U+FFFD");
            }
        }
    }

    /**
     * Checks that the bytes in {@code [start, end]} decode, in order, to the
     * chars in {@code expectChars}, and that each decoded char encodes back
     * to the byte it came from.
     *
     * @param start       first byte value to check (inclusive)
     * @param end         last byte value to check (inclusive)
     * @param expectChars expected char per byte; must hold exactly
     *                    {@code end - start + 1} entries
     * @param testName    label used in failure messages
     * @throws IllegalArgumentException if {@code expectChars} does not match
     *         the size of the range
     * @throws Exception on the first decoding or encoding mismatch
     */
    private static void checkRange(int start, int end,
                                   char[] expectChars,
                                   String testName)
        throws Exception {

        if (expectChars.length != end - start + 1) {
            throw new IllegalArgumentException(testName + ": expected "
                + (end - start + 1) + " chars for range " + hex(start)
                + ".." + hex(end) + " but got " + expectChars.length);
        }

        byte[] singleByte = new byte[1];

        for (int i = start; i <= end; i++) {
            char expected = expectChars[i - start];
            singleByte[0] = (byte) i;

            String unicodeStr = new String(singleByte, CHARSET);
            if (unicodeStr.isEmpty() || unicodeStr.charAt(0) != expected) {
                throw new Exception(FAILURE_MESSAGE + ": " + testName
                                    + ": decoding error for byte " + hex(i));
            }

            // Round trip: the encoder result used to be computed and then
            // discarded, so encoder regressions went unnoticed.
            byte[] encoded = unicodeStr.getBytes(CHARSET);
            if (encoded.length != 1 || encoded[0] != singleByte[0]) {
                throw new Exception(FAILURE_MESSAGE + ": " + testName
                                    + ": encoding error for byte " + hex(i));
            }
        }
    }

    /**
     * Checks the Nukta code-extension pairs.
     *
     * ISCII uses a number of code extension techniques to access a number
     * of lesser used characters. The Nukta character, which ordinarily
     * signifies a diacritic, is used in combination with existing characters
     * to escape them to a different character value.
     *
     * @throws Exception if a pair does not decode to the expected char
     */
    private static void checkNuktaExtensions() throws Exception {
        byte[] codeExtensionBytes = {
            (byte)0xa1, (byte)0xe9,  // Chandrabindu + Nukta
                                     // => DEVANAGARI OM SIGN
            (byte)0xa6, (byte)0xe9,  // Vowel I + Nukta
                                     // => DEVANAGARI VOCALIC L
            (byte)0xa7, (byte)0xe9,  // Vowel II + Nukta
                                     // => DEVANAGARI VOCALIC LL
            (byte)0xaa, (byte)0xe9,  // Vowel RI + Nukta
                                     // => DEVANAGARI VOCALIC RR
            (byte)0xdb, (byte)0xe9,  // Vowel sign I + Nukta
                                     // => DEVANAGARI VOWEL SIGN VOCALIC L
            (byte)0xdc, (byte)0xe9,  // Vowel sign II + Nukta
                                     // => DEVANAGARI VOWEL SIGN VOCALIC LL
            (byte)0xdf, (byte)0xe9,  // Vowel sign Vocalic R + Nukta
                                     // => DEVANAGARI VOWEL SIGN VOCALIC RR
            (byte)0xea, (byte)0xe9   // Full stop/Phrase separator + Nukta
                                     // => DEVANAGARI SIGN AVAGRAHA
        };

        // One expected char per byte pair above, in the same order.
        char[] expectNuktaSub = {
            '\u0950',
            '\u090c',
            '\u0961',
            '\u0960',
            '\u0962',
            '\u0963',
            '\u0944',
            '\u093d'
        };

        byte[] bytePair = new byte[2];
        for (int i = 0; i < codeExtensionBytes.length / 2; i++) {
            bytePair[0] = codeExtensionBytes[2 * i];
            bytePair[1] = codeExtensionBytes[2 * i + 1];

            String unicodeStr = new String(bytePair, CHARSET);
            if (unicodeStr.isEmpty()
                    || unicodeStr.charAt(0) != expectNuktaSub[i]) {
                throw new Exception("Failed Nukta Sub: bytes "
                                    + hex(bytePair[0]) + " "
                                    + hex(bytePair[1]));
            }
        }
    }

    /**
     * Checks the two-byte Halant combinations, each of which is expected to
     * decode to two chars.
     *
     * @throws Exception if either decoded char of a pair is wrong
     */
    private static void checkHalantCombinations() throws Exception {
        byte[] comboBytes = {
            (byte)0xe8, (byte)0xe8,  // HALANT + HALANT
            (byte)0xe8, (byte)0xe9   // HALANT + NUKTA aka. Soft Halant
        };

        // Two expected chars per byte pair above, in the same order.
        char[] expectCombChars = {
            '\u094d', '\u200c',
            '\u094d', '\u200d'
        };

        byte[] bytePair = new byte[2];
        for (int i = 0; i < comboBytes.length / 2; i++) {
            bytePair[0] = comboBytes[2 * i];
            bytePair[1] = comboBytes[2 * i + 1];

            String unicodeStr = new String(bytePair, CHARSET);

            // Fail when EITHER char is wrong. The earlier form joined the
            // two comparisons with &&, so one wrong char still passed.
            if (unicodeStr.length() < 2
                    || unicodeStr.charAt(0) != expectCombChars[2 * i]
                    || unicodeStr.charAt(1) != expectCombChars[2 * i + 1]) {
                throw new Exception(FAILURE_MESSAGE
                                    + ": Halant combination "
                                    + hex(bytePair[0]) + " "
                                    + hex(bytePair[1]));
            }
        }
    }

    /*
     * Tests the ISCII91 Indic character encoding
     * as per IS 13194:1991 Bureau of Indian Standards.
     */
    private static void test() throws Exception {

        try {

            // ISCII91 is an 8-bit encoding which retains the ASCII
            // mappings in the lower half.

            mapEquiv(0, 0x7f, "7 bit ASCII range");

            // Checks a range of characters which are unmappable according
            // to the standards.

            checkUnmapped(0x81, 0x9f, "UNMAPPED");

            // Vowel Modifier chars can be used to modify the vowel
            // sound of the preceding consonant, vowel or matra character.

            char[] vowelModChars = {
                '\u0901', // Vowel modifier Chandrabindu
                '\u0902', // Vowel modifier Anuswar
                '\u0903'  // Vowel modifier Visarg
            };

            checkRange(0xa1, 0xa3, vowelModChars, "INDIC VOWEL MODIFIER CHARS");

            char[] expectChars = {
                '\u0905', // a4 -- Vowel A
                '\u0906', // a5 -- Vowel AA
                '\u0907', // a6 -- Vowel I
                '\u0908', // a7 -- Vowel II
                '\u0909', // a8 -- Vowel U
                '\u090a', // a9 -- Vowel UU
                '\u090b', // aa -- Vowel RI
                '\u090e', // ab -- Vowel E ( Southern Scripts )
                '\u090f', // ac -- Vowel EY
                '\u0910', // ad -- Vowel AI
                '\u090d', // ae -- Vowel AYE ( Devanagari Script )
                '\u0912', // af -- Vowel O ( Southern Scripts )
                '\u0913', // b0 -- Vowel OW
                '\u0914', // b1 -- Vowel AU
                '\u0911', // b2 -- Vowel AWE ( Devanagari Script )
            };

            checkRange(0xa4, 0xb2, expectChars, "INDIC VOWELS");

            char[] expectConsChars =
            {
                '\u0915', // b3 -- Consonant KA
                '\u0916', // b4 -- Consonant KHA
                '\u0917', // b5 -- Consonant GA
                '\u0918', // b6 -- Consonant GHA
                '\u0919', // b7 -- Consonant NGA
                '\u091a', // b8 -- Consonant CHA
                '\u091b', // b9 -- Consonant CHHA
                '\u091c', // ba -- Consonant JA
                '\u091d', // bb -- Consonant JHA
                '\u091e', // bc -- Consonant JNA
                '\u091f', // bd -- Consonant Hard TA
                '\u0920', // be -- Consonant Hard THA
                '\u0921', // bf -- Consonant Hard DA
                '\u0922', // c0 -- Consonant Hard DHA
                '\u0923', // c1 -- Consonant Hard NA
                '\u0924', // c2 -- Consonant Soft TA
                '\u0925', // c3 -- Consonant Soft THA
                '\u0926', // c4 -- Consonant Soft DA
                '\u0927', // c5 -- Consonant Soft DHA
                '\u0928', // c6 -- Consonant Soft NA
                '\u0929', // c7 -- Consonant NA ( Tamil )
                '\u092a', // c8 -- Consonant PA
                '\u092b', // c9 -- Consonant PHA
                '\u092c', // ca -- Consonant BA
                '\u092d', // cb -- Consonant BHA
                '\u092e', // cc -- Consonant MA
                '\u092f', // cd -- Consonant YA
                '\u095f', // ce -- Consonant JYA ( Bengali, Assamese & Oriya )
                '\u0930', // cf -- Consonant RA
                '\u0931', // d0 -- Consonant Hard RA ( Southern Scripts )
                '\u0932', // d1 -- Consonant LA
                '\u0933', // d2 -- Consonant Hard LA
                '\u0934', // d3 -- Consonant ZHA ( Tamil & Malayalam )
                '\u0935', // d4 -- Consonant VA
                '\u0936', // d5 -- Consonant SHA
                '\u0937', // d6 -- Consonant Hard SHA
                '\u0938', // d7 -- Consonant SA
                '\u0939', // d8 -- Consonant HA
            };

            checkRange(0xb3, 0xd8, expectConsChars, "INDIC CONSONANTS");

            char[] matraChars = {
                '\u093e', // da -- Vowel Sign AA
                '\u093f', // db -- Vowel Sign I
                '\u0940', // dc -- Vowel Sign II
                '\u0941', // dd -- Vowel Sign U
                '\u0942', // de -- Vowel Sign UU
                '\u0943', // df -- Vowel Sign RI
                '\u0946', // e0 -- Vowel Sign E ( Southern Scripts )
                '\u0947', // e1 -- Vowel Sign EY
                '\u0948', // e2 -- Vowel Sign AI
                '\u0945', // e3 -- Vowel Sign AYE ( Devanagari Script )
                '\u094a', // e4 -- Vowel Sign O ( Southern Scripts )
                '\u094b', // e5 -- Vowel Sign OW
                '\u094c', // e6 -- Vowel Sign AU
                '\u0949'  // e7 -- Vowel Sign AWE ( Devanagari Script )
            };

            // Matras or Vowel signs alter the implicit
            // vowel sound associated with an Indic consonant.

            checkRange(0xda, 0xe7, matraChars, "INDIC MATRAS");

            char[] loneContextModifierChars = {
                '\u094d', // e8 -- Vowel Omission Sign ( Halant )
                '\u093c', // e9 -- Diacritic Sign ( Nukta )
                '\u0964'  // ea -- Full Stop ( Viram, Northern Scripts )
            };

            checkRange(0xe8, 0xea,
                       loneContextModifierChars, "LONE INDIC CONTEXT CHARS");

            // Test Indic script numeral chars
            // (as opposed to international numerals)

            char[] expectNumeralChars =
            {
                '\u0966', // f1 -- Digit 0
                '\u0967', // f2 -- Digit 1
                '\u0968', // f3 -- Digit 2
                '\u0969', // f4 -- Digit 3
                '\u096a', // f5 -- Digit 4
                '\u096b', // f6 -- Digit 5
                '\u096c', // f7 -- Digit 6
                '\u096d', // f8 -- Digit 7
                '\u096e', // f9 -- Digit 8
                '\u096f'  // fa -- Digit 9
            };

            checkRange(0xf1, 0xfa,
                       expectNumeralChars, "NUMERAL/DIGIT CHARACTERS");

            checkNuktaExtensions();
            checkHalantCombinations();

        } catch (UnsupportedEncodingException e) {
            System.err.println("ISCII91 encoding not supported");
            failureReport();
            // Keep the cause so the harness log shows which lookup failed.
            throw new Exception(FAILURE_MESSAGE, e);
        }
    }

    /**
     * Test entry point.
     *
     * @param args ignored
     * @throws Exception if any ISCII91 check fails
     */
    public static void main(String[] args) throws Exception {
        test();
    }
}