1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright (c) 2001 by Sun Microsystems, Inc.
23 * All rights reserved.
24 *
25 */
26
27//  IANACharCode.java: SLPv1 Character encoding support
28//  Author:           James Kempf
29//  Created On:       Fri Sep 11 13:24:02 1998
30//  Last Modified By: James Kempf
31//  Last Modified On: Wed Oct 28 14:33:02 1998
32//  Update Count:     7
33//
34
35
36package com.sun.slp;
37
38import java.util.*;
39import java.io.*;
40
41/**
42 * The IANACharCode class supports static methods for decoding IANA
43 * character codes into strings appropriate for the Java Writer subclass
44 * encoding String arguments, and for encoding the String descriptions
45 * of character codings into the integer codes. Ideally, Java itself
46 * should support this.
47 *
48 * @author James Kempf
49 */
50
51abstract class IANACharCode extends Object {
52
53    // Character code descriptors. These can be used with the Java
54    //  character encoding utilities. For Unicode, we use little on
55    //  input,
56
57    static final String ASCII = "Default";
58    static final String LATIN1 = "latin1";
59    static final String UTF8 = "UTF8";
60    static final String UNICODE = "Unicode";
61    static final String UNICODE_LITTLE = "UnicodeLittle";
62    static final String UNICODE_BIG = "UnicodeBig";
63    static final String UNICODE_BIG_NO_HDR = "UnicodeBigNoHdr";
64
65    // Error code for misidentified character set.
66
67    static final short CHARSET_NOT_UNDERSTOOD = 5;
68
69    // Character codes.
70
71    protected static final int CHAR_ASCII   = 3;
72    protected static final int CHAR_LATIN1  = 4;
73    protected static final int CHAR_UTF8    = 6;
74    protected static final int CHAR_UNICODE = 1000;
75
76    // First two bytes indicate that string is big/little endian Unicode.
77    //  If this flag isn't set, then big endian is assumed and we
78    //  must add the big endian bytes on every call.
79
80    protected static final byte[] UNICODE_LITTLE_FLAG =
81					{(byte)0xFF, (byte)0xFE};
82
83    protected static final byte[] UNICODE_BIG_FLAG =
84					{(byte)0xFE, (byte)0xFF};
85
86    /**
87     * Encode the String describing a character encoding into
88     * the approprate integer descriptor code.
89     *
90     * @param encoding The String describing the encoding.
91     * @exception ServiceLocationCharSetNotUnderstoodException Thrown if the
92     *			String is not recognized.
93     */
94
95    static int encodeCharacterEncoding(String encoding)
96	throws ServiceLocationException {
97
98	if (encoding.equals(ASCII)) {
99	    return CHAR_ASCII;
100	} else if (encoding.equals(LATIN1)) {
101	    return CHAR_LATIN1;
102	} else if (encoding.equals(UTF8)) {
103	    return CHAR_UTF8;
104	} else if (encoding.equals(UNICODE)) {
105	    return CHAR_UNICODE;
106	} else if (encoding.equals(UNICODE_BIG)) {
107	    return CHAR_UNICODE;
108	} else if (encoding.equals(UNICODE_LITTLE)) {
109	    return CHAR_UNICODE;
110	} else if (encoding.equals(UNICODE_BIG_NO_HDR)) {
111	    return CHAR_UNICODE;
112	}
113
114	throw
115	    new ServiceLocationException(
116				CHARSET_NOT_UNDERSTOOD,
117				"v1_unsupported_encoding",
118				new Object[] {encoding});
119    }
120
121    /**
122     * Decode the integer describing a character encoding into
123     * the approprate String descriptor.
124     *
125     * @param code The integer coding the String set.
126     * @exception ServiceLocationCharSetNotUnderstoodException Thrown if the
127     *			integer is not recognized.
128     */
129
130    static String decodeCharacterEncoding(int code)
131	throws ServiceLocationException {
132
133	switch (code) {
134	case CHAR_ASCII: 	return ASCII;
135	case CHAR_LATIN1:	return LATIN1;
136	case CHAR_UTF8:	return UTF8;
137	case CHAR_UNICODE:	return UNICODE;
138	}
139
140	throw
141	    new ServiceLocationException(
142				CHARSET_NOT_UNDERSTOOD,
143				"v1_unsupported_encoding",
144				new Object[] {Integer.toString(code)});
145    }
146
147    /**
148     * Return a string of integers giving the character's encoding in
149     * the character set passed in as encoding.
150     *
151     * @param c The character to escape.
152     * @param encoding The character set encoding to use.
153     * @return The character as a string of integers for the encoding.
154     * @exception ServiceLocationException Thrown if the encoding is not
155     *		 recognized, if the character's encoding
156     *		 has more than 8 bytes or if the sign bit gets turned on.
157     */
158
159    static String escapeChar(char c, String encoding)
160	throws ServiceLocationException {
161
162	ByteArrayOutputStream baos = new ByteArrayOutputStream();
163
164	try {
165	    OutputStreamWriter osw = new OutputStreamWriter(baos, encoding);
166
167	    osw.write(c);
168	    osw.flush();
169
170	} catch (UnsupportedEncodingException ex) {
171
172	    throw
173		new ServiceLocationException(
174				CHARSET_NOT_UNDERSTOOD,
175				"v1_unsupported_encoding",
176				new Object[] {encoding});
177
178	} catch (IOException ex) {
179
180	}
181
182	byte b[] = baos.toByteArray();
183	int code = 0;
184
185	// Assemble the character code based on the encoding type.
186
187	if (encoding.equals(UNICODE) ||
188	    encoding.equals(UNICODE_BIG) ||
189	    encoding.equals(UNICODE_LITTLE)) {
190
191	    code = (int)(b[0] & 0xFF);		// control bytes...
192	    code = (int)(code | ((b[1] & 0xFF) << 8));
193	    code = (int)(code | ((b[2] & 0xFF) << 16));
194	    code = (int)(code | ((b[3] & 0xFF) << 24));
195
196	    if (b.length <= 4) {
197		throw
198		    new ServiceLocationException(
199				ServiceLocationException.PARSE_ERROR,
200				"v1_charcode_error",
201				new Object[] {new Character(c), encoding});
202	    }
203
204	} else if (encoding.equals(ASCII) || encoding.equals(LATIN1)) {
205
206	    code = (int)(b[0] & 0xFF);
207
208	    if (b.length > 1) {
209		throw
210		    new ServiceLocationException(
211				ServiceLocationException.PARSE_ERROR,
212				"v1_charcode_error",
213				new Object[] {new Character(c), encoding});
214	    }
215	} else if (encoding.equals(UTF8)) {
216
217	    if (b.length > 3) {
218		throw
219		    new ServiceLocationException(
220				ServiceLocationException.PARSE_ERROR,
221				"v1_charcode_error",
222				new Object[] {new Character(c), encoding});
223	    }
224
225
226	    code = (int)(b[0] & 0xFF);
227
228	    if (b.length > 1) {
229		code = (int)(code | ((b[1] & 0xFF) << 8));
230	    }
231
232	    if (b.length > 2) {
233		code = (int)(code | ((b[2] & 0xFF) << 16));
234	    }
235	}
236
237	return Integer.toString(code);
238    }
239
240    /**
241     * Unescape the character encoded as the string.
242     *
243     * @param ch The character as a string of Integers.
244     * @param encoding The character set encoding to use.
245     * @return The character.
246     * @exception ServiceLocationException Thrown if the string can't
247     *		 be parsed into an integer or if the encoding isn't
248     *		 recognized.
249     */
250
251    static String unescapeChar(String ch, String encoding)
252	throws ServiceLocationException {
253
254	int code = 0;
255
256	try {
257	    code = Integer.parseInt(ch);
258
259	} catch (NumberFormatException ex) {
260	    throw
261		new ServiceLocationException(
262				ServiceLocationException.PARSE_ERROR,
263				"v1_stringcode_error",
264				new Object[] {ch, encoding});
265
266	}
267
268	// Convert to bytes. We need to taylor the array size to the
269	//  number of bytes because otherwise, in encodings that
270	//  take less bytes, the resulting string will have garbage
271	//  in it.
272
273	String str = null;
274	byte b0 = 0, b1 = 0, b2 = 0, b3 = 0;
275	byte b[] = null;
276
277	b0 = (byte) (code & 0xFF);
278	b1 = (byte) ((code >> 8) & 0xFF);
279	b2 = (byte) ((code >> 16) & 0xFF);
280	b3 = (byte) ((code >> 24) & 0xFf);
281
282	// We create an array sized to the encoding.
283
284	if (encoding.equals(UNICODE_BIG) ||
285	    encoding.equals(UNICODE_LITTLE)) {
286	    b = new byte[4];
287	    b[0] = b0;
288	    b[1] = b1;
289	    b[2] = b2;
290	    b[3] = b3;
291
292	} else if (encoding.equals(LATIN1) || encoding.equals(ASCII)) {
293	    // single byte
294	    b = new byte[1];
295	    b[0] = b0;
296
297	    if (b1 != 0 || b2 != 0) {
298		throw
299		    new ServiceLocationException(
300				ServiceLocationException.PARSE_ERROR,
301				"v1_stringcode_error",
302				new Object[] {ch, encoding});
303	    }
304
305
306	} else if (encoding.equals(UTF8)) {// vari-byte
307
308	    if (b3 != 0) {
309		throw
310		    new ServiceLocationException(
311				ServiceLocationException.PARSE_ERROR,
312				"v1_stringcode_error",
313				new Object[] {ch, encoding});
314	    }
315
316	    if (b2 != 0) {
317		b = new byte[3];
318		b[2] = b2;
319		b[1] = b1;
320		b[0] = b0;
321	    } else if (b1 != 0) {
322		b = new byte[2];
323		b[1] = b1;
324		b[0] = b0;
325	    } else {
326		b = new byte[1];
327		b[0] = b0;
328	    }
329	}
330
331	// Make a string out of it.
332
333	try {
334	    str = new String(b, encoding);
335
336	} catch (UnsupportedEncodingException ex) {
337	    Assert.slpassert(false,
338			  "v1_unsupported_encoding",
339			  new Object[] {encoding});
340	}
341
342	return str;
343    }
344
345    // Determine from the flag bytes whether this is big or little endian
346    //  Unicode. If there are no flag bytes, then just return UNICODE.
347
348    static String getUnicodeEndianess(byte[] bytes) {
349
350	if (bytes.length >= 2) {
351
352	    if (bytes[0] == UNICODE_LITTLE_FLAG[0] &&
353		bytes[1] == UNICODE_LITTLE_FLAG[1]) {
354		return UNICODE_LITTLE;
355
356	    } else if (bytes[0] == UNICODE_BIG_FLAG[0] &&
357		       bytes[1] == UNICODE_BIG_FLAG[1]) {
358		return UNICODE_BIG;
359
360	    }
361	}
362
363	// We can`t tell from the byte header, so it's big endian. But
364	//  since we need to add the byte header, we say we don't know.
365
366	return UNICODE;
367
368    }
369
370    // Add the big endian flag to a Unicode string.
371
372    static byte[] addBigEndianFlag(byte[] bytes) {
373
374	byte[] flaggedBytes = new byte[bytes.length + 2];
375
376	flaggedBytes[0] = UNICODE_BIG_FLAG[0];
377	flaggedBytes[1] = UNICODE_BIG_FLAG[1];
378
379	System.arraycopy(flaggedBytes, 2, bytes, 0, bytes.length);
380
381	return flaggedBytes;
382
383    }
384}
385