Convert.java revision 4231:7b1789ef6956
1/*
2 * Copyright (c) 1999, 2017, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.  Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25
26package com.sun.tools.javac.util;
27
28/** Utility class for static conversion methods between numbers
29 *  and strings in various formats.
30 *
31 *  <p>Note regarding UTF-8.
32 *  The JVMS defines its own version of the UTF-8 format so that it
33 *  contains no zero bytes (modified UTF-8). This is not actually the same
34 *  as Charset.forName("UTF-8").
35 *
36 *  <p>
37 *  See also:
38 *  <ul>
39 *  <li><a href="http://docs.oracle.com/javase/specs/jvms/se7/html/jvms-4.html#jvms-4.4.7">
40 *    JVMS 4.4.7 </a></li>
41 *  <li><a href="http://docs.oracle.com/javase/7/docs/api/java/io/DataInput.html#modified-utf-8">
42      java.io.DataInput: Modified UTF-8 </a></li>
43    <li><a href="https://en.wikipedia.org/wiki/UTF-8#Modified_UTF-8">
44      Modified UTF-8 (wikipedia) </a></li>
45 *  </ul>
46 *
47 *  The methods here support modified UTF-8.
48 *
49 *  <p><b>This is NOT part of any supported API.
50 *  If you write code that depends on this, you do so at your own risk.
51 *  This code and its internal interfaces are subject to change or
52 *  deletion without notice.</b>
53 */
54public class Convert {
55
56    /** Convert string to integer.
57     */
58    public static int string2int(String s, int radix)
59        throws NumberFormatException {
60        if (radix == 10) {
61            return Integer.parseInt(s, radix);
62        } else {
63            char[] cs = s.toCharArray();
64            int limit = Integer.MAX_VALUE / (radix/2);
65            int n = 0;
66            for (char c : cs) {
67                int d = Character.digit(c, radix);
68                if (n < 0 ||
69                    n > limit ||
70                    n * radix > Integer.MAX_VALUE - d)
71                    throw new NumberFormatException();
72                n = n * radix + d;
73            }
74            return n;
75        }
76    }
77
78    /** Convert string to long integer.
79     */
80    public static long string2long(String s, int radix)
81        throws NumberFormatException {
82        if (radix == 10) {
83            return Long.parseLong(s, radix);
84        } else {
85            char[] cs = s.toCharArray();
86            long limit = Long.MAX_VALUE / (radix/2);
87            long n = 0;
88            for (char c : cs) {
89                int d = Character.digit(c, radix);
90                if (n < 0 ||
91                    n > limit ||
92                    n * radix > Long.MAX_VALUE - d)
93                    throw new NumberFormatException();
94                n = n * radix + d;
95            }
96            return n;
97        }
98    }
99
100/* Conversion routines between names, strings, and byte arrays in Utf8 format
101 */
102
103    /** Convert `len' bytes from utf8 to characters.
104     *  Parameters are as in System.arraycopy
105     *  Return first index in `dst' past the last copied char.
106     *  @param src        The array holding the bytes to convert.
107     *  @param sindex     The start index from which bytes are converted.
108     *  @param dst        The array holding the converted characters..
109     *  @param dindex     The start index from which converted characters
110     *                    are written.
111     *  @param len        The maximum number of bytes to convert.
112     */
113    public static int utf2chars(byte[] src, int sindex,
114                                char[] dst, int dindex,
115                                int len) {
116        int i = sindex;
117        int j = dindex;
118        int limit = sindex + len;
119        while (i < limit) {
120            int b = src[i++] & 0xFF;
121            if (b >= 0xE0) {
122                b = (b & 0x0F) << 12;
123                b = b | (src[i++] & 0x3F) << 6;
124                b = b | (src[i++] & 0x3F);
125            } else if (b >= 0xC0) {
126                b = (b & 0x1F) << 6;
127                b = b | (src[i++] & 0x3F);
128            }
129            dst[j++] = (char)b;
130        }
131        return j;
132    }
133
134    /** Return bytes in Utf8 representation as an array of characters.
135     *  @param src        The array holding the bytes.
136     *  @param sindex     The start index from which bytes are converted.
137     *  @param len        The maximum number of bytes to convert.
138     */
139    public static char[] utf2chars(byte[] src, int sindex, int len) {
140        char[] dst = new char[len];
141        int len1 = utf2chars(src, sindex, dst, 0, len);
142        char[] result = new char[len1];
143        System.arraycopy(dst, 0, result, 0, len1);
144        return result;
145    }
146
147    /** Return all bytes of a given array in Utf8 representation
148     *  as an array of characters.
149     *  @param src        The array holding the bytes.
150     */
151    public static char[] utf2chars(byte[] src) {
152        return utf2chars(src, 0, src.length);
153    }
154
155    /** Return bytes in Utf8 representation as a string.
156     *  @param src        The array holding the bytes.
157     *  @param sindex     The start index from which bytes are converted.
158     *  @param len        The maximum number of bytes to convert.
159     */
160    public static String utf2string(byte[] src, int sindex, int len) {
161        char dst[] = new char[len];
162        int len1 = utf2chars(src, sindex, dst, 0, len);
163        return new String(dst, 0, len1);
164    }
165
166    /** Return all bytes of a given array in Utf8 representation
167     *  as a string.
168     *  @param src        The array holding the bytes.
169     */
170    public static String utf2string(byte[] src) {
171        return utf2string(src, 0, src.length);
172    }
173
174    /** Copy characters in source array to bytes in target array,
175     *  converting them to Utf8 representation.
176     *  The target array must be large enough to hold the result.
177     *  returns first index in `dst' past the last copied byte.
178     *  @param src        The array holding the characters to convert.
179     *  @param sindex     The start index from which characters are converted.
180     *  @param dst        The array holding the converted characters..
181     *  @param dindex     The start index from which converted bytes
182     *                    are written.
183     *  @param len        The maximum number of characters to convert.
184     */
185    public static int chars2utf(char[] src, int sindex,
186                                byte[] dst, int dindex,
187                                int len) {
188        int j = dindex;
189        int limit = sindex + len;
190        for (int i = sindex; i < limit; i++) {
191            char ch = src[i];
192            if (1 <= ch && ch <= 0x7F) {
193                dst[j++] = (byte)ch;
194            } else if (ch <= 0x7FF) {
195                dst[j++] = (byte)(0xC0 | (ch >> 6));
196                dst[j++] = (byte)(0x80 | (ch & 0x3F));
197            } else {
198                dst[j++] = (byte)(0xE0 | (ch >> 12));
199                dst[j++] = (byte)(0x80 | ((ch >> 6) & 0x3F));
200                dst[j++] = (byte)(0x80 | (ch & 0x3F));
201            }
202        }
203        return j;
204    }
205
206    /** Return characters as an array of bytes in Utf8 representation.
207     *  @param src        The array holding the characters.
208     *  @param sindex     The start index from which characters are converted.
209     *  @param len        The maximum number of characters to convert.
210     */
211    public static byte[] chars2utf(char[] src, int sindex, int len) {
212        byte[] dst = new byte[len * 3];
213        int len1 = chars2utf(src, sindex, dst, 0, len);
214        byte[] result = new byte[len1];
215        System.arraycopy(dst, 0, result, 0, len1);
216        return result;
217    }
218
219    /** Return all characters in given array as an array of bytes
220     *  in Utf8 representation.
221     *  @param src        The array holding the characters.
222     */
223    public static byte[] chars2utf(char[] src) {
224        return chars2utf(src, 0, src.length);
225    }
226
227    /** Return string as an array of bytes in in Utf8 representation.
228     */
229    public static byte[] string2utf(String s) {
230        return chars2utf(s.toCharArray());
231    }
232
233    /**
234     * Escapes each character in a string that has an escape sequence or
235     * is non-printable ASCII.  Leaves non-ASCII characters alone.
236     */
237    public static String quote(String s) {
238        StringBuilder buf = new StringBuilder();
239        for (int i = 0; i < s.length(); i++) {
240            buf.append(quote(s.charAt(i)));
241        }
242        return buf.toString();
243    }
244
245    /**
246     * Escapes a character if it has an escape sequence or is
247     * non-printable ASCII.  Leaves non-ASCII characters alone.
248     */
249    public static String quote(char ch) {
250        switch (ch) {
251        case '\b':  return "\\b";
252        case '\f':  return "\\f";
253        case '\n':  return "\\n";
254        case '\r':  return "\\r";
255        case '\t':  return "\\t";
256        case '\'':  return "\\'";
257        case '\"':  return "\\\"";
258        case '\\':  return "\\\\";
259        default:
260            return (isPrintableAscii(ch))
261                ? String.valueOf(ch)
262                : String.format("\\u%04x", (int) ch);
263        }
264    }
265
266    /**
267     * Is a character printable ASCII?
268     */
269    private static boolean isPrintableAscii(char ch) {
270        return ch >= ' ' && ch <= '~';
271    }
272
273    /** Escape all unicode characters in string.
274     */
275    public static String escapeUnicode(String s) {
276        int len = s.length();
277        int i = 0;
278        while (i < len) {
279            char ch = s.charAt(i);
280            if (ch > 255) {
281                StringBuilder buf = new StringBuilder();
282                buf.append(s.substring(0, i));
283                while (i < len) {
284                    ch = s.charAt(i);
285                    if (ch > 255) {
286                        buf.append("\\u");
287                        buf.append(Character.forDigit((ch >> 12) % 16, 16));
288                        buf.append(Character.forDigit((ch >>  8) % 16, 16));
289                        buf.append(Character.forDigit((ch >>  4) % 16, 16));
290                        buf.append(Character.forDigit((ch      ) % 16, 16));
291                    } else {
292                        buf.append(ch);
293                    }
294                    i++;
295                }
296                s = buf.toString();
297            } else {
298                i++;
299            }
300        }
301        return s;
302    }
303
304/* Conversion routines for qualified name splitting
305 */
306    /** Return the last part of a qualified name.
307     *  @param name the qualified name
308     *  @return the last part of the qualified name
309     */
310    public static Name shortName(Name name) {
311        int start = name.lastIndexOf((byte)'.') + 1;
312        int end = name.getByteLength();
313        if (start == 0 && end == name.length()) {
314            return name;
315        }
316        return name.subName(
317            name.lastIndexOf((byte)'.') + 1, name.getByteLength());
318    }
319
320    /** Return the last part of a qualified name from its string representation
321     *  @param name the string representation of the qualified name
322     *  @return the last part of the qualified name
323     */
324    public static String shortName(String name) {
325        return name.substring(name.lastIndexOf('.') + 1);
326    }
327
328    /** Return the package name of a class name, excluding the trailing '.',
329     *  "" if not existent.
330     */
331    public static Name packagePart(Name classname) {
332        return classname.subName(0, classname.lastIndexOf((byte)'.'));
333    }
334
335    public static String packagePart(String classname) {
336        int lastDot = classname.lastIndexOf('.');
337        return (lastDot < 0 ? "" : classname.substring(0, lastDot));
338    }
339
340    public static List<Name> enclosingCandidates(Name name) {
341        List<Name> names = List.nil();
342        int index;
343        while ((index = name.lastIndexOf((byte)'$')) > 0) {
344            name = name.subName(0, index);
345            names = names.prepend(name);
346        }
347        return names;
348    }
349
350    public static List<Name> classCandidates(Name name) {
351        List<Name> names = List.nil();
352        String nameStr = name.toString();
353        int index = -1;
354        while ((index = nameStr.indexOf('.', index + 1)) > 0) {
355            String pack = nameStr.substring(0, index + 1);
356            String clz = nameStr.substring(index + 1).replace('.', '$');
357            names = names.prepend(name.table.names.fromString(pack + clz));
358        }
359        return names.reverse();
360    }
361}
362