1/*
2 * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 */
23
24import java.io.BufferedReader;
25import java.io.BufferedWriter;
26import java.io.FilterReader;
27import java.io.FilterWriter;
28import java.io.IOException;
29import java.io.Reader;
30import java.io.Writer;
31import java.nio.charset.Charset;
32import java.nio.charset.CharsetEncoder;
33import java.nio.file.Files;
34import java.nio.file.Path;
35import static java.nio.charset.StandardCharsets.*;
36
/**
 * Simple utility that converts a text file between a native encoding and
 * plain ASCII, in which every character above U+007F is spelled as a Unicode
 * escape: a backslash, the letter 'u' and four hexadecimal digits.
 *
 * <p>Both directions copy the input line by line, so every line written ends
 * with the platform line separator, whatever terminator the input used.
 */
public class Native2Ascii {
    /**
     * Number of characters that follow the backslash in a complete escape:
     * the letter 'u' plus four hexadecimal digits.
     */
    private static final int ESCAPE_TAIL = 5;

    final Charset cs;
    final CharsetEncoder encoder;

    /**
     * Creates a converter for the given native charset.
     *
     * @param cs the native encoding; used to read/write the native side and
     *           to decide which escapes can be turned back into characters
     */
    public Native2Ascii(Charset cs) {
        this.cs = cs;
        this.encoder = cs.newEncoder();
    }

    /**
     * ASCII to Native conversion.
     *
     * <p>Reads {@code infile} as US-ASCII, replaces every Unicode escape whose
     * character is encodable in the native charset by that character, and
     * writes the result to {@code outfile} in the native charset.
     *
     * @param infile  the ASCII file to read
     * @param outfile the native-encoded file to write
     * @throws IOException if reading or writing fails
     */
    public void asciiToNative(Path infile, Path outfile) throws IOException {
        try (BufferedReader in = Files.newBufferedReader(infile, US_ASCII);
             BufferedReader reader = new BufferedReader(new A2NFilter(in));
             BufferedWriter writer = Files.newBufferedWriter(outfile, cs)) {
            copyLines(reader, writer);
        }
    }

    /**
     * Native to ASCII conversion.
     *
     * <p>Reads {@code infile} in the native charset and writes it to
     * {@code outfile} as US-ASCII, escaping every character above U+007F.
     *
     * @param infile  the native-encoded file to read
     * @param outfile the ASCII file to write
     * @throws IOException if reading or writing fails
     */
    public void nativeToAscii(Path infile, Path outfile) throws IOException {
        try (BufferedReader reader = Files.newBufferedReader(infile, cs);
             BufferedWriter out = Files.newBufferedWriter(outfile, US_ASCII);
             BufferedWriter writer = new BufferedWriter(new N2AFilter(out))) {
            copyLines(reader, writer);
        }
    }

    // Copies every line of reader to writer, terminating each one with the
    // platform line separator.
    private static void copyLines(BufferedReader reader, BufferedWriter writer)
            throws IOException {
        String line;
        while ((line = reader.readLine()) != null) {
            writer.write(line);
            writer.newLine();
        }
    }

    // Rejects an (off, len) pair that does not lie inside a sequence of the
    // given size; written overflow-safe.
    private static void checkRange(int off, int len, int size) {
        if (off < 0 || len < 0 || len > size - off) {
            throw new IndexOutOfBoundsException(
                    "off=" + off + ", len=" + len + ", size=" + size);
        }
    }

    // Parses exactly four ASCII hex digits starting at chars[start].
    // Returns the value (0..0xFFFF), or -1 if any of them is not a hex digit.
    // Unlike Integer.parseInt this never accepts a leading sign.
    private static int parseHex4(char[] chars, int start) {
        int value = 0;
        for (int k = 0; k < 4; k++) {
            char ch = chars[start + k];
            int digit;
            if (ch >= '0' && ch <= '9') {
                digit = ch - '0';
            } else if (ch >= 'a' && ch <= 'f') {
                digit = ch - 'a' + 10;
            } else if (ch >= 'A' && ch <= 'F') {
                digit = ch - 'A' + 10;
            } else {
                return -1;
            }
            value = (value << 4) | digit;
        }
        return value;
    }

    /**
     * Writer filter that escapes every character above U+007F as a backslash,
     * 'u' and four lower-case hex digits; all other characters pass through
     * unchanged. A copy of the native2ascii N2AFilter.
     */
    class N2AFilter extends FilterWriter {
        public N2AFilter(Writer out) { super(out); }

        /** Writes one character, escaping it if necessary. */
        public void write(char b) throws IOException {
            emit(b);
        }

        /** Writes the character held in the low 16 bits of {@code c}. */
        @Override
        public void write(int c) throws IOException {
            // Overridden so single-character writes cannot bypass the escaping.
            emit((char) c);
        }

        /**
         * Writes {@code len} characters of {@code buf} starting at {@code off}.
         *
         * @throws IndexOutOfBoundsException if the range is not inside buf
         */
        @Override
        public void write(char[] buf, int off, int len) throws IOException {
            checkRange(off, len, buf.length);
            for (int i = off, end = off + len; i < end; i++) {
                emit(buf[i]);
            }
        }

        /**
         * Writes {@code len} characters of {@code str} starting at {@code off}.
         *
         * @throws IndexOutOfBoundsException if the range is not inside str
         */
        @Override
        public void write(String str, int off, int len) throws IOException {
            // FilterWriter would hand the string straight to the underlying
            // writer, so it has to be escaped here as well.
            checkRange(off, len, str.length());
            for (int i = off, end = off + len; i < end; i++) {
                emit(str.charAt(i));
            }
        }

        // Sends one character to the underlying writer, escaped if it is
        // above U+007F.
        private void emit(char c) throws IOException {
            if (c <= '\u007f') {
                out.write(c);
                return;
            }
            out.write('\\');
            out.write('u');
            // Four hex digits, most significant nibble first.
            for (int shift = 12; shift >= 0; shift -= 4) {
                out.write(Character.forDigit((c >> shift) & 0xF, 16));
            }
        }
    }

    /**
     * Reader filter that turns Unicode escapes back into characters. A copy
     * of the native2ascii A2NFilter.
     *
     * <p>An escape is replaced only when its four digits are valid hex and the
     * enclosing converter's encoder reports the character as encodable;
     * otherwise the text is passed through unchanged. A backslash followed by
     * any character other than 'u' is passed through together with that
     * character. Because {@code CharsetEncoder.canEncode(char)} rejects
     * surrogates, escaped surrogate pairs are left as escapes.
     */
    class A2NFilter extends FilterReader {
        // Raw input held over from the previous fill: a backslash followed by
        // fewer than ESCAPE_TAIL characters, i.e. a possibly incomplete escape.
        private char[] trailChars = null;

        // Decoded characters not yet handed to the caller: the live part is
        // pending[pendingPos .. pendingEnd).
        private char[] pending = null;
        private int pendingPos = 0;
        private int pendingEnd = 0;

        public A2NFilter(Reader in) {
            super(in);
        }

        /**
         * Reads decoded characters into {@code buf[off .. off+len)}.
         *
         * @return the number of characters stored (at least one when
         *         {@code len > 0}), or -1 at end of stream
         * @throws IndexOutOfBoundsException if the range is not inside buf
         */
        @Override
        public int read(char[] buf, int off, int len) throws IOException {
            checkRange(off, len, buf.length);
            if (len == 0) {
                return 0;
            }
            // A fill may yield nothing when all it read was the start of an
            // escape; keep filling until there is output or the stream ends.
            while (pendingPos == pendingEnd) {
                if (!fill(len)) {
                    return -1;
                }
            }
            int n = Math.min(len, pendingEnd - pendingPos);
            System.arraycopy(pending, pendingPos, buf, off, n);
            pendingPos += n;
            return n;
        }

        /**
         * Reads a single decoded character.
         *
         * @return the character, or -1 at end of stream
         */
        @Override
        public int read() throws IOException {
            char[] one = new char[1];
            return (read(one, 0, 1) == -1) ? -1 : one[0];
        }

        // Reads up to 'want' fresh characters, decodes them together with any
        // held-over input and stores the result in 'pending'.
        // Returns false once the stream is exhausted and nothing is held over.
        private boolean fill(int want) throws IOException {
            int carried = (trailChars == null) ? 0 : trailChars.length;
            char[] raw = new char[carried + want];
            if (carried > 0) {
                System.arraycopy(trailChars, 0, raw, 0, carried);
            }
            int n = in.read(raw, carried, want);
            // Only forget the held-over input once the read has succeeded.
            trailChars = null;

            boolean eof = n < 0;
            if (eof && carried == 0) {
                return false;
            }
            int count = eof ? carried : carried + n;

            // Decoding never produces more characters than it consumes.
            char[] decoded = new char[count];
            pendingEnd = decode(raw, count, eof, decoded);
            pendingPos = 0;
            pending = decoded;
            return true;
        }

        // Decodes raw[0 .. count) into dst and returns the number of
        // characters produced. An escape that may be cut off at the end of
        // the input is saved in trailChars instead of being produced.
        private int decode(char[] raw, int count, boolean eof, char[] dst) {
            int produced = 0;
            int i = 0;
            while (i < count) {
                char c = raw[i++];

                // Ordinary characters pass straight through. So does a
                // backslash within the final few characters of the stream:
                // no complete escape can follow it, so it is a literal
                // U+005C.
                if (c != '\\' || (eof && count <= ESCAPE_TAIL)) {
                    dst[produced++] = c;
                    continue;
                }

                int remaining = count - i;
                if (remaining < ESCAPE_TAIL) {
                    // Might be the start of an escape, but there is not
                    // enough input to tell yet: hold it for the next fill.
                    trailChars = new char[1 + remaining];
                    trailChars[0] = c;
                    System.arraycopy(raw, i, trailChars, 1, remaining);
                    break;
                }

                c = raw[i++];
                if (c != 'u') {
                    // Not a unicode escape, so copy both characters
                    dst[produced++] = '\\';
                    dst[produced++] = c;
                    continue;
                }

                int code = parseHex4(raw, i);
                if (code >= 0 && encoder.canEncode((char) code)) {
                    dst[produced++] = (char) code;
                    i += 4; // skip the four hex digits just decoded
                } else {
                    // Keep the original text; the four characters after the
                    // 'u' are then handled as ordinary input.
                    dst[produced++] = '\\';
                    dst[produced++] = 'u';
                }
            }
            return produced;
        }
    }
}
214