/*
 * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FilterReader;
import java.io.FilterWriter;
import java.io.IOException;
import java.io.Reader;
import java.io.Writer;
import java.nio.charset.Charset;
import java.nio.charset.CharsetEncoder;
import java.nio.file.Files;
import java.nio.file.Path;
import static java.nio.charset.StandardCharsets.*;

/**
 * Simple utility to convert a text file between a native encoding and plain
 * ASCII in which every non-ASCII character is written as a backslash-u
 * Unicode escape (a backslash, the letter {@code u} and four hex digits).
 *
 * <p>Both conversions work line by line: each input line is written followed
 * by the platform line separator, so line terminators are normalized and the
 * output always ends with a line separator.
 */
public class Native2Ascii {
    /** Length of a complete escape: backslash, 'u' and four hex digits. */
    private static final int ESCAPE_LENGTH = 6;

    final Charset cs;
    final CharsetEncoder encoder;

    /**
     * Creates a converter for the given native charset.
     *
     * @param cs the native charset used to read or write the non-ASCII file
     */
    public Native2Ascii(Charset cs) {
        this.cs = cs;
        this.encoder = cs.newEncoder();
    }

    /**
     * ASCII to Native conversion.
     *
     * @param infile  US-ASCII input file that may contain Unicode escapes
     * @param outfile output file, written in the native charset
     * @throws IOException if reading or writing fails
     */
    public void asciiToNative(Path infile, Path outfile) throws IOException {
        try (BufferedReader in = Files.newBufferedReader(infile, US_ASCII);
             BufferedReader reader = new BufferedReader(new A2NFilter(in));
             BufferedWriter writer = Files.newBufferedWriter(outfile, cs)) {
            copyLines(reader, writer);
        }
    }

    /**
     * Native to ASCII conversion.
     *
     * @param infile  input file, read in the native charset
     * @param outfile US-ASCII output file with non-ASCII characters escaped
     * @throws IOException if reading or writing fails
     */
    public void nativeToAscii(Path infile, Path outfile) throws IOException {
        try (BufferedReader reader = Files.newBufferedReader(infile, cs);
             BufferedWriter out = Files.newBufferedWriter(outfile, US_ASCII);
             BufferedWriter writer = new BufferedWriter(new N2AFilter(out))) {
            copyLines(reader, writer);
        }
    }

    /** Copies every line of {@code reader} to {@code writer}, each followed by a line separator. */
    private static void copyLines(BufferedReader reader, BufferedWriter writer)
            throws IOException {
        String line;
        while ((line = reader.readLine()) != null) {
            writer.write(line);
            writer.newLine();
        }
    }

    /**
     * Parses exactly four ASCII hex digits starting at {@code start}.
     * Unlike {@code Integer.parseInt}, signs and non-ASCII digits are rejected.
     *
     * @return the value in the range 0..0xFFFF, or -1 if any char is not a hex digit
     */
    private static int parseHex4(char[] chars, int start) {
        int value = 0;
        for (int k = start; k < start + 4; k++) {
            int digit = hexDigit(chars[k]);
            if (digit < 0) {
                return -1;
            }
            value = (value << 4) | digit;
        }
        return value;
    }

    /** Returns the value of an ASCII hex digit, or -1 if {@code c} is not one. */
    private static int hexDigit(char c) {
        if (c >= '0' && c <= '9') {
            return c - '0';
        }
        if (c >= 'a' && c <= 'f') {
            return c - 'a' + 10;
        }
        if (c >= 'A' && c <= 'F') {
            return c - 'A' + 10;
        }
        return -1;
    }

    /**
     * A copy of native2ascii N2AFilter: a writer that passes ASCII characters
     * through unchanged and replaces every other character with a
     * backslash-u escape using four lower-case hex digits.
     */
    class N2AFilter extends FilterWriter {
        public N2AFilter(Writer out) { super(out); }

        /** Writes a single character, escaping it if it is not ASCII. */
        public void write(char b) throws IOException {
            writeEscaped(b);
        }

        // Overridden because FilterWriter.write(int) would hand the
        // character to the underlying writer without escaping it.
        @Override
        public void write(int c) throws IOException {
            writeEscaped((char) c);
        }

        @Override
        public void write(char[] buf, int off, int len) throws IOException {
            if (off < 0 || len < 0 || len > buf.length - off) {
                throw new IndexOutOfBoundsException();
            }
            int end = off + len;
            for (int i = off; i < end; i++) {
                writeEscaped(buf[i]);
            }
        }

        // Overridden because FilterWriter.write(String, int, int) would hand
        // the string to the underlying writer without escaping it.
        @Override
        public void write(String str, int off, int len) throws IOException {
            if (off < 0 || len < 0 || len > str.length() - off) {
                throw new IndexOutOfBoundsException();
            }
            int end = off + len;
            for (int i = off; i < end; i++) {
                writeEscaped(str.charAt(i));
            }
        }

        /** Writes {@code c} as is when it is ASCII, otherwise as an escape. */
        private void writeEscaped(char c) throws IOException {
            if (c <= '\u007f') {
                out.write(c);
                return;
            }
            out.write('\\');
            out.write('u');
            // Most significant nibble first, always four digits.
            for (int shift = 12; shift >= 0; shift -= 4) {
                out.write(Character.forDigit((c >> shift) & 0xF, 16));
            }
        }
    }

    /**
     * A copy of native2ascii A2NFilter: a reader that replaces backslash-u
     * escapes with the character they denote, provided the enclosing
     * converter's charset can encode that character. Escapes that are
     * malformed or not encodable are passed through unchanged, as is a
     * backslash followed by anything other than 'u' (including the character
     * that follows it).
     */
    class A2NFilter extends FilterReader {
        // Undecoded input carried over to the next call; never more than
        // ESCAPE_LENGTH - 1 characters.
        private char[] trailChars = null;

        // True when the next input character follows a literal backslash and
        // must be copied verbatim even if it is itself a backslash.
        private boolean literalNext = false;

        // True once the underlying reader has reported end of stream.
        private boolean eof = false;

        public A2NFilter(Reader in) {
            super(in);
        }

        /**
         * Reads decoded characters into {@code buf[off .. off + len)}.
         *
         * @return the number of characters stored (at least one when
         *         {@code len > 0}), or -1 at end of stream
         */
        @Override
        public int read(char[] buf, int off, int len) throws IOException {
            if (off < 0 || len < 0 || len > buf.length - off) {
                throw new IndexOutOfBoundsException();
            }
            if (len == 0) {
                return 0;
            }

            // The work buffer always has room for one complete escape so
            // that even single-character reads can resolve a backslash.
            char[] cBuf = new char[Math.max(len, ESCAPE_LENGTH)];
            int numChars = 0;   // valid characters in cBuf
            if (trailChars != null) {
                numChars = trailChars.length;
                System.arraycopy(trailChars, 0, cBuf, 0, numChars);
                trailChars = null;
            }

            int i = 0;          // next undecoded position in cBuf
            int retChars = 0;   // how many characters we'll return

            for (;;) {
                while (i < numChars && retChars < len) {
                    char c = cBuf[i];

                    if (literalNext || eof || c != '\\') {
                        // Plain character. At end of stream any leftover
                        // backslash cannot start a complete escape, so it
                        // is copied verbatim as well.
                        literalNext = false;
                        buf[off + retChars++] = c;
                        i++;
                        continue;
                    }

                    int remaining = numChars - i - 1;   // chars after the backslash
                    if (remaining == 0) {
                        break;  // cannot tell yet what follows the backslash
                    }

                    char next = cBuf[i + 1];
                    if (next == 'u') {
                        if (remaining < ESCAPE_LENGTH - 1) {
                            break;  // escape is still incomplete
                        }
                        int value = parseHex4(cBuf, i + 2);
                        if (value >= 0 && encoder.canEncode((char) value)) {
                            buf[off + retChars++] = (char) value;
                            i += ESCAPE_LENGTH;
                            continue;
                        }
                        // Malformed or not encodable: keep the original
                        // text; the characters after the backslash are
                        // then processed as ordinary input.
                    } else if (next == '\\') {
                        // The second backslash of a pair never starts an escape.
                        literalNext = true;
                    }
                    buf[off + retChars++] = '\\';
                    i++;
                }

                if (retChars > 0) {
                    break;
                }
                if (eof) {
                    return -1;  // nothing buffered and nothing left to read
                }

                // Nothing could be produced, so nothing was consumed (i == 0)
                // and at most ESCAPE_LENGTH - 1 chars are pending: read more.
                int n = in.read(cBuf, numChars, cBuf.length - numChars);
                if (n < 0) {
                    eof = true;
                } else {
                    numChars += n;
                }
            }

            if (i < numChars) {
                trailChars = new char[numChars - i];
                System.arraycopy(cBuf, i, trailChars, 0, trailChars.length);
            }
            return retChars;
        }

        @Override
        public int read() throws IOException {
            char[] one = new char[1];
            return (read(one, 0, 1) == -1) ? -1 : one[0];
        }
    }
}