1/* 2 * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 22 */ 23 24/** 25 * @test 26 * @bug 8008738 8065138 27 * @summary checks that the mapping implemented by 28 * com.sun.org.apache.xml.internal.serializer.Encodings 29 * correctly identifies valid Charset names and 30 * correctly maps them to their preferred mime names. 31 * Also checks that the Encodings.properties resource file 32 * is consistent. 33 * @modules java.xml/com.sun.org.apache.xml.internal.serializer:+open 34 * @compile -XDignore.symbol.file CheckEncodingPropertiesFile.java 35 * @run main CheckEncodingPropertiesFile 36 * @author Daniel Fuchs 37 */ 38 39import com.sun.org.apache.xml.internal.serializer.EncodingInfo; 40import com.sun.org.apache.xml.internal.serializer.Encodings; 41import java.io.InputStreamReader; 42import java.lang.reflect.Method; 43import java.nio.charset.Charset; 44import java.util.ArrayList; 45import java.util.Arrays; 46import java.util.Collection; 47import java.util.Collections; 48import java.util.HashMap; 49import java.util.HashSet; 50import java.util.LinkedHashSet; 51import java.util.List; 52import java.util.Map; 53import java.util.Map.Entry; 54import java.util.Properties; 55import java.util.Set; 56import java.util.StringTokenizer; 57 58public class CheckEncodingPropertiesFile { 59 60 private static final String ENCODINGS_FILE = "com/sun/org/apache/xml/internal/serializer/Encodings.properties"; 61 62 public static void main(String[] args) throws Exception { 63 Properties props = new Properties(); 64 Module xmlModule = EncodingInfo.class.getModule(); 65 try (InputStreamReader is = new InputStreamReader(xmlModule.getResourceAsStream(ENCODINGS_FILE))) { 66 props.load(is); 67 } 68 69 if (!props.containsKey("UTF8")) { 70 // If the test fails here - it may indicate that you stumbled on an 71 // issue similar to that fixed by JDK-8065138. 72 // Check that the content of the Encodings.properties included in 73 // the tested build image matches the content of the file in the source 74 // jaxp tree of the jdk forest. 75 throw new RuntimeException("UTF8 key missing in " + ENCODINGS_FILE); 76 } 77 78 //printAllCharsets(); 79 80 test(props); 81 } 82 83 84 private static final class CheckCharsetMapping { 85 86 /** 87 * A map that maps Java or XML name to canonical charset names. 88 * key: upper cased value of Java or XML name. 89 * value: case-sensitive canonical name of charset. 90 */ 91 private final Map<String, String> charsetMap = new HashMap<>(); 92 93 private final Map<String, String> preferredMime = new HashMap<>(); 94 95 /** 96 * Unresolved alias names. 97 * For a given set of names pointing to the same unresolved charset, 98 * this map will contain, for each alias in the set, a mapping 99 * with the alias.toUpperValue() as key and the set of known aliases 100 * as value. 101 */ 102 private final Map<String, Collection<String>> unresolved = new HashMap<>(); 103 104 public final static class ConflictingCharsetError extends Error { 105 ConflictingCharsetError(String a, String cs1, String cs2) { 106 super("Conflicting charset mapping for '"+a+"': '"+cs1+"' and '"+cs2+"'"); 107 } 108 } 109 110 public final static class MissingValidCharsetNameError extends Error { 111 MissingValidCharsetNameError(String name, Collection<String> aliases) { 112 super(name+": Line "+aliases+" has no recognized charset alias"); 113 } 114 } 115 116 public final static class ConflictingPreferredMimeNameError extends Error { 117 ConflictingPreferredMimeNameError(String a, String cs1, String cs2) { 118 super("Conflicting preferred mime name for '"+a+"': '"+cs1+"' and '"+cs2+"'"); 119 } 120 } 121 122 /** 123 * For each alias in aliases, attempt to find the canonical 124 * charset name. 125 * All names in aliases are supposed to point to the same charset. 126 * Names in aliases can be java names or XML names, indifferently. 127 * @param aliases list of names (aliases) for a given charset. 128 * @return The canonical name of the charset, if found, null otherwise. 129 */ 130 private String findCharsetNameFor(String[] aliases) { 131 String cs = null; 132 String res = null; 133 for (String a : aliases) { 134 final String k = a.toUpperCase(); 135 String cachedCs = charsetMap.get(k); 136 if (cs == null) { 137 cs = cachedCs; 138 } 139 if (cachedCs != null && cs != null 140 && !Charset.forName(cachedCs).name().equals(Charset.forName(cs).name())) { 141 throw new ConflictingCharsetError(a,cs,cachedCs); 142 } 143 try { 144 final String rcs = Charset.forName(a).name(); 145 if (cs != null && !Charset.forName(cs).name().equals(rcs)) { 146 throw new ConflictingCharsetError(a,cs,rcs); 147 } 148 if (res == null) { 149 if (a.equals(aliases[0])) { 150 res = a; 151 } else { 152 res = cs; 153 } 154 } 155 cs = rcs; 156 charsetMap.put(k, res == null ? cs : res); 157 } catch (Exception x) { 158 continue; 159 } 160 } 161 return res == null ? cs : res; 162 } 163 164 /** 165 * Register a canonical charset name for a given set of aliases. 166 * 167 * @param charsetName the canonical charset name. 168 * @param aliases a list of aliases for the given charset. 169 */ 170 private void registerCharsetNameFor(String charsetName, String[] aliases) { 171 if (charsetName == null) throw new NullPointerException(); 172 173 for (String a : aliases) { 174 String k = a.toUpperCase(); 175 String csv = charsetMap.get(k); 176 if (csv == null) { 177 charsetMap.put(k, charsetName); 178 csv = charsetName; 179 } else if (!csv.equals(charsetName)) { 180 throw new ConflictingCharsetError(a,charsetName,csv); 181 } 182 183 final Collection<String> c = unresolved.get(k); 184 if (c != null) { 185 for (String aa : c) { 186 k = aa.toUpperCase(); 187 String csvv = charsetMap.get(k); 188 if (csvv == null) charsetMap.put(k, csv); 189 unresolved.remove(k); 190 } 191 throw new MissingValidCharsetNameError(charsetName,c); 192 } 193 } 194 } 195 196 /** 197 * Register a set of aliases as being unresolved. 198 * @param names the list of names - this should be what is returned by 199 * nameSet.toArray(new String[nameSet.size()]) 200 * @param nameSet the set of unresolved aliases. 201 */ 202 private void registerUnresolvedNamesFor(String[] names, Collection<String> nameSet) { 203 // This is not necessarily an error: it could happen that some 204 // charsets are simply not supported on some OS/Arch 205 System.err.println("Warning: unresolved charset names: '"+ nameSet 206 + "' This is not necessarily an error " 207 + "- this charset may not be supported on this platform."); 208 for (String a : names) { 209 final String k = a.toUpperCase(); 210 final Collection<String> c = unresolved.get(k); 211 if (c != null) { 212 //System.out.println("Found: "+a+" -> "+c); 213 //System.out.println("\t merging "+ c + " with " + nameSet); 214 nameSet.addAll(c); 215 for (String aa : c) { 216 unresolved.put(aa.toUpperCase(), nameSet); 217 } 218 } 219 unresolved.put(k, nameSet); 220 } 221 } 222 223 224 /** 225 * Add a new charset name mapping 226 * @param javaName the (supposedly) java name of the charset. 227 * @param xmlNames a list of corresponding XML names for that charset. 228 */ 229 void addMapping(String javaName, Collection<String> xmlNames) { 230 final LinkedHashSet<String> aliasNames = new LinkedHashSet<>(); 231 aliasNames.add(javaName); 232 aliasNames.addAll(xmlNames); 233 final String[] aliases = aliasNames.toArray(new String[aliasNames.size()]); 234 final String cs = findCharsetNameFor(aliases); 235 if (cs != null) { 236 registerCharsetNameFor(cs, aliases); 237 if (xmlNames.size() > 0) { 238 String preferred = xmlNames.iterator().next(); 239 String cachedPreferred = preferredMime.get(cs.toUpperCase()); 240 if (cachedPreferred != null && !cachedPreferred.equals(preferred)) { 241 throw new ConflictingPreferredMimeNameError(cs, cachedPreferred, preferred); 242 } 243 preferredMime.put(cs.toUpperCase(), preferred); 244 } 245 } else { 246 registerUnresolvedNamesFor(aliases, aliasNames); 247 } 248 } 249 250 /** 251 * Returns the canonical name of the charset for the given Java or XML 252 * alias name. 253 * @param alias the alias name 254 * @return the canonical charset name - or null if unknown. 255 */ 256 public String getCharsetNameFor(String alias) { 257 return charsetMap.get(alias.toUpperCase()); 258 } 259 260 } 261 262 public static void test(Properties props) throws Exception { 263 264 // First, build a mapping from the properties read from the resource 265 // file. 266 // We're going to check the consistency of the resource file 267 // while building this mapping, and throw errors if the file 268 // does not meet our assumptions. 269 // 270 Map<String, Collection<String>> lines = new HashMap<>(); 271 final CheckCharsetMapping mapping = new CheckCharsetMapping(); 272 273 for (String key : props.stringPropertyNames()) { 274 Collection<String> values = getValues(props.getProperty(key)); 275 lines.put(key, values); 276 mapping.addMapping(key, values); 277 } 278 279 // Then build maps of EncodingInfos, and print along debugging 280 // information that should help understand the content of the 281 // resource file and the mapping it defines. 282 // 283 Map<String, EncodingInfo> javaInfos = new HashMap<>(); // Map indexed by java names 284 Map<String, EncodingInfo> xmlMap = new HashMap<>(); // Map indexed by XML names 285 Map<String, String> preferred = 286 new HashMap<>(mapping.preferredMime); // Java Name -> Preferred Mime Name 287 List<EncodingInfo> all = new ArrayList<>(); // unused... 288 for (Entry<String, Collection<String>> e : lines.entrySet()) { 289 final String charsetName = mapping.getCharsetNameFor(e.getKey()); 290 if (charsetName == null) { 291 System.out.println("!! No charset for: "+e.getKey()+ " "+ e.getValue()); 292 continue; 293 } 294 Charset c = Charset.forName(charsetName); 295 EncodingInfo info; 296 final String k = e.getKey().toUpperCase(); 297 final String kc = charsetName.toUpperCase(); 298 StringBuilder sb = new StringBuilder(); 299 for (String xml : e.getValue()) { 300 final String kx = xml.toUpperCase(); 301 info = xmlMap.get(kx); 302 if (info == null) { 303 info = new EncodingInfo(xml, charsetName); 304 System.out.println("** XML: "+xml+" -> "+charsetName); 305 xmlMap.put(kx, info); 306 all.add(info); 307 } 308 if (!javaInfos.containsKey(k)) { 309 javaInfos.put(k, info); 310 if (!preferred.containsKey(k)) { 311 preferred.put(k, xml); 312 } 313 sb.append("** Java: ").append(k).append(" -> ") 314 .append(xml).append(" (charset: ") 315 .append(charsetName).append(")\n"); 316 } 317 if (!javaInfos.containsKey(kc)) { 318 if (!preferred.containsKey(kc)) { 319 preferred.put(kc, xml); 320 } 321 javaInfos.put(kc, info); 322 sb.append("** Java: ").append(kc).append(" -> ") 323 .append(xml).append(" (charset: ") 324 .append(charsetName).append(")\n"); 325 } 326 if (!javaInfos.containsKey(c.name().toUpperCase())) { 327 if (!preferred.containsKey(c.name().toUpperCase())) { 328 preferred.put(c.name().toUpperCase(), xml); 329 } 330 javaInfos.put(c.name().toUpperCase(), info); 331 sb.append("** Java: ").append(c.name().toUpperCase()).append(" -> ") 332 .append(xml).append(" (charset: ") 333 .append(charsetName).append(")\n"); 334 } 335 } 336 if (sb.length() == 0) { 337 System.out.println("Nothing new for "+charsetName+": "+e.getKey()+" -> "+e.getValue()); 338 } else { 339 System.out.print(sb); 340 } 341 342 } 343 344 // Now we're going to verify that Encodings.java has done its job 345 // correctly. We're going to ask Encodings to convert java names to mime 346 // names and mime names to java names - and verify that the returned 347 // java names do map to recognized charsets. 348 // 349 // We're also going to verify that Encodings has recorded the preferred 350 // mime name correctly. 351 352 Method m = Encodings.class.getDeclaredMethod("getMimeEncoding", String.class); 353 m.setAccessible(true); 354 355 Set<String> xNames = new HashSet<>(); 356 Set<String> jNames = new HashSet<>(); 357 for (String name: xmlMap.keySet()) { 358 final String javaName = checkConvertMime2Java(name); 359 checkPreferredMime(m, javaName, preferred); 360 jNames.add(javaName); 361 xNames.add(name); 362 } 363 364 365 for (String javaName : lines.keySet()) { 366 final String javaCharsetName = mapping.getCharsetNameFor(javaName.toUpperCase()); 367 if (javaCharsetName == null) continue; 368 if (!jNames.contains(javaName)) { 369 checkPreferredMime(m, javaName, preferred); 370 jNames.add(javaName); 371 } 372 for (String xml : lines.get(javaName)) { 373 if (xNames.contains(xml)) continue; 374 final String jName = checkConvertMime2Java(xml); 375 xNames.add(xml); 376 if (jNames.contains(jName)) continue; 377 checkPreferredMime(m, jName, preferred); 378 } 379 } 380 } 381 382 private static String checkConvertMime2Java(String xml) { 383 final String jName = Encodings.convertMime2JavaEncoding(xml); 384 final String jCharsetName; 385 try { 386 jCharsetName = Charset.forName(jName).name(); 387 } catch (Exception x) { 388 throw new Error("Unrecognized charset returned by Encodings.convertMime2JavaEncoding(\""+xml+"\")", x); 389 } 390 System.out.println("Encodings.convertMime2JavaEncoding(\""+xml+"\") = \""+jName+"\" ("+jCharsetName+")"); 391 return jName; 392 } 393 394 private static void checkPreferredMime(Method m, String javaName, Map<String,String> preferred) 395 throws Exception { 396 final String mime = (String) m.invoke(null, javaName); 397 final String expected = preferred.get(javaName.toUpperCase()); 398 if (Arrays.deepEquals(new String[] {mime}, new String[] {expected})) { 399 System.out.println("Encodings.getMimeEncoding(\""+javaName+"\") = \""+mime+"\""); 400 } else { 401 throw new Error("Bad preferred mime type for: '"+javaName+"': expected '"+ 402 expected+"' but got '"+mime+"'"); 403 } 404 } 405 406 private static Collection<String> getValues(String val) { 407 int pos = val.indexOf(' '); 408 if (pos < 0) { 409 return Collections.singletonList(val); 410 } 411 //lastPrintable = 412 // Integer.decode(val.substring(pos).trim()).intValue(); 413 StringTokenizer st = 414 new StringTokenizer(val.substring(0, pos), ","); 415 final List<String> values = new ArrayList<>(st.countTokens()); 416 while (st.hasMoreTokens()) { 417 values.add(st.nextToken()); 418 } 419 return values; 420 } 421 422 // can be called in main() to help debugging. 423 // Prints out all available charsets and their recognized aliases 424 // as returned by the Charset API. 425 private static void printAllCharsets() { 426 Map<String, Charset> all = Charset.availableCharsets(); 427 System.out.println("\n=========================================\n"); 428 for (String can : all.keySet()) { 429 System.out.println(can + ": " + all.get(can).aliases()); 430 } 431 } 432} 433