/*
 * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

/**
 * @test
 * @bug 8008738 8065138
 * @summary checks that the mapping implemented by
 *      com.sun.org.apache.xml.internal.serializer.Encodings
 *      correctly identifies valid Charset names and
 *      correctly maps them to their preferred mime names.
 *      Also checks that the Encodings.properties resource file
 *      is consistent.
 * @modules java.xml/com.sun.org.apache.xml.internal.serializer:+open
 * @compile -XDignore.symbol.file CheckEncodingPropertiesFile.java
 * @run main CheckEncodingPropertiesFile
 * @author Daniel Fuchs
 */

import com.sun.org.apache.xml.internal.serializer.EncodingInfo;
import com.sun.org.apache.xml.internal.serializer.Encodings;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.lang.reflect.Method;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Objects;
import java.util.Properties;
import java.util.Set;
import java.util.StringTokenizer;

58public class CheckEncodingPropertiesFile {
59
60    private static final String ENCODINGS_FILE = "com/sun/org/apache/xml/internal/serializer/Encodings.properties";
61
62    public static void main(String[] args) throws Exception {
63        Properties props = new Properties();
64        Module xmlModule = EncodingInfo.class.getModule();
65        try (InputStreamReader is = new InputStreamReader(xmlModule.getResourceAsStream(ENCODINGS_FILE))) {
66            props.load(is);
67        }
68
69       if (!props.containsKey("UTF8")) {
70           // If the test fails here - it may indicate that you stumbled on an
71           // issue similar to that fixed by JDK-8065138.
72           // Check that the content of the Encodings.properties included in
73           // the tested build image matches the content of the file in the source
74           // jaxp tree of the jdk forest.
75           throw new RuntimeException("UTF8 key missing in " + ENCODINGS_FILE);
76       }
77
78        //printAllCharsets();
79
80        test(props);
81    }
82
83
84    private static final class CheckCharsetMapping {
85
86        /**
87         * A map that maps Java or XML name to canonical charset names.
88         * key:    upper cased value of Java or XML name.
89         * value:  case-sensitive canonical name of charset.
90         */
91        private final Map<String, String> charsetMap = new HashMap<>();
92
93        private final Map<String, String> preferredMime = new HashMap<>();
94
95        /**
96         * Unresolved alias names.
97         * For a given set of names pointing to the same unresolved charset,
98         * this map will contain, for each alias in the set, a mapping
99         * with the alias.toUpperValue() as key and the set of known aliases
100         * as value.
101         */
102        private final Map<String, Collection<String>> unresolved = new HashMap<>();
103
104        public final static class ConflictingCharsetError extends Error {
105            ConflictingCharsetError(String a, String cs1, String cs2) {
106                super("Conflicting charset mapping for '"+a+"': '"+cs1+"' and '"+cs2+"'");
107            }
108        }
109
110        public final static class MissingValidCharsetNameError extends Error {
111            MissingValidCharsetNameError(String name, Collection<String> aliases) {
112                super(name+": Line "+aliases+" has no recognized charset alias");
113            }
114        }
115
116        public final static class ConflictingPreferredMimeNameError extends Error {
117            ConflictingPreferredMimeNameError(String a, String cs1, String cs2) {
118                super("Conflicting preferred mime name for '"+a+"': '"+cs1+"' and '"+cs2+"'");
119            }
120        }
121
122        /**
123         * For each alias in aliases, attempt to find the canonical
124         * charset name.
125         * All names in aliases are supposed to point to the same charset.
126         * Names in aliases can be java names or XML names, indifferently.
127         * @param aliases list of names (aliases) for a given charset.
128         * @return The canonical name of the charset, if found, null otherwise.
129         */
130        private String findCharsetNameFor(String[] aliases) {
131            String cs = null;
132            String res = null;
133            for (String a : aliases) {
134                final String k = a.toUpperCase();
135                String cachedCs = charsetMap.get(k);
136                if (cs == null) {
137                    cs = cachedCs;
138                }
139                if (cachedCs != null && cs != null
140                        && !Charset.forName(cachedCs).name().equals(Charset.forName(cs).name())) {
141                    throw new ConflictingCharsetError(a,cs,cachedCs);
142                }
143                try {
144                    final String rcs = Charset.forName(a).name();
145                    if (cs != null && !Charset.forName(cs).name().equals(rcs)) {
146                        throw new ConflictingCharsetError(a,cs,rcs);
147                    }
148                    if (res == null) {
149                        if (a.equals(aliases[0])) {
150                            res = a;
151                        } else {
152                            res = cs;
153                        }
154                    }
155                    cs = rcs;
156                    charsetMap.put(k, res == null ? cs : res);
157                } catch (Exception x) {
158                    continue;
159                }
160            }
161            return res == null ? cs : res;
162        }
163
164        /**
165         * Register a canonical charset name for a given set of aliases.
166         *
167         * @param charsetName the canonical charset name.
168         * @param aliases a list of aliases for the given charset.
169         */
170        private void registerCharsetNameFor(String charsetName, String[] aliases) {
171            if (charsetName == null) throw new NullPointerException();
172
173            for (String a : aliases) {
174                String k = a.toUpperCase();
175                String csv = charsetMap.get(k);
176                if (csv == null) {
177                    charsetMap.put(k, charsetName);
178                    csv = charsetName;
179                } else if (!csv.equals(charsetName)) {
180                    throw new ConflictingCharsetError(a,charsetName,csv);
181                }
182
183                final Collection<String> c = unresolved.get(k);
184                if (c != null) {
185                    for (String aa : c) {
186                        k = aa.toUpperCase();
187                        String csvv = charsetMap.get(k);
188                        if (csvv == null) charsetMap.put(k, csv);
189                        unresolved.remove(k);
190                    }
191                    throw new MissingValidCharsetNameError(charsetName,c);
192                }
193            }
194        }
195
196        /**
197         * Register a set of aliases as being unresolved.
198         * @param names    the list of names - this should be what is returned by
199         *                 nameSet.toArray(new String[nameSet.size()])
200         * @param nameSet  the set of unresolved aliases.
201         */
202        private void registerUnresolvedNamesFor(String[] names, Collection<String> nameSet) {
203            // This is not necessarily an error: it could happen that some
204            //    charsets are simply not supported on some OS/Arch
205            System.err.println("Warning: unresolved charset names: '"+ nameSet
206                    + "' This is not necessarily an error "
207                    + "- this charset may not be supported on this platform.");
208            for (String a : names) {
209                final String k = a.toUpperCase();
210                final Collection<String> c = unresolved.get(k);
211                if (c != null) {
212                    //System.out.println("Found: "+a+" -> "+c);
213                    //System.out.println("\t merging "+ c + " with " + nameSet);
214                    nameSet.addAll(c);
215                    for (String aa : c) {
216                        unresolved.put(aa.toUpperCase(), nameSet);
217                    }
218                }
219                unresolved.put(k, nameSet);
220            }
221        }
222
223
224        /**
225         * Add a new charset name mapping
226         * @param javaName the (supposedly) java name of the charset.
227         * @param xmlNames a list of corresponding XML names for that charset.
228         */
229        void addMapping(String javaName, Collection<String> xmlNames) {
230            final LinkedHashSet<String> aliasNames = new LinkedHashSet<>();
231            aliasNames.add(javaName);
232            aliasNames.addAll(xmlNames);
233            final String[] aliases = aliasNames.toArray(new String[aliasNames.size()]);
234            final String cs = findCharsetNameFor(aliases);
235            if (cs != null) {
236                registerCharsetNameFor(cs, aliases);
237                if (xmlNames.size() > 0) {
238                    String preferred = xmlNames.iterator().next();
239                    String cachedPreferred = preferredMime.get(cs.toUpperCase());
240                    if (cachedPreferred != null && !cachedPreferred.equals(preferred)) {
241                        throw new ConflictingPreferredMimeNameError(cs, cachedPreferred, preferred);
242                    }
243                    preferredMime.put(cs.toUpperCase(), preferred);
244                }
245            } else {
246                registerUnresolvedNamesFor(aliases, aliasNames);
247            }
248        }
249
250        /**
251         * Returns the canonical name of the charset for the given Java or XML
252         * alias name.
253         * @param alias the alias name
254         * @return the canonical charset name - or null if unknown.
255         */
256        public String getCharsetNameFor(String alias) {
257            return charsetMap.get(alias.toUpperCase());
258        }
259
260    }
261
262    public static void test(Properties props) throws Exception {
263
264        // First, build a mapping from the properties read from the resource
265        // file.
266        // We're going to check the consistency of the resource file
267        // while building this mapping, and throw errors if the file
268        // does not meet our assumptions.
269        //
270        Map<String, Collection<String>> lines = new HashMap<>();
271        final CheckCharsetMapping mapping = new CheckCharsetMapping();
272
273        for (String key : props.stringPropertyNames()) {
274            Collection<String> values = getValues(props.getProperty(key));
275            lines.put(key, values);
276            mapping.addMapping(key, values);
277        }
278
279        // Then build maps of EncodingInfos, and print along debugging
280        // information that should help understand the content of the
281        // resource file and the mapping it defines.
282        //
283        Map<String, EncodingInfo> javaInfos = new HashMap<>(); // Map indexed by java names
284        Map<String, EncodingInfo> xmlMap = new HashMap<>();    // Map indexed by XML names
285        Map<String, String> preferred =
286                new HashMap<>(mapping.preferredMime);          // Java Name -> Preferred Mime Name
287        List<EncodingInfo> all = new ArrayList<>();            // unused...
288        for (Entry<String, Collection<String>> e : lines.entrySet()) {
289            final String charsetName = mapping.getCharsetNameFor(e.getKey());
290            if (charsetName == null) {
291                System.out.println("!! No charset for: "+e.getKey()+ " "+ e.getValue());
292                continue;
293            }
294            Charset c = Charset.forName(charsetName);
295            EncodingInfo info;
296            final String k = e.getKey().toUpperCase();
297            final String kc = charsetName.toUpperCase();
298            StringBuilder sb = new StringBuilder();
299            for (String xml : e.getValue()) {
300                final String kx = xml.toUpperCase();
301                info = xmlMap.get(kx);
302                if (info == null) {
303                    info = new EncodingInfo(xml, charsetName);
304                    System.out.println("** XML: "+xml+" -> "+charsetName);
305                    xmlMap.put(kx, info);
306                    all.add(info);
307                }
308                if (!javaInfos.containsKey(k)) {
309                    javaInfos.put(k, info);
310                    if (!preferred.containsKey(k)) {
311                        preferred.put(k, xml);
312                    }
313                    sb.append("** Java: ").append(k).append(" -> ")
314                            .append(xml).append(" (charset: ")
315                            .append(charsetName).append(")\n");
316                }
317                if (!javaInfos.containsKey(kc)) {
318                    if (!preferred.containsKey(kc)) {
319                        preferred.put(kc, xml);
320                    }
321                    javaInfos.put(kc, info);
322                    sb.append("** Java: ").append(kc).append(" -> ")
323                            .append(xml).append(" (charset: ")
324                            .append(charsetName).append(")\n");
325                }
326                if (!javaInfos.containsKey(c.name().toUpperCase())) {
327                    if (!preferred.containsKey(c.name().toUpperCase())) {
328                        preferred.put(c.name().toUpperCase(), xml);
329                    }
330                    javaInfos.put(c.name().toUpperCase(), info);
331                    sb.append("** Java: ").append(c.name().toUpperCase()).append(" -> ")
332                            .append(xml).append(" (charset: ")
333                            .append(charsetName).append(")\n");
334                }
335            }
336            if (sb.length() == 0) {
337                System.out.println("Nothing new for "+charsetName+": "+e.getKey()+" -> "+e.getValue());
338            } else {
339                System.out.print(sb);
340            }
341
342        }
343
344        // Now we're going to verify that Encodings.java has done its job
345        // correctly. We're going to ask Encodings to convert java names to mime
346        // names and mime names to java names - and verify that the returned
347        // java names do map to recognized charsets.
348        //
349        // We're also going to verify that Encodings has recorded the preferred
350        // mime name correctly.
351
352        Method m = Encodings.class.getDeclaredMethod("getMimeEncoding", String.class);
353        m.setAccessible(true);
354
355        Set<String> xNames = new HashSet<>();
356        Set<String> jNames = new HashSet<>();
357        for (String name: xmlMap.keySet()) {
358            final String javaName = checkConvertMime2Java(name);
359            checkPreferredMime(m, javaName, preferred);
360            jNames.add(javaName);
361            xNames.add(name);
362        }
363
364
365        for (String javaName : lines.keySet()) {
366            final String javaCharsetName = mapping.getCharsetNameFor(javaName.toUpperCase());
367            if (javaCharsetName == null) continue;
368            if (!jNames.contains(javaName)) {
369                checkPreferredMime(m, javaName, preferred);
370                jNames.add(javaName);
371            }
372            for (String xml : lines.get(javaName)) {
373                if (xNames.contains(xml)) continue;
374                final String jName = checkConvertMime2Java(xml);
375                xNames.add(xml);
376                if (jNames.contains(jName)) continue;
377                checkPreferredMime(m, jName, preferred);
378            }
379        }
380    }
381
382    private static String checkConvertMime2Java(String xml) {
383        final String jName = Encodings.convertMime2JavaEncoding(xml);
384        final String jCharsetName;
385        try {
386            jCharsetName = Charset.forName(jName).name();
387        } catch (Exception x) {
388            throw new Error("Unrecognized charset returned by Encodings.convertMime2JavaEncoding(\""+xml+"\")", x);
389        }
390        System.out.println("Encodings.convertMime2JavaEncoding(\""+xml+"\") = \""+jName+"\" ("+jCharsetName+")");
391        return jName;
392    }
393
394    private static void checkPreferredMime(Method m, String javaName, Map<String,String> preferred)
395            throws Exception {
396        final String mime = (String) m.invoke(null, javaName);
397        final String expected = preferred.get(javaName.toUpperCase());
398        if (Arrays.deepEquals(new String[] {mime}, new String[] {expected})) {
399            System.out.println("Encodings.getMimeEncoding(\""+javaName+"\") = \""+mime+"\"");
400        } else {
401            throw new Error("Bad preferred mime type for: '"+javaName+"': expected '"+
402                expected+"' but got '"+mime+"'");
403        }
404    }
405
406    private static Collection<String> getValues(String val) {
407        int pos = val.indexOf(' ');
408        if (pos < 0) {
409            return Collections.singletonList(val);
410        }
411        //lastPrintable =
412        //    Integer.decode(val.substring(pos).trim()).intValue();
413        StringTokenizer st =
414            new StringTokenizer(val.substring(0, pos), ",");
415        final List<String> values = new ArrayList<>(st.countTokens());
416        while (st.hasMoreTokens()) {
417            values.add(st.nextToken());
418        }
419        return values;
420    }
421
422    // can be called in main() to help debugging.
423    // Prints out all available charsets and their recognized aliases
424    // as returned by the Charset API.
425    private static void printAllCharsets() {
426        Map<String, Charset> all = Charset.availableCharsets();
427        System.out.println("\n=========================================\n");
428        for (String can : all.keySet()) {
429            System.out.println(can + ": " + all.get(can).aliases());
430        }
431    }
432}
433