1/*
2 * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.  Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25package javax.xml.catalog;
26
import java.io.UnsupportedEncodingException;
import java.net.URLDecoder;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
30
31/**
32 * The Normalizer is responsible for normalizing Public and System Identifiers
 * as specified in sections 6.2, 6.3 and 6.4 of the specification
 * <a
35 * href="https://www.oasis-open.org/committees/download.php/14809/xml-catalogs.html">
36 * XML Catalogs, OASIS Standard V1.1, 7 October 2005</a>.
37 *
38 * @since 9
39 */
40class Normalizer {
41
42    /**
43     * Normalize a public identifier in accordance with section 6.2 of the
44     * Catalog specification.
45     *
46     * <p>
47     * All strings of white space in public identifiers must be normalized to
48     * single space characters (#x20), and leading and trailing white space must
49     * be removed.
50     *
51     * @param publicId The unnormalized public identifier
52     *
53     * @return The normalized identifier
54     */
55    static String normalizePublicId(String publicId) {
56        if (publicId == null) return null;
57
58        StringBuilder sb = new StringBuilder(publicId.length());
59        char last = 'a';
60        for (char c : publicId.toCharArray()) {
61            //skip beginning and duplicate space
62            if ((c == ' ') && (sb.length() == 0 || last == ' ')) {
63                continue;
64            }
65
66            //replace whitespace with space
67            if (c == '\t' || c == '\r' || c == '\n') {
68                if (last != ' ') {
69                    sb.append(' ');
70                    last = ' ';
71                }
72            } else {
73                sb.append(c);
74                last = c;
75            }
76        }
77        //remove the last space
78        if (last == ' ') {
79            sb.deleteCharAt(sb.length() - 1);
80        }
81
82        return sb.toString();
83    }
84
85    /**
86     * Encode a public identifier as a "publicid" URN.
87     *
88     * @param publicId The unnormalized public identifier
89     *
90     * @return The normalized identifier
91     * @throws CatalogException if encoding failed
92     */
93    static String encodeURN(String publicId) {
94        String urn = normalizePublicId(publicId);
95
96        try {
97            urn = URLEncoder.encode(urn, "UTF-8");
98            urn = urn.replace("::", ";");
99            urn = urn.replace("//", ":");
100        } catch (UnsupportedEncodingException ex) {
101            CatalogMessages.reportRunTimeError(CatalogMessages.ERR_OTHER, ex);
102        }
103        return Util.URN + urn;
104    }
105
106    /**
107     * Decode a "publicid" URN into a public identifier.
108     *
109     * @param urn The urn:publicid: URN
110     *
111     * @return The normalized identifier
112     * @throws CatalogException if decoding failed
113     */
114    static String decodeURN(String urn) {
115        String publicId;
116
117        if (urn != null && urn.startsWith(Util.URN)) {
118            publicId = urn.substring(13);
119        } else {
120            return urn;
121        }
122        try {
123            publicId = publicId.replace(":", "//");
124            publicId = publicId.replace(";", "::");
125            publicId = URLDecoder.decode(publicId, "UTF-8");
126        } catch (UnsupportedEncodingException ex) {
127            CatalogMessages.reportRunTimeError(CatalogMessages.ERR_OTHER, ex);
128        }
129
130        return publicId;
131    }
132
133    /**
134     * Perform character normalization on a URI reference.
135     *
136     * @param uriref The URI reference
137     * @return The normalized URI reference
138     */
139    static String normalizeURI(String uriref) {
140        if (uriref == null) {
141            return null;
142        }
143
144        byte[] bytes;
145        uriref = uriref.trim();
146        try {
147            bytes = uriref.getBytes("UTF-8");
148        } catch (UnsupportedEncodingException uee) {
149            // this can't happen
150            return uriref;
151        }
152
153        StringBuilder newRef = new StringBuilder(bytes.length);
154        for (int count = 0; count < bytes.length; count++) {
155            int ch = bytes[count] & 0xFF;
156
157            if ((ch <= 0x20) // ctrl
158                    || (ch > 0x7F) // high ascii
159                    || (ch == 0x22) // "
160                    || (ch == 0x3C) // <
161                    || (ch == 0x3E) // >
162                    || (ch == 0x5C) // \
163                    || (ch == 0x5E) // ^
164                    || (ch == 0x60) // `
165                    || (ch == 0x7B) // {
166                    || (ch == 0x7C) // |
167                    || (ch == 0x7D) // }
168                    || (ch == 0x7F)) {
169                newRef.append("%").append(String.format("%02X", ch));
170            } else {
171                newRef.append((char) bytes[count]);
172            }
173        }
174
175        return newRef.toString().trim();
176    }
177}
178