/*
 * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation. Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */
package javax.xml.catalog;

import java.io.UnsupportedEncodingException;
import java.net.URLDecoder;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;

/**
 * The Normalizer is responsible for normalizing Public and System Identifiers
 * as specified in sections 6.2, 6.3 and 6.4 of the specification
 * <a
 * href="https://www.oasis-open.org/committees/download.php/14809/xml-catalogs.html">
 * XML Catalogs, OASIS Standard V1.1, 7 October 2005</a>.
 *
 * <p>
 * All methods are static and pass a {@code null} argument through as
 * {@code null}.
 *
 * @since 9
 */
class Normalizer {

    /** Upper-case hexadecimal digits used when percent-escaping a byte. */
    private static final char[] HEX_DIGITS = "0123456789ABCDEF".toCharArray();

    /**
     * Normalize a public identifier in accordance with section 6.2 of the
     * Catalog specification.
     *
     * <p>
     * All strings of white space in public identifiers must be normalized to
     * single space characters (#x20), and leading and trailing white space must
     * be removed. White space here means space, tab, carriage return and line
     * feed.
     *
     * @param publicId The unnormalized public identifier
     *
     * @return The normalized identifier, or {@code null} if {@code publicId}
     * is {@code null}
     */
    static String normalizePublicId(String publicId) {
        if (publicId == null) {
            return null;
        }

        StringBuilder sb = new StringBuilder(publicId.length());
        // True when white space was seen after some content and a single
        // separating space is owed before the next non-white-space character.
        boolean pendingSpace = false;
        for (int i = 0; i < publicId.length(); i++) {
            char c = publicId.charAt(i);
            if (c == ' ' || c == '\t' || c == '\r' || c == '\n') {
                // Leading white space (nothing appended yet) is dropped; a run
                // of white space inside the identifier collapses to one space.
                pendingSpace = sb.length() > 0;
            } else {
                if (pendingSpace) {
                    sb.append(' ');
                    pendingSpace = false;
                }
                sb.append(c);
            }
        }
        // A space still pending here is trailing white space and is never
        // written, so no clean-up of the buffer is required.
        return sb.toString();
    }

    /**
     * Encode a public identifier as a "publicid" URN.
     *
     * <p>
     * The identifier is first normalized with
     * {@link #normalizePublicId(String)}, then URL-encoded as UTF-8. After
     * that, each pair {@code "::"} of the identifier is written as
     * {@code ";"} and each pair {@code "//"} as {@code ":"}. This is the
     * reverse of the substitutions performed by {@link #decodeURN(String)}.
     * The result is prefixed with {@code Util.URN}.
     *
     * @param publicId The unnormalized public identifier
     *
     * @return The "publicid" URN for the identifier, or {@code null} if
     * {@code publicId} is {@code null}
     * @throws CatalogException if encoding failed
     */
    static String encodeURN(String publicId) {
        String urn = normalizePublicId(publicId);
        if (urn == null) {
            // Same null pass-through as the other methods of this class.
            return null;
        }

        try {
            urn = URLEncoder.encode(urn, "UTF-8");
            // URLEncoder has already escaped ':' as %3A and '/' as %2F, so the
            // "::" and "//" pairs must be matched in their escaped form. A
            // literal "%3A" in the identifier was escaped to "%253A" and can
            // therefore not be confused with an escaped colon.
            urn = urn.replace("%3A%3A", ";");
            urn = urn.replace("%2F%2F", ":");
        } catch (UnsupportedEncodingException ex) {
            CatalogMessages.reportRunTimeError(CatalogMessages.ERR_OTHER, ex);
        }
        return Util.URN + urn;
    }

    /**
     * Decode a "publicid" URN into a public identifier.
     *
     * <p>
     * If {@code urn} is {@code null} or does not start with {@code Util.URN}
     * it is returned unchanged. Otherwise the prefix is removed, {@code ":"}
     * is replaced by {@code "//"}, {@code ";"} by {@code "::"}, and the result
     * is URL-decoded as UTF-8.
     *
     * @param urn The urn:publicid: URN
     *
     * @return The decoded public identifier, or {@code urn} itself when it is
     * not a "publicid" URN
     * @throws CatalogException if decoding failed
     */
    static String decodeURN(String urn) {
        if (urn == null || !urn.startsWith(Util.URN)) {
            return urn;
        }

        // Strip exactly the prefix that was just matched.
        String publicId = urn.substring(Util.URN.length());
        try {
            // Unwrap the URN separators before percent-decoding, so that an
            // escaped %3A or %3B is not mistaken for a separator.
            publicId = publicId.replace(":", "//");
            publicId = publicId.replace(";", "::");
            publicId = URLDecoder.decode(publicId, "UTF-8");
        } catch (UnsupportedEncodingException ex) {
            CatalogMessages.reportRunTimeError(CatalogMessages.ERR_OTHER, ex);
        }

        return publicId;
    }

    /**
     * Perform character normalization on a URI reference.
     *
     * <p>
     * The reference is trimmed and converted to UTF-8. Every byte that is a
     * control character or space, is outside the 7-bit range, or is one of
     * {@code " < > \ ^ ` { | }} or DEL is replaced by {@code %HH}, where
     * {@code HH} is the upper-case hexadecimal value of the byte. All other
     * bytes are copied unchanged.
     *
     * @param uriref The URI reference
     * @return The normalized URI reference, or {@code null} if {@code uriref}
     * is {@code null}
     */
    static String normalizeURI(String uriref) {
        if (uriref == null) {
            return null;
        }

        byte[] bytes = uriref.trim().getBytes(StandardCharsets.UTF_8);
        StringBuilder newRef = new StringBuilder(bytes.length);
        for (byte b : bytes) {
            int ch = b & 0xFF;
            if (mustEscape(ch)) {
                newRef.append('%')
                        .append(HEX_DIGITS[ch >> 4])
                        .append(HEX_DIGITS[ch & 0x0F]);
            } else {
                newRef.append((char) ch);
            }
        }

        // No trailing trim is needed: every byte <= 0x20 has been escaped.
        return newRef.toString();
    }

    /**
     * Tells whether a UTF-8 byte must be percent-escaped by
     * {@link #normalizeURI(String)}.
     *
     * @param ch the unsigned byte value, in the range 0 to 255
     * @return {@code true} if the byte must be written as {@code %HH}
     */
    private static boolean mustEscape(int ch) {
        if (ch <= 0x20 || ch >= 0x7F) {
            // control characters, space, DEL and all non-ASCII bytes
            return true;
        }
        switch (ch) {
            case 0x22: // "
            case 0x3C: // <
            case 0x3E: // >
            case 0x5C: // \
            case 0x5E: // ^
            case 0x60: // `
            case 0x7B: // {
            case 0x7C: // |
            case 0x7D: // }
                return true;
            default:
                return false;
        }
    }
}