1/* 2 * Copyright (c) 1998, 2013, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26package java.net; 27 28import java.io.*; 29 30/** 31 * Utility class for HTML form decoding. This class contains static methods 32 * for decoding a String from the <CODE>application/x-www-form-urlencoded</CODE> 33 * MIME format. 34 * <p> 35 * The conversion process is the reverse of that used by the URLEncoder class. It is assumed 36 * that all characters in the encoded string are one of the following: 37 * "{@code a}" through "{@code z}", 38 * "{@code A}" through "{@code Z}", 39 * "{@code 0}" through "{@code 9}", and 40 * "{@code -}", "{@code _}", 41 * "{@code .}", and "{@code *}". The 42 * character "{@code %}" is allowed but is interpreted 43 * as the start of a special escaped sequence. 44 * <p> 45 * The following rules are applied in the conversion: 46 * 47 * <ul> 48 * <li>The alphanumeric characters "{@code a}" through 49 * "{@code z}", "{@code A}" through 50 * "{@code Z}" and "{@code 0}" 51 * through "{@code 9}" remain the same. 52 * <li>The special characters "{@code .}", 53 * "{@code -}", "{@code *}", and 54 * "{@code _}" remain the same. 55 * <li>The plus sign "{@code +}" is converted into a 56 * space character " " . 57 * <li>A sequence of the form "<i>{@code %xy}</i>" will be 58 * treated as representing a byte where <i>xy</i> is the two-digit 59 * hexadecimal representation of the 8 bits. Then, all substrings 60 * that contain one or more of these byte sequences consecutively 61 * will be replaced by the character(s) whose encoding would result 62 * in those consecutive bytes. 63 * The encoding scheme used to decode these characters may be specified, 64 * or if unspecified, the default encoding of the platform will be used. 65 * </ul> 66 * <p> 67 * There are two possible ways in which this decoder could deal with 68 * illegal strings. It could either leave illegal characters alone or 69 * it could throw an {@link java.lang.IllegalArgumentException}. 70 * Which approach the decoder takes is left to the 71 * implementation. 72 * 73 * @author Mark Chamness 74 * @author Michael McCloskey 75 * @since 1.2 76 */ 77 78public class URLDecoder { 79 80 // The platform default encoding 81 static String dfltEncName = URLEncoder.dfltEncName; 82 83 /** 84 * Decodes a {@code x-www-form-urlencoded} string. 85 * The platform's default encoding is used to determine what characters 86 * are represented by any consecutive sequences of the form 87 * "<i>{@code %xy}</i>". 88 * @param s the {@code String} to decode 89 * @deprecated The resulting string may vary depending on the platform's 90 * default encoding. Instead, use the decode(String,String) method 91 * to specify the encoding. 92 * @return the newly decoded {@code String} 93 */ 94 @Deprecated 95 public static String decode(String s) { 96 97 String str = null; 98 99 try { 100 str = decode(s, dfltEncName); 101 } catch (UnsupportedEncodingException e) { 102 // The system should always have the platform default 103 } 104 105 return str; 106 } 107 108 /** 109 * Decodes an {@code application/x-www-form-urlencoded} string using 110 * a specific encoding scheme. 111 * The supplied encoding is used to determine 112 * what characters are represented by any consecutive sequences of the 113 * form "<i>{@code %xy}</i>". 114 * <p> 115 * <em><strong>Note:</strong> The <a href= 116 * "http://www.w3.org/TR/html40/appendix/notes.html#non-ascii-chars"> 117 * World Wide Web Consortium Recommendation</a> states that 118 * UTF-8 should be used. Not doing so may introduce 119 * incompatibilities.</em> 120 * 121 * @param s the {@code String} to decode 122 * @param enc The name of a supported 123 * <a href="../lang/package-summary.html#charenc">character 124 * encoding</a>. 125 * @return the newly decoded {@code String} 126 * @exception UnsupportedEncodingException 127 * If character encoding needs to be consulted, but 128 * named character encoding is not supported 129 * @see URLEncoder#encode(java.lang.String, java.lang.String) 130 * @since 1.4 131 */ 132 public static String decode(String s, String enc) 133 throws UnsupportedEncodingException{ 134 135 boolean needToChange = false; 136 int numChars = s.length(); 137 StringBuilder sb = new StringBuilder(numChars > 500 ? numChars / 2 : numChars); 138 int i = 0; 139 140 if (enc.length() == 0) { 141 throw new UnsupportedEncodingException ("URLDecoder: empty string enc parameter"); 142 } 143 144 char c; 145 byte[] bytes = null; 146 while (i < numChars) { 147 c = s.charAt(i); 148 switch (c) { 149 case '+': 150 sb.append(' '); 151 i++; 152 needToChange = true; 153 break; 154 case '%': 155 /* 156 * Starting with this instance of %, process all 157 * consecutive substrings of the form %xy. Each 158 * substring %xy will yield a byte. Convert all 159 * consecutive bytes obtained this way to whatever 160 * character(s) they represent in the provided 161 * encoding. 162 */ 163 164 try { 165 166 // (numChars-i)/3 is an upper bound for the number 167 // of remaining bytes 168 if (bytes == null) 169 bytes = new byte[(numChars-i)/3]; 170 int pos = 0; 171 172 while ( ((i+2) < numChars) && 173 (c=='%')) { 174 int v = Integer.parseInt(s, i + 1, i + 3, 16); 175 if (v < 0) 176 throw new IllegalArgumentException("URLDecoder: Illegal hex characters in escape (%) pattern - negative value"); 177 bytes[pos++] = (byte) v; 178 i+= 3; 179 if (i < numChars) 180 c = s.charAt(i); 181 } 182 183 // A trailing, incomplete byte encoding such as 184 // "%x" will cause an exception to be thrown 185 186 if ((i < numChars) && (c=='%')) 187 throw new IllegalArgumentException( 188 "URLDecoder: Incomplete trailing escape (%) pattern"); 189 190 sb.append(new String(bytes, 0, pos, enc)); 191 } catch (NumberFormatException e) { 192 throw new IllegalArgumentException( 193 "URLDecoder: Illegal hex characters in escape (%) pattern - " 194 + e.getMessage()); 195 } 196 needToChange = true; 197 break; 198 default: 199 sb.append(c); 200 i++; 201 break; 202 } 203 } 204 205 return (needToChange? sb.toString() : s); 206 } 207} 208