1/*
2 * Copyright (c) 1998, 2013, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.  Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25
26package java.net;
27
28import java.io.*;
29
30/**
31 * Utility class for HTML form decoding. This class contains static methods
32 * for decoding a String from the <CODE>application/x-www-form-urlencoded</CODE>
33 * MIME format.
34 * <p>
35 * The conversion process is the reverse of that used by the URLEncoder class. It is assumed
36 * that all characters in the encoded string are one of the following:
37 * &quot;{@code a}&quot; through &quot;{@code z}&quot;,
38 * &quot;{@code A}&quot; through &quot;{@code Z}&quot;,
39 * &quot;{@code 0}&quot; through &quot;{@code 9}&quot;, and
40 * &quot;{@code -}&quot;, &quot;{@code _}&quot;,
41 * &quot;{@code .}&quot;, and &quot;{@code *}&quot;. The
42 * character &quot;{@code %}&quot; is allowed but is interpreted
43 * as the start of a special escaped sequence.
44 * <p>
45 * The following rules are applied in the conversion:
46 *
47 * <ul>
48 * <li>The alphanumeric characters &quot;{@code a}&quot; through
49 *     &quot;{@code z}&quot;, &quot;{@code A}&quot; through
50 *     &quot;{@code Z}&quot; and &quot;{@code 0}&quot;
51 *     through &quot;{@code 9}&quot; remain the same.
52 * <li>The special characters &quot;{@code .}&quot;,
53 *     &quot;{@code -}&quot;, &quot;{@code *}&quot;, and
54 *     &quot;{@code _}&quot; remain the same.
55 * <li>The plus sign &quot;{@code +}&quot; is converted into a
56 *     space character &quot; &nbsp; &quot; .
57 * <li>A sequence of the form "<i>{@code %xy}</i>" will be
58 *     treated as representing a byte where <i>xy</i> is the two-digit
59 *     hexadecimal representation of the 8 bits. Then, all substrings
60 *     that contain one or more of these byte sequences consecutively
61 *     will be replaced by the character(s) whose encoding would result
62 *     in those consecutive bytes.
63 *     The encoding scheme used to decode these characters may be specified,
64 *     or if unspecified, the default encoding of the platform will be used.
65 * </ul>
66 * <p>
67 * There are two possible ways in which this decoder could deal with
68 * illegal strings.  It could either leave illegal characters alone or
69 * it could throw an {@link java.lang.IllegalArgumentException}.
70 * Which approach the decoder takes is left to the
71 * implementation.
72 *
73 * @author  Mark Chamness
74 * @author  Michael McCloskey
75 * @since   1.2
76 */
77
78public class URLDecoder {
79
80    // The platform default encoding
81    static String dfltEncName = URLEncoder.dfltEncName;
82
83    /**
84     * Decodes a {@code x-www-form-urlencoded} string.
85     * The platform's default encoding is used to determine what characters
86     * are represented by any consecutive sequences of the form
87     * "<i>{@code %xy}</i>".
88     * @param s the {@code String} to decode
89     * @deprecated The resulting string may vary depending on the platform's
90     *          default encoding. Instead, use the decode(String,String) method
91     *          to specify the encoding.
92     * @return the newly decoded {@code String}
93     */
94    @Deprecated
95    public static String decode(String s) {
96
97        String str = null;
98
99        try {
100            str = decode(s, dfltEncName);
101        } catch (UnsupportedEncodingException e) {
102            // The system should always have the platform default
103        }
104
105        return str;
106    }
107
108    /**
109     * Decodes an {@code application/x-www-form-urlencoded} string using
110     * a specific encoding scheme.
111     * The supplied encoding is used to determine
112     * what characters are represented by any consecutive sequences of the
113     * form "<i>{@code %xy}</i>".
114     * <p>
115     * <em><strong>Note:</strong> The <a href=
116     * "http://www.w3.org/TR/html40/appendix/notes.html#non-ascii-chars">
117     * World Wide Web Consortium Recommendation</a> states that
118     * UTF-8 should be used. Not doing so may introduce
119     * incompatibilities.</em>
120     *
121     * @param s the {@code String} to decode
122     * @param enc   The name of a supported
123     *    <a href="../lang/package-summary.html#charenc">character
124     *    encoding</a>.
125     * @return the newly decoded {@code String}
126     * @exception  UnsupportedEncodingException
127     *             If character encoding needs to be consulted, but
128     *             named character encoding is not supported
129     * @see URLEncoder#encode(java.lang.String, java.lang.String)
130     * @since 1.4
131     */
132    public static String decode(String s, String enc)
133        throws UnsupportedEncodingException{
134
135        boolean needToChange = false;
136        int numChars = s.length();
137        StringBuilder sb = new StringBuilder(numChars > 500 ? numChars / 2 : numChars);
138        int i = 0;
139
140        if (enc.length() == 0) {
141            throw new UnsupportedEncodingException ("URLDecoder: empty string enc parameter");
142        }
143
144        char c;
145        byte[] bytes = null;
146        while (i < numChars) {
147            c = s.charAt(i);
148            switch (c) {
149            case '+':
150                sb.append(' ');
151                i++;
152                needToChange = true;
153                break;
154            case '%':
155                /*
156                 * Starting with this instance of %, process all
157                 * consecutive substrings of the form %xy. Each
158                 * substring %xy will yield a byte. Convert all
159                 * consecutive  bytes obtained this way to whatever
160                 * character(s) they represent in the provided
161                 * encoding.
162                 */
163
164                try {
165
166                    // (numChars-i)/3 is an upper bound for the number
167                    // of remaining bytes
168                    if (bytes == null)
169                        bytes = new byte[(numChars-i)/3];
170                    int pos = 0;
171
172                    while ( ((i+2) < numChars) &&
173                            (c=='%')) {
174                        int v = Integer.parseInt(s, i + 1, i + 3, 16);
175                        if (v < 0)
176                            throw new IllegalArgumentException("URLDecoder: Illegal hex characters in escape (%) pattern - negative value");
177                        bytes[pos++] = (byte) v;
178                        i+= 3;
179                        if (i < numChars)
180                            c = s.charAt(i);
181                    }
182
183                    // A trailing, incomplete byte encoding such as
184                    // "%x" will cause an exception to be thrown
185
186                    if ((i < numChars) && (c=='%'))
187                        throw new IllegalArgumentException(
188                         "URLDecoder: Incomplete trailing escape (%) pattern");
189
190                    sb.append(new String(bytes, 0, pos, enc));
191                } catch (NumberFormatException e) {
192                    throw new IllegalArgumentException(
193                    "URLDecoder: Illegal hex characters in escape (%) pattern - "
194                    + e.getMessage());
195                }
196                needToChange = true;
197                break;
198            default:
199                sb.append(c);
200                i++;
201                break;
202            }
203        }
204
205        return (needToChange? sb.toString() : s);
206    }
207}
208