/*
 * Copyright (c) 2001, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

/*
 * @test
 * @bug 4396708
 * @summary Test URL encoder and decoder on a string that contains
 * surrogate pairs.
 *
 */

import java.io.*;
import java.net.*;

/*
 * Surrogate pairs are two character Unicode sequences where the first
 * character lies in the range [d800, dbff] and the second character lies
 * in the range [dc00, dfff]. They are used as an escaping mechanism to add
 * 1M more characters to Unicode.
 *
 * Each entry of testStrings is URL-encoded as UTF-8 and compared with the
 * entry of correctEncodings at the same index; the encoded form is then
 * decoded again and compared with the original string.
 */
public class SurrogatePairs {

    /*
     * Inputs. The first four entries are lone surrogate pairs at the corners
     * of the surrogate ranges (U+10000, U+103FF, U+10FC00, U+10FFFF); the
     * remaining entries place the pair \uDBFF\uDC00 before or after an
     * unreserved ASCII character ('1'), an escaped ASCII character ('@'),
     * and a non-ASCII BMP character (\u0101).
     */
    static String[] testStrings = {
        "\uD800\uDC00",
        "\uD800\uDFFF",
        "\uDBFF\uDC00",
        "\uDBFF\uDFFF",
        "1\uDBFF\uDC00",
        "@\uDBFF\uDC00",
        "\uDBFF\uDC001",
        "\uDBFF\uDC00@",
        "\u0101\uDBFF\uDC00",
        "\uDBFF\uDC00\u0101"
    };

    /*
     * Expected URL encodings, index-aligned with testStrings.
     */
    static String[] correctEncodings = {
        "%F0%90%80%80",
        "%F0%90%8F%BF",
        "%F4%8F%B0%80",
        "%F4%8F%BF%BF",
        "1%F4%8F%B0%80",
        "%40%F4%8F%B0%80",
        "%F4%8F%B0%801",
        "%F4%8F%B0%80%40",
        "%C4%81%F4%8F%B0%80",
        "%F4%8F%B0%80%C4%81"
    };

    /*
     * Runs the encode/decode round trip for every test string.
     * Throws an Exception as soon as one case fails.
     */
    public static void main(String[] args) throws Exception {
        // The two tables are consumed in lock step; fail loudly if they
        // ever drift apart instead of hitting an index error mid-run.
        if (testStrings.length != correctEncodings.length) {
            throw new IllegalStateException(
                "testStrings has " + testStrings.length
                + " entries but correctEncodings has "
                + correctEncodings.length);
        }

        for (int i = 0; i < testStrings.length; i++) {
            test(testStrings[i], correctEncodings[i]);
        }
    }

    /*
     * Encodes str with URLEncoder using UTF-8 and checks the result against
     * correctEncoding, then decodes the encoded form with URLDecoder and
     * checks that it equals str. Progress is printed to System.out; an
     * Exception is thrown on the first mismatch.
     */
    private static void test(String str, String correctEncoding)
        throws Exception {

        System.out.println("Unicode bytes of test string are: "
                           + getHexBytes(str));

        String encoded = URLEncoder.encode(str, "UTF-8");

        System.out.println("URLEncoding is: " + encoded);

        if (!encoded.equals(correctEncoding)) {
            // Report the actual value too, so a failure can be diagnosed
            // from the exception alone.
            throw new Exception("The encoding is incorrect!" +
                                " It should be " + correctEncoding +
                                " but was " + encoded);
        }
        System.out.println("The encoding is correct!");

        String decoded = URLDecoder.decode(encoded, "UTF-8");

        System.out.println("Unicode bytes for URLDecoding are: "
                           + getHexBytes(decoded));

        if (!str.equals(decoded)) {
            throw new Exception("The decoded is not equal to the original"
                                + " (original: " + getHexBytes(str)
                                + "decoded: " + getHexBytes(decoded) + ")");
        }
        System.out.println("The decoding is correct");
        System.out.println("---");
    }

    /*
     * Returns the UTF-16 code units of s as lower-case, zero-padded,
     * four-digit hex values, each followed by a single space
     * (for example "d800 dc00 ").
     */
    private static String getHexBytes(String s) {
        // Five output characters per code unit: four hex digits and a space.
        StringBuilder sb = new StringBuilder(s.length() * 5);
        for (int i = 0; i < s.length(); i++) {
            int unit = s.charAt(i);
            // Emit the four nibbles from most to least significant.
            for (int shift = 12; shift >= 0; shift -= 4) {
                sb.append(Character.forDigit((unit >> shift) & 0x0f, 16));
            }
            sb.append(' ');
        }
        return sb.toString();
    }

}