#!/usr/bin/env perl

use strict;
use warnings;

use WgetFeature qw(iri);
use HTTPTest;

# Recursive download test for wget's --iri mode.
#
# A small site is served whose pages link to each other through URLs that
# contain non-ASCII characters, written in the encoding of the page that
# holds the link.  The server only knows the percent-encoded UTF-8 form of
# every URL, and the expected local file names are the UTF-8 forms too.
#
# cf. http://en.wikipedia.org/wiki/Latin1
#     http://en.wikipedia.org/wiki/ISO-8859-15

###############################################################################
#
# mime : charset found in Content-Type HTTP MIME header
# meta : charset found in Content-Type meta tag
# file : encoding actually used for the non-ASCII link bytes in the page
#
# index.html          mime + file = iso-8859-15 (no meta)
# p1_français.html    file = iso-8859-1, meta = utf-8 (wrong on purpose),
#                     mime = iso-8859-15
# p2_één.html         meta + mime + file = utf-8
# p3_€€€.html         meta = utf-8, mime = iso-8859-1 (no links in the page)
# p4_méér.html        mime = utf-8 (no meta, no links in the page)
#

# Non-ASCII characters are spelled as explicit byte escapes so that this
# source file stays plain ASCII where bytes matter (URLs and file names).
my $ccedilla_l15 = "\xE7";          # c-cedilla, ISO-8859-15
my $ccedilla_u8  = "\xC3\xA7";      # c-cedilla, UTF-8
my $eacute_l1    = "\xE9";          # e-acute,   ISO-8859-1
my $eacute_u8    = "\xC3\xA9";      # e-acute,   UTF-8
my $eurosign_l15 = "\xA4";          # euro sign, ISO-8859-15
my $eurosign_u8  = "\xE2\x82\xAC";  # euro sign, UTF-8

# Entry page: links are encoded in ISO-8859-15.  Visible text uses an HTML
# entity so the document does not mix encodings.
my $pageindex = <<EOF;
<html>
<head>
  <title>Main Page</title>
</head>
<body>
  <p>
    Link to page 1 <a href="http://localhost:{{port}}/p1_fran${ccedilla_l15}ais.html">La seule page en fran&ccedil;ais</a>.
    Link to page 3 <a href="http://localhost:{{port}}/p3_${eurosign_l15}${eurosign_l15}${eurosign_l15}.html">My tailor is rich</a>.
  </p>
</body>
</html>
EOF

# specifying a wrong charset in http-equiv - it will be overridden by Content-Type HTTP header
my $pagefrancais = <<EOF;
<html>
<head>
  <title>La seule page en fran&ccedil;ais</title>
  <meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>
</head>
<body>
  <p>
    Link to page 2 <a href="http://localhost:{{port}}/p2_${eacute_l1}${eacute_l1}n.html">Die enkele nerderlangstalige pagina</a>.
  </p>
</body>
</html>
EOF

# UTF-8 page (meta and HTTP header agree); its link is encoded in UTF-8.
my $pageeen = <<EOF;
<html>
<head>
  <title>Die enkele nederlandstalige pagina</title>
  <meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>
</head>
<body>
  <p>
    Één is niet veel maar toch meer dan nul.<br/>
    Nerdelands is een mooie taal... dit zin stuckje spreekt vanzelf, of niet :)<br/>
    <a href="http://localhost:{{port}}/p4_m${eacute_u8}${eacute_u8}r.html">Méér</a>
  </p>
</body>
</html>
EOF

# Leaf page reached from the index through the euro-sign link.
my $pageeuro = <<EOF;
<html>
<head>
  <title>Euro page</title>
  <meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>
</head>
<body>
  <p>
    My tailor isn't rich anymore.
  </p>
</body>
</html>
EOF

# Leaf page reached from page 2.
my $pagemeer = <<EOF;
<html>
<head>
  <title>Bekende supermarkt</title>
</head>
<body>
  <p>
    Ik ben toch niet gek !
  </p>
</body>
</html>
EOF

# NOTE(review): $page404 is not referenced by %urls or by the expected
# files below; it is kept in case the harness or a later edit relies on it.
my $page404 = <<EOF;
<html>
<head>
  <title>404</title>
</head>
<body>
  <p>
    Nop nop nop...
  </p>
</body>
</html>
EOF

# Server-side view of the site, keyed by request path.
# Each entry: code, msg, headers, content (and optionally request_headers,
# a set of patterns for headers of the incoming request).
my %urls = (
    '/index.html' => {
        code => "200",
        msg => "Ok",
        headers => {
            "Content-type" => "text/html; charset=ISO-8859-15",
        },
        content => $pageindex,
    },
    '/robots.txt' => {
        code => "200",
        msg => "Ok",
        headers => {
            "Content-type" => "text/plain",
        },
        content => "",
    },
    '/p1_fran%C3%A7ais.html' => {   # UTF-8 encoded
        code => "200",
        msg => "Ok",
        headers => {
            # Content-Type header overrides http-equiv Content-Type
            "Content-type" => "text/html; charset=ISO-8859-15",
        },
        content => $pagefrancais,
    },
    '/p2_%C3%A9%C3%A9n.html' => {   # UTF-8 encoded
        code => "200",
        msg => "Ok",
        request_headers => {
            "Referer" => qr|http://localhost:[0-9]+/p1_fran%C3%A7ais.html|,
        },
        headers => {
            "Content-type" => "text/html; charset=UTF-8",
        },
        content => $pageeen,
    },
    '/p3_%E2%82%AC%E2%82%AC%E2%82%AC.html' => { # UTF-8 encoded
        code => "200",
        msg => "Ok",
        headers => {
            "Content-type" => "text/plain; charset=ISO-8859-1",
        },
        content => $pageeuro,
    },
    '/p4_m%C3%A9%C3%A9r.html' => {  # UTF-8 encoded
        code => "200",
        msg => "Ok",
        request_headers => {
            "Referer" => qr|http://localhost:[0-9]+/p2_%C3%A9%C3%A9n.html|,
        },
        headers => {
            "Content-type" => "text/plain; charset=UTF-8",
        },
        content => $pagemeer,
    },
);

my $cmdline = $WgetTest::WGETPATH . " --iri --trust-server-names --restrict-file-names=nocontrol -nH -r http://localhost:{{port}}/";

my $expected_error_code = 0;

# Files expected on disk after the run: names are the UTF-8 byte forms,
# contents are exactly what the server sent.
my %expected_downloaded_files = (
    'index.html' => {
        content => $pageindex,
    },
    'robots.txt' => {
        content => "",
    },
    "p1_fran${ccedilla_u8}ais.html" => {
        content => $pagefrancais,
    },
    "p2_${eacute_u8}${eacute_u8}n.html" => {
        content => $pageeen,
    },
    "p3_${eurosign_u8}${eurosign_u8}${eurosign_u8}.html" => {
        content => $pageeuro,
    },
    "p4_m${eacute_u8}${eacute_u8}r.html" => {
        content => $pagemeer,
    },
);

###############################################################################

my $the_test = HTTPTest->new (input => \%urls,
                              cmdline => $cmdline,
                              errcode => $expected_error_code,
                              output => \%expected_downloaded_files);
exit $the_test->run();

# vim: et ts=4 sw=4