#!/usr/bin/perl

use strict;
use warnings;

use WgetFeature qw(iri);
use HTTPTest;

# cf. http://en.wikipedia.org/wiki/Latin1
#     http://en.wikipedia.org/wiki/ISO-8859-15

###############################################################################
# Force remote encoding to ISO-8859-1
#
# Runs wget recursively with "--iri --remote-encoding=iso-8859-1" against the
# URL map below and checks the exit code plus the set of files written to
# disk.  The same byte (0xA4) is the euro sign in ISO-8859-15 and the generic
# currency sign in ISO-8859-1, which is what lets the expected file names
# show which charset was actually applied to the links.
#
# mime : charset found in Content-Type HTTP MIME header
# meta : charset found in Content-Type meta tag
#
# index.html                  mime + file = iso-8859-15
# p1_français.html            meta + file = iso-8859-1, mime = utf-8
# p2_één.html                 mime + file = iso-8859-1
# p3_€€€.html                 meta + file = utf-8, mime = iso-8859-1
#
# NOTE(review): every p3 variant in %urls is served as text/plain without a
# charset and $pageeuro carries no meta tag, so the p3 row above describes
# the intent rather than what the fixture sends -- confirm.
#

# Raw byte sequences for the non-ASCII characters used in the link targets.
# Suffix: _l1 = ISO-8859-1, _l15 = ISO-8859-15, _u8 = UTF-8.
# Not all of them are referenced below; they are kept as a byte reference.
my $ccedilla_l15 = "\xE7";           # c-cedilla (same byte in Latin-1)
my $ccedilla_u8  = "\xC3\xA7";
my $eacute_l1    = "\xE9";           # e-acute
my $eacute_u8    = "\xC3\xA9";
my $eurosign_l15 = "\xA4";           # euro sign when read as ISO-8859-15
my $eurosign_u8  = "\xE2\x82\xAC";
my $currency_l1  = "\xA4";           # currency sign when read as ISO-8859-1
my $currency_u8  = "\xC2\xA4";

# Entry page: links to p1 and p3 are written with single-byte encoded paths.
my $pageindex = <<EOF;
<html>
<head>
  <title>Main Page</title>
</head>
<body>
  <p>
    Link to page 1 <a href="http://localhost:{{port}}/p1_fran${ccedilla_l15}ais.html">La seule page en français</a>.
    Link to page 3 <a href="http://localhost:{{port}}/p3_${eurosign_l15}${eurosign_l15}${eurosign_l15}.html">My tailor is rich</a>.
  </p>
</body>
</html>
EOF

# Page 1 declares ISO-8859-1 in its meta tag, so the c-cedilla in the title
# is emitted as the single Latin-1 byte rather than as a literal character.
my $pagefrancais = <<EOF;
<html>
<head>
  <title>La seule page en fran${ccedilla_l15}ais</title>
  <meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1"/>
</head>
<body>
  <p>
    Link to page 2 <a href="http://localhost:{{port}}/p2_${eacute_l1}${eacute_l1}n.html">Die enkele nerderlangstalige pagina</a>.
  </p>
</body>
</html>
EOF

# Page 2: leaf page, no outgoing links.
my $pageeen = <<EOF;
<html>
<head>
  <title>Die enkele nederlandstalige pagina</title>
</head>
<body>
  <p>
    Één is niet veel maar toch meer dan nul.<br/>
    Nerdelands is een mooie taal... dit zin stuckje spreekt vanzelf, of niet :)
  </p>
</body>
</html>
EOF

# Page 3: leaf page, no outgoing links.
my $pageeuro = <<EOF;
<html>
<head>
  <title>Euro page</title>
</head>
<body>
  <p>
    My tailor isn't rich anymore.
  </p>
</body>
</html>
EOF

# Body returned for the one URL that answers 404.
my $page404 = <<EOF;
<html>
<head>
  <title>404</title>
</head>
<body>
  <p>
    Nop nop nop...
  </p>
</body>
</html>
EOF

# Server-side URL map handed to HTTPTest.
# Each entry: code, msg, headers, content.
# Most pages are registered under both the percent-encoded UTF-8 path and the
# percent-encoded single-byte path, so either request form gets an answer.
my %urls = (
    '/index.html' => {
        code => "200",
        msg => "Ok",
        headers => {
            "Content-type" => "text/html; charset=ISO-8859-15",
        },
        content => $pageindex,
    },
    '/robots.txt' => {
        code => "200",
        msg => "Ok",
        headers => {
            "Content-type" => "text/plain",
        },
        content => "",
    },
    '/p1_fran%C3%A7ais.html' => {   # UTF-8 encoded
        code => "404",
        msg => "File not found",
        headers => {
            "Content-type" => "text/html; charset=UTF-8",
        },
        content => $page404,
    },
    '/p1_fran%E7ais.html' => {
        code => "200",
        msg => "Ok",
        headers => {
            "Content-type" => "text/html; charset=UTF-8",
        },
        content => $pagefrancais,
    },
    '/p2_%C3%A9%C3%A9n.html' => {   # UTF-8 encoded
        code => "200",
        msg => "Ok",
        headers => {
            "Content-type" => "text/html; charset=UTF-8",
        },
        content => $pageeen,
    },
    '/p2_%E9%E9n.html' => {
        code => "200",
        msg => "Ok",
        headers => {
            "Content-type" => "text/html; charset=ISO-8859-1",
        },
        content => $pageeen,
    },
    '/p3_%E2%82%AC%E2%82%AC%E2%82%AC.html' => { # UTF-8 encoded
        code => "200",
        msg => "Ok",
        headers => {
            "Content-type" => "text/plain",
        },
        content => $pageeuro,
    },
    '/p3_%A4%A4%A4.html' => {
        code => "200",
        msg => "Ok",
        headers => {
            "Content-type" => "text/plain",
        },
        content => $pageeuro,
    },
    '/p3_%C2%A4%C2%A4%C2%A4.html' => { # UTF-8 encoded
        code => "200",
        msg => "Ok",
        headers => {
            "Content-type" => "text/plain",
        },
        content => $pageeuro,
    },
);

# {{port}} is left as a literal placeholder here, as in the page bodies above.
my $cmdline = $WgetTest::WGETPATH . " --iri --remote-encoding=iso-8859-1 -nH -r http://localhost:{{port}}/";

my $expected_error_code = 0;

# Files expected on disk after the run.  p1 keeps the raw 0xE7 byte in its
# name, p2 is named with UTF-8 e-acute, and p3 is named with the UTF-8
# currency sign (not the euro sign) -- presumably because the forced
# ISO-8859-1 remote encoding makes 0xA4 in index.html read as U+00A4.
my %expected_downloaded_files = (
    'index.html' => {
        content => $pageindex,
    },
    'robots.txt' => {
        content => "",
    },
    "p1_fran${ccedilla_l15}ais.html" => {
        content => $pagefrancais,
    },
    "p2_${eacute_u8}${eacute_u8}n.html" => {
        content => $pageeen,
    },
    "p3_${currency_u8}${currency_u8}${currency_u8}.html" => {
        content => $pageeuro,
    },
);

###############################################################################

my $the_test = HTTPTest->new (name => "Test-iri-forced-remote",
                              input => \%urls,
                              cmdline => $cmdline,
                              errcode => $expected_error_code,
                              output => \%expected_downloaded_files);
# The script's exit status is whatever run() returns.
exit $the_test->run();

# vim: et ts=4 sw=4
