#!/usr/bin/env perl

use strict;
use warnings;

use WgetFeature qw(iri);
use HTTPTest;

# cf. http://en.wikipedia.org/wiki/Latin1
#     http://en.wikipedia.org/wiki/ISO-8859-15

###############################################################################
# Force remote encoding to ISO-8859-1
#
# mime : charset found in Content-Type HTTP MIME header
# meta : charset found in Content-Type meta tag
#
# index.html                  mime + file = iso-8859-15
# p1_français.html            meta + file = iso-8859-1, mime = utf-8
# p2_één.html                 mime + file = iso-8859-1
# p3_€€€.html                 meta + file = utf-8, mime = iso-8859-1
#
# NOTE(review): the table above does not match the fixtures below (p1's meta
# tag declares UTF-8, p2 is served with charset=UTF-8, p3 is served as
# text/plain without a charset).  The command line forces
# --remote-encoding=iso-8859-1 in every case -- confirm which description is
# the intended one.
#

# There is no "use utf8" in this file, so every string literal is a byte
# string and the \x escapes below are raw bytes, not code points.
#
# Suffixes: _l1 = ISO-8859-1, _l15 = ISO-8859-15, _u8 = UTF-8.
# Byte 0xA4 is deliberately listed twice: it is the euro sign in ISO-8859-15
# but the generic currency sign in ISO-8859-1.
my $ccedilla_l15 = "\xE7";
my $ccedilla_u8  = "\xC3\xA7";
my $eacute_l1    = "\xE9";
my $eacute_u8    = "\xC3\xA9";
my $eurosign_l15 = "\xA4";
my $eurosign_u8  = "\xE2\x82\xAC";    # not referenced below; kept for reference
my $currency_l1  = "\xA4";            # not referenced below; kept for reference
my $currency_u8  = "\xC2\xA4";

# Entry page: its links carry raw (non percent-encoded) 8-bit bytes.
my $pageindex = <<EOF;
<html>
<head>
  <title>Main Page</title>
</head>
<body>
  <p>
    Link to page 1 <a href="http://localhost:{{port}}/p1_fran${ccedilla_l15}ais.html">La seule page en français</a>.
    Link to page 3 <a href="http://localhost:{{port}}/p3_${eurosign_l15}${eurosign_l15}${eurosign_l15}.html">My tailor is rich</a>.
  </p>
</body>
</html>
EOF

# Page 1: the meta tag declares UTF-8, so the title uses the UTF-8 c-cedilla;
# the link to page 2 is still written with single-byte e-acute characters.
my $pagefrancais = <<EOF;
<html>
<head>
  <title>La seule page en fran${ccedilla_u8}ais</title>
  <meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>
</head>
<body>
  <p>
    Link to page 2 <a href="http://localhost:{{port}}/p2_${eacute_l1}${eacute_l1}n.html">Die enkele nerderlangstalige pagina</a>.
  </p>
</body>
</html>
EOF

# Page 2: leaf page, no outgoing links.
my $pageeen = <<EOF;
<html>
<head>
  <title>Die enkele nederlandstalige pagina</title>
</head>
<body>
  <p>
    Één is niet veel maar toch meer dan nul.<br/>
    Nerdelands is een mooie taal... dit zin stuckje spreekt vanzelf, of niet :)
  </p>
</body>
</html>
EOF

# Page 3: leaf page, no outgoing links.
my $pageeuro = <<EOF;
<html>
<head>
  <title>Euro page</title>
</head>
<body>
  <p>
    My tailor isn't rich anymore.
  </p>
</body>
</html>
EOF

# Not referenced by any route below.
my $page404 = <<EOF;
<html>
<head>
  <title>404</title>
</head>
<body>
  <p>
    Nop nop nop...
  </p>
</body>
</html>
EOF

# Routes served by the test HTTP server.
# Each entry: code, msg, headers, content
my %urls = (
    '/index.html' => {
        code => "200",
        msg => "Ok",
        headers => {
            "Content-type" => "text/html; charset=ISO-8859-15",
        },
        content => $pageindex,
    },
    '/robots.txt' => {
        code => "200",
        msg => "Ok",
        headers => {
            "Content-type" => "text/plain",
        },
        content => "",
    },
    '/p1_fran%C3%A7ais.html' => {   # UTF-8 encoded
        code => "200",
        msg => "Ok",
        headers => {
            # wrong charset here, overridden by --remote-encoding=iso-8859-1
            "Content-type" => "text/html; charset=UTF-8",
        },
        content => $pagefrancais,
    },
    '/p2_%C3%A9%C3%A9n.html' => {   # UTF-8 encoded
        code => "200",
        msg => "Ok",
        headers => {
            "Content-type" => "text/html; charset=UTF-8",
        },
        content => $pageeen,
    },
    # Page 3 is reachable under both readings of byte 0xA4:
    # euro sign (E2 82 AC) ...
    '/p3_%E2%82%AC%E2%82%AC%E2%82%AC.html' => { # UTF-8 encoded
        code => "200",
        msg => "Ok",
        headers => {
            "Content-type" => "text/plain",
        },
        content => $pageeuro,
    },
    # ... and currency sign (C2 A4).
    '/p3_%C2%A4%C2%A4%C2%A4.html' => {  # UTF-8 encoded
        code => "200",
        msg => "Ok",
        headers => {
            "Content-type" => "text/plain",
        },
        content => $pageeuro,
    },
);

# Recursive fetch with IRI support, forcing the remote charset to ISO-8859-1.
my $cmdline = $WgetTest::WGETPATH . " --iri -e robots=on --trust-server-names --remote-encoding=iso-8859-1 -nH -r http://localhost:{{port}}/";

my $expected_error_code = 0;

# Local file names are expected in UTF-8.  Page 3 is expected under the
# currency-sign name, i.e. 0xA4 read as ISO-8859-1 rather than ISO-8859-15.
my %expected_downloaded_files = (
    'index.html' => {
        content => $pageindex,
    },
    'robots.txt' => {
        content => "",
    },
    "p1_fran${ccedilla_u8}ais.html" => {
        content => $pagefrancais,
    },
    "p2_${eacute_u8}${eacute_u8}n.html" => {
        content => $pageeen,
    },
    "p3_${currency_u8}${currency_u8}${currency_u8}.html" => {
        content => $pageeuro,
    },
);

###############################################################################

my $the_test = HTTPTest->new (input => \%urls,
                              cmdline => $cmdline,
                              errcode => $expected_error_code,
                              output => \%expected_downloaded_files);
exit $the_test->run();

# vim: et ts=4 sw=4