#!/usr/bin/env perl

use strict;
use warnings;

use HTTPTest;

# This test checks that Wget recognises "nofollow" inside a
# <meta name="robots"> tag wherever it sits in the comma-separated list of
# values, and whatever mix of upper and lower case is used to write it.
#
# Four pages are served (start, mid, end and solo).  Each one links to
# "bombshell.html" and carries a robots meta tag in which "nofollow" is the
# first, a middle, the last, or the only value, with varying amounts of
# whitespace around the separators.  Only those four pages are listed as
# expected downloads, so the test fails if bombshell.html is fetched.

###############################################################################

# Page bodies, keyed by the base name of the page that serves them.
my %page_for;

$page_for{start} = <<EOF;
<meta name="roBoTS" content="noFolLow , foo, bar ">
<a href="/bombshell.html">Don't follow me!</a>
EOF

$page_for{mid} = <<EOF;
<meta name="rObOts" content=" foo , NOfOllow , bar ">
<a href="/bombshell.html">Don't follow me!</a>
EOF

$page_for{end} = <<EOF;
<meta name="RoBotS" content="foo,BAr, nofOLLOw ">
<a href="/bombshell.html">Don't follow me!</a>
EOF

$page_for{solo} = <<EOF;
<meta name="robots" content="nofollow">
<a href="/bombshell.html">Don't follow me!</a>
EOF

# The pages handed to Wget on the command line, in command-line order.
my @page_names = qw(start mid end solo);

# Builds one server response record (code, msg, headers, content) describing
# a successful text/html reply with the given body.  A fresh headers hash is
# created on every call so that no two records share a reference.
my $html_ok = sub {
    my ($body) = @_;
    return {
        code    => "200",
        msg     => "Ok",
        headers => {
            "Content-type" => "text/html",
        },
        content => $body,
    };
};

my %urls;
my %expected_downloaded_files;
my @start_urls;

for my $name (@page_names) {
    my $body = $page_for{$name};

    $urls{"/$name.html"}                     = $html_ok->($body);
    $expected_downloaded_files{"$name.html"} = { content => $body };
    push @start_urls, "http://localhost:{{port}}/$name.html";
}

# The link target is served too, but deliberately left out of the expected
# downloads: it must never be retrieved.
$urls{'/bombshell.html'} = $html_ok->('Hello');

my $cmdline = $WgetTest::WGETPATH . " -r -nd " . join(' ', @start_urls);

my $expected_error_code = 0;

###############################################################################

my $the_test = HTTPTest->new(
    input   => \%urls,
    cmdline => $cmdline,
    errcode => $expected_error_code,
    output  => \%expected_downloaded_files,
);
exit $the_test->run();

# vim: et ts=4 sw=4