1#!/usr/bin/env perl
2
3use strict;
4use warnings;
5
6use HTTPTest;
7
# This test checks that Wget parses "nofollow" when it appears in <meta
# name="robots"> tags, regardless of where in a list of comma-separated
# values it appears, and regardless of letter case.
#
# Four different files contain links to the file "bombshell.html", each
# with "nofollow" set, at various positions in a list of values for a
# <meta name="robots"> tag (or as the only value), and with various
# degrees of separating whitespace. If bombshell.html is downloaded, the
# test has failed.
17
18###############################################################################
19
# HTML fixtures. Each page pairs a <meta name="robots"> tag whose content
# includes "nofollow" with a link to /bombshell.html. The pages differ in
# where "nofollow" sits in the value list, in letter case, and in the
# whitespace around the commas.

# The link line shared by every fixture page.
my $bombshell_link = qq{<a href="/bombshell.html">Don't follow me!</a>\n};

# "nofollow" is the first value in the list.
my $nofollow_start
    = qq{<meta name="roBoTS" content="noFolLow ,  foo, bar ">\n}
    . $bombshell_link;

# "nofollow" sits between two other values.
my $nofollow_mid
    = qq{<meta name="rObOts" content=" foo  ,  NOfOllow ,  bar ">\n}
    . $bombshell_link;

# "nofollow" is the last value, with trailing whitespace.
my $nofollow_end
    = qq{<meta name="RoBotS" content="foo,BAr,   nofOLLOw    ">\n}
    . $bombshell_link;

# "nofollow" is the only value.
my $nofollow_solo
    = qq{<meta name="robots" content="nofollow">\n}
    . $bombshell_link;
39
# Response table passed to HTTPTest as its "input", keyed by request path.
# Every entry has the same four fields: code, msg, headers, content.
# Only the path and the body differ between entries, so the table is built
# from (path => body) pairs.
my %urls;
for my $page (
    [ '/start.html'     => $nofollow_start ],
    [ '/mid.html'       => $nofollow_mid ],
    [ '/end.html'       => $nofollow_end ],
    [ '/solo.html'      => $nofollow_solo ],
    [ '/bombshell.html' => 'Hello' ],
) {
    my ($path, $body) = @{$page};

    # A fresh hash (and a fresh headers hash) per page, as in a literal table.
    $urls{$path} = {
        code    => "200",
        msg     => "Ok",
        headers => {
            "Content-type" => "text/html",
        },
        content => $body,
    };
}
83
# Wget invocation: the binary path, the -r and -nd options, then the URLs of
# the four nofollow pages. The "{{port}}" token is kept verbatim here
# (presumably substituted by the test harness -- not visible in this file).
my @page_urls = map { sprintf 'http://localhost:{{port}}/%s.html', $_ }
                qw(start mid end solo);
my $cmdline = join ' ', $WgetTest::WGETPATH, '-r', '-nd', @page_urls;

# Exit status the command is expected to produce.
my $expected_error_code = 0;
89
# Files expected on disk after the run, each with the exact content that was
# served for it. This is passed to HTTPTest as its "output".
# bombshell.html is intentionally absent: per the test description above,
# downloading it means the test has failed.
my %expected_downloaded_files;
$expected_downloaded_files{'start.html'} = { content => $nofollow_start };
$expected_downloaded_files{'mid.html'}   = { content => $nofollow_mid };
$expected_downloaded_files{'end.html'}   = { content => $nofollow_end };
$expected_downloaded_files{'solo.html'}  = { content => $nofollow_solo };
104
105###############################################################################
106
# Hand the fixtures to HTTPTest and exit with whatever run() returns.
# The named arguments are kept in an ordered list so they reach new() in
# the same order as before.
my @test_args = (
    input   => \%urls,
    cmdline => $cmdline,
    errcode => $expected_error_code,
    output  => \%expected_downloaded_files,
);
my $the_test = HTTPTest->new(@test_args);

my $exit_status = $the_test->run();
exit $exit_status;
112
113# vim: et ts=4 sw=4
114