1# -*- Mode: cperl; coding: utf-8; cperl-indent-level: 4 -*-
2# vim: ts=4 sts=4 sw=4:
3=head1 NAME
4
5CPAN::Mirrors - Get CPAN mirror information and select a fast one
6
7=head1 SYNOPSIS
8
9    use CPAN::Mirrors;
10
11    my $mirrors = CPAN::Mirrors->new( $mirrored_by_file );
12
13    my $seen = {};
14
15    my $best_continent = $mirrors->find_best_continents( { seen => $seen } );
16    my @mirrors        = $mirrors->get_mirrors_by_continents( $best_continent );
17
18    my $callback = sub {
19        my( $m ) = @_;
20        printf "%s = %s\n", $m->hostname, $m->rtt
21        };
22    $mirrors->get_mirrors_timings( \@mirrors, $seen, $callback, %args );
23
24    @mirrors = sort { $a->rtt <=> $b->rtt } @mirrors;
25
26    print "Best mirrors are ", map( { $_->rtt } @mirrors[0..3] ), "\n";
27
28=head1 DESCRIPTION
29
30=over
31
32=cut
33
34package CPAN::Mirrors;
35use strict;
36use vars qw($VERSION $urllist $silent);
37$VERSION = "2.27";
38
39use Carp;
40use FileHandle;
41use Fcntl ":flock";
42use Net::Ping ();
43use CPAN::Version;
44
45=item new( LOCAL_FILE_NAME )
46
47Create a new CPAN::Mirrors object from LOCAL_FILE_NAME. This file
48should look like that in http://www.cpan.org/MIRRORED.BY .
49
50=cut
51
52sub new {
53    my ($class, $file) = @_;
54    croak "CPAN::Mirrors->new requires a filename" unless defined $file;
55    croak "The file [$file] was not found" unless -e $file;
56
57    my $self = bless {
58        mirrors      => [],
59        geography    => {},
60    }, $class;
61
62    $self->parse_mirrored_by( $file );
63
64    return $self;
65}
66
67sub parse_mirrored_by {
68    my ($self, $file) = @_;
69    my $handle = FileHandle->new;
70    $handle->open($file)
71        or croak "Couldn't open $file: $!";
72    flock $handle, LOCK_SH;
73    $self->_parse($file,$handle);
74    flock $handle, LOCK_UN;
75    $handle->close;
76}
77
78=item continents()
79
80Return a list of continents based on those defined in F<MIRRORED.BY>.
81
82=cut
83
84sub continents {
85    my ($self) = @_;
86    return sort keys %{$self->{geography} || {}};
87}
88
89=item countries( [CONTINENTS] )
90
91Return a list of countries based on those defined in F<MIRRORED.BY>.
92It only returns countries for the continents you specify (as defined
93in C<continents>). If you don't specify any continents, it returns all
94of the countries listed in F<MIRRORED.BY>.
95
96=cut
97
98sub countries {
99    my ($self, @continents) = @_;
100    @continents = $self->continents unless @continents;
101    my @countries;
102    for my $c (@continents) {
103        push @countries, sort keys %{ $self->{geography}{$c} || {} };
104    }
105    return @countries;
106}
107
108=item mirrors( [COUNTRIES] )
109
110Return a list of mirrors based on those defined in F<MIRRORED.BY>.
111It only returns mirrors for the countries you specify (as defined
112in C<countries>). If you don't specify any countries, it returns all
113of the mirrors listed in F<MIRRORED.BY>.
114
115=cut
116
117sub mirrors {
118    my ($self, @countries) = @_;
119    return @{$self->{mirrors}} unless @countries;
120    my %wanted = map { $_ => 1 } @countries;
121    my @found;
122    for my $m (@{$self->{mirrors}}) {
123        push @found, $m if exists $wanted{$m->country};
124    }
125    return @found;
126}
127
128=item get_mirrors_by_countries( [COUNTRIES] )
129
130A more sensible synonym for mirrors.
131
132=cut
133
134sub get_mirrors_by_countries { &mirrors }
135
136=item get_mirrors_by_continents( [CONTINENTS] )
137
138Return a list of mirrors for all of continents you specify. If you don't
139specify any continents, it returns all of the mirrors.
140
141You can specify a single continent or an array reference of continents.
142
143=cut
144
145sub get_mirrors_by_continents {
146    my ($self, $continents ) = @_;
147    $continents = [ $continents ] unless ref $continents;
148
149    eval {
150        $self->mirrors( $self->get_countries_by_continents( @$continents ) );
151        };
152    }
153
154=item get_countries_by_continents( [CONTINENTS] )
155
156A more sensible synonym for countries.
157
158=cut
159
160sub get_countries_by_continents { &countries }
161
162=item default_mirror
163
164Returns the default mirror, http://www.cpan.org/ . This mirror uses
165dynamic DNS to give a close mirror.
166
167=cut
168
169sub default_mirror {
170    CPAN::Mirrored::By->new({ http => 'http://www.cpan.org/'});
171}
172
173=item best_mirrors
174
175C<best_mirrors> checks for the best mirrors based on the list of
176continents you pass, or, without that, all continents, as defined
177by C<CPAN::Mirrored::By>. It pings each mirror, up to the value of
178C<how_many>. In list context, it returns up to C<how_many> mirrors.
179In scalar context, it returns the single best mirror.
180
181Arguments
182
183    how_many      - the number of mirrors to return. Default: 1
184    callback      - a callback for find_best_continents
185    verbose       - true or false on all the whining and moaning. Default: false
186    continents    - an array ref of the continents to check
187    external_ping - if true, use external ping via Net::Ping::External. Default: false
188
189If you don't specify the continents, C<best_mirrors> calls
190C<find_best_continents> to get the list of continents to check.
191
192If you don't have L<Net::Ping> v2.13 or later, needed for timings,
193this returns the default mirror.
194
195C<external_ping> should be set and then C<Net::Ping::External> needs
196to be installed, if the local network has a transparent proxy.
197
198=cut
199
200sub best_mirrors {
201    my ($self, %args) = @_;
202    my $how_many      = $args{how_many} || 1;
203    my $callback      = $args{callback};
204    my $verbose       = defined $args{verbose} ? $args{verbose} : 0;
205    my $continents    = $args{continents} || [];
206       $continents    = [$continents] unless ref $continents;
207    $args{external_ping} = 0 unless defined $args{external_ping};
208    my $external_ping = $args{external_ping};
209
210    # Old Net::Ping did not do timings at all
211    my $min_version = '2.13';
212    unless( CPAN::Version->vgt(Net::Ping->VERSION, $min_version) ) {
213        carp sprintf "Net::Ping version is %s (< %s). Returning %s",
214            Net::Ping->VERSION, $min_version, $self->default_mirror;
215        return $self->default_mirror;
216    }
217
218    my $seen = {};
219
220    if ( ! @$continents ) {
221        print "Searching for the best continent ...\n" if $verbose;
222        my @best_continents = $self->find_best_continents(
223            seen          => $seen,
224            verbose       => $verbose,
225            callback      => $callback,
226            external_ping => $external_ping,
227            );
228
229        # Only add enough continents to find enough mirrors
230        my $count = 0;
231        for my $continent ( @best_continents ) {
232            push @$continents, $continent;
233            $count += $self->mirrors( $self->countries($continent) );
234            last if $count >= $how_many;
235        }
236    }
237
238    return $self->default_mirror unless @$continents;
239    print "Scanning " . join(", ", @$continents) . " ...\n" if $verbose;
240
241    my $trial_mirrors = $self->get_n_random_mirrors_by_continents( 3 * $how_many, $continents->[0] );
242
243    my $timings = $self->get_mirrors_timings(
244        $trial_mirrors,
245        $seen,
246        $callback,
247        %args,
248    );
249    return $self->default_mirror unless @$timings;
250
251    $how_many = @$timings if $how_many > @$timings;
252
253    return wantarray ? @{$timings}[0 .. $how_many-1] : $timings->[0];
254}
255
256=item get_n_random_mirrors_by_continents( N, [CONTINENTS] )
257
258Returns up to N random mirrors for the specified continents. Specify the
259continents as an array reference.
260
261=cut
262
263sub get_n_random_mirrors_by_continents {
264    my( $self, $n, $continents ) = @_;
265    $n ||= 3;
266    $continents = [ $continents ] unless ref $continents;
267
268    if ( $n <= 0 ) {
269        return wantarray ? () : [];
270    }
271
272    my @long_list = $self->get_mirrors_by_continents( $continents );
273
274    if ( $n eq '*' or $n > @long_list ) {
275        return wantarray ? @long_list : \@long_list;
276    }
277
278    @long_list = map  {$_->[0]}
279                 sort {$a->[1] <=> $b->[1]}
280                 map  {[$_, rand]} @long_list;
281
282    splice @long_list, $n; # truncate
283
284    \@long_list;
285}
286
287=item get_mirrors_timings( MIRROR_LIST, SEEN, CALLBACK, %ARGS );
288
289Pings the listed mirrors and returns a list of mirrors sorted in
290ascending ping times.
291
292C<MIRROR_LIST> is an anonymous array of C<CPAN::Mirrored::By> objects to
293ping.
294
295The optional argument C<SEEN> is a hash reference used to track the
296mirrors you've already pinged.
297
298The optional argument C<CALLBACK> is a subroutine reference to call
299after each ping. It gets the C<CPAN::Mirrored::By> object after each
300ping.
301
302=cut
303
304sub get_mirrors_timings {
305    my( $self, $mirror_list, $seen, $callback, %args ) = @_;
306
307    $seen = {} unless defined $seen;
308    croak "The mirror list argument must be an array reference"
309        unless ref $mirror_list eq ref [];
310    croak "The seen argument must be a hash reference"
311        unless ref $seen eq ref {};
312    croak "callback must be a subroutine"
313        if( defined $callback and ref $callback ne ref sub {} );
314
315    my $timings = [];
316    for my $m ( @$mirror_list ) {
317        $seen->{$m->hostname} = $m;
318        next unless eval{ $m->http };
319
320        if( $self->_try_a_ping( $seen, $m, ) ) {
321            my $ping = $m->ping(%args);
322            next unless defined $ping;
323            # printf "m %s ping %s\n", $m, $ping;
324            push @$timings, $m;
325            $callback->( $m ) if $callback;
326        }
327        else {
328            push @$timings, $seen->{$m->hostname}
329                if defined $seen->{$m->hostname}->rtt;
330        }
331    }
332
333    my @best = sort {
334           if( defined $a->rtt and defined $b->rtt )     {
335            $a->rtt <=> $b->rtt
336            }
337        elsif( defined $a->rtt and ! defined $b->rtt )   {
338            return -1;
339            }
340        elsif( ! defined $a->rtt and defined $b->rtt )   {
341            return 1;
342            }
343        elsif( ! defined $a->rtt and ! defined $b->rtt ) {
344            return 0;
345            }
346
347        } @$timings;
348
349    return wantarray ? @best : \@best;
350}
351
352=item find_best_continents( HASH_REF );
353
354C<find_best_continents> goes through each continent and pings C<N>
355random mirrors on that continent. It then orders the continents by
356ascending median ping time. In list context, it returns the ordered list
357of continent. In scalar context, it returns the same list as an
358anonymous array.
359
360Arguments:
361
362    n        - the number of hosts to ping for each continent. Default: 3
363    seen     - a hashref of cached hostname ping times
364    verbose  - true or false for noisy or quiet. Default: false
365    callback - a subroutine to run after each ping.
366    ping_cache_limit - how long, in seconds, to reuse previous ping times.
367        Default: 1 day
368
369The C<seen> hash has hostnames as keys and anonymous arrays as values.
370The anonymous array is a triplet of a C<CPAN::Mirrored::By> object, a
371ping time, and the epoch time for the measurement.
372
373The callback subroutine gets the C<CPAN::Mirrored::By> object, the ping
374time, and measurement time (the same things in the C<seen> hashref) as
375arguments. C<find_best_continents> doesn't care what the callback does
376and ignores the return value.
377
378With a low value for C<N>, a single mirror might skew the results enough
379to choose a worse continent. If you have that problem, try a larger
380value.
381
382=cut
383
384sub find_best_continents {
385    my ($self, %args) = @_;
386
387    $args{n}     ||= 3;
388    $args{verbose} = 0 unless defined $args{verbose};
389    $args{seen}    = {} unless defined $args{seen};
390    croak "The seen argument must be a hash reference"
391        unless ref $args{seen} eq ref {};
392    $args{ping_cache_limit} = 24 * 60 * 60
393        unless defined $args{ping_cache_limit};
394    croak "callback must be a subroutine"
395        if( defined $args{callback} and ref $args{callback} ne ref sub {} );
396
397    my %medians;
398    CONT: for my $c ( $self->continents ) {
399        my @mirrors = $self->mirrors( $self->countries($c) );
400        printf "Testing %s (%d mirrors)\n", $c, scalar @mirrors
401            if $args{verbose};
402
403        next CONT unless @mirrors;
404        my $n = (@mirrors < $args{n}) ? @mirrors : $args{n};
405
406        my @tests;
407        my $tries = 0;
408        RANDOM: while ( @mirrors && @tests < $n && $tries++ < 15 ) {
409            my $m = splice( @mirrors, int(rand(@mirrors)), 1 );
410            if( $self->_try_a_ping(
411                    $args{seen}, $m, $args{ping_cache_limit}
412                )) {
413                $self->get_mirrors_timings(
414                    [ $m ],
415                    $args{seen},
416                    $args{callback},
417                    %args,
418                );
419                next RANDOM unless defined $args{seen}{$m->hostname}->rtt;
420            }
421            printf "(%s -> %0.2f ms)",
422                $m->hostname,
423                join ' ', 1000 * $args{seen}{$m->hostname}->rtt
424                    if $args{verbose};
425
426            push @tests, $args{seen}{$m->hostname}->rtt;
427        }
428
429        my $median = $self->_get_median_ping_time( \@tests, $args{verbose} );
430        $medians{$c} = $median if defined $median;
431    }
432
433    my @best_cont = sort { $medians{$a} <=> $medians{$b} } keys %medians;
434
435    if ( $args{verbose} ) {
436        print "Median result by continent:\n";
437        if ( @best_cont ) {
438            for my $c ( @best_cont ) {
439                printf( "  %7.2f ms  %s\n", $medians{$c}*1000, $c );
440            }
441        } else {
442            print "  **** No results found ****\n"
443        }
444    }
445
446    return wantarray ? @best_cont : $best_cont[0];
447}
448
449# retry if
450sub _try_a_ping {
451    my ($self, $seen, $mirror, $ping_cache_limit ) = @_;
452
453    ( ! exists $seen->{$mirror->hostname}
454        or
455    ! defined $seen->{$mirror->hostname}->rtt
456      or
457    ! defined $ping_cache_limit
458      or
459      time - $seen->{$mirror->hostname}->ping_time
460        > $ping_cache_limit
461    )
462}
463
464sub _get_median_ping_time {
465    my ($self, $tests, $verbose ) = @_;
466
467    my @sorted = sort { $a <=> $b } @$tests;
468
469    my $median = do {
470           if ( @sorted == 0 ) { undef }
471        elsif ( @sorted == 1 ) { $sorted[0] }
472        elsif ( @sorted % 2 )  { $sorted[ int(@sorted / 2) ] }
473        else {
474            my $mid_high = int(@sorted/2);
475            ($sorted[$mid_high-1] + $sorted[$mid_high])/2;
476        }
477    };
478
479    if ($verbose){
480        if ($median) {
481            printf " => median time: %.2f ms\n", $median * 1000
482        } else {
483            printf " => **** no median time ****\n";
484        }
485    }
486
487    return $median;
488}
489
490# Adapted from Parse::CPAN::MirroredBy by Adam Kennedy
491sub _parse {
492    my ($self, $file, $handle) = @_;
493    my $output = $self->{mirrors};
494    my $geo    = $self->{geography};
495
496    local $/ = "\012";
497    my $line = 0;
498    my $mirror = undef;
499    while ( 1 ) {
500        # Next line
501        my $string = <$handle>;
502        last if ! defined $string;
503        $line = $line + 1;
504
505        # Remove the useless lines
506        chomp( $string );
507        next if $string =~ /^\s*$/;
508        next if $string =~ /^\s*#/;
509
510        # Hostname or property?
511        if ( $string =~ /^\s/ ) {
512            # Property
513            unless ( $string =~ /^\s+(\w+)\s+=\s+\"(.*)\"$/ ) {
514                croak("Invalid property on line $line");
515            }
516            my ($prop, $value) = ($1,$2);
517            $mirror ||= {};
518            if ( $prop eq 'dst_location' ) {
519                my (@location,$continent,$country);
520                @location = (split /\s*,\s*/, $value)
521                    and ($continent, $country) = @location[-1,-2];
522                $continent =~ s/\s\(.*//;
523                $continent =~ s/\W+$//; # if Jarkko doesn't know latitude/longitude
524                $geo->{$continent}{$country} = 1 if $continent && $country;
525                $mirror->{continent} = $continent || "unknown";
526                $mirror->{country} = $country || "unknown";
527            }
528            elsif ( $prop eq 'dst_http' ) {
529                $mirror->{http} = $value;
530            }
531            elsif ( $prop eq 'dst_ftp' ) {
532                $mirror->{ftp} = $value;
533            }
534            elsif ( $prop eq 'dst_rsync' ) {
535                $mirror->{rsync} = $value;
536            }
537            else {
538                $prop =~ s/^dst_//;
539                $mirror->{$prop} = $value;
540            }
541        } else {
542            # Hostname
543            unless ( $string =~ /^([\w\.-]+)\:\s*$/ ) {
544                croak("Invalid host name on line $line");
545            }
546            my $current = $mirror;
547            $mirror     = { hostname => "$1" };
548            if ( $current ) {
549                push @$output, CPAN::Mirrored::By->new($current);
550            }
551        }
552    }
553    if ( $mirror ) {
554        push @$output, CPAN::Mirrored::By->new($mirror);
555    }
556
557    return;
558}
559
560#--------------------------------------------------------------------------#
561
562package CPAN::Mirrored::By;
563use strict;
564use Net::Ping   ();
565
566sub new {
567    my($self,$arg) = @_;
568    $arg ||= {};
569    bless $arg, $self;
570}
571sub hostname  { shift->{hostname}    }
572sub continent { shift->{continent}   }
573sub country   { shift->{country}     }
574sub http      { shift->{http}  || '' }
575sub ftp       { shift->{ftp}   || '' }
576sub rsync     { shift->{rsync} || '' }
577sub rtt       { shift->{rtt}         }
578sub ping_time { shift->{ping_time}   }
579
580sub url {
581    my $self = shift;
582    return $self->{http} || $self->{ftp};
583}
584
585sub ping {
586    my($self, %args) = @_;
587
588    my $external_ping = $args{external_ping};
589    if ($external_ping) {
590        eval { require Net::Ping::External }
591            or die "Net::Ping::External required to use external ping command";
592    }
593    my $ping = Net::Ping->new(
594        $external_ping ? 'external' : $^O eq 'VMS' ? 'icmp' : 'tcp',
595        1
596    );
597    my ($proto) = $self->url =~ m{^([^:]+)};
598    my $port = $proto eq 'http' ? 80 : 21;
599    return unless $port;
600
601    if ( $ping->can('port_number') ) {
602        $ping->port_number($port);
603    }
604    else {
605        $ping->{'port_num'} = $port;
606    }
607
608    $ping->hires(1) if $ping->can('hires');
609    my ($alive,$rtt) = eval { $ping->ping($self->hostname); };
610    my $verbose = $args{verbose};
611    if ($verbose && !$alive) {
612        printf "(host %s not alive)", $self->hostname;
613    }
614
615    $self->{rtt} = $alive ? $rtt : undef;
616    $self->{ping_time} = time;
617
618    $self->rtt;
619}
620
621
6221;
623
624=back
625
626=head1 AUTHOR
627
628Andreas Koenig C<< <andk@cpan.org> >>, David Golden C<< <dagolden@cpan.org> >>,
629brian d foy C<< <bdfoy@cpan.org> >>
630
631=head1 LICENSE
632
633This program is free software; you can redistribute it and/or
634modify it under the same terms as Perl itself.
635
636See L<http://www.perl.com/perl/misc/Artistic.html>
637
638=cut
639