HTTP.pm revision 1.7
1#! /usr/bin/perl 2# ex:ts=8 sw=4: 3# $OpenBSD: HTTP.pm,v 1.7 2011/07/18 20:47:28 espie Exp $ 4# 5# Copyright (c) 2011 Marc Espie <espie@openbsd.org> 6# 7# Permission to use, copy, modify, and distribute this software for any 8# purpose with or without fee is hereby granted, provided that the above 9# copyright notice and this permission notice appear in all copies. 10# 11# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 12# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 13# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 14# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 15# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 16# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 17# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 18 19use strict; 20use warnings; 21 22package OpenBSD::Repository::HTTP; 23sub urlscheme 24{ 25 return 'http'; 26} 27 28sub initiate 29{ 30 my $self = shift; 31 my ($rdfh, $wrfh); 32 pipe($self->{getfh}, $rdfh); 33 pipe($wrfh, $self->{cmdfh}); 34 my $pid = fork(); 35 if ($pid == 0) { 36 close($self->{getfh}); 37 close($self->{cmdfh}); 38 close(STDOUT); 39 close(STDIN); 40 open(STDOUT, '>&', $wrfh); 41 open(STDIN, '<&', $rdfh); 42 _Proxy::main($self); 43 } else { 44 close($rdfh); 45 close($wrfh); 46 $self->{controller} = $pid; 47 } 48} 49 50package _Proxy::Connection; 51sub new 52{ 53 my ($class, $host, $port) = @_; 54 require IO::Socket::INET; 55 my $o = IO::Socket::INET->new( 56 PeerHost => $host, 57 PeerPort => $port); 58 bless {fh => $o, host => $host, buffer => ''}, $class; 59} 60 61sub getline 62{ 63 my $self = shift; 64 while (1) { 65 if ($self->{buffer} =~ s/^(.*?)\015\012//) { 66 return $1; 67 } 68 my $buffer; 69 $self->{fh}->recv($buffer, 1024); 70 $self->{buffer}.=$buffer; 71 } 72} 73 74sub retrieve 75{ 76 my ($self, $sz) = @_; 77 while(length($self->{buffer}) < $sz) { 78 my $buffer; 79 $self->{fh}->recv($buffer, $sz - length($self->{buffer})); 80 $self->{buffer}.=$buffer; 81 } 82 my $result= substr($self->{buffer}, 0, $sz); 83 $self->{buffer} = substr($self->{buffer}, $sz); 84 return $result; 85} 86 87sub retrieve_chunked 88{ 89 my $self = shift; 90 my $result = ''; 91 while (1) { 92 my $sz = $self->getline; 93 if ($sz =~ m/^([0-9a-fA-F]+)/) { 94 my $realsize = hex($1); 95 last if $realsize == 0; 96 $result .= $self->retrieve($realsize); 97 } 98 } 99 return $result; 100} 101 102sub retrieve_response 103{ 104 my ($self, $h) = @_; 105 106 if (defined $h->{'Content-Length'}) { 107 return $self->retrieve($h->{'Content-Length'}); 108 } 109 if (($h->{'Transfer-Encoding'}//'') eq 'chunked') { 110 return $self->retrieve_chunked; 111 } 112 return undef; 113} 114 115sub print 116{ 117 my ($self, @l) = @_; 118 print {$self->{fh}} @l; 119} 120 121package _Proxy; 122 123my $pid; 124my $token = 0; 125 126sub batch(&) 127{ 128 my $code = shift; 129 if (defined $pid) { 130 waitpid($pid, 0); 131 undef $pid; 132 } 133 $token++; 134 $pid = fork(); 135 if (!defined $pid) { 136 print "ERROR: fork failed: $!\n"; 137 } 138 if ($pid == 0) { 139 &$code(); 140 exit(0); 141 } 142} 143 144sub abort_batch() 145{ 146 if (defined $pid) { 147 kill 1, $pid; 148 waitpid($pid, 0); 149 undef $pid; 150 } 151 print "\nABORTED $token\n"; 152} 153 154sub get_directory 155{ 156 my ($o, $dname) = @_; 157 my $crlf="\015\012"; 158 $o->print("GET $dname/ HTTP/1.1", $crlf, 159 "Host: ", $o->{host}, $crlf, $crlf); 160 # get header 161 162 my $_ = $o->getline; 163 if (!m,^HTTP/1\.1\s+(\d\d\d),) { 164 print "ERROR\n"; 165 return; 166 } 167 my $code = $1; 168 my $h = {}; 169 while ($_ = $o->getline) { 170 last if m/^$/; 171 if (m/^([\w\-]+)\:\s*(.*)$/) { 172 print STDERR "$1 => $2\n"; 173 $h->{$1} = $2; 174 } else { 175 print STDERR "unknown line: $_\n"; 176 } 177 } 178 my $r = $o->retrieve_response($h); 179 if (!defined $r) { 180 print "ERROR: can't decode response\n"; 181 } 182 if ($code != 200) { 183 print "ERROR: code was $code\n"; 184 return; 185 } 186 print "SUCCESS: directory $dname\n"; 187 for my $pkg ($r =~ m/\<A\s+HREF=\"(.+?)\.tgz\"\>/gio) { 188 $pkg = $1 if $pkg =~ m|^.*/(.*)$|; 189 # decode uri-encoding; from URI::Escape 190 $pkg =~ s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg; 191 print $pkg, "\n"; 192 } 193 print "\n"; 194 return; 195} 196 197sub get_file 198{ 199 my ($o, $fname) = @_; 200 201 my $crlf="\015\012"; 202 my $first = 1; 203 my $start = 0; 204 my $end = 4000; 205 my $total_size = 0; 206 207 do { 208 $o->print("GET $fname HTTP/1.1", $crlf, 209 "Host: ", $o->{host}, $crlf, 210 "Range: bytes=",$start, "-", $end-1, $crlf, $crlf); 211 # get header 212 213 my $_ = $o->getline; 214 if (!m,^HTTP/1\.1\s+(\d\d\d),) { 215 print "ERROR\n"; 216 return; 217 } 218 $end *= 2; 219 my $code = $1; 220 my $h = {}; 221 while ($_ = $o->getline) { 222 last if m/^$/; 223 if (m/^([\w\-]+)\:\s*(.*)$/) { 224 print STDERR "$1 => $2\n"; 225 $h->{$1} = $2; 226 } else { 227 print STDERR "unknown line: $_\n"; 228 } 229 } 230 if (defined $h->{'Content-Range'} && $h->{'Content-Range'} =~ 231 m/^bytes\s+\d+\-\d+\/(\d+)/) { 232 $total_size = $1; 233 } 234 if ($first) { 235 print "TRANSFER: $total_size\n"; 236 $first = 0; 237 } 238 my $r = $o->retrieve_response($h); 239 if (!defined $r) { 240 print "ERROR: can't decode response\n"; 241 } 242 if ($code != 200 && $code != 206) { 243 print "ERROR: code was $code\n"; 244 return; 245 } 246 print $r; 247 $start = $end; 248 } while ($end < $total_size); 249} 250 251sub main 252{ 253 my $self = shift; 254 my $o = _Proxy::Connection->new($self->{host}, "www"); 255 while (<STDIN>) { 256 chomp; 257 if (m/^LIST\s+(.*)$/o) { 258 my $dname = $1; 259 batch(sub {get_directory($o, $dname);}); 260 } elsif (m/^GET\s+(.*)$/o) { 261 my $fname = $1; 262 batch(sub { get_file($o, $fname);}); 263 } elsif (m/^BYE$/o) { 264 exit(0); 265 } elsif (m/^ABORT$/o) { 266 abort_batch(); 267 } else { 268 print "ERROR: Unknown command\n"; 269 } 270 } 271} 272 273 274sub todo 275{ 276 my ($o, $file) = @_; 277 my $crlf="\015\012"; 278 open my $fh, '>', $file; 279 280 my $start = 0; 281 my $end = 4000; 282 my $total_size = 0; 283 284 do { 285 $end *= 2; 286 $o->print("GET /pub/OpenBSD/snapshots/packages/amd64/$file HTTP/1.1$crlf", 287 "Host: www.w3.org$crlf", 288 "Range: bytes=",$start, "-", $end-1, $crlf, $crlf); 289 290 # get header 291 292 my $_ = $o->getline; 293 if (m,^HTTP/1\.1\s+(\d\d\d),) { 294 my $code = $1; 295 print "Code: $code\n"; 296 } else { 297 print $_, "\n"; 298 } 299 my $h = {}; 300 while ($_ = $o->getline) { 301 last if m/^$/; 302 if (m/^([\w\-]+)\:\s*(.*)$/) { 303 print "$1 => $2\n"; 304 $h->{$1} = $2; 305 } else { 306 print "unknown line: $_\n"; 307 } 308 } 309 310 if (defined $h->{'Content-Range'} && $h->{'Content-Range'} =~ 311 m/^bytes\s+\d+\-\d+\/(\d+)/) { 312 $total_size = $1; 313 } 314 print "END OF HEADER\n"; 315 316 if (defined $h->{'Content-Length'}) { 317 my $v = $o->retrieve($h->{'Content-Length'}); 318 print $fh $v; 319 } 320 $start = $end; 321 } while ($end < $total_size); 322} 323 3241; 325