HTTP.pm revision 1.7
1#! /usr/bin/perl
2# ex:ts=8 sw=4:
3# $OpenBSD: HTTP.pm,v 1.7 2011/07/18 20:47:28 espie Exp $
4#
5# Copyright (c) 2011 Marc Espie <espie@openbsd.org>
6#
7# Permission to use, copy, modify, and distribute this software for any
8# purpose with or without fee is hereby granted, provided that the above
9# copyright notice and this permission notice appear in all copies.
10#
11# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
12# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
14# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18
19use strict;
20use warnings;
21
22package OpenBSD::Repository::HTTP;
23sub urlscheme
24{
25	return 'http';
26}
27
28sub initiate
29{
30	my $self = shift;
31	my ($rdfh, $wrfh);
32	pipe($self->{getfh}, $rdfh);
33	pipe($wrfh, $self->{cmdfh});
34	my $pid = fork();
35	if ($pid == 0) {
36		close($self->{getfh});
37		close($self->{cmdfh});
38		close(STDOUT);
39		close(STDIN);
40		open(STDOUT, '>&', $wrfh);
41		open(STDIN, '<&', $rdfh);
42		_Proxy::main($self);
43	} else {
44		close($rdfh);
45		close($wrfh);
46		$self->{controller} = $pid;
47	}
48}
49
50package _Proxy::Connection;
51sub new
52{
53	my ($class, $host, $port) = @_;
54	require IO::Socket::INET;
55	my $o = IO::Socket::INET->new(
56		PeerHost => $host,
57		PeerPort => $port);
58	bless {fh => $o, host => $host, buffer => ''}, $class;
59}
60
61sub getline
62{
63	my $self = shift;
64	while (1) {
65		if ($self->{buffer} =~ s/^(.*?)\015\012//) {
66			return $1;
67		}
68		my $buffer;
69		$self->{fh}->recv($buffer, 1024);
70		$self->{buffer}.=$buffer;
71    	}
72}
73
74sub retrieve
75{
76	my ($self, $sz) = @_;
77	while(length($self->{buffer}) < $sz) {
78		my $buffer;
79		$self->{fh}->recv($buffer, $sz - length($self->{buffer}));
80		$self->{buffer}.=$buffer;
81	}
82	my $result= substr($self->{buffer}, 0, $sz);
83	$self->{buffer} = substr($self->{buffer}, $sz);
84	return $result;
85}
86
87sub retrieve_chunked
88{
89	my $self = shift;
90	my $result = '';
91	while (1) {
92		my $sz = $self->getline;
93		if ($sz =~ m/^([0-9a-fA-F]+)/) {
94			my $realsize = hex($1);
95			last if $realsize == 0;
96			$result .= $self->retrieve($realsize);
97		}
98	}
99	return $result;
100}
101
102sub retrieve_response
103{
104	my ($self, $h) = @_;
105
106	if (defined $h->{'Content-Length'}) {
107		return $self->retrieve($h->{'Content-Length'});
108	}
109	if (($h->{'Transfer-Encoding'}//'') eq 'chunked') {
110		return $self->retrieve_chunked;
111	}
112	return undef;
113}
114
115sub print
116{
117	my ($self, @l) = @_;
118	print {$self->{fh}} @l;
119}
120
121package _Proxy;
122
123my $pid;
124my $token = 0;
125
126sub batch(&)
127{
128	my $code = shift;
129	if (defined $pid) {
130		waitpid($pid, 0);
131		undef $pid;
132	}
133	$token++;
134	$pid = fork();
135	if (!defined $pid) {
136		print "ERROR: fork failed: $!\n";
137	}
138	if ($pid == 0) {
139		&$code();
140		exit(0);
141	}
142}
143
144sub abort_batch()
145{
146	if (defined $pid) {
147		kill 1, $pid;
148		waitpid($pid, 0);
149		undef $pid;
150	}
151	print "\nABORTED $token\n";
152}
153
154sub get_directory
155{
156	my ($o, $dname) = @_;
157	my $crlf="\015\012";
158	$o->print("GET $dname/ HTTP/1.1", $crlf,
159	    "Host: ", $o->{host}, $crlf, $crlf);
160	# get header
161
162	my $_ = $o->getline;
163	if (!m,^HTTP/1\.1\s+(\d\d\d),) {
164		print "ERROR\n";
165		return;
166	}
167	my $code = $1;
168	my $h = {};
169	while ($_ = $o->getline) {
170		last if m/^$/;
171		if (m/^([\w\-]+)\:\s*(.*)$/) {
172			print STDERR "$1 => $2\n";
173			$h->{$1} = $2;
174		} else {
175			print STDERR "unknown line: $_\n";
176		}
177	}
178	my $r = $o->retrieve_response($h);
179	if (!defined $r) {
180		print "ERROR: can't decode response\n";
181	}
182	if ($code != 200) {
183			print "ERROR: code was $code\n";
184			return;
185	}
186	print "SUCCESS: directory $dname\n";
187	for my $pkg ($r =~ m/\<A\s+HREF=\"(.+?)\.tgz\"\>/gio) {
188		$pkg = $1 if $pkg =~ m|^.*/(.*)$|;
189		# decode uri-encoding; from URI::Escape
190		$pkg =~ s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg;
191		print $pkg, "\n";
192	}
193	print "\n";
194	return;
195}
196
197sub get_file
198{
199	my ($o, $fname) = @_;
200
201	my $crlf="\015\012";
202	my $first = 1;
203	my $start = 0;
204	my $end = 4000;
205	my $total_size = 0;
206
207	do {
208		$o->print("GET $fname HTTP/1.1", $crlf,
209		    "Host: ", $o->{host}, $crlf,
210		    "Range: bytes=",$start, "-", $end-1, $crlf, $crlf);
211		# get header
212
213		my $_ = $o->getline;
214		if (!m,^HTTP/1\.1\s+(\d\d\d),) {
215			print "ERROR\n";
216			return;
217		}
218		$end *= 2;
219		my $code = $1;
220		my $h = {};
221		while ($_ = $o->getline) {
222			last if m/^$/;
223			if (m/^([\w\-]+)\:\s*(.*)$/) {
224				print STDERR "$1 => $2\n";
225				$h->{$1} = $2;
226			} else {
227				print STDERR "unknown line: $_\n";
228			}
229		}
230		if (defined $h->{'Content-Range'} && $h->{'Content-Range'} =~
231			m/^bytes\s+\d+\-\d+\/(\d+)/) {
232				$total_size = $1;
233		}
234		if ($first) {
235			print "TRANSFER: $total_size\n";
236			$first = 0;
237		}
238		my $r = $o->retrieve_response($h);
239		if (!defined $r) {
240			print "ERROR: can't decode response\n";
241		}
242		if ($code != 200 && $code != 206) {
243			print "ERROR: code was $code\n";
244			return;
245		}
246		print $r;
247		$start = $end;
248	} while ($end < $total_size);
249}
250
251sub main
252{
253	my $self = shift;
254	my $o = _Proxy::Connection->new($self->{host}, "www");
255	while (<STDIN>) {
256		chomp;
257		if (m/^LIST\s+(.*)$/o) {
258			my $dname = $1;
259			batch(sub {get_directory($o, $dname);});
260		} elsif (m/^GET\s+(.*)$/o) {
261			my $fname = $1;
262			batch(sub { get_file($o, $fname);});
263		} elsif (m/^BYE$/o) {
264			exit(0);
265		} elsif (m/^ABORT$/o) {
266			abort_batch();
267		} else {
268			print "ERROR: Unknown command\n";
269		}
270	}
271}
272
273
274sub todo
275{
276	my ($o, $file) = @_;
277	my $crlf="\015\012";
278	open my $fh, '>', $file;
279
280	my $start = 0;
281	my $end = 4000;
282	my $total_size = 0;
283
284	do {
285		$end *= 2;
286		$o->print("GET /pub/OpenBSD/snapshots/packages/amd64/$file HTTP/1.1$crlf",
287    "Host: www.w3.org$crlf",
288		"Range: bytes=",$start, "-", $end-1, $crlf, $crlf);
289
290		# get header
291
292		my $_ = $o->getline;
293		if (m,^HTTP/1\.1\s+(\d\d\d),) {
294			my $code = $1;
295			print "Code: $code\n";
296		} else {
297			print $_, "\n";
298		}
299		my $h = {};
300		while ($_ = $o->getline) {
301			last if m/^$/;
302			if (m/^([\w\-]+)\:\s*(.*)$/) {
303				print "$1 => $2\n";
304				$h->{$1} = $2;
305			} else {
306				print "unknown line: $_\n";
307			}
308		}
309
310		if (defined $h->{'Content-Range'} && $h->{'Content-Range'} =~
311			m/^bytes\s+\d+\-\d+\/(\d+)/) {
312				$total_size = $1;
313		}
314		print "END OF HEADER\n";
315
316		if (defined $h->{'Content-Length'}) {
317			my $v = $o->retrieve($h->{'Content-Length'});
318			print $fh $v;
319		}
320		$start = $end;
321	} while ($end < $total_size);
322}
323
3241;
325