HTTP.pm revision 1.6
1112918Sjeff#! /usr/bin/perl
2112918Sjeff# ex:ts=8 sw=4:
3112918Sjeff# $OpenBSD: HTTP.pm,v 1.6 2011/07/18 20:21:40 espie Exp $
4112918Sjeff#
5112918Sjeff# Copyright (c) 2011 Marc Espie <espie@openbsd.org>
6112918Sjeff#
7112918Sjeff# Permission to use, copy, modify, and distribute this software for any
8112918Sjeff# purpose with or without fee is hereby granted, provided that the above
9112918Sjeff# copyright notice and this permission notice appear in all copies.
10112918Sjeff#
11112918Sjeff# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
12112918Sjeff# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13165967Simp# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
14112918Sjeff# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15112918Sjeff# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16112918Sjeff# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17112918Sjeff# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18112918Sjeff
19112918Sjeffuse strict;
20112918Sjeffuse warnings;
21112918Sjeff
22112918Sjeffpackage OpenBSD::Repository::HTTP;
23112918Sjeffsub urlscheme
24112918Sjeff{
25112918Sjeff	return 'http';
26112918Sjeff}
27112918Sjeff
28112918Sjeffsub initiate
29144518Sdavidxu{
30297706Skib	my $self = shift;
31297706Skib	my ($rdfh, $wrfh);
32297706Skib	pipe($self->{getfh}, $rdfh);
33157457Sdavidxu	pipe($wrfh, $self->{cmdfh});
34112918Sjeff	my $pid = fork();
35112918Sjeff	if ($pid == 0) {
36112918Sjeff		close($self->{getfh});
37112918Sjeff		close($self->{cmdfh});
38157457Sdavidxu		close(STDOUT);
39144518Sdavidxu		close(STDIN);
40112918Sjeff		open(STDOUT, '>&', $wrfh);
41112918Sjeff		open(STDIN, '<&', $rdfh);
42179662Sdavidxu		_Proxy::main($self);
43179662Sdavidxu	} else {
44179662Sdavidxu		close($rdfh);
45179662Sdavidxu		close($wrfh);
46112918Sjeff		$self->{controller} = $pid;
47112918Sjeff	}
48112918Sjeff}
49112918Sjeff
50179662Sdavidxupackage _Proxy::Connection;
51179662Sdavidxusub new
52112918Sjeff{
53144518Sdavidxu	my ($class, $host, $port) = @_;
54179662Sdavidxu	require IO::Socket::INET;
55112918Sjeff	my $o = IO::Socket::INET->new(
56179662Sdavidxu		PeerHost => $host,
57179662Sdavidxu		PeerPort => $port);
58179662Sdavidxu	bless {fh => $o, host => $host, buffer => ''}, $class;
59179662Sdavidxu}
60179662Sdavidxu
61179662Sdavidxusub getline
62112918Sjeff{
63112918Sjeff	my $self = shift;
64112918Sjeff	while (1) {
65179662Sdavidxu		if ($self->{buffer} =~ s/^(.*?)\015\012//) {
66112918Sjeff			return $1;
67144518Sdavidxu		}
68112918Sjeff		my $buffer;
69112918Sjeff		$self->{fh}->recv($buffer, 1024);
70112918Sjeff		$self->{buffer}.=$buffer;
71179662Sdavidxu    	}
72179662Sdavidxu}
73112918Sjeff
74179662Sdavidxusub retrieve
75144518Sdavidxu{
76112918Sjeff	my ($self, $sz) = @_;
77112918Sjeff	while(length($self->{buffer}) < $sz) {
78179662Sdavidxu		my $buffer;
79179662Sdavidxu		$self->{fh}->recv($buffer, $sz - length($self->{buffer}));
80179662Sdavidxu		$self->{buffer}.=$buffer;
81179662Sdavidxu	}
82179662Sdavidxu	my $result= substr($self->{buffer}, 0, $sz);
83179662Sdavidxu	$self->{buffer} = substr($self->{buffer}, $sz);
84213163Sdavidxu	return $result;
85213159Sdavidxu}
86213163Sdavidxu
87179662Sdavidxusub retrieve_chunked
88281857Spfg{
89179662Sdavidxu	my $self = shift;
90179662Sdavidxu	my $result = '';
91179662Sdavidxu	while (1) {
92179662Sdavidxu		my $sz = $self->getline;
93179662Sdavidxu		if ($sz =~ m/^([0-9a-fA-F]+)/) {
94179662Sdavidxu			my $realsize = hex($1);
95179662Sdavidxu			last if $realsize == 0;
96179662Sdavidxu			$result .= $self->retrieve($realsize);
97179662Sdavidxu		}
98179662Sdavidxu	}
99179662Sdavidxu	return $result;
100179662Sdavidxu}
101179662Sdavidxu
102sub retrieve_response
103{
104	my ($self, $h) = @_;
105
106	if ($h->{'Transfer-Encoding'} eq 'chunked') {
107		return $self->retrieve_chunked;
108	}
109	if (defined $h->{'Content-Length'}) {
110		return $self->retrieve($h->{'Content-Length'});
111	}
112	return undef;
113}
114
115sub print
116{
117	my ($self, @l) = @_;
118	print {$self->{fh}} @l;
119}
120
121package _Proxy;
122
123my $pid;
124my $token = 0;
125
126sub batch(&)
127{
128	my $code = shift;
129	if (defined $pid) {
130		waitpid($pid, 0);
131		undef $pid;
132	}
133	$token++;
134	$pid = fork();
135	if (!defined $pid) {
136		print "ERROR: fork failed: $!\n";
137	}
138	if ($pid == 0) {
139		&$code();
140		exit(0);
141	}
142}
143
144sub abort_batch()
145{
146	if (defined $pid) {
147		kill 1, $pid;
148		waitpid($pid, 0);
149		undef $pid;
150	}
151	print "\nABORTED $token\n";
152}
153
154sub get_directory
155{
156	my ($o, $dname) = @_;
157	my $crlf="\015\012";
158	$o->print("GET $dname/ HTTP/1.1", $crlf,
159	    "Host: ", $o->{host}, $crlf, $crlf);
160	# get header
161
162	my $_ = $o->getline;
163	if (!m,^HTTP/1\.1\s+(\d\d\d),) {
164		print "ERROR\n";
165		return;
166	}
167	my $code = $1;
168	my $h = {};
169	while ($_ = $o->getline) {
170		last if m/^$/;
171		if (m/^([\w\-]+)\:\s*(.*)$/) {
172			print STDERR "$1 => $2\n";
173			$h->{$1} = $2;
174		} else {
175			print STDERR "unknown line: $_\n";
176		}
177	}
178	my $r = $o->retrieve_response($h);
179	if (!defined $r) {
180		print "ERROR: can't decode response\n";
181	}
182	if ($code != 200) {
183			print "ERROR: code was $code\n";
184			return;
185	}
186	print "SUCCESS: directory $dname\n";
187	for my $pkg ($r =~ m/\<A\s+HREF=\"(.+?)\.tgz\"\>/gio) {
188		$pkg = $1 if $pkg =~ m|^.*/(.*)$|;
189		# decode uri-encoding; from URI::Escape
190		$pkg =~ s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg;
191		print $pkg, "\n";
192	}
193	print "\n";
194	return;
195}
196
197sub get_file
198{
199	my ($o, $fname) = @_;
200	my $crlf="\015\012";
201	$o->print("GET $fname HTTP/1.1", $crlf,
202	    "Host: ", $o->{host}, $crlf, $crlf);
203	# get header
204
205	my $_ = $o->getline;
206	if (!m,^HTTP/1\.1\s+(\d\d\d),) {
207		print "ERROR\n";
208		return;
209	}
210	my $code = $1;
211	my $h = {};
212	while ($_ = $o->getline) {
213		last if m/^$/;
214		if (m/^([\w\-]+)\:\s*(.*)$/) {
215			print STDERR "$1 => $2\n";
216			$h->{$1} = $2;
217		} else {
218			print STDERR "unknown line: $_\n";
219		}
220	}
221	my $r = $o->retrieve_response($h);
222	if (!defined $r) {
223		print "ERROR: can't decode response\n";
224	}
225	if ($code != 200) {
226		print "ERROR: code was $code\n";
227		return;
228	}
229}
230
231sub main
232{
233	my $self = shift;
234	my $o = _Proxy::Connection->new($self->{host}, "www");
235	while (<STDIN>) {
236		chomp;
237		if (m/^LIST\s+(.*)$/o) {
238			my $dname = $1;
239			batch(sub {get_directory($o, $dname);});
240		} elsif (m/^GET\s+(.*)$/o) {
241			my $fname = $1;
242			batch(sub { get_file($o, $fname);});
243		} elsif (m/^BYE$/o) {
244			exit(0);
245		} elsif (m/^ABORT$/o) {
246			abort_batch();
247		} else {
248			print "ERROR: Unknown command\n";
249		}
250	}
251}
252
253
254sub todo
255{
256	my ($o, $file) = @_;
257	my $crlf="\015\012";
258	open my $fh, '>', $file;
259
260	my $start = 0;
261	my $end = 4000;
262	my $total_size = 0;
263
264	do {
265		$end *= 2;
266		$o->print("GET /pub/OpenBSD/snapshots/packages/amd64/$file HTTP/1.1$crlf",
267    "Host: www.w3.org$crlf",
268		"Range: bytes=",$start, "-", $end-1, $crlf, $crlf);
269
270		# get header
271
272		my $_ = $o->getline;
273		if (m,^HTTP/1\.1\s+(\d\d\d),) {
274			my $code = $1;
275			print "Code: $code\n";
276		} else {
277			print $_, "\n";
278		}
279		my $h = {};
280		while ($_ = $o->getline) {
281			last if m/^$/;
282			if (m/^([\w\-]+)\:\s*(.*)$/) {
283				print "$1 => $2\n";
284				$h->{$1} = $2;
285			} else {
286				print "unknown line: $_\n";
287			}
288		}
289
290		if (defined $h->{'Content-Range'} && $h->{'Content-Range'} =~
291			m/^bytes\s+\d+\-\d+\/(\d+)/) {
292				$total_size = $1;
293		}
294		print "END OF HEADER\n";
295
296		if (defined $h->{'Content-Length'}) {
297			my $v = $o->retrieve($h->{'Content-Length'});
298			print $fh $v;
299		}
300		$start = $end;
301	} while ($end < $total_size);
302}
303
3041;
305