HTTP.pm revision 1.15
1#! /usr/bin/perl
2# ex:ts=8 sw=4:
3# $OpenBSD: HTTP.pm,v 1.15 2023/06/13 09:07:18 espie Exp $
4#
5# Copyright (c) 2011 Marc Espie <espie@openbsd.org>
6#
7# Permission to use, copy, modify, and distribute this software for any
8# purpose with or without fee is hereby granted, provided that the above
9# copyright notice and this permission notice appear in all copies.
10#
11# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
12# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
14# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18
19use strict;
20use warnings;
21
22use OpenBSD::PackageRepository::Persistent;
23
24package OpenBSD::PackageRepository::HTTP1;
25our @ISA = qw(OpenBSD::PackageRepository::Persistent);
26sub urlscheme($)
27{
28	return 'http';
29}
30
31sub initiate($self)
32{
33	my ($rdfh, $wrfh);
34	pipe($self->{getfh}, $wrfh) or die;
35	pipe($rdfh, $self->{cmdfh}) or die;
36
37	my $old =select $self->{getfh};
38	$| = 1;
39	select $self->{cmdfh};
40	$| = 1;
41	select $rdfh;
42	$| = 1;
43	select $wrfh;
44	$| = 1;
45	select $old;
46	my $pid = fork();
47	if ($pid == 0) {
48		close($self->{getfh});
49		close($self->{cmdfh});
50#		close(STDOUT);
51#		close(STDIN);
52		open(STDOUT, '>&', $wrfh);
53		open(STDIN, '<&', $rdfh);
54		_Proxy::main($self);
55	} else {
56		close($rdfh);
57		close($wrfh);
58		$self->{controller} = $pid;
59	}
60}
61
62package _Proxy::Header;
63
64sub new($class)
65{
66	bless {}, $class;
67}
68
69sub code($self)
70{
71	my $self = shift;
72	return $self->{code};
73}
74
75package _Proxy::Connection;
76sub new($class, $host, $port)
77{
78	require IO::Socket::INET;
79	my $o = IO::Socket::INET->new(
80		PeerHost => $host,
81		PeerPort => $port);
82	my $old = select($o);
83	$| = 1;
84	select($old);
85	bless {fh => $o, host => $host, buffer => ''}, $class;
86}
87
88sub send_header($o, $document, %extra)
89{
90	my $crlf="\015\012";
91	$o->print("GET $document HTTP/1.1", $crlf,
92	    "Host: ", $o->{host}, $crlf);
93	if (defined $extra{range}) {
94		my ($a, $b) = @{$extra{range}};
95	    	$o->print("Range: bytes=$a-$b", $crlf);
96	}
97	$o->print($crlf);
98}
99
100sub get_header($o)
101{
102	my $l = $o->getline;
103	if ($l !~ m,^HTTP/1\.1\s+(\d\d\d),) {
104		return undef;
105	}
106	my $h = _Proxy::Header->new;
107	$h->{code} = $1;
108	while ($l = $o->getline) {
109		last if $l =~ m/^$/;
110		if ($l =~ m/^([\w\-]+)\:\s*(.*)$/) {
111			$h->{$1} = $2;
112		} else {
113			print STDERR "unknown line: $l\n";
114		}
115	}
116	if (defined $h->{'Content-Length'}) {
117		$h->{length} = $h->{'Content-Length'}
118	} elsif (defined $h->{'Transfer-Encoding'} &&
119	    $h->{'Transfer-Encoding'} eq 'chunked') {
120		$h->{chunked} = 1;
121	}
122	if (defined $h->{'Content-Range'} &&
123	    $h->{'Content-Range'} =~ m/^bytes\s+(\d+)\-(\d+)\/(\d+)/) {
124		($h->{start}, $h->{end}, $h->{size}) = ($1, $2, $3);
125	}
126	$o->{header} = $h;
127	return $h;
128}
129
130sub getline($self)
131{
132	while (1) {
133		if ($self->{buffer} =~ s/^(.*?)\015\012//) {
134			return $1;
135		}
136		my $buffer;
137		$self->{fh}->recv($buffer, 1024);
138		$self->{buffer}.=$buffer;
139    	}
140}
141
142sub retrieve($self, $sz)
143{
144	while(length($self->{buffer}) < $sz) {
145		my $buffer;
146		$self->{fh}->recv($buffer, $sz - length($self->{buffer}));
147		$self->{buffer}.=$buffer;
148	}
149	my $result= substr($self->{buffer}, 0, $sz);
150	$self->{buffer} = substr($self->{buffer}, $sz);
151	return $result;
152}
153
154sub retrieve_and_print($self, $sz, $fh)
155{
156	my $result = substr($self->{buffer}, 0, $sz);
157	print $fh $result;
158	my $retrieved = length($result);
159	if ($retrieved == $sz) {
160		$self->{buffer} = substr($self->{buffer}, $sz);
161		return;
162	} else {
163		$self->{buffer} = '';
164	}
165	while ($retrieved < $sz) {
166		$self->{fh}->recv($result, $sz - $retrieved);
167		print $fh $result;
168		$retrieved += length($result);
169	}
170}
171
172sub retrieve_chunked($self)
173{
174	my $result = '';
175	while (1) {
176		my $sz = $self->getline;
177		if ($sz =~ m/^([0-9a-fA-F]+)/) {
178			my $realsize = hex($1);
179			last if $realsize == 0;
180			$result .= $self->retrieve($realsize);
181		}
182	}
183	return $result;
184}
185
186sub retrieve_response($self, $h)
187{
188	if ($h->{chunked}) {
189		return $self->retrieve_chunked;
190	}
191	if ($h->{length}) {
192		return $self->retrieve($h->{length});
193	}
194	return undef;
195}
196
197sub retrieve_response_and_print($self, $h, $fh)
198{
199	if ($h->{chunked}) {
200		print $fh $self->retrieve_chunked;
201	}
202	if ($h->{length}) {
203		$self->retrieve_and_print($h->{length}, $fh);
204	}
205}
206
207sub print($self, @l)
208{
209#	print STDERR "Before print\n";
210	if (!print {$self->{fh}} @l) {
211		print STDERR "network print failed with $!\n";
212	}
213#	print STDERR "After print\n";
214}
215
216package _Proxy;
217
218my $pid;
219my $token = 0;
220
221sub batch($code)
222{
223	if (defined $pid) {
224		waitpid($pid, 0);
225		undef $pid;
226	}
227	$token++;
228	$pid = fork();
229	if (!defined $pid) {
230		print "ERROR: fork failed: $!\n";
231	}
232	if ($pid == 0) {
233		&$code();
234		exit(0);
235	}
236}
237
238sub abort_batch()
239{
240	if (defined $pid) {
241		kill HUP => $pid;
242		waitpid($pid, 0);
243		undef $pid;
244	}
245	print "\nABORTED $token\n";
246}
247
248sub get_directory($o, $dname)
249{
250	local $SIG{'HUP'} = 'IGNORE';
251	$o->send_header("$dname/");
252	my $h = $o->get_header;
253	if (!defined $h) {
254		print "ERROR: can't decode header\n";
255		exit 1;
256	}
257
258	my $r = $o->retrieve_response($h);
259	if (!defined $r) {
260		print "ERROR: can't decode response\n";
261	}
262	if ($h->code != 200) {
263			print "ERROR: code was ", $h->code, "\n";
264			exit 1;
265	}
266	print "SUCCESS: directory $dname\n";
267	for my $pkg ($r =~ m/\<A[^>]*\s+HREF=\"(.+?)\.tgz\"/gio) {
268		$pkg = $1 if $pkg =~ m|^.*/(.*)$|;
269		# decode uri-encoding; from URI::Escape
270		$pkg =~ s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg;
271		print $pkg, "\n";
272	}
273	print "\n";
274	return;
275}
276
277use File::Basename;
278
279sub get_file($o, $fname)
280{
281	my $bailout = 0;
282	$SIG{'HUP'} = sub {
283		$bailout++;
284	};
285	my $first = 1;
286	my $start = 0;
287	my $end = 2000;
288	my $total_size = 0;
289
290	do {
291		$end *= 2;
292		$o->send_header($fname, range => [$start, $end-1]);
293		my $h = $o->get_header;
294		if (!defined $h) {
295			print "ERROR\n";
296			exit 1;
297		}
298		if (defined $h->{size}) {
299			$total_size = $h->{size};
300		}
301		if ($h->code != 200 && $h->code != 206) {
302			print "ERROR: code was ", $h->code, "\n";
303			my $r = $o->retrieve_response($h);
304			exit 1;
305		}
306		if ($first) {
307			print "TRANSFER: $total_size\n";
308			$first = 0;
309		}
310		$o->retrieve_response_and_print($h, \*STDOUT);
311		$start = $end;
312		if ($bailout) {
313			exit 0;
314		}
315	} while ($end < $total_size);
316}
317
318sub main($self)
319{
320	my $o = _Proxy::Connection->new($self->{host}, "www");
321	while (<STDIN>) {
322		chomp;
323		if (m/^LIST\s+(.*)$/o) {
324			my $dname = $1;
325			batch(sub() {get_directory($o, $dname);});
326		} elsif (m/^GET\s+(.*)$/o) {
327			my $fname = $1;
328			batch(sub() { get_file($o, $fname);});
329		} elsif (m/^BYE$/o) {
330			exit(0);
331		} elsif (m/^ABORT$/o) {
332			abort_batch();
333		} else {
334			print "ERROR: Unknown command\n";
335		}
336	}
337}
338
3391;
340