1package Bencode;
2BEGIN {
3  $Bencode::VERSION = '1.4';
4}
5use strict;
6use Carp;
7use Exporter;
8
9# ABSTRACT: BitTorrent serialisation format
10
11use vars qw( $VERSION @ISA @EXPORT_OK $DEBUG $do_lenient_decode $max_depth );
12
13@ISA = qw( Exporter );
14@EXPORT_OK = qw( bencode bdecode );
15
16sub _msg { sprintf "@_", pos() || 0 }
17
18sub _bdecode_string {
19
20	if ( m/ \G ( 0 | [1-9] \d* ) : /xgc ) {
21		my $len = $1;
22
23		croak _msg 'unexpected end of string data starting at %s'
24			if $len > length() - pos();
25
26		my $str = substr $_, pos(), $len;
27		pos() = pos() + $len;
28
29		warn _msg STRING => "(length $len)", $len < 200 ? "[$str]" : () if $DEBUG;
30
31		return $str;
32	}
33	else {
34		my $pos = pos();
35		if ( m/ \G -? 0? \d+ : /xgc ) {
36			pos() = $pos;
37			croak _msg 'malformed string length at %s';
38		}
39	}
40
41	return;
42}
43
44sub _bdecode_chunk {
45	warn _msg 'decoding at %s' if $DEBUG;
46
47	local $max_depth = $max_depth - 1 if defined $max_depth;
48
49	if ( defined( my $str = _bdecode_string() ) ) {
50		return $str;
51	}
52	elsif ( m/ \G i /xgc ) {
53		croak _msg 'unexpected end of data at %s' if m/ \G \z /xgc;
54
55		m/ \G ( 0 | -? [1-9] \d* ) e /xgc
56			or croak _msg 'malformed integer data at %s';
57
58		warn _msg INTEGER => $1 if $DEBUG;
59		return $1;
60	}
61	elsif ( m/ \G l /xgc ) {
62		warn _msg 'LIST' if $DEBUG;
63
64		croak _msg 'nesting depth exceeded at %s'
65			if defined $max_depth and $max_depth < 0;
66
67		my @list;
68		until ( m/ \G e /xgc ) {
69			warn _msg 'list not terminated at %s, looking for another element' if $DEBUG;
70			push @list, _bdecode_chunk();
71		}
72		return \@list;
73	}
74	elsif ( m/ \G d /xgc ) {
75		warn _msg 'DICT' if $DEBUG;
76
77		croak _msg 'nesting depth exceeded at %s'
78			if defined $max_depth and $max_depth < 0;
79
80		my $last_key;
81		my %hash;
82		until ( m/ \G e /xgc ) {
83			warn _msg 'dict not terminated at %s, looking for another pair' if $DEBUG;
84
85			croak _msg 'unexpected end of data at %s'
86				if m/ \G \z /xgc;
87
88			my $key = _bdecode_string();
89			defined $key or croak _msg 'dict key is not a string at %s';
90
91			croak _msg 'duplicate dict key at %s'
92				if exists $hash{ $key };
93
94			croak _msg 'dict key not in sort order at %s'
95				if not( $do_lenient_decode ) and defined $last_key and $key lt $last_key;
96
97			croak _msg 'dict key is missing value at %s'
98				if m/ \G e /xgc;
99
100			$last_key = $key;
101			$hash{ $key } = _bdecode_chunk();
102		}
103		return \%hash;
104	}
105	else {
106		croak _msg m/ \G \z /xgc ? 'unexpected end of data at %s' : 'garbage at %s';
107	}
108}
109
110sub bdecode {
111	local $_ = shift;
112	local $do_lenient_decode = shift;
113	local $max_depth = shift;
114	my $deserialised_data = _bdecode_chunk();
115	croak _msg 'trailing garbage at %s' if $_ !~ m/ \G \z /xgc;
116	return $deserialised_data;
117}
118
119sub _bencode {
120	my ( $data ) = @_;
121	if ( not ref $data ) {
122		return sprintf 'i%se', $data if $data =~ m/\A (?: 0 | -? [1-9] \d* ) \z/x;
123		return length( $data ) . ':' . $data;
124	}
125	elsif ( ref $data eq 'SCALAR' ) {
126		# escape hatch -- use this to avoid num/str heuristics
127		return length( $$data ) . ':' . $$data;
128	}
129	elsif ( ref $data eq 'ARRAY' ) {
130		return 'l' . join( '', map _bencode( $_ ), @$data ) . 'e';
131	}
132	elsif ( ref $data eq 'HASH' ) {
133		return 'd' . join( '', map { _bencode( \$_ ), _bencode( $data->{ $_ } ) } sort keys %$data ) . 'e';
134	}
135	else {
136		croak 'unhandled data type';
137	}
138}
139
140sub bencode {
141	croak 'need exactly one argument' if @_ != 1;
142	goto &_bencode;
143}
144
145bdecode( 'i1e' );
146
147
148
149=pod
150
151=head1 NAME
152
153Bencode - BitTorrent serialisation format
154
155=head1 VERSION
156
157version 1.4
158
159=head1 SYNOPSIS
160
161 use Bencode qw( bencode bdecode );
162
163 my $bencoded = bencode { 'age' => 25, 'eyes' => 'blue' };
164 print $bencoded, "\n";
165 my $decoded = bdecode $bencoded;
166
167=head1 DESCRIPTION
168
169This module implements the BitTorrent I<bencode> serialisation format as described in L<http://www.bittorrent.org/protocol.html>.
170
171=head1 INTERFACE
172
173=head2 C<bencode( $datastructure )>
174
175Takes a single argument which may be a scalar or a reference to a scalar, array or hash. Arrays and hashes may in turn contain values of these same types. Simple scalars that look like canonically represented integers will be serialised as such. To bypass the heuristic and force serialisation as a string, use a reference to a scalar.
176
177Croaks on unhandled data types.
178
179=head2 C<bdecode( $string [, $do_lenient_decode [, $max_depth ] ] )>
180
181Takes a string and returns the corresponding deserialised data structure.
182
183If you pass a true value for the second option, it will disregard the sort order of dict keys. This violation of the I<bencode> format is somewhat common.
184
185If you pass an integer for the third option, it will croak when attempting to parse dictionaries nested deeper than this level, to prevent DoS attacks using maliciously crafted input.
186
187Croaks on malformed data.
188
189=head1 DIAGNOSTICS
190
191=over
192
193=item C<trailing garbage at %s>
194
195Your data does not end after the first I<bencode>-serialised item.
196
197You may also get this error if a malformed item follows.
198
199=item C<garbage at %s>
200
201Your data is malformed.
202
203=item C<unexpected end of data at %s>
204
205Your data is truncated.
206
207=item C<unexpected end of string data starting at %s>
208
209Your data includes a string declared to be longer than the available data.
210
211=item C<malformed string length at %s>
212
213Your data contained a string with negative length or a length with leading zeroes.
214
215=item C<malformed integer data at %s>
216
217Your data contained something that was supposed to be an integer but didn't make sense.
218
219=item C<dict key not in sort order at %s>
220
221Your data violates the I<bencode> format constaint that dict keys must appear in lexical sort order.
222
223=item C<duplicate dict key at %s>
224
225Your data violates the I<bencode> format constaint that all dict keys must be unique.
226
227=item C<dict key is not a string at %s>
228
229Your data violates the I<bencode> format constaint that all dict keys be strings.
230
231=item C<dict key is missing value at %s>
232
233Your data contains a dictionary with an odd number of elements.
234
235=item C<nesting depth exceeded at %s>
236
237Your data contains dicts or lists that are nested deeper than the $max_depth passed to C<bdecode()>.
238
239=item C<unhandled data type>
240
241You are trying to serialise a data structure that consists of data types other than
242
243=over
244
245=item * scalars
246
247=item * references to arrays
248
249=item * references to hashes
250
251=item * references to scalars
252
253=back
254
255The format does not support this.
256
257=back
258
259=head1 BUGS AND LIMITATIONS
260
261Strings and numbers are practically indistinguishable in Perl, so C<bencode()> has to resort to a heuristic to decide how to serialise a scalar. This cannot be fixed.
262
263Please report any bugs or feature requests through the web interface at L<http://github.com/ap/Bencode/issues>.
264
265=head1 AUTHOR
266
267  Aristotle Pagaltzis <pagaltzis@gmx.de>
268
269=head1 COPYRIGHT AND LICENSE
270
271This software is copyright (c) 2010 by Aristotle Pagaltzis.
272
273This is free software; you can redistribute it and/or modify it under
274the same terms as the Perl 5 programming language system itself.
275
276=cut
277
278
279__END__
280
281