1package Bencode;
2use strict;
3use Carp;
4use Exporter;
5
6use vars qw( $VERSION @ISA @EXPORT_OK $DEBUG $do_lenient_decode );
7
8$VERSION = '1.31';
9
10@ISA = qw( Exporter );
11@EXPORT_OK = qw( bencode bdecode );
12
13sub _msg { sprintf "@_", pos() || 0 }
14
15sub _bdecode_string {
16
17	if ( m/ \G ( 0 | [1-9] \d* ) : /xgc ) {
18		my $len = $1;
19
20		croak _msg 'unexpected end of string data starting at %s' if $len > length() - pos();
21
22		my $str = substr $_, pos(), $len;
23		pos() = pos() + $len;
24
25		warn _msg STRING => "(length $len)", $len < 200 ? "[$str]" : () if $DEBUG;
26
27		return $str;
28	}
29	else {
30		my $pos = pos();
31		if ( m/ \G -? 0? \d+ : /xgc ) {
32			pos() = $pos;
33			croak _msg 'malformed string length at %s';
34		}
35	}
36
37	return;
38}
39
40sub _bdecode_chunk {
41	warn _msg 'decoding at %s' if $DEBUG;
42
43	if ( defined( my $str = _bdecode_string() ) ) {
44		return $str;
45	}
46	elsif ( m/ \G i /xgc ) {
47		croak _msg 'unexpected end of data at %s' if m/ \G \z /xgc;
48
49		m/ \G ( 0 | -? [1-9] \d* ) e /xgc
50			or croak _msg 'malformed integer data at %s';
51
52		warn _msg INTEGER => $1 if $DEBUG;
53		return $1;
54	}
55	elsif ( m/ \G l /xgc ) {
56		warn _msg 'LIST' if $DEBUG;
57		my @list;
58		until ( m/ \G e /xgc ) {
59			warn _msg 'list not terminated at %s, looking for another element' if $DEBUG;
60			push @list, _bdecode_chunk();
61		}
62		return \@list;
63	}
64	elsif ( m/ \G d /xgc ) {
65		warn _msg 'DICT' if $DEBUG;
66		my $last_key;
67		my %hash;
68		until ( m/ \G e /xgc ) {
69			warn _msg 'dict not terminated at %s, looking for another pair' if $DEBUG;
70
71			croak _msg 'unexpected end of data at %s'
72				if m/ \G \z /xgc;
73
74			my $key = _bdecode_string();
75			defined $key or croak _msg 'dict key is not a string at %s';
76
77			croak _msg 'duplicate dict key at %s'
78				if exists $hash{ $key };
79
80			croak _msg 'dict key not in sort order at %s'
81				if not( $do_lenient_decode ) and defined $last_key and $key lt $last_key;
82
83			croak _msg 'dict key is missing value at %s'
84				if m/ \G e /xgc;
85
86			$last_key = $key;
87			$hash{ $key } = _bdecode_chunk();
88		}
89		return \%hash;
90	}
91	else {
92		croak _msg m/ \G \z /xgc ? 'unexpected end of data at %s' : 'garbage at %s';
93	}
94}
95
96sub bdecode {
97	local $_ = shift;
98	local $do_lenient_decode = shift;
99	my $deserialised_data = _bdecode_chunk();
100	croak _msg 'trailing garbage at %s' if $_ !~ m/ \G \z /xgc;
101	return $deserialised_data;
102}
103
104sub _bencode {
105	my ( $data ) = @_;
106	if ( not ref $data ) {
107		return sprintf 'i%se', $data if $data =~ m/\A (?: 0 | -? [1-9] \d* ) \z/x;
108		return length( $data ) . ':' . $data;
109	}
110	elsif ( ref $data eq 'SCALAR' ) {
111		# escape hatch -- use this to avoid num/str heuristics
112		return length( $$data ) . ':' . $$data;
113	}
114	elsif ( ref $data eq 'ARRAY' ) {
115		return 'l' . join( '', map _bencode( $_ ), @$data ) . 'e';
116	}
117	elsif ( ref $data eq 'HASH' ) {
118		return 'd' . join( '', map { _bencode( \$_ ), _bencode( $data->{ $_ } ) } sort keys %$data ) . 'e';
119	}
120	else {
121		croak 'unhandled data type';
122	}
123}
124
125sub bencode {
126	croak 'need exactly one argument' if @_ != 1;
127	goto &_bencode;
128}
129
130bdecode( 'i1e' );
131
132__END__
133
134=head1 NAME
135
136Bencode - BitTorrent serialisation format
137
138=head1 VERSION
139
140This document describes Bencode version 1.0
141
142
143=head1 SYNOPSIS
144
145 use Bencode qw( bencode bdecode );
146
147 my $bencoded = bencode { 'age' => 25, 'eyes' => 'blue' };
148 print $bencoded, "\n";
149 my $decoded = bdecode $bencoded;
150
151
152=head1 DESCRIPTION
153
154This module implements the BitTorrent I<bencode> serialisation format as described in L<http://www.bittorrent.org/protocol.html>.
155
156
157=head1 INTERFACE
158
159=head2 C<bencode( $datastructure )>
160
161Takes a single argument which may be a scalar or a reference to a scalar, array or hash. Arrays and hashes may in turn contain values of these same types. Simple scalars that look like canonically represented integers will be serialised as such. To bypass the heuristic and force serialisation as a string, use a reference to a scalar.
162
163Croaks on unhandled data types.
164
165=head2 C<bdecode( $string [, $do_lenient_decode ] )>
166
167Takes a string and returns the corresponding deserialised data structure.
168
169If you pass a true value for the second option, it will disregard the sort order of dict keys. This violation of the I<becode> format is somewhat common.
170
171Croaks on malformed data.
172
173=head1 DIAGNOSTICS
174
175=over
176
177=item C<trailing garbage at %s>
178
179Your data does not end after the first I<bencode>-serialised item.
180
181You may also get this error if a malformed item follows.
182
183=item C<garbage at %s>
184
185Your data is malformed.
186
187=item C<unexpected end of data at %s>
188
189Your data is truncated.
190
191=item C<unexpected end of string data starting at %s>
192
193Your data includes a string declared to be longer than the available data.
194
195=item C<malformed string length at %s>
196
197Your data contained a string with negative length or a length with leading zeroes.
198
199=item C<malformed integer data at %s>
200
201Your data contained something that was supposed to be an integer but didn't make sense.
202
203=item C<dict key not in sort order at %s>
204
205Your data violates the I<bencode> format constaint that dict keys must appear in lexical sort order.
206
207=item C<duplicate dict key at %s>
208
209Your data violates the I<bencode> format constaint that all dict keys must be unique.
210
211=item C<dict key is not a string at %s>
212
213Your data violates the I<bencode> format constaint that all dict keys be strings.
214
215=item C<dict key is missing value at %s>
216
217Your data contains a dictionary with an odd number of elements.
218
219=item C<unhandled data type>
220
221You are trying to serialise a data structure that consists of data types other than
222
223=over
224
225=item * scalars
226
227=item * references to arrays
228
229=item * references to hashes
230
231=item * references to scalars
232
233=back
234
235The format does not support this.
236
237=back
238
239
240=head1 BUGS AND LIMITATIONS
241
242Strings and numbers are practically indistinguishable in Perl, so C<bencode()> has to resort to a heuristic to decide how to serialise a scalar. This cannot be fixed.
243
244Error reporting is currently suboptimal. Malformed strings or integers will throw a misleading C<trailing garbage> message instead of a more specific diagnostic.
245
246No bugs have been reported.
247
248Please report any bugs or feature requests to C<bug-bencode@rt.cpan.org>, or through the web interface at L<http://rt.cpan.org>.
249
250
251=head1 AUTHOR
252
253Aristotle Pagaltzis  L<mailto:pagaltzis@gmx.de>
254
255
256=head1 LICENCE AND COPYRIGHT
257
258Copyright (c) 2006, Aristotle Pagaltzis. All rights reserved.
259
260This module is free software; you can redistribute it and/or modify it under the same terms as Perl itself. See L<perlartistic>.
261
262
263=head1 DISCLAIMER OF WARRANTY
264
265BECAUSE THIS SOFTWARE IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY FOR THE SOFTWARE, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE SOFTWARE "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE SOFTWARE IS WITH YOU. SHOULD THE SOFTWARE PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR, OR CORRECTION.
266
267IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR REDISTRIBUTE THE SOFTWARE AS PERMITTED BY THE ABOVE LICENCE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE SOFTWARE (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE SOFTWARE TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
268