1package Bencode; 2use strict; 3use Carp; 4use Exporter; 5 6use vars qw( $VERSION @ISA @EXPORT_OK $DEBUG $do_lenient_decode ); 7 8$VERSION = '1.31'; 9 10@ISA = qw( Exporter ); 11@EXPORT_OK = qw( bencode bdecode ); 12 13sub _msg { sprintf "@_", pos() || 0 } 14 15sub _bdecode_string { 16 17 if ( m/ \G ( 0 | [1-9] \d* ) : /xgc ) { 18 my $len = $1; 19 20 croak _msg 'unexpected end of string data starting at %s' if $len > length() - pos(); 21 22 my $str = substr $_, pos(), $len; 23 pos() = pos() + $len; 24 25 warn _msg STRING => "(length $len)", $len < 200 ? "[$str]" : () if $DEBUG; 26 27 return $str; 28 } 29 else { 30 my $pos = pos(); 31 if ( m/ \G -? 0? \d+ : /xgc ) { 32 pos() = $pos; 33 croak _msg 'malformed string length at %s'; 34 } 35 } 36 37 return; 38} 39 40sub _bdecode_chunk { 41 warn _msg 'decoding at %s' if $DEBUG; 42 43 if ( defined( my $str = _bdecode_string() ) ) { 44 return $str; 45 } 46 elsif ( m/ \G i /xgc ) { 47 croak _msg 'unexpected end of data at %s' if m/ \G \z /xgc; 48 49 m/ \G ( 0 | -? [1-9] \d* ) e /xgc 50 or croak _msg 'malformed integer data at %s'; 51 52 warn _msg INTEGER => $1 if $DEBUG; 53 return $1; 54 } 55 elsif ( m/ \G l /xgc ) { 56 warn _msg 'LIST' if $DEBUG; 57 my @list; 58 until ( m/ \G e /xgc ) { 59 warn _msg 'list not terminated at %s, looking for another element' if $DEBUG; 60 push @list, _bdecode_chunk(); 61 } 62 return \@list; 63 } 64 elsif ( m/ \G d /xgc ) { 65 warn _msg 'DICT' if $DEBUG; 66 my $last_key; 67 my %hash; 68 until ( m/ \G e /xgc ) { 69 warn _msg 'dict not terminated at %s, looking for another pair' if $DEBUG; 70 71 croak _msg 'unexpected end of data at %s' 72 if m/ \G \z /xgc; 73 74 my $key = _bdecode_string(); 75 defined $key or croak _msg 'dict key is not a string at %s'; 76 77 croak _msg 'duplicate dict key at %s' 78 if exists $hash{ $key }; 79 80 croak _msg 'dict key not in sort order at %s' 81 if not( $do_lenient_decode ) and defined $last_key and $key lt $last_key; 82 83 croak _msg 'dict key is missing value at %s' 84 if m/ \G e /xgc; 85 86 $last_key = $key; 87 $hash{ $key } = _bdecode_chunk(); 88 } 89 return \%hash; 90 } 91 else { 92 croak _msg m/ \G \z /xgc ? 'unexpected end of data at %s' : 'garbage at %s'; 93 } 94} 95 96sub bdecode { 97 local $_ = shift; 98 local $do_lenient_decode = shift; 99 my $deserialised_data = _bdecode_chunk(); 100 croak _msg 'trailing garbage at %s' if $_ !~ m/ \G \z /xgc; 101 return $deserialised_data; 102} 103 104sub _bencode { 105 my ( $data ) = @_; 106 if ( not ref $data ) { 107 return sprintf 'i%se', $data if $data =~ m/\A (?: 0 | -? [1-9] \d* ) \z/x; 108 return length( $data ) . ':' . $data; 109 } 110 elsif ( ref $data eq 'SCALAR' ) { 111 # escape hatch -- use this to avoid num/str heuristics 112 return length( $$data ) . ':' . $$data; 113 } 114 elsif ( ref $data eq 'ARRAY' ) { 115 return 'l' . join( '', map _bencode( $_ ), @$data ) . 'e'; 116 } 117 elsif ( ref $data eq 'HASH' ) { 118 return 'd' . join( '', map { _bencode( \$_ ), _bencode( $data->{ $_ } ) } sort keys %$data ) . 'e'; 119 } 120 else { 121 croak 'unhandled data type'; 122 } 123} 124 125sub bencode { 126 croak 'need exactly one argument' if @_ != 1; 127 goto &_bencode; 128} 129 130bdecode( 'i1e' ); 131 132__END__ 133 134=head1 NAME 135 136Bencode - BitTorrent serialisation format 137 138=head1 VERSION 139 140This document describes Bencode version 1.0 141 142 143=head1 SYNOPSIS 144 145 use Bencode qw( bencode bdecode ); 146 147 my $bencoded = bencode { 'age' => 25, 'eyes' => 'blue' }; 148 print $bencoded, "\n"; 149 my $decoded = bdecode $bencoded; 150 151 152=head1 DESCRIPTION 153 154This module implements the BitTorrent I<bencode> serialisation format as described in L<http://www.bittorrent.org/protocol.html>. 155 156 157=head1 INTERFACE 158 159=head2 C<bencode( $datastructure )> 160 161Takes a single argument which may be a scalar or a reference to a scalar, array or hash. Arrays and hashes may in turn contain values of these same types. Simple scalars that look like canonically represented integers will be serialised as such. To bypass the heuristic and force serialisation as a string, use a reference to a scalar. 162 163Croaks on unhandled data types. 164 165=head2 C<bdecode( $string [, $do_lenient_decode ] )> 166 167Takes a string and returns the corresponding deserialised data structure. 168 169If you pass a true value for the second option, it will disregard the sort order of dict keys. This violation of the I<becode> format is somewhat common. 170 171Croaks on malformed data. 172 173=head1 DIAGNOSTICS 174 175=over 176 177=item C<trailing garbage at %s> 178 179Your data does not end after the first I<bencode>-serialised item. 180 181You may also get this error if a malformed item follows. 182 183=item C<garbage at %s> 184 185Your data is malformed. 186 187=item C<unexpected end of data at %s> 188 189Your data is truncated. 190 191=item C<unexpected end of string data starting at %s> 192 193Your data includes a string declared to be longer than the available data. 194 195=item C<malformed string length at %s> 196 197Your data contained a string with negative length or a length with leading zeroes. 198 199=item C<malformed integer data at %s> 200 201Your data contained something that was supposed to be an integer but didn't make sense. 202 203=item C<dict key not in sort order at %s> 204 205Your data violates the I<bencode> format constaint that dict keys must appear in lexical sort order. 206 207=item C<duplicate dict key at %s> 208 209Your data violates the I<bencode> format constaint that all dict keys must be unique. 210 211=item C<dict key is not a string at %s> 212 213Your data violates the I<bencode> format constaint that all dict keys be strings. 214 215=item C<dict key is missing value at %s> 216 217Your data contains a dictionary with an odd number of elements. 218 219=item C<unhandled data type> 220 221You are trying to serialise a data structure that consists of data types other than 222 223=over 224 225=item * scalars 226 227=item * references to arrays 228 229=item * references to hashes 230 231=item * references to scalars 232 233=back 234 235The format does not support this. 236 237=back 238 239 240=head1 BUGS AND LIMITATIONS 241 242Strings and numbers are practically indistinguishable in Perl, so C<bencode()> has to resort to a heuristic to decide how to serialise a scalar. This cannot be fixed. 243 244Error reporting is currently suboptimal. Malformed strings or integers will throw a misleading C<trailing garbage> message instead of a more specific diagnostic. 245 246No bugs have been reported. 247 248Please report any bugs or feature requests to C<bug-bencode@rt.cpan.org>, or through the web interface at L<http://rt.cpan.org>. 249 250 251=head1 AUTHOR 252 253Aristotle Pagaltzis L<mailto:pagaltzis@gmx.de> 254 255 256=head1 LICENCE AND COPYRIGHT 257 258Copyright (c) 2006, Aristotle Pagaltzis. All rights reserved. 259 260This module is free software; you can redistribute it and/or modify it under the same terms as Perl itself. See L<perlartistic>. 261 262 263=head1 DISCLAIMER OF WARRANTY 264 265BECAUSE THIS SOFTWARE IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY FOR THE SOFTWARE, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE SOFTWARE "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE SOFTWARE IS WITH YOU. SHOULD THE SOFTWARE PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR, OR CORRECTION. 266 267IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR REDISTRIBUTE THE SOFTWARE AS PERMITTED BY THE ABOVE LICENCE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE SOFTWARE (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE SOFTWARE TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. 268