1package Bencode; 2BEGIN { 3 $Bencode::VERSION = '1.4'; 4} 5use strict; 6use Carp; 7use Exporter; 8 9# ABSTRACT: BitTorrent serialisation format 10 11use vars qw( $VERSION @ISA @EXPORT_OK $DEBUG $do_lenient_decode $max_depth ); 12 13@ISA = qw( Exporter ); 14@EXPORT_OK = qw( bencode bdecode ); 15 16sub _msg { sprintf "@_", pos() || 0 } 17 18sub _bdecode_string { 19 20 if ( m/ \G ( 0 | [1-9] \d* ) : /xgc ) { 21 my $len = $1; 22 23 croak _msg 'unexpected end of string data starting at %s' 24 if $len > length() - pos(); 25 26 my $str = substr $_, pos(), $len; 27 pos() = pos() + $len; 28 29 warn _msg STRING => "(length $len)", $len < 200 ? "[$str]" : () if $DEBUG; 30 31 return $str; 32 } 33 else { 34 my $pos = pos(); 35 if ( m/ \G -? 0? \d+ : /xgc ) { 36 pos() = $pos; 37 croak _msg 'malformed string length at %s'; 38 } 39 } 40 41 return; 42} 43 44sub _bdecode_chunk { 45 warn _msg 'decoding at %s' if $DEBUG; 46 47 local $max_depth = $max_depth - 1 if defined $max_depth; 48 49 if ( defined( my $str = _bdecode_string() ) ) { 50 return $str; 51 } 52 elsif ( m/ \G i /xgc ) { 53 croak _msg 'unexpected end of data at %s' if m/ \G \z /xgc; 54 55 m/ \G ( 0 | -? [1-9] \d* ) e /xgc 56 or croak _msg 'malformed integer data at %s'; 57 58 warn _msg INTEGER => $1 if $DEBUG; 59 return $1; 60 } 61 elsif ( m/ \G l /xgc ) { 62 warn _msg 'LIST' if $DEBUG; 63 64 croak _msg 'nesting depth exceeded at %s' 65 if defined $max_depth and $max_depth < 0; 66 67 my @list; 68 until ( m/ \G e /xgc ) { 69 warn _msg 'list not terminated at %s, looking for another element' if $DEBUG; 70 push @list, _bdecode_chunk(); 71 } 72 return \@list; 73 } 74 elsif ( m/ \G d /xgc ) { 75 warn _msg 'DICT' if $DEBUG; 76 77 croak _msg 'nesting depth exceeded at %s' 78 if defined $max_depth and $max_depth < 0; 79 80 my $last_key; 81 my %hash; 82 until ( m/ \G e /xgc ) { 83 warn _msg 'dict not terminated at %s, looking for another pair' if $DEBUG; 84 85 croak _msg 'unexpected end of data at %s' 86 if m/ \G \z /xgc; 87 88 my $key = _bdecode_string(); 89 defined $key or croak _msg 'dict key is not a string at %s'; 90 91 croak _msg 'duplicate dict key at %s' 92 if exists $hash{ $key }; 93 94 croak _msg 'dict key not in sort order at %s' 95 if not( $do_lenient_decode ) and defined $last_key and $key lt $last_key; 96 97 croak _msg 'dict key is missing value at %s' 98 if m/ \G e /xgc; 99 100 $last_key = $key; 101 $hash{ $key } = _bdecode_chunk(); 102 } 103 return \%hash; 104 } 105 else { 106 croak _msg m/ \G \z /xgc ? 'unexpected end of data at %s' : 'garbage at %s'; 107 } 108} 109 110sub bdecode { 111 local $_ = shift; 112 local $do_lenient_decode = shift; 113 local $max_depth = shift; 114 my $deserialised_data = _bdecode_chunk(); 115 croak _msg 'trailing garbage at %s' if $_ !~ m/ \G \z /xgc; 116 return $deserialised_data; 117} 118 119sub _bencode { 120 my ( $data ) = @_; 121 if ( not ref $data ) { 122 return sprintf 'i%se', $data if $data =~ m/\A (?: 0 | -? [1-9] \d* ) \z/x; 123 return length( $data ) . ':' . $data; 124 } 125 elsif ( ref $data eq 'SCALAR' ) { 126 # escape hatch -- use this to avoid num/str heuristics 127 return length( $$data ) . ':' . $$data; 128 } 129 elsif ( ref $data eq 'ARRAY' ) { 130 return 'l' . join( '', map _bencode( $_ ), @$data ) . 'e'; 131 } 132 elsif ( ref $data eq 'HASH' ) { 133 return 'd' . join( '', map { _bencode( \$_ ), _bencode( $data->{ $_ } ) } sort keys %$data ) . 'e'; 134 } 135 else { 136 croak 'unhandled data type'; 137 } 138} 139 140sub bencode { 141 croak 'need exactly one argument' if @_ != 1; 142 goto &_bencode; 143} 144 145bdecode( 'i1e' ); 146 147 148 149=pod 150 151=head1 NAME 152 153Bencode - BitTorrent serialisation format 154 155=head1 VERSION 156 157version 1.4 158 159=head1 SYNOPSIS 160 161 use Bencode qw( bencode bdecode ); 162 163 my $bencoded = bencode { 'age' => 25, 'eyes' => 'blue' }; 164 print $bencoded, "\n"; 165 my $decoded = bdecode $bencoded; 166 167=head1 DESCRIPTION 168 169This module implements the BitTorrent I<bencode> serialisation format as described in L<http://www.bittorrent.org/protocol.html>. 170 171=head1 INTERFACE 172 173=head2 C<bencode( $datastructure )> 174 175Takes a single argument which may be a scalar or a reference to a scalar, array or hash. Arrays and hashes may in turn contain values of these same types. Simple scalars that look like canonically represented integers will be serialised as such. To bypass the heuristic and force serialisation as a string, use a reference to a scalar. 176 177Croaks on unhandled data types. 178 179=head2 C<bdecode( $string [, $do_lenient_decode [, $max_depth ] ] )> 180 181Takes a string and returns the corresponding deserialised data structure. 182 183If you pass a true value for the second option, it will disregard the sort order of dict keys. This violation of the I<bencode> format is somewhat common. 184 185If you pass an integer for the third option, it will croak when attempting to parse dictionaries nested deeper than this level, to prevent DoS attacks using maliciously crafted input. 186 187Croaks on malformed data. 188 189=head1 DIAGNOSTICS 190 191=over 192 193=item C<trailing garbage at %s> 194 195Your data does not end after the first I<bencode>-serialised item. 196 197You may also get this error if a malformed item follows. 198 199=item C<garbage at %s> 200 201Your data is malformed. 202 203=item C<unexpected end of data at %s> 204 205Your data is truncated. 206 207=item C<unexpected end of string data starting at %s> 208 209Your data includes a string declared to be longer than the available data. 210 211=item C<malformed string length at %s> 212 213Your data contained a string with negative length or a length with leading zeroes. 214 215=item C<malformed integer data at %s> 216 217Your data contained something that was supposed to be an integer but didn't make sense. 218 219=item C<dict key not in sort order at %s> 220 221Your data violates the I<bencode> format constaint that dict keys must appear in lexical sort order. 222 223=item C<duplicate dict key at %s> 224 225Your data violates the I<bencode> format constaint that all dict keys must be unique. 226 227=item C<dict key is not a string at %s> 228 229Your data violates the I<bencode> format constaint that all dict keys be strings. 230 231=item C<dict key is missing value at %s> 232 233Your data contains a dictionary with an odd number of elements. 234 235=item C<nesting depth exceeded at %s> 236 237Your data contains dicts or lists that are nested deeper than the $max_depth passed to C<bdecode()>. 238 239=item C<unhandled data type> 240 241You are trying to serialise a data structure that consists of data types other than 242 243=over 244 245=item * scalars 246 247=item * references to arrays 248 249=item * references to hashes 250 251=item * references to scalars 252 253=back 254 255The format does not support this. 256 257=back 258 259=head1 BUGS AND LIMITATIONS 260 261Strings and numbers are practically indistinguishable in Perl, so C<bencode()> has to resort to a heuristic to decide how to serialise a scalar. This cannot be fixed. 262 263Please report any bugs or feature requests through the web interface at L<http://github.com/ap/Bencode/issues>. 264 265=head1 AUTHOR 266 267 Aristotle Pagaltzis <pagaltzis@gmx.de> 268 269=head1 COPYRIGHT AND LICENSE 270 271This software is copyright (c) 2010 by Aristotle Pagaltzis. 272 273This is free software; you can redistribute it and/or modify it under 274the same terms as the Perl 5 programming language system itself. 275 276=cut 277 278 279__END__ 280 281