1package Data::Dump; 2 3use strict; 4use vars qw(@EXPORT @EXPORT_OK $VERSION $DEBUG); 5use subs qq(dump); 6 7require Exporter; 8*import = \&Exporter::import; 9@EXPORT = qw(dd ddx); 10@EXPORT_OK = qw(dump pp dumpf quote); 11 12$VERSION = "1.21"; 13$DEBUG = 0; 14 15use overload (); 16use vars qw(%seen %refcnt @dump @fixup %require $TRY_BASE64 @FILTERS $INDENT); 17 18$TRY_BASE64 = 50 unless defined $TRY_BASE64; 19$INDENT = " " unless defined $INDENT; 20 21sub dump 22{ 23 local %seen; 24 local %refcnt; 25 local %require; 26 local @fixup; 27 28 require Data::Dump::FilterContext if @FILTERS; 29 30 my $name = "a"; 31 my @dump; 32 33 for my $v (@_) { 34 my $val = _dump($v, $name, [], tied($v)); 35 push(@dump, [$name, $val]); 36 } continue { 37 $name++; 38 } 39 40 my $out = ""; 41 if (%require) { 42 for (sort keys %require) { 43 $out .= "require $_;\n"; 44 } 45 } 46 if (%refcnt) { 47 # output all those with refcounts first 48 for (@dump) { 49 my $name = $_->[0]; 50 if ($refcnt{$name}) { 51 $out .= "my \$$name = $_->[1];\n"; 52 undef $_->[1]; 53 } 54 } 55 for (@fixup) { 56 $out .= "$_;\n"; 57 } 58 } 59 60 my $paren = (@dump != 1); 61 $out .= "(" if $paren; 62 $out .= format_list($paren, undef, 63 map {defined($_->[1]) ? $_->[1] : "\$".$_->[0]} 64 @dump 65 ); 66 $out .= ")" if $paren; 67 68 if (%refcnt || %require) { 69 $out .= ";\n"; 70 $out =~ s/^/$INDENT/gm; 71 $out = "do {\n$out}"; 72 } 73 74 #use Data::Dumper; print Dumper(\%refcnt); 75 #use Data::Dumper; print Dumper(\%seen); 76 77 print STDERR "$out\n" unless defined wantarray; 78 $out; 79} 80 81*pp = \&dump; 82 83sub dd { 84 print dump(@_), "\n"; 85} 86 87sub ddx { 88 my(undef, $file, $line) = caller; 89 $file =~ s,.*[\\/],,; 90 my $out = "$file:$line: " . dump(@_) . "\n"; 91 $out =~ s/^/# /gm; 92 print $out; 93} 94 95sub dumpf { 96 require Data::Dump::Filtered; 97 goto &Data::Dump::Filtered::dump_filtered; 98} 99 100sub _dump 101{ 102 my $ref = ref $_[0]; 103 my $rval = $ref ? $_[0] : \$_[0]; 104 shift; 105 106 my($name, $idx, $dont_remember, $pclass, $pidx) = @_; 107 108 my($class, $type, $id); 109 my $strval = overload::StrVal($rval); 110 # Parse $strval without using regexps, in order not to clobber $1, $2,... 111 if ((my $i = index($strval, "=")) >= 0) { 112 $class = substr($strval, 0, $i); 113 $strval = substr($strval, $i+1); 114 } 115 if ((my $i = index($strval, "(0x")) >= 0) { 116 $type = substr($strval, 0, $i); 117 $id = substr($strval, $i + 2, -1); 118 } 119 else { 120 die "Can't parse " . overload::StrVal($rval); 121 } 122 if ($] < 5.008 && $type eq "SCALAR") { 123 $type = "REF" if $ref eq "REF"; 124 } 125 warn "\$$name(@$idx) $class $type $id ($ref)" if $DEBUG; 126 127 my $out; 128 my $comment; 129 my $hide_keys; 130 if (@FILTERS) { 131 my $pself = ""; 132 $pself = fullname("self", [@$idx[$pidx..(@$idx - 1)]]) if $pclass; 133 my $ctx = Data::Dump::FilterContext->new($rval, $class, $type, $ref, $pclass, $pidx, $idx); 134 my @bless; 135 for my $filter (@FILTERS) { 136 if (my $f = $filter->($ctx, $rval)) { 137 if (my $v = $f->{object}) { 138 local @FILTERS; 139 $out = _dump($v, $name, $idx, 1); 140 $dont_remember++; 141 } 142 if (defined(my $c = $f->{bless})) { 143 push(@bless, $c); 144 } 145 if (my $c = $f->{comment}) { 146 $comment = $c; 147 } 148 if (defined(my $c = $f->{dump})) { 149 $out = $c; 150 $dont_remember++; 151 } 152 if (my $h = $f->{hide_keys}) { 153 if (ref($h) eq "ARRAY") { 154 $hide_keys = sub { 155 for my $k (@$h) { 156 return 1 if $k eq $_[0]; 157 } 158 return 0; 159 }; 160 } 161 } 162 } 163 } 164 push(@bless, "") if defined($out) && !@bless; 165 if (@bless) { 166 $class = shift(@bless); 167 warn "More than one filter callback tried to bless object" if @bless; 168 } 169 } 170 171 unless ($dont_remember) { 172 if (my $s = $seen{$id}) { 173 my($sname, $sidx) = @$s; 174 $refcnt{$sname}++; 175 my $sref = fullname($sname, $sidx, 176 ($ref && $type eq "SCALAR")); 177 warn "SEEN: [\$$name(@$idx)] => [\$$sname(@$sidx)] ($ref,$sref)" if $DEBUG; 178 return $sref unless $sname eq $name; 179 $refcnt{$name}++; 180 push(@fixup, fullname($name,$idx)." = $sref"); 181 return "do{my \$fix}" if @$idx && $idx->[-1] eq '$'; 182 return "'fix'"; 183 } 184 $seen{$id} = [$name, $idx]; 185 } 186 187 if ($class) { 188 $pclass = $class; 189 $pidx = @$idx; 190 } 191 192 if (defined $out) { 193 # keep it 194 } 195 elsif ($type eq "SCALAR" || $type eq "REF" || $type eq "REGEXP") { 196 if ($ref) { 197 if ($class && $class eq "Regexp") { 198 my $v = "$rval"; 199 200 my $mod = ""; 201 if ($v =~ /^\(\?\^?([msix-]*):([\x00-\xFF]*)\)\z/) { 202 $mod = $1; 203 $v = $2; 204 $mod =~ s/-.*//; 205 } 206 207 my $sep = '/'; 208 my $sep_count = ($v =~ tr/\///); 209 if ($sep_count) { 210 # see if we can find a better one 211 for ('|', ',', ':', '#') { 212 my $c = eval "\$v =~ tr/\Q$_\E//"; 213 #print "SEP $_ $c $sep_count\n"; 214 if ($c < $sep_count) { 215 $sep = $_; 216 $sep_count = $c; 217 last if $sep_count == 0; 218 } 219 } 220 } 221 $v =~ s/\Q$sep\E/\\$sep/g; 222 223 $out = "qr$sep$v$sep$mod"; 224 undef($class); 225 } 226 else { 227 delete $seen{$id} if $type eq "SCALAR"; # will be seen again shortly 228 my $val = _dump($$rval, $name, [@$idx, "\$"], 0, $pclass, $pidx); 229 $out = $class ? "do{\\(my \$o = $val)}" : "\\$val"; 230 } 231 } else { 232 if (!defined $$rval) { 233 $out = "undef"; 234 } 235 elsif (do {no warnings 'numeric'; $$rval + 0 eq $$rval}) { 236 $out = $$rval; 237 } 238 else { 239 $out = str($$rval); 240 } 241 if ($class && !@$idx) { 242 # Top is an object, not a reference to one as perl needs 243 $refcnt{$name}++; 244 my $obj = fullname($name, $idx); 245 my $cl = quote($class); 246 push(@fixup, "bless \\$obj, $cl"); 247 } 248 } 249 } 250 elsif ($type eq "GLOB") { 251 if ($ref) { 252 delete $seen{$id}; 253 my $val = _dump($$rval, $name, [@$idx, "*"], 0, $pclass, $pidx); 254 $out = "\\$val"; 255 if ($out =~ /^\\\*Symbol::/) { 256 $require{Symbol}++; 257 $out = "Symbol::gensym()"; 258 } 259 } else { 260 my $val = "$$rval"; 261 $out = "$$rval"; 262 263 for my $k (qw(SCALAR ARRAY HASH)) { 264 my $gval = *$$rval{$k}; 265 next unless defined $gval; 266 next if $k eq "SCALAR" && ! defined $$gval; # always there 267 my $f = scalar @fixup; 268 push(@fixup, "RESERVED"); # overwritten after _dump() below 269 $gval = _dump($gval, $name, [@$idx, "*{$k}"], 0, $pclass, $pidx); 270 $refcnt{$name}++; 271 my $gname = fullname($name, $idx); 272 $fixup[$f] = "$gname = $gval"; #XXX indent $gval 273 } 274 } 275 } 276 elsif ($type eq "ARRAY") { 277 my @vals; 278 my $tied = tied_str(tied(@$rval)); 279 my $i = 0; 280 for my $v (@$rval) { 281 push(@vals, _dump($v, $name, [@$idx, "[$i]"], $tied, $pclass, $pidx)); 282 $i++; 283 } 284 $out = "[" . format_list(1, $tied, @vals) . "]"; 285 } 286 elsif ($type eq "HASH") { 287 my(@keys, @vals); 288 my $tied = tied_str(tied(%$rval)); 289 290 # statistics to determine variation in key lengths 291 my $kstat_max = 0; 292 my $kstat_sum = 0; 293 my $kstat_sum2 = 0; 294 295 my @orig_keys = keys %$rval; 296 if ($hide_keys) { 297 @orig_keys = grep !$hide_keys->($_), @orig_keys; 298 } 299 my $text_keys = 0; 300 for (@orig_keys) { 301 $text_keys++, last unless /^[-+]?(?:0|[1-9]\d*)(?:\.\d+)?\z/; 302 } 303 304 if ($text_keys) { 305 @orig_keys = sort { lc($a) cmp lc($b) } @orig_keys; 306 } 307 else { 308 @orig_keys = sort { $a <=> $b } @orig_keys; 309 } 310 311 my $quote; 312 for my $key (@orig_keys) { 313 next if $key =~ /^-?[a-zA-Z_]\w*\z/; 314 next if $key =~ /^-?[1-9]\d{0,8}\z/; 315 $quote++; 316 last; 317 } 318 319 for my $key (@orig_keys) { 320 my $val = \$rval->{$key}; # capture value before we modify $key 321 $key = quote($key) if $quote; 322 $kstat_max = length($key) if length($key) > $kstat_max; 323 $kstat_sum += length($key); 324 $kstat_sum2 += length($key)*length($key); 325 326 push(@keys, $key); 327 push(@vals, _dump($$val, $name, [@$idx, "{$key}"], $tied, $pclass, $pidx)); 328 } 329 my $nl = ""; 330 my $klen_pad = 0; 331 my $tmp = "@keys @vals"; 332 if (length($tmp) > 60 || $tmp =~ /\n/ || $tied) { 333 $nl = "\n"; 334 335 # Determine what padding to add 336 if ($kstat_max < 4) { 337 $klen_pad = $kstat_max; 338 } 339 elsif (@keys >= 2) { 340 my $n = @keys; 341 my $avg = $kstat_sum/$n; 342 my $stddev = sqrt(($kstat_sum2 - $n * $avg * $avg) / ($n - 1)); 343 344 # I am not actually very happy with this heuristics 345 if ($stddev / $kstat_max < 0.25) { 346 $klen_pad = $kstat_max; 347 } 348 if ($DEBUG) { 349 push(@keys, "__S"); 350 push(@vals, sprintf("%.2f (%d/%.1f/%.1f)", 351 $stddev / $kstat_max, 352 $kstat_max, $avg, $stddev)); 353 } 354 } 355 } 356 $out = "{$nl"; 357 $out .= "$INDENT# $tied$nl" if $tied; 358 while (@keys) { 359 my $key = shift @keys; 360 my $val = shift @vals; 361 my $vpad = $INDENT . (" " x ($klen_pad ? $klen_pad + 4 : 0)); 362 $val =~ s/\n/\n$vpad/gm; 363 my $kpad = $nl ? $INDENT : " "; 364 $key .= " " x ($klen_pad - length($key)) if $nl; 365 $out .= "$kpad$key => $val,$nl"; 366 } 367 $out =~ s/,$/ / unless $nl; 368 $out .= "}"; 369 } 370 elsif ($type eq "CODE") { 371 $out = 'sub { ... }'; 372 } 373 elsif ($type eq "VSTRING") { 374 $out = sprintf +($ref ? '\v%vd' : 'v%vd'), $$rval; 375 } 376 else { 377 warn "Can't handle $type data"; 378 $out = "'#$type#'"; 379 } 380 381 if ($class && $ref) { 382 $out = "bless($out, " . quote($class) . ")"; 383 } 384 if ($comment) { 385 $comment =~ s/^/# /gm; 386 $comment .= "\n" unless $comment =~ /\n\z/; 387 $comment =~ s/^#[ \t]+\n/\n/; 388 $out = "$comment$out"; 389 } 390 return $out; 391} 392 393sub tied_str { 394 my $tied = shift; 395 if ($tied) { 396 if (my $tied_ref = ref($tied)) { 397 $tied = "tied $tied_ref"; 398 } 399 else { 400 $tied = "tied"; 401 } 402 } 403 return $tied; 404} 405 406sub fullname 407{ 408 my($name, $idx, $ref) = @_; 409 substr($name, 0, 0) = "\$"; 410 411 my @i = @$idx; # need copy in order to not modify @$idx 412 if ($ref && @i && $i[0] eq "\$") { 413 shift(@i); # remove one deref 414 $ref = 0; 415 } 416 while (@i && $i[0] eq "\$") { 417 shift @i; 418 $name = "\$$name"; 419 } 420 421 my $last_was_index; 422 for my $i (@i) { 423 if ($i eq "*" || $i eq "\$") { 424 $last_was_index = 0; 425 $name = "$i\{$name}"; 426 } elsif ($i =~ s/^\*//) { 427 $name .= $i; 428 $last_was_index++; 429 } else { 430 $name .= "->" unless $last_was_index++; 431 $name .= $i; 432 } 433 } 434 $name = "\\$name" if $ref; 435 $name; 436} 437 438sub format_list 439{ 440 my $paren = shift; 441 my $comment = shift; 442 my $indent_lim = $paren ? 0 : 1; 443 if (@_ > 3) { 444 # can we use range operator to shorten the list? 445 my $i = 0; 446 while ($i < @_) { 447 my $j = $i + 1; 448 my $v = $_[$i]; 449 while ($j < @_) { 450 # XXX allow string increment too? 451 if ($v eq "0" || $v =~ /^-?[1-9]\d{0,9}\z/) { 452 $v++; 453 } 454 elsif ($v =~ /^"([A-Za-z]{1,3}\d*)"\z/) { 455 $v = $1; 456 $v++; 457 $v = qq("$v"); 458 } 459 else { 460 last; 461 } 462 last if $_[$j] ne $v; 463 $j++; 464 } 465 if ($j - $i > 3) { 466 splice(@_, $i, $j - $i, "$_[$i] .. $_[$j-1]"); 467 } 468 $i++; 469 } 470 } 471 my $tmp = "@_"; 472 if ($comment || (@_ > $indent_lim && (length($tmp) > 60 || $tmp =~ /\n/))) { 473 my @elem = @_; 474 for (@elem) { s/^/$INDENT/gm; } 475 return "\n" . ($comment ? "$INDENT# $comment\n" : "") . 476 join(",\n", @elem, ""); 477 } else { 478 return join(", ", @_); 479 } 480} 481 482sub str { 483 if (length($_[0]) > 20) { 484 for ($_[0]) { 485 # Check for repeated string 486 if (/^(.)\1\1\1/s) { 487 # seems to be a repating sequence, let's check if it really is 488 # without backtracking 489 unless (/[^\Q$1\E]/) { 490 my $base = quote($1); 491 my $repeat = length; 492 return "($base x $repeat)" 493 } 494 } 495 # Length protection because the RE engine will blow the stack [RT#33520] 496 if (length($_) < 16 * 1024 && /^(.{2,5}?)\1*\z/s) { 497 my $base = quote($1); 498 my $repeat = length($_)/length($1); 499 return "($base x $repeat)"; 500 } 501 } 502 } 503 504 local $_ = "e; 505 506 if (length($_) > 40 && !/\\x\{/ && length($_) > (length($_[0]) * 2)) { 507 # too much binary data, better to represent as a hex/base64 string 508 509 # Base64 is more compact than hex when string is longer than 510 # 17 bytes (not counting any require statement needed). 511 # But on the other hand, hex is much more readable. 512 if ($TRY_BASE64 && length($_[0]) > $TRY_BASE64 && 513 (defined &utf8::is_utf8 && !utf8::is_utf8($_[0])) && 514 eval { require MIME::Base64 }) 515 { 516 $require{"MIME::Base64"}++; 517 return "MIME::Base64::decode(\"" . 518 MIME::Base64::encode($_[0],"") . 519 "\")"; 520 } 521 return "pack(\"H*\",\"" . unpack("H*", $_[0]) . "\")"; 522 } 523 524 return $_; 525} 526 527my %esc = ( 528 "\a" => "\\a", 529 "\b" => "\\b", 530 "\t" => "\\t", 531 "\n" => "\\n", 532 "\f" => "\\f", 533 "\r" => "\\r", 534 "\e" => "\\e", 535); 536 537# put a string value in double quotes 538sub quote { 539 local($_) = $_[0]; 540 # If there are many '"' we might want to use qq() instead 541 s/([\\\"\@\$])/\\$1/g; 542 return qq("$_") unless /[^\040-\176]/; # fast exit 543 544 s/([\a\b\t\n\f\r\e])/$esc{$1}/g; 545 546 # no need for 3 digits in escape for these 547 s/([\0-\037])(?!\d)/sprintf('\\%o',ord($1))/eg; 548 549 s/([\0-\037\177-\377])/sprintf('\\x%02X',ord($1))/eg; 550 s/([^\040-\176])/sprintf('\\x{%X}',ord($1))/eg; 551 552 return qq("$_"); 553} 554 5551; 556 557__END__ 558 559=head1 NAME 560 561Data::Dump - Pretty printing of data structures 562 563=head1 SYNOPSIS 564 565 use Data::Dump qw(dump); 566 567 $str = dump(@list); 568 @copy_of_list = eval $str; 569 570 # or use it for easy debug printout 571 use Data::Dump; dd localtime; 572 573=head1 DESCRIPTION 574 575This module provide a few functions that traverse their 576argument and produces a string as its result. The string contains 577Perl code that, when C<eval>ed, produces a deep copy of the original 578arguments. 579 580The main feature of the module is that it strives to produce output 581that is easy to read. Example: 582 583 @a = (1, [2, 3], {4 => 5}); 584 dump(@a); 585 586Produces: 587 588 "(1, [2, 3], { 4 => 5 })" 589 590If you dump just a little data, it is output on a single line. If 591you dump data that is more complex or there is a lot of it, line breaks 592are automatically added to keep it easy to read. 593 594The following functions are provided (only the dd* functions are exported by default): 595 596=over 597 598=item dump( ... ) 599 600=item pp( ... ) 601 602Returns a string containing a Perl expression. If you pass this 603string to Perl's built-in eval() function it should return a copy of 604the arguments you passed to dump(). 605 606If you call the function with multiple arguments then the output will 607be wrapped in parenthesis "( ..., ... )". If you call the function with a 608single argument the output will not have the wrapping. If you call the function with 609a single scalar (non-reference) argument it will just return the 610scalar quoted if needed, but never break it into multiple lines. If you 611pass multiple arguments or references to arrays of hashes then the 612return value might contain line breaks to format it for easier 613reading. The returned string will never be "\n" terminated, even if 614contains multiple lines. This allows code like this to place the 615semicolon in the expected place: 616 617 print '$obj = ', dump($obj), ";\n"; 618 619If dump() is called in void context, then the dump is printed on 620STDERR and then "\n" terminated. You might find this useful for quick 621debug printouts, but the dd*() functions might be better alternatives 622for this. 623 624There is no difference between dump() and pp(), except that dump() 625shares its name with a not-so-useful perl builtin. Because of this 626some might want to avoid using that name. 627 628=item quote( $string ) 629 630Returns a quoted version of the provided string. 631 632It differs from C<dump($string)> in that it will quote even numbers and 633not try to come up with clever expressions that might shorten the 634output. If a non-scalar argument is provided then it's just stringified 635instead of traversed. 636 637=item dd( ... ) 638 639=item ddx( ... ) 640 641These functions will call dump() on their argument and print the 642result to STDOUT (actually, it's the currently selected output handle, but 643STDOUT is the default for that). 644 645The difference between them is only that ddx() will prefix the lines 646it prints with "# " and mark the first line with the file and line 647number where it was called. This is meant to be useful for debug 648printouts of state within programs. 649 650=item dumpf( ..., \&filter ) 651 652Short hand for calling the dump_filtered() function of L<Data::Dump::Filtered>. 653This works like dump(), but the last argument should be a filter callback 654function. As objects are visited the filter callback is invoked and it 655can modify how the objects are dumped. 656 657=back 658 659=head1 CONFIGURATION 660 661There are a few global variables that can be set to modify the output 662generated by the dump functions. It's wise to localize the setting of 663these. 664 665=over 666 667=item $Data::Dump::INDENT 668 669This holds the string that's used for indenting multiline data structures. 670It's default value is " " (two spaces). Set it to "" to suppress indentation. 671Setting it to "| " makes for nice visuals even if the dump output then fails to 672be valid Perl. 673 674=item $Data::Dump::TRY_BASE64 675 676How long must a binary string be before we try to use the base64 encoding 677for the dump output. The default is 50. Set it to 0 to disable base64 dumps. 678 679=back 680 681 682=head1 LIMITATIONS 683 684Code references will be dumped as C<< sub { ... } >>. Thus, C<eval>ing them will 685not reproduce the original routine. The C<...>-operator used will also require 686perl-5.12 or better to be evaled. 687 688If you forget to explicitly import the C<dump> function, your code will 689core dump. That's because you just called the builtin C<dump> function 690by accident, which intentionally dumps core. Because of this you can 691also import the same function as C<pp>, mnemonic for "pretty-print". 692 693=head1 HISTORY 694 695The C<Data::Dump> module grew out of frustration with Sarathy's 696in-most-cases-excellent C<Data::Dumper>. Basic ideas and some code 697are shared with Sarathy's module. 698 699The C<Data::Dump> module provides a much simpler interface than 700C<Data::Dumper>. No OO interface is available and there are fewer 701configuration options to worry about. The other benefit is 702that the dump produced does not try to set any variables. It only 703returns what is needed to produce a copy of the arguments. This means 704that C<dump("foo")> simply returns C<'"foo"'>, and C<dump(1..3)> simply 705returns C<'(1, 2, 3)'>. 706 707=head1 SEE ALSO 708 709L<Data::Dump::Filtered>, L<Data::Dump::Trace>, L<Data::Dumper>, L<JSON>, 710L<Storable> 711 712=head1 AUTHORS 713 714The C<Data::Dump> module is written by Gisle Aas <gisle@aas.no>, based 715on C<Data::Dumper> by Gurusamy Sarathy <gsar@umich.edu>. 716 717 Copyright 1998-2010 Gisle Aas. 718 Copyright 1996-1998 Gurusamy Sarathy. 719 720This library is free software; you can redistribute it and/or 721modify it under the same terms as Perl itself. 722 723=cut 724