1package Data::Dump;
2
3use strict;
4use vars qw(@EXPORT @EXPORT_OK $VERSION $DEBUG);
5use subs qq(dump);
6
7require Exporter;
8*import = \&Exporter::import;
9@EXPORT = qw(dd ddx);
10@EXPORT_OK = qw(dump pp dumpf quote);
11
12$VERSION = "1.21";
13$DEBUG = 0;
14
15use overload ();
16use vars qw(%seen %refcnt @dump @fixup %require $TRY_BASE64 @FILTERS $INDENT);
17
18$TRY_BASE64 = 50 unless defined $TRY_BASE64;
19$INDENT = "  " unless defined $INDENT;
20
21sub dump
22{
23    local %seen;
24    local %refcnt;
25    local %require;
26    local @fixup;
27
28    require Data::Dump::FilterContext if @FILTERS;
29
30    my $name = "a";
31    my @dump;
32
33    for my $v (@_) {
34	my $val = _dump($v, $name, [], tied($v));
35	push(@dump, [$name, $val]);
36    } continue {
37	$name++;
38    }
39
40    my $out = "";
41    if (%require) {
42	for (sort keys %require) {
43	    $out .= "require $_;\n";
44	}
45    }
46    if (%refcnt) {
47	# output all those with refcounts first
48	for (@dump) {
49	    my $name = $_->[0];
50	    if ($refcnt{$name}) {
51		$out .= "my \$$name = $_->[1];\n";
52		undef $_->[1];
53	    }
54	}
55	for (@fixup) {
56	    $out .= "$_;\n";
57	}
58    }
59
60    my $paren = (@dump != 1);
61    $out .= "(" if $paren;
62    $out .= format_list($paren, undef,
63			map {defined($_->[1]) ? $_->[1] : "\$".$_->[0]}
64			    @dump
65		       );
66    $out .= ")" if $paren;
67
68    if (%refcnt || %require) {
69	$out .= ";\n";
70	$out =~ s/^/$INDENT/gm;
71	$out = "do {\n$out}";
72    }
73
74    #use Data::Dumper;   print Dumper(\%refcnt);
75    #use Data::Dumper;   print Dumper(\%seen);
76
77    print STDERR "$out\n" unless defined wantarray;
78    $out;
79}
80
81*pp = \&dump;
82
83sub dd {
84    print dump(@_), "\n";
85}
86
87sub ddx {
88    my(undef, $file, $line) = caller;
89    $file =~ s,.*[\\/],,;
90    my $out = "$file:$line: " . dump(@_) . "\n";
91    $out =~ s/^/# /gm;
92    print $out;
93}
94
95sub dumpf {
96    require Data::Dump::Filtered;
97    goto &Data::Dump::Filtered::dump_filtered;
98}
99
100sub _dump
101{
102    my $ref  = ref $_[0];
103    my $rval = $ref ? $_[0] : \$_[0];
104    shift;
105
106    my($name, $idx, $dont_remember, $pclass, $pidx) = @_;
107
108    my($class, $type, $id);
109    my $strval = overload::StrVal($rval);
110    # Parse $strval without using regexps, in order not to clobber $1, $2,...
111    if ((my $i = index($strval, "=")) >= 0) {
112	$class = substr($strval, 0, $i);
113	$strval = substr($strval, $i+1);
114    }
115    if ((my $i = index($strval, "(0x")) >= 0) {
116	$type = substr($strval, 0, $i);
117	$id = substr($strval, $i + 2, -1);
118    }
119    else {
120	die "Can't parse " . overload::StrVal($rval);
121    }
122    if ($] < 5.008 && $type eq "SCALAR") {
123	$type = "REF" if $ref eq "REF";
124    }
125    warn "\$$name(@$idx) $class $type $id ($ref)" if $DEBUG;
126
127    my $out;
128    my $comment;
129    my $hide_keys;
130    if (@FILTERS) {
131	my $pself = "";
132	$pself = fullname("self", [@$idx[$pidx..(@$idx - 1)]]) if $pclass;
133	my $ctx = Data::Dump::FilterContext->new($rval, $class, $type, $ref, $pclass, $pidx, $idx);
134	my @bless;
135	for my $filter (@FILTERS) {
136	    if (my $f = $filter->($ctx, $rval)) {
137		if (my $v = $f->{object}) {
138		    local @FILTERS;
139		    $out = _dump($v, $name, $idx, 1);
140		    $dont_remember++;
141		}
142		if (defined(my $c = $f->{bless})) {
143		    push(@bless, $c);
144		}
145		if (my $c = $f->{comment}) {
146		    $comment = $c;
147		}
148		if (defined(my $c = $f->{dump})) {
149		    $out = $c;
150		    $dont_remember++;
151		}
152		if (my $h = $f->{hide_keys}) {
153		    if (ref($h) eq "ARRAY") {
154			$hide_keys = sub {
155			    for my $k (@$h) {
156				return 1 if $k eq $_[0];
157			    }
158			    return 0;
159			};
160		    }
161		}
162	    }
163	}
164	push(@bless, "") if defined($out) && !@bless;
165	if (@bless) {
166	    $class = shift(@bless);
167	    warn "More than one filter callback tried to bless object" if @bless;
168	}
169    }
170
171    unless ($dont_remember) {
172	if (my $s = $seen{$id}) {
173	    my($sname, $sidx) = @$s;
174	    $refcnt{$sname}++;
175	    my $sref = fullname($sname, $sidx,
176				($ref && $type eq "SCALAR"));
177	    warn "SEEN: [\$$name(@$idx)] => [\$$sname(@$sidx)] ($ref,$sref)" if $DEBUG;
178	    return $sref unless $sname eq $name;
179	    $refcnt{$name}++;
180	    push(@fixup, fullname($name,$idx)." = $sref");
181	    return "do{my \$fix}" if @$idx && $idx->[-1] eq '$';
182	    return "'fix'";
183	}
184	$seen{$id} = [$name, $idx];
185    }
186
187    if ($class) {
188	$pclass = $class;
189	$pidx = @$idx;
190    }
191
192    if (defined $out) {
193	# keep it
194    }
195    elsif ($type eq "SCALAR" || $type eq "REF" || $type eq "REGEXP") {
196	if ($ref) {
197	    if ($class && $class eq "Regexp") {
198		my $v = "$rval";
199
200		my $mod = "";
201		if ($v =~ /^\(\?\^?([msix-]*):([\x00-\xFF]*)\)\z/) {
202		    $mod = $1;
203		    $v = $2;
204		    $mod =~ s/-.*//;
205		}
206
207		my $sep = '/';
208		my $sep_count = ($v =~ tr/\///);
209		if ($sep_count) {
210		    # see if we can find a better one
211		    for ('|', ',', ':', '#') {
212			my $c = eval "\$v =~ tr/\Q$_\E//";
213			#print "SEP $_ $c $sep_count\n";
214			if ($c < $sep_count) {
215			    $sep = $_;
216			    $sep_count = $c;
217			    last if $sep_count == 0;
218			}
219		    }
220		}
221		$v =~ s/\Q$sep\E/\\$sep/g;
222
223		$out = "qr$sep$v$sep$mod";
224		undef($class);
225	    }
226	    else {
227		delete $seen{$id} if $type eq "SCALAR";  # will be seen again shortly
228		my $val = _dump($$rval, $name, [@$idx, "\$"], 0, $pclass, $pidx);
229		$out = $class ? "do{\\(my \$o = $val)}" : "\\$val";
230	    }
231	} else {
232	    if (!defined $$rval) {
233		$out = "undef";
234	    }
235	    elsif (do {no warnings 'numeric'; $$rval + 0 eq $$rval}) {
236		$out = $$rval;
237	    }
238	    else {
239		$out = str($$rval);
240	    }
241	    if ($class && !@$idx) {
242		# Top is an object, not a reference to one as perl needs
243		$refcnt{$name}++;
244		my $obj = fullname($name, $idx);
245		my $cl  = quote($class);
246		push(@fixup, "bless \\$obj, $cl");
247	    }
248	}
249    }
250    elsif ($type eq "GLOB") {
251	if ($ref) {
252	    delete $seen{$id};
253	    my $val = _dump($$rval, $name, [@$idx, "*"], 0, $pclass, $pidx);
254	    $out = "\\$val";
255	    if ($out =~ /^\\\*Symbol::/) {
256		$require{Symbol}++;
257		$out = "Symbol::gensym()";
258	    }
259	} else {
260	    my $val = "$$rval";
261	    $out = "$$rval";
262
263	    for my $k (qw(SCALAR ARRAY HASH)) {
264		my $gval = *$$rval{$k};
265		next unless defined $gval;
266		next if $k eq "SCALAR" && ! defined $$gval;  # always there
267		my $f = scalar @fixup;
268		push(@fixup, "RESERVED");  # overwritten after _dump() below
269		$gval = _dump($gval, $name, [@$idx, "*{$k}"], 0, $pclass, $pidx);
270		$refcnt{$name}++;
271		my $gname = fullname($name, $idx);
272		$fixup[$f] = "$gname = $gval";  #XXX indent $gval
273	    }
274	}
275    }
276    elsif ($type eq "ARRAY") {
277	my @vals;
278	my $tied = tied_str(tied(@$rval));
279	my $i = 0;
280	for my $v (@$rval) {
281	    push(@vals, _dump($v, $name, [@$idx, "[$i]"], $tied, $pclass, $pidx));
282	    $i++;
283	}
284	$out = "[" . format_list(1, $tied, @vals) . "]";
285    }
286    elsif ($type eq "HASH") {
287	my(@keys, @vals);
288	my $tied = tied_str(tied(%$rval));
289
290	# statistics to determine variation in key lengths
291	my $kstat_max = 0;
292	my $kstat_sum = 0;
293	my $kstat_sum2 = 0;
294
295	my @orig_keys = keys %$rval;
296	if ($hide_keys) {
297	    @orig_keys = grep !$hide_keys->($_), @orig_keys;
298	}
299	my $text_keys = 0;
300	for (@orig_keys) {
301	    $text_keys++, last unless /^[-+]?(?:0|[1-9]\d*)(?:\.\d+)?\z/;
302	}
303
304	if ($text_keys) {
305	    @orig_keys = sort { lc($a) cmp lc($b) } @orig_keys;
306	}
307	else {
308	    @orig_keys = sort { $a <=> $b } @orig_keys;
309	}
310
311	my $quote;
312	for my $key (@orig_keys) {
313	    next if $key =~ /^-?[a-zA-Z_]\w*\z/;
314	    next if $key =~ /^-?[1-9]\d{0,8}\z/;
315	    $quote++;
316	    last;
317	}
318
319	for my $key (@orig_keys) {
320	    my $val = \$rval->{$key};  # capture value before we modify $key
321	    $key = quote($key) if $quote;
322	    $kstat_max = length($key) if length($key) > $kstat_max;
323	    $kstat_sum += length($key);
324	    $kstat_sum2 += length($key)*length($key);
325
326	    push(@keys, $key);
327	    push(@vals, _dump($$val, $name, [@$idx, "{$key}"], $tied, $pclass, $pidx));
328	}
329	my $nl = "";
330	my $klen_pad = 0;
331	my $tmp = "@keys @vals";
332	if (length($tmp) > 60 || $tmp =~ /\n/ || $tied) {
333	    $nl = "\n";
334
335	    # Determine what padding to add
336	    if ($kstat_max < 4) {
337		$klen_pad = $kstat_max;
338	    }
339	    elsif (@keys >= 2) {
340		my $n = @keys;
341		my $avg = $kstat_sum/$n;
342		my $stddev = sqrt(($kstat_sum2 - $n * $avg * $avg) / ($n - 1));
343
344		# I am not actually very happy with this heuristics
345		if ($stddev / $kstat_max < 0.25) {
346		    $klen_pad = $kstat_max;
347		}
348		if ($DEBUG) {
349		    push(@keys, "__S");
350		    push(@vals, sprintf("%.2f (%d/%.1f/%.1f)",
351					$stddev / $kstat_max,
352					$kstat_max, $avg, $stddev));
353		}
354	    }
355	}
356	$out = "{$nl";
357	$out .= "$INDENT# $tied$nl" if $tied;
358	while (@keys) {
359	    my $key = shift @keys;
360	    my $val = shift @vals;
361	    my $vpad = $INDENT . (" " x ($klen_pad ? $klen_pad + 4 : 0));
362	    $val =~ s/\n/\n$vpad/gm;
363	    my $kpad = $nl ? $INDENT : " ";
364	    $key .= " " x ($klen_pad - length($key)) if $nl;
365	    $out .= "$kpad$key => $val,$nl";
366	}
367	$out =~ s/,$/ / unless $nl;
368	$out .= "}";
369    }
370    elsif ($type eq "CODE") {
371	$out = 'sub { ... }';
372    }
373    elsif ($type eq "VSTRING") {
374        $out = sprintf +($ref ? '\v%vd' : 'v%vd'), $$rval;
375    }
376    else {
377	warn "Can't handle $type data";
378	$out = "'#$type#'";
379    }
380
381    if ($class && $ref) {
382	$out = "bless($out, " . quote($class) . ")";
383    }
384    if ($comment) {
385	$comment =~ s/^/# /gm;
386	$comment .= "\n" unless $comment =~ /\n\z/;
387	$comment =~ s/^#[ \t]+\n/\n/;
388	$out = "$comment$out";
389    }
390    return $out;
391}
392
393sub tied_str {
394    my $tied = shift;
395    if ($tied) {
396	if (my $tied_ref = ref($tied)) {
397	    $tied = "tied $tied_ref";
398	}
399	else {
400	    $tied = "tied";
401	}
402    }
403    return $tied;
404}
405
406sub fullname
407{
408    my($name, $idx, $ref) = @_;
409    substr($name, 0, 0) = "\$";
410
411    my @i = @$idx;  # need copy in order to not modify @$idx
412    if ($ref && @i && $i[0] eq "\$") {
413	shift(@i);  # remove one deref
414	$ref = 0;
415    }
416    while (@i && $i[0] eq "\$") {
417	shift @i;
418	$name = "\$$name";
419    }
420
421    my $last_was_index;
422    for my $i (@i) {
423	if ($i eq "*" || $i eq "\$") {
424	    $last_was_index = 0;
425	    $name = "$i\{$name}";
426	} elsif ($i =~ s/^\*//) {
427	    $name .= $i;
428	    $last_was_index++;
429	} else {
430	    $name .= "->" unless $last_was_index++;
431	    $name .= $i;
432	}
433    }
434    $name = "\\$name" if $ref;
435    $name;
436}
437
438sub format_list
439{
440    my $paren = shift;
441    my $comment = shift;
442    my $indent_lim = $paren ? 0 : 1;
443    if (@_ > 3) {
444	# can we use range operator to shorten the list?
445	my $i = 0;
446	while ($i < @_) {
447	    my $j = $i + 1;
448	    my $v = $_[$i];
449	    while ($j < @_) {
450		# XXX allow string increment too?
451		if ($v eq "0" || $v =~ /^-?[1-9]\d{0,9}\z/) {
452		    $v++;
453		}
454		elsif ($v =~ /^"([A-Za-z]{1,3}\d*)"\z/) {
455		    $v = $1;
456		    $v++;
457		    $v = qq("$v");
458		}
459		else {
460		    last;
461		}
462		last if $_[$j] ne $v;
463		$j++;
464	    }
465	    if ($j - $i > 3) {
466		splice(@_, $i, $j - $i, "$_[$i] .. $_[$j-1]");
467	    }
468	    $i++;
469	}
470    }
471    my $tmp = "@_";
472    if ($comment || (@_ > $indent_lim && (length($tmp) > 60 || $tmp =~ /\n/))) {
473	my @elem = @_;
474	for (@elem) { s/^/$INDENT/gm; }
475	return "\n" . ($comment ? "$INDENT# $comment\n" : "") .
476               join(",\n", @elem, "");
477    } else {
478	return join(", ", @_);
479    }
480}
481
482sub str {
483  if (length($_[0]) > 20) {
484      for ($_[0]) {
485      # Check for repeated string
486      if (/^(.)\1\1\1/s) {
487          # seems to be a repating sequence, let's check if it really is
488          # without backtracking
489          unless (/[^\Q$1\E]/) {
490              my $base = quote($1);
491              my $repeat = length;
492              return "($base x $repeat)"
493          }
494      }
495      # Length protection because the RE engine will blow the stack [RT#33520]
496      if (length($_) < 16 * 1024 && /^(.{2,5}?)\1*\z/s) {
497	  my $base   = quote($1);
498	  my $repeat = length($_)/length($1);
499	  return "($base x $repeat)";
500      }
501      }
502  }
503
504  local $_ = &quote;
505
506  if (length($_) > 40  && !/\\x\{/ && length($_) > (length($_[0]) * 2)) {
507      # too much binary data, better to represent as a hex/base64 string
508
509      # Base64 is more compact than hex when string is longer than
510      # 17 bytes (not counting any require statement needed).
511      # But on the other hand, hex is much more readable.
512      if ($TRY_BASE64 && length($_[0]) > $TRY_BASE64 &&
513	  (defined &utf8::is_utf8 && !utf8::is_utf8($_[0])) &&
514	  eval { require MIME::Base64 })
515      {
516	  $require{"MIME::Base64"}++;
517	  return "MIME::Base64::decode(\"" .
518	             MIME::Base64::encode($_[0],"") .
519		 "\")";
520      }
521      return "pack(\"H*\",\"" . unpack("H*", $_[0]) . "\")";
522  }
523
524  return $_;
525}
526
527my %esc = (
528    "\a" => "\\a",
529    "\b" => "\\b",
530    "\t" => "\\t",
531    "\n" => "\\n",
532    "\f" => "\\f",
533    "\r" => "\\r",
534    "\e" => "\\e",
535);
536
537# put a string value in double quotes
538sub quote {
539  local($_) = $_[0];
540  # If there are many '"' we might want to use qq() instead
541  s/([\\\"\@\$])/\\$1/g;
542  return qq("$_") unless /[^\040-\176]/;  # fast exit
543
544  s/([\a\b\t\n\f\r\e])/$esc{$1}/g;
545
546  # no need for 3 digits in escape for these
547  s/([\0-\037])(?!\d)/sprintf('\\%o',ord($1))/eg;
548
549  s/([\0-\037\177-\377])/sprintf('\\x%02X',ord($1))/eg;
550  s/([^\040-\176])/sprintf('\\x{%X}',ord($1))/eg;
551
552  return qq("$_");
553}
554
5551;
556
557__END__
558
559=head1 NAME
560
561Data::Dump - Pretty printing of data structures
562
563=head1 SYNOPSIS
564
565 use Data::Dump qw(dump);
566
567 $str = dump(@list);
568 @copy_of_list = eval $str;
569
570 # or use it for easy debug printout
571 use Data::Dump; dd localtime;
572
573=head1 DESCRIPTION
574
575This module provide a few functions that traverse their
576argument and produces a string as its result.  The string contains
577Perl code that, when C<eval>ed, produces a deep copy of the original
578arguments.
579
580The main feature of the module is that it strives to produce output
581that is easy to read.  Example:
582
583    @a = (1, [2, 3], {4 => 5});
584    dump(@a);
585
586Produces:
587
588    "(1, [2, 3], { 4 => 5 })"
589
590If you dump just a little data, it is output on a single line. If
591you dump data that is more complex or there is a lot of it, line breaks
592are automatically added to keep it easy to read.
593
594The following functions are provided (only the dd* functions are exported by default):
595
596=over
597
598=item dump( ... )
599
600=item pp( ... )
601
602Returns a string containing a Perl expression.  If you pass this
603string to Perl's built-in eval() function it should return a copy of
604the arguments you passed to dump().
605
606If you call the function with multiple arguments then the output will
607be wrapped in parenthesis "( ..., ... )".  If you call the function with a
608single argument the output will not have the wrapping.  If you call the function with
609a single scalar (non-reference) argument it will just return the
610scalar quoted if needed, but never break it into multiple lines.  If you
611pass multiple arguments or references to arrays of hashes then the
612return value might contain line breaks to format it for easier
613reading.  The returned string will never be "\n" terminated, even if
614contains multiple lines.  This allows code like this to place the
615semicolon in the expected place:
616
617   print '$obj = ', dump($obj), ";\n";
618
619If dump() is called in void context, then the dump is printed on
620STDERR and then "\n" terminated.  You might find this useful for quick
621debug printouts, but the dd*() functions might be better alternatives
622for this.
623
624There is no difference between dump() and pp(), except that dump()
625shares its name with a not-so-useful perl builtin.  Because of this
626some might want to avoid using that name.
627
628=item quote( $string )
629
630Returns a quoted version of the provided string.
631
632It differs from C<dump($string)> in that it will quote even numbers and
633not try to come up with clever expressions that might shorten the
634output.  If a non-scalar argument is provided then it's just stringified
635instead of traversed.
636
637=item dd( ... )
638
639=item ddx( ... )
640
641These functions will call dump() on their argument and print the
642result to STDOUT (actually, it's the currently selected output handle, but
643STDOUT is the default for that).
644
645The difference between them is only that ddx() will prefix the lines
646it prints with "# " and mark the first line with the file and line
647number where it was called.  This is meant to be useful for debug
648printouts of state within programs.
649
650=item dumpf( ..., \&filter )
651
652Short hand for calling the dump_filtered() function of L<Data::Dump::Filtered>.
653This works like dump(), but the last argument should be a filter callback
654function.  As objects are visited the filter callback is invoked and it
655can modify how the objects are dumped.
656
657=back
658
659=head1 CONFIGURATION
660
661There are a few global variables that can be set to modify the output
662generated by the dump functions.  It's wise to localize the setting of
663these.
664
665=over
666
667=item $Data::Dump::INDENT
668
669This holds the string that's used for indenting multiline data structures.
670It's default value is "  " (two spaces).  Set it to "" to suppress indentation.
671Setting it to "| " makes for nice visuals even if the dump output then fails to
672be valid Perl.
673
674=item $Data::Dump::TRY_BASE64
675
676How long must a binary string be before we try to use the base64 encoding
677for the dump output.  The default is 50.  Set it to 0 to disable base64 dumps.
678
679=back
680
681
682=head1 LIMITATIONS
683
684Code references will be dumped as C<< sub { ... } >>. Thus, C<eval>ing them will
685not reproduce the original routine.  The C<...>-operator used will also require
686perl-5.12 or better to be evaled.
687
688If you forget to explicitly import the C<dump> function, your code will
689core dump. That's because you just called the builtin C<dump> function
690by accident, which intentionally dumps core.  Because of this you can
691also import the same function as C<pp>, mnemonic for "pretty-print".
692
693=head1 HISTORY
694
695The C<Data::Dump> module grew out of frustration with Sarathy's
696in-most-cases-excellent C<Data::Dumper>.  Basic ideas and some code
697are shared with Sarathy's module.
698
699The C<Data::Dump> module provides a much simpler interface than
700C<Data::Dumper>.  No OO interface is available and there are fewer
701configuration options to worry about.  The other benefit is
702that the dump produced does not try to set any variables.  It only
703returns what is needed to produce a copy of the arguments.  This means
704that C<dump("foo")> simply returns C<'"foo"'>, and C<dump(1..3)> simply
705returns C<'(1, 2, 3)'>.
706
707=head1 SEE ALSO
708
709L<Data::Dump::Filtered>, L<Data::Dump::Trace>, L<Data::Dumper>, L<JSON>,
710L<Storable>
711
712=head1 AUTHORS
713
714The C<Data::Dump> module is written by Gisle Aas <gisle@aas.no>, based
715on C<Data::Dumper> by Gurusamy Sarathy <gsar@umich.edu>.
716
717 Copyright 1998-2010 Gisle Aas.
718 Copyright 1996-1998 Gurusamy Sarathy.
719
720This library is free software; you can redistribute it and/or
721modify it under the same terms as Perl itself.
722
723=cut
724