1#!/usr/bin/perl -w
2################################################################################
3#
4#  mktodo.pl -- generate baseline and todo files
5#
6# It makes the todo file for the single passed in perl binary.  If --base is
7# not specified it compiles with ppport.h.
8################################################################################
9#
10#  Version 3.x, Copyright (C) 2004-2013, Marcus Holland-Moritz.
11#  Version 2.x, Copyright (C) 2001, Paul Marquess.
12#  Version 1.x, Copyright (C) 1999, Kenneth Albanowski.
13#
14#  This program is free software; you can redistribute it and/or
15#  modify it under the same terms as Perl itself.
16#
17################################################################################
18
19use strict;
20use Getopt::Long;
21use Data::Dumper;
22use IO::File;
23use IO::Select;
24use Config;
25use Time::HiRes qw( gettimeofday tv_interval );
26
27require './devel/devtools.pl';
28
29our %opt = (
30  blead     => 0,     # ? Is this perl blead
31  debug     => 0,
32  base      => 0,     # Don't use ppport.h when generating
33  verbose   => 0,
34  check     => 1,
35  final     => "",
36 'todo-dir' => "",
37  todo      => "",    # If no --todo, this is a blead perl
38  shlib     => 'blib/arch/auto/Devel/PPPort/PPPort.so',
39);
40
41GetOptions(\%opt, qw(
42perl=s todo=i blead todo-dir=s version=s shlib=s debug=i base final=s verbose check!
43          )) or die;
44
45identify();
46
47my $todo_file;
48my $todo_version;
49if ($opt{todo}) {
50    $todo_file = "$opt{'todo-dir'}/$opt{todo}";
51    $todo_version = format_version_line($opt{todo});
52}
53
54# Pass this through the Make, to apicheck.pl
55$ENV{'DPPP_ARGUMENTS'} = "--todo-dir=$opt{'todo-dir'} --todo=$todo_version";
56
57my $test_name_re =   qr/ \b DPPP_test_ (?: \d _ )? (\w+) \b /x;
58
59print "\n", ident_str(), "\n\n";
60
61my $fullperl = `which $opt{perl}`;
62chomp $fullperl;
63
64$ENV{SKIP_SLOW_TESTS} = 1;
65
66# Generate the Makefile using the passed in perl
67regen_Makefile();
68
69# List of functions that are never considered undefined.  Add to as necessary
70my %stdsym = map { ($_ => 1) } qw (
71  acos
72  acosl
73  acosq
74  asin
75  asinl
76  asinq
77  atan
78  atan2
79  atan2l
80  atan2q
81  atanl
82  atanq
83  ceil
84  ceill
85  ceilq
86  cos
87  cosh
88  coshl
89  coshq
90  cosl
91  cosq
92  exit
93  exp
94  expl
95  expq
96  floor
97  floorl
98  floorq
99  fmod
100  fmodl
101  fmodq
102  log
103  log10
104  log10l
105  log10q
106  logl
107  logq
108  memcmp
109  memcpy
110  memmove
111  memset
112  pow
113  powl
114  powq
115  siglongjmp
116  sin
117  sinh
118  sinhl
119  sinhq
120  sinl
121  sinq
122  snprintf
123  sprintf
124  sqrt
125  sqrtl
126  sqrtq
127  strcmp
128  strlen
129  strncmp
130  tan
131  tanh
132  tanhl
133  tanhq
134  tanl
135  tanq
136  tolower
137  vsnprintf
138);
139
140# Initialize %sym so that the keys are all the Text symbols for this perl,
141# output from the system's 'nm'
142my %sym;
143for (`$Config{nm} $fullperl`) {
144  chomp;
145  /\s+T\s+(\w+)\s*$/ and $sym{$1}++;
146}
147keys %sym >= 50 or die "less than 50 symbols found in $fullperl\n";
148
149# %todo is initialized to be the symbols in the current todo file, like so:
150# {
151#   'UTF8_SAFE_SKIP' => 'U',
152#   'newSVsv_flags' => 'U',
153#   'newSVsv_nomg' => 'U',
154# }
155#
156# The values are the outputs from nm, plus 'E' from us, for Error
157my %todo = %{load_todo($todo_file, $todo_version)} if $opt{todo};
158
159my @recheck;
160
161# Get an exhaustive list from apicheck.i of symbols, what functions contain
162# them, and how many in each function.
163# symbol        fcn            count
164# ------        ---            -----
165# 'UV' => {
166#             'SvUVX'          => 1,
167#             'toFOLD_uvchr'   => 2,
168#             'sv_uni_display' => 1,
169#             ...
170# }
171my $symmap = get_apicheck_symbol_map();
172
173# In each iteration of the loop we create an apicheck.c.  This will contain a
174# generated wrapper function for each API function and macro.  The wrapper
175# contains one or more calls to its API element.  Then we attempt to compile
176# apicheck.c into apicheck.o.  If it compiles, then every API element exists
177# in this version of perl.  If not, we figure out which ones were undefined,
178# and set things up so that in the next iteration of the loop, the wrappers
179# for those elements are #ifdef'd out.
180for (;;) {
181  my $retry = 1;
182  my $trynm = 1;
183
184  regen_apicheck();
185
186retry:
187  my(@new, @already_in_sym, %seen);
188
189  my $r = run(qw(make));
190  $r->{didnotrun} and die "couldn't run make: $!\n" .
191        join('', @{$r->{stdout}})."\n---\n".join('', @{$r->{stderr}});
192
193  # If there were warnings, we ask the user before continuing when creating
194  # the base files of blead.  This leads to a potential early exit when things
195  # aren't working right.
196  my $is_blead = 0;
197  if ($opt{blead} && $opt{base}) {
198    undef $opt{blead};  # Only warn once.
199    $is_blead = 1;      # But let the code below know
200    if (@{$r->{stderr}}) {
201        print STDERR "Warnings and errors from compiling blead:\n";
202        print STDERR @{$r->{stderr}};
203        ask_or_quit("\nUnexpected warnings when compiling blead can lead to"
204                  . " wrong results.  Please examine the above list.\n"
205                  . "Shall I proceed?");
206    }
207    else {
208        print STDERR "blead compiled without warnings nor errors.\n"
209                   . "Proceeding with everything else\n\n";
210    }
211  }
212
213  # Examine stderr.  For each wrapper function listed in it, we create an
214  # 'E' (for error) entry.   If the function (possibly prefixed by '[Pp]erl')
215  # is in %sym, it is added to @already_in_sym.  Otherwise, @new.
216  for my $l (@{$r->{stderr}}) {
217    if ($l =~ $test_name_re) {
218      if (!$seen{$1}++) {
219        my @s = grep { exists $sym{$_} } $1, "Perl_$1", "perl_$1";
220        if (@s) {
221          push @already_in_sym, [$1, "E (@s)"];
222        }
223        else {
224          push @new, [$1, "E"];
225        }
226      }
227    }
228  }
229  print STDERR __LINE__, ": \@new after make", Dumper \@new if $opt{debug} > 6;
230
231  if ($r->{status} == 0) {
232    my @u;
233    my @usym;
234
235    # Here, apicheck.o was successfully created.  It likely will need
236    # functions from outside it in order to form a complete executable a.out.
237    # In the first iteration, look to see if all needed externs are available.
238    # (We don't actually try to create an a.out)
239    if ($trynm) {
240      @u = eval { find_undefined_symbols($fullperl, $opt{shlib}) };
241      warn "warning: $@" if $@;
242      $trynm = 0;
243    }
244
245    # If it didn't find any undefined symbols, everything should be working.
246    # Run the test suite.
247    unless (@u) {
248      $r = run(qw(make test));
249      $r->{didnotrun} and die "couldn't run make test: $!\n" .
250        join('', @{$r->{stdout}})."\n---\n".join('', @{$r->{stderr}});
251
252      $r->{status} == 0 and last;   # It worked!!
253
254      # Alas, something was wrong.  Add any undefined symbols listed in the
255      # output to our list
256      for my $l (@{$r->{stderr}}) {
257        if ($l =~ /undefined symbol: (\w+)/) {
258          push @u, $1;
259        }
260      }
261    }
262
263    # For each undefined symbol
264    for my $u (@u) {
265
266      # If this is an API symbol, $symmap->{$u} will exist and be a hash of
267      # keys, being all the symbols referred to within it (with their values
268      # btw being the count of occurrences in the element).
269      for my $m (keys %{$symmap->{$u}}) {
270
271        # pthread_[gs]etspecific() are undefined.  khw doesn't know why; these
272        # are Posix functions.  But we have a bunch of things depending on
273        # them, so it doesn't work unless we ignore this apparently spurious
274        # issue.
275        next if $u =~ / ^ pthread_[gs]etspecific $ /x;
276
277        if (!$seen{$m}++) {
278          my $pl = $m;
279          $pl =~ s/^[Pp]erl_//;
280          my @s = grep { exists $sym{$_} } $pl, "Perl_$pl", "perl_$pl";
281
282          # The comment for this entry that goes into the file that gets
283          # written includes any [Pp]erl prefix.
284          push @new, [$m, @s ? "U (@s)" : "U"];
285        }
286      }
287    }
288  }
289  print STDERR __LINE__, ": \@new after getting undefs", Dumper \@new
290                                                            if $opt{debug} > 6;
291
292  # Remove from @new all the current todo symbols
293  @new = grep !$todo{$_->[0]}, @new;
294  print STDERR __LINE__, ": \@new after removing current", Dumper \@new
295                                                            if $opt{debug} > 6;
296
297  # If none remain, start over with those we know about, minus the todo
298  # symbols.  khw doesn't understand why this is necessary
299  unless (@new) {
300    @new = grep !$todo{$_->[0]}, @already_in_sym;
301    print STDERR __LINE__, ": \@new after starting over", Dumper \@new
302                                                            if $opt{debug} > 6;
303  }
304
305  # This retries once if nothing new was found (khw guesses that is just to
306  # be sure, or maybe it's because we ran nm the first time through)
307  unless (@new) {
308    if ($retry > 0) {
309      $retry--;
310      regen_Makefile();
311      goto retry;
312    }
313    print Dumper($r);
314    die "no new TODO symbols found...";
315  }
316
317  # recheck symbols except undefined ones reported by the dynamic linker
318  push @recheck, map { $_->[0] } grep { $_->[1] !~ /^U/ } @new;
319
320  # Display each newly found undefined symbol, and add it to the list of todo
321  # symbols
322  if (@new) {
323    for (@new) {
324        display_sym('new', @$_);
325        $todo{$_->[0]} = $_->[1];
326    }
327
328    if ($is_blead) {
329        ask_or_quit("\nUndefined symbols in blead indicate a bug in blead\n"
330                  . "Shall I proceed?");
331    }
332  }
333
334  print STDERR __LINE__, ": %todo at end of iteration ", Dumper \%todo
335                                                            if $opt{debug} > 6;
336
337  # Write the revised todo, so that apicheck.c when generated in the next
338  # iteration will have these #ifdef'd out
339  write_todo($todo_file, $todo_version, \%todo);
340} # End of loop
341
342# If we are to check our work, do so.  This verifies that each symbol
343# identified above is really a problem in this version.  (khw doesn't know
344# under what circumstances this becomes an issue)
345#
346# We go through each symbol on the @recheck list, and create an apicheck.c
347# with it enabled.
348if ($opt{check}) {
349
350  # Create something like '%3d'
351  my $ifmt = '%' . length(scalar @recheck) . 'd';
352
353  my $t0 = [gettimeofday];
354
355  RECHECK: for my $i (0 .. $#recheck) {
356    my $sym = $recheck[$i];
357
358    # Assume it will work
359    my $cur = delete $todo{$sym};
360
361    # Give a progress report
362    display_sym('chk', $sym, $cur, sprintf(" [$ifmt/$ifmt, ETA %s]",
363               $i + 1, scalar @recheck, eta($t0, $i, scalar @recheck)));
364
365    # Write out the todo file without this symbol, meaning it will be enabled
366    # in the generated apicheck.c file
367    write_todo($todo_file, $todo_version, \%todo);
368
369    # E is not an nm symbol, but was added by us to indicate 'Error'
370    if ($cur eq "E (Perl_$sym)") {
371
372      # We can try a shortcut here.  Create an apicheck.c file for just this
373      # symbol.
374      regen_apicheck($sym);
375
376      my $r = run(qw(make test));
377
378      if (!$r->{didnotrun} && $r->{status} == 0) {
379
380        # Shortcut indicated that this function compiles..
381        display_sym('del', $sym, $cur);
382        next RECHECK;
383      }
384
385      # Here, the api file with just this entry failed to compile.  (khw
386      # doesn't know why we just don't give up on it now, but we don't.)  We
387      # drop down below to generate and compile a full apicheck.c with this
388      # symbol enabled.  (XXX Perhaps we could look at stderr and if it
389      # contained things about parameter mismatch, (which is a common
390      # occurrence), we could skip the steps below.)
391    }
392
393    # Either can't shortcut, or the shortcut indicated that the function
394    # doesn't compile in isolation.  Create, compile and test with this
395    # function/symbol enabled.  (Remember that this should have succeeded
396    # above to get to here when this symbol was disabled, so enabling just
397    # this one will tell us for sure that it works or doesn't work.  (khw
398    # wonders if this is actually a DAG, or perhaps with cycles, so this is
399    # under it all, insufficient.)
400    regen_Makefile();
401
402    my $r = run(qw(make test));
403
404    # This regenerated apicheck.c
405    dump_apicheck() if $opt{debug} > 3;
406
407    $r->{didnotrun} and die "couldn't run make test: $!\n" .
408        join('', @{$r->{stdout}})."\n---\n".join('', @{$r->{stderr}});
409
410    if ($r->{status} == 0) {    # This symbol compiles and tests ok, so retain
411                                # in this version
412      display_sym('del', $sym, $cur);
413    }
414    else { # Revert to this symbol is bad in this version
415      print STDERR __LINE__, ": symbol '$sym' not in this version\n"
416                                                            if $opt{debug} > 6;
417      $todo{$sym} = $cur;
418      write_todo($todo_file, $todo_version, \%todo);
419    }
420  }
421} # End of checking our work
422
423print STDERR __LINE__, ": %todo at end ", Dumper \%todo  if $opt{debug} > 6;
424write_todo($todo_file, $todo_version, \%todo);
425
426# If this is the earliest perl being tested, we can extend down our values to
427# include it.  (Remember, that we create files for the next higher version,
428# but this allows us to create a file for the lowest as well.)  This
429# effectively writes out all the known symbols of this earliest version as if
430# they came into existence during it.
431if ($opt{final}) {
432    my $file = "$opt{'todo-dir'}/$opt{final}";
433    my $version = format_version_line($opt{final});
434
435    regen_Makefile();
436    my $r = run(qw(make));
437    $r->{didnotrun} and die "couldn't run make: $!\n" .
438        join('', @{$r->{stdout}})."\n---\n".join('', @{$r->{stderr}});
439
440    my $symbols = read_sym(file => $opt{shlib}, options => [qw( --defined-only )]);
441    my @stuff = map { $_ =~ $test_name_re } keys %$symbols;
442    %todo = map { $_ => 'T' } @stuff;
443
444    print STDERR __LINE__, ": write at ", Dumper $file, $version, \%todo
445                                                            if $opt{debug} > 5;
446    write_todo($file, $version, \%todo);
447}
448
449# Clean up after ourselves
450$opt{debug} = 0;    # Don't care about failures
451run(qw(make realclean));
452
453exit 0;
454
455sub display_sym
456{
457  my($what, $sym, $reason, $extra) = @_;
458  $extra ||= '';
459  my %col = (
460    'new' => 'bold red',
461    'chk' => 'bold magenta',
462    'del' => 'bold green',
463  );
464  $what = colored("$what symbol", $col{$what});
465
466  printf "[%s] %s %-30s # %s%s\n",
467         $todo_version, $what, $sym, $reason, $extra;
468}
469
470sub regen_Makefile
471{
472  # We make sure to add rules for creating apicheck.c
473  my @mf_arg = ('--with-apicheck', 'OPTIMIZE=-O0 -w');
474
475  # It doesn't include ppport.h if generating the base files.
476  push @mf_arg, qw( DEFINE=-DDPPP_APICHECK_NO_PPPORT_H ) if $opt{base};
477
478  # just to be sure
479  my $debug = $opt{debug};
480  $opt{debug} = 0;    # Don't care about failures
481  run(qw(make realclean));
482  $opt{debug} = $debug;
483
484  my $r = run($fullperl, "Makefile.PL", @mf_arg);
485  unless ($r->{status} == 0) {
486      die "cannot run Makefile.PL: $!\n" .
487          join('', @{$r->{stdout}})."\n---\n".join('', @{$r->{stderr}});
488  }
489}
490
491sub regen_apicheck      # Regeneration can also occur by calling 'make'
492{
493  unlink qw(apicheck.c apicheck.o);
494  runtool({ out => '/dev/null' }, $fullperl, 'apicheck_c.PL', map { "--api=$_" } @_)
495      or die "cannot regenerate apicheck.c\n";
496  dump_apicheck() if $opt{debug} > 3;
497}
498
499sub dump_apicheck
500{
501    my $apicheck = "apicheck.c";
502    my $f = new IO::File $apicheck or die "cannot open $apicheck: $!\n";
503    my @lines = <$f>;
504    print STDERR __FILE__, ": ", __LINE__, ": $apicheck (",
505                                           scalar @lines,
506                                           " lines) for $fullperl";
507    print STDERR " and '" if @_;
508    print STDERR join "', '", @_;
509    print STDERR "'" if @_;
510    print STDERR ":\n";
511    my $n = 1;
512    print STDERR $n++, " ", $_ for @lines;
513}
514
515sub load_todo   # Return entries from $file; skip if the first line
516                # isn't $expver (expected version)
517{
518  my($file, $expver) = @_;
519
520  if (-e $file) {
521    my $f = new IO::File $file or die "cannot open $file: $!\n";
522    my $ver = <$f>;
523    chomp $ver;
524    if ($ver eq $expver) {
525      my %sym;
526      while (<$f>) {
527        chomp;
528        /^(\w+)\s+#\s+(.*)/ or goto nuke_file;
529        exists $sym{$1} and goto nuke_file;
530        $sym{$1} = $2;
531      }
532      return \%sym;
533    }
534
535nuke_file:
536    undef $f;
537    unlink $file or die "cannot remove $file: $!\n";
538  }
539
540  return {};
541}
542
543sub write_todo  # Write out the todo file.  The keys of %sym are known to not
544                # be in this version, hence are 'todo'
545{
546  my($file, $ver, $sym) = @_;
547  my $f;
548
549  $f = new IO::File ">$file" or die "cannot open $file: $!\n";
550  $f->print("$ver\n");
551
552  # Dictionary ordering, with only alphanumerics
553  for (sort dictionary_order keys %$sym) {
554    $f->print(sprintf "%-30s # %s\n", $_, $sym->{$_});
555  }
556
557  $f->close;
558}
559
560sub find_undefined_symbols
561{
562  # returns a list of undefined symbols in $shlib.  To be considered
563  # undefined, it must also not be defined in $perl.  Symbols that begin with
564  # underscore, or contain '@', or are some libc ones are not returned.
565  # Presumably, the list of libc could be expanded if necessary.
566
567  my($perl, $shlib) = @_;
568
569  my $ps = read_sym(file => $perl,  options => [qw( --defined-only   )]);
570  my $ls = read_sym(file => $shlib, options => [qw( --undefined-only )]);
571
572  my @undefined;
573
574  for my $sym (keys %$ls) {
575    next if $sym =~ /\@/ or $sym =~ /^_/ or exists $stdsym{$sym};
576    unless (exists $ps->{$sym}) {
577        print STDERR __LINE__, ": , Couldn't find '$sym' in $perl\n"
578                                                            if $opt{debug} > 4;
579        push @undefined, $sym;
580    }
581  }
582
583  print STDERR __LINE__, ": find_undef returning ", Dumper \@undefined
584                                                            if $opt{debug} > 4;
585  return @undefined;
586}
587
588sub read_sym
589{
590  my %opt = ( options => [], @_ );
591
592  my $r = run($Config{nm}, @{$opt{options}}, $opt{file});
593
594  if ($r->{didnotrun} or $r->{status}) {
595    die "cannot run $Config{nm}" .
596          join('', @{$r->{stdout}})."\n---\n".join('', @{$r->{stderr}});
597  }
598
599  my %sym;
600
601  for (@{$r->{stdout}}) {
602    chomp;
603    my($adr, $fmt, $sym) = /^\s*([[:xdigit:]]+)?\s+([ABCDGINRSTUVW?-])\s+(\S+)\s*$/i
604                           or die "cannot parse $Config{nm} output:\n[$_]\n";
605    $sym{$sym} = { format => $fmt };
606    $sym{$sym}{address} = $adr if defined $adr;
607  }
608
609  return \%sym;
610}
611
612sub get_apicheck_symbol_map
613{
614  my $r;
615
616  while (1) {
617
618    # Create apicheck.i
619    $r = run(qw(make apicheck.i));
620
621    # Quit the loop if it succeeded
622    last unless $r->{didnotrun} or $r->{status};
623
624    # Get the list of macros that had parameter issues.  These are marked as
625    # A, for absolute in nm terms
626    my $absolute_err = 'A';
627    my %sym = map { /error: macro "(\w+)" (?:requires|passed) \d+ argument/
628                    ? ($1 => $absolute_err)
629                    : ()
630                  } @{$r->{stderr}};
631
632    # Display these, and add them to the global %todo.
633    if (keys %sym) {
634      for my $s (sort dictionary_order keys %sym) {
635        if (defined $todo{$s} && $todo{$s} eq $absolute_err) {
636            # Otherwise could loop
637            die "cannot run make apicheck.i ($r->{didnotrun} / $r->{status}):\n".
638                join('', @{$r->{stdout}})."\n---\n".join('', @{$r->{stderr}});
639        }
640        $todo{$s} = $sym{$s};
641        display_sym('new', $s, $sym{$s});
642      }
643
644      # And rewrite the todo file, including these new symbols.
645      write_todo($todo_file, $todo_version, \%todo);
646
647      # Regenerate apicheck.c for the next iteration
648      regen_apicheck();
649    }
650    else {  # It failed for some other reason than parameter issues: give up
651      die "cannot run make apicheck.i ($r->{didnotrun} / $r->{status}):\n".
652          join('', @{$r->{stdout}})."\n---\n".join('', @{$r->{stderr}});
653    }
654  }
655
656  # Here, have an apicheck.i.  Read it in
657  my $fh = IO::File->new('apicheck.i')
658           or die "cannot open apicheck.i: $!";
659
660  local $_;
661  my %symmap;
662  my $cur;
663
664  while (<$fh>) {
665    print STDERR __LINE__, ": apicheck.i ", $_ if $opt{debug} > 5;
666    next if /^#/;
667
668    # We only care about lines within one of our DPPP_test_ functions.  If
669    # we're in one, $cur is set to the name of the current one.
670    if (! defined $cur) {   # Not within such a function; see if this starts
671                            # one
672      $_ =~ $test_name_re and $cur = $1;
673    }
674    else {
675
676      # For anything that looks like a symbol, note it as a key, and as its
677      # value, the name of the function.  Actually the value is another key,
678      # whose value is the count of this symbol's occurrences, so it looks
679      # like:
680      # 'UV' => {
681      #             'SvUVX' => 1,
682      #             'toFOLD_uvchr' => 2,
683      #             'sv_uni_display' => 1,
684      #             ...
685      # }
686      for my $sym (/\b([A-Za-z_]\w+)\b/g) {
687        $symmap{$sym}{$cur}++;
688      }
689
690      # This line marks the end of this function, as constructed by us.
691      undef $cur if /^}$/;
692    }
693  }
694
695  print STDERR __LINE__, ": load_todo returning ", Dumper \%symmap
696                                                            if $opt{debug} > 5;
697  return \%symmap;
698}
699