scanprov revision 1.2
1102644Snectar#!/usr/bin/perl -w 2102644Snectar$|=1; 3102644Snectar################################################################################ 4102644Snectar# 5102644Snectar# scanprov -- scan Perl headers for macros, and add known exceptions, and 6102644Snectar# functions we weren't able to otherwise find. Thus the purpose 7102644Snectar# of this file has been expanded beyond what its name says. 8102644Snectar# 9102644Snectar# Besides the normal options, 'mode=clean' is understood as 'write', but 10102644Snectar# first remove any scanprov lines added in previous runs of this. 11102644Snectar# 12102644Snectar# The lines added have a code to signify they are added by us: 13102644Snectar# F means it is a function in embed.fnc that the normal routines didn't find 14102644Snectar# K means it is a macro in config.h, hence is provided, and documented 15102644Snectar# M means it is a provided by D:P macro 16102644Snectar# X means it is a known exceptional item 17102644Snectar# Z means it is an unprovided macro without documentation 18102644Snectar# 19102644Snectar# The regeneration routines do not know the prototypes for the macros scanned 20102644Snectar# for, which is gotten from documentation in the source. (If they were 21102644Snectar# documented, they would be put in parts/apidoc.fnc, and test cases generated 22102644Snectar# for them in mktodo.pl). Therefore these are all undocumented, except for 23102644Snectar# things from config.h which are all documented there, and many of which are 24102644Snectar# just defined or not defined, and hence can't be tested. Thus looking for 25102644Snectar# them here is the most convenient option, which is why it's done here. 26102644Snectar# 27102644Snectar# The scope of this program has also expanded to look in almost all header 28102644Snectar# files for almost all macros that aren't documented nor provided. This 29102644Snectar# allows ppport.h --api-info=/foo/ to return when a given element actually 30102644Snectar# came into existence, which can be a time saver for developers of the perl 31102644Snectar# core. 32102644Snectar# 33102644Snectar# It would be best if people would add documentation to them in the perl 34102644Snectar# source, and then this portion of this function would be minimized. 35102644Snectar# 36102644Snectar# On Linux nm and other uses by D:P, these are the remaining unused capital 37102644Snectar# flags: HJLOQY 38102644Snectar# 39102644Snectar################################################################################ 40102644Snectar# 41102644Snectar# Version 3.x, Copyright (C) 2004-2013, Marcus Holland-Moritz. 42102644Snectar# Version 2.x, Copyright (C) 2001, Paul Marquess. 43102644Snectar# Version 1.x, Copyright (C) 1999, Kenneth Albanowski. 44102644Snectar# 45102644Snectar# This program is free software; you can redistribute it and/or 46102644Snectar# modify it under the same terms as Perl itself. 47102644Snectar# 48102644Snectar################################################################################ 49102644Snectar 50102644Snectaruse strict; 51102644Snectaruse Getopt::Long; 52102644Snectar 53102644Snectarrequire './parts/ppptools.pl'; 54102644Snectarrequire './parts/inc/inctools'; 55102644Snectarrequire './devel/devtools.pl'; 56102644Snectar 57102644Snectarour %opt = ( 58102644Snectar mode => 'check', 59102644Snectar install => '/tmp/perl/install/default', 60102644Snectar blead => 'bleadperl', 61102644Snectar debug => 0, 62102644Snectar 'debug-start' => "", 63102644Snectar); 64102644Snectar 65102644SnectarGetOptions(\%opt, qw( install=s mode=s blead=s debug=i debug-start=s)) or die; 66102644Snectar 67102644Snectarmy $clean = $opt{mode} eq 'clean'; 68102644Snectarmy $write = $clean || $opt{mode} eq 'write'; 69102644Snectarmy $debug = $opt{debug}; 70102644Snectar 71102644Snectar# Get the list of known macros. Functions are calculated separately below 72102644Snectarmy %embed = map { $_->{flags}{m} ? ( $_->{name} => 1 ) : () } 73102644Snectar parse_embed(qw(parts/embed.fnc parts/apidoc.fnc)); 74102644Snectar 75102644Snectar# @provided is set to everthing provided 7690926Snectarmy @provided = map { /^(\w+)/ ? $1 : () } `$^X ppport.h --list-provided`; 7790926Snectar 7890926Snectar# There are a few exceptions that have to be dealt with specially. Add these 7990926Snectar# to the list of things to scan for. 8090926Snectarmy $hard_to_test_ref = known_but_hard_to_test_for(); 8190926Snectarpush @provided, keys %$hard_to_test_ref; 8290926Snectar 8390926Snectarmy $base_dir = 'parts/base'; 8490926Snectarmy $todo_dir = 'parts/todo'; 8590926Snectar 8690926Snectar# The identifying text placed in every entry by this program 8790926Snectarmy $id_text = "added by $0"; 8890926Snectar 8990926Snectarif ($write) { 9090926Snectar 9190926Snectar # Get the list of files 9290926Snectar my @files = all_files_in_dir($base_dir); 9390926Snectar 9490926Snectar # If asked to, first strip out the results of previous incarnations of 9590926Snectar # this script 9690926Snectar if ($clean) { 9790926Snectar print "Cleaning previous $0 runs\n"; 9890926Snectar foreach my $file (@files) { 9990926Snectar open my $fh, "+<", $file or die "$file: $!\n"; 10090926Snectar my @lines = <$fh>; 10190926Snectar my $orig_count = @lines; 10290926Snectar @lines = grep { $_ !~ /$id_text/ } @lines; 10390926Snectar next if @lines == $orig_count; # No need to write if unchanged. 10490926Snectar truncate $fh, 0; 10590926Snectar seek $fh, 0, 0; 10690926Snectar print $fh @lines; 10790926Snectar close $fh or die "$file: $!\n"; 10890926Snectar } 10990926Snectar } 11090926Snectar 11190926Snectar # The file list is returned sorted, and so the min version is in the 0th 11290926Snectar # element 11390926Snectar my $file = $files[0]; 11490926Snectar my $min_perl = $file; 11590926Snectar $min_perl =~ s,.*/,,; # The name is the integer of __MIN_PERL__ 11690926Snectar 11790926Snectar # There are a very few special cases that we may not find in scanning, but 11890926Snectar # exist all the way back. Add them now to avoid throwing later things 11990926Snectar # off. 12090926Snectar print "-- $file --\n"; 12190926Snectar open my $fh, "+<", $file or die "$file: $!\n"; 12290926Snectar my @lines = <$fh>; 12390926Snectar my $count = @lines; 12490926Snectar for (qw(RETVAL CALL THIS)) { # These are also in hard_to_test_for(), 12590926Snectar # so can't be in blead, as they are skipped 12690926Snectar # in testing, so no real need to check that 12790926Snectar # they aren't dups. 12890926Snectar my $line = format_output_line($_, 'X'); 12990926Snectar next if grep { /$line/ } @lines; 13090926Snectar print "Adding $_ to $file\n"; 13190926Snectar push @lines, $line; 13290926Snectar } 13390926Snectar if ($count != @lines) { 13490926Snectar @lines = sort symbol_order @lines; 13590926Snectar truncate $fh, 0; 13690926Snectar seek $fh, 0, 0; 13790926Snectar print $fh @lines; 13890926Snectar } 13990926Snectar close $fh; 14090926Snectar 14190926Snectar # Now we're going to add the hard to test symbols. The hash has been 14290926Snectar # manually populated and commited, with the version number ppport supports 14390926Snectar # them to. 14490926Snectar # 14590926Snectar # This is a hash ref with the keys being all symbols found in all the 14690926Snectar # files in the directory, and the values being the perl versions of each 14790926Snectar # symbol. 14890926Snectar my $todo = parse_todo($todo_dir); 14990926Snectar 15090926Snectar # The keys of $hard_to_test_ref are the symbols, and the values are 15190926Snectar # subhashes, with each 'version' key being its proper perl version. 15290926Snectar # Below, we invert %hard_to_test, so that the keys are the version, and 15390926Snectar # the values are the symbols that go in that version 15490926Snectar my %add_by_version; 15590926Snectar for my $hard (keys %$hard_to_test_ref) { 15690926Snectar 15790926Snectar # But if someone ups the min version we support, we don't want to add 15890926Snectar # something less than that. 15990926Snectar my $version = int_parse_version($hard_to_test_ref->{$hard}); 16090926Snectar $version = $min_perl if $version < $min_perl; 16190926Snectar $version = format_version_line($version); 16290926Snectar 16390926Snectar push @{$add_by_version{$version}}, $hard 16490926Snectar unless grep { $todo->{$_}->{version} eq $hard } keys %$todo; 16590926Snectar } 16690926Snectar 16790926Snectar # Only a few files will have exceptions that apply to them. Rewrite each 16890926Snectar foreach my $version (keys %add_by_version) { 16990926Snectar my $file = "$todo_dir/" . int_parse_version($version); 17090926Snectar print "-- Adding known exceptions to $file --\n"; 17190926Snectar open my $fh, "+<", $file or die "$file: $!\n"; 17290926Snectar my @lines = <$fh>; 17390926Snectar my $count = @lines; 17490926Snectar push @lines, format_version_line($version) . "\n" unless @lines; 17590926Snectar foreach my $symbol (@{$add_by_version{$version}}) { 17690926Snectar my $line = format_output_line($symbol, 'X'); 17790926Snectar unless (grep { /$line/ } @lines) {; 17890926Snectar print "adding $symbol\n"; 17990926Snectar push @lines, $line unless grep { /$line/ } @lines; 18090926Snectar } 18190926Snectar } 18290926Snectar if (@lines != $count) { 18390926Snectar @lines = sort symbol_order @lines; 18490926Snectar truncate $fh, 0; 18590926Snectar seek $fh, 0, 0; 18690926Snectar print $fh @lines; 18790926Snectar } 18890926Snectar close $fh; 18990926Snectar } 19090926Snectar} 19190926Snectar 19290926Snectar# Now that we've added the exceptions to a few files, we can parse 19390926Snectar# and deal with all of them. 19490926Snectarmy $perls_ref = get_and_sort_perls(\%opt); 19590926Snectar 19690926Snectardie "Couldn't find any perls" unless @$perls_ref > 1; 19790926Snectar 19890926Snectarfind_first_mentions($perls_ref, # perls to look in 19990926Snectar \@provided, # List of symbol names to look for 20090926Snectar '*.h', # Look in all hdrs. 20190926Snectar 1, # Strip comments 20290926Snectar 'M' 20390926Snectar ); 20490926Snectar 20590926Snectar# Now look for functions that we didn't test in mktodo.pl, generally because 20690926Snectar# these were hidden behind #ifdef's. 20790926Snectarmy $base_ref = parse_todo($base_dir); 20890926Snectarmy @functions = parse_embed(qw(parts/embed.fnc)); 20990926Snectar 21090926Snectar# We could just gather data for the publicly available ones, but having this 21190926Snectar# information available for everything is useful. 21290926Snectar#@functions = grep { exists $_->{flags}{A} } @functions; 21390926Snectar 21490926Snectar# The ones we don't have info on are the ones in embed.fnc that aren't in the 21590926Snectar# base files. Certain of these will only be in the Perl_foo form. 21690926Snectarmy @missing = map { exists $base_ref->{$_->{name}} 21790926Snectar ? () 21890926Snectar : ((exists $_->{flags}{p} && exists $_->{flags}{o}) 21990926Snectar ? ((exists $base_ref->{$_->{"Perl_$_->{name}"}} 22090926Snectar ? () 22190926Snectar : "Perl_$_->{name}")) 22290926Snectar : $_->{name}) 22390926Snectar } @functions; 22490926Snectar 22590926Snectar# These symbols will be found in the autogen'd files, and they may be 22690926Snectar# commented out in them. 22790926Snectarfind_first_mentions($perls_ref, 22890926Snectar \@missing, 22990926Snectar [ 'embed.h', 'proto.h' ], 23090926Snectar 0, # Don't strip comments 23190926Snectar 'F' 23290926Snectar ); 23390926Snectar 23490926Snectarsub symbol_order # Sort based on first word on line 23590926Snectar{ 23690926Snectar my $stripped_a = $a =~ s/ ^ \s* //rx; 23790926Snectar $stripped_a =~ s/ \s.* //x; 23890926Snectar 23990926Snectar my $stripped_b = $b =~ s/ ^ \s* //rx; 24090926Snectar $stripped_b =~ s/ \s.* //x; 24190926Snectar 24290926Snectar return dictionary_order($stripped_a, $stripped_b); 24390926Snectar} 24490926Snectar 24590926Snectarsub format_output_line 24690926Snectar{ 24790926Snectar my $sym = shift; 24890926Snectar my $code = shift; 24990926Snectar 25090926Snectar return sprintf "%-30s # $code $id_text\n", $sym; 25190926Snectar} 25290926Snectar 25390926Snectarsub find_first_mentions 25490926Snectar{ 25590926Snectar my $perls_ref = shift; # List of perls to look in 25690926Snectar my $look_for_ref = shift; # List of symbol names to look for 25790926Snectar my $hdrs = shift; # Glob of hdrs to look in 25890926Snectar my $strip_comments = shift; 25990926Snectar my $code = shift; # Mark entries as having this type 26090926Snectar 26190926Snectar use feature 'state'; 26290926Snectar state $first_perl = 1; 26390926Snectar 26490926Snectar $hdrs = [ $hdrs ] unless ref $hdrs; 26590926Snectar 26690926Snectar my %remaining; 26790926Snectar $remaining{$_} = $code for @$look_for_ref; 26890926Snectar 26990926Snectar my %v; 27090926Snectar 27190926Snectar # We look in descending order of perl versions. Each time through the 27290926Snectar # loop %remaining is narrowed. 27390926Snectar for my $p (@$perls_ref) { 27490926Snectar print "checking perl $p->{version}...\n"; 27590926Snectar 27690926Snectar # Get the hdr files associated with this version 27790926Snectar my $archlib = `$p->{path} -MConfig -l -e 'print \$Config{archlib}'`; 27890926Snectar chomp $archlib; 27990926Snectar local @ARGV; 28078527Sassar push @ARGV, glob "$archlib/CORE/$_" for @$hdrs; 28178527Sassar 28290926Snectar # %sym's keys are every single thing that looks like an identifier 28390926Snectar # (beginning with a non-digit \w, followed by \w*) that occurs in any 28490926Snectar # header, regardless of where (outside of comments). For macros, it 28578527Sassar # can't end in an underscore, nor be like 'AbCd', which are marks for 28678527Sassar # internal. 28778527Sassar my %sym; 28878527Sassar 28978527Sassar local $/ = undef; 29078527Sassar while (<<>>) { # Read in the whole next file as one string. 29178527Sassar 29278527Sassar # This would override function definitions with macro ones 29378527Sassar next if $code eq 'M' && $ARGV =~ m! / embed\.h $ !x; 29478527Sassar 29578527Sassar my $is_config_h = $ARGV =~ m! / config\.h $ !x; 29678527Sassar 29778527Sassar my $contents = $_; 29878527Sassar 29978527Sassar # Strip initial '/*' in config.h /*#define... lines. This just 30078527Sassar # means the item isn't available on the platform this program is 30178527Sassar # being run on. 30278527Sassar $contents =~ s! ^ /\* \s* (?=\#\s*define\s) !!mx if $is_config_h; 30378527Sassar 30478527Sassar # Strip comments, from perl faq 30578527Sassar if ($strip_comments) { 30678527Sassar $contents =~ s#/\*[^*]*\*+([^/*][^*]*\*+)*/|("(\\.|[^"\\])*"|'(\\.|[^'\\])*'|.[^/"'\\]*)#defined $2 ? $2 : ""#gse; 30778527Sassar } 30878527Sassar 30978527Sassar # For macros, we look for #defines 31078527Sassar if ($code eq 'M') { 31178527Sassar my %defines; 31278527Sassar 31378527Sassar while ($contents =~ m/ ^ \s* \# \s* define \s+ 31478527Sassar 31578527Sassar # A symbol not ending in underscore 31678527Sassar ( [A-Za-z][_A-Za-z0-9]*[A-Za-z0-9] ) 31778527Sassar /mxg) 31878527Sassar { 31978527Sassar my $this_define = $1; 32078527Sassar 32178527Sassar # These are internal and not of external interest, so just 32278527Sassar # noise if we were to index them 32378527Sassar next if $this_define =~ / ^ PERL_ARGS_ASSERT /x; 32478527Sassar 32578527Sassar # Names like AbCd are internal 32678527Sassar next if $this_define =~ /[[:upper:]][[:lower:]][[:upper:]][[:lower:]]/; 32778527Sassar 32878527Sassar $defines{$this_define}++; 32978527Sassar } 33078527Sassar $sym{$_}++ for keys %defines; 33178527Sassar 33278527Sassar # For functions, etc we get all the symbols for the latest 33378527Sassar # perl passed in, but for macros, it is just the ones for the 33478527Sassar # known documented ones, and we have to find the rest. This 33578527Sassar # allows us to keep the logic for that in just one place: 33678527Sassar # here. 33778527Sassar if ($first_perl) { 33878527Sassar 33978527Sassar # config.h symbols are documented; the rest aren't, so use 34078527Sassar # different flags so downstream processing knows which are 34178527Sassar # which. 34278527Sassar if ($is_config_h) { 34378527Sassar foreach my $define (keys %defines) { 34478527Sassar $remaining{$define} = 'K'; 34578527Sassar } 34678527Sassar } 34778527Sassar else { 34878527Sassar foreach my $define (keys %defines) { 34978527Sassar # Don't override input 'M' symbols. 35078527Sassar $remaining{$define} = 'Z' 35178527Sassar unless defined $remaining{$define}; 35278527Sassar } 35378527Sassar } 35478527Sassar } 35578527Sassar } 35678527Sassar else { # Look for potential function names; remember comments 35772445Sassar # have been stripped off. 35872445Sassar $sym{$_}++ for /(\b[^\W\d]\w*)/g; 35972445Sassar } 36072445Sassar } 36172445Sassar 36272445Sassar # %remaining is narrowed to include only those identifier-like things 36372445Sassar # that are mentioned in one of the input hdrs in this release. (If it 36472445Sassar # isn't even mentioned, it won't exist in the release.) For those not 36572445Sassar # mentioned, a key is added of the identifier-like thing in %v. It is 36672445Sassar # a subkey of this release's "todo" release, which is the next higher 36772445Sassar # one. If we are at version n, we have already done version n+1 and 36872445Sassar # the provided element was mentioned there, and now it no longer is. 36972445Sassar # We take that to mean that to mean that the element became provided 37072445Sassar # for in n+1. 37172445Sassar foreach my $symbol (keys %remaining) { 37272445Sassar next if defined $sym{$symbol}; # Still exists in this release 37372445Sassar 37472445Sassar # Gone in this release, must have come into existence in the next 37572445Sassar # higher one. 37672445Sassar $v{$p->{todo}}{$symbol} = delete $remaining{$symbol}; 37772445Sassar } 37872445Sassar 37972445Sassar $first_perl = 0; 38072445Sassar } 38172445Sassar 38272445Sassar # After all releases, assume that anything still defined came into 38372445Sassar # existence in that earliest release. 38472445Sassar $v{$perls_ref->[-1]{file}}{$_} = $remaining{$_} for keys %remaining; 38572445Sassar 38672445Sassar # Read in the parts/base files. The hash ref has keys being all symbols 38772445Sassar # found in all the files in base/, which are all we are concerned with 38872445Sassar # became defined in. 38972445Sassar my $base_ref = parse_todo($base_dir); 39072445Sassar 39172445Sassar 39272445Sassar # Now add the results from above. At this point, The keys of %v are the 7 39372445Sassar # digit BCD version numbers, and their subkeys are the symbols provided by 39472445Sassar # D:P that are first mentioned in this version, like this: 39572445Sassar # '5009002' => { 39672445Sassar # 'MY_CXT_CLONE' => 1, 39772445Sassar # 'SV_NOSTEAL' => 1, 39872445Sassar # 'UTF8_MAXBYTES' => 1 39972445Sassar # }, 40072445Sassar 40172445Sassar for my $version (keys %v) { 40272445Sassar 40372445Sassar # Things listed in blead (the most recent file) are special. They are 40472445Sassar # there by default because we haven't found them anywhere, so they 40572445Sassar # don't really exist as far as we can determine, so shouldn't be 40672445Sassar # listed as existing. 40772445Sassar next if $version > $perls_ref->[0]->{file}; 40872445Sassar 40972445Sassar # @new becomes the symbols for $version not already in the file for it 41072445Sassar my @new = sort symbol_order grep { !exists $base_ref->{$_} } 41172445Sassar keys %{$v{$version}}; 41272445Sassar @new or next; # Nothing new, skip writing 41372445Sassar 41472445Sassar my $file = $version; 41572445Sassar $file =~ s/\.//g; 41672445Sassar $file = "$base_dir/$file"; 41772445Sassar -e $file or die "non-existent: $file\n"; 41872445Sassar print "-- $file --\n"; 41972445Sassar if ($write) { 42072445Sassar open my $fh, "+<", $file or die "$file: $!\n"; 42172445Sassar my @lines = <$fh>; 42272445Sassar my $count = @lines; 42372445Sassar for my $new (@new) { 42472445Sassar my $line = format_output_line($new, $v{$version}{$new}); 42572445Sassar next if grep { /$line/ } @lines; 42672445Sassar print "adding $new\n"; 42772445Sassar push @lines, $line; 42872445Sassar } 42972445Sassar if (@lines != $count) { 43072445Sassar @lines = sort symbol_order @lines; 43172445Sassar truncate $fh, 0; 43272445Sassar seek $fh, 0, 0; 43372445Sassar print $fh @lines; 43472445Sassar } 43572445Sassar close $fh; 43672445Sassar } 43772445Sassar } 43872445Sassar} 43972445Sassar