1#!/usr/bin/perl -w 2 3# Try opening libperl.a with nm, and verifying it has the kind of 4# symbols we expect, and no symbols we should avoid. 5# 6# Fail softly, expect things only on known platforms: 7# - linux, x86 only (ppc linux has odd symbol tables) 8# - darwin (OS X), both x86 and ppc 9# - freebsd 10# and on other platforms, and if things seem odd, just give up (skip_all). 11# 12# Symbol types for LTO builds don't seem to match their final section, so 13# skip on LTO builds too. 14# 15# Debugging tip: nm output (this script's input) can be faked by 16# giving one command line argument for this script: it should be 17# either the filename to read, or "-" for STDIN. You can also append 18# "@style" (where style is a supported nm style, like "gnu" or "darwin") 19# to this filename for "cross-parsing". 20# 21# Some terminology: 22# - "text" symbols are code 23# - "data" symbols are data (duh), with subdivisions: 24# - "bss": (Block-Started-by-Symbol: originally from IBM assembler...), 25# uninitialized data, which often even doesn't exist in the object 26# file as such, only its size does, which is then created on demand 27# by the loader 28# - "const": initialized read-only data, like string literals 29# - "common": uninitialized data unless initialized... 30# (the full story is too long for here, see "man nm") 31# - "data": initialized read-write data 32# (somewhat confusingly below: "data data", but it makes code simpler) 33# - "undefined": external symbol referred to by an object, 34# most likely a text symbol. Can be either a symbol defined by 35# a Perl object file but referred to by other Perl object files, 36# or a completely external symbol from libc, or other system libraries. 37 38BEGIN { 39 chdir 't' if -d 't'; 40 @INC = '../lib'; 41 require "./test.pl"; 42} 43 44use strict; 45 46use Config; 47 48if ($Config{cc} =~ /g\+\+/) { 49 # XXX Could use c++filt, maybe. 50 skip_all "on g++"; 51} 52 53# ccname is gcc for both gcc and clang 54if ($Config{ccname} eq "gcc" && $Config{ccflags} =~ /-flto\b/) { 55 # If we compile with gcc nm marks PL_no_mem as "D" (normal data) rather than a R (read only) 56 # but the symbol still ends up in the .rodata section of the image on linking. 57 # If we compile with clang 14, nm marks PL_no_mem as "T" (text, aka code) rather than R 58 # but the symbol still ends up in the .rodata section on linking. 59 skip_all "LTO libperl.a flags don't match the final linker sections"; 60} 61 62my $libperl_a; 63 64for my $f (qw(../libperl.a libperl.a)) { 65 if (-f $f) { 66 $libperl_a = $f; 67 last; 68 } 69} 70 71unless (defined $libperl_a) { 72 skip_all "no libperl.a"; 73} 74 75print "# \$^O = $^O\n"; 76print "# \$Config{archname} = $Config{archname}\n"; 77print "# \$Config{cc} = $Config{cc}\n"; 78print "# libperl = $libperl_a\n"; 79 80my $nm; 81my $nm_opt = ''; 82my $nm_style; 83my $nm_fh; 84my $nm_err_tmp = "libperl$$"; 85 86END { 87 # this is still executed when we skip_all above, avoid a warning 88 unlink $nm_err_tmp if $nm_err_tmp; 89} 90 91my $fake_input; 92my $fake_style; 93 94if (@ARGV == 1) { 95 $fake_input = shift @ARGV; 96 print "# Faking nm output from $fake_input\n"; 97 if ($fake_input =~ s/\@(.+)$//) { 98 $fake_style = $1; 99 print "# Faking nm style from $fake_style\n"; 100 if ($fake_style eq 'gnu' || 101 $fake_style eq 'linux' || 102 $fake_style eq 'freebsd') { 103 $nm_style = 'gnu' 104 } elsif ($fake_style eq 'darwin' || $fake_style eq 'osx') { 105 $nm_style = 'darwin' 106 } else { 107 die "$0: Unknown explicit nm style '$fake_style'\n"; 108 } 109 } 110} 111 112unless (defined $nm_style) { 113 if ($^O eq 'linux') { 114 # The 'gnu' style could be equally well be called 'bsd' style, 115 # since the output format of the GNU binutils nm is really BSD. 116 $nm_style = 'gnu'; 117 } elsif ($^O eq 'freebsd') { 118 $nm_style = 'gnu'; 119 } elsif ($^O eq 'darwin') { 120 $nm_style = 'darwin'; 121 } 122} 123 124if (defined $nm_style) { 125 if ($nm_style eq 'gnu') { 126 $nm = '/usr/bin/nm'; 127 } elsif ($nm_style eq 'darwin') { 128 $nm = '/usr/bin/nm'; 129 # With the -m option we get better information than the BSD-like 130 # default: with the default, a lot of symbols get dumped into 'S' 131 # or 's', for example one cannot tell the difference between const 132 # and non-const data symbols. 133 $nm_opt = '-m'; 134 } else { 135 die "$0: Unexpected nm style '$nm_style'\n"; 136 } 137} 138 139if ($^O eq 'linux' && $Config{archname} !~ /^(?:x|i6)86/) { 140 # For example in ppc most (but not all!) code symbols are placed 141 # in 'D' (data), not in ' T '. We cannot work under such conditions. 142 skip_all "linux but archname $Config{archname} not x86*"; 143} 144 145unless (defined $nm) { 146 skip_all "no nm"; 147} 148 149unless (defined $nm_style) { 150 skip_all "no nm style"; 151} 152 153print "# nm = $nm\n"; 154print "# nm_style = $nm_style\n"; 155print "# nm_opt = $nm_opt\n"; 156 157unless (-x $nm) { 158 skip_all "no executable nm $nm"; 159} 160 161if ($nm_style eq 'gnu' && !defined $fake_style) { 162 open(my $gnu_verify, "$nm --version|") or 163 skip_all "nm failed: $!"; 164 my $gnu_verified; 165 while (<$gnu_verify>) { 166 if (/^GNU nm/) { 167 $gnu_verified = 1; 168 last; 169 } 170 } 171 unless ($gnu_verified) { 172 skip_all "no GNU nm"; 173 } 174} 175 176if (defined $fake_input) { 177 if ($fake_input eq '-') { 178 open($nm_fh, "<&STDIN") or 179 skip_all "Duping STDIN failed: $!"; 180 } else { 181 open($nm_fh, "<", $fake_input) or 182 skip_all "Opening '$fake_input' failed: $!"; 183 } 184 undef $nm_err_tmp; # In this case there will be no nm errors. 185} else { 186 print qq{# command: "$nm $nm_opt $libperl_a 2>$nm_err_tmp |"\n}; 187 open($nm_fh, "$nm $nm_opt $libperl_a 2>$nm_err_tmp |") or 188 skip_all "$nm $nm_opt $libperl_a failed: $!"; 189} 190 191sub is_perlish_symbol { 192 $_[0] =~ /^(?:PL_|Perl|PerlIO)/; 193} 194 195# XXX Implement "internal test" for this script (option -t?) 196# to verify that the parsing does what it's intended to. 197 198sub nm_parse_gnu { 199 my $symbols = shift; 200 my $line = $_; 201 if (m{^(\w+\.o):$}) { 202 # object file name 203 $symbols->{obj}{$1}++; 204 $symbols->{o} = $1; 205 return; 206 } else { 207 die "$0: undefined current object: $line" 208 unless defined $symbols->{o}; 209 # 64-bit systems have 16 hexdigits, 32-bit systems have 8. 210 if (s/^[0-9a-f]{8}(?:[0-9a-f]{8})? //) { 211 if (/^[Rr] (\w+)$/) { 212 # R: read only (const) 213 $symbols->{data}{const}{$1}{$symbols->{o}}++; 214 } elsif (/^r .+$/) { 215 # Skip local const (read only). 216 } elsif (/^([Tti]) (\w+)(\..+)?$/) { 217 $symbols->{text}{$2}{$symbols->{o}}{$1}++; 218 } elsif (/^C (\w+)$/) { 219 $symbols->{data}{common}{$1}{$symbols->{o}}++; 220 } elsif (/^[BbSs] (\w+)(\.\d+)?$/) { 221 # Bb: uninitialized data (bss) 222 # Ss: uninitialized data "for small objects" 223 $symbols->{data}{bss}{$1}{$symbols->{o}}++; 224 } elsif (/^D _LIB_VERSION$/) { 225 # Skip the _LIB_VERSION (not ours, probably libm) 226 } elsif (/^[DdGg] (\w+)$/) { 227 # Dd: initialized data 228 # Gg: initialized "for small objects" 229 $symbols->{data}{data}{$1}{$symbols->{o}}++; 230 } elsif (/^. \.?(\w+)$/) { 231 # Skip the unknown types. 232 print "# Unknown type: $line ($symbols->{o})\n"; 233 } 234 return; 235 } elsif (/^ {8}(?: {8})? U _?(\w+)$/) { 236 my ($symbol) = $1; 237 return if is_perlish_symbol($symbol); 238 $symbols->{undef}{$symbol}{$symbols->{o}}++; 239 return; 240 } 241 } 242 print "# Unexpected nm output '$line' ($symbols->{o})\n"; 243} 244 245sub nm_parse_darwin { 246 my $symbols = shift; 247 my $line = $_; 248 if (m{^(?:.+)?libperl\.a\((\w+\.o)\):$}) { 249 # object file name 250 $symbols->{obj}{$1}++; 251 $symbols->{o} = $1; 252 return; 253 } else { 254 if ($^V < v5.39 && !defined $symbols->{o}) { 255 skip_all "nm parser requires an update on Darwin"; 256 } 257 258 die "$0: undefined current object: $line" unless defined $symbols->{o}; 259 # 64-bit systems have 16 hexdigits, 32-bit systems have 8. 260 if (s/^[0-9a-f]{8}(?:[0-9a-f]{8})? //) { 261 # String literals can live in different sections 262 # depending on the compiler and os release, assumedly 263 # also linker flags. 264 if (/^\(__TEXT,__(?:const|(?:asan_)?cstring|literal\d+)\) (?:non-)?external _?(\w+)(\.\w+)?$/) { 265 my ($symbol, $suffix) = ($1, $2); 266 # Ignore function-local constants like 267 # _Perl_av_extend_guts.oom_array_extend 268 return if defined $suffix && /__TEXT,__const/; 269 # Ignore the cstring unnamed strings. 270 return if $symbol =~ /^L\.str\d+$/; 271 $symbols->{data}{const}{$symbol}{$symbols->{o}}++; 272 } elsif (/^\(__TEXT,__text\) ((?:non-)?external) _(\w+)$/) { 273 my ($exp, $sym) = ($1, $2); 274 $symbols->{text}{$sym}{$symbols->{o}}{$exp =~ /^non/ ? 't' : 'T'}++; 275 } elsif (/^\(__DATA,__\w*?(const|data|bss|common)\w*\) (?:non-)?external _?(\w+)(\.\w+)?$/) { 276 my ($dtype, $symbol, $suffix) = ($1, $2, $3); 277 # Ignore function-local constants like 278 # _Perl_pp_gmtime.dayname 279 return if defined $suffix; 280 $symbols->{data}{$dtype}{$symbol}{$symbols->{o}}++; 281 } elsif (/^\(__DATA,__const\) non-external _\.memset_pattern\d*$/) { 282 # Skip this, whatever it is (some inlined leakage from 283 # darwin libc?) 284 } elsif (/^\(__TEXT,__eh_frame/) { 285 # Skip the eh_frame (exception handling) symbols. 286 return; 287 } elsif (/^\(__\w+,__\w+\) /) { 288 # Skip the unknown types. 289 print "# Unknown type: $line ($symbols->{o})\n"; 290 } 291 return; 292 } elsif (/^ {8}(?: {8})? \(undefined(?: \[lazy bound\])?\) external _?(.+)/) { 293 # darwin/ppc marks most undefined text symbols 294 # as "[lazy bound]". 295 my ($symbol) = $1 =~ s/\$UNIX2003\z//r; 296 return if is_perlish_symbol($symbol); 297 $symbols->{undef}{$symbol}{$symbols->{o}}++; 298 return; 299 } 300 } 301 print "# Unexpected nm output '$line' ($symbols->{o})\n"; 302} 303 304my $nm_parse; 305 306if ($nm_style eq 'gnu') { 307 $nm_parse = \&nm_parse_gnu; 308} elsif ($nm_style eq 'darwin') { 309 $nm_parse = \&nm_parse_darwin; 310} 311 312unless (defined $nm_parse) { 313 skip_all "no nm parser ($nm_style $nm_style, \$^O $^O)"; 314} 315 316my %symbols; 317 318while (<$nm_fh>) { 319 next if /^$/; 320 chomp; 321 $nm_parse->(\%symbols); 322} 323 324# use Data::Dumper; print Dumper(\%symbols); 325 326# Something went awfully wrong. Wrong nm? Wrong options? 327unless (keys %symbols) { 328 skip_all "no symbols\n"; 329} 330unless (exists $symbols{text}) { 331 skip_all "no text symbols\n"; 332} 333 334# These should always be true for everyone. 335 336ok($symbols{obj}{'util.o'}, "has object util.o"); 337ok($symbols{text}{'Perl_croak'}{'util.o'}, "has text Perl_croak in util.o"); 338ok(exists $symbols{data}{const}, "has data const symbols"); 339ok($symbols{data}{const}{PL_no_mem}{'globals.o'}, "has PL_no_mem"); 340 341my $nocommon = $Config{ccflags} =~ /-fno-common/ ? 1 : 0; 342 343print "# nocommon = $nocommon\n"; 344 345my %data_symbols; 346 347for my $dtype (sort keys %{$symbols{data}}) { 348 for my $symbol (sort keys %{$symbols{data}{$dtype}}) { 349 $data_symbols{$symbol}++; 350 } 351} 352 353if ( !$symbols{data}{common} ) { 354 # This is likely because Perl was compiled with 355 # -Accflags="-fno-common" 356 $symbols{data}{common} = $symbols{data}{bss}; 357} 358 359ok($symbols{data}{common}{PL_hash_seed_w}{'globals.o'}, "has PL_hash_seed_w"); 360ok($symbols{data}{data}{PL_ppaddr}{'globals.o'}, "has PL_ppaddr"); 361 362# See the comments in the beginning for what "undefined symbols" 363# really means. We *should* have many of those, that is a good thing. 364ok(keys %{$symbols{undef}}, "has undefined symbols"); 365 366# There are certain symbols we expect to see. 367 368# chmod, socket, getenv, sigaction, exp, time are system/library 369# calls that should each see at least one use. exp can be expl 370# if so configured. 371my %expected = ( 372 chmod => undef, # There is no Configure symbol for chmod. 373 socket => 'd_socket', 374 getenv => undef, # There is no Configure symbol for getenv, 375 sigaction => 'd_sigaction', 376 time => 'd_time', 377 ); 378 379if ($Config{uselongdouble} && $Config{longdblsize} > $Config{doublesize}) { 380 $expected{expl} = undef; # There is no Configure symbol for expl. 381} elsif ($Config{usequadmath}) { 382 $expected{expq} = undef; # There is no Configure symbol for expq. 383} else { 384 $expected{exp} = undef; # There is no Configure symbol for exp. 385} 386 387# DynaLoader will use dlopen, unless we are building static, 388# and it is used in the platforms we are supporting in this test. 389if ($Config{usedl} ) { 390 $expected{dlopen} = 'd_dlopen'; 391} 392 393for my $symbol (sort keys %expected) { 394 if (defined $expected{$symbol} && !$Config{$expected{$symbol}}) { 395 SKIP: { 396 skip("no $symbol"); 397 } 398 next; 399 } 400 my @o = exists $symbols{undef}{$symbol} ? 401 sort keys %{ $symbols{undef}{$symbol} } : (); 402 ok(@o, "uses $symbol (@o)"); 403} 404 405# There are certain symbols we expect NOT to see. 406# 407# gets is horribly unsafe. 408# 409# fgets should not be used (Perl has its own API, sv_gets), 410# even without perlio. 411# 412# tmpfile is unsafe. 413# 414# strcat, strcpy, strncat, strncpy are unsafe. 415# 416# sprintf and vsprintf should not be used because 417# Perl has its own safer and more portable implementations. 418# (One exception: for certain floating point outputs 419# the native sprintf is still used in some platforms, see below.) 420# 421# atoi has unsafe and undefined failure modes, and is affected by locale. 422# Its cousins include atol and atoll. 423# 424# strtol and strtoul are affected by locale. 425# Cousins include strtoq. 426# 427# system should not be used, use pp_system or my_popen. 428# 429 430my %unexpected; 431 432for my $str (qw(system)) { 433 $unexpected{$str} = "d_$str"; 434} 435 436for my $stdio (qw(gets fgets tmpfile sprintf vsprintf)) { 437 $unexpected{$stdio} = undef; # No Configure symbol for these. 438} 439for my $str (qw(strcat strcpy strncat strncpy)) { 440 $unexpected{$str} = undef; # No Configure symbol for these. 441} 442 443$unexpected{atoi} = undef; # No Configure symbol for atoi. 444$unexpected{atol} = undef; # No Configure symbol for atol. 445 446for my $str (qw(atoll strtol strtoul strtoq)) { 447 $unexpected{$str} = "d_$str"; 448} 449 450for my $symbol (sort keys %unexpected) { 451 if (defined $unexpected{$symbol} && !$Config{$unexpected{$symbol}}) { 452 SKIP: { 453 skip("no $symbol"); 454 } 455 next; 456 } 457 my @o = exists $symbols{undef}{$symbol} ? 458 sort keys %{ $symbols{undef}{$symbol} } : (); 459 # While sprintf() is bad in the general case, 460 # some platforms implement Gconvert via sprintf, in sv.o. 461 if ($symbol eq 'sprintf' && 462 $Config{d_Gconvert} =~ /^sprintf/ && 463 @o == 1 && $o[0] eq 'sv.o') { 464 SKIP: { 465 skip("uses sprintf for Gconvert in sv.o"); 466 } 467 } else { 468 is(@o, 0, "uses no $symbol (@o)"); 469 } 470} 471 472# Check that any text symbols named S_ are not exported. 473my $export_S_prefix = 0; 474for my $t (sort grep { /^S_/ } keys %{$symbols{text}}) { 475 for my $o (sort keys %{$symbols{text}{$t}}) { 476 if (exists $symbols{text}{$t}{$o}{T}) { 477 fail($t, "$t exported from $o"); 478 $export_S_prefix++; 479 } 480 } 481} 482is($export_S_prefix, 0, "no S_ exports"); 483 484if (defined $nm_err_tmp) { 485 if (open(my $nm_err_fh, $nm_err_tmp)) { 486 my $error; 487 while (<$nm_err_fh>) { 488 # OS X has weird error where nm warns about 489 # "no name list" but then outputs fine. 490 # llvm-nm may also complain about 'no symbols'. In some 491 # versions this is exactly the string "no symbols\n" but in later 492 # versions becomes a string followed by ": no symbols\n". For this 493 # test it is typically "../libperl.a:perlapi.o: no symbols\n" 494 if ( $^O eq 'darwin' ) { 495 if (/nm: no name list/ || /^(.*: )?no symbols$/ ) { 496 print "# $^O ignoring $nm output: $_"; 497 next; 498 } 499 } 500 warn "$0: Unexpected $nm error: $_"; 501 $error++; 502 } 503 die "$0: Unexpected $nm errors\n" if $error; 504 } else { 505 warn "Failed to open '$nm_err_tmp': $!\n"; 506 } 507} 508 509done_testing(); 510