1#!/usr/bin/perl 2 3use Config; 4use File::Basename qw(&basename &dirname); 5use Cwd; 6use subs qw(link); 7 8sub link { # This is a cut-down version of installperl:link(). 9 my($from,$to) = @_; 10 my($success) = 0; 11 12 eval { 13 CORE::link($from, $to) 14 ? $success++ 15 : ($from =~ m#^/afs/# || $to =~ m#^/afs/#) 16 ? die "AFS" # okay inside eval {} 17 : die "Couldn't link $from to $to: $!\n"; 18 }; 19 if ($@) { 20 require File::Copy; 21 File::Copy::copy($from, $to) 22 ? $success++ 23 : warn "Couldn't copy $from to $to: $!\n"; 24 } 25 $success; 26} 27 28# List explicitly here the variables you want Configure to 29# generate. Metaconfig only looks for shell variables, so you 30# have to mention them as if they were shell variables, not 31# %Config entries. Thus you write 32# $startperl 33# to ensure Configure will look for $Config{startperl}. 34 35# This forces PL files to create target in same directory as PL file. 36# This is so that make depend always knows where to find PL derivatives. 37$origdir = cwd; 38chdir dirname($0); 39$file = basename($0, '.PL'); 40$file .= '.com' if $^O eq 'VMS'; 41 42open OUT,">$file" or die "Can't create $file: $!"; 43 44print "Extracting $file (with variable substitutions)\n"; 45 46# In this section, perl variables will be expanded during extraction. 47# You can use $Config{...} to use Configure variables. 48 49print OUT <<"!GROK!THIS!"; 50$Config{startperl} 51 eval 'exec $Config{perlpath} -S \$0 \${1+"\$@"}' 52 if \$running_under_some_shell; 53my \$startperl; 54my \$perlpath; 55(\$startperl = <<'/../') =~ s/\\s*\\z//; 56$Config{startperl} 57/../ 58(\$perlpath = <<'/../') =~ s/\\s*\\z//; 59$Config{perlpath} 60/../ 61!GROK!THIS! 62 63# In the following, perl variables are not expanded during extraction. 64 65print OUT <<'!NO!SUBS!'; 66 67$0 =~ s/^.*?(\w+)[\.\w]*$/$1/; 68 69# (p)sed - a stream editor 70# History: Aug 12 2000: Original version. 71# Mar 25 2002: Rearrange generated Perl program. 72 73use strict; 74use integer; 75use Symbol; 76 77=head1 NAME 78 79psed - a stream editor 80 81=head1 SYNOPSIS 82 83 psed [-an] script [file ...] 84 psed [-an] [-e script] [-f script-file] [file ...] 85 86 s2p [-an] [-e script] [-f script-file] 87 88=head1 DESCRIPTION 89 90A stream editor reads the input stream consisting of the specified files 91(or standard input, if none are given), processes is line by line by 92applying a script consisting of edit commands, and writes resulting lines 93to standard output. The filename `C<->' may be used to read standard input. 94 95The edit script is composed from arguments of B<-e> options and 96script-files, in the given order. A single script argument may be specified 97as the first parameter. 98 99If this program is invoked with the name F<s2p>, it will act as a 100sed-to-Perl translator. See L<"sed Script Translation">. 101 102B<sed> returns an exit code of 0 on success or >0 if an error occurred. 103 104=head1 OPTIONS 105 106=over 4 107 108=item B<-a> 109 110A file specified as argument to the B<w> edit command is by default 111opened before input processing starts. Using B<-a>, opening of such 112files is delayed until the first line is actually written to the file. 113 114=item B<-e> I<script> 115 116The editing commands defined by I<script> are appended to the script. 117Multiple commands must be separated by newlines. 118 119=item B<-f> I<script-file> 120 121Editing commands from the specified I<script-file> are read and appended 122to the script. 123 124=item B<-n> 125 126By default, a line is written to standard output after the editing script 127has been applied to it. The B<-n> option suppresses automatic printing. 128 129=back 130 131=head1 COMMANDS 132 133B<sed> command syntax is defined as 134 135Z<> Z<> Z<> Z<>[I<address>[B<,>I<address>]][B<!>]I<function>[I<argument>] 136 137with whitespace being permitted before or after addresses, and between 138the function character and the argument. The I<address>es and the 139address inverter (C<!>) are used to restrict the application of a 140command to the selected line(s) of input. 141 142Each command must be on a line of its own, except where noted in 143the synopses below. 144 145The edit cycle performed on each input line consist of reading the line 146(without its trailing newline character) into the I<pattern space>, 147applying the applicable commands of the edit script, writing the final 148contents of the pattern space and a newline to the standard output. 149A I<hold space> is provided for saving the contents of the 150pattern space for later use. 151 152=head2 Addresses 153 154A sed address is either a line number or a pattern, which may be combined 155arbitrarily to construct ranges. Lines are numbered across all input files. 156 157Any address may be followed by an exclamation mark (`C<!>'), selecting 158all lines not matching that address. 159 160=over 4 161 162=item I<number> 163 164The line with the given number is selected. 165 166=item B<$> 167 168A dollar sign (C<$>) is the line number of the last line of the input stream. 169 170=item B</>I<regular expression>B</> 171 172A pattern address is a basic regular expression (see 173L<"Basic Regular Expressions">), between the delimiting character C</>. 174Any other character except C<\> or newline may be used to delimit a 175pattern address when the initial delimiter is prefixed with a 176backslash (`C<\>'). 177 178=back 179 180If no address is given, the command selects every line. 181 182If one address is given, it selects the line (or lines) matching the 183address. 184 185Two addresses select a range that begins whenever the first address 186matches, and ends (including that line) when the second address matches. 187If the first (second) address is a matching pattern, the second 188address is not applied to the very same line to determine the end of 189the range. Likewise, if the second address is a matching pattern, the 190first address is not applied to the very same line to determine the 191begin of another range. If both addresses are line numbers, 192and the second line number is less than the first line number, then 193only the first line is selected. 194 195 196=head2 Functions 197 198The maximum permitted number of addresses is indicated with each 199function synopsis below. 200 201The argument I<text> consists of one or more lines following the command. 202Embedded newlines in I<text> must be preceded with a backslash. Other 203backslashes in I<text> are deleted and the following character is taken 204literally. 205 206=over 4 207 208=cut 209 210my %ComTab; 211my %GenKey; 212#-------------------------------------------------------------------------- 213$ComTab{'a'}=[ 1, 'txt', \&Emit, '{ push( @Q, <<'."'TheEnd' ) }\n" ]; #ok 214 215=item [1addr]B<a\> I<text> 216 217Write I<text> (which must start on the line following the command) 218to standard output immediately before reading the next line 219of input, either by executing the B<N> function or by beginning a new cycle. 220 221=cut 222 223#-------------------------------------------------------------------------- 224$ComTab{'b'}=[ 2, 'str', \&Branch, '{ goto XXX; }' ]; #ok 225 226=item [2addr]B<b> [I<label>] 227 228Branch to the B<:> function with the specified I<label>. If no label 229is given, branch to the end of the script. 230 231=cut 232 233#-------------------------------------------------------------------------- 234$ComTab{'c'}=[ 2, 'txt', \&Change, <<'-X-' ]; #ok 235{ print <<'TheEnd'; } $doPrint = 0; goto EOS; 236-X- 237### continue OK => next CYCLE; 238 239=item [2addr]B<c\> I<text> 240 241The line, or range of lines, selected by the address is deleted. 242The I<text> (which must start on the line following the command) 243is written to standard output. With an address range, this occurs at 244the end of the range. 245 246=cut 247 248#-------------------------------------------------------------------------- 249$ComTab{'d'}=[ 2, '', \&Emit, <<'-X-' ]; #ok 250{ $doPrint = 0; 251 goto EOS; 252} 253-X- 254### continue OK => next CYCLE; 255 256=item [2addr]B<d> 257 258Deletes the pattern space and starts the next cycle. 259 260=cut 261 262#-------------------------------------------------------------------------- 263$ComTab{'D'}=[ 2, '', \&Emit, <<'-X-' ]; #ok 264{ s/^.*\n?//; 265 if(length($_)){ goto BOS } else { goto EOS } 266} 267-X- 268### continue OK => next CYCLE; 269 270=item [2addr]B<D> 271 272Deletes the pattern space through the first embedded newline or to the end. 273If the pattern space becomes empty, a new cycle is started, otherwise 274execution of the script is restarted. 275 276=cut 277 278#-------------------------------------------------------------------------- 279$ComTab{'g'}=[ 2, '', \&Emit, '{ $_ = $Hold };' ]; #ok 280 281=item [2addr]B<g> 282 283Replace the contents of the pattern space with the hold space. 284 285=cut 286 287#-------------------------------------------------------------------------- 288$ComTab{'G'}=[ 2, '', \&Emit, '{ $_ .= "\n"; $_ .= $Hold };' ]; #ok 289 290=item [2addr]B<G> 291 292Append a newline and the contents of the hold space to the pattern space. 293 294=cut 295 296#-------------------------------------------------------------------------- 297$ComTab{'h'}=[ 2, '', \&Emit, '{ $Hold = $_ }' ]; #ok 298 299=item [2addr]B<h> 300 301Replace the contents of the hold space with the pattern space. 302 303=cut 304 305#-------------------------------------------------------------------------- 306$ComTab{'H'}=[ 2, '', \&Emit, '{ $Hold .= "\n"; $Hold .= $_; }' ]; #ok 307 308=item [2addr]B<H> 309 310Append a newline and the contents of the pattern space to the hold space. 311 312=cut 313 314#-------------------------------------------------------------------------- 315$ComTab{'i'}=[ 1, 'txt', \&Emit, '{ print <<'."'TheEnd' }\n" ]; #ok 316 317=item [1addr]B<i\> I<text> 318 319Write the I<text> (which must start on the line following the command) 320to standard output. 321 322=cut 323 324#-------------------------------------------------------------------------- 325$ComTab{'l'}=[ 2, '', \&Emit, '{ _l() }' ]; #okUTF8 326 327=item [2addr]B<l> 328 329Print the contents of the pattern space: non-printable characters are 330shown in C-style escaped form; long lines are split and have a trailing 331`C<\>' at the point of the split; the true end of a line is marked with 332a `C<$>'. Escapes are: `\a', `\t', `\n', `\f', `\r', `\e' for 333BEL, HT, LF, FF, CR, ESC, respectively, and `\' followed by a three-digit 334octal number for all other non-printable characters. 335 336=cut 337 338#-------------------------------------------------------------------------- 339$ComTab{'n'}=[ 2, '', \&Emit, <<'-X-' ]; #ok 340{ print $_, "\n" if $doPrint; 341 printQ() if @Q; 342 $CondReg = 0; 343 last CYCLE unless getsARGV(); 344 chomp(); 345} 346-X- 347 348=item [2addr]B<n> 349 350If automatic printing is enabled, write the pattern space to the standard 351output. Replace the pattern space with the next line of input. If 352there is no more input, processing is terminated. 353 354=cut 355 356#-------------------------------------------------------------------------- 357$ComTab{'N'}=[ 2, '', \&Emit, <<'-X-' ]; #ok 358{ printQ() if @Q; 359 $CondReg = 0; 360 last CYCLE unless getsARGV( $h ); 361 chomp( $h ); 362 $_ .= "\n$h"; 363} 364-X- 365 366=item [2addr]B<N> 367 368Append a newline and the next line of input to the pattern space. If 369there is no more input, processing is terminated. 370 371=cut 372 373#-------------------------------------------------------------------------- 374$ComTab{'p'}=[ 2, '', \&Emit, '{ print $_, "\n"; }' ]; #ok 375 376=item [2addr]B<p> 377 378Print the pattern space to the standard output. (Use the B<-n> option 379to suppress automatic printing at the end of a cycle if you want to 380avoid double printing of lines.) 381 382=cut 383 384#-------------------------------------------------------------------------- 385$ComTab{'P'}=[ 2, '', \&Emit, <<'-X-' ]; #ok 386{ if( /^(.*)/ ){ print $1, "\n"; } } 387-X- 388 389=item [2addr]B<P> 390 391Prints the pattern space through the first embedded newline or to the end. 392 393=cut 394 395#-------------------------------------------------------------------------- 396$ComTab{'q'}=[ 1, '', \&Emit, <<'-X-' ]; #ok 397{ print $_, "\n" if $doPrint; 398 last CYCLE; 399} 400-X- 401 402=item [1addr]B<q> 403 404Branch to the end of the script and quit without starting a new cycle. 405 406=cut 407 408#-------------------------------------------------------------------------- 409$ComTab{'r'}=[ 1, 'str', \&Emit, "{ _r( '-X-' ) }" ]; #ok 410 411=item [1addr]B<r> I<file> 412 413Copy the contents of the I<file> to standard output immediately before 414the next attempt to read a line of input. Any error encountered while 415reading I<file> is silently ignored. 416 417=cut 418 419#-------------------------------------------------------------------------- 420$ComTab{'s'}=[ 2, 'sub', \&Emit, '' ]; #ok 421 422=item [2addr]B<s/>I<regular expression>B</>I<replacement>B</>I<flags> 423 424Substitute the I<replacement> string for the first substring in 425the pattern space that matches the I<regular expression>. 426Any character other than backslash or newline can be used instead of a 427slash to delimit the regular expression and the replacement. 428To use the delimiter as a literal character within the regular expression 429and the replacement, precede the character by a backslash (`C<\>'). 430 431Literal newlines may be embedded in the replacement string by 432preceding a newline with a backslash. 433 434Within the replacement, an ampersand (`C<&>') is replaced by the string 435matching the regular expression. The strings `C<\1>' through `C<\9>' are 436replaced by the corresponding subpattern (see L<"Basic Regular Expressions">). 437To get a literal `C<&>' or `C<\>' in the replacement text, precede it 438by a backslash. 439 440The following I<flags> modify the behaviour of the B<s> command: 441 442=over 8 443 444=item B<g> 445 446The replacement is performed for all matching, non-overlapping substrings 447of the pattern space. 448 449=item B<1>..B<9> 450 451Replace only the n-th matching substring of the pattern space. 452 453=item B<p> 454 455If the substitution was made, print the new value of the pattern space. 456 457=item B<w> I<file> 458 459If the substitution was made, write the new value of the pattern space 460to the specified file. 461 462=back 463 464=cut 465 466#-------------------------------------------------------------------------- 467$ComTab{'t'}=[ 2, 'str', \&Branch, '{ goto XXX if _t() }' ]; #ok 468 469=item [2addr]B<t> [I<label>] 470 471Branch to the B<:> function with the specified I<label> if any B<s> 472substitutions have been made since the most recent reading of an input line 473or execution of a B<t> function. If no label is given, branch to the end of 474the script. 475 476 477=cut 478 479#-------------------------------------------------------------------------- 480$ComTab{'w'}=[ 2, 'str', \&Write, "{ _w( '-X-' ) }" ]; #ok 481 482=item [2addr]B<w> I<file> 483 484The contents of the pattern space are written to the I<file>. 485 486=cut 487 488#-------------------------------------------------------------------------- 489$ComTab{'x'}=[ 2, '', \&Emit, '{ ($Hold, $_) = ($_, $Hold) }' ]; #ok 490 491=item [2addr]B<x> 492 493Swap the contents of the pattern space and the hold space. 494 495=cut 496 497#-------------------------------------------------------------------------- 498$ComTab{'y'}=[ 2, 'tra', \&Emit, '' ]; #ok 499=item [2addr]B<y>B</>I<string1>B</>I<string2>B</> 500 501In the pattern space, replace all characters occuring in I<string1> by the 502character at the corresponding position in I<string2>. It is possible 503to use any character (other than a backslash or newline) instead of a 504slash to delimit the strings. Within I<string1> and I<string2>, a 505backslash followed by any character other than a newline is that literal 506character, and a backslash followed by an `n' is replaced by a newline 507character. 508 509=cut 510 511#-------------------------------------------------------------------------- 512$ComTab{'='}=[ 1, '', \&Emit, '{ print "$.\n" }' ]; #ok 513 514=item [1addr]B<=> 515 516Prints the current line number on the standard output. 517 518=cut 519 520#-------------------------------------------------------------------------- 521$ComTab{':'}=[ 0, 'str', \&Label, '' ]; #ok 522 523=item [0addr]B<:> [I<label>] 524 525The command specifies the position of the I<label>. It has no other effect. 526 527=cut 528 529#-------------------------------------------------------------------------- 530$ComTab{'{'}=[ 2, '', \&BeginBlock, '{' ]; #ok 531$ComTab{'}'}=[ 0, '', \&EndBlock, ';}' ]; #ok 532# ';' to avoid warning on empty {}-block 533 534=item [2addr]B<{> [I<command>] 535 536=item [0addr]B<}> 537 538These two commands begin and end a command list. The first command may 539be given on the same line as the opening B<{> command. The commands 540within the list are jointly selected by the address(es) given on the 541B<{> command (but may still have individual addresses). 542 543=cut 544 545#-------------------------------------------------------------------------- 546$ComTab{'#'}=[ 0, 'str', \&Comment, '' ]; #ok 547 548=item [0addr]B<#> [I<comment>] 549 550The entire line is ignored (treated as a comment). If, however, the first 551two characters in the script are `C<#n>', automatic printing of output is 552suppressed, as if the B<-n> option were given on the command line. 553 554=back 555 556=cut 557 558use vars qw{ $isEOF $Hold %wFiles @Q $CondReg $doPrint }; 559 560my $useDEBUG = exists( $ENV{PSEDDEBUG} ); 561my $useEXTBRE = $ENV{PSEDEXTBRE} || ''; 562$useEXTBRE =~ s/[^<>wWyB]//g; # gawk RE's handle these 563 564my $doAutoPrint = 1; # automatic printing of pattern space (-n => 0) 565my $doOpenWrite = 1; # open w command output files at start (-a => 0) 566my $svOpenWrite = 0; # save $doOpenWrite 567my $doGenerate = $0 eq 's2p'; 568 569# Collected and compiled script 570# 571my( @Commands, %Defined, @BlockStack, %Label, $labNum, $Code, $Func ); 572$Code = ''; 573 574################## 575# Compile Time 576# 577# Labels 578# 579# Error handling 580# 581sub Warn($;$){ 582 my( $msg, $loc ) = @_; 583 $loc ||= ''; 584 $loc .= ': ' if length( $loc ); 585 warn( "$0: $loc$msg\n" ); 586} 587 588$labNum = 0; 589sub newLabel(){ 590 return 'L_'.++$labNum; 591} 592 593# safeHere: create safe here delimiter and modify opcode and argument 594# 595sub safeHere($$){ 596 my( $codref, $argref ) = @_; 597 my $eod = 'EOD000'; 598 while( $$argref =~ /^$eod$/m ){ 599 $eod++; 600 } 601 $$codref =~ s/TheEnd/$eod/e; 602 $$argref .= "$eod\n"; 603} 604 605# Emit: create address logic and emit command 606# 607sub Emit($$$$$$){ 608 my( $addr1, $addr2, $negated, $opcode, $arg, $fl ) = @_; 609 my $cond = ''; 610 if( defined( $addr1 ) ){ 611 if( defined( $addr2 ) ){ 612 $addr1 .= $addr2 =~ /^\d+$/ ? "..$addr2" : "...$addr2"; 613 } else { 614 $addr1 .= ' == $.' if $addr1 =~ /^\d+$/; 615 } 616 $cond = $negated ? "unless( $addr1 )\n" : "if( $addr1 )\n"; 617 } 618 619 if( $opcode eq '' ){ 620 $Code .= "$cond$arg\n"; 621 622 } elsif( $opcode =~ s/-X-/$arg/e ){ 623 $Code .= "$cond$opcode\n"; 624 625 } elsif( $opcode =~ /TheEnd/ ){ 626 safeHere( \$opcode, \$arg ); 627 $Code .= "$cond$opcode$arg"; 628 629 } else { 630 $Code .= "$cond$opcode\n"; 631 } 632 0; 633} 634 635# Write (w command, w flag): store pathname 636# 637sub Write($$$$$$){ 638 my( $addr1, $addr2, $negated, $opcode, $path, $fl ) = @_; 639 $wFiles{$path} = ''; 640 Emit( $addr1, $addr2, $negated, $opcode, $path, $fl ); 641} 642 643 644# Label (: command): label definition 645# 646sub Label($$$$$$){ 647 my( $addr1, $addr2, $negated, $opcode, $lab, $fl ) = @_; 648 my $rc = 0; 649 $lab =~ s/\s+//; 650 if( length( $lab ) ){ 651 my $h; 652 if( ! exists( $Label{$lab} ) ){ 653 $h = $Label{$lab}{name} = newLabel(); 654 } else { 655 $h = $Label{$lab}{name}; 656 if( exists( $Label{$lab}{defined} ) ){ 657 my $dl = $Label{$lab}{defined}; 658 Warn( "duplicate label $lab (first defined at $dl)", $fl ); 659 $rc = 1; 660 } 661 } 662 $Label{$lab}{defined} = $fl; 663 $Code .= "$h:;\n"; 664 } 665 $rc; 666} 667 668# BeginBlock ({ command): push block start 669# 670sub BeginBlock($$$$$$){ 671 my( $addr1, $addr2, $negated, $opcode, $arg, $fl ) = @_; 672 push( @BlockStack, [ $fl, $addr1, $addr2, $negated ] ); 673 Emit( $addr1, $addr2, $negated, $opcode, $arg, $fl ); 674} 675 676# EndBlock (} command): check proper nesting 677# 678sub EndBlock($$$$$$){ 679 my( $addr1, $addr2, $negated, $opcode, $arg, $fl ) = @_; 680 my $rc; 681 my $jcom = pop( @BlockStack ); 682 if( defined( $jcom ) ){ 683 $rc = Emit( $addr1, $addr2, $negated, $opcode, $arg, $fl ); 684 } else { 685 Warn( "unexpected `}'", $fl ); 686 $rc = 1; 687 } 688 $rc; 689} 690 691# Branch (t, b commands): check or create label, substitute default 692# 693sub Branch($$$$$$){ 694 my( $addr1, $addr2, $negated, $opcode, $lab, $fl ) = @_; 695 $lab =~ s/\s+//; # no spaces at end 696 my $h; 697 if( length( $lab ) ){ 698 if( ! exists( $Label{$lab} ) ){ 699 $h = $Label{$lab}{name} = newLabel(); 700 } else { 701 $h = $Label{$lab}{name}; 702 } 703 push( @{$Label{$lab}{used}}, $fl ); 704 } else { 705 $h = 'EOS'; 706 } 707 $opcode =~ s/XXX/$h/e; 708 Emit( $addr1, $addr2, $negated, $opcode, '', $fl ); 709} 710 711# Change (c command): is special due to range end watching 712# 713sub Change($$$$$$){ 714 my( $addr1, $addr2, $negated, $opcode, $arg, $fl ) = @_; 715 my $kwd = $negated ? 'unless' : 'if'; 716 if( defined( $addr2 ) ){ 717 $addr1 .= $addr2 =~ /^\d+$/ ? "..$addr2" : "...$addr2"; 718 if( ! $negated ){ 719 $addr1 = '$icnt = ('.$addr1.')'; 720 $opcode = 'if( $icnt =~ /E0$/ )' . $opcode; 721 } 722 } else { 723 $addr1 .= ' == $.' if $addr1 =~ /^\d+$/; 724 } 725 safeHere( \$opcode, \$arg ); 726 $Code .= "$kwd( $addr1 ){\n $opcode$arg}\n"; 727 0; 728} 729 730 731# Comment (# command): A no-op. Who would've thought that! 732# 733sub Comment($$$$$$){ 734 my( $addr1, $addr2, $negated, $opcode, $arg, $fl ) = @_; 735### $Code .= "# $arg\n"; 736 0; 737} 738 739 740sub stripRegex($$){ 741 my( $del, $sref ) = @_; 742 my $regex = $del; 743 print "stripRegex:$del:$$sref:\n" if $useDEBUG; 744 while( $$sref =~ s{^(.*?)(\\*)\Q$del\E(\s*)}{}s ){ 745 my $sl = $2; 746 $regex .= $1.$sl.$del; 747 if( length( $sl ) % 2 == 0 ){ 748 return $regex; 749 } 750 $regex .= $3; 751 } 752 undef(); 753} 754 755# stripTrans: take a <del> terminated string from y command 756# honoring and cleaning up of \-escaped <del>'s 757# 758sub stripTrans($$){ 759 my( $del, $sref ) = @_; 760 my $t = ''; 761 print "stripTrans:$del:$$sref:\n" if $useDEBUG; 762 while( $$sref =~ s{^(.*?)(\\*)\Q$del\E}{}s ){ 763 my $sl = $2; 764 $t .= $1; 765 if( length( $sl ) % 2 == 0 ){ 766 $t .= $sl; 767 $t =~ s/\\\\/\\/g; 768 return $t; 769 } 770 chop( $sl ); 771 $t .= $sl.$del.$3; 772 } 773 undef(); 774} 775 776# makey - construct Perl y/// from sed y/// 777# 778sub makey($$$){ 779 my( $fr, $to, $fl ) = @_; 780 my $error = 0; 781 782 # Ensure that any '-' is up front. 783 # Diagnose duplicate contradicting mappings 784 my %tr; 785 for( my $i = 0; $i < length($fr); $i++ ){ 786 my $fc = substr($fr,$i,1); 787 my $tc = substr($to,$i,1); 788 if( exists( $tr{$fc} ) && $tr{$fc} ne $tc ){ 789 Warn( "ambiguous translation for character `$fc' in `y' command", 790 $fl ); 791 $error++; 792 } 793 $tr{$fc} = $tc; 794 } 795 $fr = $to = ''; 796 if( exists( $tr{'-'} ) ){ 797 ( $fr, $to ) = ( '-', $tr{'-'} ); 798 delete( $tr{'-'} ); 799 } else { 800 $fr = $to = ''; 801 } 802 # might just as well sort it... 803 for my $fc ( sort keys( %tr ) ){ 804 $fr .= $fc; 805 $to .= $tr{$fc}; 806 } 807 # make embedded delimiters and newlines safe 808 $fr =~ s/([{}])/\$1/g; 809 $to =~ s/([{}])/\$1/g; 810 $fr =~ s/\n/\\n/g; 811 $to =~ s/\n/\\n/g; 812 return $error ? undef() : "{ y{$fr}{$to}; }"; 813} 814 815###### 816# makes - construct Perl s/// from sed s/// 817# 818sub makes($$$$$$$){ 819 my( $regex, $subst, $path, $global, $print, $nmatch, $fl ) = @_; 820 821 # make embedded newlines safe 822 $regex =~ s/\n/\\n/g; 823 $subst =~ s/\n/\\n/g; 824 825 my $code; 826 # n-th occurrence 827 # 828 if( length( $nmatch ) ){ 829 $code = <<TheEnd; 830{ \$n = $nmatch; 831 while( --\$n && ( \$s = m ${regex}g ) ){} 832 \$s = ( substr( \$_, pos() ) =~ s ${regex}${subst}s ) if \$s; 833 \$CondReg ||= \$s; 834TheEnd 835 } else { 836 $code = <<TheEnd; 837{ \$s = s ${regex}${subst}s${global}; 838 \$CondReg ||= \$s; 839TheEnd 840 } 841 if( $print ){ 842 $code .= ' print $_, "\n" if $s;'."\n"; 843 } 844 if( defined( $path ) ){ 845 $wFiles{$path} = ''; 846 $code .= " _w( '$path' ) if \$s;\n"; 847 $GenKey{'w'} = 1; 848 } 849 $code .= "}"; 850} 851 852=head1 BASIC REGULAR EXPRESSIONS 853 854A I<Basic Regular Expression> (BRE), as defined in POSIX 1003.2, consists 855of I<atoms>, for matching parts of a string, and I<bounds>, specifying 856repetitions of a preceding atom. 857 858=head2 Atoms 859 860The possible atoms of a BRE are: B<.>, matching any single character; 861B<^> and B<$>, matching the null string at the beginning or end 862of a string, respectively; a I<bracket expressions>, enclosed 863in B<[> and B<]> (see below); and any single character with no 864other significance (matching that character). A B<\> before one 865of: B<.>, B<^>, B<$>, B<[>, B<*>, B<\>, matching the character 866after the backslash. A sequence of atoms enclosed in B<\(> and B<\)> 867becomes an atom and establishes the target for a I<backreference>, 868consisting of the substring that actually matches the enclosed atoms. 869Finally, B<\> followed by one of the digits B<0> through B<9> is a 870backreference. 871 872A B<^> that is not first, or a B<$> that is not last does not have 873a special significance and need not be preceded by a backslash to 874become literal. The same is true for a B<]>, that does not terminate 875a bracket expression. 876 877An unescaped backslash cannot be last in a BRE. 878 879=head2 Bounds 880 881The BRE bounds are: B<*>, specifying 0 or more matches of the preceding 882atom; B<\{>I<count>B<\}>, specifying that many repetitions; 883B<\{>I<minimum>B<,\}>, giving a lower limit; and 884B<\{>I<minimum>B<,>I<maximum>B<\}> finally defines a lower and upper 885bound. 886 887A bound appearing as the first item in a BRE is taken literally. 888 889=head2 Bracket Expressions 890 891A I<bracket expression> is a list of characters, character ranges 892and character classes enclosed in B<[> and B<]> and matches any 893single character from the represented set of characters. 894 895A character range is written as two characters separated by B<-> and 896represents all characters (according to the character collating sequence) 897that are not less than the first and not greater than the second. 898(Ranges are very collating-sequence-dependent, and portable programs 899should avoid relying on them.) 900 901A character class is one of the class names 902 903 alnum digit punct 904 alpha graph space 905 blank lower upper 906 cntrl print xdigit 907 908enclosed in B<[:> and B<:]> and represents the set of characters 909as defined in ctype(3). 910 911If the first character after B<[> is B<^>, the sense of matching is 912inverted. 913 914To include a literal `C<^>', place it anywhere else but first. To 915include a literal 'C<]>' place it first or immediately after an 916initial B<^>. To include a literal `C<->' make it the first (or 917second after B<^>) or last character, or the second endpoint of 918a range. 919 920The special bracket expression constructs C<[[:E<lt>:]]> and C<[[:E<gt>:]]> 921match the null string at the beginning and end of a word respectively. 922(Note that neither is identical to Perl's `\b' atom.) 923 924=head2 Additional Atoms 925 926Since some sed implementations provide additional regular expression 927atoms (not defined in POSIX 1003.2), B<psed> is capable of translating 928the following backslash escapes: 929 930=over 4 931 932=item B<\E<lt>> This is the same as C<[[:E<gt>:]]>. 933 934=item B<\E<gt>> This is the same as C<[[:E<lt>:]]>. 935 936=item B<\w> This is an abbreviation for C<[[:alnum:]_]>. 937 938=item B<\W> This is an abbreviation for C<[^[:alnum:]_]>. 939 940=item B<\y> Match the empty string at a word boundary. 941 942=item B<\B> Match the empty string between any two either word or non-word characters. 943 944=back 945 946To enable this feature, the environment variable PSEDEXTBRE must be set 947to a string containing the requested characters, e.g.: 948C<PSEDEXTBRE='E<lt>E<gt>wW'>. 949 950=cut 951 952##### 953# bre2p - convert BRE to Perl RE 954# 955sub peek(\$$){ 956 my( $pref, $ic ) = @_; 957 $ic < length($$pref)-1 ? substr( $$pref, $ic+1, 1 ) : ''; 958} 959 960sub bre2p($$$){ 961 my( $del, $pat, $fl ) = @_; 962 my $led = $del; 963 $led =~ tr/{([</})]>/; 964 $led = '' if $led eq $del; 965 966 $pat = substr( $pat, 1, length($pat) - 2 ); 967 my $res = ''; 968 my $bracklev = 0; 969 my $backref = 0; 970 my $parlev = 0; 971 for( my $ic = 0; $ic < length( $pat ); $ic++ ){ 972 my $c = substr( $pat, $ic, 1 ); 973 if( $c eq '\\' ){ 974 ### backslash escapes 975 my $nc = peek($pat,$ic); 976 if( $nc eq '' ){ 977 Warn( "`\\' cannot be last in pattern", $fl ); 978 return undef(); 979 } 980 $ic++; 981 if( $nc eq $del ){ ## \<pattern del> => \<pattern del> 982 $res .= "\\$del"; 983 984 } elsif( $nc =~ /([[.*\\n])/ ){ 985 ## check for \-escaped magics and \n: 986 ## \[ \. \* \\ \n stay as they are 987 $res .= '\\'.$nc; 988 989 } elsif( $nc eq '(' ){ ## \( => ( 990 $parlev++; 991 $res .= '('; 992 993 } elsif( $nc eq ')' ){ ## \) => ) 994 $parlev--; 995 $backref++; 996 if( $parlev < 0 ){ 997 Warn( "unmatched `\\)'", $fl ); 998 return undef(); 999 } 1000 $res .= ')'; 1001 1002 } elsif( $nc eq '{' ){ ## repetition factor \{<i>[,[<j>]]\} 1003 my $endpos = index( $pat, '\\}', $ic ); 1004 if( $endpos < 0 ){ 1005 Warn( "unmatched `\\{'", $fl ); 1006 return undef(); 1007 } 1008 my $rep = substr( $pat, $ic+1, $endpos-($ic+1) ); 1009 $ic = $endpos + 1; 1010 1011 if( $res =~ /^\^?$/ ){ 1012 $res .= "\\{$rep\}"; 1013 } elsif( $rep =~ /^(\d+)(,?)(\d*)?$/ ){ 1014 my $min = $1; 1015 my $com = $2 || ''; 1016 my $max = $3; 1017 if( length( $max ) ){ 1018 if( $max < $min ){ 1019 Warn( "maximum less than minimum in `\\{$rep\\}'", 1020 $fl ); 1021 return undef(); 1022 } 1023 } else { 1024 $max = ''; 1025 } 1026 # simplify some 1027 if( $min == 0 && $max eq '1' ){ 1028 $res .= '?'; 1029 } elsif( $min == 1 && "$com$max" eq ',' ){ 1030 $res .= '+'; 1031 } elsif( $min == 0 && "$com$max" eq ',' ){ 1032 $res .= '*'; 1033 } else { 1034 $res .= "{$min$com$max}"; 1035 } 1036 } else { 1037 Warn( "invalid repeat clause `\\{$rep\\}'", $fl ); 1038 return undef(); 1039 } 1040 1041 } elsif( $nc =~ /^[1-9]$/ ){ 1042 ## \1 .. \9 => \1 .. \9, but check for a following digit 1043 if( $nc > $backref ){ 1044 Warn( "invalid backreference ($nc)", $fl ); 1045 return undef(); 1046 } 1047 $res .= "\\$nc"; 1048 if( peek($pat,$ic) =~ /[0-9]/ ){ 1049 $res .= '(?:)'; 1050 } 1051 1052 } elsif( $useEXTBRE && ( $nc =~ /[$useEXTBRE]/ ) ){ 1053 ## extensions - at most <>wWyB - not in POSIX 1054 if( $nc eq '<' ){ ## \< => \b(?=\w), be precise 1055 $res .= '\\b(?<=\\W)'; 1056 } elsif( $nc eq '>' ){ ## \> => \b(?=\W), be precise 1057 $res .= '\\b(?=\\W)'; 1058 } elsif( $nc eq 'y' ){ ## \y => \b 1059 $res .= '\\b'; 1060 } else { ## \B, \w, \W remain the same 1061 $res .= "\\$nc"; 1062 } 1063 } elsif( $nc eq $led ){ 1064 ## \<closing bracketing-delimiter> - keep '\' 1065 $res .= "\\$nc"; 1066 1067 } else { ## \ <char> => <char> ("as if `\' were not present") 1068 $res .= $nc; 1069 } 1070 1071 } elsif( $c eq '.' ){ ## . => . 1072 $res .= $c; 1073 1074 } elsif( $c eq '*' ){ ## * => * but \* if there's nothing preceding it 1075 if( $res =~ /^\^?$/ ){ 1076 $res .= '\\*'; 1077 } elsif( substr( $res, -1, 1 ) ne '*' ){ 1078 $res .= $c; 1079 } 1080 1081 } elsif( $c eq '[' ){ 1082 ## parse []: [^...] [^]...] [-...] 1083 my $add = '['; 1084 if( peek($pat,$ic) eq '^' ){ 1085 $ic++; 1086 $add .= '^'; 1087 } 1088 my $nc = peek($pat,$ic); 1089 if( $nc eq ']' || $nc eq '-' ){ 1090 $add .= $nc; 1091 $ic++; 1092 } 1093 # check that [ is not trailing 1094 if( $ic >= length( $pat ) - 1 ){ 1095 Warn( "unmatched `['", $fl ); 1096 return undef(); 1097 } 1098 # look for [:...:] and x-y 1099 my $rstr = substr( $pat, $ic+1 ); 1100 if( $rstr =~ /^((?:\[:\(\w+|[><]\):\]|[^]-](?:-[^]])?)*)/ ){ 1101 my $cnt = $1; 1102 $ic += length( $cnt ); 1103 $cnt =~ s/([\\\$])/\\$1/g; # `\', `$' are magic in Perl [] 1104 # try some simplifications 1105 my $red = $cnt; 1106 if( $red =~ s/0-9// ){ 1107 $cnt = $red.'\d'; 1108 if( $red =~ s/A-Z// && $red =~ s/a-z// && $red =~ s/_// ){ 1109 $cnt = $red.'\w'; 1110 } 1111 } 1112 $add .= $cnt; 1113 1114 # POSIX 1003.2 has this (optional) for begin/end word 1115 $add = '\\b(?=\\W)' if $add eq '[[:<:]]'; 1116 $add = '\\b(?<=\\W)' if $add eq '[[:>:]]'; 1117 1118 } 1119 1120 ## may have a trailing `-' before `]' 1121 if( $ic < length($pat) - 1 && 1122 substr( $pat, $ic+1 ) =~ /^(-?])/ ){ 1123 $ic += length( $1 ); 1124 $add .= $1; 1125 # another simplification 1126 $add =~ s/^\[(\^?)(\\[dw])]$/ $1 eq '^' ? uc($2) : $2 /e; 1127 $res .= $add; 1128 } else { 1129 Warn( "unmatched `['", $fl ); 1130 return undef(); 1131 } 1132 1133 } elsif( $c eq $led ){ ## unescaped <closing bracketing-delimiter> 1134 $res .= "\\$c"; 1135 1136 } elsif( $c eq ']' ){ ## unmatched ] is not magic 1137 $res .= ']'; 1138 1139 } elsif( $c =~ /[|+?{}()]/ ){ ## not magic in BRE, but in Perl: \-quote 1140 $res .= "\\$c"; 1141 1142 } elsif( $c eq '^' ){ ## not magic unless 1st, but in Perl: \-quote 1143 $res .= length( $res ) ? '\\^' : '^'; 1144 1145 } elsif( $c eq '$' ){ ## not magic unless last, but in Perl: \-quote 1146 $res .= $ic == length( $pat ) - 1 ? '$' : '\\$'; 1147 1148 } else { 1149 $res .= $c; 1150 } 1151 } 1152 1153 if( $parlev ){ 1154 Warn( "unmatched `\\('", $fl ); 1155 return undef(); 1156 } 1157 1158 # final cleanup: eliminate raw HTs 1159 $res =~ s/\t/\\t/g; 1160 return $del . $res . ( $led ? $led : $del ); 1161} 1162 1163 1164##### 1165# sub2p - convert sed substitution to Perl substitution 1166# 1167sub sub2p($$$){ 1168 my( $del, $subst, $fl ) = @_; 1169 my $led = $del; 1170 $led =~ tr/{([</})]>/; 1171 $led = '' if $led eq $del; 1172 1173 $subst = substr( $subst, 1, length($subst) - 2 ); 1174 my $res = ''; 1175 1176 for( my $ic = 0; $ic < length( $subst ); $ic++ ){ 1177 my $c = substr( $subst, $ic, 1 ); 1178 if( $c eq '\\' ){ 1179 ### backslash escapes 1180 my $nc = peek($subst,$ic); 1181 if( $nc eq '' ){ 1182 Warn( "`\\' cannot be last in substitution", $fl ); 1183 return undef(); 1184 } 1185 $ic++; 1186 if( $nc =~ /[\\$del$led]/ ){ ## \ and delimiter 1187 $res .= '\\' . $nc; 1188 } elsif( $nc =~ /[1-9]/ ){ ## \1 - \9 => ${1} - ${9} 1189 $res .= '${' . $nc . '}'; 1190 } else { ## everything else (includes &): omit \ 1191 $res .= $nc; 1192 } 1193 } elsif( $c eq '&' ){ ## & => $& 1194 $res .= '$&'; 1195 } elsif( $c =~ /[\$\@$led]/ ){ ## magic in Perl's substitution string 1196 $res .= '\\' . $c; 1197 } else { 1198 $res .= $c; 1199 } 1200 } 1201 1202 # final cleanup: eliminate raw HTs 1203 $res =~ s/\t/\\t/g; 1204 return ( $led ? $del : $led ) . $res . ( $led ? $led : $del ); 1205} 1206 1207 1208sub Parse(){ 1209 my $error = 0; 1210 my( $pdef, $pfil, $plin ); 1211 for( my $icom = 0; $icom < @Commands; $icom++ ){ 1212 my $cmd = $Commands[$icom]; 1213 print "Parse:$cmd:\n" if $useDEBUG; 1214 $cmd =~ s/^\s+//; 1215 next unless length( $cmd ); 1216 my $scom = $icom; 1217 if( exists( $Defined{$icom} ) ){ 1218 $pdef = $Defined{$icom}; 1219 if( $pdef =~ /^ #(\d+)/ ){ 1220 $pfil = 'expression #'; 1221 $plin = $1; 1222 } else { 1223 $pfil = "$pdef l."; 1224 $plin = 1; 1225 } 1226 } else { 1227 $plin++; 1228 } 1229 my $fl = "$pfil$plin"; 1230 1231 # insert command as comment in gnerated code 1232 # 1233 $Code .= "# $cmd\n" if $doGenerate; 1234 1235 # The Address(es) 1236 # 1237 my( $negated, $naddr, $addr1, $addr2 ); 1238 $naddr = 0; 1239 if( $cmd =~ s/^(\d+)\s*// ){ 1240 $addr1 = "$1"; $naddr++; 1241 } elsif( $cmd =~ s/^\$\s*// ){ 1242 $addr1 = 'eofARGV()'; $naddr++; 1243 } elsif( $cmd =~ s{^(/)}{} || $cmd =~ s{^\\(.)}{} ){ 1244 my $del = $1; 1245 my $regex = stripRegex( $del, \$cmd ); 1246 if( defined( $regex ) ){ 1247 $addr1 = 'm '.bre2p( $del, $regex, $fl ).'s'; 1248 $naddr++; 1249 } else { 1250 Warn( "malformed regex, 1st address", $fl ); 1251 $error++; 1252 next; 1253 } 1254 } 1255 if( defined( $addr1 ) && $cmd =~ s/,\s*// ){ 1256 if( $cmd =~ s/^(\d+)\s*// ){ 1257 $addr2 = "$1"; $naddr++; 1258 } elsif( $cmd =~ s/^\$\s*// ){ 1259 $addr2 = 'eofARGV()'; $naddr++; 1260 } elsif( $cmd =~ s{^(/)}{} || $cmd =~ s{^\\(.)}{} ){ 1261 my $del = $1; 1262 my $regex = stripRegex( $del, \$cmd ); 1263 if( defined( $regex ) ){ 1264 $addr2 = 'm '. bre2p( $del, $regex, $fl ).'s'; 1265 $naddr++; 1266 } else { 1267 Warn( "malformed regex, 2nd address", $fl ); 1268 $error++; 1269 next; 1270 } 1271 } else { 1272 Warn( "invalid address after `,'", $fl ); 1273 $error++; 1274 next; 1275 } 1276 } 1277 1278 # address modifier `!' 1279 # 1280 $negated = $cmd =~ s/^!\s*//; 1281 if( defined( $addr1 ) ){ 1282 print "Parse: addr1=$addr1" if $useDEBUG; 1283 if( defined( $addr2 ) ){ 1284 print ", addr2=$addr2 " if $useDEBUG; 1285 # both numeric and addr1 > addr2 => eliminate addr2 1286 undef( $addr2 ) if $addr1 =~ /^\d+$/ && 1287 $addr2 =~ /^\d+$/ && $addr1 > $addr2; 1288 } 1289 } 1290 print 'negated' if $useDEBUG && $negated; 1291 print " command:$cmd\n" if $useDEBUG; 1292 1293 # The Command 1294 # 1295 if( $cmd !~ s/^([:#={}abcdDgGhHilnNpPqrstwxy])\s*// ){ 1296 my $h = substr( $cmd, 0, 1 ); 1297 Warn( "unknown command `$h'", $fl ); 1298 $error++; 1299 next; 1300 } 1301 my $key = $1; 1302 1303 my $tabref = $ComTab{$key}; 1304 $GenKey{$key} = 1; 1305 if( $naddr > $tabref->[0] ){ 1306 Warn( "excess address(es)", $fl ); 1307 $error++; 1308 next; 1309 } 1310 1311 my $arg = ''; 1312 if( $tabref->[1] eq 'str' ){ 1313 # take remainder - don't care if it is empty 1314 $arg = $cmd; 1315 $cmd = ''; 1316 1317 } elsif( $tabref->[1] eq 'txt' ){ 1318 # multi-line text 1319 my $goon = $cmd =~ /(.*)\\$/; 1320 if( length( $1 ) ){ 1321 Warn( "extra characters after command ($cmd)", $fl ); 1322 $error++; 1323 } 1324 while( $goon ){ 1325 $icom++; 1326 if( $icom > $#Commands ){ 1327 Warn( "unexpected end of script", $fl ); 1328 $error++; 1329 last; 1330 } 1331 $cmd = $Commands[$icom]; 1332 $Code .= "# $cmd\n" if $doGenerate; 1333 $goon = $cmd =~ s/\\$//; 1334 $cmd =~ s/\\(.)/$1/g; 1335 $arg .= "\n" if length( $arg ); 1336 $arg .= $cmd; 1337 } 1338 $arg .= "\n" if length( $arg ); 1339 $cmd = ''; 1340 1341 } elsif( $tabref->[1] eq 'sub' ){ 1342 # s/// 1343 if( ! length( $cmd ) ){ 1344 Warn( "`s' command requires argument", $fl ); 1345 $error++; 1346 next; 1347 } 1348 if( $cmd =~ s{^([^\\\n])}{} ){ 1349 my $del = $1; 1350 my $regex = stripRegex( $del, \$cmd ); 1351 if( ! defined( $regex ) ){ 1352 Warn( "malformed regular expression", $fl ); 1353 $error++; 1354 next; 1355 } 1356 $regex = bre2p( $del, $regex, $fl ); 1357 1358 # a trailing \ indicates embedded NL (in replacement string) 1359 while( $cmd =~ s/(?<!\\)\\$/\n/ ){ 1360 $icom++; 1361 if( $icom > $#Commands ){ 1362 Warn( "unexpected end of script", $fl ); 1363 $error++; 1364 last; 1365 } 1366 $cmd .= $Commands[$icom]; 1367 $Code .= "# $Commands[$icom]\n" if $doGenerate; 1368 } 1369 1370 my $subst = stripRegex( $del, \$cmd ); 1371 if( ! defined( $regex ) ){ 1372 Warn( "malformed substitution expression", $fl ); 1373 $error++; 1374 next; 1375 } 1376 $subst = sub2p( $del, $subst, $fl ); 1377 1378 # parse s/// modifier: g|p|0-9|w <file> 1379 my( $global, $nmatch, $print, $write ) = 1380 ( '', '', 0, undef ); 1381 while( $cmd =~ s/^([gp0-9])// ){ 1382 $1 eq 'g' ? ( $global = 'g' ) : 1383 $1 eq 'p' ? ( $print = $1 ) : ( $nmatch .= $1 ); 1384 } 1385 $write = $1 if $cmd =~ s/w\s*(.*)$//; 1386 ### $nmatch =~ s/^(\d)\1*$/$1/; ### may be dangerous? 1387 if( $global && length( $nmatch ) || length( $nmatch ) > 1 ){ 1388 Warn( "conflicting flags `$global$nmatch'", $fl ); 1389 $error++; 1390 next; 1391 } 1392 1393 $arg = makes( $regex, $subst, 1394 $write, $global, $print, $nmatch, $fl ); 1395 if( ! defined( $arg ) ){ 1396 $error++; 1397 next; 1398 } 1399 1400 } else { 1401 Warn( "improper delimiter in s command", $fl ); 1402 $error++; 1403 next; 1404 } 1405 1406 } elsif( $tabref->[1] eq 'tra' ){ 1407 # y/// 1408 # a trailing \ indicates embedded newline 1409 while( $cmd =~ s/(?<!\\)\\$/\n/ ){ 1410 $icom++; 1411 if( $icom > $#Commands ){ 1412 Warn( "unexpected end of script", $fl ); 1413 $error++; 1414 last; 1415 } 1416 $cmd .= $Commands[$icom]; 1417 $Code .= "# $Commands[$icom]\n" if $doGenerate; 1418 } 1419 if( ! length( $cmd ) ){ 1420 Warn( "`y' command requires argument", $fl ); 1421 $error++; 1422 next; 1423 } 1424 my $d = substr( $cmd, 0, 1 ); $cmd = substr( $cmd, 1 ); 1425 if( $d eq '\\' ){ 1426 Warn( "`\\' not valid as delimiter in `y' command", $fl ); 1427 $error++; 1428 next; 1429 } 1430 my $fr = stripTrans( $d, \$cmd ); 1431 if( ! defined( $fr ) || ! length( $cmd ) ){ 1432 Warn( "malformed `y' command argument", $fl ); 1433 $error++; 1434 next; 1435 } 1436 my $to = stripTrans( $d, \$cmd ); 1437 if( ! defined( $to ) ){ 1438 Warn( "malformed `y' command argument", $fl ); 1439 $error++; 1440 next; 1441 } 1442 if( length($fr) != length($to) ){ 1443 Warn( "string lengths in `y' command differ", $fl ); 1444 $error++; 1445 next; 1446 } 1447 if( ! defined( $arg = makey( $fr, $to, $fl ) ) ){ 1448 $error++; 1449 next; 1450 } 1451 1452 } 1453 1454 # $cmd must be now empty - exception is { 1455 if( $cmd !~ /^\s*$/ ){ 1456 if( $key eq '{' ){ 1457 # dirty hack to process command on '{' line 1458 $Commands[$icom--] = $cmd; 1459 } else { 1460 Warn( "extra characters after command ($cmd)", $fl ); 1461 $error++; 1462 next; 1463 } 1464 } 1465 1466 # Make Code 1467 # 1468 if( &{$tabref->[2]}( $addr1, $addr2, $negated, 1469 $tabref->[3], $arg, $fl ) ){ 1470 $error++; 1471 } 1472 } 1473 1474 while( @BlockStack ){ 1475 my $bl = pop( @BlockStack ); 1476 Warn( "start of unterminated `{'", $bl ); 1477 $error++; 1478 } 1479 1480 for my $lab ( keys( %Label ) ){ 1481 if( ! exists( $Label{$lab}{defined} ) ){ 1482 for my $used ( @{$Label{$lab}{used}} ){ 1483 Warn( "undefined label `$lab'", $used ); 1484 $error++; 1485 } 1486 } 1487 } 1488 1489 exit( 1 ) if $error; 1490} 1491 1492 1493############## 1494#### MAIN #### 1495############## 1496 1497sub usage(){ 1498 print STDERR "Usage: sed [-an] command [file...]\n"; 1499 print STDERR " [-an] [-e command] [-f script-file] [file...]\n"; 1500} 1501 1502################### 1503# Here we go again... 1504# 1505my $expr = 0; 1506while( @ARGV && $ARGV[0] =~ /^-(.)(.*)$/ ){ 1507 my $opt = $1; 1508 my $arg = $2; 1509 shift( @ARGV ); 1510 if( $opt eq 'e' ){ 1511 if( length( $arg ) ){ 1512 push( @Commands, split( "\n", $arg ) ); 1513 } elsif( @ARGV ){ 1514 push( @Commands, shift( @ARGV ) ); 1515 } else { 1516 Warn( "option -e requires an argument" ); 1517 usage(); 1518 exit( 1 ); 1519 } 1520 $expr++; 1521 $Defined{$#Commands} = " #$expr"; 1522 next; 1523 } 1524 if( $opt eq 'f' ){ 1525 my $path; 1526 if( length( $arg ) ){ 1527 $path = $arg; 1528 } elsif( @ARGV ){ 1529 $path = shift( @ARGV ); 1530 } else { 1531 Warn( "option -f requires an argument" ); 1532 usage(); 1533 exit( 1 ); 1534 } 1535 my $fst = $#Commands + 1; 1536 open( SCRIPT, "<$path" ) || die( "$0: $path: could not open ($!)\n" ); 1537 my $cmd; 1538 while( defined( $cmd = <SCRIPT> ) ){ 1539 chomp( $cmd ); 1540 push( @Commands, $cmd ); 1541 } 1542 close( SCRIPT ); 1543 if( $#Commands >= $fst ){ 1544 $Defined{$fst} = "$path"; 1545 } 1546 next; 1547 } 1548 if( $opt eq '-' && $arg eq '' ){ 1549 last; 1550 } 1551 if( $opt eq 'h' || $opt eq '?' ){ 1552 usage(); 1553 exit( 0 ); 1554 } 1555 if( $opt eq 'n' ){ 1556 $doAutoPrint = 0; 1557 } elsif( $opt eq 'a' ){ 1558 $doOpenWrite = 0; 1559 } else { 1560 Warn( "illegal option `$opt'" ); 1561 usage(); 1562 exit( 1 ); 1563 } 1564 if( length( $arg ) ){ 1565 unshift( @ARGV, "-$arg" ); 1566 } 1567} 1568 1569# A singleton command may be the 1st argument when there are no options. 1570# 1571if( @Commands == 0 ){ 1572 if( @ARGV == 0 ){ 1573 Warn( "no script command given" ); 1574 usage(); 1575 exit( 1 ); 1576 } 1577 push( @Commands, split( "\n", shift( @ARGV ) ) ); 1578 $Defined{0} = ' #1'; 1579} 1580 1581print STDERR "Files: @ARGV\n" if $useDEBUG; 1582 1583# generate leading code 1584# 1585$Func = <<'[TheEnd]'; 1586 1587# openARGV: open 1st input file 1588# 1589sub openARGV(){ 1590 unshift( @ARGV, '-' ) unless @ARGV; 1591 my $file = shift( @ARGV ); 1592 open( ARG, "<$file" ) 1593 || die( "$0: can't open $file for reading ($!)\n" ); 1594 $isEOF = 0; 1595} 1596 1597# getsARGV: Read another input line into argument (default: $_). 1598# Move on to next input file, and reset EOF flag $isEOF. 1599sub getsARGV(;\$){ 1600 my $argref = @_ ? shift() : \$_; 1601 while( $isEOF || ! defined( $$argref = <ARG> ) ){ 1602 close( ARG ); 1603 return 0 unless @ARGV; 1604 my $file = shift( @ARGV ); 1605 open( ARG, "<$file" ) 1606 || die( "$0: can't open $file for reading ($!)\n" ); 1607 $isEOF = 0; 1608 } 1609 1; 1610} 1611 1612# eofARGV: end-of-file test 1613# 1614sub eofARGV(){ 1615 return @ARGV == 0 && ( $isEOF = eof( ARG ) ); 1616} 1617 1618# makeHandle: Generates another file handle for some file (given by its path) 1619# to be written due to a w command or an s command's w flag. 1620sub makeHandle($){ 1621 my( $path ) = @_; 1622 my $handle; 1623 if( ! exists( $wFiles{$path} ) || $wFiles{$path} eq '' ){ 1624 $handle = $wFiles{$path} = gensym(); 1625 if( $doOpenWrite ){ 1626 if( ! open( $handle, ">$path" ) ){ 1627 die( "$0: can't open $path for writing: ($!)\n" ); 1628 } 1629 } 1630 } else { 1631 $handle = $wFiles{$path}; 1632 } 1633 return $handle; 1634} 1635 1636# printQ: Print queued output which is either a string or a reference 1637# to a pathname. 1638sub printQ(){ 1639 for my $q ( @Q ){ 1640 if( ref( $q ) ){ 1641 # flush open w files so that reading this file gets it all 1642 if( exists( $wFiles{$$q} ) && $wFiles{$$q} ne '' ){ 1643 open( $wFiles{$$q}, ">>$$q" ); 1644 } 1645 # copy file to stdout: slow, but safe 1646 if( open( RF, "<$$q" ) ){ 1647 while( defined( my $line = <RF> ) ){ 1648 print $line; 1649 } 1650 close( RF ); 1651 } 1652 } else { 1653 print $q; 1654 } 1655 } 1656 undef( @Q ); 1657} 1658 1659[TheEnd] 1660 1661# generate the sed loop 1662# 1663$Code .= <<'[TheEnd]'; 1664sub openARGV(); 1665sub getsARGV(;\$); 1666sub eofARGV(); 1667sub printQ(); 1668 1669# Run: the sed loop reading input and applying the script 1670# 1671sub Run(){ 1672 my( $h, $icnt, $s, $n ); 1673 # hack (not unbreakable :-/) to avoid // matching an empty string 1674 my $z = "\000"; $z =~ /$z/; 1675 # Initialize. 1676 openARGV(); 1677 $Hold = ''; 1678 $CondReg = 0; 1679 $doPrint = $doAutoPrint; 1680CYCLE: 1681 while( getsARGV() ){ 1682 chomp(); 1683 $CondReg = 0; # cleared on t 1684BOS:; 1685[TheEnd] 1686 1687 # parse - avoid opening files when doing s2p 1688 # 1689 ( $svOpenWrite, $doOpenWrite ) = ( $doOpenWrite, $svOpenWrite ) 1690 if $doGenerate; 1691 Parse(); 1692 ( $svOpenWrite, $doOpenWrite ) = ( $doOpenWrite, $svOpenWrite ) 1693 if $doGenerate; 1694 1695 # append trailing code 1696 # 1697 $Code .= <<'[TheEnd]'; 1698EOS: if( $doPrint ){ 1699 print $_, "\n"; 1700 } else { 1701 $doPrint = $doAutoPrint; 1702 } 1703 printQ() if @Q; 1704 } 1705 1706 exit( 0 ); 1707} 1708[TheEnd] 1709 1710 1711# append optional functions, prepend prototypes 1712# 1713my $Proto = "# prototypes\n"; 1714if( $GenKey{'l'} ){ 1715 $Proto .= "sub _l();\n"; 1716 $Func .= <<'[TheEnd]'; 1717# _l: l command processing 1718# 1719sub _l(){ 1720 my $h = $_; 1721 my $mcpl = 70; 1722 # transform non printing chars into escape notation 1723 $h =~ s/\\/\\\\/g; 1724 if( $h =~ /[^[:print:]]/ ){ 1725 $h =~ s/\a/\\a/g; 1726 $h =~ s/\f/\\f/g; 1727 $h =~ s/\n/\\n/g; 1728 $h =~ s/\t/\\t/g; 1729 $h =~ s/\r/\\r/g; 1730 $h =~ s/\e/\\e/g; 1731 $h =~ s/([^[:print:]])/sprintf("\\%03o", ord($1))/ge; 1732 } 1733 # split into lines of length $mcpl 1734 while( length( $h ) > $mcpl ){ 1735 my $l = substr( $h, 0, $mcpl-1 ); 1736 $h = substr( $h, $mcpl ); 1737 # remove incomplete \-escape from end of line 1738 if( $l =~ s/(?<!\\)(\\[0-7]{0,2})$// ){ 1739 $h = $1 . $h; 1740 } 1741 print $l, "\\\n"; 1742 } 1743 print "$h\$\n"; 1744} 1745 1746[TheEnd] 1747} 1748 1749if( $GenKey{'r'} ){ 1750 $Proto .= "sub _r(\$);\n"; 1751 $Func .= <<'[TheEnd]'; 1752# _r: r command processing: Save a reference to the pathname. 1753# 1754sub _r($){ 1755 my $path = shift(); 1756 push( @Q, \$path ); 1757} 1758 1759[TheEnd] 1760} 1761 1762if( $GenKey{'t'} ){ 1763 $Proto .= "sub _t();\n"; 1764 $Func .= <<'[TheEnd]'; 1765# _t: t command - condition register test/reset 1766# 1767sub _t(){ 1768 my $res = $CondReg; 1769 $CondReg = 0; 1770 $res; 1771} 1772 1773[TheEnd] 1774} 1775 1776if( $GenKey{'w'} ){ 1777 $Proto .= "sub _w(\$);\n"; 1778 $Func .= <<'[TheEnd]'; 1779# _w: w command and s command's w flag - write to file 1780# 1781sub _w($){ 1782 my $path = shift(); 1783 my $handle = $wFiles{$path}; 1784 if( ! $doOpenWrite && ! defined( fileno( $handle ) ) ){ 1785 open( $handle, ">$path" ) 1786 || die( "$0: $path: cannot open ($!)\n" ); 1787 } 1788 print $handle $_, "\n"; 1789} 1790 1791[TheEnd] 1792} 1793 1794$Code = $Proto . $Code; 1795 1796# magic "#n" - same as -n option 1797# 1798$doAutoPrint = 0 if substr( $Commands[0], 0, 2 ) eq '#n'; 1799 1800# eval code - check for errors 1801# 1802print "Code:\n$Code$Func" if $useDEBUG; 1803eval $Code . $Func; 1804if( $@ ){ 1805 print "Code:\n$Code$Func"; 1806 die( "$0: internal error - generated incorrect Perl code: $@\n" ); 1807} 1808 1809if( $doGenerate ){ 1810 1811 # write full Perl program 1812 # 1813 1814 # bang line, declarations, prototypes 1815 print <<TheEnd; 1816#!$perlpath -w 1817eval 'exec $perlpath -S \$0 \${1+"\$@"}' 1818 if 0; 1819\$0 =~ s/^.*?(\\w+)\[\\.\\w+\]*\$/\$1/; 1820 1821use strict; 1822use Symbol; 1823use vars qw{ \$isEOF \$Hold \%wFiles \@Q \$CondReg 1824 \$doAutoPrint \$doOpenWrite \$doPrint }; 1825\$doAutoPrint = $doAutoPrint; 1826\$doOpenWrite = $doOpenWrite; 1827TheEnd 1828 1829 my $wf = "'" . join( "', '", keys( %wFiles ) ) . "'"; 1830 if( $wf ne "''" ){ 1831 print <<TheEnd; 1832sub makeHandle(\$); 1833for my \$p ( $wf ){ 1834 exit( 1 ) unless makeHandle( \$p ); 1835} 1836TheEnd 1837 } 1838 1839 print $Code; 1840 print "Run();\n"; 1841 print $Func; 1842 exit( 0 ); 1843 1844} else { 1845 1846 # execute: make handles (and optionally open) all w files; run! 1847 for my $p ( keys( %wFiles ) ){ 1848 exit( 1 ) unless makeHandle( $p ); 1849 } 1850 Run(); 1851} 1852 1853 1854=head1 ENVIRONMENT 1855 1856The environment variable C<PSEDEXTBRE> may be set to extend BREs. 1857See L<"Additional Atoms">. 1858 1859=head1 DIAGNOSTICS 1860 1861=over 4 1862 1863=item ambiguous translation for character `%s' in `y' command 1864 1865The indicated character appears twice, with different translations. 1866 1867=item `[' cannot be last in pattern 1868 1869A `[' in a BRE indicates the beginning of a I<bracket expression>. 1870 1871=item `\' cannot be last in pattern 1872 1873A `\' in a BRE is used to make the subsequent character literal. 1874 1875=item `\' cannot be last in substitution 1876 1877A `\' in a subsitution string is used to make the subsequent character literal. 1878 1879=item conflicting flags `%s' 1880 1881In an B<s> command, either the `g' flag and an n-th occurrence flag, or 1882multiple n-th occurrence flags are specified. Note that only the digits 1883`1' through `9' are permitted. 1884 1885=item duplicate label %s (first defined at %s) 1886 1887=item excess address(es) 1888 1889The command has more than the permitted number of addresses. 1890 1891=item extra characters after command (%s) 1892 1893=item illegal option `%s' 1894 1895=item improper delimiter in s command 1896 1897The BRE and substitution may not be delimited with `\' or newline. 1898 1899=item invalid address after `,' 1900 1901=item invalid backreference (%s) 1902 1903The specified backreference number exceeds the number of backreferences 1904in the BRE. 1905 1906=item invalid repeat clause `\{%s\}' 1907 1908The repeat clause does not contain a valid integer value, or pair of 1909values. 1910 1911=item malformed regex, 1st address 1912 1913=item malformed regex, 2nd address 1914 1915=item malformed regular expression 1916 1917=item malformed substitution expression 1918 1919=item malformed `y' command argument 1920 1921The first or second string of a B<y> command is syntactically incorrect. 1922 1923=item maximum less than minimum in `\{%s\}' 1924 1925=item no script command given 1926 1927There must be at least one B<-e> or one B<-f> option specifying a 1928script or script file. 1929 1930=item `\' not valid as delimiter in `y' command 1931 1932=item option -e requires an argument 1933 1934=item option -f requires an argument 1935 1936=item `s' command requires argument 1937 1938=item start of unterminated `{' 1939 1940=item string lengths in `y' command differ 1941 1942The translation table strings in a B<y> commanf must have equal lengths. 1943 1944=item undefined label `%s' 1945 1946=item unexpected `}' 1947 1948A B<}> command without a preceding B<{> command was encountered. 1949 1950=item unexpected end of script 1951 1952The end of the script was reached although a text line after a 1953B<a>, B<c> or B<i> command indicated another line. 1954 1955=item unknown command `%s' 1956 1957=item unterminated `[' 1958 1959A BRE contains an unterminated bracket expression. 1960 1961=item unterminated `\(' 1962 1963A BRE contains an unterminated backreference. 1964 1965=item `\{' without closing `\}' 1966 1967A BRE contains an unterminated bounds specification. 1968 1969=item `\)' without preceding `\(' 1970 1971=item `y' command requires argument 1972 1973=back 1974 1975=head1 EXAMPLE 1976 1977The basic material for the preceding section was generated by running 1978the sed script 1979 1980 #no autoprint 1981 s/^.*Warn( *"\([^"]*\)".*$/\1/ 1982 t process 1983 b 1984 :process 1985 s/$!/%s/g 1986 s/$[_[:alnum:]]\{1,\}/%s/g 1987 s/\\\\/\\/g 1988 s/^/=item / 1989 p 1990 1991on the program's own text, and piping the output into C<sort -u>. 1992 1993 1994=head1 SED SCRIPT TRANSLATION 1995 1996If this program is invoked with the name F<s2p> it will act as a 1997sed-to-Perl translator. After option processing (all other 1998arguments are ignored), a Perl program is printed on standard 1999output, which will process the input stream (as read from all 2000arguments) in the way defined by the sed script and the option setting 2001used for the translation. 2002 2003=head1 SEE ALSO 2004 2005perl(1), re_format(7) 2006 2007=head1 BUGS 2008 2009The B<l> command will show escape characters (ESC) as `C<\e>', but 2010a vertical tab (VT) in octal. 2011 2012Trailing spaces are truncated from labels in B<:>, B<t> and B<b> commands. 2013 2014The meaning of an empty regular expression (`C<//>'), as defined by B<sed>, 2015is "the last pattern used, at run time". This deviates from the Perl 2016interpretation, which will re-use the "last last successfully executed 2017regular expression". Since keeping track of pattern usage would create 2018terribly cluttered code, and differences would only appear in obscure 2019context (where other B<sed> implementations appear to deviate, too), 2020the Perl semantics was adopted. Note that common usage of this feature, 2021such as in C</abc/s//xyz/>, will work as expected. 2022 2023Collating elements (of bracket expressions in BREs) are not implemented. 2024 2025=head1 STANDARDS 2026 2027This B<sed> implementation conforms to the IEEE Std1003.2-1992 ("POSIX.2") 2028definition of B<sed>, and is compatible with the I<OpenBSD> 2029implementation, except where otherwise noted (see L<"BUGS">). 2030 2031=head1 AUTHOR 2032 2033This Perl implementation of I<sed> was written by Wolfgang Laun, 2034I<Wolfgang.Laun@alcatel.at>. 2035 2036=head1 COPYRIGHT and LICENSE 2037 2038This program is free and open software. You may use, modify, 2039distribute, and sell this program (and any modified variants) in any 2040way you wish, provided you do not restrict others from doing the same. 2041 2042=cut 2043 2044!NO!SUBS! 2045 2046close OUT or die "Can't close $file: $!"; 2047chmod 0755, $file or die "Can't reset permissions for $file: $!\n"; 2048unlink 'psed'; 2049print "Linking s2p to psed.\n"; 2050if (defined $Config{d_link}) { 2051 link 's2p', 'psed'; 2052} else { 2053 unshift @INC, '../lib'; 2054 require File::Copy; 2055 File::Copy::syscopy('s2p', 'psed'); 2056} 2057exec("$Config{'eunicefix'} $file") if $Config{'eunicefix'} ne ':'; 2058chdir $origdir; 2059