1#!/usr/bin/perl
2
3use Config;
4use File::Basename qw(&basename &dirname);
5use Cwd;
6use subs qw(link);
7
8sub link { # This is a cut-down version of installperl:link().
9    my($from,$to) = @_;
10    my($success) = 0;
11
12    eval {
13	CORE::link($from, $to)
14	    ? $success++
15	    : ($from =~ m#^/afs/# || $to =~ m#^/afs/#)
16	      ? die "AFS"  # okay inside eval {}
17	      : die "Couldn't link $from to $to: $!\n";
18    };
19    if ($@) {
20	require File::Copy;
21	File::Copy::copy($from, $to)
22	    ? $success++
23	    : warn "Couldn't copy $from to $to: $!\n";
24    }
25    $success;
26}
27
28# List explicitly here the variables you want Configure to
29# generate.  Metaconfig only looks for shell variables, so you
30# have to mention them as if they were shell variables, not
31# %Config entries.  Thus you write
32#  $startperl
33# to ensure Configure will look for $Config{startperl}.
34
35# This forces PL files to create target in same directory as PL file.
36# This is so that make depend always knows where to find PL derivatives.
37$origdir = cwd;
38chdir dirname($0);
39$file = basename($0, '.PL');
40$file .= '.com' if $^O eq 'VMS';
41
42open OUT,">$file" or die "Can't create $file: $!";
43
44print "Extracting $file (with variable substitutions)\n";
45
46# In this section, perl variables will be expanded during extraction.
47# You can use $Config{...} to use Configure variables.
48
49print OUT <<"!GROK!THIS!";
50$Config{startperl}
51    eval 'exec $Config{perlpath} -S \$0 \${1+"\$@"}'
52	if \$running_under_some_shell;
53my \$startperl;
54my \$perlpath;
55(\$startperl = <<'/../') =~ s/\\s*\\z//;
56$Config{startperl}
57/../
58(\$perlpath = <<'/../') =~ s/\\s*\\z//;
59$Config{perlpath}
60/../
61!GROK!THIS!
62
63# In the following, perl variables are not expanded during extraction.
64
65print OUT <<'!NO!SUBS!';
66
67$0 =~ s/^.*?(\w+)[\.\w]*$/$1/;
68
69# (p)sed - a stream editor
70# History:  Aug 12 2000: Original version.
71#           Mar 25 2002: Rearrange generated Perl program.
72
73use strict;
74use integer;
75use Symbol;
76
77=head1 NAME
78
79psed - a stream editor
80
81=head1 SYNOPSIS
82
83   psed [-an] script [file ...]
84   psed [-an] [-e script] [-f script-file] [file ...]
85
86   s2p  [-an] [-e script] [-f script-file]
87
88=head1 DESCRIPTION
89
90A stream editor reads the input stream consisting of the specified files
91(or standard input, if none are given), processes is line by line by
92applying a script consisting of edit commands, and writes resulting lines
93to standard output. The filename `C<->' may be used to read standard input.
94
95The edit script is composed from arguments of B<-e> options and
96script-files, in the given order. A single script argument may be specified
97as the first parameter.
98
99If this program is invoked with the name F<s2p>, it will act as a
100sed-to-Perl translator. See L<"sed Script Translation">.
101
102B<sed> returns an exit code of 0 on success or >0 if an error occurred.
103
104=head1 OPTIONS
105
106=over 4
107
108=item B<-a>
109
110A file specified as argument to the B<w> edit command is by default
111opened before input processing starts. Using B<-a>, opening of such
112files is delayed until the first line is actually written to the file.
113
114=item B<-e> I<script>
115
116The editing commands defined by I<script> are appended to the script.
117Multiple commands must be separated by newlines.
118
119=item B<-f> I<script-file>
120
121Editing commands from the specified I<script-file> are read and appended
122to the script.
123
124=item B<-n>
125
126By default, a line is written to standard output after the editing script
127has been applied to it. The B<-n> option suppresses automatic printing.
128
129=back
130
131=head1 COMMANDS
132
133B<sed> command syntax is defined as
134
135Z<> Z<> Z<> Z<>[I<address>[B<,>I<address>]][B<!>]I<function>[I<argument>]
136
137with whitespace being permitted before or after addresses, and between
138the function character and the argument. The I<address>es and the
139address inverter (C<!>) are used to restrict the application of a
140command to the selected line(s) of input.
141
142Each command must be on a line of its own, except where noted in
143the synopses below.
144
145The edit cycle performed on each input line consist of reading the line
146(without its trailing newline character) into the I<pattern space>,
147applying the applicable commands of the edit script, writing the final
148contents of the pattern space and a newline to the standard output.
149A I<hold space> is provided for saving the contents of the
150pattern space for later use.
151
152=head2 Addresses
153
154A sed address is either a line number or a pattern, which may be combined
155arbitrarily to construct ranges. Lines are numbered across all input files.
156
157Any address may be followed by an exclamation mark (`C<!>'), selecting
158all lines not matching that address.
159
160=over 4
161
162=item I<number>
163
164The line with the given number is selected.
165
166=item B<$>
167
168A dollar sign (C<$>) is the line number of the last line of the input stream.
169
170=item B</>I<regular expression>B</>
171
172A pattern address is a basic regular expression (see
173L<"Basic Regular Expressions">), between the delimiting character C</>.
174Any other character except C<\> or newline may be used to delimit a
175pattern address when the initial delimiter is prefixed with a
176backslash (`C<\>').
177
178=back
179
180If no address is given, the command selects every line.
181
182If one address is given, it selects the line (or lines) matching the
183address.
184
185Two addresses select a range that begins whenever the first address
186matches, and ends (including that line) when the second address matches.
187If the first (second) address is a matching pattern, the second
188address is not applied to the very same line to determine the end of
189the range. Likewise, if the second address is a matching pattern, the
190first address is not applied to the very same line to determine the
191begin of another range. If both addresses are line numbers,
192and the second line number is less than the first line number, then
193only the first line is selected.
194
195
196=head2 Functions
197
198The maximum permitted number of addresses is indicated with each
199function synopsis below.
200
201The argument I<text> consists of one or more lines following the command.
202Embedded newlines in I<text> must be preceded with a backslash.  Other
203backslashes in I<text> are deleted and the following character is taken
204literally.
205
206=over 4
207
208=cut
209
210my %ComTab;
211my %GenKey;
212#--------------------------------------------------------------------------
213$ComTab{'a'}=[ 1, 'txt', \&Emit,       '{ push( @Q, <<'."'TheEnd' ) }\n" ]; #ok
214
215=item [1addr]B<a\> I<text>
216
217Write I<text> (which must start on the line following the command)
218to standard output immediately before reading the next line
219of input, either by executing the B<N> function or by beginning a new cycle.
220
221=cut
222
223#--------------------------------------------------------------------------
224$ComTab{'b'}=[ 2, 'str', \&Branch,     '{ goto XXX; }'                   ]; #ok
225
226=item [2addr]B<b> [I<label>]
227
228Branch to the B<:> function with the specified I<label>. If no label
229is given, branch to the end of the script.
230
231=cut
232
233#--------------------------------------------------------------------------
234$ComTab{'c'}=[ 2, 'txt', \&Change,     <<'-X-'                           ]; #ok
235{ print <<'TheEnd'; } $doPrint = 0; goto EOS;
236-X-
237### continue OK => next CYCLE;
238
239=item [2addr]B<c\> I<text>
240
241The line, or range of lines, selected by the address is deleted.
242The I<text> (which must start on the line following the command)
243is written to standard output. With an address range, this occurs at
244the end of the range.
245
246=cut
247
248#--------------------------------------------------------------------------
249$ComTab{'d'}=[ 2, '',    \&Emit,       <<'-X-'                           ]; #ok
250{ $doPrint = 0;
251  goto EOS;
252}
253-X-
254### continue OK => next CYCLE;
255
256=item [2addr]B<d>
257
258Deletes the pattern space and starts the next cycle.
259
260=cut
261
262#--------------------------------------------------------------------------
263$ComTab{'D'}=[ 2, '',    \&Emit,       <<'-X-'                           ]; #ok
264{ s/^.*\n?//;
265  if(length($_)){ goto BOS } else { goto EOS }
266}
267-X-
268### continue OK => next CYCLE;
269
270=item [2addr]B<D>
271
272Deletes the pattern space through the first embedded newline or to the end.
273If the pattern space becomes empty, a new cycle is started, otherwise
274execution of the script is restarted.
275
276=cut
277
278#--------------------------------------------------------------------------
279$ComTab{'g'}=[ 2, '',    \&Emit,       '{ $_ = $Hold };'                 ]; #ok
280
281=item [2addr]B<g>
282
283Replace the contents of the pattern space with the hold space.
284
285=cut
286
287#--------------------------------------------------------------------------
288$ComTab{'G'}=[ 2, '',    \&Emit,       '{ $_ .= "\n"; $_ .= $Hold };'    ]; #ok
289
290=item [2addr]B<G>
291
292Append a newline and the contents of the hold space to the pattern space.
293
294=cut
295
296#--------------------------------------------------------------------------
297$ComTab{'h'}=[ 2, '',    \&Emit,       '{ $Hold = $_ }'                  ]; #ok
298
299=item [2addr]B<h>
300
301Replace the contents of the hold space with the pattern space.
302
303=cut
304
305#--------------------------------------------------------------------------
306$ComTab{'H'}=[ 2, '',    \&Emit,       '{ $Hold .= "\n"; $Hold .= $_; }' ]; #ok
307
308=item [2addr]B<H>
309
310Append a newline and the contents of the pattern space to the hold space.
311
312=cut
313
314#--------------------------------------------------------------------------
315$ComTab{'i'}=[ 1, 'txt', \&Emit,       '{ print <<'."'TheEnd' }\n"       ]; #ok
316
317=item [1addr]B<i\> I<text>
318
319Write the I<text> (which must start on the line following the command)
320to standard output.
321
322=cut
323
324#--------------------------------------------------------------------------
325$ComTab{'l'}=[ 2, '',    \&Emit,       '{ _l() }'                        ]; #okUTF8
326
327=item [2addr]B<l>
328
329Print the contents of the pattern space: non-printable characters are
330shown in C-style escaped form; long lines are split and have a trailing
331`C<\>' at the point of the split; the true end of a line is marked with
332a `C<$>'. Escapes are: `\a', `\t', `\n', `\f', `\r', `\e' for
333BEL, HT, LF, FF, CR, ESC, respectively, and `\' followed by a three-digit
334octal number for all other non-printable characters.
335
336=cut
337
338#--------------------------------------------------------------------------
339$ComTab{'n'}=[ 2, '',    \&Emit,       <<'-X-'                           ]; #ok
340{ print $_, "\n" if $doPrint;
341  printQ() if @Q;
342  $CondReg = 0;
343  last CYCLE unless getsARGV();
344  chomp();
345}
346-X-
347
348=item [2addr]B<n>
349
350If automatic printing is enabled, write the pattern space to the standard
351output. Replace the pattern space with the next line of input. If
352there is no more input, processing is terminated.
353
354=cut
355
356#--------------------------------------------------------------------------
357$ComTab{'N'}=[ 2, '',    \&Emit,       <<'-X-'                           ]; #ok
358{ printQ() if @Q;
359  $CondReg = 0;
360  last CYCLE unless getsARGV( $h );
361  chomp( $h );
362  $_ .= "\n$h";
363}
364-X-
365
366=item [2addr]B<N>
367
368Append a newline and the next line of input to the pattern space. If
369there is no more input, processing is terminated.
370
371=cut
372
373#--------------------------------------------------------------------------
374$ComTab{'p'}=[ 2, '',    \&Emit,       '{ print $_, "\n"; }'             ]; #ok
375
376=item [2addr]B<p>
377
378Print the pattern space to the standard output. (Use the B<-n> option
379to suppress automatic printing at the end of a cycle if you want to
380avoid double printing of lines.)
381
382=cut
383
384#--------------------------------------------------------------------------
385$ComTab{'P'}=[ 2, '',    \&Emit,       <<'-X-'                           ]; #ok
386{ if( /^(.*)/ ){ print $1, "\n"; } }
387-X-
388
389=item [2addr]B<P>
390
391Prints the pattern space through the first embedded newline or to the end.
392
393=cut
394
395#--------------------------------------------------------------------------
396$ComTab{'q'}=[ 1, '',    \&Emit,       <<'-X-'                           ]; #ok
397{ print $_, "\n" if $doPrint;
398  last CYCLE;
399}
400-X-
401
402=item [1addr]B<q>
403
404Branch to the end of the script and quit without starting a new cycle.
405
406=cut
407
408#--------------------------------------------------------------------------
409$ComTab{'r'}=[ 1, 'str', \&Emit,       "{ _r( '-X-' ) }"                 ]; #ok
410
411=item [1addr]B<r> I<file>
412
413Copy the contents of the I<file> to standard output immediately before
414the next attempt to read a line of input. Any error encountered while
415reading I<file> is silently ignored.
416
417=cut
418
419#--------------------------------------------------------------------------
420$ComTab{'s'}=[ 2, 'sub', \&Emit,       ''                                ]; #ok
421
422=item [2addr]B<s/>I<regular expression>B</>I<replacement>B</>I<flags>
423
424Substitute the I<replacement> string for the first substring in
425the pattern space that matches the I<regular expression>.
426Any character other than backslash or newline can be used instead of a
427slash to delimit the regular expression and the replacement.
428To use the delimiter as a literal character within the regular expression
429and the replacement, precede the character by a backslash (`C<\>').
430
431Literal newlines may be embedded in the replacement string by
432preceding a newline with a backslash.
433
434Within the replacement, an ampersand (`C<&>') is replaced by the string
435matching the regular expression. The strings `C<\1>' through `C<\9>' are
436replaced by the corresponding subpattern (see L<"Basic Regular Expressions">).
437To get a literal `C<&>' or `C<\>' in the replacement text, precede it
438by a backslash.
439
440The following I<flags> modify the behaviour of the B<s> command:
441
442=over 8
443
444=item B<g>
445
446The replacement is performed for all matching, non-overlapping substrings
447of the pattern space.
448
449=item B<1>..B<9>
450
451Replace only the n-th matching substring of the pattern space.
452
453=item B<p>
454
455If the substitution was made, print the new value of the pattern space.
456
457=item B<w> I<file>
458
459If the substitution was made, write the new value of the pattern space
460to the specified file.
461
462=back
463
464=cut
465
466#--------------------------------------------------------------------------
467$ComTab{'t'}=[ 2, 'str', \&Branch,     '{ goto XXX if _t() }'            ]; #ok
468
469=item [2addr]B<t> [I<label>]
470
471Branch to the B<:> function with the specified I<label> if any B<s>
472substitutions have been made since the most recent reading of an input line
473or execution of a B<t> function. If no label is given, branch to the end of
474the script.
475
476
477=cut
478
479#--------------------------------------------------------------------------
480$ComTab{'w'}=[ 2, 'str', \&Write,      "{ _w( '-X-' ) }"                 ]; #ok
481
482=item [2addr]B<w> I<file>
483
484The contents of the pattern space are written to the I<file>.
485
486=cut
487
488#--------------------------------------------------------------------------
489$ComTab{'x'}=[ 2, '',    \&Emit,       '{ ($Hold, $_) = ($_, $Hold) }'   ]; #ok
490
491=item [2addr]B<x>
492
493Swap the contents of the pattern space and the hold space.
494
495=cut
496
497#--------------------------------------------------------------------------
498$ComTab{'y'}=[ 2, 'tra', \&Emit,       ''                                ]; #ok
499=item [2addr]B<y>B</>I<string1>B</>I<string2>B</>
500
501In the pattern space, replace all characters occuring in I<string1> by the
502character at the corresponding position in I<string2>. It is possible
503to use any character (other than a backslash or newline) instead of a
504slash to delimit the strings.  Within I<string1> and I<string2>, a
505backslash followed by any character other than a newline is that literal
506character, and a backslash followed by an `n' is replaced by a newline
507character.
508
509=cut
510
511#--------------------------------------------------------------------------
512$ComTab{'='}=[ 1, '',    \&Emit,       '{ print "$.\n" }'                ]; #ok
513
514=item [1addr]B<=>
515
516Prints the current line number on the standard output.
517
518=cut
519
520#--------------------------------------------------------------------------
521$ComTab{':'}=[ 0, 'str', \&Label,      ''                                ]; #ok
522
523=item [0addr]B<:> [I<label>]
524
525The command specifies the position of the I<label>. It has no other effect.
526
527=cut
528
529#--------------------------------------------------------------------------
530$ComTab{'{'}=[ 2, '',    \&BeginBlock, '{'                               ]; #ok
531$ComTab{'}'}=[ 0, '',    \&EndBlock,   ';}'                              ]; #ok
532# ';' to avoid warning on empty {}-block
533
534=item [2addr]B<{> [I<command>]
535
536=item [0addr]B<}>
537
538These two commands begin and end a command list. The first command may
539be given on the same line as the opening B<{> command. The commands
540within the list are jointly selected by the address(es) given on the
541B<{> command (but may still have individual addresses).
542
543=cut
544
545#--------------------------------------------------------------------------
546$ComTab{'#'}=[ 0, 'str', \&Comment,    ''                                ]; #ok
547
548=item [0addr]B<#> [I<comment>]
549
550The entire line is ignored (treated as a comment). If, however, the first
551two characters in the script are `C<#n>', automatic printing of output is
552suppressed, as if the B<-n> option were given on the command line.
553
554=back
555
556=cut
557
558use vars qw{ $isEOF $Hold %wFiles @Q $CondReg $doPrint };
559
560my $useDEBUG    = exists( $ENV{PSEDDEBUG} );
561my $useEXTBRE   = $ENV{PSEDEXTBRE} || '';
562$useEXTBRE =~ s/[^<>wWyB]//g; # gawk RE's handle these
563
564my $doAutoPrint = 1;          # automatic printing of pattern space (-n => 0)
565my $doOpenWrite = 1;          # open w command output files at start (-a => 0)
566my $svOpenWrite = 0;          # save $doOpenWrite
567my $doGenerate  = $0 eq 's2p';
568
569# Collected and compiled script
570#
571my( @Commands, %Defined, @BlockStack, %Label, $labNum, $Code, $Func );
572$Code = '';
573
574##################
575#  Compile Time
576#
577# Labels
578#
579# Error handling
580#
581sub Warn($;$){
582    my( $msg, $loc ) = @_;
583    $loc ||= '';
584    $loc .= ': ' if length( $loc );
585    warn( "$0: $loc$msg\n" );
586}
587
588$labNum = 0;
589sub newLabel(){
590    return 'L_'.++$labNum;
591}
592
593# safeHere: create safe here delimiter and  modify opcode and argument
594#
595sub safeHere($$){
596    my( $codref, $argref ) = @_;
597    my $eod = 'EOD000';
598    while( $$argref =~ /^$eod$/m ){
599        $eod++;
600    }
601    $$codref =~ s/TheEnd/$eod/e;
602    $$argref .= "$eod\n";
603}
604
605# Emit: create address logic and emit command
606#
607sub Emit($$$$$$){
608    my( $addr1, $addr2, $negated, $opcode, $arg, $fl ) = @_;
609    my $cond = '';
610    if( defined( $addr1 ) ){
611        if( defined( $addr2 ) ){
612	    $addr1 .= $addr2 =~ /^\d+$/ ? "..$addr2" : "...$addr2";
613        } else {
614	    $addr1 .= ' == $.' if $addr1 =~ /^\d+$/;
615	}
616	$cond = $negated ? "unless( $addr1 )\n" : "if( $addr1 )\n";
617    }
618
619    if( $opcode eq '' ){
620	$Code .= "$cond$arg\n";
621
622    } elsif( $opcode =~ s/-X-/$arg/e ){
623	$Code .= "$cond$opcode\n";
624
625    } elsif( $opcode =~ /TheEnd/ ){
626	safeHere( \$opcode, \$arg );
627	$Code .= "$cond$opcode$arg";
628
629    } else {
630	$Code .= "$cond$opcode\n";
631    }
632    0;
633}
634
635# Write (w command, w flag): store pathname
636#
637sub Write($$$$$$){
638    my( $addr1, $addr2, $negated, $opcode, $path, $fl ) = @_;
639    $wFiles{$path} = '';
640    Emit( $addr1, $addr2, $negated, $opcode, $path, $fl );
641}
642
643
644# Label (: command): label definition
645#
646sub Label($$$$$$){
647    my( $addr1, $addr2, $negated, $opcode, $lab, $fl ) = @_;
648    my $rc = 0;
649    $lab =~ s/\s+//;
650    if( length( $lab ) ){
651	my $h;
652	if( ! exists( $Label{$lab} ) ){
653	    $h = $Label{$lab}{name} = newLabel();
654        } else {
655	    $h = $Label{$lab}{name};
656	    if( exists( $Label{$lab}{defined} ) ){
657		my $dl = $Label{$lab}{defined};
658		Warn( "duplicate label $lab (first defined at $dl)", $fl );
659		$rc = 1;
660	    }
661	}
662        $Label{$lab}{defined} = $fl;
663	$Code .= "$h:;\n";
664    }
665    $rc;
666}
667
668# BeginBlock ({ command): push block start
669#
670sub BeginBlock($$$$$$){
671    my( $addr1, $addr2, $negated, $opcode, $arg, $fl ) = @_;
672    push( @BlockStack, [ $fl, $addr1, $addr2, $negated ] );
673    Emit( $addr1, $addr2, $negated, $opcode, $arg, $fl );
674}
675
676# EndBlock (} command): check proper nesting
677#
678sub EndBlock($$$$$$){
679    my( $addr1, $addr2, $negated, $opcode, $arg, $fl ) = @_;
680    my $rc;
681    my $jcom = pop( @BlockStack );
682    if( defined( $jcom ) ){
683	$rc = Emit( $addr1, $addr2, $negated, $opcode, $arg, $fl );
684    } else {
685	Warn( "unexpected `}'", $fl );
686	$rc = 1;
687    }
688    $rc;
689}
690
691# Branch (t, b commands): check or create label, substitute default
692#
693sub Branch($$$$$$){
694    my( $addr1, $addr2, $negated, $opcode, $lab, $fl ) = @_;
695    $lab =~ s/\s+//; # no spaces at end
696    my $h;
697    if( length( $lab ) ){
698	if( ! exists( $Label{$lab} ) ){
699	    $h = $Label{$lab}{name} = newLabel();
700        } else {
701	    $h = $Label{$lab}{name};
702	}
703	push( @{$Label{$lab}{used}}, $fl );
704    } else {
705	$h = 'EOS';
706    }
707    $opcode =~ s/XXX/$h/e;
708    Emit( $addr1, $addr2, $negated, $opcode, '', $fl );
709}
710
711# Change (c command): is special due to range end watching
712#
713sub Change($$$$$$){
714    my( $addr1, $addr2, $negated, $opcode, $arg, $fl ) = @_;
715    my $kwd = $negated ? 'unless' : 'if';
716    if( defined( $addr2 ) ){
717        $addr1 .= $addr2 =~ /^\d+$/ ? "..$addr2" : "...$addr2";
718	if( ! $negated ){
719	    $addr1  = '$icnt = ('.$addr1.')';
720	    $opcode = 'if( $icnt =~ /E0$/ )' . $opcode;
721	}
722    } else {
723	$addr1 .= ' == $.' if $addr1 =~ /^\d+$/;
724    }
725    safeHere( \$opcode, \$arg );
726    $Code .= "$kwd( $addr1 ){\n  $opcode$arg}\n";
727    0;
728}
729
730
731# Comment (# command): A no-op. Who would've thought that!
732#
733sub Comment($$$$$$){
734    my( $addr1, $addr2, $negated, $opcode, $arg, $fl ) = @_;
735### $Code .= "# $arg\n";
736    0;
737}
738
739
740sub stripRegex($$){
741    my( $del, $sref ) = @_;
742    my $regex = $del;
743    print "stripRegex:$del:$$sref:\n" if $useDEBUG;
744    while( $$sref =~ s{^(.*?)(\\*)\Q$del\E(\s*)}{}s ){
745        my $sl = $2;
746	$regex .= $1.$sl.$del;
747	if( length( $sl ) % 2 == 0 ){
748	    return $regex;
749	}
750	$regex .= $3;
751    }
752    undef();
753}
754
755# stripTrans: take a <del> terminated string from y command
756#   honoring and cleaning up of \-escaped <del>'s
757#
758sub stripTrans($$){
759    my( $del, $sref ) = @_;
760    my $t = '';
761    print "stripTrans:$del:$$sref:\n" if $useDEBUG;
762    while( $$sref =~ s{^(.*?)(\\*)\Q$del\E}{}s ){
763        my $sl = $2;
764	$t .= $1;
765	if( length( $sl ) % 2 == 0 ){
766	    $t .= $sl;
767	    $t =~ s/\\\\/\\/g;
768	    return $t;
769	}
770	chop( $sl );
771	$t .= $sl.$del.$3;
772    }
773    undef();
774}
775
776# makey - construct Perl y/// from sed y///
777#
778sub makey($$$){
779    my( $fr, $to, $fl ) = @_;
780    my $error = 0;
781
782    # Ensure that any '-' is up front.
783    # Diagnose duplicate contradicting mappings
784    my %tr;
785    for( my $i = 0; $i < length($fr); $i++ ){
786	my $fc = substr($fr,$i,1);
787	my $tc = substr($to,$i,1);
788	if( exists( $tr{$fc} ) && $tr{$fc} ne $tc ){
789	    Warn( "ambiguous translation for character `$fc' in `y' command",
790		  $fl );
791	    $error++;
792	}
793	$tr{$fc} = $tc;
794    }
795    $fr = $to = '';
796    if( exists( $tr{'-'} ) ){
797	( $fr, $to ) = ( '-', $tr{'-'} );
798	delete( $tr{'-'} );
799    } else {
800	$fr = $to = '';
801    }
802    # might just as well sort it...
803    for my $fc ( sort keys( %tr ) ){
804	$fr .= $fc;
805	$to .= $tr{$fc};
806    }
807    # make embedded delimiters and newlines safe
808    $fr =~ s/([{}])/\$1/g;
809    $to =~ s/([{}])/\$1/g;
810    $fr =~ s/\n/\\n/g;
811    $to =~ s/\n/\\n/g;
812    return $error ? undef() : "{ y{$fr}{$to}; }";
813}
814
815######
816# makes - construct Perl s/// from sed s///
817#
818sub makes($$$$$$$){
819    my( $regex, $subst, $path, $global, $print, $nmatch, $fl ) = @_;
820
821    # make embedded newlines safe
822    $regex =~ s/\n/\\n/g;
823    $subst =~ s/\n/\\n/g;
824
825    my $code;
826    # n-th occurrence
827    #
828    if( length( $nmatch ) ){
829	$code = <<TheEnd;
830{ \$n = $nmatch;
831  while( --\$n && ( \$s = m ${regex}g ) ){}
832  \$s = ( substr( \$_, pos() ) =~ s ${regex}${subst}s ) if \$s;
833  \$CondReg ||= \$s;
834TheEnd
835    } else {
836        $code = <<TheEnd;
837{ \$s = s ${regex}${subst}s${global};
838  \$CondReg ||= \$s;
839TheEnd
840    }
841    if( $print ){
842        $code .= '  print $_, "\n" if $s;'."\n";
843    }
844    if( defined( $path ) ){
845        $wFiles{$path} = '';
846	$code .= " _w( '$path' ) if \$s;\n";
847        $GenKey{'w'} = 1;
848    }
849    $code .= "}";
850}
851
852=head1 BASIC REGULAR EXPRESSIONS
853
854A I<Basic Regular Expression> (BRE), as defined in POSIX 1003.2, consists
855of I<atoms>, for matching parts of a string, and I<bounds>, specifying
856repetitions of a preceding atom.
857
858=head2 Atoms
859
860The possible atoms of a BRE are: B<.>, matching any single character;
861B<^> and B<$>, matching the null string at the beginning or end
862of a string, respectively; a I<bracket expressions>, enclosed
863in B<[> and B<]> (see below); and any single character with no
864other significance (matching that character). A B<\> before one
865of: B<.>, B<^>, B<$>, B<[>, B<*>, B<\>, matching the character
866after the backslash. A sequence of atoms enclosed in B<\(> and B<\)>
867becomes an atom and establishes the target for a I<backreference>,
868consisting of the substring that actually matches the enclosed atoms.
869Finally, B<\> followed by one of the digits B<0> through B<9> is a
870backreference.
871
872A B<^> that is not first, or a B<$> that is not last does not have
873a special significance and need not be preceded by a backslash to
874become literal. The same is true for a B<]>, that does not terminate
875a bracket expression.
876
877An unescaped backslash cannot be last in a BRE.
878
879=head2 Bounds
880
881The BRE bounds are: B<*>, specifying 0 or more matches of the preceding
882atom; B<\{>I<count>B<\}>, specifying that many repetitions;
883B<\{>I<minimum>B<,\}>, giving a lower limit; and
884B<\{>I<minimum>B<,>I<maximum>B<\}> finally defines a lower and upper
885bound.
886
887A bound appearing as the first item in a BRE is taken literally.
888
889=head2 Bracket Expressions
890
891A I<bracket expression> is a list of characters, character ranges
892and character classes enclosed in B<[> and B<]> and matches any
893single character from the represented set of characters.
894
895A character range is written as two characters separated by B<-> and
896represents all characters (according to the character collating sequence)
897that are not less than the first and not greater than the second.
898(Ranges are very collating-sequence-dependent, and portable programs
899should avoid relying on them.)
900
901A character class is one of the class names
902
903   alnum     digit     punct
904   alpha     graph     space
905   blank     lower     upper
906   cntrl     print     xdigit
907
908enclosed in B<[:> and B<:]> and represents the set of characters
909as defined in ctype(3).
910
911If the first character after B<[> is B<^>, the sense of matching is
912inverted.
913
914To include a literal `C<^>', place it anywhere else but first. To
915include a literal 'C<]>' place it first or immediately after an
916initial B<^>. To include a literal `C<->' make it the first (or
917second after B<^>) or last character, or the second endpoint of
918a range.
919
920The special bracket expression constructs C<[[:E<lt>:]]> and C<[[:E<gt>:]]>
921match the null string at the beginning and end of a word respectively.
922(Note that neither is identical to Perl's `\b' atom.)
923
924=head2 Additional Atoms
925
926Since some sed implementations provide additional regular expression
927atoms (not defined in POSIX 1003.2), B<psed> is capable of translating
928the following backslash escapes:
929
930=over 4
931
932=item B<\E<lt>> This is the same as C<[[:E<gt>:]]>.
933
934=item B<\E<gt>> This is the same as C<[[:E<lt>:]]>.
935
936=item B<\w> This is an abbreviation for C<[[:alnum:]_]>.
937
938=item B<\W> This is an abbreviation for C<[^[:alnum:]_]>.
939
940=item B<\y> Match the empty string at a word boundary.
941
942=item B<\B> Match the empty string between any two either word or non-word characters.
943
944=back
945
946To enable this feature, the environment variable PSEDEXTBRE must be set
947to a string containing the requested characters, e.g.:
948C<PSEDEXTBRE='E<lt>E<gt>wW'>.
949
950=cut
951
952#####
953# bre2p - convert BRE to Perl RE
954#
955sub peek(\$$){
956    my( $pref, $ic ) = @_;
957    $ic < length($$pref)-1 ? substr( $$pref, $ic+1, 1 ) : '';
958}
959
960sub bre2p($$$){
961    my( $del, $pat, $fl ) = @_;
962    my $led = $del;
963    $led =~ tr/{([</})]>/;
964    $led = '' if $led eq $del;
965
966    $pat = substr( $pat, 1, length($pat) - 2 );
967    my $res = '';
968    my $bracklev = 0;
969    my $backref  = 0;
970    my $parlev = 0;
971    for( my $ic = 0; $ic < length( $pat ); $ic++ ){
972        my $c = substr( $pat, $ic, 1 );
973        if( $c eq '\\' ){
974	    ### backslash escapes
975            my $nc = peek($pat,$ic);
976            if( $nc eq '' ){
977                Warn( "`\\' cannot be last in pattern", $fl );
978                return undef();
979            }
980	    $ic++;
981            if( $nc eq $del ){ ## \<pattern del> => \<pattern del>
982                $res .= "\\$del";
983
984	    } elsif( $nc =~ /([[.*\\n])/ ){
985		## check for \-escaped magics and \n:
986		## \[ \. \* \\ \n stay as they are
987                $res .= '\\'.$nc;
988
989            } elsif( $nc eq '(' ){ ## \( => (
990                $parlev++;
991                $res .= '(';
992
993            } elsif( $nc eq ')' ){ ## \) => )
994                $parlev--;
995		$backref++;
996                if( $parlev < 0 ){
997                    Warn( "unmatched `\\)'", $fl );
998                    return undef();
999                }
1000                $res .= ')';
1001
1002            } elsif( $nc eq '{' ){ ## repetition factor \{<i>[,[<j>]]\}
1003                my $endpos = index( $pat, '\\}', $ic );
1004                if( $endpos < 0 ){
1005                    Warn( "unmatched `\\{'", $fl );
1006                    return undef();
1007                }
1008                my $rep = substr( $pat, $ic+1, $endpos-($ic+1) );
1009                $ic = $endpos + 1;
1010
1011  	        if( $res =~ /^\^?$/ ){
1012		    $res .= "\\{$rep\}";
1013                } elsif( $rep =~ /^(\d+)(,?)(\d*)?$/ ){
1014                    my $min = $1;
1015                    my $com = $2 || '';
1016                    my $max = $3;
1017                    if( length( $max ) ){
1018                        if( $max < $min ){
1019                            Warn( "maximum less than minimum in `\\{$rep\\}'",
1020				  $fl );
1021                            return undef();
1022                        }
1023                    } else {
1024                        $max = '';
1025                    }
1026		    # simplify some
1027		    if( $min == 0 && $max eq '1' ){
1028			$res .= '?';
1029		    } elsif( $min == 1 && "$com$max" eq ',' ){
1030			$res .= '+';
1031		    } elsif( $min == 0 && "$com$max" eq ',' ){
1032			$res .= '*';
1033		    } else {
1034			$res .= "{$min$com$max}";
1035		    }
1036                } else {
1037                    Warn( "invalid repeat clause `\\{$rep\\}'", $fl );
1038                    return undef();
1039                }
1040
1041            } elsif( $nc =~ /^[1-9]$/ ){
1042		## \1 .. \9 => \1 .. \9, but check for a following digit
1043		if( $nc > $backref ){
1044                    Warn( "invalid backreference ($nc)", $fl );
1045                    return undef();
1046		}
1047                $res .= "\\$nc";
1048		if( peek($pat,$ic) =~ /[0-9]/ ){
1049		    $res .= '(?:)';
1050		}
1051
1052            } elsif( $useEXTBRE && ( $nc =~ /[$useEXTBRE]/ ) ){
1053		## extensions - at most <>wWyB - not in POSIX
1054                if(      $nc eq '<' ){ ## \< => \b(?=\w), be precise
1055                    $res .= '\\b(?<=\\W)';
1056                } elsif( $nc eq '>' ){ ## \> => \b(?=\W), be precise
1057                    $res .= '\\b(?=\\W)';
1058                } elsif( $nc eq 'y' ){ ## \y => \b
1059                    $res .= '\\b';
1060                } else {               ## \B, \w, \W remain the same
1061                    $res .= "\\$nc";
1062                }
1063	    } elsif( $nc eq $led ){
1064		## \<closing bracketing-delimiter> - keep '\'
1065		$res .= "\\$nc";
1066
1067            } else { ## \ <char> => <char> ("as if `\' were not present")
1068                $res .= $nc;
1069            }
1070
1071        } elsif( $c eq '.' ){ ## . => .
1072            $res .= $c;
1073
1074	} elsif( $c eq '*' ){ ## * => * but \* if there's nothing preceding it
1075	    if( $res =~ /^\^?$/ ){
1076                $res .= '\\*';
1077            } elsif( substr( $res, -1, 1 ) ne '*' ){
1078		$res .= $c;
1079	    }
1080
1081        } elsif( $c eq '[' ){
1082	    ## parse []: [^...] [^]...] [-...]
1083	    my $add = '[';
1084	    if( peek($pat,$ic) eq '^' ){
1085		$ic++;
1086		$add .= '^';
1087	    }
1088	    my $nc = peek($pat,$ic);
1089  	    if( $nc eq ']' || $nc eq '-' ){
1090		$add .= $nc;
1091                $ic++;
1092	    }
1093	    # check that [ is not trailing
1094	    if( $ic >= length( $pat ) - 1 ){
1095		Warn( "unmatched `['", $fl );
1096		return undef();
1097	    }
1098	    # look for [:...:] and x-y
1099	    my $rstr = substr( $pat, $ic+1 );
1100	    if( $rstr =~ /^((?:\[:\(\w+|[><]\):\]|[^]-](?:-[^]])?)*)/ ){
1101 	        my $cnt = $1;
1102		$ic += length( $cnt );
1103		$cnt =~ s/([\\\$])/\\$1/g; # `\', `$' are magic in Perl []
1104		# try some simplifications
1105 	        my $red = $cnt;
1106		if( $red =~ s/0-9// ){
1107		    $cnt = $red.'\d';
1108		    if( $red =~ s/A-Z// && $red =~ s/a-z// && $red =~ s/_// ){
1109			$cnt = $red.'\w';
1110                    }
1111		}
1112		$add .= $cnt;
1113
1114		# POSIX 1003.2 has this (optional) for begin/end word
1115		$add = '\\b(?=\\W)'  if $add eq '[[:<:]]';
1116		$add = '\\b(?<=\\W)' if $add eq '[[:>:]]';
1117
1118	    }
1119
1120	    ## may have a trailing `-' before `]'
1121	    if( $ic < length($pat) - 1 &&
1122                substr( $pat, $ic+1 ) =~ /^(-?])/ ){
1123		$ic += length( $1 );
1124		$add .= $1;
1125		# another simplification
1126		$add =~ s/^\[(\^?)(\\[dw])]$/ $1 eq '^' ? uc($2) : $2 /e;
1127		$res .= $add;
1128	    } else {
1129		Warn( "unmatched `['", $fl );
1130		return undef();
1131	    }
1132
1133        } elsif( $c eq $led ){ ## unescaped <closing bracketing-delimiter>
1134            $res .= "\\$c";
1135
1136        } elsif( $c eq ']' ){ ## unmatched ] is not magic
1137            $res .= ']';
1138
1139        } elsif( $c =~ /[|+?{}()]/ ){ ## not magic in BRE, but in Perl: \-quote
1140            $res .= "\\$c";
1141
1142        } elsif( $c eq '^' ){ ## not magic unless 1st, but in Perl: \-quote
1143            $res .= length( $res ) ? '\\^' : '^';
1144
1145        } elsif( $c eq '$' ){ ## not magic unless last, but in Perl: \-quote
1146            $res .= $ic == length( $pat ) - 1 ? '$' : '\\$';
1147
1148        } else {
1149            $res .= $c;
1150        }
1151    }
1152
1153    if( $parlev ){
1154       Warn( "unmatched `\\('", $fl );
1155       return undef();
1156    }
1157
1158    # final cleanup: eliminate raw HTs
1159    $res =~ s/\t/\\t/g;
1160    return $del . $res . ( $led ? $led : $del );
1161}
1162
1163
1164#####
1165# sub2p - convert sed substitution to Perl substitution
1166#
1167sub sub2p($$$){
1168    my( $del, $subst, $fl ) = @_;
1169    my $led = $del;
1170    $led =~ tr/{([</})]>/;
1171    $led = '' if $led eq $del;
1172
1173    $subst = substr( $subst, 1, length($subst) - 2 );
1174    my $res = '';
1175
1176    for( my $ic = 0; $ic < length( $subst ); $ic++ ){
1177        my $c = substr( $subst, $ic, 1 );
1178        if( $c eq '\\' ){
1179	    ### backslash escapes
1180            my $nc = peek($subst,$ic);
1181            if( $nc eq '' ){
1182                Warn( "`\\' cannot be last in substitution", $fl );
1183                return undef();
1184            }
1185	    $ic++;
1186	    if( $nc =~ /[\\$del$led]/ ){ ## \ and delimiter
1187		$res .= '\\' . $nc;
1188            } elsif( $nc =~ /[1-9]/ ){ ## \1 - \9 => ${1} - ${9}
1189                $res .= '${' . $nc . '}';
1190	    } else { ## everything else (includes &): omit \
1191		$res .= $nc;
1192	    }
1193        } elsif( $c eq '&' ){ ## & => $&
1194            $res .= '$&';
1195	} elsif( $c =~ /[\$\@$led]/ ){ ## magic in Perl's substitution string
1196	    $res .= '\\' . $c;
1197        } else {
1198	    $res .= $c;
1199	}
1200    }
1201
1202    # final cleanup: eliminate raw HTs
1203    $res =~ s/\t/\\t/g;
1204    return ( $led ? $del : $led ) . $res . ( $led ? $led : $del );
1205}
1206
1207
1208sub Parse(){
1209    my $error = 0;
1210    my( $pdef, $pfil, $plin );
1211    for( my $icom = 0; $icom < @Commands; $icom++ ){
1212	my $cmd = $Commands[$icom];
1213	print "Parse:$cmd:\n" if $useDEBUG;
1214	$cmd =~ s/^\s+//;
1215	next unless length( $cmd );
1216	my $scom = $icom;
1217	if( exists( $Defined{$icom} ) ){
1218	    $pdef = $Defined{$icom};
1219	    if( $pdef =~ /^ #(\d+)/ ){
1220		$pfil = 'expression #';
1221		$plin = $1;
1222	    } else {
1223		$pfil = "$pdef l.";
1224		$plin = 1;
1225            }
1226        } else {
1227	    $plin++;
1228        }
1229        my $fl = "$pfil$plin";
1230
1231        # insert command as comment in gnerated code
1232	#
1233	$Code .= "# $cmd\n" if $doGenerate;
1234
1235	# The Address(es)
1236	#
1237	my( $negated, $naddr, $addr1, $addr2 );
1238	$naddr = 0;
1239	if(      $cmd =~ s/^(\d+)\s*// ){
1240	    $addr1 = "$1"; $naddr++;
1241	} elsif( $cmd =~ s/^\$\s*// ){
1242	    $addr1 = 'eofARGV()'; $naddr++;
1243	} elsif( $cmd =~ s{^(/)}{} || $cmd =~ s{^\\(.)}{} ){
1244	    my $del = $1;
1245	    my $regex = stripRegex( $del, \$cmd );
1246	    if( defined( $regex ) ){
1247		$addr1 = 'm '.bre2p( $del, $regex, $fl ).'s';
1248		$naddr++;
1249	    } else {
1250		Warn( "malformed regex, 1st address", $fl );
1251		$error++;
1252		next;
1253	    }
1254        }
1255        if( defined( $addr1 ) && $cmd =~ s/,\s*// ){
1256 	    if(      $cmd =~ s/^(\d+)\s*// ){
1257	        $addr2 = "$1"; $naddr++;
1258	    } elsif( $cmd =~ s/^\$\s*// ){
1259	        $addr2 = 'eofARGV()'; $naddr++;
1260	    } elsif( $cmd =~ s{^(/)}{} || $cmd =~ s{^\\(.)}{} ){
1261		my $del = $1;
1262	        my $regex = stripRegex( $del, \$cmd );
1263		if( defined( $regex ) ){
1264		    $addr2 = 'm '. bre2p( $del, $regex, $fl ).'s';
1265		    $naddr++;
1266		} else {
1267		    Warn( "malformed regex, 2nd address", $fl );
1268		    $error++;
1269		    next;
1270		}
1271            } else {
1272		Warn( "invalid address after `,'", $fl );
1273		$error++;
1274		next;
1275            }
1276        }
1277
1278        # address modifier `!'
1279        #
1280        $negated = $cmd =~ s/^!\s*//;
1281	if( defined( $addr1 ) ){
1282	    print "Parse: addr1=$addr1" if $useDEBUG;
1283	    if( defined( $addr2 ) ){
1284		print ", addr2=$addr2 " if $useDEBUG;
1285		# both numeric and addr1 > addr2 => eliminate addr2
1286		undef( $addr2 ) if $addr1 =~ /^\d+$/ &&
1287                                   $addr2 =~ /^\d+$/ && $addr1 > $addr2;
1288	    }
1289	}
1290	print 'negated' if $useDEBUG && $negated;
1291	print " command:$cmd\n" if $useDEBUG;
1292
1293	# The Command
1294	#
1295        if( $cmd !~ s/^([:#={}abcdDgGhHilnNpPqrstwxy])\s*// ){
1296	    my $h = substr( $cmd, 0, 1 );
1297 	    Warn( "unknown command `$h'", $fl );
1298	    $error++;
1299	    next;
1300	}
1301        my $key = $1;
1302
1303	my $tabref = $ComTab{$key};
1304	$GenKey{$key} = 1;
1305	if( $naddr > $tabref->[0] ){
1306	    Warn( "excess address(es)", $fl );
1307	    $error++;
1308	    next;
1309	}
1310
1311	my $arg = '';
1312	if(      $tabref->[1] eq 'str' ){
1313	    # take remainder - don't care if it is empty
1314	    $arg = $cmd;
1315            $cmd = '';
1316
1317	} elsif( $tabref->[1] eq 'txt' ){
1318	    # multi-line text
1319	    my $goon = $cmd =~ /(.*)\\$/;
1320	    if( length( $1 ) ){
1321		Warn( "extra characters after command ($cmd)", $fl );
1322		$error++;
1323	    }
1324	    while( $goon ){
1325		$icom++;
1326		if( $icom > $#Commands ){
1327		    Warn( "unexpected end of script", $fl );
1328		    $error++;
1329		    last;
1330		}
1331		$cmd = $Commands[$icom];
1332		$Code .= "# $cmd\n" if $doGenerate;
1333		$goon = $cmd =~ s/\\$//;
1334		$cmd =~ s/\\(.)/$1/g;
1335		$arg .= "\n" if length( $arg );
1336		$arg .= $cmd;
1337	    }
1338	    $arg .= "\n" if length( $arg );
1339	    $cmd = '';
1340
1341	} elsif( $tabref->[1] eq 'sub' ){
1342	    # s///
1343	    if( ! length( $cmd ) ){
1344		Warn( "`s' command requires argument", $fl );
1345		$error++;
1346		next;
1347	    }
1348	    if( $cmd =~ s{^([^\\\n])}{} ){
1349		my $del = $1;
1350		my $regex = stripRegex( $del, \$cmd );
1351		if( ! defined( $regex ) ){
1352		    Warn( "malformed regular expression", $fl );
1353		    $error++;
1354		    next;
1355		}
1356		$regex = bre2p( $del, $regex, $fl );
1357
1358		# a trailing \ indicates embedded NL (in replacement string)
1359		while( $cmd =~ s/(?<!\\)\\$/\n/ ){
1360		    $icom++;
1361		    if( $icom > $#Commands ){
1362			Warn( "unexpected end of script", $fl );
1363			$error++;
1364			last;
1365		    }
1366		    $cmd .= $Commands[$icom];
1367	            $Code .= "# $Commands[$icom]\n" if $doGenerate;
1368		}
1369
1370		my $subst = stripRegex( $del, \$cmd );
1371		if( ! defined( $regex ) ){
1372		    Warn( "malformed substitution expression", $fl );
1373		    $error++;
1374		    next;
1375		}
1376		$subst = sub2p( $del, $subst, $fl );
1377
1378		# parse s/// modifier: g|p|0-9|w <file>
1379		my( $global, $nmatch, $print, $write ) =
1380		  ( '',      '',      0,      undef );
1381		while( $cmd =~ s/^([gp0-9])// ){
1382		    $1 eq 'g' ? ( $global = 'g' ) :
1383  		    $1 eq 'p' ? ( $print  = $1  ) : ( $nmatch .= $1 );
1384                }
1385		$write = $1 if $cmd =~ s/w\s*(.*)$//;
1386  	        ### $nmatch =~ s/^(\d)\1*$/$1/; ### may be dangerous?
1387		if( $global && length( $nmatch ) || length( $nmatch ) > 1 ){
1388		    Warn( "conflicting flags `$global$nmatch'", $fl );
1389		    $error++;
1390		    next;
1391		}
1392
1393		$arg = makes( $regex, $subst,
1394			      $write, $global, $print, $nmatch, $fl );
1395		if( ! defined( $arg ) ){
1396		    $error++;
1397		    next;
1398		}
1399
1400            } else {
1401		Warn( "improper delimiter in s command", $fl );
1402		$error++;
1403		next;
1404            }
1405
1406	} elsif( $tabref->[1] eq 'tra' ){
1407	    # y///
1408	    # a trailing \ indicates embedded newline
1409	    while( $cmd =~ s/(?<!\\)\\$/\n/ ){
1410		$icom++;
1411		if( $icom > $#Commands ){
1412		    Warn( "unexpected end of script", $fl );
1413		    $error++;
1414		    last;
1415		}
1416		$cmd .= $Commands[$icom];
1417                $Code .= "# $Commands[$icom]\n" if $doGenerate;
1418	    }
1419	    if( ! length( $cmd ) ){
1420		Warn( "`y' command requires argument", $fl );
1421		$error++;
1422		next;
1423	    }
1424	    my $d = substr( $cmd, 0, 1 ); $cmd = substr( $cmd, 1 );
1425	    if( $d eq '\\' ){
1426		Warn( "`\\' not valid as delimiter in `y' command", $fl );
1427		$error++;
1428		next;
1429	    }
1430	    my $fr = stripTrans( $d, \$cmd );
1431	    if( ! defined( $fr ) || ! length( $cmd ) ){
1432		Warn( "malformed `y' command argument", $fl );
1433		$error++;
1434		next;
1435	    }
1436	    my $to = stripTrans( $d, \$cmd );
1437	    if( ! defined( $to ) ){
1438		Warn( "malformed `y' command argument", $fl );
1439		$error++;
1440		next;
1441	    }
1442	    if( length($fr) != length($to) ){
1443		Warn( "string lengths in `y' command differ", $fl );
1444		$error++;
1445		next;
1446	    }
1447	    if( ! defined( $arg = makey( $fr, $to, $fl ) ) ){
1448		$error++;
1449		next;
1450	    }
1451
1452	}
1453
1454	# $cmd must be now empty - exception is {
1455	if( $cmd !~ /^\s*$/ ){
1456	    if( $key eq '{' ){
1457		# dirty hack to process command on '{' line
1458		$Commands[$icom--] = $cmd;
1459	    } else {
1460		Warn( "extra characters after command ($cmd)", $fl );
1461		$error++;
1462		next;
1463	    }
1464	}
1465
1466	# Make Code
1467        #
1468	if( &{$tabref->[2]}( $addr1, $addr2, $negated,
1469                             $tabref->[3], $arg, $fl ) ){
1470	    $error++;
1471	}
1472    }
1473
1474    while( @BlockStack ){
1475	my $bl = pop( @BlockStack );
1476	Warn( "start of unterminated `{'", $bl );
1477        $error++;
1478    }
1479
1480    for my $lab ( keys( %Label ) ){
1481	if( ! exists( $Label{$lab}{defined} ) ){
1482	    for my $used ( @{$Label{$lab}{used}} ){
1483 	        Warn( "undefined label `$lab'", $used );
1484	        $error++;
1485	    }
1486	}
1487    }
1488
1489    exit( 1 ) if $error;
1490}
1491
1492
1493##############
1494#### MAIN ####
1495##############
1496
1497sub usage(){
1498    print STDERR "Usage: sed [-an] command [file...]\n";
1499    print STDERR "           [-an] [-e command] [-f script-file] [file...]\n";
1500}
1501
1502###################
1503# Here we go again...
1504#
1505my $expr = 0;
1506while( @ARGV && $ARGV[0] =~ /^-(.)(.*)$/ ){
1507    my $opt = $1;
1508    my $arg = $2;
1509    shift( @ARGV );
1510    if(      $opt eq 'e' ){
1511        if( length( $arg ) ){
1512	    push( @Commands, split( "\n", $arg ) );
1513        } elsif( @ARGV ){
1514	    push( @Commands, shift( @ARGV ) );
1515        } else {
1516            Warn( "option -e requires an argument" );
1517            usage();
1518            exit( 1 );
1519        }
1520	$expr++;
1521        $Defined{$#Commands} = " #$expr";
1522	next;
1523    }
1524    if( $opt eq 'f' ){
1525        my $path;
1526        if( length( $arg ) ){
1527	    $path = $arg;
1528        } elsif( @ARGV ){
1529	    $path = shift( @ARGV );
1530        } else {
1531            Warn( "option -f requires an argument" );
1532            usage();
1533            exit( 1 );
1534        }
1535	my $fst = $#Commands + 1;
1536        open( SCRIPT, "<$path" ) || die( "$0: $path: could not open ($!)\n" );
1537        my $cmd;
1538        while( defined( $cmd = <SCRIPT> ) ){
1539            chomp( $cmd );
1540            push( @Commands, $cmd );
1541        }
1542        close( SCRIPT );
1543	if( $#Commands >= $fst ){
1544	    $Defined{$fst} = "$path";
1545	}
1546	next;
1547    }
1548    if( $opt eq '-' && $arg eq '' ){
1549	last;
1550    }
1551    if( $opt eq 'h' || $opt eq '?' ){
1552        usage();
1553        exit( 0 );
1554    }
1555    if( $opt eq 'n' ){
1556	$doAutoPrint = 0;
1557    } elsif( $opt eq 'a' ){
1558	$doOpenWrite = 0;
1559    } else {
1560        Warn( "illegal option `$opt'" );
1561        usage();
1562        exit( 1 );
1563    }
1564    if( length( $arg ) ){
1565	unshift( @ARGV, "-$arg" );
1566    }
1567}
1568
1569# A singleton command may be the 1st argument when there are no options.
1570#
1571if( @Commands == 0 ){
1572    if( @ARGV == 0 ){
1573        Warn( "no script command given" );
1574        usage();
1575        exit( 1 );
1576    }
1577    push( @Commands, split( "\n", shift( @ARGV ) ) );
1578    $Defined{0} = ' #1';
1579}
1580
1581print STDERR "Files: @ARGV\n" if $useDEBUG;
1582
1583# generate leading code
1584#
1585$Func = <<'[TheEnd]';
1586
1587# openARGV: open 1st input file
1588#
1589sub openARGV(){
1590    unshift( @ARGV, '-' ) unless @ARGV;
1591    my $file = shift( @ARGV );
1592    open( ARG, "<$file" )
1593    || die( "$0: can't open $file for reading ($!)\n" );
1594    $isEOF = 0;
1595}
1596
1597# getsARGV: Read another input line into argument (default: $_).
1598#           Move on to next input file, and reset EOF flag $isEOF.
1599sub getsARGV(;\$){
1600    my $argref = @_ ? shift() : \$_;
1601    while( $isEOF || ! defined( $$argref = <ARG> ) ){
1602	close( ARG );
1603	return 0 unless @ARGV;
1604	my $file = shift( @ARGV );
1605	open( ARG, "<$file" )
1606	|| die( "$0: can't open $file for reading ($!)\n" );
1607	$isEOF = 0;
1608    }
1609    1;
1610}
1611
1612# eofARGV: end-of-file test
1613#
1614sub eofARGV(){
1615    return @ARGV == 0 && ( $isEOF = eof( ARG ) );
1616}
1617
1618# makeHandle: Generates another file handle for some file (given by its path)
1619#             to be written due to a w command or an s command's w flag.
1620sub makeHandle($){
1621    my( $path ) = @_;
1622    my $handle;
1623    if( ! exists( $wFiles{$path} ) || $wFiles{$path} eq '' ){
1624        $handle = $wFiles{$path} = gensym();
1625	if( $doOpenWrite ){
1626	    if( ! open( $handle, ">$path" ) ){
1627		die( "$0: can't open $path for writing: ($!)\n" );
1628	    }
1629	}
1630    } else {
1631        $handle = $wFiles{$path};
1632    }
1633    return $handle;
1634}
1635
1636# printQ: Print queued output which is either a string or a reference
1637#         to a pathname.
1638sub printQ(){
1639    for my $q ( @Q ){
1640	if( ref( $q ) ){
1641            # flush open w files so that reading this file gets it all
1642	    if( exists( $wFiles{$$q} ) && $wFiles{$$q} ne '' ){
1643		open( $wFiles{$$q}, ">>$$q" );
1644	    }
1645            # copy file to stdout: slow, but safe
1646	    if( open( RF, "<$$q" ) ){
1647		while( defined( my $line = <RF> ) ){
1648		    print $line;
1649		}
1650		close( RF );
1651	    }
1652	} else {
1653	    print $q;
1654	}
1655    }
1656    undef( @Q );
1657}
1658
1659[TheEnd]
1660
1661# generate the sed loop
1662#
1663$Code .= <<'[TheEnd]';
1664sub openARGV();
1665sub getsARGV(;\$);
1666sub eofARGV();
1667sub printQ();
1668
1669# Run: the sed loop reading input and applying the script
1670#
1671sub Run(){
1672    my( $h, $icnt, $s, $n );
1673    # hack (not unbreakable :-/) to avoid // matching an empty string
1674    my $z = "\000"; $z =~ /$z/;
1675    # Initialize.
1676    openARGV();
1677    $Hold    = '';
1678    $CondReg = 0;
1679    $doPrint = $doAutoPrint;
1680CYCLE:
1681    while( getsARGV() ){
1682	chomp();
1683	$CondReg = 0;   # cleared on t
1684BOS:;
1685[TheEnd]
1686
1687    # parse - avoid opening files when doing s2p
1688    #
1689    ( $svOpenWrite, $doOpenWrite ) = (  $doOpenWrite, $svOpenWrite )
1690      if $doGenerate;
1691    Parse();
1692    ( $svOpenWrite, $doOpenWrite ) = (  $doOpenWrite, $svOpenWrite )
1693      if $doGenerate;
1694
1695    # append trailing code
1696    #
1697    $Code .= <<'[TheEnd]';
1698EOS:    if( $doPrint ){
1699            print $_, "\n";
1700        } else {
1701	    $doPrint = $doAutoPrint;
1702	}
1703        printQ() if @Q;
1704    }
1705
1706    exit( 0 );
1707}
1708[TheEnd]
1709
1710
1711# append optional functions, prepend prototypes
1712#
1713my $Proto = "# prototypes\n";
1714if( $GenKey{'l'} ){
1715    $Proto .= "sub _l();\n";
1716    $Func .= <<'[TheEnd]';
1717# _l: l command processing
1718#
1719sub _l(){
1720    my $h = $_;
1721    my $mcpl = 70;
1722    # transform non printing chars into escape notation
1723    $h =~ s/\\/\\\\/g;
1724    if( $h =~ /[^[:print:]]/ ){
1725	$h =~ s/\a/\\a/g;
1726	$h =~ s/\f/\\f/g;
1727	$h =~ s/\n/\\n/g;
1728	$h =~ s/\t/\\t/g;
1729	$h =~ s/\r/\\r/g;
1730	$h =~ s/\e/\\e/g;
1731        $h =~ s/([^[:print:]])/sprintf("\\%03o", ord($1))/ge;
1732    }
1733    # split into lines of length $mcpl
1734    while( length( $h ) > $mcpl ){
1735	my $l = substr( $h, 0, $mcpl-1 );
1736	$h = substr( $h, $mcpl );
1737	# remove incomplete \-escape from end of line
1738	if( $l =~ s/(?<!\\)(\\[0-7]{0,2})$// ){
1739	    $h = $1 . $h;
1740	}
1741	print $l, "\\\n";
1742    }
1743    print "$h\$\n";
1744}
1745
1746[TheEnd]
1747}
1748
1749if( $GenKey{'r'} ){
1750    $Proto .= "sub _r(\$);\n";
1751    $Func .= <<'[TheEnd]';
1752# _r: r command processing: Save a reference to the pathname.
1753#
1754sub _r($){
1755    my $path = shift();
1756    push( @Q, \$path );
1757}
1758
1759[TheEnd]
1760}
1761
1762if( $GenKey{'t'} ){
1763    $Proto .= "sub _t();\n";
1764    $Func .= <<'[TheEnd]';
1765# _t: t command - condition register test/reset
1766#
1767sub _t(){
1768    my $res = $CondReg;
1769    $CondReg = 0;
1770    $res;
1771}
1772
1773[TheEnd]
1774}
1775
1776if( $GenKey{'w'} ){
1777    $Proto .= "sub _w(\$);\n";
1778    $Func .= <<'[TheEnd]';
1779# _w: w command and s command's w flag - write to file
1780#
1781sub _w($){
1782    my $path   = shift();
1783    my $handle = $wFiles{$path};
1784    if( ! $doOpenWrite && ! defined( fileno( $handle ) ) ){
1785	open( $handle, ">$path" )
1786	|| die( "$0: $path: cannot open ($!)\n" );
1787    }
1788    print $handle $_, "\n";
1789}
1790
1791[TheEnd]
1792}
1793
1794$Code = $Proto . $Code;
1795
1796# magic "#n" - same as -n option
1797#
1798$doAutoPrint = 0 if substr( $Commands[0], 0, 2 ) eq '#n';
1799
1800# eval code - check for errors
1801#
1802print "Code:\n$Code$Func" if $useDEBUG;
1803eval $Code . $Func;
1804if( $@ ){
1805    print "Code:\n$Code$Func";
1806    die( "$0: internal error - generated incorrect Perl code: $@\n" );
1807}
1808
1809if( $doGenerate ){
1810
1811    # write full Perl program
1812    #
1813
1814    # bang line, declarations, prototypes
1815    print <<TheEnd;
1816#!$perlpath -w
1817eval 'exec $perlpath -S \$0 \${1+"\$@"}'
1818  if 0;
1819\$0 =~ s/^.*?(\\w+)\[\\.\\w+\]*\$/\$1/;
1820
1821use strict;
1822use Symbol;
1823use vars qw{ \$isEOF \$Hold \%wFiles \@Q \$CondReg
1824	     \$doAutoPrint \$doOpenWrite \$doPrint };
1825\$doAutoPrint = $doAutoPrint;
1826\$doOpenWrite = $doOpenWrite;
1827TheEnd
1828
1829    my $wf = "'" . join( "', '",  keys( %wFiles ) ) . "'";
1830    if( $wf ne "''" ){
1831	print <<TheEnd;
1832sub makeHandle(\$);
1833for my \$p ( $wf ){
1834   exit( 1 ) unless makeHandle( \$p );
1835}
1836TheEnd
1837   }
1838
1839   print $Code;
1840   print "Run();\n";
1841   print $Func;
1842   exit( 0 );
1843
1844} else {
1845
1846    # execute: make handles (and optionally open) all w files; run!
1847    for my $p ( keys( %wFiles ) ){
1848        exit( 1 ) unless makeHandle( $p );
1849    }
1850    Run();
1851}
1852
1853
1854=head1 ENVIRONMENT
1855
1856The environment variable C<PSEDEXTBRE> may be set to extend BREs.
1857See L<"Additional Atoms">.
1858
1859=head1 DIAGNOSTICS
1860
1861=over 4
1862
1863=item ambiguous translation for character `%s' in `y' command
1864
1865The indicated character appears twice, with different translations.
1866
1867=item `[' cannot be last in pattern
1868
1869A `[' in a BRE indicates the beginning of a I<bracket expression>.
1870
1871=item `\' cannot be last in pattern
1872
1873A `\' in a BRE is used to make the subsequent character literal.
1874
1875=item `\' cannot be last in substitution
1876
1877A `\' in a subsitution string is used to make the subsequent character literal.
1878
1879=item conflicting flags `%s'
1880
1881In an B<s> command, either the `g' flag and an n-th occurrence flag, or
1882multiple n-th occurrence flags are specified. Note that only the digits
1883`1' through `9' are permitted.
1884
1885=item duplicate label %s (first defined at %s)
1886
1887=item excess address(es)
1888
1889The command has more than the permitted number of addresses.
1890
1891=item extra characters after command (%s)
1892
1893=item illegal option `%s'
1894
1895=item improper delimiter in s command
1896
1897The BRE and substitution may not be delimited with `\' or newline.
1898
1899=item invalid address after `,'
1900
1901=item invalid backreference (%s)
1902
1903The specified backreference number exceeds the number of backreferences
1904in the BRE.
1905
1906=item invalid repeat clause `\{%s\}'
1907
1908The repeat clause does not contain a valid integer value, or pair of
1909values.
1910
1911=item malformed regex, 1st address
1912
1913=item malformed regex, 2nd address
1914
1915=item malformed regular expression
1916
1917=item malformed substitution expression
1918
1919=item malformed `y' command argument
1920
1921The first or second string of a B<y> command  is syntactically incorrect.
1922
1923=item maximum less than minimum in `\{%s\}'
1924
1925=item no script command given
1926
1927There must be at least one B<-e> or one B<-f> option specifying a
1928script or script file.
1929
1930=item `\' not valid as delimiter in `y' command
1931
1932=item option -e requires an argument
1933
1934=item option -f requires an argument
1935
1936=item `s' command requires argument
1937
1938=item start of unterminated `{'
1939
1940=item string lengths in `y' command differ
1941
1942The translation table strings in a B<y> commanf must have equal lengths.
1943
1944=item undefined label `%s'
1945
1946=item unexpected `}'
1947
1948A B<}> command without a preceding B<{> command was encountered.
1949
1950=item unexpected end of script
1951
1952The end of the script was reached although a text line after a
1953B<a>, B<c> or B<i> command indicated another line.
1954
1955=item unknown command `%s'
1956
1957=item unterminated `['
1958
1959A BRE contains an unterminated bracket expression.
1960
1961=item unterminated `\('
1962
1963A BRE contains an unterminated backreference.
1964
1965=item `\{' without closing `\}'
1966
1967A BRE contains an unterminated bounds specification.
1968
1969=item `\)' without preceding `\('
1970
1971=item `y' command requires argument
1972
1973=back
1974
1975=head1 EXAMPLE
1976
1977The basic material for the preceding section was generated by running
1978the sed script
1979
1980   #no autoprint
1981   s/^.*Warn( *"\([^"]*\)".*$/\1/
1982   t process
1983   b
1984   :process
1985   s/$!/%s/g
1986   s/$[_[:alnum:]]\{1,\}/%s/g
1987   s/\\\\/\\/g
1988   s/^/=item /
1989   p
1990
1991on the program's own text, and piping the output into C<sort -u>.
1992
1993
1994=head1 SED SCRIPT TRANSLATION
1995
1996If this program is invoked with the name F<s2p> it will act as a
1997sed-to-Perl translator. After option processing (all other
1998arguments are ignored), a Perl program is printed on standard
1999output, which will process the input stream (as read from all
2000arguments) in the way defined by the sed script and the option setting
2001used for the translation.
2002
2003=head1 SEE ALSO
2004
2005perl(1), re_format(7)
2006
2007=head1 BUGS
2008
2009The B<l> command will show escape characters (ESC) as `C<\e>', but
2010a vertical tab (VT) in octal.
2011
2012Trailing spaces are truncated from labels in B<:>, B<t> and B<b> commands.
2013
2014The meaning of an empty regular expression (`C<//>'), as defined by B<sed>,
2015is "the last pattern used, at run time". This deviates from the Perl
2016interpretation, which will re-use the "last last successfully executed
2017regular expression". Since keeping track of pattern usage would create
2018terribly cluttered code, and differences would only appear in obscure
2019context (where other B<sed> implementations appear to deviate, too),
2020the Perl semantics was adopted. Note that common usage of this feature,
2021such as in C</abc/s//xyz/>, will work as expected.
2022
2023Collating elements (of bracket expressions in BREs) are not implemented.
2024
2025=head1 STANDARDS
2026
2027This B<sed> implementation conforms to the IEEE Std1003.2-1992 ("POSIX.2")
2028definition of B<sed>, and is compatible with the I<OpenBSD>
2029implementation, except where otherwise noted (see L<"BUGS">).
2030
2031=head1 AUTHOR
2032
2033This Perl implementation of I<sed> was written by Wolfgang Laun,
2034I<Wolfgang.Laun@alcatel.at>.
2035
2036=head1 COPYRIGHT and LICENSE
2037
2038This program is free and open software. You may use, modify,
2039distribute, and sell this program (and any modified variants) in any
2040way you wish, provided you do not restrict others from doing the same.
2041
2042=cut
2043
2044!NO!SUBS!
2045
2046close OUT or die "Can't close $file: $!";
2047chmod 0755, $file or die "Can't reset permissions for $file: $!\n";
2048unlink 'psed';
2049print "Linking s2p to psed.\n";
2050if (defined $Config{d_link}) {
2051  link 's2p', 'psed';
2052} else {
2053  unshift @INC, '../lib';
2054  require File::Copy;
2055  File::Copy::syscopy('s2p', 'psed');
2056}
2057exec("$Config{'eunicefix'} $file") if $Config{'eunicefix'} ne ':';
2058chdir $origdir;
2059