1#
2############################################################
3#
4#    perltidy - a perl script indenter and formatter
5#
6#    Copyright (c) 2000-2012 by Steve Hancock
7#    Distributed under the GPL license agreement; see file COPYING
8#
9#    This program is free software; you can redistribute it and/or modify
10#    it under the terms of the GNU General Public License as published by
11#    the Free Software Foundation; either version 2 of the License, or
12#    (at your option) any later version.
13#
14#    This program is distributed in the hope that it will be useful,
15#    but WITHOUT ANY WARRANTY; without even the implied warranty of
16#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17#    GNU General Public License for more details.
18#
19#    You should have received a copy of the GNU General Public License along
20#    with this program; if not, write to the Free Software Foundation, Inc.,
21#    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
22#
23#    For brief instructions instructions, try 'perltidy -h'.
24#    For more complete documentation, try 'man perltidy'
25#    or visit http://perltidy.sourceforge.net
26#
27#    This script is an example of the default style.  It was formatted with:
28#
29#      perltidy Tidy.pm
30#
31#    Code Contributions: See ChangeLog.html for a complete history.
32#      Michael Cartmell supplied code for adaptation to VMS and helped with
33#        v-strings.
34#      Hugh S. Myers supplied sub streamhandle and the supporting code to
35#        create a Perl::Tidy module which can operate on strings, arrays, etc.
36#      Yves Orton supplied coding to help detect Windows versions.
37#      Axel Rose supplied a patch for MacPerl.
38#      Sebastien Aperghis-Tramoni supplied a patch for the defined or operator.
39#      Dan Tyrell contributed a patch for binary I/O.
40#      Ueli Hugenschmidt contributed a patch for -fpsc
41#      Sam Kington supplied a patch to identify the initial indentation of
42#      entabbed code.
43#      jonathan swartz supplied patches for:
44#      * .../ pattern, which looks upwards from directory
45#      * --notidy, to be used in directories where we want to avoid
46#        accidentally tidying
47#      * prefilter and postfilter
48#      * iterations option
49#
50#      Many others have supplied key ideas, suggestions, and bug reports;
51#        see the CHANGES file.
52#
53############################################################
54
55package Perl::Tidy;
56use 5.004;    # need IO::File from 5.004 or later
57BEGIN { $^W = 1; }    # turn on warnings
58
59use strict;
60use Exporter;
61use Carp;
62$|++;
63
64use vars qw{
65  $VERSION
66  @ISA
67  @EXPORT
68  $missing_file_spec
69  $fh_stderr
70};
71
72@ISA    = qw( Exporter );
73@EXPORT = qw( &perltidy );
74
75use Cwd;
76use IO::File;
77use File::Basename;
78use File::Copy;
79
80BEGIN {
81    ( $VERSION = q($Id: Tidy.pm,v 1.74 2012/12/07 13:56:49 perltidy Exp $) ) =~ s/^.*\s+(\d+)\/(\d+)\/(\d+).*$/$1$2$3/; # all one line for MakeMaker
82}
83
84sub streamhandle {
85
86    # given filename and mode (r or w), create an object which:
87    #   has a 'getline' method if mode='r', and
88    #   has a 'print' method if mode='w'.
89    # The objects also need a 'close' method.
90    #
91    # How the object is made:
92    #
93    # if $filename is:     Make object using:
94    # ----------------     -----------------
95    # '-'                  (STDIN if mode = 'r', STDOUT if mode='w')
96    # string               IO::File
97    # ARRAY  ref           Perl::Tidy::IOScalarArray (formerly IO::ScalarArray)
98    # STRING ref           Perl::Tidy::IOScalar      (formerly IO::Scalar)
99    # object               object
100    #                      (check for 'print' method for 'w' mode)
101    #                      (check for 'getline' method for 'r' mode)
102    my $ref = ref( my $filename = shift );
103    my $mode = shift;
104    my $New;
105    my $fh;
106
107    # handle a reference
108    if ($ref) {
109        if ( $ref eq 'ARRAY' ) {
110            $New = sub { Perl::Tidy::IOScalarArray->new(@_) };
111        }
112        elsif ( $ref eq 'SCALAR' ) {
113            $New = sub { Perl::Tidy::IOScalar->new(@_) };
114        }
115        else {
116
117            # Accept an object with a getline method for reading. Note:
118            # IO::File is built-in and does not respond to the defined
119            # operator.  If this causes trouble, the check can be
120            # skipped and we can just let it crash if there is no
121            # getline.
122            if ( $mode =~ /[rR]/ ) {
123                if ( $ref eq 'IO::File' || defined &{ $ref . "::getline" } ) {
124                    $New = sub { $filename };
125                }
126                else {
127                    $New = sub { undef };
128                    confess <<EOM;
129------------------------------------------------------------------------
130No 'getline' method is defined for object of class $ref
131Please check your call to Perl::Tidy::perltidy.  Trace follows.
132------------------------------------------------------------------------
133EOM
134                }
135            }
136
137            # Accept an object with a print method for writing.
138            # See note above about IO::File
139            if ( $mode =~ /[wW]/ ) {
140                if ( $ref eq 'IO::File' || defined &{ $ref . "::print" } ) {
141                    $New = sub { $filename };
142                }
143                else {
144                    $New = sub { undef };
145                    confess <<EOM;
146------------------------------------------------------------------------
147No 'print' method is defined for object of class $ref
148Please check your call to Perl::Tidy::perltidy. Trace follows.
149------------------------------------------------------------------------
150EOM
151                }
152            }
153        }
154    }
155
156    # handle a string
157    else {
158        if ( $filename eq '-' ) {
159            $New = sub { $mode eq 'w' ? *STDOUT : *STDIN }
160        }
161        else {
162            $New = sub { IO::File->new(@_) };
163        }
164    }
165    $fh = $New->( $filename, $mode )
166      or Warn("Couldn't open file:$filename in mode:$mode : $!\n");
167    return $fh, ( $ref or $filename );
168}
169
170sub find_input_line_ending {
171
172    # Peek at a file and return first line ending character.
173    # Quietly return undef in case of any trouble.
174    my ($input_file) = @_;
175    my $ending;
176
177    # silently ignore input from object or stdin
178    if ( ref($input_file) || $input_file eq '-' ) {
179        return $ending;
180    }
181    open( INFILE, $input_file ) || return $ending;
182
183    binmode INFILE;
184    my $buf;
185    read( INFILE, $buf, 1024 );
186    close INFILE;
187    if ( $buf && $buf =~ /([\012\015]+)/ ) {
188        my $test = $1;
189
190        # dos
191        if ( $test =~ /^(\015\012)+$/ ) { $ending = "\015\012" }
192
193        # mac
194        elsif ( $test =~ /^\015+$/ ) { $ending = "\015" }
195
196        # unix
197        elsif ( $test =~ /^\012+$/ ) { $ending = "\012" }
198
199        # unknown
200        else { }
201    }
202
203    # no ending seen
204    else { }
205
206    return $ending;
207}
208
209sub catfile {
210
211    # concatenate a path and file basename
212    # returns undef in case of error
213
214    BEGIN { eval "require File::Spec"; $missing_file_spec = $@; }
215
216    # use File::Spec if we can
217    unless ($missing_file_spec) {
218        return File::Spec->catfile(@_);
219    }
220
221    # Perl 5.004 systems may not have File::Spec so we'll make
222    # a simple try.  We assume File::Basename is available.
223    # return undef if not successful.
224    my $name      = pop @_;
225    my $path      = join '/', @_;
226    my $test_file = $path . $name;
227    my ( $test_name, $test_path ) = fileparse($test_file);
228    return $test_file if ( $test_name eq $name );
229    return undef if ( $^O eq 'VMS' );
230
231    # this should work at least for Windows and Unix:
232    $test_file = $path . '/' . $name;
233    ( $test_name, $test_path ) = fileparse($test_file);
234    return $test_file if ( $test_name eq $name );
235    return undef;
236}
237
238sub make_temporary_filename {
239
240    # Make a temporary filename.
241    # The POSIX tmpnam() function has been unreliable for non-unix systems
242    # (at least for the win32 systems that I've tested), so use a pre-defined
243    # name for them.  A disadvantage of this is that two perltidy
244    # runs in the same working directory may conflict.  However, the chance of
245    # that is small and manageable by the user, especially on systems for which
246    # the POSIX tmpnam function doesn't work.
247    my $name = "perltidy.TMP";
248    if ( $^O =~ /win32|dos/i || $^O eq 'VMS' || $^O eq 'MacOs' ) {
249        return $name;
250    }
251    eval "use POSIX qw(tmpnam)";
252    if ($@) { return $name }
253    use IO::File;
254
255    # just make a couple of tries before giving up and using the default
256    for ( 0 .. 3 ) {
257        my $tmpname = tmpnam();
258        my $fh = IO::File->new( $tmpname, O_RDWR | O_CREAT | O_EXCL );
259        if ($fh) {
260            $fh->close();
261            return ($tmpname);
262            last;
263        }
264    }
265    return ($name);
266}
267
268# Here is a map of the flow of data from the input source to the output
269# line sink:
270#
271# LineSource-->Tokenizer-->Formatter-->VerticalAligner-->FileWriter-->
272#       input                         groups                 output
273#       lines   tokens      lines       of          lines    lines
274#                                      lines
275#
276# The names correspond to the package names responsible for the unit processes.
277#
278# The overall process is controlled by the "main" package.
279#
280# LineSource is the stream of input lines
281#
282# Tokenizer analyzes a line and breaks it into tokens, peeking ahead
283# if necessary.  A token is any section of the input line which should be
284# manipulated as a single entity during formatting.  For example, a single
285# ',' character is a token, and so is an entire side comment.  It handles
286# the complexities of Perl syntax, such as distinguishing between '<<' as
287# a shift operator and as a here-document, or distinguishing between '/'
288# as a divide symbol and as a pattern delimiter.
289#
290# Formatter inserts and deletes whitespace between tokens, and breaks
291# sequences of tokens at appropriate points as output lines.  It bases its
292# decisions on the default rules as modified by any command-line options.
293#
294# VerticalAligner collects groups of lines together and tries to line up
295# certain tokens, such as '=>', '#', and '=' by adding whitespace.
296#
297# FileWriter simply writes lines to the output stream.
298#
299# The Logger package, not shown, records significant events and warning
300# messages.  It writes a .LOG file, which may be saved with a
301# '-log' or a '-g' flag.
302
303sub perltidy {
304
305    my %defaults = (
306        argv                  => undef,
307        destination           => undef,
308        formatter             => undef,
309        logfile               => undef,
310        errorfile             => undef,
311        perltidyrc            => undef,
312        source                => undef,
313        stderr                => undef,
314        dump_options          => undef,
315        dump_options_type     => undef,
316        dump_getopt_flags     => undef,
317        dump_options_category => undef,
318        dump_options_range    => undef,
319        dump_abbreviations    => undef,
320        prefilter             => undef,
321        postfilter            => undef,
322    );
323
324    # don't overwrite callers ARGV
325    local @ARGV   = @ARGV;
326    local *STDERR = *STDERR;
327
328    my %input_hash = @_;
329
330    if ( my @bad_keys = grep { !exists $defaults{$_} } keys %input_hash ) {
331        local $" = ')(';
332        my @good_keys = sort keys %defaults;
333        @bad_keys = sort @bad_keys;
334        confess <<EOM;
335------------------------------------------------------------------------
336Unknown perltidy parameter : (@bad_keys)
337perltidy only understands : (@good_keys)
338------------------------------------------------------------------------
339
340EOM
341    }
342
343    my $get_hash_ref = sub {
344        my ($key) = @_;
345        my $hash_ref = $input_hash{$key};
346        if ( defined($hash_ref) ) {
347            unless ( ref($hash_ref) eq 'HASH' ) {
348                my $what = ref($hash_ref);
349                my $but_is =
350                  $what ? "but is ref to $what" : "but is not a reference";
351                croak <<EOM;
352------------------------------------------------------------------------
353error in call to perltidy:
354-$key must be reference to HASH $but_is
355------------------------------------------------------------------------
356EOM
357            }
358        }
359        return $hash_ref;
360    };
361
362    %input_hash = ( %defaults, %input_hash );
363    my $argv               = $input_hash{'argv'};
364    my $destination_stream = $input_hash{'destination'};
365    my $errorfile_stream   = $input_hash{'errorfile'};
366    my $logfile_stream     = $input_hash{'logfile'};
367    my $perltidyrc_stream  = $input_hash{'perltidyrc'};
368    my $source_stream      = $input_hash{'source'};
369    my $stderr_stream      = $input_hash{'stderr'};
370    my $user_formatter     = $input_hash{'formatter'};
371    my $prefilter          = $input_hash{'prefilter'};
372    my $postfilter         = $input_hash{'postfilter'};
373
374    if ($stderr_stream) {
375        ( $fh_stderr, my $stderr_file ) =
376          Perl::Tidy::streamhandle( $stderr_stream, 'w' );
377        if ( !$fh_stderr ) {
378            croak <<EOM;
379------------------------------------------------------------------------
380Unable to redirect STDERR to $stderr_stream
381Please check value of -stderr in call to perltidy
382------------------------------------------------------------------------
383EOM
384        }
385    }
386    else {
387        $fh_stderr = *STDERR;
388    }
389
390    sub Warn ($) { $fh_stderr->print( $_[0] ); }
391
392    sub Exit ($) {
393        if   ( $_[0] ) { goto ERROR_EXIT }
394        else           { goto NORMAL_EXIT }
395    }
396
397    sub Die ($) { Warn $_[0]; Exit(1); }
398
399    # extract various dump parameters
400    my $dump_options_type     = $input_hash{'dump_options_type'};
401    my $dump_options          = $get_hash_ref->('dump_options');
402    my $dump_getopt_flags     = $get_hash_ref->('dump_getopt_flags');
403    my $dump_options_category = $get_hash_ref->('dump_options_category');
404    my $dump_abbreviations    = $get_hash_ref->('dump_abbreviations');
405    my $dump_options_range    = $get_hash_ref->('dump_options_range');
406
407    # validate dump_options_type
408    if ( defined($dump_options) ) {
409        unless ( defined($dump_options_type) ) {
410            $dump_options_type = 'perltidyrc';
411        }
412        unless ( $dump_options_type =~ /^(perltidyrc|full)$/ ) {
413            croak <<EOM;
414------------------------------------------------------------------------
415Please check value of -dump_options_type in call to perltidy;
416saw: '$dump_options_type'
417expecting: 'perltidyrc' or 'full'
418------------------------------------------------------------------------
419EOM
420
421        }
422    }
423    else {
424        $dump_options_type = "";
425    }
426
427    if ($user_formatter) {
428
429        # if the user defines a formatter, there is no output stream,
430        # but we need a null stream to keep coding simple
431        $destination_stream = Perl::Tidy::DevNull->new();
432    }
433
434    # see if ARGV is overridden
435    if ( defined($argv) ) {
436
437        my $rargv = ref $argv;
438        if ( $rargv eq 'SCALAR' ) { $argv = $$argv; $rargv = undef }
439
440        # ref to ARRAY
441        if ($rargv) {
442            if ( $rargv eq 'ARRAY' ) {
443                @ARGV = @$argv;
444            }
445            else {
446                croak <<EOM;
447------------------------------------------------------------------------
448Please check value of -argv in call to perltidy;
449it must be a string or ref to ARRAY but is: $rargv
450------------------------------------------------------------------------
451EOM
452            }
453        }
454
455        # string
456        else {
457            my ( $rargv, $msg ) = parse_args($argv);
458            if ($msg) {
459                Die <<EOM;
460Error parsing this string passed to to perltidy with 'argv':
461$msg
462EOM
463            }
464            @ARGV = @{$rargv};
465        }
466    }
467
468    my $rpending_complaint;
469    $$rpending_complaint = "";
470    my $rpending_logfile_message;
471    $$rpending_logfile_message = "";
472
473    my ( $is_Windows, $Windows_type ) = look_for_Windows($rpending_complaint);
474
475    # VMS file names are restricted to a 40.40 format, so we append _tdy
476    # instead of .tdy, etc. (but see also sub check_vms_filename)
477    my $dot;
478    my $dot_pattern;
479    if ( $^O eq 'VMS' ) {
480        $dot         = '_';
481        $dot_pattern = '_';
482    }
483    else {
484        $dot         = '.';
485        $dot_pattern = '\.';    # must escape for use in regex
486    }
487
488    #---------------------------------------------------------------
489    # get command line options
490    #---------------------------------------------------------------
491    my (
492        $rOpts,       $config_file,      $rraw_options,
493        $saw_extrude, $saw_pbp,          $roption_string,
494        $rexpansion,  $roption_category, $roption_range
495      )
496      = process_command_line(
497        $perltidyrc_stream,  $is_Windows, $Windows_type,
498        $rpending_complaint, $dump_options_type,
499      );
500
501    #---------------------------------------------------------------
502    # Handle requests to dump information
503    #---------------------------------------------------------------
504
505    # return or exit immediately after all dumps
506    my $quit_now = 0;
507
508    # Getopt parameters and their flags
509    if ( defined($dump_getopt_flags) ) {
510        $quit_now = 1;
511        foreach my $op ( @{$roption_string} ) {
512            my $opt  = $op;
513            my $flag = "";
514
515            # Examples:
516            #  some-option=s
517            #  some-option=i
518            #  some-option:i
519            #  some-option!
520            if ( $opt =~ /(.*)(!|=.*|:.*)$/ ) {
521                $opt  = $1;
522                $flag = $2;
523            }
524            $dump_getopt_flags->{$opt} = $flag;
525        }
526    }
527
528    if ( defined($dump_options_category) ) {
529        $quit_now = 1;
530        %{$dump_options_category} = %{$roption_category};
531    }
532
533    if ( defined($dump_options_range) ) {
534        $quit_now = 1;
535        %{$dump_options_range} = %{$roption_range};
536    }
537
538    if ( defined($dump_abbreviations) ) {
539        $quit_now = 1;
540        %{$dump_abbreviations} = %{$rexpansion};
541    }
542
543    if ( defined($dump_options) ) {
544        $quit_now = 1;
545        %{$dump_options} = %{$rOpts};
546    }
547
548    Exit 0 if ($quit_now);
549
550    # make printable string of options for this run as possible diagnostic
551    my $readable_options = readable_options( $rOpts, $roption_string );
552
553    # dump from command line
554    if ( $rOpts->{'dump-options'} ) {
555        print STDOUT $readable_options;
556        Exit 0;
557    }
558
559    #---------------------------------------------------------------
560    # check parameters and their interactions
561    #---------------------------------------------------------------
562    my $tabsize =
563      check_options( $rOpts, $is_Windows, $Windows_type, $rpending_complaint );
564
565    if ($user_formatter) {
566        $rOpts->{'format'} = 'user';
567    }
568
569    # there must be one entry here for every possible format
570    my %default_file_extension = (
571        tidy => 'tdy',
572        html => 'html',
573        user => '',
574    );
575
576    # be sure we have a valid output format
577    unless ( exists $default_file_extension{ $rOpts->{'format'} } ) {
578        my $formats = join ' ',
579          sort map { "'" . $_ . "'" } keys %default_file_extension;
580        my $fmt = $rOpts->{'format'};
581        Die "-format='$fmt' but must be one of: $formats\n";
582    }
583
584    my $output_extension = make_extension( $rOpts->{'output-file-extension'},
585        $default_file_extension{ $rOpts->{'format'} }, $dot );
586
587    # If the backup extension contains a / character then the backup should
588    # be deleted when the -b option is used.   On older versions of
589    # perltidy this will generate an error message due to an illegal
590    # file name.
591    #
592    # A backup file will still be generated but will be deleted
593    # at the end.  If -bext='/' then this extension will be
594    # the default 'bak'.  Otherwise it will be whatever characters
595    # remains after all '/' characters are removed.  For example:
596    # -bext         extension     slashes
597    #  '/'          bak           1
598    #  '/delete'    delete        1
599    #  'delete/'    delete        1
600    #  '/dev/null'  devnull       2    (Currently not allowed)
601    my $bext          = $rOpts->{'backup-file-extension'};
602    my $delete_backup = ( $rOpts->{'backup-file-extension'} =~ s/\///g );
603
604    # At present only one forward slash is allowed.  In the future multiple
605    # slashes may be allowed to allow for other options
606    if ( $delete_backup > 1 ) {
607        Die "-bext=$bext contains more than one '/'\n";
608    }
609
610    my $backup_extension =
611      make_extension( $rOpts->{'backup-file-extension'}, 'bak', $dot );
612
613    my $html_toc_extension =
614      make_extension( $rOpts->{'html-toc-extension'}, 'toc', $dot );
615
616    my $html_src_extension =
617      make_extension( $rOpts->{'html-src-extension'}, 'src', $dot );
618
619    # check for -b option;
620    # silently ignore unless beautify mode
621    my $in_place_modify = $rOpts->{'backup-and-modify-in-place'}
622      && $rOpts->{'format'} eq 'tidy';
623
624    # turn off -b with warnings in case of conflicts with other options
625    if ($in_place_modify) {
626        if ( $rOpts->{'standard-output'} ) {
627            my $msg = "Ignoring -b; you may not use -b and -st together";
628            $msg .= " (-pbp contains -st; see manual)" if ($saw_pbp);
629            Warn "$msg\n";
630            $in_place_modify = 0;
631        }
632        if ($destination_stream) {
633            Warn
634"Ignoring -b; you may not specify a destination stream and -b together\n";
635            $in_place_modify = 0;
636        }
637        if ( ref($source_stream) ) {
638            Warn
639"Ignoring -b; you may not specify a source array and -b together\n";
640            $in_place_modify = 0;
641        }
642        if ( $rOpts->{'outfile'} ) {
643            Warn "Ignoring -b; you may not use -b and -o together\n";
644            $in_place_modify = 0;
645        }
646        if ( defined( $rOpts->{'output-path'} ) ) {
647            Warn "Ignoring -b; you may not use -b and -opath together\n";
648            $in_place_modify = 0;
649        }
650    }
651
652    Perl::Tidy::Formatter::check_options($rOpts);
653    if ( $rOpts->{'format'} eq 'html' ) {
654        Perl::Tidy::HtmlWriter->check_options($rOpts);
655    }
656
657    # make the pattern of file extensions that we shouldn't touch
658    my $forbidden_file_extensions = "(($dot_pattern)(LOG|DEBUG|ERR|TEE)";
659    if ($output_extension) {
660        my $ext = quotemeta($output_extension);
661        $forbidden_file_extensions .= "|$ext";
662    }
663    if ( $in_place_modify && $backup_extension ) {
664        my $ext = quotemeta($backup_extension);
665        $forbidden_file_extensions .= "|$ext";
666    }
667    $forbidden_file_extensions .= ')$';
668
669    # Create a diagnostics object if requested;
670    # This is only useful for code development
671    my $diagnostics_object = undef;
672    if ( $rOpts->{'DIAGNOSTICS'} ) {
673        $diagnostics_object = Perl::Tidy::Diagnostics->new();
674    }
675
676    # no filenames should be given if input is from an array
677    if ($source_stream) {
678        if ( @ARGV > 0 ) {
679            Die
680"You may not specify any filenames when a source array is given\n";
681        }
682
683        # we'll stuff the source array into ARGV
684        unshift( @ARGV, $source_stream );
685
686        # No special treatment for source stream which is a filename.
687        # This will enable checks for binary files and other bad stuff.
688        $source_stream = undef unless ref($source_stream);
689    }
690
691    # use stdin by default if no source array and no args
692    else {
693        unshift( @ARGV, '-' ) unless @ARGV;
694    }
695
696    #---------------------------------------------------------------
697    # Ready to go...
698    # main loop to process all files in argument list
699    #---------------------------------------------------------------
700    my $number_of_files = @ARGV;
701    my $formatter       = undef;
702    my $tokenizer       = undef;
703    while ( my $input_file = shift @ARGV ) {
704        my $fileroot;
705        my $input_file_permissions;
706
707        #---------------------------------------------------------------
708        # prepare this input stream
709        #---------------------------------------------------------------
710        if ($source_stream) {
711            $fileroot = "perltidy";
712        }
713        elsif ( $input_file eq '-' ) {    # '-' indicates input from STDIN
714            $fileroot = "perltidy";       # root name to use for .ERR, .LOG, etc
715            $in_place_modify = 0;
716        }
717        else {
718            $fileroot = $input_file;
719            unless ( -e $input_file ) {
720
721                # file doesn't exist - check for a file glob
722                if ( $input_file =~ /([\?\*\[\{])/ ) {
723
724                    # Windows shell may not remove quotes, so do it
725                    my $input_file = $input_file;
726                    if ( $input_file =~ /^\'(.+)\'$/ ) { $input_file = $1 }
727                    if ( $input_file =~ /^\"(.+)\"$/ ) { $input_file = $1 }
728                    my $pattern = fileglob_to_re($input_file);
729                    ##eval "/$pattern/";
730                    if ( !$@ && opendir( DIR, './' ) ) {
731                        my @files =
732                          grep { /$pattern/ && !-d $_ } readdir(DIR);
733                        closedir(DIR);
734                        if (@files) {
735                            unshift @ARGV, @files;
736                            next;
737                        }
738                    }
739                }
740                Warn "skipping file: '$input_file': no matches found\n";
741                next;
742            }
743
744            unless ( -f $input_file ) {
745                Warn "skipping file: $input_file: not a regular file\n";
746                next;
747            }
748
749            # As a safety precaution, skip zero length files.
750            # If for example a source file got clobberred somehow,
751            # the old .tdy or .bak files might still exist so we
752            # shouldn't overwrite them with zero length files.
753            unless ( -s $input_file ) {
754                Warn "skipping file: $input_file: Zero size\n";
755                next;
756            }
757
758            unless ( ( -T $input_file ) || $rOpts->{'force-read-binary'} ) {
759                Warn
760                  "skipping file: $input_file: Non-text (override with -f)\n";
761                next;
762            }
763
764            # we should have a valid filename now
765            $fileroot               = $input_file;
766            $input_file_permissions = ( stat $input_file )[2] & 07777;
767
768            if ( $^O eq 'VMS' ) {
769                ( $fileroot, $dot ) = check_vms_filename($fileroot);
770            }
771
772            # add option to change path here
773            if ( defined( $rOpts->{'output-path'} ) ) {
774
775                my ( $base, $old_path ) = fileparse($fileroot);
776                my $new_path = $rOpts->{'output-path'};
777                unless ( -d $new_path ) {
778                    unless ( mkdir $new_path, 0777 ) {
779                        Die "unable to create directory $new_path: $!\n";
780                    }
781                }
782                my $path = $new_path;
783                $fileroot = catfile( $path, $base );
784                unless ($fileroot) {
785                    Die <<EOM;
786------------------------------------------------------------------------
787Problem combining $new_path and $base to make a filename; check -opath
788------------------------------------------------------------------------
789EOM
790                }
791            }
792        }
793
794        # Skip files with same extension as the output files because
795        # this can lead to a messy situation with files like
796        # script.tdy.tdy.tdy ... or worse problems ...  when you
797        # rerun perltidy over and over with wildcard input.
798        if (
799            !$source_stream
800            && (   $input_file =~ /$forbidden_file_extensions/o
801                || $input_file eq 'DIAGNOSTICS' )
802          )
803        {
804            Warn "skipping file: $input_file: wrong extension\n";
805            next;
806        }
807
808        # the 'source_object' supplies a method to read the input file
809        my $source_object =
810          Perl::Tidy::LineSource->new( $input_file, $rOpts,
811            $rpending_logfile_message );
812        next unless ($source_object);
813
814        # Prefilters and postfilters: The prefilter is a code reference
815        # that will be applied to the source before tidying, and the
816        # postfilter is a code reference to the result before outputting.
817        if ($prefilter) {
818            my $buf = '';
819            while ( my $line = $source_object->get_line() ) {
820                $buf .= $line;
821            }
822            $buf = $prefilter->($buf);
823
824            $source_object = Perl::Tidy::LineSource->new( \$buf, $rOpts,
825                $rpending_logfile_message );
826        }
827
828        # register this file name with the Diagnostics package
829        $diagnostics_object->set_input_file($input_file)
830          if $diagnostics_object;
831
832        #---------------------------------------------------------------
833        # prepare the output stream
834        #---------------------------------------------------------------
835        my $output_file = undef;
836        my $actual_output_extension;
837
838        if ( $rOpts->{'outfile'} ) {
839
840            if ( $number_of_files <= 1 ) {
841
842                if ( $rOpts->{'standard-output'} ) {
843                    my $msg = "You may not use -o and -st together";
844                    $msg .= " (-pbp contains -st; see manual)" if ($saw_pbp);
845                    Die "$msg\n";
846                }
847                elsif ($destination_stream) {
848                    Die
849"You may not specify a destination array and -o together\n";
850                }
851                elsif ( defined( $rOpts->{'output-path'} ) ) {
852                    Die "You may not specify -o and -opath together\n";
853                }
854                elsif ( defined( $rOpts->{'output-file-extension'} ) ) {
855                    Die "You may not specify -o and -oext together\n";
856                }
857                $output_file = $rOpts->{outfile};
858
859                # make sure user gives a file name after -o
860                if ( $output_file =~ /^-/ ) {
861                    Die "You must specify a valid filename after -o\n";
862                }
863
864                # do not overwrite input file with -o
865                if ( defined($input_file_permissions)
866                    && ( $output_file eq $input_file ) )
867                {
868                    Die "Use 'perltidy -b $input_file' to modify in-place\n";
869                }
870            }
871            else {
872                Die "You may not use -o with more than one input file\n";
873            }
874        }
875        elsif ( $rOpts->{'standard-output'} ) {
876            if ($destination_stream) {
877                my $msg =
878                  "You may not specify a destination array and -st together\n";
879                $msg .= " (-pbp contains -st; see manual)" if ($saw_pbp);
880                Die "$msg\n";
881            }
882            $output_file = '-';
883
884            if ( $number_of_files <= 1 ) {
885            }
886            else {
887                Die "You may not use -st with more than one input file\n";
888            }
889        }
890        elsif ($destination_stream) {
891            $output_file = $destination_stream;
892        }
893        elsif ($source_stream) {    # source but no destination goes to stdout
894            $output_file = '-';
895        }
896        elsif ( $input_file eq '-' ) {
897            $output_file = '-';
898        }
899        else {
900            if ($in_place_modify) {
901                $output_file = IO::File->new_tmpfile()
902                  or Die "cannot open temp file for -b option: $!\n";
903            }
904            else {
905                $actual_output_extension = $output_extension;
906                $output_file             = $fileroot . $output_extension;
907            }
908        }
909
910        # the 'sink_object' knows how to write the output file
911        my $tee_file = $fileroot . $dot . "TEE";
912
913        my $line_separator = $rOpts->{'output-line-ending'};
914        if ( $rOpts->{'preserve-line-endings'} ) {
915            $line_separator = find_input_line_ending($input_file);
916        }
917
918        # Eventually all I/O may be done with binmode, but for now it is
919        # only done when a user requests a particular line separator
920        # through the -ple or -ole flags
921        my $binmode = 0;
922        if   ( defined($line_separator) ) { $binmode        = 1 }
923        else                              { $line_separator = "\n" }
924
925        my ( $sink_object, $postfilter_buffer );
926        if ($postfilter) {
927            $sink_object =
928              Perl::Tidy::LineSink->new( \$postfilter_buffer, $tee_file,
929                $line_separator, $rOpts, $rpending_logfile_message, $binmode );
930        }
931        else {
932            $sink_object =
933              Perl::Tidy::LineSink->new( $output_file, $tee_file,
934                $line_separator, $rOpts, $rpending_logfile_message, $binmode );
935        }
936
937        #---------------------------------------------------------------
938        # initialize the error logger for this file
939        #---------------------------------------------------------------
940        my $warning_file = $fileroot . $dot . "ERR";
941        if ($errorfile_stream) { $warning_file = $errorfile_stream }
942        my $log_file = $fileroot . $dot . "LOG";
943        if ($logfile_stream) { $log_file = $logfile_stream }
944
945        my $logger_object =
946          Perl::Tidy::Logger->new( $rOpts, $log_file, $warning_file,
947            $fh_stderr, $saw_extrude );
948        write_logfile_header(
949            $rOpts,        $logger_object, $config_file,
950            $rraw_options, $Windows_type,  $readable_options,
951        );
952        if ($$rpending_logfile_message) {
953            $logger_object->write_logfile_entry($$rpending_logfile_message);
954        }
955        if ($$rpending_complaint) {
956            $logger_object->complain($$rpending_complaint);
957        }
958
959        #---------------------------------------------------------------
960        # initialize the debug object, if any
961        #---------------------------------------------------------------
962        my $debugger_object = undef;
963        if ( $rOpts->{DEBUG} ) {
964            $debugger_object =
965              Perl::Tidy::Debugger->new( $fileroot . $dot . "DEBUG" );
966        }
967
968        #---------------------------------------------------------------
969        # loop over iterations for one source stream
970        #---------------------------------------------------------------
971
972        # We will do a convergence test if 3 or more iterations are allowed.
973        # It would be pointless for fewer because we have to make at least
974        # two passes before we can see if we are converged, and the test
975        # would just slow things down.
976        my $max_iterations = $rOpts->{'iterations'};
977        my $convergence_log_message;
978        my %saw_md5;
979        my $do_convergence_test = $max_iterations > 2;
980        if ($do_convergence_test) {
981            eval "use Digest::MD5 qw(md5_hex)";
982            $do_convergence_test = !$@;
983        }
984
985        # save objects to allow redirecting output during iterations
986        my $sink_object_final     = $sink_object;
987        my $debugger_object_final = $debugger_object;
988        my $logger_object_final   = $logger_object;
989
990        for ( my $iter = 1 ; $iter <= $max_iterations ; $iter++ ) {
991
992            # send output stream to temp buffers until last iteration
993            my $sink_buffer;
994            if ( $iter < $max_iterations ) {
995                $sink_object =
996                  Perl::Tidy::LineSink->new( \$sink_buffer, $tee_file,
997                    $line_separator, $rOpts, $rpending_logfile_message,
998                    $binmode );
999            }
1000            else {
1001                $sink_object = $sink_object_final;
1002            }
1003
1004            # Save logger, debugger output only on pass 1 because:
1005            # (1) line number references must be to the starting
1006            # source, not an intermediate result, and
1007            # (2) we need to know if there are errors so we can stop the
1008            # iterations early if necessary.
1009            if ( $iter > 1 ) {
1010                $debugger_object = undef;
1011                $logger_object   = undef;
1012            }
1013
1014            #------------------------------------------------------------
1015            # create a formatter for this file : html writer or
1016            # pretty printer
1017            #------------------------------------------------------------
1018
1019            # we have to delete any old formatter because, for safety,
1020            # the formatter will check to see that there is only one.
1021            $formatter = undef;
1022
1023            if ($user_formatter) {
1024                $formatter = $user_formatter;
1025            }
1026            elsif ( $rOpts->{'format'} eq 'html' ) {
1027                $formatter =
1028                  Perl::Tidy::HtmlWriter->new( $fileroot, $output_file,
1029                    $actual_output_extension, $html_toc_extension,
1030                    $html_src_extension );
1031            }
1032            elsif ( $rOpts->{'format'} eq 'tidy' ) {
1033                $formatter = Perl::Tidy::Formatter->new(
1034                    logger_object      => $logger_object,
1035                    diagnostics_object => $diagnostics_object,
1036                    sink_object        => $sink_object,
1037                );
1038            }
1039            else {
1040                Die "I don't know how to do -format=$rOpts->{'format'}\n";
1041            }
1042
1043            unless ($formatter) {
1044                Die "Unable to continue with $rOpts->{'format'} formatting\n";
1045            }
1046
1047            #---------------------------------------------------------------
1048            # create the tokenizer for this file
1049            #---------------------------------------------------------------
1050            $tokenizer = undef;                     # must destroy old tokenizer
1051            $tokenizer = Perl::Tidy::Tokenizer->new(
1052                source_object      => $source_object,
1053                logger_object      => $logger_object,
1054                debugger_object    => $debugger_object,
1055                diagnostics_object => $diagnostics_object,
1056                tabsize            => $tabsize,
1057
1058                starting_level      => $rOpts->{'starting-indentation-level'},
1059                indent_columns      => $rOpts->{'indent-columns'},
1060                look_for_hash_bang  => $rOpts->{'look-for-hash-bang'},
1061                look_for_autoloader => $rOpts->{'look-for-autoloader'},
1062                look_for_selfloader => $rOpts->{'look-for-selfloader'},
1063                trim_qw             => $rOpts->{'trim-qw'},
1064
1065                continuation_indentation =>
1066                  $rOpts->{'continuation-indentation'},
1067                outdent_labels => $rOpts->{'outdent-labels'},
1068            );
1069
1070            #---------------------------------------------------------------
1071            # now we can do it
1072            #---------------------------------------------------------------
1073            process_this_file( $tokenizer, $formatter );
1074
1075            #---------------------------------------------------------------
1076            # close the input source and report errors
1077            #---------------------------------------------------------------
1078            $source_object->close_input_file();
1079
1080            # line source for next iteration (if any) comes from the current
1081            # temporary output buffer
1082            if ( $iter < $max_iterations ) {
1083
1084                $sink_object->close_output_file();
1085                $source_object =
1086                  Perl::Tidy::LineSource->new( \$sink_buffer, $rOpts,
1087                    $rpending_logfile_message );
1088
1089                # stop iterations if errors or converged
1090                my $stop_now = $logger_object->{_warning_count};
1091                if ($stop_now) {
1092                    $convergence_log_message = <<EOM;
1093Stopping iterations because of errors.
1094EOM
1095                }
1096                elsif ($do_convergence_test) {
1097                    my $digest = md5_hex($sink_buffer);
1098                    if ( !$saw_md5{$digest} ) {
1099                        $saw_md5{$digest} = $iter;
1100                    }
1101                    else {
1102
1103                        # Deja vu, stop iterating
1104                        $stop_now = 1;
1105                        my $iterm = $iter - 1;
1106                        if ( $saw_md5{$digest} != $iterm ) {
1107
1108                            # Blinking (oscillating) between two stable
1109                            # end states.  This has happened in the past
1110                            # but at present there are no known instances.
1111                            $convergence_log_message = <<EOM;
1112Blinking. Output for iteration $iter same as for $saw_md5{$digest}.
1113EOM
1114                            $diagnostics_object->write_diagnostics(
1115                                $convergence_log_message)
1116                              if $diagnostics_object;
1117                        }
1118                        else {
1119                            $convergence_log_message = <<EOM;
1120Converged.  Output for iteration $iter same as for iter $iterm.
1121EOM
1122                            $diagnostics_object->write_diagnostics(
1123                                $convergence_log_message)
1124                              if $diagnostics_object && $iterm > 2;
1125                        }
1126                    }
1127                } ## end if ($do_convergence_test)
1128
1129                if ($stop_now) {
1130
1131                    # we are stopping the iterations early;
1132                    # copy the output stream to its final destination
1133                    $sink_object = $sink_object_final;
1134                    while ( my $line = $source_object->get_line() ) {
1135                        $sink_object->write_line($line);
1136                    }
1137                    $source_object->close_input_file();
1138                    last;
1139                }
1140            } ## end if ( $iter < $max_iterations)
1141        }    # end loop over iterations for one source file
1142
1143        # restore objects which have been temporarily undefined
1144        # for second and higher iterations
1145        $debugger_object = $debugger_object_final;
1146        $logger_object   = $logger_object_final;
1147
1148        $logger_object->write_logfile_entry($convergence_log_message)
1149          if $convergence_log_message;
1150
1151        #---------------------------------------------------------------
1152        # Perform any postfilter operation
1153        #---------------------------------------------------------------
1154        if ($postfilter) {
1155            $sink_object->close_output_file();
1156            $sink_object =
1157              Perl::Tidy::LineSink->new( $output_file, $tee_file,
1158                $line_separator, $rOpts, $rpending_logfile_message, $binmode );
1159            my $buf = $postfilter->($postfilter_buffer);
1160            $source_object =
1161              Perl::Tidy::LineSource->new( \$buf, $rOpts,
1162                $rpending_logfile_message );
1163            while ( my $line = $source_object->get_line() ) {
1164                $sink_object->write_line($line);
1165            }
1166            $source_object->close_input_file();
1167        }
1168
1169        # Save names of the input and output files for syntax check
1170        my $ifname = $input_file;
1171        my $ofname = $output_file;
1172
1173        #---------------------------------------------------------------
1174        # handle the -b option (backup and modify in-place)
1175        #---------------------------------------------------------------
1176        if ($in_place_modify) {
1177            unless ( -f $input_file ) {
1178
1179                # oh, oh, no real file to backup ..
1180                # shouldn't happen because of numerous preliminary checks
1181                Die
1182"problem with -b backing up input file '$input_file': not a file\n";
1183            }
1184            my $backup_name = $input_file . $backup_extension;
1185            if ( -f $backup_name ) {
1186                unlink($backup_name)
1187                  or Die
1188"unable to remove previous '$backup_name' for -b option; check permissions: $!\n";
1189            }
1190
1191            # backup the input file
1192            # we use copy for symlinks, move for regular files
1193            if ( -l $input_file ) {
1194                File::Copy::copy( $input_file, $backup_name )
1195                  or Die "File::Copy failed trying to backup source: $!";
1196            }
1197            else {
1198                rename( $input_file, $backup_name )
1199                  or Die
1200"problem renaming $input_file to $backup_name for -b option: $!\n";
1201            }
1202            $ifname = $backup_name;
1203
1204            # copy the output to the original input file
1205            # NOTE: it would be nice to just close $output_file and use
1206            # File::Copy::copy here, but in this case $output_file is the
1207            # handle of an open nameless temporary file so we would lose
1208            # everything if we closed it.
1209            seek( $output_file, 0, 0 )
1210              or Die "unable to rewind a temporary file for -b option: $!\n";
1211            my $fout = IO::File->new("> $input_file")
1212              or Die
1213"problem re-opening $input_file for write for -b option; check file and directory permissions: $!\n";
1214            binmode $fout;
1215            my $line;
1216            while ( $line = $output_file->getline() ) {
1217                $fout->print($line);
1218            }
1219            $fout->close();
1220            $output_file = $input_file;
1221            $ofname      = $input_file;
1222        }
1223
1224        #---------------------------------------------------------------
1225        # clean up and report errors
1226        #---------------------------------------------------------------
1227        $sink_object->close_output_file()    if $sink_object;
1228        $debugger_object->close_debug_file() if $debugger_object;
1229
1230        # set output file permissions
1231        if ( $output_file && -f $output_file && !-l $output_file ) {
1232            if ($input_file_permissions) {
1233
1234                # give output script same permissions as input script, but
1235                # make it user-writable or else we can't run perltidy again.
1236                # Thus we retain whatever executable flags were set.
1237                if ( $rOpts->{'format'} eq 'tidy' ) {
1238                    chmod( $input_file_permissions | 0600, $output_file );
1239                }
1240
1241                # else use default permissions for html and any other format
1242            }
1243        }
1244
1245        #---------------------------------------------------------------
1246        # Do syntax check if requested and possible
1247        #---------------------------------------------------------------
1248        my $infile_syntax_ok = 0;    # -1 no  0=don't know   1 yes
1249        if (   $logger_object
1250            && $rOpts->{'check-syntax'}
1251            && $ifname
1252            && $ofname )
1253        {
1254            $infile_syntax_ok =
1255              check_syntax( $ifname, $ofname, $logger_object, $rOpts );
1256        }
1257
1258        #---------------------------------------------------------------
1259        # remove the original file for in-place modify as follows:
1260        #   $delete_backup=0 never
1261        #   $delete_backup=1 only if no errors
1262        #   $delete_backup>1 always  : NOT ALLOWED, too risky, see above
1263        #---------------------------------------------------------------
1264        if (   $in_place_modify
1265            && $delete_backup
1266            && -f $ifname
1267            && ( $delete_backup > 1 || !$logger_object->{_warning_count} ) )
1268        {
1269
1270            # As an added safety precaution, do not delete the source file
1271            # if its size has dropped from positive to zero, since this
1272            # could indicate a disaster of some kind, including a hardware
1273            # failure.  Actually, this could happen if you had a file of
1274            # all comments (or pod) and deleted everything with -dac (-dap)
1275            # for some reason.
1276            if ( !-s $output_file && -s $ifname && $delete_backup == 1 ) {
1277                Warn(
1278"output file '$output_file' missing or zero length; original '$ifname' not deleted\n"
1279                );
1280            }
1281            else {
1282                unlink($ifname)
1283                  or Die
1284"unable to remove previous '$ifname' for -b option; check permissions: $!\n";
1285            }
1286        }
1287
1288        $logger_object->finish( $infile_syntax_ok, $formatter )
1289          if $logger_object;
1290    }    # end of main loop to process all files
1291
1292  NORMAL_EXIT:
1293    return 0;
1294
1295  ERROR_EXIT:
1296    return 1;
1297}    # end of main program perltidy
1298
1299sub get_stream_as_named_file {
1300
1301    # Return the name of a file containing a stream of data, creating
1302    # a temporary file if necessary.
1303    # Given:
1304    #  $stream - the name of a file or stream
1305    # Returns:
1306    #  $fname = name of file if possible, or undef
1307    #  $if_tmpfile = true if temp file, undef if not temp file
1308    #
1309    # This routine is needed for passing actual files to Perl for
1310    # a syntax check.
1311    my ($stream) = @_;
1312    my $is_tmpfile;
1313    my $fname;
1314    if ($stream) {
1315        if ( ref($stream) ) {
1316            my ( $fh_stream, $fh_name ) =
1317              Perl::Tidy::streamhandle( $stream, 'r' );
1318            if ($fh_stream) {
1319                my ( $fout, $tmpnam );
1320
1321                # TODO: fix the tmpnam routine to return an open filehandle
1322                $tmpnam = Perl::Tidy::make_temporary_filename();
1323                $fout = IO::File->new( $tmpnam, 'w' );
1324
1325                if ($fout) {
1326                    $fname      = $tmpnam;
1327                    $is_tmpfile = 1;
1328                    binmode $fout;
1329                    while ( my $line = $fh_stream->getline() ) {
1330                        $fout->print($line);
1331                    }
1332                    $fout->close();
1333                }
1334                $fh_stream->close();
1335            }
1336        }
1337        elsif ( $stream ne '-' && -f $stream ) {
1338            $fname = $stream;
1339        }
1340    }
1341    return ( $fname, $is_tmpfile );
1342}
1343
1344sub fileglob_to_re {
1345
1346    # modified (corrected) from version in find2perl
1347    my $x = shift;
1348    $x =~ s#([./^\$()])#\\$1#g;    # escape special characters
1349    $x =~ s#\*#.*#g;               # '*' -> '.*'
1350    $x =~ s#\?#.#g;                # '?' -> '.'
1351    "^$x\\z";                      # match whole word
1352}
1353
1354sub make_extension {
1355
1356    # Make a file extension, including any leading '.' if necessary
1357    # The '.' may actually be an '_' under VMS
1358    my ( $extension, $default, $dot ) = @_;
1359
1360    # Use the default if none specified
1361    $extension = $default unless ($extension);
1362
1363    # Only extensions with these leading characters get a '.'
1364    # This rule gives the user some freedom
1365    if ( $extension =~ /^[a-zA-Z0-9]/ ) {
1366        $extension = $dot . $extension;
1367    }
1368    return $extension;
1369}
1370
1371sub write_logfile_header {
1372    my (
1373        $rOpts,        $logger_object, $config_file,
1374        $rraw_options, $Windows_type,  $readable_options
1375    ) = @_;
1376    $logger_object->write_logfile_entry(
1377"perltidy version $VERSION log file on a $^O system, OLD_PERL_VERSION=$]\n"
1378    );
1379    if ($Windows_type) {
1380        $logger_object->write_logfile_entry("Windows type is $Windows_type\n");
1381    }
1382    my $options_string = join( ' ', @$rraw_options );
1383
1384    if ($config_file) {
1385        $logger_object->write_logfile_entry(
1386            "Found Configuration File >>> $config_file \n");
1387    }
1388    $logger_object->write_logfile_entry(
1389        "Configuration and command line parameters for this run:\n");
1390    $logger_object->write_logfile_entry("$options_string\n");
1391
1392    if ( $rOpts->{'DEBUG'} || $rOpts->{'show-options'} ) {
1393        $rOpts->{'logfile'} = 1;    # force logfile to be saved
1394        $logger_object->write_logfile_entry(
1395            "Final parameter set for this run\n");
1396        $logger_object->write_logfile_entry(
1397            "------------------------------------\n");
1398
1399        $logger_object->write_logfile_entry($readable_options);
1400
1401        $logger_object->write_logfile_entry(
1402            "------------------------------------\n");
1403    }
1404    $logger_object->write_logfile_entry(
1405        "To find error messages search for 'WARNING' with your editor\n");
1406}
1407
1408sub generate_options {
1409
1410    ######################################################################
1411    # Generate and return references to:
1412    #  @option_string - the list of options to be passed to Getopt::Long
1413    #  @defaults - the list of default options
1414    #  %expansion - a hash showing how all abbreviations are expanded
1415    #  %category - a hash giving the general category of each option
1416    #  %option_range - a hash giving the valid ranges of certain options
1417
1418    # Note: a few options are not documented in the man page and usage
1419    # message. This is because these are experimental or debug options and
1420    # may or may not be retained in future versions.
1421    #
1422    # Here are the undocumented flags as far as I know.  Any of them
1423    # may disappear at any time.  They are mainly for fine-tuning
1424    # and debugging.
1425    #
1426    # fll --> fuzzy-line-length           # a trivial parameter which gets
1427    #                                       turned off for the extrude option
1428    #                                       which is mainly for debugging
1429    # scl --> short-concatenation-item-length   # helps break at '.'
1430    # recombine                           # for debugging line breaks
1431    # valign                              # for debugging vertical alignment
1432    # I   --> DIAGNOSTICS                 # for debugging
1433    ######################################################################
1434
1435    # here is a summary of the Getopt codes:
1436    # <none> does not take an argument
1437    # =s takes a mandatory string
1438    # :s takes an optional string  (DO NOT USE - filenames will get eaten up)
1439    # =i takes a mandatory integer
1440    # :i takes an optional integer (NOT RECOMMENDED - can cause trouble)
1441    # ! does not take an argument and may be negated
1442    #  i.e., -foo and -nofoo are allowed
1443    # a double dash signals the end of the options list
1444    #
1445    #---------------------------------------------------------------
1446    # Define the option string passed to GetOptions.
1447    #---------------------------------------------------------------
1448
1449    my @option_string   = ();
1450    my %expansion       = ();
1451    my %option_category = ();
1452    my %option_range    = ();
1453    my $rexpansion      = \%expansion;
1454
1455    # names of categories in manual
1456    # leading integers will allow sorting
1457    my @category_name = (
1458        '0. I/O control',
1459        '1. Basic formatting options',
1460        '2. Code indentation control',
1461        '3. Whitespace control',
1462        '4. Comment controls',
1463        '5. Linebreak controls',
1464        '6. Controlling list formatting',
1465        '7. Retaining or ignoring existing line breaks',
1466        '8. Blank line control',
1467        '9. Other controls',
1468        '10. HTML options',
1469        '11. pod2html options',
1470        '12. Controlling HTML properties',
1471        '13. Debugging',
1472    );
1473
1474    #  These options are parsed directly by perltidy:
1475    #    help h
1476    #    version v
1477    #  However, they are included in the option set so that they will
1478    #  be seen in the options dump.
1479
1480    # These long option names have no abbreviations or are treated specially
1481    @option_string = qw(
1482      html!
1483      noprofile
1484      no-profile
1485      npro
1486      recombine!
1487      valign!
1488      notidy
1489    );
1490
1491    my $category = 13;    # Debugging
1492    foreach (@option_string) {
1493        my $opt = $_;     # must avoid changing the actual flag
1494        $opt =~ s/!$//;
1495        $option_category{$opt} = $category_name[$category];
1496    }
1497
1498    $category = 11;                                       # HTML
1499    $option_category{html} = $category_name[$category];
1500
1501    # routine to install and check options
1502    my $add_option = sub {
1503        my ( $long_name, $short_name, $flag ) = @_;
1504        push @option_string, $long_name . $flag;
1505        $option_category{$long_name} = $category_name[$category];
1506        if ($short_name) {
1507            if ( $expansion{$short_name} ) {
1508                my $existing_name = $expansion{$short_name}[0];
1509                Die
1510"redefining abbreviation $short_name for $long_name; already used for $existing_name\n";
1511            }
1512            $expansion{$short_name} = [$long_name];
1513            if ( $flag eq '!' ) {
1514                my $nshort_name = 'n' . $short_name;
1515                my $nolong_name = 'no' . $long_name;
1516                if ( $expansion{$nshort_name} ) {
1517                    my $existing_name = $expansion{$nshort_name}[0];
1518                    Die
1519"attempting to redefine abbreviation $nshort_name for $nolong_name; already used for $existing_name\n";
1520                }
1521                $expansion{$nshort_name} = [$nolong_name];
1522            }
1523        }
1524    };
1525
1526    # Install long option names which have a simple abbreviation.
1527    # Options with code '!' get standard negation ('no' for long names,
1528    # 'n' for abbreviations).  Categories follow the manual.
1529
1530    ###########################
1531    $category = 0;    # I/O_Control
1532    ###########################
1533    $add_option->( 'backup-and-modify-in-place', 'b',     '!' );
1534    $add_option->( 'backup-file-extension',      'bext',  '=s' );
1535    $add_option->( 'force-read-binary',          'f',     '!' );
1536    $add_option->( 'format',                     'fmt',   '=s' );
1537    $add_option->( 'iterations',                 'it',    '=i' );
1538    $add_option->( 'logfile',                    'log',   '!' );
1539    $add_option->( 'logfile-gap',                'g',     ':i' );
1540    $add_option->( 'outfile',                    'o',     '=s' );
1541    $add_option->( 'output-file-extension',      'oext',  '=s' );
1542    $add_option->( 'output-path',                'opath', '=s' );
1543    $add_option->( 'profile',                    'pro',   '=s' );
1544    $add_option->( 'quiet',                      'q',     '!' );
1545    $add_option->( 'standard-error-output',      'se',    '!' );
1546    $add_option->( 'standard-output',            'st',    '!' );
1547    $add_option->( 'warning-output',             'w',     '!' );
1548
1549    # options which are both toggle switches and values moved here
1550    # to hide from tidyview (which does not show category 0 flags):
1551    # -ole moved here from category 1
1552    # -sil moved here from category 2
1553    $add_option->( 'output-line-ending',         'ole', '=s' );
1554    $add_option->( 'starting-indentation-level', 'sil', '=i' );
1555
1556    ########################################
1557    $category = 1;    # Basic formatting options
1558    ########################################
1559    $add_option->( 'check-syntax',                 'syn',  '!' );
1560    $add_option->( 'entab-leading-whitespace',     'et',   '=i' );
1561    $add_option->( 'indent-columns',               'i',    '=i' );
1562    $add_option->( 'maximum-line-length',          'l',    '=i' );
1563    $add_option->( 'variable-maximum-line-length', 'vmll', '!' );
1564    $add_option->( 'whitespace-cycle',             'wc',   '=i' );
1565    $add_option->( 'perl-syntax-check-flags',      'pscf', '=s' );
1566    $add_option->( 'preserve-line-endings',        'ple',  '!' );
1567    $add_option->( 'tabs',                         't',    '!' );
1568    $add_option->( 'default-tabsize',              'dt',   '=i' );
1569
1570    ########################################
1571    $category = 2;    # Code indentation control
1572    ########################################
1573    $add_option->( 'continuation-indentation',           'ci',   '=i' );
1574    $add_option->( 'line-up-parentheses',                'lp',   '!' );
1575    $add_option->( 'outdent-keyword-list',               'okwl', '=s' );
1576    $add_option->( 'outdent-keywords',                   'okw',  '!' );
1577    $add_option->( 'outdent-labels',                     'ola',  '!' );
1578    $add_option->( 'outdent-long-quotes',                'olq',  '!' );
1579    $add_option->( 'indent-closing-brace',               'icb',  '!' );
1580    $add_option->( 'closing-token-indentation',          'cti',  '=i' );
1581    $add_option->( 'closing-paren-indentation',          'cpi',  '=i' );
1582    $add_option->( 'closing-brace-indentation',          'cbi',  '=i' );
1583    $add_option->( 'closing-square-bracket-indentation', 'csbi', '=i' );
1584    $add_option->( 'brace-left-and-indent',              'bli',  '!' );
1585    $add_option->( 'brace-left-and-indent-list',         'blil', '=s' );
1586
1587    ########################################
1588    $category = 3;    # Whitespace control
1589    ########################################
1590    $add_option->( 'add-semicolons',                            'asc',   '!' );
1591    $add_option->( 'add-whitespace',                            'aws',   '!' );
1592    $add_option->( 'block-brace-tightness',                     'bbt',   '=i' );
1593    $add_option->( 'brace-tightness',                           'bt',    '=i' );
1594    $add_option->( 'delete-old-whitespace',                     'dws',   '!' );
1595    $add_option->( 'delete-semicolons',                         'dsm',   '!' );
1596    $add_option->( 'nospace-after-keyword',                     'nsak',  '=s' );
1597    $add_option->( 'nowant-left-space',                         'nwls',  '=s' );
1598    $add_option->( 'nowant-right-space',                        'nwrs',  '=s' );
1599    $add_option->( 'paren-tightness',                           'pt',    '=i' );
1600    $add_option->( 'space-after-keyword',                       'sak',   '=s' );
1601    $add_option->( 'space-for-semicolon',                       'sfs',   '!' );
1602    $add_option->( 'space-function-paren',                      'sfp',   '!' );
1603    $add_option->( 'space-keyword-paren',                       'skp',   '!' );
1604    $add_option->( 'space-terminal-semicolon',                  'sts',   '!' );
1605    $add_option->( 'square-bracket-tightness',                  'sbt',   '=i' );
1606    $add_option->( 'square-bracket-vertical-tightness',         'sbvt',  '=i' );
1607    $add_option->( 'square-bracket-vertical-tightness-closing', 'sbvtc', '=i' );
1608    $add_option->( 'tight-secret-operators',                    'tso',   '!' );
1609    $add_option->( 'trim-qw',                                   'tqw',   '!' );
1610    $add_option->( 'want-left-space',                           'wls',   '=s' );
1611    $add_option->( 'want-right-space',                          'wrs',   '=s' );
1612
1613    ########################################
1614    $category = 4;    # Comment controls
1615    ########################################
1616    $add_option->( 'closing-side-comment-else-flag',    'csce', '=i' );
1617    $add_option->( 'closing-side-comment-interval',     'csci', '=i' );
1618    $add_option->( 'closing-side-comment-list',         'cscl', '=s' );
1619    $add_option->( 'closing-side-comment-maximum-text', 'csct', '=i' );
1620    $add_option->( 'closing-side-comment-prefix',       'cscp', '=s' );
1621    $add_option->( 'closing-side-comment-warnings',     'cscw', '!' );
1622    $add_option->( 'closing-side-comments',             'csc',  '!' );
1623    $add_option->( 'closing-side-comments-balanced',    'cscb', '!' );
1624    $add_option->( 'format-skipping',                   'fs',   '!' );
1625    $add_option->( 'format-skipping-begin',             'fsb',  '=s' );
1626    $add_option->( 'format-skipping-end',               'fse',  '=s' );
1627    $add_option->( 'hanging-side-comments',             'hsc',  '!' );
1628    $add_option->( 'indent-block-comments',             'ibc',  '!' );
1629    $add_option->( 'indent-spaced-block-comments',      'isbc', '!' );
1630    $add_option->( 'fixed-position-side-comment',       'fpsc', '=i' );
1631    $add_option->( 'minimum-space-to-comment',          'msc',  '=i' );
1632    $add_option->( 'outdent-long-comments',             'olc',  '!' );
1633    $add_option->( 'outdent-static-block-comments',     'osbc', '!' );
1634    $add_option->( 'static-block-comment-prefix',       'sbcp', '=s' );
1635    $add_option->( 'static-block-comments',             'sbc',  '!' );
1636    $add_option->( 'static-side-comment-prefix',        'sscp', '=s' );
1637    $add_option->( 'static-side-comments',              'ssc',  '!' );
1638    $add_option->( 'ignore-side-comment-lengths',       'iscl', '!' );
1639
1640    ########################################
1641    $category = 5;    # Linebreak controls
1642    ########################################
1643    $add_option->( 'add-newlines',                            'anl',   '!' );
1644    $add_option->( 'block-brace-vertical-tightness',          'bbvt',  '=i' );
1645    $add_option->( 'block-brace-vertical-tightness-list',     'bbvtl', '=s' );
1646    $add_option->( 'brace-vertical-tightness',                'bvt',   '=i' );
1647    $add_option->( 'brace-vertical-tightness-closing',        'bvtc',  '=i' );
1648    $add_option->( 'cuddled-else',                            'ce',    '!' );
1649    $add_option->( 'delete-old-newlines',                     'dnl',   '!' );
1650    $add_option->( 'opening-brace-always-on-right',           'bar',   '!' );
1651    $add_option->( 'opening-brace-on-new-line',               'bl',    '!' );
1652    $add_option->( 'opening-hash-brace-right',                'ohbr',  '!' );
1653    $add_option->( 'opening-paren-right',                     'opr',   '!' );
1654    $add_option->( 'opening-square-bracket-right',            'osbr',  '!' );
1655    $add_option->( 'opening-anonymous-sub-brace-on-new-line', 'asbl',  '!' );
1656    $add_option->( 'opening-sub-brace-on-new-line',           'sbl',   '!' );
1657    $add_option->( 'paren-vertical-tightness',                'pvt',   '=i' );
1658    $add_option->( 'paren-vertical-tightness-closing',        'pvtc',  '=i' );
1659    $add_option->( 'stack-closing-block-brace',               'scbb',  '!' );
1660    $add_option->( 'stack-closing-hash-brace',                'schb',  '!' );
1661    $add_option->( 'stack-closing-paren',                     'scp',   '!' );
1662    $add_option->( 'stack-closing-square-bracket',            'scsb',  '!' );
1663    $add_option->( 'stack-opening-block-brace',               'sobb',  '!' );
1664    $add_option->( 'stack-opening-hash-brace',                'sohb',  '!' );
1665    $add_option->( 'stack-opening-paren',                     'sop',   '!' );
1666    $add_option->( 'stack-opening-square-bracket',            'sosb',  '!' );
1667    $add_option->( 'vertical-tightness',                      'vt',    '=i' );
1668    $add_option->( 'vertical-tightness-closing',              'vtc',   '=i' );
1669    $add_option->( 'want-break-after',                        'wba',   '=s' );
1670    $add_option->( 'want-break-before',                       'wbb',   '=s' );
1671    $add_option->( 'break-after-all-operators',               'baao',  '!' );
1672    $add_option->( 'break-before-all-operators',              'bbao',  '!' );
1673    $add_option->( 'keep-interior-semicolons',                'kis',   '!' );
1674
1675    ########################################
1676    $category = 6;    # Controlling list formatting
1677    ########################################
1678    $add_option->( 'break-at-old-comma-breakpoints', 'boc', '!' );
1679    $add_option->( 'comma-arrow-breakpoints',        'cab', '=i' );
1680    $add_option->( 'maximum-fields-per-table',       'mft', '=i' );
1681
1682    ########################################
1683    $category = 7;    # Retaining or ignoring existing line breaks
1684    ########################################
1685    $add_option->( 'break-at-old-keyword-breakpoints',   'bok', '!' );
1686    $add_option->( 'break-at-old-logical-breakpoints',   'bol', '!' );
1687    $add_option->( 'break-at-old-ternary-breakpoints',   'bot', '!' );
1688    $add_option->( 'break-at-old-attribute-breakpoints', 'boa', '!' );
1689    $add_option->( 'ignore-old-breakpoints',             'iob', '!' );
1690
1691    ########################################
1692    $category = 8;    # Blank line control
1693    ########################################
1694    $add_option->( 'blanks-before-blocks',            'bbb',  '!' );
1695    $add_option->( 'blanks-before-comments',          'bbc',  '!' );
1696    $add_option->( 'blank-lines-before-subs',         'blbs', '=i' );
1697    $add_option->( 'blank-lines-before-packages',     'blbp', '=i' );
1698    $add_option->( 'long-block-line-count',           'lbl',  '=i' );
1699    $add_option->( 'maximum-consecutive-blank-lines', 'mbl',  '=i' );
1700    $add_option->( 'keep-old-blank-lines',            'kbl',  '=i' );
1701
1702    ########################################
1703    $category = 9;    # Other controls
1704    ########################################
1705    $add_option->( 'delete-block-comments',        'dbc',  '!' );
1706    $add_option->( 'delete-closing-side-comments', 'dcsc', '!' );
1707    $add_option->( 'delete-pod',                   'dp',   '!' );
1708    $add_option->( 'delete-side-comments',         'dsc',  '!' );
1709    $add_option->( 'tee-block-comments',           'tbc',  '!' );
1710    $add_option->( 'tee-pod',                      'tp',   '!' );
1711    $add_option->( 'tee-side-comments',            'tsc',  '!' );
1712    $add_option->( 'look-for-autoloader',          'lal',  '!' );
1713    $add_option->( 'look-for-hash-bang',           'x',    '!' );
1714    $add_option->( 'look-for-selfloader',          'lsl',  '!' );
1715    $add_option->( 'pass-version-line',            'pvl',  '!' );
1716
1717    ########################################
1718    $category = 13;    # Debugging
1719    ########################################
1720    $add_option->( 'DEBUG',                           'D',    '!' );
1721    $add_option->( 'DIAGNOSTICS',                     'I',    '!' );
1722    $add_option->( 'dump-defaults',                   'ddf',  '!' );
1723    $add_option->( 'dump-long-names',                 'dln',  '!' );
1724    $add_option->( 'dump-options',                    'dop',  '!' );
1725    $add_option->( 'dump-profile',                    'dpro', '!' );
1726    $add_option->( 'dump-short-names',                'dsn',  '!' );
1727    $add_option->( 'dump-token-types',                'dtt',  '!' );
1728    $add_option->( 'dump-want-left-space',            'dwls', '!' );
1729    $add_option->( 'dump-want-right-space',           'dwrs', '!' );
1730    $add_option->( 'fuzzy-line-length',               'fll',  '!' );
1731    $add_option->( 'help',                            'h',    '' );
1732    $add_option->( 'short-concatenation-item-length', 'scl',  '=i' );
1733    $add_option->( 'show-options',                    'opt',  '!' );
1734    $add_option->( 'version',                         'v',    '' );
1735    $add_option->( 'memoize',                         'mem',  '!' );
1736
1737    #---------------------------------------------------------------------
1738
1739    # The Perl::Tidy::HtmlWriter will add its own options to the string
1740    Perl::Tidy::HtmlWriter->make_getopt_long_names( \@option_string );
1741
1742    ########################################
1743    # Set categories 10, 11, 12
1744    ########################################
1745    # Based on their known order
1746    $category = 12;    # HTML properties
1747    foreach my $opt (@option_string) {
1748        my $long_name = $opt;
1749        $long_name =~ s/(!|=.*|:.*)$//;
1750        unless ( defined( $option_category{$long_name} ) ) {
1751            if ( $long_name =~ /^html-linked/ ) {
1752                $category = 10;    # HTML options
1753            }
1754            elsif ( $long_name =~ /^pod2html/ ) {
1755                $category = 11;    # Pod2html
1756            }
1757            $option_category{$long_name} = $category_name[$category];
1758        }
1759    }
1760
1761    #---------------------------------------------------------------
1762    # Assign valid ranges to certain options
1763    #---------------------------------------------------------------
1764    # In the future, these may be used to make preliminary checks
1765    # hash keys are long names
1766    # If key or value is undefined:
1767    #   strings may have any value
1768    #   integer ranges are >=0
1769    # If value is defined:
1770    #   value is [qw(any valid words)] for strings
1771    #   value is [min, max] for integers
1772    #   if min is undefined, there is no lower limit
1773    #   if max is undefined, there is no upper limit
1774    # Parameters not listed here have defaults
1775    %option_range = (
1776        'format'             => [ 'tidy', 'html', 'user' ],
1777        'output-line-ending' => [ 'dos',  'win',  'mac', 'unix' ],
1778
1779        'block-brace-tightness'    => [ 0, 2 ],
1780        'brace-tightness'          => [ 0, 2 ],
1781        'paren-tightness'          => [ 0, 2 ],
1782        'square-bracket-tightness' => [ 0, 2 ],
1783
1784        'block-brace-vertical-tightness'            => [ 0, 2 ],
1785        'brace-vertical-tightness'                  => [ 0, 2 ],
1786        'brace-vertical-tightness-closing'          => [ 0, 2 ],
1787        'paren-vertical-tightness'                  => [ 0, 2 ],
1788        'paren-vertical-tightness-closing'          => [ 0, 2 ],
1789        'square-bracket-vertical-tightness'         => [ 0, 2 ],
1790        'square-bracket-vertical-tightness-closing' => [ 0, 2 ],
1791        'vertical-tightness'                        => [ 0, 2 ],
1792        'vertical-tightness-closing'                => [ 0, 2 ],
1793
1794        'closing-brace-indentation'          => [ 0, 3 ],
1795        'closing-paren-indentation'          => [ 0, 3 ],
1796        'closing-square-bracket-indentation' => [ 0, 3 ],
1797        'closing-token-indentation'          => [ 0, 3 ],
1798
1799        'closing-side-comment-else-flag' => [ 0, 2 ],
1800        'comma-arrow-breakpoints'        => [ 0, 5 ],
1801    );
1802
1803    # Note: we could actually allow negative ci if someone really wants it:
1804    # $option_range{'continuation-indentation'} = [ undef, undef ];
1805
1806    #---------------------------------------------------------------
1807    # Assign default values to the above options here, except
1808    # for 'outfile' and 'help'.
1809    # These settings should approximate the perlstyle(1) suggestions.
1810    #---------------------------------------------------------------
1811    my @defaults = qw(
1812      add-newlines
1813      add-semicolons
1814      add-whitespace
1815      blanks-before-blocks
1816      blanks-before-comments
1817      blank-lines-before-subs=1
1818      blank-lines-before-packages=1
1819      block-brace-tightness=0
1820      block-brace-vertical-tightness=0
1821      brace-tightness=1
1822      brace-vertical-tightness-closing=0
1823      brace-vertical-tightness=0
1824      break-at-old-logical-breakpoints
1825      break-at-old-ternary-breakpoints
1826      break-at-old-attribute-breakpoints
1827      break-at-old-keyword-breakpoints
1828      comma-arrow-breakpoints=5
1829      nocheck-syntax
1830      closing-side-comment-interval=6
1831      closing-side-comment-maximum-text=20
1832      closing-side-comment-else-flag=0
1833      closing-side-comments-balanced
1834      closing-paren-indentation=0
1835      closing-brace-indentation=0
1836      closing-square-bracket-indentation=0
1837      continuation-indentation=2
1838      delete-old-newlines
1839      delete-semicolons
1840      fuzzy-line-length
1841      hanging-side-comments
1842      indent-block-comments
1843      indent-columns=4
1844      iterations=1
1845      keep-old-blank-lines=1
1846      long-block-line-count=8
1847      look-for-autoloader
1848      look-for-selfloader
1849      maximum-consecutive-blank-lines=1
1850      maximum-fields-per-table=0
1851      maximum-line-length=80
1852      memoize
1853      minimum-space-to-comment=4
1854      nobrace-left-and-indent
1855      nocuddled-else
1856      nodelete-old-whitespace
1857      nohtml
1858      nologfile
1859      noquiet
1860      noshow-options
1861      nostatic-side-comments
1862      notabs
1863      nowarning-output
1864      outdent-labels
1865      outdent-long-quotes
1866      outdent-long-comments
1867      paren-tightness=1
1868      paren-vertical-tightness-closing=0
1869      paren-vertical-tightness=0
1870      pass-version-line
1871      recombine
1872      valign
1873      short-concatenation-item-length=8
1874      space-for-semicolon
1875      square-bracket-tightness=1
1876      square-bracket-vertical-tightness-closing=0
1877      square-bracket-vertical-tightness=0
1878      static-block-comments
1879      trim-qw
1880      format=tidy
1881      backup-file-extension=bak
1882      format-skipping
1883      default-tabsize=8
1884
1885      pod2html
1886      html-table-of-contents
1887      html-entities
1888    );
1889
1890    push @defaults, "perl-syntax-check-flags=-c -T";
1891
1892    #---------------------------------------------------------------
1893    # Define abbreviations which will be expanded into the above primitives.
1894    # These may be defined recursively.
1895    #---------------------------------------------------------------
1896    %expansion = (
1897        %expansion,
1898        'freeze-newlines'   => [qw(noadd-newlines nodelete-old-newlines)],
1899        'fnl'               => [qw(freeze-newlines)],
1900        'freeze-whitespace' => [qw(noadd-whitespace nodelete-old-whitespace)],
1901        'fws'               => [qw(freeze-whitespace)],
1902        'freeze-blank-lines' =>
1903          [qw(maximum-consecutive-blank-lines=0 keep-old-blank-lines=2)],
1904        'fbl'                => [qw(freeze-blank-lines)],
1905        'indent-only'        => [qw(freeze-newlines freeze-whitespace)],
1906        'outdent-long-lines' => [qw(outdent-long-quotes outdent-long-comments)],
1907        'nooutdent-long-lines' =>
1908          [qw(nooutdent-long-quotes nooutdent-long-comments)],
1909        'noll' => [qw(nooutdent-long-lines)],
1910        'io'   => [qw(indent-only)],
1911        'delete-all-comments' =>
1912          [qw(delete-block-comments delete-side-comments delete-pod)],
1913        'nodelete-all-comments' =>
1914          [qw(nodelete-block-comments nodelete-side-comments nodelete-pod)],
1915        'dac'  => [qw(delete-all-comments)],
1916        'ndac' => [qw(nodelete-all-comments)],
1917        'gnu'  => [qw(gnu-style)],
1918        'pbp'  => [qw(perl-best-practices)],
1919        'tee-all-comments' =>
1920          [qw(tee-block-comments tee-side-comments tee-pod)],
1921        'notee-all-comments' =>
1922          [qw(notee-block-comments notee-side-comments notee-pod)],
1923        'tac'   => [qw(tee-all-comments)],
1924        'ntac'  => [qw(notee-all-comments)],
1925        'html'  => [qw(format=html)],
1926        'nhtml' => [qw(format=tidy)],
1927        'tidy'  => [qw(format=tidy)],
1928
1929        'swallow-optional-blank-lines'   => [qw(kbl=0)],
1930        'noswallow-optional-blank-lines' => [qw(kbl=1)],
1931        'sob'                            => [qw(kbl=0)],
1932        'nsob'                           => [qw(kbl=1)],
1933
1934        'break-after-comma-arrows'   => [qw(cab=0)],
1935        'nobreak-after-comma-arrows' => [qw(cab=1)],
1936        'baa'                        => [qw(cab=0)],
1937        'nbaa'                       => [qw(cab=1)],
1938
1939        'blanks-before-subs'   => [qw(blbs=1 blbp=1)],
1940        'bbs'                  => [qw(blbs=1 blbp=1)],
1941        'noblanks-before-subs' => [qw(blbs=0 blbp=0)],
1942        'nbbs'                 => [qw(blbs=0 blbp=0)],
1943
1944        'break-at-old-trinary-breakpoints' => [qw(bot)],
1945
1946        'cti=0' => [qw(cpi=0 cbi=0 csbi=0)],
1947        'cti=1' => [qw(cpi=1 cbi=1 csbi=1)],
1948        'cti=2' => [qw(cpi=2 cbi=2 csbi=2)],
1949        'icp'   => [qw(cpi=2 cbi=2 csbi=2)],
1950        'nicp'  => [qw(cpi=0 cbi=0 csbi=0)],
1951
1952        'closing-token-indentation=0' => [qw(cpi=0 cbi=0 csbi=0)],
1953        'closing-token-indentation=1' => [qw(cpi=1 cbi=1 csbi=1)],
1954        'closing-token-indentation=2' => [qw(cpi=2 cbi=2 csbi=2)],
1955        'indent-closing-paren'        => [qw(cpi=2 cbi=2 csbi=2)],
1956        'noindent-closing-paren'      => [qw(cpi=0 cbi=0 csbi=0)],
1957
1958        'vt=0' => [qw(pvt=0 bvt=0 sbvt=0)],
1959        'vt=1' => [qw(pvt=1 bvt=1 sbvt=1)],
1960        'vt=2' => [qw(pvt=2 bvt=2 sbvt=2)],
1961
1962        'vertical-tightness=0' => [qw(pvt=0 bvt=0 sbvt=0)],
1963        'vertical-tightness=1' => [qw(pvt=1 bvt=1 sbvt=1)],
1964        'vertical-tightness=2' => [qw(pvt=2 bvt=2 sbvt=2)],
1965
1966        'vtc=0' => [qw(pvtc=0 bvtc=0 sbvtc=0)],
1967        'vtc=1' => [qw(pvtc=1 bvtc=1 sbvtc=1)],
1968        'vtc=2' => [qw(pvtc=2 bvtc=2 sbvtc=2)],
1969
1970        'vertical-tightness-closing=0' => [qw(pvtc=0 bvtc=0 sbvtc=0)],
1971        'vertical-tightness-closing=1' => [qw(pvtc=1 bvtc=1 sbvtc=1)],
1972        'vertical-tightness-closing=2' => [qw(pvtc=2 bvtc=2 sbvtc=2)],
1973
1974        'otr'                   => [qw(opr ohbr osbr)],
1975        'opening-token-right'   => [qw(opr ohbr osbr)],
1976        'notr'                  => [qw(nopr nohbr nosbr)],
1977        'noopening-token-right' => [qw(nopr nohbr nosbr)],
1978
1979        'sot'                    => [qw(sop sohb sosb)],
1980        'nsot'                   => [qw(nsop nsohb nsosb)],
1981        'stack-opening-tokens'   => [qw(sop sohb sosb)],
1982        'nostack-opening-tokens' => [qw(nsop nsohb nsosb)],
1983
1984        'sct'                    => [qw(scp schb scsb)],
1985        'stack-closing-tokens'   => => [qw(scp schb scsb)],
1986        'nsct'                   => [qw(nscp nschb nscsb)],
1987        'nostack-opening-tokens' => [qw(nscp nschb nscsb)],
1988
1989        'sac'                    => [qw(sot sct)],
1990        'nsac'                   => [qw(nsot nsct)],
1991        'stack-all-containers'   => [qw(sot sct)],
1992        'nostack-all-containers' => [qw(nsot nsct)],
1993
1994        'act=0'                      => [qw(pt=0 sbt=0 bt=0 bbt=0)],
1995        'act=1'                      => [qw(pt=1 sbt=1 bt=1 bbt=1)],
1996        'act=2'                      => [qw(pt=2 sbt=2 bt=2 bbt=2)],
1997        'all-containers-tightness=0' => [qw(pt=0 sbt=0 bt=0 bbt=0)],
1998        'all-containers-tightness=1' => [qw(pt=1 sbt=1 bt=1 bbt=1)],
1999        'all-containers-tightness=2' => [qw(pt=2 sbt=2 bt=2 bbt=2)],
2000
2001        'stack-opening-block-brace'   => [qw(bbvt=2 bbvtl=*)],
2002        'sobb'                        => [qw(bbvt=2 bbvtl=*)],
2003        'nostack-opening-block-brace' => [qw(bbvt=0)],
2004        'nsobb'                       => [qw(bbvt=0)],
2005
2006        'converge'   => [qw(it=4)],
2007        'noconverge' => [qw(it=1)],
2008        'conv'       => [qw(it=4)],
2009        'nconv'      => [qw(it=1)],
2010
2011        # 'mangle' originally deleted pod and comments, but to keep it
2012        # reversible, it no longer does.  But if you really want to
2013        # delete them, just use:
2014        #   -mangle -dac
2015
2016        # An interesting use for 'mangle' is to do this:
2017        #    perltidy -mangle myfile.pl -st | perltidy -o myfile.pl.new
2018        # which will form as many one-line blocks as possible
2019
2020        'mangle' => [
2021            qw(
2022              check-syntax
2023              keep-old-blank-lines=0
2024              delete-old-newlines
2025              delete-old-whitespace
2026              delete-semicolons
2027              indent-columns=0
2028              maximum-consecutive-blank-lines=0
2029              maximum-line-length=100000
2030              noadd-newlines
2031              noadd-semicolons
2032              noadd-whitespace
2033              noblanks-before-blocks
2034              blank-lines-before-subs=0
2035              blank-lines-before-packages=0
2036              notabs
2037              )
2038        ],
2039
2040        # 'extrude' originally deleted pod and comments, but to keep it
2041        # reversible, it no longer does.  But if you really want to
2042        # delete them, just use
2043        #   extrude -dac
2044        #
2045        # An interesting use for 'extrude' is to do this:
2046        #    perltidy -extrude myfile.pl -st | perltidy -o myfile.pl.new
2047        # which will break up all one-line blocks.
2048
2049        'extrude' => [
2050            qw(
2051              check-syntax
2052              ci=0
2053              delete-old-newlines
2054              delete-old-whitespace
2055              delete-semicolons
2056              indent-columns=0
2057              maximum-consecutive-blank-lines=0
2058              maximum-line-length=1
2059              noadd-semicolons
2060              noadd-whitespace
2061              noblanks-before-blocks
2062              blank-lines-before-subs=0
2063              blank-lines-before-packages=0
2064              nofuzzy-line-length
2065              notabs
2066              norecombine
2067              )
2068        ],
2069
2070        # this style tries to follow the GNU Coding Standards (which do
2071        # not really apply to perl but which are followed by some perl
2072        # programmers).
2073        'gnu-style' => [
2074            qw(
2075              lp bl noll pt=2 bt=2 sbt=2 cpi=1 csbi=1 cbi=1
2076              )
2077        ],
2078
2079        # Style suggested in Damian Conway's Perl Best Practices
2080        'perl-best-practices' => [
2081            qw(l=78 i=4 ci=4 st se vt=2 cti=0 pt=1 bt=1 sbt=1 bbt=1 nsfs nolq),
2082q(wbb=% + - * / x != == >= <= =~ !~ < > | & = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x=)
2083        ],
2084
2085        # Additional styles can be added here
2086    );
2087
2088    Perl::Tidy::HtmlWriter->make_abbreviated_names( \%expansion );
2089
2090    # Uncomment next line to dump all expansions for debugging:
2091    # dump_short_names(\%expansion);
2092    return (
2093        \@option_string,   \@defaults, \%expansion,
2094        \%option_category, \%option_range
2095    );
2096
2097}    # end of generate_options
2098
2099# Memoize process_command_line. Given same @ARGV passed in, return same
2100# values and same @ARGV back.
2101# This patch was supplied by Jonathan Swartz Nov 2012 and significantly speeds
2102# up masontidy (https://metacpan.org/module/masontidy)
2103
2104my %process_command_line_cache;
2105
2106sub process_command_line {
2107
2108    my (
2109        $perltidyrc_stream,  $is_Windows, $Windows_type,
2110        $rpending_complaint, $dump_options_type
2111    ) = @_;
2112
2113    my $use_cache = !defined($perltidyrc_stream) && !$dump_options_type;
2114    if ($use_cache) {
2115        my $cache_key = join( chr(28), @ARGV );
2116        if ( my $result = $process_command_line_cache{$cache_key} ) {
2117            my ( $argv, @retvals ) = @$result;
2118            @ARGV = @$argv;
2119            return @retvals;
2120        }
2121        else {
2122            my @retvals = _process_command_line(@_);
2123            $process_command_line_cache{$cache_key} = [ \@ARGV, @retvals ]
2124              if $retvals[0]->{'memoize'};
2125            return @retvals;
2126        }
2127    }
2128    else {
2129        return _process_command_line(@_);
2130    }
2131}
2132
2133# (note the underscore here)
2134sub _process_command_line {
2135
2136    my (
2137        $perltidyrc_stream,  $is_Windows, $Windows_type,
2138        $rpending_complaint, $dump_options_type
2139    ) = @_;
2140
2141    use Getopt::Long;
2142
2143    my (
2144        $roption_string,   $rdefaults, $rexpansion,
2145        $roption_category, $roption_range
2146    ) = generate_options();
2147
2148    #---------------------------------------------------------------
2149    # set the defaults by passing the above list through GetOptions
2150    #---------------------------------------------------------------
2151    my %Opts = ();
2152    {
2153        local @ARGV;
2154        my $i;
2155
2156        # do not load the defaults if we are just dumping perltidyrc
2157        unless ( $dump_options_type eq 'perltidyrc' ) {
2158            for $i (@$rdefaults) { push @ARGV, "--" . $i }
2159        }
2160
2161        # Patch to save users Getopt::Long configuration
2162        # and set to Getopt::Long defaults.  Use eval to avoid
2163        # breaking old versions of Perl without these routines.
2164        my $glc;
2165        eval { $glc = Getopt::Long::Configure() };
2166        unless ($@) {
2167            eval { Getopt::Long::ConfigDefaults() };
2168        }
2169        else { $glc = undef }
2170
2171        if ( !GetOptions( \%Opts, @$roption_string ) ) {
2172            Die "Programming Bug: error in setting default options";
2173        }
2174
2175        # Patch to put the previous Getopt::Long configuration back
2176        eval { Getopt::Long::Configure($glc) } if defined $glc;
2177    }
2178
2179    my $word;
2180    my @raw_options        = ();
2181    my $config_file        = "";
2182    my $saw_ignore_profile = 0;
2183    my $saw_extrude        = 0;
2184    my $saw_pbp            = 0;
2185    my $saw_dump_profile   = 0;
2186    my $i;
2187
2188    #---------------------------------------------------------------
2189    # Take a first look at the command-line parameters.  Do as many
2190    # immediate dumps as possible, which can avoid confusion if the
2191    # perltidyrc file has an error.
2192    #---------------------------------------------------------------
2193    foreach $i (@ARGV) {
2194
2195        $i =~ s/^--/-/;
2196        if ( $i =~ /^-(npro|noprofile|no-profile)$/ ) {
2197            $saw_ignore_profile = 1;
2198        }
2199
2200        # note: this must come before -pro and -profile, below:
2201        elsif ( $i =~ /^-(dump-profile|dpro)$/ ) {
2202            $saw_dump_profile = 1;
2203        }
2204        elsif ( $i =~ /^-(pro|profile)=(.+)/ ) {
2205            if ($config_file) {
2206                Warn
2207"Only one -pro=filename allowed, using '$2' instead of '$config_file'\n";
2208            }
2209            $config_file = $2;
2210
2211            # resolve <dir>/.../<file>, meaning look upwards from directory
2212            if ( defined($config_file) ) {
2213                if ( my ( $start_dir, $search_file ) =
2214                    ( $config_file =~ m{^(.*)\.\.\./(.*)$} ) )
2215                {
2216                    $start_dir = '.' if !$start_dir;
2217                    $start_dir = Cwd::realpath($start_dir);
2218                    if ( my $found_file =
2219                        find_file_upwards( $start_dir, $search_file ) )
2220                    {
2221                        $config_file = $found_file;
2222                    }
2223                }
2224            }
2225            unless ( -e $config_file ) {
2226                Warn "cannot find file given with -pro=$config_file: $!\n";
2227                $config_file = "";
2228            }
2229        }
2230        elsif ( $i =~ /^-(pro|profile)=?$/ ) {
2231            Die "usage: -pro=filename or --profile=filename, no spaces\n";
2232        }
2233        elsif ( $i =~ /^-extrude$/ ) {
2234            $saw_extrude = 1;
2235        }
2236        elsif ( $i =~ /^-(pbp|perl-best-practices)$/ ) {
2237            $saw_pbp = 1;
2238        }
2239        elsif ( $i =~ /^-(help|h|HELP|H|\?)$/ ) {
2240            usage();
2241            Exit 0;
2242        }
2243        elsif ( $i =~ /^-(version|v)$/ ) {
2244            show_version();
2245            Exit 0;
2246        }
2247        elsif ( $i =~ /^-(dump-defaults|ddf)$/ ) {
2248            dump_defaults(@$rdefaults);
2249            Exit 0;
2250        }
2251        elsif ( $i =~ /^-(dump-long-names|dln)$/ ) {
2252            dump_long_names(@$roption_string);
2253            Exit 0;
2254        }
2255        elsif ( $i =~ /^-(dump-short-names|dsn)$/ ) {
2256            dump_short_names($rexpansion);
2257            Exit 0;
2258        }
2259        elsif ( $i =~ /^-(dump-token-types|dtt)$/ ) {
2260            Perl::Tidy::Tokenizer->dump_token_types(*STDOUT);
2261            Exit 0;
2262        }
2263    }
2264
2265    if ( $saw_dump_profile && $saw_ignore_profile ) {
2266        Warn "No profile to dump because of -npro\n";
2267        Exit 1;
2268    }
2269
2270    #---------------------------------------------------------------
2271    # read any .perltidyrc configuration file
2272    #---------------------------------------------------------------
2273    unless ($saw_ignore_profile) {
2274
2275        # resolve possible conflict between $perltidyrc_stream passed
2276        # as call parameter to perltidy and -pro=filename on command
2277        # line.
2278        if ($perltidyrc_stream) {
2279            if ($config_file) {
2280                Warn <<EOM;
2281 Conflict: a perltidyrc configuration file was specified both as this
2282 perltidy call parameter: $perltidyrc_stream
2283 and with this -profile=$config_file.
2284 Using -profile=$config_file.
2285EOM
2286            }
2287            else {
2288                $config_file = $perltidyrc_stream;
2289            }
2290        }
2291
2292        # look for a config file if we don't have one yet
2293        my $rconfig_file_chatter;
2294        $$rconfig_file_chatter = "";
2295        $config_file =
2296          find_config_file( $is_Windows, $Windows_type, $rconfig_file_chatter,
2297            $rpending_complaint )
2298          unless $config_file;
2299
2300        # open any config file
2301        my $fh_config;
2302        if ($config_file) {
2303            ( $fh_config, $config_file ) =
2304              Perl::Tidy::streamhandle( $config_file, 'r' );
2305            unless ($fh_config) {
2306                $$rconfig_file_chatter .=
2307                  "# $config_file exists but cannot be opened\n";
2308            }
2309        }
2310
2311        if ($saw_dump_profile) {
2312            dump_config_file( $fh_config, $config_file, $rconfig_file_chatter );
2313            Exit 0;
2314        }
2315
2316        if ($fh_config) {
2317
2318            my ( $rconfig_list, $death_message, $_saw_pbp ) =
2319              read_config_file( $fh_config, $config_file, $rexpansion );
2320            Die $death_message if ($death_message);
2321            $saw_pbp ||= $_saw_pbp;
2322
2323            # process any .perltidyrc parameters right now so we can
2324            # localize errors
2325            if (@$rconfig_list) {
2326                local @ARGV = @$rconfig_list;
2327
2328                expand_command_abbreviations( $rexpansion, \@raw_options,
2329                    $config_file );
2330
2331                if ( !GetOptions( \%Opts, @$roption_string ) ) {
2332                    Die
2333"Error in this config file: $config_file  \nUse -npro to ignore this file, -h for help'\n";
2334                }
2335
2336                # Anything left in this local @ARGV is an error and must be
2337                # invalid bare words from the configuration file.  We cannot
2338                # check this earlier because bare words may have been valid
2339                # values for parameters.  We had to wait for GetOptions to have
2340                # a look at @ARGV.
2341                if (@ARGV) {
2342                    my $count = @ARGV;
2343                    my $str   = "\'" . pop(@ARGV) . "\'";
2344                    while ( my $param = pop(@ARGV) ) {
2345                        if ( length($str) < 70 ) {
2346                            $str .= ", '$param'";
2347                        }
2348                        else {
2349                            $str .= ", ...";
2350                            last;
2351                        }
2352                    }
2353                    Die <<EOM;
2354There are $count unrecognized values in the configuration file '$config_file':
2355$str
2356Use leading dashes for parameters.  Use -npro to ignore this file.
2357EOM
2358                }
2359
2360                # Undo any options which cause premature exit.  They are not
2361                # appropriate for a config file, and it could be hard to
2362                # diagnose the cause of the premature exit.
2363                foreach (
2364                    qw{
2365                    dump-defaults
2366                    dump-long-names
2367                    dump-options
2368                    dump-profile
2369                    dump-short-names
2370                    dump-token-types
2371                    dump-want-left-space
2372                    dump-want-right-space
2373                    help
2374                    stylesheet
2375                    version
2376                    }
2377                  )
2378                {
2379
2380                    if ( defined( $Opts{$_} ) ) {
2381                        delete $Opts{$_};
2382                        Warn "ignoring --$_ in config file: $config_file\n";
2383                    }
2384                }
2385            }
2386        }
2387    }
2388
2389    #---------------------------------------------------------------
2390    # now process the command line parameters
2391    #---------------------------------------------------------------
2392    expand_command_abbreviations( $rexpansion, \@raw_options, $config_file );
2393
2394    local $SIG{'__WARN__'} = sub { Warn $_[0] };
2395    if ( !GetOptions( \%Opts, @$roption_string ) ) {
2396        Die "Error on command line; for help try 'perltidy -h'\n";
2397    }
2398
2399    return (
2400        \%Opts,       $config_file,      \@raw_options,
2401        $saw_extrude, $saw_pbp,          $roption_string,
2402        $rexpansion,  $roption_category, $roption_range
2403    );
2404}    # end of process_command_line
2405
2406sub check_options {
2407
2408    my ( $rOpts, $is_Windows, $Windows_type, $rpending_complaint ) = @_;
2409
2410    #---------------------------------------------------------------
2411    # check and handle any interactions among the basic options..
2412    #---------------------------------------------------------------
2413
2414    # Since -vt, -vtc, and -cti are abbreviations, but under
2415    # msdos, an unquoted input parameter like vtc=1 will be
2416    # seen as 2 parameters, vtc and 1, so the abbreviations
2417    # won't be seen.  Therefore, we will catch them here if
2418    # they get through.
2419
2420    if ( defined $rOpts->{'vertical-tightness'} ) {
2421        my $vt = $rOpts->{'vertical-tightness'};
2422        $rOpts->{'paren-vertical-tightness'}          = $vt;
2423        $rOpts->{'square-bracket-vertical-tightness'} = $vt;
2424        $rOpts->{'brace-vertical-tightness'}          = $vt;
2425    }
2426
2427    if ( defined $rOpts->{'vertical-tightness-closing'} ) {
2428        my $vtc = $rOpts->{'vertical-tightness-closing'};
2429        $rOpts->{'paren-vertical-tightness-closing'}          = $vtc;
2430        $rOpts->{'square-bracket-vertical-tightness-closing'} = $vtc;
2431        $rOpts->{'brace-vertical-tightness-closing'}          = $vtc;
2432    }
2433
2434    if ( defined $rOpts->{'closing-token-indentation'} ) {
2435        my $cti = $rOpts->{'closing-token-indentation'};
2436        $rOpts->{'closing-square-bracket-indentation'} = $cti;
2437        $rOpts->{'closing-brace-indentation'}          = $cti;
2438        $rOpts->{'closing-paren-indentation'}          = $cti;
2439    }
2440
2441    # In quiet mode, there is no log file and hence no way to report
2442    # results of syntax check, so don't do it.
2443    if ( $rOpts->{'quiet'} ) {
2444        $rOpts->{'check-syntax'} = 0;
2445    }
2446
2447    # can't check syntax if no output
2448    if ( $rOpts->{'format'} ne 'tidy' ) {
2449        $rOpts->{'check-syntax'} = 0;
2450    }
2451
2452    # Never let Windows 9x/Me systems run syntax check -- this will prevent a
2453    # wide variety of nasty problems on these systems, because they cannot
2454    # reliably run backticks.  Don't even think about changing this!
2455    if (   $rOpts->{'check-syntax'}
2456        && $is_Windows
2457        && ( !$Windows_type || $Windows_type =~ /^(9|Me)/ ) )
2458    {
2459        $rOpts->{'check-syntax'} = 0;
2460    }
2461
2462    # It's really a bad idea to check syntax as root unless you wrote
2463    # the script yourself.  FIXME: not sure if this works with VMS
2464    unless ($is_Windows) {
2465
2466        if ( $< == 0 && $rOpts->{'check-syntax'} ) {
2467            $rOpts->{'check-syntax'} = 0;
2468            $$rpending_complaint .=
2469"Syntax check deactivated for safety; you shouldn't run this as root\n";
2470        }
2471    }
2472
2473    # check iteration count and quietly fix if necessary:
2474    # - iterations option only applies to code beautification mode
2475    # - the convergence check should stop most runs on iteration 2, and
2476    #   virtually all on iteration 3.  But we'll allow up to 6.
2477    if ( $rOpts->{'format'} ne 'tidy' ) {
2478        $rOpts->{'iterations'} = 1;
2479    }
2480    elsif ( defined( $rOpts->{'iterations'} ) ) {
2481        if    ( $rOpts->{'iterations'} <= 0 ) { $rOpts->{'iterations'} = 1 }
2482        elsif ( $rOpts->{'iterations'} > 6 )  { $rOpts->{'iterations'} = 6 }
2483    }
2484    else {
2485        $rOpts->{'iterations'} = 1;
2486    }
2487
2488    # check for reasonable number of blank lines and fix to avoid problems
2489    if ( $rOpts->{'blank-lines-before-subs'} ) {
2490        if ( $rOpts->{'blank-lines-before-subs'} < 0 ) {
2491            $rOpts->{'blank-lines-before-subs'} = 0;
2492            Warn "negative value of -blbs, setting 0\n";
2493        }
2494        if ( $rOpts->{'blank-lines-before-subs'} > 100 ) {
2495            Warn "unreasonably large value of -blbs, reducing\n";
2496            $rOpts->{'blank-lines-before-subs'} = 100;
2497        }
2498    }
2499    if ( $rOpts->{'blank-lines-before-packages'} ) {
2500        if ( $rOpts->{'blank-lines-before-packages'} < 0 ) {
2501            Warn "negative value of -blbp, setting 0\n";
2502            $rOpts->{'blank-lines-before-packages'} = 0;
2503        }
2504        if ( $rOpts->{'blank-lines-before-packages'} > 100 ) {
2505            Warn "unreasonably large value of -blbp, reducing\n";
2506            $rOpts->{'blank-lines-before-packages'} = 100;
2507        }
2508    }
2509
2510    # see if user set a non-negative logfile-gap
2511    if ( defined( $rOpts->{'logfile-gap'} ) && $rOpts->{'logfile-gap'} >= 0 ) {
2512
2513        # a zero gap will be taken as a 1
2514        if ( $rOpts->{'logfile-gap'} == 0 ) {
2515            $rOpts->{'logfile-gap'} = 1;
2516        }
2517
2518        # setting a non-negative logfile gap causes logfile to be saved
2519        $rOpts->{'logfile'} = 1;
2520    }
2521
2522    # not setting logfile gap, or setting it negative, causes default of 50
2523    else {
2524        $rOpts->{'logfile-gap'} = 50;
2525    }
2526
2527    # set short-cut flag when only indentation is to be done.
2528    # Note that the user may or may not have already set the
2529    # indent-only flag.
2530    if (   !$rOpts->{'add-whitespace'}
2531        && !$rOpts->{'delete-old-whitespace'}
2532        && !$rOpts->{'add-newlines'}
2533        && !$rOpts->{'delete-old-newlines'} )
2534    {
2535        $rOpts->{'indent-only'} = 1;
2536    }
2537
2538    # -isbc implies -ibc
2539    if ( $rOpts->{'indent-spaced-block-comments'} ) {
2540        $rOpts->{'indent-block-comments'} = 1;
2541    }
2542
2543    # -bli flag implies -bl
2544    if ( $rOpts->{'brace-left-and-indent'} ) {
2545        $rOpts->{'opening-brace-on-new-line'} = 1;
2546    }
2547
2548    if (   $rOpts->{'opening-brace-always-on-right'}
2549        && $rOpts->{'opening-brace-on-new-line'} )
2550    {
2551        Warn <<EOM;
2552 Conflict: you specified both 'opening-brace-always-on-right' (-bar) and
2553  'opening-brace-on-new-line' (-bl).  Ignoring -bl.
2554EOM
2555        $rOpts->{'opening-brace-on-new-line'} = 0;
2556    }
2557
2558    # it simplifies things if -bl is 0 rather than undefined
2559    if ( !defined( $rOpts->{'opening-brace-on-new-line'} ) ) {
2560        $rOpts->{'opening-brace-on-new-line'} = 0;
2561    }
2562
2563    # -sbl defaults to -bl if not defined
2564    if ( !defined( $rOpts->{'opening-sub-brace-on-new-line'} ) ) {
2565        $rOpts->{'opening-sub-brace-on-new-line'} =
2566          $rOpts->{'opening-brace-on-new-line'};
2567    }
2568
2569    if ( $rOpts->{'entab-leading-whitespace'} ) {
2570        if ( $rOpts->{'entab-leading-whitespace'} < 0 ) {
2571            Warn "-et=n must use a positive integer; ignoring -et\n";
2572            $rOpts->{'entab-leading-whitespace'} = undef;
2573        }
2574
2575        # entab leading whitespace has priority over the older 'tabs' option
2576        if ( $rOpts->{'tabs'} ) { $rOpts->{'tabs'} = 0; }
2577    }
2578
2579    # set a default tabsize to be used in guessing the starting indentation
2580    # level if and only if this run does not use tabs and the old code does
2581    # use tabs
2582    if ( $rOpts->{'default-tabsize'} ) {
2583        if ( $rOpts->{'default-tabsize'} < 0 ) {
2584            Warn "negative value of -dt, setting 0\n";
2585            $rOpts->{'default-tabsize'} = 0;
2586        }
2587        if ( $rOpts->{'default-tabsize'} > 20 ) {
2588            Warn "unreasonably large value of -dt, reducing\n";
2589            $rOpts->{'default-tabsize'} = 20;
2590        }
2591    }
2592    else {
2593        $rOpts->{'default-tabsize'} = 8;
2594    }
2595
2596    # Define $tabsize, the number of spaces per tab for use in
2597    # guessing the indentation of source lines with leading tabs.
2598    # Assume same as for this run if tabs are used , otherwise assume
2599    # a default value, typically 8
2600    my $tabsize =
2601        $rOpts->{'entab-leading-whitespace'}
2602      ? $rOpts->{'entab-leading-whitespace'}
2603      : $rOpts->{'tabs'} ? $rOpts->{'indent-columns'}
2604      :                    $rOpts->{'default-tabsize'};
2605    return $tabsize;
2606}
2607
2608sub find_file_upwards {
2609    my ( $search_dir, $search_file ) = @_;
2610
2611    $search_dir =~ s{/+$}{};
2612    $search_file =~ s{^/+}{};
2613
2614    while (1) {
2615        my $try_path = "$search_dir/$search_file";
2616        if ( -f $try_path ) {
2617            return $try_path;
2618        }
2619        elsif ( $search_dir eq '/' ) {
2620            return undef;
2621        }
2622        else {
2623            $search_dir = dirname($search_dir);
2624        }
2625    }
2626}
2627
2628sub expand_command_abbreviations {
2629
2630    # go through @ARGV and expand any abbreviations
2631
2632    my ( $rexpansion, $rraw_options, $config_file ) = @_;
2633    my ($word);
2634
2635    # set a pass limit to prevent an infinite loop;
2636    # 10 should be plenty, but it may be increased to allow deeply
2637    # nested expansions.
2638    my $max_passes = 10;
2639    my @new_argv   = ();
2640
2641    # keep looping until all expansions have been converted into actual
2642    # dash parameters..
2643    for ( my $pass_count = 0 ; $pass_count <= $max_passes ; $pass_count++ ) {
2644        my @new_argv     = ();
2645        my $abbrev_count = 0;
2646
2647        # loop over each item in @ARGV..
2648        foreach $word (@ARGV) {
2649
2650            # convert any leading 'no-' to just 'no'
2651            if ( $word =~ /^(-[-]?no)-(.*)/ ) { $word = $1 . $2 }
2652
2653            # if it is a dash flag (instead of a file name)..
2654            if ( $word =~ /^-[-]?([\w\-]+)(.*)/ ) {
2655
2656                my $abr   = $1;
2657                my $flags = $2;
2658
2659                # save the raw input for debug output in case of circular refs
2660                if ( $pass_count == 0 ) {
2661                    push( @$rraw_options, $word );
2662                }
2663
2664                # recombine abbreviation and flag, if necessary,
2665                # to allow abbreviations with arguments such as '-vt=1'
2666                if ( $rexpansion->{ $abr . $flags } ) {
2667                    $abr   = $abr . $flags;
2668                    $flags = "";
2669                }
2670
2671                # if we see this dash item in the expansion hash..
2672                if ( $rexpansion->{$abr} ) {
2673                    $abbrev_count++;
2674
2675                    # stuff all of the words that it expands to into the
2676                    # new arg list for the next pass
2677                    foreach my $abbrev ( @{ $rexpansion->{$abr} } ) {
2678                        next unless $abbrev;    # for safety; shouldn't happen
2679                        push( @new_argv, '--' . $abbrev . $flags );
2680                    }
2681                }
2682
2683                # not in expansion hash, must be actual long name
2684                else {
2685                    push( @new_argv, $word );
2686                }
2687            }
2688
2689            # not a dash item, so just save it for the next pass
2690            else {
2691                push( @new_argv, $word );
2692            }
2693        }    # end of this pass
2694
2695        # update parameter list @ARGV to the new one
2696        @ARGV = @new_argv;
2697        last unless ( $abbrev_count > 0 );
2698
2699        # make sure we are not in an infinite loop
2700        if ( $pass_count == $max_passes ) {
2701            local $" = ')(';
2702            Warn <<EOM;
2703I'm tired. We seem to be in an infinite loop trying to expand aliases.
2704Here are the raw options;
2705(rraw_options)
2706EOM
2707            my $num = @new_argv;
2708            if ( $num < 50 ) {
2709                Warn <<EOM;
2710After $max_passes passes here is ARGV
2711(@new_argv)
2712EOM
2713            }
2714            else {
2715                Warn <<EOM;
2716After $max_passes passes ARGV has $num entries
2717EOM
2718            }
2719
2720            if ($config_file) {
2721                Die <<"DIE";
2722Please check your configuration file $config_file for circular-references.
2723To deactivate it, use -npro.
2724DIE
2725            }
2726            else {
2727                Die <<'DIE';
2728Program bug - circular-references in the %expansion hash, probably due to
2729a recent program change.
2730DIE
2731            }
2732        }    # end of check for circular references
2733    }    # end of loop over all passes
2734}
2735
2736# Debug routine -- this will dump the expansion hash
2737sub dump_short_names {
2738    my $rexpansion = shift;
2739    print STDOUT <<EOM;
2740List of short names.  This list shows how all abbreviations are
2741translated into other abbreviations and, eventually, into long names.
2742New abbreviations may be defined in a .perltidyrc file.
2743For a list of all long names, use perltidy --dump-long-names (-dln).
2744--------------------------------------------------------------------------
2745EOM
2746    foreach my $abbrev ( sort keys %$rexpansion ) {
2747        my @list = @{ $$rexpansion{$abbrev} };
2748        print STDOUT "$abbrev --> @list\n";
2749    }
2750}
2751
2752sub check_vms_filename {
2753
2754    # given a valid filename (the perltidy input file)
2755    # create a modified filename and separator character
2756    # suitable for VMS.
2757    #
2758    # Contributed by Michael Cartmell
2759    #
2760    my ( $base, $path ) = fileparse( $_[0] );
2761
2762    # remove explicit ; version
2763    $base =~ s/;-?\d*$//
2764
2765      # remove explicit . version ie two dots in filename NB ^ escapes a dot
2766      or $base =~ s/(          # begin capture $1
2767                  (?:^|[^^])\. # match a dot not preceded by a caret
2768                  (?:          # followed by nothing
2769                    |          # or
2770                    .*[^^]     # anything ending in a non caret
2771                  )
2772                )              # end capture $1
2773                \.-?\d*$       # match . version number
2774              /$1/x;
2775
2776    # normalise filename, if there are no unescaped dots then append one
2777    $base .= '.' unless $base =~ /(?:^|[^^])\./;
2778
2779    # if we don't already have an extension then we just append the extension
2780    my $separator = ( $base =~ /\.$/ ) ? "" : "_";
2781    return ( $path . $base, $separator );
2782}
2783
2784sub Win_OS_Type {
2785
2786    # TODO: are these more standard names?
2787    # Win32s Win95 Win98 WinMe WinNT3.51 WinNT4 Win2000 WinXP/.Net Win2003
2788
2789    # Returns a string that determines what MS OS we are on.
2790    # Returns win32s,95,98,Me,NT3.51,NT4,2000,XP/.Net,Win2003
2791    # Returns blank string if not an MS system.
2792    # Original code contributed by: Yves Orton
2793    # We need to know this to decide where to look for config files
2794
2795    my $rpending_complaint = shift;
2796    my $os                 = "";
2797    return $os unless $^O =~ /win32|dos/i;    # is it a MS box?
2798
2799    # Systems built from Perl source may not have Win32.pm
2800    # But probably have Win32::GetOSVersion() anyway so the
2801    # following line is not 'required':
2802    # return $os unless eval('require Win32');
2803
2804    # Use the standard API call to determine the version
2805    my ( $undef, $major, $minor, $build, $id );
2806    eval { ( $undef, $major, $minor, $build, $id ) = Win32::GetOSVersion() };
2807
2808    #
2809    #    NAME                   ID   MAJOR  MINOR
2810    #    Windows NT 4           2      4       0
2811    #    Windows 2000           2      5       0
2812    #    Windows XP             2      5       1
2813    #    Windows Server 2003    2      5       2
2814
2815    return "win32s" unless $id;    # If id==0 then its a win32s box.
2816    $os = {                        # Magic numbers from MSDN
2817                                   # documentation of GetOSVersion
2818        1 => {
2819            0  => "95",
2820            10 => "98",
2821            90 => "Me"
2822        },
2823        2 => {
2824            0  => "2000",          # or NT 4, see below
2825            1  => "XP/.Net",
2826            2  => "Win2003",
2827            51 => "NT3.51"
2828        }
2829    }->{$id}->{$minor};
2830
2831    # If $os is undefined, the above code is out of date.  Suggested updates
2832    # are welcome.
2833    unless ( defined $os ) {
2834        $os = "";
2835        $$rpending_complaint .= <<EOS;
2836Error trying to discover Win_OS_Type: $id:$major:$minor Has no name of record!
2837We won't be able to look for a system-wide config file.
2838EOS
2839    }
2840
2841    # Unfortunately the logic used for the various versions isn't so clever..
2842    # so we have to handle an outside case.
2843    return ( $os eq "2000" && $major != 5 ) ? "NT4" : $os;
2844}
2845
2846sub is_unix {
2847    return
2848         ( $^O !~ /win32|dos/i )
2849      && ( $^O ne 'VMS' )
2850      && ( $^O ne 'OS2' )
2851      && ( $^O ne 'MacOS' );
2852}
2853
2854sub look_for_Windows {
2855
2856    # determine Windows sub-type and location of
2857    # system-wide configuration files
2858    my $rpending_complaint = shift;
2859    my $is_Windows         = ( $^O =~ /win32|dos/i );
2860    my $Windows_type       = Win_OS_Type($rpending_complaint) if $is_Windows;
2861    return ( $is_Windows, $Windows_type );
2862}
2863
2864sub find_config_file {
2865
2866    # look for a .perltidyrc configuration file
2867    # For Windows also look for a file named perltidy.ini
2868    my ( $is_Windows, $Windows_type, $rconfig_file_chatter,
2869        $rpending_complaint ) = @_;
2870
2871    $$rconfig_file_chatter .= "# Config file search...system reported as:";
2872    if ($is_Windows) {
2873        $$rconfig_file_chatter .= "Windows $Windows_type\n";
2874    }
2875    else {
2876        $$rconfig_file_chatter .= " $^O\n";
2877    }
2878
2879    # sub to check file existence and record all tests
2880    my $exists_config_file = sub {
2881        my $config_file = shift;
2882        return 0 unless $config_file;
2883        $$rconfig_file_chatter .= "# Testing: $config_file\n";
2884        return -f $config_file;
2885    };
2886
2887    my $config_file;
2888
2889    # look in current directory first
2890    $config_file = ".perltidyrc";
2891    return $config_file if $exists_config_file->($config_file);
2892    if ($is_Windows) {
2893        $config_file = "perltidy.ini";
2894        return $config_file if $exists_config_file->($config_file);
2895    }
2896
2897    # Default environment vars.
2898    my @envs = qw(PERLTIDY HOME);
2899
2900    # Check the NT/2k/XP locations, first a local machine def, then a
2901    # network def
2902    push @envs, qw(USERPROFILE HOMESHARE) if $^O =~ /win32/i;
2903
2904    # Now go through the environment ...
2905    foreach my $var (@envs) {
2906        $$rconfig_file_chatter .= "# Examining: \$ENV{$var}";
2907        if ( defined( $ENV{$var} ) ) {
2908            $$rconfig_file_chatter .= " = $ENV{$var}\n";
2909
2910            # test ENV{ PERLTIDY } as file:
2911            if ( $var eq 'PERLTIDY' ) {
2912                $config_file = "$ENV{$var}";
2913                return $config_file if $exists_config_file->($config_file);
2914            }
2915
2916            # test ENV as directory:
2917            $config_file = catfile( $ENV{$var}, ".perltidyrc" );
2918            return $config_file if $exists_config_file->($config_file);
2919
2920            if ($is_Windows) {
2921                $config_file = catfile( $ENV{$var}, "perltidy.ini" );
2922                return $config_file if $exists_config_file->($config_file);
2923            }
2924        }
2925        else {
2926            $$rconfig_file_chatter .= "\n";
2927        }
2928    }
2929
2930    # then look for a system-wide definition
2931    # where to look varies with OS
2932    if ($is_Windows) {
2933
2934        if ($Windows_type) {
2935            my ( $os, $system, $allusers ) =
2936              Win_Config_Locs( $rpending_complaint, $Windows_type );
2937
2938            # Check All Users directory, if there is one.
2939            # i.e. C:\Documents and Settings\User\perltidy.ini
2940            if ($allusers) {
2941
2942                $config_file = catfile( $allusers, ".perltidyrc" );
2943                return $config_file if $exists_config_file->($config_file);
2944
2945                $config_file = catfile( $allusers, "perltidy.ini" );
2946                return $config_file if $exists_config_file->($config_file);
2947            }
2948
2949            # Check system directory.
2950            # retain old code in case someone has been able to create
2951            # a file with a leading period.
2952            $config_file = catfile( $system, ".perltidyrc" );
2953            return $config_file if $exists_config_file->($config_file);
2954
2955            $config_file = catfile( $system, "perltidy.ini" );
2956            return $config_file if $exists_config_file->($config_file);
2957        }
2958    }
2959
2960    # Place to add customization code for other systems
2961    elsif ( $^O eq 'OS2' ) {
2962    }
2963    elsif ( $^O eq 'MacOS' ) {
2964    }
2965    elsif ( $^O eq 'VMS' ) {
2966    }
2967
2968    # Assume some kind of Unix
2969    else {
2970
2971        $config_file = "/usr/local/etc/perltidyrc";
2972        return $config_file if $exists_config_file->($config_file);
2973
2974        $config_file = "/etc/perltidyrc";
2975        return $config_file if $exists_config_file->($config_file);
2976    }
2977
2978    # Couldn't find a config file
2979    return;
2980}
2981
2982sub Win_Config_Locs {
2983
2984    # In scalar context returns the OS name (95 98 ME NT3.51 NT4 2000 XP),
2985    # or undef if its not a win32 OS.  In list context returns OS, System
2986    # Directory, and All Users Directory.  All Users will be empty on a
2987    # 9x/Me box.  Contributed by: Yves Orton.
2988
2989    my $rpending_complaint = shift;
2990    my $os = (@_) ? shift : Win_OS_Type();
2991    return unless $os;
2992
2993    my $system   = "";
2994    my $allusers = "";
2995
2996    if ( $os =~ /9[58]|Me/ ) {
2997        $system = "C:/Windows";
2998    }
2999    elsif ( $os =~ /NT|XP|200?/ ) {
3000        $system = ( $os =~ /XP/ ) ? "C:/Windows/" : "C:/WinNT/";
3001        $allusers =
3002          ( $os =~ /NT/ )
3003          ? "C:/WinNT/profiles/All Users/"
3004          : "C:/Documents and Settings/All Users/";
3005    }
3006    else {
3007
3008        # This currently would only happen on a win32s computer.  I dont have
3009        # one to test, so I am unsure how to proceed.  Suggestions welcome!
3010        $$rpending_complaint .=
3011"I dont know a sensible place to look for config files on an $os system.\n";
3012        return;
3013    }
3014    return wantarray ? ( $os, $system, $allusers ) : $os;
3015}
3016
3017sub dump_config_file {
3018    my $fh                   = shift;
3019    my $config_file          = shift;
3020    my $rconfig_file_chatter = shift;
3021    print STDOUT "$$rconfig_file_chatter";
3022    if ($fh) {
3023        print STDOUT "# Dump of file: '$config_file'\n";
3024        while ( my $line = $fh->getline() ) { print STDOUT $line }
3025        eval { $fh->close() };
3026    }
3027    else {
3028        print STDOUT "# ...no config file found\n";
3029    }
3030}
3031
3032sub read_config_file {
3033
3034    my ( $fh, $config_file, $rexpansion ) = @_;
3035    my @config_list = ();
3036    my $saw_pbp;
3037
3038    # file is bad if non-empty $death_message is returned
3039    my $death_message = "";
3040
3041    my $name = undef;
3042    my $line_no;
3043    while ( my $line = $fh->getline() ) {
3044        $line_no++;
3045        chomp $line;
3046        ( $line, $death_message ) =
3047          strip_comment( $line, $config_file, $line_no );
3048        last if ($death_message);
3049        next unless $line;
3050        $line =~ s/^\s*(.*?)\s*$/$1/;    # trim both ends
3051        next unless $line;
3052
3053        # look for something of the general form
3054        #    newname { body }
3055        # or just
3056        #    body
3057
3058        my $body = $line;
3059        my ($newname);
3060        if ( $line =~ /^((\w+)\s*\{)(.*)\}$/ ) {
3061            ( $newname, $body ) = ( $2, $3, );
3062        }
3063        if ($body) {
3064
3065            if ( !$saw_pbp && $body =~ /-(pbp|perl-best-practices)/ ) {
3066                $saw_pbp = 1;
3067            }
3068
3069            # handle a new alias definition
3070            if ($newname) {
3071                if ($name) {
3072                    $death_message =
3073"No '}' seen after $name and before $newname in config file $config_file line $.\n";
3074                    last;
3075                }
3076                $name = $newname;
3077
3078                if ( ${$rexpansion}{$name} ) {
3079                    local $" = ')(';
3080                    my @names = sort keys %$rexpansion;
3081                    $death_message =
3082                        "Here is a list of all installed aliases\n(@names)\n"
3083                      . "Attempting to redefine alias ($name) in config file $config_file line $.\n";
3084                    last;
3085                }
3086                ${$rexpansion}{$name} = [];
3087            }
3088
3089            # now do the body
3090            if ($body) {
3091
3092                my ( $rbody_parts, $msg ) = parse_args($body);
3093                if ($msg) {
3094                    $death_message = <<EOM;
3095Error reading file '$config_file' at line number $line_no.
3096$msg
3097Please fix this line or use -npro to avoid reading this file
3098EOM
3099                    last;
3100                }
3101
3102                if ($name) {
3103
3104                    # remove leading dashes if this is an alias
3105                    foreach (@$rbody_parts) { s/^\-+//; }
3106                    push @{ ${$rexpansion}{$name} }, @$rbody_parts;
3107                }
3108                else {
3109                    push( @config_list, @$rbody_parts );
3110                }
3111            }
3112        }
3113    }
3114    eval { $fh->close() };
3115    return ( \@config_list, $death_message, $saw_pbp );
3116}
3117
3118sub strip_comment {
3119
3120    # Strip any comment from a command line
3121    my ( $instr, $config_file, $line_no ) = @_;
3122    my $msg = "";
3123
3124    # check for full-line comment
3125    if ( $instr =~ /^\s*#/ ) {
3126        return ( "", $msg );
3127    }
3128
3129    # nothing to do if no comments
3130    if ( $instr !~ /#/ ) {
3131        return ( $instr, $msg );
3132    }
3133
3134    # handle case of no quotes
3135    elsif ( $instr !~ /['"]/ ) {
3136
3137        # We now require a space before the # of a side comment
3138        # this allows something like:
3139        #    -sbcp=#
3140        # Otherwise, it would have to be quoted:
3141        #    -sbcp='#'
3142        $instr =~ s/\s+\#.*$//;
3143        return ( $instr, $msg );
3144    }
3145
3146    # handle comments and quotes
3147    my $outstr     = "";
3148    my $quote_char = "";
3149    while (1) {
3150
3151        # looking for ending quote character
3152        if ($quote_char) {
3153            if ( $instr =~ /\G($quote_char)/gc ) {
3154                $quote_char = "";
3155                $outstr .= $1;
3156            }
3157            elsif ( $instr =~ /\G(.)/gc ) {
3158                $outstr .= $1;
3159            }
3160
3161            # error..we reached the end without seeing the ending quote char
3162            else {
3163                $msg = <<EOM;
3164Error reading file $config_file at line number $line_no.
3165Did not see ending quote character <$quote_char> in this text:
3166$instr
3167Please fix this line or use -npro to avoid reading this file
3168EOM
3169                last;
3170            }
3171        }
3172
3173        # accumulating characters and looking for start of a quoted string
3174        else {
3175            if ( $instr =~ /\G([\"\'])/gc ) {
3176                $outstr .= $1;
3177                $quote_char = $1;
3178            }
3179
3180            # Note: not yet enforcing the space-before-hash rule for side
3181            # comments if the parameter is quoted.
3182            elsif ( $instr =~ /\G#/gc ) {
3183                last;
3184            }
3185            elsif ( $instr =~ /\G(.)/gc ) {
3186                $outstr .= $1;
3187            }
3188            else {
3189                last;
3190            }
3191        }
3192    }
3193    return ( $outstr, $msg );
3194}
3195
3196sub parse_args {
3197
3198    # Parse a command string containing multiple string with possible
3199    # quotes, into individual commands.  It might look like this, for example:
3200    #
3201    #    -wba=" + - "  -some-thing -wbb='. && ||'
3202    #
3203    # There is no need, at present, to handle escaped quote characters.
3204    # (They are not perltidy tokens, so needn't be in strings).
3205
3206    my ($body)     = @_;
3207    my @body_parts = ();
3208    my $quote_char = "";
3209    my $part       = "";
3210    my $msg        = "";
3211    while (1) {
3212
3213        # looking for ending quote character
3214        if ($quote_char) {
3215            if ( $body =~ /\G($quote_char)/gc ) {
3216                $quote_char = "";
3217            }
3218            elsif ( $body =~ /\G(.)/gc ) {
3219                $part .= $1;
3220            }
3221
3222            # error..we reached the end without seeing the ending quote char
3223            else {
3224                if ( length($part) ) { push @body_parts, $part; }
3225                $msg = <<EOM;
3226Did not see ending quote character <$quote_char> in this text:
3227$body
3228EOM
3229                last;
3230            }
3231        }
3232
3233        # accumulating characters and looking for start of a quoted string
3234        else {
3235            if ( $body =~ /\G([\"\'])/gc ) {
3236                $quote_char = $1;
3237            }
3238            elsif ( $body =~ /\G(\s+)/gc ) {
3239                if ( length($part) ) { push @body_parts, $part; }
3240                $part = "";
3241            }
3242            elsif ( $body =~ /\G(.)/gc ) {
3243                $part .= $1;
3244            }
3245            else {
3246                if ( length($part) ) { push @body_parts, $part; }
3247                last;
3248            }
3249        }
3250    }
3251    return ( \@body_parts, $msg );
3252}
3253
3254sub dump_long_names {
3255
3256    my @names = sort @_;
3257    print STDOUT <<EOM;
3258# Command line long names (passed to GetOptions)
3259#---------------------------------------------------------------
3260# here is a summary of the Getopt codes:
3261# <none> does not take an argument
3262# =s takes a mandatory string
3263# :s takes an optional string
3264# =i takes a mandatory integer
3265# :i takes an optional integer
3266# ! does not take an argument and may be negated
3267#  i.e., -foo and -nofoo are allowed
3268# a double dash signals the end of the options list
3269#
3270#---------------------------------------------------------------
3271EOM
3272
3273    foreach (@names) { print STDOUT "$_\n" }
3274}
3275
3276sub dump_defaults {
3277    my @defaults = sort @_;
3278    print STDOUT "Default command line options:\n";
3279    foreach (@_) { print STDOUT "$_\n" }
3280}
3281
3282sub readable_options {
3283
3284    # return options for this run as a string which could be
3285    # put in a perltidyrc file
3286    my ( $rOpts, $roption_string ) = @_;
3287    my %Getopt_flags;
3288    my $rGetopt_flags    = \%Getopt_flags;
3289    my $readable_options = "# Final parameter set for this run.\n";
3290    $readable_options .=
3291      "# See utility 'perltidyrc_dump.pl' for nicer formatting.\n";
3292    foreach my $opt ( @{$roption_string} ) {
3293        my $flag = "";
3294        if ( $opt =~ /(.*)(!|=.*)$/ ) {
3295            $opt  = $1;
3296            $flag = $2;
3297        }
3298        if ( defined( $rOpts->{$opt} ) ) {
3299            $rGetopt_flags->{$opt} = $flag;
3300        }
3301    }
3302    foreach my $key ( sort keys %{$rOpts} ) {
3303        my $flag   = $rGetopt_flags->{$key};
3304        my $value  = $rOpts->{$key};
3305        my $prefix = '--';
3306        my $suffix = "";
3307        if ($flag) {
3308            if ( $flag =~ /^=/ ) {
3309                if ( $value !~ /^\d+$/ ) { $value = '"' . $value . '"' }
3310                $suffix = "=" . $value;
3311            }
3312            elsif ( $flag =~ /^!/ ) {
3313                $prefix .= "no" unless ($value);
3314            }
3315            else {
3316
3317                # shouldn't happen
3318                $readable_options .=
3319                  "# ERROR in dump_options: unrecognized flag $flag for $key\n";
3320            }
3321        }
3322        $readable_options .= $prefix . $key . $suffix . "\n";
3323    }
3324    return $readable_options;
3325}
3326
3327sub show_version {
3328    print STDOUT <<"EOM";
3329This is perltidy, v$VERSION
3330
3331Copyright 2000-2012, Steve Hancock
3332
3333Perltidy is free software and may be copied under the terms of the GNU
3334General Public License, which is included in the distribution files.
3335
3336Complete documentation for perltidy can be found using 'man perltidy'
3337or on the internet at http://perltidy.sourceforge.net.
3338EOM
3339}
3340
3341sub usage {
3342
3343    print STDOUT <<EOF;
3344This is perltidy version $VERSION, a perl script indenter.  Usage:
3345
3346    perltidy [ options ] file1 file2 file3 ...
3347            (output goes to file1.tdy, file2.tdy, file3.tdy, ...)
3348    perltidy [ options ] file1 -o outfile
3349    perltidy [ options ] file1 -st >outfile
3350    perltidy [ options ] <infile >outfile
3351
3352Options have short and long forms. Short forms are shown; see
3353man pages for long forms.  Note: '=s' indicates a required string,
3354and '=n' indicates a required integer.
3355
3356I/O control
3357 -h      show this help
3358 -o=file name of the output file (only if single input file)
3359 -oext=s change output extension from 'tdy' to s
3360 -opath=path  change path to be 'path' for output files
3361 -b      backup original to .bak and modify file in-place
3362 -bext=s change default backup extension from 'bak' to s
3363 -q      deactivate error messages (for running under editor)
3364 -w      include non-critical warning messages in the .ERR error output
3365 -syn    run perl -c to check syntax (default under unix systems)
3366 -log    save .LOG file, which has useful diagnostics
3367 -f      force perltidy to read a binary file
3368 -g      like -log but writes more detailed .LOG file, for debugging scripts
3369 -opt    write the set of options actually used to a .LOG file
3370 -npro   ignore .perltidyrc configuration command file
3371 -pro=file   read configuration commands from file instead of .perltidyrc
3372 -st     send output to standard output, STDOUT
3373 -se     send all error output to standard error output, STDERR
3374 -v      display version number to standard output and quit
3375
3376Basic Options:
3377 -i=n    use n columns per indentation level (default n=4)
3378 -t      tabs: use one tab character per indentation level, not recommeded
3379 -nt     no tabs: use n spaces per indentation level (default)
3380 -et=n   entab leading whitespace n spaces per tab; not recommended
3381 -io     "indent only": just do indentation, no other formatting.
3382 -sil=n  set starting indentation level to n;  use if auto detection fails
3383 -ole=s  specify output line ending (s=dos or win, mac, unix)
3384 -ple    keep output line endings same as input (input must be filename)
3385
3386Whitespace Control
3387 -fws    freeze whitespace; this disables all whitespace changes
3388           and disables the following switches:
3389 -bt=n   sets brace tightness,  n= (0 = loose, 1=default, 2 = tight)
3390 -bbt    same as -bt but for code block braces; same as -bt if not given
3391 -bbvt   block braces vertically tight; use with -bl or -bli
3392 -bbvtl=s  make -bbvt to apply to selected list of block types
3393 -pt=n   paren tightness (n=0, 1 or 2)
3394 -sbt=n  square bracket tightness (n=0, 1, or 2)
3395 -bvt=n  brace vertical tightness,
3396         n=(0=open, 1=close unless multiple steps on a line, 2=always close)
3397 -pvt=n  paren vertical tightness (see -bvt for n)
3398 -sbvt=n square bracket vertical tightness (see -bvt for n)
3399 -bvtc=n closing brace vertical tightness:
3400         n=(0=open, 1=sometimes close, 2=always close)
3401 -pvtc=n closing paren vertical tightness, see -bvtc for n.
3402 -sbvtc=n closing square bracket vertical tightness, see -bvtc for n.
3403 -ci=n   sets continuation indentation=n,  default is n=2 spaces
3404 -lp     line up parentheses, brackets, and non-BLOCK braces
3405 -sfs    add space before semicolon in for( ; ; )
3406 -aws    allow perltidy to add whitespace (default)
3407 -dws    delete all old non-essential whitespace
3408 -icb    indent closing brace of a code block
3409 -cti=n  closing indentation of paren, square bracket, or non-block brace:
3410         n=0 none, =1 align with opening, =2 one full indentation level
3411 -icp    equivalent to -cti=2
3412 -wls=s  want space left of tokens in string; i.e. -nwls='+ - * /'
3413 -wrs=s  want space right of tokens in string;
3414 -sts    put space before terminal semicolon of a statement
3415 -sak=s  put space between keywords given in s and '(';
3416 -nsak=s no space between keywords in s and '('; i.e. -nsak='my our local'
3417
3418Line Break Control
3419 -fnl    freeze newlines; this disables all line break changes
3420            and disables the following switches:
3421 -anl    add newlines;  ok to introduce new line breaks
3422 -bbs    add blank line before subs and packages
3423 -bbc    add blank line before block comments
3424 -bbb    add blank line between major blocks
3425 -kbl=n  keep old blank lines? 0=no, 1=some, 2=all
3426 -mbl=n  maximum consecutive blank lines to output (default=1)
3427 -ce     cuddled else; use this style: '} else {'
3428 -dnl    delete old newlines (default)
3429 -l=n    maximum line length;  default n=80
3430 -bl     opening brace on new line
3431 -sbl    opening sub brace on new line.  value of -bl is used if not given.
3432 -bli    opening brace on new line and indented
3433 -bar    opening brace always on right, even for long clauses
3434 -vt=n   vertical tightness (requires -lp); n controls break after opening
3435         token: 0=never  1=no break if next line balanced   2=no break
3436 -vtc=n  vertical tightness of closing container; n controls if closing
3437         token starts new line: 0=always  1=not unless list  1=never
3438 -wba=s  want break after tokens in string; i.e. wba=': .'
3439 -wbb=s  want break before tokens in string
3440
3441Following Old Breakpoints
3442 -kis    keep interior semicolons.  Allows multiple statements per line.
3443 -boc    break at old comma breaks: turns off all automatic list formatting
3444 -bol    break at old logical breakpoints: or, and, ||, && (default)
3445 -bok    break at old list keyword breakpoints such as map, sort (default)
3446 -bot    break at old conditional (ternary ?:) operator breakpoints (default)
3447 -boa    break at old attribute breakpoints
3448 -cab=n  break at commas after a comma-arrow (=>):
3449         n=0 break at all commas after =>
3450         n=1 stable: break unless this breaks an existing one-line container
3451         n=2 break only if a one-line container cannot be formed
3452         n=3 do not treat commas after => specially at all
3453
3454Comment controls
3455 -ibc    indent block comments (default)
3456 -isbc   indent spaced block comments; may indent unless no leading space
3457 -msc=n  minimum desired spaces to side comment, default 4
3458 -fpsc=n fix position for side comments; default 0;
3459 -csc    add or update closing side comments after closing BLOCK brace
3460 -dcsc   delete closing side comments created by a -csc command
3461 -cscp=s change closing side comment prefix to be other than '## end'
3462 -cscl=s change closing side comment to apply to selected list of blocks
3463 -csci=n minimum number of lines needed to apply a -csc tag, default n=6
3464 -csct=n maximum number of columns of appended text, default n=20
3465 -cscw   causes warning if old side comment is overwritten with -csc
3466
3467 -sbc    use 'static block comments' identified by leading '##' (default)
3468 -sbcp=s change static block comment identifier to be other than '##'
3469 -osbc   outdent static block comments
3470
3471 -ssc    use 'static side comments' identified by leading '##' (default)
3472 -sscp=s change static side comment identifier to be other than '##'
3473
3474Delete selected text
3475 -dac    delete all comments AND pod
3476 -dbc    delete block comments
3477 -dsc    delete side comments
3478 -dp     delete pod
3479
3480Send selected text to a '.TEE' file
3481 -tac    tee all comments AND pod
3482 -tbc    tee block comments
3483 -tsc    tee side comments
3484 -tp     tee pod
3485
3486Outdenting
3487 -olq    outdent long quoted strings (default)
3488 -olc    outdent a long block comment line
3489 -ola    outdent statement labels
3490 -okw    outdent control keywords (redo, next, last, goto, return)
3491 -okwl=s specify alternative keywords for -okw command
3492
3493Other controls
3494 -mft=n  maximum fields per table; default n=40
3495 -x      do not format lines before hash-bang line (i.e., for VMS)
3496 -asc    allows perltidy to add a ';' when missing (default)
3497 -dsm    allows perltidy to delete an unnecessary ';'  (default)
3498
3499Combinations of other parameters
3500 -gnu     attempt to follow GNU Coding Standards as applied to perl
3501 -mangle  remove as many newlines as possible (but keep comments and pods)
3502 -extrude  insert as many newlines as possible
3503
3504Dump and die, debugging
3505 -dop    dump options used in this run to standard output and quit
3506 -ddf    dump default options to standard output and quit
3507 -dsn    dump all option short names to standard output and quit
3508 -dln    dump option long names to standard output and quit
3509 -dpro   dump whatever configuration file is in effect to standard output
3510 -dtt    dump all token types to standard output and quit
3511
3512HTML
3513 -html write an html file (see 'man perl2web' for many options)
3514       Note: when -html is used, no indentation or formatting are done.
3515       Hint: try perltidy -html -css=mystyle.css filename.pl
3516       and edit mystyle.css to change the appearance of filename.html.
3517       -nnn gives line numbers
3518       -pre only writes out <pre>..</pre> code section
3519       -toc places a table of contents to subs at the top (default)
3520       -pod passes pod text through pod2html (default)
3521       -frm write html as a frame (3 files)
3522       -text=s extra extension for table of contents if -frm, default='toc'
3523       -sext=s extra extension for file content if -frm, default='src'
3524
3525A prefix of "n" negates short form toggle switches, and a prefix of "no"
3526negates the long forms.  For example, -nasc means don't add missing
3527semicolons.
3528
3529If you are unable to see this entire text, try "perltidy -h | more"
3530For more detailed information, and additional options, try "man perltidy",
3531or go to the perltidy home page at http://perltidy.sourceforge.net
3532EOF
3533
3534}
3535
3536sub process_this_file {
3537
3538    my ( $truth, $beauty ) = @_;
3539
3540    # loop to process each line of this file
3541    while ( my $line_of_tokens = $truth->get_line() ) {
3542        $beauty->write_line($line_of_tokens);
3543    }
3544
3545    # finish up
3546    eval { $beauty->finish_formatting() };
3547    $truth->report_tokenization_errors();
3548}
3549
3550sub check_syntax {
3551
3552    # Use 'perl -c' to make sure that we did not create bad syntax
3553    # This is a very good independent check for programming errors
3554    #
3555    # Given names of the input and output files, ($istream, $ostream),
3556    # we do the following:
3557    # - check syntax of the input file
3558    # - if bad, all done (could be an incomplete code snippet)
3559    # - if infile syntax ok, then check syntax of the output file;
3560    #   - if outfile syntax bad, issue warning; this implies a code bug!
3561    # - set and return flag "infile_syntax_ok" : =-1 bad 0 unknown 1 good
3562
3563    my ( $istream, $ostream, $logger_object, $rOpts ) = @_;
3564    my $infile_syntax_ok = 0;
3565    my $line_of_dashes   = '-' x 42 . "\n";
3566
3567    my $flags = $rOpts->{'perl-syntax-check-flags'};
3568
3569    # be sure we invoke perl with -c
3570    # note: perl will accept repeated flags like '-c -c'.  It is safest
3571    # to append another -c than try to find an interior bundled c, as
3572    # in -Tc, because such a 'c' might be in a quoted string, for example.
3573    if ( $flags !~ /(^-c|\s+-c)/ ) { $flags .= " -c" }
3574
3575    # be sure we invoke perl with -x if requested
3576    # same comments about repeated parameters applies
3577    if ( $rOpts->{'look-for-hash-bang'} ) {
3578        if ( $flags !~ /(^-x|\s+-x)/ ) { $flags .= " -x" }
3579    }
3580
3581    # this shouldn't happen unless a termporary file couldn't be made
3582    if ( $istream eq '-' ) {
3583        $logger_object->write_logfile_entry(
3584            "Cannot run perl -c on STDIN and STDOUT\n");
3585        return $infile_syntax_ok;
3586    }
3587
3588    $logger_object->write_logfile_entry(
3589        "checking input file syntax with perl $flags\n");
3590
3591    # Not all operating systems/shells support redirection of the standard
3592    # error output.
3593    my $error_redirection = ( $^O eq 'VMS' ) ? "" : '2>&1';
3594
3595    my ( $istream_filename, $perl_output ) =
3596      do_syntax_check( $istream, $flags, $error_redirection );
3597    $logger_object->write_logfile_entry(
3598        "Input stream passed to Perl as file $istream_filename\n");
3599    $logger_object->write_logfile_entry($line_of_dashes);
3600    $logger_object->write_logfile_entry("$perl_output\n");
3601
3602    if ( $perl_output =~ /syntax\s*OK/ ) {
3603        $infile_syntax_ok = 1;
3604        $logger_object->write_logfile_entry($line_of_dashes);
3605        $logger_object->write_logfile_entry(
3606            "checking output file syntax with perl $flags ...\n");
3607        my ( $ostream_filename, $perl_output ) =
3608          do_syntax_check( $ostream, $flags, $error_redirection );
3609        $logger_object->write_logfile_entry(
3610            "Output stream passed to Perl as file $ostream_filename\n");
3611        $logger_object->write_logfile_entry($line_of_dashes);
3612        $logger_object->write_logfile_entry("$perl_output\n");
3613
3614        unless ( $perl_output =~ /syntax\s*OK/ ) {
3615            $logger_object->write_logfile_entry($line_of_dashes);
3616            $logger_object->warning(
3617"The output file has a syntax error when tested with perl $flags $ostream !\n"
3618            );
3619            $logger_object->warning(
3620                "This implies an error in perltidy; the file $ostream is bad\n"
3621            );
3622            $logger_object->report_definite_bug();
3623
3624            # the perl version number will be helpful for diagnosing the problem
3625            $logger_object->write_logfile_entry(
3626                qx/perl -v $error_redirection/ . "\n" );
3627        }
3628    }
3629    else {
3630
3631        # Only warn of perl -c syntax errors.  Other messages,
3632        # such as missing modules, are too common.  They can be
3633        # seen by running with perltidy -w
3634        $logger_object->complain("A syntax check using perl $flags\n");
3635        $logger_object->complain(
3636            "for the output in file $istream_filename gives:\n");
3637        $logger_object->complain($line_of_dashes);
3638        $logger_object->complain("$perl_output\n");
3639        $logger_object->complain($line_of_dashes);
3640        $infile_syntax_ok = -1;
3641        $logger_object->write_logfile_entry($line_of_dashes);
3642        $logger_object->write_logfile_entry(
3643"The output file will not be checked because of input file problems\n"
3644        );
3645    }
3646    return $infile_syntax_ok;
3647}
3648
3649sub do_syntax_check {
3650    my ( $stream, $flags, $error_redirection ) = @_;
3651
3652    # We need a named input file for executing perl
3653    my ( $stream_filename, $is_tmpfile ) = get_stream_as_named_file($stream);
3654
3655    # TODO: Need to add name of file to log somewhere
3656    # otherwise Perl output is hard to read
3657    if ( !$stream_filename ) { return $stream_filename, "" }
3658
3659    # We have to quote the filename in case it has unusual characters
3660    # or spaces.  Example: this filename #CM11.pm# gives trouble.
3661    my $quoted_stream_filename = '"' . $stream_filename . '"';
3662
3663    # Under VMS something like -T will become -t (and an error) so we
3664    # will put quotes around the flags.  Double quotes seem to work on
3665    # Unix/Windows/VMS, but this may not work on all systems.  (Single
3666    # quotes do not work under Windows).  It could become necessary to
3667    # put double quotes around each flag, such as:  -"c"  -"T"
3668    # We may eventually need some system-dependent coding here.
3669    $flags = '"' . $flags . '"';
3670
3671    # now wish for luck...
3672    my $msg = qx/perl $flags $quoted_stream_filename $error_redirection/;
3673
3674    unlink $stream_filename if ($is_tmpfile);
3675    return $stream_filename, $msg;
3676}
3677
3678#####################################################################
3679#
3680# This is a stripped down version of IO::Scalar
3681# Given a reference to a scalar, it supplies either:
3682# a getline method which reads lines (mode='r'), or
3683# a print method which reads lines (mode='w')
3684#
3685#####################################################################
3686package Perl::Tidy::IOScalar;
3687use Carp;
3688
3689sub new {
3690    my ( $package, $rscalar, $mode ) = @_;
3691    my $ref = ref $rscalar;
3692    if ( $ref ne 'SCALAR' ) {
3693        confess <<EOM;
3694------------------------------------------------------------------------
3695expecting ref to SCALAR but got ref to ($ref); trace follows:
3696------------------------------------------------------------------------
3697EOM
3698
3699    }
3700    if ( $mode eq 'w' ) {
3701        $$rscalar = "";
3702        return bless [ $rscalar, $mode ], $package;
3703    }
3704    elsif ( $mode eq 'r' ) {
3705
3706        # Convert a scalar to an array.
3707        # This avoids looking for "\n" on each call to getline
3708        #
3709        # NOTES: The -1 count is needed to avoid loss of trailing blank lines
3710        # (which might be important in a DATA section).
3711        my @array;
3712        if ( $rscalar && ${$rscalar} ) {
3713            @array = map { $_ .= "\n" } split /\n/, ${$rscalar}, -1;
3714
3715            # remove possible extra blank line introduced with split
3716            if ( @array && $array[-1] eq "\n" ) { pop @array }
3717        }
3718        my $i_next = 0;
3719        return bless [ \@array, $mode, $i_next ], $package;
3720    }
3721    else {
3722        confess <<EOM;
3723------------------------------------------------------------------------
3724expecting mode = 'r' or 'w' but got mode ($mode); trace follows:
3725------------------------------------------------------------------------
3726EOM
3727    }
3728}
3729
3730sub getline {
3731    my $self = shift;
3732    my $mode = $self->[1];
3733    if ( $mode ne 'r' ) {
3734        confess <<EOM;
3735------------------------------------------------------------------------
3736getline call requires mode = 'r' but mode = ($mode); trace follows:
3737------------------------------------------------------------------------
3738EOM
3739    }
3740    my $i = $self->[2]++;
3741    return $self->[0]->[$i];
3742}
3743
3744sub print {
3745    my $self = shift;
3746    my $mode = $self->[1];
3747    if ( $mode ne 'w' ) {
3748        confess <<EOM;
3749------------------------------------------------------------------------
3750print call requires mode = 'w' but mode = ($mode); trace follows:
3751------------------------------------------------------------------------
3752EOM
3753    }
3754    ${ $self->[0] } .= $_[0];
3755}
3756sub close { return }
3757
3758#####################################################################
3759#
3760# This is a stripped down version of IO::ScalarArray
3761# Given a reference to an array, it supplies either:
3762# a getline method which reads lines (mode='r'), or
3763# a print method which reads lines (mode='w')
3764#
3765# NOTE: this routine assumes that that there aren't any embedded
3766# newlines within any of the array elements.  There are no checks
3767# for that.
3768#
3769#####################################################################
3770package Perl::Tidy::IOScalarArray;
3771use Carp;
3772
3773sub new {
3774    my ( $package, $rarray, $mode ) = @_;
3775    my $ref = ref $rarray;
3776    if ( $ref ne 'ARRAY' ) {
3777        confess <<EOM;
3778------------------------------------------------------------------------
3779expecting ref to ARRAY but got ref to ($ref); trace follows:
3780------------------------------------------------------------------------
3781EOM
3782
3783    }
3784    if ( $mode eq 'w' ) {
3785        @$rarray = ();
3786        return bless [ $rarray, $mode ], $package;
3787    }
3788    elsif ( $mode eq 'r' ) {
3789        my $i_next = 0;
3790        return bless [ $rarray, $mode, $i_next ], $package;
3791    }
3792    else {
3793        confess <<EOM;
3794------------------------------------------------------------------------
3795expecting mode = 'r' or 'w' but got mode ($mode); trace follows:
3796------------------------------------------------------------------------
3797EOM
3798    }
3799}
3800
3801sub getline {
3802    my $self = shift;
3803    my $mode = $self->[1];
3804    if ( $mode ne 'r' ) {
3805        confess <<EOM;
3806------------------------------------------------------------------------
3807getline requires mode = 'r' but mode = ($mode); trace follows:
3808------------------------------------------------------------------------
3809EOM
3810    }
3811    my $i = $self->[2]++;
3812    return $self->[0]->[$i];
3813}
3814
3815sub print {
3816    my $self = shift;
3817    my $mode = $self->[1];
3818    if ( $mode ne 'w' ) {
3819        confess <<EOM;
3820------------------------------------------------------------------------
3821print requires mode = 'w' but mode = ($mode); trace follows:
3822------------------------------------------------------------------------
3823EOM
3824    }
3825    push @{ $self->[0] }, $_[0];
3826}
3827sub close { return }
3828
3829#####################################################################
3830#
3831# the Perl::Tidy::LineSource class supplies an object with a 'get_line()' method
3832# which returns the next line to be parsed
3833#
3834#####################################################################
3835
3836package Perl::Tidy::LineSource;
3837
3838sub new {
3839
3840    my ( $class, $input_file, $rOpts, $rpending_logfile_message ) = @_;
3841
3842    my $input_line_ending;
3843    if ( $rOpts->{'preserve-line-endings'} ) {
3844        $input_line_ending = Perl::Tidy::find_input_line_ending($input_file);
3845    }
3846
3847    ( my $fh, $input_file ) = Perl::Tidy::streamhandle( $input_file, 'r' );
3848    return undef unless $fh;
3849
3850    # in order to check output syntax when standard output is used,
3851    # or when it is an object, we have to make a copy of the file
3852    if ( ( $input_file eq '-' || ref $input_file ) && $rOpts->{'check-syntax'} )
3853    {
3854
3855        # Turning off syntax check when input output is used.
3856        # The reason is that temporary files cause problems on
3857        # on many systems.
3858        $rOpts->{'check-syntax'} = 0;
3859
3860        $$rpending_logfile_message .= <<EOM;
3861Note: --syntax check will be skipped because standard input is used
3862EOM
3863
3864    }
3865
3866    return bless {
3867        _fh                => $fh,
3868        _filename          => $input_file,
3869        _input_line_ending => $input_line_ending,
3870        _rinput_buffer     => [],
3871        _started           => 0,
3872    }, $class;
3873}
3874
3875sub close_input_file {
3876    my $self = shift;
3877
3878    # Only close physical files, not STDIN and other objects
3879    my $filename = $self->{_filename};
3880    if ( $filename ne '-' && !ref $filename ) {
3881        eval { $self->{_fh}->close() };
3882    }
3883}
3884
3885sub get_line {
3886    my $self          = shift;
3887    my $line          = undef;
3888    my $fh            = $self->{_fh};
3889    my $rinput_buffer = $self->{_rinput_buffer};
3890
3891    if ( scalar(@$rinput_buffer) ) {
3892        $line = shift @$rinput_buffer;
3893    }
3894    else {
3895        $line = $fh->getline();
3896
3897        # patch to read raw mac files under unix, dos
3898        # see if the first line has embedded \r's
3899        if ( $line && !$self->{_started} ) {
3900            if ( $line =~ /[\015][^\015\012]/ ) {
3901
3902                # found one -- break the line up and store in a buffer
3903                @$rinput_buffer = map { $_ . "\n" } split /\015/, $line;
3904                my $count = @$rinput_buffer;
3905                $line = shift @$rinput_buffer;
3906            }
3907            $self->{_started}++;
3908        }
3909    }
3910    return $line;
3911}
3912
3913#####################################################################
3914#
3915# the Perl::Tidy::LineSink class supplies a write_line method for
3916# actual file writing
3917#
3918#####################################################################
3919
3920package Perl::Tidy::LineSink;
3921
3922sub new {
3923
3924    my ( $class, $output_file, $tee_file, $line_separator, $rOpts,
3925        $rpending_logfile_message, $binmode )
3926      = @_;
3927    my $fh     = undef;
3928    my $fh_tee = undef;
3929
3930    my $output_file_open = 0;
3931
3932    if ( $rOpts->{'format'} eq 'tidy' ) {
3933        ( $fh, $output_file ) = Perl::Tidy::streamhandle( $output_file, 'w' );
3934        unless ($fh) { Perl::Tidy::Die "Cannot write to output stream\n"; }
3935        $output_file_open = 1;
3936        if ($binmode) {
3937            if ( ref($fh) eq 'IO::File' ) {
3938                binmode $fh;
3939            }
3940            if ( $output_file eq '-' ) { binmode STDOUT }
3941        }
3942    }
3943
3944    # in order to check output syntax when standard output is used,
3945    # or when it is an object, we have to make a copy of the file
3946    if ( $output_file eq '-' || ref $output_file ) {
3947        if ( $rOpts->{'check-syntax'} ) {
3948
3949            # Turning off syntax check when standard output is used.
3950            # The reason is that temporary files cause problems on
3951            # on many systems.
3952            $rOpts->{'check-syntax'} = 0;
3953            $$rpending_logfile_message .= <<EOM;
3954Note: --syntax check will be skipped because standard output is used
3955EOM
3956
3957        }
3958    }
3959
3960    bless {
3961        _fh               => $fh,
3962        _fh_tee           => $fh_tee,
3963        _output_file      => $output_file,
3964        _output_file_open => $output_file_open,
3965        _tee_flag         => 0,
3966        _tee_file         => $tee_file,
3967        _tee_file_opened  => 0,
3968        _line_separator   => $line_separator,
3969        _binmode          => $binmode,
3970    }, $class;
3971}
3972
3973sub write_line {
3974
3975    my $self = shift;
3976    my $fh   = $self->{_fh};
3977
3978    my $output_file_open = $self->{_output_file_open};
3979    chomp $_[0];
3980    $_[0] .= $self->{_line_separator};
3981
3982    $fh->print( $_[0] ) if ( $self->{_output_file_open} );
3983
3984    if ( $self->{_tee_flag} ) {
3985        unless ( $self->{_tee_file_opened} ) { $self->really_open_tee_file() }
3986        my $fh_tee = $self->{_fh_tee};
3987        print $fh_tee $_[0];
3988    }
3989}
3990
3991sub tee_on {
3992    my $self = shift;
3993    $self->{_tee_flag} = 1;
3994}
3995
3996sub tee_off {
3997    my $self = shift;
3998    $self->{_tee_flag} = 0;
3999}
4000
4001sub really_open_tee_file {
4002    my $self     = shift;
4003    my $tee_file = $self->{_tee_file};
4004    my $fh_tee;
4005    $fh_tee = IO::File->new(">$tee_file")
4006      or Perl::Tidy::Die("couldn't open TEE file $tee_file: $!\n");
4007    binmode $fh_tee if $self->{_binmode};
4008    $self->{_tee_file_opened} = 1;
4009    $self->{_fh_tee}          = $fh_tee;
4010}
4011
4012sub close_output_file {
4013    my $self = shift;
4014
4015    # Only close physical files, not STDOUT and other objects
4016    my $output_file = $self->{_output_file};
4017    if ( $output_file ne '-' && !ref $output_file ) {
4018        eval { $self->{_fh}->close() } if $self->{_output_file_open};
4019    }
4020    $self->close_tee_file();
4021}
4022
4023sub close_tee_file {
4024    my $self = shift;
4025
4026    # Only close physical files, not STDOUT and other objects
4027    if ( $self->{_tee_file_opened} ) {
4028        my $tee_file = $self->{_tee_file};
4029        if ( $tee_file ne '-' && !ref $tee_file ) {
4030            eval { $self->{_fh_tee}->close() };
4031            $self->{_tee_file_opened} = 0;
4032        }
4033    }
4034}
4035
4036#####################################################################
4037#
4038# The Perl::Tidy::Diagnostics class writes the DIAGNOSTICS file, which is
4039# useful for program development.
4040#
4041# Only one such file is created regardless of the number of input
4042# files processed.  This allows the results of processing many files
4043# to be summarized in a single file.
4044#
4045#####################################################################
4046
4047package Perl::Tidy::Diagnostics;
4048
4049sub new {
4050
4051    my $class = shift;
4052    bless {
4053        _write_diagnostics_count => 0,
4054        _last_diagnostic_file    => "",
4055        _input_file              => "",
4056        _fh                      => undef,
4057    }, $class;
4058}
4059
4060sub set_input_file {
4061    my $self = shift;
4062    $self->{_input_file} = $_[0];
4063}
4064
4065# This is a diagnostic routine which is useful for program development.
4066# Output from debug messages go to a file named DIAGNOSTICS, where
4067# they are labeled by file and line.  This allows many files to be
4068# scanned at once for some particular condition of interest.
4069sub write_diagnostics {
4070    my $self = shift;
4071
4072    unless ( $self->{_write_diagnostics_count} ) {
4073        open DIAGNOSTICS, ">DIAGNOSTICS"
4074          or death("couldn't open DIAGNOSTICS: $!\n");
4075    }
4076
4077    my $last_diagnostic_file = $self->{_last_diagnostic_file};
4078    my $input_file           = $self->{_input_file};
4079    if ( $last_diagnostic_file ne $input_file ) {
4080        print DIAGNOSTICS "\nFILE:$input_file\n";
4081    }
4082    $self->{_last_diagnostic_file} = $input_file;
4083    my $input_line_number = Perl::Tidy::Tokenizer::get_input_line_number();
4084    print DIAGNOSTICS "$input_line_number:\t@_";
4085    $self->{_write_diagnostics_count}++;
4086}
4087
4088#####################################################################
4089#
4090# The Perl::Tidy::Logger class writes the .LOG and .ERR files
4091#
4092#####################################################################
4093
4094package Perl::Tidy::Logger;
4095
4096sub new {
4097    my $class = shift;
4098    my $fh;
4099    my ( $rOpts, $log_file, $warning_file, $fh_stderr, $saw_extrude, ) = @_;
4100
4101    my $fh_warnings = $rOpts->{'standard-error-output'} ? $fh_stderr : undef;
4102
4103    # remove any old error output file if we might write a new one
4104    unless ( $fh_warnings || ref($warning_file) ) {
4105        if ( -e $warning_file ) { unlink($warning_file) }
4106    }
4107
4108    bless {
4109        _log_file                      => $log_file,
4110        _rOpts                         => $rOpts,
4111        _fh_warnings                   => $fh_warnings,
4112        _last_input_line_written       => 0,
4113        _at_end_of_file                => 0,
4114        _use_prefix                    => 1,
4115        _block_log_output              => 0,
4116        _line_of_tokens                => undef,
4117        _output_line_number            => undef,
4118        _wrote_line_information_string => 0,
4119        _wrote_column_headings         => 0,
4120        _warning_file                  => $warning_file,
4121        _warning_count                 => 0,
4122        _complaint_count               => 0,
4123        _saw_code_bug    => -1,             # -1=no 0=maybe 1=for sure
4124        _saw_brace_error => 0,
4125        _saw_extrude     => $saw_extrude,
4126        _output_array    => [],
4127    }, $class;
4128}
4129
4130sub get_warning_count {
4131    my $self = shift;
4132    return $self->{_warning_count};
4133}
4134
4135sub get_use_prefix {
4136    my $self = shift;
4137    return $self->{_use_prefix};
4138}
4139
4140sub block_log_output {
4141    my $self = shift;
4142    $self->{_block_log_output} = 1;
4143}
4144
4145sub unblock_log_output {
4146    my $self = shift;
4147    $self->{_block_log_output} = 0;
4148}
4149
4150sub interrupt_logfile {
4151    my $self = shift;
4152    $self->{_use_prefix} = 0;
4153    $self->warning("\n");
4154    $self->write_logfile_entry( '#' x 24 . "  WARNING  " . '#' x 25 . "\n" );
4155}
4156
4157sub resume_logfile {
4158    my $self = shift;
4159    $self->write_logfile_entry( '#' x 60 . "\n" );
4160    $self->{_use_prefix} = 1;
4161}
4162
4163sub we_are_at_the_last_line {
4164    my $self = shift;
4165    unless ( $self->{_wrote_line_information_string} ) {
4166        $self->write_logfile_entry("Last line\n\n");
4167    }
4168    $self->{_at_end_of_file} = 1;
4169}
4170
4171# record some stuff in case we go down in flames
4172sub black_box {
4173    my $self = shift;
4174    my ( $line_of_tokens, $output_line_number ) = @_;
4175    my $input_line        = $line_of_tokens->{_line_text};
4176    my $input_line_number = $line_of_tokens->{_line_number};
4177
4178    # save line information in case we have to write a logfile message
4179    $self->{_line_of_tokens}                = $line_of_tokens;
4180    $self->{_output_line_number}            = $output_line_number;
4181    $self->{_wrote_line_information_string} = 0;
4182
4183    my $last_input_line_written = $self->{_last_input_line_written};
4184    my $rOpts                   = $self->{_rOpts};
4185    if (
4186        (
4187            ( $input_line_number - $last_input_line_written ) >=
4188            $rOpts->{'logfile-gap'}
4189        )
4190        || ( $input_line =~ /^\s*(sub|package)\s+(\w+)/ )
4191      )
4192    {
4193        my $rlevels                      = $line_of_tokens->{_rlevels};
4194        my $structural_indentation_level = $$rlevels[0];
4195        $self->{_last_input_line_written} = $input_line_number;
4196        ( my $out_str = $input_line ) =~ s/^\s*//;
4197        chomp $out_str;
4198
4199        $out_str = ( '.' x $structural_indentation_level ) . $out_str;
4200
4201        if ( length($out_str) > 35 ) {
4202            $out_str = substr( $out_str, 0, 35 ) . " ....";
4203        }
4204        $self->logfile_output( "", "$out_str\n" );
4205    }
4206}
4207
4208sub write_logfile_entry {
4209    my $self = shift;
4210
4211    # add leading >>> to avoid confusing error mesages and code
4212    $self->logfile_output( ">>>", "@_" );
4213}
4214
4215sub write_column_headings {
4216    my $self = shift;
4217
4218    $self->{_wrote_column_headings} = 1;
4219    my $routput_array = $self->{_output_array};
4220    push @{$routput_array}, <<EOM;
4221The nesting depths in the table below are at the start of the lines.
4222The indicated output line numbers are not always exact.
4223ci = levels of continuation indentation; bk = 1 if in BLOCK, 0 if not.
4224
4225in:out indent c b  nesting   code + messages; (messages begin with >>>)
4226lines  levels i k            (code begins with one '.' per indent level)
4227------  ----- - - --------   -------------------------------------------
4228EOM
4229}
4230
4231sub make_line_information_string {
4232
4233    # make columns of information when a logfile message needs to go out
4234    my $self                    = shift;
4235    my $line_of_tokens          = $self->{_line_of_tokens};
4236    my $input_line_number       = $line_of_tokens->{_line_number};
4237    my $line_information_string = "";
4238    if ($input_line_number) {
4239
4240        my $output_line_number   = $self->{_output_line_number};
4241        my $brace_depth          = $line_of_tokens->{_curly_brace_depth};
4242        my $paren_depth          = $line_of_tokens->{_paren_depth};
4243        my $square_bracket_depth = $line_of_tokens->{_square_bracket_depth};
4244        my $guessed_indentation_level =
4245          $line_of_tokens->{_guessed_indentation_level};
4246        my $rlevels         = $line_of_tokens->{_rlevels};
4247        my $rnesting_tokens = $line_of_tokens->{_rnesting_tokens};
4248        my $rci_levels      = $line_of_tokens->{_rci_levels};
4249        my $rnesting_blocks = $line_of_tokens->{_rnesting_blocks};
4250
4251        my $structural_indentation_level = $$rlevels[0];
4252
4253        $self->write_column_headings() unless $self->{_wrote_column_headings};
4254
4255        # keep logfile columns aligned for scripts up to 999 lines;
4256        # for longer scripts it doesn't really matter
4257        my $extra_space = "";
4258        $extra_space .=
4259            ( $input_line_number < 10 )  ? "  "
4260          : ( $input_line_number < 100 ) ? " "
4261          :                                "";
4262        $extra_space .=
4263            ( $output_line_number < 10 )  ? "  "
4264          : ( $output_line_number < 100 ) ? " "
4265          :                                 "";
4266
4267        # there are 2 possible nesting strings:
4268        # the original which looks like this:  (0 [1 {2
4269        # the new one, which looks like this:  {{[
4270        # the new one is easier to read, and shows the order, but
4271        # could be arbitrarily long, so we use it unless it is too long
4272        my $nesting_string =
4273          "($paren_depth [$square_bracket_depth {$brace_depth";
4274        my $nesting_string_new = $$rnesting_tokens[0];
4275
4276        my $ci_level = $$rci_levels[0];
4277        if ( $ci_level > 9 ) { $ci_level = '*' }
4278        my $bk = ( $$rnesting_blocks[0] =~ /1$/ ) ? '1' : '0';
4279
4280        if ( length($nesting_string_new) <= 8 ) {
4281            $nesting_string =
4282              $nesting_string_new . " " x ( 8 - length($nesting_string_new) );
4283        }
4284        $line_information_string =
4285"L$input_line_number:$output_line_number$extra_space i$guessed_indentation_level:$structural_indentation_level $ci_level $bk $nesting_string";
4286    }
4287    return $line_information_string;
4288}
4289
4290sub logfile_output {
4291    my $self = shift;
4292    my ( $prompt, $msg ) = @_;
4293    return if ( $self->{_block_log_output} );
4294
4295    my $routput_array = $self->{_output_array};
4296    if ( $self->{_at_end_of_file} || !$self->{_use_prefix} ) {
4297        push @{$routput_array}, "$msg";
4298    }
4299    else {
4300        my $line_information_string = $self->make_line_information_string();
4301        $self->{_wrote_line_information_string} = 1;
4302
4303        if ($line_information_string) {
4304            push @{$routput_array}, "$line_information_string   $prompt$msg";
4305        }
4306        else {
4307            push @{$routput_array}, "$msg";
4308        }
4309    }
4310}
4311
4312sub get_saw_brace_error {
4313    my $self = shift;
4314    return $self->{_saw_brace_error};
4315}
4316
4317sub increment_brace_error {
4318    my $self = shift;
4319    $self->{_saw_brace_error}++;
4320}
4321
4322sub brace_warning {
4323    my $self = shift;
4324    use constant BRACE_WARNING_LIMIT => 10;
4325    my $saw_brace_error = $self->{_saw_brace_error};
4326
4327    if ( $saw_brace_error < BRACE_WARNING_LIMIT ) {
4328        $self->warning(@_);
4329    }
4330    $saw_brace_error++;
4331    $self->{_saw_brace_error} = $saw_brace_error;
4332
4333    if ( $saw_brace_error == BRACE_WARNING_LIMIT ) {
4334        $self->warning("No further warnings of this type will be given\n");
4335    }
4336}
4337
4338sub complain {
4339
4340    # handle non-critical warning messages based on input flag
4341    my $self  = shift;
4342    my $rOpts = $self->{_rOpts};
4343
4344    # these appear in .ERR output only if -w flag is used
4345    if ( $rOpts->{'warning-output'} ) {
4346        $self->warning(@_);
4347    }
4348
4349    # otherwise, they go to the .LOG file
4350    else {
4351        $self->{_complaint_count}++;
4352        $self->write_logfile_entry(@_);
4353    }
4354}
4355
4356sub warning {
4357
4358    # report errors to .ERR file (or stdout)
4359    my $self = shift;
4360    use constant WARNING_LIMIT => 50;
4361
4362    my $rOpts = $self->{_rOpts};
4363    unless ( $rOpts->{'quiet'} ) {
4364
4365        my $warning_count = $self->{_warning_count};
4366        my $fh_warnings   = $self->{_fh_warnings};
4367        if ( !$fh_warnings ) {
4368            my $warning_file = $self->{_warning_file};
4369            ( $fh_warnings, my $filename ) =
4370              Perl::Tidy::streamhandle( $warning_file, 'w' );
4371            $fh_warnings or Perl::Tidy::Die("couldn't open $filename $!\n");
4372            Perl::Tidy::Warn "## Please see file $filename\n"
4373              unless ref($warning_file);
4374            $self->{_fh_warnings} = $fh_warnings;
4375        }
4376
4377        if ( $warning_count < WARNING_LIMIT ) {
4378            if ( $self->get_use_prefix() > 0 ) {
4379                my $input_line_number =
4380                  Perl::Tidy::Tokenizer::get_input_line_number();
4381                $fh_warnings->print("$input_line_number:\t@_");
4382                $self->write_logfile_entry("WARNING: @_");
4383            }
4384            else {
4385                $fh_warnings->print(@_);
4386                $self->write_logfile_entry(@_);
4387            }
4388        }
4389        $warning_count++;
4390        $self->{_warning_count} = $warning_count;
4391
4392        if ( $warning_count == WARNING_LIMIT ) {
4393            $fh_warnings->print("No further warnings will be given\n");
4394        }
4395    }
4396}
4397
4398# programming bug codes:
4399#   -1 = no bug
4400#    0 = maybe, not sure.
4401#    1 = definitely
4402sub report_possible_bug {
4403    my $self         = shift;
4404    my $saw_code_bug = $self->{_saw_code_bug};
4405    $self->{_saw_code_bug} = ( $saw_code_bug < 0 ) ? 0 : $saw_code_bug;
4406}
4407
4408sub report_definite_bug {
4409    my $self = shift;
4410    $self->{_saw_code_bug} = 1;
4411}
4412
4413sub ask_user_for_bug_report {
4414    my $self = shift;
4415
4416    my ( $infile_syntax_ok, $formatter ) = @_;
4417    my $saw_code_bug = $self->{_saw_code_bug};
4418    if ( ( $saw_code_bug == 0 ) && ( $infile_syntax_ok == 1 ) ) {
4419        $self->warning(<<EOM);
4420
4421You may have encountered a code bug in perltidy.  If you think so, and
4422the problem is not listed in the BUGS file at
4423http://perltidy.sourceforge.net, please report it so that it can be
4424corrected.  Include the smallest possible script which has the problem,
4425along with the .LOG file. See the manual pages for contact information.
4426Thank you!
4427EOM
4428
4429    }
4430    elsif ( $saw_code_bug == 1 ) {
4431        if ( $self->{_saw_extrude} ) {
4432            $self->warning(<<EOM);
4433
4434You may have encountered a bug in perltidy.  However, since you are using the
4435-extrude option, the problem may be with perl or one of its modules, which have
4436occasional problems with this type of file.  If you believe that the
4437problem is with perltidy, and the problem is not listed in the BUGS file at
4438http://perltidy.sourceforge.net, please report it so that it can be corrected.
4439Include the smallest possible script which has the problem, along with the .LOG
4440file. See the manual pages for contact information.
4441Thank you!
4442EOM
4443        }
4444        else {
4445            $self->warning(<<EOM);
4446
4447Oops, you seem to have encountered a bug in perltidy.  Please check the
4448BUGS file at http://perltidy.sourceforge.net.  If the problem is not
4449listed there, please report it so that it can be corrected.  Include the
4450smallest possible script which produces this message, along with the
4451.LOG file if appropriate.  See the manual pages for contact information.
4452Your efforts are appreciated.
4453Thank you!
4454EOM
4455            my $added_semicolon_count = 0;
4456            eval {
4457                $added_semicolon_count =
4458                  $formatter->get_added_semicolon_count();
4459            };
4460            if ( $added_semicolon_count > 0 ) {
4461                $self->warning(<<EOM);
4462
4463The log file shows that perltidy added $added_semicolon_count semicolons.
4464Please rerun with -nasc to see if that is the cause of the syntax error.  Even
4465if that is the problem, please report it so that it can be fixed.
4466EOM
4467
4468            }
4469        }
4470    }
4471}
4472
4473sub finish {
4474
4475    # called after all formatting to summarize errors
4476    my $self = shift;
4477    my ( $infile_syntax_ok, $formatter ) = @_;
4478
4479    my $rOpts         = $self->{_rOpts};
4480    my $warning_count = $self->{_warning_count};
4481    my $saw_code_bug  = $self->{_saw_code_bug};
4482
4483    my $save_logfile =
4484         ( $saw_code_bug == 0 && $infile_syntax_ok == 1 )
4485      || $saw_code_bug == 1
4486      || $rOpts->{'logfile'};
4487    my $log_file = $self->{_log_file};
4488    if ($warning_count) {
4489        if ($save_logfile) {
4490            $self->block_log_output();    # avoid echoing this to the logfile
4491            $self->warning(
4492                "The logfile $log_file may contain useful information\n");
4493            $self->unblock_log_output();
4494        }
4495
4496        if ( $self->{_complaint_count} > 0 ) {
4497            $self->warning(
4498"To see $self->{_complaint_count} non-critical warnings rerun with -w\n"
4499            );
4500        }
4501
4502        if ( $self->{_saw_brace_error}
4503            && ( $rOpts->{'logfile-gap'} > 1 || !$save_logfile ) )
4504        {
4505            $self->warning("To save a full .LOG file rerun with -g\n");
4506        }
4507    }
4508    $self->ask_user_for_bug_report( $infile_syntax_ok, $formatter );
4509
4510    if ($save_logfile) {
4511        my $log_file = $self->{_log_file};
4512        my ( $fh, $filename ) = Perl::Tidy::streamhandle( $log_file, 'w' );
4513        if ($fh) {
4514            my $routput_array = $self->{_output_array};
4515            foreach ( @{$routput_array} ) { $fh->print($_) }
4516            if ( $log_file ne '-' && !ref $log_file ) {
4517                eval { $fh->close() };
4518            }
4519        }
4520    }
4521}
4522
4523#####################################################################
4524#
4525# The Perl::Tidy::DevNull class supplies a dummy print method
4526#
4527#####################################################################
4528
4529package Perl::Tidy::DevNull;
4530sub new { return bless {}, $_[0] }
4531sub print { return }
4532sub close { return }
4533
4534#####################################################################
4535#
4536# The Perl::Tidy::HtmlWriter class writes a copy of the input stream in html
4537#
4538#####################################################################
4539
4540package Perl::Tidy::HtmlWriter;
4541
4542use File::Basename;
4543
4544# class variables
4545use vars qw{
4546  %html_color
4547  %html_bold
4548  %html_italic
4549  %token_short_names
4550  %short_to_long_names
4551  $rOpts
4552  $css_filename
4553  $css_linkname
4554  $missing_html_entities
4555};
4556
4557# replace unsafe characters with HTML entity representation if HTML::Entities
4558# is available
4559{ eval "use HTML::Entities"; $missing_html_entities = $@; }
4560
4561sub new {
4562
4563    my ( $class, $input_file, $html_file, $extension, $html_toc_extension,
4564        $html_src_extension )
4565      = @_;
4566
4567    my $html_file_opened = 0;
4568    my $html_fh;
4569    ( $html_fh, my $html_filename ) =
4570      Perl::Tidy::streamhandle( $html_file, 'w' );
4571    unless ($html_fh) {
4572        Perl::Tidy::Warn("can't open $html_file: $!\n");
4573        return undef;
4574    }
4575    $html_file_opened = 1;
4576
4577    if ( !$input_file || $input_file eq '-' || ref($input_file) ) {
4578        $input_file = "NONAME";
4579    }
4580
4581    # write the table of contents to a string
4582    my $toc_string;
4583    my $html_toc_fh = Perl::Tidy::IOScalar->new( \$toc_string, 'w' );
4584
4585    my $html_pre_fh;
4586    my @pre_string_stack;
4587    if ( $rOpts->{'html-pre-only'} ) {
4588
4589        # pre section goes directly to the output stream
4590        $html_pre_fh = $html_fh;
4591        $html_pre_fh->print( <<"PRE_END");
4592<pre>
4593PRE_END
4594    }
4595    else {
4596
4597        # pre section go out to a temporary string
4598        my $pre_string;
4599        $html_pre_fh = Perl::Tidy::IOScalar->new( \$pre_string, 'w' );
4600        push @pre_string_stack, \$pre_string;
4601    }
4602
4603    # pod text gets diverted if the 'pod2html' is used
4604    my $html_pod_fh;
4605    my $pod_string;
4606    if ( $rOpts->{'pod2html'} ) {
4607        if ( $rOpts->{'html-pre-only'} ) {
4608            undef $rOpts->{'pod2html'};
4609        }
4610        else {
4611            eval "use Pod::Html";
4612            if ($@) {
4613                Perl::Tidy::Warn
4614"unable to find Pod::Html; cannot use pod2html\n-npod disables this message\n";
4615                undef $rOpts->{'pod2html'};
4616            }
4617            else {
4618                $html_pod_fh = Perl::Tidy::IOScalar->new( \$pod_string, 'w' );
4619            }
4620        }
4621    }
4622
4623    my $toc_filename;
4624    my $src_filename;
4625    if ( $rOpts->{'frames'} ) {
4626        unless ($extension) {
4627            Perl::Tidy::Warn
4628"cannot use frames without a specified output extension; ignoring -frm\n";
4629            undef $rOpts->{'frames'};
4630        }
4631        else {
4632            $toc_filename = $input_file . $html_toc_extension . $extension;
4633            $src_filename = $input_file . $html_src_extension . $extension;
4634        }
4635    }
4636
4637    # ----------------------------------------------------------
4638    # Output is now directed as follows:
4639    # html_toc_fh <-- table of contents items
4640    # html_pre_fh <-- the <pre> section of formatted code, except:
4641    # html_pod_fh <-- pod goes here with the pod2html option
4642    # ----------------------------------------------------------
4643
4644    my $title = $rOpts->{'title'};
4645    unless ($title) {
4646        ( $title, my $path ) = fileparse($input_file);
4647    }
4648    my $toc_item_count = 0;
4649    my $in_toc_package = "";
4650    my $last_level     = 0;
4651    bless {
4652        _input_file        => $input_file,          # name of input file
4653        _title             => $title,               # title, unescaped
4654        _html_file         => $html_file,           # name of .html output file
4655        _toc_filename      => $toc_filename,        # for frames option
4656        _src_filename      => $src_filename,        # for frames option
4657        _html_file_opened  => $html_file_opened,    # a flag
4658        _html_fh           => $html_fh,             # the output stream
4659        _html_pre_fh       => $html_pre_fh,         # pre section goes here
4660        _rpre_string_stack => \@pre_string_stack,   # stack of pre sections
4661        _html_pod_fh       => $html_pod_fh,         # pod goes here if pod2html
4662        _rpod_string       => \$pod_string,         # string holding pod
4663        _pod_cut_count     => 0,                    # how many =cut's?
4664        _html_toc_fh       => $html_toc_fh,         # fh for table of contents
4665        _rtoc_string       => \$toc_string,         # string holding toc
4666        _rtoc_item_count   => \$toc_item_count,     # how many toc items
4667        _rin_toc_package   => \$in_toc_package,     # package name
4668        _rtoc_name_count   => {},                   # hash to track unique names
4669        _rpackage_stack    => [],                   # stack to check for package
4670                                                    # name changes
4671        _rlast_level       => \$last_level,         # brace indentation level
4672    }, $class;
4673}
4674
4675sub add_toc_item {
4676
4677    # Add an item to the html table of contents.
4678    # This is called even if no table of contents is written,
4679    # because we still want to put the anchors in the <pre> text.
4680    # We are given an anchor name and its type; types are:
4681    #      'package', 'sub', '__END__', '__DATA__', 'EOF'
4682    # There must be an 'EOF' call at the end to wrap things up.
4683    my $self = shift;
4684    my ( $name, $type ) = @_;
4685    my $html_toc_fh     = $self->{_html_toc_fh};
4686    my $html_pre_fh     = $self->{_html_pre_fh};
4687    my $rtoc_name_count = $self->{_rtoc_name_count};
4688    my $rtoc_item_count = $self->{_rtoc_item_count};
4689    my $rlast_level     = $self->{_rlast_level};
4690    my $rin_toc_package = $self->{_rin_toc_package};
4691    my $rpackage_stack  = $self->{_rpackage_stack};
4692
4693    # packages contain sublists of subs, so to avoid errors all package
4694    # items are written and finished with the following routines
4695    my $end_package_list = sub {
4696        if ($$rin_toc_package) {
4697            $html_toc_fh->print("</ul>\n</li>\n");
4698            $$rin_toc_package = "";
4699        }
4700    };
4701
4702    my $start_package_list = sub {
4703        my ( $unique_name, $package ) = @_;
4704        if ($$rin_toc_package) { $end_package_list->() }
4705        $html_toc_fh->print(<<EOM);
4706<li><a href=\"#$unique_name\">package $package</a>
4707<ul>
4708EOM
4709        $$rin_toc_package = $package;
4710    };
4711
4712    # start the table of contents on the first item
4713    unless ($$rtoc_item_count) {
4714
4715        # but just quit if we hit EOF without any other entries
4716        # in this case, there will be no toc
4717        return if ( $type eq 'EOF' );
4718        $html_toc_fh->print( <<"TOC_END");
4719<!-- BEGIN CODE INDEX --><a name="code-index"></a>
4720<ul>
4721TOC_END
4722    }
4723    $$rtoc_item_count++;
4724
4725    # make a unique anchor name for this location:
4726    #   - packages get a 'package-' prefix
4727    #   - subs use their names
4728    my $unique_name = $name;
4729    if ( $type eq 'package' ) { $unique_name = "package-$name" }
4730
4731    # append '-1', '-2', etc if necessary to make unique; this will
4732    # be unique because subs and packages cannot have a '-'
4733    if ( my $count = $rtoc_name_count->{ lc $unique_name }++ ) {
4734        $unique_name .= "-$count";
4735    }
4736
4737    #   - all names get terminal '-' if pod2html is used, to avoid
4738    #     conflicts with anchor names created by pod2html
4739    if ( $rOpts->{'pod2html'} ) { $unique_name .= '-' }
4740
4741    # start/stop lists of subs
4742    if ( $type eq 'sub' ) {
4743        my $package = $rpackage_stack->[$$rlast_level];
4744        unless ($package) { $package = 'main' }
4745
4746        # if we're already in a package/sub list, be sure its the right
4747        # package or else close it
4748        if ( $$rin_toc_package && $$rin_toc_package ne $package ) {
4749            $end_package_list->();
4750        }
4751
4752        # start a package/sub list if necessary
4753        unless ($$rin_toc_package) {
4754            $start_package_list->( $unique_name, $package );
4755        }
4756    }
4757
4758    # now write an entry in the toc for this item
4759    if ( $type eq 'package' ) {
4760        $start_package_list->( $unique_name, $name );
4761    }
4762    elsif ( $type eq 'sub' ) {
4763        $html_toc_fh->print("<li><a href=\"#$unique_name\">$name</a></li>\n");
4764    }
4765    else {
4766        $end_package_list->();
4767        $html_toc_fh->print("<li><a href=\"#$unique_name\">$name</a></li>\n");
4768    }
4769
4770    # write the anchor in the <pre> section
4771    $html_pre_fh->print("<a name=\"$unique_name\"></a>");
4772
4773    # end the table of contents, if any, on the end of file
4774    if ( $type eq 'EOF' ) {
4775        $html_toc_fh->print( <<"TOC_END");
4776</ul>
4777<!-- END CODE INDEX -->
4778TOC_END
4779    }
4780}
4781
4782BEGIN {
4783
4784    # This is the official list of tokens which may be identified by the
4785    # user.  Long names are used as getopt keys.  Short names are
4786    # convenient short abbreviations for specifying input.  Short names
4787    # somewhat resemble token type characters, but are often different
4788    # because they may only be alphanumeric, to allow command line
4789    # input.  Also, note that because of case insensitivity of html,
4790    # this table must be in a single case only (I've chosen to use all
4791    # lower case).
4792    # When adding NEW_TOKENS: update this hash table
4793    # short names => long names
4794    %short_to_long_names = (
4795        'n'  => 'numeric',
4796        'p'  => 'paren',
4797        'q'  => 'quote',
4798        's'  => 'structure',
4799        'c'  => 'comment',
4800        'v'  => 'v-string',
4801        'cm' => 'comma',
4802        'w'  => 'bareword',
4803        'co' => 'colon',
4804        'pu' => 'punctuation',
4805        'i'  => 'identifier',
4806        'j'  => 'label',
4807        'h'  => 'here-doc-target',
4808        'hh' => 'here-doc-text',
4809        'k'  => 'keyword',
4810        'sc' => 'semicolon',
4811        'm'  => 'subroutine',
4812        'pd' => 'pod-text',
4813    );
4814
4815    # Now we have to map actual token types into one of the above short
4816    # names; any token types not mapped will get 'punctuation'
4817    # properties.
4818
4819    # The values of this hash table correspond to the keys of the
4820    # previous hash table.
4821    # The keys of this hash table are token types and can be seen
4822    # by running with --dump-token-types (-dtt).
4823
4824    # When adding NEW_TOKENS: update this hash table
4825    # $type => $short_name
4826    %token_short_names = (
4827        '#'  => 'c',
4828        'n'  => 'n',
4829        'v'  => 'v',
4830        'k'  => 'k',
4831        'F'  => 'k',
4832        'Q'  => 'q',
4833        'q'  => 'q',
4834        'J'  => 'j',
4835        'j'  => 'j',
4836        'h'  => 'h',
4837        'H'  => 'hh',
4838        'w'  => 'w',
4839        ','  => 'cm',
4840        '=>' => 'cm',
4841        ';'  => 'sc',
4842        ':'  => 'co',
4843        'f'  => 'sc',
4844        '('  => 'p',
4845        ')'  => 'p',
4846        'M'  => 'm',
4847        'P'  => 'pd',
4848        'A'  => 'co',
4849    );
4850
4851    # These token types will all be called identifiers for now
4852    # FIXME: could separate user defined modules as separate type
4853    my @identifier = qw" i t U C Y Z G :: CORE::";
4854    @token_short_names{@identifier} = ('i') x scalar(@identifier);
4855
4856    # These token types will be called 'structure'
4857    my @structure = qw" { } ";
4858    @token_short_names{@structure} = ('s') x scalar(@structure);
4859
4860    # OLD NOTES: save for reference
4861    # Any of these could be added later if it would be useful.
4862    # For now, they will by default become punctuation
4863    #    my @list = qw" L R [ ] ";
4864    #    @token_long_names{@list} = ('non-structure') x scalar(@list);
4865    #
4866    #    my @list = qw"
4867    #      / /= * *= ** **= + += - -= % %= = ++ -- << <<= >> >>= pp p m mm
4868    #      ";
4869    #    @token_long_names{@list} = ('math') x scalar(@list);
4870    #
4871    #    my @list = qw" & &= ~ ~= ^ ^= | |= ";
4872    #    @token_long_names{@list} = ('bit') x scalar(@list);
4873    #
4874    #    my @list = qw" == != < > <= <=> ";
4875    #    @token_long_names{@list} = ('numerical-comparison') x scalar(@list);
4876    #
4877    #    my @list = qw" && || ! &&= ||= //= ";
4878    #    @token_long_names{@list} = ('logical') x scalar(@list);
4879    #
4880    #    my @list = qw" . .= =~ !~ x x= ";
4881    #    @token_long_names{@list} = ('string-operators') x scalar(@list);
4882    #
4883    #    # Incomplete..
4884    #    my @list = qw" .. -> <> ... \ ? ";
4885    #    @token_long_names{@list} = ('misc-operators') x scalar(@list);
4886
4887}
4888
4889sub make_getopt_long_names {
4890    my $class = shift;
4891    my ($rgetopt_names) = @_;
4892    while ( my ( $short_name, $name ) = each %short_to_long_names ) {
4893        push @$rgetopt_names, "html-color-$name=s";
4894        push @$rgetopt_names, "html-italic-$name!";
4895        push @$rgetopt_names, "html-bold-$name!";
4896    }
4897    push @$rgetopt_names, "html-color-background=s";
4898    push @$rgetopt_names, "html-linked-style-sheet=s";
4899    push @$rgetopt_names, "nohtml-style-sheets";
4900    push @$rgetopt_names, "html-pre-only";
4901    push @$rgetopt_names, "html-line-numbers";
4902    push @$rgetopt_names, "html-entities!";
4903    push @$rgetopt_names, "stylesheet";
4904    push @$rgetopt_names, "html-table-of-contents!";
4905    push @$rgetopt_names, "pod2html!";
4906    push @$rgetopt_names, "frames!";
4907    push @$rgetopt_names, "html-toc-extension=s";
4908    push @$rgetopt_names, "html-src-extension=s";
4909
4910    # Pod::Html parameters:
4911    push @$rgetopt_names, "backlink=s";
4912    push @$rgetopt_names, "cachedir=s";
4913    push @$rgetopt_names, "htmlroot=s";
4914    push @$rgetopt_names, "libpods=s";
4915    push @$rgetopt_names, "podpath=s";
4916    push @$rgetopt_names, "podroot=s";
4917    push @$rgetopt_names, "title=s";
4918
4919    # Pod::Html parameters with leading 'pod' which will be removed
4920    # before the call to Pod::Html
4921    push @$rgetopt_names, "podquiet!";
4922    push @$rgetopt_names, "podverbose!";
4923    push @$rgetopt_names, "podrecurse!";
4924    push @$rgetopt_names, "podflush";
4925    push @$rgetopt_names, "podheader!";
4926    push @$rgetopt_names, "podindex!";
4927}
4928
4929sub make_abbreviated_names {
4930
4931    # We're appending things like this to the expansion list:
4932    #      'hcc'    => [qw(html-color-comment)],
4933    #      'hck'    => [qw(html-color-keyword)],
4934    #  etc
4935    my $class = shift;
4936    my ($rexpansion) = @_;
4937
4938    # abbreviations for color/bold/italic properties
4939    while ( my ( $short_name, $long_name ) = each %short_to_long_names ) {
4940        ${$rexpansion}{"hc$short_name"}  = ["html-color-$long_name"];
4941        ${$rexpansion}{"hb$short_name"}  = ["html-bold-$long_name"];
4942        ${$rexpansion}{"hi$short_name"}  = ["html-italic-$long_name"];
4943        ${$rexpansion}{"nhb$short_name"} = ["nohtml-bold-$long_name"];
4944        ${$rexpansion}{"nhi$short_name"} = ["nohtml-italic-$long_name"];
4945    }
4946
4947    # abbreviations for all other html options
4948    ${$rexpansion}{"hcbg"}  = ["html-color-background"];
4949    ${$rexpansion}{"pre"}   = ["html-pre-only"];
4950    ${$rexpansion}{"toc"}   = ["html-table-of-contents"];
4951    ${$rexpansion}{"ntoc"}  = ["nohtml-table-of-contents"];
4952    ${$rexpansion}{"nnn"}   = ["html-line-numbers"];
4953    ${$rexpansion}{"hent"}  = ["html-entities"];
4954    ${$rexpansion}{"nhent"} = ["nohtml-entities"];
4955    ${$rexpansion}{"css"}   = ["html-linked-style-sheet"];
4956    ${$rexpansion}{"nss"}   = ["nohtml-style-sheets"];
4957    ${$rexpansion}{"ss"}    = ["stylesheet"];
4958    ${$rexpansion}{"pod"}   = ["pod2html"];
4959    ${$rexpansion}{"npod"}  = ["nopod2html"];
4960    ${$rexpansion}{"frm"}   = ["frames"];
4961    ${$rexpansion}{"nfrm"}  = ["noframes"];
4962    ${$rexpansion}{"text"}  = ["html-toc-extension"];
4963    ${$rexpansion}{"sext"}  = ["html-src-extension"];
4964}
4965
4966sub check_options {
4967
4968    # This will be called once after options have been parsed
4969    my $class = shift;
4970    $rOpts = shift;
4971
4972    # X11 color names for default settings that seemed to look ok
4973    # (these color names are only used for programming clarity; the hex
4974    # numbers are actually written)
4975    use constant ForestGreen   => "#228B22";
4976    use constant SaddleBrown   => "#8B4513";
4977    use constant magenta4      => "#8B008B";
4978    use constant IndianRed3    => "#CD5555";
4979    use constant DeepSkyBlue4  => "#00688B";
4980    use constant MediumOrchid3 => "#B452CD";
4981    use constant black         => "#000000";
4982    use constant white         => "#FFFFFF";
4983    use constant red           => "#FF0000";
4984
4985    # set default color, bold, italic properties
4986    # anything not listed here will be given the default (punctuation) color --
4987    # these types currently not listed and get default: ws pu s sc cm co p
4988    # When adding NEW_TOKENS: add an entry here if you don't want defaults
4989
4990    # set_default_properties( $short_name, default_color, bold?, italic? );
4991    set_default_properties( 'c',  ForestGreen,   0, 0 );
4992    set_default_properties( 'pd', ForestGreen,   0, 1 );
4993    set_default_properties( 'k',  magenta4,      1, 0 );    # was SaddleBrown
4994    set_default_properties( 'q',  IndianRed3,    0, 0 );
4995    set_default_properties( 'hh', IndianRed3,    0, 1 );
4996    set_default_properties( 'h',  IndianRed3,    1, 0 );
4997    set_default_properties( 'i',  DeepSkyBlue4,  0, 0 );
4998    set_default_properties( 'w',  black,         0, 0 );
4999    set_default_properties( 'n',  MediumOrchid3, 0, 0 );
5000    set_default_properties( 'v',  MediumOrchid3, 0, 0 );
5001    set_default_properties( 'j',  IndianRed3,    1, 0 );
5002    set_default_properties( 'm',  red,           1, 0 );
5003
5004    set_default_color( 'html-color-background',  white );
5005    set_default_color( 'html-color-punctuation', black );
5006
5007    # setup property lookup tables for tokens based on their short names
5008    # every token type has a short name, and will use these tables
5009    # to do the html markup
5010    while ( my ( $short_name, $long_name ) = each %short_to_long_names ) {
5011        $html_color{$short_name}  = $rOpts->{"html-color-$long_name"};
5012        $html_bold{$short_name}   = $rOpts->{"html-bold-$long_name"};
5013        $html_italic{$short_name} = $rOpts->{"html-italic-$long_name"};
5014    }
5015
5016    # write style sheet to STDOUT and die if requested
5017    if ( defined( $rOpts->{'stylesheet'} ) ) {
5018        write_style_sheet_file('-');
5019        Perl::Tidy::Exit 0;
5020    }
5021
5022    # make sure user gives a file name after -css
5023    if ( defined( $rOpts->{'html-linked-style-sheet'} ) ) {
5024        $css_linkname = $rOpts->{'html-linked-style-sheet'};
5025        if ( $css_linkname =~ /^-/ ) {
5026            Perl::Tidy::Die "You must specify a valid filename after -css\n";
5027        }
5028    }
5029
5030    # check for conflict
5031    if ( $css_linkname && $rOpts->{'nohtml-style-sheets'} ) {
5032        $rOpts->{'nohtml-style-sheets'} = 0;
5033        warning("You can't specify both -css and -nss; -nss ignored\n");
5034    }
5035
5036    # write a style sheet file if necessary
5037    if ($css_linkname) {
5038
5039        # if the selected filename exists, don't write, because user may
5040        # have done some work by hand to create it; use backup name instead
5041        # Also, this will avoid a potential disaster in which the user
5042        # forgets to specify the style sheet, like this:
5043        #    perltidy -html -css myfile1.pl myfile2.pl
5044        # This would cause myfile1.pl to parsed as the style sheet by GetOpts
5045        my $css_filename = $css_linkname;
5046        unless ( -e $css_filename ) {
5047            write_style_sheet_file($css_filename);
5048        }
5049    }
5050    $missing_html_entities = 1 unless $rOpts->{'html-entities'};
5051}
5052
5053sub write_style_sheet_file {
5054
5055    my $css_filename = shift;
5056    my $fh;
5057    unless ( $fh = IO::File->new("> $css_filename") ) {
5058        Perl::Tidy::Die "can't open $css_filename: $!\n";
5059    }
5060    write_style_sheet_data($fh);
5061    eval { $fh->close };
5062}
5063
5064sub write_style_sheet_data {
5065
5066    # write the style sheet data to an open file handle
5067    my $fh = shift;
5068
5069    my $bg_color   = $rOpts->{'html-color-background'};
5070    my $text_color = $rOpts->{'html-color-punctuation'};
5071
5072    # pre-bgcolor is new, and may not be defined
5073    my $pre_bg_color = $rOpts->{'html-pre-color-background'};
5074    $pre_bg_color = $bg_color unless $pre_bg_color;
5075
5076    $fh->print(<<"EOM");
5077/* default style sheet generated by perltidy */
5078body {background: $bg_color; color: $text_color}
5079pre { color: $text_color;
5080      background: $pre_bg_color;
5081      font-family: courier;
5082    }
5083
5084EOM
5085
5086    foreach my $short_name ( sort keys %short_to_long_names ) {
5087        my $long_name = $short_to_long_names{$short_name};
5088
5089        my $abbrev = '.' . $short_name;
5090        if ( length($short_name) == 1 ) { $abbrev .= ' ' }    # for alignment
5091        my $color = $html_color{$short_name};
5092        if ( !defined($color) ) { $color = $text_color }
5093        $fh->print("$abbrev \{ color: $color;");
5094
5095        if ( $html_bold{$short_name} ) {
5096            $fh->print(" font-weight:bold;");
5097        }
5098
5099        if ( $html_italic{$short_name} ) {
5100            $fh->print(" font-style:italic;");
5101        }
5102        $fh->print("} /* $long_name */\n");
5103    }
5104}
5105
5106sub set_default_color {
5107
5108    # make sure that options hash $rOpts->{$key} contains a valid color
5109    my ( $key, $color ) = @_;
5110    if ( $rOpts->{$key} ) { $color = $rOpts->{$key} }
5111    $rOpts->{$key} = check_RGB($color);
5112}
5113
5114sub check_RGB {
5115
5116    # if color is a 6 digit hex RGB value, prepend a #, otherwise
5117    # assume that it is a valid ascii color name
5118    my ($color) = @_;
5119    if ( $color =~ /^[0-9a-fA-F]{6,6}$/ ) { $color = "#$color" }
5120    return $color;
5121}
5122
5123sub set_default_properties {
5124    my ( $short_name, $color, $bold, $italic ) = @_;
5125
5126    set_default_color( "html-color-$short_to_long_names{$short_name}", $color );
5127    my $key;
5128    $key = "html-bold-$short_to_long_names{$short_name}";
5129    $rOpts->{$key} = ( defined $rOpts->{$key} ) ? $rOpts->{$key} : $bold;
5130    $key = "html-italic-$short_to_long_names{$short_name}";
5131    $rOpts->{$key} = ( defined $rOpts->{$key} ) ? $rOpts->{$key} : $italic;
5132}
5133
5134sub pod_to_html {
5135
5136    # Use Pod::Html to process the pod and make the page
5137    # then merge the perltidy code sections into it.
5138    # return 1 if success, 0 otherwise
5139    my $self = shift;
5140    my ( $pod_string, $css_string, $toc_string, $rpre_string_stack ) = @_;
5141    my $input_file   = $self->{_input_file};
5142    my $title        = $self->{_title};
5143    my $success_flag = 0;
5144
5145    # don't try to use pod2html if no pod
5146    unless ($pod_string) {
5147        return $success_flag;
5148    }
5149
5150    # Pod::Html requires a real temporary filename
5151    # If we are making a frame, we have a name available
5152    # Otherwise, we have to fine one
5153    my $tmpfile;
5154    if ( $rOpts->{'frames'} ) {
5155        $tmpfile = $self->{_toc_filename};
5156    }
5157    else {
5158        $tmpfile = Perl::Tidy::make_temporary_filename();
5159    }
5160    my $fh_tmp = IO::File->new( $tmpfile, 'w' );
5161    unless ($fh_tmp) {
5162        Perl::Tidy::Warn
5163          "unable to open temporary file $tmpfile; cannot use pod2html\n";
5164        return $success_flag;
5165    }
5166
5167    #------------------------------------------------------------------
5168    # Warning: a temporary file is open; we have to clean up if
5169    # things go bad.  From here on all returns should be by going to
5170    # RETURN so that the temporary file gets unlinked.
5171    #------------------------------------------------------------------
5172
5173    # write the pod text to the temporary file
5174    $fh_tmp->print($pod_string);
5175    $fh_tmp->close();
5176
5177    # Hand off the pod to pod2html.
5178    # Note that we can use the same temporary filename for input and output
5179    # because of the way pod2html works.
5180    {
5181
5182        my @args;
5183        push @args, "--infile=$tmpfile", "--outfile=$tmpfile", "--title=$title";
5184        my $kw;
5185
5186        # Flags with string args:
5187        # "backlink=s", "cachedir=s", "htmlroot=s", "libpods=s",
5188        # "podpath=s", "podroot=s"
5189        # Note: -css=s is handled by perltidy itself
5190        foreach $kw (qw(backlink cachedir htmlroot libpods podpath podroot)) {
5191            if ( $rOpts->{$kw} ) { push @args, "--$kw=$rOpts->{$kw}" }
5192        }
5193
5194        # Toggle switches; these have extra leading 'pod'
5195        # "header!", "index!", "recurse!", "quiet!", "verbose!"
5196        foreach $kw (qw(podheader podindex podrecurse podquiet podverbose)) {
5197            my $kwd = $kw;    # allows us to strip 'pod'
5198            if ( $rOpts->{$kw} ) { $kwd =~ s/^pod//; push @args, "--$kwd" }
5199            elsif ( defined( $rOpts->{$kw} ) ) {
5200                $kwd =~ s/^pod//;
5201                push @args, "--no$kwd";
5202            }
5203        }
5204
5205        # "flush",
5206        $kw = 'podflush';
5207        if ( $rOpts->{$kw} ) { $kw =~ s/^pod//; push @args, "--$kw" }
5208
5209        # Must clean up if pod2html dies (it can);
5210        # Be careful not to overwrite callers __DIE__ routine
5211        local $SIG{__DIE__} = sub {
5212            unlink $tmpfile if -e $tmpfile;
5213            Perl::Tidy::Die $_[0];
5214        };
5215
5216        pod2html(@args);
5217    }
5218    $fh_tmp = IO::File->new( $tmpfile, 'r' );
5219    unless ($fh_tmp) {
5220
5221        # this error shouldn't happen ... we just used this filename
5222        Perl::Tidy::Warn
5223          "unable to open temporary file $tmpfile; cannot use pod2html\n";
5224        goto RETURN;
5225    }
5226
5227    my $html_fh = $self->{_html_fh};
5228    my @toc;
5229    my $in_toc;
5230    my $no_print;
5231
5232    # This routine will write the html selectively and store the toc
5233    my $html_print = sub {
5234        foreach (@_) {
5235            $html_fh->print($_) unless ($no_print);
5236            if ($in_toc) { push @toc, $_ }
5237        }
5238    };
5239
5240    # loop over lines of html output from pod2html and merge in
5241    # the necessary perltidy html sections
5242    my ( $saw_body, $saw_index, $saw_body_end );
5243    while ( my $line = $fh_tmp->getline() ) {
5244
5245        if ( $line =~ /^\s*<html>\s*$/i ) {
5246            my $date = localtime;
5247            $html_print->("<!-- Generated by perltidy on $date -->\n");
5248            $html_print->($line);
5249        }
5250
5251        # Copy the perltidy css, if any, after <body> tag
5252        elsif ( $line =~ /^\s*<body.*>\s*$/i ) {
5253            $saw_body = 1;
5254            $html_print->($css_string) if $css_string;
5255            $html_print->($line);
5256
5257            # add a top anchor and heading
5258            $html_print->("<a name=\"-top-\"></a>\n");
5259            $title = escape_html($title);
5260            $html_print->("<h1>$title</h1>\n");
5261        }
5262        elsif ( $line =~ /^\s*<!-- INDEX BEGIN -->\s*$/i ) {
5263            $in_toc = 1;
5264
5265            # when frames are used, an extra table of contents in the
5266            # contents panel is confusing, so don't print it
5267            $no_print = $rOpts->{'frames'}
5268              || !$rOpts->{'html-table-of-contents'};
5269            $html_print->("<h2>Doc Index:</h2>\n") if $rOpts->{'frames'};
5270            $html_print->($line);
5271        }
5272
5273        # Copy the perltidy toc, if any, after the Pod::Html toc
5274        elsif ( $line =~ /^\s*<!-- INDEX END -->\s*$/i ) {
5275            $saw_index = 1;
5276            $html_print->($line);
5277            if ($toc_string) {
5278                $html_print->("<hr />\n") if $rOpts->{'frames'};
5279                $html_print->("<h2>Code Index:</h2>\n");
5280                my @toc = map { $_ .= "\n" } split /\n/, $toc_string;
5281                $html_print->(@toc);
5282            }
5283            $in_toc   = 0;
5284            $no_print = 0;
5285        }
5286
5287        # Copy one perltidy section after each marker
5288        elsif ( $line =~ /^(.*)<!-- pERLTIDY sECTION -->(.*)$/ ) {
5289            $line = $2;
5290            $html_print->($1) if $1;
5291
5292            # Intermingle code and pod sections if we saw multiple =cut's.
5293            if ( $self->{_pod_cut_count} > 1 ) {
5294                my $rpre_string = shift(@$rpre_string_stack);
5295                if ($$rpre_string) {
5296                    $html_print->('<pre>');
5297                    $html_print->($$rpre_string);
5298                    $html_print->('</pre>');
5299                }
5300                else {
5301
5302                    # shouldn't happen: we stored a string before writing
5303                    # each marker.
5304                    Perl::Tidy::Warn
5305"Problem merging html stream with pod2html; order may be wrong\n";
5306                }
5307                $html_print->($line);
5308            }
5309
5310            # If didn't see multiple =cut lines, we'll put the pod out first
5311            # and then the code, because it's less confusing.
5312            else {
5313
5314                # since we are not intermixing code and pod, we don't need
5315                # or want any <hr> lines which separated pod and code
5316                $html_print->($line) unless ( $line =~ /^\s*<hr>\s*$/i );
5317            }
5318        }
5319
5320        # Copy any remaining code section before the </body> tag
5321        elsif ( $line =~ /^\s*<\/body>\s*$/i ) {
5322            $saw_body_end = 1;
5323            if (@$rpre_string_stack) {
5324                unless ( $self->{_pod_cut_count} > 1 ) {
5325                    $html_print->('<hr />');
5326                }
5327                while ( my $rpre_string = shift(@$rpre_string_stack) ) {
5328                    $html_print->('<pre>');
5329                    $html_print->($$rpre_string);
5330                    $html_print->('</pre>');
5331                }
5332            }
5333            $html_print->($line);
5334        }
5335        else {
5336            $html_print->($line);
5337        }
5338    }
5339
5340    $success_flag = 1;
5341    unless ($saw_body) {
5342        Perl::Tidy::Warn "Did not see <body> in pod2html output\n";
5343        $success_flag = 0;
5344    }
5345    unless ($saw_body_end) {
5346        Perl::Tidy::Warn "Did not see </body> in pod2html output\n";
5347        $success_flag = 0;
5348    }
5349    unless ($saw_index) {
5350        Perl::Tidy::Warn "Did not find INDEX END in pod2html output\n";
5351        $success_flag = 0;
5352    }
5353
5354  RETURN:
5355    eval { $html_fh->close() };
5356
5357    # note that we have to unlink tmpfile before making frames
5358    # because the tmpfile may be one of the names used for frames
5359    unlink $tmpfile if -e $tmpfile;
5360    if ( $success_flag && $rOpts->{'frames'} ) {
5361        $self->make_frame( \@toc );
5362    }
5363    return $success_flag;
5364}
5365
5366sub make_frame {
5367
5368    # Make a frame with table of contents in the left panel
5369    # and the text in the right panel.
5370    # On entry:
5371    #  $html_filename contains the no-frames html output
5372    #  $rtoc is a reference to an array with the table of contents
5373    my $self          = shift;
5374    my ($rtoc)        = @_;
5375    my $input_file    = $self->{_input_file};
5376    my $html_filename = $self->{_html_file};
5377    my $toc_filename  = $self->{_toc_filename};
5378    my $src_filename  = $self->{_src_filename};
5379    my $title         = $self->{_title};
5380    $title = escape_html($title);
5381
5382    # FUTURE input parameter:
5383    my $top_basename = "";
5384
5385    # We need to produce 3 html files:
5386    # 1. - the table of contents
5387    # 2. - the contents (source code) itself
5388    # 3. - the frame which contains them
5389
5390    # get basenames for relative links
5391    my ( $toc_basename, $toc_path ) = fileparse($toc_filename);
5392    my ( $src_basename, $src_path ) = fileparse($src_filename);
5393
5394    # 1. Make the table of contents panel, with appropriate changes
5395    # to the anchor names
5396    my $src_frame_name = 'SRC';
5397    my $first_anchor =
5398      write_toc_html( $title, $toc_filename, $src_basename, $rtoc,
5399        $src_frame_name );
5400
5401    # 2. The current .html filename is renamed to be the contents panel
5402    rename( $html_filename, $src_filename )
5403      or Perl::Tidy::Die "Cannot rename $html_filename to $src_filename:$!\n";
5404
5405    # 3. Then use the original html filename for the frame
5406    write_frame_html(
5407        $title,        $html_filename, $top_basename,
5408        $toc_basename, $src_basename,  $src_frame_name
5409    );
5410}
5411
5412sub write_toc_html {
5413
5414    # write a separate html table of contents file for frames
5415    my ( $title, $toc_filename, $src_basename, $rtoc, $src_frame_name ) = @_;
5416    my $fh = IO::File->new( $toc_filename, 'w' )
5417      or Perl::Tidy::Die "Cannot open $toc_filename:$!\n";
5418    $fh->print(<<EOM);
5419<html>
5420<head>
5421<title>$title</title>
5422</head>
5423<body>
5424<h1><a href=\"$src_basename#-top-" target="$src_frame_name">$title</a></h1>
5425EOM
5426
5427    my $first_anchor =
5428      change_anchor_names( $rtoc, $src_basename, "$src_frame_name" );
5429    $fh->print( join "", @$rtoc );
5430
5431    $fh->print(<<EOM);
5432</body>
5433</html>
5434EOM
5435
5436}
5437
5438sub write_frame_html {
5439
5440    # write an html file to be the table of contents frame
5441    my (
5442        $title,        $frame_filename, $top_basename,
5443        $toc_basename, $src_basename,   $src_frame_name
5444    ) = @_;
5445
5446    my $fh = IO::File->new( $frame_filename, 'w' )
5447      or Perl::Tidy::Die "Cannot open $toc_basename:$!\n";
5448
5449    $fh->print(<<EOM);
5450<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Frameset//EN"
5451    "http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd">
5452<?xml version="1.0" encoding="iso-8859-1" ?>
5453<html xmlns="http://www.w3.org/1999/xhtml">
5454<head>
5455<title>$title</title>
5456</head>
5457EOM
5458
5459    # two left panels, one right, if master index file
5460    if ($top_basename) {
5461        $fh->print(<<EOM);
5462<frameset cols="20%,80%">
5463<frameset rows="30%,70%">
5464<frame src = "$top_basename" />
5465<frame src = "$toc_basename" />
5466</frameset>
5467EOM
5468    }
5469
5470    # one left panels, one right, if no master index file
5471    else {
5472        $fh->print(<<EOM);
5473<frameset cols="20%,*">
5474<frame src = "$toc_basename" />
5475EOM
5476    }
5477    $fh->print(<<EOM);
5478<frame src = "$src_basename" name = "$src_frame_name" />
5479<noframes>
5480<body>
5481<p>If you see this message, you are using a non-frame-capable web client.</p>
5482<p>This document contains:</p>
5483<ul>
5484<li><a href="$toc_basename">A table of contents</a></li>
5485<li><a href="$src_basename">The source code</a></li>
5486</ul>
5487</body>
5488</noframes>
5489</frameset>
5490</html>
5491EOM
5492}
5493
5494sub change_anchor_names {
5495
5496    # add a filename and target to anchors
5497    # also return the first anchor
5498    my ( $rlines, $filename, $target ) = @_;
5499    my $first_anchor;
5500    foreach my $line (@$rlines) {
5501
5502        #  We're looking for lines like this:
5503        #  <LI><A HREF="#synopsis">SYNOPSIS</A></LI>
5504        #  ----  -       --------  -----------------
5505        #  $1              $4            $5
5506        if ( $line =~ /^(.*)<a(.*)href\s*=\s*"([^#]*)#([^"]+)"[^>]*>(.*)$/i ) {
5507            my $pre  = $1;
5508            my $name = $4;
5509            my $post = $5;
5510            my $href = "$filename#$name";
5511            $line = "$pre<a href=\"$href\" target=\"$target\">$post\n";
5512            unless ($first_anchor) { $first_anchor = $href }
5513        }
5514    }
5515    return $first_anchor;
5516}
5517
5518sub close_html_file {
5519    my $self = shift;
5520    return unless $self->{_html_file_opened};
5521
5522    my $html_fh     = $self->{_html_fh};
5523    my $rtoc_string = $self->{_rtoc_string};
5524
5525    # There are 3 basic paths to html output...
5526
5527    # ---------------------------------
5528    # Path 1: finish up if in -pre mode
5529    # ---------------------------------
5530    if ( $rOpts->{'html-pre-only'} ) {
5531        $html_fh->print( <<"PRE_END");
5532</pre>
5533PRE_END
5534        eval { $html_fh->close() };
5535        return;
5536    }
5537
5538    # Finish the index
5539    $self->add_toc_item( 'EOF', 'EOF' );
5540
5541    my $rpre_string_stack = $self->{_rpre_string_stack};
5542
5543    # Patch to darken the <pre> background color in case of pod2html and
5544    # interleaved code/documentation.  Otherwise, the distinction
5545    # between code and documentation is blurred.
5546    if (   $rOpts->{pod2html}
5547        && $self->{_pod_cut_count} >= 1
5548        && $rOpts->{'html-color-background'} eq '#FFFFFF' )
5549    {
5550        $rOpts->{'html-pre-color-background'} = '#F0F0F0';
5551    }
5552
5553    # put the css or its link into a string, if used
5554    my $css_string;
5555    my $fh_css = Perl::Tidy::IOScalar->new( \$css_string, 'w' );
5556
5557    # use css linked to another file
5558    if ( $rOpts->{'html-linked-style-sheet'} ) {
5559        $fh_css->print(
5560            qq(<link rel="stylesheet" href="$css_linkname" type="text/css" />)
5561        );
5562    }
5563
5564    # use css embedded in this file
5565    elsif ( !$rOpts->{'nohtml-style-sheets'} ) {
5566        $fh_css->print( <<'ENDCSS');
5567<style type="text/css">
5568<!--
5569ENDCSS
5570        write_style_sheet_data($fh_css);
5571        $fh_css->print( <<"ENDCSS");
5572-->
5573</style>
5574ENDCSS
5575    }
5576
5577    # -----------------------------------------------------------
5578    # path 2: use pod2html if requested
5579    #         If we fail for some reason, continue on to path 3
5580    # -----------------------------------------------------------
5581    if ( $rOpts->{'pod2html'} ) {
5582        my $rpod_string = $self->{_rpod_string};
5583        $self->pod_to_html( $$rpod_string, $css_string, $$rtoc_string,
5584            $rpre_string_stack )
5585          && return;
5586    }
5587
5588    # --------------------------------------------------
5589    # path 3: write code in html, with pod only in italics
5590    # --------------------------------------------------
5591    my $input_file = $self->{_input_file};
5592    my $title      = escape_html($input_file);
5593    my $date       = localtime;
5594    $html_fh->print( <<"HTML_START");
5595<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
5596   "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
5597<!-- Generated by perltidy on $date -->
5598<html xmlns="http://www.w3.org/1999/xhtml">
5599<head>
5600<title>$title</title>
5601HTML_START
5602
5603    # output the css, if used
5604    if ($css_string) {
5605        $html_fh->print($css_string);
5606        $html_fh->print( <<"ENDCSS");
5607</head>
5608<body>
5609ENDCSS
5610    }
5611    else {
5612
5613        $html_fh->print( <<"HTML_START");
5614</head>
5615<body bgcolor=\"$rOpts->{'html-color-background'}\" text=\"$rOpts->{'html-color-punctuation'}\">
5616HTML_START
5617    }
5618
5619    $html_fh->print("<a name=\"-top-\"></a>\n");
5620    $html_fh->print( <<"EOM");
5621<h1>$title</h1>
5622EOM
5623
5624    # copy the table of contents
5625    if (   $$rtoc_string
5626        && !$rOpts->{'frames'}
5627        && $rOpts->{'html-table-of-contents'} )
5628    {
5629        $html_fh->print($$rtoc_string);
5630    }
5631
5632    # copy the pre section(s)
5633    my $fname_comment = $input_file;
5634    $fname_comment =~ s/--+/-/g;    # protect HTML comment tags
5635    $html_fh->print( <<"END_PRE");
5636<hr />
5637<!-- contents of filename: $fname_comment -->
5638<pre>
5639END_PRE
5640
5641    foreach my $rpre_string (@$rpre_string_stack) {
5642        $html_fh->print($$rpre_string);
5643    }
5644
5645    # and finish the html page
5646    $html_fh->print( <<"HTML_END");
5647</pre>
5648</body>
5649</html>
5650HTML_END
5651    eval { $html_fh->close() };    # could be object without close method
5652
5653    if ( $rOpts->{'frames'} ) {
5654        my @toc = map { $_ .= "\n" } split /\n/, $$rtoc_string;
5655        $self->make_frame( \@toc );
5656    }
5657}
5658
5659sub markup_tokens {
5660    my $self = shift;
5661    my ( $rtokens, $rtoken_type, $rlevels ) = @_;
5662    my ( @colored_tokens, $j, $string, $type, $token, $level );
5663    my $rlast_level    = $self->{_rlast_level};
5664    my $rpackage_stack = $self->{_rpackage_stack};
5665
5666    for ( $j = 0 ; $j < @$rtoken_type ; $j++ ) {
5667        $type  = $$rtoken_type[$j];
5668        $token = $$rtokens[$j];
5669        $level = $$rlevels[$j];
5670        $level = 0 if ( $level < 0 );
5671
5672        #-------------------------------------------------------
5673        # Update the package stack.  The package stack is needed to keep
5674        # the toc correct because some packages may be declared within
5675        # blocks and go out of scope when we leave the block.
5676        #-------------------------------------------------------
5677        if ( $level > $$rlast_level ) {
5678            unless ( $rpackage_stack->[ $level - 1 ] ) {
5679                $rpackage_stack->[ $level - 1 ] = 'main';
5680            }
5681            $rpackage_stack->[$level] = $rpackage_stack->[ $level - 1 ];
5682        }
5683        elsif ( $level < $$rlast_level ) {
5684            my $package = $rpackage_stack->[$level];
5685            unless ($package) { $package = 'main' }
5686
5687            # if we change packages due to a nesting change, we
5688            # have to make an entry in the toc
5689            if ( $package ne $rpackage_stack->[ $level + 1 ] ) {
5690                $self->add_toc_item( $package, 'package' );
5691            }
5692        }
5693        $$rlast_level = $level;
5694
5695        #-------------------------------------------------------
5696        # Intercept a sub name here; split it
5697        # into keyword 'sub' and sub name; and add an
5698        # entry in the toc
5699        #-------------------------------------------------------
5700        if ( $type eq 'i' && $token =~ /^(sub\s+)(\w.*)$/ ) {
5701            $token = $self->markup_html_element( $1, 'k' );
5702            push @colored_tokens, $token;
5703            $token = $2;
5704            $type  = 'M';
5705
5706            # but don't include sub declarations in the toc;
5707            # these wlll have leading token types 'i;'
5708            my $signature = join "", @$rtoken_type;
5709            unless ( $signature =~ /^i;/ ) {
5710                my $subname = $token;
5711                $subname =~ s/[\s\(].*$//; # remove any attributes and prototype
5712                $self->add_toc_item( $subname, 'sub' );
5713            }
5714        }
5715
5716        #-------------------------------------------------------
5717        # Intercept a package name here; split it
5718        # into keyword 'package' and name; add to the toc,
5719        # and update the package stack
5720        #-------------------------------------------------------
5721        if ( $type eq 'i' && $token =~ /^(package\s+)(\w.*)$/ ) {
5722            $token = $self->markup_html_element( $1, 'k' );
5723            push @colored_tokens, $token;
5724            $token = $2;
5725            $type  = 'i';
5726            $self->add_toc_item( "$token", 'package' );
5727            $rpackage_stack->[$level] = $token;
5728        }
5729
5730        $token = $self->markup_html_element( $token, $type );
5731        push @colored_tokens, $token;
5732    }
5733    return ( \@colored_tokens );
5734}
5735
5736sub markup_html_element {
5737    my $self = shift;
5738    my ( $token, $type ) = @_;
5739
5740    return $token if ( $type eq 'b' );         # skip a blank token
5741    return $token if ( $token =~ /^\s*$/ );    # skip a blank line
5742    $token = escape_html($token);
5743
5744    # get the short abbreviation for this token type
5745    my $short_name = $token_short_names{$type};
5746    if ( !defined($short_name) ) {
5747        $short_name = "pu";                    # punctuation is default
5748    }
5749
5750    # handle style sheets..
5751    if ( !$rOpts->{'nohtml-style-sheets'} ) {
5752        if ( $short_name ne 'pu' ) {
5753            $token = qq(<span class="$short_name">) . $token . "</span>";
5754        }
5755    }
5756
5757    # handle no style sheets..
5758    else {
5759        my $color = $html_color{$short_name};
5760
5761        if ( $color && ( $color ne $rOpts->{'html-color-punctuation'} ) ) {
5762            $token = qq(<font color="$color">) . $token . "</font>";
5763        }
5764        if ( $html_italic{$short_name} ) { $token = "<i>$token</i>" }
5765        if ( $html_bold{$short_name} )   { $token = "<b>$token</b>" }
5766    }
5767    return $token;
5768}
5769
5770sub escape_html {
5771
5772    my $token = shift;
5773    if ($missing_html_entities) {
5774        $token =~ s/\&/&amp;/g;
5775        $token =~ s/\</&lt;/g;
5776        $token =~ s/\>/&gt;/g;
5777        $token =~ s/\"/&quot;/g;
5778    }
5779    else {
5780        HTML::Entities::encode_entities($token);
5781    }
5782    return $token;
5783}
5784
5785sub finish_formatting {
5786
5787    # called after last line
5788    my $self = shift;
5789    $self->close_html_file();
5790    return;
5791}
5792
5793sub write_line {
5794
5795    my $self = shift;
5796    return unless $self->{_html_file_opened};
5797    my $html_pre_fh      = $self->{_html_pre_fh};
5798    my ($line_of_tokens) = @_;
5799    my $line_type        = $line_of_tokens->{_line_type};
5800    my $input_line       = $line_of_tokens->{_line_text};
5801    my $line_number      = $line_of_tokens->{_line_number};
5802    chomp $input_line;
5803
5804    # markup line of code..
5805    my $html_line;
5806    if ( $line_type eq 'CODE' ) {
5807        my $rtoken_type = $line_of_tokens->{_rtoken_type};
5808        my $rtokens     = $line_of_tokens->{_rtokens};
5809        my $rlevels     = $line_of_tokens->{_rlevels};
5810
5811        if ( $input_line =~ /(^\s*)/ ) {
5812            $html_line = $1;
5813        }
5814        else {
5815            $html_line = "";
5816        }
5817        my ($rcolored_tokens) =
5818          $self->markup_tokens( $rtokens, $rtoken_type, $rlevels );
5819        $html_line .= join '', @$rcolored_tokens;
5820    }
5821
5822    # markup line of non-code..
5823    else {
5824        my $line_character;
5825        if    ( $line_type eq 'HERE' )       { $line_character = 'H' }
5826        elsif ( $line_type eq 'HERE_END' )   { $line_character = 'h' }
5827        elsif ( $line_type eq 'FORMAT' )     { $line_character = 'H' }
5828        elsif ( $line_type eq 'FORMAT_END' ) { $line_character = 'h' }
5829        elsif ( $line_type eq 'SYSTEM' )     { $line_character = 'c' }
5830        elsif ( $line_type eq 'END_START' ) {
5831            $line_character = 'k';
5832            $self->add_toc_item( '__END__', '__END__' );
5833        }
5834        elsif ( $line_type eq 'DATA_START' ) {
5835            $line_character = 'k';
5836            $self->add_toc_item( '__DATA__', '__DATA__' );
5837        }
5838        elsif ( $line_type =~ /^POD/ ) {
5839            $line_character = 'P';
5840            if ( $rOpts->{'pod2html'} ) {
5841                my $html_pod_fh = $self->{_html_pod_fh};
5842                if ( $line_type eq 'POD_START' ) {
5843
5844                    my $rpre_string_stack = $self->{_rpre_string_stack};
5845                    my $rpre_string       = $rpre_string_stack->[-1];
5846
5847                    # if we have written any non-blank lines to the
5848                    # current pre section, start writing to a new output
5849                    # string
5850                    if ( $$rpre_string =~ /\S/ ) {
5851                        my $pre_string;
5852                        $html_pre_fh =
5853                          Perl::Tidy::IOScalar->new( \$pre_string, 'w' );
5854                        $self->{_html_pre_fh} = $html_pre_fh;
5855                        push @$rpre_string_stack, \$pre_string;
5856
5857                        # leave a marker in the pod stream so we know
5858                        # where to put the pre section we just
5859                        # finished.
5860                        my $for_html = '=for html';    # don't confuse pod utils
5861                        $html_pod_fh->print(<<EOM);
5862
5863$for_html
5864<!-- pERLTIDY sECTION -->
5865
5866EOM
5867                    }
5868
5869                    # otherwise, just clear the current string and start
5870                    # over
5871                    else {
5872                        $$rpre_string = "";
5873                        $html_pod_fh->print("\n");
5874                    }
5875                }
5876                $html_pod_fh->print( $input_line . "\n" );
5877                if ( $line_type eq 'POD_END' ) {
5878                    $self->{_pod_cut_count}++;
5879                    $html_pod_fh->print("\n");
5880                }
5881                return;
5882            }
5883        }
5884        else { $line_character = 'Q' }
5885        $html_line = $self->markup_html_element( $input_line, $line_character );
5886    }
5887
5888    # add the line number if requested
5889    if ( $rOpts->{'html-line-numbers'} ) {
5890        my $extra_space .=
5891            ( $line_number < 10 )   ? "   "
5892          : ( $line_number < 100 )  ? "  "
5893          : ( $line_number < 1000 ) ? " "
5894          :                           "";
5895        $html_line = $extra_space . $line_number . " " . $html_line;
5896    }
5897
5898    # write the line
5899    $html_pre_fh->print("$html_line\n");
5900}
5901
5902#####################################################################
5903#
5904# The Perl::Tidy::Formatter package adds indentation, whitespace, and
5905# line breaks to the token stream
5906#
5907# WARNING: This is not a real class for speed reasons.  Only one
5908# Formatter may be used.
5909#
5910#####################################################################
5911
5912package Perl::Tidy::Formatter;
5913
5914BEGIN {
5915
5916    # Caution: these debug flags produce a lot of output
5917    # They should all be 0 except when debugging small scripts
5918    use constant FORMATTER_DEBUG_FLAG_RECOMBINE   => 0;
5919    use constant FORMATTER_DEBUG_FLAG_BOND_TABLES => 0;
5920    use constant FORMATTER_DEBUG_FLAG_BOND        => 0;
5921    use constant FORMATTER_DEBUG_FLAG_BREAK       => 0;
5922    use constant FORMATTER_DEBUG_FLAG_CI          => 0;
5923    use constant FORMATTER_DEBUG_FLAG_FLUSH       => 0;
5924    use constant FORMATTER_DEBUG_FLAG_FORCE       => 0;
5925    use constant FORMATTER_DEBUG_FLAG_LIST        => 0;
5926    use constant FORMATTER_DEBUG_FLAG_NOBREAK     => 0;
5927    use constant FORMATTER_DEBUG_FLAG_OUTPUT      => 0;
5928    use constant FORMATTER_DEBUG_FLAG_SPARSE      => 0;
5929    use constant FORMATTER_DEBUG_FLAG_STORE       => 0;
5930    use constant FORMATTER_DEBUG_FLAG_UNDOBP      => 0;
5931    use constant FORMATTER_DEBUG_FLAG_WHITE       => 0;
5932
5933    my $debug_warning = sub {
5934        print STDOUT "FORMATTER_DEBUGGING with key $_[0]\n";
5935    };
5936
5937    FORMATTER_DEBUG_FLAG_RECOMBINE   && $debug_warning->('RECOMBINE');
5938    FORMATTER_DEBUG_FLAG_BOND_TABLES && $debug_warning->('BOND_TABLES');
5939    FORMATTER_DEBUG_FLAG_BOND        && $debug_warning->('BOND');
5940    FORMATTER_DEBUG_FLAG_BREAK       && $debug_warning->('BREAK');
5941    FORMATTER_DEBUG_FLAG_CI          && $debug_warning->('CI');
5942    FORMATTER_DEBUG_FLAG_FLUSH       && $debug_warning->('FLUSH');
5943    FORMATTER_DEBUG_FLAG_FORCE       && $debug_warning->('FORCE');
5944    FORMATTER_DEBUG_FLAG_LIST        && $debug_warning->('LIST');
5945    FORMATTER_DEBUG_FLAG_NOBREAK     && $debug_warning->('NOBREAK');
5946    FORMATTER_DEBUG_FLAG_OUTPUT      && $debug_warning->('OUTPUT');
5947    FORMATTER_DEBUG_FLAG_SPARSE      && $debug_warning->('SPARSE');
5948    FORMATTER_DEBUG_FLAG_STORE       && $debug_warning->('STORE');
5949    FORMATTER_DEBUG_FLAG_UNDOBP      && $debug_warning->('UNDOBP');
5950    FORMATTER_DEBUG_FLAG_WHITE       && $debug_warning->('WHITE');
5951}
5952
5953use Carp;
5954use vars qw{
5955
5956  @gnu_stack
5957  $max_gnu_stack_index
5958  $gnu_position_predictor
5959  $line_start_index_to_go
5960  $last_indentation_written
5961  $last_unadjusted_indentation
5962  $last_leading_token
5963  $last_output_short_opening_token
5964
5965  $saw_VERSION_in_this_file
5966  $saw_END_or_DATA_
5967
5968  @gnu_item_list
5969  $max_gnu_item_index
5970  $gnu_sequence_number
5971  $last_output_indentation
5972  %last_gnu_equals
5973  %gnu_comma_count
5974  %gnu_arrow_count
5975
5976  @block_type_to_go
5977  @type_sequence_to_go
5978  @container_environment_to_go
5979  @bond_strength_to_go
5980  @forced_breakpoint_to_go
5981  @token_lengths_to_go
5982  @summed_lengths_to_go
5983  @levels_to_go
5984  @leading_spaces_to_go
5985  @reduced_spaces_to_go
5986  @matching_token_to_go
5987  @mate_index_to_go
5988  @nesting_blocks_to_go
5989  @ci_levels_to_go
5990  @nesting_depth_to_go
5991  @nobreak_to_go
5992  @old_breakpoint_to_go
5993  @tokens_to_go
5994  @types_to_go
5995  @inext_to_go
5996  @iprev_to_go
5997
5998  %saved_opening_indentation
5999
6000  $max_index_to_go
6001  $comma_count_in_batch
6002  $old_line_count_in_batch
6003  $last_nonblank_index_to_go
6004  $last_nonblank_type_to_go
6005  $last_nonblank_token_to_go
6006  $last_last_nonblank_index_to_go
6007  $last_last_nonblank_type_to_go
6008  $last_last_nonblank_token_to_go
6009  @nonblank_lines_at_depth
6010  $starting_in_quote
6011  $ending_in_quote
6012  @whitespace_level_stack
6013  $whitespace_last_level
6014
6015  $in_format_skipping_section
6016  $format_skipping_pattern_begin
6017  $format_skipping_pattern_end
6018
6019  $forced_breakpoint_count
6020  $forced_breakpoint_undo_count
6021  @forced_breakpoint_undo_stack
6022  %postponed_breakpoint
6023
6024  $tabbing
6025  $embedded_tab_count
6026  $first_embedded_tab_at
6027  $last_embedded_tab_at
6028  $deleted_semicolon_count
6029  $first_deleted_semicolon_at
6030  $last_deleted_semicolon_at
6031  $added_semicolon_count
6032  $first_added_semicolon_at
6033  $last_added_semicolon_at
6034  $first_tabbing_disagreement
6035  $last_tabbing_disagreement
6036  $in_tabbing_disagreement
6037  $tabbing_disagreement_count
6038  $input_line_tabbing
6039
6040  $last_line_type
6041  $last_line_leading_type
6042  $last_line_leading_level
6043  $last_last_line_leading_level
6044
6045  %block_leading_text
6046  %block_opening_line_number
6047  $csc_new_statement_ok
6048  $csc_last_label
6049  %csc_block_label
6050  $accumulating_text_for_block
6051  $leading_block_text
6052  $rleading_block_if_elsif_text
6053  $leading_block_text_level
6054  $leading_block_text_length_exceeded
6055  $leading_block_text_line_length
6056  $leading_block_text_line_number
6057  $closing_side_comment_prefix_pattern
6058  $closing_side_comment_list_pattern
6059
6060  $last_nonblank_token
6061  $last_nonblank_type
6062  $last_last_nonblank_token
6063  $last_last_nonblank_type
6064  $last_nonblank_block_type
6065  $last_output_level
6066  %is_do_follower
6067  %is_if_brace_follower
6068  %space_after_keyword
6069  $rbrace_follower
6070  $looking_for_else
6071  %is_last_next_redo_return
6072  %is_other_brace_follower
6073  %is_else_brace_follower
6074  %is_anon_sub_brace_follower
6075  %is_anon_sub_1_brace_follower
6076  %is_sort_map_grep
6077  %is_sort_map_grep_eval
6078  %is_sort_map_grep_eval_do
6079  %is_block_without_semicolon
6080  %is_if_unless
6081  %is_and_or
6082  %is_assignment
6083  %is_chain_operator
6084  %is_if_unless_and_or_last_next_redo_return
6085
6086  @has_broken_sublist
6087  @dont_align
6088  @want_comma_break
6089
6090  $is_static_block_comment
6091  $index_start_one_line_block
6092  $semicolons_before_block_self_destruct
6093  $index_max_forced_break
6094  $input_line_number
6095  $diagnostics_object
6096  $vertical_aligner_object
6097  $logger_object
6098  $file_writer_object
6099  $formatter_self
6100  @ci_stack
6101  $last_line_had_side_comment
6102  %want_break_before
6103  %outdent_keyword
6104  $static_block_comment_pattern
6105  $static_side_comment_pattern
6106  %opening_vertical_tightness
6107  %closing_vertical_tightness
6108  %closing_token_indentation
6109  $some_closing_token_indentation
6110
6111  %opening_token_right
6112  %stack_opening_token
6113  %stack_closing_token
6114
6115  $block_brace_vertical_tightness_pattern
6116
6117  $rOpts_add_newlines
6118  $rOpts_add_whitespace
6119  $rOpts_block_brace_tightness
6120  $rOpts_block_brace_vertical_tightness
6121  $rOpts_brace_left_and_indent
6122  $rOpts_comma_arrow_breakpoints
6123  $rOpts_break_at_old_keyword_breakpoints
6124  $rOpts_break_at_old_comma_breakpoints
6125  $rOpts_break_at_old_logical_breakpoints
6126  $rOpts_break_at_old_ternary_breakpoints
6127  $rOpts_break_at_old_attribute_breakpoints
6128  $rOpts_closing_side_comment_else_flag
6129  $rOpts_closing_side_comment_maximum_text
6130  $rOpts_continuation_indentation
6131  $rOpts_cuddled_else
6132  $rOpts_delete_old_whitespace
6133  $rOpts_fuzzy_line_length
6134  $rOpts_indent_columns
6135  $rOpts_line_up_parentheses
6136  $rOpts_maximum_fields_per_table
6137  $rOpts_maximum_line_length
6138  $rOpts_variable_maximum_line_length
6139  $rOpts_short_concatenation_item_length
6140  $rOpts_keep_old_blank_lines
6141  $rOpts_ignore_old_breakpoints
6142  $rOpts_format_skipping
6143  $rOpts_space_function_paren
6144  $rOpts_space_keyword_paren
6145  $rOpts_keep_interior_semicolons
6146  $rOpts_ignore_side_comment_lengths
6147  $rOpts_stack_closing_block_brace
6148  $rOpts_whitespace_cycle
6149  $rOpts_tight_secret_operators
6150
6151  %is_opening_type
6152  %is_closing_type
6153  %is_keyword_returning_list
6154  %tightness
6155  %matching_token
6156  $rOpts
6157  %right_bond_strength
6158  %left_bond_strength
6159  %binary_ws_rules
6160  %want_left_space
6161  %want_right_space
6162  %is_digraph
6163  %is_trigraph
6164  $bli_pattern
6165  $bli_list_string
6166  %is_closing_type
6167  %is_opening_type
6168  %is_closing_token
6169  %is_opening_token
6170};
6171
6172BEGIN {
6173
6174    # default list of block types for which -bli would apply
6175    $bli_list_string = 'if else elsif unless while for foreach do : sub';
6176
6177    @_ = qw(
6178      .. :: << >> ** && .. || // -> => += -= .= %= &= |= ^= *= <>
6179      <= >= == =~ !~ != ++ -- /= x=
6180    );
6181    @is_digraph{@_} = (1) x scalar(@_);
6182
6183    @_ = qw( ... **= <<= >>= &&= ||= //= <=> );
6184    @is_trigraph{@_} = (1) x scalar(@_);
6185
6186    @_ = qw(
6187      = **= += *= &= <<= &&=
6188      -= /= |= >>= ||= //=
6189      .= %= ^=
6190      x=
6191    );
6192    @is_assignment{@_} = (1) x scalar(@_);
6193
6194    @_ = qw(
6195      grep
6196      keys
6197      map
6198      reverse
6199      sort
6200      split
6201    );
6202    @is_keyword_returning_list{@_} = (1) x scalar(@_);
6203
6204    @_ = qw(is if unless and or err last next redo return);
6205    @is_if_unless_and_or_last_next_redo_return{@_} = (1) x scalar(@_);
6206
6207    @_ = qw(last next redo return);
6208    @is_last_next_redo_return{@_} = (1) x scalar(@_);
6209
6210    @_ = qw(sort map grep);
6211    @is_sort_map_grep{@_} = (1) x scalar(@_);
6212
6213    @_ = qw(sort map grep eval);
6214    @is_sort_map_grep_eval{@_} = (1) x scalar(@_);
6215
6216    @_ = qw(sort map grep eval do);
6217    @is_sort_map_grep_eval_do{@_} = (1) x scalar(@_);
6218
6219    @_ = qw(if unless);
6220    @is_if_unless{@_} = (1) x scalar(@_);
6221
6222    @_ = qw(and or err);
6223    @is_and_or{@_} = (1) x scalar(@_);
6224
6225    # Identify certain operators which often occur in chains.
6226    # Note: the minus (-) causes a side effect of padding of the first line in
6227    # something like this (by sub set_logical_padding):
6228    #    Checkbutton => 'Transmission checked',
6229    #   -variable    => \$TRANS
6230    # This usually improves appearance so it seems ok.
6231    @_ = qw(&& || and or : ? . + - * /);
6232    @is_chain_operator{@_} = (1) x scalar(@_);
6233
6234    # We can remove semicolons after blocks preceded by these keywords
6235    @_ =
6236      qw(BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue if elsif else
6237      unless while until for foreach given when default);
6238    @is_block_without_semicolon{@_} = (1) x scalar(@_);
6239
6240    # 'L' is token for opening { at hash key
6241    @_ = qw" L { ( [ ";
6242    @is_opening_type{@_} = (1) x scalar(@_);
6243
6244    # 'R' is token for closing } at hash key
6245    @_ = qw" R } ) ] ";
6246    @is_closing_type{@_} = (1) x scalar(@_);
6247
6248    @_ = qw" { ( [ ";
6249    @is_opening_token{@_} = (1) x scalar(@_);
6250
6251    @_ = qw" } ) ] ";
6252    @is_closing_token{@_} = (1) x scalar(@_);
6253}
6254
6255# whitespace codes
6256use constant WS_YES      => 1;
6257use constant WS_OPTIONAL => 0;
6258use constant WS_NO       => -1;
6259
6260# Token bond strengths.
6261use constant NO_BREAK    => 10000;
6262use constant VERY_STRONG => 100;
6263use constant STRONG      => 2.1;
6264use constant NOMINAL     => 1.1;
6265use constant WEAK        => 0.8;
6266use constant VERY_WEAK   => 0.55;
6267
6268# values for testing indexes in output array
6269use constant UNDEFINED_INDEX => -1;
6270
6271# Maximum number of little messages; probably need not be changed.
6272use constant MAX_NAG_MESSAGES => 6;
6273
6274# increment between sequence numbers for each type
6275# For example, ?: pairs might have numbers 7,11,15,...
6276use constant TYPE_SEQUENCE_INCREMENT => 4;
6277
6278{
6279
6280    # methods to count instances
6281    my $_count = 0;
6282    sub get_count        { $_count; }
6283    sub _increment_count { ++$_count }
6284    sub _decrement_count { --$_count }
6285}
6286
6287sub trim {
6288
6289    # trim leading and trailing whitespace from a string
6290    $_[0] =~ s/\s+$//;
6291    $_[0] =~ s/^\s+//;
6292    return $_[0];
6293}
6294
6295sub max {
6296    my $max = shift;
6297    foreach (@_) {
6298        $max = ( $max < $_ ) ? $_ : $max;
6299    }
6300    return $max;
6301}
6302
6303sub min {
6304    my $min = shift;
6305    foreach (@_) {
6306        $min = ( $min > $_ ) ? $_ : $min;
6307    }
6308    return $min;
6309}
6310
6311sub split_words {
6312
6313    # given a string containing words separated by whitespace,
6314    # return the list of words
6315    my ($str) = @_;
6316    return unless $str;
6317    $str =~ s/\s+$//;
6318    $str =~ s/^\s+//;
6319    return split( /\s+/, $str );
6320}
6321
6322# interface to Perl::Tidy::Logger routines
6323sub warning {
6324    if ($logger_object) {
6325        $logger_object->warning(@_);
6326    }
6327}
6328
6329sub complain {
6330    if ($logger_object) {
6331        $logger_object->complain(@_);
6332    }
6333}
6334
6335sub write_logfile_entry {
6336    if ($logger_object) {
6337        $logger_object->write_logfile_entry(@_);
6338    }
6339}
6340
6341sub black_box {
6342    if ($logger_object) {
6343        $logger_object->black_box(@_);
6344    }
6345}
6346
6347sub report_definite_bug {
6348    if ($logger_object) {
6349        $logger_object->report_definite_bug();
6350    }
6351}
6352
6353sub get_saw_brace_error {
6354    if ($logger_object) {
6355        $logger_object->get_saw_brace_error();
6356    }
6357}
6358
6359sub we_are_at_the_last_line {
6360    if ($logger_object) {
6361        $logger_object->we_are_at_the_last_line();
6362    }
6363}
6364
6365# interface to Perl::Tidy::Diagnostics routine
6366sub write_diagnostics {
6367
6368    if ($diagnostics_object) {
6369        $diagnostics_object->write_diagnostics(@_);
6370    }
6371}
6372
6373sub get_added_semicolon_count {
6374    my $self = shift;
6375    return $added_semicolon_count;
6376}
6377
6378sub DESTROY {
6379    $_[0]->_decrement_count();
6380}
6381
6382sub new {
6383
6384    my $class = shift;
6385
6386    # we are given an object with a write_line() method to take lines
6387    my %defaults = (
6388        sink_object        => undef,
6389        diagnostics_object => undef,
6390        logger_object      => undef,
6391    );
6392    my %args = ( %defaults, @_ );
6393
6394    $logger_object      = $args{logger_object};
6395    $diagnostics_object = $args{diagnostics_object};
6396
6397    # we create another object with a get_line() and peek_ahead() method
6398    my $sink_object = $args{sink_object};
6399    $file_writer_object =
6400      Perl::Tidy::FileWriter->new( $sink_object, $rOpts, $logger_object );
6401
6402    # initialize the leading whitespace stack to negative levels
6403    # so that we can never run off the end of the stack
6404    $gnu_position_predictor = 0;    # where the current token is predicted to be
6405    $max_gnu_stack_index    = 0;
6406    $max_gnu_item_index     = -1;
6407    $gnu_stack[0] = new_lp_indentation_item( 0, -1, -1, 0, 0 );
6408    @gnu_item_list                   = ();
6409    $last_output_indentation         = 0;
6410    $last_indentation_written        = 0;
6411    $last_unadjusted_indentation     = 0;
6412    $last_leading_token              = "";
6413    $last_output_short_opening_token = 0;
6414
6415    $saw_VERSION_in_this_file = !$rOpts->{'pass-version-line'};
6416    $saw_END_or_DATA_         = 0;
6417
6418    @block_type_to_go            = ();
6419    @type_sequence_to_go         = ();
6420    @container_environment_to_go = ();
6421    @bond_strength_to_go         = ();
6422    @forced_breakpoint_to_go     = ();
6423    @summed_lengths_to_go        = ();    # line length to start of ith token
6424    @token_lengths_to_go         = ();
6425    @levels_to_go                = ();
6426    @matching_token_to_go        = ();
6427    @mate_index_to_go            = ();
6428    @nesting_blocks_to_go        = ();
6429    @ci_levels_to_go             = ();
6430    @nesting_depth_to_go         = (0);
6431    @nobreak_to_go               = ();
6432    @old_breakpoint_to_go        = ();
6433    @tokens_to_go                = ();
6434    @types_to_go                 = ();
6435    @leading_spaces_to_go        = ();
6436    @reduced_spaces_to_go        = ();
6437    @inext_to_go                 = ();
6438    @iprev_to_go                 = ();
6439
6440    @whitespace_level_stack = ();
6441    $whitespace_last_level  = -1;
6442
6443    @dont_align         = ();
6444    @has_broken_sublist = ();
6445    @want_comma_break   = ();
6446
6447    @ci_stack                   = ("");
6448    $first_tabbing_disagreement = 0;
6449    $last_tabbing_disagreement  = 0;
6450    $tabbing_disagreement_count = 0;
6451    $in_tabbing_disagreement    = 0;
6452    $input_line_tabbing         = undef;
6453
6454    $last_line_type               = "";
6455    $last_last_line_leading_level = 0;
6456    $last_line_leading_level      = 0;
6457    $last_line_leading_type       = '#';
6458
6459    $last_nonblank_token        = ';';
6460    $last_nonblank_type         = ';';
6461    $last_last_nonblank_token   = ';';
6462    $last_last_nonblank_type    = ';';
6463    $last_nonblank_block_type   = "";
6464    $last_output_level          = 0;
6465    $looking_for_else           = 0;
6466    $embedded_tab_count         = 0;
6467    $first_embedded_tab_at      = 0;
6468    $last_embedded_tab_at       = 0;
6469    $deleted_semicolon_count    = 0;
6470    $first_deleted_semicolon_at = 0;
6471    $last_deleted_semicolon_at  = 0;
6472    $added_semicolon_count      = 0;
6473    $first_added_semicolon_at   = 0;
6474    $last_added_semicolon_at    = 0;
6475    $last_line_had_side_comment = 0;
6476    $is_static_block_comment    = 0;
6477    %postponed_breakpoint       = ();
6478
6479    # variables for adding side comments
6480    %block_leading_text        = ();
6481    %block_opening_line_number = ();
6482    $csc_new_statement_ok      = 1;
6483    %csc_block_label           = ();
6484
6485    %saved_opening_indentation  = ();
6486    $in_format_skipping_section = 0;
6487
6488    reset_block_text_accumulator();
6489
6490    prepare_for_new_input_lines();
6491
6492    $vertical_aligner_object =
6493      Perl::Tidy::VerticalAligner->initialize( $rOpts, $file_writer_object,
6494        $logger_object, $diagnostics_object );
6495
6496    if ( $rOpts->{'entab-leading-whitespace'} ) {
6497        write_logfile_entry(
6498"Leading whitespace will be entabbed with $rOpts->{'entab-leading-whitespace'} spaces per tab\n"
6499        );
6500    }
6501    elsif ( $rOpts->{'tabs'} ) {
6502        write_logfile_entry("Indentation will be with a tab character\n");
6503    }
6504    else {
6505        write_logfile_entry(
6506            "Indentation will be with $rOpts->{'indent-columns'} spaces\n");
6507    }
6508
6509    # This was the start of a formatter referent, but object-oriented
6510    # coding has turned out to be too slow here.
6511    $formatter_self = {};
6512
6513    bless $formatter_self, $class;
6514
6515    # Safety check..this is not a class yet
6516    if ( _increment_count() > 1 ) {
6517        confess
6518"Attempt to create more than 1 object in $class, which is not a true class yet\n";
6519    }
6520    return $formatter_self;
6521}
6522
6523sub prepare_for_new_input_lines {
6524
6525    $gnu_sequence_number++;    # increment output batch counter
6526    %last_gnu_equals                = ();
6527    %gnu_comma_count                = ();
6528    %gnu_arrow_count                = ();
6529    $line_start_index_to_go         = 0;
6530    $max_gnu_item_index             = UNDEFINED_INDEX;
6531    $index_max_forced_break         = UNDEFINED_INDEX;
6532    $max_index_to_go                = UNDEFINED_INDEX;
6533    $last_nonblank_index_to_go      = UNDEFINED_INDEX;
6534    $last_nonblank_type_to_go       = '';
6535    $last_nonblank_token_to_go      = '';
6536    $last_last_nonblank_index_to_go = UNDEFINED_INDEX;
6537    $last_last_nonblank_type_to_go  = '';
6538    $last_last_nonblank_token_to_go = '';
6539    $forced_breakpoint_count        = 0;
6540    $forced_breakpoint_undo_count   = 0;
6541    $rbrace_follower                = undef;
6542    $summed_lengths_to_go[0]        = 0;
6543    $old_line_count_in_batch        = 1;
6544    $comma_count_in_batch           = 0;
6545    $starting_in_quote              = 0;
6546
6547    destroy_one_line_block();
6548}
6549
6550sub write_line {
6551
6552    my $self = shift;
6553    my ($line_of_tokens) = @_;
6554
6555    my $line_type  = $line_of_tokens->{_line_type};
6556    my $input_line = $line_of_tokens->{_line_text};
6557
6558    if ( $rOpts->{notidy} ) {
6559        write_unindented_line($input_line);
6560        $last_line_type = $line_type;
6561        return;
6562    }
6563
6564    # _line_type codes are:
6565    #   SYSTEM         - system-specific code before hash-bang line
6566    #   CODE           - line of perl code (including comments)
6567    #   POD_START      - line starting pod, such as '=head'
6568    #   POD            - pod documentation text
6569    #   POD_END        - last line of pod section, '=cut'
6570    #   HERE           - text of here-document
6571    #   HERE_END       - last line of here-doc (target word)
6572    #   FORMAT         - format section
6573    #   FORMAT_END     - last line of format section, '.'
6574    #   DATA_START     - __DATA__ line
6575    #   DATA           - unidentified text following __DATA__
6576    #   END_START      - __END__ line
6577    #   END            - unidentified text following __END__
6578    #   ERROR          - we are in big trouble, probably not a perl script
6579
6580    # put a blank line after an =cut which comes before __END__ and __DATA__
6581    # (required by podchecker)
6582    if ( $last_line_type eq 'POD_END' && !$saw_END_or_DATA_ ) {
6583        $file_writer_object->reset_consecutive_blank_lines();
6584        if ( $input_line !~ /^\s*$/ ) { want_blank_line() }
6585    }
6586
6587    # handle line of code..
6588    if ( $line_type eq 'CODE' ) {
6589
6590        # let logger see all non-blank lines of code
6591        if ( $input_line !~ /^\s*$/ ) {
6592            my $output_line_number =
6593              $vertical_aligner_object->get_output_line_number();
6594            black_box( $line_of_tokens, $output_line_number );
6595        }
6596        print_line_of_tokens($line_of_tokens);
6597    }
6598
6599    # handle line of non-code..
6600    else {
6601
6602        # set special flags
6603        my $skip_line = 0;
6604        my $tee_line  = 0;
6605        if ( $line_type =~ /^POD/ ) {
6606
6607            # Pod docs should have a preceding blank line.  But stay
6608            # out of __END__ and __DATA__ sections, because
6609            # the user may be using this section for any purpose whatsoever
6610            if ( $rOpts->{'delete-pod'} ) { $skip_line = 1; }
6611            if ( $rOpts->{'tee-pod'} )    { $tee_line  = 1; }
6612            if (  !$skip_line
6613                && $line_type eq 'POD_START'
6614                && !$saw_END_or_DATA_ )
6615            {
6616                want_blank_line();
6617            }
6618        }
6619
6620        # leave the blank counters in a predictable state
6621        # after __END__ or __DATA__
6622        elsif ( $line_type =~ /^(END_START|DATA_START)$/ ) {
6623            $file_writer_object->reset_consecutive_blank_lines();
6624            $saw_END_or_DATA_ = 1;
6625        }
6626
6627        # write unindented non-code line
6628        if ( !$skip_line ) {
6629            if ($tee_line) { $file_writer_object->tee_on() }
6630            write_unindented_line($input_line);
6631            if ($tee_line) { $file_writer_object->tee_off() }
6632        }
6633    }
6634    $last_line_type = $line_type;
6635}
6636
6637sub create_one_line_block {
6638    $index_start_one_line_block            = $_[0];
6639    $semicolons_before_block_self_destruct = $_[1];
6640}
6641
6642sub destroy_one_line_block {
6643    $index_start_one_line_block            = UNDEFINED_INDEX;
6644    $semicolons_before_block_self_destruct = 0;
6645}
6646
6647sub leading_spaces_to_go {
6648
6649    # return the number of indentation spaces for a token in the output stream;
6650    # these were previously stored by 'set_leading_whitespace'.
6651
6652    my $ii = shift;
6653    if ( $ii < 0 ) { $ii = 0 }
6654    return get_SPACES( $leading_spaces_to_go[$ii] );
6655
6656}
6657
6658sub get_SPACES {
6659
6660    # return the number of leading spaces associated with an indentation
6661    # variable $indentation is either a constant number of spaces or an object
6662    # with a get_SPACES method.
6663    my $indentation = shift;
6664    return ref($indentation) ? $indentation->get_SPACES() : $indentation;
6665}
6666
6667sub get_RECOVERABLE_SPACES {
6668
6669    # return the number of spaces (+ means shift right, - means shift left)
6670    # that we would like to shift a group of lines with the same indentation
6671    # to get them to line up with their opening parens
6672    my $indentation = shift;
6673    return ref($indentation) ? $indentation->get_RECOVERABLE_SPACES() : 0;
6674}
6675
6676sub get_AVAILABLE_SPACES_to_go {
6677
6678    my $item = $leading_spaces_to_go[ $_[0] ];
6679
6680    # return the number of available leading spaces associated with an
6681    # indentation variable.  $indentation is either a constant number of
6682    # spaces or an object with a get_AVAILABLE_SPACES method.
6683    return ref($item) ? $item->get_AVAILABLE_SPACES() : 0;
6684}
6685
6686sub new_lp_indentation_item {
6687
6688    # this is an interface to the IndentationItem class
6689    my ( $spaces, $level, $ci_level, $available_spaces, $align_paren ) = @_;
6690
6691    # A negative level implies not to store the item in the item_list
6692    my $index = 0;
6693    if ( $level >= 0 ) { $index = ++$max_gnu_item_index; }
6694
6695    my $item = Perl::Tidy::IndentationItem->new(
6696        $spaces,      $level,
6697        $ci_level,    $available_spaces,
6698        $index,       $gnu_sequence_number,
6699        $align_paren, $max_gnu_stack_index,
6700        $line_start_index_to_go,
6701    );
6702
6703    if ( $level >= 0 ) {
6704        $gnu_item_list[$max_gnu_item_index] = $item;
6705    }
6706
6707    return $item;
6708}
6709
6710sub set_leading_whitespace {
6711
6712    # This routine defines leading whitespace
6713    # given: the level and continuation_level of a token,
6714    # define: space count of leading string which would apply if it
6715    # were the first token of a new line.
6716
6717    my ( $level_abs, $ci_level, $in_continued_quote ) = @_;
6718
6719    # Adjust levels if necessary to recycle whitespace:
6720    # given $level_abs, the absolute level
6721    # define $level, a possibly reduced level for whitespace
6722    my $level = $level_abs;
6723    if ( $rOpts_whitespace_cycle && $rOpts_whitespace_cycle > 0 ) {
6724        if ( $level_abs < $whitespace_last_level ) {
6725            pop(@whitespace_level_stack);
6726        }
6727        if ( !@whitespace_level_stack ) {
6728            push @whitespace_level_stack, $level_abs;
6729        }
6730        elsif ( $level_abs > $whitespace_last_level ) {
6731            $level = $whitespace_level_stack[-1] +
6732              ( $level_abs - $whitespace_last_level );
6733
6734            if (
6735                # 1 Try to break at a block brace
6736                (
6737                       $level > $rOpts_whitespace_cycle
6738                    && $last_nonblank_type eq '{'
6739                    && $last_nonblank_token eq '{'
6740                )
6741
6742                # 2 Then either a brace or bracket
6743                || (   $level > $rOpts_whitespace_cycle + 1
6744                    && $last_nonblank_token =~ /^[\{\[]$/ )
6745
6746                # 3 Then a paren too
6747                || $level > $rOpts_whitespace_cycle + 2
6748              )
6749            {
6750                $level = 1;
6751            }
6752            push @whitespace_level_stack, $level;
6753        }
6754        $level = $whitespace_level_stack[-1];
6755    }
6756    $whitespace_last_level = $level_abs;
6757
6758    # modify for -bli, which adds one continuation indentation for
6759    # opening braces
6760    if (   $rOpts_brace_left_and_indent
6761        && $max_index_to_go == 0
6762        && $block_type_to_go[$max_index_to_go] =~ /$bli_pattern/o )
6763    {
6764        $ci_level++;
6765    }
6766
6767    # patch to avoid trouble when input file has negative indentation.
6768    # other logic should catch this error.
6769    if ( $level < 0 ) { $level = 0 }
6770
6771    #-------------------------------------------
6772    # handle the standard indentation scheme
6773    #-------------------------------------------
6774    unless ($rOpts_line_up_parentheses) {
6775        my $space_count =
6776          $ci_level * $rOpts_continuation_indentation +
6777          $level * $rOpts_indent_columns;
6778        my $ci_spaces =
6779          ( $ci_level == 0 ) ? 0 : $rOpts_continuation_indentation;
6780
6781        if ($in_continued_quote) {
6782            $space_count = 0;
6783            $ci_spaces   = 0;
6784        }
6785        $leading_spaces_to_go[$max_index_to_go] = $space_count;
6786        $reduced_spaces_to_go[$max_index_to_go] = $space_count - $ci_spaces;
6787        return;
6788    }
6789
6790    #-------------------------------------------------------------
6791    # handle case of -lp indentation..
6792    #-------------------------------------------------------------
6793
6794    # The continued_quote flag means that this is the first token of a
6795    # line, and it is the continuation of some kind of multi-line quote
6796    # or pattern.  It requires special treatment because it must have no
6797    # added leading whitespace. So we create a special indentation item
6798    # which is not in the stack.
6799    if ($in_continued_quote) {
6800        my $space_count     = 0;
6801        my $available_space = 0;
6802        $level = -1;    # flag to prevent storing in item_list
6803        $leading_spaces_to_go[$max_index_to_go] =
6804          $reduced_spaces_to_go[$max_index_to_go] =
6805          new_lp_indentation_item( $space_count, $level, $ci_level,
6806            $available_space, 0 );
6807        return;
6808    }
6809
6810    # get the top state from the stack
6811    my $space_count      = $gnu_stack[$max_gnu_stack_index]->get_SPACES();
6812    my $current_level    = $gnu_stack[$max_gnu_stack_index]->get_LEVEL();
6813    my $current_ci_level = $gnu_stack[$max_gnu_stack_index]->get_CI_LEVEL();
6814
6815    my $type        = $types_to_go[$max_index_to_go];
6816    my $token       = $tokens_to_go[$max_index_to_go];
6817    my $total_depth = $nesting_depth_to_go[$max_index_to_go];
6818
6819    if ( $type eq '{' || $type eq '(' ) {
6820
6821        $gnu_comma_count{ $total_depth + 1 } = 0;
6822        $gnu_arrow_count{ $total_depth + 1 } = 0;
6823
6824        # If we come to an opening token after an '=' token of some type,
6825        # see if it would be helpful to 'break' after the '=' to save space
6826        my $last_equals = $last_gnu_equals{$total_depth};
6827        if ( $last_equals && $last_equals > $line_start_index_to_go ) {
6828
6829            # find the position if we break at the '='
6830            my $i_test = $last_equals;
6831            if ( $types_to_go[ $i_test + 1 ] eq 'b' ) { $i_test++ }
6832
6833            # TESTING
6834            ##my $too_close = ($i_test==$max_index_to_go-1);
6835
6836            my $test_position = total_line_length( $i_test, $max_index_to_go );
6837            my $mll = maximum_line_length($i_test);
6838
6839            if (
6840
6841                # the equals is not just before an open paren (testing)
6842                ##!$too_close &&
6843
6844                # if we are beyond the midpoint
6845                $gnu_position_predictor > $mll - $rOpts_maximum_line_length / 2
6846
6847                # or we are beyont the 1/4 point and there was an old
6848                # break at the equals
6849                || (
6850                    $gnu_position_predictor >
6851                    $mll - $rOpts_maximum_line_length * 3 / 4
6852                    && (
6853                        $old_breakpoint_to_go[$last_equals]
6854                        || (   $last_equals > 0
6855                            && $old_breakpoint_to_go[ $last_equals - 1 ] )
6856                        || (   $last_equals > 1
6857                            && $types_to_go[ $last_equals - 1 ] eq 'b'
6858                            && $old_breakpoint_to_go[ $last_equals - 2 ] )
6859                    )
6860                )
6861              )
6862            {
6863
6864                # then make the switch -- note that we do not set a real
6865                # breakpoint here because we may not really need one; sub
6866                # scan_list will do that if necessary
6867                $line_start_index_to_go = $i_test + 1;
6868                $gnu_position_predictor = $test_position;
6869            }
6870        }
6871    }
6872
6873    my $halfway =
6874      maximum_line_length_for_level($level) - $rOpts_maximum_line_length / 2;
6875
6876    # Check for decreasing depth ..
6877    # Note that one token may have both decreasing and then increasing
6878    # depth. For example, (level, ci) can go from (1,1) to (2,0).  So,
6879    # in this example we would first go back to (1,0) then up to (2,0)
6880    # in a single call.
6881    if ( $level < $current_level || $ci_level < $current_ci_level ) {
6882
6883        # loop to find the first entry at or completely below this level
6884        my ( $lev, $ci_lev );
6885        while (1) {
6886            if ($max_gnu_stack_index) {
6887
6888                # save index of token which closes this level
6889                $gnu_stack[$max_gnu_stack_index]->set_CLOSED($max_index_to_go);
6890
6891                # Undo any extra indentation if we saw no commas
6892                my $available_spaces =
6893                  $gnu_stack[$max_gnu_stack_index]->get_AVAILABLE_SPACES();
6894
6895                my $comma_count = 0;
6896                my $arrow_count = 0;
6897                if ( $type eq '}' || $type eq ')' ) {
6898                    $comma_count = $gnu_comma_count{$total_depth};
6899                    $arrow_count = $gnu_arrow_count{$total_depth};
6900                    $comma_count = 0 unless $comma_count;
6901                    $arrow_count = 0 unless $arrow_count;
6902                }
6903                $gnu_stack[$max_gnu_stack_index]->set_COMMA_COUNT($comma_count);
6904                $gnu_stack[$max_gnu_stack_index]->set_ARROW_COUNT($arrow_count);
6905
6906                if ( $available_spaces > 0 ) {
6907
6908                    if ( $comma_count <= 0 || $arrow_count > 0 ) {
6909
6910                        my $i = $gnu_stack[$max_gnu_stack_index]->get_INDEX();
6911                        my $seqno =
6912                          $gnu_stack[$max_gnu_stack_index]
6913                          ->get_SEQUENCE_NUMBER();
6914
6915                        # Be sure this item was created in this batch.  This
6916                        # should be true because we delete any available
6917                        # space from open items at the end of each batch.
6918                        if (   $gnu_sequence_number != $seqno
6919                            || $i > $max_gnu_item_index )
6920                        {
6921                            warning(
6922"Program bug with -lp.  seqno=$seqno should be $gnu_sequence_number and i=$i should be less than max=$max_gnu_item_index\n"
6923                            );
6924                            report_definite_bug();
6925                        }
6926
6927                        else {
6928                            if ( $arrow_count == 0 ) {
6929                                $gnu_item_list[$i]
6930                                  ->permanently_decrease_AVAILABLE_SPACES(
6931                                    $available_spaces);
6932                            }
6933                            else {
6934                                $gnu_item_list[$i]
6935                                  ->tentatively_decrease_AVAILABLE_SPACES(
6936                                    $available_spaces);
6937                            }
6938
6939                            my $j;
6940                            for (
6941                                $j = $i + 1 ;
6942                                $j <= $max_gnu_item_index ;
6943                                $j++
6944                              )
6945                            {
6946                                $gnu_item_list[$j]
6947                                  ->decrease_SPACES($available_spaces);
6948                            }
6949                        }
6950                    }
6951                }
6952
6953                # go down one level
6954                --$max_gnu_stack_index;
6955                $lev    = $gnu_stack[$max_gnu_stack_index]->get_LEVEL();
6956                $ci_lev = $gnu_stack[$max_gnu_stack_index]->get_CI_LEVEL();
6957
6958                # stop when we reach a level at or below the current level
6959                if ( $lev <= $level && $ci_lev <= $ci_level ) {
6960                    $space_count =
6961                      $gnu_stack[$max_gnu_stack_index]->get_SPACES();
6962                    $current_level    = $lev;
6963                    $current_ci_level = $ci_lev;
6964                    last;
6965                }
6966            }
6967
6968            # reached bottom of stack .. should never happen because
6969            # only negative levels can get here, and $level was forced
6970            # to be positive above.
6971            else {
6972                warning(
6973"program bug with -lp: stack_error. level=$level; lev=$lev; ci_level=$ci_level; ci_lev=$ci_lev; rerun with -nlp\n"
6974                );
6975                report_definite_bug();
6976                last;
6977            }
6978        }
6979    }
6980
6981    # handle increasing depth
6982    if ( $level > $current_level || $ci_level > $current_ci_level ) {
6983
6984        # Compute the standard incremental whitespace.  This will be
6985        # the minimum incremental whitespace that will be used.  This
6986        # choice results in a smooth transition between the gnu-style
6987        # and the standard style.
6988        my $standard_increment =
6989          ( $level - $current_level ) * $rOpts_indent_columns +
6990          ( $ci_level - $current_ci_level ) * $rOpts_continuation_indentation;
6991
6992        # Now we have to define how much extra incremental space
6993        # ("$available_space") we want.  This extra space will be
6994        # reduced as necessary when long lines are encountered or when
6995        # it becomes clear that we do not have a good list.
6996        my $available_space = 0;
6997        my $align_paren     = 0;
6998        my $excess          = 0;
6999
7000        # initialization on empty stack..
7001        if ( $max_gnu_stack_index == 0 ) {
7002            $space_count = $level * $rOpts_indent_columns;
7003        }
7004
7005        # if this is a BLOCK, add the standard increment
7006        elsif ($last_nonblank_block_type) {
7007            $space_count += $standard_increment;
7008        }
7009
7010        # if last nonblank token was not structural indentation,
7011        # just use standard increment
7012        elsif ( $last_nonblank_type ne '{' ) {
7013            $space_count += $standard_increment;
7014        }
7015
7016        # otherwise use the space to the first non-blank level change token
7017        else {
7018
7019            $space_count = $gnu_position_predictor;
7020
7021            my $min_gnu_indentation =
7022              $gnu_stack[$max_gnu_stack_index]->get_SPACES();
7023
7024            $available_space = $space_count - $min_gnu_indentation;
7025            if ( $available_space >= $standard_increment ) {
7026                $min_gnu_indentation += $standard_increment;
7027            }
7028            elsif ( $available_space > 1 ) {
7029                $min_gnu_indentation += $available_space + 1;
7030            }
7031            elsif ( $last_nonblank_token =~ /^[\{\[\(]$/ ) {
7032                if ( ( $tightness{$last_nonblank_token} < 2 ) ) {
7033                    $min_gnu_indentation += 2;
7034                }
7035                else {
7036                    $min_gnu_indentation += 1;
7037                }
7038            }
7039            else {
7040                $min_gnu_indentation += $standard_increment;
7041            }
7042            $available_space = $space_count - $min_gnu_indentation;
7043
7044            if ( $available_space < 0 ) {
7045                $space_count     = $min_gnu_indentation;
7046                $available_space = 0;
7047            }
7048            $align_paren = 1;
7049        }
7050
7051        # update state, but not on a blank token
7052        if ( $types_to_go[$max_index_to_go] ne 'b' ) {
7053
7054            $gnu_stack[$max_gnu_stack_index]->set_HAVE_CHILD(1);
7055
7056            ++$max_gnu_stack_index;
7057            $gnu_stack[$max_gnu_stack_index] =
7058              new_lp_indentation_item( $space_count, $level, $ci_level,
7059                $available_space, $align_paren );
7060
7061            # If the opening paren is beyond the half-line length, then
7062            # we will use the minimum (standard) indentation.  This will
7063            # help avoid problems associated with running out of space
7064            # near the end of a line.  As a result, in deeply nested
7065            # lists, there will be some indentations which are limited
7066            # to this minimum standard indentation. But the most deeply
7067            # nested container will still probably be able to shift its
7068            # parameters to the right for proper alignment, so in most
7069            # cases this will not be noticeable.
7070            if ( $available_space > 0 && $space_count > $halfway ) {
7071                $gnu_stack[$max_gnu_stack_index]
7072                  ->tentatively_decrease_AVAILABLE_SPACES($available_space);
7073            }
7074        }
7075    }
7076
7077    # Count commas and look for non-list characters.  Once we see a
7078    # non-list character, we give up and don't look for any more commas.
7079    if ( $type eq '=>' ) {
7080        $gnu_arrow_count{$total_depth}++;
7081
7082        # tentatively treating '=>' like '=' for estimating breaks
7083        # TODO: this could use some experimentation
7084        $last_gnu_equals{$total_depth} = $max_index_to_go;
7085    }
7086
7087    elsif ( $type eq ',' ) {
7088        $gnu_comma_count{$total_depth}++;
7089    }
7090
7091    elsif ( $is_assignment{$type} ) {
7092        $last_gnu_equals{$total_depth} = $max_index_to_go;
7093    }
7094
7095    # this token might start a new line
7096    # if this is a non-blank..
7097    if ( $type ne 'b' ) {
7098
7099        # and if ..
7100        if (
7101
7102            # this is the first nonblank token of the line
7103            $max_index_to_go == 1 && $types_to_go[0] eq 'b'
7104
7105            # or previous character was one of these:
7106            || $last_nonblank_type_to_go =~ /^([\:\?\,f])$/
7107
7108            # or previous character was opening and this does not close it
7109            || ( $last_nonblank_type_to_go eq '{' && $type ne '}' )
7110            || ( $last_nonblank_type_to_go eq '(' and $type ne ')' )
7111
7112            # or this token is one of these:
7113            || $type =~ /^([\.]|\|\||\&\&)$/
7114
7115            # or this is a closing structure
7116            || (   $last_nonblank_type_to_go eq '}'
7117                && $last_nonblank_token_to_go eq $last_nonblank_type_to_go )
7118
7119            # or previous token was keyword 'return'
7120            || ( $last_nonblank_type_to_go eq 'k'
7121                && ( $last_nonblank_token_to_go eq 'return' && $type ne '{' ) )
7122
7123            # or starting a new line at certain keywords is fine
7124            || (   $type eq 'k'
7125                && $is_if_unless_and_or_last_next_redo_return{$token} )
7126
7127            # or this is after an assignment after a closing structure
7128            || (
7129                $is_assignment{$last_nonblank_type_to_go}
7130                && (
7131                    $last_last_nonblank_type_to_go =~ /^[\}\)\]]$/
7132
7133                    # and it is significantly to the right
7134                    || $gnu_position_predictor > $halfway
7135                )
7136            )
7137          )
7138        {
7139            check_for_long_gnu_style_lines();
7140            $line_start_index_to_go = $max_index_to_go;
7141
7142            # back up 1 token if we want to break before that type
7143            # otherwise, we may strand tokens like '?' or ':' on a line
7144            if ( $line_start_index_to_go > 0 ) {
7145                if ( $last_nonblank_type_to_go eq 'k' ) {
7146
7147                    if ( $want_break_before{$last_nonblank_token_to_go} ) {
7148                        $line_start_index_to_go--;
7149                    }
7150                }
7151                elsif ( $want_break_before{$last_nonblank_type_to_go} ) {
7152                    $line_start_index_to_go--;
7153                }
7154            }
7155        }
7156    }
7157
7158    # remember the predicted position of this token on the output line
7159    if ( $max_index_to_go > $line_start_index_to_go ) {
7160        $gnu_position_predictor =
7161          total_line_length( $line_start_index_to_go, $max_index_to_go );
7162    }
7163    else {
7164        $gnu_position_predictor =
7165          $space_count + $token_lengths_to_go[$max_index_to_go];
7166    }
7167
7168    # store the indentation object for this token
7169    # this allows us to manipulate the leading whitespace
7170    # (in case we have to reduce indentation to fit a line) without
7171    # having to change any token values
7172    $leading_spaces_to_go[$max_index_to_go] = $gnu_stack[$max_gnu_stack_index];
7173    $reduced_spaces_to_go[$max_index_to_go] =
7174      ( $max_gnu_stack_index > 0 && $ci_level )
7175      ? $gnu_stack[ $max_gnu_stack_index - 1 ]
7176      : $gnu_stack[$max_gnu_stack_index];
7177    return;
7178}
7179
7180sub check_for_long_gnu_style_lines {
7181
7182    # look at the current estimated maximum line length, and
7183    # remove some whitespace if it exceeds the desired maximum
7184
7185    # this is only for the '-lp' style
7186    return unless ($rOpts_line_up_parentheses);
7187
7188    # nothing can be done if no stack items defined for this line
7189    return if ( $max_gnu_item_index == UNDEFINED_INDEX );
7190
7191    # see if we have exceeded the maximum desired line length
7192    # keep 2 extra free because they are needed in some cases
7193    # (result of trial-and-error testing)
7194    my $spaces_needed =
7195      $gnu_position_predictor - maximum_line_length($max_index_to_go) + 2;
7196
7197    return if ( $spaces_needed <= 0 );
7198
7199    # We are over the limit, so try to remove a requested number of
7200    # spaces from leading whitespace.  We are only allowed to remove
7201    # from whitespace items created on this batch, since others have
7202    # already been used and cannot be undone.
7203    my @candidates = ();
7204    my $i;
7205
7206    # loop over all whitespace items created for the current batch
7207    for ( $i = 0 ; $i <= $max_gnu_item_index ; $i++ ) {
7208        my $item = $gnu_item_list[$i];
7209
7210        # item must still be open to be a candidate (otherwise it
7211        # cannot influence the current token)
7212        next if ( $item->get_CLOSED() >= 0 );
7213
7214        my $available_spaces = $item->get_AVAILABLE_SPACES();
7215
7216        if ( $available_spaces > 0 ) {
7217            push( @candidates, [ $i, $available_spaces ] );
7218        }
7219    }
7220
7221    return unless (@candidates);
7222
7223    # sort by available whitespace so that we can remove whitespace
7224    # from the maximum available first
7225    @candidates = sort { $b->[1] <=> $a->[1] } @candidates;
7226
7227    # keep removing whitespace until we are done or have no more
7228    my $candidate;
7229    foreach $candidate (@candidates) {
7230        my ( $i, $available_spaces ) = @{$candidate};
7231        my $deleted_spaces =
7232          ( $available_spaces > $spaces_needed )
7233          ? $spaces_needed
7234          : $available_spaces;
7235
7236        # remove the incremental space from this item
7237        $gnu_item_list[$i]->decrease_AVAILABLE_SPACES($deleted_spaces);
7238
7239        my $i_debug = $i;
7240
7241        # update the leading whitespace of this item and all items
7242        # that came after it
7243        for ( ; $i <= $max_gnu_item_index ; $i++ ) {
7244
7245            my $old_spaces = $gnu_item_list[$i]->get_SPACES();
7246            if ( $old_spaces >= $deleted_spaces ) {
7247                $gnu_item_list[$i]->decrease_SPACES($deleted_spaces);
7248            }
7249
7250            # shouldn't happen except for code bug:
7251            else {
7252                my $level        = $gnu_item_list[$i_debug]->get_LEVEL();
7253                my $ci_level     = $gnu_item_list[$i_debug]->get_CI_LEVEL();
7254                my $old_level    = $gnu_item_list[$i]->get_LEVEL();
7255                my $old_ci_level = $gnu_item_list[$i]->get_CI_LEVEL();
7256                warning(
7257"program bug with -lp: want to delete $deleted_spaces from item $i, but old=$old_spaces deleted: lev=$level ci=$ci_level  deleted: level=$old_level ci=$ci_level\n"
7258                );
7259                report_definite_bug();
7260            }
7261        }
7262        $gnu_position_predictor -= $deleted_spaces;
7263        $spaces_needed          -= $deleted_spaces;
7264        last unless ( $spaces_needed > 0 );
7265    }
7266}
7267
7268sub finish_lp_batch {
7269
7270    # This routine is called once after each each output stream batch is
7271    # finished to undo indentation for all incomplete -lp
7272    # indentation levels.  It is too risky to leave a level open,
7273    # because then we can't backtrack in case of a long line to follow.
7274    # This means that comments and blank lines will disrupt this
7275    # indentation style.  But the vertical aligner may be able to
7276    # get the space back if there are side comments.
7277
7278    # this is only for the 'lp' style
7279    return unless ($rOpts_line_up_parentheses);
7280
7281    # nothing can be done if no stack items defined for this line
7282    return if ( $max_gnu_item_index == UNDEFINED_INDEX );
7283
7284    # loop over all whitespace items created for the current batch
7285    my $i;
7286    for ( $i = 0 ; $i <= $max_gnu_item_index ; $i++ ) {
7287        my $item = $gnu_item_list[$i];
7288
7289        # only look for open items
7290        next if ( $item->get_CLOSED() >= 0 );
7291
7292        # Tentatively remove all of the available space
7293        # (The vertical aligner will try to get it back later)
7294        my $available_spaces = $item->get_AVAILABLE_SPACES();
7295        if ( $available_spaces > 0 ) {
7296
7297            # delete incremental space for this item
7298            $gnu_item_list[$i]
7299              ->tentatively_decrease_AVAILABLE_SPACES($available_spaces);
7300
7301            # Reduce the total indentation space of any nodes that follow
7302            # Note that any such nodes must necessarily be dependents
7303            # of this node.
7304            foreach ( $i + 1 .. $max_gnu_item_index ) {
7305                $gnu_item_list[$_]->decrease_SPACES($available_spaces);
7306            }
7307        }
7308    }
7309    return;
7310}
7311
7312sub reduce_lp_indentation {
7313
7314    # reduce the leading whitespace at token $i if possible by $spaces_needed
7315    # (a large value of $spaces_needed will remove all excess space)
7316    # NOTE: to be called from scan_list only for a sequence of tokens
7317    # contained between opening and closing parens/braces/brackets
7318
7319    my ( $i, $spaces_wanted ) = @_;
7320    my $deleted_spaces = 0;
7321
7322    my $item             = $leading_spaces_to_go[$i];
7323    my $available_spaces = $item->get_AVAILABLE_SPACES();
7324
7325    if (
7326        $available_spaces > 0
7327        && ( ( $spaces_wanted <= $available_spaces )
7328            || !$item->get_HAVE_CHILD() )
7329      )
7330    {
7331
7332        # we'll remove these spaces, but mark them as recoverable
7333        $deleted_spaces =
7334          $item->tentatively_decrease_AVAILABLE_SPACES($spaces_wanted);
7335    }
7336
7337    return $deleted_spaces;
7338}
7339
7340sub token_sequence_length {
7341
7342    # return length of tokens ($ibeg .. $iend) including $ibeg & $iend
7343    # returns 0 if $ibeg > $iend (shouldn't happen)
7344    my ( $ibeg, $iend ) = @_;
7345    return 0 if ( $iend < 0 || $ibeg > $iend );
7346    return $summed_lengths_to_go[ $iend + 1 ] if ( $ibeg < 0 );
7347    return $summed_lengths_to_go[ $iend + 1 ] - $summed_lengths_to_go[$ibeg];
7348}
7349
7350sub total_line_length {
7351
7352    # return length of a line of tokens ($ibeg .. $iend)
7353    my ( $ibeg, $iend ) = @_;
7354    return leading_spaces_to_go($ibeg) + token_sequence_length( $ibeg, $iend );
7355}
7356
7357sub maximum_line_length_for_level {
7358
7359    # return maximum line length for line starting with a given level
7360    my $maximum_line_length = $rOpts_maximum_line_length;
7361
7362    # Modify if -vmll option is selected
7363    if ($rOpts_variable_maximum_line_length) {
7364        my $level = shift;
7365        if ( $level < 0 ) { $level = 0 }
7366        $maximum_line_length += $level * $rOpts_indent_columns;
7367    }
7368    return $maximum_line_length;
7369}
7370
7371sub maximum_line_length {
7372
7373    # return maximum line length for line starting with the token at given index
7374    return maximum_line_length_for_level( $levels_to_go[ $_[0] ] );
7375
7376}
7377
7378sub excess_line_length {
7379
7380    # return number of characters by which a line of tokens ($ibeg..$iend)
7381    # exceeds the allowable line length.
7382    my ( $ibeg, $iend ) = @_;
7383    return total_line_length( $ibeg, $iend ) - maximum_line_length($ibeg);
7384}
7385
7386sub finish_formatting {
7387
7388    # flush buffer and write any informative messages
7389    my $self = shift;
7390
7391    flush();
7392    $file_writer_object->decrement_output_line_number()
7393      ;    # fix up line number since it was incremented
7394    we_are_at_the_last_line();
7395    if ( $added_semicolon_count > 0 ) {
7396        my $first = ( $added_semicolon_count > 1 ) ? "First" : "";
7397        my $what =
7398          ( $added_semicolon_count > 1 ) ? "semicolons were" : "semicolon was";
7399        write_logfile_entry("$added_semicolon_count $what added:\n");
7400        write_logfile_entry(
7401            "  $first at input line $first_added_semicolon_at\n");
7402
7403        if ( $added_semicolon_count > 1 ) {
7404            write_logfile_entry(
7405                "   Last at input line $last_added_semicolon_at\n");
7406        }
7407        write_logfile_entry("  (Use -nasc to prevent semicolon addition)\n");
7408        write_logfile_entry("\n");
7409    }
7410
7411    if ( $deleted_semicolon_count > 0 ) {
7412        my $first = ( $deleted_semicolon_count > 1 ) ? "First" : "";
7413        my $what =
7414          ( $deleted_semicolon_count > 1 )
7415          ? "semicolons were"
7416          : "semicolon was";
7417        write_logfile_entry(
7418            "$deleted_semicolon_count unnecessary $what deleted:\n");
7419        write_logfile_entry(
7420            "  $first at input line $first_deleted_semicolon_at\n");
7421
7422        if ( $deleted_semicolon_count > 1 ) {
7423            write_logfile_entry(
7424                "   Last at input line $last_deleted_semicolon_at\n");
7425        }
7426        write_logfile_entry("  (Use -ndsc to prevent semicolon deletion)\n");
7427        write_logfile_entry("\n");
7428    }
7429
7430    if ( $embedded_tab_count > 0 ) {
7431        my $first = ( $embedded_tab_count > 1 ) ? "First" : "";
7432        my $what =
7433          ( $embedded_tab_count > 1 )
7434          ? "quotes or patterns"
7435          : "quote or pattern";
7436        write_logfile_entry("$embedded_tab_count $what had embedded tabs:\n");
7437        write_logfile_entry(
7438"This means the display of this script could vary with device or software\n"
7439        );
7440        write_logfile_entry("  $first at input line $first_embedded_tab_at\n");
7441
7442        if ( $embedded_tab_count > 1 ) {
7443            write_logfile_entry(
7444                "   Last at input line $last_embedded_tab_at\n");
7445        }
7446        write_logfile_entry("\n");
7447    }
7448
7449    if ($first_tabbing_disagreement) {
7450        write_logfile_entry(
7451"First indentation disagreement seen at input line $first_tabbing_disagreement\n"
7452        );
7453    }
7454
7455    if ($in_tabbing_disagreement) {
7456        write_logfile_entry(
7457"Ending with indentation disagreement which started at input line $in_tabbing_disagreement\n"
7458        );
7459    }
7460    else {
7461
7462        if ($last_tabbing_disagreement) {
7463
7464            write_logfile_entry(
7465"Last indentation disagreement seen at input line $last_tabbing_disagreement\n"
7466            );
7467        }
7468        else {
7469            write_logfile_entry("No indentation disagreement seen\n");
7470        }
7471    }
7472    if ($first_tabbing_disagreement) {
7473        write_logfile_entry(
7474"Note: Indentation disagreement detection is not accurate for outdenting and -lp.\n"
7475        );
7476    }
7477    write_logfile_entry("\n");
7478
7479    $vertical_aligner_object->report_anything_unusual();
7480
7481    $file_writer_object->report_line_length_errors();
7482}
7483
7484sub check_options {
7485
7486    # This routine is called to check the Opts hash after it is defined
7487
7488    ($rOpts) = @_;
7489
7490    make_static_block_comment_pattern();
7491    make_static_side_comment_pattern();
7492    make_closing_side_comment_prefix();
7493    make_closing_side_comment_list_pattern();
7494    $format_skipping_pattern_begin =
7495      make_format_skipping_pattern( 'format-skipping-begin', '#<<<' );
7496    $format_skipping_pattern_end =
7497      make_format_skipping_pattern( 'format-skipping-end', '#>>>' );
7498
7499    # If closing side comments ARE selected, then we can safely
7500    # delete old closing side comments unless closing side comment
7501    # warnings are requested.  This is a good idea because it will
7502    # eliminate any old csc's which fall below the line count threshold.
7503    # We cannot do this if warnings are turned on, though, because we
7504    # might delete some text which has been added.  So that must
7505    # be handled when comments are created.
7506    if ( $rOpts->{'closing-side-comments'} ) {
7507        if ( !$rOpts->{'closing-side-comment-warnings'} ) {
7508            $rOpts->{'delete-closing-side-comments'} = 1;
7509        }
7510    }
7511
7512    # If closing side comments ARE NOT selected, but warnings ARE
7513    # selected and we ARE DELETING csc's, then we will pretend to be
7514    # adding with a huge interval.  This will force the comments to be
7515    # generated for comparison with the old comments, but not added.
7516    elsif ( $rOpts->{'closing-side-comment-warnings'} ) {
7517        if ( $rOpts->{'delete-closing-side-comments'} ) {
7518            $rOpts->{'delete-closing-side-comments'}  = 0;
7519            $rOpts->{'closing-side-comments'}         = 1;
7520            $rOpts->{'closing-side-comment-interval'} = 100000000;
7521        }
7522    }
7523
7524    make_bli_pattern();
7525    make_block_brace_vertical_tightness_pattern();
7526
7527    if ( $rOpts->{'line-up-parentheses'} ) {
7528
7529        if (   $rOpts->{'indent-only'}
7530            || !$rOpts->{'add-newlines'}
7531            || !$rOpts->{'delete-old-newlines'} )
7532        {
7533            Perl::Tidy::Warn <<EOM;
7534-----------------------------------------------------------------------
7535Conflict: -lp  conflicts with -io, -fnl, -nanl, or -ndnl; ignoring -lp
7536
7537The -lp indentation logic requires that perltidy be able to coordinate
7538arbitrarily large numbers of line breakpoints.  This isn't possible
7539with these flags. Sometimes an acceptable workaround is to use -wocb=3
7540-----------------------------------------------------------------------
7541EOM
7542            $rOpts->{'line-up-parentheses'} = 0;
7543        }
7544    }
7545
7546    # At present, tabs are not compatible with the line-up-parentheses style
7547    # (it would be possible to entab the total leading whitespace
7548    # just prior to writing the line, if desired).
7549    if ( $rOpts->{'line-up-parentheses'} && $rOpts->{'tabs'} ) {
7550        Perl::Tidy::Warn <<EOM;
7551Conflict: -t (tabs) cannot be used with the -lp  option; ignoring -t; see -et.
7552EOM
7553        $rOpts->{'tabs'} = 0;
7554    }
7555
7556    # Likewise, tabs are not compatible with outdenting..
7557    if ( $rOpts->{'outdent-keywords'} && $rOpts->{'tabs'} ) {
7558        Perl::Tidy::Warn <<EOM;
7559Conflict: -t (tabs) cannot be used with the -okw options; ignoring -t; see -et.
7560EOM
7561        $rOpts->{'tabs'} = 0;
7562    }
7563
7564    if ( $rOpts->{'outdent-labels'} && $rOpts->{'tabs'} ) {
7565        Perl::Tidy::Warn <<EOM;
7566Conflict: -t (tabs) cannot be used with the -ola  option; ignoring -t; see -et.
7567EOM
7568        $rOpts->{'tabs'} = 0;
7569    }
7570
7571    if ( !$rOpts->{'space-for-semicolon'} ) {
7572        $want_left_space{'f'} = -1;
7573    }
7574
7575    if ( $rOpts->{'space-terminal-semicolon'} ) {
7576        $want_left_space{';'} = 1;
7577    }
7578
7579    # implement outdenting preferences for keywords
7580    %outdent_keyword = ();
7581    unless ( @_ = split_words( $rOpts->{'outdent-keyword-okl'} ) ) {
7582        @_ = qw(next last redo goto return);    # defaults
7583    }
7584
7585    # FUTURE: if not a keyword, assume that it is an identifier
7586    foreach (@_) {
7587        if ( $Perl::Tidy::Tokenizer::is_keyword{$_} ) {
7588            $outdent_keyword{$_} = 1;
7589        }
7590        else {
7591            Perl::Tidy::Warn "ignoring '$_' in -okwl list; not a perl keyword";
7592        }
7593    }
7594
7595    # implement user whitespace preferences
7596    if ( @_ = split_words( $rOpts->{'want-left-space'} ) ) {
7597        @want_left_space{@_} = (1) x scalar(@_);
7598    }
7599
7600    if ( @_ = split_words( $rOpts->{'want-right-space'} ) ) {
7601        @want_right_space{@_} = (1) x scalar(@_);
7602    }
7603
7604    if ( @_ = split_words( $rOpts->{'nowant-left-space'} ) ) {
7605        @want_left_space{@_} = (-1) x scalar(@_);
7606    }
7607
7608    if ( @_ = split_words( $rOpts->{'nowant-right-space'} ) ) {
7609        @want_right_space{@_} = (-1) x scalar(@_);
7610    }
7611    if ( $rOpts->{'dump-want-left-space'} ) {
7612        dump_want_left_space(*STDOUT);
7613        Perl::Tidy::Exit 0;
7614    }
7615
7616    if ( $rOpts->{'dump-want-right-space'} ) {
7617        dump_want_right_space(*STDOUT);
7618        Perl::Tidy::Exit 0;
7619    }
7620
7621    # default keywords for which space is introduced before an opening paren
7622    # (at present, including them messes up vertical alignment)
7623    @_ = qw(my local our and or err eq ne if else elsif until
7624      unless while for foreach return switch case given when);
7625    @space_after_keyword{@_} = (1) x scalar(@_);
7626
7627    # first remove any or all of these if desired
7628    if ( @_ = split_words( $rOpts->{'nospace-after-keyword'} ) ) {
7629
7630        # -nsak='*' selects all the above keywords
7631        if ( @_ == 1 && $_[0] eq '*' ) { @_ = keys(%space_after_keyword) }
7632        @space_after_keyword{@_} = (0) x scalar(@_);
7633    }
7634
7635    # then allow user to add to these defaults
7636    if ( @_ = split_words( $rOpts->{'space-after-keyword'} ) ) {
7637        @space_after_keyword{@_} = (1) x scalar(@_);
7638    }
7639
7640    # implement user break preferences
7641    my @all_operators = qw(% + - * / x != == >= <= =~ !~ < > | &
7642      = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x=
7643      . : ? && || and or err xor
7644    );
7645
7646    my $break_after = sub {
7647        foreach my $tok (@_) {
7648            if ( $tok eq '?' ) { $tok = ':' }    # patch to coordinate ?/:
7649            my $lbs = $left_bond_strength{$tok};
7650            my $rbs = $right_bond_strength{$tok};
7651            if ( defined($lbs) && defined($rbs) && $lbs < $rbs ) {
7652                ( $right_bond_strength{$tok}, $left_bond_strength{$tok} ) =
7653                  ( $lbs, $rbs );
7654            }
7655        }
7656    };
7657
7658    my $break_before = sub {
7659        foreach my $tok (@_) {
7660            my $lbs = $left_bond_strength{$tok};
7661            my $rbs = $right_bond_strength{$tok};
7662            if ( defined($lbs) && defined($rbs) && $rbs < $lbs ) {
7663                ( $right_bond_strength{$tok}, $left_bond_strength{$tok} ) =
7664                  ( $lbs, $rbs );
7665            }
7666        }
7667    };
7668
7669    $break_after->(@all_operators) if ( $rOpts->{'break-after-all-operators'} );
7670    $break_before->(@all_operators)
7671      if ( $rOpts->{'break-before-all-operators'} );
7672
7673    $break_after->( split_words( $rOpts->{'want-break-after'} ) );
7674    $break_before->( split_words( $rOpts->{'want-break-before'} ) );
7675
7676    # make note if breaks are before certain key types
7677    %want_break_before = ();
7678    foreach my $tok ( @all_operators, ',' ) {
7679        $want_break_before{$tok} =
7680          $left_bond_strength{$tok} < $right_bond_strength{$tok};
7681    }
7682
7683    # Coordinate ?/: breaks, which must be similar
7684    if ( !$want_break_before{':'} ) {
7685        $want_break_before{'?'}   = $want_break_before{':'};
7686        $right_bond_strength{'?'} = $right_bond_strength{':'} + 0.01;
7687        $left_bond_strength{'?'}  = NO_BREAK;
7688    }
7689
7690    # Define here tokens which may follow the closing brace of a do statement
7691    # on the same line, as in:
7692    #   } while ( $something);
7693    @_ = qw(until while unless if ; : );
7694    push @_, ',';
7695    @is_do_follower{@_} = (1) x scalar(@_);
7696
7697    # These tokens may follow the closing brace of an if or elsif block.
7698    # In other words, for cuddled else we want code to look like:
7699    #   } elsif ( $something) {
7700    #   } else {
7701    if ( $rOpts->{'cuddled-else'} ) {
7702        @_ = qw(else elsif);
7703        @is_if_brace_follower{@_} = (1) x scalar(@_);
7704    }
7705    else {
7706        %is_if_brace_follower = ();
7707    }
7708
7709    # nothing can follow the closing curly of an else { } block:
7710    %is_else_brace_follower = ();
7711
7712    # what can follow a multi-line anonymous sub definition closing curly:
7713    @_ = qw# ; : => or and  && || ~~ !~~ ) #;
7714    push @_, ',';
7715    @is_anon_sub_brace_follower{@_} = (1) x scalar(@_);
7716
7717    # what can follow a one-line anonynomous sub closing curly:
7718    # one-line anonumous subs also have ']' here...
7719    # see tk3.t and PP.pm
7720    @_ = qw#  ; : => or and  && || ) ] ~~ !~~ #;
7721    push @_, ',';
7722    @is_anon_sub_1_brace_follower{@_} = (1) x scalar(@_);
7723
7724    # What can follow a closing curly of a block
7725    # which is not an if/elsif/else/do/sort/map/grep/eval/sub
7726    # Testfiles: 'Toolbar.pm', 'Menubar.pm', bless.t, '3rules.pl'
7727    @_ = qw#  ; : => or and  && || ) #;
7728    push @_, ',';
7729
7730    # allow cuddled continue if cuddled else is specified
7731    if ( $rOpts->{'cuddled-else'} ) { push @_, 'continue'; }
7732
7733    @is_other_brace_follower{@_} = (1) x scalar(@_);
7734
7735    $right_bond_strength{'{'} = WEAK;
7736    $left_bond_strength{'{'}  = VERY_STRONG;
7737
7738    # make -l=0  equal to -l=infinite
7739    if ( !$rOpts->{'maximum-line-length'} ) {
7740        $rOpts->{'maximum-line-length'} = 1000000;
7741    }
7742
7743    # make -lbl=0  equal to -lbl=infinite
7744    if ( !$rOpts->{'long-block-line-count'} ) {
7745        $rOpts->{'long-block-line-count'} = 1000000;
7746    }
7747
7748    my $ole = $rOpts->{'output-line-ending'};
7749    if ($ole) {
7750        my %endings = (
7751            dos  => "\015\012",
7752            win  => "\015\012",
7753            mac  => "\015",
7754            unix => "\012",
7755        );
7756        $ole = lc $ole;
7757        unless ( $rOpts->{'output-line-ending'} = $endings{$ole} ) {
7758            my $str = join " ", keys %endings;
7759            Perl::Tidy::Die <<EOM;
7760Unrecognized line ending '$ole'; expecting one of: $str
7761EOM
7762        }
7763        if ( $rOpts->{'preserve-line-endings'} ) {
7764            Perl::Tidy::Warn "Ignoring -ple; conflicts with -ole\n";
7765            $rOpts->{'preserve-line-endings'} = undef;
7766        }
7767    }
7768
7769    # hashes used to simplify setting whitespace
7770    %tightness = (
7771        '{' => $rOpts->{'brace-tightness'},
7772        '}' => $rOpts->{'brace-tightness'},
7773        '(' => $rOpts->{'paren-tightness'},
7774        ')' => $rOpts->{'paren-tightness'},
7775        '[' => $rOpts->{'square-bracket-tightness'},
7776        ']' => $rOpts->{'square-bracket-tightness'},
7777    );
7778    %matching_token = (
7779        '{' => '}',
7780        '(' => ')',
7781        '[' => ']',
7782        '?' => ':',
7783    );
7784
7785    # frequently used parameters
7786    $rOpts_add_newlines          = $rOpts->{'add-newlines'};
7787    $rOpts_add_whitespace        = $rOpts->{'add-whitespace'};
7788    $rOpts_block_brace_tightness = $rOpts->{'block-brace-tightness'};
7789    $rOpts_block_brace_vertical_tightness =
7790      $rOpts->{'block-brace-vertical-tightness'};
7791    $rOpts_brace_left_and_indent   = $rOpts->{'brace-left-and-indent'};
7792    $rOpts_comma_arrow_breakpoints = $rOpts->{'comma-arrow-breakpoints'};
7793    $rOpts_break_at_old_ternary_breakpoints =
7794      $rOpts->{'break-at-old-ternary-breakpoints'};
7795    $rOpts_break_at_old_attribute_breakpoints =
7796      $rOpts->{'break-at-old-attribute-breakpoints'};
7797    $rOpts_break_at_old_comma_breakpoints =
7798      $rOpts->{'break-at-old-comma-breakpoints'};
7799    $rOpts_break_at_old_keyword_breakpoints =
7800      $rOpts->{'break-at-old-keyword-breakpoints'};
7801    $rOpts_break_at_old_logical_breakpoints =
7802      $rOpts->{'break-at-old-logical-breakpoints'};
7803    $rOpts_closing_side_comment_else_flag =
7804      $rOpts->{'closing-side-comment-else-flag'};
7805    $rOpts_closing_side_comment_maximum_text =
7806      $rOpts->{'closing-side-comment-maximum-text'};
7807    $rOpts_continuation_indentation = $rOpts->{'continuation-indentation'};
7808    $rOpts_cuddled_else             = $rOpts->{'cuddled-else'};
7809    $rOpts_delete_old_whitespace    = $rOpts->{'delete-old-whitespace'};
7810    $rOpts_fuzzy_line_length        = $rOpts->{'fuzzy-line-length'};
7811    $rOpts_indent_columns           = $rOpts->{'indent-columns'};
7812    $rOpts_line_up_parentheses      = $rOpts->{'line-up-parentheses'};
7813    $rOpts_maximum_fields_per_table = $rOpts->{'maximum-fields-per-table'};
7814    $rOpts_maximum_line_length      = $rOpts->{'maximum-line-length'};
7815    $rOpts_whitespace_cycle         = $rOpts->{'whitespace-cycle'};
7816
7817    $rOpts_variable_maximum_line_length =
7818      $rOpts->{'variable-maximum-line-length'};
7819    $rOpts_short_concatenation_item_length =
7820      $rOpts->{'short-concatenation-item-length'};
7821
7822    $rOpts_keep_old_blank_lines     = $rOpts->{'keep-old-blank-lines'};
7823    $rOpts_ignore_old_breakpoints   = $rOpts->{'ignore-old-breakpoints'};
7824    $rOpts_format_skipping          = $rOpts->{'format-skipping'};
7825    $rOpts_space_function_paren     = $rOpts->{'space-function-paren'};
7826    $rOpts_space_keyword_paren      = $rOpts->{'space-keyword-paren'};
7827    $rOpts_keep_interior_semicolons = $rOpts->{'keep-interior-semicolons'};
7828    $rOpts_ignore_side_comment_lengths =
7829      $rOpts->{'ignore-side-comment-lengths'};
7830
7831    # Note that both opening and closing tokens can access the opening
7832    # and closing flags of their container types.
7833    %opening_vertical_tightness = (
7834        '(' => $rOpts->{'paren-vertical-tightness'},
7835        '{' => $rOpts->{'brace-vertical-tightness'},
7836        '[' => $rOpts->{'square-bracket-vertical-tightness'},
7837        ')' => $rOpts->{'paren-vertical-tightness'},
7838        '}' => $rOpts->{'brace-vertical-tightness'},
7839        ']' => $rOpts->{'square-bracket-vertical-tightness'},
7840    );
7841
7842    %closing_vertical_tightness = (
7843        '(' => $rOpts->{'paren-vertical-tightness-closing'},
7844        '{' => $rOpts->{'brace-vertical-tightness-closing'},
7845        '[' => $rOpts->{'square-bracket-vertical-tightness-closing'},
7846        ')' => $rOpts->{'paren-vertical-tightness-closing'},
7847        '}' => $rOpts->{'brace-vertical-tightness-closing'},
7848        ']' => $rOpts->{'square-bracket-vertical-tightness-closing'},
7849    );
7850
7851    $rOpts_tight_secret_operators = $rOpts->{'tight-secret-operators'};
7852
7853    # assume flag for '>' same as ')' for closing qw quotes
7854    %closing_token_indentation = (
7855        ')' => $rOpts->{'closing-paren-indentation'},
7856        '}' => $rOpts->{'closing-brace-indentation'},
7857        ']' => $rOpts->{'closing-square-bracket-indentation'},
7858        '>' => $rOpts->{'closing-paren-indentation'},
7859    );
7860
7861    # flag indicating if any closing tokens are indented
7862    $some_closing_token_indentation =
7863         $rOpts->{'closing-paren-indentation'}
7864      || $rOpts->{'closing-brace-indentation'}
7865      || $rOpts->{'closing-square-bracket-indentation'}
7866      || $rOpts->{'indent-closing-brace'};
7867
7868    %opening_token_right = (
7869        '(' => $rOpts->{'opening-paren-right'},
7870        '{' => $rOpts->{'opening-hash-brace-right'},
7871        '[' => $rOpts->{'opening-square-bracket-right'},
7872    );
7873
7874    %stack_opening_token = (
7875        '(' => $rOpts->{'stack-opening-paren'},
7876        '{' => $rOpts->{'stack-opening-hash-brace'},
7877        '[' => $rOpts->{'stack-opening-square-bracket'},
7878    );
7879
7880    %stack_closing_token = (
7881        ')' => $rOpts->{'stack-closing-paren'},
7882        '}' => $rOpts->{'stack-closing-hash-brace'},
7883        ']' => $rOpts->{'stack-closing-square-bracket'},
7884    );
7885    $rOpts_stack_closing_block_brace = $rOpts->{'stack-closing-block-brace'};
7886}
7887
7888sub make_static_block_comment_pattern {
7889
7890    # create the pattern used to identify static block comments
7891    $static_block_comment_pattern = '^\s*##';
7892
7893    # allow the user to change it
7894    if ( $rOpts->{'static-block-comment-prefix'} ) {
7895        my $prefix = $rOpts->{'static-block-comment-prefix'};
7896        $prefix =~ s/^\s*//;
7897        my $pattern = $prefix;
7898
7899        # user may give leading caret to force matching left comments only
7900        if ( $prefix !~ /^\^#/ ) {
7901            if ( $prefix !~ /^#/ ) {
7902                Perl::Tidy::Die
7903"ERROR: the -sbcp prefix is '$prefix' but must begin with '#' or '^#'\n";
7904            }
7905            $pattern = '^\s*' . $prefix;
7906        }
7907        eval "'##'=~/$pattern/";
7908        if ($@) {
7909            Perl::Tidy::Die
7910"ERROR: the -sbc prefix '$prefix' causes the invalid regex '$pattern'\n";
7911        }
7912        $static_block_comment_pattern = $pattern;
7913    }
7914}
7915
7916sub make_format_skipping_pattern {
7917    my ( $opt_name, $default ) = @_;
7918    my $param = $rOpts->{$opt_name};
7919    unless ($param) { $param = $default }
7920    $param =~ s/^\s*//;
7921    if ( $param !~ /^#/ ) {
7922        Perl::Tidy::Die
7923          "ERROR: the $opt_name parameter '$param' must begin with '#'\n";
7924    }
7925    my $pattern = '^' . $param . '\s';
7926    eval "'#'=~/$pattern/";
7927    if ($@) {
7928        Perl::Tidy::Die
7929"ERROR: the $opt_name parameter '$param' causes the invalid regex '$pattern'\n";
7930    }
7931    return $pattern;
7932}
7933
7934sub make_closing_side_comment_list_pattern {
7935
7936    # turn any input list into a regex for recognizing selected block types
7937    $closing_side_comment_list_pattern = '^\w+';
7938    if ( defined( $rOpts->{'closing-side-comment-list'} )
7939        && $rOpts->{'closing-side-comment-list'} )
7940    {
7941        $closing_side_comment_list_pattern =
7942          make_block_pattern( '-cscl', $rOpts->{'closing-side-comment-list'} );
7943    }
7944}
7945
7946sub make_bli_pattern {
7947
7948    if ( defined( $rOpts->{'brace-left-and-indent-list'} )
7949        && $rOpts->{'brace-left-and-indent-list'} )
7950    {
7951        $bli_list_string = $rOpts->{'brace-left-and-indent-list'};
7952    }
7953
7954    $bli_pattern = make_block_pattern( '-blil', $bli_list_string );
7955}
7956
7957sub make_block_brace_vertical_tightness_pattern {
7958
7959    # turn any input list into a regex for recognizing selected block types
7960    $block_brace_vertical_tightness_pattern =
7961      '^((if|else|elsif|unless|while|for|foreach|do|\w+:)$|sub)';
7962    if ( defined( $rOpts->{'block-brace-vertical-tightness-list'} )
7963        && $rOpts->{'block-brace-vertical-tightness-list'} )
7964    {
7965        $block_brace_vertical_tightness_pattern =
7966          make_block_pattern( '-bbvtl',
7967            $rOpts->{'block-brace-vertical-tightness-list'} );
7968    }
7969}
7970
7971sub make_block_pattern {
7972
7973    #  given a string of block-type keywords, return a regex to match them
7974    #  The only tricky part is that labels are indicated with a single ':'
7975    #  and the 'sub' token text may have additional text after it (name of
7976    #  sub).
7977    #
7978    #  Example:
7979    #
7980    #   input string: "if else elsif unless while for foreach do : sub";
7981    #   pattern:  '^((if|else|elsif|unless|while|for|foreach|do|\w+:)$|sub)';
7982
7983    my ( $abbrev, $string ) = @_;
7984    my @list  = split_words($string);
7985    my @words = ();
7986    my %seen;
7987    for my $i (@list) {
7988        if ( $i eq '*' ) { my $pattern = '^.*'; return $pattern }
7989        next if $seen{$i};
7990        $seen{$i} = 1;
7991        if ( $i eq 'sub' ) {
7992        }
7993        elsif ( $i eq ';' ) {
7994            push @words, ';';
7995        }
7996        elsif ( $i eq '{' ) {
7997            push @words, '\{';
7998        }
7999        elsif ( $i eq ':' ) {
8000            push @words, '\w+:';
8001        }
8002        elsif ( $i =~ /^\w/ ) {
8003            push @words, $i;
8004        }
8005        else {
8006            Perl::Tidy::Warn
8007              "unrecognized block type $i after $abbrev, ignoring\n";
8008        }
8009    }
8010    my $pattern = '(' . join( '|', @words ) . ')$';
8011    if ( $seen{'sub'} ) {
8012        $pattern = '(' . $pattern . '|sub)';
8013    }
8014    $pattern = '^' . $pattern;
8015    return $pattern;
8016}
8017
8018sub make_static_side_comment_pattern {
8019
8020    # create the pattern used to identify static side comments
8021    $static_side_comment_pattern = '^##';
8022
8023    # allow the user to change it
8024    if ( $rOpts->{'static-side-comment-prefix'} ) {
8025        my $prefix = $rOpts->{'static-side-comment-prefix'};
8026        $prefix =~ s/^\s*//;
8027        my $pattern = '^' . $prefix;
8028        eval "'##'=~/$pattern/";
8029        if ($@) {
8030            Perl::Tidy::Die
8031"ERROR: the -sscp prefix '$prefix' causes the invalid regex '$pattern'\n";
8032        }
8033        $static_side_comment_pattern = $pattern;
8034    }
8035}
8036
8037sub make_closing_side_comment_prefix {
8038
8039    # Be sure we have a valid closing side comment prefix
8040    my $csc_prefix = $rOpts->{'closing-side-comment-prefix'};
8041    my $csc_prefix_pattern;
8042    if ( !defined($csc_prefix) ) {
8043        $csc_prefix         = '## end';
8044        $csc_prefix_pattern = '^##\s+end';
8045    }
8046    else {
8047        my $test_csc_prefix = $csc_prefix;
8048        if ( $test_csc_prefix !~ /^#/ ) {
8049            $test_csc_prefix = '#' . $test_csc_prefix;
8050        }
8051
8052        # make a regex to recognize the prefix
8053        my $test_csc_prefix_pattern = $test_csc_prefix;
8054
8055        # escape any special characters
8056        $test_csc_prefix_pattern =~ s/([^#\s\w])/\\$1/g;
8057
8058        $test_csc_prefix_pattern = '^' . $test_csc_prefix_pattern;
8059
8060        # allow exact number of intermediate spaces to vary
8061        $test_csc_prefix_pattern =~ s/\s+/\\s\+/g;
8062
8063        # make sure we have a good pattern
8064        # if we fail this we probably have an error in escaping
8065        # characters.
8066        eval "'##'=~/$test_csc_prefix_pattern/";
8067        if ($@) {
8068
8069            # shouldn't happen..must have screwed up escaping, above
8070            report_definite_bug();
8071            Perl::Tidy::Warn
8072"Program Error: the -cscp prefix '$csc_prefix' caused the invalid regex '$csc_prefix_pattern'\n";
8073
8074            # just warn and keep going with defaults
8075            Perl::Tidy::Warn "Please consider using a simpler -cscp prefix\n";
8076            Perl::Tidy::Warn
8077              "Using default -cscp instead; please check output\n";
8078        }
8079        else {
8080            $csc_prefix         = $test_csc_prefix;
8081            $csc_prefix_pattern = $test_csc_prefix_pattern;
8082        }
8083    }
8084    $rOpts->{'closing-side-comment-prefix'} = $csc_prefix;
8085    $closing_side_comment_prefix_pattern = $csc_prefix_pattern;
8086}
8087
8088sub dump_want_left_space {
8089    my $fh = shift;
8090    local $" = "\n";
8091    print $fh <<EOM;
8092These values are the main control of whitespace to the left of a token type;
8093They may be altered with the -wls parameter.
8094For a list of token types, use perltidy --dump-token-types (-dtt)
8095 1 means the token wants a space to its left
8096-1 means the token does not want a space to its left
8097------------------------------------------------------------------------
8098EOM
8099    foreach ( sort keys %want_left_space ) {
8100        print $fh "$_\t$want_left_space{$_}\n";
8101    }
8102}
8103
8104sub dump_want_right_space {
8105    my $fh = shift;
8106    local $" = "\n";
8107    print $fh <<EOM;
8108These values are the main control of whitespace to the right of a token type;
8109They may be altered with the -wrs parameter.
8110For a list of token types, use perltidy --dump-token-types (-dtt)
8111 1 means the token wants a space to its right
8112-1 means the token does not want a space to its right
8113------------------------------------------------------------------------
8114EOM
8115    foreach ( sort keys %want_right_space ) {
8116        print $fh "$_\t$want_right_space{$_}\n";
8117    }
8118}
8119
8120{    # begin is_essential_whitespace
8121
8122    my %is_sort_grep_map;
8123    my %is_for_foreach;
8124
8125    BEGIN {
8126
8127        @_ = qw(sort grep map);
8128        @is_sort_grep_map{@_} = (1) x scalar(@_);
8129
8130        @_ = qw(for foreach);
8131        @is_for_foreach{@_} = (1) x scalar(@_);
8132
8133    }
8134
8135    sub is_essential_whitespace {
8136
8137        # Essential whitespace means whitespace which cannot be safely deleted
8138        # without risking the introduction of a syntax error.
8139        # We are given three tokens and their types:
8140        # ($tokenl, $typel) is the token to the left of the space in question
8141        # ($tokenr, $typer) is the token to the right of the space in question
8142        # ($tokenll, $typell) is previous nonblank token to the left of $tokenl
8143        #
8144        # This is a slow routine but is not needed too often except when -mangle
8145        # is used.
8146        #
8147        # Note: This routine should almost never need to be changed.  It is
8148        # for avoiding syntax problems rather than for formatting.
8149        my ( $tokenll, $typell, $tokenl, $typel, $tokenr, $typer ) = @_;
8150
8151        my $result =
8152
8153          # never combine two bare words or numbers
8154          # examples:  and ::ok(1)
8155          #            return ::spw(...)
8156          #            for bla::bla:: abc
8157          # example is "%overload:: and" in files Dumpvalue.pm or colonbug.pl
8158          #            $input eq"quit" to make $inputeq"quit"
8159          #            my $size=-s::SINK if $file;  <==OK but we won't do it
8160          # don't join something like: for bla::bla:: abc
8161          # example is "%overload:: and" in files Dumpvalue.pm or colonbug.pl
8162          (      ( $tokenl =~ /([\'\w]|\:\:)$/ && $typel ne 'CORE::' )
8163              && ( $tokenr =~ /^([\'\w]|\:\:)/ ) )
8164
8165          # do not combine a number with a concatination dot
8166          # example: pom.caputo:
8167          # $vt100_compatible ? "\e[0;0H" : ('-' x 78 . "\n");
8168          || ( ( $typel eq 'n' ) && ( $tokenr eq '.' ) )
8169          || ( ( $typer eq 'n' ) && ( $tokenl eq '.' ) )
8170
8171          # do not join a minus with a bare word, because you might form
8172          # a file test operator.  Example from Complex.pm:
8173          # if (CORE::abs($z - i) < $eps); "z-i" would be taken as a file test.
8174          || ( ( $tokenl eq '-' ) && ( $tokenr =~ /^[_A-Za-z]$/ ) )
8175
8176          # and something like this could become ambiguous without space
8177          # after the '-':
8178          #   use constant III=>1;
8179          #   $a = $b - III;
8180          # and even this:
8181          #   $a = - III;
8182          || ( ( $tokenl eq '-' )
8183            && ( $typer =~ /^[wC]$/ && $tokenr =~ /^[_A-Za-z]/ ) )
8184
8185          # '= -' should not become =- or you will get a warning
8186          # about reversed -=
8187          # || ($tokenr eq '-')
8188
8189          # keep a space between a quote and a bareword to prevent the
8190          # bareword from becoming a quote modifier.
8191          || ( ( $typel eq 'Q' ) && ( $tokenr =~ /^[a-zA-Z_]/ ) )
8192
8193          # keep a space between a token ending in '$' and any word;
8194          # this caused trouble:  "die @$ if $@"
8195          || ( ( $typel eq 'i' && $tokenl =~ /\$$/ )
8196            && ( $tokenr =~ /^[a-zA-Z_]/ ) )
8197
8198          # perl is very fussy about spaces before <<
8199          || ( $tokenr =~ /^\<\</ )
8200
8201          # avoid combining tokens to create new meanings. Example:
8202          #     $a+ +$b must not become $a++$b
8203          || ( $is_digraph{ $tokenl . $tokenr } )
8204          || ( $is_trigraph{ $tokenl . $tokenr } )
8205
8206          # another example: do not combine these two &'s:
8207          #     allow_options & &OPT_EXECCGI
8208          || ( $is_digraph{ $tokenl . substr( $tokenr, 0, 1 ) } )
8209
8210          # don't combine $$ or $# with any alphanumeric
8211          # (testfile mangle.t with --mangle)
8212          || ( ( $tokenl =~ /^\$[\$\#]$/ ) && ( $tokenr =~ /^\w/ ) )
8213
8214          # retain any space after possible filehandle
8215          # (testfiles prnterr1.t with --extrude and mangle.t with --mangle)
8216          || ( $typel eq 'Z' )
8217
8218          # Perl is sensitive to whitespace after the + here:
8219          #  $b = xvals $a + 0.1 * yvals $a;
8220          || ( $typell eq 'Z' && $typel =~ /^[\/\?\+\-\*]$/ )
8221
8222          # keep paren separate in 'use Foo::Bar ()'
8223          || ( $tokenr eq '('
8224            && $typel eq 'w'
8225            && $typell eq 'k'
8226            && $tokenll eq 'use' )
8227
8228          # keep any space between filehandle and paren:
8229          # file mangle.t with --mangle:
8230          || ( $typel eq 'Y' && $tokenr eq '(' )
8231
8232          # retain any space after here doc operator ( hereerr.t)
8233          || ( $typel eq 'h' )
8234
8235          # be careful with a space around ++ and --, to avoid ambiguity as to
8236          # which token it applies
8237          || ( ( $typer =~ /^(pp|mm)$/ )     && ( $tokenl !~ /^[\;\{\(\[]/ ) )
8238          || ( ( $typel =~ /^(\+\+|\-\-)$/ ) && ( $tokenr !~ /^[\;\}\)\]]/ ) )
8239
8240          # need space after foreach my; for example, this will fail in
8241          # older versions of Perl:
8242          # foreach my$ft(@filetypes)...
8243          || (
8244            $tokenl eq 'my'
8245
8246            #  /^(for|foreach)$/
8247            && $is_for_foreach{$tokenll}
8248            && $tokenr =~ /^\$/
8249          )
8250
8251          # must have space between grep and left paren; "grep(" will fail
8252          || ( $tokenr eq '(' && $is_sort_grep_map{$tokenl} )
8253
8254          # don't stick numbers next to left parens, as in:
8255          #use Mail::Internet 1.28 (); (see Entity.pm, Head.pm, Test.pm)
8256          || ( ( $typel eq 'n' ) && ( $tokenr eq '(' ) )
8257
8258          # We must be sure that a space between a ? and a quoted string
8259          # remains if the space before the ? remains.  [Loca.pm, lockarea]
8260          # ie,
8261          #    $b=join $comma ? ',' : ':', @_;  # ok
8262          #    $b=join $comma?',' : ':', @_;    # ok!
8263          #    $b=join $comma ?',' : ':', @_;   # error!
8264          # Not really required:
8265          ## || ( ( $typel eq '?' ) && ( $typer eq 'Q' ) )
8266
8267          # do not remove space between an '&' and a bare word because
8268          # it may turn into a function evaluation, like here
8269          # between '&' and 'O_ACCMODE', producing a syntax error [File.pm]
8270          #    $opts{rdonly} = (($opts{mode} & O_ACCMODE) == O_RDONLY);
8271          || ( ( $typel eq '&' ) && ( $tokenr =~ /^[a-zA-Z_]/ ) )
8272
8273          # space stacked labels  (TODO: check if really necessary)
8274          || ( $typel eq 'J' && $typer eq 'J' )
8275
8276          ;    # the value of this long logic sequence is the result we want
8277        return $result;
8278    }
8279}
8280
8281{
8282    my %secret_operators;
8283    my %is_leading_secret_token;
8284
8285    BEGIN {
8286
8287        # token lists for perl secret operators as compiled by Philippe Bruhat
8288        # at: https://metacpan.org/module/perlsecret
8289        %secret_operators = (
8290            'Goatse'            => [qw#= ( ) =#],        #=( )=
8291            'Venus1'            => [qw#0 +#],            # 0+
8292            'Venus2'            => [qw#+ 0#],            # +0
8293            'Enterprise'        => [qw#) x ! !#],        # ()x!!
8294            'Kite1'             => [qw#~ ~ <>#],         # ~~<>
8295            'Kite2'             => [qw#~~ <>#],          # ~~<>
8296            'Winking Fat Comma' => [ ( ',', '=>' ) ],    # ,=>
8297        );
8298
8299        # The following operators and constants are not included because they
8300        # are normally kept tight by perltidy:
8301        # !!  ~~ <~>
8302        #
8303
8304        # Make a lookup table indexed by the first token of each operator:
8305        # first token => [list, list, ...]
8306        foreach my $value ( values(%secret_operators) ) {
8307            my $tok = $value->[0];
8308            push @{ $is_leading_secret_token{$tok} }, $value;
8309        }
8310    }
8311
8312    sub secret_operator_whitespace {
8313
8314        my ( $jmax, $rtokens, $rtoken_type, $rwhite_space_flag ) = @_;
8315
8316        # Loop over all tokens in this line
8317        my ( $j, $token, $type );
8318        for ( $j = 0 ; $j <= $jmax ; $j++ ) {
8319
8320            $token = $$rtokens[$j];
8321            $type  = $$rtoken_type[$j];
8322
8323            # Skip unless this token might start a secret operator
8324            next if ( $type eq 'b' );
8325            next unless ( $is_leading_secret_token{$token} );
8326
8327            #      Loop over all secret operators with this leading token
8328            foreach my $rpattern ( @{ $is_leading_secret_token{$token} } ) {
8329                my $jend = $j - 1;
8330                foreach my $tok ( @{$rpattern} ) {
8331                    $jend++;
8332                    $jend++
8333
8334                      if ( $jend <= $jmax && $$rtoken_type[$jend] eq 'b' );
8335                    if ( $jend > $jmax || $tok ne $$rtokens[$jend] ) {
8336                        $jend = undef;
8337                        last;
8338                    }
8339                }
8340
8341                if ($jend) {
8342
8343                    # set flags to prevent spaces within this operator
8344                    for ( my $jj = $j + 1 ; $jj <= $jend ; $jj++ ) {
8345                        $rwhite_space_flag->[$jj] = WS_NO;
8346                    }
8347                    $j = $jend;
8348                    last;
8349                }
8350            }    ##      End Loop over all operators
8351        }    ## End loop over all tokens
8352    }    # End sub
8353}
8354
8355sub set_white_space_flag {
8356
8357    #    This routine examines each pair of nonblank tokens and
8358    #    sets values for array @white_space_flag.
8359    #
8360    #    $white_space_flag[$j] is a flag indicating whether a white space
8361    #    BEFORE token $j is needed, with the following values:
8362    #
8363    #             WS_NO      = -1 do not want a space before token $j
8364    #             WS_OPTIONAL=  0 optional space or $j is a whitespace
8365    #             WS_YES     =  1 want a space before token $j
8366    #
8367    #
8368    #   The values for the first token will be defined based
8369    #   upon the contents of the "to_go" output array.
8370    #
8371    #   Note: retain debug print statements because they are usually
8372    #   required after adding new token types.
8373
8374    BEGIN {
8375
8376        # initialize these global hashes, which control the use of
8377        # whitespace around tokens:
8378        #
8379        # %binary_ws_rules
8380        # %want_left_space
8381        # %want_right_space
8382        # %space_after_keyword
8383        #
8384        # Many token types are identical to the tokens themselves.
8385        # See the tokenizer for a complete list. Here are some special types:
8386        #   k = perl keyword
8387        #   f = semicolon in for statement
8388        #   m = unary minus
8389        #   p = unary plus
8390        # Note that :: is excluded since it should be contained in an identifier
8391        # Note that '->' is excluded because it never gets space
8392        # parentheses and brackets are excluded since they are handled specially
8393        # curly braces are included but may be overridden by logic, such as
8394        # newline logic.
8395
8396        # NEW_TOKENS: create a whitespace rule here.  This can be as
8397        # simple as adding your new letter to @spaces_both_sides, for
8398        # example.
8399
8400        @_ = qw" L { ( [ ";
8401        @is_opening_type{@_} = (1) x scalar(@_);
8402
8403        @_ = qw" R } ) ] ";
8404        @is_closing_type{@_} = (1) x scalar(@_);
8405
8406        my @spaces_both_sides = qw"
8407          + - * / % ? = . : x < > | & ^ .. << >> ** && .. || // => += -=
8408          .= %= x= &= |= ^= *= <> <= >= == =~ !~ /= != ... <<= >>= ~~ !~~
8409          &&= ||= //= <=> A k f w F n C Y U G v
8410          ";
8411
8412        my @spaces_left_side = qw"
8413          t ! ~ m p { \ h pp mm Z j
8414          ";
8415        push( @spaces_left_side, '#' );    # avoids warning message
8416
8417        my @spaces_right_side = qw"
8418          ; } ) ] R J ++ -- **=
8419          ";
8420        push( @spaces_right_side, ',' );    # avoids warning message
8421
8422        # Note that we are in a BEGIN block here.  Later in processing
8423        # the values of %want_left_space and  %want_right_space
8424        # may be overridden by any user settings specified by the
8425        # -wls and -wrs parameters.  However the binary_whitespace_rules
8426        # are hardwired and have priority.
8427        @want_left_space{@spaces_both_sides} = (1) x scalar(@spaces_both_sides);
8428        @want_right_space{@spaces_both_sides} =
8429          (1) x scalar(@spaces_both_sides);
8430        @want_left_space{@spaces_left_side}  = (1) x scalar(@spaces_left_side);
8431        @want_right_space{@spaces_left_side} = (-1) x scalar(@spaces_left_side);
8432        @want_left_space{@spaces_right_side} =
8433          (-1) x scalar(@spaces_right_side);
8434        @want_right_space{@spaces_right_side} =
8435          (1) x scalar(@spaces_right_side);
8436        $want_left_space{'->'}      = WS_NO;
8437        $want_right_space{'->'}     = WS_NO;
8438        $want_left_space{'**'}      = WS_NO;
8439        $want_right_space{'**'}     = WS_NO;
8440        $want_right_space{'CORE::'} = WS_NO;
8441
8442        # These binary_ws_rules are hardwired and have priority over the above
8443        # settings.  It would be nice to allow adjustment by the user,
8444        # but it would be complicated to specify.
8445        #
8446        # hash type information must stay tightly bound
8447        # as in :  ${xxxx}
8448        $binary_ws_rules{'i'}{'L'} = WS_NO;
8449        $binary_ws_rules{'i'}{'{'} = WS_YES;
8450        $binary_ws_rules{'k'}{'{'} = WS_YES;
8451        $binary_ws_rules{'U'}{'{'} = WS_YES;
8452        $binary_ws_rules{'i'}{'['} = WS_NO;
8453        $binary_ws_rules{'R'}{'L'} = WS_NO;
8454        $binary_ws_rules{'R'}{'{'} = WS_NO;
8455        $binary_ws_rules{'t'}{'L'} = WS_NO;
8456        $binary_ws_rules{'t'}{'{'} = WS_NO;
8457        $binary_ws_rules{'}'}{'L'} = WS_NO;
8458        $binary_ws_rules{'}'}{'{'} = WS_NO;
8459        $binary_ws_rules{'$'}{'L'} = WS_NO;
8460        $binary_ws_rules{'$'}{'{'} = WS_NO;
8461        $binary_ws_rules{'@'}{'L'} = WS_NO;
8462        $binary_ws_rules{'@'}{'{'} = WS_NO;
8463        $binary_ws_rules{'='}{'L'} = WS_YES;
8464        $binary_ws_rules{'J'}{'J'} = WS_YES;
8465
8466        # the following includes ') {'
8467        # as in :    if ( xxx ) { yyy }
8468        $binary_ws_rules{']'}{'L'} = WS_NO;
8469        $binary_ws_rules{']'}{'{'} = WS_NO;
8470        $binary_ws_rules{')'}{'{'} = WS_YES;
8471        $binary_ws_rules{')'}{'['} = WS_NO;
8472        $binary_ws_rules{']'}{'['} = WS_NO;
8473        $binary_ws_rules{']'}{'{'} = WS_NO;
8474        $binary_ws_rules{'}'}{'['} = WS_NO;
8475        $binary_ws_rules{'R'}{'['} = WS_NO;
8476
8477        $binary_ws_rules{']'}{'++'} = WS_NO;
8478        $binary_ws_rules{']'}{'--'} = WS_NO;
8479        $binary_ws_rules{')'}{'++'} = WS_NO;
8480        $binary_ws_rules{')'}{'--'} = WS_NO;
8481
8482        $binary_ws_rules{'R'}{'++'} = WS_NO;
8483        $binary_ws_rules{'R'}{'--'} = WS_NO;
8484
8485        $binary_ws_rules{'i'}{'Q'} = WS_YES;
8486        $binary_ws_rules{'n'}{'('} = WS_YES;    # occurs in 'use package n ()'
8487
8488        # FIXME: we could to split 'i' into variables and functions
8489        # and have no space for functions but space for variables.  For now,
8490        # I have a special patch in the special rules below
8491        $binary_ws_rules{'i'}{'('} = WS_NO;
8492
8493        $binary_ws_rules{'w'}{'('} = WS_NO;
8494        $binary_ws_rules{'w'}{'{'} = WS_YES;
8495    } ## end BEGIN block
8496
8497    my ( $jmax, $rtokens, $rtoken_type, $rblock_type ) = @_;
8498    my ( $last_token, $last_type, $last_block_type, $token, $type,
8499        $block_type );
8500    my (@white_space_flag);
8501    my $j_tight_closing_paren = -1;
8502
8503    if ( $max_index_to_go >= 0 ) {
8504        $token      = $tokens_to_go[$max_index_to_go];
8505        $type       = $types_to_go[$max_index_to_go];
8506        $block_type = $block_type_to_go[$max_index_to_go];
8507
8508        #---------------------------------------------------------------
8509        # Patch due to splitting of tokens with leading ->
8510        #---------------------------------------------------------------
8511        #
8512        # This routine is dealing with the raw tokens from the tokenizer,
8513        # but to get started it needs the previous token, which will
8514        # have been stored in the '_to_go' arrays.
8515        #
8516        # This patch avoids requiring two iterations to
8517        # converge for cases such as the following, where a paren
8518        # comes in on a line following a variable with leading arrow:
8519        #     $self->{main}->add_content_defer_opening
8520        #                         ($name, $wmkf, $self->{attrs}, $self);
8521        # In this case when we see the opening paren on line 2 we need
8522        # to know if the last token on the previous line had an arrow,
8523        # but it has already been split off so we have to add it back
8524        # in to avoid getting an unwanted space before the paren.
8525        if ( $type =~ /^[wi]$/ ) {
8526            my $im = $iprev_to_go[$max_index_to_go];
8527            my $tm = ( $im >= 0 ) ? $types_to_go[$im] : "";
8528            if ( $tm eq '->' ) { $token = $tm . $token }
8529        }
8530
8531        #---------------------------------------------------------------
8532        # End patch due to splitting of tokens with leading ->
8533        #---------------------------------------------------------------
8534    }
8535    else {
8536        $token      = ' ';
8537        $type       = 'b';
8538        $block_type = '';
8539    }
8540
8541    my ( $j, $ws );
8542
8543    # main loop over all tokens to define the whitespace flags
8544    for ( $j = 0 ; $j <= $jmax ; $j++ ) {
8545
8546        if ( $$rtoken_type[$j] eq 'b' ) {
8547            $white_space_flag[$j] = WS_OPTIONAL;
8548            next;
8549        }
8550
8551        # set a default value, to be changed as needed
8552        $ws              = undef;
8553        $last_token      = $token;
8554        $last_type       = $type;
8555        $last_block_type = $block_type;
8556        $token           = $$rtokens[$j];
8557        $type            = $$rtoken_type[$j];
8558        $block_type      = $$rblock_type[$j];
8559
8560        #---------------------------------------------------------------
8561        # Whitespace Rules Section 1:
8562        # Handle space on the inside of opening braces.
8563        #---------------------------------------------------------------
8564
8565        #    /^[L\{\(\[]$/
8566        if ( $is_opening_type{$last_type} ) {
8567
8568            $j_tight_closing_paren = -1;
8569
8570            # let's keep empty matched braces together: () {} []
8571            # except for BLOCKS
8572            if ( $token eq $matching_token{$last_token} ) {
8573                if ($block_type) {
8574                    $ws = WS_YES;
8575                }
8576                else {
8577                    $ws = WS_NO;
8578                }
8579            }
8580            else {
8581
8582                # we're considering the right of an opening brace
8583                # tightness = 0 means always pad inside with space
8584                # tightness = 1 means pad inside if "complex"
8585                # tightness = 2 means never pad inside with space
8586
8587                my $tightness;
8588                if (   $last_type eq '{'
8589                    && $last_token eq '{'
8590                    && $last_block_type )
8591                {
8592                    $tightness = $rOpts_block_brace_tightness;
8593                }
8594                else { $tightness = $tightness{$last_token} }
8595
8596               #=============================================================
8597               # Patch for test problem fabrice_bug.pl
8598               # We must always avoid spaces around a bare word beginning
8599               # with ^ as in:
8600               #    my $before = ${^PREMATCH};
8601               # Because all of the following cause an error in perl:
8602               #    my $before = ${ ^PREMATCH };
8603               #    my $before = ${ ^PREMATCH};
8604               #    my $before = ${^PREMATCH };
8605               # So if brace tightness flag is -bt=0 we must temporarily reset
8606               # to bt=1.  Note that here we must set tightness=1 and not 2 so
8607               # that the closing space
8608               # is also avoided (via the $j_tight_closing_paren flag in coding)
8609                if ( $type eq 'w' && $token =~ /^\^/ ) { $tightness = 1 }
8610
8611                #=============================================================
8612
8613                if ( $tightness <= 0 ) {
8614                    $ws = WS_YES;
8615                }
8616                elsif ( $tightness > 1 ) {
8617                    $ws = WS_NO;
8618                }
8619                else {
8620
8621                    # Patch to count '-foo' as single token so that
8622                    # each of  $a{-foo} and $a{foo} and $a{'foo'} do
8623                    # not get spaces with default formatting.
8624                    my $j_here = $j;
8625                    ++$j_here
8626                      if ( $token eq '-'
8627                        && $last_token eq '{'
8628                        && $$rtoken_type[ $j + 1 ] eq 'w' );
8629
8630                    # $j_next is where a closing token should be if
8631                    # the container has a single token
8632                    my $j_next =
8633                      ( $$rtoken_type[ $j_here + 1 ] eq 'b' )
8634                      ? $j_here + 2
8635                      : $j_here + 1;
8636                    my $tok_next  = $$rtokens[$j_next];
8637                    my $type_next = $$rtoken_type[$j_next];
8638
8639                    # for tightness = 1, if there is just one token
8640                    # within the matching pair, we will keep it tight
8641                    if (
8642                        $tok_next eq $matching_token{$last_token}
8643
8644                        # but watch out for this: [ [ ]    (misc.t)
8645                        && $last_token ne $token
8646                      )
8647                    {
8648
8649                        # remember where to put the space for the closing paren
8650                        $j_tight_closing_paren = $j_next;
8651                        $ws                    = WS_NO;
8652                    }
8653                    else {
8654                        $ws = WS_YES;
8655                    }
8656                }
8657            }
8658        }    # end setting space flag inside opening tokens
8659        my $ws_1 = $ws
8660          if FORMATTER_DEBUG_FLAG_WHITE;
8661
8662        #---------------------------------------------------------------
8663        # Whitespace Rules Section 2:
8664        # Handle space on inside of closing brace pairs.
8665        #---------------------------------------------------------------
8666
8667        #   /[\}\)\]R]/
8668        if ( $is_closing_type{$type} ) {
8669
8670            if ( $j == $j_tight_closing_paren ) {
8671
8672                $j_tight_closing_paren = -1;
8673                $ws                    = WS_NO;
8674            }
8675            else {
8676
8677                if ( !defined($ws) ) {
8678
8679                    my $tightness;
8680                    if ( $type eq '}' && $token eq '}' && $block_type ) {
8681                        $tightness = $rOpts_block_brace_tightness;
8682                    }
8683                    else { $tightness = $tightness{$token} }
8684
8685                    $ws = ( $tightness > 1 ) ? WS_NO : WS_YES;
8686                }
8687            }
8688        }    # end setting space flag inside closing tokens
8689
8690        my $ws_2 = $ws
8691          if FORMATTER_DEBUG_FLAG_WHITE;
8692
8693        #---------------------------------------------------------------
8694        # Whitespace Rules Section 3:
8695        # Use the binary rule table.
8696        #---------------------------------------------------------------
8697        if ( !defined($ws) ) {
8698            $ws = $binary_ws_rules{$last_type}{$type};
8699        }
8700        my $ws_3 = $ws
8701          if FORMATTER_DEBUG_FLAG_WHITE;
8702
8703        #---------------------------------------------------------------
8704        # Whitespace Rules Section 4:
8705        # Handle some special cases.
8706        #---------------------------------------------------------------
8707        if ( $token eq '(' ) {
8708
8709            # This will have to be tweaked as tokenization changes.
8710            # We usually want a space at '} (', for example:
8711            #     map { 1 * $_; } ( $y, $M, $w, $d, $h, $m, $s );
8712            #
8713            # But not others:
8714            #     &{ $_->[1] }( delete $_[$#_]{ $_->[0] } );
8715            # At present, the above & block is marked as type L/R so this case
8716            # won't go through here.
8717            if ( $last_type eq '}' ) { $ws = WS_YES }
8718
8719            # NOTE: some older versions of Perl had occasional problems if
8720            # spaces are introduced between keywords or functions and opening
8721            # parens.  So the default is not to do this except is certain
8722            # cases.  The current Perl seems to tolerate spaces.
8723
8724            # Space between keyword and '('
8725            elsif ( $last_type eq 'k' ) {
8726                $ws = WS_NO
8727                  unless ( $rOpts_space_keyword_paren
8728                    || $space_after_keyword{$last_token} );
8729            }
8730
8731            # Space between function and '('
8732            # -----------------------------------------------------
8733            # 'w' and 'i' checks for something like:
8734            #   myfun(    &myfun(   ->myfun(
8735            # -----------------------------------------------------
8736            elsif (( $last_type =~ /^[wUG]$/ )
8737                || ( $last_type =~ /^[wi]$/ && $last_token =~ /^(\&|->)/ ) )
8738            {
8739                $ws = WS_NO unless ($rOpts_space_function_paren);
8740            }
8741
8742            # space between something like $i and ( in
8743            # for $i ( 0 .. 20 ) {
8744            # FIXME: eventually, type 'i' needs to be split into multiple
8745            # token types so this can be a hardwired rule.
8746            elsif ( $last_type eq 'i' && $last_token =~ /^[\$\%\@]/ ) {
8747                $ws = WS_YES;
8748            }
8749
8750            # allow constant function followed by '()' to retain no space
8751            elsif ( $last_type eq 'C' && $$rtokens[ $j + 1 ] eq ')' ) {
8752                $ws = WS_NO;
8753            }
8754        }
8755
8756        # patch for SWITCH/CASE: make space at ']{' optional
8757        # since the '{' might begin a case or when block
8758        elsif ( ( $token eq '{' && $type ne 'L' ) && $last_token eq ']' ) {
8759            $ws = WS_OPTIONAL;
8760        }
8761
8762        # keep space between 'sub' and '{' for anonymous sub definition
8763        if ( $type eq '{' ) {
8764            if ( $last_token eq 'sub' ) {
8765                $ws = WS_YES;
8766            }
8767
8768            # this is needed to avoid no space in '){'
8769            if ( $last_token eq ')' && $token eq '{' ) { $ws = WS_YES }
8770
8771            # avoid any space before the brace or bracket in something like
8772            #  @opts{'a','b',...}
8773            if ( $last_type eq 'i' && $last_token =~ /^\@/ ) {
8774                $ws = WS_NO;
8775            }
8776        }
8777
8778        elsif ( $type eq 'i' ) {
8779
8780            # never a space before ->
8781            if ( $token =~ /^\-\>/ ) {
8782                $ws = WS_NO;
8783            }
8784        }
8785
8786        # retain any space between '-' and bare word
8787        elsif ( $type eq 'w' || $type eq 'C' ) {
8788            $ws = WS_OPTIONAL if $last_type eq '-';
8789
8790            # never a space before ->
8791            if ( $token =~ /^\-\>/ ) {
8792                $ws = WS_NO;
8793            }
8794        }
8795
8796        # retain any space between '-' and bare word
8797        # example: avoid space between 'USER' and '-' here:
8798        #   $myhash{USER-NAME}='steve';
8799        elsif ( $type eq 'm' || $type eq '-' ) {
8800            $ws = WS_OPTIONAL if ( $last_type eq 'w' );
8801        }
8802
8803        # always space before side comment
8804        elsif ( $type eq '#' ) { $ws = WS_YES if $j > 0 }
8805
8806        # always preserver whatever space was used after a possible
8807        # filehandle (except _) or here doc operator
8808        if (
8809            $type ne '#'
8810            && ( ( $last_type eq 'Z' && $last_token ne '_' )
8811                || $last_type eq 'h' )
8812          )
8813        {
8814            $ws = WS_OPTIONAL;
8815        }
8816
8817        my $ws_4 = $ws
8818          if FORMATTER_DEBUG_FLAG_WHITE;
8819
8820        #---------------------------------------------------------------
8821        # Whitespace Rules Section 5:
8822        # Apply default rules not covered above.
8823        #---------------------------------------------------------------
8824
8825        # If we fall through to here, look at the pre-defined hash tables for
8826        # the two tokens, and:
8827        #  if (they are equal) use the common value
8828        #  if (either is zero or undef) use the other
8829        #  if (either is -1) use it
8830        # That is,
8831        # left  vs right
8832        #  1    vs    1     -->  1
8833        #  0    vs    0     -->  0
8834        # -1    vs   -1     --> -1
8835        #
8836        #  0    vs   -1     --> -1
8837        #  0    vs    1     -->  1
8838        #  1    vs    0     -->  1
8839        # -1    vs    0     --> -1
8840        #
8841        # -1    vs    1     --> -1
8842        #  1    vs   -1     --> -1
8843        if ( !defined($ws) ) {
8844            my $wl = $want_left_space{$type};
8845            my $wr = $want_right_space{$last_type};
8846            if ( !defined($wl) ) { $wl = 0 }
8847            if ( !defined($wr) ) { $wr = 0 }
8848            $ws = ( ( $wl == $wr ) || ( $wl == -1 ) || !$wr ) ? $wl : $wr;
8849        }
8850
8851        if ( !defined($ws) ) {
8852            $ws = 0;
8853            write_diagnostics(
8854                "WS flag is undefined for tokens $last_token $token\n");
8855        }
8856
8857        # Treat newline as a whitespace. Otherwise, we might combine
8858        # 'Send' and '-recipients' here according to the above rules:
8859        #    my $msg = new Fax::Send
8860        #      -recipients => $to,
8861        #      -data => $data;
8862        if ( $ws == 0 && $j == 0 ) { $ws = 1 }
8863
8864        if (   ( $ws == 0 )
8865            && $j > 0
8866            && $j < $jmax
8867            && ( $last_type !~ /^[Zh]$/ ) )
8868        {
8869
8870            # If this happens, we have a non-fatal but undesirable
8871            # hole in the above rules which should be patched.
8872            write_diagnostics(
8873                "WS flag is zero for tokens $last_token $token\n");
8874        }
8875        $white_space_flag[$j] = $ws;
8876
8877        FORMATTER_DEBUG_FLAG_WHITE && do {
8878            my $str = substr( $last_token, 0, 15 );
8879            $str .= ' ' x ( 16 - length($str) );
8880            if ( !defined($ws_1) ) { $ws_1 = "*" }
8881            if ( !defined($ws_2) ) { $ws_2 = "*" }
8882            if ( !defined($ws_3) ) { $ws_3 = "*" }
8883            if ( !defined($ws_4) ) { $ws_4 = "*" }
8884            print STDOUT
8885"WHITE:  i=$j $str $last_type $type $ws_1 : $ws_2 : $ws_3 : $ws_4 : $ws \n";
8886        };
8887    } ## end main loop
8888
8889    if ($rOpts_tight_secret_operators) {
8890        secret_operator_whitespace( $jmax, $rtokens, $rtoken_type,
8891            \@white_space_flag );
8892    }
8893
8894    return \@white_space_flag;
8895} ## end sub set_white_space_flag
8896
8897{    # begin print_line_of_tokens
8898
8899    my $rtoken_type;
8900    my $rtokens;
8901    my $rlevels;
8902    my $rslevels;
8903    my $rblock_type;
8904    my $rcontainer_type;
8905    my $rcontainer_environment;
8906    my $rtype_sequence;
8907    my $input_line;
8908    my $rnesting_tokens;
8909    my $rci_levels;
8910    my $rnesting_blocks;
8911
8912    my $in_quote;
8913    my $guessed_indentation_level;
8914
8915    # These local token variables are stored by store_token_to_go:
8916    my $block_type;
8917    my $ci_level;
8918    my $container_environment;
8919    my $container_type;
8920    my $in_continued_quote;
8921    my $level;
8922    my $nesting_blocks;
8923    my $no_internal_newlines;
8924    my $slevel;
8925    my $token;
8926    my $type;
8927    my $type_sequence;
8928
8929    # routine to pull the jth token from the line of tokens
8930    sub extract_token {
8931        my $j = shift;
8932        $token                 = $$rtokens[$j];
8933        $type                  = $$rtoken_type[$j];
8934        $block_type            = $$rblock_type[$j];
8935        $container_type        = $$rcontainer_type[$j];
8936        $container_environment = $$rcontainer_environment[$j];
8937        $type_sequence         = $$rtype_sequence[$j];
8938        $level                 = $$rlevels[$j];
8939        $slevel                = $$rslevels[$j];
8940        $nesting_blocks        = $$rnesting_blocks[$j];
8941        $ci_level              = $$rci_levels[$j];
8942    }
8943
8944    {
8945        my @saved_token;
8946
8947        sub save_current_token {
8948
8949            @saved_token = (
8950                $block_type,            $ci_level,
8951                $container_environment, $container_type,
8952                $in_continued_quote,    $level,
8953                $nesting_blocks,        $no_internal_newlines,
8954                $slevel,                $token,
8955                $type,                  $type_sequence,
8956            );
8957        }
8958
8959        sub restore_current_token {
8960            (
8961                $block_type,            $ci_level,
8962                $container_environment, $container_type,
8963                $in_continued_quote,    $level,
8964                $nesting_blocks,        $no_internal_newlines,
8965                $slevel,                $token,
8966                $type,                  $type_sequence,
8967            ) = @saved_token;
8968        }
8969    }
8970
8971    sub token_length {
8972
8973        # Returns the length of a token, given:
8974        #  $token=text of the token
8975        #  $type = type
8976        #  $not_first_token = should be TRUE if this is not the first token of
8977        #   the line.  It might the index of this token in an array.  It is
8978        #   used to test for a side comment vs a block commment.
8979        # Note: Eventually this should be the only routine determining the
8980        # length of a token in this package.
8981        my ( $token, $type, $not_first_token ) = @_;
8982        my $token_length = length($token);
8983
8984        # We mark lengths of side comments as just 1 if we are
8985        # ignoring their lengths when setting line breaks.
8986        $token_length = 1
8987          if ( $rOpts_ignore_side_comment_lengths
8988            && $not_first_token
8989            && $type eq '#' );
8990        return $token_length;
8991    }
8992
8993    sub rtoken_length {
8994
8995        # return length of ith token in @{$rtokens}
8996        my ($i) = @_;
8997        return token_length( $$rtokens[$i], $$rtoken_type[$i], $i );
8998    }
8999
9000    # Routine to place the current token into the output stream.
9001    # Called once per output token.
9002    sub store_token_to_go {
9003
9004        my $flag = $no_internal_newlines;
9005        if ( $_[0] ) { $flag = 1 }
9006
9007        $tokens_to_go[ ++$max_index_to_go ]            = $token;
9008        $types_to_go[$max_index_to_go]                 = $type;
9009        $nobreak_to_go[$max_index_to_go]               = $flag;
9010        $old_breakpoint_to_go[$max_index_to_go]        = 0;
9011        $forced_breakpoint_to_go[$max_index_to_go]     = 0;
9012        $block_type_to_go[$max_index_to_go]            = $block_type;
9013        $type_sequence_to_go[$max_index_to_go]         = $type_sequence;
9014        $container_environment_to_go[$max_index_to_go] = $container_environment;
9015        $nesting_blocks_to_go[$max_index_to_go]        = $nesting_blocks;
9016        $ci_levels_to_go[$max_index_to_go]             = $ci_level;
9017        $mate_index_to_go[$max_index_to_go]            = -1;
9018        $matching_token_to_go[$max_index_to_go]        = '';
9019        $bond_strength_to_go[$max_index_to_go]         = 0;
9020
9021        # Note: negative levels are currently retained as a diagnostic so that
9022        # the 'final indentation level' is correctly reported for bad scripts.
9023        # But this means that every use of $level as an index must be checked.
9024        # If this becomes too much of a problem, we might give up and just clip
9025        # them at zero.
9026        ## $levels_to_go[$max_index_to_go] = ( $level > 0 ) ? $level : 0;
9027        $levels_to_go[$max_index_to_go] = $level;
9028        $nesting_depth_to_go[$max_index_to_go] = ( $slevel >= 0 ) ? $slevel : 0;
9029
9030        # link the non-blank tokens
9031        my $iprev = $max_index_to_go - 1;
9032        $iprev-- if ( $iprev >= 0 && $types_to_go[$iprev] eq 'b' );
9033        $iprev_to_go[$max_index_to_go] = $iprev;
9034        $inext_to_go[$iprev]           = $max_index_to_go
9035          if ( $iprev >= 0 && $type ne 'b' );
9036        $inext_to_go[$max_index_to_go] = $max_index_to_go + 1;
9037
9038        $token_lengths_to_go[$max_index_to_go] =
9039          token_length( $token, $type, $max_index_to_go );
9040
9041        # We keep a running sum of token lengths from the start of this batch:
9042        #   summed_lengths_to_go[$i]   = total length to just before token $i
9043        #   summed_lengths_to_go[$i+1] = total length to just after token $i
9044        $summed_lengths_to_go[ $max_index_to_go + 1 ] =
9045          $summed_lengths_to_go[$max_index_to_go] +
9046          $token_lengths_to_go[$max_index_to_go];
9047
9048        # Define the indentation that this token would have if it started
9049        # a new line.  We have to do this now because we need to know this
9050        # when considering one-line blocks.
9051        set_leading_whitespace( $level, $ci_level, $in_continued_quote );
9052
9053        # remember previous nonblank tokens seen
9054        if ( $type ne 'b' ) {
9055            $last_last_nonblank_index_to_go = $last_nonblank_index_to_go;
9056            $last_last_nonblank_type_to_go  = $last_nonblank_type_to_go;
9057            $last_last_nonblank_token_to_go = $last_nonblank_token_to_go;
9058            $last_nonblank_index_to_go      = $max_index_to_go;
9059            $last_nonblank_type_to_go       = $type;
9060            $last_nonblank_token_to_go      = $token;
9061            if ( $type eq ',' ) {
9062                $comma_count_in_batch++;
9063            }
9064        }
9065
9066        FORMATTER_DEBUG_FLAG_STORE && do {
9067            my ( $a, $b, $c ) = caller();
9068            print STDOUT
9069"STORE: from $a $c: storing token $token type $type lev=$level slev=$slevel at $max_index_to_go\n";
9070        };
9071    }
9072
9073    sub insert_new_token_to_go {
9074
9075        # insert a new token into the output stream.  use same level as
9076        # previous token; assumes a character at max_index_to_go.
9077        save_current_token();
9078        ( $token, $type, $slevel, $no_internal_newlines ) = @_;
9079
9080        if ( $max_index_to_go == UNDEFINED_INDEX ) {
9081            warning("code bug: bad call to insert_new_token_to_go\n");
9082        }
9083        $level = $levels_to_go[$max_index_to_go];
9084
9085        # FIXME: it seems to be necessary to use the next, rather than
9086        # previous, value of this variable when creating a new blank (align.t)
9087        #my $slevel         = $nesting_depth_to_go[$max_index_to_go];
9088        $nesting_blocks        = $nesting_blocks_to_go[$max_index_to_go];
9089        $ci_level              = $ci_levels_to_go[$max_index_to_go];
9090        $container_environment = $container_environment_to_go[$max_index_to_go];
9091        $in_continued_quote    = 0;
9092        $block_type            = "";
9093        $type_sequence         = "";
9094        store_token_to_go();
9095        restore_current_token();
9096        return;
9097    }
9098
9099    sub print_line_of_tokens {
9100
9101        my $line_of_tokens = shift;
9102
9103        # This routine is called once per input line to process all of
9104        # the tokens on that line.  This is the first stage of
9105        # beautification.
9106        #
9107        # Full-line comments and blank lines may be processed immediately.
9108        #
9109        # For normal lines of code, the tokens are stored one-by-one,
9110        # via calls to 'sub store_token_to_go', until a known line break
9111        # point is reached.  Then, the batch of collected tokens is
9112        # passed along to 'sub output_line_to_go' for further
9113        # processing.  This routine decides if there should be
9114        # whitespace between each pair of non-white tokens, so later
9115        # routines only need to decide on any additional line breaks.
9116        # Any whitespace is initially a single space character.  Later,
9117        # the vertical aligner may expand that to be multiple space
9118        # characters if necessary for alignment.
9119
9120        # extract input line number for error messages
9121        $input_line_number = $line_of_tokens->{_line_number};
9122
9123        $rtoken_type            = $line_of_tokens->{_rtoken_type};
9124        $rtokens                = $line_of_tokens->{_rtokens};
9125        $rlevels                = $line_of_tokens->{_rlevels};
9126        $rslevels               = $line_of_tokens->{_rslevels};
9127        $rblock_type            = $line_of_tokens->{_rblock_type};
9128        $rcontainer_type        = $line_of_tokens->{_rcontainer_type};
9129        $rcontainer_environment = $line_of_tokens->{_rcontainer_environment};
9130        $rtype_sequence         = $line_of_tokens->{_rtype_sequence};
9131        $input_line             = $line_of_tokens->{_line_text};
9132        $rnesting_tokens        = $line_of_tokens->{_rnesting_tokens};
9133        $rci_levels             = $line_of_tokens->{_rci_levels};
9134        $rnesting_blocks        = $line_of_tokens->{_rnesting_blocks};
9135
9136        $in_continued_quote = $starting_in_quote =
9137          $line_of_tokens->{_starting_in_quote};
9138        $in_quote        = $line_of_tokens->{_ending_in_quote};
9139        $ending_in_quote = $in_quote;
9140        $guessed_indentation_level =
9141          $line_of_tokens->{_guessed_indentation_level};
9142
9143        my $j;
9144        my $j_next;
9145        my $jmax;
9146        my $next_nonblank_token;
9147        my $next_nonblank_token_type;
9148        my $rwhite_space_flag;
9149
9150        $jmax                    = @$rtokens - 1;
9151        $block_type              = "";
9152        $container_type          = "";
9153        $container_environment   = "";
9154        $type_sequence           = "";
9155        $no_internal_newlines    = 1 - $rOpts_add_newlines;
9156        $is_static_block_comment = 0;
9157
9158        # Handle a continued quote..
9159        if ($in_continued_quote) {
9160
9161            # A line which is entirely a quote or pattern must go out
9162            # verbatim.  Note: the \n is contained in $input_line.
9163            if ( $jmax <= 0 ) {
9164                if ( ( $input_line =~ "\t" ) ) {
9165                    note_embedded_tab();
9166                }
9167                write_unindented_line("$input_line");
9168                $last_line_had_side_comment = 0;
9169                return;
9170            }
9171        }
9172
9173        # Write line verbatim if we are in a formatting skip section
9174        if ($in_format_skipping_section) {
9175            write_unindented_line("$input_line");
9176            $last_line_had_side_comment = 0;
9177
9178            # Note: extra space appended to comment simplifies pattern matching
9179            if (   $jmax == 0
9180                && $$rtoken_type[0] eq '#'
9181                && ( $$rtokens[0] . " " ) =~ /$format_skipping_pattern_end/o )
9182            {
9183                $in_format_skipping_section = 0;
9184                write_logfile_entry("Exiting formatting skip section\n");
9185                $file_writer_object->reset_consecutive_blank_lines();
9186            }
9187            return;
9188        }
9189
9190        # See if we are entering a formatting skip section
9191        if (   $rOpts_format_skipping
9192            && $jmax == 0
9193            && $$rtoken_type[0] eq '#'
9194            && ( $$rtokens[0] . " " ) =~ /$format_skipping_pattern_begin/o )
9195        {
9196            flush();
9197            $in_format_skipping_section = 1;
9198            write_logfile_entry("Entering formatting skip section\n");
9199            write_unindented_line("$input_line");
9200            $last_line_had_side_comment = 0;
9201            return;
9202        }
9203
9204        # delete trailing blank tokens
9205        if ( $jmax > 0 && $$rtoken_type[$jmax] eq 'b' ) { $jmax-- }
9206
9207        # Handle a blank line..
9208        if ( $jmax < 0 ) {
9209
9210            # If keep-old-blank-lines is zero, we delete all
9211            # old blank lines and let the blank line rules generate any
9212            # needed blanks.
9213            if ($rOpts_keep_old_blank_lines) {
9214                flush();
9215                $file_writer_object->write_blank_code_line(
9216                    $rOpts_keep_old_blank_lines == 2 );
9217                $last_line_leading_type = 'b';
9218            }
9219            $last_line_had_side_comment = 0;
9220            return;
9221        }
9222
9223        # see if this is a static block comment (starts with ## by default)
9224        my $is_static_block_comment_without_leading_space = 0;
9225        if (   $jmax == 0
9226            && $$rtoken_type[0] eq '#'
9227            && $rOpts->{'static-block-comments'}
9228            && $input_line =~ /$static_block_comment_pattern/o )
9229        {
9230            $is_static_block_comment = 1;
9231            $is_static_block_comment_without_leading_space =
9232              substr( $input_line, 0, 1 ) eq '#';
9233        }
9234
9235        # Check for comments which are line directives
9236        # Treat exactly as static block comments without leading space
9237        # reference: perlsyn, near end, section Plain Old Comments (Not!)
9238        # example: '# line 42 "new_filename.plx"'
9239        if (
9240               $jmax == 0
9241            && $$rtoken_type[0] eq '#'
9242            && $input_line =~ /^\#   \s*
9243                               line \s+ (\d+)   \s*
9244                               (?:\s("?)([^"]+)\2)? \s*
9245                               $/x
9246          )
9247        {
9248            $is_static_block_comment                       = 1;
9249            $is_static_block_comment_without_leading_space = 1;
9250        }
9251
9252        # create a hanging side comment if appropriate
9253        my $is_hanging_side_comment;
9254        if (
9255               $jmax == 0
9256            && $$rtoken_type[0] eq '#'      # only token is a comment
9257            && $last_line_had_side_comment  # last line had side comment
9258            && $input_line =~ /^\s/         # there is some leading space
9259            && !$is_static_block_comment    # do not make static comment hanging
9260            && $rOpts->{'hanging-side-comments'}    # user is allowing
9261                                                    # hanging side comments
9262                                                    # like this
9263          )
9264        {
9265
9266            # We will insert an empty qw string at the start of the token list
9267            # to force this comment to be a side comment. The vertical aligner
9268            # should then line it up with the previous side comment.
9269            $is_hanging_side_comment = 1;
9270            unshift @$rtoken_type,            'q';
9271            unshift @$rtokens,                '';
9272            unshift @$rlevels,                $$rlevels[0];
9273            unshift @$rslevels,               $$rslevels[0];
9274            unshift @$rblock_type,            '';
9275            unshift @$rcontainer_type,        '';
9276            unshift @$rcontainer_environment, '';
9277            unshift @$rtype_sequence,         '';
9278            unshift @$rnesting_tokens,        $$rnesting_tokens[0];
9279            unshift @$rci_levels,             $$rci_levels[0];
9280            unshift @$rnesting_blocks,        $$rnesting_blocks[0];
9281            $jmax = 1;
9282        }
9283
9284        # remember if this line has a side comment
9285        $last_line_had_side_comment =
9286          ( $jmax > 0 && $$rtoken_type[$jmax] eq '#' );
9287
9288        # Handle a block (full-line) comment..
9289        if ( ( $jmax == 0 ) && ( $$rtoken_type[0] eq '#' ) ) {
9290
9291            if ( $rOpts->{'delete-block-comments'} ) { return }
9292
9293            if ( $rOpts->{'tee-block-comments'} ) {
9294                $file_writer_object->tee_on();
9295            }
9296
9297            destroy_one_line_block();
9298            output_line_to_go();
9299
9300            # output a blank line before block comments
9301            if (
9302                # unless we follow a blank or comment line
9303                $last_line_leading_type !~ /^[#b]$/
9304
9305                # only if allowed
9306                && $rOpts->{'blanks-before-comments'}
9307
9308                # not if this is an empty comment line
9309                && $$rtokens[0] ne '#'
9310
9311                # not after a short line ending in an opening token
9312                # because we already have space above this comment.
9313                # Note that the first comment in this if block, after
9314                # the 'if (', does not get a blank line because of this.
9315                && !$last_output_short_opening_token
9316
9317                # never before static block comments
9318                && !$is_static_block_comment
9319              )
9320            {
9321                flush();    # switching to new output stream
9322                $file_writer_object->write_blank_code_line();
9323                $last_line_leading_type = 'b';
9324            }
9325
9326            # TRIM COMMENTS -- This could be turned off as a option
9327            $$rtokens[0] =~ s/\s*$//;    # trim right end
9328
9329            if (
9330                $rOpts->{'indent-block-comments'}
9331                && (  !$rOpts->{'indent-spaced-block-comments'}
9332                    || $input_line =~ /^\s+/ )
9333                && !$is_static_block_comment_without_leading_space
9334              )
9335            {
9336                extract_token(0);
9337                store_token_to_go();
9338                output_line_to_go();
9339            }
9340            else {
9341                flush();    # switching to new output stream
9342                $file_writer_object->write_code_line( $$rtokens[0] . "\n" );
9343                $last_line_leading_type = '#';
9344            }
9345            if ( $rOpts->{'tee-block-comments'} ) {
9346                $file_writer_object->tee_off();
9347            }
9348            return;
9349        }
9350
9351        # compare input/output indentation except for continuation lines
9352        # (because they have an unknown amount of initial blank space)
9353        # and lines which are quotes (because they may have been outdented)
9354        # Note: this test is placed here because we know the continuation flag
9355        # at this point, which allows us to avoid non-meaningful checks.
9356        my $structural_indentation_level = $$rlevels[0];
9357        compare_indentation_levels( $guessed_indentation_level,
9358            $structural_indentation_level )
9359          unless ( $is_hanging_side_comment
9360            || $$rci_levels[0] > 0
9361            || $guessed_indentation_level == 0 && $$rtoken_type[0] eq 'Q' );
9362
9363        #   Patch needed for MakeMaker.  Do not break a statement
9364        #   in which $VERSION may be calculated.  See MakeMaker.pm;
9365        #   this is based on the coding in it.
9366        #   The first line of a file that matches this will be eval'd:
9367        #       /([\$*])(([\w\:\']*)\bVERSION)\b.*\=/
9368        #   Examples:
9369        #     *VERSION = \'1.01';
9370        #     ( $VERSION ) = '$Revision: 1.74 $ ' =~ /\$Revision:\s+([^\s]+)/;
9371        #   We will pass such a line straight through without breaking
9372        #   it unless -npvl is used
9373
9374        my $is_VERSION_statement = 0;
9375
9376        if (
9377              !$saw_VERSION_in_this_file
9378            && $input_line =~ /VERSION/    # quick check to reject most lines
9379            && $input_line =~ /([\$*])(([\w\:\']*)\bVERSION)\b.*\=/
9380          )
9381        {
9382            $saw_VERSION_in_this_file = 1;
9383            $is_VERSION_statement     = 1;
9384            write_logfile_entry("passing VERSION line; -npvl deactivates\n");
9385            $no_internal_newlines = 1;
9386        }
9387
9388        # take care of indentation-only
9389        # NOTE: In previous versions we sent all qw lines out immediately here.
9390        # No longer doing this: also write a line which is entirely a 'qw' list
9391        # to allow stacking of opening and closing tokens.  Note that interior
9392        # qw lines will still go out at the end of this routine.
9393        if ( $rOpts->{'indent-only'} ) {
9394            flush();
9395            trim($input_line);
9396
9397            extract_token(0);
9398            $token                 = $input_line;
9399            $type                  = 'q';
9400            $block_type            = "";
9401            $container_type        = "";
9402            $container_environment = "";
9403            $type_sequence         = "";
9404            store_token_to_go();
9405            output_line_to_go();
9406            return;
9407        }
9408
9409        push( @$rtokens,     ' ', ' ' );   # making $j+2 valid simplifies coding
9410        push( @$rtoken_type, 'b', 'b' );
9411        ($rwhite_space_flag) =
9412          set_white_space_flag( $jmax, $rtokens, $rtoken_type, $rblock_type );
9413
9414        # if the buffer hasn't been flushed, add a leading space if
9415        # necessary to keep essential whitespace. This is really only
9416        # necessary if we are squeezing out all ws.
9417        if ( $max_index_to_go >= 0 ) {
9418
9419            $old_line_count_in_batch++;
9420
9421            if (
9422                is_essential_whitespace(
9423                    $last_last_nonblank_token,
9424                    $last_last_nonblank_type,
9425                    $tokens_to_go[$max_index_to_go],
9426                    $types_to_go[$max_index_to_go],
9427                    $$rtokens[0],
9428                    $$rtoken_type[0]
9429                )
9430              )
9431            {
9432                my $slevel = $$rslevels[0];
9433                insert_new_token_to_go( ' ', 'b', $slevel,
9434                    $no_internal_newlines );
9435            }
9436        }
9437
9438        # If we just saw the end of an elsif block, write nag message
9439        # if we do not see another elseif or an else.
9440        if ($looking_for_else) {
9441
9442            unless ( $$rtokens[0] =~ /^(elsif|else)$/ ) {
9443                write_logfile_entry("(No else block)\n");
9444            }
9445            $looking_for_else = 0;
9446        }
9447
9448        # This is a good place to kill incomplete one-line blocks
9449        if (   ( $semicolons_before_block_self_destruct == 0 )
9450            && ( $max_index_to_go >= 0 )
9451            && ( $types_to_go[$max_index_to_go] eq ';' )
9452            && ( $$rtokens[0] ne '}' ) )
9453        {
9454            destroy_one_line_block();
9455            output_line_to_go();
9456        }
9457
9458        # loop to process the tokens one-by-one
9459        $type  = 'b';
9460        $token = "";
9461
9462        foreach $j ( 0 .. $jmax ) {
9463
9464            # pull out the local values for this token
9465            extract_token($j);
9466
9467            if ( $type eq '#' ) {
9468
9469                # trim trailing whitespace
9470                # (there is no option at present to prevent this)
9471                $token =~ s/\s*$//;
9472
9473                if (
9474                    $rOpts->{'delete-side-comments'}
9475
9476                    # delete closing side comments if necessary
9477                    || (   $rOpts->{'delete-closing-side-comments'}
9478                        && $token =~ /$closing_side_comment_prefix_pattern/o
9479                        && $last_nonblank_block_type =~
9480                        /$closing_side_comment_list_pattern/o )
9481                  )
9482                {
9483                    if ( $types_to_go[$max_index_to_go] eq 'b' ) {
9484                        unstore_token_to_go();
9485                    }
9486                    last;
9487                }
9488            }
9489
9490            # If we are continuing after seeing a right curly brace, flush
9491            # buffer unless we see what we are looking for, as in
9492            #   } else ...
9493            if ( $rbrace_follower && $type ne 'b' ) {
9494
9495                unless ( $rbrace_follower->{$token} ) {
9496                    output_line_to_go();
9497                }
9498                $rbrace_follower = undef;
9499            }
9500
9501            $j_next = ( $$rtoken_type[ $j + 1 ] eq 'b' ) ? $j + 2 : $j + 1;
9502            $next_nonblank_token      = $$rtokens[$j_next];
9503            $next_nonblank_token_type = $$rtoken_type[$j_next];
9504
9505            #--------------------------------------------------------
9506            # Start of section to patch token text
9507            #--------------------------------------------------------
9508
9509            # Modify certain tokens here for whitespace
9510            # The following is not yet done, but could be:
9511            #   sub (x x x)
9512            if ( $type =~ /^[wit]$/ ) {
9513
9514                # Examples:
9515                # change '$  var'  to '$var' etc
9516                #        '-> new'  to '->new'
9517                if ( $token =~ /^([\$\&\%\*\@]|\-\>)\s/ ) {
9518                    $token =~ s/\s*//g;
9519                }
9520
9521                # Split identifiers with leading arrows, inserting blanks if
9522                # necessary.  It is easier and safer here than in the
9523                # tokenizer.  For example '->new' becomes two tokens, '->' and
9524                # 'new' with a possible blank between.
9525                #
9526                # Note: there is a related patch in sub set_white_space_flag
9527                if ( $token =~ /^\-\>(.*)$/ && $1 ) {
9528                    my $token_save = $1;
9529                    my $type_save  = $type;
9530
9531                    # store a blank to left of arrow if necessary
9532                    if (   $max_index_to_go >= 0
9533                        && $types_to_go[$max_index_to_go] ne 'b'
9534                        && $want_left_space{'->'} == WS_YES )
9535                    {
9536                        insert_new_token_to_go( ' ', 'b', $slevel,
9537                            $no_internal_newlines );
9538                    }
9539
9540                    # then store the arrow
9541                    $token = '->';
9542                    $type  = $token;
9543                    store_token_to_go();
9544
9545                    # then reset the current token to be the remainder,
9546                    # and reset the whitespace flag according to the arrow
9547                    $$rwhite_space_flag[$j] = $want_right_space{'->'};
9548                    $token                  = $token_save;
9549                    $type                   = $type_save;
9550                }
9551
9552                if ( $token =~ /^sub/ ) { $token =~ s/\s+/ /g }
9553
9554                # trim identifiers of trailing blanks which can occur
9555                # under some unusual circumstances, such as if the
9556                # identifier 'witch' has trailing blanks on input here:
9557                #
9558                # sub
9559                # witch
9560                # ()   # prototype may be on new line ...
9561                # ...
9562                if ( $type eq 'i' ) { $token =~ s/\s+$//g }
9563            }
9564
9565            # change 'LABEL   :'   to 'LABEL:'
9566            elsif ( $type eq 'J' ) { $token =~ s/\s+//g }
9567
9568            # patch to add space to something like "x10"
9569            # This avoids having to split this token in the pre-tokenizer
9570            elsif ( $type eq 'n' ) {
9571                if ( $token =~ /^x\d+/ ) { $token =~ s/x/x / }
9572            }
9573
9574            elsif ( $type eq 'Q' ) {
9575                note_embedded_tab() if ( $token =~ "\t" );
9576
9577                # make note of something like '$var = s/xxx/yyy/;'
9578                # in case it should have been '$var =~ s/xxx/yyy/;'
9579                if (
9580                       $token =~ /^(s|tr|y|m|\/)/
9581                    && $last_nonblank_token =~ /^(=|==|!=)$/
9582
9583                    # precededed by simple scalar
9584                    && $last_last_nonblank_type eq 'i'
9585                    && $last_last_nonblank_token =~ /^\$/
9586
9587                    # followed by some kind of termination
9588                    # (but give complaint if we can's see far enough ahead)
9589                    && $next_nonblank_token =~ /^[; \)\}]$/
9590
9591                    # scalar is not decleared
9592                    && !(
9593                           $types_to_go[0] eq 'k'
9594                        && $tokens_to_go[0] =~ /^(my|our|local)$/
9595                    )
9596                  )
9597                {
9598                    my $guess = substr( $last_nonblank_token, 0, 1 ) . '~';
9599                    complain(
9600"Note: be sure you want '$last_nonblank_token' instead of '$guess' here\n"
9601                    );
9602                }
9603            }
9604
9605           # trim blanks from right of qw quotes
9606           # (To avoid trimming qw quotes use -ntqw; the tokenizer handles this)
9607            elsif ( $type eq 'q' ) {
9608                $token =~ s/\s*$//;
9609                note_embedded_tab() if ( $token =~ "\t" );
9610            }
9611
9612            #--------------------------------------------------------
9613            # End of section to patch token text
9614            #--------------------------------------------------------
9615
9616            # insert any needed whitespace
9617            if (   ( $type ne 'b' )
9618                && ( $max_index_to_go >= 0 )
9619                && ( $types_to_go[$max_index_to_go] ne 'b' )
9620                && $rOpts_add_whitespace )
9621            {
9622                my $ws = $$rwhite_space_flag[$j];
9623
9624                if ( $ws == 1 ) {
9625                    insert_new_token_to_go( ' ', 'b', $slevel,
9626                        $no_internal_newlines );
9627                }
9628            }
9629
9630            # Do not allow breaks which would promote a side comment to a
9631            # block comment.  In order to allow a break before an opening
9632            # or closing BLOCK, followed by a side comment, those sections
9633            # of code will handle this flag separately.
9634            my $side_comment_follows = ( $next_nonblank_token_type eq '#' );
9635            my $is_opening_BLOCK =
9636              (      $type eq '{'
9637                  && $token eq '{'
9638                  && $block_type
9639                  && $block_type ne 't' );
9640            my $is_closing_BLOCK =
9641              (      $type eq '}'
9642                  && $token eq '}'
9643                  && $block_type
9644                  && $block_type ne 't' );
9645
9646            if (   $side_comment_follows
9647                && !$is_opening_BLOCK
9648                && !$is_closing_BLOCK )
9649            {
9650                $no_internal_newlines = 1;
9651            }
9652
9653            # We're only going to handle breaking for code BLOCKS at this
9654            # (top) level.  Other indentation breaks will be handled by
9655            # sub scan_list, which is better suited to dealing with them.
9656            if ($is_opening_BLOCK) {
9657
9658                # Tentatively output this token.  This is required before
9659                # calling starting_one_line_block.  We may have to unstore
9660                # it, though, if we have to break before it.
9661                store_token_to_go($side_comment_follows);
9662
9663                # Look ahead to see if we might form a one-line block
9664                my $too_long =
9665                  starting_one_line_block( $j, $jmax, $level, $slevel,
9666                    $ci_level, $rtokens, $rtoken_type, $rblock_type );
9667                clear_breakpoint_undo_stack();
9668
9669                # to simplify the logic below, set a flag to indicate if
9670                # this opening brace is far from the keyword which introduces it
9671                my $keyword_on_same_line = 1;
9672                if (   ( $max_index_to_go >= 0 )
9673                    && ( $last_nonblank_type eq ')' ) )
9674                {
9675                    if (   $block_type =~ /^(if|else|elsif)$/
9676                        && ( $tokens_to_go[0] eq '}' )
9677                        && $rOpts_cuddled_else )
9678                    {
9679                        $keyword_on_same_line = 1;
9680                    }
9681                    elsif ( ( $slevel < $nesting_depth_to_go[0] ) || $too_long )
9682                    {
9683                        $keyword_on_same_line = 0;
9684                    }
9685                }
9686
9687                # decide if user requested break before '{'
9688                my $want_break =
9689
9690                  # use -bl flag if not a sub block of any type
9691                  $block_type !~ /^sub/
9692                  ? $rOpts->{'opening-brace-on-new-line'}
9693
9694                  # use -sbl flag for a named sub block
9695                  : $block_type !~ /^sub\W*$/
9696                  ? $rOpts->{'opening-sub-brace-on-new-line'}
9697
9698                  # use -asbl flag for an anonymous sub block
9699                  : $rOpts->{'opening-anonymous-sub-brace-on-new-line'};
9700
9701                # Break before an opening '{' ...
9702                if (
9703
9704                    # if requested
9705                    $want_break
9706
9707                    # and we were unable to start looking for a block,
9708                    && $index_start_one_line_block == UNDEFINED_INDEX
9709
9710                    # or if it will not be on same line as its keyword, so that
9711                    # it will be outdented (eval.t, overload.t), and the user
9712                    # has not insisted on keeping it on the right
9713                    || (   !$keyword_on_same_line
9714                        && !$rOpts->{'opening-brace-always-on-right'} )
9715
9716                  )
9717                {
9718
9719                    # but only if allowed
9720                    unless ($no_internal_newlines) {
9721
9722                        # since we already stored this token, we must unstore it
9723                        unstore_token_to_go();
9724
9725                        # then output the line
9726                        output_line_to_go();
9727
9728                        # and now store this token at the start of a new line
9729                        store_token_to_go($side_comment_follows);
9730                    }
9731                }
9732
9733                # Now update for side comment
9734                if ($side_comment_follows) { $no_internal_newlines = 1 }
9735
9736                # now output this line
9737                unless ($no_internal_newlines) {
9738                    output_line_to_go();
9739                }
9740            }
9741
9742            elsif ($is_closing_BLOCK) {
9743
9744                # If there is a pending one-line block ..
9745                if ( $index_start_one_line_block != UNDEFINED_INDEX ) {
9746
9747                    # we have to terminate it if..
9748                    if (
9749
9750                    # it is too long (final length may be different from
9751                    # initial estimate). note: must allow 1 space for this token
9752                        excess_line_length( $index_start_one_line_block,
9753                            $max_index_to_go ) >= 0
9754
9755                        # or if it has too many semicolons
9756                        || (   $semicolons_before_block_self_destruct == 0
9757                            && $last_nonblank_type ne ';' )
9758                      )
9759                    {
9760                        destroy_one_line_block();
9761                    }
9762                }
9763
9764                # put a break before this closing curly brace if appropriate
9765                unless ( $no_internal_newlines
9766                    || $index_start_one_line_block != UNDEFINED_INDEX )
9767                {
9768
9769                    # add missing semicolon if ...
9770                    # there are some tokens
9771                    if (
9772                        ( $max_index_to_go > 0 )
9773
9774                        # and we don't have one
9775                        && ( $last_nonblank_type ne ';' )
9776
9777                        # patch until some block type issues are fixed:
9778                        # Do not add semi-colon for block types '{',
9779                        # '}', and ';' because we cannot be sure yet
9780                        # that this is a block and not an anonomyous
9781                        # hash (blktype.t, blktype1.t)
9782                        && ( $block_type !~ /^[\{\};]$/ )
9783
9784                        # patch: and do not add semi-colons for recently
9785                        # added block types (see tmp/semicolon.t)
9786                        && ( $block_type !~
9787                            /^(switch|case|given|when|default)$/ )
9788
9789                        # it seems best not to add semicolons in these
9790                        # special block types: sort|map|grep
9791                        && ( !$is_sort_map_grep{$block_type} )
9792
9793                        # and we are allowed to do so.
9794                        && $rOpts->{'add-semicolons'}
9795                      )
9796                    {
9797
9798                        save_current_token();
9799                        $token  = ';';
9800                        $type   = ';';
9801                        $level  = $levels_to_go[$max_index_to_go];
9802                        $slevel = $nesting_depth_to_go[$max_index_to_go];
9803                        $nesting_blocks =
9804                          $nesting_blocks_to_go[$max_index_to_go];
9805                        $ci_level       = $ci_levels_to_go[$max_index_to_go];
9806                        $block_type     = "";
9807                        $container_type = "";
9808                        $container_environment = "";
9809                        $type_sequence         = "";
9810
9811                        # Note - we remove any blank AFTER extracting its
9812                        # parameters such as level, etc, above
9813                        if ( $types_to_go[$max_index_to_go] eq 'b' ) {
9814                            unstore_token_to_go();
9815                        }
9816                        store_token_to_go();
9817
9818                        note_added_semicolon();
9819                        restore_current_token();
9820                    }
9821
9822                    # then write out everything before this closing curly brace
9823                    output_line_to_go();
9824
9825                }
9826
9827                # Now update for side comment
9828                if ($side_comment_follows) { $no_internal_newlines = 1 }
9829
9830                # store the closing curly brace
9831                store_token_to_go();
9832
9833                # ok, we just stored a closing curly brace.  Often, but
9834                # not always, we want to end the line immediately.
9835                # So now we have to check for special cases.
9836
9837                # if this '}' successfully ends a one-line block..
9838                my $is_one_line_block = 0;
9839                my $keep_going        = 0;
9840                if ( $index_start_one_line_block != UNDEFINED_INDEX ) {
9841
9842                    # Remember the type of token just before the
9843                    # opening brace.  It would be more general to use
9844                    # a stack, but this will work for one-line blocks.
9845                    $is_one_line_block =
9846                      $types_to_go[$index_start_one_line_block];
9847
9848                    # we have to actually make it by removing tentative
9849                    # breaks that were set within it
9850                    undo_forced_breakpoint_stack(0);
9851                    set_nobreaks( $index_start_one_line_block,
9852                        $max_index_to_go - 1 );
9853
9854                    # then re-initialize for the next one-line block
9855                    destroy_one_line_block();
9856
9857                    # then decide if we want to break after the '}' ..
9858                    # We will keep going to allow certain brace followers as in:
9859                    #   do { $ifclosed = 1; last } unless $losing;
9860                    #
9861                    # But make a line break if the curly ends a
9862                    # significant block:
9863                    if (
9864                        $is_block_without_semicolon{$block_type}
9865
9866                        # if needless semicolon follows we handle it later
9867                        && $next_nonblank_token ne ';'
9868                      )
9869                    {
9870                        output_line_to_go() unless ($no_internal_newlines);
9871                    }
9872                }
9873
9874                # set string indicating what we need to look for brace follower
9875                # tokens
9876                if ( $block_type eq 'do' ) {
9877                    $rbrace_follower = \%is_do_follower;
9878                }
9879                elsif ( $block_type =~ /^(if|elsif|unless)$/ ) {
9880                    $rbrace_follower = \%is_if_brace_follower;
9881                }
9882                elsif ( $block_type eq 'else' ) {
9883                    $rbrace_follower = \%is_else_brace_follower;
9884                }
9885
9886                # added eval for borris.t
9887                elsif ($is_sort_map_grep_eval{$block_type}
9888                    || $is_one_line_block eq 'G' )
9889                {
9890                    $rbrace_follower = undef;
9891                    $keep_going      = 1;
9892                }
9893
9894                # anonymous sub
9895                elsif ( $block_type =~ /^sub\W*$/ ) {
9896
9897                    if ($is_one_line_block) {
9898                        $rbrace_follower = \%is_anon_sub_1_brace_follower;
9899                    }
9900                    else {
9901                        $rbrace_follower = \%is_anon_sub_brace_follower;
9902                    }
9903                }
9904
9905                # None of the above: specify what can follow a closing
9906                # brace of a block which is not an
9907                # if/elsif/else/do/sort/map/grep/eval
9908                # Testfiles:
9909                # 'Toolbar.pm', 'Menubar.pm', bless.t, '3rules.pl', 'break1.t
9910                else {
9911                    $rbrace_follower = \%is_other_brace_follower;
9912                }
9913
9914                # See if an elsif block is followed by another elsif or else;
9915                # complain if not.
9916                if ( $block_type eq 'elsif' ) {
9917
9918                    if ( $next_nonblank_token_type eq 'b' ) {    # end of line?
9919                        $looking_for_else = 1;    # ok, check on next line
9920                    }
9921                    else {
9922
9923                        unless ( $next_nonblank_token =~ /^(elsif|else)$/ ) {
9924                            write_logfile_entry("No else block :(\n");
9925                        }
9926                    }
9927                }
9928
9929                # keep going after certain block types (map,sort,grep,eval)
9930                # added eval for borris.t
9931                if ($keep_going) {
9932
9933                    # keep going
9934                }
9935
9936                # if no more tokens, postpone decision until re-entring
9937                elsif ( ( $next_nonblank_token_type eq 'b' )
9938                    && $rOpts_add_newlines )
9939                {
9940                    unless ($rbrace_follower) {
9941                        output_line_to_go() unless ($no_internal_newlines);
9942                    }
9943                }
9944
9945                elsif ($rbrace_follower) {
9946
9947                    unless ( $rbrace_follower->{$next_nonblank_token} ) {
9948                        output_line_to_go() unless ($no_internal_newlines);
9949                    }
9950                    $rbrace_follower = undef;
9951                }
9952
9953                else {
9954                    output_line_to_go() unless ($no_internal_newlines);
9955                }
9956
9957            }    # end treatment of closing block token
9958
9959            # handle semicolon
9960            elsif ( $type eq ';' ) {
9961
9962                # kill one-line blocks with too many semicolons
9963                $semicolons_before_block_self_destruct--;
9964                if (
9965                    ( $semicolons_before_block_self_destruct < 0 )
9966                    || (   $semicolons_before_block_self_destruct == 0
9967                        && $next_nonblank_token_type !~ /^[b\}]$/ )
9968                  )
9969                {
9970                    destroy_one_line_block();
9971                }
9972
9973                # Remove unnecessary semicolons, but not after bare
9974                # blocks, where it could be unsafe if the brace is
9975                # mistokenized.
9976                if (
9977                    (
9978                        $last_nonblank_token eq '}'
9979                        && (
9980                            $is_block_without_semicolon{
9981                                $last_nonblank_block_type}
9982                            || $last_nonblank_block_type =~ /^sub\s+\w/
9983                            || $last_nonblank_block_type =~ /^\w+:$/ )
9984                    )
9985                    || $last_nonblank_type eq ';'
9986                  )
9987                {
9988
9989                    if (
9990                        $rOpts->{'delete-semicolons'}
9991
9992                        # don't delete ; before a # because it would promote it
9993                        # to a block comment
9994                        && ( $next_nonblank_token_type ne '#' )
9995                      )
9996                    {
9997                        note_deleted_semicolon();
9998                        output_line_to_go()
9999                          unless ( $no_internal_newlines
10000                            || $index_start_one_line_block != UNDEFINED_INDEX );
10001                        next;
10002                    }
10003                    else {
10004                        write_logfile_entry("Extra ';'\n");
10005                    }
10006                }
10007                store_token_to_go();
10008
10009                output_line_to_go()
10010                  unless ( $no_internal_newlines
10011                    || ( $rOpts_keep_interior_semicolons && $j < $jmax )
10012                    || ( $next_nonblank_token eq '}' ) );
10013
10014            }
10015
10016            # handle here_doc target string
10017            elsif ( $type eq 'h' ) {
10018                $no_internal_newlines =
10019                  1;    # no newlines after seeing here-target
10020                destroy_one_line_block();
10021                store_token_to_go();
10022            }
10023
10024            # handle all other token types
10025            else {
10026
10027                # if this is a blank...
10028                if ( $type eq 'b' ) {
10029
10030                    # make it just one character
10031                    $token = ' ' if $rOpts_add_whitespace;
10032
10033                    # delete it if unwanted by whitespace rules
10034                    # or we are deleting all whitespace
10035                    my $ws = $$rwhite_space_flag[ $j + 1 ];
10036                    if ( ( defined($ws) && $ws == -1 )
10037                        || $rOpts_delete_old_whitespace )
10038                    {
10039
10040                        # unless it might make a syntax error
10041                        next
10042                          unless is_essential_whitespace(
10043                            $last_last_nonblank_token,
10044                            $last_last_nonblank_type,
10045                            $tokens_to_go[$max_index_to_go],
10046                            $types_to_go[$max_index_to_go],
10047                            $$rtokens[ $j + 1 ],
10048                            $$rtoken_type[ $j + 1 ]
10049                          );
10050                    }
10051                }
10052                store_token_to_go();
10053            }
10054
10055            # remember two previous nonblank OUTPUT tokens
10056            if ( $type ne '#' && $type ne 'b' ) {
10057                $last_last_nonblank_token = $last_nonblank_token;
10058                $last_last_nonblank_type  = $last_nonblank_type;
10059                $last_nonblank_token      = $token;
10060                $last_nonblank_type       = $type;
10061                $last_nonblank_block_type = $block_type;
10062            }
10063
10064            # unset the continued-quote flag since it only applies to the
10065            # first token, and we want to resume normal formatting if
10066            # there are additional tokens on the line
10067            $in_continued_quote = 0;
10068
10069        }    # end of loop over all tokens in this 'line_of_tokens'
10070
10071        # we have to flush ..
10072        if (
10073
10074            # if there is a side comment
10075            ( ( $type eq '#' ) && !$rOpts->{'delete-side-comments'} )
10076
10077            # if this line ends in a quote
10078            # NOTE: This is critically important for insuring that quoted lines
10079            # do not get processed by things like -sot and -sct
10080            || $in_quote
10081
10082            # if this is a VERSION statement
10083            || $is_VERSION_statement
10084
10085            # to keep a label at the end of a line
10086            || $type eq 'J'
10087
10088            # if we are instructed to keep all old line breaks
10089            || !$rOpts->{'delete-old-newlines'}
10090          )
10091        {
10092            destroy_one_line_block();
10093            output_line_to_go();
10094        }
10095
10096        # mark old line breakpoints in current output stream
10097        if ( $max_index_to_go >= 0 && !$rOpts_ignore_old_breakpoints ) {
10098            $old_breakpoint_to_go[$max_index_to_go] = 1;
10099        }
10100    } ## end sub print_line_of_tokens
10101} ## end block print_line_of_tokens
10102
10103# sub output_line_to_go sends one logical line of tokens on down the
10104# pipeline to the VerticalAligner package, breaking the line into continuation
10105# lines as necessary.  The line of tokens is ready to go in the "to_go"
10106# arrays.
10107sub output_line_to_go {
10108
10109    # debug stuff; this routine can be called from many points
10110    FORMATTER_DEBUG_FLAG_OUTPUT && do {
10111        my ( $a, $b, $c ) = caller;
10112        write_diagnostics(
10113"OUTPUT: output_line_to_go called: $a $c $last_nonblank_type $last_nonblank_token, one_line=$index_start_one_line_block, tokens to write=$max_index_to_go\n"
10114        );
10115        my $output_str = join "", @tokens_to_go[ 0 .. $max_index_to_go ];
10116        write_diagnostics("$output_str\n");
10117    };
10118
10119    # just set a tentative breakpoint if we might be in a one-line block
10120    if ( $index_start_one_line_block != UNDEFINED_INDEX ) {
10121        set_forced_breakpoint($max_index_to_go);
10122        return;
10123    }
10124
10125    my $cscw_block_comment;
10126    $cscw_block_comment = add_closing_side_comment()
10127      if ( $rOpts->{'closing-side-comments'} && $max_index_to_go >= 0 );
10128
10129    my $comma_arrow_count_contained = match_opening_and_closing_tokens();
10130
10131    # tell the -lp option we are outputting a batch so it can close
10132    # any unfinished items in its stack
10133    finish_lp_batch();
10134
10135    # If this line ends in a code block brace, set breaks at any
10136    # previous closing code block braces to breakup a chain of code
10137    # blocks on one line.  This is very rare but can happen for
10138    # user-defined subs.  For example we might be looking at this:
10139    #  BOOL { $server_data{uptime} > 0; } NUM { $server_data{load}; } STR {
10140    my $saw_good_break = 0;    # flag to force breaks even if short line
10141    if (
10142
10143        # looking for opening or closing block brace
10144        $block_type_to_go[$max_index_to_go]
10145
10146        # but not one of these which are never duplicated on a line:
10147        # until|while|for|if|elsif|else
10148        && !$is_block_without_semicolon{ $block_type_to_go[$max_index_to_go] }
10149      )
10150    {
10151        my $lev = $nesting_depth_to_go[$max_index_to_go];
10152
10153        # Walk backwards from the end and
10154        # set break at any closing block braces at the same level.
10155        # But quit if we are not in a chain of blocks.
10156        for ( my $i = $max_index_to_go - 1 ; $i >= 0 ; $i-- ) {
10157            last if ( $levels_to_go[$i] < $lev );    # stop at a lower level
10158            next if ( $levels_to_go[$i] > $lev );    # skip past higher level
10159
10160            if ( $block_type_to_go[$i] ) {
10161                if ( $tokens_to_go[$i] eq '}' ) {
10162                    set_forced_breakpoint($i);
10163                    $saw_good_break = 1;
10164                }
10165            }
10166
10167            # quit if we see anything besides words, function, blanks
10168            # at this level
10169            elsif ( $types_to_go[$i] !~ /^[\(\)Gwib]$/ ) { last }
10170        }
10171    }
10172
10173    my $imin = 0;
10174    my $imax = $max_index_to_go;
10175
10176    # trim any blank tokens
10177    if ( $max_index_to_go >= 0 ) {
10178        if ( $types_to_go[$imin] eq 'b' ) { $imin++ }
10179        if ( $types_to_go[$imax] eq 'b' ) { $imax-- }
10180    }
10181
10182    # anything left to write?
10183    if ( $imin <= $imax ) {
10184
10185        # add a blank line before certain key types but not after a comment
10186        if ( $last_line_leading_type !~ /^[#]/ ) {
10187            my $want_blank    = 0;
10188            my $leading_token = $tokens_to_go[$imin];
10189            my $leading_type  = $types_to_go[$imin];
10190
10191            # blank lines before subs except declarations and one-liners
10192            # MCONVERSION LOCATION - for sub tokenization change
10193            if ( $leading_token =~ /^(sub\s)/ && $leading_type eq 'i' ) {
10194                $want_blank = $rOpts->{'blank-lines-before-subs'}
10195                  if (
10196                    terminal_type( \@types_to_go, \@block_type_to_go, $imin,
10197                        $imax ) !~ /^[\;\}]$/
10198                  );
10199            }
10200
10201            # break before all package declarations
10202            # MCONVERSION LOCATION - for tokenizaton change
10203            elsif ($leading_token =~ /^(package\s)/
10204                && $leading_type eq 'i' )
10205            {
10206                $want_blank = $rOpts->{'blank-lines-before-packages'};
10207            }
10208
10209            # break before certain key blocks except one-liners
10210            if ( $leading_token =~ /^(BEGIN|END)$/ && $leading_type eq 'k' ) {
10211                $want_blank = $rOpts->{'blank-lines-before-subs'}
10212                  if (
10213                    terminal_type( \@types_to_go, \@block_type_to_go, $imin,
10214                        $imax ) ne '}'
10215                  );
10216            }
10217
10218            # Break before certain block types if we haven't had a
10219            # break at this level for a while.  This is the
10220            # difficult decision..
10221            elsif ($leading_type eq 'k'
10222                && $last_line_leading_type ne 'b'
10223                && $leading_token =~ /^(unless|if|while|until|for|foreach)$/ )
10224            {
10225                my $lc = $nonblank_lines_at_depth[$last_line_leading_level];
10226                if ( !defined($lc) ) { $lc = 0 }
10227
10228                $want_blank =
10229                     $rOpts->{'blanks-before-blocks'}
10230                  && $lc >= $rOpts->{'long-block-line-count'}
10231                  && $file_writer_object->get_consecutive_nonblank_lines() >=
10232                  $rOpts->{'long-block-line-count'}
10233                  && (
10234                    terminal_type( \@types_to_go, \@block_type_to_go, $imin,
10235                        $imax ) ne '}'
10236                  );
10237            }
10238
10239            if ($want_blank) {
10240
10241                # future: send blank line down normal path to VerticalAligner
10242                Perl::Tidy::VerticalAligner::flush();
10243                $file_writer_object->require_blank_code_lines($want_blank);
10244            }
10245        }
10246
10247        # update blank line variables and count number of consecutive
10248        # non-blank, non-comment lines at this level
10249        $last_last_line_leading_level = $last_line_leading_level;
10250        $last_line_leading_level      = $levels_to_go[$imin];
10251        if ( $last_line_leading_level < 0 ) { $last_line_leading_level = 0 }
10252        $last_line_leading_type = $types_to_go[$imin];
10253        if (   $last_line_leading_level == $last_last_line_leading_level
10254            && $last_line_leading_type ne 'b'
10255            && $last_line_leading_type ne '#'
10256            && defined( $nonblank_lines_at_depth[$last_line_leading_level] ) )
10257        {
10258            $nonblank_lines_at_depth[$last_line_leading_level]++;
10259        }
10260        else {
10261            $nonblank_lines_at_depth[$last_line_leading_level] = 1;
10262        }
10263
10264        FORMATTER_DEBUG_FLAG_FLUSH && do {
10265            my ( $package, $file, $line ) = caller;
10266            print STDOUT
10267"FLUSH: flushing from $package $file $line, types= $types_to_go[$imin] to $types_to_go[$imax]\n";
10268        };
10269
10270        # add a couple of extra terminal blank tokens
10271        pad_array_to_go();
10272
10273        # set all forced breakpoints for good list formatting
10274        my $is_long_line = excess_line_length( $imin, $max_index_to_go ) > 0;
10275
10276        if (
10277               $is_long_line
10278            || $old_line_count_in_batch > 1
10279
10280            # must always call scan_list() with unbalanced batches because it
10281            # is maintaining some stacks
10282            || is_unbalanced_batch()
10283
10284            # call scan_list if we might want to break at commas
10285            || (
10286                $comma_count_in_batch
10287                && (   $rOpts_maximum_fields_per_table > 0
10288                    || $rOpts_comma_arrow_breakpoints == 0 )
10289            )
10290
10291            # call scan_list if user may want to break open some one-line
10292            # hash references
10293            || (   $comma_arrow_count_contained
10294                && $rOpts_comma_arrow_breakpoints != 3 )
10295          )
10296        {
10297            ## This caused problems in one version of perl for unknown reasons:
10298            ## $saw_good_break ||= scan_list();
10299            my $sgb = scan_list();
10300            $saw_good_break ||= $sgb;
10301        }
10302
10303        # let $ri_first and $ri_last be references to lists of
10304        # first and last tokens of line fragments to output..
10305        my ( $ri_first, $ri_last );
10306
10307        # write a single line if..
10308        if (
10309
10310            # we aren't allowed to add any newlines
10311            !$rOpts_add_newlines
10312
10313            # or, we don't already have an interior breakpoint
10314            # and we didn't see a good breakpoint
10315            || (
10316                   !$forced_breakpoint_count
10317                && !$saw_good_break
10318
10319                # and this line is 'short'
10320                && !$is_long_line
10321            )
10322          )
10323        {
10324            @$ri_first = ($imin);
10325            @$ri_last  = ($imax);
10326        }
10327
10328        # otherwise use multiple lines
10329        else {
10330
10331            ( $ri_first, $ri_last, my $colon_count ) =
10332              set_continuation_breaks($saw_good_break);
10333
10334            break_all_chain_tokens( $ri_first, $ri_last );
10335
10336            break_equals( $ri_first, $ri_last );
10337
10338            # now we do a correction step to clean this up a bit
10339            # (The only time we would not do this is for debugging)
10340            if ( $rOpts->{'recombine'} ) {
10341                ( $ri_first, $ri_last ) =
10342                  recombine_breakpoints( $ri_first, $ri_last );
10343            }
10344
10345            insert_final_breaks( $ri_first, $ri_last ) if $colon_count;
10346        }
10347
10348        # do corrector step if -lp option is used
10349        my $do_not_pad = 0;
10350        if ($rOpts_line_up_parentheses) {
10351            $do_not_pad = correct_lp_indentation( $ri_first, $ri_last );
10352        }
10353        send_lines_to_vertical_aligner( $ri_first, $ri_last, $do_not_pad );
10354    }
10355    prepare_for_new_input_lines();
10356
10357    # output any new -cscw block comment
10358    if ($cscw_block_comment) {
10359        flush();
10360        $file_writer_object->write_code_line( $cscw_block_comment . "\n" );
10361    }
10362}
10363
10364sub note_added_semicolon {
10365    $last_added_semicolon_at = $input_line_number;
10366    if ( $added_semicolon_count == 0 ) {
10367        $first_added_semicolon_at = $last_added_semicolon_at;
10368    }
10369    $added_semicolon_count++;
10370    write_logfile_entry("Added ';' here\n");
10371}
10372
10373sub note_deleted_semicolon {
10374    $last_deleted_semicolon_at = $input_line_number;
10375    if ( $deleted_semicolon_count == 0 ) {
10376        $first_deleted_semicolon_at = $last_deleted_semicolon_at;
10377    }
10378    $deleted_semicolon_count++;
10379    write_logfile_entry("Deleted unnecessary ';'\n");    # i hope ;)
10380}
10381
10382sub note_embedded_tab {
10383    $embedded_tab_count++;
10384    $last_embedded_tab_at = $input_line_number;
10385    if ( !$first_embedded_tab_at ) {
10386        $first_embedded_tab_at = $last_embedded_tab_at;
10387    }
10388
10389    if ( $embedded_tab_count <= MAX_NAG_MESSAGES ) {
10390        write_logfile_entry("Embedded tabs in quote or pattern\n");
10391    }
10392}
10393
10394sub starting_one_line_block {
10395
10396    # after seeing an opening curly brace, look for the closing brace
10397    # and see if the entire block will fit on a line.  This routine is
10398    # not always right because it uses the old whitespace, so a check
10399    # is made later (at the closing brace) to make sure we really
10400    # have a one-line block.  We have to do this preliminary check,
10401    # though, because otherwise we would always break at a semicolon
10402    # within a one-line block if the block contains multiple statements.
10403
10404    my ( $j, $jmax, $level, $slevel, $ci_level, $rtokens, $rtoken_type,
10405        $rblock_type )
10406      = @_;
10407
10408    # kill any current block - we can only go 1 deep
10409    destroy_one_line_block();
10410
10411    # return value:
10412    #  1=distance from start of block to opening brace exceeds line length
10413    #  0=otherwise
10414
10415    my $i_start = 0;
10416
10417    # shouldn't happen: there must have been a prior call to
10418    # store_token_to_go to put the opening brace in the output stream
10419    if ( $max_index_to_go < 0 ) {
10420        warning("program bug: store_token_to_go called incorrectly\n");
10421        report_definite_bug();
10422    }
10423    else {
10424
10425        # cannot use one-line blocks with cuddled else else/elsif lines
10426        if ( ( $tokens_to_go[0] eq '}' ) && $rOpts_cuddled_else ) {
10427            return 0;
10428        }
10429    }
10430
10431    my $block_type = $$rblock_type[$j];
10432
10433    # find the starting keyword for this block (such as 'if', 'else', ...)
10434
10435    if ( $block_type =~ /^[\{\}\;\:]$/ || $block_type =~ /^package/ ) {
10436        $i_start = $max_index_to_go;
10437    }
10438
10439    elsif ( $last_last_nonblank_token_to_go eq ')' ) {
10440
10441        # For something like "if (xxx) {", the keyword "if" will be
10442        # just after the most recent break. This will be 0 unless
10443        # we have just killed a one-line block and are starting another.
10444        # (doif.t)
10445        # Note: cannot use inext_index_to_go[] here because that array
10446        # is still being constructed.
10447        $i_start = $index_max_forced_break + 1;
10448        if ( $types_to_go[$i_start] eq 'b' ) {
10449            $i_start++;
10450        }
10451
10452        unless ( $tokens_to_go[$i_start] eq $block_type ) {
10453            return 0;
10454        }
10455    }
10456
10457    # the previous nonblank token should start these block types
10458    elsif (( $last_last_nonblank_token_to_go eq $block_type )
10459        || ( $block_type =~ /^sub/ ) )
10460    {
10461        $i_start = $last_last_nonblank_index_to_go;
10462    }
10463
10464    # patch for SWITCH/CASE to retain one-line case/when blocks
10465    elsif ( $block_type eq 'case' || $block_type eq 'when' ) {
10466
10467        # Note: cannot use inext_index_to_go[] here because that array
10468        # is still being constructed.
10469        $i_start = $index_max_forced_break + 1;
10470        if ( $types_to_go[$i_start] eq 'b' ) {
10471            $i_start++;
10472        }
10473        unless ( $tokens_to_go[$i_start] eq $block_type ) {
10474            return 0;
10475        }
10476    }
10477
10478    else {
10479        return 1;
10480    }
10481
10482    my $pos = total_line_length( $i_start, $max_index_to_go ) - 1;
10483
10484    my $i;
10485
10486    # see if length is too long to even start
10487    if ( $pos > maximum_line_length($i_start) ) {
10488        return 1;
10489    }
10490
10491    for ( $i = $j + 1 ; $i <= $jmax ; $i++ ) {
10492
10493        # old whitespace could be arbitrarily large, so don't use it
10494        if   ( $$rtoken_type[$i] eq 'b' ) { $pos += 1 }
10495        else                              { $pos += rtoken_length($i) }
10496
10497        # Return false result if we exceed the maximum line length,
10498        if ( $pos > maximum_line_length($i_start) ) {
10499            return 0;
10500        }
10501
10502        # or encounter another opening brace before finding the closing brace.
10503        elsif ($$rtokens[$i] eq '{'
10504            && $$rtoken_type[$i] eq '{'
10505            && $$rblock_type[$i] )
10506        {
10507            return 0;
10508        }
10509
10510        # if we find our closing brace..
10511        elsif ($$rtokens[$i] eq '}'
10512            && $$rtoken_type[$i] eq '}'
10513            && $$rblock_type[$i] )
10514        {
10515
10516            # be sure any trailing comment also fits on the line
10517            my $i_nonblank =
10518              ( $$rtoken_type[ $i + 1 ] eq 'b' ) ? $i + 2 : $i + 1;
10519
10520            # Patch for one-line sort/map/grep/eval blocks with side comments:
10521            # We will ignore the side comment length for sort/map/grep/eval
10522            # because this can lead to statements which change every time
10523            # perltidy is run.  Here is an example from Denis Moskowitz which
10524            # oscillates between these two states without this patch:
10525
10526## --------
10527## grep { $_->foo ne 'bar' } # asdfa asdf asdf asdf asdf asdf asdf asdf asdf asdf asdf
10528##  @baz;
10529##
10530## grep {
10531##     $_->foo ne 'bar'
10532##   }    # asdfa asdf asdf asdf asdf asdf asdf asdf asdf asdf asdf
10533##   @baz;
10534## --------
10535
10536            # When the first line is input it gets broken apart by the main
10537            # line break logic in sub print_line_of_tokens.
10538            # When the second line is input it gets recombined by
10539            # print_line_of_tokens and passed to the output routines.  The
10540            # output routines (set_continuation_breaks) do not break it apart
10541            # because the bond strengths are set to the highest possible value
10542            # for grep/map/eval/sort blocks, so the first version gets output.
10543            # It would be possible to fix this by changing bond strengths,
10544            # but they are high to prevent errors in older versions of perl.
10545
10546            if ( $$rtoken_type[$i_nonblank] eq '#'
10547                && !$is_sort_map_grep{$block_type} )
10548            {
10549
10550                $pos += rtoken_length($i_nonblank);
10551
10552                if ( $i_nonblank > $i + 1 ) {
10553
10554                    # source whitespace could be anything, assume
10555                    # at least one space before the hash on output
10556                    if ( $$rtoken_type[ $i + 1 ] eq 'b' ) { $pos += 1 }
10557                    else { $pos += rtoken_length( $i + 1 ) }
10558                }
10559
10560                if ( $pos >= maximum_line_length($i_start) ) {
10561                    return 0;
10562                }
10563            }
10564
10565            # ok, it's a one-line block
10566            create_one_line_block( $i_start, 20 );
10567            return 0;
10568        }
10569
10570        # just keep going for other characters
10571        else {
10572        }
10573    }
10574
10575    # Allow certain types of new one-line blocks to form by joining
10576    # input lines.  These can be safely done, but for other block types,
10577    # we keep old one-line blocks but do not form new ones. It is not
10578    # always a good idea to make as many one-line blocks as possible,
10579    # so other types are not done.  The user can always use -mangle.
10580    if ( $is_sort_map_grep_eval{$block_type} ) {
10581        create_one_line_block( $i_start, 1 );
10582    }
10583
10584    return 0;
10585}
10586
10587sub unstore_token_to_go {
10588
10589    # remove most recent token from output stream
10590    if ( $max_index_to_go > 0 ) {
10591        $max_index_to_go--;
10592    }
10593    else {
10594        $max_index_to_go = UNDEFINED_INDEX;
10595    }
10596
10597}
10598
10599sub want_blank_line {
10600    flush();
10601    $file_writer_object->want_blank_line();
10602}
10603
10604sub write_unindented_line {
10605    flush();
10606    $file_writer_object->write_line( $_[0] );
10607}
10608
10609sub undo_ci {
10610
10611    # Undo continuation indentation in certain sequences
10612    # For example, we can undo continuation indation in sort/map/grep chains
10613    #    my $dat1 = pack( "n*",
10614    #        map { $_, $lookup->{$_} }
10615    #          sort { $a <=> $b }
10616    #          grep { $lookup->{$_} ne $default } keys %$lookup );
10617    # To align the map/sort/grep keywords like this:
10618    #    my $dat1 = pack( "n*",
10619    #        map { $_, $lookup->{$_} }
10620    #        sort { $a <=> $b }
10621    #        grep { $lookup->{$_} ne $default } keys %$lookup );
10622    my ( $ri_first, $ri_last ) = @_;
10623    my ( $line_1, $line_2, $lev_last );
10624    my $this_line_is_semicolon_terminated;
10625    my $max_line = @$ri_first - 1;
10626
10627    # looking at each line of this batch..
10628    # We are looking at leading tokens and looking for a sequence
10629    # all at the same level and higher level than enclosing lines.
10630    foreach my $line ( 0 .. $max_line ) {
10631
10632        my $ibeg = $$ri_first[$line];
10633        my $lev  = $levels_to_go[$ibeg];
10634        if ( $line > 0 ) {
10635
10636            # if we have started a chain..
10637            if ($line_1) {
10638
10639                # see if it continues..
10640                if ( $lev == $lev_last ) {
10641                    if (   $types_to_go[$ibeg] eq 'k'
10642                        && $is_sort_map_grep{ $tokens_to_go[$ibeg] } )
10643                    {
10644
10645                        # chain continues...
10646                        # check for chain ending at end of a a statement
10647                        if ( $line == $max_line ) {
10648
10649                            # see of this line ends a statement
10650                            my $iend = $$ri_last[$line];
10651                            $this_line_is_semicolon_terminated =
10652                              $types_to_go[$iend] eq ';'
10653
10654                              # with possible side comment
10655                              || ( $types_to_go[$iend] eq '#'
10656                                && $iend - $ibeg >= 2
10657                                && $types_to_go[ $iend - 2 ] eq ';'
10658                                && $types_to_go[ $iend - 1 ] eq 'b' );
10659                        }
10660                        $line_2 = $line if ($this_line_is_semicolon_terminated);
10661                    }
10662                    else {
10663
10664                        # kill chain
10665                        $line_1 = undef;
10666                    }
10667                }
10668                elsif ( $lev < $lev_last ) {
10669
10670                    # chain ends with previous line
10671                    $line_2 = $line - 1;
10672                }
10673                elsif ( $lev > $lev_last ) {
10674
10675                    # kill chain
10676                    $line_1 = undef;
10677                }
10678
10679                # undo the continuation indentation if a chain ends
10680                if ( defined($line_2) && defined($line_1) ) {
10681                    my $continuation_line_count = $line_2 - $line_1 + 1;
10682                    @ci_levels_to_go[ @$ri_first[ $line_1 .. $line_2 ] ] =
10683                      (0) x ($continuation_line_count);
10684                    @leading_spaces_to_go[ @$ri_first[ $line_1 .. $line_2 ] ] =
10685                      @reduced_spaces_to_go[ @$ri_first[ $line_1 .. $line_2 ] ];
10686                    $line_1 = undef;
10687                }
10688            }
10689
10690            # not in a chain yet..
10691            else {
10692
10693                # look for start of a new sort/map/grep chain
10694                if ( $lev > $lev_last ) {
10695                    if (   $types_to_go[$ibeg] eq 'k'
10696                        && $is_sort_map_grep{ $tokens_to_go[$ibeg] } )
10697                    {
10698                        $line_1 = $line;
10699                    }
10700                }
10701            }
10702        }
10703        $lev_last = $lev;
10704    }
10705}
10706
10707sub undo_lp_ci {
10708
10709    # If there is a single, long parameter within parens, like this:
10710    #
10711    #  $self->command( "/msg "
10712    #        . $infoline->chan
10713    #        . " You said $1, but did you know that it's square was "
10714    #        . $1 * $1 . " ?" );
10715    #
10716    # we can remove the continuation indentation of the 2nd and higher lines
10717    # to achieve this effect, which is more pleasing:
10718    #
10719    #  $self->command("/msg "
10720    #                 . $infoline->chan
10721    #                 . " You said $1, but did you know that it's square was "
10722    #                 . $1 * $1 . " ?");
10723
10724    my ( $line_open, $i_start, $closing_index, $ri_first, $ri_last ) = @_;
10725    my $max_line = @$ri_first - 1;
10726
10727    # must be multiple lines
10728    return unless $max_line > $line_open;
10729
10730    my $lev_start     = $levels_to_go[$i_start];
10731    my $ci_start_plus = 1 + $ci_levels_to_go[$i_start];
10732
10733    # see if all additional lines in this container have continuation
10734    # indentation
10735    my $n;
10736    my $line_1 = 1 + $line_open;
10737    for ( $n = $line_1 ; $n <= $max_line ; ++$n ) {
10738        my $ibeg = $$ri_first[$n];
10739        my $iend = $$ri_last[$n];
10740        if ( $ibeg eq $closing_index ) { $n--; last }
10741        return if ( $lev_start != $levels_to_go[$ibeg] );
10742        return if ( $ci_start_plus != $ci_levels_to_go[$ibeg] );
10743        last   if ( $closing_index <= $iend );
10744    }
10745
10746    # we can reduce the indentation of all continuation lines
10747    my $continuation_line_count = $n - $line_open;
10748    @ci_levels_to_go[ @$ri_first[ $line_1 .. $n ] ] =
10749      (0) x ($continuation_line_count);
10750    @leading_spaces_to_go[ @$ri_first[ $line_1 .. $n ] ] =
10751      @reduced_spaces_to_go[ @$ri_first[ $line_1 .. $n ] ];
10752}
10753
10754sub pad_token {
10755
10756    # insert $pad_spaces before token number $ipad
10757    my ( $ipad, $pad_spaces ) = @_;
10758    if ( $pad_spaces > 0 ) {
10759        $tokens_to_go[$ipad] = ' ' x $pad_spaces . $tokens_to_go[$ipad];
10760    }
10761    elsif ( $pad_spaces == -1 && $tokens_to_go[$ipad] eq ' ' ) {
10762        $tokens_to_go[$ipad] = "";
10763    }
10764    else {
10765
10766        # shouldn't happen
10767        return;
10768    }
10769
10770    $token_lengths_to_go[$ipad] += $pad_spaces;
10771    for ( my $i = $ipad ; $i <= $max_index_to_go ; $i++ ) {
10772        $summed_lengths_to_go[ $i + 1 ] += $pad_spaces;
10773    }
10774}
10775
10776{
10777    my %is_math_op;
10778
10779    BEGIN {
10780
10781        @_ = qw( + - * / );
10782        @is_math_op{@_} = (1) x scalar(@_);
10783    }
10784
10785    sub set_logical_padding {
10786
10787        # Look at a batch of lines and see if extra padding can improve the
10788        # alignment when there are certain leading operators. Here is an
10789        # example, in which some extra space is introduced before
10790        # '( $year' to make it line up with the subsequent lines:
10791        #
10792        #       if (   ( $Year < 1601 )
10793        #           || ( $Year > 2899 )
10794        #           || ( $EndYear < 1601 )
10795        #           || ( $EndYear > 2899 ) )
10796        #       {
10797        #           &Error_OutOfRange;
10798        #       }
10799        #
10800        my ( $ri_first, $ri_last ) = @_;
10801        my $max_line = @$ri_first - 1;
10802
10803        my ( $ibeg, $ibeg_next, $ibegm, $iend, $iendm, $ipad, $line,
10804            $pad_spaces,
10805            $tok_next, $type_next, $has_leading_op_next, $has_leading_op );
10806
10807        # looking at each line of this batch..
10808        foreach $line ( 0 .. $max_line - 1 ) {
10809
10810            # see if the next line begins with a logical operator
10811            $ibeg      = $$ri_first[$line];
10812            $iend      = $$ri_last[$line];
10813            $ibeg_next = $$ri_first[ $line + 1 ];
10814            $tok_next  = $tokens_to_go[$ibeg_next];
10815            $type_next = $types_to_go[$ibeg_next];
10816
10817            $has_leading_op_next = ( $tok_next =~ /^\w/ )
10818              ? $is_chain_operator{$tok_next}      # + - * / : ? && ||
10819              : $is_chain_operator{$type_next};    # and, or
10820
10821            next unless ($has_leading_op_next);
10822
10823            # next line must not be at lesser depth
10824            next
10825              if ( $nesting_depth_to_go[$ibeg] >
10826                $nesting_depth_to_go[$ibeg_next] );
10827
10828            # identify the token in this line to be padded on the left
10829            $ipad = undef;
10830
10831            # handle lines at same depth...
10832            if ( $nesting_depth_to_go[$ibeg] ==
10833                $nesting_depth_to_go[$ibeg_next] )
10834            {
10835
10836                # if this is not first line of the batch ...
10837                if ( $line > 0 ) {
10838
10839                    # and we have leading operator..
10840                    next if $has_leading_op;
10841
10842                    # Introduce padding if..
10843                    # 1. the previous line is at lesser depth, or
10844                    # 2. the previous line ends in an assignment
10845                    # 3. the previous line ends in a 'return'
10846                    # 4. the previous line ends in a comma
10847                    # Example 1: previous line at lesser depth
10848                    #       if (   ( $Year < 1601 )      # <- we are here but
10849                    #           || ( $Year > 2899 )      #  list has not yet
10850                    #           || ( $EndYear < 1601 )   # collapsed vertically
10851                    #           || ( $EndYear > 2899 ) )
10852                    #       {
10853                    #
10854                    # Example 2: previous line ending in assignment:
10855                    #    $leapyear =
10856                    #        $year % 4   ? 0     # <- We are here
10857                    #      : $year % 100 ? 1
10858                    #      : $year % 400 ? 0
10859                    #      : 1;
10860                    #
10861                    # Example 3: previous line ending in comma:
10862                    #    push @expr,
10863                    #        /test/   ? undef
10864                    #      : eval($_) ? 1
10865                    #      : eval($_) ? 1
10866                    #      :            0;
10867
10868                   # be sure levels agree (do not indent after an indented 'if')
10869                    next
10870                      if ( $levels_to_go[$ibeg] ne $levels_to_go[$ibeg_next] );
10871
10872                    # allow padding on first line after a comma but only if:
10873                    # (1) this is line 2 and
10874                    # (2) there are at more than three lines and
10875                    # (3) lines 3 and 4 have the same leading operator
10876                    # These rules try to prevent padding within a long
10877                    # comma-separated list.
10878                    my $ok_comma;
10879                    if (   $types_to_go[$iendm] eq ','
10880                        && $line == 1
10881                        && $max_line > 2 )
10882                    {
10883                        my $ibeg_next_next = $$ri_first[ $line + 2 ];
10884                        my $tok_next_next  = $tokens_to_go[$ibeg_next_next];
10885                        $ok_comma = $tok_next_next eq $tok_next;
10886                    }
10887
10888                    next
10889                      unless (
10890                           $is_assignment{ $types_to_go[$iendm] }
10891                        || $ok_comma
10892                        || ( $nesting_depth_to_go[$ibegm] <
10893                            $nesting_depth_to_go[$ibeg] )
10894                        || (   $types_to_go[$iendm] eq 'k'
10895                            && $tokens_to_go[$iendm] eq 'return' )
10896                      );
10897
10898                    # we will add padding before the first token
10899                    $ipad = $ibeg;
10900                }
10901
10902                # for first line of the batch..
10903                else {
10904
10905                    # WARNING: Never indent if first line is starting in a
10906                    # continued quote, which would change the quote.
10907                    next if $starting_in_quote;
10908
10909                    # if this is text after closing '}'
10910                    # then look for an interior token to pad
10911                    if ( $types_to_go[$ibeg] eq '}' ) {
10912
10913                    }
10914
10915                    # otherwise, we might pad if it looks really good
10916                    else {
10917
10918                        # we might pad token $ibeg, so be sure that it
10919                        # is at the same depth as the next line.
10920                        next
10921                          if ( $nesting_depth_to_go[$ibeg] !=
10922                            $nesting_depth_to_go[$ibeg_next] );
10923
10924                        # We can pad on line 1 of a statement if at least 3
10925                        # lines will be aligned. Otherwise, it
10926                        # can look very confusing.
10927
10928                 # We have to be careful not to pad if there are too few
10929                 # lines.  The current rule is:
10930                 # (1) in general we require at least 3 consecutive lines
10931                 # with the same leading chain operator token,
10932                 # (2) but an exception is that we only require two lines
10933                 # with leading colons if there are no more lines.  For example,
10934                 # the first $i in the following snippet would get padding
10935                 # by the second rule:
10936                 #
10937                 #   $i == 1 ? ( "First", "Color" )
10938                 # : $i == 2 ? ( "Then",  "Rarity" )
10939                 # :           ( "Then",  "Name" );
10940
10941                        if ( $max_line > 1 ) {
10942                            my $leading_token = $tokens_to_go[$ibeg_next];
10943                            my $tokens_differ;
10944
10945                            # never indent line 1 of a '.' series because
10946                            # previous line is most likely at same level.
10947                            # TODO: we should also look at the leasing_spaces
10948                            # of the last output line and skip if it is same
10949                            # as this line.
10950                            next if ( $leading_token eq '.' );
10951
10952                            my $count = 1;
10953                            foreach my $l ( 2 .. 3 ) {
10954                                last if ( $line + $l > $max_line );
10955                                my $ibeg_next_next = $$ri_first[ $line + $l ];
10956                                if ( $tokens_to_go[$ibeg_next_next] ne
10957                                    $leading_token )
10958                                {
10959                                    $tokens_differ = 1;
10960                                    last;
10961                                }
10962                                $count++;
10963                            }
10964                            next if ($tokens_differ);
10965                            next if ( $count < 3 && $leading_token ne ':' );
10966                            $ipad = $ibeg;
10967                        }
10968                        else {
10969                            next;
10970                        }
10971                    }
10972                }
10973            }
10974
10975            # find interior token to pad if necessary
10976            if ( !defined($ipad) ) {
10977
10978                for ( my $i = $ibeg ; ( $i < $iend ) && !$ipad ; $i++ ) {
10979
10980                    # find any unclosed container
10981                    next
10982                      unless ( $type_sequence_to_go[$i]
10983                        && $mate_index_to_go[$i] > $iend );
10984
10985                    # find next nonblank token to pad
10986                    $ipad = $inext_to_go[$i];
10987                    last if ( $ipad > $iend );
10988                }
10989                last unless $ipad;
10990            }
10991
10992            # We cannot pad a leading token at the lowest level because
10993            # it could cause a bug in which the starting indentation
10994            # level is guessed incorrectly each time the code is run
10995            # though perltidy, thus causing the code to march off to
10996            # the right.  For example, the following snippet would have
10997            # this problem:
10998
10999##     ov_method mycan( $package, '(""' ),       $package
11000##  or ov_method mycan( $package, '(0+' ),       $package
11001##  or ov_method mycan( $package, '(bool' ),     $package
11002##  or ov_method mycan( $package, '(nomethod' ), $package;
11003
11004            # If this snippet is within a block this won't happen
11005            # unless the user just processes the snippet alone within
11006            # an editor.  In that case either the user will see and
11007            # fix the problem or it will be corrected next time the
11008            # entire file is processed with perltidy.
11009            next if ( $ipad == 0 && $levels_to_go[$ipad] == 0 );
11010
11011## THIS PATCH REMOVES THE FOLLOWING POOR PADDING (math.t) with -pbp, BUT
11012## IT DID MORE HARM THAN GOOD
11013##            ceil(
11014##                      $font->{'loca'}->{'glyphs'}[$x]->read->{'xMin'} * 1000
11015##                    / $upem
11016##            ),
11017##?            # do not put leading padding for just 2 lines of math
11018##?            if (   $ipad == $ibeg
11019##?                && $line > 0
11020##?                && $levels_to_go[$ipad] > $levels_to_go[ $ipad - 1 ]
11021##?                && $is_math_op{$type_next}
11022##?                && $line + 2 <= $max_line )
11023##?            {
11024##?                my $ibeg_next_next = $$ri_first[ $line + 2 ];
11025##?                my $type_next_next = $types_to_go[$ibeg_next_next];
11026##?                next if !$is_math_op{$type_next_next};
11027##?            }
11028
11029            # next line must not be at greater depth
11030            my $iend_next = $$ri_last[ $line + 1 ];
11031            next
11032              if ( $nesting_depth_to_go[ $iend_next + 1 ] >
11033                $nesting_depth_to_go[$ipad] );
11034
11035            # lines must be somewhat similar to be padded..
11036            my $inext_next = $inext_to_go[$ibeg_next];
11037            my $type       = $types_to_go[$ipad];
11038            my $type_next  = $types_to_go[ $ipad + 1 ];
11039
11040            # see if there are multiple continuation lines
11041            my $logical_continuation_lines = 1;
11042            if ( $line + 2 <= $max_line ) {
11043                my $leading_token  = $tokens_to_go[$ibeg_next];
11044                my $ibeg_next_next = $$ri_first[ $line + 2 ];
11045                if (   $tokens_to_go[$ibeg_next_next] eq $leading_token
11046                    && $nesting_depth_to_go[$ibeg_next] eq
11047                    $nesting_depth_to_go[$ibeg_next_next] )
11048                {
11049                    $logical_continuation_lines++;
11050                }
11051            }
11052
11053            # see if leading types match
11054            my $types_match = $types_to_go[$inext_next] eq $type;
11055            my $matches_without_bang;
11056
11057            # if first line has leading ! then compare the following token
11058            if ( !$types_match && $type eq '!' ) {
11059                $types_match = $matches_without_bang =
11060                  $types_to_go[$inext_next] eq $types_to_go[ $ipad + 1 ];
11061            }
11062
11063            if (
11064
11065                # either we have multiple continuation lines to follow
11066                # and we are not padding the first token
11067                ( $logical_continuation_lines > 1 && $ipad > 0 )
11068
11069                # or..
11070                || (
11071
11072                    # types must match
11073                    $types_match
11074
11075                    # and keywords must match if keyword
11076                    && !(
11077                           $type eq 'k'
11078                        && $tokens_to_go[$ipad] ne $tokens_to_go[$inext_next]
11079                    )
11080                )
11081              )
11082            {
11083
11084                #----------------------begin special checks--------------
11085                #
11086                # SPECIAL CHECK 1:
11087                # A check is needed before we can make the pad.
11088                # If we are in a list with some long items, we want each
11089                # item to stand out.  So in the following example, the
11090                # first line beginning with '$casefold->' would look good
11091                # padded to align with the next line, but then it
11092                # would be indented more than the last line, so we
11093                # won't do it.
11094                #
11095                #  ok(
11096                #      $casefold->{code}         eq '0041'
11097                #        && $casefold->{status}  eq 'C'
11098                #        && $casefold->{mapping} eq '0061',
11099                #      'casefold 0x41'
11100                #  );
11101                #
11102                # Note:
11103                # It would be faster, and almost as good, to use a comma
11104                # count, and not pad if comma_count > 1 and the previous
11105                # line did not end with a comma.
11106                #
11107                my $ok_to_pad = 1;
11108
11109                my $ibg   = $$ri_first[ $line + 1 ];
11110                my $depth = $nesting_depth_to_go[ $ibg + 1 ];
11111
11112                # just use simplified formula for leading spaces to avoid
11113                # needless sub calls
11114                my $lsp = $levels_to_go[$ibg] + $ci_levels_to_go[$ibg];
11115
11116                # look at each line beyond the next ..
11117                my $l = $line + 1;
11118                foreach $l ( $line + 2 .. $max_line ) {
11119                    my $ibg = $$ri_first[$l];
11120
11121                    # quit looking at the end of this container
11122                    last
11123                      if ( $nesting_depth_to_go[ $ibg + 1 ] < $depth )
11124                      || ( $nesting_depth_to_go[$ibg] < $depth );
11125
11126                    # cannot do the pad if a later line would be
11127                    # outdented more
11128                    if ( $levels_to_go[$ibg] + $ci_levels_to_go[$ibg] < $lsp ) {
11129                        $ok_to_pad = 0;
11130                        last;
11131                    }
11132                }
11133
11134                # don't pad if we end in a broken list
11135                if ( $l == $max_line ) {
11136                    my $i2 = $$ri_last[$l];
11137                    if ( $types_to_go[$i2] eq '#' ) {
11138                        my $i1 = $$ri_first[$l];
11139                        next
11140                          if (
11141                            terminal_type( \@types_to_go, \@block_type_to_go,
11142                                $i1, $i2 ) eq ','
11143                          );
11144                    }
11145                }
11146
11147                # SPECIAL CHECK 2:
11148                # a minus may introduce a quoted variable, and we will
11149                # add the pad only if this line begins with a bare word,
11150                # such as for the word 'Button' here:
11151                #    [
11152                #         Button      => "Print letter \"~$_\"",
11153                #        -command     => [ sub { print "$_[0]\n" }, $_ ],
11154                #        -accelerator => "Meta+$_"
11155                #    ];
11156                #
11157                #  On the other hand, if 'Button' is quoted, it looks best
11158                #  not to pad:
11159                #    [
11160                #        'Button'     => "Print letter \"~$_\"",
11161                #        -command     => [ sub { print "$_[0]\n" }, $_ ],
11162                #        -accelerator => "Meta+$_"
11163                #    ];
11164                if ( $types_to_go[$ibeg_next] eq 'm' ) {
11165                    $ok_to_pad = 0 if $types_to_go[$ibeg] eq 'Q';
11166                }
11167
11168                next unless $ok_to_pad;
11169
11170                #----------------------end special check---------------
11171
11172                my $length_1 = total_line_length( $ibeg,      $ipad - 1 );
11173                my $length_2 = total_line_length( $ibeg_next, $inext_next - 1 );
11174                $pad_spaces = $length_2 - $length_1;
11175
11176                # If the first line has a leading ! and the second does
11177                # not, then remove one space to try to align the next
11178                # leading characters, which are often the same.  For example:
11179                #  if (  !$ts
11180                #      || $ts == $self->Holder
11181                #      || $self->Holder->Type eq "Arena" )
11182                #
11183                # This usually helps readability, but if there are subsequent
11184                # ! operators things will still get messed up.  For example:
11185                #
11186                #  if (  !exists $Net::DNS::typesbyname{$qtype}
11187                #      && exists $Net::DNS::classesbyname{$qtype}
11188                #      && !exists $Net::DNS::classesbyname{$qclass}
11189                #      && exists $Net::DNS::typesbyname{$qclass} )
11190                # We can't fix that.
11191                if ($matches_without_bang) { $pad_spaces-- }
11192
11193                # make sure this won't change if -lp is used
11194                my $indentation_1 = $leading_spaces_to_go[$ibeg];
11195                if ( ref($indentation_1) ) {
11196                    if ( $indentation_1->get_RECOVERABLE_SPACES() == 0 ) {
11197                        my $indentation_2 = $leading_spaces_to_go[$ibeg_next];
11198                        unless ( $indentation_2->get_RECOVERABLE_SPACES() == 0 )
11199                        {
11200                            $pad_spaces = 0;
11201                        }
11202                    }
11203                }
11204
11205                # we might be able to handle a pad of -1 by removing a blank
11206                # token
11207                if ( $pad_spaces < 0 ) {
11208
11209                    if ( $pad_spaces == -1 ) {
11210                        if ( $ipad > $ibeg && $types_to_go[ $ipad - 1 ] eq 'b' )
11211                        {
11212                            pad_token( $ipad - 1, $pad_spaces );
11213                        }
11214                    }
11215                    $pad_spaces = 0;
11216                }
11217
11218                # now apply any padding for alignment
11219                if ( $ipad >= 0 && $pad_spaces ) {
11220
11221                    my $length_t = total_line_length( $ibeg, $iend );
11222                    if ( $pad_spaces + $length_t <= maximum_line_length($ibeg) )
11223                    {
11224                        pad_token( $ipad, $pad_spaces );
11225                    }
11226                }
11227            }
11228        }
11229        continue {
11230            $iendm          = $iend;
11231            $ibegm          = $ibeg;
11232            $has_leading_op = $has_leading_op_next;
11233        }    # end of loop over lines
11234        return;
11235    }
11236}
11237
11238sub correct_lp_indentation {
11239
11240    # When the -lp option is used, we need to make a last pass through
11241    # each line to correct the indentation positions in case they differ
11242    # from the predictions.  This is necessary because perltidy uses a
11243    # predictor/corrector method for aligning with opening parens.  The
11244    # predictor is usually good, but sometimes stumbles.  The corrector
11245    # tries to patch things up once the actual opening paren locations
11246    # are known.
11247    my ( $ri_first, $ri_last ) = @_;
11248    my $do_not_pad = 0;
11249
11250    #  Note on flag '$do_not_pad':
11251    #  We want to avoid a situation like this, where the aligner inserts
11252    #  whitespace before the '=' to align it with a previous '=', because
11253    #  otherwise the parens might become mis-aligned in a situation like
11254    #  this, where the '=' has become aligned with the previous line,
11255    #  pushing the opening '(' forward beyond where we want it.
11256    #
11257    #  $mkFloor::currentRoom = '';
11258    #  $mkFloor::c_entry     = $c->Entry(
11259    #                                 -width        => '10',
11260    #                                 -relief       => 'sunken',
11261    #                                 ...
11262    #                                 );
11263    #
11264    #  We leave it to the aligner to decide how to do this.
11265
11266    # first remove continuation indentation if appropriate
11267    my $max_line = @$ri_first - 1;
11268
11269    # looking at each line of this batch..
11270    my ( $ibeg, $iend );
11271    my $line;
11272    foreach $line ( 0 .. $max_line ) {
11273        $ibeg = $$ri_first[$line];
11274        $iend = $$ri_last[$line];
11275
11276        # looking at each token in this output line..
11277        my $i;
11278        foreach $i ( $ibeg .. $iend ) {
11279
11280            # How many space characters to place before this token
11281            # for special alignment.  Actual padding is done in the
11282            # continue block.
11283
11284            # looking for next unvisited indentation item
11285            my $indentation = $leading_spaces_to_go[$i];
11286            if ( !$indentation->get_MARKED() ) {
11287                $indentation->set_MARKED(1);
11288
11289                # looking for indentation item for which we are aligning
11290                # with parens, braces, and brackets
11291                next unless ( $indentation->get_ALIGN_PAREN() );
11292
11293                # skip closed container on this line
11294                if ( $i > $ibeg ) {
11295                    my $im = max( $ibeg, $iprev_to_go[$i] );
11296                    if (   $type_sequence_to_go[$im]
11297                        && $mate_index_to_go[$im] <= $iend )
11298                    {
11299                        next;
11300                    }
11301                }
11302
11303                if ( $line == 1 && $i == $ibeg ) {
11304                    $do_not_pad = 1;
11305                }
11306
11307                # Ok, let's see what the error is and try to fix it
11308                my $actual_pos;
11309                my $predicted_pos = $indentation->get_SPACES();
11310                if ( $i > $ibeg ) {
11311
11312                    # token is mid-line - use length to previous token
11313                    $actual_pos = total_line_length( $ibeg, $i - 1 );
11314
11315                    # for mid-line token, we must check to see if all
11316                    # additional lines have continuation indentation,
11317                    # and remove it if so.  Otherwise, we do not get
11318                    # good alignment.
11319                    my $closing_index = $indentation->get_CLOSED();
11320                    if ( $closing_index > $iend ) {
11321                        my $ibeg_next = $$ri_first[ $line + 1 ];
11322                        if ( $ci_levels_to_go[$ibeg_next] > 0 ) {
11323                            undo_lp_ci( $line, $i, $closing_index, $ri_first,
11324                                $ri_last );
11325                        }
11326                    }
11327                }
11328                elsif ( $line > 0 ) {
11329
11330                    # handle case where token starts a new line;
11331                    # use length of previous line
11332                    my $ibegm = $$ri_first[ $line - 1 ];
11333                    my $iendm = $$ri_last[ $line - 1 ];
11334                    $actual_pos = total_line_length( $ibegm, $iendm );
11335
11336                    # follow -pt style
11337                    ++$actual_pos
11338                      if ( $types_to_go[ $iendm + 1 ] eq 'b' );
11339                }
11340                else {
11341
11342                    # token is first character of first line of batch
11343                    $actual_pos = $predicted_pos;
11344                }
11345
11346                my $move_right = $actual_pos - $predicted_pos;
11347
11348                # done if no error to correct (gnu2.t)
11349                if ( $move_right == 0 ) {
11350                    $indentation->set_RECOVERABLE_SPACES($move_right);
11351                    next;
11352                }
11353
11354                # if we have not seen closure for this indentation in
11355                # this batch, we can only pass on a request to the
11356                # vertical aligner
11357                my $closing_index = $indentation->get_CLOSED();
11358
11359                if ( $closing_index < 0 ) {
11360                    $indentation->set_RECOVERABLE_SPACES($move_right);
11361                    next;
11362                }
11363
11364                # If necessary, look ahead to see if there is really any
11365                # leading whitespace dependent on this whitespace, and
11366                # also find the longest line using this whitespace.
11367                # Since it is always safe to move left if there are no
11368                # dependents, we only need to do this if we may have
11369                # dependent nodes or need to move right.
11370
11371                my $right_margin = 0;
11372                my $have_child   = $indentation->get_HAVE_CHILD();
11373
11374                my %saw_indentation;
11375                my $line_count = 1;
11376                $saw_indentation{$indentation} = $indentation;
11377
11378                if ( $have_child || $move_right > 0 ) {
11379                    $have_child = 0;
11380                    my $max_length = 0;
11381                    if ( $i == $ibeg ) {
11382                        $max_length = total_line_length( $ibeg, $iend );
11383                    }
11384
11385                    # look ahead at the rest of the lines of this batch..
11386                    my $line_t;
11387                    foreach $line_t ( $line + 1 .. $max_line ) {
11388                        my $ibeg_t = $$ri_first[$line_t];
11389                        my $iend_t = $$ri_last[$line_t];
11390                        last if ( $closing_index <= $ibeg_t );
11391
11392                        # remember all different indentation objects
11393                        my $indentation_t = $leading_spaces_to_go[$ibeg_t];
11394                        $saw_indentation{$indentation_t} = $indentation_t;
11395                        $line_count++;
11396
11397                        # remember longest line in the group
11398                        my $length_t = total_line_length( $ibeg_t, $iend_t );
11399                        if ( $length_t > $max_length ) {
11400                            $max_length = $length_t;
11401                        }
11402                    }
11403                    $right_margin = maximum_line_length($ibeg) - $max_length;
11404                    if ( $right_margin < 0 ) { $right_margin = 0 }
11405                }
11406
11407                my $first_line_comma_count =
11408                  grep { $_ eq ',' } @types_to_go[ $ibeg .. $iend ];
11409                my $comma_count = $indentation->get_COMMA_COUNT();
11410                my $arrow_count = $indentation->get_ARROW_COUNT();
11411
11412                # This is a simple approximate test for vertical alignment:
11413                # if we broke just after an opening paren, brace, bracket,
11414                # and there are 2 or more commas in the first line,
11415                # and there are no '=>'s,
11416                # then we are probably vertically aligned.  We could set
11417                # an exact flag in sub scan_list, but this is good
11418                # enough.
11419                my $indentation_count = keys %saw_indentation;
11420                my $is_vertically_aligned =
11421                  (      $i == $ibeg
11422                      && $first_line_comma_count > 1
11423                      && $indentation_count == 1
11424                      && ( $arrow_count == 0 || $arrow_count == $line_count ) );
11425
11426                # Make the move if possible ..
11427                if (
11428
11429                    # we can always move left
11430                    $move_right < 0
11431
11432                    # but we should only move right if we are sure it will
11433                    # not spoil vertical alignment
11434                    || ( $comma_count == 0 )
11435                    || ( $comma_count > 0 && !$is_vertically_aligned )
11436                  )
11437                {
11438                    my $move =
11439                      ( $move_right <= $right_margin )
11440                      ? $move_right
11441                      : $right_margin;
11442
11443                    foreach ( keys %saw_indentation ) {
11444                        $saw_indentation{$_}
11445                          ->permanently_decrease_AVAILABLE_SPACES( -$move );
11446                    }
11447                }
11448
11449                # Otherwise, record what we want and the vertical aligner
11450                # will try to recover it.
11451                else {
11452                    $indentation->set_RECOVERABLE_SPACES($move_right);
11453                }
11454            }
11455        }
11456    }
11457    return $do_not_pad;
11458}
11459
11460# flush is called to output any tokens in the pipeline, so that
11461# an alternate source of lines can be written in the correct order
11462
11463sub flush {
11464    destroy_one_line_block();
11465    output_line_to_go();
11466    Perl::Tidy::VerticalAligner::flush();
11467}
11468
11469sub reset_block_text_accumulator {
11470
11471    # save text after 'if' and 'elsif' to append after 'else'
11472    if ($accumulating_text_for_block) {
11473
11474        if ( $accumulating_text_for_block =~ /^(if|elsif)$/ ) {
11475            push @{$rleading_block_if_elsif_text}, $leading_block_text;
11476        }
11477    }
11478    $accumulating_text_for_block        = "";
11479    $leading_block_text                 = "";
11480    $leading_block_text_level           = 0;
11481    $leading_block_text_length_exceeded = 0;
11482    $leading_block_text_line_number     = 0;
11483    $leading_block_text_line_length     = 0;
11484}
11485
11486sub set_block_text_accumulator {
11487    my $i = shift;
11488    $accumulating_text_for_block = $tokens_to_go[$i];
11489    if ( $accumulating_text_for_block !~ /^els/ ) {
11490        $rleading_block_if_elsif_text = [];
11491    }
11492    $leading_block_text       = "";
11493    $leading_block_text_level = $levels_to_go[$i];
11494    $leading_block_text_line_number =
11495      $vertical_aligner_object->get_output_line_number();
11496    $leading_block_text_length_exceeded = 0;
11497
11498    # this will contain the column number of the last character
11499    # of the closing side comment
11500    $leading_block_text_line_length =
11501      length($csc_last_label) +
11502      length($accumulating_text_for_block) +
11503      length( $rOpts->{'closing-side-comment-prefix'} ) +
11504      $leading_block_text_level * $rOpts_indent_columns + 3;
11505}
11506
11507sub accumulate_block_text {
11508    my $i = shift;
11509
11510    # accumulate leading text for -csc, ignoring any side comments
11511    if (   $accumulating_text_for_block
11512        && !$leading_block_text_length_exceeded
11513        && $types_to_go[$i] ne '#' )
11514    {
11515
11516        my $added_length = $token_lengths_to_go[$i];
11517        $added_length += 1 if $i == 0;
11518        my $new_line_length = $leading_block_text_line_length + $added_length;
11519
11520        # we can add this text if we don't exceed some limits..
11521        if (
11522
11523            # we must not have already exceeded the text length limit
11524            length($leading_block_text) <
11525            $rOpts_closing_side_comment_maximum_text
11526
11527            # and either:
11528            # the new total line length must be below the line length limit
11529            # or the new length must be below the text length limit
11530            # (ie, we may allow one token to exceed the text length limit)
11531            && (
11532                $new_line_length <
11533                maximum_line_length_for_level($leading_block_text_level)
11534
11535                || length($leading_block_text) + $added_length <
11536                $rOpts_closing_side_comment_maximum_text
11537            )
11538
11539            # UNLESS: we are adding a closing paren before the brace we seek.
11540            # This is an attempt to avoid situations where the ... to be
11541            # added are longer than the omitted right paren, as in:
11542
11543            #   foreach my $item (@a_rather_long_variable_name_here) {
11544            #      &whatever;
11545            #   } ## end foreach my $item (@a_rather_long_variable_name_here...
11546
11547            || (
11548                $tokens_to_go[$i] eq ')'
11549                && (
11550                    (
11551                           $i + 1 <= $max_index_to_go
11552                        && $block_type_to_go[ $i + 1 ] eq
11553                        $accumulating_text_for_block
11554                    )
11555                    || (   $i + 2 <= $max_index_to_go
11556                        && $block_type_to_go[ $i + 2 ] eq
11557                        $accumulating_text_for_block )
11558                )
11559            )
11560          )
11561        {
11562
11563            # add an extra space at each newline
11564            if ( $i == 0 ) { $leading_block_text .= ' ' }
11565
11566            # add the token text
11567            $leading_block_text .= $tokens_to_go[$i];
11568            $leading_block_text_line_length = $new_line_length;
11569        }
11570
11571        # show that text was truncated if necessary
11572        elsif ( $types_to_go[$i] ne 'b' ) {
11573            $leading_block_text_length_exceeded = 1;
11574## Please see file perltidy.ERR
11575            $leading_block_text .= '...';
11576        }
11577    }
11578}
11579
11580{
11581    my %is_if_elsif_else_unless_while_until_for_foreach;
11582
11583    BEGIN {
11584
11585        # These block types may have text between the keyword and opening
11586        # curly.  Note: 'else' does not, but must be included to allow trailing
11587        # if/elsif text to be appended.
11588        # patch for SWITCH/CASE: added 'case' and 'when'
11589        @_ = qw(if elsif else unless while until for foreach case when);
11590        @is_if_elsif_else_unless_while_until_for_foreach{@_} =
11591          (1) x scalar(@_);
11592    }
11593
11594    sub accumulate_csc_text {
11595
11596        # called once per output buffer when -csc is used. Accumulates
11597        # the text placed after certain closing block braces.
11598        # Defines and returns the following for this buffer:
11599
11600        my $block_leading_text = "";    # the leading text of the last '}'
11601        my $rblock_leading_if_elsif_text;
11602        my $i_block_leading_text =
11603          -1;    # index of token owning block_leading_text
11604        my $block_line_count    = 100;    # how many lines the block spans
11605        my $terminal_type       = 'b';    # type of last nonblank token
11606        my $i_terminal          = 0;      # index of last nonblank token
11607        my $terminal_block_type = "";
11608
11609        # update most recent statement label
11610        $csc_last_label = "" unless ($csc_last_label);
11611        if ( $types_to_go[0] eq 'J' ) { $csc_last_label = $tokens_to_go[0] }
11612        my $block_label = $csc_last_label;
11613
11614        # Loop over all tokens of this batch
11615        for my $i ( 0 .. $max_index_to_go ) {
11616            my $type       = $types_to_go[$i];
11617            my $block_type = $block_type_to_go[$i];
11618            my $token      = $tokens_to_go[$i];
11619
11620            # remember last nonblank token type
11621            if ( $type ne '#' && $type ne 'b' ) {
11622                $terminal_type       = $type;
11623                $terminal_block_type = $block_type;
11624                $i_terminal          = $i;
11625            }
11626
11627            my $type_sequence = $type_sequence_to_go[$i];
11628            if ( $block_type && $type_sequence ) {
11629
11630                if ( $token eq '}' ) {
11631
11632                    # restore any leading text saved when we entered this block
11633                    if ( defined( $block_leading_text{$type_sequence} ) ) {
11634                        ( $block_leading_text, $rblock_leading_if_elsif_text )
11635                          = @{ $block_leading_text{$type_sequence} };
11636                        $i_block_leading_text = $i;
11637                        delete $block_leading_text{$type_sequence};
11638                        $rleading_block_if_elsif_text =
11639                          $rblock_leading_if_elsif_text;
11640                    }
11641
11642                    if ( defined( $csc_block_label{$type_sequence} ) ) {
11643                        $block_label = $csc_block_label{$type_sequence};
11644                        delete $csc_block_label{$type_sequence};
11645                    }
11646
11647                    # if we run into a '}' then we probably started accumulating
11648                    # at something like a trailing 'if' clause..no harm done.
11649                    if (   $accumulating_text_for_block
11650                        && $levels_to_go[$i] <= $leading_block_text_level )
11651                    {
11652                        my $lev = $levels_to_go[$i];
11653                        reset_block_text_accumulator();
11654                    }
11655
11656                    if ( defined( $block_opening_line_number{$type_sequence} ) )
11657                    {
11658                        my $output_line_number =
11659                          $vertical_aligner_object->get_output_line_number();
11660                        $block_line_count =
11661                          $output_line_number -
11662                          $block_opening_line_number{$type_sequence} + 1;
11663                        delete $block_opening_line_number{$type_sequence};
11664                    }
11665                    else {
11666
11667                        # Error: block opening line undefined for this line..
11668                        # This shouldn't be possible, but it is not a
11669                        # significant problem.
11670                    }
11671                }
11672
11673                elsif ( $token eq '{' ) {
11674
11675                    my $line_number =
11676                      $vertical_aligner_object->get_output_line_number();
11677                    $block_opening_line_number{$type_sequence} = $line_number;
11678
11679                    # set a label for this block, except for
11680                    # a bare block which already has the label
11681                    # A label can only be used on the next {
11682                    if ( $block_type =~ /:$/ ) { $csc_last_label = "" }
11683                    $csc_block_label{$type_sequence} = $csc_last_label;
11684                    $csc_last_label = "";
11685
11686                    if (   $accumulating_text_for_block
11687                        && $levels_to_go[$i] == $leading_block_text_level )
11688                    {
11689
11690                        if ( $accumulating_text_for_block eq $block_type ) {
11691
11692                            # save any leading text before we enter this block
11693                            $block_leading_text{$type_sequence} = [
11694                                $leading_block_text,
11695                                $rleading_block_if_elsif_text
11696                            ];
11697                            $block_opening_line_number{$type_sequence} =
11698                              $leading_block_text_line_number;
11699                            reset_block_text_accumulator();
11700                        }
11701                        else {
11702
11703                            # shouldn't happen, but not a serious error.
11704                            # We were accumulating -csc text for block type
11705                            # $accumulating_text_for_block and unexpectedly
11706                            # encountered a '{' for block type $block_type.
11707                        }
11708                    }
11709                }
11710            }
11711
11712            if (   $type eq 'k'
11713                && $csc_new_statement_ok
11714                && $is_if_elsif_else_unless_while_until_for_foreach{$token}
11715                && $token =~ /$closing_side_comment_list_pattern/o )
11716            {
11717                set_block_text_accumulator($i);
11718            }
11719            else {
11720
11721                # note: ignoring type 'q' because of tricks being played
11722                # with 'q' for hanging side comments
11723                if ( $type ne 'b' && $type ne '#' && $type ne 'q' ) {
11724                    $csc_new_statement_ok =
11725                      ( $block_type || $type eq 'J' || $type eq ';' );
11726                }
11727                if (   $type eq ';'
11728                    && $accumulating_text_for_block
11729                    && $levels_to_go[$i] == $leading_block_text_level )
11730                {
11731                    reset_block_text_accumulator();
11732                }
11733                else {
11734                    accumulate_block_text($i);
11735                }
11736            }
11737        }
11738
11739        # Treat an 'else' block specially by adding preceding 'if' and
11740        # 'elsif' text.  Otherwise, the 'end else' is not helpful,
11741        # especially for cuddled-else formatting.
11742        if ( $terminal_block_type =~ /^els/ && $rblock_leading_if_elsif_text ) {
11743            $block_leading_text =
11744              make_else_csc_text( $i_terminal, $terminal_block_type,
11745                $block_leading_text, $rblock_leading_if_elsif_text );
11746        }
11747
11748        # if this line ends in a label then remember it for the next pass
11749        $csc_last_label = "";
11750        if ( $terminal_type eq 'J' ) {
11751            $csc_last_label = $tokens_to_go[$i_terminal];
11752        }
11753
11754        return ( $terminal_type, $i_terminal, $i_block_leading_text,
11755            $block_leading_text, $block_line_count, $block_label );
11756    }
11757}
11758
11759sub make_else_csc_text {
11760
11761    # create additional -csc text for an 'else' and optionally 'elsif',
11762    # depending on the value of switch
11763    # $rOpts_closing_side_comment_else_flag:
11764    #
11765    #  = 0 add 'if' text to trailing else
11766    #  = 1 same as 0 plus:
11767    #      add 'if' to 'elsif's if can fit in line length
11768    #      add last 'elsif' to trailing else if can fit in one line
11769    #  = 2 same as 1 but do not check if exceed line length
11770    #
11771    # $rif_elsif_text = a reference to a list of all previous closing
11772    # side comments created for this if block
11773    #
11774    my ( $i_terminal, $block_type, $block_leading_text, $rif_elsif_text ) = @_;
11775    my $csc_text = $block_leading_text;
11776
11777    if (   $block_type eq 'elsif'
11778        && $rOpts_closing_side_comment_else_flag == 0 )
11779    {
11780        return $csc_text;
11781    }
11782
11783    my $count = @{$rif_elsif_text};
11784    return $csc_text unless ($count);
11785
11786    my $if_text = '[ if' . $rif_elsif_text->[0];
11787
11788    # always show the leading 'if' text on 'else'
11789    if ( $block_type eq 'else' ) {
11790        $csc_text .= $if_text;
11791    }
11792
11793    # see if that's all
11794    if ( $rOpts_closing_side_comment_else_flag == 0 ) {
11795        return $csc_text;
11796    }
11797
11798    my $last_elsif_text = "";
11799    if ( $count > 1 ) {
11800        $last_elsif_text = ' [elsif' . $rif_elsif_text->[ $count - 1 ];
11801        if ( $count > 2 ) { $last_elsif_text = ' [...' . $last_elsif_text; }
11802    }
11803
11804    # tentatively append one more item
11805    my $saved_text = $csc_text;
11806    if ( $block_type eq 'else' ) {
11807        $csc_text .= $last_elsif_text;
11808    }
11809    else {
11810        $csc_text .= ' ' . $if_text;
11811    }
11812
11813    # all done if no length checks requested
11814    if ( $rOpts_closing_side_comment_else_flag == 2 ) {
11815        return $csc_text;
11816    }
11817
11818    # undo it if line length exceeded
11819    my $length =
11820      length($csc_text) +
11821      length($block_type) +
11822      length( $rOpts->{'closing-side-comment-prefix'} ) +
11823      $levels_to_go[$i_terminal] * $rOpts_indent_columns + 3;
11824    if ( $length > maximum_line_length_for_level($leading_block_text_level) ) {
11825        $csc_text = $saved_text;
11826    }
11827    return $csc_text;
11828}
11829
11830{    # sub balance_csc_text
11831
11832    my %matching_char;
11833
11834    BEGIN {
11835        %matching_char = (
11836            '{' => '}',
11837            '(' => ')',
11838            '[' => ']',
11839            '}' => '{',
11840            ')' => '(',
11841            ']' => '[',
11842        );
11843    }
11844
11845    sub balance_csc_text {
11846
11847        # Append characters to balance a closing side comment so that editors
11848        # such as vim can correctly jump through code.
11849        # Simple Example:
11850        #  input  = ## end foreach my $foo ( sort { $b  ...
11851        #  output = ## end foreach my $foo ( sort { $b  ...})
11852
11853        # NOTE: This routine does not currently filter out structures within
11854        # quoted text because the bounce algorithims in text editors do not
11855        # necessarily do this either (a version of vim was checked and
11856        # did not do this).
11857
11858        # Some complex examples which will cause trouble for some editors:
11859        #  while ( $mask_string =~ /\{[^{]*?\}/g ) {
11860        #  if ( $mask_str =~ /\}\s*els[^\{\}]+\{$/ ) {
11861        #  if ( $1 eq '{' ) {
11862        # test file test1/braces.pl has many such examples.
11863
11864        my ($csc) = @_;
11865
11866        # loop to examine characters one-by-one, RIGHT to LEFT and
11867        # build a balancing ending, LEFT to RIGHT.
11868        for ( my $pos = length($csc) - 1 ; $pos >= 0 ; $pos-- ) {
11869
11870            my $char = substr( $csc, $pos, 1 );
11871
11872            # ignore everything except structural characters
11873            next unless ( $matching_char{$char} );
11874
11875            # pop most recently appended character
11876            my $top = chop($csc);
11877
11878            # push it back plus the mate to the newest character
11879            # unless they balance each other.
11880            $csc = $csc . $top . $matching_char{$char} unless $top eq $char;
11881        }
11882
11883        # return the balanced string
11884        return $csc;
11885    }
11886}
11887
11888sub add_closing_side_comment {
11889
11890    # add closing side comments after closing block braces if -csc used
11891    my $cscw_block_comment;
11892
11893    #---------------------------------------------------------------
11894    # Step 1: loop through all tokens of this line to accumulate
11895    # the text needed to create the closing side comments. Also see
11896    # how the line ends.
11897    #---------------------------------------------------------------
11898
11899    my ( $terminal_type, $i_terminal, $i_block_leading_text,
11900        $block_leading_text, $block_line_count, $block_label )
11901      = accumulate_csc_text();
11902
11903    #---------------------------------------------------------------
11904    # Step 2: make the closing side comment if this ends a block
11905    #---------------------------------------------------------------
11906    my $have_side_comment = $i_terminal != $max_index_to_go;
11907
11908    # if this line might end in a block closure..
11909    if (
11910        $terminal_type eq '}'
11911
11912        # ..and either
11913        && (
11914
11915            # the block is long enough
11916            ( $block_line_count >= $rOpts->{'closing-side-comment-interval'} )
11917
11918            # or there is an existing comment to check
11919            || (   $have_side_comment
11920                && $rOpts->{'closing-side-comment-warnings'} )
11921        )
11922
11923        # .. and if this is one of the types of interest
11924        && $block_type_to_go[$i_terminal] =~
11925        /$closing_side_comment_list_pattern/o
11926
11927        # .. but not an anonymous sub
11928        # These are not normally of interest, and their closing braces are
11929        # often followed by commas or semicolons anyway.  This also avoids
11930        # possible erratic output due to line numbering inconsistencies
11931        # in the cases where their closing braces terminate a line.
11932        && $block_type_to_go[$i_terminal] ne 'sub'
11933
11934        # ..and the corresponding opening brace must is not in this batch
11935        # (because we do not need to tag one-line blocks, although this
11936        # should also be caught with a positive -csci value)
11937        && $mate_index_to_go[$i_terminal] < 0
11938
11939        # ..and either
11940        && (
11941
11942            # this is the last token (line doesn't have a side comment)
11943            !$have_side_comment
11944
11945            # or the old side comment is a closing side comment
11946            || $tokens_to_go[$max_index_to_go] =~
11947            /$closing_side_comment_prefix_pattern/o
11948        )
11949      )
11950    {
11951
11952        # then make the closing side comment text
11953        if ($block_label) { $block_label .= " " }
11954        my $token =
11955"$rOpts->{'closing-side-comment-prefix'} $block_label$block_type_to_go[$i_terminal]";
11956
11957        # append any extra descriptive text collected above
11958        if ( $i_block_leading_text == $i_terminal ) {
11959            $token .= $block_leading_text;
11960        }
11961
11962        $token = balance_csc_text($token)
11963          if $rOpts->{'closing-side-comments-balanced'};
11964
11965        $token =~ s/\s*$//;    # trim any trailing whitespace
11966
11967        # handle case of existing closing side comment
11968        if ($have_side_comment) {
11969
11970            # warn if requested and tokens differ significantly
11971            if ( $rOpts->{'closing-side-comment-warnings'} ) {
11972                my $old_csc = $tokens_to_go[$max_index_to_go];
11973                my $new_csc = $token;
11974                $new_csc =~ s/\s+//g;            # trim all whitespace
11975                $old_csc =~ s/\s+//g;            # trim all whitespace
11976                $new_csc =~ s/[\]\)\}\s]*$//;    # trim trailing structures
11977                $old_csc =~ s/[\]\)\}\s]*$//;    # trim trailing structures
11978                $new_csc =~ s/(\.\.\.)$//;       # trim trailing '...'
11979                my $new_trailing_dots = $1;
11980                $old_csc =~ s/(\.\.\.)\s*$//;    # trim trailing '...'
11981
11982                # Patch to handle multiple closing side comments at
11983                # else and elsif's.  These have become too complicated
11984                # to check, so if we see an indication of
11985                # '[ if' or '[ # elsif', then assume they were made
11986                # by perltidy.
11987                if ( $block_type_to_go[$i_terminal] eq 'else' ) {
11988                    if ( $old_csc =~ /\[\s*elsif/ ) { $old_csc = $new_csc }
11989                }
11990                elsif ( $block_type_to_go[$i_terminal] eq 'elsif' ) {
11991                    if ( $old_csc =~ /\[\s*if/ ) { $old_csc = $new_csc }
11992                }
11993
11994                # if old comment is contained in new comment,
11995                # only compare the common part.
11996                if ( length($new_csc) > length($old_csc) ) {
11997                    $new_csc = substr( $new_csc, 0, length($old_csc) );
11998                }
11999
12000                # if the new comment is shorter and has been limited,
12001                # only compare the common part.
12002                if ( length($new_csc) < length($old_csc)
12003                    && $new_trailing_dots )
12004                {
12005                    $old_csc = substr( $old_csc, 0, length($new_csc) );
12006                }
12007
12008                # any remaining difference?
12009                if ( $new_csc ne $old_csc ) {
12010
12011                    # just leave the old comment if we are below the threshold
12012                    # for creating side comments
12013                    if ( $block_line_count <
12014                        $rOpts->{'closing-side-comment-interval'} )
12015                    {
12016                        $token = undef;
12017                    }
12018
12019                    # otherwise we'll make a note of it
12020                    else {
12021
12022                        warning(
12023"perltidy -cscw replaced: $tokens_to_go[$max_index_to_go]\n"
12024                        );
12025
12026                     # save the old side comment in a new trailing block comment
12027                        my ( $day, $month, $year ) = (localtime)[ 3, 4, 5 ];
12028                        $year  += 1900;
12029                        $month += 1;
12030                        $cscw_block_comment =
12031"## perltidy -cscw $year-$month-$day: $tokens_to_go[$max_index_to_go]";
12032                    }
12033                }
12034                else {
12035
12036                    # No differences.. we can safely delete old comment if we
12037                    # are below the threshold
12038                    if ( $block_line_count <
12039                        $rOpts->{'closing-side-comment-interval'} )
12040                    {
12041                        $token = undef;
12042                        unstore_token_to_go()
12043                          if ( $types_to_go[$max_index_to_go] eq '#' );
12044                        unstore_token_to_go()
12045                          if ( $types_to_go[$max_index_to_go] eq 'b' );
12046                    }
12047                }
12048            }
12049
12050            # switch to the new csc (unless we deleted it!)
12051            $tokens_to_go[$max_index_to_go] = $token if $token;
12052        }
12053
12054        # handle case of NO existing closing side comment
12055        else {
12056
12057            # insert the new side comment into the output token stream
12058            my $type          = '#';
12059            my $block_type    = '';
12060            my $type_sequence = '';
12061            my $container_environment =
12062              $container_environment_to_go[$max_index_to_go];
12063            my $level                = $levels_to_go[$max_index_to_go];
12064            my $slevel               = $nesting_depth_to_go[$max_index_to_go];
12065            my $no_internal_newlines = 0;
12066
12067            my $nesting_blocks     = $nesting_blocks_to_go[$max_index_to_go];
12068            my $ci_level           = $ci_levels_to_go[$max_index_to_go];
12069            my $in_continued_quote = 0;
12070
12071            # first insert a blank token
12072            insert_new_token_to_go( ' ', 'b', $slevel, $no_internal_newlines );
12073
12074            # then the side comment
12075            insert_new_token_to_go( $token, $type, $slevel,
12076                $no_internal_newlines );
12077        }
12078    }
12079    return $cscw_block_comment;
12080}
12081
12082sub previous_nonblank_token {
12083    my ($i)  = @_;
12084    my $name = "";
12085    my $im   = $i - 1;
12086    return "" if ( $im < 0 );
12087    if ( $types_to_go[$im] eq 'b' ) { $im--; }
12088    return "" if ( $im < 0 );
12089    $name = $tokens_to_go[$im];
12090
12091    # prepend any sub name to an isolated -> to avoid unwanted alignments
12092    # [test case is test8/penco.pl]
12093    if ( $name eq '->' ) {
12094        $im--;
12095        if ( $im >= 0 && $types_to_go[$im] ne 'b' ) {
12096            $name = $tokens_to_go[$im] . $name;
12097        }
12098    }
12099    return $name;
12100}
12101
12102sub send_lines_to_vertical_aligner {
12103
12104    my ( $ri_first, $ri_last, $do_not_pad ) = @_;
12105
12106    my $rindentation_list = [0];    # ref to indentations for each line
12107
12108    # define the array @matching_token_to_go for the output tokens
12109    # which will be non-blank for each special token (such as =>)
12110    # for which alignment is required.
12111    set_vertical_alignment_markers( $ri_first, $ri_last );
12112
12113    # flush if necessary to avoid unwanted alignment
12114    my $must_flush = 0;
12115    if ( @$ri_first > 1 ) {
12116
12117        # flush before a long if statement
12118        if ( $types_to_go[0] eq 'k' && $tokens_to_go[0] =~ /^(if|unless)$/ ) {
12119            $must_flush = 1;
12120        }
12121    }
12122    if ($must_flush) {
12123        Perl::Tidy::VerticalAligner::flush();
12124    }
12125
12126    undo_ci( $ri_first, $ri_last );
12127
12128    set_logical_padding( $ri_first, $ri_last );
12129
12130    # loop to prepare each line for shipment
12131    my $n_last_line = @$ri_first - 1;
12132    my $in_comma_list;
12133    for my $n ( 0 .. $n_last_line ) {
12134        my $ibeg = $$ri_first[$n];
12135        my $iend = $$ri_last[$n];
12136
12137        my ( $rtokens, $rfields, $rpatterns ) =
12138          make_alignment_patterns( $ibeg, $iend );
12139
12140        # Set flag to show how much level changes between this line
12141        # and the next line, if we have it.
12142        my $ljump = 0;
12143        if ( $n < $n_last_line ) {
12144            my $ibegp = $$ri_first[ $n + 1 ];
12145            $ljump = $levels_to_go[$ibegp] - $levels_to_go[$iend];
12146        }
12147
12148        my ( $indentation, $lev, $level_end, $terminal_type,
12149            $is_semicolon_terminated, $is_outdented_line )
12150          = set_adjusted_indentation( $ibeg, $iend, $rfields, $rpatterns,
12151            $ri_first, $ri_last, $rindentation_list, $ljump );
12152
12153        # we will allow outdenting of long lines..
12154        my $outdent_long_lines = (
12155
12156            # which are long quotes, if allowed
12157            ( $types_to_go[$ibeg] eq 'Q' && $rOpts->{'outdent-long-quotes'} )
12158
12159            # which are long block comments, if allowed
12160              || (
12161                   $types_to_go[$ibeg] eq '#'
12162                && $rOpts->{'outdent-long-comments'}
12163
12164                # but not if this is a static block comment
12165                && !$is_static_block_comment
12166              )
12167        );
12168
12169        my $level_jump =
12170          $nesting_depth_to_go[ $iend + 1 ] - $nesting_depth_to_go[$ibeg];
12171
12172        my $rvertical_tightness_flags =
12173          set_vertical_tightness_flags( $n, $n_last_line, $ibeg, $iend,
12174            $ri_first, $ri_last );
12175
12176        # flush an outdented line to avoid any unwanted vertical alignment
12177        Perl::Tidy::VerticalAligner::flush() if ($is_outdented_line);
12178
12179        # Set a flag at the final ':' of a ternary chain to request
12180        # vertical alignment of the final term.  Here is a
12181        # slightly complex example:
12182        #
12183        # $self->{_text} = (
12184        #    !$section        ? ''
12185        #   : $type eq 'item' ? "the $section entry"
12186        #   :                   "the section on $section"
12187        # )
12188        # . (
12189        #   $page
12190        #   ? ( $section ? ' in ' : '' ) . "the $page$page_ext manpage"
12191        #   : ' elsewhere in this document'
12192        # );
12193        #
12194        my $is_terminal_ternary = 0;
12195        if (   $tokens_to_go[$ibeg] eq ':'
12196            || $n > 0 && $tokens_to_go[ $$ri_last[ $n - 1 ] ] eq ':' )
12197        {
12198            my $last_leading_type = ":";
12199            if ( $n > 0 ) {
12200                my $iprev = $$ri_first[ $n - 1 ];
12201                $last_leading_type = $types_to_go[$iprev];
12202            }
12203            if (   $terminal_type ne ';'
12204                && $n_last_line > $n
12205                && $level_end == $lev )
12206            {
12207                my $inext = $$ri_first[ $n + 1 ];
12208                $level_end     = $levels_to_go[$inext];
12209                $terminal_type = $types_to_go[$inext];
12210            }
12211
12212            $is_terminal_ternary = $last_leading_type eq ':'
12213              && ( ( $terminal_type eq ';' && $level_end <= $lev )
12214                || ( $terminal_type ne ':' && $level_end < $lev ) )
12215
12216              # the termainal term must not contain any ternary terms, as in
12217              # my $ECHO = (
12218              #       $Is_MSWin32 ? ".\\echo$$"
12219              #     : $Is_MacOS   ? ":echo$$"
12220              #     : ( $Is_NetWare ? "echo$$" : "./echo$$" )
12221              # );
12222              && !grep /^[\?\:]$/, @types_to_go[ $ibeg + 1 .. $iend ];
12223        }
12224
12225        # send this new line down the pipe
12226        my $forced_breakpoint = $forced_breakpoint_to_go[$iend];
12227        Perl::Tidy::VerticalAligner::valign_input(
12228            $lev,
12229            $level_end,
12230            $indentation,
12231            $rfields,
12232            $rtokens,
12233            $rpatterns,
12234            $forced_breakpoint_to_go[$iend] || $in_comma_list,
12235            $outdent_long_lines,
12236            $is_terminal_ternary,
12237            $is_semicolon_terminated,
12238            $do_not_pad,
12239            $rvertical_tightness_flags,
12240            $level_jump,
12241        );
12242        $in_comma_list =
12243          $tokens_to_go[$iend] eq ',' && $forced_breakpoint_to_go[$iend];
12244
12245        # flush an outdented line to avoid any unwanted vertical alignment
12246        Perl::Tidy::VerticalAligner::flush() if ($is_outdented_line);
12247
12248        $do_not_pad = 0;
12249
12250        # Set flag indicating if this line ends in an opening
12251        # token and is very short, so that a blank line is not
12252        # needed if the subsequent line is a comment.
12253        # Examples of what we are looking for:
12254        #   {
12255        #   && (
12256        #   BEGIN {
12257        #   default {
12258        #   sub {
12259        $last_output_short_opening_token
12260
12261          # line ends in opening token
12262          = $types_to_go[$iend] =~ /^[\{\(\[L]$/
12263
12264          # and either
12265          && (
12266            # line has either single opening token
12267            $iend == $ibeg
12268
12269            # or is a single token followed by opening token.
12270            # Note that sub identifiers have blanks like 'sub doit'
12271            || ( $iend - $ibeg <= 2 && $tokens_to_go[$ibeg] !~ /\s+/ )
12272          )
12273
12274          # and limit total to 10 character widths
12275          && token_sequence_length( $ibeg, $iend ) <= 10;
12276
12277    }    # end of loop to output each line
12278
12279    # remember indentation of lines containing opening containers for
12280    # later use by sub set_adjusted_indentation
12281    save_opening_indentation( $ri_first, $ri_last, $rindentation_list );
12282}
12283
12284{    # begin make_alignment_patterns
12285
12286    my %block_type_map;
12287    my %keyword_map;
12288
12289    BEGIN {
12290
12291        # map related block names into a common name to
12292        # allow alignment
12293        %block_type_map = (
12294            'unless'  => 'if',
12295            'else'    => 'if',
12296            'elsif'   => 'if',
12297            'when'    => 'if',
12298            'default' => 'if',
12299            'case'    => 'if',
12300            'sort'    => 'map',
12301            'grep'    => 'map',
12302        );
12303
12304        # map certain keywords to the same 'if' class to align
12305        # long if/elsif sequences. [elsif.pl]
12306        %keyword_map = (
12307            'unless'  => 'if',
12308            'else'    => 'if',
12309            'elsif'   => 'if',
12310            'when'    => 'given',
12311            'default' => 'given',
12312            'case'    => 'switch',
12313
12314            # treat an 'undef' similar to numbers and quotes
12315            'undef' => 'Q',
12316        );
12317    }
12318
12319    sub make_alignment_patterns {
12320
12321        # Here we do some important preliminary work for the
12322        # vertical aligner.  We create three arrays for one
12323        # output line. These arrays contain strings that can
12324        # be tested by the vertical aligner to see if
12325        # consecutive lines can be aligned vertically.
12326        #
12327        # The three arrays are indexed on the vertical
12328        # alignment fields and are:
12329        # @tokens - a list of any vertical alignment tokens for this line.
12330        #   These are tokens, such as '=' '&&' '#' etc which
12331        #   we want to might align vertically.  These are
12332        #   decorated with various information such as
12333        #   nesting depth to prevent unwanted vertical
12334        #   alignment matches.
12335        # @fields - the actual text of the line between the vertical alignment
12336        #   tokens.
12337        # @patterns - a modified list of token types, one for each alignment
12338        #   field.  These should normally each match before alignment is
12339        #   allowed, even when the alignment tokens match.
12340        my ( $ibeg, $iend ) = @_;
12341        my @tokens   = ();
12342        my @fields   = ();
12343        my @patterns = ();
12344        my $i_start  = $ibeg;
12345        my $i;
12346
12347        my $depth                 = 0;
12348        my @container_name        = ("");
12349        my @multiple_comma_arrows = (undef);
12350
12351        my $j = 0;    # field index
12352
12353        $patterns[0] = "";
12354        for $i ( $ibeg .. $iend ) {
12355
12356            # Keep track of containers balanced on this line only.
12357            # These are used below to prevent unwanted cross-line alignments.
12358            # Unbalanced containers already avoid aligning across
12359            # container boundaries.
12360            if ( $tokens_to_go[$i] eq '(' ) {
12361
12362                # if container is balanced on this line...
12363                my $i_mate = $mate_index_to_go[$i];
12364                if ( $i_mate > $i && $i_mate <= $iend ) {
12365                    $depth++;
12366                    my $seqno = $type_sequence_to_go[$i];
12367                    my $count = comma_arrow_count($seqno);
12368                    $multiple_comma_arrows[$depth] = $count && $count > 1;
12369
12370                    # Append the previous token name to make the container name
12371                    # more unique.  This name will also be given to any commas
12372                    # within this container, and it helps avoid undesirable
12373                    # alignments of different types of containers.
12374                    my $name = previous_nonblank_token($i);
12375                    $name =~ s/^->//;
12376                    $container_name[$depth] = "+" . $name;
12377
12378                    # Make the container name even more unique if necessary.
12379                    # If we are not vertically aligning this opening paren,
12380                    # append a character count to avoid bad alignment because
12381                    # it usually looks bad to align commas within continers
12382                    # for which the opening parens do not align.  Here
12383                    # is an example very BAD alignment of commas (because
12384                    # the atan2 functions are not all aligned):
12385                    #    $XY =
12386                    #      $X * $RTYSQP1 * atan2( $X, $RTYSQP1 ) +
12387                    #      $Y * $RTXSQP1 * atan2( $Y, $RTXSQP1 ) -
12388                    #      $X * atan2( $X,            1 ) -
12389                    #      $Y * atan2( $Y,            1 );
12390                    #
12391                    # On the other hand, it is usually okay to align commas if
12392                    # opening parens align, such as:
12393                    #    glVertex3d( $cx + $s * $xs, $cy,            $z );
12394                    #    glVertex3d( $cx,            $cy + $s * $ys, $z );
12395                    #    glVertex3d( $cx - $s * $xs, $cy,            $z );
12396                    #    glVertex3d( $cx,            $cy - $s * $ys, $z );
12397                    #
12398                    # To distinguish between these situations, we will
12399                    # append the length of the line from the previous matching
12400                    # token, or beginning of line, to the function name.  This
12401                    # will allow the vertical aligner to reject undesirable
12402                    # matches.
12403
12404                    # if we are not aligning on this paren...
12405                    if ( $matching_token_to_go[$i] eq '' ) {
12406
12407                        # Sum length from previous alignment, or start of line.
12408                        my $len =
12409                          ( $i_start == $ibeg )
12410                          ? total_line_length( $i_start, $i - 1 )
12411                          : token_sequence_length( $i_start, $i - 1 );
12412
12413                        # tack length onto the container name to make unique
12414                        $container_name[$depth] .= "-" . $len;
12415                    }
12416                }
12417            }
12418            elsif ( $tokens_to_go[$i] eq ')' ) {
12419                $depth-- if $depth > 0;
12420            }
12421
12422            # if we find a new synchronization token, we are done with
12423            # a field
12424            if ( $i > $i_start && $matching_token_to_go[$i] ne '' ) {
12425
12426                my $tok = my $raw_tok = $matching_token_to_go[$i];
12427
12428                # make separators in different nesting depths unique
12429                # by appending the nesting depth digit.
12430                if ( $raw_tok ne '#' ) {
12431                    $tok .= "$nesting_depth_to_go[$i]";
12432                }
12433
12434                # also decorate commas with any container name to avoid
12435                # unwanted cross-line alignments.
12436                if ( $raw_tok eq ',' || $raw_tok eq '=>' ) {
12437                    if ( $container_name[$depth] ) {
12438                        $tok .= $container_name[$depth];
12439                    }
12440                }
12441
12442                # Patch to avoid aligning leading and trailing if, unless.
12443                # Mark trailing if, unless statements with container names.
12444                # This makes them different from leading if, unless which
12445                # are not so marked at present.  If we ever need to name
12446                # them too, we could use ci to distinguish them.
12447                # Example problem to avoid:
12448                #    return ( 2, "DBERROR" )
12449                #      if ( $retval == 2 );
12450                #    if   ( scalar @_ ) {
12451                #        my ( $a, $b, $c, $d, $e, $f ) = @_;
12452                #    }
12453                if ( $raw_tok eq '(' ) {
12454                    my $ci = $ci_levels_to_go[$ibeg];
12455                    if (   $container_name[$depth] =~ /^\+(if|unless)/
12456                        && $ci )
12457                    {
12458                        $tok .= $container_name[$depth];
12459                    }
12460                }
12461
12462                # Decorate block braces with block types to avoid
12463                # unwanted alignments such as the following:
12464                # foreach ( @{$routput_array} ) { $fh->print($_) }
12465                # eval                          { $fh->close() };
12466                if ( $raw_tok eq '{' && $block_type_to_go[$i] ) {
12467                    my $block_type = $block_type_to_go[$i];
12468
12469                    # map certain related block types to allow
12470                    # else blocks to align
12471                    $block_type = $block_type_map{$block_type}
12472                      if ( defined( $block_type_map{$block_type} ) );
12473
12474                    # remove sub names to allow one-line sub braces to align
12475                    # regardless of name
12476                    if ( $block_type =~ /^sub / ) { $block_type = 'sub' }
12477
12478                    # allow all control-type blocks to align
12479                    if ( $block_type =~ /^[A-Z]+$/ ) { $block_type = 'BEGIN' }
12480
12481                    $tok .= $block_type;
12482                }
12483
12484                # concatenate the text of the consecutive tokens to form
12485                # the field
12486                push( @fields,
12487                    join( '', @tokens_to_go[ $i_start .. $i - 1 ] ) );
12488
12489                # store the alignment token for this field
12490                push( @tokens, $tok );
12491
12492                # get ready for the next batch
12493                $i_start = $i;
12494                $j++;
12495                $patterns[$j] = "";
12496            }
12497
12498            # continue accumulating tokens
12499            # handle non-keywords..
12500            if ( $types_to_go[$i] ne 'k' ) {
12501                my $type = $types_to_go[$i];
12502
12503                # Mark most things before arrows as a quote to
12504                # get them to line up. Testfile: mixed.pl.
12505                if ( ( $i < $iend - 1 ) && ( $type =~ /^[wnC]$/ ) ) {
12506                    my $next_type = $types_to_go[ $i + 1 ];
12507                    my $i_next_nonblank =
12508                      ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 );
12509
12510                    if ( $types_to_go[$i_next_nonblank] eq '=>' ) {
12511                        $type = 'Q';
12512
12513                        # Patch to ignore leading minus before words,
12514                        # by changing pattern 'mQ' into just 'Q',
12515                        # so that we can align things like this:
12516                        #  Button   => "Print letter \"~$_\"",
12517                        #  -command => [ sub { print "$_[0]\n" }, $_ ],
12518                        if ( $patterns[$j] eq 'm' ) { $patterns[$j] = "" }
12519                    }
12520                }
12521
12522                # patch to make numbers and quotes align
12523                if ( $type eq 'n' ) { $type = 'Q' }
12524
12525                # patch to ignore any ! in patterns
12526                if ( $type eq '!' ) { $type = '' }
12527
12528                $patterns[$j] .= $type;
12529            }
12530
12531            # for keywords we have to use the actual text
12532            else {
12533
12534                my $tok = $tokens_to_go[$i];
12535
12536                # but map certain keywords to a common string to allow
12537                # alignment.
12538                $tok = $keyword_map{$tok}
12539                  if ( defined( $keyword_map{$tok} ) );
12540                $patterns[$j] .= $tok;
12541            }
12542        }
12543
12544        # done with this line .. join text of tokens to make the last field
12545        push( @fields, join( '', @tokens_to_go[ $i_start .. $iend ] ) );
12546        return ( \@tokens, \@fields, \@patterns );
12547    }
12548
12549}    # end make_alignment_patterns
12550
12551{    # begin unmatched_indexes
12552
12553    # closure to keep track of unbalanced containers.
12554    # arrays shared by the routines in this block:
12555    my @unmatched_opening_indexes_in_this_batch;
12556    my @unmatched_closing_indexes_in_this_batch;
12557    my %comma_arrow_count;
12558
12559    sub is_unbalanced_batch {
12560        @unmatched_opening_indexes_in_this_batch +
12561          @unmatched_closing_indexes_in_this_batch;
12562    }
12563
12564    sub comma_arrow_count {
12565        my $seqno = $_[0];
12566        return $comma_arrow_count{$seqno};
12567    }
12568
12569    sub match_opening_and_closing_tokens {
12570
12571        # Match up indexes of opening and closing braces, etc, in this batch.
12572        # This has to be done after all tokens are stored because unstoring
12573        # of tokens would otherwise cause trouble.
12574
12575        @unmatched_opening_indexes_in_this_batch = ();
12576        @unmatched_closing_indexes_in_this_batch = ();
12577        %comma_arrow_count                       = ();
12578        my $comma_arrow_count_contained = 0;
12579
12580        my ( $i, $i_mate, $token );
12581        foreach $i ( 0 .. $max_index_to_go ) {
12582            if ( $type_sequence_to_go[$i] ) {
12583                $token = $tokens_to_go[$i];
12584                if ( $token =~ /^[\(\[\{\?]$/ ) {
12585                    push @unmatched_opening_indexes_in_this_batch, $i;
12586                }
12587                elsif ( $token =~ /^[\)\]\}\:]$/ ) {
12588
12589                    $i_mate = pop @unmatched_opening_indexes_in_this_batch;
12590                    if ( defined($i_mate) && $i_mate >= 0 ) {
12591                        if ( $type_sequence_to_go[$i_mate] ==
12592                            $type_sequence_to_go[$i] )
12593                        {
12594                            $mate_index_to_go[$i]      = $i_mate;
12595                            $mate_index_to_go[$i_mate] = $i;
12596                            my $seqno = $type_sequence_to_go[$i];
12597                            if ( $comma_arrow_count{$seqno} ) {
12598                                $comma_arrow_count_contained +=
12599                                  $comma_arrow_count{$seqno};
12600                            }
12601                        }
12602                        else {
12603                            push @unmatched_opening_indexes_in_this_batch,
12604                              $i_mate;
12605                            push @unmatched_closing_indexes_in_this_batch, $i;
12606                        }
12607                    }
12608                    else {
12609                        push @unmatched_closing_indexes_in_this_batch, $i;
12610                    }
12611                }
12612            }
12613            elsif ( $tokens_to_go[$i] eq '=>' ) {
12614                if (@unmatched_opening_indexes_in_this_batch) {
12615                    my $j     = $unmatched_opening_indexes_in_this_batch[-1];
12616                    my $seqno = $type_sequence_to_go[$j];
12617                    $comma_arrow_count{$seqno}++;
12618                }
12619            }
12620        }
12621        return $comma_arrow_count_contained;
12622    }
12623
12624    sub save_opening_indentation {
12625
12626        # This should be called after each batch of tokens is output. It
12627        # saves indentations of lines of all unmatched opening tokens.
12628        # These will be used by sub get_opening_indentation.
12629
12630        my ( $ri_first, $ri_last, $rindentation_list ) = @_;
12631
12632        # we no longer need indentations of any saved indentations which
12633        # are unmatched closing tokens in this batch, because we will
12634        # never encounter them again.  So we can delete them to keep
12635        # the hash size down.
12636        foreach (@unmatched_closing_indexes_in_this_batch) {
12637            my $seqno = $type_sequence_to_go[$_];
12638            delete $saved_opening_indentation{$seqno};
12639        }
12640
12641        # we need to save indentations of any unmatched opening tokens
12642        # in this batch because we may need them in a subsequent batch.
12643        foreach (@unmatched_opening_indexes_in_this_batch) {
12644            my $seqno = $type_sequence_to_go[$_];
12645            $saved_opening_indentation{$seqno} = [
12646                lookup_opening_indentation(
12647                    $_, $ri_first, $ri_last, $rindentation_list
12648                )
12649            ];
12650        }
12651    }
12652}    # end unmatched_indexes
12653
12654sub get_opening_indentation {
12655
12656    # get the indentation of the line which output the opening token
12657    # corresponding to a given closing token in the current output batch.
12658    #
12659    # given:
12660    # $i_closing - index in this line of a closing token ')' '}' or ']'
12661    #
12662    # $ri_first - reference to list of the first index $i for each output
12663    #               line in this batch
12664    # $ri_last - reference to list of the last index $i for each output line
12665    #              in this batch
12666    # $rindentation_list - reference to a list containing the indentation
12667    #            used for each line.
12668    #
12669    # return:
12670    #   -the indentation of the line which contained the opening token
12671    #    which matches the token at index $i_opening
12672    #   -and its offset (number of columns) from the start of the line
12673    #
12674    my ( $i_closing, $ri_first, $ri_last, $rindentation_list ) = @_;
12675
12676    # first, see if the opening token is in the current batch
12677    my $i_opening = $mate_index_to_go[$i_closing];
12678    my ( $indent, $offset, $is_leading, $exists );
12679    $exists = 1;
12680    if ( $i_opening >= 0 ) {
12681
12682        # it is..look up the indentation
12683        ( $indent, $offset, $is_leading ) =
12684          lookup_opening_indentation( $i_opening, $ri_first, $ri_last,
12685            $rindentation_list );
12686    }
12687
12688    # if not, it should have been stored in the hash by a previous batch
12689    else {
12690        my $seqno = $type_sequence_to_go[$i_closing];
12691        if ($seqno) {
12692            if ( $saved_opening_indentation{$seqno} ) {
12693                ( $indent, $offset, $is_leading ) =
12694                  @{ $saved_opening_indentation{$seqno} };
12695            }
12696
12697            # some kind of serious error
12698            # (example is badfile.t)
12699            else {
12700                $indent     = 0;
12701                $offset     = 0;
12702                $is_leading = 0;
12703                $exists     = 0;
12704            }
12705        }
12706
12707        # if no sequence number it must be an unbalanced container
12708        else {
12709            $indent     = 0;
12710            $offset     = 0;
12711            $is_leading = 0;
12712            $exists     = 0;
12713        }
12714    }
12715    return ( $indent, $offset, $is_leading, $exists );
12716}
12717
12718sub lookup_opening_indentation {
12719
12720    # get the indentation of the line in the current output batch
12721    # which output a selected opening token
12722    #
12723    # given:
12724    #   $i_opening - index of an opening token in the current output batch
12725    #                whose line indentation we need
12726    #   $ri_first - reference to list of the first index $i for each output
12727    #               line in this batch
12728    #   $ri_last - reference to list of the last index $i for each output line
12729    #              in this batch
12730    #   $rindentation_list - reference to a list containing the indentation
12731    #            used for each line.  (NOTE: the first slot in
12732    #            this list is the last returned line number, and this is
12733    #            followed by the list of indentations).
12734    #
12735    # return
12736    #   -the indentation of the line which contained token $i_opening
12737    #   -and its offset (number of columns) from the start of the line
12738
12739    my ( $i_opening, $ri_start, $ri_last, $rindentation_list ) = @_;
12740
12741    my $nline = $rindentation_list->[0];    # line number of previous lookup
12742
12743    # reset line location if necessary
12744    $nline = 0 if ( $i_opening < $ri_start->[$nline] );
12745
12746    # find the correct line
12747    unless ( $i_opening > $ri_last->[-1] ) {
12748        while ( $i_opening > $ri_last->[$nline] ) { $nline++; }
12749    }
12750
12751    # error - token index is out of bounds - shouldn't happen
12752    else {
12753        warning(
12754"non-fatal program bug in lookup_opening_indentation - index out of range\n"
12755        );
12756        report_definite_bug();
12757        $nline = $#{$ri_last};
12758    }
12759
12760    $rindentation_list->[0] =
12761      $nline;    # save line number to start looking next call
12762    my $ibeg       = $ri_start->[$nline];
12763    my $offset     = token_sequence_length( $ibeg, $i_opening ) - 1;
12764    my $is_leading = ( $ibeg == $i_opening );
12765    return ( $rindentation_list->[ $nline + 1 ], $offset, $is_leading );
12766}
12767
12768{
12769    my %is_if_elsif_else_unless_while_until_for_foreach;
12770
12771    BEGIN {
12772
12773        # These block types may have text between the keyword and opening
12774        # curly.  Note: 'else' does not, but must be included to allow trailing
12775        # if/elsif text to be appended.
12776        # patch for SWITCH/CASE: added 'case' and 'when'
12777        @_ = qw(if elsif else unless while until for foreach case when);
12778        @is_if_elsif_else_unless_while_until_for_foreach{@_} =
12779          (1) x scalar(@_);
12780    }
12781
12782    sub set_adjusted_indentation {
12783
12784        # This routine has the final say regarding the actual indentation of
12785        # a line.  It starts with the basic indentation which has been
12786        # defined for the leading token, and then takes into account any
12787        # options that the user has set regarding special indenting and
12788        # outdenting.
12789
12790        my ( $ibeg, $iend, $rfields, $rpatterns, $ri_first, $ri_last,
12791            $rindentation_list, $level_jump )
12792          = @_;
12793
12794        # we need to know the last token of this line
12795        my ( $terminal_type, $i_terminal ) =
12796          terminal_type( \@types_to_go, \@block_type_to_go, $ibeg, $iend );
12797
12798        my $is_outdented_line = 0;
12799
12800        my $is_semicolon_terminated = $terminal_type eq ';'
12801          && $nesting_depth_to_go[$iend] < $nesting_depth_to_go[$ibeg];
12802
12803        ##########################################################
12804        # Section 1: set a flag and a default indentation
12805        #
12806        # Most lines are indented according to the initial token.
12807        # But it is common to outdent to the level just after the
12808        # terminal token in certain cases...
12809        # adjust_indentation flag:
12810        #       0 - do not adjust
12811        #       1 - outdent
12812        #       2 - vertically align with opening token
12813        #       3 - indent
12814        ##########################################################
12815        my $adjust_indentation         = 0;
12816        my $default_adjust_indentation = $adjust_indentation;
12817
12818        my (
12819            $opening_indentation, $opening_offset,
12820            $is_leading,          $opening_exists
12821        );
12822
12823        # if we are at a closing token of some type..
12824        if ( $types_to_go[$ibeg] =~ /^[\)\}\]R]$/ ) {
12825
12826            # get the indentation of the line containing the corresponding
12827            # opening token
12828            (
12829                $opening_indentation, $opening_offset,
12830                $is_leading,          $opening_exists
12831              )
12832              = get_opening_indentation( $ibeg, $ri_first, $ri_last,
12833                $rindentation_list );
12834
12835            # First set the default behavior:
12836            if (
12837
12838                # default behavior is to outdent closing lines
12839                # of the form:   ");  };  ];  )->xxx;"
12840                $is_semicolon_terminated
12841
12842                # and 'cuddled parens' of the form:   ")->pack("
12843                || (
12844                       $terminal_type eq '('
12845                    && $types_to_go[$ibeg] eq ')'
12846                    && ( $nesting_depth_to_go[$iend] + 1 ==
12847                        $nesting_depth_to_go[$ibeg] )
12848                )
12849
12850                # and when the next line is at a lower indentation level
12851                # PATCH: and only if the style allows undoing continuation
12852                # for all closing token types. We should really wait until
12853                # the indentation of the next line is known and then make
12854                # a decision, but that would require another pass.
12855                || ( $level_jump < 0 && !$some_closing_token_indentation )
12856              )
12857            {
12858                $adjust_indentation = 1;
12859            }
12860
12861            # outdent something like '),'
12862            if (
12863                $terminal_type eq ','
12864
12865                # allow just one character before the comma
12866                && $i_terminal == $ibeg + 1
12867
12868                # requre LIST environment; otherwise, we may outdent too much --
12869                # this can happen in calls without parentheses (overload.t);
12870                && $container_environment_to_go[$i_terminal] eq 'LIST'
12871              )
12872            {
12873                $adjust_indentation = 1;
12874            }
12875
12876            # undo continuation indentation of a terminal closing token if
12877            # it is the last token before a level decrease.  This will allow
12878            # a closing token to line up with its opening counterpart, and
12879            # avoids a indentation jump larger than 1 level.
12880            if (   $types_to_go[$i_terminal] =~ /^[\}\]\)R]$/
12881                && $i_terminal == $ibeg )
12882            {
12883                my $ci        = $ci_levels_to_go[$ibeg];
12884                my $lev       = $levels_to_go[$ibeg];
12885                my $next_type = $types_to_go[ $ibeg + 1 ];
12886                my $i_next_nonblank =
12887                  ( ( $next_type eq 'b' ) ? $ibeg + 2 : $ibeg + 1 );
12888                if (   $i_next_nonblank <= $max_index_to_go
12889                    && $levels_to_go[$i_next_nonblank] < $lev )
12890                {
12891                    $adjust_indentation = 1;
12892                }
12893            }
12894
12895            # YVES patch 1 of 2:
12896            # Undo ci of line with leading closing eval brace,
12897            # but not beyond the indention of the line with
12898            # the opening brace.
12899            if (   $block_type_to_go[$ibeg] eq 'eval'
12900                && !$rOpts->{'line-up-parentheses'}
12901                && !$rOpts->{'indent-closing-brace'} )
12902            {
12903                (
12904                    $opening_indentation, $opening_offset,
12905                    $is_leading,          $opening_exists
12906                  )
12907                  = get_opening_indentation( $ibeg, $ri_first, $ri_last,
12908                    $rindentation_list );
12909                my $indentation = $leading_spaces_to_go[$ibeg];
12910                if ( defined($opening_indentation)
12911                    && $indentation > $opening_indentation )
12912                {
12913                    $adjust_indentation = 1;
12914                }
12915            }
12916
12917            $default_adjust_indentation = $adjust_indentation;
12918
12919            # Now modify default behavior according to user request:
12920            # handle option to indent non-blocks of the form );  };  ];
12921            # But don't do special indentation to something like ')->pack('
12922            if ( !$block_type_to_go[$ibeg] ) {
12923                my $cti = $closing_token_indentation{ $tokens_to_go[$ibeg] };
12924                if ( $cti == 1 ) {
12925                    if (   $i_terminal <= $ibeg + 1
12926                        || $is_semicolon_terminated )
12927                    {
12928                        $adjust_indentation = 2;
12929                    }
12930                    else {
12931                        $adjust_indentation = 0;
12932                    }
12933                }
12934                elsif ( $cti == 2 ) {
12935                    if ($is_semicolon_terminated) {
12936                        $adjust_indentation = 3;
12937                    }
12938                    else {
12939                        $adjust_indentation = 0;
12940                    }
12941                }
12942                elsif ( $cti == 3 ) {
12943                    $adjust_indentation = 3;
12944                }
12945            }
12946
12947            # handle option to indent blocks
12948            else {
12949                if (
12950                    $rOpts->{'indent-closing-brace'}
12951                    && (
12952                        $i_terminal == $ibeg    #  isolated terminal '}'
12953                        || $is_semicolon_terminated
12954                    )
12955                  )                             #  } xxxx ;
12956                {
12957                    $adjust_indentation = 3;
12958                }
12959            }
12960        }
12961
12962        # if at ');', '};', '>;', and '];' of a terminal qw quote
12963        elsif ($$rpatterns[0] =~ /^qb*;$/
12964            && $$rfields[0] =~ /^([\)\}\]\>]);$/ )
12965        {
12966            if ( $closing_token_indentation{$1} == 0 ) {
12967                $adjust_indentation = 1;
12968            }
12969            else {
12970                $adjust_indentation = 3;
12971            }
12972        }
12973
12974        # if line begins with a ':', align it with any
12975        # previous line leading with corresponding ?
12976        elsif ( $types_to_go[$ibeg] eq ':' ) {
12977            (
12978                $opening_indentation, $opening_offset,
12979                $is_leading,          $opening_exists
12980              )
12981              = get_opening_indentation( $ibeg, $ri_first, $ri_last,
12982                $rindentation_list );
12983            if ($is_leading) { $adjust_indentation = 2; }
12984        }
12985
12986        ##########################################################
12987        # Section 2: set indentation according to flag set above
12988        #
12989        # Select the indentation object to define leading
12990        # whitespace.  If we are outdenting something like '} } );'
12991        # then we want to use one level below the last token
12992        # ($i_terminal) in order to get it to fully outdent through
12993        # all levels.
12994        ##########################################################
12995        my $indentation;
12996        my $lev;
12997        my $level_end = $levels_to_go[$iend];
12998
12999        if ( $adjust_indentation == 0 ) {
13000            $indentation = $leading_spaces_to_go[$ibeg];
13001            $lev         = $levels_to_go[$ibeg];
13002        }
13003        elsif ( $adjust_indentation == 1 ) {
13004            $indentation = $reduced_spaces_to_go[$i_terminal];
13005            $lev         = $levels_to_go[$i_terminal];
13006        }
13007
13008        # handle indented closing token which aligns with opening token
13009        elsif ( $adjust_indentation == 2 ) {
13010
13011            # handle option to align closing token with opening token
13012            $lev = $levels_to_go[$ibeg];
13013
13014            # calculate spaces needed to align with opening token
13015            my $space_count =
13016              get_SPACES($opening_indentation) + $opening_offset;
13017
13018            # Indent less than the previous line.
13019            #
13020            # Problem: For -lp we don't exactly know what it was if there
13021            # were recoverable spaces sent to the aligner.  A good solution
13022            # would be to force a flush of the vertical alignment buffer, so
13023            # that we would know.  For now, this rule is used for -lp:
13024            #
13025            # When the last line did not start with a closing token we will
13026            # be optimistic that the aligner will recover everything wanted.
13027            #
13028            # This rule will prevent us from breaking a hierarchy of closing
13029            # tokens, and in a worst case will leave a closing paren too far
13030            # indented, but this is better than frequently leaving it not
13031            # indented enough.
13032            my $last_spaces = get_SPACES($last_indentation_written);
13033            if ( $last_leading_token !~ /^[\}\]\)]$/ ) {
13034                $last_spaces +=
13035                  get_RECOVERABLE_SPACES($last_indentation_written);
13036            }
13037
13038            # reset the indentation to the new space count if it works
13039            # only options are all or none: nothing in-between looks good
13040            $lev = $levels_to_go[$ibeg];
13041            if ( $space_count < $last_spaces ) {
13042                if ($rOpts_line_up_parentheses) {
13043                    my $lev = $levels_to_go[$ibeg];
13044                    $indentation =
13045                      new_lp_indentation_item( $space_count, $lev, 0, 0, 0 );
13046                }
13047                else {
13048                    $indentation = $space_count;
13049                }
13050            }
13051
13052            # revert to default if it doesn't work
13053            else {
13054                $space_count = leading_spaces_to_go($ibeg);
13055                if ( $default_adjust_indentation == 0 ) {
13056                    $indentation = $leading_spaces_to_go[$ibeg];
13057                }
13058                elsif ( $default_adjust_indentation == 1 ) {
13059                    $indentation = $reduced_spaces_to_go[$i_terminal];
13060                    $lev         = $levels_to_go[$i_terminal];
13061                }
13062            }
13063        }
13064
13065        # Full indentaion of closing tokens (-icb and -icp or -cti=2)
13066        else {
13067
13068            # handle -icb (indented closing code block braces)
13069            # Updated method for indented block braces: indent one full level if
13070            # there is no continuation indentation.  This will occur for major
13071            # structures such as sub, if, else, but not for things like map
13072            # blocks.
13073            #
13074            # Note: only code blocks without continuation indentation are
13075            # handled here (if, else, unless, ..). In the following snippet,
13076            # the terminal brace of the sort block will have continuation
13077            # indentation as shown so it will not be handled by the coding
13078            # here.  We would have to undo the continuation indentation to do
13079            # this, but it probably looks ok as is.  This is a possible future
13080            # update for semicolon terminated lines.
13081            #
13082            #     if ($sortby eq 'date' or $sortby eq 'size') {
13083            #         @files = sort {
13084            #             $file_data{$a}{$sortby} <=> $file_data{$b}{$sortby}
13085            #                 or $a cmp $b
13086            #                 } @files;
13087            #         }
13088            #
13089            if (   $block_type_to_go[$ibeg]
13090                && $ci_levels_to_go[$i_terminal] == 0 )
13091            {
13092                my $spaces = get_SPACES( $leading_spaces_to_go[$i_terminal] );
13093                $indentation = $spaces + $rOpts_indent_columns;
13094
13095                # NOTE: for -lp we could create a new indentation object, but
13096                # there is probably no need to do it
13097            }
13098
13099            # handle -icp and any -icb block braces which fall through above
13100            # test such as the 'sort' block mentioned above.
13101            else {
13102
13103                # There are currently two ways to handle -icp...
13104                # One way is to use the indentation of the previous line:
13105                # $indentation = $last_indentation_written;
13106
13107                # The other way is to use the indentation that the previous line
13108                # would have had if it hadn't been adjusted:
13109                $indentation = $last_unadjusted_indentation;
13110
13111                # Current method: use the minimum of the two. This avoids
13112                # inconsistent indentation.
13113                if ( get_SPACES($last_indentation_written) <
13114                    get_SPACES($indentation) )
13115                {
13116                    $indentation = $last_indentation_written;
13117                }
13118            }
13119
13120            # use previous indentation but use own level
13121            # to cause list to be flushed properly
13122            $lev = $levels_to_go[$ibeg];
13123        }
13124
13125        # remember indentation except for multi-line quotes, which get
13126        # no indentation
13127        unless ( $ibeg == 0 && $starting_in_quote ) {
13128            $last_indentation_written    = $indentation;
13129            $last_unadjusted_indentation = $leading_spaces_to_go[$ibeg];
13130            $last_leading_token          = $tokens_to_go[$ibeg];
13131        }
13132
13133        # be sure lines with leading closing tokens are not outdented more
13134        # than the line which contained the corresponding opening token.
13135
13136        #############################################################
13137        # updated per bug report in alex_bug.pl: we must not
13138        # mess with the indentation of closing logical braces so
13139        # we must treat something like '} else {' as if it were
13140        # an isolated brace my $is_isolated_block_brace = (
13141        # $iend == $ibeg ) && $block_type_to_go[$ibeg];
13142        #############################################################
13143        my $is_isolated_block_brace = $block_type_to_go[$ibeg]
13144          && ( $iend == $ibeg
13145            || $is_if_elsif_else_unless_while_until_for_foreach{
13146                $block_type_to_go[$ibeg]
13147            } );
13148
13149        # only do this for a ':; which is aligned with its leading '?'
13150        my $is_unaligned_colon = $types_to_go[$ibeg] eq ':' && !$is_leading;
13151        if (   defined($opening_indentation)
13152            && !$is_isolated_block_brace
13153            && !$is_unaligned_colon )
13154        {
13155            if ( get_SPACES($opening_indentation) > get_SPACES($indentation) ) {
13156                $indentation = $opening_indentation;
13157            }
13158        }
13159
13160        # remember the indentation of each line of this batch
13161        push @{$rindentation_list}, $indentation;
13162
13163        # outdent lines with certain leading tokens...
13164        if (
13165
13166            # must be first word of this batch
13167            $ibeg == 0
13168
13169            # and ...
13170            && (
13171
13172                # certain leading keywords if requested
13173                (
13174                       $rOpts->{'outdent-keywords'}
13175                    && $types_to_go[$ibeg] eq 'k'
13176                    && $outdent_keyword{ $tokens_to_go[$ibeg] }
13177                )
13178
13179                # or labels if requested
13180                || ( $rOpts->{'outdent-labels'} && $types_to_go[$ibeg] eq 'J' )
13181
13182                # or static block comments if requested
13183                || (   $types_to_go[$ibeg] eq '#'
13184                    && $rOpts->{'outdent-static-block-comments'}
13185                    && $is_static_block_comment )
13186            )
13187          )
13188
13189        {
13190            my $space_count = leading_spaces_to_go($ibeg);
13191            if ( $space_count > 0 ) {
13192                $space_count -= $rOpts_continuation_indentation;
13193                $is_outdented_line = 1;
13194                if ( $space_count < 0 ) { $space_count = 0 }
13195
13196                # do not promote a spaced static block comment to non-spaced;
13197                # this is not normally necessary but could be for some
13198                # unusual user inputs (such as -ci = -i)
13199                if ( $types_to_go[$ibeg] eq '#' && $space_count == 0 ) {
13200                    $space_count = 1;
13201                }
13202
13203                if ($rOpts_line_up_parentheses) {
13204                    $indentation =
13205                      new_lp_indentation_item( $space_count, $lev, 0, 0, 0 );
13206                }
13207                else {
13208                    $indentation = $space_count;
13209                }
13210            }
13211        }
13212
13213        return ( $indentation, $lev, $level_end, $terminal_type,
13214            $is_semicolon_terminated, $is_outdented_line );
13215    }
13216}
13217
13218sub set_vertical_tightness_flags {
13219
13220    my ( $n, $n_last_line, $ibeg, $iend, $ri_first, $ri_last ) = @_;
13221
13222    # Define vertical tightness controls for the nth line of a batch.
13223    # We create an array of parameters which tell the vertical aligner
13224    # if we should combine this line with the next line to achieve the
13225    # desired vertical tightness.  The array of parameters contains:
13226    #
13227    #   [0] type: 1=opening non-block    2=closing non-block
13228    #             3=opening block brace  4=closing block brace
13229    #
13230    #   [1] flag: if opening: 1=no multiple steps, 2=multiple steps ok
13231    #             if closing: spaces of padding to use
13232    #   [2] sequence number of container
13233    #   [3] valid flag: do not append if this flag is false. Will be
13234    #       true if appropriate -vt flag is set.  Otherwise, Will be
13235    #       made true only for 2 line container in parens with -lp
13236    #
13237    # These flags are used by sub set_leading_whitespace in
13238    # the vertical aligner
13239
13240    my $rvertical_tightness_flags = [ 0, 0, 0, 0, 0, 0 ];
13241
13242    #--------------------------------------------------------------
13243    # Vertical Tightness Flags Section 1:
13244    # Handle Lines 1 .. n-1 but not the last line
13245    # For non-BLOCK tokens, we will need to examine the next line
13246    # too, so we won't consider the last line.
13247    #--------------------------------------------------------------
13248    if ( $n < $n_last_line ) {
13249
13250        #--------------------------------------------------------------
13251        # Vertical Tightness Flags Section 1a:
13252        # Look for Type 1, last token of this line is a non-block opening token
13253        #--------------------------------------------------------------
13254        my $ibeg_next = $$ri_first[ $n + 1 ];
13255        my $token_end = $tokens_to_go[$iend];
13256        my $iend_next = $$ri_last[ $n + 1 ];
13257        if (
13258               $type_sequence_to_go[$iend]
13259            && !$block_type_to_go[$iend]
13260            && $is_opening_token{$token_end}
13261            && (
13262                $opening_vertical_tightness{$token_end} > 0
13263
13264                # allow 2-line method call to be closed up
13265                || (   $rOpts_line_up_parentheses
13266                    && $token_end eq '('
13267                    && $iend > $ibeg
13268                    && $types_to_go[ $iend - 1 ] ne 'b' )
13269            )
13270          )
13271        {
13272
13273            # avoid multiple jumps in nesting depth in one line if
13274            # requested
13275            my $ovt       = $opening_vertical_tightness{$token_end};
13276            my $iend_next = $$ri_last[ $n + 1 ];
13277            unless (
13278                $ovt < 2
13279                && ( $nesting_depth_to_go[ $iend_next + 1 ] !=
13280                    $nesting_depth_to_go[$ibeg_next] )
13281              )
13282            {
13283
13284                # If -vt flag has not been set, mark this as invalid
13285                # and aligner will validate it if it sees the closing paren
13286                # within 2 lines.
13287                my $valid_flag = $ovt;
13288                @{$rvertical_tightness_flags} =
13289                  ( 1, $ovt, $type_sequence_to_go[$iend], $valid_flag );
13290            }
13291        }
13292
13293        #--------------------------------------------------------------
13294        # Vertical Tightness Flags Section 1b:
13295        # Look for Type 2, first token of next line is a non-block closing
13296        # token .. and be sure this line does not have a side comment
13297        #--------------------------------------------------------------
13298        my $token_next = $tokens_to_go[$ibeg_next];
13299        if (   $type_sequence_to_go[$ibeg_next]
13300            && !$block_type_to_go[$ibeg_next]
13301            && $is_closing_token{$token_next}
13302            && $types_to_go[$iend] !~ '#' )    # for safety, shouldn't happen!
13303        {
13304            my $ovt = $opening_vertical_tightness{$token_next};
13305            my $cvt = $closing_vertical_tightness{$token_next};
13306            if (
13307
13308                # never append a trailing line like   )->pack(
13309                # because it will throw off later alignment
13310                (
13311                    $nesting_depth_to_go[$ibeg_next] ==
13312                    $nesting_depth_to_go[ $iend_next + 1 ] + 1
13313                )
13314                && (
13315                    $cvt == 2
13316                    || (
13317                        $container_environment_to_go[$ibeg_next] ne 'LIST'
13318                        && (
13319                            $cvt == 1
13320
13321                            # allow closing up 2-line method calls
13322                            || (   $rOpts_line_up_parentheses
13323                                && $token_next eq ')' )
13324                        )
13325                    )
13326                )
13327              )
13328            {
13329
13330                # decide which trailing closing tokens to append..
13331                my $ok = 0;
13332                if ( $cvt == 2 || $iend_next == $ibeg_next ) { $ok = 1 }
13333                else {
13334                    my $str = join( '',
13335                        @types_to_go[ $ibeg_next + 1 .. $ibeg_next + 2 ] );
13336
13337                    # append closing token if followed by comment or ';'
13338                    if ( $str =~ /^b?[#;]/ ) { $ok = 1 }
13339                }
13340
13341                if ($ok) {
13342                    my $valid_flag = $cvt;
13343                    @{$rvertical_tightness_flags} = (
13344                        2,
13345                        $tightness{$token_next} == 2 ? 0 : 1,
13346                        $type_sequence_to_go[$ibeg_next], $valid_flag,
13347                    );
13348                }
13349            }
13350        }
13351
13352        #--------------------------------------------------------------
13353        # Vertical Tightness Flags Section 1c:
13354        # Implement the Opening Token Right flag (Type 2)..
13355        # If requested, move an isolated trailing opening token to the end of
13356        # the previous line which ended in a comma.  We could do this
13357        # in sub recombine_breakpoints but that would cause problems
13358        # with -lp formatting.  The problem is that indentation will
13359        # quickly move far to the right in nested expressions.  By
13360        # doing it after indentation has been set, we avoid changes
13361        # to the indentation.  Actual movement of the token takes place
13362        # in sub valign_output_step_B.
13363        #--------------------------------------------------------------
13364        if (
13365            $opening_token_right{ $tokens_to_go[$ibeg_next] }
13366
13367            # previous line is not opening
13368            # (use -sot to combine with it)
13369            && !$is_opening_token{$token_end}
13370
13371            # previous line ended in one of these
13372            # (add other cases if necessary; '=>' and '.' are not necessary
13373            && !$block_type_to_go[$ibeg_next]
13374
13375            # this is a line with just an opening token
13376            && (   $iend_next == $ibeg_next
13377                || $iend_next == $ibeg_next + 2
13378                && $types_to_go[$iend_next] eq '#' )
13379
13380            # looks bad if we align vertically with the wrong container
13381            && $tokens_to_go[$ibeg] ne $tokens_to_go[$ibeg_next]
13382          )
13383        {
13384            my $valid_flag = 1;
13385            my $spaces = ( $types_to_go[ $ibeg_next - 1 ] eq 'b' ) ? 1 : 0;
13386            @{$rvertical_tightness_flags} =
13387              ( 2, $spaces, $type_sequence_to_go[$ibeg_next], $valid_flag, );
13388        }
13389
13390        #--------------------------------------------------------------
13391        # Vertical Tightness Flags Section 1d:
13392        # Stacking of opening and closing tokens (Type 2)
13393        #--------------------------------------------------------------
13394        my $stackable;
13395        my $token_beg_next = $tokens_to_go[$ibeg_next];
13396
13397        # patch to make something like 'qw(' behave like an opening paren
13398        # (aran.t)
13399        if ( $types_to_go[$ibeg_next] eq 'q' ) {
13400            if ( $token_beg_next =~ /^qw\s*([\[\(\{])$/ ) {
13401                $token_beg_next = $1;
13402            }
13403        }
13404
13405        if (   $is_closing_token{$token_end}
13406            && $is_closing_token{$token_beg_next} )
13407        {
13408            $stackable = $stack_closing_token{$token_beg_next}
13409              unless ( $block_type_to_go[$ibeg_next] )
13410              ;    # shouldn't happen; just checking
13411        }
13412        elsif ($is_opening_token{$token_end}
13413            && $is_opening_token{$token_beg_next} )
13414        {
13415            $stackable = $stack_opening_token{$token_beg_next}
13416              unless ( $block_type_to_go[$ibeg_next] )
13417              ;    # shouldn't happen; just checking
13418        }
13419
13420        if ($stackable) {
13421
13422            my $is_semicolon_terminated;
13423            if ( $n + 1 == $n_last_line ) {
13424                my ( $terminal_type, $i_terminal ) = terminal_type(
13425                    \@types_to_go, \@block_type_to_go,
13426                    $ibeg_next,    $iend_next
13427                );
13428                $is_semicolon_terminated = $terminal_type eq ';'
13429                  && $nesting_depth_to_go[$iend_next] <
13430                  $nesting_depth_to_go[$ibeg_next];
13431            }
13432
13433            # this must be a line with just an opening token
13434            # or end in a semicolon
13435            if (
13436                $is_semicolon_terminated
13437                || (   $iend_next == $ibeg_next
13438                    || $iend_next == $ibeg_next + 2
13439                    && $types_to_go[$iend_next] eq '#' )
13440              )
13441            {
13442                my $valid_flag = 1;
13443                my $spaces = ( $types_to_go[ $ibeg_next - 1 ] eq 'b' ) ? 1 : 0;
13444                @{$rvertical_tightness_flags} =
13445                  ( 2, $spaces, $type_sequence_to_go[$ibeg_next], $valid_flag,
13446                  );
13447            }
13448        }
13449    }
13450
13451    #--------------------------------------------------------------
13452    # Vertical Tightness Flags Section 2:
13453    # Handle type 3, opening block braces on last line of the batch
13454    # Check for a last line with isolated opening BLOCK curly
13455    #--------------------------------------------------------------
13456    elsif ($rOpts_block_brace_vertical_tightness
13457        && $ibeg eq $iend
13458        && $types_to_go[$iend] eq '{'
13459        && $block_type_to_go[$iend] =~
13460        /$block_brace_vertical_tightness_pattern/o )
13461    {
13462        @{$rvertical_tightness_flags} =
13463          ( 3, $rOpts_block_brace_vertical_tightness, 0, 1 );
13464    }
13465
13466    #--------------------------------------------------------------
13467    # Vertical Tightness Flags Section 3:
13468    # Handle type 4, a closing block brace on the last line of the batch Check
13469    # for a last line with isolated closing BLOCK curly
13470    #--------------------------------------------------------------
13471    elsif ($rOpts_stack_closing_block_brace
13472        && $ibeg eq $iend
13473        && $block_type_to_go[$iend]
13474        && $types_to_go[$iend] eq '}' )
13475    {
13476        my $spaces = $rOpts_block_brace_tightness == 2 ? 0 : 1;
13477        @{$rvertical_tightness_flags} =
13478          ( 4, $spaces, $type_sequence_to_go[$iend], 1 );
13479    }
13480
13481    # pack in the sequence numbers of the ends of this line
13482    $rvertical_tightness_flags->[4] = get_seqno($ibeg);
13483    $rvertical_tightness_flags->[5] = get_seqno($iend);
13484    return $rvertical_tightness_flags;
13485}
13486
13487sub get_seqno {
13488
13489    # get opening and closing sequence numbers of a token for the vertical
13490    # aligner.  Assign qw quotes a value to allow qw opening and closing tokens
13491    # to be treated somewhat like opening and closing tokens for stacking
13492    # tokens by the vertical aligner.
13493    my ($ii) = @_;
13494    my $seqno = $type_sequence_to_go[$ii];
13495    if ( $types_to_go[$ii] eq 'q' ) {
13496        my $SEQ_QW = -1;
13497        if ( $ii > 0 ) {
13498            $seqno = $SEQ_QW if ( $tokens_to_go[$ii] =~ /^qw\s*[\(\{\[]/ );
13499        }
13500        else {
13501            if ( !$ending_in_quote ) {
13502                $seqno = $SEQ_QW if ( $tokens_to_go[$ii] =~ /[\)\}\]]$/ );
13503            }
13504        }
13505    }
13506    return ($seqno);
13507}
13508
13509{
13510    my %is_vertical_alignment_type;
13511    my %is_vertical_alignment_keyword;
13512    my %is_terminal_alignment_type;
13513
13514    BEGIN {
13515
13516        # Removed =~ from list to improve chances of alignment
13517        @_ = qw#
13518          = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x=
13519          { ? : => && || // ~~ !~~
13520          #;
13521        @is_vertical_alignment_type{@_} = (1) x scalar(@_);
13522
13523        # only align these at end of line
13524        @_ = qw(&& ||);
13525        @is_terminal_alignment_type{@_} = (1) x scalar(@_);
13526
13527        # eq and ne were removed from this list to improve alignment chances
13528        @_ = qw(if unless and or err for foreach while until);
13529        @is_vertical_alignment_keyword{@_} = (1) x scalar(@_);
13530    }
13531
13532    sub set_vertical_alignment_markers {
13533
13534        # This routine takes the first step toward vertical alignment of the
13535        # lines of output text.  It looks for certain tokens which can serve as
13536        # vertical alignment markers (such as an '=').
13537        #
13538        # Method: We look at each token $i in this output batch and set
13539        # $matching_token_to_go[$i] equal to those tokens at which we would
13540        # accept vertical alignment.
13541
13542        # nothing to do if we aren't allowed to change whitespace
13543        if ( !$rOpts_add_whitespace ) {
13544            for my $i ( 0 .. $max_index_to_go ) {
13545                $matching_token_to_go[$i] = '';
13546            }
13547            return;
13548        }
13549
13550        my ( $ri_first, $ri_last ) = @_;
13551
13552        # remember the index of last nonblank token before any sidecomment
13553        my $i_terminal = $max_index_to_go;
13554        if ( $types_to_go[$i_terminal] eq '#' ) {
13555            if ( $i_terminal > 0 && $types_to_go[ --$i_terminal ] eq 'b' ) {
13556                if ( $i_terminal > 0 ) { --$i_terminal }
13557            }
13558        }
13559
13560        # look at each line of this batch..
13561        my $last_vertical_alignment_before_index;
13562        my $vert_last_nonblank_type;
13563        my $vert_last_nonblank_token;
13564        my $vert_last_nonblank_block_type;
13565        my $max_line = @$ri_first - 1;
13566        my ( $i, $type, $token, $block_type, $alignment_type );
13567        my ( $ibeg, $iend, $line );
13568
13569        foreach $line ( 0 .. $max_line ) {
13570            $ibeg                                 = $$ri_first[$line];
13571            $iend                                 = $$ri_last[$line];
13572            $last_vertical_alignment_before_index = -1;
13573            $vert_last_nonblank_type              = '';
13574            $vert_last_nonblank_token             = '';
13575            $vert_last_nonblank_block_type        = '';
13576
13577            # look at each token in this output line..
13578            foreach $i ( $ibeg .. $iend ) {
13579                $alignment_type = '';
13580                $type           = $types_to_go[$i];
13581                $block_type     = $block_type_to_go[$i];
13582                $token          = $tokens_to_go[$i];
13583
13584                # check for flag indicating that we should not align
13585                # this token
13586                if ( $matching_token_to_go[$i] ) {
13587                    $matching_token_to_go[$i] = '';
13588                    next;
13589                }
13590
13591                #--------------------------------------------------------
13592                # First see if we want to align BEFORE this token
13593                #--------------------------------------------------------
13594
13595                # The first possible token that we can align before
13596                # is index 2 because: 1) it doesn't normally make sense to
13597                # align before the first token and 2) the second
13598                # token must be a blank if we are to align before
13599                # the third
13600                if ( $i < $ibeg + 2 ) { }
13601
13602                # must follow a blank token
13603                elsif ( $types_to_go[ $i - 1 ] ne 'b' ) { }
13604
13605                # align a side comment --
13606                elsif ( $type eq '#' ) {
13607
13608                    unless (
13609
13610                        # it is a static side comment
13611                        (
13612                               $rOpts->{'static-side-comments'}
13613                            && $token =~ /$static_side_comment_pattern/o
13614                        )
13615
13616                        # or a closing side comment
13617                        || (   $vert_last_nonblank_block_type
13618                            && $token =~
13619                            /$closing_side_comment_prefix_pattern/o )
13620                      )
13621                    {
13622                        $alignment_type = $type;
13623                    }    ## Example of a static side comment
13624                }
13625
13626                # otherwise, do not align two in a row to create a
13627                # blank field
13628                elsif ( $last_vertical_alignment_before_index == $i - 2 ) { }
13629
13630                # align before one of these keywords
13631                # (within a line, since $i>1)
13632                elsif ( $type eq 'k' ) {
13633
13634                    #  /^(if|unless|and|or|eq|ne)$/
13635                    if ( $is_vertical_alignment_keyword{$token} ) {
13636                        $alignment_type = $token;
13637                    }
13638                }
13639
13640                # align before one of these types..
13641                # Note: add '.' after new vertical aligner is operational
13642                elsif ( $is_vertical_alignment_type{$type} ) {
13643                    $alignment_type = $token;
13644
13645                    # Do not align a terminal token.  Although it might
13646                    # occasionally look ok to do this, this has been found to be
13647                    # a good general rule.  The main problems are:
13648                    # (1) that the terminal token (such as an = or :) might get
13649                    # moved far to the right where it is hard to see because
13650                    # nothing follows it, and
13651                    # (2) doing so may prevent other good alignments.
13652                    # Current exceptions are && and ||
13653                    if ( $i == $iend || $i >= $i_terminal ) {
13654                        $alignment_type = ""
13655                          unless ( $is_terminal_alignment_type{$type} );
13656                    }
13657
13658                    # Do not align leading ': (' or '. ('.  This would prevent
13659                    # alignment in something like the following:
13660                    #   $extra_space .=
13661                    #       ( $input_line_number < 10 )  ? "  "
13662                    #     : ( $input_line_number < 100 ) ? " "
13663                    #     :                                "";
13664                    # or
13665                    #  $code =
13666                    #      ( $case_matters ? $accessor : " lc($accessor) " )
13667                    #    . ( $yesno        ? " eq "       : " ne " )
13668                    if (   $i == $ibeg + 2
13669                        && $types_to_go[$ibeg] =~ /^[\.\:]$/
13670                        && $types_to_go[ $i - 1 ] eq 'b' )
13671                    {
13672                        $alignment_type = "";
13673                    }
13674
13675                    # For a paren after keyword, only align something like this:
13676                    #    if    ( $a ) { &a }
13677                    #    elsif ( $b ) { &b }
13678                    if ( $token eq '(' && $vert_last_nonblank_type eq 'k' ) {
13679                        $alignment_type = ""
13680                          unless $vert_last_nonblank_token =~
13681                          /^(if|unless|elsif)$/;
13682                    }
13683
13684                    # be sure the alignment tokens are unique
13685                    # This didn't work well: reason not determined
13686                    # if ($token ne $type) {$alignment_type .= $type}
13687                }
13688
13689                # NOTE: This is deactivated because it causes the previous
13690                # if/elsif alignment to fail
13691                #elsif ( $type eq '}' && $token eq '}' && $block_type_to_go[$i])
13692                #{ $alignment_type = $type; }
13693
13694                if ($alignment_type) {
13695                    $last_vertical_alignment_before_index = $i;
13696                }
13697
13698                #--------------------------------------------------------
13699                # Next see if we want to align AFTER the previous nonblank
13700                #--------------------------------------------------------
13701
13702                # We want to line up ',' and interior ';' tokens, with the added
13703                # space AFTER these tokens.  (Note: interior ';' is included
13704                # because it may occur in short blocks).
13705                if (
13706
13707                    # we haven't already set it
13708                    !$alignment_type
13709
13710                    # and its not the first token of the line
13711                    && ( $i > $ibeg )
13712
13713                    # and it follows a blank
13714                    && $types_to_go[ $i - 1 ] eq 'b'
13715
13716                    # and previous token IS one of these:
13717                    && ( $vert_last_nonblank_type =~ /^[\,\;]$/ )
13718
13719                    # and it's NOT one of these
13720                    && ( $type !~ /^[b\#\)\]\}]$/ )
13721
13722                    # then go ahead and align
13723                  )
13724
13725                {
13726                    $alignment_type = $vert_last_nonblank_type;
13727                }
13728
13729                #--------------------------------------------------------
13730                # then store the value
13731                #--------------------------------------------------------
13732                $matching_token_to_go[$i] = $alignment_type;
13733                if ( $type ne 'b' ) {
13734                    $vert_last_nonblank_type       = $type;
13735                    $vert_last_nonblank_token      = $token;
13736                    $vert_last_nonblank_block_type = $block_type;
13737                }
13738            }
13739        }
13740    }
13741}
13742
13743sub terminal_type {
13744
13745    #    returns type of last token on this line (terminal token), as follows:
13746    #    returns # for a full-line comment
13747    #    returns ' ' for a blank line
13748    #    otherwise returns final token type
13749
13750    my ( $rtype, $rblock_type, $ibeg, $iend ) = @_;
13751
13752    # check for full-line comment..
13753    if ( $$rtype[$ibeg] eq '#' ) {
13754        return wantarray ? ( $$rtype[$ibeg], $ibeg ) : $$rtype[$ibeg];
13755    }
13756    else {
13757
13758        # start at end and walk bakwards..
13759        for ( my $i = $iend ; $i >= $ibeg ; $i-- ) {
13760
13761            # skip past any side comment and blanks
13762            next if ( $$rtype[$i] eq 'b' );
13763            next if ( $$rtype[$i] eq '#' );
13764
13765            # found it..make sure it is a BLOCK termination,
13766            # but hide a terminal } after sort/grep/map because it is not
13767            # necessarily the end of the line.  (terminal.t)
13768            my $terminal_type = $$rtype[$i];
13769            if (
13770                $terminal_type eq '}'
13771                && ( !$$rblock_type[$i]
13772                    || ( $is_sort_map_grep_eval_do{ $$rblock_type[$i] } ) )
13773              )
13774            {
13775                $terminal_type = 'b';
13776            }
13777            return wantarray ? ( $terminal_type, $i ) : $terminal_type;
13778        }
13779
13780        # empty line
13781        return wantarray ? ( ' ', $ibeg ) : ' ';
13782    }
13783}
13784
13785{    # set_bond_strengths
13786
13787    my %is_good_keyword_breakpoint;
13788    my %is_lt_gt_le_ge;
13789
13790    my %binary_bond_strength;
13791    my %nobreak_lhs;
13792    my %nobreak_rhs;
13793
13794    my @bias_tokens;
13795    my $delta_bias;
13796
13797    sub bias_table_key {
13798        my ( $type, $token ) = @_;
13799        my $bias_table_key = $type;
13800        if ( $type eq 'k' ) {
13801            $bias_table_key = $token;
13802            if ( $token eq 'err' ) { $bias_table_key = 'or' }
13803        }
13804        return $bias_table_key;
13805    }
13806
13807    sub set_bond_strengths {
13808
13809        BEGIN {
13810
13811            @_ = qw(if unless while until for foreach);
13812            @is_good_keyword_breakpoint{@_} = (1) x scalar(@_);
13813
13814            @_ = qw(lt gt le ge);
13815            @is_lt_gt_le_ge{@_} = (1) x scalar(@_);
13816            #
13817            # The decision about where to break a line depends upon a "bond
13818            # strength" between tokens.  The LOWER the bond strength, the MORE
13819            # likely a break.  A bond strength may be any value but to simplify
13820            # things there are several pre-defined strength levels:
13821
13822            #    NO_BREAK    => 10000;
13823            #    VERY_STRONG => 100;
13824            #    STRONG      => 2.1;
13825            #    NOMINAL     => 1.1;
13826            #    WEAK        => 0.8;
13827            #    VERY_WEAK   => 0.55;
13828
13829            # The strength values are based on trial-and-error, and need to be
13830            # tweaked occasionally to get desired results.  Some comments:
13831            #
13832            #   1. Only relative strengths are important.  small differences
13833            #      in strengths can make big formatting differences.
13834            #   2. Each indentation level adds one unit of bond strength.
13835            #   3. A value of NO_BREAK makes an unbreakable bond
13836            #   4. A value of VERY_WEAK is the strength of a ','
13837            #   5. Values below NOMINAL are considered ok break points.
13838            #   6. Values above NOMINAL are considered poor break points.
13839            #
13840            # The bond strengths should roughly follow precenence order where
13841            # possible.  If you make changes, please check the results very
13842            # carefully on a variety of scripts.  Testing with the -extrude
13843            # options is particularly helpful in exercising all of the rules.
13844
13845            # Wherever possible, bond strengths are defined in the following
13846            # tables.  There are two main stages to setting bond strengths and
13847            # two types of tables:
13848            #
13849            # The first stage involves looking at each token individually and
13850            # defining left and right bond strengths, according to if we want
13851            # to break to the left or right side, and how good a break point it
13852            # is.  For example tokens like =, ||, && make good break points and
13853            # will have low strengths, but one might want to break on either
13854            # side to put them at the end of one line or beginning of the next.
13855            #
13856            # The second stage involves looking at certain pairs of tokens and
13857            # defining a bond strength for that particular pair.  This second
13858            # stage has priority.
13859
13860            #---------------------------------------------------------------
13861            # Bond Strength BEGIN Section 1.
13862            # Set left and right bond strengths of individual tokens.
13863            #---------------------------------------------------------------
13864
13865            # NOTE: NO_BREAK's set in this section first are HINTS which will
13866            # probably not be honored. Essential NO_BREAKS's should be set in
13867            # BEGIN Section 2 or hardwired in the NO_BREAK coding near the end
13868            # of this subroutine.
13869
13870            # Note that we are setting defaults in this section.  The user
13871            # cannot change bond strengths but can cause the left and right
13872            # bond strengths of any token type to be swapped through the use of
13873            # the -wba and -wbb flags. In this way the user can determine if a
13874            # breakpoint token should appear at the end of one line or the
13875            # beginning of the next line.
13876
13877            # The hash keys in this section are token types, plus the text of
13878            # certain keywords like 'or', 'and'.
13879
13880            # no break around possible filehandle
13881            $left_bond_strength{'Z'}  = NO_BREAK;
13882            $right_bond_strength{'Z'} = NO_BREAK;
13883
13884            # never put a bare word on a new line:
13885            # example print (STDERR, "bla"); will fail with break after (
13886            $left_bond_strength{'w'} = NO_BREAK;
13887
13888            # blanks always have infinite strength to force breaks after
13889            # real tokens
13890            $right_bond_strength{'b'} = NO_BREAK;
13891
13892            # try not to break on exponentation
13893            @_                       = qw" ** .. ... <=> ";
13894            @left_bond_strength{@_}  = (STRONG) x scalar(@_);
13895            @right_bond_strength{@_} = (STRONG) x scalar(@_);
13896
13897            # The comma-arrow has very low precedence but not a good break point
13898            $left_bond_strength{'=>'}  = NO_BREAK;
13899            $right_bond_strength{'=>'} = NOMINAL;
13900
13901            # ok to break after label
13902            $left_bond_strength{'J'}  = NO_BREAK;
13903            $right_bond_strength{'J'} = NOMINAL;
13904            $left_bond_strength{'j'}  = STRONG;
13905            $right_bond_strength{'j'} = STRONG;
13906            $left_bond_strength{'A'}  = STRONG;
13907            $right_bond_strength{'A'} = STRONG;
13908
13909            $left_bond_strength{'->'}  = STRONG;
13910            $right_bond_strength{'->'} = VERY_STRONG;
13911
13912            $left_bond_strength{'CORE::'}  = NOMINAL;
13913            $right_bond_strength{'CORE::'} = NO_BREAK;
13914
13915            # breaking AFTER modulus operator is ok:
13916            @_ = qw" % ";
13917            @left_bond_strength{@_} = (STRONG) x scalar(@_);
13918            @right_bond_strength{@_} =
13919              ( 0.1 * NOMINAL + 0.9 * STRONG ) x scalar(@_);
13920
13921            # Break AFTER math operators * and /
13922            @_                       = qw" * / x  ";
13923            @left_bond_strength{@_}  = (STRONG) x scalar(@_);
13924            @right_bond_strength{@_} = (NOMINAL) x scalar(@_);
13925
13926            # Break AFTER weakest math operators + and -
13927            # Make them weaker than * but a bit stronger than '.'
13928            @_ = qw" + - ";
13929            @left_bond_strength{@_} = (STRONG) x scalar(@_);
13930            @right_bond_strength{@_} =
13931              ( 0.91 * NOMINAL + 0.09 * WEAK ) x scalar(@_);
13932
13933            # breaking BEFORE these is just ok:
13934            @_                       = qw" >> << ";
13935            @right_bond_strength{@_} = (STRONG) x scalar(@_);
13936            @left_bond_strength{@_}  = (NOMINAL) x scalar(@_);
13937
13938            # breaking before the string concatenation operator seems best
13939            # because it can be hard to see at the end of a line
13940            $right_bond_strength{'.'} = STRONG;
13941            $left_bond_strength{'.'}  = 0.9 * NOMINAL + 0.1 * WEAK;
13942
13943            @_                       = qw"} ] ) R";
13944            @left_bond_strength{@_}  = (STRONG) x scalar(@_);
13945            @right_bond_strength{@_} = (NOMINAL) x scalar(@_);
13946
13947            # make these a little weaker than nominal so that they get
13948            # favored for end-of-line characters
13949            @_ = qw"!= == =~ !~ ~~ !~~";
13950            @left_bond_strength{@_} = (STRONG) x scalar(@_);
13951            @right_bond_strength{@_} =
13952              ( 0.9 * NOMINAL + 0.1 * WEAK ) x scalar(@_);
13953
13954            # break AFTER these
13955            @_ = qw" < >  | & >= <=";
13956            @left_bond_strength{@_} = (VERY_STRONG) x scalar(@_);
13957            @right_bond_strength{@_} =
13958              ( 0.8 * NOMINAL + 0.2 * WEAK ) x scalar(@_);
13959
13960            # breaking either before or after a quote is ok
13961            # but bias for breaking before a quote
13962            $left_bond_strength{'Q'}  = NOMINAL;
13963            $right_bond_strength{'Q'} = NOMINAL + 0.02;
13964            $left_bond_strength{'q'}  = NOMINAL;
13965            $right_bond_strength{'q'} = NOMINAL;
13966
13967            # starting a line with a keyword is usually ok
13968            $left_bond_strength{'k'} = NOMINAL;
13969
13970            # we usually want to bond a keyword strongly to what immediately
13971            # follows, rather than leaving it stranded at the end of a line
13972            $right_bond_strength{'k'} = STRONG;
13973
13974            $left_bond_strength{'G'}  = NOMINAL;
13975            $right_bond_strength{'G'} = STRONG;
13976
13977            # assignment operators
13978            @_ = qw(
13979              = **= += *= &= <<= &&=
13980              -= /= |= >>= ||= //=
13981              .= %= ^=
13982              x=
13983            );
13984
13985            # Default is is to break AFTER various assignment operators
13986            @left_bond_strength{@_} = (STRONG) x scalar(@_);
13987            @right_bond_strength{@_} =
13988              ( 0.4 * WEAK + 0.6 * VERY_WEAK ) x scalar(@_);
13989
13990            # Default is to break BEFORE '&&' and '||' and '//'
13991            # set strength of '||' to same as '=' so that chains like
13992            # $a = $b || $c || $d   will break before the first '||'
13993            $right_bond_strength{'||'} = NOMINAL;
13994            $left_bond_strength{'||'}  = $right_bond_strength{'='};
13995
13996            # same thing for '//'
13997            $right_bond_strength{'//'} = NOMINAL;
13998            $left_bond_strength{'//'}  = $right_bond_strength{'='};
13999
14000            # set strength of && a little higher than ||
14001            $right_bond_strength{'&&'} = NOMINAL;
14002            $left_bond_strength{'&&'}  = $left_bond_strength{'||'} + 0.1;
14003
14004            $left_bond_strength{';'}  = VERY_STRONG;
14005            $right_bond_strength{';'} = VERY_WEAK;
14006            $left_bond_strength{'f'}  = VERY_STRONG;
14007
14008            # make right strength of for ';' a little less than '='
14009            # to make for contents break after the ';' to avoid this:
14010            #   for ( $j = $number_of_fields - 1 ; $j < $item_count ; $j +=
14011            #     $number_of_fields )
14012            # and make it weaker than ',' and 'and' too
14013            $right_bond_strength{'f'} = VERY_WEAK - 0.03;
14014
14015            # The strengths of ?/: should be somewhere between
14016            # an '=' and a quote (NOMINAL),
14017            # make strength of ':' slightly less than '?' to help
14018            # break long chains of ? : after the colons
14019            $left_bond_strength{':'}  = 0.4 * WEAK + 0.6 * NOMINAL;
14020            $right_bond_strength{':'} = NO_BREAK;
14021            $left_bond_strength{'?'}  = $left_bond_strength{':'} + 0.01;
14022            $right_bond_strength{'?'} = NO_BREAK;
14023
14024            $left_bond_strength{','}  = VERY_STRONG;
14025            $right_bond_strength{','} = VERY_WEAK;
14026
14027            # remaining digraphs and trigraphs not defined above
14028            @_                       = qw( :: <> ++ --);
14029            @left_bond_strength{@_}  = (WEAK) x scalar(@_);
14030            @right_bond_strength{@_} = (STRONG) x scalar(@_);
14031
14032            # Set bond strengths of certain keywords
14033            # make 'or', 'err', 'and' slightly weaker than a ','
14034            $left_bond_strength{'and'}  = VERY_WEAK - 0.01;
14035            $left_bond_strength{'or'}   = VERY_WEAK - 0.02;
14036            $left_bond_strength{'err'}  = VERY_WEAK - 0.02;
14037            $left_bond_strength{'xor'}  = NOMINAL;
14038            $right_bond_strength{'and'} = NOMINAL;
14039            $right_bond_strength{'or'}  = NOMINAL;
14040            $right_bond_strength{'err'} = NOMINAL;
14041            $right_bond_strength{'xor'} = STRONG;
14042
14043            #---------------------------------------------------------------
14044            # Bond Strength BEGIN Section 2.
14045            # Set binary rules for bond strengths between certain token types.
14046            #---------------------------------------------------------------
14047
14048            #  We have a little problem making tables which apply to the
14049            #  container tokens.  Here is a list of container tokens and
14050            #  their types:
14051            #
14052            #   type    tokens // meaning
14053            #      {    {, [, ( // indent
14054            #      }    }, ], ) // outdent
14055            #      [    [ // left non-structural [ (enclosing an array index)
14056            #      ]    ] // right non-structural square bracket
14057            #      (    ( // left non-structural paren
14058            #      )    ) // right non-structural paren
14059            #      L    { // left non-structural curly brace (enclosing a key)
14060            #      R    } // right non-structural curly brace
14061            #
14062            #  Some rules apply to token types and some to just the token
14063            #  itself.  We solve the problem by combining type and token into a
14064            #  new hash key for the container types.
14065            #
14066            #  If a rule applies to a token 'type' then we need to make rules
14067            #  for each of these 'type.token' combinations:
14068            #  Type    Type.Token
14069            #  {       {{, {[, {(
14070            #  [       [[
14071            #  (       ((
14072            #  L       L{
14073            #  }       }}, }], })
14074            #  ]       ]]
14075            #  )       ))
14076            #  R       R}
14077            #
14078            #  If a rule applies to a token then we need to make rules for
14079            #  these 'type.token' combinations:
14080            #  Token   Type.Token
14081            #  {       {{, L{
14082            #  [       {[, [[
14083            #  (       {(, ((
14084            #  }       }}, R}
14085            #  ]       }], ]]
14086            #  )       }), ))
14087
14088            # allow long lines before final { in an if statement, as in:
14089            #    if (..........
14090            #      ..........)
14091            #    {
14092            #
14093            # Otherwise, the line before the { tends to be too short.
14094
14095            $binary_bond_strength{'))'}{'{{'} = VERY_WEAK + 0.03;
14096            $binary_bond_strength{'(('}{'{{'} = NOMINAL;
14097
14098            # break on something like '} (', but keep this stronger than a ','
14099            # example is in 'howe.pl'
14100            $binary_bond_strength{'R}'}{'(('} = 0.8 * VERY_WEAK + 0.2 * WEAK;
14101            $binary_bond_strength{'}}'}{'(('} = 0.8 * VERY_WEAK + 0.2 * WEAK;
14102
14103            # keep matrix and hash indices together
14104            # but make them a little below STRONG to allow breaking open
14105            # something like {'some-word'}{'some-very-long-word'} at the }{
14106            # (bracebrk.t)
14107            $binary_bond_strength{']]'}{'[['} = 0.9 * STRONG + 0.1 * NOMINAL;
14108            $binary_bond_strength{']]'}{'L{'} = 0.9 * STRONG + 0.1 * NOMINAL;
14109            $binary_bond_strength{'R}'}{'[['} = 0.9 * STRONG + 0.1 * NOMINAL;
14110            $binary_bond_strength{'R}'}{'L{'} = 0.9 * STRONG + 0.1 * NOMINAL;
14111
14112            # increase strength to the point where a break in the following
14113            # will be after the opening paren rather than at the arrow:
14114            #    $a->$b($c);
14115            $binary_bond_strength{'i'}{'->'} = 1.45 * STRONG;
14116
14117            $binary_bond_strength{'))'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
14118            $binary_bond_strength{']]'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
14119            $binary_bond_strength{'})'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
14120            $binary_bond_strength{'}]'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
14121            $binary_bond_strength{'}}'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
14122            $binary_bond_strength{'R}'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
14123
14124            $binary_bond_strength{'))'}{'[['} = 0.2 * STRONG + 0.8 * NOMINAL;
14125            $binary_bond_strength{'})'}{'[['} = 0.2 * STRONG + 0.8 * NOMINAL;
14126            $binary_bond_strength{'))'}{'{['} = 0.2 * STRONG + 0.8 * NOMINAL;
14127            $binary_bond_strength{'})'}{'{['} = 0.2 * STRONG + 0.8 * NOMINAL;
14128
14129            #---------------------------------------------------------------
14130            # Binary NO_BREAK rules
14131            #---------------------------------------------------------------
14132
14133            # use strict requires that bare word and => not be separated
14134            $binary_bond_strength{'C'}{'=>'} = NO_BREAK;
14135            $binary_bond_strength{'U'}{'=>'} = NO_BREAK;
14136
14137            # Never break between a bareword and a following paren because
14138            # perl may give an error.  For example, if a break is placed
14139            # between 'to_filehandle' and its '(' the following line will
14140            # give a syntax error [Carp.pm]: my( $no) =fileno(
14141            # to_filehandle( $in)) ;
14142            $binary_bond_strength{'C'}{'(('} = NO_BREAK;
14143            $binary_bond_strength{'C'}{'{('} = NO_BREAK;
14144            $binary_bond_strength{'U'}{'(('} = NO_BREAK;
14145            $binary_bond_strength{'U'}{'{('} = NO_BREAK;
14146
14147            # use strict requires that bare word within braces not start new
14148            # line
14149            $binary_bond_strength{'L{'}{'w'} = NO_BREAK;
14150
14151            $binary_bond_strength{'w'}{'R}'} = NO_BREAK;
14152
14153            # use strict requires that bare word and => not be separated
14154            $binary_bond_strength{'w'}{'=>'} = NO_BREAK;
14155
14156            # use strict does not allow separating type info from trailing { }
14157            # testfile is readmail.pl
14158            $binary_bond_strength{'t'}{'L{'} = NO_BREAK;
14159            $binary_bond_strength{'i'}{'L{'} = NO_BREAK;
14160
14161            # As a defensive measure, do not break between a '(' and a
14162            # filehandle.  In some cases, this can cause an error.  For
14163            # example, the following program works:
14164            #    my $msg="hi!\n";
14165            #    print
14166            #    ( STDOUT
14167            #    $msg
14168            #    );
14169            #
14170            # But this program fails:
14171            #    my $msg="hi!\n";
14172            #    print
14173            #    (
14174            #    STDOUT
14175            #    $msg
14176            #    );
14177            #
14178            # This is normally only a problem with the 'extrude' option
14179            $binary_bond_strength{'(('}{'Y'} = NO_BREAK;
14180            $binary_bond_strength{'{('}{'Y'} = NO_BREAK;
14181
14182            # never break between sub name and opening paren
14183            $binary_bond_strength{'w'}{'(('} = NO_BREAK;
14184            $binary_bond_strength{'w'}{'{('} = NO_BREAK;
14185
14186            # keep '}' together with ';'
14187            $binary_bond_strength{'}}'}{';'} = NO_BREAK;
14188
14189            # Breaking before a ++ can cause perl to guess wrong. For
14190            # example the following line will cause a syntax error
14191            # with -extrude if we break between '$i' and '++' [fixstyle2]
14192            #   print( ( $i++ & 1 ) ? $_ : ( $change{$_} || $_ ) );
14193            $nobreak_lhs{'++'} = NO_BREAK;
14194
14195            # Do not break before a possible file handle
14196            $nobreak_lhs{'Z'} = NO_BREAK;
14197
14198            # use strict hates bare words on any new line.  For
14199            # example, a break before the underscore here provokes the
14200            # wrath of use strict:
14201            # if ( -r $fn && ( -s _ || $AllowZeroFilesize)) {
14202            $nobreak_rhs{'F'}      = NO_BREAK;
14203            $nobreak_rhs{'CORE::'} = NO_BREAK;
14204
14205            #---------------------------------------------------------------
14206            # Bond Strength BEGIN Section 3.
14207            # Define tables and values for applying a small bias to the above
14208            # values.
14209            #---------------------------------------------------------------
14210            # Adding a small 'bias' to strengths is a simple way to make a line
14211            # break at the first of a sequence of identical terms.  For
14212            # example, to force long string of conditional operators to break
14213            # with each line ending in a ':', we can add a small number to the
14214            # bond strength of each ':' (colon.t)
14215            @bias_tokens = qw( : && || f and or . );    # tokens which get bias
14216            $delta_bias = 0.0001;    # a very small strength level
14217
14218        } ## end BEGIN
14219
14220        # patch-its always ok to break at end of line
14221        $nobreak_to_go[$max_index_to_go] = 0;
14222
14223        # we start a new set of bias values for each line
14224        my %bias;
14225        @bias{@bias_tokens} = (0) x scalar(@bias_tokens);
14226        my $code_bias = -.01;        # bias for closing block braces
14227
14228        my $type  = 'b';
14229        my $token = ' ';
14230        my $last_type;
14231        my $last_nonblank_type  = $type;
14232        my $last_nonblank_token = $token;
14233        my $list_str            = $left_bond_strength{'?'};
14234
14235        my ( $block_type, $i_next, $i_next_nonblank, $next_nonblank_token,
14236            $next_nonblank_type, $next_token, $next_type, $total_nesting_depth,
14237        );
14238
14239        # main loop to compute bond strengths between each pair of tokens
14240        for ( my $i = 0 ; $i <= $max_index_to_go ; $i++ ) {
14241            $last_type = $type;
14242            if ( $type ne 'b' ) {
14243                $last_nonblank_type  = $type;
14244                $last_nonblank_token = $token;
14245            }
14246            $type = $types_to_go[$i];
14247
14248            # strength on both sides of a blank is the same
14249            if ( $type eq 'b' && $last_type ne 'b' ) {
14250                $bond_strength_to_go[$i] = $bond_strength_to_go[ $i - 1 ];
14251                next;
14252            }
14253
14254            $token               = $tokens_to_go[$i];
14255            $block_type          = $block_type_to_go[$i];
14256            $i_next              = $i + 1;
14257            $next_type           = $types_to_go[$i_next];
14258            $next_token          = $tokens_to_go[$i_next];
14259            $total_nesting_depth = $nesting_depth_to_go[$i_next];
14260            $i_next_nonblank     = ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 );
14261            $next_nonblank_type  = $types_to_go[$i_next_nonblank];
14262            $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
14263
14264            # We are computing the strength of the bond between the current
14265            # token and the NEXT token.
14266
14267            #---------------------------------------------------------------
14268            # Bond Strength Section 1:
14269            # First Approximation.
14270            # Use minimum of individual left and right tabulated bond
14271            # strengths.
14272            #---------------------------------------------------------------
14273            my $bsr = $right_bond_strength{$type};
14274            my $bsl = $left_bond_strength{$next_nonblank_type};
14275
14276            # define right bond strengths of certain keywords
14277            if ( $type eq 'k' && defined( $right_bond_strength{$token} ) ) {
14278                $bsr = $right_bond_strength{$token};
14279            }
14280            elsif ( $token eq 'ne' or $token eq 'eq' ) {
14281                $bsr = NOMINAL;
14282            }
14283
14284            # set terminal bond strength to the nominal value
14285            # this will cause good preceding breaks to be retained
14286            if ( $i_next_nonblank > $max_index_to_go ) {
14287                $bsl = NOMINAL;
14288            }
14289
14290            # define right bond strengths of certain keywords
14291            if ( $next_nonblank_type eq 'k'
14292                && defined( $left_bond_strength{$next_nonblank_token} ) )
14293            {
14294                $bsl = $left_bond_strength{$next_nonblank_token};
14295            }
14296            elsif ($next_nonblank_token eq 'ne'
14297                or $next_nonblank_token eq 'eq' )
14298            {
14299                $bsl = NOMINAL;
14300            }
14301            elsif ( $is_lt_gt_le_ge{$next_nonblank_token} ) {
14302                $bsl = 0.9 * NOMINAL + 0.1 * STRONG;
14303            }
14304
14305            # Use the minimum of the left and right strengths.  Note: it might
14306            # seem that we would want to keep a NO_BREAK if either token has
14307            # this value.  This didn't work, for example because in an arrow
14308            # list, it prevents the comma from separating from the following
14309            # bare word (which is probably quoted by its arrow).  So necessary
14310            # NO_BREAK's have to be handled as special cases in the final
14311            # section.
14312            if ( !defined($bsr) ) { $bsr = VERY_STRONG }
14313            if ( !defined($bsl) ) { $bsl = VERY_STRONG }
14314            my $bond_str = ( $bsr < $bsl ) ? $bsr : $bsl;
14315            my $bond_str_1 = $bond_str;
14316
14317            #---------------------------------------------------------------
14318            # Bond Strength Section 2:
14319            # Apply hardwired rules..
14320            #---------------------------------------------------------------
14321
14322            # Patch to put terminal or clauses on a new line: Weaken the bond
14323            # at an || followed by die or similar keyword to make the terminal
14324            # or clause fall on a new line, like this:
14325            #
14326            #   my $class = shift
14327            #     || die "Cannot add broadcast:  No class identifier found";
14328            #
14329            # Otherwise the break will be at the previous '=' since the || and
14330            # = have the same starting strength and the or is biased, like
14331            # this:
14332            #
14333            # my $class =
14334            #   shift || die "Cannot add broadcast:  No class identifier found";
14335            #
14336            # In any case if the user places a break at either the = or the ||
14337            # it should remain there.
14338            if ( $type eq '||' || $type eq 'k' && $token eq 'or' ) {
14339                if ( $next_nonblank_token =~ /^(die|confess|croak|warn)$/ ) {
14340                    if ( $want_break_before{$token} && $i > 0 ) {
14341                        $bond_strength_to_go[ $i - 1 ] -= $delta_bias;
14342                    }
14343                    else {
14344                        $bond_str -= $delta_bias;
14345                    }
14346                }
14347            }
14348
14349            # good to break after end of code blocks
14350            if ( $type eq '}' && $block_type && $next_nonblank_type ne ';' ) {
14351
14352                $bond_str = 0.5 * WEAK + 0.5 * VERY_WEAK + $code_bias;
14353                $code_bias += $delta_bias;
14354            }
14355
14356            if ( $type eq 'k' ) {
14357
14358                # allow certain control keywords to stand out
14359                if (   $next_nonblank_type eq 'k'
14360                    && $is_last_next_redo_return{$token} )
14361                {
14362                    $bond_str = 0.45 * WEAK + 0.55 * VERY_WEAK;
14363                }
14364
14365                # Don't break after keyword my.  This is a quick fix for a
14366                # rare problem with perl. An example is this line from file
14367                # Container.pm:
14368
14369                # foreach my $question( Debian::DebConf::ConfigDb::gettree(
14370                # $this->{'question'} ) )
14371
14372                if ( $token eq 'my' ) {
14373                    $bond_str = NO_BREAK;
14374                }
14375
14376            }
14377
14378            # good to break before 'if', 'unless', etc
14379            if ( $is_if_brace_follower{$next_nonblank_token} ) {
14380                $bond_str = VERY_WEAK;
14381            }
14382
14383            if ( $next_nonblank_type eq 'k' && $type ne 'CORE::' ) {
14384
14385                # FIXME: needs more testing
14386                if ( $is_keyword_returning_list{$next_nonblank_token} ) {
14387                    $bond_str = $list_str if ( $bond_str > $list_str );
14388                }
14389
14390                # keywords like 'unless', 'if', etc, within statements
14391                # make good breaks
14392                if ( $is_good_keyword_breakpoint{$next_nonblank_token} ) {
14393                    $bond_str = VERY_WEAK / 1.05;
14394                }
14395            }
14396
14397            # try not to break before a comma-arrow
14398            elsif ( $next_nonblank_type eq '=>' ) {
14399                if ( $bond_str < STRONG ) { $bond_str = STRONG }
14400            }
14401
14402            #---------------------------------------------------------------
14403            # Additional hardwired NOBREAK rules
14404            #---------------------------------------------------------------
14405
14406            # map1.t -- correct for a quirk in perl
14407            if (   $token eq '('
14408                && $next_nonblank_type eq 'i'
14409                && $last_nonblank_type eq 'k'
14410                && $is_sort_map_grep{$last_nonblank_token} )
14411
14412              #     /^(sort|map|grep)$/ )
14413            {
14414                $bond_str = NO_BREAK;
14415            }
14416
14417            # extrude.t: do not break before paren at:
14418            #    -l pid_filename(
14419            if ( $last_nonblank_type eq 'F' && $next_nonblank_token eq '(' ) {
14420                $bond_str = NO_BREAK;
14421            }
14422
14423            # in older version of perl, use strict can cause problems with
14424            # breaks before bare words following opening parens.  For example,
14425            # this will fail under older versions if a break is made between
14426            # '(' and 'MAIL': use strict; open( MAIL, "a long filename or
14427            # command"); close MAIL;
14428            if ( $type eq '{' ) {
14429
14430                if ( $token eq '(' && $next_nonblank_type eq 'w' ) {
14431
14432                    # but it's fine to break if the word is followed by a '=>'
14433                    # or if it is obviously a sub call
14434                    my $i_next_next_nonblank = $i_next_nonblank + 1;
14435                    my $next_next_type = $types_to_go[$i_next_next_nonblank];
14436                    if (   $next_next_type eq 'b'
14437                        && $i_next_nonblank < $max_index_to_go )
14438                    {
14439                        $i_next_next_nonblank++;
14440                        $next_next_type = $types_to_go[$i_next_next_nonblank];
14441                    }
14442
14443                    # We'll check for an old breakpoint and keep a leading
14444                    # bareword if it was that way in the input file.
14445                    # Presumably it was ok that way.  For example, the
14446                    # following would remain unchanged:
14447                    #
14448                    # @months = (
14449                    #   January,   February, March,    April,
14450                    #   May,       June,     July,     August,
14451                    #   September, October,  November, December,
14452                    # );
14453                    #
14454                    # This should be sufficient:
14455                    if (
14456                        !$old_breakpoint_to_go[$i]
14457                        && (   $next_next_type eq ','
14458                            || $next_next_type eq '}' )
14459                      )
14460                    {
14461                        $bond_str = NO_BREAK;
14462                    }
14463                }
14464            }
14465
14466            # Do not break between a possible filehandle and a ? or / and do
14467            # not introduce a break after it if there is no blank
14468            # (extrude.t)
14469            elsif ( $type eq 'Z' ) {
14470
14471                # dont break..
14472                if (
14473
14474                    # if there is no blank and we do not want one. Examples:
14475                    #    print $x++    # do not break after $x
14476                    #    print HTML"HELLO"   # break ok after HTML
14477                    (
14478                           $next_type ne 'b'
14479                        && defined( $want_left_space{$next_type} )
14480                        && $want_left_space{$next_type} == WS_NO
14481                    )
14482
14483                    # or we might be followed by the start of a quote
14484                    || $next_nonblank_type =~ /^[\/\?]$/
14485                  )
14486                {
14487                    $bond_str = NO_BREAK;
14488                }
14489            }
14490
14491            # Breaking before a ? before a quote can cause trouble if
14492            # they are not separated by a blank.
14493            # Example: a syntax error occurs if you break before the ? here
14494            #  my$logic=join$all?' && ':' || ',@regexps;
14495            # From: Professional_Perl_Programming_Code/multifind.pl
14496            if ( $next_nonblank_type eq '?' ) {
14497                $bond_str = NO_BREAK
14498                  if ( $types_to_go[ $i_next_nonblank + 1 ] eq 'Q' );
14499            }
14500
14501            # Breaking before a . followed by a number
14502            # can cause trouble if there is no intervening space
14503            # Example: a syntax error occurs if you break before the .2 here
14504            #  $str .= pack($endian.2, ensurrogate($ord));
14505            # From: perl58/Unicode.pm
14506            elsif ( $next_nonblank_type eq '.' ) {
14507                $bond_str = NO_BREAK
14508                  if ( $types_to_go[ $i_next_nonblank + 1 ] eq 'n' );
14509            }
14510
14511            # patch to put cuddled elses back together when on multiple
14512            # lines, as in: } \n else \n { \n
14513            if ($rOpts_cuddled_else) {
14514
14515                if (   ( $token eq 'else' ) && ( $next_nonblank_type eq '{' )
14516                    || ( $type eq '}' ) && ( $next_nonblank_token eq 'else' ) )
14517                {
14518                    $bond_str = NO_BREAK;
14519                }
14520            }
14521            my $bond_str_2 = $bond_str;
14522
14523            #---------------------------------------------------------------
14524            # End of hardwired rules
14525            #---------------------------------------------------------------
14526
14527            #---------------------------------------------------------------
14528            # Bond Strength Section 3:
14529            # Apply table rules. These have priority over the above
14530            # hardwired rules.
14531            #---------------------------------------------------------------
14532
14533            my $tabulated_bond_str;
14534            my $ltype = $type;
14535            my $rtype = $next_nonblank_type;
14536            if ( $token =~ /^[\(\[\{\)\]\}]/ ) { $ltype = $type . $token }
14537            if ( $next_nonblank_token =~ /^[\(\[\{\)\]\}]/ ) {
14538                $rtype = $next_nonblank_type . $next_nonblank_token;
14539            }
14540
14541            if ( $binary_bond_strength{$ltype}{$rtype} ) {
14542                $bond_str           = $binary_bond_strength{$ltype}{$rtype};
14543                $tabulated_bond_str = $bond_str;
14544            }
14545
14546            if ( $nobreak_rhs{$ltype} || $nobreak_lhs{$rtype} ) {
14547                $bond_str           = NO_BREAK;
14548                $tabulated_bond_str = $bond_str;
14549            }
14550            my $bond_str_3 = $bond_str;
14551
14552            # If the hardwired rules conflict with the tabulated bond
14553            # strength then there is an inconsistency that should be fixed
14554            FORMATTER_DEBUG_FLAG_BOND_TABLES
14555              && $tabulated_bond_str
14556              && $bond_str_1
14557              && $bond_str_1 != $bond_str_2
14558              && $bond_str_2 != $tabulated_bond_str
14559              && do {
14560                print STDERR
14561"BOND_TABLES: ltype=$ltype rtype=$rtype $bond_str_1->$bond_str_2->$bond_str_3\n";
14562              };
14563
14564           #-----------------------------------------------------------------
14565           # Bond Strength Section 4:
14566           # Modify strengths of certain tokens which often occur in sequence
14567           # by adding a small bias to each one in turn so that the breaks
14568           # occur from left to right.
14569           #
14570           # Note that we only changing strengths by small amounts here,
14571           # and usually increasing, so we should not be altering any NO_BREAKs.
14572           # Other routines which check for NO_BREAKs will use a tolerance
14573           # of one to avoid any problem.
14574           #-----------------------------------------------------------------
14575
14576            # The bias tables use special keys
14577            my $left_key = bias_table_key( $type, $token );
14578            my $right_key =
14579              bias_table_key( $next_nonblank_type, $next_nonblank_token );
14580
14581            # add any bias set by sub scan_list at old comma break points.
14582            if ( $type eq ',' ) { $bond_str += $bond_strength_to_go[$i] }
14583
14584            # bias left token
14585            elsif ( defined( $bias{$left_key} ) ) {
14586                if ( !$want_break_before{$left_key} ) {
14587                    $bias{$left_key} += $delta_bias;
14588                    $bond_str += $bias{$left_key};
14589                }
14590            }
14591
14592            # bias right token
14593            if ( defined( $bias{$right_key} ) ) {
14594                if ( $want_break_before{$right_key} ) {
14595
14596                    # for leading '.' align all but 'short' quotes; the idea
14597                    # is to not place something like "\n" on a single line.
14598                    if ( $right_key eq '.' ) {
14599                        unless (
14600                            $last_nonblank_type eq '.'
14601                            && (
14602                                length($token) <=
14603                                $rOpts_short_concatenation_item_length )
14604                            && ( $token !~ /^[\)\]\}]$/ )
14605                          )
14606                        {
14607                            $bias{$right_key} += $delta_bias;
14608                        }
14609                    }
14610                    else {
14611                        $bias{$right_key} += $delta_bias;
14612                    }
14613                    $bond_str += $bias{$right_key};
14614                }
14615            }
14616            my $bond_str_4 = $bond_str;
14617
14618            #---------------------------------------------------------------
14619            # Bond Strength Section 5:
14620            # Fifth Approximation.
14621            # Take nesting depth into account by adding the nesting depth
14622            # to the bond strength.
14623            #---------------------------------------------------------------
14624            my $strength;
14625
14626            if ( defined($bond_str) && !$nobreak_to_go[$i] ) {
14627                if ( $total_nesting_depth > 0 ) {
14628                    $strength = $bond_str + $total_nesting_depth;
14629                }
14630                else {
14631                    $strength = $bond_str;
14632                }
14633            }
14634            else {
14635                $strength = NO_BREAK;
14636            }
14637
14638            # always break after side comment
14639            if ( $type eq '#' ) { $strength = 0 }
14640
14641            $bond_strength_to_go[$i] = $strength;
14642
14643            FORMATTER_DEBUG_FLAG_BOND && do {
14644                my $str = substr( $token, 0, 15 );
14645                $str .= ' ' x ( 16 - length($str) );
14646                print STDOUT
14647"BOND:  i=$i $str $type $next_nonblank_type depth=$total_nesting_depth strength=$bond_str_1 -> $bond_str_2 -> $bond_str_3 -> $bond_str_4 $bond_str -> $strength \n";
14648            };
14649        } ## end main loop
14650    } ## end sub set_bond_strengths
14651}
14652
14653sub pad_array_to_go {
14654
14655    # to simplify coding in scan_list and set_bond_strengths, it helps
14656    # to create some extra blank tokens at the end of the arrays
14657    $tokens_to_go[ $max_index_to_go + 1 ] = '';
14658    $tokens_to_go[ $max_index_to_go + 2 ] = '';
14659    $types_to_go[ $max_index_to_go + 1 ]  = 'b';
14660    $types_to_go[ $max_index_to_go + 2 ]  = 'b';
14661    $nesting_depth_to_go[ $max_index_to_go + 1 ] =
14662      $nesting_depth_to_go[$max_index_to_go];
14663
14664    #    /^[R\}\)\]]$/
14665    if ( $is_closing_type{ $types_to_go[$max_index_to_go] } ) {
14666        if ( $nesting_depth_to_go[$max_index_to_go] <= 0 ) {
14667
14668            # shouldn't happen:
14669            unless ( get_saw_brace_error() ) {
14670                warning(
14671"Program bug in scan_list: hit nesting error which should have been caught\n"
14672                );
14673                report_definite_bug();
14674            }
14675        }
14676        else {
14677            $nesting_depth_to_go[ $max_index_to_go + 1 ] -= 1;
14678        }
14679    }
14680
14681    #       /^[L\{\(\[]$/
14682    elsif ( $is_opening_type{ $types_to_go[$max_index_to_go] } ) {
14683        $nesting_depth_to_go[ $max_index_to_go + 1 ] += 1;
14684    }
14685}
14686
14687{    # begin scan_list
14688
14689    my (
14690        $block_type,               $current_depth,
14691        $depth,                    $i,
14692        $i_last_nonblank_token,    $last_colon_sequence_number,
14693        $last_nonblank_token,      $last_nonblank_type,
14694        $last_nonblank_block_type, $last_old_breakpoint_count,
14695        $minimum_depth,            $next_nonblank_block_type,
14696        $next_nonblank_token,      $next_nonblank_type,
14697        $old_breakpoint_count,     $starting_breakpoint_count,
14698        $starting_depth,           $token,
14699        $type,                     $type_sequence,
14700    );
14701
14702    my (
14703        @breakpoint_stack,              @breakpoint_undo_stack,
14704        @comma_index,                   @container_type,
14705        @identifier_count_stack,        @index_before_arrow,
14706        @interrupted_list,              @item_count_stack,
14707        @last_comma_index,              @last_dot_index,
14708        @last_nonblank_type,            @old_breakpoint_count_stack,
14709        @opening_structure_index_stack, @rfor_semicolon_list,
14710        @has_old_logical_breakpoints,   @rand_or_list,
14711        @i_equals,
14712    );
14713
14714    # routine to define essential variables when we go 'up' to
14715    # a new depth
14716    sub check_for_new_minimum_depth {
14717        my $depth = shift;
14718        if ( $depth < $minimum_depth ) {
14719
14720            $minimum_depth = $depth;
14721
14722            # these arrays need not retain values between calls
14723            $breakpoint_stack[$depth]              = $starting_breakpoint_count;
14724            $container_type[$depth]                = "";
14725            $identifier_count_stack[$depth]        = 0;
14726            $index_before_arrow[$depth]            = -1;
14727            $interrupted_list[$depth]              = 1;
14728            $item_count_stack[$depth]              = 0;
14729            $last_nonblank_type[$depth]            = "";
14730            $opening_structure_index_stack[$depth] = -1;
14731
14732            $breakpoint_undo_stack[$depth]       = undef;
14733            $comma_index[$depth]                 = undef;
14734            $last_comma_index[$depth]            = undef;
14735            $last_dot_index[$depth]              = undef;
14736            $old_breakpoint_count_stack[$depth]  = undef;
14737            $has_old_logical_breakpoints[$depth] = 0;
14738            $rand_or_list[$depth]                = [];
14739            $rfor_semicolon_list[$depth]         = [];
14740            $i_equals[$depth]                    = -1;
14741
14742            # these arrays must retain values between calls
14743            if ( !defined( $has_broken_sublist[$depth] ) ) {
14744                $dont_align[$depth]         = 0;
14745                $has_broken_sublist[$depth] = 0;
14746                $want_comma_break[$depth]   = 0;
14747            }
14748        }
14749    }
14750
14751    # routine to decide which commas to break at within a container;
14752    # returns:
14753    #   $bp_count = number of comma breakpoints set
14754    #   $do_not_break_apart = a flag indicating if container need not
14755    #     be broken open
14756    sub set_comma_breakpoints {
14757
14758        my $dd                 = shift;
14759        my $bp_count           = 0;
14760        my $do_not_break_apart = 0;
14761
14762        # anything to do?
14763        if ( $item_count_stack[$dd] ) {
14764
14765            # handle commas not in containers...
14766            if ( $dont_align[$dd] ) {
14767                do_uncontained_comma_breaks($dd);
14768            }
14769
14770            # handle commas within containers...
14771            else {
14772                my $fbc = $forced_breakpoint_count;
14773
14774                # always open comma lists not preceded by keywords,
14775                # barewords, identifiers (that is, anything that doesn't
14776                # look like a function call)
14777                my $must_break_open = $last_nonblank_type[$dd] !~ /^[kwiU]$/;
14778
14779                set_comma_breakpoints_do(
14780                    $dd,
14781                    $opening_structure_index_stack[$dd],
14782                    $i,
14783                    $item_count_stack[$dd],
14784                    $identifier_count_stack[$dd],
14785                    $comma_index[$dd],
14786                    $next_nonblank_type,
14787                    $container_type[$dd],
14788                    $interrupted_list[$dd],
14789                    \$do_not_break_apart,
14790                    $must_break_open,
14791                );
14792                $bp_count = $forced_breakpoint_count - $fbc;
14793                $do_not_break_apart = 0 if $must_break_open;
14794            }
14795        }
14796        return ( $bp_count, $do_not_break_apart );
14797    }
14798
14799    sub do_uncontained_comma_breaks {
14800
14801        # Handle commas not in containers...
14802        # This is a catch-all routine for commas that we
14803        # don't know what to do with because the don't fall
14804        # within containers.  We will bias the bond strength
14805        # to break at commas which ended lines in the input
14806        # file.  This usually works better than just trying
14807        # to put as many items on a line as possible.  A
14808        # downside is that if the input file is garbage it
14809        # won't work very well. However, the user can always
14810        # prevent following the old breakpoints with the
14811        # -iob flag.
14812        my $dd                    = shift;
14813        my $bias                  = -.01;
14814        my $old_comma_break_count = 0;
14815        foreach my $ii ( @{ $comma_index[$dd] } ) {
14816            if ( $old_breakpoint_to_go[$ii] ) {
14817                $old_comma_break_count++;
14818                $bond_strength_to_go[$ii] = $bias;
14819
14820                # reduce bias magnitude to force breaks in order
14821                $bias *= 0.99;
14822            }
14823        }
14824
14825        # Also put a break before the first comma if
14826        # (1) there was a break there in the input, and
14827        # (2) there was exactly one old break before the first comma break
14828        # (3) OLD: there are multiple old comma breaks
14829        # (3) NEW: there are one or more old comma breaks (see return example)
14830        #
14831        # For example, we will follow the user and break after
14832        # 'print' in this snippet:
14833        #    print
14834        #      "conformability (Not the same dimension)\n",
14835        #      "\t", $have, " is ", text_unit($hu), "\n",
14836        #      "\t", $want, " is ", text_unit($wu), "\n",
14837        #      ;
14838        #
14839        # Another example, just one comma, where we will break after
14840        # the return:
14841        #  return
14842        #    $x * cos($a) - $y * sin($a),
14843        #    $x * sin($a) + $y * cos($a);
14844
14845        # Breaking a print statement:
14846        # print SAVEOUT
14847        #   ( $? & 127 ) ? " (SIG#" . ( $? & 127 ) . ")" : "",
14848        #   ( $? & 128 ) ? " -- core dumped" : "", "\n";
14849        #
14850        #  But we will not force a break after the opening paren here
14851        #  (causes a blinker):
14852        #        $heap->{stream}->set_output_filter(
14853        #            poe::filter::reference->new('myotherfreezer') ),
14854        #          ;
14855        #
14856        my $i_first_comma = $comma_index[$dd]->[0];
14857        if ( $old_breakpoint_to_go[$i_first_comma] ) {
14858            my $level_comma = $levels_to_go[$i_first_comma];
14859            my $ibreak      = -1;
14860            my $obp_count   = 0;
14861            for ( my $ii = $i_first_comma - 1 ; $ii >= 0 ; $ii -= 1 ) {
14862                if ( $old_breakpoint_to_go[$ii] ) {
14863                    $obp_count++;
14864                    last if ( $obp_count > 1 );
14865                    $ibreak = $ii
14866                      if ( $levels_to_go[$ii] == $level_comma );
14867                }
14868            }
14869
14870            # Changed rule from multiple old commas to just one here:
14871            if ( $ibreak >= 0 && $obp_count == 1 && $old_comma_break_count > 0 )
14872            {
14873                # Do not to break before an opening token because
14874                # it can lead to "blinkers".
14875                my $ibreakm = $ibreak;
14876                $ibreakm-- if ( $types_to_go[$ibreakm] eq 'b' );
14877                if ( $ibreakm >= 0 && $types_to_go[$ibreakm] !~ /^[\(\{\[L]$/ )
14878                {
14879                    set_forced_breakpoint($ibreak);
14880                }
14881            }
14882        }
14883    }
14884
14885    my %is_logical_container;
14886
14887    BEGIN {
14888        @_ = qw# if elsif unless while and or err not && | || ? : ! #;
14889        @is_logical_container{@_} = (1) x scalar(@_);
14890    }
14891
14892    sub set_for_semicolon_breakpoints {
14893        my $dd = shift;
14894        foreach ( @{ $rfor_semicolon_list[$dd] } ) {
14895            set_forced_breakpoint($_);
14896        }
14897    }
14898
14899    sub set_logical_breakpoints {
14900        my $dd = shift;
14901        if (
14902               $item_count_stack[$dd] == 0
14903            && $is_logical_container{ $container_type[$dd] }
14904
14905            || $has_old_logical_breakpoints[$dd]
14906          )
14907        {
14908
14909            # Look for breaks in this order:
14910            # 0   1    2   3
14911            # or  and  ||  &&
14912            foreach my $i ( 0 .. 3 ) {
14913                if ( $rand_or_list[$dd][$i] ) {
14914                    foreach ( @{ $rand_or_list[$dd][$i] } ) {
14915                        set_forced_breakpoint($_);
14916                    }
14917
14918                    # break at any 'if' and 'unless' too
14919                    foreach ( @{ $rand_or_list[$dd][4] } ) {
14920                        set_forced_breakpoint($_);
14921                    }
14922                    $rand_or_list[$dd] = [];
14923                    last;
14924                }
14925            }
14926        }
14927    }
14928
14929    sub is_unbreakable_container {
14930
14931        # never break a container of one of these types
14932        # because bad things can happen (map1.t)
14933        my $dd = shift;
14934        $is_sort_map_grep{ $container_type[$dd] };
14935    }
14936
14937    sub scan_list {
14938
14939        # This routine is responsible for setting line breaks for all lists,
14940        # so that hierarchical structure can be displayed and so that list
14941        # items can be vertically aligned.  The output of this routine is
14942        # stored in the array @forced_breakpoint_to_go, which is used to set
14943        # final breakpoints.
14944
14945        $starting_depth = $nesting_depth_to_go[0];
14946
14947        $block_type                 = ' ';
14948        $current_depth              = $starting_depth;
14949        $i                          = -1;
14950        $last_colon_sequence_number = -1;
14951        $last_nonblank_token        = ';';
14952        $last_nonblank_type         = ';';
14953        $last_nonblank_block_type   = ' ';
14954        $last_old_breakpoint_count  = 0;
14955        $minimum_depth = $current_depth + 1;    # forces update in check below
14956        $old_breakpoint_count      = 0;
14957        $starting_breakpoint_count = $forced_breakpoint_count;
14958        $token                     = ';';
14959        $type                      = ';';
14960        $type_sequence             = '';
14961
14962        my $total_depth_variation = 0;
14963        my $i_old_assignment_break;
14964        my $depth_last = $starting_depth;
14965
14966        check_for_new_minimum_depth($current_depth);
14967
14968        my $is_long_line = excess_line_length( 0, $max_index_to_go ) > 0;
14969        my $want_previous_breakpoint = -1;
14970
14971        my $saw_good_breakpoint;
14972        my $i_line_end   = -1;
14973        my $i_line_start = -1;
14974
14975        # loop over all tokens in this batch
14976        while ( ++$i <= $max_index_to_go ) {
14977            if ( $type ne 'b' ) {
14978                $i_last_nonblank_token    = $i - 1;
14979                $last_nonblank_type       = $type;
14980                $last_nonblank_token      = $token;
14981                $last_nonblank_block_type = $block_type;
14982            } ## end if ( $type ne 'b' )
14983            $type          = $types_to_go[$i];
14984            $block_type    = $block_type_to_go[$i];
14985            $token         = $tokens_to_go[$i];
14986            $type_sequence = $type_sequence_to_go[$i];
14987            my $next_type       = $types_to_go[ $i + 1 ];
14988            my $next_token      = $tokens_to_go[ $i + 1 ];
14989            my $i_next_nonblank = ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 );
14990            $next_nonblank_type       = $types_to_go[$i_next_nonblank];
14991            $next_nonblank_token      = $tokens_to_go[$i_next_nonblank];
14992            $next_nonblank_block_type = $block_type_to_go[$i_next_nonblank];
14993
14994            # set break if flag was set
14995            if ( $want_previous_breakpoint >= 0 ) {
14996                set_forced_breakpoint($want_previous_breakpoint);
14997                $want_previous_breakpoint = -1;
14998            }
14999
15000            $last_old_breakpoint_count = $old_breakpoint_count;
15001            if ( $old_breakpoint_to_go[$i] ) {
15002                $i_line_end   = $i;
15003                $i_line_start = $i_next_nonblank;
15004
15005                $old_breakpoint_count++;
15006
15007                # Break before certain keywords if user broke there and
15008                # this is a 'safe' break point. The idea is to retain
15009                # any preferred breaks for sequential list operations,
15010                # like a schwartzian transform.
15011                if ($rOpts_break_at_old_keyword_breakpoints) {
15012                    if (
15013                           $next_nonblank_type eq 'k'
15014                        && $is_keyword_returning_list{$next_nonblank_token}
15015                        && (   $type =~ /^[=\)\]\}Riw]$/
15016                            || $type eq 'k'
15017                            && $is_keyword_returning_list{$token} )
15018                      )
15019                    {
15020
15021                        # we actually have to set this break next time through
15022                        # the loop because if we are at a closing token (such
15023                        # as '}') which forms a one-line block, this break might
15024                        # get undone.
15025                        $want_previous_breakpoint = $i;
15026                    } ## end if ( $next_nonblank_type...)
15027                } ## end if ($rOpts_break_at_old_keyword_breakpoints)
15028
15029                # Break before attributes if user broke there
15030                if ($rOpts_break_at_old_attribute_breakpoints) {
15031                    if ( $next_nonblank_type eq 'A' ) {
15032                        $want_previous_breakpoint = $i;
15033                    }
15034                }
15035
15036                # remember an = break as possible good break point
15037                if ( $is_assignment{$type} ) {
15038                    $i_old_assignment_break = $i;
15039                }
15040                elsif ( $is_assignment{$next_nonblank_type} ) {
15041                    $i_old_assignment_break = $i_next_nonblank;
15042                }
15043            } ## end if ( $old_breakpoint_to_go...)
15044            next if ( $type eq 'b' );
15045            $depth = $nesting_depth_to_go[ $i + 1 ];
15046
15047            $total_depth_variation += abs( $depth - $depth_last );
15048            $depth_last = $depth;
15049
15050            # safety check - be sure we always break after a comment
15051            # Shouldn't happen .. an error here probably means that the
15052            # nobreak flag did not get turned off correctly during
15053            # formatting.
15054            if ( $type eq '#' ) {
15055                if ( $i != $max_index_to_go ) {
15056                    warning(
15057"Non-fatal program bug: backup logic needed to break after a comment\n"
15058                    );
15059                    report_definite_bug();
15060                    $nobreak_to_go[$i] = 0;
15061                    set_forced_breakpoint($i);
15062                } ## end if ( $i != $max_index_to_go)
15063            } ## end if ( $type eq '#' )
15064
15065            # Force breakpoints at certain tokens in long lines.
15066            # Note that such breakpoints will be undone later if these tokens
15067            # are fully contained within parens on a line.
15068            if (
15069
15070                # break before a keyword within a line
15071                $type eq 'k'
15072                && $i > 0
15073
15074                # if one of these keywords:
15075                && $token =~ /^(if|unless|while|until|for)$/
15076
15077                # but do not break at something like '1 while'
15078                && ( $last_nonblank_type ne 'n' || $i > 2 )
15079
15080                # and let keywords follow a closing 'do' brace
15081                && $last_nonblank_block_type ne 'do'
15082
15083                && (
15084                    $is_long_line
15085
15086                    # or container is broken (by side-comment, etc)
15087                    || (   $next_nonblank_token eq '('
15088                        && $mate_index_to_go[$i_next_nonblank] < $i )
15089                )
15090              )
15091            {
15092                set_forced_breakpoint( $i - 1 );
15093            } ## end if ( $type eq 'k' && $i...)
15094
15095            # remember locations of '||'  and '&&' for possible breaks if we
15096            # decide this is a long logical expression.
15097            if ( $type eq '||' ) {
15098                push @{ $rand_or_list[$depth][2] }, $i;
15099                ++$has_old_logical_breakpoints[$depth]
15100                  if ( ( $i == $i_line_start || $i == $i_line_end )
15101                    && $rOpts_break_at_old_logical_breakpoints );
15102            } ## end if ( $type eq '||' )
15103            elsif ( $type eq '&&' ) {
15104                push @{ $rand_or_list[$depth][3] }, $i;
15105                ++$has_old_logical_breakpoints[$depth]
15106                  if ( ( $i == $i_line_start || $i == $i_line_end )
15107                    && $rOpts_break_at_old_logical_breakpoints );
15108            } ## end elsif ( $type eq '&&' )
15109            elsif ( $type eq 'f' ) {
15110                push @{ $rfor_semicolon_list[$depth] }, $i;
15111            }
15112            elsif ( $type eq 'k' ) {
15113                if ( $token eq 'and' ) {
15114                    push @{ $rand_or_list[$depth][1] }, $i;
15115                    ++$has_old_logical_breakpoints[$depth]
15116                      if ( ( $i == $i_line_start || $i == $i_line_end )
15117                        && $rOpts_break_at_old_logical_breakpoints );
15118                } ## end if ( $token eq 'and' )
15119
15120                # break immediately at 'or's which are probably not in a logical
15121                # block -- but we will break in logical breaks below so that
15122                # they do not add to the forced_breakpoint_count
15123                elsif ( $token eq 'or' ) {
15124                    push @{ $rand_or_list[$depth][0] }, $i;
15125                    ++$has_old_logical_breakpoints[$depth]
15126                      if ( ( $i == $i_line_start || $i == $i_line_end )
15127                        && $rOpts_break_at_old_logical_breakpoints );
15128                    if ( $is_logical_container{ $container_type[$depth] } ) {
15129                    }
15130                    else {
15131                        if ($is_long_line) { set_forced_breakpoint($i) }
15132                        elsif ( ( $i == $i_line_start || $i == $i_line_end )
15133                            && $rOpts_break_at_old_logical_breakpoints )
15134                        {
15135                            $saw_good_breakpoint = 1;
15136                        }
15137                    } ## end else [ if ( $is_logical_container...)]
15138                } ## end elsif ( $token eq 'or' )
15139                elsif ( $token eq 'if' || $token eq 'unless' ) {
15140                    push @{ $rand_or_list[$depth][4] }, $i;
15141                    if ( ( $i == $i_line_start || $i == $i_line_end )
15142                        && $rOpts_break_at_old_logical_breakpoints )
15143                    {
15144                        set_forced_breakpoint($i);
15145                    }
15146                } ## end elsif ( $token eq 'if' ||...)
15147            } ## end elsif ( $type eq 'k' )
15148            elsif ( $is_assignment{$type} ) {
15149                $i_equals[$depth] = $i;
15150            }
15151
15152            if ($type_sequence) {
15153
15154                # handle any postponed closing breakpoints
15155                if ( $token =~ /^[\)\]\}\:]$/ ) {
15156                    if ( $type eq ':' ) {
15157                        $last_colon_sequence_number = $type_sequence;
15158
15159                        # retain break at a ':' line break
15160                        if ( ( $i == $i_line_start || $i == $i_line_end )
15161                            && $rOpts_break_at_old_ternary_breakpoints )
15162                        {
15163
15164                            set_forced_breakpoint($i);
15165
15166                            # break at previous '='
15167                            if ( $i_equals[$depth] > 0 ) {
15168                                set_forced_breakpoint( $i_equals[$depth] );
15169                                $i_equals[$depth] = -1;
15170                            }
15171                        } ## end if ( ( $i == $i_line_start...))
15172                    } ## end if ( $type eq ':' )
15173                    if ( defined( $postponed_breakpoint{$type_sequence} ) ) {
15174                        my $inc = ( $type eq ':' ) ? 0 : 1;
15175                        set_forced_breakpoint( $i - $inc );
15176                        delete $postponed_breakpoint{$type_sequence};
15177                    }
15178                } ## end if ( $token =~ /^[\)\]\}\:]$/[{[(])
15179
15180                # set breaks at ?/: if they will get separated (and are
15181                # not a ?/: chain), or if the '?' is at the end of the
15182                # line
15183                elsif ( $token eq '?' ) {
15184                    my $i_colon = $mate_index_to_go[$i];
15185                    if (
15186                        $i_colon <= 0  # the ':' is not in this batch
15187                        || $i == 0     # this '?' is the first token of the line
15188                        || $i ==
15189                        $max_index_to_go    # or this '?' is the last token
15190                      )
15191                    {
15192
15193                        # don't break at a '?' if preceded by ':' on
15194                        # this line of previous ?/: pair on this line.
15195                        # This is an attempt to preserve a chain of ?/:
15196                        # expressions (elsif2.t).  And don't break if
15197                        # this has a side comment.
15198                        set_forced_breakpoint($i)
15199                          unless (
15200                            $type_sequence == (
15201                                $last_colon_sequence_number +
15202                                  TYPE_SEQUENCE_INCREMENT
15203                            )
15204                            || $tokens_to_go[$max_index_to_go] eq '#'
15205                          );
15206                        set_closing_breakpoint($i);
15207                    } ## end if ( $i_colon <= 0  ||...)
15208                } ## end elsif ( $token eq '?' )
15209            } ## end if ($type_sequence)
15210
15211#print "LISTX sees: i=$i type=$type  tok=$token  block=$block_type depth=$depth\n";
15212
15213            #------------------------------------------------------------
15214            # Handle Increasing Depth..
15215            #
15216            # prepare for a new list when depth increases
15217            # token $i is a '(','{', or '['
15218            #------------------------------------------------------------
15219            if ( $depth > $current_depth ) {
15220
15221                $breakpoint_stack[$depth]       = $forced_breakpoint_count;
15222                $breakpoint_undo_stack[$depth]  = $forced_breakpoint_undo_count;
15223                $has_broken_sublist[$depth]     = 0;
15224                $identifier_count_stack[$depth] = 0;
15225                $index_before_arrow[$depth]     = -1;
15226                $interrupted_list[$depth]       = 0;
15227                $item_count_stack[$depth]       = 0;
15228                $last_comma_index[$depth]       = undef;
15229                $last_dot_index[$depth]         = undef;
15230                $last_nonblank_type[$depth]     = $last_nonblank_type;
15231                $old_breakpoint_count_stack[$depth]    = $old_breakpoint_count;
15232                $opening_structure_index_stack[$depth] = $i;
15233                $rand_or_list[$depth]                  = [];
15234                $rfor_semicolon_list[$depth]           = [];
15235                $i_equals[$depth]                      = -1;
15236                $want_comma_break[$depth]              = 0;
15237                $container_type[$depth] =
15238                  ( $last_nonblank_type =~ /^(k|=>|&&|\|\||\?|\:|\.)$/ )
15239                  ? $last_nonblank_token
15240                  : "";
15241                $has_old_logical_breakpoints[$depth] = 0;
15242
15243                # if line ends here then signal closing token to break
15244                if ( $next_nonblank_type eq 'b' || $next_nonblank_type eq '#' )
15245                {
15246                    set_closing_breakpoint($i);
15247                }
15248
15249                # Not all lists of values should be vertically aligned..
15250                $dont_align[$depth] =
15251
15252                  # code BLOCKS are handled at a higher level
15253                  ( $block_type ne "" )
15254
15255                  # certain paren lists
15256                  || ( $type eq '(' ) && (
15257
15258                    # it does not usually look good to align a list of
15259                    # identifiers in a parameter list, as in:
15260                    #    my($var1, $var2, ...)
15261                    # (This test should probably be refined, for now I'm just
15262                    # testing for any keyword)
15263                    ( $last_nonblank_type eq 'k' )
15264
15265                    # a trailing '(' usually indicates a non-list
15266                    || ( $next_nonblank_type eq '(' )
15267                  );
15268
15269                # patch to outdent opening brace of long if/for/..
15270                # statements (like this one).  See similar coding in
15271                # set_continuation breaks.  We have also catch it here for
15272                # short line fragments which otherwise will not go through
15273                # set_continuation_breaks.
15274                if (
15275                    $block_type
15276
15277                    # if we have the ')' but not its '(' in this batch..
15278                    && ( $last_nonblank_token eq ')' )
15279                    && $mate_index_to_go[$i_last_nonblank_token] < 0
15280
15281                    # and user wants brace to left
15282                    && !$rOpts->{'opening-brace-always-on-right'}
15283
15284                    && ( $type eq '{' )     # should be true
15285                    && ( $token eq '{' )    # should be true
15286                  )
15287                {
15288                    set_forced_breakpoint( $i - 1 );
15289                } ## end if ( $block_type && ( ...))
15290            } ## end if ( $depth > $current_depth)
15291
15292            #------------------------------------------------------------
15293            # Handle Decreasing Depth..
15294            #
15295            # finish off any old list when depth decreases
15296            # token $i is a ')','}', or ']'
15297            #------------------------------------------------------------
15298            elsif ( $depth < $current_depth ) {
15299
15300                check_for_new_minimum_depth($depth);
15301
15302                # force all outer logical containers to break after we see on
15303                # old breakpoint
15304                $has_old_logical_breakpoints[$depth] ||=
15305                  $has_old_logical_breakpoints[$current_depth];
15306
15307                # Patch to break between ') {' if the paren list is broken.
15308                # There is similar logic in set_continuation_breaks for
15309                # non-broken lists.
15310                if (   $token eq ')'
15311                    && $next_nonblank_block_type
15312                    && $interrupted_list[$current_depth]
15313                    && $next_nonblank_type eq '{'
15314                    && !$rOpts->{'opening-brace-always-on-right'} )
15315                {
15316                    set_forced_breakpoint($i);
15317                } ## end if ( $token eq ')' && ...
15318
15319#print "LISTY sees: i=$i type=$type  tok=$token  block=$block_type depth=$depth next=$next_nonblank_type next_block=$next_nonblank_block_type inter=$interrupted_list[$current_depth]\n";
15320
15321                # set breaks at commas if necessary
15322                my ( $bp_count, $do_not_break_apart ) =
15323                  set_comma_breakpoints($current_depth);
15324
15325                my $i_opening = $opening_structure_index_stack[$current_depth];
15326                my $saw_opening_structure = ( $i_opening >= 0 );
15327
15328                # this term is long if we had to break at interior commas..
15329                my $is_long_term = $bp_count > 0;
15330
15331                # If this is a short container with one or more comma arrows,
15332                # then we will mark it as a long term to open it if requested.
15333                # $rOpts_comma_arrow_breakpoints =
15334                #    0 - open only if comma precedes closing brace
15335                #    1 - stable: except for one line blocks
15336                #    2 - try to form 1 line blocks
15337                #    3 - ignore =>
15338                #    4 - always open up if vt=0
15339                #    5 - stable: even for one line blocks if vt=0
15340                if (  !$is_long_term
15341                    && $tokens_to_go[$i_opening] =~ /^[\(\{\]L]$/
15342                    && $index_before_arrow[ $depth + 1 ] > 0
15343                    && !$opening_vertical_tightness{ $tokens_to_go[$i_opening] }
15344                  )
15345                {
15346                    $is_long_term = $rOpts_comma_arrow_breakpoints == 4
15347                      || ( $rOpts_comma_arrow_breakpoints == 0
15348                        && $last_nonblank_token eq ',' )
15349                      || ( $rOpts_comma_arrow_breakpoints == 5
15350                        && $old_breakpoint_to_go[$i_opening] );
15351                } ## end if ( !$is_long_term &&...)
15352
15353                # mark term as long if the length between opening and closing
15354                # parens exceeds allowed line length
15355                if ( !$is_long_term && $saw_opening_structure ) {
15356                    my $i_opening_minus = find_token_starting_list($i_opening);
15357
15358                    # Note: we have to allow for one extra space after a
15359                    # closing token so that we do not strand a comma or
15360                    # semicolon, hence the '>=' here (oneline.t)
15361                    $is_long_term =
15362                      excess_line_length( $i_opening_minus, $i ) >= 0;
15363                } ## end if ( !$is_long_term &&...)
15364
15365                # We've set breaks after all comma-arrows.  Now we have to
15366                # undo them if this can be a one-line block
15367                # (the only breakpoints set will be due to comma-arrows)
15368                if (
15369
15370                    # user doesn't require breaking after all comma-arrows
15371                    ( $rOpts_comma_arrow_breakpoints != 0 )
15372                    && ( $rOpts_comma_arrow_breakpoints != 4 )
15373
15374                    # and if the opening structure is in this batch
15375                    && $saw_opening_structure
15376
15377                    # and either on the same old line
15378                    && (
15379                        $old_breakpoint_count_stack[$current_depth] ==
15380                        $last_old_breakpoint_count
15381
15382                        # or user wants to form long blocks with arrows
15383                        || $rOpts_comma_arrow_breakpoints == 2
15384                    )
15385
15386                  # and we made some breakpoints between the opening and closing
15387                    && ( $breakpoint_undo_stack[$current_depth] <
15388                        $forced_breakpoint_undo_count )
15389
15390                    # and this block is short enough to fit on one line
15391                    # Note: use < because need 1 more space for possible comma
15392                    && !$is_long_term
15393
15394                  )
15395                {
15396                    undo_forced_breakpoint_stack(
15397                        $breakpoint_undo_stack[$current_depth] );
15398                } ## end if ( ( $rOpts_comma_arrow_breakpoints...))
15399
15400                # now see if we have any comma breakpoints left
15401                my $has_comma_breakpoints =
15402                  ( $breakpoint_stack[$current_depth] !=
15403                      $forced_breakpoint_count );
15404
15405                # update broken-sublist flag of the outer container
15406                $has_broken_sublist[$depth] =
15407                     $has_broken_sublist[$depth]
15408                  || $has_broken_sublist[$current_depth]
15409                  || $is_long_term
15410                  || $has_comma_breakpoints;
15411
15412# Having come to the closing ')', '}', or ']', now we have to decide if we
15413# should 'open up' the structure by placing breaks at the opening and
15414# closing containers.  This is a tricky decision.  Here are some of the
15415# basic considerations:
15416#
15417# -If this is a BLOCK container, then any breakpoints will have already
15418# been set (and according to user preferences), so we need do nothing here.
15419#
15420# -If we have a comma-separated list for which we can align the list items,
15421# then we need to do so because otherwise the vertical aligner cannot
15422# currently do the alignment.
15423#
15424# -If this container does itself contain a container which has been broken
15425# open, then it should be broken open to properly show the structure.
15426#
15427# -If there is nothing to align, and no other reason to break apart,
15428# then do not do it.
15429#
15430# We will not break open the parens of a long but 'simple' logical expression.
15431# For example:
15432#
15433# This is an example of a simple logical expression and its formatting:
15434#
15435#     if ( $bigwasteofspace1 && $bigwasteofspace2
15436#         || $bigwasteofspace3 && $bigwasteofspace4 )
15437#
15438# Most people would prefer this than the 'spacey' version:
15439#
15440#     if (
15441#         $bigwasteofspace1 && $bigwasteofspace2
15442#         || $bigwasteofspace3 && $bigwasteofspace4
15443#     )
15444#
15445# To illustrate the rules for breaking logical expressions, consider:
15446#
15447#             FULLY DENSE:
15448#             if ( $opt_excl
15449#                 and ( exists $ids_excl_uc{$id_uc}
15450#                     or grep $id_uc =~ /$_/, @ids_excl_uc ))
15451#
15452# This is on the verge of being difficult to read.  The current default is to
15453# open it up like this:
15454#
15455#             DEFAULT:
15456#             if (
15457#                 $opt_excl
15458#                 and ( exists $ids_excl_uc{$id_uc}
15459#                     or grep $id_uc =~ /$_/, @ids_excl_uc )
15460#               )
15461#
15462# This is a compromise which tries to avoid being too dense and to spacey.
15463# A more spaced version would be:
15464#
15465#             SPACEY:
15466#             if (
15467#                 $opt_excl
15468#                 and (
15469#                     exists $ids_excl_uc{$id_uc}
15470#                     or grep $id_uc =~ /$_/, @ids_excl_uc
15471#                 )
15472#               )
15473#
15474# Some people might prefer the spacey version -- an option could be added.  The
15475# innermost expression contains a long block '( exists $ids_...  ')'.
15476#
15477# Here is how the logic goes: We will force a break at the 'or' that the
15478# innermost expression contains, but we will not break apart its opening and
15479# closing containers because (1) it contains no multi-line sub-containers itself,
15480# and (2) there is no alignment to be gained by breaking it open like this
15481#
15482#             and (
15483#                 exists $ids_excl_uc{$id_uc}
15484#                 or grep $id_uc =~ /$_/, @ids_excl_uc
15485#             )
15486#
15487# (although this looks perfectly ok and might be good for long expressions).  The
15488# outer 'if' container, though, contains a broken sub-container, so it will be
15489# broken open to avoid too much density.  Also, since it contains no 'or's, there
15490# will be a forced break at its 'and'.
15491
15492                # set some flags telling something about this container..
15493                my $is_simple_logical_expression = 0;
15494                if (   $item_count_stack[$current_depth] == 0
15495                    && $saw_opening_structure
15496                    && $tokens_to_go[$i_opening] eq '('
15497                    && $is_logical_container{ $container_type[$current_depth] }
15498                  )
15499                {
15500
15501                    # This seems to be a simple logical expression with
15502                    # no existing breakpoints.  Set a flag to prevent
15503                    # opening it up.
15504                    if ( !$has_comma_breakpoints ) {
15505                        $is_simple_logical_expression = 1;
15506                    }
15507
15508                    # This seems to be a simple logical expression with
15509                    # breakpoints (broken sublists, for example).  Break
15510                    # at all 'or's and '||'s.
15511                    else {
15512                        set_logical_breakpoints($current_depth);
15513                    }
15514                } ## end if ( $item_count_stack...)
15515
15516                if ( $is_long_term
15517                    && @{ $rfor_semicolon_list[$current_depth] } )
15518                {
15519                    set_for_semicolon_breakpoints($current_depth);
15520
15521                    # open up a long 'for' or 'foreach' container to allow
15522                    # leading term alignment unless -lp is used.
15523                    $has_comma_breakpoints = 1
15524                      unless $rOpts_line_up_parentheses;
15525                } ## end if ( $is_long_term && ...)
15526
15527                if (
15528
15529                    # breaks for code BLOCKS are handled at a higher level
15530                    !$block_type
15531
15532                    # we do not need to break at the top level of an 'if'
15533                    # type expression
15534                    && !$is_simple_logical_expression
15535
15536                    ## modification to keep ': (' containers vertically tight;
15537                    ## but probably better to let user set -vt=1 to avoid
15538                    ## inconsistency with other paren types
15539                    ## && ($container_type[$current_depth] ne ':')
15540
15541                    # otherwise, we require one of these reasons for breaking:
15542                    && (
15543
15544                        # - this term has forced line breaks
15545                        $has_comma_breakpoints
15546
15547                       # - the opening container is separated from this batch
15548                       #   for some reason (comment, blank line, code block)
15549                       # - this is a non-paren container spanning multiple lines
15550                        || !$saw_opening_structure
15551
15552                        # - this is a long block contained in another breakable
15553                        #   container
15554                        || (   $is_long_term
15555                            && $container_environment_to_go[$i_opening] ne
15556                            'BLOCK' )
15557                    )
15558                  )
15559                {
15560
15561                    # For -lp option, we must put a breakpoint before
15562                    # the token which has been identified as starting
15563                    # this indentation level.  This is necessary for
15564                    # proper alignment.
15565                    if ( $rOpts_line_up_parentheses && $saw_opening_structure )
15566                    {
15567                        my $item = $leading_spaces_to_go[ $i_opening + 1 ];
15568                        if (   $i_opening + 1 < $max_index_to_go
15569                            && $types_to_go[ $i_opening + 1 ] eq 'b' )
15570                        {
15571                            $item = $leading_spaces_to_go[ $i_opening + 2 ];
15572                        }
15573                        if ( defined($item) ) {
15574                            my $i_start_2 = $item->get_STARTING_INDEX();
15575                            if (
15576                                defined($i_start_2)
15577
15578                                # we are breaking after an opening brace, paren,
15579                                # so don't break before it too
15580                                && $i_start_2 ne $i_opening
15581                              )
15582                            {
15583
15584                                # Only break for breakpoints at the same
15585                                # indentation level as the opening paren
15586                                my $test1 = $nesting_depth_to_go[$i_opening];
15587                                my $test2 = $nesting_depth_to_go[$i_start_2];
15588                                if ( $test2 == $test1 ) {
15589                                    set_forced_breakpoint( $i_start_2 - 1 );
15590                                }
15591                            } ## end if ( defined($i_start_2...))
15592                        } ## end if ( defined($item) )
15593                    } ## end if ( $rOpts_line_up_parentheses...)
15594
15595                    # break after opening structure.
15596                    # note: break before closing structure will be automatic
15597                    if ( $minimum_depth <= $current_depth ) {
15598
15599                        set_forced_breakpoint($i_opening)
15600                          unless ( $do_not_break_apart
15601                            || is_unbreakable_container($current_depth) );
15602
15603                        # break at ',' of lower depth level before opening token
15604                        if ( $last_comma_index[$depth] ) {
15605                            set_forced_breakpoint( $last_comma_index[$depth] );
15606                        }
15607
15608                        # break at '.' of lower depth level before opening token
15609                        if ( $last_dot_index[$depth] ) {
15610                            set_forced_breakpoint( $last_dot_index[$depth] );
15611                        }
15612
15613                        # break before opening structure if preeced by another
15614                        # closing structure and a comma.  This is normally
15615                        # done by the previous closing brace, but not
15616                        # if it was a one-line block.
15617                        if ( $i_opening > 2 ) {
15618                            my $i_prev =
15619                              ( $types_to_go[ $i_opening - 1 ] eq 'b' )
15620                              ? $i_opening - 2
15621                              : $i_opening - 1;
15622
15623                            if (   $types_to_go[$i_prev] eq ','
15624                                && $types_to_go[ $i_prev - 1 ] =~ /^[\)\}]$/ )
15625                            {
15626                                set_forced_breakpoint($i_prev);
15627                            }
15628
15629                            # also break before something like ':('  or '?('
15630                            # if appropriate.
15631                            elsif (
15632                                $types_to_go[$i_prev] =~ /^([k\:\?]|&&|\|\|)$/ )
15633                            {
15634                                my $token_prev = $tokens_to_go[$i_prev];
15635                                if ( $want_break_before{$token_prev} ) {
15636                                    set_forced_breakpoint($i_prev);
15637                                }
15638                            } ## end elsif ( $types_to_go[$i_prev...])
15639                        } ## end if ( $i_opening > 2 )
15640                    } ## end if ( $minimum_depth <=...)
15641
15642                    # break after comma following closing structure
15643                    if ( $next_type eq ',' ) {
15644                        set_forced_breakpoint( $i + 1 );
15645                    }
15646
15647                    # break before an '=' following closing structure
15648                    if (
15649                        $is_assignment{$next_nonblank_type}
15650                        && ( $breakpoint_stack[$current_depth] !=
15651                            $forced_breakpoint_count )
15652                      )
15653                    {
15654                        set_forced_breakpoint($i);
15655                    } ## end if ( $is_assignment{$next_nonblank_type...})
15656
15657                    # break at any comma before the opening structure Added
15658                    # for -lp, but seems to be good in general.  It isn't
15659                    # obvious how far back to look; the '5' below seems to
15660                    # work well and will catch the comma in something like
15661                    #  push @list, myfunc( $param, $param, ..
15662
15663                    my $icomma = $last_comma_index[$depth];
15664                    if ( defined($icomma) && ( $i_opening - $icomma ) < 5 ) {
15665                        unless ( $forced_breakpoint_to_go[$icomma] ) {
15666                            set_forced_breakpoint($icomma);
15667                        }
15668                    }
15669                }    # end logic to open up a container
15670
15671                # Break open a logical container open if it was already open
15672                elsif ($is_simple_logical_expression
15673                    && $has_old_logical_breakpoints[$current_depth] )
15674                {
15675                    set_logical_breakpoints($current_depth);
15676                }
15677
15678                # Handle long container which does not get opened up
15679                elsif ($is_long_term) {
15680
15681                    # must set fake breakpoint to alert outer containers that
15682                    # they are complex
15683                    set_fake_breakpoint();
15684                } ## end elsif ($is_long_term)
15685
15686            } ## end elsif ( $depth < $current_depth)
15687
15688            #------------------------------------------------------------
15689            # Handle this token
15690            #------------------------------------------------------------
15691
15692            $current_depth = $depth;
15693
15694            # handle comma-arrow
15695            if ( $type eq '=>' ) {
15696                next if ( $last_nonblank_type eq '=>' );
15697                next if $rOpts_break_at_old_comma_breakpoints;
15698                next if $rOpts_comma_arrow_breakpoints == 3;
15699                $want_comma_break[$depth]   = 1;
15700                $index_before_arrow[$depth] = $i_last_nonblank_token;
15701                next;
15702            } ## end if ( $type eq '=>' )
15703
15704            elsif ( $type eq '.' ) {
15705                $last_dot_index[$depth] = $i;
15706            }
15707
15708            # Turn off alignment if we are sure that this is not a list
15709            # environment.  To be safe, we will do this if we see certain
15710            # non-list tokens, such as ';', and also the environment is
15711            # not a list.  Note that '=' could be in any of the = operators
15712            # (lextest.t). We can't just use the reported environment
15713            # because it can be incorrect in some cases.
15714            elsif ( ( $type =~ /^[\;\<\>\~]$/ || $is_assignment{$type} )
15715                && $container_environment_to_go[$i] ne 'LIST' )
15716            {
15717                $dont_align[$depth]         = 1;
15718                $want_comma_break[$depth]   = 0;
15719                $index_before_arrow[$depth] = -1;
15720            } ## end elsif ( ( $type =~ /^[\;\<\>\~]$/...))
15721
15722            # now just handle any commas
15723            next unless ( $type eq ',' );
15724
15725            $last_dot_index[$depth]   = undef;
15726            $last_comma_index[$depth] = $i;
15727
15728            # break here if this comma follows a '=>'
15729            # but not if there is a side comment after the comma
15730            if ( $want_comma_break[$depth] ) {
15731
15732                if ( $next_nonblank_type =~ /^[\)\}\]R]$/ ) {
15733                    if ($rOpts_comma_arrow_breakpoints) {
15734                        $want_comma_break[$depth] = 0;
15735                        ##$index_before_arrow[$depth] = -1;
15736                        next;
15737                    }
15738                }
15739
15740                set_forced_breakpoint($i) unless ( $next_nonblank_type eq '#' );
15741
15742                # break before the previous token if it looks safe
15743                # Example of something that we will not try to break before:
15744                #   DBI::SQL_SMALLINT() => $ado_consts->{adSmallInt},
15745                # Also we don't want to break at a binary operator (like +):
15746                # $c->createOval(
15747                #    $x + $R, $y +
15748                #    $R => $x - $R,
15749                #    $y - $R, -fill   => 'black',
15750                # );
15751                my $ibreak = $index_before_arrow[$depth] - 1;
15752                if (   $ibreak > 0
15753                    && $tokens_to_go[ $ibreak + 1 ] !~ /^[\)\}\]]$/ )
15754                {
15755                    if ( $tokens_to_go[$ibreak] eq '-' ) { $ibreak-- }
15756                    if ( $types_to_go[$ibreak] eq 'b' )  { $ibreak-- }
15757                    if ( $types_to_go[$ibreak] =~ /^[,wiZCUG\(\{\[]$/ ) {
15758
15759                        # don't break pointer calls, such as the following:
15760                        #  File::Spec->curdir  => 1,
15761                        # (This is tokenized as adjacent 'w' tokens)
15762                        if ( $tokens_to_go[ $ibreak + 1 ] !~ /^->/ ) {
15763                            set_forced_breakpoint($ibreak);
15764                        }
15765                    } ## end if ( $types_to_go[$ibreak...])
15766                } ## end if ( $ibreak > 0 && $tokens_to_go...)
15767
15768                $want_comma_break[$depth]   = 0;
15769                $index_before_arrow[$depth] = -1;
15770
15771                # handle list which mixes '=>'s and ','s:
15772                # treat any list items so far as an interrupted list
15773                $interrupted_list[$depth] = 1;
15774                next;
15775            } ## end if ( $want_comma_break...)
15776
15777            # break after all commas above starting depth
15778            if ( $depth < $starting_depth && !$dont_align[$depth] ) {
15779                set_forced_breakpoint($i) unless ( $next_nonblank_type eq '#' );
15780                next;
15781            }
15782
15783            # add this comma to the list..
15784            my $item_count = $item_count_stack[$depth];
15785            if ( $item_count == 0 ) {
15786
15787                # but do not form a list with no opening structure
15788                # for example:
15789
15790                #            open INFILE_COPY, ">$input_file_copy"
15791                #              or die ("very long message");
15792
15793                if ( ( $opening_structure_index_stack[$depth] < 0 )
15794                    && $container_environment_to_go[$i] eq 'BLOCK' )
15795                {
15796                    $dont_align[$depth] = 1;
15797                }
15798            } ## end if ( $item_count == 0 )
15799
15800            $comma_index[$depth][$item_count] = $i;
15801            ++$item_count_stack[$depth];
15802            if ( $last_nonblank_type =~ /^[iR\]]$/ ) {
15803                $identifier_count_stack[$depth]++;
15804            }
15805        } ## end while ( ++$i <= $max_index_to_go)
15806
15807        #-------------------------------------------
15808        # end of loop over all tokens in this batch
15809        #-------------------------------------------
15810
15811        # set breaks for any unfinished lists ..
15812        for ( my $dd = $current_depth ; $dd >= $minimum_depth ; $dd-- ) {
15813
15814            $interrupted_list[$dd] = 1;
15815            $has_broken_sublist[$dd] = 1 if ( $dd < $current_depth );
15816            set_comma_breakpoints($dd);
15817            set_logical_breakpoints($dd)
15818              if ( $has_old_logical_breakpoints[$dd] );
15819            set_for_semicolon_breakpoints($dd);
15820
15821            # break open container...
15822            my $i_opening = $opening_structure_index_stack[$dd];
15823            set_forced_breakpoint($i_opening)
15824              unless (
15825                is_unbreakable_container($dd)
15826
15827                # Avoid a break which would place an isolated ' or "
15828                # on a line
15829                || (   $type eq 'Q'
15830                    && $i_opening >= $max_index_to_go - 2
15831                    && $token =~ /^['"]$/ )
15832              );
15833        } ## end for ( my $dd = $current_depth...)
15834
15835        # Return a flag indicating if the input file had some good breakpoints.
15836        # This flag will be used to force a break in a line shorter than the
15837        # allowed line length.
15838        if ( $has_old_logical_breakpoints[$current_depth] ) {
15839            $saw_good_breakpoint = 1;
15840        }
15841
15842        # A complex line with one break at an = has a good breakpoint.
15843        # This is not complex ($total_depth_variation=0):
15844        # $res1
15845        #   = 10;
15846        #
15847        # This is complex ($total_depth_variation=6):
15848        # $res2 =
15849        #  (is_boundp("a", 'self-insert') && is_boundp("b", 'self-insert'));
15850        elsif ($i_old_assignment_break
15851            && $total_depth_variation > 4
15852            && $old_breakpoint_count == 1 )
15853        {
15854            $saw_good_breakpoint = 1;
15855        } ## end elsif ( $i_old_assignment_break...)
15856
15857        return $saw_good_breakpoint;
15858    } ## end sub scan_list
15859}    # end scan_list
15860
15861sub find_token_starting_list {
15862
15863    # When testing to see if a block will fit on one line, some
15864    # previous token(s) may also need to be on the line; particularly
15865    # if this is a sub call.  So we will look back at least one
15866    # token. NOTE: This isn't perfect, but not critical, because
15867    # if we mis-identify a block, it will be wrapped and therefore
15868    # fixed the next time it is formatted.
15869    my $i_opening_paren = shift;
15870    my $i_opening_minus = $i_opening_paren;
15871    my $im1             = $i_opening_paren - 1;
15872    my $im2             = $i_opening_paren - 2;
15873    my $im3             = $i_opening_paren - 3;
15874    my $typem1          = $types_to_go[$im1];
15875    my $typem2          = $im2 >= 0 ? $types_to_go[$im2] : 'b';
15876    if ( $typem1 eq ',' || ( $typem1 eq 'b' && $typem2 eq ',' ) ) {
15877        $i_opening_minus = $i_opening_paren;
15878    }
15879    elsif ( $tokens_to_go[$i_opening_paren] eq '(' ) {
15880        $i_opening_minus = $im1 if $im1 >= 0;
15881
15882        # walk back to improve length estimate
15883        for ( my $j = $im1 ; $j >= 0 ; $j-- ) {
15884            last if ( $types_to_go[$j] =~ /^[\(\[\{L\}\]\)Rb,]$/ );
15885            $i_opening_minus = $j;
15886        }
15887        if ( $types_to_go[$i_opening_minus] eq 'b' ) { $i_opening_minus++ }
15888    }
15889    elsif ( $typem1 eq 'k' ) { $i_opening_minus = $im1 }
15890    elsif ( $typem1 eq 'b' && $im2 >= 0 && $types_to_go[$im2] eq 'k' ) {
15891        $i_opening_minus = $im2;
15892    }
15893    return $i_opening_minus;
15894}
15895
15896{    # begin set_comma_breakpoints_do
15897
15898    my %is_keyword_with_special_leading_term;
15899
15900    BEGIN {
15901
15902        # These keywords have prototypes which allow a special leading item
15903        # followed by a list
15904        @_ =
15905          qw(formline grep kill map printf sprintf push chmod join pack unshift);
15906        @is_keyword_with_special_leading_term{@_} = (1) x scalar(@_);
15907    }
15908
15909    sub set_comma_breakpoints_do {
15910
15911        # Given a list with some commas, set breakpoints at some of the
15912        # commas, if necessary, to make it easy to read.  This list is
15913        # an example:
15914        my (
15915            $depth,               $i_opening_paren,  $i_closing_paren,
15916            $item_count,          $identifier_count, $rcomma_index,
15917            $next_nonblank_type,  $list_type,        $interrupted,
15918            $rdo_not_break_apart, $must_break_open,
15919        ) = @_;
15920
15921        # nothing to do if no commas seen
15922        return if ( $item_count < 1 );
15923        my $i_first_comma     = $$rcomma_index[0];
15924        my $i_true_last_comma = $$rcomma_index[ $item_count - 1 ];
15925        my $i_last_comma      = $i_true_last_comma;
15926        if ( $i_last_comma >= $max_index_to_go ) {
15927            $i_last_comma = $$rcomma_index[ --$item_count - 1 ];
15928            return if ( $item_count < 1 );
15929        }
15930
15931        #---------------------------------------------------------------
15932        # find lengths of all items in the list to calculate page layout
15933        #---------------------------------------------------------------
15934        my $comma_count = $item_count;
15935        my @item_lengths;
15936        my @i_term_begin;
15937        my @i_term_end;
15938        my @i_term_comma;
15939        my $i_prev_plus;
15940        my @max_length = ( 0, 0 );
15941        my $first_term_length;
15942        my $i      = $i_opening_paren;
15943        my $is_odd = 1;
15944
15945        for ( my $j = 0 ; $j < $comma_count ; $j++ ) {
15946            $is_odd      = 1 - $is_odd;
15947            $i_prev_plus = $i + 1;
15948            $i           = $$rcomma_index[$j];
15949
15950            my $i_term_end =
15951              ( $types_to_go[ $i - 1 ] eq 'b' ) ? $i - 2 : $i - 1;
15952            my $i_term_begin =
15953              ( $types_to_go[$i_prev_plus] eq 'b' )
15954              ? $i_prev_plus + 1
15955              : $i_prev_plus;
15956            push @i_term_begin, $i_term_begin;
15957            push @i_term_end,   $i_term_end;
15958            push @i_term_comma, $i;
15959
15960            # note: currently adding 2 to all lengths (for comma and space)
15961            my $length =
15962              2 + token_sequence_length( $i_term_begin, $i_term_end );
15963            push @item_lengths, $length;
15964
15965            if ( $j == 0 ) {
15966                $first_term_length = $length;
15967            }
15968            else {
15969
15970                if ( $length > $max_length[$is_odd] ) {
15971                    $max_length[$is_odd] = $length;
15972                }
15973            }
15974        }
15975
15976        # now we have to make a distinction between the comma count and item
15977        # count, because the item count will be one greater than the comma
15978        # count if the last item is not terminated with a comma
15979        my $i_b =
15980          ( $types_to_go[ $i_last_comma + 1 ] eq 'b' )
15981          ? $i_last_comma + 1
15982          : $i_last_comma;
15983        my $i_e =
15984          ( $types_to_go[ $i_closing_paren - 1 ] eq 'b' )
15985          ? $i_closing_paren - 2
15986          : $i_closing_paren - 1;
15987        my $i_effective_last_comma = $i_last_comma;
15988
15989        my $last_item_length = token_sequence_length( $i_b + 1, $i_e );
15990
15991        if ( $last_item_length > 0 ) {
15992
15993            # add 2 to length because other lengths include a comma and a blank
15994            $last_item_length += 2;
15995            push @item_lengths, $last_item_length;
15996            push @i_term_begin, $i_b + 1;
15997            push @i_term_end,   $i_e;
15998            push @i_term_comma, undef;
15999
16000            my $i_odd = $item_count % 2;
16001
16002            if ( $last_item_length > $max_length[$i_odd] ) {
16003                $max_length[$i_odd] = $last_item_length;
16004            }
16005
16006            $item_count++;
16007            $i_effective_last_comma = $i_e + 1;
16008
16009            if ( $types_to_go[ $i_b + 1 ] =~ /^[iR\]]$/ ) {
16010                $identifier_count++;
16011            }
16012        }
16013
16014        #---------------------------------------------------------------
16015        # End of length calculations
16016        #---------------------------------------------------------------
16017
16018        #---------------------------------------------------------------
16019        # Compound List Rule 1:
16020        # Break at (almost) every comma for a list containing a broken
16021        # sublist.  This has higher priority than the Interrupted List
16022        # Rule.
16023        #---------------------------------------------------------------
16024        if ( $has_broken_sublist[$depth] ) {
16025
16026            # Break at every comma except for a comma between two
16027            # simple, small terms.  This prevents long vertical
16028            # columns of, say, just 0's.
16029            my $small_length = 10;    # 2 + actual maximum length wanted
16030
16031            # We'll insert a break in long runs of small terms to
16032            # allow alignment in uniform tables.
16033            my $skipped_count = 0;
16034            my $columns       = table_columns_available($i_first_comma);
16035            my $fields        = int( $columns / $small_length );
16036            if (   $rOpts_maximum_fields_per_table
16037                && $fields > $rOpts_maximum_fields_per_table )
16038            {
16039                $fields = $rOpts_maximum_fields_per_table;
16040            }
16041            my $max_skipped_count = $fields - 1;
16042
16043            my $is_simple_last_term = 0;
16044            my $is_simple_next_term = 0;
16045            foreach my $j ( 0 .. $item_count ) {
16046                $is_simple_last_term = $is_simple_next_term;
16047                $is_simple_next_term = 0;
16048                if (   $j < $item_count
16049                    && $i_term_end[$j] == $i_term_begin[$j]
16050                    && $item_lengths[$j] <= $small_length )
16051                {
16052                    $is_simple_next_term = 1;
16053                }
16054                next if $j == 0;
16055                if (   $is_simple_last_term
16056                    && $is_simple_next_term
16057                    && $skipped_count < $max_skipped_count )
16058                {
16059                    $skipped_count++;
16060                }
16061                else {
16062                    $skipped_count = 0;
16063                    my $i = $i_term_comma[ $j - 1 ];
16064                    last unless defined $i;
16065                    set_forced_breakpoint($i);
16066                }
16067            }
16068
16069            # always break at the last comma if this list is
16070            # interrupted; we wouldn't want to leave a terminal '{', for
16071            # example.
16072            if ($interrupted) { set_forced_breakpoint($i_true_last_comma) }
16073            return;
16074        }
16075
16076#my ( $a, $b, $c ) = caller();
16077#print "LISTX: in set_list $a $c interupt=$interrupted count=$item_count
16078#i_first = $i_first_comma  i_last=$i_last_comma max=$max_index_to_go\n";
16079#print "depth=$depth has_broken=$has_broken_sublist[$depth] is_multi=$is_multiline opening_paren=($i_opening_paren) \n";
16080
16081        #---------------------------------------------------------------
16082        # Interrupted List Rule:
16083        # A list is is forced to use old breakpoints if it was interrupted
16084        # by side comments or blank lines, or requested by user.
16085        #---------------------------------------------------------------
16086        if (   $rOpts_break_at_old_comma_breakpoints
16087            || $interrupted
16088            || $i_opening_paren < 0 )
16089        {
16090            copy_old_breakpoints( $i_first_comma, $i_true_last_comma );
16091            return;
16092        }
16093
16094        #---------------------------------------------------------------
16095        # Looks like a list of items.  We have to look at it and size it up.
16096        #---------------------------------------------------------------
16097
16098        my $opening_token = $tokens_to_go[$i_opening_paren];
16099        my $opening_environment =
16100          $container_environment_to_go[$i_opening_paren];
16101
16102        #-------------------------------------------------------------------
16103        # Return if this will fit on one line
16104        #-------------------------------------------------------------------
16105
16106        my $i_opening_minus = find_token_starting_list($i_opening_paren);
16107        return
16108          unless excess_line_length( $i_opening_minus, $i_closing_paren ) > 0;
16109
16110        #-------------------------------------------------------------------
16111        # Now we know that this block spans multiple lines; we have to set
16112        # at least one breakpoint -- real or fake -- as a signal to break
16113        # open any outer containers.
16114        #-------------------------------------------------------------------
16115        set_fake_breakpoint();
16116
16117        # be sure we do not extend beyond the current list length
16118        if ( $i_effective_last_comma >= $max_index_to_go ) {
16119            $i_effective_last_comma = $max_index_to_go - 1;
16120        }
16121
16122        # Set a flag indicating if we need to break open to keep -lp
16123        # items aligned.  This is necessary if any of the list terms
16124        # exceeds the available space after the '('.
16125        my $need_lp_break_open = $must_break_open;
16126        if ( $rOpts_line_up_parentheses && !$must_break_open ) {
16127            my $columns_if_unbroken =
16128              maximum_line_length($i_opening_minus) -
16129              total_line_length( $i_opening_minus, $i_opening_paren );
16130            $need_lp_break_open =
16131                 ( $max_length[0] > $columns_if_unbroken )
16132              || ( $max_length[1] > $columns_if_unbroken )
16133              || ( $first_term_length > $columns_if_unbroken );
16134        }
16135
16136        # Specify if the list must have an even number of fields or not.
16137        # It is generally safest to assume an even number, because the
16138        # list items might be a hash list.  But if we can be sure that
16139        # it is not a hash, then we can allow an odd number for more
16140        # flexibility.
16141        my $odd_or_even = 2;    # 1 = odd field count ok, 2 = want even count
16142
16143        if (   $identifier_count >= $item_count - 1
16144            || $is_assignment{$next_nonblank_type}
16145            || ( $list_type && $list_type ne '=>' && $list_type !~ /^[\:\?]$/ )
16146          )
16147        {
16148            $odd_or_even = 1;
16149        }
16150
16151        # do we have a long first term which should be
16152        # left on a line by itself?
16153        my $use_separate_first_term = (
16154            $odd_or_even == 1       # only if we can use 1 field/line
16155              && $item_count > 3    # need several items
16156              && $first_term_length >
16157              2 * $max_length[0] - 2    # need long first term
16158              && $first_term_length >
16159              2 * $max_length[1] - 2    # need long first term
16160        );
16161
16162        # or do we know from the type of list that the first term should
16163        # be placed alone?
16164        if ( !$use_separate_first_term ) {
16165            if ( $is_keyword_with_special_leading_term{$list_type} ) {
16166                $use_separate_first_term = 1;
16167
16168                # should the container be broken open?
16169                if ( $item_count < 3 ) {
16170                    if ( $i_first_comma - $i_opening_paren < 4 ) {
16171                        $$rdo_not_break_apart = 1;
16172                    }
16173                }
16174                elsif ($first_term_length < 20
16175                    && $i_first_comma - $i_opening_paren < 4 )
16176                {
16177                    my $columns = table_columns_available($i_first_comma);
16178                    if ( $first_term_length < $columns ) {
16179                        $$rdo_not_break_apart = 1;
16180                    }
16181                }
16182            }
16183        }
16184
16185        # if so,
16186        if ($use_separate_first_term) {
16187
16188            # ..set a break and update starting values
16189            $use_separate_first_term = 1;
16190            set_forced_breakpoint($i_first_comma);
16191            $i_opening_paren = $i_first_comma;
16192            $i_first_comma   = $$rcomma_index[1];
16193            $item_count--;
16194            return if $comma_count == 1;
16195            shift @item_lengths;
16196            shift @i_term_begin;
16197            shift @i_term_end;
16198            shift @i_term_comma;
16199        }
16200
16201        # if not, update the metrics to include the first term
16202        else {
16203            if ( $first_term_length > $max_length[0] ) {
16204                $max_length[0] = $first_term_length;
16205            }
16206        }
16207
16208        # Field width parameters
16209        my $pair_width = ( $max_length[0] + $max_length[1] );
16210        my $max_width =
16211          ( $max_length[0] > $max_length[1] ) ? $max_length[0] : $max_length[1];
16212
16213        # Number of free columns across the page width for laying out tables
16214        my $columns = table_columns_available($i_first_comma);
16215
16216        # Estimated maximum number of fields which fit this space
16217        # This will be our first guess
16218        my $number_of_fields_max =
16219          maximum_number_of_fields( $columns, $odd_or_even, $max_width,
16220            $pair_width );
16221        my $number_of_fields = $number_of_fields_max;
16222
16223        # Find the best-looking number of fields
16224        # and make this our second guess if possible
16225        my ( $number_of_fields_best, $ri_ragged_break_list,
16226            $new_identifier_count )
16227          = study_list_complexity( \@i_term_begin, \@i_term_end, \@item_lengths,
16228            $max_width );
16229
16230        if (   $number_of_fields_best != 0
16231            && $number_of_fields_best < $number_of_fields_max )
16232        {
16233            $number_of_fields = $number_of_fields_best;
16234        }
16235
16236        # ----------------------------------------------------------------------
16237        # If we are crowded and the -lp option is being used, try to
16238        # undo some indentation
16239        # ----------------------------------------------------------------------
16240        if (
16241            $rOpts_line_up_parentheses
16242            && (
16243                $number_of_fields == 0
16244                || (   $number_of_fields == 1
16245                    && $number_of_fields != $number_of_fields_best )
16246            )
16247          )
16248        {
16249            my $available_spaces = get_AVAILABLE_SPACES_to_go($i_first_comma);
16250            if ( $available_spaces > 0 ) {
16251
16252                my $spaces_wanted = $max_width - $columns;    # for 1 field
16253
16254                if ( $number_of_fields_best == 0 ) {
16255                    $number_of_fields_best =
16256                      get_maximum_fields_wanted( \@item_lengths );
16257                }
16258
16259                if ( $number_of_fields_best != 1 ) {
16260                    my $spaces_wanted_2 =
16261                      1 + $pair_width - $columns;             # for 2 fields
16262                    if ( $available_spaces > $spaces_wanted_2 ) {
16263                        $spaces_wanted = $spaces_wanted_2;
16264                    }
16265                }
16266
16267                if ( $spaces_wanted > 0 ) {
16268                    my $deleted_spaces =
16269                      reduce_lp_indentation( $i_first_comma, $spaces_wanted );
16270
16271                    # redo the math
16272                    if ( $deleted_spaces > 0 ) {
16273                        $columns = table_columns_available($i_first_comma);
16274                        $number_of_fields_max =
16275                          maximum_number_of_fields( $columns, $odd_or_even,
16276                            $max_width, $pair_width );
16277                        $number_of_fields = $number_of_fields_max;
16278
16279                        if (   $number_of_fields_best == 1
16280                            && $number_of_fields >= 1 )
16281                        {
16282                            $number_of_fields = $number_of_fields_best;
16283                        }
16284                    }
16285                }
16286            }
16287        }
16288
16289        # try for one column if two won't work
16290        if ( $number_of_fields <= 0 ) {
16291            $number_of_fields = int( $columns / $max_width );
16292        }
16293
16294        # The user can place an upper bound on the number of fields,
16295        # which can be useful for doing maintenance on tables
16296        if (   $rOpts_maximum_fields_per_table
16297            && $number_of_fields > $rOpts_maximum_fields_per_table )
16298        {
16299            $number_of_fields = $rOpts_maximum_fields_per_table;
16300        }
16301
16302        # How many columns (characters) and lines would this container take
16303        # if no additional whitespace were added?
16304        my $packed_columns = token_sequence_length( $i_opening_paren + 1,
16305            $i_effective_last_comma + 1 );
16306        if ( $columns <= 0 ) { $columns = 1 }    # avoid divide by zero
16307        my $packed_lines = 1 + int( $packed_columns / $columns );
16308
16309        # are we an item contained in an outer list?
16310        my $in_hierarchical_list = $next_nonblank_type =~ /^[\}\,]$/;
16311
16312        if ( $number_of_fields <= 0 ) {
16313
16314#         #---------------------------------------------------------------
16315#         # We're in trouble.  We can't find a single field width that works.
16316#         # There is no simple answer here; we may have a single long list
16317#         # item, or many.
16318#         #---------------------------------------------------------------
16319#
16320#         In many cases, it may be best to not force a break if there is just one
16321#         comma, because the standard continuation break logic will do a better
16322#         job without it.
16323#
16324#         In the common case that all but one of the terms can fit
16325#         on a single line, it may look better not to break open the
16326#         containing parens.  Consider, for example
16327#
16328#             $color =
16329#               join ( '/',
16330#                 sort { $color_value{$::a} <=> $color_value{$::b}; }
16331#                 keys %colors );
16332#
16333#         which will look like this with the container broken:
16334#
16335#             $color = join (
16336#                 '/',
16337#                 sort { $color_value{$::a} <=> $color_value{$::b}; } keys %colors
16338#             );
16339#
16340#         Here is an example of this rule for a long last term:
16341#
16342#             log_message( 0, 256, 128,
16343#                 "Number of routes in adj-RIB-in to be considered: $peercount" );
16344#
16345#         And here is an example with a long first term:
16346#
16347#         $s = sprintf(
16348# "%2d wallclock secs (%$f usr %$f sys + %$f cusr %$f csys = %$f CPU)",
16349#             $r, $pu, $ps, $cu, $cs, $tt
16350#           )
16351#           if $style eq 'all';
16352
16353            my $i_last_comma = $$rcomma_index[ $comma_count - 1 ];
16354            my $long_last_term = excess_line_length( 0, $i_last_comma ) <= 0;
16355            my $long_first_term =
16356              excess_line_length( $i_first_comma + 1, $max_index_to_go ) <= 0;
16357
16358            # break at every comma ...
16359            if (
16360
16361                # if requested by user or is best looking
16362                $number_of_fields_best == 1
16363
16364                # or if this is a sublist of a larger list
16365                || $in_hierarchical_list
16366
16367                # or if multiple commas and we dont have a long first or last
16368                # term
16369                || ( $comma_count > 1
16370                    && !( $long_last_term || $long_first_term ) )
16371              )
16372            {
16373                foreach ( 0 .. $comma_count - 1 ) {
16374                    set_forced_breakpoint( $$rcomma_index[$_] );
16375                }
16376            }
16377            elsif ($long_last_term) {
16378
16379                set_forced_breakpoint($i_last_comma);
16380                $$rdo_not_break_apart = 1 unless $must_break_open;
16381            }
16382            elsif ($long_first_term) {
16383
16384                set_forced_breakpoint($i_first_comma);
16385            }
16386            else {
16387
16388                # let breaks be defined by default bond strength logic
16389            }
16390            return;
16391        }
16392
16393        # --------------------------------------------------------
16394        # We have a tentative field count that seems to work.
16395        # How many lines will this require?
16396        # --------------------------------------------------------
16397        my $formatted_lines = $item_count / ($number_of_fields);
16398        if ( $formatted_lines != int $formatted_lines ) {
16399            $formatted_lines = 1 + int $formatted_lines;
16400        }
16401
16402        # So far we've been trying to fill out to the right margin.  But
16403        # compact tables are easier to read, so let's see if we can use fewer
16404        # fields without increasing the number of lines.
16405        $number_of_fields =
16406          compactify_table( $item_count, $number_of_fields, $formatted_lines,
16407            $odd_or_even );
16408
16409        # How many spaces across the page will we fill?
16410        my $columns_per_line =
16411          ( int $number_of_fields / 2 ) * $pair_width +
16412          ( $number_of_fields % 2 ) * $max_width;
16413
16414        my $formatted_columns;
16415
16416        if ( $number_of_fields > 1 ) {
16417            $formatted_columns =
16418              ( $pair_width * ( int( $item_count / 2 ) ) +
16419                  ( $item_count % 2 ) * $max_width );
16420        }
16421        else {
16422            $formatted_columns = $max_width * $item_count;
16423        }
16424        if ( $formatted_columns < $packed_columns ) {
16425            $formatted_columns = $packed_columns;
16426        }
16427
16428        my $unused_columns = $formatted_columns - $packed_columns;
16429
16430        # set some empirical parameters to help decide if we should try to
16431        # align; high sparsity does not look good, especially with few lines
16432        my $sparsity = ($unused_columns) / ($formatted_columns);
16433        my $max_allowed_sparsity =
16434            ( $item_count < 3 )    ? 0.1
16435          : ( $packed_lines == 1 ) ? 0.15
16436          : ( $packed_lines == 2 ) ? 0.4
16437          :                          0.7;
16438
16439        # Begin check for shortcut methods, which avoid treating a list
16440        # as a table for relatively small parenthesized lists.  These
16441        # are usually easier to read if not formatted as tables.
16442        if (
16443            $packed_lines <= 2                    # probably can fit in 2 lines
16444            && $item_count < 9                    # doesn't have too many items
16445            && $opening_environment eq 'BLOCK'    # not a sub-container
16446            && $opening_token eq '('              # is paren list
16447          )
16448        {
16449
16450            # Shortcut method 1: for -lp and just one comma:
16451            # This is a no-brainer, just break at the comma.
16452            if (
16453                $rOpts_line_up_parentheses    # -lp
16454                && $item_count == 2           # two items, one comma
16455                && !$must_break_open
16456              )
16457            {
16458                my $i_break = $$rcomma_index[0];
16459                set_forced_breakpoint($i_break);
16460                $$rdo_not_break_apart = 1;
16461                set_non_alignment_flags( $comma_count, $rcomma_index );
16462                return;
16463
16464            }
16465
16466            # method 2 is for most small ragged lists which might look
16467            # best if not displayed as a table.
16468            if (
16469                ( $number_of_fields == 2 && $item_count == 3 )
16470                || (
16471                    $new_identifier_count > 0    # isn't all quotes
16472                    && $sparsity > 0.15
16473                )    # would be fairly spaced gaps if aligned
16474              )
16475            {
16476
16477                my $break_count = set_ragged_breakpoints( \@i_term_comma,
16478                    $ri_ragged_break_list );
16479                ++$break_count if ($use_separate_first_term);
16480
16481                # NOTE: we should really use the true break count here,
16482                # which can be greater if there are large terms and
16483                # little space, but usually this will work well enough.
16484                unless ($must_break_open) {
16485
16486                    if ( $break_count <= 1 ) {
16487                        $$rdo_not_break_apart = 1;
16488                    }
16489                    elsif ( $rOpts_line_up_parentheses && !$need_lp_break_open )
16490                    {
16491                        $$rdo_not_break_apart = 1;
16492                    }
16493                }
16494                set_non_alignment_flags( $comma_count, $rcomma_index );
16495                return;
16496            }
16497
16498        }    # end shortcut methods
16499
16500        # debug stuff
16501
16502        FORMATTER_DEBUG_FLAG_SPARSE && do {
16503            print STDOUT
16504"SPARSE:cols=$columns commas=$comma_count items:$item_count ids=$identifier_count pairwidth=$pair_width fields=$number_of_fields lines packed: $packed_lines packed_cols=$packed_columns fmtd:$formatted_lines cols /line:$columns_per_line  unused:$unused_columns fmtd:$formatted_columns sparsity=$sparsity allow=$max_allowed_sparsity\n";
16505
16506        };
16507
16508        #---------------------------------------------------------------
16509        # Compound List Rule 2:
16510        # If this list is too long for one line, and it is an item of a
16511        # larger list, then we must format it, regardless of sparsity
16512        # (ian.t).  One reason that we have to do this is to trigger
16513        # Compound List Rule 1, above, which causes breaks at all commas of
16514        # all outer lists.  In this way, the structure will be properly
16515        # displayed.
16516        #---------------------------------------------------------------
16517
16518        # Decide if this list is too long for one line unless broken
16519        my $total_columns = table_columns_available($i_opening_paren);
16520        my $too_long      = $packed_columns > $total_columns;
16521
16522        # For a paren list, include the length of the token just before the
16523        # '(' because this is likely a sub call, and we would have to
16524        # include the sub name on the same line as the list.  This is still
16525        # imprecise, but not too bad.  (steve.t)
16526        if ( !$too_long && $i_opening_paren > 0 && $opening_token eq '(' ) {
16527
16528            $too_long = excess_line_length( $i_opening_minus,
16529                $i_effective_last_comma + 1 ) > 0;
16530        }
16531
16532        # FIXME: For an item after a '=>', try to include the length of the
16533        # thing before the '=>'.  This is crude and should be improved by
16534        # actually looking back token by token.
16535        if ( !$too_long && $i_opening_paren > 0 && $list_type eq '=>' ) {
16536            my $i_opening_minus = $i_opening_paren - 4;
16537            if ( $i_opening_minus >= 0 ) {
16538                $too_long = excess_line_length( $i_opening_minus,
16539                    $i_effective_last_comma + 1 ) > 0;
16540            }
16541        }
16542
16543        # Always break lists contained in '[' and '{' if too long for 1 line,
16544        # and always break lists which are too long and part of a more complex
16545        # structure.
16546        my $must_break_open_container = $must_break_open
16547          || ( $too_long
16548            && ( $in_hierarchical_list || $opening_token ne '(' ) );
16549
16550#print "LISTX: next=$next_nonblank_type  avail cols=$columns packed=$packed_columns must format = $must_break_open_container too-long=$too_long  opening=$opening_token list_type=$list_type formatted_lines=$formatted_lines  packed=$packed_lines max_sparsity= $max_allowed_sparsity sparsity=$sparsity \n";
16551
16552        #---------------------------------------------------------------
16553        # The main decision:
16554        # Now decide if we will align the data into aligned columns.  Do not
16555        # attempt to align columns if this is a tiny table or it would be
16556        # too spaced.  It seems that the more packed lines we have, the
16557        # sparser the list that can be allowed and still look ok.
16558        #---------------------------------------------------------------
16559
16560        if (   ( $formatted_lines < 3 && $packed_lines < $formatted_lines )
16561            || ( $formatted_lines < 2 )
16562            || ( $unused_columns > $max_allowed_sparsity * $formatted_columns )
16563          )
16564        {
16565
16566            #---------------------------------------------------------------
16567            # too sparse: would look ugly if aligned in a table;
16568            #---------------------------------------------------------------
16569
16570            # use old breakpoints if this is a 'big' list
16571            # FIXME: goal is to improve set_ragged_breakpoints so that
16572            # this is not necessary.
16573            if ( $packed_lines > 2 && $item_count > 10 ) {
16574                write_logfile_entry("List sparse: using old breakpoints\n");
16575                copy_old_breakpoints( $i_first_comma, $i_last_comma );
16576            }
16577
16578            # let the continuation logic handle it if 2 lines
16579            else {
16580
16581                my $break_count = set_ragged_breakpoints( \@i_term_comma,
16582                    $ri_ragged_break_list );
16583                ++$break_count if ($use_separate_first_term);
16584
16585                unless ($must_break_open_container) {
16586                    if ( $break_count <= 1 ) {
16587                        $$rdo_not_break_apart = 1;
16588                    }
16589                    elsif ( $rOpts_line_up_parentheses && !$need_lp_break_open )
16590                    {
16591                        $$rdo_not_break_apart = 1;
16592                    }
16593                }
16594                set_non_alignment_flags( $comma_count, $rcomma_index );
16595            }
16596            return;
16597        }
16598
16599        #---------------------------------------------------------------
16600        # go ahead and format as a table
16601        #---------------------------------------------------------------
16602        write_logfile_entry(
16603            "List: auto formatting with $number_of_fields fields/row\n");
16604
16605        my $j_first_break =
16606          $use_separate_first_term ? $number_of_fields : $number_of_fields - 1;
16607
16608        for (
16609            my $j = $j_first_break ;
16610            $j < $comma_count ;
16611            $j += $number_of_fields
16612          )
16613        {
16614            my $i = $$rcomma_index[$j];
16615            set_forced_breakpoint($i);
16616        }
16617        return;
16618    }
16619}
16620
16621sub set_non_alignment_flags {
16622
16623    # set flag which indicates that these commas should not be
16624    # aligned
16625    my ( $comma_count, $rcomma_index ) = @_;
16626    foreach ( 0 .. $comma_count - 1 ) {
16627        $matching_token_to_go[ $$rcomma_index[$_] ] = 1;
16628    }
16629}
16630
16631sub study_list_complexity {
16632
16633    # Look for complex tables which should be formatted with one term per line.
16634    # Returns the following:
16635    #
16636    #  \@i_ragged_break_list = list of good breakpoints to avoid lines
16637    #    which are hard to read
16638    #  $number_of_fields_best = suggested number of fields based on
16639    #    complexity; = 0 if any number may be used.
16640    #
16641    my ( $ri_term_begin, $ri_term_end, $ritem_lengths, $max_width ) = @_;
16642    my $item_count            = @{$ri_term_begin};
16643    my $complex_item_count    = 0;
16644    my $number_of_fields_best = $rOpts_maximum_fields_per_table;
16645    my $i_max                 = @{$ritem_lengths} - 1;
16646    ##my @item_complexity;
16647
16648    my $i_last_last_break = -3;
16649    my $i_last_break      = -2;
16650    my @i_ragged_break_list;
16651
16652    my $definitely_complex = 30;
16653    my $definitely_simple  = 12;
16654    my $quote_count        = 0;
16655
16656    for my $i ( 0 .. $i_max ) {
16657        my $ib = $ri_term_begin->[$i];
16658        my $ie = $ri_term_end->[$i];
16659
16660        # define complexity: start with the actual term length
16661        my $weighted_length = ( $ritem_lengths->[$i] - 2 );
16662
16663        ##TBD: join types here and check for variations
16664        ##my $str=join "", @tokens_to_go[$ib..$ie];
16665
16666        my $is_quote = 0;
16667        if ( $types_to_go[$ib] =~ /^[qQ]$/ ) {
16668            $is_quote = 1;
16669            $quote_count++;
16670        }
16671        elsif ( $types_to_go[$ib] =~ /^[w\-]$/ ) {
16672            $quote_count++;
16673        }
16674
16675        if ( $ib eq $ie ) {
16676            if ( $is_quote && $tokens_to_go[$ib] =~ /\s/ ) {
16677                $complex_item_count++;
16678                $weighted_length *= 2;
16679            }
16680            else {
16681            }
16682        }
16683        else {
16684            if ( grep { $_ eq 'b' } @types_to_go[ $ib .. $ie ] ) {
16685                $complex_item_count++;
16686                $weighted_length *= 2;
16687            }
16688            if ( grep { $_ eq '..' } @types_to_go[ $ib .. $ie ] ) {
16689                $weighted_length += 4;
16690            }
16691        }
16692
16693        # add weight for extra tokens.
16694        $weighted_length += 2 * ( $ie - $ib );
16695
16696##        my $BUB = join '', @tokens_to_go[$ib..$ie];
16697##        print "# COMPLEXITY:$weighted_length   $BUB\n";
16698
16699##push @item_complexity, $weighted_length;
16700
16701        # now mark a ragged break after this item it if it is 'long and
16702        # complex':
16703        if ( $weighted_length >= $definitely_complex ) {
16704
16705            # if we broke after the previous term
16706            # then break before it too
16707            if (   $i_last_break == $i - 1
16708                && $i > 1
16709                && $i_last_last_break != $i - 2 )
16710            {
16711
16712                ## FIXME: don't strand a small term
16713                pop @i_ragged_break_list;
16714                push @i_ragged_break_list, $i - 2;
16715                push @i_ragged_break_list, $i - 1;
16716            }
16717
16718            push @i_ragged_break_list, $i;
16719            $i_last_last_break = $i_last_break;
16720            $i_last_break      = $i;
16721        }
16722
16723        # don't break before a small last term -- it will
16724        # not look good on a line by itself.
16725        elsif ($i == $i_max
16726            && $i_last_break == $i - 1
16727            && $weighted_length <= $definitely_simple )
16728        {
16729            pop @i_ragged_break_list;
16730        }
16731    }
16732
16733    my $identifier_count = $i_max + 1 - $quote_count;
16734
16735    # Need more tuning here..
16736    if (   $max_width > 12
16737        && $complex_item_count > $item_count / 2
16738        && $number_of_fields_best != 2 )
16739    {
16740        $number_of_fields_best = 1;
16741    }
16742
16743    return ( $number_of_fields_best, \@i_ragged_break_list, $identifier_count );
16744}
16745
16746sub get_maximum_fields_wanted {
16747
16748    # Not all tables look good with more than one field of items.
16749    # This routine looks at a table and decides if it should be
16750    # formatted with just one field or not.
16751    # This coding is still under development.
16752    my ($ritem_lengths) = @_;
16753
16754    my $number_of_fields_best = 0;
16755
16756    # For just a few items, we tentatively assume just 1 field.
16757    my $item_count = @{$ritem_lengths};
16758    if ( $item_count <= 5 ) {
16759        $number_of_fields_best = 1;
16760    }
16761
16762    # For larger tables, look at it both ways and see what looks best
16763    else {
16764
16765        my $is_odd            = 1;
16766        my @max_length        = ( 0, 0 );
16767        my @last_length_2     = ( undef, undef );
16768        my @first_length_2    = ( undef, undef );
16769        my $last_length       = undef;
16770        my $total_variation_1 = 0;
16771        my $total_variation_2 = 0;
16772        my @total_variation_2 = ( 0, 0 );
16773        for ( my $j = 0 ; $j < $item_count ; $j++ ) {
16774
16775            $is_odd = 1 - $is_odd;
16776            my $length = $ritem_lengths->[$j];
16777            if ( $length > $max_length[$is_odd] ) {
16778                $max_length[$is_odd] = $length;
16779            }
16780
16781            if ( defined($last_length) ) {
16782                my $dl = abs( $length - $last_length );
16783                $total_variation_1 += $dl;
16784            }
16785            $last_length = $length;
16786
16787            my $ll = $last_length_2[$is_odd];
16788            if ( defined($ll) ) {
16789                my $dl = abs( $length - $ll );
16790                $total_variation_2[$is_odd] += $dl;
16791            }
16792            else {
16793                $first_length_2[$is_odd] = $length;
16794            }
16795            $last_length_2[$is_odd] = $length;
16796        }
16797        $total_variation_2 = $total_variation_2[0] + $total_variation_2[1];
16798
16799        my $factor = ( $item_count > 10 ) ? 1 : ( $item_count > 5 ) ? 0.75 : 0;
16800        unless ( $total_variation_2 < $factor * $total_variation_1 ) {
16801            $number_of_fields_best = 1;
16802        }
16803    }
16804    return ($number_of_fields_best);
16805}
16806
16807sub table_columns_available {
16808    my $i_first_comma = shift;
16809    my $columns =
16810      maximum_line_length($i_first_comma) -
16811      leading_spaces_to_go($i_first_comma);
16812
16813    # Patch: the vertical formatter does not line up lines whose lengths
16814    # exactly equal the available line length because of allowances
16815    # that must be made for side comments.  Therefore, the number of
16816    # available columns is reduced by 1 character.
16817    $columns -= 1;
16818    return $columns;
16819}
16820
16821sub maximum_number_of_fields {
16822
16823    # how many fields will fit in the available space?
16824    my ( $columns, $odd_or_even, $max_width, $pair_width ) = @_;
16825    my $max_pairs        = int( $columns / $pair_width );
16826    my $number_of_fields = $max_pairs * 2;
16827    if (   $odd_or_even == 1
16828        && $max_pairs * $pair_width + $max_width <= $columns )
16829    {
16830        $number_of_fields++;
16831    }
16832    return $number_of_fields;
16833}
16834
16835sub compactify_table {
16836
16837    # given a table with a certain number of fields and a certain number
16838    # of lines, see if reducing the number of fields will make it look
16839    # better.
16840    my ( $item_count, $number_of_fields, $formatted_lines, $odd_or_even ) = @_;
16841    if ( $number_of_fields >= $odd_or_even * 2 && $formatted_lines > 0 ) {
16842        my $min_fields;
16843
16844        for (
16845            $min_fields = $number_of_fields ;
16846            $min_fields >= $odd_or_even
16847            && $min_fields * $formatted_lines >= $item_count ;
16848            $min_fields -= $odd_or_even
16849          )
16850        {
16851            $number_of_fields = $min_fields;
16852        }
16853    }
16854    return $number_of_fields;
16855}
16856
16857sub set_ragged_breakpoints {
16858
16859    # Set breakpoints in a list that cannot be formatted nicely as a
16860    # table.
16861    my ( $ri_term_comma, $ri_ragged_break_list ) = @_;
16862
16863    my $break_count = 0;
16864    foreach (@$ri_ragged_break_list) {
16865        my $j = $ri_term_comma->[$_];
16866        if ($j) {
16867            set_forced_breakpoint($j);
16868            $break_count++;
16869        }
16870    }
16871    return $break_count;
16872}
16873
16874sub copy_old_breakpoints {
16875    my ( $i_first_comma, $i_last_comma ) = @_;
16876    for my $i ( $i_first_comma .. $i_last_comma ) {
16877        if ( $old_breakpoint_to_go[$i] ) {
16878            set_forced_breakpoint($i);
16879        }
16880    }
16881}
16882
16883sub set_nobreaks {
16884    my ( $i, $j ) = @_;
16885    if ( $i >= 0 && $i <= $j && $j <= $max_index_to_go ) {
16886
16887        FORMATTER_DEBUG_FLAG_NOBREAK && do {
16888            my ( $a, $b, $c ) = caller();
16889            print STDOUT
16890"NOBREAK: forced_breakpoint $forced_breakpoint_count from $a $c with i=$i max=$max_index_to_go type=$types_to_go[$i]\n";
16891        };
16892
16893        @nobreak_to_go[ $i .. $j ] = (1) x ( $j - $i + 1 );
16894    }
16895
16896    # shouldn't happen; non-critical error
16897    else {
16898        FORMATTER_DEBUG_FLAG_NOBREAK && do {
16899            my ( $a, $b, $c ) = caller();
16900            print STDOUT
16901              "NOBREAK ERROR: from $a $c with i=$i j=$j max=$max_index_to_go\n";
16902        };
16903    }
16904}
16905
16906sub set_fake_breakpoint {
16907
16908    # Just bump up the breakpoint count as a signal that there are breaks.
16909    # This is useful if we have breaks but may want to postpone deciding where
16910    # to make them.
16911    $forced_breakpoint_count++;
16912}
16913
16914sub set_forced_breakpoint {
16915    my $i = shift;
16916
16917    return unless defined $i && $i >= 0;
16918
16919    # when called with certain tokens, use bond strengths to decide
16920    # if we break before or after it
16921    my $token = $tokens_to_go[$i];
16922
16923    if ( $token =~ /^([\=\.\,\:\?]|and|or|xor|&&|\|\|)$/ ) {
16924        if ( $want_break_before{$token} && $i >= 0 ) { $i-- }
16925    }
16926
16927    # breaks are forced before 'if' and 'unless'
16928    elsif ( $is_if_unless{$token} ) { $i-- }
16929
16930    if ( $i >= 0 && $i <= $max_index_to_go ) {
16931        my $i_nonblank = ( $types_to_go[$i] ne 'b' ) ? $i : $i - 1;
16932
16933        FORMATTER_DEBUG_FLAG_FORCE && do {
16934            my ( $a, $b, $c ) = caller();
16935            print STDOUT
16936"FORCE $forced_breakpoint_count from $a $c with i=$i_nonblank max=$max_index_to_go tok=$tokens_to_go[$i_nonblank] type=$types_to_go[$i_nonblank] nobr=$nobreak_to_go[$i_nonblank]\n";
16937        };
16938
16939        if ( $i_nonblank >= 0 && $nobreak_to_go[$i_nonblank] == 0 ) {
16940            $forced_breakpoint_to_go[$i_nonblank] = 1;
16941
16942            if ( $i_nonblank > $index_max_forced_break ) {
16943                $index_max_forced_break = $i_nonblank;
16944            }
16945            $forced_breakpoint_count++;
16946            $forced_breakpoint_undo_stack[ $forced_breakpoint_undo_count++ ] =
16947              $i_nonblank;
16948
16949            # if we break at an opening container..break at the closing
16950            if ( $tokens_to_go[$i_nonblank] =~ /^[\{\[\(\?]$/ ) {
16951                set_closing_breakpoint($i_nonblank);
16952            }
16953        }
16954    }
16955}
16956
16957sub clear_breakpoint_undo_stack {
16958    $forced_breakpoint_undo_count = 0;
16959}
16960
16961sub undo_forced_breakpoint_stack {
16962
16963    my $i_start = shift;
16964    if ( $i_start < 0 ) {
16965        $i_start = 0;
16966        my ( $a, $b, $c ) = caller();
16967        warning(
16968"Program Bug: undo_forced_breakpoint_stack from $a $c has i=$i_start "
16969        );
16970    }
16971
16972    while ( $forced_breakpoint_undo_count > $i_start ) {
16973        my $i =
16974          $forced_breakpoint_undo_stack[ --$forced_breakpoint_undo_count ];
16975        if ( $i >= 0 && $i <= $max_index_to_go ) {
16976            $forced_breakpoint_to_go[$i] = 0;
16977            $forced_breakpoint_count--;
16978
16979            FORMATTER_DEBUG_FLAG_UNDOBP && do {
16980                my ( $a, $b, $c ) = caller();
16981                print STDOUT
16982"UNDOBP: undo forced_breakpoint i=$i $forced_breakpoint_undo_count from $a $c max=$max_index_to_go\n";
16983            };
16984        }
16985
16986        # shouldn't happen, but not a critical error
16987        else {
16988            FORMATTER_DEBUG_FLAG_UNDOBP && do {
16989                my ( $a, $b, $c ) = caller();
16990                print STDOUT
16991"Program Bug: undo_forced_breakpoint from $a $c has i=$i but max=$max_index_to_go";
16992            };
16993        }
16994    }
16995}
16996
16997{    # begin recombine_breakpoints
16998
16999    my %is_amp_amp;
17000    my %is_ternary;
17001    my %is_math_op;
17002    my %is_plus_minus;
17003    my %is_mult_div;
17004
17005    BEGIN {
17006
17007        @_ = qw( && || );
17008        @is_amp_amp{@_} = (1) x scalar(@_);
17009
17010        @_ = qw( ? : );
17011        @is_ternary{@_} = (1) x scalar(@_);
17012
17013        @_ = qw( + - * / );
17014        @is_math_op{@_} = (1) x scalar(@_);
17015
17016        @_ = qw( + - );
17017        @is_plus_minus{@_} = (1) x scalar(@_);
17018
17019        @_ = qw( * / );
17020        @is_mult_div{@_} = (1) x scalar(@_);
17021    }
17022
17023    sub recombine_breakpoints {
17024
17025        # sub set_continuation_breaks is very liberal in setting line breaks
17026        # for long lines, always setting breaks at good breakpoints, even
17027        # when that creates small lines.  Sometimes small line fragments
17028        # are produced which would look better if they were combined.
17029        # That's the task of this routine.
17030        #
17031        # We are given indexes to the current lines:
17032        # $ri_beg = ref to array of BEGinning indexes of each line
17033        # $ri_end = ref to array of ENDing indexes of each line
17034        my ( $ri_beg, $ri_end ) = @_;
17035
17036        # Make a list of all good joining tokens between the lines
17037        # n-1 and n.
17038        my @joint;
17039        my $nmax = @$ri_end - 1;
17040        for my $n ( 1 .. $nmax ) {
17041            my $ibeg_1 = $$ri_beg[ $n - 1 ];
17042            my $iend_1 = $$ri_end[ $n - 1 ];
17043            my $iend_2 = $$ri_end[$n];
17044            my $ibeg_2 = $$ri_beg[$n];
17045
17046            my ( $itok, $itokp, $itokm );
17047
17048            foreach my $itest ( $iend_1, $ibeg_2 ) {
17049                my $type = $types_to_go[$itest];
17050                if (   $is_math_op{$type}
17051                    || $is_amp_amp{$type}
17052                    || $is_assignment{$type}
17053                    || $type eq ':' )
17054                {
17055                    $itok = $itest;
17056                }
17057            }
17058            $joint[$n] = [$itok];
17059        }
17060
17061        my $more_to_do = 1;
17062
17063        # We keep looping over all of the lines of this batch
17064        # until there are no more possible recombinations
17065        my $nmax_last = @$ri_end;
17066        while ($more_to_do) {
17067            my $n_best = 0;
17068            my $bs_best;
17069            my $n;
17070            my $nmax = @$ri_end - 1;
17071
17072            # Safety check for infinite loop
17073            unless ( $nmax < $nmax_last ) {
17074
17075                # Shouldn't happen because splice below decreases nmax on each
17076                # pass.
17077                Perl::Tidy::Die
17078                  "Program bug-infinite loop in recombine breakpoints\n";
17079            }
17080            $nmax_last  = $nmax;
17081            $more_to_do = 0;
17082            my $previous_outdentable_closing_paren;
17083            my $leading_amp_count = 0;
17084            my $this_line_is_semicolon_terminated;
17085
17086            # loop over all remaining lines in this batch
17087            for $n ( 1 .. $nmax ) {
17088
17089                #----------------------------------------------------------
17090                # If we join the current pair of lines,
17091                # line $n-1 will become the left part of the joined line
17092                # line $n will become the right part of the joined line
17093                #
17094                # Here are Indexes of the endpoint tokens of the two lines:
17095                #
17096                #  -----line $n-1--- | -----line $n-----
17097                #  $ibeg_1   $iend_1 | $ibeg_2   $iend_2
17098                #                    ^
17099                #                    |
17100                # We want to decide if we should remove the line break
17101                # between the tokens at $iend_1 and $ibeg_2
17102                #
17103                # We will apply a number of ad-hoc tests to see if joining
17104                # here will look ok.  The code will just issue a 'next'
17105                # command if the join doesn't look good.  If we get through
17106                # the gauntlet of tests, the lines will be recombined.
17107                #----------------------------------------------------------
17108                #
17109                # beginning and ending tokens of the lines we are working on
17110                my $ibeg_1    = $$ri_beg[ $n - 1 ];
17111                my $iend_1    = $$ri_end[ $n - 1 ];
17112                my $iend_2    = $$ri_end[$n];
17113                my $ibeg_2    = $$ri_beg[$n];
17114                my $ibeg_nmax = $$ri_beg[$nmax];
17115
17116                my $type_iend_1 = $types_to_go[$iend_1];
17117                my $type_iend_2 = $types_to_go[$iend_2];
17118                my $type_ibeg_1 = $types_to_go[$ibeg_1];
17119                my $type_ibeg_2 = $types_to_go[$ibeg_2];
17120
17121                # some beginning indexes of other lines, which may not exist
17122                my $ibeg_0 = $n > 1          ? $$ri_beg[ $n - 2 ] : -1;
17123                my $ibeg_3 = $n < $nmax      ? $$ri_beg[ $n + 1 ] : -1;
17124                my $ibeg_4 = $n + 2 <= $nmax ? $$ri_beg[ $n + 2 ] : -1;
17125
17126                my $bs_tweak = 0;
17127
17128                #my $depth_increase=( $nesting_depth_to_go[$ibeg_2] -
17129                #        $nesting_depth_to_go[$ibeg_1] );
17130
17131                FORMATTER_DEBUG_FLAG_RECOMBINE && do {
17132                    print STDERR
17133"RECOMBINE: n=$n imid=$iend_1 if=$ibeg_1 type=$type_ibeg_1 =$tokens_to_go[$ibeg_1] next_type=$type_ibeg_2 next_tok=$tokens_to_go[$ibeg_2]\n";
17134                };
17135
17136                # If line $n is the last line, we set some flags and
17137                # do any special checks for it
17138                if ( $n == $nmax ) {
17139
17140                    # a terminal '{' should stay where it is
17141                    next if $type_ibeg_2 eq '{';
17142
17143                    # set flag if statement $n ends in ';'
17144                    $this_line_is_semicolon_terminated = $type_iend_2 eq ';'
17145
17146                      # with possible side comment
17147                      || ( $type_iend_2 eq '#'
17148                        && $iend_2 - $ibeg_2 >= 2
17149                        && $types_to_go[ $iend_2 - 2 ] eq ';'
17150                        && $types_to_go[ $iend_2 - 1 ] eq 'b' );
17151                }
17152
17153                #----------------------------------------------------------
17154                # Recombine Section 1:
17155                # Examine the special token joining this line pair, if any.
17156                # Put as many tests in this section to avoid duplicate code and
17157                # to make formatting independent of whether breaks are to the
17158                # left or right of an operator.
17159                #----------------------------------------------------------
17160
17161                my ($itok) = @{ $joint[$n] };
17162                if ($itok) {
17163
17164                    # FIXME: Patch - may not be necessary
17165                    my $iend_1 =
17166                        $type_iend_1 eq 'b'
17167                      ? $iend_1 - 1
17168                      : $iend_1;
17169
17170                    my $iend_2 =
17171                        $type_iend_2 eq 'b'
17172                      ? $iend_2 - 1
17173                      : $iend_2;
17174                    ## END PATCH
17175
17176                    my $type = $types_to_go[$itok];
17177
17178                    if ( $type eq ':' ) {
17179
17180                   # do not join at a colon unless it disobeys the break request
17181                        if ( $itok eq $iend_1 ) {
17182                            next unless $want_break_before{$type};
17183                        }
17184                        else {
17185                            $leading_amp_count++;
17186                            next if $want_break_before{$type};
17187                        }
17188                    } ## end if ':'
17189
17190                    # handle math operators + - * /
17191                    elsif ( $is_math_op{$type} ) {
17192
17193                        # Combine these lines if this line is a single
17194                        # number, or if it is a short term with same
17195                        # operator as the previous line.  For example, in
17196                        # the following code we will combine all of the
17197                        # short terms $A, $B, $C, $D, $E, $F, together
17198                        # instead of leaving them one per line:
17199                        #  my $time =
17200                        #    $A * $B * $C * $D * $E * $F *
17201                        #    ( 2. * $eps * $sigma * $area ) *
17202                        #    ( 1. / $tcold**3 - 1. / $thot**3 );
17203
17204                        # This can be important in math-intensive code.
17205
17206                        my $good_combo;
17207
17208                        my $itokp  = min( $inext_to_go[$itok],  $iend_2 );
17209                        my $itokpp = min( $inext_to_go[$itokp], $iend_2 );
17210                        my $itokm  = max( $iprev_to_go[$itok],  $ibeg_1 );
17211                        my $itokmm = max( $iprev_to_go[$itokm], $ibeg_1 );
17212
17213                        # check for a number on the right
17214                        if ( $types_to_go[$itokp] eq 'n' ) {
17215
17216                            # ok if nothing else on right
17217                            if ( $itokp == $iend_2 ) {
17218                                $good_combo = 1;
17219                            }
17220                            else {
17221
17222                                # look one more token to right..
17223                                # okay if math operator or some termination
17224                                $good_combo =
17225                                  ( ( $itokpp == $iend_2 )
17226                                      && $is_math_op{ $types_to_go[$itokpp] } )
17227                                  || $types_to_go[$itokpp] =~ /^[#,;]$/;
17228                            }
17229                        }
17230
17231                        # check for a number on the left
17232                        if ( !$good_combo && $types_to_go[$itokm] eq 'n' ) {
17233
17234                            # okay if nothing else to left
17235                            if ( $itokm == $ibeg_1 ) {
17236                                $good_combo = 1;
17237                            }
17238
17239                            # otherwise look one more token to left
17240                            else {
17241
17242                                # okay if math operator, comma, or assignment
17243                                $good_combo = ( $itokmm == $ibeg_1 )
17244                                  && ( $is_math_op{ $types_to_go[$itokmm] }
17245                                    || $types_to_go[$itokmm] =~ /^[,]$/
17246                                    || $is_assignment{ $types_to_go[$itokmm] }
17247                                  );
17248                            }
17249                        }
17250
17251                        # look for a single short token either side of the
17252                        # operator
17253                        if ( !$good_combo ) {
17254
17255                            # Slight adjustment factor to make results
17256                            # independent of break before or after operator in
17257                            # long summed lists.  (An operator and a space make
17258                            # two spaces).
17259                            my $two = ( $itok eq $iend_1 ) ? 2 : 0;
17260
17261                            $good_combo =
17262
17263                              # numbers or id's on both sides of this joint
17264                              $types_to_go[$itokp] =~ /^[in]$/
17265                              && $types_to_go[$itokm] =~ /^[in]$/
17266
17267                              # one of the two lines must be short:
17268                              && (
17269                                (
17270                                    # no more than 2 nonblank tokens right of
17271                                    # joint
17272                                    $itokpp == $iend_2
17273
17274                                    # short
17275                                    && token_sequence_length( $itokp, $iend_2 )
17276                                    < $two +
17277                                    $rOpts_short_concatenation_item_length
17278                                )
17279                                || (
17280                                    # no more than 2 nonblank tokens left of
17281                                    # joint
17282                                    $itokmm == $ibeg_1
17283
17284                                    # short
17285                                    && token_sequence_length( $ibeg_1, $itokm )
17286                                    < 2 - $two +
17287                                    $rOpts_short_concatenation_item_length
17288                                )
17289
17290                              )
17291
17292                              # keep pure terms; don't mix +- with */
17293                              && !(
17294                                $is_plus_minus{$type}
17295                                && (   $is_mult_div{ $types_to_go[$itokmm] }
17296                                    || $is_mult_div{ $types_to_go[$itokpp] } )
17297                              )
17298                              && !(
17299                                $is_mult_div{$type}
17300                                && (   $is_plus_minus{ $types_to_go[$itokmm] }
17301                                    || $is_plus_minus{ $types_to_go[$itokpp] } )
17302                              )
17303
17304                              ;
17305                        }
17306
17307                        # it is also good to combine if we can reduce to 2 lines
17308                        if ( !$good_combo ) {
17309
17310                            # index on other line where same token would be in a
17311                            # long chain.
17312                            my $iother =
17313                              ( $itok == $iend_1 ) ? $iend_2 : $ibeg_1;
17314
17315                            $good_combo =
17316                                 $n == 2
17317                              && $n == $nmax
17318                              && $types_to_go[$iother] ne $type;
17319                        }
17320
17321                        next unless ($good_combo);
17322
17323                    } ## end math
17324
17325                    elsif ( $is_amp_amp{$type} ) {
17326                        ##TBD
17327                    } ## end &&, ||
17328
17329                    elsif ( $is_assignment{$type} ) {
17330                        ##TBD
17331                    } ## end assignment
17332                }
17333
17334                #----------------------------------------------------------
17335                # Recombine Section 2:
17336                # Examine token at $iend_1 (right end of first line of pair)
17337                #----------------------------------------------------------
17338
17339                # an isolated '}' may join with a ';' terminated segment
17340                if ( $type_iend_1 eq '}' ) {
17341
17342                    # Check for cases where combining a semicolon terminated
17343                    # statement with a previous isolated closing paren will
17344                    # allow the combined line to be outdented.  This is
17345                    # generally a good move.  For example, we can join up
17346                    # the last two lines here:
17347                    #  (
17348                    #      $dev,  $ino,   $mode,  $nlink, $uid,     $gid, $rdev,
17349                    #      $size, $atime, $mtime, $ctime, $blksize, $blocks
17350                    #    )
17351                    #    = stat($file);
17352                    #
17353                    # to get:
17354                    #  (
17355                    #      $dev,  $ino,   $mode,  $nlink, $uid,     $gid, $rdev,
17356                    #      $size, $atime, $mtime, $ctime, $blksize, $blocks
17357                    #  ) = stat($file);
17358                    #
17359                    # which makes the parens line up.
17360                    #
17361                    # Another example, from Joe Matarazzo, probably looks best
17362                    # with the 'or' clause appended to the trailing paren:
17363                    #  $self->some_method(
17364                    #      PARAM1 => 'foo',
17365                    #      PARAM2 => 'bar'
17366                    #  ) or die "Some_method didn't work";
17367                    #
17368                    # But we do not want to do this for something like the -lp
17369                    # option where the paren is not outdentable because the
17370                    # trailing clause will be far to the right.
17371                    #
17372                    # The logic here is synchronized with the logic in sub
17373                    # sub set_adjusted_indentation, which actually does
17374                    # the outdenting.
17375                    #
17376                    $previous_outdentable_closing_paren =
17377                      $this_line_is_semicolon_terminated
17378
17379                      # only one token on last line
17380                      && $ibeg_1 == $iend_1
17381
17382                      # must be structural paren
17383                      && $tokens_to_go[$iend_1] eq ')'
17384
17385                      # style must allow outdenting,
17386                      && !$closing_token_indentation{')'}
17387
17388                      # only leading '&&', '||', and ':' if no others seen
17389                      # (but note: our count made below could be wrong
17390                      # due to intervening comments)
17391                      && ( $leading_amp_count == 0
17392                        || $type_ibeg_2 !~ /^(:|\&\&|\|\|)$/ )
17393
17394                      # but leading colons probably line up with with a
17395                      # previous colon or question (count could be wrong).
17396                      && $type_ibeg_2 ne ':'
17397
17398                      # only one step in depth allowed.  this line must not
17399                      # begin with a ')' itself.
17400                      && ( $nesting_depth_to_go[$iend_1] ==
17401                        $nesting_depth_to_go[$iend_2] + 1 );
17402
17403                    # YVES patch 2 of 2:
17404                    # Allow cuddled eval chains, like this:
17405                    #   eval {
17406                    #       #STUFF;
17407                    #       1; # return true
17408                    #   } or do {
17409                    #       #handle error
17410                    #   };
17411                    # This patch works together with a patch in
17412                    # setting adjusted indentation (where the closing eval
17413                    # brace is outdented if possible).
17414                    # The problem is that an 'eval' block has continuation
17415                    # indentation and it looks better to undo it in some
17416                    # cases.  If we do not use this patch we would get:
17417                    #   eval {
17418                    #       #STUFF;
17419                    #       1; # return true
17420                    #       }
17421                    #       or do {
17422                    #       #handle error
17423                    #     };
17424                    # The alternative, for uncuddled style, is to create
17425                    # a patch in set_adjusted_indentation which undoes
17426                    # the indentation of a leading line like 'or do {'.
17427                    # This doesn't work well with -icb through
17428                    if (
17429                           $block_type_to_go[$iend_1] eq 'eval'
17430                        && !$rOpts->{'line-up-parentheses'}
17431                        && !$rOpts->{'indent-closing-brace'}
17432                        && $tokens_to_go[$iend_2] eq '{'
17433                        && (
17434                            ( $type_ibeg_2 =~ /^(|\&\&|\|\|)$/ )
17435                            || (   $type_ibeg_2 eq 'k'
17436                                && $is_and_or{ $tokens_to_go[$ibeg_2] } )
17437                            || $is_if_unless{ $tokens_to_go[$ibeg_2] }
17438                        )
17439                      )
17440                    {
17441                        $previous_outdentable_closing_paren ||= 1;
17442                    }
17443
17444                    next
17445                      unless (
17446                        $previous_outdentable_closing_paren
17447
17448                        # handle '.' and '?' specially below
17449                        || ( $type_ibeg_2 =~ /^[\.\?]$/ )
17450                      );
17451                }
17452
17453                # YVES
17454                # honor breaks at opening brace
17455                # Added to prevent recombining something like this:
17456                #  } || eval { package main;
17457                elsif ( $type_iend_1 eq '{' ) {
17458                    next if $forced_breakpoint_to_go[$iend_1];
17459                }
17460
17461                # do not recombine lines with ending &&, ||,
17462                elsif ( $is_amp_amp{$type_iend_1} ) {
17463                    next unless $want_break_before{$type_iend_1};
17464                }
17465
17466                # Identify and recombine a broken ?/: chain
17467                elsif ( $type_iend_1 eq '?' ) {
17468
17469                    # Do not recombine different levels
17470                    next
17471                      if ( $levels_to_go[$ibeg_1] ne $levels_to_go[$ibeg_2] );
17472
17473                    # do not recombine unless next line ends in :
17474                    next unless $type_iend_2 eq ':';
17475                }
17476
17477                # for lines ending in a comma...
17478                elsif ( $type_iend_1 eq ',' ) {
17479
17480                    # Do not recombine at comma which is following the
17481                    # input bias.
17482                    # TODO: might be best to make a special flag
17483                    next if ( $old_breakpoint_to_go[$iend_1] );
17484
17485                 # an isolated '},' may join with an identifier + ';'
17486                 # this is useful for the class of a 'bless' statement (bless.t)
17487                    if (   $type_ibeg_1 eq '}'
17488                        && $type_ibeg_2 eq 'i' )
17489                    {
17490                        next
17491                          unless ( ( $ibeg_1 == ( $iend_1 - 1 ) )
17492                            && ( $iend_2 == ( $ibeg_2 + 1 ) )
17493                            && $this_line_is_semicolon_terminated );
17494
17495                        # override breakpoint
17496                        $forced_breakpoint_to_go[$iend_1] = 0;
17497                    }
17498
17499                    # but otherwise ..
17500                    else {
17501
17502                        # do not recombine after a comma unless this will leave
17503                        # just 1 more line
17504                        next unless ( $n + 1 >= $nmax );
17505
17506                    # do not recombine if there is a change in indentation depth
17507                        next
17508                          if (
17509                            $levels_to_go[$iend_1] != $levels_to_go[$iend_2] );
17510
17511                        # do not recombine a "complex expression" after a
17512                        # comma.  "complex" means no parens.
17513                        my $saw_paren;
17514                        foreach my $ii ( $ibeg_2 .. $iend_2 ) {
17515                            if ( $tokens_to_go[$ii] eq '(' ) {
17516                                $saw_paren = 1;
17517                                last;
17518                            }
17519                        }
17520                        next if $saw_paren;
17521                    }
17522                }
17523
17524                # opening paren..
17525                elsif ( $type_iend_1 eq '(' ) {
17526
17527                    # No longer doing this
17528                }
17529
17530                elsif ( $type_iend_1 eq ')' ) {
17531
17532                    # No longer doing this
17533                }
17534
17535                # keep a terminal for-semicolon
17536                elsif ( $type_iend_1 eq 'f' ) {
17537                    next;
17538                }
17539
17540                # if '=' at end of line ...
17541                elsif ( $is_assignment{$type_iend_1} ) {
17542
17543                    # keep break after = if it was in input stream
17544                    # this helps prevent 'blinkers'
17545                    next if $old_breakpoint_to_go[$iend_1]
17546
17547                      # don't strand an isolated '='
17548                      && $iend_1 != $ibeg_1;
17549
17550                    my $is_short_quote =
17551                      (      $type_ibeg_2 eq 'Q'
17552                          && $ibeg_2 == $iend_2
17553                          && token_sequence_length( $ibeg_2, $ibeg_2 ) <
17554                          $rOpts_short_concatenation_item_length );
17555                    my $is_ternary =
17556                      ( $type_ibeg_1 eq '?'
17557                          && ( $ibeg_3 >= 0 && $types_to_go[$ibeg_3] eq ':' ) );
17558
17559                    # always join an isolated '=', a short quote, or if this
17560                    # will put ?/: at start of adjacent lines
17561                    if (   $ibeg_1 != $iend_1
17562                        && !$is_short_quote
17563                        && !$is_ternary )
17564                    {
17565                        next
17566                          unless (
17567                            (
17568
17569                                # unless we can reduce this to two lines
17570                                $nmax < $n + 2
17571
17572                             # or three lines, the last with a leading semicolon
17573                                || (   $nmax == $n + 2
17574                                    && $types_to_go[$ibeg_nmax] eq ';' )
17575
17576                                # or the next line ends with a here doc
17577                                || $type_iend_2 eq 'h'
17578
17579                               # or the next line ends in an open paren or brace
17580                               # and the break hasn't been forced [dima.t]
17581                                || (  !$forced_breakpoint_to_go[$iend_1]
17582                                    && $type_iend_2 eq '{' )
17583                            )
17584
17585                            # do not recombine if the two lines might align well
17586                            # this is a very approximate test for this
17587                            && (   $ibeg_3 >= 0
17588                                && $type_ibeg_2 ne $types_to_go[$ibeg_3] )
17589                          );
17590
17591                        if (
17592
17593                            # Recombine if we can make two lines
17594                            $nmax >= $n + 2
17595
17596                            # -lp users often prefer this:
17597                            #  my $title = function($env, $env, $sysarea,
17598                            #                       "bubba Borrower Entry");
17599                            #  so we will recombine if -lp is used we have
17600                            #  ending comma
17601                            && (  !$rOpts_line_up_parentheses
17602                                || $type_iend_2 ne ',' )
17603                          )
17604                        {
17605
17606                           # otherwise, scan the rhs line up to last token for
17607                           # complexity.  Note that we are not counting the last
17608                           # token in case it is an opening paren.
17609                            my $tv    = 0;
17610                            my $depth = $nesting_depth_to_go[$ibeg_2];
17611                            for ( my $i = $ibeg_2 + 1 ; $i < $iend_2 ; $i++ ) {
17612                                if ( $nesting_depth_to_go[$i] != $depth ) {
17613                                    $tv++;
17614                                    last if ( $tv > 1 );
17615                                }
17616                                $depth = $nesting_depth_to_go[$i];
17617                            }
17618
17619                         # ok to recombine if no level changes before last token
17620                            if ( $tv > 0 ) {
17621
17622                                # otherwise, do not recombine if more than two
17623                                # level changes.
17624                                next if ( $tv > 1 );
17625
17626                              # check total complexity of the two adjacent lines
17627                              # that will occur if we do this join
17628                                my $istop =
17629                                  ( $n < $nmax ) ? $$ri_end[ $n + 1 ] : $iend_2;
17630                                for ( my $i = $iend_2 ; $i <= $istop ; $i++ ) {
17631                                    if ( $nesting_depth_to_go[$i] != $depth ) {
17632                                        $tv++;
17633                                        last if ( $tv > 2 );
17634                                    }
17635                                    $depth = $nesting_depth_to_go[$i];
17636                                }
17637
17638                        # do not recombine if total is more than 2 level changes
17639                                next if ( $tv > 2 );
17640                            }
17641                        }
17642                    }
17643
17644                    unless ( $tokens_to_go[$ibeg_2] =~ /^[\{\(\[]$/ ) {
17645                        $forced_breakpoint_to_go[$iend_1] = 0;
17646                    }
17647                }
17648
17649                # for keywords..
17650                elsif ( $type_iend_1 eq 'k' ) {
17651
17652                    # make major control keywords stand out
17653                    # (recombine.t)
17654                    next
17655                      if (
17656
17657                        #/^(last|next|redo|return)$/
17658                        $is_last_next_redo_return{ $tokens_to_go[$iend_1] }
17659
17660                        # but only if followed by multiple lines
17661                        && $n < $nmax
17662                      );
17663
17664                    if ( $is_and_or{ $tokens_to_go[$iend_1] } ) {
17665                        next
17666                          unless $want_break_before{ $tokens_to_go[$iend_1] };
17667                    }
17668                }
17669
17670                #----------------------------------------------------------
17671                # Recombine Section 3:
17672                # Examine token at $ibeg_2 (left end of second line of pair)
17673                #----------------------------------------------------------
17674
17675                # join lines identified above as capable of
17676                # causing an outdented line with leading closing paren
17677                # Note that we are skipping the rest of this section
17678                if ($previous_outdentable_closing_paren) {
17679                    $forced_breakpoint_to_go[$iend_1] = 0;
17680                }
17681
17682                # handle lines with leading &&, ||
17683                elsif ( $is_amp_amp{$type_ibeg_2} ) {
17684
17685                    $leading_amp_count++;
17686
17687                    # ok to recombine if it follows a ? or :
17688                    # and is followed by an open paren..
17689                    my $ok =
17690                      (      $is_ternary{$type_ibeg_1}
17691                          && $tokens_to_go[$iend_2] eq '(' )
17692
17693                    # or is followed by a ? or : at same depth
17694                    #
17695                    # We are looking for something like this. We can
17696                    # recombine the && line with the line above to make the
17697                    # structure more clear:
17698                    #  return
17699                    #    exists $G->{Attr}->{V}
17700                    #    && exists $G->{Attr}->{V}->{$u}
17701                    #    ? %{ $G->{Attr}->{V}->{$u} }
17702                    #    : ();
17703                    #
17704                    # We should probably leave something like this alone:
17705                    #  return
17706                    #       exists $G->{Attr}->{E}
17707                    #    && exists $G->{Attr}->{E}->{$u}
17708                    #    && exists $G->{Attr}->{E}->{$u}->{$v}
17709                    #    ? %{ $G->{Attr}->{E}->{$u}->{$v} }
17710                    #    : ();
17711                    # so that we either have all of the &&'s (or ||'s)
17712                    # on one line, as in the first example, or break at
17713                    # each one as in the second example.  However, it
17714                    # sometimes makes things worse to check for this because
17715                    # it prevents multiple recombinations.  So this is not done.
17716                      || ( $ibeg_3 >= 0
17717                        && $is_ternary{ $types_to_go[$ibeg_3] }
17718                        && $nesting_depth_to_go[$ibeg_3] ==
17719                        $nesting_depth_to_go[$ibeg_2] );
17720
17721                    next if !$ok && $want_break_before{$type_ibeg_2};
17722                    $forced_breakpoint_to_go[$iend_1] = 0;
17723
17724                    # tweak the bond strength to give this joint priority
17725                    # over ? and :
17726                    $bs_tweak = 0.25;
17727                }
17728
17729                # Identify and recombine a broken ?/: chain
17730                elsif ( $type_ibeg_2 eq '?' ) {
17731
17732                    # Do not recombine different levels
17733                    my $lev = $levels_to_go[$ibeg_2];
17734                    next if ( $lev ne $levels_to_go[$ibeg_1] );
17735
17736                    # Do not recombine a '?' if either next line or
17737                    # previous line does not start with a ':'.  The reasons
17738                    # are that (1) no alignment of the ? will be possible
17739                    # and (2) the expression is somewhat complex, so the
17740                    # '?' is harder to see in the interior of the line.
17741                    my $follows_colon = $ibeg_1 >= 0 && $type_ibeg_1 eq ':';
17742                    my $precedes_colon =
17743                      $ibeg_3 >= 0 && $types_to_go[$ibeg_3] eq ':';
17744                    next unless ( $follows_colon || $precedes_colon );
17745
17746                    # we will always combining a ? line following a : line
17747                    if ( !$follows_colon ) {
17748
17749                        # ...otherwise recombine only if it looks like a chain.
17750                        # we will just look at a few nearby lines to see if
17751                        # this looks like a chain.
17752                        my $local_count = 0;
17753                        foreach my $ii ( $ibeg_0, $ibeg_1, $ibeg_3, $ibeg_4 ) {
17754                            $local_count++
17755                              if $ii >= 0
17756                              && $types_to_go[$ii] eq ':'
17757                              && $levels_to_go[$ii] == $lev;
17758                        }
17759                        next unless ( $local_count > 1 );
17760                    }
17761                    $forced_breakpoint_to_go[$iend_1] = 0;
17762                }
17763
17764                # do not recombine lines with leading '.'
17765                elsif ( $type_ibeg_2 eq '.' ) {
17766                    my $i_next_nonblank = min( $inext_to_go[$ibeg_2], $iend_2 );
17767                    next
17768                      unless (
17769
17770                   # ... unless there is just one and we can reduce
17771                   # this to two lines if we do.  For example, this
17772                   #
17773                   #
17774                   #  $bodyA .=
17775                   #    '($dummy, $pat) = &get_next_tex_cmd;' . '$args .= $pat;'
17776                   #
17777                   #  looks better than this:
17778                   #  $bodyA .= '($dummy, $pat) = &get_next_tex_cmd;'
17779                   #    . '$args .= $pat;'
17780
17781                        (
17782                               $n == 2
17783                            && $n == $nmax
17784                            && $type_ibeg_1 ne $type_ibeg_2
17785                        )
17786
17787                        #  ... or this would strand a short quote , like this
17788                        #                . "some long qoute"
17789                        #                . "\n";
17790
17791                        || (   $types_to_go[$i_next_nonblank] eq 'Q'
17792                            && $i_next_nonblank >= $iend_2 - 1
17793                            && $token_lengths_to_go[$i_next_nonblank] <
17794                            $rOpts_short_concatenation_item_length )
17795                      );
17796                }
17797
17798                # handle leading keyword..
17799                elsif ( $type_ibeg_2 eq 'k' ) {
17800
17801                    # handle leading "or"
17802                    if ( $tokens_to_go[$ibeg_2] eq 'or' ) {
17803                        next
17804                          unless (
17805                            $this_line_is_semicolon_terminated
17806                            && (
17807
17808                                # following 'if' or 'unless' or 'or'
17809                                $type_ibeg_1 eq 'k'
17810                                && $is_if_unless{ $tokens_to_go[$ibeg_1] }
17811
17812                                # important: only combine a very simple or
17813                                # statement because the step below may have
17814                                # combined a trailing 'and' with this or,
17815                                # and we do not want to then combine
17816                                # everything together
17817                                && ( $iend_2 - $ibeg_2 <= 7 )
17818                            )
17819                          );
17820                    }
17821
17822                    # handle leading 'and'
17823                    elsif ( $tokens_to_go[$ibeg_2] eq 'and' ) {
17824
17825                        # Decide if we will combine a single terminal 'and'
17826                        # after an 'if' or 'unless'.
17827
17828                        #     This looks best with the 'and' on the same
17829                        #     line as the 'if':
17830                        #
17831                        #         $a = 1
17832                        #           if $seconds and $nu < 2;
17833                        #
17834                        #     But this looks better as shown:
17835                        #
17836                        #         $a = 1
17837                        #           if !$this->{Parents}{$_}
17838                        #           or $this->{Parents}{$_} eq $_;
17839                        #
17840                        next
17841                          unless (
17842                            $this_line_is_semicolon_terminated
17843                            && (
17844
17845                                # following 'if' or 'unless' or 'or'
17846                                $type_ibeg_1 eq 'k'
17847                                && (   $is_if_unless{ $tokens_to_go[$ibeg_1] }
17848                                    || $tokens_to_go[$ibeg_1] eq 'or' )
17849                            )
17850                          );
17851                    }
17852
17853                    # handle leading "if" and "unless"
17854                    elsif ( $is_if_unless{ $tokens_to_go[$ibeg_2] } ) {
17855
17856                      # FIXME: This is still experimental..may not be too useful
17857                        next
17858                          unless (
17859                            $this_line_is_semicolon_terminated
17860
17861                            #  previous line begins with 'and' or 'or'
17862                            && $type_ibeg_1 eq 'k'
17863                            && $is_and_or{ $tokens_to_go[$ibeg_1] }
17864
17865                          );
17866                    }
17867
17868                    # handle all other leading keywords
17869                    else {
17870
17871                        # keywords look best at start of lines,
17872                        # but combine things like "1 while"
17873                        unless ( $is_assignment{$type_iend_1} ) {
17874                            next
17875                              if ( ( $type_iend_1 ne 'k' )
17876                                && ( $tokens_to_go[$ibeg_2] ne 'while' ) );
17877                        }
17878                    }
17879                }
17880
17881                # similar treatment of && and || as above for 'and' and 'or':
17882                # NOTE: This block of code is currently bypassed because
17883                # of a previous block but is retained for possible future use.
17884                elsif ( $is_amp_amp{$type_ibeg_2} ) {
17885
17886                    # maybe looking at something like:
17887                    # unless $TEXTONLY || $item =~ m%</?(hr>|p>|a|img)%i;
17888
17889                    next
17890                      unless (
17891                        $this_line_is_semicolon_terminated
17892
17893                        # previous line begins with an 'if' or 'unless' keyword
17894                        && $type_ibeg_1 eq 'k'
17895                        && $is_if_unless{ $tokens_to_go[$ibeg_1] }
17896
17897                      );
17898                }
17899
17900                # handle line with leading = or similar
17901                elsif ( $is_assignment{$type_ibeg_2} ) {
17902                    next unless ( $n == 1 || $n == $nmax );
17903                    next if $old_breakpoint_to_go[$iend_1];
17904                    next
17905                      unless (
17906
17907                        # unless we can reduce this to two lines
17908                        $nmax == 2
17909
17910                        # or three lines, the last with a leading semicolon
17911                        || ( $nmax == 3 && $types_to_go[$ibeg_nmax] eq ';' )
17912
17913                        # or the next line ends with a here doc
17914                        || $type_iend_2 eq 'h'
17915
17916                        # or this is a short line ending in ;
17917                        || ( $n == $nmax && $this_line_is_semicolon_terminated )
17918                      );
17919                    $forced_breakpoint_to_go[$iend_1] = 0;
17920                }
17921
17922                #----------------------------------------------------------
17923                # Recombine Section 4:
17924                # Combine the lines if we arrive here and it is possible
17925                #----------------------------------------------------------
17926
17927                # honor hard breakpoints
17928                next if ( $forced_breakpoint_to_go[$iend_1] > 0 );
17929
17930                my $bs = $bond_strength_to_go[$iend_1] + $bs_tweak;
17931
17932                # combined line cannot be too long
17933                my $excess = excess_line_length( $ibeg_1, $iend_2 );
17934                next if ( $excess > 0 );
17935
17936                # Require a few extra spaces before recombining lines if we are
17937                # at an old breakpoint unless this is a simple list or terminal
17938                # line.  The goal is to avoid oscillating between two
17939                # quasi-stable end states.  For example this snippet caused
17940                # problems:
17941##    my $this =
17942##    bless {
17943##        TText => "[" . ( join ',', map { "\"$_\"" } split "\n", $_ ) . "]"
17944##      },
17945##      $type;
17946                next
17947                  if ( $old_breakpoint_to_go[$iend_1]
17948                    && !$this_line_is_semicolon_terminated
17949                    && $n < $nmax
17950                    && $excess + 4 > 0
17951                    && $type_iend_2 ne ',' );
17952
17953                # do not recombine if we would skip in indentation levels
17954                if ( $n < $nmax ) {
17955                    my $if_next = $$ri_beg[ $n + 1 ];
17956                    next
17957                      if (
17958                           $levels_to_go[$ibeg_1] < $levels_to_go[$ibeg_2]
17959                        && $levels_to_go[$ibeg_2] < $levels_to_go[$if_next]
17960
17961                        # but an isolated 'if (' is undesirable
17962                        && !(
17963                               $n == 1
17964                            && $iend_1 - $ibeg_1 <= 2
17965                            && $type_ibeg_1 eq 'k'
17966                            && $tokens_to_go[$ibeg_1] eq 'if'
17967                            && $tokens_to_go[$iend_1] ne '('
17968                        )
17969                      );
17970                }
17971
17972                # honor no-break's
17973                next if ( $bs >= NO_BREAK - 1 );
17974
17975                # remember the pair with the greatest bond strength
17976                if ( !$n_best ) {
17977                    $n_best  = $n;
17978                    $bs_best = $bs;
17979                }
17980                else {
17981
17982                    if ( $bs > $bs_best ) {
17983                        $n_best  = $n;
17984                        $bs_best = $bs;
17985                    }
17986                }
17987            }
17988
17989            # recombine the pair with the greatest bond strength
17990            if ($n_best) {
17991                splice @$ri_beg, $n_best, 1;
17992                splice @$ri_end, $n_best - 1, 1;
17993                splice @joint, $n_best, 1;
17994
17995                # keep going if we are still making progress
17996                $more_to_do++;
17997            }
17998        }
17999        return ( $ri_beg, $ri_end );
18000    }
18001}    # end recombine_breakpoints
18002
18003sub break_all_chain_tokens {
18004
18005    # scan the current breakpoints looking for breaks at certain "chain
18006    # operators" (. : && || + etc) which often occur repeatedly in a long
18007    # statement.  If we see a break at any one, break at all similar tokens
18008    # within the same container.
18009    #
18010    my ( $ri_left, $ri_right ) = @_;
18011
18012    my %saw_chain_type;
18013    my %left_chain_type;
18014    my %right_chain_type;
18015    my %interior_chain_type;
18016    my $nmax = @$ri_right - 1;
18017
18018    # scan the left and right end tokens of all lines
18019    my $count = 0;
18020    for my $n ( 0 .. $nmax ) {
18021        my $il    = $$ri_left[$n];
18022        my $ir    = $$ri_right[$n];
18023        my $typel = $types_to_go[$il];
18024        my $typer = $types_to_go[$ir];
18025        $typel = '+' if ( $typel eq '-' );    # treat + and - the same
18026        $typer = '+' if ( $typer eq '-' );
18027        $typel = '*' if ( $typel eq '/' );    # treat * and / the same
18028        $typer = '*' if ( $typer eq '/' );
18029        my $tokenl = $tokens_to_go[$il];
18030        my $tokenr = $tokens_to_go[$ir];
18031
18032        if ( $is_chain_operator{$tokenl} && $want_break_before{$typel} ) {
18033            next if ( $typel eq '?' );
18034            push @{ $left_chain_type{$typel} }, $il;
18035            $saw_chain_type{$typel} = 1;
18036            $count++;
18037        }
18038        if ( $is_chain_operator{$tokenr} && !$want_break_before{$typer} ) {
18039            next if ( $typer eq '?' );
18040            push @{ $right_chain_type{$typer} }, $ir;
18041            $saw_chain_type{$typer} = 1;
18042            $count++;
18043        }
18044    }
18045    return unless $count;
18046
18047    # now look for any interior tokens of the same types
18048    $count = 0;
18049    for my $n ( 0 .. $nmax ) {
18050        my $il = $$ri_left[$n];
18051        my $ir = $$ri_right[$n];
18052        for ( my $i = $il + 1 ; $i < $ir ; $i++ ) {
18053            my $type = $types_to_go[$i];
18054            $type = '+' if ( $type eq '-' );
18055            $type = '*' if ( $type eq '/' );
18056            if ( $saw_chain_type{$type} ) {
18057                push @{ $interior_chain_type{$type} }, $i;
18058                $count++;
18059            }
18060        }
18061    }
18062    return unless $count;
18063
18064    # now make a list of all new break points
18065    my @insert_list;
18066
18067    # loop over all chain types
18068    foreach my $type ( keys %saw_chain_type ) {
18069
18070        # quit if just ONE continuation line with leading .  For example--
18071        # print LATEXFILE '\framebox{\parbox[c][' . $h . '][t]{' . $w . '}{'
18072        #  . $contents;
18073        last if ( $nmax == 1 && $type =~ /^[\.\+]$/ );
18074
18075        # loop over all interior chain tokens
18076        foreach my $itest ( @{ $interior_chain_type{$type} } ) {
18077
18078            # loop over all left end tokens of same type
18079            if ( $left_chain_type{$type} ) {
18080                next if $nobreak_to_go[ $itest - 1 ];
18081                foreach my $i ( @{ $left_chain_type{$type} } ) {
18082                    next unless in_same_container( $i, $itest );
18083                    push @insert_list, $itest - 1;
18084
18085                    # Break at matching ? if this : is at a different level.
18086                    # For example, the ? before $THRf_DEAD in the following
18087                    # should get a break if its : gets a break.
18088                    #
18089                    # my $flags =
18090                    #     ( $_ & 1 ) ? ( $_ & 4 ) ? $THRf_DEAD : $THRf_ZOMBIE
18091                    #   : ( $_ & 4 ) ? $THRf_R_DETACHED
18092                    #   :              $THRf_R_JOINABLE;
18093                    if (   $type eq ':'
18094                        && $levels_to_go[$i] != $levels_to_go[$itest] )
18095                    {
18096                        my $i_question = $mate_index_to_go[$itest];
18097                        if ( $i_question > 0 ) {
18098                            push @insert_list, $i_question - 1;
18099                        }
18100                    }
18101                    last;
18102                }
18103            }
18104
18105            # loop over all right end tokens of same type
18106            if ( $right_chain_type{$type} ) {
18107                next if $nobreak_to_go[$itest];
18108                foreach my $i ( @{ $right_chain_type{$type} } ) {
18109                    next unless in_same_container( $i, $itest );
18110                    push @insert_list, $itest;
18111
18112                    # break at matching ? if this : is at a different level
18113                    if (   $type eq ':'
18114                        && $levels_to_go[$i] != $levels_to_go[$itest] )
18115                    {
18116                        my $i_question = $mate_index_to_go[$itest];
18117                        if ( $i_question >= 0 ) {
18118                            push @insert_list, $i_question;
18119                        }
18120                    }
18121                    last;
18122                }
18123            }
18124        }
18125    }
18126
18127    # insert any new break points
18128    if (@insert_list) {
18129        insert_additional_breaks( \@insert_list, $ri_left, $ri_right );
18130    }
18131}
18132
18133sub break_equals {
18134
18135    # Look for assignment operators that could use a breakpoint.
18136    # For example, in the following snippet
18137    #
18138    #    $HOME = $ENV{HOME}
18139    #      || $ENV{LOGDIR}
18140    #      || $pw[7]
18141    #      || die "no home directory for user $<";
18142    #
18143    # we could break at the = to get this, which is a little nicer:
18144    #    $HOME =
18145    #         $ENV{HOME}
18146    #      || $ENV{LOGDIR}
18147    #      || $pw[7]
18148    #      || die "no home directory for user $<";
18149    #
18150    # The logic here follows the logic in set_logical_padding, which
18151    # will add the padding in the second line to improve alignment.
18152    #
18153    my ( $ri_left, $ri_right ) = @_;
18154    my $nmax = @$ri_right - 1;
18155    return unless ( $nmax >= 2 );
18156
18157    # scan the left ends of first two lines
18158    my $tokbeg = "";
18159    my $depth_beg;
18160    for my $n ( 1 .. 2 ) {
18161        my $il     = $$ri_left[$n];
18162        my $typel  = $types_to_go[$il];
18163        my $tokenl = $tokens_to_go[$il];
18164
18165        my $has_leading_op = ( $tokenl =~ /^\w/ )
18166          ? $is_chain_operator{$tokenl}    # + - * / : ? && ||
18167          : $is_chain_operator{$typel};    # and, or
18168        return unless ($has_leading_op);
18169        if ( $n > 1 ) {
18170            return
18171              unless ( $tokenl eq $tokbeg
18172                && $nesting_depth_to_go[$il] eq $depth_beg );
18173        }
18174        $tokbeg    = $tokenl;
18175        $depth_beg = $nesting_depth_to_go[$il];
18176    }
18177
18178    # now look for any interior tokens of the same types
18179    my $il = $$ri_left[0];
18180    my $ir = $$ri_right[0];
18181
18182    # now make a list of all new break points
18183    my @insert_list;
18184    for ( my $i = $ir - 1 ; $i > $il ; $i-- ) {
18185        my $type = $types_to_go[$i];
18186        if (   $is_assignment{$type}
18187            && $nesting_depth_to_go[$i] eq $depth_beg )
18188        {
18189            if ( $want_break_before{$type} ) {
18190                push @insert_list, $i - 1;
18191            }
18192            else {
18193                push @insert_list, $i;
18194            }
18195        }
18196    }
18197
18198    # Break after a 'return' followed by a chain of operators
18199    #  return ( $^O !~ /win32|dos/i )
18200    #    && ( $^O ne 'VMS' )
18201    #    && ( $^O ne 'OS2' )
18202    #    && ( $^O ne 'MacOS' );
18203    # To give:
18204    #  return
18205    #       ( $^O !~ /win32|dos/i )
18206    #    && ( $^O ne 'VMS' )
18207    #    && ( $^O ne 'OS2' )
18208    #    && ( $^O ne 'MacOS' );
18209    my $i = 0;
18210    if (   $types_to_go[$i] eq 'k'
18211        && $tokens_to_go[$i] eq 'return'
18212        && $ir > $il
18213        && $nesting_depth_to_go[$i] eq $depth_beg )
18214    {
18215        push @insert_list, $i;
18216    }
18217
18218    return unless (@insert_list);
18219
18220    # One final check...
18221    # scan second and third lines and be sure there are no assignments
18222    # we want to avoid breaking at an = to make something like this:
18223    #    unless ( $icon =
18224    #           $html_icons{"$type-$state"}
18225    #        or $icon = $html_icons{$type}
18226    #        or $icon = $html_icons{$state} )
18227    for my $n ( 1 .. 2 ) {
18228        my $il = $$ri_left[$n];
18229        my $ir = $$ri_right[$n];
18230        for ( my $i = $il + 1 ; $i <= $ir ; $i++ ) {
18231            my $type = $types_to_go[$i];
18232            return
18233              if ( $is_assignment{$type}
18234                && $nesting_depth_to_go[$i] eq $depth_beg );
18235        }
18236    }
18237
18238    # ok, insert any new break point
18239    if (@insert_list) {
18240        insert_additional_breaks( \@insert_list, $ri_left, $ri_right );
18241    }
18242}
18243
18244sub insert_final_breaks {
18245
18246    my ( $ri_left, $ri_right ) = @_;
18247
18248    my $nmax = @$ri_right - 1;
18249
18250    # scan the left and right end tokens of all lines
18251    my $count         = 0;
18252    my $i_first_colon = -1;
18253    for my $n ( 0 .. $nmax ) {
18254        my $il    = $$ri_left[$n];
18255        my $ir    = $$ri_right[$n];
18256        my $typel = $types_to_go[$il];
18257        my $typer = $types_to_go[$ir];
18258        return if ( $typel eq '?' );
18259        return if ( $typer eq '?' );
18260        if    ( $typel eq ':' ) { $i_first_colon = $il; last; }
18261        elsif ( $typer eq ':' ) { $i_first_colon = $ir; last; }
18262    }
18263
18264    # For long ternary chains,
18265    # if the first : we see has its # ? is in the interior
18266    # of a preceding line, then see if there are any good
18267    # breakpoints before the ?.
18268    if ( $i_first_colon > 0 ) {
18269        my $i_question = $mate_index_to_go[$i_first_colon];
18270        if ( $i_question > 0 ) {
18271            my @insert_list;
18272            for ( my $ii = $i_question - 1 ; $ii >= 0 ; $ii -= 1 ) {
18273                my $token = $tokens_to_go[$ii];
18274                my $type  = $types_to_go[$ii];
18275
18276                # For now, a good break is either a comma or a 'return'.
18277                if ( ( $type eq ',' || $type eq 'k' && $token eq 'return' )
18278                    && in_same_container( $ii, $i_question ) )
18279                {
18280                    push @insert_list, $ii;
18281                    last;
18282                }
18283            }
18284
18285            # insert any new break points
18286            if (@insert_list) {
18287                insert_additional_breaks( \@insert_list, $ri_left, $ri_right );
18288            }
18289        }
18290    }
18291}
18292
18293sub in_same_container {
18294
18295    # check to see if tokens at i1 and i2 are in the
18296    # same container, and not separated by a comma, ? or :
18297    my ( $i1, $i2 ) = @_;
18298    my $type  = $types_to_go[$i1];
18299    my $depth = $nesting_depth_to_go[$i1];
18300    return unless ( $nesting_depth_to_go[$i2] == $depth );
18301    if ( $i2 < $i1 ) { ( $i1, $i2 ) = ( $i2, $i1 ) }
18302
18303    ###########################################################
18304    # This is potentially a very slow routine and not critical.
18305    # For safety just give up for large differences.
18306    # See test file 'infinite_loop.txt'
18307    # TODO: replace this loop with a data structure
18308    ###########################################################
18309    return if ( $i2 - $i1 > 200 );
18310
18311    for ( my $i = $i1 + 1 ; $i < $i2 ; $i++ ) {
18312        next   if ( $nesting_depth_to_go[$i] > $depth );
18313        return if ( $nesting_depth_to_go[$i] < $depth );
18314
18315        my $tok = $tokens_to_go[$i];
18316        $tok = ',' if $tok eq '=>';    # treat => same as ,
18317
18318        # Example: we would not want to break at any of these .'s
18319        #  : "<A HREF=\"#item_" . htmlify( 0, $s2 ) . "\">$str</A>"
18320        if ( $type ne ':' ) {
18321            return if ( $tok =~ /^[\,\:\?]$/ ) || $tok eq '||' || $tok eq 'or';
18322        }
18323        else {
18324            return if ( $tok =~ /^[\,]$/ );
18325        }
18326    }
18327    return 1;
18328}
18329
18330sub set_continuation_breaks {
18331
18332    # Define an array of indexes for inserting newline characters to
18333    # keep the line lengths below the maximum desired length.  There is
18334    # an implied break after the last token, so it need not be included.
18335
18336    # Method:
18337    # This routine is part of series of routines which adjust line
18338    # lengths.  It is only called if a statement is longer than the
18339    # maximum line length, or if a preliminary scanning located
18340    # desirable break points.   Sub scan_list has already looked at
18341    # these tokens and set breakpoints (in array
18342    # $forced_breakpoint_to_go[$i]) where it wants breaks (for example
18343    # after commas, after opening parens, and before closing parens).
18344    # This routine will honor these breakpoints and also add additional
18345    # breakpoints as necessary to keep the line length below the maximum
18346    # requested.  It bases its decision on where the 'bond strength' is
18347    # lowest.
18348
18349    # Output: returns references to the arrays:
18350    #  @i_first
18351    #  @i_last
18352    # which contain the indexes $i of the first and last tokens on each
18353    # line.
18354
18355    # In addition, the array:
18356    #   $forced_breakpoint_to_go[$i]
18357    # may be updated to be =1 for any index $i after which there must be
18358    # a break.  This signals later routines not to undo the breakpoint.
18359
18360    my $saw_good_break = shift;
18361    my @i_first        = ();      # the first index to output
18362    my @i_last         = ();      # the last index to output
18363    my @i_colon_breaks = ();      # needed to decide if we have to break at ?'s
18364    if ( $types_to_go[0] eq ':' ) { push @i_colon_breaks, 0 }
18365
18366    set_bond_strengths();
18367
18368    my $imin = 0;
18369    my $imax = $max_index_to_go;
18370    if ( $types_to_go[$imin] eq 'b' ) { $imin++ }
18371    if ( $types_to_go[$imax] eq 'b' ) { $imax-- }
18372    my $i_begin = $imin;          # index for starting next iteration
18373
18374    my $leading_spaces          = leading_spaces_to_go($imin);
18375    my $line_count              = 0;
18376    my $last_break_strength     = NO_BREAK;
18377    my $i_last_break            = -1;
18378    my $max_bias                = 0.001;
18379    my $tiny_bias               = 0.0001;
18380    my $leading_alignment_token = "";
18381    my $leading_alignment_type  = "";
18382
18383    # see if any ?/:'s are in order
18384    my $colons_in_order = 1;
18385    my $last_tok        = "";
18386    my @colon_list  = grep /^[\?\:]$/, @types_to_go[ 0 .. $max_index_to_go ];
18387    my $colon_count = @colon_list;
18388    foreach (@colon_list) {
18389        if ( $_ eq $last_tok ) { $colons_in_order = 0; last }
18390        $last_tok = $_;
18391    }
18392
18393    # This is a sufficient but not necessary condition for colon chain
18394    my $is_colon_chain = ( $colons_in_order && @colon_list > 2 );
18395
18396    #-------------------------------------------------------
18397    # BEGINNING of main loop to set continuation breakpoints
18398    # Keep iterating until we reach the end
18399    #-------------------------------------------------------
18400    while ( $i_begin <= $imax ) {
18401        my $lowest_strength        = NO_BREAK;
18402        my $starting_sum           = $summed_lengths_to_go[$i_begin];
18403        my $i_lowest               = -1;
18404        my $i_test                 = -1;
18405        my $lowest_next_token      = '';
18406        my $lowest_next_type       = 'b';
18407        my $i_lowest_next_nonblank = -1;
18408
18409        #-------------------------------------------------------
18410        # BEGINNING of inner loop to find the best next breakpoint
18411        #-------------------------------------------------------
18412        for ( $i_test = $i_begin ; $i_test <= $imax ; $i_test++ ) {
18413            my $type                     = $types_to_go[$i_test];
18414            my $token                    = $tokens_to_go[$i_test];
18415            my $next_type                = $types_to_go[ $i_test + 1 ];
18416            my $next_token               = $tokens_to_go[ $i_test + 1 ];
18417            my $i_next_nonblank          = $inext_to_go[$i_test];
18418            my $next_nonblank_type       = $types_to_go[$i_next_nonblank];
18419            my $next_nonblank_token      = $tokens_to_go[$i_next_nonblank];
18420            my $next_nonblank_block_type = $block_type_to_go[$i_next_nonblank];
18421            my $strength                 = $bond_strength_to_go[$i_test];
18422            my $maximum_line_length      = maximum_line_length($i_begin);
18423
18424            # use old breaks as a tie-breaker.  For example to
18425            # prevent blinkers with -pbp in this code:
18426
18427##@keywords{
18428##    qw/ARG OUTPUT PROTO CONSTRUCTOR RETURNS DESC PARAMS SEEALSO EXAMPLE/}
18429##    = ();
18430
18431            # At the same time try to prevent a leading * in this code
18432            # with the default formatting:
18433            #
18434##                return
18435##                    factorial( $a + $b - 1 ) / factorial( $a - 1 ) / factorial( $b - 1 )
18436##                  * ( $x**( $a - 1 ) )
18437##                  * ( ( 1 - $x )**( $b - 1 ) );
18438
18439            # reduce strength a bit to break ties at an old breakpoint ...
18440            if (
18441                $old_breakpoint_to_go[$i_test]
18442
18443                # which is a 'good' breakpoint, meaning ...
18444                # we don't want to break before it
18445                && !$want_break_before{$type}
18446
18447                # and either we want to break before the next token
18448                # or the next token is not short (i.e. not a '*', '/' etc.)
18449                && $i_next_nonblank <= $imax
18450                && (   $want_break_before{$next_nonblank_type}
18451                    || $token_lengths_to_go[$i_next_nonblank] > 2
18452                    || $next_nonblank_type =~ /^[\,\(\[\{L]$/ )
18453              )
18454            {
18455                $strength -= $tiny_bias;
18456            }
18457
18458            # otherwise increase strength a bit if this token would be at the
18459            # maximum line length.  This is necessary to avoid blinking
18460            # in the above example when the -iob flag is added.
18461            else {
18462                my $len =
18463                  $leading_spaces +
18464                  $summed_lengths_to_go[ $i_test + 1 ] -
18465                  $starting_sum;
18466                if ( $len >= $maximum_line_length ) {
18467                    $strength += $tiny_bias;
18468                }
18469            }
18470
18471            my $must_break = 0;
18472
18473            # Force an immediate break at certain operators
18474            # with lower level than the start of the line,
18475            # unless we've already seen a better break.
18476            #
18477            ##############################################
18478            # Note on an issue with a preceding ?
18479            ##############################################
18480            # We don't include a ? in the above list, but there may
18481            # be a break at a previous ? if the line is long.
18482            # Because of this we do not want to force a break if
18483            # there is a previous ? on this line.  For now the best way
18484            # to do this is to not break if we have seen a lower strength
18485            # point, which is probably a ?.
18486            #
18487            # Example of unwanted breaks we are avoiding at a '.' following a ?
18488            # from pod2html using perltidy -gnu:
18489            # )
18490            # ? "\n&lt;A NAME=\""
18491            # . $value
18492            # . "\"&gt;\n$text&lt;/A&gt;\n"
18493            # : "\n$type$pod2.html\#" . $value . "\"&gt;$text&lt;\/A&gt;\n";
18494            if (
18495                (
18496                    $next_nonblank_type =~ /^(\.|\&\&|\|\|)$/
18497                    || (   $next_nonblank_type eq 'k'
18498                        && $next_nonblank_token =~ /^(and|or)$/ )
18499                )
18500                && ( $nesting_depth_to_go[$i_begin] >
18501                    $nesting_depth_to_go[$i_next_nonblank] )
18502                && ( $strength <= $lowest_strength )
18503              )
18504            {
18505                set_forced_breakpoint($i_next_nonblank);
18506            }
18507
18508            if (
18509
18510                # Try to put a break where requested by scan_list
18511                $forced_breakpoint_to_go[$i_test]
18512
18513                # break between ) { in a continued line so that the '{' can
18514                # be outdented
18515                # See similar logic in scan_list which catches instances
18516                # where a line is just something like ') {'.  We have to
18517                # be careful because the corresponding block keyword might
18518                # not be on the first line, such as 'for' here:
18519                #
18520                # eval {
18521                #     for ("a") {
18522                #         for $x ( 1, 2 ) { local $_ = "b"; s/(.*)/+$1/ }
18523                #     }
18524                # };
18525                #
18526                || (   $line_count
18527                    && ( $token eq ')' )
18528                    && ( $next_nonblank_type eq '{' )
18529                    && ($next_nonblank_block_type)
18530                    && ( $next_nonblank_block_type ne $tokens_to_go[$i_begin] )
18531                    && !$rOpts->{'opening-brace-always-on-right'} )
18532
18533                # There is an implied forced break at a terminal opening brace
18534                || ( ( $type eq '{' ) && ( $i_test == $imax ) )
18535              )
18536            {
18537
18538                # Forced breakpoints must sometimes be overridden, for example
18539                # because of a side comment causing a NO_BREAK.  It is easier
18540                # to catch this here than when they are set.
18541                if ( $strength < NO_BREAK - 1 ) {
18542                    $strength   = $lowest_strength - $tiny_bias;
18543                    $must_break = 1;
18544                }
18545            }
18546
18547            # quit if a break here would put a good terminal token on
18548            # the next line and we already have a possible break
18549            if (
18550                   !$must_break
18551                && ( $next_nonblank_type =~ /^[\;\,]$/ )
18552                && (
18553                    (
18554                        $leading_spaces +
18555                        $summed_lengths_to_go[ $i_next_nonblank + 1 ] -
18556                        $starting_sum
18557                    ) > $maximum_line_length
18558                )
18559              )
18560            {
18561                last if ( $i_lowest >= 0 );
18562            }
18563
18564            # Avoid a break which would strand a single punctuation
18565            # token.  For example, we do not want to strand a leading
18566            # '.' which is followed by a long quoted string.
18567            # But note that we do want to do this with -extrude (l=1)
18568            # so please test any changes to this code on -extrude.
18569            if (
18570                   !$must_break
18571                && ( $i_test == $i_begin )
18572                && ( $i_test < $imax )
18573                && ( $token eq $type )
18574                && (
18575                    (
18576                        $leading_spaces +
18577                        $summed_lengths_to_go[ $i_test + 1 ] -
18578                        $starting_sum
18579                    ) < $maximum_line_length
18580                )
18581              )
18582            {
18583                $i_test = min( $imax, $inext_to_go[$i_test] );
18584                redo;
18585            }
18586
18587            if ( ( $strength <= $lowest_strength ) && ( $strength < NO_BREAK ) )
18588            {
18589
18590                # break at previous best break if it would have produced
18591                # a leading alignment of certain common tokens, and it
18592                # is different from the latest candidate break
18593                last
18594                  if ($leading_alignment_type);
18595
18596                # Force at least one breakpoint if old code had good
18597                # break It is only called if a breakpoint is required or
18598                # desired.  This will probably need some adjustments
18599                # over time.  A goal is to try to be sure that, if a new
18600                # side comment is introduced into formated text, then
18601                # the same breakpoints will occur.  scbreak.t
18602                last
18603                  if (
18604                    $i_test == $imax              # we are at the end
18605                    && !$forced_breakpoint_count  #
18606                    && $saw_good_break            # old line had good break
18607                    && $type =~ /^[#;\{]$/        # and this line ends in
18608                                                  # ';' or side comment
18609                    && $i_last_break < 0          # and we haven't made a break
18610                    && $i_lowest >= 0             # and we saw a possible break
18611                    && $i_lowest < $imax - 1      # (but not just before this ;)
18612                    && $strength - $lowest_strength < 0.5 * WEAK # and it's good
18613                  );
18614
18615                # Do not skip past an important break point in a short final
18616                # segment.  For example, without this check we would miss the
18617                # break at the final / in the following code:
18618                #
18619                #  $depth_stop =
18620                #    ( $tau * $mass_pellet * $q_0 *
18621                #        ( 1. - exp( -$t_stop / $tau ) ) -
18622                #        4. * $pi * $factor * $k_ice *
18623                #        ( $t_melt - $t_ice ) *
18624                #        $r_pellet *
18625                #        $t_stop ) /
18626                #    ( $rho_ice * $Qs * $pi * $r_pellet**2 );
18627                #
18628                if (   $line_count > 2
18629                    && $i_lowest < $i_test
18630                    && $i_test > $imax - 2
18631                    && $nesting_depth_to_go[$i_begin] >
18632                    $nesting_depth_to_go[$i_lowest]
18633                    && $lowest_strength < $last_break_strength - .5 * WEAK )
18634                {
18635                    # Make this break for math operators for now
18636                    my $ir = $inext_to_go[$i_lowest];
18637                    my $il = $iprev_to_go[$ir];
18638                    last
18639                      if ( $types_to_go[$il] =~ /^[\/\*\+\-\%]$/
18640                        || $types_to_go[$ir] =~ /^[\/\*\+\-\%]$/ );
18641                }
18642
18643                # Update the minimum bond strength location
18644                $lowest_strength        = $strength;
18645                $i_lowest               = $i_test;
18646                $lowest_next_token      = $next_nonblank_token;
18647                $lowest_next_type       = $next_nonblank_type;
18648                $i_lowest_next_nonblank = $i_next_nonblank;
18649                last if $must_break;
18650
18651                # set flags to remember if a break here will produce a
18652                # leading alignment of certain common tokens
18653                if (   $line_count > 0
18654                    && $i_test < $imax
18655                    && ( $lowest_strength - $last_break_strength <= $max_bias )
18656                  )
18657                {
18658                    my $i_last_end = $iprev_to_go[$i_begin];
18659                    my $tok_beg    = $tokens_to_go[$i_begin];
18660                    my $type_beg   = $types_to_go[$i_begin];
18661                    if (
18662
18663                        # check for leading alignment of certain tokens
18664                        (
18665                               $tok_beg eq $next_nonblank_token
18666                            && $is_chain_operator{$tok_beg}
18667                            && (   $type_beg eq 'k'
18668                                || $type_beg eq $tok_beg )
18669                            && $nesting_depth_to_go[$i_begin] >=
18670                            $nesting_depth_to_go[$i_next_nonblank]
18671                        )
18672
18673                        || (   $tokens_to_go[$i_last_end] eq $token
18674                            && $is_chain_operator{$token}
18675                            && ( $type eq 'k' || $type eq $token )
18676                            && $nesting_depth_to_go[$i_last_end] >=
18677                            $nesting_depth_to_go[$i_test] )
18678                      )
18679                    {
18680                        $leading_alignment_token = $next_nonblank_token;
18681                        $leading_alignment_type  = $next_nonblank_type;
18682                    }
18683                }
18684            }
18685
18686            my $too_long = ( $i_test >= $imax );
18687            if ( !$too_long ) {
18688                my $next_length =
18689                  $leading_spaces +
18690                  $summed_lengths_to_go[ $i_test + 2 ] -
18691                  $starting_sum;
18692                $too_long = $next_length > $maximum_line_length;
18693
18694                # To prevent blinkers we will avoid leaving a token exactly at
18695                # the line length limit unless it is the last token or one of
18696                # several "good" types.
18697                #
18698                # The following code was a blinker with -pbp before this
18699                # modification:
18700##                    $last_nonblank_token eq '('
18701##                        && $is_indirect_object_taker{ $paren_type
18702##                            [$paren_depth] }
18703                # The issue causing the problem is that if the
18704                # term [$paren_depth] gets broken across a line then
18705                # the whitespace routine doesn't see both opening and closing
18706                # brackets and will format like '[ $paren_depth ]'.  This
18707                # leads to an oscillation in length depending if we break
18708                # before the closing bracket or not.
18709                if (  !$too_long
18710                    && $i_test + 1 < $imax
18711                    && $next_nonblank_type !~ /^[,\}\]\)R]$/ )
18712                {
18713                    $too_long = $next_length >= $maximum_line_length;
18714                }
18715            }
18716
18717            FORMATTER_DEBUG_FLAG_BREAK
18718              && do {
18719                my $ltok     = $token;
18720                my $rtok     = $next_nonblank_token ? $next_nonblank_token : "";
18721                my $i_testp2 = $i_test + 2;
18722                if ( $i_testp2 > $max_index_to_go + 1 ) {
18723                    $i_testp2 = $max_index_to_go + 1;
18724                }
18725                if ( length($ltok) > 6 ) { $ltok = substr( $ltok, 0, 8 ) }
18726                if ( length($rtok) > 6 ) { $rtok = substr( $rtok, 0, 8 ) }
18727                print STDOUT
18728"BREAK: i=$i_test imax=$imax $types_to_go[$i_test] $next_nonblank_type sp=($leading_spaces) lnext= $summed_lengths_to_go[$i_testp2] 2long=$too_long str=$strength    $ltok $rtok\n";
18729              };
18730
18731            # allow one extra terminal token after exceeding line length
18732            # if it would strand this token.
18733            if (   $rOpts_fuzzy_line_length
18734                && $too_long
18735                && $i_lowest == $i_test
18736                && $token_lengths_to_go[$i_test] > 1
18737                && $next_nonblank_type =~ /^[\;\,]$/ )
18738            {
18739                $too_long = 0;
18740            }
18741
18742            last
18743              if (
18744                ( $i_test == $imax )    # we're done if no more tokens,
18745                || (
18746                    ( $i_lowest >= 0 )    # or no more space and we have a break
18747                    && $too_long
18748                )
18749              );
18750        }
18751
18752        #-------------------------------------------------------
18753        # END of inner loop to find the best next breakpoint
18754        # Now decide exactly where to put the breakpoint
18755        #-------------------------------------------------------
18756
18757        # it's always ok to break at imax if no other break was found
18758        if ( $i_lowest < 0 ) { $i_lowest = $imax }
18759
18760        # semi-final index calculation
18761        my $i_next_nonblank     = $inext_to_go[$i_lowest];
18762        my $next_nonblank_type  = $types_to_go[$i_next_nonblank];
18763        my $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
18764
18765        #-------------------------------------------------------
18766        # ?/: rule 1 : if a break here will separate a '?' on this
18767        # line from its closing ':', then break at the '?' instead.
18768        #-------------------------------------------------------
18769        my $i;
18770        foreach $i ( $i_begin + 1 .. $i_lowest - 1 ) {
18771            next unless ( $tokens_to_go[$i] eq '?' );
18772
18773            # do not break if probable sequence of ?/: statements
18774            next if ($is_colon_chain);
18775
18776            # do not break if statement is broken by side comment
18777            next
18778              if (
18779                $tokens_to_go[$max_index_to_go] eq '#'
18780                && terminal_type( \@types_to_go, \@block_type_to_go, 0,
18781                    $max_index_to_go ) !~ /^[\;\}]$/
18782              );
18783
18784            # no break needed if matching : is also on the line
18785            next
18786              if ( $mate_index_to_go[$i] >= 0
18787                && $mate_index_to_go[$i] <= $i_next_nonblank );
18788
18789            $i_lowest = $i;
18790            if ( $want_break_before{'?'} ) { $i_lowest-- }
18791            last;
18792        }
18793
18794        #-------------------------------------------------------
18795        # END of inner loop to find the best next breakpoint:
18796        # Break the line after the token with index i=$i_lowest
18797        #-------------------------------------------------------
18798
18799        # final index calculation
18800        $i_next_nonblank     = $inext_to_go[$i_lowest];
18801        $next_nonblank_type  = $types_to_go[$i_next_nonblank];
18802        $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
18803
18804        FORMATTER_DEBUG_FLAG_BREAK
18805          && print STDOUT
18806          "BREAK: best is i = $i_lowest strength = $lowest_strength\n";
18807
18808        #-------------------------------------------------------
18809        # ?/: rule 2 : if we break at a '?', then break at its ':'
18810        #
18811        # Note: this rule is also in sub scan_list to handle a break
18812        # at the start and end of a line (in case breaks are dictated
18813        # by side comments).
18814        #-------------------------------------------------------
18815        if ( $next_nonblank_type eq '?' ) {
18816            set_closing_breakpoint($i_next_nonblank);
18817        }
18818        elsif ( $types_to_go[$i_lowest] eq '?' ) {
18819            set_closing_breakpoint($i_lowest);
18820        }
18821
18822        #-------------------------------------------------------
18823        # ?/: rule 3 : if we break at a ':' then we save
18824        # its location for further work below.  We may need to go
18825        # back and break at its '?'.
18826        #-------------------------------------------------------
18827        if ( $next_nonblank_type eq ':' ) {
18828            push @i_colon_breaks, $i_next_nonblank;
18829        }
18830        elsif ( $types_to_go[$i_lowest] eq ':' ) {
18831            push @i_colon_breaks, $i_lowest;
18832        }
18833
18834        # here we should set breaks for all '?'/':' pairs which are
18835        # separated by this line
18836
18837        $line_count++;
18838
18839        # save this line segment, after trimming blanks at the ends
18840        push( @i_first,
18841            ( $types_to_go[$i_begin] eq 'b' ) ? $i_begin + 1 : $i_begin );
18842        push( @i_last,
18843            ( $types_to_go[$i_lowest] eq 'b' ) ? $i_lowest - 1 : $i_lowest );
18844
18845        # set a forced breakpoint at a container opening, if necessary, to
18846        # signal a break at a closing container.  Excepting '(' for now.
18847        if ( $tokens_to_go[$i_lowest] =~ /^[\{\[]$/
18848            && !$forced_breakpoint_to_go[$i_lowest] )
18849        {
18850            set_closing_breakpoint($i_lowest);
18851        }
18852
18853        # get ready to go again
18854        $i_begin                 = $i_lowest + 1;
18855        $last_break_strength     = $lowest_strength;
18856        $i_last_break            = $i_lowest;
18857        $leading_alignment_token = "";
18858        $leading_alignment_type  = "";
18859        $lowest_next_token       = '';
18860        $lowest_next_type        = 'b';
18861
18862        if ( ( $i_begin <= $imax ) && ( $types_to_go[$i_begin] eq 'b' ) ) {
18863            $i_begin++;
18864        }
18865
18866        # update indentation size
18867        if ( $i_begin <= $imax ) {
18868            $leading_spaces = leading_spaces_to_go($i_begin);
18869        }
18870    }
18871
18872    #-------------------------------------------------------
18873    # END of main loop to set continuation breakpoints
18874    # Now go back and make any necessary corrections
18875    #-------------------------------------------------------
18876
18877    #-------------------------------------------------------
18878    # ?/: rule 4 -- if we broke at a ':', then break at
18879    # corresponding '?' unless this is a chain of ?: expressions
18880    #-------------------------------------------------------
18881    if (@i_colon_breaks) {
18882
18883        # using a simple method for deciding if we are in a ?/: chain --
18884        # this is a chain if it has multiple ?/: pairs all in order;
18885        # otherwise not.
18886        # Note that if line starts in a ':' we count that above as a break
18887        my $is_chain = ( $colons_in_order && @i_colon_breaks > 1 );
18888
18889        unless ($is_chain) {
18890            my @insert_list = ();
18891            foreach (@i_colon_breaks) {
18892                my $i_question = $mate_index_to_go[$_];
18893                if ( $i_question >= 0 ) {
18894                    if ( $want_break_before{'?'} ) {
18895                        $i_question = $iprev_to_go[$i_question];
18896                    }
18897
18898                    if ( $i_question >= 0 ) {
18899                        push @insert_list, $i_question;
18900                    }
18901                }
18902                insert_additional_breaks( \@insert_list, \@i_first, \@i_last );
18903            }
18904        }
18905    }
18906    return ( \@i_first, \@i_last, $colon_count );
18907}
18908
18909sub insert_additional_breaks {
18910
18911    # this routine will add line breaks at requested locations after
18912    # sub set_continuation_breaks has made preliminary breaks.
18913
18914    my ( $ri_break_list, $ri_first, $ri_last ) = @_;
18915    my $i_f;
18916    my $i_l;
18917    my $line_number = 0;
18918    my $i_break_left;
18919    foreach $i_break_left ( sort { $a <=> $b } @$ri_break_list ) {
18920
18921        $i_f = $$ri_first[$line_number];
18922        $i_l = $$ri_last[$line_number];
18923        while ( $i_break_left >= $i_l ) {
18924            $line_number++;
18925
18926            # shouldn't happen unless caller passes bad indexes
18927            if ( $line_number >= @$ri_last ) {
18928                warning(
18929"Non-fatal program bug: couldn't set break at $i_break_left\n"
18930                );
18931                report_definite_bug();
18932                return;
18933            }
18934            $i_f = $$ri_first[$line_number];
18935            $i_l = $$ri_last[$line_number];
18936        }
18937
18938        # Do not leave a blank at the end of a line; back up if necessary
18939        if ( $types_to_go[$i_break_left] eq 'b' ) { $i_break_left-- }
18940
18941        my $i_break_right = $inext_to_go[$i_break_left];
18942        if (   $i_break_left >= $i_f
18943            && $i_break_left < $i_l
18944            && $i_break_right > $i_f
18945            && $i_break_right <= $i_l )
18946        {
18947            splice( @$ri_first, $line_number, 1, ( $i_f, $i_break_right ) );
18948            splice( @$ri_last, $line_number, 1, ( $i_break_left, $i_l ) );
18949        }
18950    }
18951}
18952
18953sub set_closing_breakpoint {
18954
18955    # set a breakpoint at a matching closing token
18956    # at present, this is only used to break at a ':' which matches a '?'
18957    my $i_break = shift;
18958
18959    if ( $mate_index_to_go[$i_break] >= 0 ) {
18960
18961        # CAUTION: infinite recursion possible here:
18962        #   set_closing_breakpoint calls set_forced_breakpoint, and
18963        #   set_forced_breakpoint call set_closing_breakpoint
18964        #   ( test files attrib.t, BasicLyx.pm.html).
18965        # Don't reduce the '2' in the statement below
18966        if ( $mate_index_to_go[$i_break] > $i_break + 2 ) {
18967
18968            # break before } ] and ), but sub set_forced_breakpoint will decide
18969            # to break before or after a ? and :
18970            my $inc = ( $tokens_to_go[$i_break] eq '?' ) ? 0 : 1;
18971            set_forced_breakpoint( $mate_index_to_go[$i_break] - $inc );
18972        }
18973    }
18974    else {
18975        my $type_sequence = $type_sequence_to_go[$i_break];
18976        if ($type_sequence) {
18977            my $closing_token = $matching_token{ $tokens_to_go[$i_break] };
18978            $postponed_breakpoint{$type_sequence} = 1;
18979        }
18980    }
18981}
18982
18983sub compare_indentation_levels {
18984
18985    # check to see if output line tabbing agrees with input line
18986    # this can be very useful for debugging a script which has an extra
18987    # or missing brace
18988    my ( $guessed_indentation_level, $structural_indentation_level ) = @_;
18989    if ( $guessed_indentation_level ne $structural_indentation_level ) {
18990        $last_tabbing_disagreement = $input_line_number;
18991
18992        if ($in_tabbing_disagreement) {
18993        }
18994        else {
18995            $tabbing_disagreement_count++;
18996
18997            if ( $tabbing_disagreement_count <= MAX_NAG_MESSAGES ) {
18998                write_logfile_entry(
18999"Start indentation disagreement: input=$guessed_indentation_level; output=$structural_indentation_level\n"
19000                );
19001            }
19002            $in_tabbing_disagreement    = $input_line_number;
19003            $first_tabbing_disagreement = $in_tabbing_disagreement
19004              unless ($first_tabbing_disagreement);
19005        }
19006    }
19007    else {
19008
19009        if ($in_tabbing_disagreement) {
19010
19011            if ( $tabbing_disagreement_count <= MAX_NAG_MESSAGES ) {
19012                write_logfile_entry(
19013"End indentation disagreement from input line $in_tabbing_disagreement\n"
19014                );
19015
19016                if ( $tabbing_disagreement_count == MAX_NAG_MESSAGES ) {
19017                    write_logfile_entry(
19018                        "No further tabbing disagreements will be noted\n");
19019                }
19020            }
19021            $in_tabbing_disagreement = 0;
19022        }
19023    }
19024}
19025
19026#####################################################################
19027#
19028# the Perl::Tidy::IndentationItem class supplies items which contain
19029# how much whitespace should be used at the start of a line
19030#
19031#####################################################################
19032
19033package Perl::Tidy::IndentationItem;
19034
19035# Indexes for indentation items
19036use constant SPACES             => 0;     # total leading white spaces
19037use constant LEVEL              => 1;     # the indentation 'level'
19038use constant CI_LEVEL           => 2;     # the 'continuation level'
19039use constant AVAILABLE_SPACES   => 3;     # how many left spaces available
19040                                          # for this level
19041use constant CLOSED             => 4;     # index where we saw closing '}'
19042use constant COMMA_COUNT        => 5;     # how many commas at this level?
19043use constant SEQUENCE_NUMBER    => 6;     # output batch number
19044use constant INDEX              => 7;     # index in output batch list
19045use constant HAVE_CHILD         => 8;     # any dependents?
19046use constant RECOVERABLE_SPACES => 9;     # how many spaces to the right
19047                                          # we would like to move to get
19048                                          # alignment (negative if left)
19049use constant ALIGN_PAREN        => 10;    # do we want to try to align
19050                                          # with an opening structure?
19051use constant MARKED             => 11;    # if visited by corrector logic
19052use constant STACK_DEPTH        => 12;    # indentation nesting depth
19053use constant STARTING_INDEX     => 13;    # first token index of this level
19054use constant ARROW_COUNT        => 14;    # how many =>'s
19055
19056sub new {
19057
19058    # Create an 'indentation_item' which describes one level of leading
19059    # whitespace when the '-lp' indentation is used.  We return
19060    # a reference to an anonymous array of associated variables.
19061    # See above constants for storage scheme.
19062    my (
19063        $class,               $spaces,           $level,
19064        $ci_level,            $available_spaces, $index,
19065        $gnu_sequence_number, $align_paren,      $stack_depth,
19066        $starting_index,
19067    ) = @_;
19068    my $closed            = -1;
19069    my $arrow_count       = 0;
19070    my $comma_count       = 0;
19071    my $have_child        = 0;
19072    my $want_right_spaces = 0;
19073    my $marked            = 0;
19074    bless [
19075        $spaces,              $level,          $ci_level,
19076        $available_spaces,    $closed,         $comma_count,
19077        $gnu_sequence_number, $index,          $have_child,
19078        $want_right_spaces,   $align_paren,    $marked,
19079        $stack_depth,         $starting_index, $arrow_count,
19080    ], $class;
19081}
19082
19083sub permanently_decrease_AVAILABLE_SPACES {
19084
19085    # make a permanent reduction in the available indentation spaces
19086    # at one indentation item.  NOTE: if there are child nodes, their
19087    # total SPACES must be reduced by the caller.
19088
19089    my ( $item, $spaces_needed ) = @_;
19090    my $available_spaces = $item->get_AVAILABLE_SPACES();
19091    my $deleted_spaces =
19092      ( $available_spaces > $spaces_needed )
19093      ? $spaces_needed
19094      : $available_spaces;
19095    $item->decrease_AVAILABLE_SPACES($deleted_spaces);
19096    $item->decrease_SPACES($deleted_spaces);
19097    $item->set_RECOVERABLE_SPACES(0);
19098
19099    return $deleted_spaces;
19100}
19101
19102sub tentatively_decrease_AVAILABLE_SPACES {
19103
19104    # We are asked to tentatively delete $spaces_needed of indentation
19105    # for a indentation item.  We may want to undo this later.  NOTE: if
19106    # there are child nodes, their total SPACES must be reduced by the
19107    # caller.
19108    my ( $item, $spaces_needed ) = @_;
19109    my $available_spaces = $item->get_AVAILABLE_SPACES();
19110    my $deleted_spaces =
19111      ( $available_spaces > $spaces_needed )
19112      ? $spaces_needed
19113      : $available_spaces;
19114    $item->decrease_AVAILABLE_SPACES($deleted_spaces);
19115    $item->decrease_SPACES($deleted_spaces);
19116    $item->increase_RECOVERABLE_SPACES($deleted_spaces);
19117    return $deleted_spaces;
19118}
19119
19120sub get_STACK_DEPTH {
19121    my $self = shift;
19122    return $self->[STACK_DEPTH];
19123}
19124
19125sub get_SPACES {
19126    my $self = shift;
19127    return $self->[SPACES];
19128}
19129
19130sub get_MARKED {
19131    my $self = shift;
19132    return $self->[MARKED];
19133}
19134
19135sub set_MARKED {
19136    my ( $self, $value ) = @_;
19137    if ( defined($value) ) {
19138        $self->[MARKED] = $value;
19139    }
19140    return $self->[MARKED];
19141}
19142
19143sub get_AVAILABLE_SPACES {
19144    my $self = shift;
19145    return $self->[AVAILABLE_SPACES];
19146}
19147
19148sub decrease_SPACES {
19149    my ( $self, $value ) = @_;
19150    if ( defined($value) ) {
19151        $self->[SPACES] -= $value;
19152    }
19153    return $self->[SPACES];
19154}
19155
19156sub decrease_AVAILABLE_SPACES {
19157    my ( $self, $value ) = @_;
19158    if ( defined($value) ) {
19159        $self->[AVAILABLE_SPACES] -= $value;
19160    }
19161    return $self->[AVAILABLE_SPACES];
19162}
19163
19164sub get_ALIGN_PAREN {
19165    my $self = shift;
19166    return $self->[ALIGN_PAREN];
19167}
19168
19169sub get_RECOVERABLE_SPACES {
19170    my $self = shift;
19171    return $self->[RECOVERABLE_SPACES];
19172}
19173
19174sub set_RECOVERABLE_SPACES {
19175    my ( $self, $value ) = @_;
19176    if ( defined($value) ) {
19177        $self->[RECOVERABLE_SPACES] = $value;
19178    }
19179    return $self->[RECOVERABLE_SPACES];
19180}
19181
19182sub increase_RECOVERABLE_SPACES {
19183    my ( $self, $value ) = @_;
19184    if ( defined($value) ) {
19185        $self->[RECOVERABLE_SPACES] += $value;
19186    }
19187    return $self->[RECOVERABLE_SPACES];
19188}
19189
19190sub get_CI_LEVEL {
19191    my $self = shift;
19192    return $self->[CI_LEVEL];
19193}
19194
19195sub get_LEVEL {
19196    my $self = shift;
19197    return $self->[LEVEL];
19198}
19199
19200sub get_SEQUENCE_NUMBER {
19201    my $self = shift;
19202    return $self->[SEQUENCE_NUMBER];
19203}
19204
19205sub get_INDEX {
19206    my $self = shift;
19207    return $self->[INDEX];
19208}
19209
19210sub get_STARTING_INDEX {
19211    my $self = shift;
19212    return $self->[STARTING_INDEX];
19213}
19214
19215sub set_HAVE_CHILD {
19216    my ( $self, $value ) = @_;
19217    if ( defined($value) ) {
19218        $self->[HAVE_CHILD] = $value;
19219    }
19220    return $self->[HAVE_CHILD];
19221}
19222
19223sub get_HAVE_CHILD {
19224    my $self = shift;
19225    return $self->[HAVE_CHILD];
19226}
19227
19228sub set_ARROW_COUNT {
19229    my ( $self, $value ) = @_;
19230    if ( defined($value) ) {
19231        $self->[ARROW_COUNT] = $value;
19232    }
19233    return $self->[ARROW_COUNT];
19234}
19235
19236sub get_ARROW_COUNT {
19237    my $self = shift;
19238    return $self->[ARROW_COUNT];
19239}
19240
19241sub set_COMMA_COUNT {
19242    my ( $self, $value ) = @_;
19243    if ( defined($value) ) {
19244        $self->[COMMA_COUNT] = $value;
19245    }
19246    return $self->[COMMA_COUNT];
19247}
19248
19249sub get_COMMA_COUNT {
19250    my $self = shift;
19251    return $self->[COMMA_COUNT];
19252}
19253
19254sub set_CLOSED {
19255    my ( $self, $value ) = @_;
19256    if ( defined($value) ) {
19257        $self->[CLOSED] = $value;
19258    }
19259    return $self->[CLOSED];
19260}
19261
19262sub get_CLOSED {
19263    my $self = shift;
19264    return $self->[CLOSED];
19265}
19266
19267#####################################################################
19268#
19269# the Perl::Tidy::VerticalAligner::Line class supplies an object to
19270# contain a single output line
19271#
19272#####################################################################
19273
19274package Perl::Tidy::VerticalAligner::Line;
19275
19276{
19277
19278    use strict;
19279    use Carp;
19280
19281    use constant JMAX                      => 0;
19282    use constant JMAX_ORIGINAL_LINE        => 1;
19283    use constant RTOKENS                   => 2;
19284    use constant RFIELDS                   => 3;
19285    use constant RPATTERNS                 => 4;
19286    use constant INDENTATION               => 5;
19287    use constant LEADING_SPACE_COUNT       => 6;
19288    use constant OUTDENT_LONG_LINES        => 7;
19289    use constant LIST_TYPE                 => 8;
19290    use constant IS_HANGING_SIDE_COMMENT   => 9;
19291    use constant RALIGNMENTS               => 10;
19292    use constant MAXIMUM_LINE_LENGTH       => 11;
19293    use constant RVERTICAL_TIGHTNESS_FLAGS => 12;
19294
19295    my %_index_map;
19296    $_index_map{jmax}                      = JMAX;
19297    $_index_map{jmax_original_line}        = JMAX_ORIGINAL_LINE;
19298    $_index_map{rtokens}                   = RTOKENS;
19299    $_index_map{rfields}                   = RFIELDS;
19300    $_index_map{rpatterns}                 = RPATTERNS;
19301    $_index_map{indentation}               = INDENTATION;
19302    $_index_map{leading_space_count}       = LEADING_SPACE_COUNT;
19303    $_index_map{outdent_long_lines}        = OUTDENT_LONG_LINES;
19304    $_index_map{list_type}                 = LIST_TYPE;
19305    $_index_map{is_hanging_side_comment}   = IS_HANGING_SIDE_COMMENT;
19306    $_index_map{ralignments}               = RALIGNMENTS;
19307    $_index_map{maximum_line_length}       = MAXIMUM_LINE_LENGTH;
19308    $_index_map{rvertical_tightness_flags} = RVERTICAL_TIGHTNESS_FLAGS;
19309
19310    my @_default_data = ();
19311    $_default_data[JMAX]                      = undef;
19312    $_default_data[JMAX_ORIGINAL_LINE]        = undef;
19313    $_default_data[RTOKENS]                   = undef;
19314    $_default_data[RFIELDS]                   = undef;
19315    $_default_data[RPATTERNS]                 = undef;
19316    $_default_data[INDENTATION]               = undef;
19317    $_default_data[LEADING_SPACE_COUNT]       = undef;
19318    $_default_data[OUTDENT_LONG_LINES]        = undef;
19319    $_default_data[LIST_TYPE]                 = undef;
19320    $_default_data[IS_HANGING_SIDE_COMMENT]   = undef;
19321    $_default_data[RALIGNMENTS]               = [];
19322    $_default_data[MAXIMUM_LINE_LENGTH]       = undef;
19323    $_default_data[RVERTICAL_TIGHTNESS_FLAGS] = undef;
19324
19325    {
19326
19327        # methods to count object population
19328        my $_count = 0;
19329        sub get_count        { $_count; }
19330        sub _increment_count { ++$_count }
19331        sub _decrement_count { --$_count }
19332    }
19333
19334    # Constructor may be called as a class method
19335    sub new {
19336        my ( $caller, %arg ) = @_;
19337        my $caller_is_obj = ref($caller);
19338        my $class = $caller_is_obj || $caller;
19339        no strict "refs";
19340        my $self = bless [], $class;
19341
19342        $self->[RALIGNMENTS] = [];
19343
19344        my $index;
19345        foreach ( keys %_index_map ) {
19346            $index = $_index_map{$_};
19347            if    ( exists $arg{$_} ) { $self->[$index] = $arg{$_} }
19348            elsif ($caller_is_obj)    { $self->[$index] = $caller->[$index] }
19349            else { $self->[$index] = $_default_data[$index] }
19350        }
19351
19352        $self->_increment_count();
19353        return $self;
19354    }
19355
19356    sub DESTROY {
19357        $_[0]->_decrement_count();
19358    }
19359
19360    sub get_jmax                      { $_[0]->[JMAX] }
19361    sub get_jmax_original_line        { $_[0]->[JMAX_ORIGINAL_LINE] }
19362    sub get_rtokens                   { $_[0]->[RTOKENS] }
19363    sub get_rfields                   { $_[0]->[RFIELDS] }
19364    sub get_rpatterns                 { $_[0]->[RPATTERNS] }
19365    sub get_indentation               { $_[0]->[INDENTATION] }
19366    sub get_leading_space_count       { $_[0]->[LEADING_SPACE_COUNT] }
19367    sub get_outdent_long_lines        { $_[0]->[OUTDENT_LONG_LINES] }
19368    sub get_list_type                 { $_[0]->[LIST_TYPE] }
19369    sub get_is_hanging_side_comment   { $_[0]->[IS_HANGING_SIDE_COMMENT] }
19370    sub get_rvertical_tightness_flags { $_[0]->[RVERTICAL_TIGHTNESS_FLAGS] }
19371
19372    sub set_column     { $_[0]->[RALIGNMENTS]->[ $_[1] ]->set_column( $_[2] ) }
19373    sub get_alignment  { $_[0]->[RALIGNMENTS]->[ $_[1] ] }
19374    sub get_alignments { @{ $_[0]->[RALIGNMENTS] } }
19375    sub get_column     { $_[0]->[RALIGNMENTS]->[ $_[1] ]->get_column() }
19376
19377    sub get_starting_column {
19378        $_[0]->[RALIGNMENTS]->[ $_[1] ]->get_starting_column();
19379    }
19380
19381    sub increment_column {
19382        $_[0]->[RALIGNMENTS]->[ $_[1] ]->increment_column( $_[2] );
19383    }
19384    sub set_alignments { my $self = shift; @{ $self->[RALIGNMENTS] } = @_; }
19385
19386    sub current_field_width {
19387        my $self = shift;
19388        my ($j) = @_;
19389        if ( $j == 0 ) {
19390            return $self->get_column($j);
19391        }
19392        else {
19393            return $self->get_column($j) - $self->get_column( $j - 1 );
19394        }
19395    }
19396
19397    sub field_width_growth {
19398        my $self = shift;
19399        my $j    = shift;
19400        return $self->get_column($j) - $self->get_starting_column($j);
19401    }
19402
19403    sub starting_field_width {
19404        my $self = shift;
19405        my $j    = shift;
19406        if ( $j == 0 ) {
19407            return $self->get_starting_column($j);
19408        }
19409        else {
19410            return $self->get_starting_column($j) -
19411              $self->get_starting_column( $j - 1 );
19412        }
19413    }
19414
19415    sub increase_field_width {
19416
19417        my $self = shift;
19418        my ( $j, $pad ) = @_;
19419        my $jmax = $self->get_jmax();
19420        for my $k ( $j .. $jmax ) {
19421            $self->increment_column( $k, $pad );
19422        }
19423    }
19424
19425    sub get_available_space_on_right {
19426        my $self = shift;
19427        my $jmax = $self->get_jmax();
19428        return $self->[MAXIMUM_LINE_LENGTH] - $self->get_column($jmax);
19429    }
19430
19431    sub set_jmax                    { $_[0]->[JMAX]                    = $_[1] }
19432    sub set_jmax_original_line      { $_[0]->[JMAX_ORIGINAL_LINE]      = $_[1] }
19433    sub set_rtokens                 { $_[0]->[RTOKENS]                 = $_[1] }
19434    sub set_rfields                 { $_[0]->[RFIELDS]                 = $_[1] }
19435    sub set_rpatterns               { $_[0]->[RPATTERNS]               = $_[1] }
19436    sub set_indentation             { $_[0]->[INDENTATION]             = $_[1] }
19437    sub set_leading_space_count     { $_[0]->[LEADING_SPACE_COUNT]     = $_[1] }
19438    sub set_outdent_long_lines      { $_[0]->[OUTDENT_LONG_LINES]      = $_[1] }
19439    sub set_list_type               { $_[0]->[LIST_TYPE]               = $_[1] }
19440    sub set_is_hanging_side_comment { $_[0]->[IS_HANGING_SIDE_COMMENT] = $_[1] }
19441    sub set_alignment               { $_[0]->[RALIGNMENTS]->[ $_[1] ]  = $_[2] }
19442
19443}
19444
19445#####################################################################
19446#
19447# the Perl::Tidy::VerticalAligner::Alignment class holds information
19448# on a single column being aligned
19449#
19450#####################################################################
19451package Perl::Tidy::VerticalAligner::Alignment;
19452
19453{
19454
19455    use strict;
19456
19457    #use Carp;
19458
19459    # Symbolic array indexes
19460    use constant COLUMN          => 0;    # the current column number
19461    use constant STARTING_COLUMN => 1;    # column number when created
19462    use constant MATCHING_TOKEN  => 2;    # what token we are matching
19463    use constant STARTING_LINE   => 3;    # the line index of creation
19464    use constant ENDING_LINE     => 4;    # the most recent line to use it
19465    use constant SAVED_COLUMN    => 5;    # the most recent line to use it
19466    use constant SERIAL_NUMBER   => 6;    # unique number for this alignment
19467                                          # (just its index in an array)
19468
19469    # Correspondence between variables and array indexes
19470    my %_index_map;
19471    $_index_map{column}          = COLUMN;
19472    $_index_map{starting_column} = STARTING_COLUMN;
19473    $_index_map{matching_token}  = MATCHING_TOKEN;
19474    $_index_map{starting_line}   = STARTING_LINE;
19475    $_index_map{ending_line}     = ENDING_LINE;
19476    $_index_map{saved_column}    = SAVED_COLUMN;
19477    $_index_map{serial_number}   = SERIAL_NUMBER;
19478
19479    my @_default_data = ();
19480    $_default_data[COLUMN]          = undef;
19481    $_default_data[STARTING_COLUMN] = undef;
19482    $_default_data[MATCHING_TOKEN]  = undef;
19483    $_default_data[STARTING_LINE]   = undef;
19484    $_default_data[ENDING_LINE]     = undef;
19485    $_default_data[SAVED_COLUMN]    = undef;
19486    $_default_data[SERIAL_NUMBER]   = undef;
19487
19488    # class population count
19489    {
19490        my $_count = 0;
19491        sub get_count        { $_count; }
19492        sub _increment_count { ++$_count }
19493        sub _decrement_count { --$_count }
19494    }
19495
19496    # constructor
19497    sub new {
19498        my ( $caller, %arg ) = @_;
19499        my $caller_is_obj = ref($caller);
19500        my $class = $caller_is_obj || $caller;
19501        no strict "refs";
19502        my $self = bless [], $class;
19503
19504        foreach ( keys %_index_map ) {
19505            my $index = $_index_map{$_};
19506            if    ( exists $arg{$_} ) { $self->[$index] = $arg{$_} }
19507            elsif ($caller_is_obj)    { $self->[$index] = $caller->[$index] }
19508            else { $self->[$index] = $_default_data[$index] }
19509        }
19510        $self->_increment_count();
19511        return $self;
19512    }
19513
19514    sub DESTROY {
19515        $_[0]->_decrement_count();
19516    }
19517
19518    sub get_column          { return $_[0]->[COLUMN] }
19519    sub get_starting_column { return $_[0]->[STARTING_COLUMN] }
19520    sub get_matching_token  { return $_[0]->[MATCHING_TOKEN] }
19521    sub get_starting_line   { return $_[0]->[STARTING_LINE] }
19522    sub get_ending_line     { return $_[0]->[ENDING_LINE] }
19523    sub get_serial_number   { return $_[0]->[SERIAL_NUMBER] }
19524
19525    sub set_column          { $_[0]->[COLUMN]          = $_[1] }
19526    sub set_starting_column { $_[0]->[STARTING_COLUMN] = $_[1] }
19527    sub set_matching_token  { $_[0]->[MATCHING_TOKEN]  = $_[1] }
19528    sub set_starting_line   { $_[0]->[STARTING_LINE]   = $_[1] }
19529    sub set_ending_line     { $_[0]->[ENDING_LINE]     = $_[1] }
19530    sub increment_column { $_[0]->[COLUMN] += $_[1] }
19531
19532    sub save_column    { $_[0]->[SAVED_COLUMN] = $_[0]->[COLUMN] }
19533    sub restore_column { $_[0]->[COLUMN]       = $_[0]->[SAVED_COLUMN] }
19534
19535}
19536
19537package Perl::Tidy::VerticalAligner;
19538
19539# The Perl::Tidy::VerticalAligner package collects output lines and
19540# attempts to line up certain common tokens, such as => and #, which are
19541# identified by the calling routine.
19542#
19543# There are two main routines: valign_input and flush.  Append acts as a
19544# storage buffer, collecting lines into a group which can be vertically
19545# aligned.  When alignment is no longer possible or desirable, it dumps
19546# the group to flush.
19547#
19548#     valign_input -----> flush
19549#
19550#     collects          writes
19551#     vertical          one
19552#     groups            group
19553
19554BEGIN {
19555
19556    # Caution: these debug flags produce a lot of output
19557    # They should all be 0 except when debugging small scripts
19558
19559    use constant VALIGN_DEBUG_FLAG_APPEND  => 0;
19560    use constant VALIGN_DEBUG_FLAG_APPEND0 => 0;
19561    use constant VALIGN_DEBUG_FLAG_TERNARY => 0;
19562    use constant VALIGN_DEBUG_FLAG_TABS    => 0;
19563
19564    my $debug_warning = sub {
19565        print STDOUT "VALIGN_DEBUGGING with key $_[0]\n";
19566    };
19567
19568    VALIGN_DEBUG_FLAG_APPEND  && $debug_warning->('APPEND');
19569    VALIGN_DEBUG_FLAG_APPEND0 && $debug_warning->('APPEND0');
19570    VALIGN_DEBUG_FLAG_TERNARY && $debug_warning->('TERNARY');
19571    VALIGN_DEBUG_FLAG_TABS    && $debug_warning->('TABS');
19572
19573}
19574
19575use vars qw(
19576  $vertical_aligner_self
19577  $current_line
19578  $maximum_alignment_index
19579  $ralignment_list
19580  $maximum_jmax_seen
19581  $minimum_jmax_seen
19582  $previous_minimum_jmax_seen
19583  $previous_maximum_jmax_seen
19584  $maximum_line_index
19585  $group_level
19586  $group_type
19587  $group_maximum_gap
19588  $marginal_match
19589  $last_level_written
19590  $last_leading_space_count
19591  $extra_indent_ok
19592  $zero_count
19593  @group_lines
19594  $last_comment_column
19595  $last_side_comment_line_number
19596  $last_side_comment_length
19597  $last_side_comment_level
19598  $outdented_line_count
19599  $first_outdented_line_at
19600  $last_outdented_line_at
19601  $diagnostics_object
19602  $logger_object
19603  $file_writer_object
19604  @side_comment_history
19605  $comment_leading_space_count
19606  $is_matching_terminal_line
19607  $consecutive_block_comments
19608
19609  $cached_line_text
19610  $cached_line_type
19611  $cached_line_flag
19612  $cached_seqno
19613  $cached_line_valid
19614  $cached_line_leading_space_count
19615  $cached_seqno_string
19616
19617  $valign_buffer_filling
19618  @valign_buffer
19619
19620  $seqno_string
19621  $last_nonblank_seqno_string
19622
19623  $rOpts
19624
19625  $rOpts_maximum_line_length
19626  $rOpts_variable_maximum_line_length
19627  $rOpts_continuation_indentation
19628  $rOpts_indent_columns
19629  $rOpts_tabs
19630  $rOpts_entab_leading_whitespace
19631  $rOpts_valign
19632
19633  $rOpts_fixed_position_side_comment
19634  $rOpts_minimum_space_to_comment
19635
19636);
19637
19638sub initialize {
19639
19640    my $class;
19641
19642    ( $class, $rOpts, $file_writer_object, $logger_object, $diagnostics_object )
19643      = @_;
19644
19645    # variables describing the entire space group:
19646    $ralignment_list            = [];
19647    $group_level                = 0;
19648    $last_level_written         = -1;
19649    $extra_indent_ok            = 0;    # can we move all lines to the right?
19650    $last_side_comment_length   = 0;
19651    $maximum_jmax_seen          = 0;
19652    $minimum_jmax_seen          = 0;
19653    $previous_minimum_jmax_seen = 0;
19654    $previous_maximum_jmax_seen = 0;
19655
19656    # variables describing each line of the group
19657    @group_lines = ();                  # list of all lines in group
19658
19659    $outdented_line_count          = 0;
19660    $first_outdented_line_at       = 0;
19661    $last_outdented_line_at        = 0;
19662    $last_side_comment_line_number = 0;
19663    $last_side_comment_level       = -1;
19664    $is_matching_terminal_line     = 0;
19665
19666    # most recent 3 side comments; [ line number, column ]
19667    $side_comment_history[0] = [ -300, 0 ];
19668    $side_comment_history[1] = [ -200, 0 ];
19669    $side_comment_history[2] = [ -100, 0 ];
19670
19671    # valign_output_step_B cache:
19672    $cached_line_text                = "";
19673    $cached_line_type                = 0;
19674    $cached_line_flag                = 0;
19675    $cached_seqno                    = 0;
19676    $cached_line_valid               = 0;
19677    $cached_line_leading_space_count = 0;
19678    $cached_seqno_string             = "";
19679
19680    # string of sequence numbers joined together
19681    $seqno_string               = "";
19682    $last_nonblank_seqno_string = "";
19683
19684    # frequently used parameters
19685    $rOpts_indent_columns           = $rOpts->{'indent-columns'};
19686    $rOpts_tabs                     = $rOpts->{'tabs'};
19687    $rOpts_entab_leading_whitespace = $rOpts->{'entab-leading-whitespace'};
19688    $rOpts_fixed_position_side_comment =
19689      $rOpts->{'fixed-position-side-comment'};
19690    $rOpts_minimum_space_to_comment = $rOpts->{'minimum-space-to-comment'};
19691    $rOpts_maximum_line_length      = $rOpts->{'maximum-line-length'};
19692    $rOpts_variable_maximum_line_length =
19693      $rOpts->{'variable-maximum-line-length'};
19694    $rOpts_valign = $rOpts->{'valign'};
19695
19696    $consecutive_block_comments = 0;
19697    forget_side_comment();
19698
19699    initialize_for_new_group();
19700
19701    $vertical_aligner_self = {};
19702    bless $vertical_aligner_self, $class;
19703    return $vertical_aligner_self;
19704}
19705
19706sub initialize_for_new_group {
19707    $maximum_line_index      = -1;      # lines in the current group
19708    $maximum_alignment_index = -1;      # alignments in current group
19709    $zero_count              = 0;       # count consecutive lines without tokens
19710    $current_line            = undef;   # line being matched for alignment
19711    $group_maximum_gap       = 0;       # largest gap introduced
19712    $group_type              = "";
19713    $marginal_match          = 0;
19714    $comment_leading_space_count = 0;
19715    $last_leading_space_count    = 0;
19716}
19717
19718# interface to Perl::Tidy::Diagnostics routines
19719sub write_diagnostics {
19720    if ($diagnostics_object) {
19721        $diagnostics_object->write_diagnostics(@_);
19722    }
19723}
19724
19725# interface to Perl::Tidy::Logger routines
19726sub warning {
19727    if ($logger_object) {
19728        $logger_object->warning(@_);
19729    }
19730}
19731
19732sub write_logfile_entry {
19733    if ($logger_object) {
19734        $logger_object->write_logfile_entry(@_);
19735    }
19736}
19737
19738sub report_definite_bug {
19739    if ($logger_object) {
19740        $logger_object->report_definite_bug();
19741    }
19742}
19743
19744sub get_SPACES {
19745
19746    # return the number of leading spaces associated with an indentation
19747    # variable $indentation is either a constant number of spaces or an
19748    # object with a get_SPACES method.
19749    my $indentation = shift;
19750    return ref($indentation) ? $indentation->get_SPACES() : $indentation;
19751}
19752
19753sub get_RECOVERABLE_SPACES {
19754
19755    # return the number of spaces (+ means shift right, - means shift left)
19756    # that we would like to shift a group of lines with the same indentation
19757    # to get them to line up with their opening parens
19758    my $indentation = shift;
19759    return ref($indentation) ? $indentation->get_RECOVERABLE_SPACES() : 0;
19760}
19761
19762sub get_STACK_DEPTH {
19763
19764    my $indentation = shift;
19765    return ref($indentation) ? $indentation->get_STACK_DEPTH() : 0;
19766}
19767
19768sub make_alignment {
19769    my ( $col, $token ) = @_;
19770
19771    # make one new alignment at column $col which aligns token $token
19772    ++$maximum_alignment_index;
19773    my $alignment = new Perl::Tidy::VerticalAligner::Alignment(
19774        column          => $col,
19775        starting_column => $col,
19776        matching_token  => $token,
19777        starting_line   => $maximum_line_index,
19778        ending_line     => $maximum_line_index,
19779        serial_number   => $maximum_alignment_index,
19780    );
19781    $ralignment_list->[$maximum_alignment_index] = $alignment;
19782    return $alignment;
19783}
19784
19785sub dump_alignments {
19786    print STDOUT
19787"Current Alignments:\ni\ttoken\tstarting_column\tcolumn\tstarting_line\tending_line\n";
19788    for my $i ( 0 .. $maximum_alignment_index ) {
19789        my $column          = $ralignment_list->[$i]->get_column();
19790        my $starting_column = $ralignment_list->[$i]->get_starting_column();
19791        my $matching_token  = $ralignment_list->[$i]->get_matching_token();
19792        my $starting_line   = $ralignment_list->[$i]->get_starting_line();
19793        my $ending_line     = $ralignment_list->[$i]->get_ending_line();
19794        print STDOUT
19795"$i\t$matching_token\t$starting_column\t$column\t$starting_line\t$ending_line\n";
19796    }
19797}
19798
19799sub save_alignment_columns {
19800    for my $i ( 0 .. $maximum_alignment_index ) {
19801        $ralignment_list->[$i]->save_column();
19802    }
19803}
19804
19805sub restore_alignment_columns {
19806    for my $i ( 0 .. $maximum_alignment_index ) {
19807        $ralignment_list->[$i]->restore_column();
19808    }
19809}
19810
19811sub forget_side_comment {
19812    $last_comment_column = 0;
19813}
19814
19815sub maximum_line_length_for_level {
19816
19817    # return maximum line length for line starting with a given level
19818    my $maximum_line_length = $rOpts_maximum_line_length;
19819    if ($rOpts_variable_maximum_line_length) {
19820        my $level = shift;
19821        if ( $level < 0 ) { $level = 0 }
19822        $maximum_line_length += $level * $rOpts_indent_columns;
19823    }
19824    return $maximum_line_length;
19825}
19826
19827sub valign_input {
19828
19829    # Place one line in the current vertical group.
19830    #
19831    # The input parameters are:
19832    #     $level = indentation level of this line
19833    #     $rfields = reference to array of fields
19834    #     $rpatterns = reference to array of patterns, one per field
19835    #     $rtokens   = reference to array of tokens starting fields 1,2,..
19836    #
19837    # Here is an example of what this package does.  In this example,
19838    # we are trying to line up both the '=>' and the '#'.
19839    #
19840    #         '18' => 'grave',    #   \`
19841    #         '19' => 'acute',    #   `'
19842    #         '20' => 'caron',    #   \v
19843    # <-tabs-><f1-><--field 2 ---><-f3->
19844    # |            |              |    |
19845    # |            |              |    |
19846    # col1        col2         col3 col4
19847    #
19848    # The calling routine has already broken the entire line into 3 fields as
19849    # indicated.  (So the work of identifying promising common tokens has
19850    # already been done).
19851    #
19852    # In this example, there will be 2 tokens being matched: '=>' and '#'.
19853    # They are the leading parts of fields 2 and 3, but we do need to know
19854    # what they are so that we can dump a group of lines when these tokens
19855    # change.
19856    #
19857    # The fields contain the actual characters of each field.  The patterns
19858    # are like the fields, but they contain mainly token types instead
19859    # of tokens, so they have fewer characters.  They are used to be
19860    # sure we are matching fields of similar type.
19861    #
19862    # In this example, there will be 4 column indexes being adjusted.  The
19863    # first one is always at zero.  The interior columns are at the start of
19864    # the matching tokens, and the last one tracks the maximum line length.
19865    #
19866    # Each time a new line comes in, it joins the current vertical
19867    # group if possible.  Otherwise it causes the current group to be dumped
19868    # and a new group is started.
19869    #
19870    # For each new group member, the column locations are increased, as
19871    # necessary, to make room for the new fields.  When the group is finally
19872    # output, these column numbers are used to compute the amount of spaces of
19873    # padding needed for each field.
19874    #
19875    # Programming note: the fields are assumed not to have any tab characters.
19876    # Tabs have been previously removed except for tabs in quoted strings and
19877    # side comments.  Tabs in these fields can mess up the column counting.
19878    # The log file warns the user if there are any such tabs.
19879
19880    my (
19881        $level,               $level_end,
19882        $indentation,         $rfields,
19883        $rtokens,             $rpatterns,
19884        $is_forced_break,     $outdent_long_lines,
19885        $is_terminal_ternary, $is_terminal_statement,
19886        $do_not_pad,          $rvertical_tightness_flags,
19887        $level_jump,
19888    ) = @_;
19889
19890    # number of fields is $jmax
19891    # number of tokens between fields is $jmax-1
19892    my $jmax = $#{$rfields};
19893
19894    my $leading_space_count = get_SPACES($indentation);
19895
19896    # set outdented flag to be sure we either align within statements or
19897    # across statement boundaries, but not both.
19898    my $is_outdented = $last_leading_space_count > $leading_space_count;
19899    $last_leading_space_count = $leading_space_count;
19900
19901    # Patch: undo for hanging side comment
19902    my $is_hanging_side_comment =
19903      ( $jmax == 1 && $rtokens->[0] eq '#' && $rfields->[0] =~ /^\s*$/ );
19904    $is_outdented = 0 if $is_hanging_side_comment;
19905
19906    # Forget side comment alignment after seeing 2 or more block comments
19907    my $is_block_comment = ( $jmax == 0 && $rfields->[0] =~ /^#/ );
19908    if ($is_block_comment) {
19909        $consecutive_block_comments++;
19910    }
19911    else {
19912        if ( $consecutive_block_comments > 1 ) { forget_side_comment() }
19913        $consecutive_block_comments = 0;
19914    }
19915
19916    VALIGN_DEBUG_FLAG_APPEND0 && do {
19917        print STDOUT
19918"APPEND0: entering lines=$maximum_line_index new #fields= $jmax, leading_count=$leading_space_count last_cmt=$last_comment_column force=$is_forced_break\n";
19919    };
19920
19921    # Validate cached line if necessary: If we can produce a container
19922    # with just 2 lines total by combining an existing cached opening
19923    # token with the closing token to follow, then we will mark both
19924    # cached flags as valid.
19925    if ($rvertical_tightness_flags) {
19926        if (   $maximum_line_index <= 0
19927            && $cached_line_type
19928            && $cached_seqno
19929            && $rvertical_tightness_flags->[2]
19930            && $rvertical_tightness_flags->[2] == $cached_seqno )
19931        {
19932            $rvertical_tightness_flags->[3] ||= 1;
19933            $cached_line_valid ||= 1;
19934        }
19935    }
19936
19937    # do not join an opening block brace with an unbalanced line
19938    # unless requested with a flag value of 2
19939    if (   $cached_line_type == 3
19940        && $maximum_line_index < 0
19941        && $cached_line_flag < 2
19942        && $level_jump != 0 )
19943    {
19944        $cached_line_valid = 0;
19945    }
19946
19947    # patch until new aligner is finished
19948    if ($do_not_pad) { my_flush() }
19949
19950    # shouldn't happen:
19951    if ( $level < 0 ) { $level = 0 }
19952
19953    # do not align code across indentation level changes
19954    # or if vertical alignment is turned off for debugging
19955    if ( $level != $group_level || $is_outdented || !$rOpts_valign ) {
19956
19957        # we are allowed to shift a group of lines to the right if its
19958        # level is greater than the previous and next group
19959        $extra_indent_ok =
19960          ( $level < $group_level && $last_level_written < $group_level );
19961
19962        my_flush();
19963
19964        # If we know that this line will get flushed out by itself because
19965        # of level changes, we can leave the extra_indent_ok flag set.
19966        # That way, if we get an external flush call, we will still be
19967        # able to do some -lp alignment if necessary.
19968        $extra_indent_ok = ( $is_terminal_statement && $level > $group_level );
19969
19970        $group_level = $level;
19971
19972        # wait until after the above flush to get the leading space
19973        # count because it may have been changed if the -icp flag is in
19974        # effect
19975        $leading_space_count = get_SPACES($indentation);
19976
19977    }
19978
19979    # --------------------------------------------------------------------
19980    # Patch to collect outdentable block COMMENTS
19981    # --------------------------------------------------------------------
19982    my $is_blank_line = "";
19983    if ( $group_type eq 'COMMENT' ) {
19984        if (
19985            (
19986                   $is_block_comment
19987                && $outdent_long_lines
19988                && $leading_space_count == $comment_leading_space_count
19989            )
19990            || $is_blank_line
19991          )
19992        {
19993            $group_lines[ ++$maximum_line_index ] = $rfields->[0];
19994            return;
19995        }
19996        else {
19997            my_flush();
19998        }
19999    }
20000
20001    # --------------------------------------------------------------------
20002    # add dummy fields for terminal ternary
20003    # --------------------------------------------------------------------
20004    my $j_terminal_match;
20005    if ( $is_terminal_ternary && $current_line ) {
20006        $j_terminal_match =
20007          fix_terminal_ternary( $rfields, $rtokens, $rpatterns );
20008        $jmax = @{$rfields} - 1;
20009    }
20010
20011    # --------------------------------------------------------------------
20012    # add dummy fields for else statement
20013    # --------------------------------------------------------------------
20014    if (   $rfields->[0] =~ /^else\s*$/
20015        && $current_line
20016        && $level_jump == 0 )
20017    {
20018        $j_terminal_match = fix_terminal_else( $rfields, $rtokens, $rpatterns );
20019        $jmax = @{$rfields} - 1;
20020    }
20021
20022    # --------------------------------------------------------------------
20023    # Step 1. Handle simple line of code with no fields to match.
20024    # --------------------------------------------------------------------
20025    if ( $jmax <= 0 ) {
20026        $zero_count++;
20027
20028        if ( $maximum_line_index >= 0
20029            && !get_RECOVERABLE_SPACES( $group_lines[0]->get_indentation() ) )
20030        {
20031
20032            # flush the current group if it has some aligned columns..
20033            if ( $group_lines[0]->get_jmax() > 1 ) { my_flush() }
20034
20035            # flush current group if we are just collecting side comments..
20036            elsif (
20037
20038                # ...and we haven't seen a comment lately
20039                ( $zero_count > 3 )
20040
20041                # ..or if this new line doesn't fit to the left of the comments
20042                || ( ( $leading_space_count + length( $$rfields[0] ) ) >
20043                    $group_lines[0]->get_column(0) )
20044              )
20045            {
20046                my_flush();
20047            }
20048        }
20049
20050        # patch to start new COMMENT group if this comment may be outdented
20051        if (   $is_block_comment
20052            && $outdent_long_lines
20053            && $maximum_line_index < 0 )
20054        {
20055            $group_type                           = 'COMMENT';
20056            $comment_leading_space_count          = $leading_space_count;
20057            $group_lines[ ++$maximum_line_index ] = $rfields->[0];
20058            return;
20059        }
20060
20061        # just write this line directly if no current group, no side comment,
20062        # and no space recovery is needed.
20063        if ( $maximum_line_index < 0 && !get_RECOVERABLE_SPACES($indentation) )
20064        {
20065            valign_output_step_B( $leading_space_count, $$rfields[0], 0,
20066                $outdent_long_lines, $rvertical_tightness_flags, $level );
20067            return;
20068        }
20069    }
20070    else {
20071        $zero_count = 0;
20072    }
20073
20074    # programming check: (shouldn't happen)
20075    # an error here implies an incorrect call was made
20076    if ( $jmax > 0 && ( $#{$rtokens} != ( $jmax - 1 ) ) ) {
20077        warning(
20078"Program bug in Perl::Tidy::VerticalAligner - number of tokens = $#{$rtokens} should be one less than number of fields: $#{$rfields})\n"
20079        );
20080        report_definite_bug();
20081    }
20082
20083    # --------------------------------------------------------------------
20084    # create an object to hold this line
20085    # --------------------------------------------------------------------
20086    my $new_line = new Perl::Tidy::VerticalAligner::Line(
20087        jmax                      => $jmax,
20088        jmax_original_line        => $jmax,
20089        rtokens                   => $rtokens,
20090        rfields                   => $rfields,
20091        rpatterns                 => $rpatterns,
20092        indentation               => $indentation,
20093        leading_space_count       => $leading_space_count,
20094        outdent_long_lines        => $outdent_long_lines,
20095        list_type                 => "",
20096        is_hanging_side_comment   => $is_hanging_side_comment,
20097        maximum_line_length       => maximum_line_length_for_level($level),
20098        rvertical_tightness_flags => $rvertical_tightness_flags,
20099    );
20100
20101    # Initialize a global flag saying if the last line of the group should
20102    # match end of group and also terminate the group.  There should be no
20103    # returns between here and where the flag is handled at the bottom.
20104    my $col_matching_terminal = 0;
20105    if ( defined($j_terminal_match) ) {
20106
20107        # remember the column of the terminal ? or { to match with
20108        $col_matching_terminal = $current_line->get_column($j_terminal_match);
20109
20110        # set global flag for sub decide_if_aligned
20111        $is_matching_terminal_line = 1;
20112    }
20113
20114    # --------------------------------------------------------------------
20115    # It simplifies things to create a zero length side comment
20116    # if none exists.
20117    # --------------------------------------------------------------------
20118    make_side_comment( $new_line, $level_end );
20119
20120    # --------------------------------------------------------------------
20121    # Decide if this is a simple list of items.
20122    # There are 3 list types: none, comma, comma-arrow.
20123    # We use this below to be less restrictive in deciding what to align.
20124    # --------------------------------------------------------------------
20125    if ($is_forced_break) {
20126        decide_if_list($new_line);
20127    }
20128
20129    if ($current_line) {
20130
20131        # --------------------------------------------------------------------
20132        # Allow hanging side comment to join current group, if any
20133        # This will help keep side comments aligned, because otherwise we
20134        # will have to start a new group, making alignment less likely.
20135        # --------------------------------------------------------------------
20136        join_hanging_comment( $new_line, $current_line )
20137          if $is_hanging_side_comment;
20138
20139        # --------------------------------------------------------------------
20140        # If there is just one previous line, and it has more fields
20141        # than the new line, try to join fields together to get a match with
20142        # the new line.  At the present time, only a single leading '=' is
20143        # allowed to be compressed out.  This is useful in rare cases where
20144        # a table is forced to use old breakpoints because of side comments,
20145        # and the table starts out something like this:
20146        #   my %MonthChars = ('0', 'Jan',   # side comment
20147        #                     '1', 'Feb',
20148        #                     '2', 'Mar',
20149        # Eliminating the '=' field will allow the remaining fields to line up.
20150        # This situation does not occur if there are no side comments
20151        # because scan_list would put a break after the opening '('.
20152        # --------------------------------------------------------------------
20153        eliminate_old_fields( $new_line, $current_line );
20154
20155        # --------------------------------------------------------------------
20156        # If the new line has more fields than the current group,
20157        # see if we can match the first fields and combine the remaining
20158        # fields of the new line.
20159        # --------------------------------------------------------------------
20160        eliminate_new_fields( $new_line, $current_line );
20161
20162        # --------------------------------------------------------------------
20163        # Flush previous group unless all common tokens and patterns match..
20164        # --------------------------------------------------------------------
20165        check_match( $new_line, $current_line );
20166
20167        # --------------------------------------------------------------------
20168        # See if there is space for this line in the current group (if any)
20169        # --------------------------------------------------------------------
20170        if ($current_line) {
20171            check_fit( $new_line, $current_line );
20172        }
20173    }
20174
20175    # --------------------------------------------------------------------
20176    # Append this line to the current group (or start new group)
20177    # --------------------------------------------------------------------
20178    add_to_group($new_line);
20179
20180    # Future update to allow this to vary:
20181    $current_line = $new_line if ( $maximum_line_index == 0 );
20182
20183    # output this group if it ends in a terminal else or ternary line
20184    if ( defined($j_terminal_match) ) {
20185
20186        # if there is only one line in the group (maybe due to failure to match
20187        # perfectly with previous lines), then align the ? or { of this
20188        # terminal line with the previous one unless that would make the line
20189        # too long
20190        if ( $maximum_line_index == 0 ) {
20191            my $col_now = $current_line->get_column($j_terminal_match);
20192            my $pad     = $col_matching_terminal - $col_now;
20193            my $padding_available =
20194              $current_line->get_available_space_on_right();
20195            if ( $pad > 0 && $pad <= $padding_available ) {
20196                $current_line->increase_field_width( $j_terminal_match, $pad );
20197            }
20198        }
20199        my_flush();
20200        $is_matching_terminal_line = 0;
20201    }
20202
20203    # --------------------------------------------------------------------
20204    # Step 8. Some old debugging stuff
20205    # --------------------------------------------------------------------
20206    VALIGN_DEBUG_FLAG_APPEND && do {
20207        print STDOUT "APPEND fields:";
20208        dump_array(@$rfields);
20209        print STDOUT "APPEND tokens:";
20210        dump_array(@$rtokens);
20211        print STDOUT "APPEND patterns:";
20212        dump_array(@$rpatterns);
20213        dump_alignments();
20214    };
20215
20216    return;
20217}
20218
20219sub join_hanging_comment {
20220
20221    my $line = shift;
20222    my $jmax = $line->get_jmax();
20223    return 0 unless $jmax == 1;    # must be 2 fields
20224    my $rtokens = $line->get_rtokens();
20225    return 0 unless $$rtokens[0] eq '#';    # the second field is a comment..
20226    my $rfields = $line->get_rfields();
20227    return 0 unless $$rfields[0] =~ /^\s*$/;    # the first field is empty...
20228    my $old_line            = shift;
20229    my $maximum_field_index = $old_line->get_jmax();
20230    return 0
20231      unless $maximum_field_index > $jmax;    # the current line has more fields
20232    my $rpatterns = $line->get_rpatterns();
20233
20234    $line->set_is_hanging_side_comment(1);
20235    $jmax = $maximum_field_index;
20236    $line->set_jmax($jmax);
20237    $$rfields[$jmax]         = $$rfields[1];
20238    $$rtokens[ $jmax - 1 ]   = $$rtokens[0];
20239    $$rpatterns[ $jmax - 1 ] = $$rpatterns[0];
20240    for ( my $j = 1 ; $j < $jmax ; $j++ ) {
20241        $$rfields[$j]         = " ";  # NOTE: caused glitch unless 1 blank, why?
20242        $$rtokens[ $j - 1 ]   = "";
20243        $$rpatterns[ $j - 1 ] = "";
20244    }
20245    return 1;
20246}
20247
20248sub eliminate_old_fields {
20249
20250    my $new_line = shift;
20251    my $jmax     = $new_line->get_jmax();
20252    if ( $jmax > $maximum_jmax_seen ) { $maximum_jmax_seen = $jmax }
20253    if ( $jmax < $minimum_jmax_seen ) { $minimum_jmax_seen = $jmax }
20254
20255    # there must be one previous line
20256    return unless ( $maximum_line_index == 0 );
20257
20258    my $old_line            = shift;
20259    my $maximum_field_index = $old_line->get_jmax();
20260
20261    ###############################################
20262    # this line must have fewer fields
20263    return unless $maximum_field_index > $jmax;
20264    ###############################################
20265
20266    # Identify specific cases where field elimination is allowed:
20267    # case=1: both lines have comma-separated lists, and the first
20268    #         line has an equals
20269    # case=2: both lines have leading equals
20270
20271    # case 1 is the default
20272    my $case = 1;
20273
20274    # See if case 2: both lines have leading '='
20275    # We'll require smiliar leading patterns in this case
20276    my $old_rtokens   = $old_line->get_rtokens();
20277    my $rtokens       = $new_line->get_rtokens();
20278    my $rpatterns     = $new_line->get_rpatterns();
20279    my $old_rpatterns = $old_line->get_rpatterns();
20280    if (   $rtokens->[0] =~ /^=\d*$/
20281        && $old_rtokens->[0] eq $rtokens->[0]
20282        && $old_rpatterns->[0] eq $rpatterns->[0] )
20283    {
20284        $case = 2;
20285    }
20286
20287    # not too many fewer fields in new line for case 1
20288    return unless ( $case != 1 || $maximum_field_index - 2 <= $jmax );
20289
20290    # case 1 must have side comment
20291    my $old_rfields = $old_line->get_rfields();
20292    return
20293      if ( $case == 1
20294        && length( $$old_rfields[$maximum_field_index] ) == 0 );
20295
20296    my $rfields = $new_line->get_rfields();
20297
20298    my $hid_equals = 0;
20299
20300    my @new_alignments        = ();
20301    my @new_fields            = ();
20302    my @new_matching_patterns = ();
20303    my @new_matching_tokens   = ();
20304
20305    my $j = 0;
20306    my $k;
20307    my $current_field   = '';
20308    my $current_pattern = '';
20309
20310    # loop over all old tokens
20311    my $in_match = 0;
20312    for ( $k = 0 ; $k < $maximum_field_index ; $k++ ) {
20313        $current_field   .= $$old_rfields[$k];
20314        $current_pattern .= $$old_rpatterns[$k];
20315        last if ( $j > $jmax - 1 );
20316
20317        if ( $$old_rtokens[$k] eq $$rtokens[$j] ) {
20318            $in_match                  = 1;
20319            $new_fields[$j]            = $current_field;
20320            $new_matching_patterns[$j] = $current_pattern;
20321            $current_field             = '';
20322            $current_pattern           = '';
20323            $new_matching_tokens[$j]   = $$old_rtokens[$k];
20324            $new_alignments[$j]        = $old_line->get_alignment($k);
20325            $j++;
20326        }
20327        else {
20328
20329            if ( $$old_rtokens[$k] =~ /^\=\d*$/ ) {
20330                last if ( $case == 2 );    # avoid problems with stuff
20331                                           # like:   $a=$b=$c=$d;
20332                $hid_equals = 1;
20333            }
20334            last
20335              if ( $in_match && $case == 1 )
20336              ;    # disallow gaps in matching field types in case 1
20337        }
20338    }
20339
20340    # Modify the current state if we are successful.
20341    # We must exactly reach the ends of both lists for success.
20342    if (   ( $j == $jmax )
20343        && ( $current_field eq '' )
20344        && ( $case != 1 || $hid_equals ) )
20345    {
20346        $k = $maximum_field_index;
20347        $current_field   .= $$old_rfields[$k];
20348        $current_pattern .= $$old_rpatterns[$k];
20349        $new_fields[$j]            = $current_field;
20350        $new_matching_patterns[$j] = $current_pattern;
20351
20352        $new_alignments[$j] = $old_line->get_alignment($k);
20353        $maximum_field_index = $j;
20354
20355        $old_line->set_alignments(@new_alignments);
20356        $old_line->set_jmax($jmax);
20357        $old_line->set_rtokens( \@new_matching_tokens );
20358        $old_line->set_rfields( \@new_fields );
20359        $old_line->set_rpatterns( \@$rpatterns );
20360    }
20361}
20362
20363# create an empty side comment if none exists
20364sub make_side_comment {
20365    my $new_line  = shift;
20366    my $level_end = shift;
20367    my $jmax      = $new_line->get_jmax();
20368    my $rtokens   = $new_line->get_rtokens();
20369
20370    # if line does not have a side comment...
20371    if ( ( $jmax == 0 ) || ( $$rtokens[ $jmax - 1 ] ne '#' ) ) {
20372        my $rfields   = $new_line->get_rfields();
20373        my $rpatterns = $new_line->get_rpatterns();
20374        $$rtokens[$jmax]     = '#';
20375        $$rfields[ ++$jmax ] = '';
20376        $$rpatterns[$jmax]   = '#';
20377        $new_line->set_jmax($jmax);
20378        $new_line->set_jmax_original_line($jmax);
20379    }
20380
20381    # line has a side comment..
20382    else {
20383
20384        # don't remember old side comment location for very long
20385        my $line_number = $vertical_aligner_self->get_output_line_number();
20386        my $rfields     = $new_line->get_rfields();
20387        if (
20388            $line_number - $last_side_comment_line_number > 12
20389
20390            # and don't remember comment location across block level changes
20391            || ( $level_end < $last_side_comment_level && $$rfields[0] =~ /^}/ )
20392          )
20393        {
20394            forget_side_comment();
20395        }
20396        $last_side_comment_line_number = $line_number;
20397        $last_side_comment_level       = $level_end;
20398    }
20399}
20400
20401sub decide_if_list {
20402
20403    my $line = shift;
20404
20405    # A list will be taken to be a line with a forced break in which all
20406    # of the field separators are commas or comma-arrows (except for the
20407    # trailing #)
20408
20409    # List separator tokens are things like ',3'   or '=>2',
20410    # where the trailing digit is the nesting depth.  Allow braces
20411    # to allow nested list items.
20412    my $rtokens    = $line->get_rtokens();
20413    my $test_token = $$rtokens[0];
20414    if ( $test_token =~ /^(\,|=>)/ ) {
20415        my $list_type = $test_token;
20416        my $jmax      = $line->get_jmax();
20417
20418        foreach ( 1 .. $jmax - 2 ) {
20419            if ( $$rtokens[$_] !~ /^(\,|=>|\{)/ ) {
20420                $list_type = "";
20421                last;
20422            }
20423        }
20424        $line->set_list_type($list_type);
20425    }
20426}
20427
20428sub eliminate_new_fields {
20429
20430    return unless ( $maximum_line_index >= 0 );
20431    my ( $new_line, $old_line ) = @_;
20432    my $jmax = $new_line->get_jmax();
20433
20434    my $old_rtokens = $old_line->get_rtokens();
20435    my $rtokens     = $new_line->get_rtokens();
20436    my $is_assignment =
20437      ( $rtokens->[0] =~ /^=\d*$/ && ( $old_rtokens->[0] eq $rtokens->[0] ) );
20438
20439    # must be monotonic variation
20440    return unless ( $is_assignment || $previous_maximum_jmax_seen <= $jmax );
20441
20442    # must be more fields in the new line
20443    my $maximum_field_index = $old_line->get_jmax();
20444    return unless ( $maximum_field_index < $jmax );
20445
20446    unless ($is_assignment) {
20447        return
20448          unless ( $old_line->get_jmax_original_line() == $minimum_jmax_seen )
20449          ;    # only if monotonic
20450
20451        # never combine fields of a comma list
20452        return
20453          unless ( $maximum_field_index > 1 )
20454          && ( $new_line->get_list_type() !~ /^,/ );
20455    }
20456
20457    my $rfields       = $new_line->get_rfields();
20458    my $rpatterns     = $new_line->get_rpatterns();
20459    my $old_rpatterns = $old_line->get_rpatterns();
20460
20461    # loop over all OLD tokens except comment and check match
20462    my $match = 1;
20463    my $k;
20464    for ( $k = 0 ; $k < $maximum_field_index - 1 ; $k++ ) {
20465        if (   ( $$old_rtokens[$k] ne $$rtokens[$k] )
20466            || ( $$old_rpatterns[$k] ne $$rpatterns[$k] ) )
20467        {
20468            $match = 0;
20469            last;
20470        }
20471    }
20472
20473    # first tokens agree, so combine extra new tokens
20474    if ($match) {
20475        for $k ( $maximum_field_index .. $jmax - 1 ) {
20476
20477            $$rfields[ $maximum_field_index - 1 ] .= $$rfields[$k];
20478            $$rfields[$k] = "";
20479            $$rpatterns[ $maximum_field_index - 1 ] .= $$rpatterns[$k];
20480            $$rpatterns[$k] = "";
20481        }
20482
20483        $$rtokens[ $maximum_field_index - 1 ] = '#';
20484        $$rfields[$maximum_field_index]       = $$rfields[$jmax];
20485        $$rpatterns[$maximum_field_index]     = $$rpatterns[$jmax];
20486        $jmax                                 = $maximum_field_index;
20487    }
20488    $new_line->set_jmax($jmax);
20489}
20490
20491sub fix_terminal_ternary {
20492
20493    # Add empty fields as necessary to align a ternary term
20494    # like this:
20495    #
20496    #  my $leapyear =
20497    #      $year % 4   ? 0
20498    #    : $year % 100 ? 1
20499    #    : $year % 400 ? 0
20500    #    :               1;
20501    #
20502    # returns 1 if the terminal item should be indented
20503
20504    my ( $rfields, $rtokens, $rpatterns ) = @_;
20505
20506    my $jmax        = @{$rfields} - 1;
20507    my $old_line    = $group_lines[$maximum_line_index];
20508    my $rfields_old = $old_line->get_rfields();
20509
20510    my $rpatterns_old       = $old_line->get_rpatterns();
20511    my $rtokens_old         = $old_line->get_rtokens();
20512    my $maximum_field_index = $old_line->get_jmax();
20513
20514    # look for the question mark after the :
20515    my ($jquestion);
20516    my $depth_question;
20517    my $pad = "";
20518    for ( my $j = 0 ; $j < $maximum_field_index ; $j++ ) {
20519        my $tok = $rtokens_old->[$j];
20520        if ( $tok =~ /^\?(\d+)$/ ) {
20521            $depth_question = $1;
20522
20523            # depth must be correct
20524            next unless ( $depth_question eq $group_level );
20525
20526            $jquestion = $j;
20527            if ( $rfields_old->[ $j + 1 ] =~ /^(\?\s*)/ ) {
20528                $pad = " " x length($1);
20529            }
20530            else {
20531                return;    # shouldn't happen
20532            }
20533            last;
20534        }
20535    }
20536    return unless ( defined($jquestion) );    # shouldn't happen
20537
20538    # Now splice the tokens and patterns of the previous line
20539    # into the else line to insure a match.  Add empty fields
20540    # as necessary.
20541    my $jadd = $jquestion;
20542
20543    # Work on copies of the actual arrays in case we have
20544    # to return due to an error
20545    my @fields   = @{$rfields};
20546    my @patterns = @{$rpatterns};
20547    my @tokens   = @{$rtokens};
20548
20549    VALIGN_DEBUG_FLAG_TERNARY && do {
20550        local $" = '><';
20551        print STDOUT "CURRENT FIELDS=<@{$rfields_old}>\n";
20552        print STDOUT "CURRENT TOKENS=<@{$rtokens_old}>\n";
20553        print STDOUT "CURRENT PATTERNS=<@{$rpatterns_old}>\n";
20554        print STDOUT "UNMODIFIED FIELDS=<@{$rfields}>\n";
20555        print STDOUT "UNMODIFIED TOKENS=<@{$rtokens}>\n";
20556        print STDOUT "UNMODIFIED PATTERNS=<@{$rpatterns}>\n";
20557    };
20558
20559    # handle cases of leading colon on this line
20560    if ( $fields[0] =~ /^(:\s*)(.*)$/ ) {
20561
20562        my ( $colon, $therest ) = ( $1, $2 );
20563
20564        # Handle sub-case of first field with leading colon plus additional code
20565        # This is the usual situation as at the '1' below:
20566        #  ...
20567        #  : $year % 400 ? 0
20568        #  :               1;
20569        if ($therest) {
20570
20571            # Split the first field after the leading colon and insert padding.
20572            # Note that this padding will remain even if the terminal value goes
20573            # out on a separate line.  This does not seem to look to bad, so no
20574            # mechanism has been included to undo it.
20575            my $field1 = shift @fields;
20576            unshift @fields, ( $colon, $pad . $therest );
20577
20578            # change the leading pattern from : to ?
20579            return unless ( $patterns[0] =~ s/^\:/?/ );
20580
20581            # install leading tokens and patterns of existing line
20582            unshift( @tokens,   @{$rtokens_old}[ 0 .. $jquestion ] );
20583            unshift( @patterns, @{$rpatterns_old}[ 0 .. $jquestion ] );
20584
20585            # insert appropriate number of empty fields
20586            splice( @fields, 1, 0, ('') x $jadd ) if $jadd;
20587        }
20588
20589        # handle sub-case of first field just equal to leading colon.
20590        # This can happen for example in the example below where
20591        # the leading '(' would create a new alignment token
20592        # : ( $name =~ /[]}]$/ ) ? ( $mname = $name )
20593        # :                        ( $mname = $name . '->' );
20594        else {
20595
20596            return unless ( $jmax > 0 && $tokens[0] ne '#' ); # shouldn't happen
20597
20598            # prepend a leading ? onto the second pattern
20599            $patterns[1] = "?b" . $patterns[1];
20600
20601            # pad the second field
20602            $fields[1] = $pad . $fields[1];
20603
20604            # install leading tokens and patterns of existing line, replacing
20605            # leading token and inserting appropriate number of empty fields
20606            splice( @tokens,   0, 1, @{$rtokens_old}[ 0 .. $jquestion ] );
20607            splice( @patterns, 1, 0, @{$rpatterns_old}[ 1 .. $jquestion ] );
20608            splice( @fields, 1, 0, ('') x $jadd ) if $jadd;
20609        }
20610    }
20611
20612    # Handle case of no leading colon on this line.  This will
20613    # be the case when -wba=':' is used.  For example,
20614    #  $year % 400 ? 0 :
20615    #                1;
20616    else {
20617
20618        # install leading tokens and patterns of existing line
20619        $patterns[0] = '?' . 'b' . $patterns[0];
20620        unshift( @tokens,   @{$rtokens_old}[ 0 .. $jquestion ] );
20621        unshift( @patterns, @{$rpatterns_old}[ 0 .. $jquestion ] );
20622
20623        # insert appropriate number of empty fields
20624        $jadd = $jquestion + 1;
20625        $fields[0] = $pad . $fields[0];
20626        splice( @fields, 0, 0, ('') x $jadd ) if $jadd;
20627    }
20628
20629    VALIGN_DEBUG_FLAG_TERNARY && do {
20630        local $" = '><';
20631        print STDOUT "MODIFIED TOKENS=<@tokens>\n";
20632        print STDOUT "MODIFIED PATTERNS=<@patterns>\n";
20633        print STDOUT "MODIFIED FIELDS=<@fields>\n";
20634    };
20635
20636    # all ok .. update the arrays
20637    @{$rfields}   = @fields;
20638    @{$rtokens}   = @tokens;
20639    @{$rpatterns} = @patterns;
20640
20641    # force a flush after this line
20642    return $jquestion;
20643}
20644
20645sub fix_terminal_else {
20646
20647    # Add empty fields as necessary to align a balanced terminal
20648    # else block to a previous if/elsif/unless block,
20649    # like this:
20650    #
20651    #  if   ( 1 || $x ) { print "ok 13\n"; }
20652    #  else             { print "not ok 13\n"; }
20653    #
20654    # returns 1 if the else block should be indented
20655    #
20656    my ( $rfields, $rtokens, $rpatterns ) = @_;
20657    my $jmax = @{$rfields} - 1;
20658    return unless ( $jmax > 0 );
20659
20660    # check for balanced else block following if/elsif/unless
20661    my $rfields_old = $current_line->get_rfields();
20662
20663    # TBD: add handling for 'case'
20664    return unless ( $rfields_old->[0] =~ /^(if|elsif|unless)\s*$/ );
20665
20666    # look for the opening brace after the else, and extrace the depth
20667    my $tok_brace = $rtokens->[0];
20668    my $depth_brace;
20669    if ( $tok_brace =~ /^\{(\d+)/ ) { $depth_brace = $1; }
20670
20671    # probably:  "else # side_comment"
20672    else { return }
20673
20674    my $rpatterns_old       = $current_line->get_rpatterns();
20675    my $rtokens_old         = $current_line->get_rtokens();
20676    my $maximum_field_index = $current_line->get_jmax();
20677
20678    # be sure the previous if/elsif is followed by an opening paren
20679    my $jparen    = 0;
20680    my $tok_paren = '(' . $depth_brace;
20681    my $tok_test  = $rtokens_old->[$jparen];
20682    return unless ( $tok_test eq $tok_paren );    # shouldn't happen
20683
20684    # Now find the opening block brace
20685    my ($jbrace);
20686    for ( my $j = 1 ; $j < $maximum_field_index ; $j++ ) {
20687        my $tok = $rtokens_old->[$j];
20688        if ( $tok eq $tok_brace ) {
20689            $jbrace = $j;
20690            last;
20691        }
20692    }
20693    return unless ( defined($jbrace) );           # shouldn't happen
20694
20695    # Now splice the tokens and patterns of the previous line
20696    # into the else line to insure a match.  Add empty fields
20697    # as necessary.
20698    my $jadd = $jbrace - $jparen;
20699    splice( @{$rtokens},   0, 0, @{$rtokens_old}[ $jparen .. $jbrace - 1 ] );
20700    splice( @{$rpatterns}, 1, 0, @{$rpatterns_old}[ $jparen + 1 .. $jbrace ] );
20701    splice( @{$rfields}, 1, 0, ('') x $jadd );
20702
20703    # force a flush after this line if it does not follow a case
20704    return $jbrace
20705      unless ( $rfields_old->[0] =~ /^case\s*$/ );
20706}
20707
20708{    # sub check_match
20709    my %is_good_alignment;
20710
20711    BEGIN {
20712
20713        # Vertically aligning on certain "good" tokens is usually okay
20714        # so we can be less restrictive in marginal cases.
20715        @_ = qw( { ? => = );
20716        push @_, (',');
20717        @is_good_alignment{@_} = (1) x scalar(@_);
20718    }
20719
20720    sub check_match {
20721
20722        # See if the current line matches the current vertical alignment group.
20723        # If not, flush the current group.
20724        my $new_line = shift;
20725        my $old_line = shift;
20726
20727        # uses global variables:
20728        #  $previous_minimum_jmax_seen
20729        #  $maximum_jmax_seen
20730        #  $maximum_line_index
20731        #  $marginal_match
20732        my $jmax                = $new_line->get_jmax();
20733        my $maximum_field_index = $old_line->get_jmax();
20734
20735        # flush if this line has too many fields
20736        if ( $jmax > $maximum_field_index ) { goto NO_MATCH }
20737
20738        # flush if adding this line would make a non-monotonic field count
20739        if (
20740            ( $maximum_field_index > $jmax )    # this has too few fields
20741            && (
20742                ( $previous_minimum_jmax_seen <
20743                    $jmax )                     # and wouldn't be monotonic
20744                || ( $old_line->get_jmax_original_line() != $maximum_jmax_seen )
20745            )
20746          )
20747        {
20748            goto NO_MATCH;
20749        }
20750
20751        # otherwise see if this line matches the current group
20752        my $jmax_original_line      = $new_line->get_jmax_original_line();
20753        my $is_hanging_side_comment = $new_line->get_is_hanging_side_comment();
20754        my $rtokens                 = $new_line->get_rtokens();
20755        my $rfields                 = $new_line->get_rfields();
20756        my $rpatterns               = $new_line->get_rpatterns();
20757        my $list_type               = $new_line->get_list_type();
20758
20759        my $group_list_type = $old_line->get_list_type();
20760        my $old_rpatterns   = $old_line->get_rpatterns();
20761        my $old_rtokens     = $old_line->get_rtokens();
20762
20763        my $jlimit = $jmax - 1;
20764        if ( $maximum_field_index > $jmax ) {
20765            $jlimit = $jmax_original_line;
20766            --$jlimit unless ( length( $new_line->get_rfields()->[$jmax] ) );
20767        }
20768
20769        # handle comma-separated lists ..
20770        if ( $group_list_type && ( $list_type eq $group_list_type ) ) {
20771            for my $j ( 0 .. $jlimit ) {
20772                my $old_tok = $$old_rtokens[$j];
20773                next unless $old_tok;
20774                my $new_tok = $$rtokens[$j];
20775                next unless $new_tok;
20776
20777                # lists always match ...
20778                # unless they would align any '=>'s with ','s
20779                goto NO_MATCH
20780                  if ( $old_tok =~ /^=>/ && $new_tok =~ /^,/
20781                    || $new_tok =~ /^=>/ && $old_tok =~ /^,/ );
20782            }
20783        }
20784
20785        # do detailed check for everything else except hanging side comments
20786        elsif ( !$is_hanging_side_comment ) {
20787
20788            my $leading_space_count = $new_line->get_leading_space_count();
20789
20790            my $max_pad = 0;
20791            my $min_pad = 0;
20792            my $saw_good_alignment;
20793
20794            for my $j ( 0 .. $jlimit ) {
20795
20796                my $old_tok = $$old_rtokens[$j];
20797                my $new_tok = $$rtokens[$j];
20798
20799                # Note on encoding used for alignment tokens:
20800                # -------------------------------------------
20801                # Tokens are "decorated" with information which can help
20802                # prevent unwanted alignments.  Consider for example the
20803                # following two lines:
20804                #   local ( $xn, $xd ) = split( '/', &'rnorm(@_) );
20805                #   local ( $i, $f ) = &'bdiv( $xn, $xd );
20806                # There are three alignment tokens in each line, a comma,
20807                # an =, and a comma.  In the first line these three tokens
20808                # are encoded as:
20809                #    ,4+local-18     =3      ,4+split-7
20810                # and in the second line they are encoded as
20811                #    ,4+local-18     =3      ,4+&'bdiv-8
20812                # Tokens always at least have token name and nesting
20813                # depth.  So in this example the ='s are at depth 3 and
20814                # the ,'s are at depth 4.  This prevents aligning tokens
20815                # of different depths.  Commas contain additional
20816                # information, as follows:
20817                # ,  {depth} + {container name} - {spaces to opening paren}
20818                # This allows us to reject matching the rightmost commas
20819                # in the above two lines, since they are for different
20820                # function calls.  This encoding is done in
20821                # 'sub send_lines_to_vertical_aligner'.
20822
20823                # Pick off actual token.
20824                # Everything up to the first digit is the actual token.
20825                my $alignment_token = $new_tok;
20826                if ( $alignment_token =~ /^([^\d]+)/ ) { $alignment_token = $1 }
20827
20828                # see if the decorated tokens match
20829                my $tokens_match = $new_tok eq $old_tok
20830
20831                  # Exception for matching terminal : of ternary statement..
20832                  # consider containers prefixed by ? and : a match
20833                  || ( $new_tok =~ /^,\d*\+\:/ && $old_tok =~ /^,\d*\+\?/ );
20834
20835                # No match if the alignment tokens differ...
20836                if ( !$tokens_match ) {
20837
20838                    # ...Unless this is a side comment
20839                    if (
20840                        $j == $jlimit
20841
20842                        # and there is either at least one alignment token
20843                        # or this is a single item following a list.  This
20844                        # latter rule is required for 'December' to join
20845                        # the following list:
20846                        # my (@months) = (
20847                        #     '',       'January',   'February', 'March',
20848                        #     'April',  'May',       'June',     'July',
20849                        #     'August', 'September', 'October',  'November',
20850                        #     'December'
20851                        # );
20852                        # If it doesn't then the -lp formatting will fail.
20853                        && ( $j > 0 || $old_tok =~ /^,/ )
20854                      )
20855                    {
20856                        $marginal_match = 1
20857                          if ( $marginal_match == 0
20858                            && $maximum_line_index == 0 );
20859                        last;
20860                    }
20861
20862                    goto NO_MATCH;
20863                }
20864
20865                # Calculate amount of padding required to fit this in.
20866                # $pad is the number of spaces by which we must increase
20867                # the current field to squeeze in this field.
20868                my $pad =
20869                  length( $$rfields[$j] ) - $old_line->current_field_width($j);
20870                if ( $j == 0 ) { $pad += $leading_space_count; }
20871
20872                # remember max pads to limit marginal cases
20873                if ( $alignment_token ne '#' ) {
20874                    if ( $pad > $max_pad ) { $max_pad = $pad }
20875                    if ( $pad < $min_pad ) { $min_pad = $pad }
20876                }
20877                if ( $is_good_alignment{$alignment_token} ) {
20878                    $saw_good_alignment = 1;
20879                }
20880
20881                # If patterns don't match, we have to be careful...
20882                if ( $$old_rpatterns[$j] ne $$rpatterns[$j] ) {
20883
20884                    # flag this as a marginal match since patterns differ
20885                    $marginal_match = 1
20886                      if ( $marginal_match == 0 && $maximum_line_index == 0 );
20887
20888                    # We have to be very careful about aligning commas
20889                    # when the pattern's don't match, because it can be
20890                    # worse to create an alignment where none is needed
20891                    # than to omit one.  Here's an example where the ','s
20892                    # are not in named continers.  The first line below
20893                    # should not match the next two:
20894                    #   ( $a, $b ) = ( $b, $r );
20895                    #   ( $x1, $x2 ) = ( $x2 - $q * $x1, $x1 );
20896                    #   ( $y1, $y2 ) = ( $y2 - $q * $y1, $y1 );
20897                    if ( $alignment_token eq ',' ) {
20898
20899                       # do not align commas unless they are in named containers
20900                        goto NO_MATCH unless ( $new_tok =~ /[A-Za-z]/ );
20901                    }
20902
20903                    # do not align parens unless patterns match;
20904                    # large ugly spaces can occur in math expressions.
20905                    elsif ( $alignment_token eq '(' ) {
20906
20907                        # But we can allow a match if the parens don't
20908                        # require any padding.
20909                        if ( $pad != 0 ) { goto NO_MATCH }
20910                    }
20911
20912                    # Handle an '=' alignment with different patterns to
20913                    # the left.
20914                    elsif ( $alignment_token eq '=' ) {
20915
20916                        # It is best to be a little restrictive when
20917                        # aligning '=' tokens.  Here is an example of
20918                        # two lines that we will not align:
20919                        #       my $variable=6;
20920                        #       $bb=4;
20921                        # The problem is that one is a 'my' declaration,
20922                        # and the other isn't, so they're not very similar.
20923                        # We will filter these out by comparing the first
20924                        # letter of the pattern.  This is crude, but works
20925                        # well enough.
20926                        if (
20927                            substr( $$old_rpatterns[$j], 0, 1 ) ne
20928                            substr( $$rpatterns[$j],     0, 1 ) )
20929                        {
20930                            goto NO_MATCH;
20931                        }
20932
20933                        # If we pass that test, we'll call it a marginal match.
20934                        # Here is an example of a marginal match:
20935                        #       $done{$$op} = 1;
20936                        #       $op         = compile_bblock($op);
20937                        # The left tokens are both identifiers, but
20938                        # one accesses a hash and the other doesn't.
20939                        # We'll let this be a tentative match and undo
20940                        # it later if we don't find more than 2 lines
20941                        # in the group.
20942                        elsif ( $maximum_line_index == 0 ) {
20943                            $marginal_match =
20944                              2;    # =2 prevents being undone below
20945                        }
20946                    }
20947                }
20948
20949                # Don't let line with fewer fields increase column widths
20950                # ( align3.t )
20951                if ( $maximum_field_index > $jmax ) {
20952
20953                    # Exception: suspend this rule to allow last lines to join
20954                    if ( $pad > 0 ) { goto NO_MATCH; }
20955                }
20956            } ## end for my $j ( 0 .. $jlimit)
20957
20958            # Turn off the "marginal match" flag in some cases...
20959            # A "marginal match" occurs when the alignment tokens agree
20960            # but there are differences in the other tokens (patterns).
20961            # If we leave the marginal match flag set, then the rule is that we
20962            # will align only if there are more than two lines in the group.
20963            # We will turn of the flag if we almost have a match
20964            # and either we have seen a good alignment token or we
20965            # just need a small pad (2 spaces) to fit.  These rules are
20966            # the result of experimentation.  Tokens which misaligned by just
20967            # one or two characters are annoying.  On the other hand,
20968            # large gaps to less important alignment tokens are also annoying.
20969            if (   $marginal_match == 1
20970                && $jmax == $maximum_field_index
20971                && ( $saw_good_alignment || ( $max_pad < 3 && $min_pad > -3 ) )
20972              )
20973            {
20974                $marginal_match = 0;
20975            }
20976            ##print "marginal=$marginal_match saw=$saw_good_alignment jmax=$jmax max=$maximum_field_index maxpad=$max_pad minpad=$min_pad\n";
20977        }
20978
20979        # We have a match (even if marginal).
20980        # If the current line has fewer fields than the current group
20981        # but otherwise matches, copy the remaining group fields to
20982        # make it a perfect match.
20983        if ( $maximum_field_index > $jmax ) {
20984            my $comment = $$rfields[$jmax];
20985            for $jmax ( $jlimit .. $maximum_field_index ) {
20986                $$rtokens[$jmax]     = $$old_rtokens[$jmax];
20987                $$rfields[ ++$jmax ] = '';
20988                $$rpatterns[$jmax]   = $$old_rpatterns[$jmax];
20989            }
20990            $$rfields[$jmax] = $comment;
20991            $new_line->set_jmax($jmax);
20992        }
20993        return;
20994
20995      NO_MATCH:
20996        ##print "BUBBA: no match jmax=$jmax  max=$maximum_field_index $group_list_type lines=$maximum_line_index token=$$old_rtokens[0]\n";
20997        my_flush();
20998        return;
20999    }
21000}
21001
21002sub check_fit {
21003
21004    return unless ( $maximum_line_index >= 0 );
21005    my $new_line = shift;
21006    my $old_line = shift;
21007
21008    my $jmax                    = $new_line->get_jmax();
21009    my $leading_space_count     = $new_line->get_leading_space_count();
21010    my $is_hanging_side_comment = $new_line->get_is_hanging_side_comment();
21011    my $rtokens                 = $new_line->get_rtokens();
21012    my $rfields                 = $new_line->get_rfields();
21013    my $rpatterns               = $new_line->get_rpatterns();
21014
21015    my $group_list_type = $group_lines[0]->get_list_type();
21016
21017    my $padding_so_far    = 0;
21018    my $padding_available = $old_line->get_available_space_on_right();
21019
21020    # save current columns in case this doesn't work
21021    save_alignment_columns();
21022
21023    my ( $j, $pad, $eight );
21024    my $maximum_field_index = $old_line->get_jmax();
21025    for $j ( 0 .. $jmax ) {
21026
21027        $pad = length( $$rfields[$j] ) - $old_line->current_field_width($j);
21028
21029        if ( $j == 0 ) {
21030            $pad += $leading_space_count;
21031        }
21032
21033        # remember largest gap of the group, excluding gap to side comment
21034        if (   $pad < 0
21035            && $group_maximum_gap < -$pad
21036            && $j > 0
21037            && $j < $jmax - 1 )
21038        {
21039            $group_maximum_gap = -$pad;
21040        }
21041
21042        next if $pad < 0;
21043
21044        ## This patch helps sometimes, but it doesn't check to see if
21045        ## the line is too long even without the side comment.  It needs
21046        ## to be reworked.
21047        ##don't let a long token with no trailing side comment push
21048        ##side comments out, or end a group.  (sidecmt1.t)
21049        ##next if ($j==$jmax-1 && length($$rfields[$jmax])==0);
21050
21051        # This line will need space; lets see if we want to accept it..
21052        if (
21053
21054            # not if this won't fit
21055            ( $pad > $padding_available )
21056
21057            # previously, there were upper bounds placed on padding here
21058            # (maximum_whitespace_columns), but they were not really helpful
21059
21060          )
21061        {
21062
21063            # revert to starting state then flush; things didn't work out
21064            restore_alignment_columns();
21065            my_flush();
21066            last;
21067        }
21068
21069        # patch to avoid excessive gaps in previous lines,
21070        # due to a line of fewer fields.
21071        #   return join( ".",
21072        #       $self->{"dfi"},  $self->{"aa"}, $self->rsvd,     $self->{"rd"},
21073        #       $self->{"area"}, $self->{"id"}, $self->{"sel"} );
21074        next if ( $jmax < $maximum_field_index && $j == $jmax - 1 );
21075
21076        # looks ok, squeeze this field in
21077        $old_line->increase_field_width( $j, $pad );
21078        $padding_available -= $pad;
21079
21080        # remember largest gap of the group, excluding gap to side comment
21081        if ( $pad > $group_maximum_gap && $j > 0 && $j < $jmax - 1 ) {
21082            $group_maximum_gap = $pad;
21083        }
21084    }
21085}
21086
21087sub add_to_group {
21088
21089    # The current line either starts a new alignment group or is
21090    # accepted into the current alignment group.
21091    my $new_line = shift;
21092    $group_lines[ ++$maximum_line_index ] = $new_line;
21093
21094    # initialize field lengths if starting new group
21095    if ( $maximum_line_index == 0 ) {
21096
21097        my $jmax    = $new_line->get_jmax();
21098        my $rfields = $new_line->get_rfields();
21099        my $rtokens = $new_line->get_rtokens();
21100        my $j;
21101        my $col = $new_line->get_leading_space_count();
21102
21103        for $j ( 0 .. $jmax ) {
21104            $col += length( $$rfields[$j] );
21105
21106            # create initial alignments for the new group
21107            my $token = "";
21108            if ( $j < $jmax ) { $token = $$rtokens[$j] }
21109            my $alignment = make_alignment( $col, $token );
21110            $new_line->set_alignment( $j, $alignment );
21111        }
21112
21113        $maximum_jmax_seen = $jmax;
21114        $minimum_jmax_seen = $jmax;
21115    }
21116
21117    # use previous alignments otherwise
21118    else {
21119        my @new_alignments =
21120          $group_lines[ $maximum_line_index - 1 ]->get_alignments();
21121        $new_line->set_alignments(@new_alignments);
21122    }
21123
21124    # remember group jmax extremes for next call to valign_input
21125    $previous_minimum_jmax_seen = $minimum_jmax_seen;
21126    $previous_maximum_jmax_seen = $maximum_jmax_seen;
21127}
21128
21129sub dump_array {
21130
21131    # debug routine to dump array contents
21132    local $" = ')(';
21133    print STDOUT "(@_)\n";
21134}
21135
21136# flush() sends the current Perl::Tidy::VerticalAligner group down the
21137# pipeline to Perl::Tidy::FileWriter.
21138
21139# This is the external flush, which also empties the buffer and cache
21140sub flush {
21141
21142    # the buffer must be emptied first, then any cached text
21143    dump_valign_buffer();
21144
21145    if ( $maximum_line_index < 0 ) {
21146        if ($cached_line_type) {
21147            $seqno_string = $cached_seqno_string;
21148            valign_output_step_C( $cached_line_text,
21149                $cached_line_leading_space_count,
21150                $last_level_written );
21151            $cached_line_type    = 0;
21152            $cached_line_text    = "";
21153            $cached_seqno_string = "";
21154        }
21155    }
21156    else {
21157        my_flush();
21158    }
21159}
21160
21161sub reduce_valign_buffer_indentation {
21162
21163    my ($diff) = @_;
21164    if ( $valign_buffer_filling && $diff ) {
21165        my $max_valign_buffer = @valign_buffer;
21166        for ( my $i = 0 ; $i < $max_valign_buffer ; $i++ ) {
21167            my ( $line, $leading_space_count, $level ) =
21168              @{ $valign_buffer[$i] };
21169            my $ws = substr( $line, 0, $diff );
21170            if ( ( length($ws) == $diff ) && $ws =~ /^\s+$/ ) {
21171                $line = substr( $line, $diff );
21172            }
21173            if ( $leading_space_count >= $diff ) {
21174                $leading_space_count -= $diff;
21175                $level = level_change( $leading_space_count, $diff, $level );
21176            }
21177            $valign_buffer[$i] = [ $line, $leading_space_count, $level ];
21178        }
21179    }
21180}
21181
21182sub level_change {
21183
21184    # compute decrease in level when we remove $diff spaces from the
21185    # leading spaces
21186    my ( $leading_space_count, $diff, $level ) = @_;
21187    if ($rOpts_indent_columns) {
21188        my $olev =
21189          int( ( $leading_space_count + $diff ) / $rOpts_indent_columns );
21190        my $nlev = int( $leading_space_count / $rOpts_indent_columns );
21191        $level -= ( $olev - $nlev );
21192        if ( $level < 0 ) { $level = 0 }
21193    }
21194    return $level;
21195}
21196
21197sub dump_valign_buffer {
21198    if (@valign_buffer) {
21199        foreach (@valign_buffer) {
21200            valign_output_step_D( @{$_} );
21201        }
21202        @valign_buffer = ();
21203    }
21204    $valign_buffer_filling = "";
21205}
21206
21207# This is the internal flush, which leaves the cache intact
21208sub my_flush {
21209
21210    return if ( $maximum_line_index < 0 );
21211
21212    # handle a group of comment lines
21213    if ( $group_type eq 'COMMENT' ) {
21214
21215        VALIGN_DEBUG_FLAG_APPEND0 && do {
21216            my ( $a, $b, $c ) = caller();
21217            print STDOUT
21218"APPEND0: Flush called from $a $b $c for COMMENT group: lines=$maximum_line_index \n";
21219
21220        };
21221        my $leading_space_count = $comment_leading_space_count;
21222        my $leading_string      = get_leading_string($leading_space_count);
21223
21224        # zero leading space count if any lines are too long
21225        my $max_excess = 0;
21226        for my $i ( 0 .. $maximum_line_index ) {
21227            my $str = $group_lines[$i];
21228            my $excess =
21229              length($str) +
21230              $leading_space_count -
21231              maximum_line_length_for_level($group_level);
21232            if ( $excess > $max_excess ) {
21233                $max_excess = $excess;
21234            }
21235        }
21236
21237        if ( $max_excess > 0 ) {
21238            $leading_space_count -= $max_excess;
21239            if ( $leading_space_count < 0 ) { $leading_space_count = 0 }
21240            $last_outdented_line_at =
21241              $file_writer_object->get_output_line_number();
21242            unless ($outdented_line_count) {
21243                $first_outdented_line_at = $last_outdented_line_at;
21244            }
21245            $outdented_line_count += ( $maximum_line_index + 1 );
21246        }
21247
21248        # write the group of lines
21249        my $outdent_long_lines = 0;
21250        for my $i ( 0 .. $maximum_line_index ) {
21251            valign_output_step_B( $leading_space_count, $group_lines[$i], 0,
21252                $outdent_long_lines, "", $group_level );
21253        }
21254    }
21255
21256    # handle a group of code lines
21257    else {
21258
21259        VALIGN_DEBUG_FLAG_APPEND0 && do {
21260            my $group_list_type = $group_lines[0]->get_list_type();
21261            my ( $a, $b, $c ) = caller();
21262            my $maximum_field_index = $group_lines[0]->get_jmax();
21263            print STDOUT
21264"APPEND0: Flush called from $a $b $c fields=$maximum_field_index list=$group_list_type lines=$maximum_line_index extra=$extra_indent_ok\n";
21265
21266        };
21267
21268        # some small groups are best left unaligned
21269        my $do_not_align = decide_if_aligned();
21270
21271        # optimize side comment location
21272        $do_not_align = adjust_side_comment($do_not_align);
21273
21274        # recover spaces for -lp option if possible
21275        my $extra_leading_spaces = get_extra_leading_spaces();
21276
21277        # all lines of this group have the same basic leading spacing
21278        my $group_leader_length = $group_lines[0]->get_leading_space_count();
21279
21280        # add extra leading spaces if helpful
21281        my $min_ci_gap = improve_continuation_indentation( $do_not_align,
21282            $group_leader_length );
21283
21284        # loop to output all lines
21285        for my $i ( 0 .. $maximum_line_index ) {
21286            my $line = $group_lines[$i];
21287            valign_output_step_A( $line, $min_ci_gap, $do_not_align,
21288                $group_leader_length, $extra_leading_spaces );
21289        }
21290    }
21291    initialize_for_new_group();
21292}
21293
21294sub decide_if_aligned {
21295
21296    # Do not try to align two lines which are not really similar
21297    return unless $maximum_line_index == 1;
21298    return if ($is_matching_terminal_line);
21299
21300    my $group_list_type = $group_lines[0]->get_list_type();
21301
21302    my $do_not_align = (
21303
21304        # always align lists
21305        !$group_list_type
21306
21307          && (
21308
21309            # don't align if it was just a marginal match
21310            $marginal_match
21311
21312            # don't align two lines with big gap
21313            || $group_maximum_gap > 12
21314
21315            # or lines with differing number of alignment tokens
21316            # TODO: this could be improved.  It occasionally rejects
21317            # good matches.
21318            || $previous_maximum_jmax_seen != $previous_minimum_jmax_seen
21319          )
21320    );
21321
21322    # But try to convert them into a simple comment group if the first line
21323    # a has side comment
21324    my $rfields             = $group_lines[0]->get_rfields();
21325    my $maximum_field_index = $group_lines[0]->get_jmax();
21326    if (   $do_not_align
21327        && ( $maximum_line_index > 0 )
21328        && ( length( $$rfields[$maximum_field_index] ) > 0 ) )
21329    {
21330        combine_fields();
21331        $do_not_align = 0;
21332    }
21333    return $do_not_align;
21334}
21335
21336sub adjust_side_comment {
21337
21338    my $do_not_align = shift;
21339
21340    # let's see if we can move the side comment field out a little
21341    # to improve readability (the last field is always a side comment field)
21342    my $have_side_comment       = 0;
21343    my $first_side_comment_line = -1;
21344    my $maximum_field_index     = $group_lines[0]->get_jmax();
21345    for my $i ( 0 .. $maximum_line_index ) {
21346        my $line = $group_lines[$i];
21347
21348        if ( length( $line->get_rfields()->[$maximum_field_index] ) ) {
21349            $have_side_comment       = 1;
21350            $first_side_comment_line = $i;
21351            last;
21352        }
21353    }
21354
21355    my $kmax = $maximum_field_index + 1;
21356
21357    if ($have_side_comment) {
21358
21359        my $line = $group_lines[0];
21360
21361        # the maximum space without exceeding the line length:
21362        my $avail = $line->get_available_space_on_right();
21363
21364        # try to use the previous comment column
21365        my $side_comment_column = $line->get_column( $kmax - 2 );
21366        my $move                = $last_comment_column - $side_comment_column;
21367
21368##        my $sc_line0 = $side_comment_history[0]->[0];
21369##        my $sc_col0  = $side_comment_history[0]->[1];
21370##        my $sc_line1 = $side_comment_history[1]->[0];
21371##        my $sc_col1  = $side_comment_history[1]->[1];
21372##        my $sc_line2 = $side_comment_history[2]->[0];
21373##        my $sc_col2  = $side_comment_history[2]->[1];
21374##
21375##        # FUTURE UPDATES:
21376##        # Be sure to ignore 'do not align' and  '} # end comments'
21377##        # Find first $move > 0 and $move <= $avail as follows:
21378##        # 1. try sc_col1 if sc_col1 == sc_col0 && (line-sc_line0) < 12
21379##        # 2. try sc_col2 if (line-sc_line2) < 12
21380##        # 3. try min possible space, plus up to 8,
21381##        # 4. try min possible space
21382
21383        if ( $kmax > 0 && !$do_not_align ) {
21384
21385            # but if this doesn't work, give up and use the minimum space
21386            if ( $move > $avail ) {
21387                $move = $rOpts_minimum_space_to_comment - 1;
21388            }
21389
21390            # but we want some minimum space to the comment
21391            my $min_move = $rOpts_minimum_space_to_comment - 1;
21392            if (   $move >= 0
21393                && $last_side_comment_length > 0
21394                && ( $first_side_comment_line == 0 )
21395                && $group_level == $last_level_written )
21396            {
21397                $min_move = 0;
21398            }
21399
21400            if ( $move < $min_move ) {
21401                $move = $min_move;
21402            }
21403
21404            # prevously, an upper bound was placed on $move here,
21405            # (maximum_space_to_comment), but it was not helpful
21406
21407            # don't exceed the available space
21408            if ( $move > $avail ) { $move = $avail }
21409
21410            # we can only increase space, never decrease
21411            if ( $move > 0 ) {
21412                $line->increase_field_width( $maximum_field_index - 1, $move );
21413            }
21414
21415            # remember this column for the next group
21416            $last_comment_column = $line->get_column( $kmax - 2 );
21417        }
21418        else {
21419
21420            # try to at least line up the existing side comment location
21421            if ( $kmax > 0 && $move > 0 && $move < $avail ) {
21422                $line->increase_field_width( $maximum_field_index - 1, $move );
21423                $do_not_align = 0;
21424            }
21425
21426            # reset side comment column if we can't align
21427            else {
21428                forget_side_comment();
21429            }
21430        }
21431    }
21432    return $do_not_align;
21433}
21434
21435sub improve_continuation_indentation {
21436    my ( $do_not_align, $group_leader_length ) = @_;
21437
21438    # See if we can increase the continuation indentation
21439    # to move all continuation lines closer to the next field
21440    # (unless it is a comment).
21441    #
21442    # '$min_ci_gap'is the extra indentation that we may need to introduce.
21443    # We will only introduce this to fields which already have some ci.
21444    # Without this variable, we would occasionally get something like this
21445    # (Complex.pm):
21446    #
21447    # use overload '+' => \&plus,
21448    #   '-'            => \&minus,
21449    #   '*'            => \&multiply,
21450    #   ...
21451    #   'tan'          => \&tan,
21452    #   'atan2'        => \&atan2,
21453    #
21454    # Whereas with this variable, we can shift variables over to get this:
21455    #
21456    # use overload '+' => \&plus,
21457    #          '-'     => \&minus,
21458    #          '*'     => \&multiply,
21459    #          ...
21460    #          'tan'   => \&tan,
21461    #          'atan2' => \&atan2,
21462
21463    ## Deactivated####################
21464    # The trouble with this patch is that it may, for example,
21465    # move in some 'or's  or ':'s, and leave some out, so that the
21466    # left edge alignment suffers.
21467    return 0;
21468    ###########################################
21469
21470    my $maximum_field_index = $group_lines[0]->get_jmax();
21471
21472    my $min_ci_gap = maximum_line_length_for_level($group_level);
21473    if ( $maximum_field_index > 1 && !$do_not_align ) {
21474
21475        for my $i ( 0 .. $maximum_line_index ) {
21476            my $line                = $group_lines[$i];
21477            my $leading_space_count = $line->get_leading_space_count();
21478            my $rfields             = $line->get_rfields();
21479
21480            my $gap =
21481              $line->get_column(0) -
21482              $leading_space_count -
21483              length( $$rfields[0] );
21484
21485            if ( $leading_space_count > $group_leader_length ) {
21486                if ( $gap < $min_ci_gap ) { $min_ci_gap = $gap }
21487            }
21488        }
21489
21490        if ( $min_ci_gap >= maximum_line_length_for_level($group_level) ) {
21491            $min_ci_gap = 0;
21492        }
21493    }
21494    else {
21495        $min_ci_gap = 0;
21496    }
21497    return $min_ci_gap;
21498}
21499
21500sub valign_output_step_A {
21501
21502    ###############################################################
21503    # This is Step A in writing vertically aligned lines.
21504    # The line is prepared according to the alignments which have
21505    # been found and shipped to the next step.
21506    ###############################################################
21507
21508    my ( $line, $min_ci_gap, $do_not_align, $group_leader_length,
21509        $extra_leading_spaces )
21510      = @_;
21511    my $rfields                   = $line->get_rfields();
21512    my $leading_space_count       = $line->get_leading_space_count();
21513    my $outdent_long_lines        = $line->get_outdent_long_lines();
21514    my $maximum_field_index       = $line->get_jmax();
21515    my $rvertical_tightness_flags = $line->get_rvertical_tightness_flags();
21516
21517    # add any extra spaces
21518    if ( $leading_space_count > $group_leader_length ) {
21519        $leading_space_count += $min_ci_gap;
21520    }
21521
21522    my $str = $$rfields[0];
21523
21524    # loop to concatenate all fields of this line and needed padding
21525    my $total_pad_count = 0;
21526    my ( $j, $pad );
21527    for $j ( 1 .. $maximum_field_index ) {
21528
21529        # skip zero-length side comments
21530        last
21531          if ( ( $j == $maximum_field_index )
21532            && ( !defined( $$rfields[$j] ) || ( length( $$rfields[$j] ) == 0 ) )
21533          );
21534
21535        # compute spaces of padding before this field
21536        my $col = $line->get_column( $j - 1 );
21537        $pad = $col - ( length($str) + $leading_space_count );
21538
21539        if ($do_not_align) {
21540            $pad =
21541              ( $j < $maximum_field_index )
21542              ? 0
21543              : $rOpts_minimum_space_to_comment - 1;
21544        }
21545
21546        # if the -fpsc flag is set, move the side comment to the selected
21547        # column if and only if it is possible, ignoring constraints on
21548        # line length and minimum space to comment
21549        if ( $rOpts_fixed_position_side_comment && $j == $maximum_field_index )
21550        {
21551            my $newpad = $pad + $rOpts_fixed_position_side_comment - $col - 1;
21552            if ( $newpad >= 0 ) { $pad = $newpad; }
21553        }
21554
21555        # accumulate the padding
21556        if ( $pad > 0 ) { $total_pad_count += $pad; }
21557
21558        # add this field
21559        if ( !defined $$rfields[$j] ) {
21560            write_diagnostics("UNDEFined field at j=$j\n");
21561        }
21562
21563        # only add padding when we have a finite field;
21564        # this avoids extra terminal spaces if we have empty fields
21565        if ( length( $$rfields[$j] ) > 0 ) {
21566            $str .= ' ' x $total_pad_count;
21567            $total_pad_count = 0;
21568            $str .= $$rfields[$j];
21569        }
21570        else {
21571            $total_pad_count = 0;
21572        }
21573
21574        # update side comment history buffer
21575        if ( $j == $maximum_field_index ) {
21576            my $lineno = $file_writer_object->get_output_line_number();
21577            shift @side_comment_history;
21578            push @side_comment_history, [ $lineno, $col ];
21579        }
21580    }
21581
21582    my $side_comment_length = ( length( $$rfields[$maximum_field_index] ) );
21583
21584    # ship this line off
21585    valign_output_step_B( $leading_space_count + $extra_leading_spaces,
21586        $str, $side_comment_length, $outdent_long_lines,
21587        $rvertical_tightness_flags, $group_level );
21588}
21589
21590sub get_extra_leading_spaces {
21591
21592    #----------------------------------------------------------
21593    # Define any extra indentation space (for the -lp option).
21594    # Here is why:
21595    # If a list has side comments, sub scan_list must dump the
21596    # list before it sees everything.  When this happens, it sets
21597    # the indentation to the standard scheme, but notes how
21598    # many spaces it would have liked to use.  We may be able
21599    # to recover that space here in the event that that all of the
21600    # lines of a list are back together again.
21601    #----------------------------------------------------------
21602
21603    my $extra_leading_spaces = 0;
21604    if ($extra_indent_ok) {
21605        my $object = $group_lines[0]->get_indentation();
21606        if ( ref($object) ) {
21607            my $extra_indentation_spaces_wanted =
21608              get_RECOVERABLE_SPACES($object);
21609
21610            # all indentation objects must be the same
21611            my $i;
21612            for $i ( 1 .. $maximum_line_index ) {
21613                if ( $object != $group_lines[$i]->get_indentation() ) {
21614                    $extra_indentation_spaces_wanted = 0;
21615                    last;
21616                }
21617            }
21618
21619            if ($extra_indentation_spaces_wanted) {
21620
21621                # the maximum space without exceeding the line length:
21622                my $avail = $group_lines[0]->get_available_space_on_right();
21623                $extra_leading_spaces =
21624                  ( $avail > $extra_indentation_spaces_wanted )
21625                  ? $extra_indentation_spaces_wanted
21626                  : $avail;
21627
21628                # update the indentation object because with -icp the terminal
21629                # ');' will use the same adjustment.
21630                $object->permanently_decrease_AVAILABLE_SPACES(
21631                    -$extra_leading_spaces );
21632            }
21633        }
21634    }
21635    return $extra_leading_spaces;
21636}
21637
21638sub combine_fields {
21639
21640    # combine all fields except for the comment field  ( sidecmt.t )
21641    # Uses global variables:
21642    #  @group_lines
21643    #  $maximum_line_index
21644    my ( $j, $k );
21645    my $maximum_field_index = $group_lines[0]->get_jmax();
21646    for ( $j = 0 ; $j <= $maximum_line_index ; $j++ ) {
21647        my $line    = $group_lines[$j];
21648        my $rfields = $line->get_rfields();
21649        foreach ( 1 .. $maximum_field_index - 1 ) {
21650            $$rfields[0] .= $$rfields[$_];
21651        }
21652        $$rfields[1] = $$rfields[$maximum_field_index];
21653
21654        $line->set_jmax(1);
21655        $line->set_column( 0, 0 );
21656        $line->set_column( 1, 0 );
21657
21658    }
21659    $maximum_field_index = 1;
21660
21661    for $j ( 0 .. $maximum_line_index ) {
21662        my $line    = $group_lines[$j];
21663        my $rfields = $line->get_rfields();
21664        for $k ( 0 .. $maximum_field_index ) {
21665            my $pad = length( $$rfields[$k] ) - $line->current_field_width($k);
21666            if ( $k == 0 ) {
21667                $pad += $group_lines[$j]->get_leading_space_count();
21668            }
21669
21670            if ( $pad > 0 ) { $line->increase_field_width( $k, $pad ) }
21671
21672        }
21673    }
21674}
21675
21676sub get_output_line_number {
21677
21678    # the output line number reported to a caller is the number of items
21679    # written plus the number of items in the buffer
21680    my $self = shift;
21681    1 + $maximum_line_index + $file_writer_object->get_output_line_number();
21682}
21683
21684sub valign_output_step_B {
21685
21686    ###############################################################
21687    # This is Step B in writing vertically aligned lines.
21688    # Vertical tightness is applied according to preset flags.
21689    # In particular this routine handles stacking of opening
21690    # and closing tokens.
21691    ###############################################################
21692
21693    my ( $leading_space_count, $str, $side_comment_length, $outdent_long_lines,
21694        $rvertical_tightness_flags, $level )
21695      = @_;
21696
21697    # handle outdenting of long lines:
21698    if ($outdent_long_lines) {
21699        my $excess =
21700          length($str) -
21701          $side_comment_length +
21702          $leading_space_count -
21703          maximum_line_length_for_level($level);
21704        if ( $excess > 0 ) {
21705            $leading_space_count = 0;
21706            $last_outdented_line_at =
21707              $file_writer_object->get_output_line_number();
21708
21709            unless ($outdented_line_count) {
21710                $first_outdented_line_at = $last_outdented_line_at;
21711            }
21712            $outdented_line_count++;
21713        }
21714    }
21715
21716    # Make preliminary leading whitespace.  It could get changed
21717    # later by entabbing, so we have to keep track of any changes
21718    # to the leading_space_count from here on.
21719    my $leading_string =
21720      $leading_space_count > 0 ? ( ' ' x $leading_space_count ) : "";
21721
21722    # Unpack any recombination data; it was packed by
21723    # sub send_lines_to_vertical_aligner. Contents:
21724    #
21725    #   [0] type: 1=opening non-block    2=closing non-block
21726    #             3=opening block brace  4=closing block brace
21727    #   [1] flag: if opening: 1=no multiple steps, 2=multiple steps ok
21728    #             if closing: spaces of padding to use
21729    #   [2] sequence number of container
21730    #   [3] valid flag: do not append if this flag is false
21731    #
21732    my ( $open_or_close, $tightness_flag, $seqno, $valid, $seqno_beg,
21733        $seqno_end );
21734    if ($rvertical_tightness_flags) {
21735        (
21736            $open_or_close, $tightness_flag, $seqno, $valid, $seqno_beg,
21737            $seqno_end
21738        ) = @{$rvertical_tightness_flags};
21739    }
21740
21741    $seqno_string = $seqno_end;
21742
21743    # handle any cached line ..
21744    # either append this line to it or write it out
21745    if ( length($cached_line_text) ) {
21746
21747        # Dump an invalid cached line
21748        if ( !$cached_line_valid ) {
21749            valign_output_step_C( $cached_line_text,
21750                $cached_line_leading_space_count,
21751                $last_level_written );
21752        }
21753
21754        # Handle cached line ending in OPENING tokens
21755        elsif ( $cached_line_type == 1 || $cached_line_type == 3 ) {
21756
21757            my $gap = $leading_space_count - length($cached_line_text);
21758
21759            # handle option of just one tight opening per line:
21760            if ( $cached_line_flag == 1 ) {
21761                if ( defined($open_or_close) && $open_or_close == 1 ) {
21762                    $gap = -1;
21763                }
21764            }
21765
21766            if ( $gap >= 0 && defined($seqno_beg) ) {
21767                $leading_string      = $cached_line_text . ' ' x $gap;
21768                $leading_space_count = $cached_line_leading_space_count;
21769                $seqno_string        = $cached_seqno_string . ':' . $seqno_beg;
21770                $level               = $last_level_written;
21771            }
21772            else {
21773                valign_output_step_C( $cached_line_text,
21774                    $cached_line_leading_space_count,
21775                    $last_level_written );
21776            }
21777        }
21778
21779        # Handle cached line ending in CLOSING tokens
21780        else {
21781            my $test_line = $cached_line_text . ' ' x $cached_line_flag . $str;
21782            if (
21783
21784                # The new line must start with container
21785                $seqno_beg
21786
21787                # The container combination must be okay..
21788                && (
21789
21790                    # okay to combine like types
21791                    ( $open_or_close == $cached_line_type )
21792
21793                    # closing block brace may append to non-block
21794                    || ( $cached_line_type == 2 && $open_or_close == 4 )
21795
21796                    # something like ');'
21797                    || ( !$open_or_close && $cached_line_type == 2 )
21798
21799                )
21800
21801                # The combined line must fit
21802                && (
21803                    length($test_line) <=
21804                    maximum_line_length_for_level($last_level_written) )
21805              )
21806            {
21807
21808                $seqno_string = $cached_seqno_string . ':' . $seqno_beg;
21809
21810                # Patch to outdent closing tokens ending # in ');'
21811                # If we are joining a line like ');' to a previous stacked
21812                # set of closing tokens, then decide if we may outdent the
21813                # combined stack to the indentation of the ');'.  Since we
21814                # should not normally outdent any of the other tokens more than
21815                # the indentation of the lines that contained them, we will
21816                # only do this if all of the corresponding opening
21817                # tokens were on the same line.  This can happen with
21818                # -sot and -sct.  For example, it is ok here:
21819                #   __PACKAGE__->load_components( qw(
21820                #         PK::Auto
21821                #         Core
21822                #   ));
21823                #
21824                #   But, for example, we do not outdent in this example because
21825                #   that would put the closing sub brace out farther than the
21826                #   opening sub brace:
21827                #
21828                #   perltidy -sot -sct
21829                #   $c->Tk::bind(
21830                #       '<Control-f>' => sub {
21831                #           my ($c) = @_;
21832                #           my $e = $c->XEvent;
21833                #           itemsUnderArea $c;
21834                #       } );
21835                #
21836                if ( $str =~ /^\);/ && $cached_line_text =~ /^[\)\}\]\s]*$/ ) {
21837
21838                    # The way to tell this is if the stacked sequence numbers
21839                    # of this output line are the reverse of the stacked
21840                    # sequence numbers of the previous non-blank line of
21841                    # sequence numbers.  So we can join if the previous
21842                    # nonblank string of tokens is the mirror image.  For
21843                    # example if stack )}] is 13:8:6 then we are looking for a
21844                    # leading stack like [{( which is 6:8:13 We only need to
21845                    # check the two ends, because the intermediate tokens must
21846                    # fall in order.  Note on speed: having to split on colons
21847                    # and eliminate multiple colons might appear to be slow,
21848                    # but it's not an issue because we almost never come
21849                    # through here.  In a typical file we don't.
21850                    $seqno_string =~ s/^:+//;
21851                    $last_nonblank_seqno_string =~ s/^:+//;
21852                    $seqno_string =~ s/:+/:/g;
21853                    $last_nonblank_seqno_string =~ s/:+/:/g;
21854
21855                    # how many spaces can we outdent?
21856                    my $diff =
21857                      $cached_line_leading_space_count - $leading_space_count;
21858                    if (   $diff > 0
21859                        && length($seqno_string)
21860                        && length($last_nonblank_seqno_string) ==
21861                        length($seqno_string) )
21862                    {
21863                        my @seqno_last =
21864                          ( split ':', $last_nonblank_seqno_string );
21865                        my @seqno_now = ( split ':', $seqno_string );
21866                        if (   $seqno_now[-1] == $seqno_last[0]
21867                            && $seqno_now[0] == $seqno_last[-1] )
21868                        {
21869
21870                            # OK to outdent ..
21871                            # for absolute safety, be sure we only remove
21872                            # whitespace
21873                            my $ws = substr( $test_line, 0, $diff );
21874                            if ( ( length($ws) == $diff ) && $ws =~ /^\s+$/ ) {
21875
21876                                $test_line = substr( $test_line, $diff );
21877                                $cached_line_leading_space_count -= $diff;
21878                                $last_level_written =
21879                                  level_change(
21880                                    $cached_line_leading_space_count,
21881                                    $diff, $last_level_written );
21882                                reduce_valign_buffer_indentation($diff);
21883                            }
21884
21885                            # shouldn't happen, but not critical:
21886                            ##else {
21887                            ## ERROR transferring indentation here
21888                            ##}
21889                        }
21890                    }
21891                }
21892
21893                $str                 = $test_line;
21894                $leading_string      = "";
21895                $leading_space_count = $cached_line_leading_space_count;
21896                $level               = $last_level_written;
21897            }
21898            else {
21899                valign_output_step_C( $cached_line_text,
21900                    $cached_line_leading_space_count,
21901                    $last_level_written );
21902            }
21903        }
21904    }
21905    $cached_line_type = 0;
21906    $cached_line_text = "";
21907
21908    # make the line to be written
21909    my $line = $leading_string . $str;
21910
21911    # write or cache this line
21912    if ( !$open_or_close || $side_comment_length > 0 ) {
21913        valign_output_step_C( $line, $leading_space_count, $level );
21914    }
21915    else {
21916        $cached_line_text                = $line;
21917        $cached_line_type                = $open_or_close;
21918        $cached_line_flag                = $tightness_flag;
21919        $cached_seqno                    = $seqno;
21920        $cached_line_valid               = $valid;
21921        $cached_line_leading_space_count = $leading_space_count;
21922        $cached_seqno_string             = $seqno_string;
21923    }
21924
21925    $last_level_written       = $level;
21926    $last_side_comment_length = $side_comment_length;
21927    $extra_indent_ok          = 0;
21928}
21929
21930sub valign_output_step_C {
21931
21932    ###############################################################
21933    # This is Step C in writing vertically aligned lines.
21934    # Lines are either stored in a buffer or passed along to the next step.
21935    # The reason for storing lines is that we may later want to reduce their
21936    # indentation when -sot and -sct are both used.
21937    ###############################################################
21938    my @args = @_;
21939
21940    # Dump any saved lines if we see a line with an unbalanced opening or
21941    # closing token.
21942    dump_valign_buffer() if ( $seqno_string && $valign_buffer_filling );
21943
21944    # Either store or write this line
21945    if ($valign_buffer_filling) {
21946        push @valign_buffer, [@args];
21947    }
21948    else {
21949        valign_output_step_D(@args);
21950    }
21951
21952    # For lines starting or ending with opening or closing tokens..
21953    if ($seqno_string) {
21954        $last_nonblank_seqno_string = $seqno_string;
21955
21956        # Start storing lines when we see a line with multiple stacked opening
21957        # tokens.
21958        if ( $args[0] =~ /[\{\(\[]\s*[\{\(\[]$/ ) {
21959            $valign_buffer_filling = $seqno_string;
21960        }
21961    }
21962}
21963
21964sub valign_output_step_D {
21965
21966    ###############################################################
21967    # This is Step D in writing vertically aligned lines.
21968    # Write one vertically aligned line of code to the output object.
21969    ###############################################################
21970
21971    my ( $line, $leading_space_count, $level ) = @_;
21972
21973    # The line is currently correct if there is no tabbing (recommended!)
21974    # We may have to lop off some leading spaces and replace with tabs.
21975    if ( $leading_space_count > 0 ) {
21976
21977        # Nothing to do if no tabs
21978        if ( !( $rOpts_tabs || $rOpts_entab_leading_whitespace )
21979            || $rOpts_indent_columns <= 0 )
21980        {
21981
21982            # nothing to do
21983        }
21984
21985        # Handle entab option
21986        elsif ($rOpts_entab_leading_whitespace) {
21987            my $space_count =
21988              $leading_space_count % $rOpts_entab_leading_whitespace;
21989            my $tab_count =
21990              int( $leading_space_count / $rOpts_entab_leading_whitespace );
21991            my $leading_string = "\t" x $tab_count . ' ' x $space_count;
21992            if ( $line =~ /^\s{$leading_space_count,$leading_space_count}/ ) {
21993                substr( $line, 0, $leading_space_count ) = $leading_string;
21994            }
21995            else {
21996
21997                # shouldn't happen - program error counting whitespace
21998                # - skip entabbing
21999                VALIGN_DEBUG_FLAG_TABS
22000                  && warning(
22001"Error entabbing in valign_output_step_D: expected count=$leading_space_count\n"
22002                  );
22003            }
22004        }
22005
22006        # Handle option of one tab per level
22007        else {
22008            my $leading_string = ( "\t" x $level );
22009            my $space_count =
22010              $leading_space_count - $level * $rOpts_indent_columns;
22011
22012            # shouldn't happen:
22013            if ( $space_count < 0 ) {
22014
22015                # But it could be an outdented comment
22016                if ( $line !~ /^\s*#/ ) {
22017                    VALIGN_DEBUG_FLAG_TABS
22018                      && warning(
22019"Error entabbing in valign_output_step_D: for level=$group_level count=$leading_space_count\n"
22020                      );
22021                }
22022                $leading_string = ( ' ' x $leading_space_count );
22023            }
22024            else {
22025                $leading_string .= ( ' ' x $space_count );
22026            }
22027            if ( $line =~ /^\s{$leading_space_count,$leading_space_count}/ ) {
22028                substr( $line, 0, $leading_space_count ) = $leading_string;
22029            }
22030            else {
22031
22032                # shouldn't happen - program error counting whitespace
22033                # we'll skip entabbing
22034                VALIGN_DEBUG_FLAG_TABS
22035                  && warning(
22036"Error entabbing in valign_output_step_D: expected count=$leading_space_count\n"
22037                  );
22038            }
22039        }
22040    }
22041    $file_writer_object->write_code_line( $line . "\n" );
22042}
22043
22044{    # begin get_leading_string
22045
22046    my @leading_string_cache;
22047
22048    sub get_leading_string {
22049
22050        # define the leading whitespace string for this line..
22051        my $leading_whitespace_count = shift;
22052
22053        # Handle case of zero whitespace, which includes multi-line quotes
22054        # (which may have a finite level; this prevents tab problems)
22055        if ( $leading_whitespace_count <= 0 ) {
22056            return "";
22057        }
22058
22059        # look for previous result
22060        elsif ( $leading_string_cache[$leading_whitespace_count] ) {
22061            return $leading_string_cache[$leading_whitespace_count];
22062        }
22063
22064        # must compute a string for this number of spaces
22065        my $leading_string;
22066
22067        # Handle simple case of no tabs
22068        if ( !( $rOpts_tabs || $rOpts_entab_leading_whitespace )
22069            || $rOpts_indent_columns <= 0 )
22070        {
22071            $leading_string = ( ' ' x $leading_whitespace_count );
22072        }
22073
22074        # Handle entab option
22075        elsif ($rOpts_entab_leading_whitespace) {
22076            my $space_count =
22077              $leading_whitespace_count % $rOpts_entab_leading_whitespace;
22078            my $tab_count = int(
22079                $leading_whitespace_count / $rOpts_entab_leading_whitespace );
22080            $leading_string = "\t" x $tab_count . ' ' x $space_count;
22081        }
22082
22083        # Handle option of one tab per level
22084        else {
22085            $leading_string = ( "\t" x $group_level );
22086            my $space_count =
22087              $leading_whitespace_count - $group_level * $rOpts_indent_columns;
22088
22089            # shouldn't happen:
22090            if ( $space_count < 0 ) {
22091                VALIGN_DEBUG_FLAG_TABS
22092                  && warning(
22093"Error in get_leading_string: for level=$group_level count=$leading_whitespace_count\n"
22094                  );
22095
22096                # -- skip entabbing
22097                $leading_string = ( ' ' x $leading_whitespace_count );
22098            }
22099            else {
22100                $leading_string .= ( ' ' x $space_count );
22101            }
22102        }
22103        $leading_string_cache[$leading_whitespace_count] = $leading_string;
22104        return $leading_string;
22105    }
22106}    # end get_leading_string
22107
22108sub report_anything_unusual {
22109    my $self = shift;
22110    if ( $outdented_line_count > 0 ) {
22111        write_logfile_entry(
22112            "$outdented_line_count long lines were outdented:\n");
22113        write_logfile_entry(
22114            "  First at output line $first_outdented_line_at\n");
22115
22116        if ( $outdented_line_count > 1 ) {
22117            write_logfile_entry(
22118                "   Last at output line $last_outdented_line_at\n");
22119        }
22120        write_logfile_entry(
22121            "  use -noll to prevent outdenting, -l=n to increase line length\n"
22122        );
22123        write_logfile_entry("\n");
22124    }
22125}
22126
22127#####################################################################
22128#
22129# the Perl::Tidy::FileWriter class writes the output file
22130#
22131#####################################################################
22132
22133package Perl::Tidy::FileWriter;
22134
22135# Maximum number of little messages; probably need not be changed.
22136use constant MAX_NAG_MESSAGES => 6;
22137
22138sub write_logfile_entry {
22139    my $self          = shift;
22140    my $logger_object = $self->{_logger_object};
22141    if ($logger_object) {
22142        $logger_object->write_logfile_entry(@_);
22143    }
22144}
22145
22146sub new {
22147    my $class = shift;
22148    my ( $line_sink_object, $rOpts, $logger_object ) = @_;
22149
22150    bless {
22151        _line_sink_object           => $line_sink_object,
22152        _logger_object              => $logger_object,
22153        _rOpts                      => $rOpts,
22154        _output_line_number         => 1,
22155        _consecutive_blank_lines    => 0,
22156        _consecutive_nonblank_lines => 0,
22157        _first_line_length_error    => 0,
22158        _max_line_length_error      => 0,
22159        _last_line_length_error     => 0,
22160        _first_line_length_error_at => 0,
22161        _max_line_length_error_at   => 0,
22162        _last_line_length_error_at  => 0,
22163        _line_length_error_count    => 0,
22164        _max_output_line_length     => 0,
22165        _max_output_line_length_at  => 0,
22166    }, $class;
22167}
22168
22169sub tee_on {
22170    my $self = shift;
22171    $self->{_line_sink_object}->tee_on();
22172}
22173
22174sub tee_off {
22175    my $self = shift;
22176    $self->{_line_sink_object}->tee_off();
22177}
22178
22179sub get_output_line_number {
22180    my $self = shift;
22181    return $self->{_output_line_number};
22182}
22183
22184sub decrement_output_line_number {
22185    my $self = shift;
22186    $self->{_output_line_number}--;
22187}
22188
22189sub get_consecutive_nonblank_lines {
22190    my $self = shift;
22191    return $self->{_consecutive_nonblank_lines};
22192}
22193
22194sub reset_consecutive_blank_lines {
22195    my $self = shift;
22196    $self->{_consecutive_blank_lines} = 0;
22197}
22198
22199sub want_blank_line {
22200    my $self = shift;
22201    unless ( $self->{_consecutive_blank_lines} ) {
22202        $self->write_blank_code_line();
22203    }
22204}
22205
22206sub require_blank_code_lines {
22207
22208    # write out the requested number of blanks regardless of the value of -mbl
22209    # unless -mbl=0.  This allows extra blank lines to be written for subs and
22210    # packages even with the default -mbl=1
22211    my $self   = shift;
22212    my $count  = shift;
22213    my $need   = $count - $self->{_consecutive_blank_lines};
22214    my $rOpts  = $self->{_rOpts};
22215    my $forced = $rOpts->{'maximum-consecutive-blank-lines'} > 0;
22216    for ( my $i = 0 ; $i < $need ; $i++ ) {
22217        $self->write_blank_code_line($forced);
22218    }
22219}
22220
22221sub write_blank_code_line {
22222    my $self   = shift;
22223    my $forced = shift;
22224    my $rOpts  = $self->{_rOpts};
22225    return
22226      if (!$forced
22227        && $self->{_consecutive_blank_lines} >=
22228        $rOpts->{'maximum-consecutive-blank-lines'} );
22229    $self->{_consecutive_blank_lines}++;
22230    $self->{_consecutive_nonblank_lines} = 0;
22231    $self->write_line("\n");
22232}
22233
22234sub write_code_line {
22235    my $self = shift;
22236    my $a    = shift;
22237
22238    if ( $a =~ /^\s*$/ ) {
22239        my $rOpts = $self->{_rOpts};
22240        return
22241          if ( $self->{_consecutive_blank_lines} >=
22242            $rOpts->{'maximum-consecutive-blank-lines'} );
22243        $self->{_consecutive_blank_lines}++;
22244        $self->{_consecutive_nonblank_lines} = 0;
22245    }
22246    else {
22247        $self->{_consecutive_blank_lines} = 0;
22248        $self->{_consecutive_nonblank_lines}++;
22249    }
22250    $self->write_line($a);
22251}
22252
22253sub write_line {
22254    my $self = shift;
22255    my $a    = shift;
22256
22257    # TODO: go through and see if the test is necessary here
22258    if ( $a =~ /\n$/ ) { $self->{_output_line_number}++; }
22259
22260    $self->{_line_sink_object}->write_line($a);
22261
22262    # This calculation of excess line length ignores any internal tabs
22263    my $rOpts  = $self->{_rOpts};
22264    my $exceed = length($a) - $rOpts->{'maximum-line-length'} - 1;
22265    if ( $a =~ /^\t+/g ) {
22266        $exceed += pos($a) * ( $rOpts->{'indent-columns'} - 1 );
22267    }
22268
22269    # Note that we just incremented output line number to future value
22270    # so we must subtract 1 for current line number
22271    if ( length($a) > 1 + $self->{_max_output_line_length} ) {
22272        $self->{_max_output_line_length}    = length($a) - 1;
22273        $self->{_max_output_line_length_at} = $self->{_output_line_number} - 1;
22274    }
22275
22276    if ( $exceed > 0 ) {
22277        my $output_line_number = $self->{_output_line_number};
22278        $self->{_last_line_length_error}    = $exceed;
22279        $self->{_last_line_length_error_at} = $output_line_number - 1;
22280        if ( $self->{_line_length_error_count} == 0 ) {
22281            $self->{_first_line_length_error}    = $exceed;
22282            $self->{_first_line_length_error_at} = $output_line_number - 1;
22283        }
22284
22285        if (
22286            $self->{_last_line_length_error} > $self->{_max_line_length_error} )
22287        {
22288            $self->{_max_line_length_error}    = $exceed;
22289            $self->{_max_line_length_error_at} = $output_line_number - 1;
22290        }
22291
22292        if ( $self->{_line_length_error_count} < MAX_NAG_MESSAGES ) {
22293            $self->write_logfile_entry(
22294                "Line length exceeded by $exceed characters\n");
22295        }
22296        $self->{_line_length_error_count}++;
22297    }
22298
22299}
22300
22301sub report_line_length_errors {
22302    my $self                    = shift;
22303    my $rOpts                   = $self->{_rOpts};
22304    my $line_length_error_count = $self->{_line_length_error_count};
22305    if ( $line_length_error_count == 0 ) {
22306        $self->write_logfile_entry(
22307            "No lines exceeded $rOpts->{'maximum-line-length'} characters\n");
22308        my $max_output_line_length    = $self->{_max_output_line_length};
22309        my $max_output_line_length_at = $self->{_max_output_line_length_at};
22310        $self->write_logfile_entry(
22311"  Maximum output line length was $max_output_line_length at line $max_output_line_length_at\n"
22312        );
22313
22314    }
22315    else {
22316
22317        my $word = ( $line_length_error_count > 1 ) ? "s" : "";
22318        $self->write_logfile_entry(
22319"$line_length_error_count output line$word exceeded $rOpts->{'maximum-line-length'} characters:\n"
22320        );
22321
22322        $word = ( $line_length_error_count > 1 ) ? "First" : "";
22323        my $first_line_length_error    = $self->{_first_line_length_error};
22324        my $first_line_length_error_at = $self->{_first_line_length_error_at};
22325        $self->write_logfile_entry(
22326" $word at line $first_line_length_error_at by $first_line_length_error characters\n"
22327        );
22328
22329        if ( $line_length_error_count > 1 ) {
22330            my $max_line_length_error     = $self->{_max_line_length_error};
22331            my $max_line_length_error_at  = $self->{_max_line_length_error_at};
22332            my $last_line_length_error    = $self->{_last_line_length_error};
22333            my $last_line_length_error_at = $self->{_last_line_length_error_at};
22334            $self->write_logfile_entry(
22335" Maximum at line $max_line_length_error_at by $max_line_length_error characters\n"
22336            );
22337            $self->write_logfile_entry(
22338" Last at line $last_line_length_error_at by $last_line_length_error characters\n"
22339            );
22340        }
22341    }
22342}
22343
22344#####################################################################
22345#
22346# The Perl::Tidy::Debugger class shows line tokenization
22347#
22348#####################################################################
22349
22350package Perl::Tidy::Debugger;
22351
22352sub new {
22353
22354    my ( $class, $filename ) = @_;
22355
22356    bless {
22357        _debug_file        => $filename,
22358        _debug_file_opened => 0,
22359        _fh                => undef,
22360    }, $class;
22361}
22362
22363sub really_open_debug_file {
22364
22365    my $self       = shift;
22366    my $debug_file = $self->{_debug_file};
22367    my $fh;
22368    unless ( $fh = IO::File->new("> $debug_file") ) {
22369        Perl::Tidy::Warn("can't open $debug_file: $!\n");
22370    }
22371    $self->{_debug_file_opened} = 1;
22372    $self->{_fh}                = $fh;
22373    print $fh
22374      "Use -dump-token-types (-dtt) to get a list of token type codes\n";
22375}
22376
22377sub close_debug_file {
22378
22379    my $self = shift;
22380    my $fh   = $self->{_fh};
22381    if ( $self->{_debug_file_opened} ) {
22382
22383        eval { $self->{_fh}->close() };
22384    }
22385}
22386
22387sub write_debug_entry {
22388
22389    # This is a debug dump routine which may be modified as necessary
22390    # to dump tokens on a line-by-line basis.  The output will be written
22391    # to the .DEBUG file when the -D flag is entered.
22392    my $self           = shift;
22393    my $line_of_tokens = shift;
22394
22395    my $input_line        = $line_of_tokens->{_line_text};
22396    my $rtoken_type       = $line_of_tokens->{_rtoken_type};
22397    my $rtokens           = $line_of_tokens->{_rtokens};
22398    my $rlevels           = $line_of_tokens->{_rlevels};
22399    my $rslevels          = $line_of_tokens->{_rslevels};
22400    my $rblock_type       = $line_of_tokens->{_rblock_type};
22401    my $input_line_number = $line_of_tokens->{_line_number};
22402    my $line_type         = $line_of_tokens->{_line_type};
22403
22404    my ( $j, $num );
22405
22406    my $token_str              = "$input_line_number: ";
22407    my $reconstructed_original = "$input_line_number: ";
22408    my $block_str              = "$input_line_number: ";
22409
22410    #$token_str .= "$line_type: ";
22411    #$reconstructed_original .= "$line_type: ";
22412
22413    my $pattern   = "";
22414    my @next_char = ( '"', '"' );
22415    my $i_next    = 0;
22416    unless ( $self->{_debug_file_opened} ) { $self->really_open_debug_file() }
22417    my $fh = $self->{_fh};
22418
22419    for ( $j = 0 ; $j < @$rtoken_type ; $j++ ) {
22420
22421        # testing patterns
22422        if ( $$rtoken_type[$j] eq 'k' ) {
22423            $pattern .= $$rtokens[$j];
22424        }
22425        else {
22426            $pattern .= $$rtoken_type[$j];
22427        }
22428        $reconstructed_original .= $$rtokens[$j];
22429        $block_str .= "($$rblock_type[$j])";
22430        $num = length( $$rtokens[$j] );
22431        my $type_str = $$rtoken_type[$j];
22432
22433        # be sure there are no blank tokens (shouldn't happen)
22434        # This can only happen if a programming error has been made
22435        # because all valid tokens are non-blank
22436        if ( $type_str eq ' ' ) {
22437            print $fh "BLANK TOKEN on the next line\n";
22438            $type_str = $next_char[$i_next];
22439            $i_next   = 1 - $i_next;
22440        }
22441
22442        if ( length($type_str) == 1 ) {
22443            $type_str = $type_str x $num;
22444        }
22445        $token_str .= $type_str;
22446    }
22447
22448    # Write what you want here ...
22449    # print $fh "$input_line\n";
22450    # print $fh "$pattern\n";
22451    print $fh "$reconstructed_original\n";
22452    print $fh "$token_str\n";
22453
22454    #print $fh "$block_str\n";
22455}
22456
22457#####################################################################
22458#
22459# The Perl::Tidy::LineBuffer class supplies a 'get_line()'
22460# method for returning the next line to be parsed, as well as a
22461# 'peek_ahead()' method
22462#
22463# The input parameter is an object with a 'get_line()' method
22464# which returns the next line to be parsed
22465#
22466#####################################################################
22467
22468package Perl::Tidy::LineBuffer;
22469
22470sub new {
22471
22472    my $class              = shift;
22473    my $line_source_object = shift;
22474
22475    return bless {
22476        _line_source_object => $line_source_object,
22477        _rlookahead_buffer  => [],
22478    }, $class;
22479}
22480
22481sub peek_ahead {
22482    my $self               = shift;
22483    my $buffer_index       = shift;
22484    my $line               = undef;
22485    my $line_source_object = $self->{_line_source_object};
22486    my $rlookahead_buffer  = $self->{_rlookahead_buffer};
22487    if ( $buffer_index < scalar(@$rlookahead_buffer) ) {
22488        $line = $$rlookahead_buffer[$buffer_index];
22489    }
22490    else {
22491        $line = $line_source_object->get_line();
22492        push( @$rlookahead_buffer, $line );
22493    }
22494    return $line;
22495}
22496
22497sub get_line {
22498    my $self               = shift;
22499    my $line               = undef;
22500    my $line_source_object = $self->{_line_source_object};
22501    my $rlookahead_buffer  = $self->{_rlookahead_buffer};
22502
22503    if ( scalar(@$rlookahead_buffer) ) {
22504        $line = shift @$rlookahead_buffer;
22505    }
22506    else {
22507        $line = $line_source_object->get_line();
22508    }
22509    return $line;
22510}
22511
22512########################################################################
22513#
22514# the Perl::Tidy::Tokenizer package is essentially a filter which
22515# reads lines of perl source code from a source object and provides
22516# corresponding tokenized lines through its get_line() method.  Lines
22517# flow from the source_object to the caller like this:
22518#
22519# source_object --> LineBuffer_object --> Tokenizer -->  calling routine
22520#   get_line()         get_line()           get_line()     line_of_tokens
22521#
22522# The source object can be any object with a get_line() method which
22523# supplies one line (a character string) perl call.
22524# The LineBuffer object is created by the Tokenizer.
22525# The Tokenizer returns a reference to a data structure 'line_of_tokens'
22526# containing one tokenized line for each call to its get_line() method.
22527#
22528# WARNING: This is not a real class yet.  Only one tokenizer my be used.
22529#
22530########################################################################
22531
22532package Perl::Tidy::Tokenizer;
22533
22534BEGIN {
22535
22536    # Caution: these debug flags produce a lot of output
22537    # They should all be 0 except when debugging small scripts
22538
22539    use constant TOKENIZER_DEBUG_FLAG_EXPECT   => 0;
22540    use constant TOKENIZER_DEBUG_FLAG_NSCAN    => 0;
22541    use constant TOKENIZER_DEBUG_FLAG_QUOTE    => 0;
22542    use constant TOKENIZER_DEBUG_FLAG_SCAN_ID  => 0;
22543    use constant TOKENIZER_DEBUG_FLAG_TOKENIZE => 0;
22544
22545    my $debug_warning = sub {
22546        print STDOUT "TOKENIZER_DEBUGGING with key $_[0]\n";
22547    };
22548
22549    TOKENIZER_DEBUG_FLAG_EXPECT   && $debug_warning->('EXPECT');
22550    TOKENIZER_DEBUG_FLAG_NSCAN    && $debug_warning->('NSCAN');
22551    TOKENIZER_DEBUG_FLAG_QUOTE    && $debug_warning->('QUOTE');
22552    TOKENIZER_DEBUG_FLAG_SCAN_ID  && $debug_warning->('SCAN_ID');
22553    TOKENIZER_DEBUG_FLAG_TOKENIZE && $debug_warning->('TOKENIZE');
22554
22555}
22556
22557use Carp;
22558
22559# PACKAGE VARIABLES for for processing an entire FILE.
22560use vars qw{
22561  $tokenizer_self
22562
22563  $last_nonblank_token
22564  $last_nonblank_type
22565  $last_nonblank_block_type
22566  $statement_type
22567  $in_attribute_list
22568  $current_package
22569  $context
22570
22571  %is_constant
22572  %is_user_function
22573  %user_function_prototype
22574  %is_block_function
22575  %is_block_list_function
22576  %saw_function_definition
22577
22578  $brace_depth
22579  $paren_depth
22580  $square_bracket_depth
22581
22582  @current_depth
22583  @total_depth
22584  $total_depth
22585  @nesting_sequence_number
22586  @current_sequence_number
22587  @paren_type
22588  @paren_semicolon_count
22589  @paren_structural_type
22590  @brace_type
22591  @brace_structural_type
22592  @brace_context
22593  @brace_package
22594  @square_bracket_type
22595  @square_bracket_structural_type
22596  @depth_array
22597  @nested_ternary_flag
22598  @nested_statement_type
22599  @starting_line_of_current_depth
22600};
22601
22602# GLOBAL CONSTANTS for routines in this package
22603use vars qw{
22604  %is_indirect_object_taker
22605  %is_block_operator
22606  %expecting_operator_token
22607  %expecting_operator_types
22608  %expecting_term_types
22609  %expecting_term_token
22610  %is_digraph
22611  %is_file_test_operator
22612  %is_trigraph
22613  %is_valid_token_type
22614  %is_keyword
22615  %is_code_block_token
22616  %really_want_term
22617  @opening_brace_names
22618  @closing_brace_names
22619  %is_keyword_taking_list
22620  %is_q_qq_qw_qx_qr_s_y_tr_m
22621};
22622
22623# possible values of operator_expected()
22624use constant TERM     => -1;
22625use constant UNKNOWN  => 0;
22626use constant OPERATOR => 1;
22627
22628# possible values of context
22629use constant SCALAR_CONTEXT  => -1;
22630use constant UNKNOWN_CONTEXT => 0;
22631use constant LIST_CONTEXT    => 1;
22632
22633# Maximum number of little messages; probably need not be changed.
22634use constant MAX_NAG_MESSAGES => 6;
22635
22636{
22637
22638    # methods to count instances
22639    my $_count = 0;
22640    sub get_count        { $_count; }
22641    sub _increment_count { ++$_count }
22642    sub _decrement_count { --$_count }
22643}
22644
22645sub DESTROY {
22646    $_[0]->_decrement_count();
22647}
22648
22649sub new {
22650
22651    my $class = shift;
22652
22653    # Note: 'tabs' and 'indent_columns' are temporary and should be
22654    # removed asap
22655    my %defaults = (
22656        source_object        => undef,
22657        debugger_object      => undef,
22658        diagnostics_object   => undef,
22659        logger_object        => undef,
22660        starting_level       => undef,
22661        indent_columns       => 4,
22662        tabsize              => 8,
22663        look_for_hash_bang   => 0,
22664        trim_qw              => 1,
22665        look_for_autoloader  => 1,
22666        look_for_selfloader  => 1,
22667        starting_line_number => 1,
22668    );
22669    my %args = ( %defaults, @_ );
22670
22671    # we are given an object with a get_line() method to supply source lines
22672    my $source_object = $args{source_object};
22673
22674    # we create another object with a get_line() and peek_ahead() method
22675    my $line_buffer_object = Perl::Tidy::LineBuffer->new($source_object);
22676
22677    # Tokenizer state data is as follows:
22678    # _rhere_target_list    reference to list of here-doc targets
22679    # _here_doc_target      the target string for a here document
22680    # _here_quote_character the type of here-doc quoting (" ' ` or none)
22681    #                       to determine if interpolation is done
22682    # _quote_target         character we seek if chasing a quote
22683    # _line_start_quote     line where we started looking for a long quote
22684    # _in_here_doc          flag indicating if we are in a here-doc
22685    # _in_pod               flag set if we are in pod documentation
22686    # _in_error             flag set if we saw severe error (binary in script)
22687    # _in_data              flag set if we are in __DATA__ section
22688    # _in_end               flag set if we are in __END__ section
22689    # _in_format            flag set if we are in a format description
22690    # _in_attribute_list    flag telling if we are looking for attributes
22691    # _in_quote             flag telling if we are chasing a quote
22692    # _starting_level       indentation level of first line
22693    # _line_buffer_object   object with get_line() method to supply source code
22694    # _diagnostics_object   place to write debugging information
22695    # _unexpected_error_count  error count used to limit output
22696    # _lower_case_labels_at  line numbers where lower case labels seen
22697    $tokenizer_self = {
22698        _rhere_target_list                  => [],
22699        _in_here_doc                        => 0,
22700        _here_doc_target                    => "",
22701        _here_quote_character               => "",
22702        _in_data                            => 0,
22703        _in_end                             => 0,
22704        _in_format                          => 0,
22705        _in_error                           => 0,
22706        _in_pod                             => 0,
22707        _in_attribute_list                  => 0,
22708        _in_quote                           => 0,
22709        _quote_target                       => "",
22710        _line_start_quote                   => -1,
22711        _starting_level                     => $args{starting_level},
22712        _know_starting_level                => defined( $args{starting_level} ),
22713        _tabsize                            => $args{tabsize},
22714        _indent_columns                     => $args{indent_columns},
22715        _look_for_hash_bang                 => $args{look_for_hash_bang},
22716        _trim_qw                            => $args{trim_qw},
22717        _continuation_indentation           => $args{continuation_indentation},
22718        _outdent_labels                     => $args{outdent_labels},
22719        _last_line_number                   => $args{starting_line_number} - 1,
22720        _saw_perl_dash_P                    => 0,
22721        _saw_perl_dash_w                    => 0,
22722        _saw_use_strict                     => 0,
22723        _saw_v_string                       => 0,
22724        _look_for_autoloader                => $args{look_for_autoloader},
22725        _look_for_selfloader                => $args{look_for_selfloader},
22726        _saw_autoloader                     => 0,
22727        _saw_selfloader                     => 0,
22728        _saw_hash_bang                      => 0,
22729        _saw_end                            => 0,
22730        _saw_data                           => 0,
22731        _saw_negative_indentation           => 0,
22732        _started_tokenizing                 => 0,
22733        _line_buffer_object                 => $line_buffer_object,
22734        _debugger_object                    => $args{debugger_object},
22735        _diagnostics_object                 => $args{diagnostics_object},
22736        _logger_object                      => $args{logger_object},
22737        _unexpected_error_count             => 0,
22738        _started_looking_for_here_target_at => 0,
22739        _nearly_matched_here_target_at      => undef,
22740        _line_text                          => "",
22741        _rlower_case_labels_at              => undef,
22742    };
22743
22744    prepare_for_a_new_file();
22745    find_starting_indentation_level();
22746
22747    bless $tokenizer_self, $class;
22748
22749    # This is not a full class yet, so die if an attempt is made to
22750    # create more than one object.
22751
22752    if ( _increment_count() > 1 ) {
22753        confess
22754"Attempt to create more than 1 object in $class, which is not a true class yet\n";
22755    }
22756
22757    return $tokenizer_self;
22758
22759}
22760
22761# interface to Perl::Tidy::Logger routines
22762sub warning {
22763    my $logger_object = $tokenizer_self->{_logger_object};
22764    if ($logger_object) {
22765        $logger_object->warning(@_);
22766    }
22767}
22768
22769sub complain {
22770    my $logger_object = $tokenizer_self->{_logger_object};
22771    if ($logger_object) {
22772        $logger_object->complain(@_);
22773    }
22774}
22775
22776sub write_logfile_entry {
22777    my $logger_object = $tokenizer_self->{_logger_object};
22778    if ($logger_object) {
22779        $logger_object->write_logfile_entry(@_);
22780    }
22781}
22782
22783sub interrupt_logfile {
22784    my $logger_object = $tokenizer_self->{_logger_object};
22785    if ($logger_object) {
22786        $logger_object->interrupt_logfile();
22787    }
22788}
22789
22790sub resume_logfile {
22791    my $logger_object = $tokenizer_self->{_logger_object};
22792    if ($logger_object) {
22793        $logger_object->resume_logfile();
22794    }
22795}
22796
22797sub increment_brace_error {
22798    my $logger_object = $tokenizer_self->{_logger_object};
22799    if ($logger_object) {
22800        $logger_object->increment_brace_error();
22801    }
22802}
22803
22804sub report_definite_bug {
22805    my $logger_object = $tokenizer_self->{_logger_object};
22806    if ($logger_object) {
22807        $logger_object->report_definite_bug();
22808    }
22809}
22810
22811sub brace_warning {
22812    my $logger_object = $tokenizer_self->{_logger_object};
22813    if ($logger_object) {
22814        $logger_object->brace_warning(@_);
22815    }
22816}
22817
22818sub get_saw_brace_error {
22819    my $logger_object = $tokenizer_self->{_logger_object};
22820    if ($logger_object) {
22821        $logger_object->get_saw_brace_error();
22822    }
22823    else {
22824        0;
22825    }
22826}
22827
22828# interface to Perl::Tidy::Diagnostics routines
22829sub write_diagnostics {
22830    if ( $tokenizer_self->{_diagnostics_object} ) {
22831        $tokenizer_self->{_diagnostics_object}->write_diagnostics(@_);
22832    }
22833}
22834
22835sub report_tokenization_errors {
22836
22837    my $self = shift;
22838
22839    my $level = get_indentation_level();
22840    if ( $level != $tokenizer_self->{_starting_level} ) {
22841        warning("final indentation level: $level\n");
22842    }
22843
22844    check_final_nesting_depths();
22845
22846    if ( $tokenizer_self->{_look_for_hash_bang}
22847        && !$tokenizer_self->{_saw_hash_bang} )
22848    {
22849        warning(
22850            "hit EOF without seeing hash-bang line; maybe don't need -x?\n");
22851    }
22852
22853    if ( $tokenizer_self->{_in_format} ) {
22854        warning("hit EOF while in format description\n");
22855    }
22856
22857    if ( $tokenizer_self->{_in_pod} ) {
22858
22859        # Just write log entry if this is after __END__ or __DATA__
22860        # because this happens to often, and it is not likely to be
22861        # a parsing error.
22862        if ( $tokenizer_self->{_saw_data} || $tokenizer_self->{_saw_end} ) {
22863            write_logfile_entry(
22864"hit eof while in pod documentation (no =cut seen)\n\tthis can cause trouble with some pod utilities\n"
22865            );
22866        }
22867
22868        else {
22869            complain(
22870"hit eof while in pod documentation (no =cut seen)\n\tthis can cause trouble with some pod utilities\n"
22871            );
22872        }
22873
22874    }
22875
22876    if ( $tokenizer_self->{_in_here_doc} ) {
22877        my $here_doc_target = $tokenizer_self->{_here_doc_target};
22878        my $started_looking_for_here_target_at =
22879          $tokenizer_self->{_started_looking_for_here_target_at};
22880        if ($here_doc_target) {
22881            warning(
22882"hit EOF in here document starting at line $started_looking_for_here_target_at with target: $here_doc_target\n"
22883            );
22884        }
22885        else {
22886            warning(
22887"hit EOF in here document starting at line $started_looking_for_here_target_at with empty target string\n"
22888            );
22889        }
22890        my $nearly_matched_here_target_at =
22891          $tokenizer_self->{_nearly_matched_here_target_at};
22892        if ($nearly_matched_here_target_at) {
22893            warning(
22894"NOTE: almost matched at input line $nearly_matched_here_target_at except for whitespace\n"
22895            );
22896        }
22897    }
22898
22899    if ( $tokenizer_self->{_in_quote} ) {
22900        my $line_start_quote = $tokenizer_self->{_line_start_quote};
22901        my $quote_target     = $tokenizer_self->{_quote_target};
22902        my $what =
22903          ( $tokenizer_self->{_in_attribute_list} )
22904          ? "attribute list"
22905          : "quote/pattern";
22906        warning(
22907"hit EOF seeking end of $what starting at line $line_start_quote ending in $quote_target\n"
22908        );
22909    }
22910
22911    unless ( $tokenizer_self->{_saw_perl_dash_w} ) {
22912        if ( $] < 5.006 ) {
22913            write_logfile_entry("Suggest including '-w parameter'\n");
22914        }
22915        else {
22916            write_logfile_entry("Suggest including 'use warnings;'\n");
22917        }
22918    }
22919
22920    if ( $tokenizer_self->{_saw_perl_dash_P} ) {
22921        write_logfile_entry("Use of -P parameter for defines is discouraged\n");
22922    }
22923
22924    unless ( $tokenizer_self->{_saw_use_strict} ) {
22925        write_logfile_entry("Suggest including 'use strict;'\n");
22926    }
22927
22928    # it is suggested that lables have at least one upper case character
22929    # for legibility and to avoid code breakage as new keywords are introduced
22930    if ( $tokenizer_self->{_rlower_case_labels_at} ) {
22931        my @lower_case_labels_at =
22932          @{ $tokenizer_self->{_rlower_case_labels_at} };
22933        write_logfile_entry(
22934            "Suggest using upper case characters in label(s)\n");
22935        local $" = ')(';
22936        write_logfile_entry("  defined at line(s): (@lower_case_labels_at)\n");
22937    }
22938}
22939
22940sub report_v_string {
22941
22942    # warn if this version can't handle v-strings
22943    my $tok = shift;
22944    unless ( $tokenizer_self->{_saw_v_string} ) {
22945        $tokenizer_self->{_saw_v_string} = $tokenizer_self->{_last_line_number};
22946    }
22947    if ( $] < 5.006 ) {
22948        warning(
22949"Found v-string '$tok' but v-strings are not implemented in your version of perl; see Camel 3 book ch 2\n"
22950        );
22951    }
22952}
22953
22954sub get_input_line_number {
22955    return $tokenizer_self->{_last_line_number};
22956}
22957
22958# returns the next tokenized line
22959sub get_line {
22960
22961    my $self = shift;
22962
22963    # USES GLOBAL VARIABLES: $tokenizer_self, $brace_depth,
22964    # $square_bracket_depth, $paren_depth
22965
22966    my $input_line = $tokenizer_self->{_line_buffer_object}->get_line();
22967    $tokenizer_self->{_line_text} = $input_line;
22968
22969    return undef unless ($input_line);
22970
22971    my $input_line_number = ++$tokenizer_self->{_last_line_number};
22972
22973    # Find and remove what characters terminate this line, including any
22974    # control r
22975    my $input_line_separator = "";
22976    if ( chomp($input_line) ) { $input_line_separator = $/ }
22977
22978    # TODO: what other characters should be included here?
22979    if ( $input_line =~ s/((\r|\035|\032)+)$// ) {
22980        $input_line_separator = $2 . $input_line_separator;
22981    }
22982
22983    # for backwards compatibility we keep the line text terminated with
22984    # a newline character
22985    $input_line .= "\n";
22986    $tokenizer_self->{_line_text} = $input_line;    # update
22987
22988    # create a data structure describing this line which will be
22989    # returned to the caller.
22990
22991    # _line_type codes are:
22992    #   SYSTEM         - system-specific code before hash-bang line
22993    #   CODE           - line of perl code (including comments)
22994    #   POD_START      - line starting pod, such as '=head'
22995    #   POD            - pod documentation text
22996    #   POD_END        - last line of pod section, '=cut'
22997    #   HERE           - text of here-document
22998    #   HERE_END       - last line of here-doc (target word)
22999    #   FORMAT         - format section
23000    #   FORMAT_END     - last line of format section, '.'
23001    #   DATA_START     - __DATA__ line
23002    #   DATA           - unidentified text following __DATA__
23003    #   END_START      - __END__ line
23004    #   END            - unidentified text following __END__
23005    #   ERROR          - we are in big trouble, probably not a perl script
23006
23007    # Other variables:
23008    #   _curly_brace_depth     - depth of curly braces at start of line
23009    #   _square_bracket_depth  - depth of square brackets at start of line
23010    #   _paren_depth           - depth of parens at start of line
23011    #   _starting_in_quote     - this line continues a multi-line quote
23012    #                            (so don't trim leading blanks!)
23013    #   _ending_in_quote       - this line ends in a multi-line quote
23014    #                            (so don't trim trailing blanks!)
23015    my $line_of_tokens = {
23016        _line_type                 => 'EOF',
23017        _line_text                 => $input_line,
23018        _line_number               => $input_line_number,
23019        _rtoken_type               => undef,
23020        _rtokens                   => undef,
23021        _rlevels                   => undef,
23022        _rslevels                  => undef,
23023        _rblock_type               => undef,
23024        _rcontainer_type           => undef,
23025        _rcontainer_environment    => undef,
23026        _rtype_sequence            => undef,
23027        _rnesting_tokens           => undef,
23028        _rci_levels                => undef,
23029        _rnesting_blocks           => undef,
23030        _guessed_indentation_level => 0,
23031        _starting_in_quote    => 0,                    # to be set by subroutine
23032        _ending_in_quote      => 0,
23033        _curly_brace_depth    => $brace_depth,
23034        _square_bracket_depth => $square_bracket_depth,
23035        _paren_depth          => $paren_depth,
23036        _quote_character      => '',
23037    };
23038
23039    # must print line unchanged if we are in a here document
23040    if ( $tokenizer_self->{_in_here_doc} ) {
23041
23042        $line_of_tokens->{_line_type} = 'HERE';
23043        my $here_doc_target      = $tokenizer_self->{_here_doc_target};
23044        my $here_quote_character = $tokenizer_self->{_here_quote_character};
23045        my $candidate_target     = $input_line;
23046        chomp $candidate_target;
23047        if ( $candidate_target eq $here_doc_target ) {
23048            $tokenizer_self->{_nearly_matched_here_target_at} = undef;
23049            $line_of_tokens->{_line_type}                     = 'HERE_END';
23050            write_logfile_entry("Exiting HERE document $here_doc_target\n");
23051
23052            my $rhere_target_list = $tokenizer_self->{_rhere_target_list};
23053            if (@$rhere_target_list) {    # there can be multiple here targets
23054                ( $here_doc_target, $here_quote_character ) =
23055                  @{ shift @$rhere_target_list };
23056                $tokenizer_self->{_here_doc_target} = $here_doc_target;
23057                $tokenizer_self->{_here_quote_character} =
23058                  $here_quote_character;
23059                write_logfile_entry(
23060                    "Entering HERE document $here_doc_target\n");
23061                $tokenizer_self->{_nearly_matched_here_target_at} = undef;
23062                $tokenizer_self->{_started_looking_for_here_target_at} =
23063                  $input_line_number;
23064            }
23065            else {
23066                $tokenizer_self->{_in_here_doc}          = 0;
23067                $tokenizer_self->{_here_doc_target}      = "";
23068                $tokenizer_self->{_here_quote_character} = "";
23069            }
23070        }
23071
23072        # check for error of extra whitespace
23073        # note for PERL6: leading whitespace is allowed
23074        else {
23075            $candidate_target =~ s/\s*$//;
23076            $candidate_target =~ s/^\s*//;
23077            if ( $candidate_target eq $here_doc_target ) {
23078                $tokenizer_self->{_nearly_matched_here_target_at} =
23079                  $input_line_number;
23080            }
23081        }
23082        return $line_of_tokens;
23083    }
23084
23085    # must print line unchanged if we are in a format section
23086    elsif ( $tokenizer_self->{_in_format} ) {
23087
23088        if ( $input_line =~ /^\.[\s#]*$/ ) {
23089            write_logfile_entry("Exiting format section\n");
23090            $tokenizer_self->{_in_format} = 0;
23091            $line_of_tokens->{_line_type} = 'FORMAT_END';
23092        }
23093        else {
23094            $line_of_tokens->{_line_type} = 'FORMAT';
23095        }
23096        return $line_of_tokens;
23097    }
23098
23099    # must print line unchanged if we are in pod documentation
23100    elsif ( $tokenizer_self->{_in_pod} ) {
23101
23102        $line_of_tokens->{_line_type} = 'POD';
23103        if ( $input_line =~ /^=cut/ ) {
23104            $line_of_tokens->{_line_type} = 'POD_END';
23105            write_logfile_entry("Exiting POD section\n");
23106            $tokenizer_self->{_in_pod} = 0;
23107        }
23108        if ( $input_line =~ /^\#\!.*perl\b/ ) {
23109            warning(
23110                "Hash-bang in pod can cause older versions of perl to fail! \n"
23111            );
23112        }
23113
23114        return $line_of_tokens;
23115    }
23116
23117    # must print line unchanged if we have seen a severe error (i.e., we
23118    # are seeing illegal tokens and connot continue.  Syntax errors do
23119    # not pass this route).  Calling routine can decide what to do, but
23120    # the default can be to just pass all lines as if they were after __END__
23121    elsif ( $tokenizer_self->{_in_error} ) {
23122        $line_of_tokens->{_line_type} = 'ERROR';
23123        return $line_of_tokens;
23124    }
23125
23126    # print line unchanged if we are __DATA__ section
23127    elsif ( $tokenizer_self->{_in_data} ) {
23128
23129        # ...but look for POD
23130        # Note that the _in_data and _in_end flags remain set
23131        # so that we return to that state after seeing the
23132        # end of a pod section
23133        if ( $input_line =~ /^=(?!cut)/ ) {
23134            $line_of_tokens->{_line_type} = 'POD_START';
23135            write_logfile_entry("Entering POD section\n");
23136            $tokenizer_self->{_in_pod} = 1;
23137            return $line_of_tokens;
23138        }
23139        else {
23140            $line_of_tokens->{_line_type} = 'DATA';
23141            return $line_of_tokens;
23142        }
23143    }
23144
23145    # print line unchanged if we are in __END__ section
23146    elsif ( $tokenizer_self->{_in_end} ) {
23147
23148        # ...but look for POD
23149        # Note that the _in_data and _in_end flags remain set
23150        # so that we return to that state after seeing the
23151        # end of a pod section
23152        if ( $input_line =~ /^=(?!cut)/ ) {
23153            $line_of_tokens->{_line_type} = 'POD_START';
23154            write_logfile_entry("Entering POD section\n");
23155            $tokenizer_self->{_in_pod} = 1;
23156            return $line_of_tokens;
23157        }
23158        else {
23159            $line_of_tokens->{_line_type} = 'END';
23160            return $line_of_tokens;
23161        }
23162    }
23163
23164    # check for a hash-bang line if we haven't seen one
23165    if ( !$tokenizer_self->{_saw_hash_bang} ) {
23166        if ( $input_line =~ /^\#\!.*perl\b/ ) {
23167            $tokenizer_self->{_saw_hash_bang} = $input_line_number;
23168
23169            # check for -w and -P flags
23170            if ( $input_line =~ /^\#\!.*perl\s.*-.*P/ ) {
23171                $tokenizer_self->{_saw_perl_dash_P} = 1;
23172            }
23173
23174            if ( $input_line =~ /^\#\!.*perl\s.*-.*w/ ) {
23175                $tokenizer_self->{_saw_perl_dash_w} = 1;
23176            }
23177
23178            if (   ( $input_line_number > 1 )
23179                && ( !$tokenizer_self->{_look_for_hash_bang} ) )
23180            {
23181
23182                # this is helpful for VMS systems; we may have accidentally
23183                # tokenized some DCL commands
23184                if ( $tokenizer_self->{_started_tokenizing} ) {
23185                    warning(
23186"There seems to be a hash-bang after line 1; do you need to run with -x ?\n"
23187                    );
23188                }
23189                else {
23190                    complain("Useless hash-bang after line 1\n");
23191                }
23192            }
23193
23194            # Report the leading hash-bang as a system line
23195            # This will prevent -dac from deleting it
23196            else {
23197                $line_of_tokens->{_line_type} = 'SYSTEM';
23198                return $line_of_tokens;
23199            }
23200        }
23201    }
23202
23203    # wait for a hash-bang before parsing if the user invoked us with -x
23204    if ( $tokenizer_self->{_look_for_hash_bang}
23205        && !$tokenizer_self->{_saw_hash_bang} )
23206    {
23207        $line_of_tokens->{_line_type} = 'SYSTEM';
23208        return $line_of_tokens;
23209    }
23210
23211    # a first line of the form ': #' will be marked as SYSTEM
23212    # since lines of this form may be used by tcsh
23213    if ( $input_line_number == 1 && $input_line =~ /^\s*\:\s*\#/ ) {
23214        $line_of_tokens->{_line_type} = 'SYSTEM';
23215        return $line_of_tokens;
23216    }
23217
23218    # now we know that it is ok to tokenize the line...
23219    # the line tokenizer will modify any of these private variables:
23220    #        _rhere_target_list
23221    #        _in_data
23222    #        _in_end
23223    #        _in_format
23224    #        _in_error
23225    #        _in_pod
23226    #        _in_quote
23227    my $ending_in_quote_last = $tokenizer_self->{_in_quote};
23228    tokenize_this_line($line_of_tokens);
23229
23230    # Now finish defining the return structure and return it
23231    $line_of_tokens->{_ending_in_quote} = $tokenizer_self->{_in_quote};
23232
23233    # handle severe error (binary data in script)
23234    if ( $tokenizer_self->{_in_error} ) {
23235        $tokenizer_self->{_in_quote} = 0;    # to avoid any more messages
23236        warning("Giving up after error\n");
23237        $line_of_tokens->{_line_type} = 'ERROR';
23238        reset_indentation_level(0);          # avoid error messages
23239        return $line_of_tokens;
23240    }
23241
23242    # handle start of pod documentation
23243    if ( $tokenizer_self->{_in_pod} ) {
23244
23245        # This gets tricky..above a __DATA__ or __END__ section, perl
23246        # accepts '=cut' as the start of pod section. But afterwards,
23247        # only pod utilities see it and they may ignore an =cut without
23248        # leading =head.  In any case, this isn't good.
23249        if ( $input_line =~ /^=cut\b/ ) {
23250            if ( $tokenizer_self->{_saw_data} || $tokenizer_self->{_saw_end} ) {
23251                complain("=cut while not in pod ignored\n");
23252                $tokenizer_self->{_in_pod}    = 0;
23253                $line_of_tokens->{_line_type} = 'POD_END';
23254            }
23255            else {
23256                $line_of_tokens->{_line_type} = 'POD_START';
23257                complain(
23258"=cut starts a pod section .. this can fool pod utilities.\n"
23259                );
23260                write_logfile_entry("Entering POD section\n");
23261            }
23262        }
23263
23264        else {
23265            $line_of_tokens->{_line_type} = 'POD_START';
23266            write_logfile_entry("Entering POD section\n");
23267        }
23268
23269        return $line_of_tokens;
23270    }
23271
23272    # update indentation levels for log messages
23273    if ( $input_line !~ /^\s*$/ ) {
23274        my $rlevels = $line_of_tokens->{_rlevels};
23275        $line_of_tokens->{_guessed_indentation_level} =
23276          guess_old_indentation_level($input_line);
23277    }
23278
23279    # see if this line contains here doc targets
23280    my $rhere_target_list = $tokenizer_self->{_rhere_target_list};
23281    if (@$rhere_target_list) {
23282
23283        my ( $here_doc_target, $here_quote_character ) =
23284          @{ shift @$rhere_target_list };
23285        $tokenizer_self->{_in_here_doc}          = 1;
23286        $tokenizer_self->{_here_doc_target}      = $here_doc_target;
23287        $tokenizer_self->{_here_quote_character} = $here_quote_character;
23288        write_logfile_entry("Entering HERE document $here_doc_target\n");
23289        $tokenizer_self->{_started_looking_for_here_target_at} =
23290          $input_line_number;
23291    }
23292
23293    # NOTE: __END__ and __DATA__ statements are written unformatted
23294    # because they can theoretically contain additional characters
23295    # which are not tokenized (and cannot be read with <DATA> either!).
23296    if ( $tokenizer_self->{_in_data} ) {
23297        $line_of_tokens->{_line_type} = 'DATA_START';
23298        write_logfile_entry("Starting __DATA__ section\n");
23299        $tokenizer_self->{_saw_data} = 1;
23300
23301        # keep parsing after __DATA__ if use SelfLoader was seen
23302        if ( $tokenizer_self->{_saw_selfloader} ) {
23303            $tokenizer_self->{_in_data} = 0;
23304            write_logfile_entry(
23305                "SelfLoader seen, continuing; -nlsl deactivates\n");
23306        }
23307
23308        return $line_of_tokens;
23309    }
23310
23311    elsif ( $tokenizer_self->{_in_end} ) {
23312        $line_of_tokens->{_line_type} = 'END_START';
23313        write_logfile_entry("Starting __END__ section\n");
23314        $tokenizer_self->{_saw_end} = 1;
23315
23316        # keep parsing after __END__ if use AutoLoader was seen
23317        if ( $tokenizer_self->{_saw_autoloader} ) {
23318            $tokenizer_self->{_in_end} = 0;
23319            write_logfile_entry(
23320                "AutoLoader seen, continuing; -nlal deactivates\n");
23321        }
23322        return $line_of_tokens;
23323    }
23324
23325    # now, finally, we know that this line is type 'CODE'
23326    $line_of_tokens->{_line_type} = 'CODE';
23327
23328    # remember if we have seen any real code
23329    if (  !$tokenizer_self->{_started_tokenizing}
23330        && $input_line !~ /^\s*$/
23331        && $input_line !~ /^\s*#/ )
23332    {
23333        $tokenizer_self->{_started_tokenizing} = 1;
23334    }
23335
23336    if ( $tokenizer_self->{_debugger_object} ) {
23337        $tokenizer_self->{_debugger_object}->write_debug_entry($line_of_tokens);
23338    }
23339
23340    # Note: if keyword 'format' occurs in this line code, it is still CODE
23341    # (keyword 'format' need not start a line)
23342    if ( $tokenizer_self->{_in_format} ) {
23343        write_logfile_entry("Entering format section\n");
23344    }
23345
23346    if ( $tokenizer_self->{_in_quote}
23347        and ( $tokenizer_self->{_line_start_quote} < 0 ) )
23348    {
23349
23350        #if ( ( my $quote_target = get_quote_target() ) !~ /^\s*$/ ) {
23351        if (
23352            ( my $quote_target = $tokenizer_self->{_quote_target} ) !~ /^\s*$/ )
23353        {
23354            $tokenizer_self->{_line_start_quote} = $input_line_number;
23355            write_logfile_entry(
23356                "Start multi-line quote or pattern ending in $quote_target\n");
23357        }
23358    }
23359    elsif ( ( $tokenizer_self->{_line_start_quote} >= 0 )
23360        and !$tokenizer_self->{_in_quote} )
23361    {
23362        $tokenizer_self->{_line_start_quote} = -1;
23363        write_logfile_entry("End of multi-line quote or pattern\n");
23364    }
23365
23366    # we are returning a line of CODE
23367    return $line_of_tokens;
23368}
23369
23370sub find_starting_indentation_level {
23371
23372    # We need to find the indentation level of the first line of the
23373    # script being formatted.  Often it will be zero for an entire file,
23374    # but if we are formatting a local block of code (within an editor for
23375    # example) it may not be zero.  The user may specify this with the
23376    # -sil=n parameter but normally doesn't so we have to guess.
23377    #
23378    # USES GLOBAL VARIABLES: $tokenizer_self
23379    my $starting_level = 0;
23380
23381    # use value if given as parameter
23382    if ( $tokenizer_self->{_know_starting_level} ) {
23383        $starting_level = $tokenizer_self->{_starting_level};
23384    }
23385
23386    # if we know there is a hash_bang line, the level must be zero
23387    elsif ( $tokenizer_self->{_look_for_hash_bang} ) {
23388        $tokenizer_self->{_know_starting_level} = 1;
23389    }
23390
23391    # otherwise figure it out from the input file
23392    else {
23393        my $line;
23394        my $i = 0;
23395
23396        # keep looking at lines until we find a hash bang or piece of code
23397        my $msg = "";
23398        while ( $line =
23399            $tokenizer_self->{_line_buffer_object}->peek_ahead( $i++ ) )
23400        {
23401
23402            # if first line is #! then assume starting level is zero
23403            if ( $i == 1 && $line =~ /^\#\!/ ) {
23404                $starting_level = 0;
23405                last;
23406            }
23407            next if ( $line =~ /^\s*#/ );    # skip past comments
23408            next if ( $line =~ /^\s*$/ );    # skip past blank lines
23409            $starting_level = guess_old_indentation_level($line);
23410            last;
23411        }
23412        $msg = "Line $i implies starting-indentation-level = $starting_level\n";
23413        write_logfile_entry("$msg");
23414    }
23415    $tokenizer_self->{_starting_level} = $starting_level;
23416    reset_indentation_level($starting_level);
23417}
23418
23419sub guess_old_indentation_level {
23420    my ($line) = @_;
23421
23422    # Guess the indentation level of an input line.
23423    #
23424    # For the first line of code this result will define the starting
23425    # indentation level.  It will mainly be non-zero when perltidy is applied
23426    # within an editor to a local block of code.
23427    #
23428    # This is an impossible task in general because we can't know what tabs
23429    # meant for the old script and how many spaces were used for one
23430    # indentation level in the given input script.  For example it may have
23431    # been previously formatted with -i=7 -et=3.  But we can at least try to
23432    # make sure that perltidy guesses correctly if it is applied repeatedly to
23433    # a block of code within an editor, so that the block stays at the same
23434    # level when perltidy is applied repeatedly.
23435    #
23436    # USES GLOBAL VARIABLES: $tokenizer_self
23437    my $level = 0;
23438
23439    # find leading tabs, spaces, and any statement label
23440    my $spaces = 0;
23441    if ( $line =~ /^(\t+)?(\s+)?(\w+:[^:])?/ ) {
23442
23443        # If there are leading tabs, we use the tab scheme for this run, if
23444        # any, so that the code will remain stable when editing.
23445        if ($1) { $spaces += length($1) * $tokenizer_self->{_tabsize} }
23446
23447        if ($2) { $spaces += length($2) }
23448
23449        # correct for outdented labels
23450        if ( $3 && $tokenizer_self->{'_outdent_labels'} ) {
23451            $spaces += $tokenizer_self->{_continuation_indentation};
23452        }
23453    }
23454
23455    # compute indentation using the value of -i for this run.
23456    # If -i=0 is used for this run (which is possible) it doesn't matter
23457    # what we do here but we'll guess that the old run used 4 spaces per level.
23458    my $indent_columns = $tokenizer_self->{_indent_columns};
23459    $indent_columns = 4 if ( !$indent_columns );
23460    $level = int( $spaces / $indent_columns );
23461    return ($level);
23462}
23463
23464# This is a currently unused debug routine
23465sub dump_functions {
23466
23467    my $fh = *STDOUT;
23468    my ( $pkg, $sub );
23469    foreach $pkg ( keys %is_user_function ) {
23470        print $fh "\nnon-constant subs in package $pkg\n";
23471
23472        foreach $sub ( keys %{ $is_user_function{$pkg} } ) {
23473            my $msg = "";
23474            if ( $is_block_list_function{$pkg}{$sub} ) {
23475                $msg = 'block_list';
23476            }
23477
23478            if ( $is_block_function{$pkg}{$sub} ) {
23479                $msg = 'block';
23480            }
23481            print $fh "$sub $msg\n";
23482        }
23483    }
23484
23485    foreach $pkg ( keys %is_constant ) {
23486        print $fh "\nconstants and constant subs in package $pkg\n";
23487
23488        foreach $sub ( keys %{ $is_constant{$pkg} } ) {
23489            print $fh "$sub\n";
23490        }
23491    }
23492}
23493
23494sub ones_count {
23495
23496    # count number of 1's in a string of 1's and 0's
23497    # example: ones_count("010101010101") gives 6
23498    return ( my $cis = $_[0] ) =~ tr/1/0/;
23499}
23500
23501sub prepare_for_a_new_file {
23502
23503    # previous tokens needed to determine what to expect next
23504    $last_nonblank_token      = ';';    # the only possible starting state which
23505    $last_nonblank_type       = ';';    # will make a leading brace a code block
23506    $last_nonblank_block_type = '';
23507
23508    # scalars for remembering statement types across multiple lines
23509    $statement_type    = '';            # '' or 'use' or 'sub..' or 'case..'
23510    $in_attribute_list = 0;
23511
23512    # scalars for remembering where we are in the file
23513    $current_package = "main";
23514    $context         = UNKNOWN_CONTEXT;
23515
23516    # hashes used to remember function information
23517    %is_constant             = ();      # user-defined constants
23518    %is_user_function        = ();      # user-defined functions
23519    %user_function_prototype = ();      # their prototypes
23520    %is_block_function       = ();
23521    %is_block_list_function  = ();
23522    %saw_function_definition = ();
23523
23524    # variables used to track depths of various containers
23525    # and report nesting errors
23526    $paren_depth          = 0;
23527    $brace_depth          = 0;
23528    $square_bracket_depth = 0;
23529    @current_depth[ 0 .. $#closing_brace_names ] =
23530      (0) x scalar @closing_brace_names;
23531    $total_depth = 0;
23532    @total_depth = ();
23533    @nesting_sequence_number[ 0 .. $#closing_brace_names ] =
23534      ( 0 .. $#closing_brace_names );
23535    @current_sequence_number             = ();
23536    $paren_type[$paren_depth]            = '';
23537    $paren_semicolon_count[$paren_depth] = 0;
23538    $paren_structural_type[$brace_depth] = '';
23539    $brace_type[$brace_depth] = ';';    # identify opening brace as code block
23540    $brace_structural_type[$brace_depth]                   = '';
23541    $brace_context[$brace_depth]                           = UNKNOWN_CONTEXT;
23542    $brace_package[$paren_depth]                           = $current_package;
23543    $square_bracket_type[$square_bracket_depth]            = '';
23544    $square_bracket_structural_type[$square_bracket_depth] = '';
23545
23546    initialize_tokenizer_state();
23547}
23548
23549{                                       # begin tokenize_this_line
23550
23551    use constant BRACE          => 0;
23552    use constant SQUARE_BRACKET => 1;
23553    use constant PAREN          => 2;
23554    use constant QUESTION_COLON => 3;
23555
23556    # TV1: scalars for processing one LINE.
23557    # Re-initialized on each entry to sub tokenize_this_line.
23558    my (
23559        $block_type,        $container_type,    $expecting,
23560        $i,                 $i_tok,             $input_line,
23561        $input_line_number, $last_nonblank_i,   $max_token_index,
23562        $next_tok,          $next_type,         $peeked_ahead,
23563        $prototype,         $rhere_target_list, $rtoken_map,
23564        $rtoken_type,       $rtokens,           $tok,
23565        $type,              $type_sequence,     $indent_flag,
23566    );
23567
23568    # TV2: refs to ARRAYS for processing one LINE
23569    # Re-initialized on each call.
23570    my $routput_token_list     = [];    # stack of output token indexes
23571    my $routput_token_type     = [];    # token types
23572    my $routput_block_type     = [];    # types of code block
23573    my $routput_container_type = [];    # paren types, such as if, elsif, ..
23574    my $routput_type_sequence  = [];    # nesting sequential number
23575    my $routput_indent_flag    = [];    #
23576
23577    # TV3: SCALARS for quote variables.  These are initialized with a
23578    # subroutine call and continually updated as lines are processed.
23579    my ( $in_quote, $quote_type, $quote_character, $quote_pos, $quote_depth,
23580        $quoted_string_1, $quoted_string_2, $allowed_quote_modifiers, );
23581
23582    # TV4: SCALARS for multi-line identifiers and
23583    # statements. These are initialized with a subroutine call
23584    # and continually updated as lines are processed.
23585    my ( $id_scan_state, $identifier, $want_paren, $indented_if_level );
23586
23587    # TV5: SCALARS for tracking indentation level.
23588    # Initialized once and continually updated as lines are
23589    # processed.
23590    my (
23591        $nesting_token_string,      $nesting_type_string,
23592        $nesting_block_string,      $nesting_block_flag,
23593        $nesting_list_string,       $nesting_list_flag,
23594        $ci_string_in_tokenizer,    $continuation_string_in_tokenizer,
23595        $in_statement_continuation, $level_in_tokenizer,
23596        $slevel_in_tokenizer,       $rslevel_stack,
23597    );
23598
23599    # TV6: SCALARS for remembering several previous
23600    # tokens. Initialized once and continually updated as
23601    # lines are processed.
23602    my (
23603        $last_nonblank_container_type,     $last_nonblank_type_sequence,
23604        $last_last_nonblank_token,         $last_last_nonblank_type,
23605        $last_last_nonblank_block_type,    $last_last_nonblank_container_type,
23606        $last_last_nonblank_type_sequence, $last_nonblank_prototype,
23607    );
23608
23609    # ----------------------------------------------------------------
23610    # beginning of tokenizer variable access and manipulation routines
23611    # ----------------------------------------------------------------
23612
23613    sub initialize_tokenizer_state {
23614
23615        # TV1: initialized on each call
23616        # TV2: initialized on each call
23617        # TV3:
23618        $in_quote                = 0;
23619        $quote_type              = 'Q';
23620        $quote_character         = "";
23621        $quote_pos               = 0;
23622        $quote_depth             = 0;
23623        $quoted_string_1         = "";
23624        $quoted_string_2         = "";
23625        $allowed_quote_modifiers = "";
23626
23627        # TV4:
23628        $id_scan_state     = '';
23629        $identifier        = '';
23630        $want_paren        = "";
23631        $indented_if_level = 0;
23632
23633        # TV5:
23634        $nesting_token_string             = "";
23635        $nesting_type_string              = "";
23636        $nesting_block_string             = '1';    # initially in a block
23637        $nesting_block_flag               = 1;
23638        $nesting_list_string              = '0';    # initially not in a list
23639        $nesting_list_flag                = 0;      # initially not in a list
23640        $ci_string_in_tokenizer           = "";
23641        $continuation_string_in_tokenizer = "0";
23642        $in_statement_continuation        = 0;
23643        $level_in_tokenizer               = 0;
23644        $slevel_in_tokenizer              = 0;
23645        $rslevel_stack                    = [];
23646
23647        # TV6:
23648        $last_nonblank_container_type      = '';
23649        $last_nonblank_type_sequence       = '';
23650        $last_last_nonblank_token          = ';';
23651        $last_last_nonblank_type           = ';';
23652        $last_last_nonblank_block_type     = '';
23653        $last_last_nonblank_container_type = '';
23654        $last_last_nonblank_type_sequence  = '';
23655        $last_nonblank_prototype           = "";
23656    }
23657
23658    sub save_tokenizer_state {
23659
23660        my $rTV1 = [
23661            $block_type,        $container_type,    $expecting,
23662            $i,                 $i_tok,             $input_line,
23663            $input_line_number, $last_nonblank_i,   $max_token_index,
23664            $next_tok,          $next_type,         $peeked_ahead,
23665            $prototype,         $rhere_target_list, $rtoken_map,
23666            $rtoken_type,       $rtokens,           $tok,
23667            $type,              $type_sequence,     $indent_flag,
23668        ];
23669
23670        my $rTV2 = [
23671            $routput_token_list,    $routput_token_type,
23672            $routput_block_type,    $routput_container_type,
23673            $routput_type_sequence, $routput_indent_flag,
23674        ];
23675
23676        my $rTV3 = [
23677            $in_quote,        $quote_type,
23678            $quote_character, $quote_pos,
23679            $quote_depth,     $quoted_string_1,
23680            $quoted_string_2, $allowed_quote_modifiers,
23681        ];
23682
23683        my $rTV4 =
23684          [ $id_scan_state, $identifier, $want_paren, $indented_if_level ];
23685
23686        my $rTV5 = [
23687            $nesting_token_string,      $nesting_type_string,
23688            $nesting_block_string,      $nesting_block_flag,
23689            $nesting_list_string,       $nesting_list_flag,
23690            $ci_string_in_tokenizer,    $continuation_string_in_tokenizer,
23691            $in_statement_continuation, $level_in_tokenizer,
23692            $slevel_in_tokenizer,       $rslevel_stack,
23693        ];
23694
23695        my $rTV6 = [
23696            $last_nonblank_container_type,
23697            $last_nonblank_type_sequence,
23698            $last_last_nonblank_token,
23699            $last_last_nonblank_type,
23700            $last_last_nonblank_block_type,
23701            $last_last_nonblank_container_type,
23702            $last_last_nonblank_type_sequence,
23703            $last_nonblank_prototype,
23704        ];
23705        return [ $rTV1, $rTV2, $rTV3, $rTV4, $rTV5, $rTV6 ];
23706    }
23707
23708    sub restore_tokenizer_state {
23709        my ($rstate) = @_;
23710        my ( $rTV1, $rTV2, $rTV3, $rTV4, $rTV5, $rTV6 ) = @{$rstate};
23711        (
23712            $block_type,        $container_type,    $expecting,
23713            $i,                 $i_tok,             $input_line,
23714            $input_line_number, $last_nonblank_i,   $max_token_index,
23715            $next_tok,          $next_type,         $peeked_ahead,
23716            $prototype,         $rhere_target_list, $rtoken_map,
23717            $rtoken_type,       $rtokens,           $tok,
23718            $type,              $type_sequence,     $indent_flag,
23719        ) = @{$rTV1};
23720
23721        (
23722            $routput_token_list,    $routput_token_type,
23723            $routput_block_type,    $routput_container_type,
23724            $routput_type_sequence, $routput_type_sequence,
23725        ) = @{$rTV2};
23726
23727        (
23728            $in_quote, $quote_type, $quote_character, $quote_pos, $quote_depth,
23729            $quoted_string_1, $quoted_string_2, $allowed_quote_modifiers,
23730        ) = @{$rTV3};
23731
23732        ( $id_scan_state, $identifier, $want_paren, $indented_if_level ) =
23733          @{$rTV4};
23734
23735        (
23736            $nesting_token_string,      $nesting_type_string,
23737            $nesting_block_string,      $nesting_block_flag,
23738            $nesting_list_string,       $nesting_list_flag,
23739            $ci_string_in_tokenizer,    $continuation_string_in_tokenizer,
23740            $in_statement_continuation, $level_in_tokenizer,
23741            $slevel_in_tokenizer,       $rslevel_stack,
23742        ) = @{$rTV5};
23743
23744        (
23745            $last_nonblank_container_type,
23746            $last_nonblank_type_sequence,
23747            $last_last_nonblank_token,
23748            $last_last_nonblank_type,
23749            $last_last_nonblank_block_type,
23750            $last_last_nonblank_container_type,
23751            $last_last_nonblank_type_sequence,
23752            $last_nonblank_prototype,
23753        ) = @{$rTV6};
23754    }
23755
23756    sub get_indentation_level {
23757
23758        # patch to avoid reporting error if indented if is not terminated
23759        if ($indented_if_level) { return $level_in_tokenizer - 1 }
23760        return $level_in_tokenizer;
23761    }
23762
23763    sub reset_indentation_level {
23764        $level_in_tokenizer  = $_[0];
23765        $slevel_in_tokenizer = $_[0];
23766        push @{$rslevel_stack}, $slevel_in_tokenizer;
23767    }
23768
23769    sub peeked_ahead {
23770        $peeked_ahead = defined( $_[0] ) ? $_[0] : $peeked_ahead;
23771    }
23772
23773    # ------------------------------------------------------------
23774    # end of tokenizer variable access and manipulation routines
23775    # ------------------------------------------------------------
23776
23777    # ------------------------------------------------------------
23778    # beginning of various scanner interface routines
23779    # ------------------------------------------------------------
23780    sub scan_replacement_text {
23781
23782        # check for here-docs in replacement text invoked by
23783        # a substitution operator with executable modifier 'e'.
23784        #
23785        # given:
23786        #  $replacement_text
23787        # return:
23788        #  $rht = reference to any here-doc targets
23789        my ($replacement_text) = @_;
23790
23791        # quick check
23792        return undef unless ( $replacement_text =~ /<</ );
23793
23794        write_logfile_entry("scanning replacement text for here-doc targets\n");
23795
23796        # save the logger object for error messages
23797        my $logger_object = $tokenizer_self->{_logger_object};
23798
23799        # localize all package variables
23800        local (
23801            $tokenizer_self,                 $last_nonblank_token,
23802            $last_nonblank_type,             $last_nonblank_block_type,
23803            $statement_type,                 $in_attribute_list,
23804            $current_package,                $context,
23805            %is_constant,                    %is_user_function,
23806            %user_function_prototype,        %is_block_function,
23807            %is_block_list_function,         %saw_function_definition,
23808            $brace_depth,                    $paren_depth,
23809            $square_bracket_depth,           @current_depth,
23810            @total_depth,                    $total_depth,
23811            @nesting_sequence_number,        @current_sequence_number,
23812            @paren_type,                     @paren_semicolon_count,
23813            @paren_structural_type,          @brace_type,
23814            @brace_structural_type,          @brace_context,
23815            @brace_package,                  @square_bracket_type,
23816            @square_bracket_structural_type, @depth_array,
23817            @starting_line_of_current_depth, @nested_ternary_flag,
23818            @nested_statement_type,
23819        );
23820
23821        # save all lexical variables
23822        my $rstate = save_tokenizer_state();
23823        _decrement_count();    # avoid error check for multiple tokenizers
23824
23825        # make a new tokenizer
23826        my $rOpts = {};
23827        my $rpending_logfile_message;
23828        my $source_object =
23829          Perl::Tidy::LineSource->new( \$replacement_text, $rOpts,
23830            $rpending_logfile_message );
23831        my $tokenizer = Perl::Tidy::Tokenizer->new(
23832            source_object        => $source_object,
23833            logger_object        => $logger_object,
23834            starting_line_number => $input_line_number,
23835        );
23836
23837        # scan the replacement text
23838        1 while ( $tokenizer->get_line() );
23839
23840        # remove any here doc targets
23841        my $rht = undef;
23842        if ( $tokenizer_self->{_in_here_doc} ) {
23843            $rht = [];
23844            push @{$rht},
23845              [
23846                $tokenizer_self->{_here_doc_target},
23847                $tokenizer_self->{_here_quote_character}
23848              ];
23849            if ( $tokenizer_self->{_rhere_target_list} ) {
23850                push @{$rht}, @{ $tokenizer_self->{_rhere_target_list} };
23851                $tokenizer_self->{_rhere_target_list} = undef;
23852            }
23853            $tokenizer_self->{_in_here_doc} = undef;
23854        }
23855
23856        # now its safe to report errors
23857        $tokenizer->report_tokenization_errors();
23858
23859        # restore all tokenizer lexical variables
23860        restore_tokenizer_state($rstate);
23861
23862        # return the here doc targets
23863        return $rht;
23864    }
23865
23866    sub scan_bare_identifier {
23867        ( $i, $tok, $type, $prototype ) =
23868          scan_bare_identifier_do( $input_line, $i, $tok, $type, $prototype,
23869            $rtoken_map, $max_token_index );
23870    }
23871
23872    sub scan_identifier {
23873        ( $i, $tok, $type, $id_scan_state, $identifier ) =
23874          scan_identifier_do( $i, $id_scan_state, $identifier, $rtokens,
23875            $max_token_index, $expecting );
23876    }
23877
23878    sub scan_id {
23879        ( $i, $tok, $type, $id_scan_state ) =
23880          scan_id_do( $input_line, $i, $tok, $rtokens, $rtoken_map,
23881            $id_scan_state, $max_token_index );
23882    }
23883
23884    sub scan_number {
23885        my $number;
23886        ( $i, $type, $number ) =
23887          scan_number_do( $input_line, $i, $rtoken_map, $type,
23888            $max_token_index );
23889        return $number;
23890    }
23891
23892    # a sub to warn if token found where term expected
23893    sub error_if_expecting_TERM {
23894        if ( $expecting == TERM ) {
23895            if ( $really_want_term{$last_nonblank_type} ) {
23896                unexpected( $tok, "term", $i_tok, $last_nonblank_i, $rtoken_map,
23897                    $rtoken_type, $input_line );
23898                1;
23899            }
23900        }
23901    }
23902
23903    # a sub to warn if token found where operator expected
23904    sub error_if_expecting_OPERATOR {
23905        if ( $expecting == OPERATOR ) {
23906            my $thing = defined $_[0] ? $_[0] : $tok;
23907            unexpected( $thing, "operator", $i_tok, $last_nonblank_i,
23908                $rtoken_map, $rtoken_type, $input_line );
23909            if ( $i_tok == 0 ) {
23910                interrupt_logfile();
23911                warning("Missing ';' above?\n");
23912                resume_logfile();
23913            }
23914            1;
23915        }
23916    }
23917
23918    # ------------------------------------------------------------
23919    # end scanner interfaces
23920    # ------------------------------------------------------------
23921
23922    my %is_for_foreach;
23923    @_ = qw(for foreach);
23924    @is_for_foreach{@_} = (1) x scalar(@_);
23925
23926    my %is_my_our;
23927    @_ = qw(my our);
23928    @is_my_our{@_} = (1) x scalar(@_);
23929
23930    # These keywords may introduce blocks after parenthesized expressions,
23931    # in the form:
23932    # keyword ( .... ) { BLOCK }
23933    # patch for SWITCH/CASE: added 'switch' 'case' 'given' 'when'
23934    my %is_blocktype_with_paren;
23935    @_ = qw(if elsif unless while until for foreach switch case given when);
23936    @is_blocktype_with_paren{@_} = (1) x scalar(@_);
23937
23938    # ------------------------------------------------------------
23939    # begin hash of code for handling most token types
23940    # ------------------------------------------------------------
23941    my $tokenization_code = {
23942
23943        # no special code for these types yet, but syntax checks
23944        # could be added
23945
23946##      '!'   => undef,
23947##      '!='  => undef,
23948##      '!~'  => undef,
23949##      '%='  => undef,
23950##      '&&=' => undef,
23951##      '&='  => undef,
23952##      '+='  => undef,
23953##      '-='  => undef,
23954##      '..'  => undef,
23955##      '..'  => undef,
23956##      '...' => undef,
23957##      '.='  => undef,
23958##      '<<=' => undef,
23959##      '<='  => undef,
23960##      '<=>' => undef,
23961##      '<>'  => undef,
23962##      '='   => undef,
23963##      '=='  => undef,
23964##      '=~'  => undef,
23965##      '>='  => undef,
23966##      '>>'  => undef,
23967##      '>>=' => undef,
23968##      '\\'  => undef,
23969##      '^='  => undef,
23970##      '|='  => undef,
23971##      '||=' => undef,
23972##      '//=' => undef,
23973##      '~'   => undef,
23974##      '~~'  => undef,
23975##      '!~~'  => undef,
23976
23977        '>' => sub {
23978            error_if_expecting_TERM()
23979              if ( $expecting == TERM );
23980        },
23981        '|' => sub {
23982            error_if_expecting_TERM()
23983              if ( $expecting == TERM );
23984        },
23985        '$' => sub {
23986
23987            # start looking for a scalar
23988            error_if_expecting_OPERATOR("Scalar")
23989              if ( $expecting == OPERATOR );
23990            scan_identifier();
23991
23992            if ( $identifier eq '$^W' ) {
23993                $tokenizer_self->{_saw_perl_dash_w} = 1;
23994            }
23995
23996            # Check for indentifier in indirect object slot
23997            # (vorboard.pl, sort.t).  Something like:
23998            #   /^(print|printf|sort|exec|system)$/
23999            if (
24000                $is_indirect_object_taker{$last_nonblank_token}
24001
24002                || ( ( $last_nonblank_token eq '(' )
24003                    && $is_indirect_object_taker{ $paren_type[$paren_depth] } )
24004                || ( $last_nonblank_type =~ /^[Uw]$/ )    # possible object
24005              )
24006            {
24007                $type = 'Z';
24008            }
24009        },
24010        '(' => sub {
24011
24012            ++$paren_depth;
24013            $paren_semicolon_count[$paren_depth] = 0;
24014            if ($want_paren) {
24015                $container_type = $want_paren;
24016                $want_paren     = "";
24017            }
24018            else {
24019                $container_type = $last_nonblank_token;
24020
24021                # We can check for a syntax error here of unexpected '(',
24022                # but this is going to get messy...
24023                if (
24024                    $expecting == OPERATOR
24025
24026                    # be sure this is not a method call of the form
24027                    # &method(...), $method->(..), &{method}(...),
24028                    # $ref[2](list) is ok & short for $ref[2]->(list)
24029                    # NOTE: at present, braces in something like &{ xxx }
24030                    # are not marked as a block, we might have a method call
24031                    && $last_nonblank_token !~ /^([\]\}\&]|\-\>)/
24032
24033                  )
24034                {
24035
24036                    # ref: camel 3 p 703.
24037                    if ( $last_last_nonblank_token eq 'do' ) {
24038                        complain(
24039"do SUBROUTINE is deprecated; consider & or -> notation\n"
24040                        );
24041                    }
24042                    else {
24043
24044                        # if this is an empty list, (), then it is not an
24045                        # error; for example, we might have a constant pi and
24046                        # invoke it with pi() or just pi;
24047                        my ( $next_nonblank_token, $i_next ) =
24048                          find_next_nonblank_token( $i, $rtokens,
24049                            $max_token_index );
24050                        if ( $next_nonblank_token ne ')' ) {
24051                            my $hint;
24052                            error_if_expecting_OPERATOR('(');
24053
24054                            if ( $last_nonblank_type eq 'C' ) {
24055                                $hint =
24056                                  "$last_nonblank_token has a void prototype\n";
24057                            }
24058                            elsif ( $last_nonblank_type eq 'i' ) {
24059                                if (   $i_tok > 0
24060                                    && $last_nonblank_token =~ /^\$/ )
24061                                {
24062                                    $hint =
24063"Do you mean '$last_nonblank_token->(' ?\n";
24064                                }
24065                            }
24066                            if ($hint) {
24067                                interrupt_logfile();
24068                                warning($hint);
24069                                resume_logfile();
24070                            }
24071                        } ## end if ( $next_nonblank_token...
24072                    } ## end else [ if ( $last_last_nonblank_token...
24073                } ## end if ( $expecting == OPERATOR...
24074            }
24075            $paren_type[$paren_depth] = $container_type;
24076            ( $type_sequence, $indent_flag ) =
24077              increase_nesting_depth( PAREN, $$rtoken_map[$i_tok] );
24078
24079            # propagate types down through nested parens
24080            # for example: the second paren in 'if ((' would be structural
24081            # since the first is.
24082
24083            if ( $last_nonblank_token eq '(' ) {
24084                $type = $last_nonblank_type;
24085            }
24086
24087            #     We exclude parens as structural after a ',' because it
24088            #     causes subtle problems with continuation indentation for
24089            #     something like this, where the first 'or' will not get
24090            #     indented.
24091            #
24092            #         assert(
24093            #             __LINE__,
24094            #             ( not defined $check )
24095            #               or ref $check
24096            #               or $check eq "new"
24097            #               or $check eq "old",
24098            #         );
24099            #
24100            #     Likewise, we exclude parens where a statement can start
24101            #     because of problems with continuation indentation, like
24102            #     these:
24103            #
24104            #         ($firstline =~ /^#\!.*perl/)
24105            #         and (print $File::Find::name, "\n")
24106            #           and (return 1);
24107            #
24108            #         (ref($usage_fref) =~ /CODE/)
24109            #         ? &$usage_fref
24110            #           : (&blast_usage, &blast_params, &blast_general_params);
24111
24112            else {
24113                $type = '{';
24114            }
24115
24116            if ( $last_nonblank_type eq ')' ) {
24117                warning(
24118                    "Syntax error? found token '$last_nonblank_type' then '('\n"
24119                );
24120            }
24121            $paren_structural_type[$paren_depth] = $type;
24122
24123        },
24124        ')' => sub {
24125            ( $type_sequence, $indent_flag ) =
24126              decrease_nesting_depth( PAREN, $$rtoken_map[$i_tok] );
24127
24128            if ( $paren_structural_type[$paren_depth] eq '{' ) {
24129                $type = '}';
24130            }
24131
24132            $container_type = $paren_type[$paren_depth];
24133
24134            #    /^(for|foreach)$/
24135            if ( $is_for_foreach{ $paren_type[$paren_depth] } ) {
24136                my $num_sc = $paren_semicolon_count[$paren_depth];
24137                if ( $num_sc > 0 && $num_sc != 2 ) {
24138                    warning("Expected 2 ';' in 'for(;;)' but saw $num_sc\n");
24139                }
24140            }
24141
24142            if ( $paren_depth > 0 ) { $paren_depth-- }
24143        },
24144        ',' => sub {
24145            if ( $last_nonblank_type eq ',' ) {
24146                complain("Repeated ','s \n");
24147            }
24148
24149            # patch for operator_expected: note if we are in the list (use.t)
24150            if ( $statement_type eq 'use' ) { $statement_type = '_use' }
24151##                FIXME: need to move this elsewhere, perhaps check after a '('
24152##                elsif ($last_nonblank_token eq '(') {
24153##                    warning("Leading ','s illegal in some versions of perl\n");
24154##                }
24155        },
24156        ';' => sub {
24157            $context        = UNKNOWN_CONTEXT;
24158            $statement_type = '';
24159
24160            #    /^(for|foreach)$/
24161            if ( $is_for_foreach{ $paren_type[$paren_depth] } )
24162            {    # mark ; in for loop
24163
24164                # Be careful: we do not want a semicolon such as the
24165                # following to be included:
24166                #
24167                #    for (sort {strcoll($a,$b);} keys %investments) {
24168
24169                if (   $brace_depth == $depth_array[PAREN][BRACE][$paren_depth]
24170                    && $square_bracket_depth ==
24171                    $depth_array[PAREN][SQUARE_BRACKET][$paren_depth] )
24172                {
24173
24174                    $type = 'f';
24175                    $paren_semicolon_count[$paren_depth]++;
24176                }
24177            }
24178
24179        },
24180        '"' => sub {
24181            error_if_expecting_OPERATOR("String")
24182              if ( $expecting == OPERATOR );
24183            $in_quote                = 1;
24184            $type                    = 'Q';
24185            $allowed_quote_modifiers = "";
24186        },
24187        "'" => sub {
24188            error_if_expecting_OPERATOR("String")
24189              if ( $expecting == OPERATOR );
24190            $in_quote                = 1;
24191            $type                    = 'Q';
24192            $allowed_quote_modifiers = "";
24193        },
24194        '`' => sub {
24195            error_if_expecting_OPERATOR("String")
24196              if ( $expecting == OPERATOR );
24197            $in_quote                = 1;
24198            $type                    = 'Q';
24199            $allowed_quote_modifiers = "";
24200        },
24201        '/' => sub {
24202            my $is_pattern;
24203
24204            if ( $expecting == UNKNOWN ) {    # indeterminte, must guess..
24205                my $msg;
24206                ( $is_pattern, $msg ) =
24207                  guess_if_pattern_or_division( $i, $rtokens, $rtoken_map,
24208                    $max_token_index );
24209
24210                if ($msg) {
24211                    write_diagnostics("DIVIDE:$msg\n");
24212                    write_logfile_entry($msg);
24213                }
24214            }
24215            else { $is_pattern = ( $expecting == TERM ) }
24216
24217            if ($is_pattern) {
24218                $in_quote                = 1;
24219                $type                    = 'Q';
24220                $allowed_quote_modifiers = '[msixpodualgc]';
24221            }
24222            else {    # not a pattern; check for a /= token
24223
24224                if ( $$rtokens[ $i + 1 ] eq '=' ) {    # form token /=
24225                    $i++;
24226                    $tok  = '/=';
24227                    $type = $tok;
24228                }
24229
24230              #DEBUG - collecting info on what tokens follow a divide
24231              # for development of guessing algorithm
24232              #if ( numerator_expected( $i, $rtokens, $max_token_index ) < 0 ) {
24233              #    #write_diagnostics( "DIVIDE? $input_line\n" );
24234              #}
24235            }
24236        },
24237        '{' => sub {
24238
24239            # if we just saw a ')', we will label this block with
24240            # its type.  We need to do this to allow sub
24241            # code_block_type to determine if this brace starts a
24242            # code block or anonymous hash.  (The type of a paren
24243            # pair is the preceding token, such as 'if', 'else',
24244            # etc).
24245            $container_type = "";
24246
24247            # ATTRS: for a '{' following an attribute list, reset
24248            # things to look like we just saw the sub name
24249            if ( $statement_type =~ /^sub/ ) {
24250                $last_nonblank_token = $statement_type;
24251                $last_nonblank_type  = 'i';
24252                $statement_type      = "";
24253            }
24254
24255            # patch for SWITCH/CASE: hide these keywords from an immediately
24256            # following opening brace
24257            elsif ( ( $statement_type eq 'case' || $statement_type eq 'when' )
24258                && $statement_type eq $last_nonblank_token )
24259            {
24260                $last_nonblank_token = ";";
24261            }
24262
24263            elsif ( $last_nonblank_token eq ')' ) {
24264                $last_nonblank_token = $paren_type[ $paren_depth + 1 ];
24265
24266                # defensive move in case of a nesting error (pbug.t)
24267                # in which this ')' had no previous '('
24268                # this nesting error will have been caught
24269                if ( !defined($last_nonblank_token) ) {
24270                    $last_nonblank_token = 'if';
24271                }
24272
24273                # check for syntax error here;
24274                unless ( $is_blocktype_with_paren{$last_nonblank_token} ) {
24275                    my $list = join( ' ', sort keys %is_blocktype_with_paren );
24276                    warning(
24277                        "syntax error at ') {', didn't see one of: $list\n");
24278                }
24279            }
24280
24281            # patch for paren-less for/foreach glitch, part 2.
24282            # see note below under 'qw'
24283            elsif ($last_nonblank_token eq 'qw'
24284                && $is_for_foreach{$want_paren} )
24285            {
24286                $last_nonblank_token = $want_paren;
24287                if ( $last_last_nonblank_token eq $want_paren ) {
24288                    warning(
24289"syntax error at '$want_paren .. {' -- missing \$ loop variable\n"
24290                    );
24291
24292                }
24293                $want_paren = "";
24294            }
24295
24296            # now identify which of the three possible types of
24297            # curly braces we have: hash index container, anonymous
24298            # hash reference, or code block.
24299
24300            # non-structural (hash index) curly brace pair
24301            # get marked 'L' and 'R'
24302            if ( is_non_structural_brace() ) {
24303                $type = 'L';
24304
24305                # patch for SWITCH/CASE:
24306                # allow paren-less identifier after 'when'
24307                # if the brace is preceded by a space
24308                if (   $statement_type eq 'when'
24309                    && $last_nonblank_type eq 'i'
24310                    && $last_last_nonblank_type eq 'k'
24311                    && ( $i_tok == 0 || $rtoken_type->[ $i_tok - 1 ] eq 'b' ) )
24312                {
24313                    $type       = '{';
24314                    $block_type = $statement_type;
24315                }
24316            }
24317
24318            # code and anonymous hash have the same type, '{', but are
24319            # distinguished by 'block_type',
24320            # which will be blank for an anonymous hash
24321            else {
24322
24323                $block_type = code_block_type( $i_tok, $rtokens, $rtoken_type,
24324                    $max_token_index );
24325
24326                # remember a preceding smartmatch operator
24327                if ( $last_nonblank_type eq '~~' ) {
24328                    $block_type = $last_nonblank_type;
24329                }
24330
24331                # patch to promote bareword type to function taking block
24332                if (   $block_type
24333                    && $last_nonblank_type eq 'w'
24334                    && $last_nonblank_i >= 0 )
24335                {
24336                    if ( $routput_token_type->[$last_nonblank_i] eq 'w' ) {
24337                        $routput_token_type->[$last_nonblank_i] = 'G';
24338                    }
24339                }
24340
24341                # patch for SWITCH/CASE: if we find a stray opening block brace
24342                # where we might accept a 'case' or 'when' block, then take it
24343                if (   $statement_type eq 'case'
24344                    || $statement_type eq 'when' )
24345                {
24346                    if ( !$block_type || $block_type eq '}' ) {
24347                        $block_type = $statement_type;
24348                    }
24349                }
24350            }
24351            $brace_type[ ++$brace_depth ]        = $block_type;
24352            $brace_package[$brace_depth]         = $current_package;
24353            $brace_structural_type[$brace_depth] = $type;
24354            $brace_context[$brace_depth]         = $context;
24355            ( $type_sequence, $indent_flag ) =
24356              increase_nesting_depth( BRACE, $$rtoken_map[$i_tok] );
24357        },
24358        '}' => sub {
24359            $block_type = $brace_type[$brace_depth];
24360            if ($block_type) { $statement_type = '' }
24361            if ( defined( $brace_package[$brace_depth] ) ) {
24362                $current_package = $brace_package[$brace_depth];
24363            }
24364
24365            # can happen on brace error (caught elsewhere)
24366            else {
24367            }
24368            ( $type_sequence, $indent_flag ) =
24369              decrease_nesting_depth( BRACE, $$rtoken_map[$i_tok] );
24370
24371            if ( $brace_structural_type[$brace_depth] eq 'L' ) {
24372                $type = 'R';
24373            }
24374
24375            # propagate type information for 'do' and 'eval' blocks, and also
24376            # for smartmatch operator.  This is necessary to enable us to know
24377            # if an operator or term is expected next.
24378            if ( $is_block_operator{$block_type} || $block_type eq '~~' ) {
24379                $tok = $block_type;
24380            }
24381
24382            $context = $brace_context[$brace_depth];
24383            if ( $brace_depth > 0 ) { $brace_depth--; }
24384        },
24385        '&' => sub {    # maybe sub call? start looking
24386
24387            # We have to check for sub call unless we are sure we
24388            # are expecting an operator.  This example from s2p
24389            # got mistaken as a q operator in an early version:
24390            #   print BODY &q(<<'EOT');
24391            if ( $expecting != OPERATOR ) {
24392
24393                # But only look for a sub call if we are expecting a term or
24394                # if there is no existing space after the &.
24395                # For example we probably don't want & as sub call here:
24396                #    Fcntl::S_IRUSR & $mode;
24397                if ( $expecting == TERM || $next_type ne 'b' ) {
24398                    scan_identifier();
24399                }
24400            }
24401            else {
24402            }
24403        },
24404        '<' => sub {    # angle operator or less than?
24405
24406            if ( $expecting != OPERATOR ) {
24407                ( $i, $type ) =
24408                  find_angle_operator_termination( $input_line, $i, $rtoken_map,
24409                    $expecting, $max_token_index );
24410
24411                if ( $type eq '<' && $expecting == TERM ) {
24412                    error_if_expecting_TERM();
24413                    interrupt_logfile();
24414                    warning("Unterminated <> operator?\n");
24415                    resume_logfile();
24416                }
24417            }
24418            else {
24419            }
24420        },
24421        '?' => sub {    # ?: conditional or starting pattern?
24422
24423            my $is_pattern;
24424
24425            if ( $expecting == UNKNOWN ) {
24426
24427                my $msg;
24428                ( $is_pattern, $msg ) =
24429                  guess_if_pattern_or_conditional( $i, $rtokens, $rtoken_map,
24430                    $max_token_index );
24431
24432                if ($msg) { write_logfile_entry($msg) }
24433            }
24434            else { $is_pattern = ( $expecting == TERM ) }
24435
24436            if ($is_pattern) {
24437                $in_quote                = 1;
24438                $type                    = 'Q';
24439                $allowed_quote_modifiers = '[msixpodualgc]';
24440            }
24441            else {
24442                ( $type_sequence, $indent_flag ) =
24443                  increase_nesting_depth( QUESTION_COLON,
24444                    $$rtoken_map[$i_tok] );
24445            }
24446        },
24447        '*' => sub {    # typeglob, or multiply?
24448
24449            if ( $expecting == TERM ) {
24450                scan_identifier();
24451            }
24452            else {
24453
24454                if ( $$rtokens[ $i + 1 ] eq '=' ) {
24455                    $tok  = '*=';
24456                    $type = $tok;
24457                    $i++;
24458                }
24459                elsif ( $$rtokens[ $i + 1 ] eq '*' ) {
24460                    $tok  = '**';
24461                    $type = $tok;
24462                    $i++;
24463                    if ( $$rtokens[ $i + 1 ] eq '=' ) {
24464                        $tok  = '**=';
24465                        $type = $tok;
24466                        $i++;
24467                    }
24468                }
24469            }
24470        },
24471        '.' => sub {    # what kind of . ?
24472
24473            if ( $expecting != OPERATOR ) {
24474                scan_number();
24475                if ( $type eq '.' ) {
24476                    error_if_expecting_TERM()
24477                      if ( $expecting == TERM );
24478                }
24479            }
24480            else {
24481            }
24482        },
24483        ':' => sub {
24484
24485            # if this is the first nonblank character, call it a label
24486            # since perl seems to just swallow it
24487            if ( $input_line_number == 1 && $last_nonblank_i == -1 ) {
24488                $type = 'J';
24489            }
24490
24491            # ATTRS: check for a ':' which introduces an attribute list
24492            # (this might eventually get its own token type)
24493            elsif ( $statement_type =~ /^sub/ ) {
24494                $type              = 'A';
24495                $in_attribute_list = 1;
24496            }
24497
24498            # check for scalar attribute, such as
24499            # my $foo : shared = 1;
24500            elsif ($is_my_our{$statement_type}
24501                && $current_depth[QUESTION_COLON] == 0 )
24502            {
24503                $type              = 'A';
24504                $in_attribute_list = 1;
24505            }
24506
24507            # otherwise, it should be part of a ?/: operator
24508            else {
24509                ( $type_sequence, $indent_flag ) =
24510                  decrease_nesting_depth( QUESTION_COLON,
24511                    $$rtoken_map[$i_tok] );
24512                if ( $last_nonblank_token eq '?' ) {
24513                    warning("Syntax error near ? :\n");
24514                }
24515            }
24516        },
24517        '+' => sub {    # what kind of plus?
24518
24519            if ( $expecting == TERM ) {
24520                my $number = scan_number();
24521
24522                # unary plus is safest assumption if not a number
24523                if ( !defined($number) ) { $type = 'p'; }
24524            }
24525            elsif ( $expecting == OPERATOR ) {
24526            }
24527            else {
24528                if ( $next_type eq 'w' ) { $type = 'p' }
24529            }
24530        },
24531        '@' => sub {
24532
24533            error_if_expecting_OPERATOR("Array")
24534              if ( $expecting == OPERATOR );
24535            scan_identifier();
24536        },
24537        '%' => sub {    # hash or modulo?
24538
24539            # first guess is hash if no following blank
24540            if ( $expecting == UNKNOWN ) {
24541                if ( $next_type ne 'b' ) { $expecting = TERM }
24542            }
24543            if ( $expecting == TERM ) {
24544                scan_identifier();
24545            }
24546        },
24547        '[' => sub {
24548            $square_bracket_type[ ++$square_bracket_depth ] =
24549              $last_nonblank_token;
24550            ( $type_sequence, $indent_flag ) =
24551              increase_nesting_depth( SQUARE_BRACKET, $$rtoken_map[$i_tok] );
24552
24553            # It may seem odd, but structural square brackets have
24554            # type '{' and '}'.  This simplifies the indentation logic.
24555            if ( !is_non_structural_brace() ) {
24556                $type = '{';
24557            }
24558            $square_bracket_structural_type[$square_bracket_depth] = $type;
24559        },
24560        ']' => sub {
24561            ( $type_sequence, $indent_flag ) =
24562              decrease_nesting_depth( SQUARE_BRACKET, $$rtoken_map[$i_tok] );
24563
24564            if ( $square_bracket_structural_type[$square_bracket_depth] eq '{' )
24565            {
24566                $type = '}';
24567            }
24568
24569            # propagate type information for smartmatch operator.  This is
24570            # necessary to enable us to know if an operator or term is expected
24571            # next.
24572            if ( $square_bracket_type[$square_bracket_depth] eq '~~' ) {
24573                $tok = $square_bracket_type[$square_bracket_depth];
24574            }
24575
24576            if ( $square_bracket_depth > 0 ) { $square_bracket_depth--; }
24577        },
24578        '-' => sub {    # what kind of minus?
24579
24580            if ( ( $expecting != OPERATOR )
24581                && $is_file_test_operator{$next_tok} )
24582            {
24583                my ( $next_nonblank_token, $i_next ) =
24584                  find_next_nonblank_token( $i + 1, $rtokens,
24585                    $max_token_index );
24586
24587                # check for a quoted word like "-w=>xx";
24588                # it is sufficient to just check for a following '='
24589                if ( $next_nonblank_token eq '=' ) {
24590                    $type = 'm';
24591                }
24592                else {
24593                    $i++;
24594                    $tok .= $next_tok;
24595                    $type = 'F';
24596                }
24597            }
24598            elsif ( $expecting == TERM ) {
24599                my $number = scan_number();
24600
24601                # maybe part of bareword token? unary is safest
24602                if ( !defined($number) ) { $type = 'm'; }
24603
24604            }
24605            elsif ( $expecting == OPERATOR ) {
24606            }
24607            else {
24608
24609                if ( $next_type eq 'w' ) {
24610                    $type = 'm';
24611                }
24612            }
24613        },
24614
24615        '^' => sub {
24616
24617            # check for special variables like ${^WARNING_BITS}
24618            if ( $expecting == TERM ) {
24619
24620                # FIXME: this should work but will not catch errors
24621                # because we also have to be sure that previous token is
24622                # a type character ($,@,%).
24623                if ( $last_nonblank_token eq '{'
24624                    && ( $next_tok =~ /^[A-Za-z_]/ ) )
24625                {
24626
24627                    if ( $next_tok eq 'W' ) {
24628                        $tokenizer_self->{_saw_perl_dash_w} = 1;
24629                    }
24630                    $tok  = $tok . $next_tok;
24631                    $i    = $i + 1;
24632                    $type = 'w';
24633                }
24634
24635                else {
24636                    unless ( error_if_expecting_TERM() ) {
24637
24638                        # Something like this is valid but strange:
24639                        # undef ^I;
24640                        complain("The '^' seems unusual here\n");
24641                    }
24642                }
24643            }
24644        },
24645
24646        '::' => sub {    # probably a sub call
24647            scan_bare_identifier();
24648        },
24649        '<<' => sub {    # maybe a here-doc?
24650            return
24651              unless ( $i < $max_token_index )
24652              ;          # here-doc not possible if end of line
24653
24654            if ( $expecting != OPERATOR ) {
24655                my ( $found_target, $here_doc_target, $here_quote_character,
24656                    $saw_error );
24657                (
24658                    $found_target, $here_doc_target, $here_quote_character, $i,
24659                    $saw_error
24660                  )
24661                  = find_here_doc( $expecting, $i, $rtokens, $rtoken_map,
24662                    $max_token_index );
24663
24664                if ($found_target) {
24665                    push @{$rhere_target_list},
24666                      [ $here_doc_target, $here_quote_character ];
24667                    $type = 'h';
24668                    if ( length($here_doc_target) > 80 ) {
24669                        my $truncated = substr( $here_doc_target, 0, 80 );
24670                        complain("Long here-target: '$truncated' ...\n");
24671                    }
24672                    elsif ( $here_doc_target !~ /^[A-Z_]\w+$/ ) {
24673                        complain(
24674                            "Unconventional here-target: '$here_doc_target'\n"
24675                        );
24676                    }
24677                }
24678                elsif ( $expecting == TERM ) {
24679                    unless ($saw_error) {
24680
24681                        # shouldn't happen..
24682                        warning("Program bug; didn't find here doc target\n");
24683                        report_definite_bug();
24684                    }
24685                }
24686            }
24687            else {
24688            }
24689        },
24690        '->' => sub {
24691
24692            # if -> points to a bare word, we must scan for an identifier,
24693            # otherwise something like ->y would look like the y operator
24694            scan_identifier();
24695        },
24696
24697        # type = 'pp' for pre-increment, '++' for post-increment
24698        '++' => sub {
24699            if ( $expecting == TERM ) { $type = 'pp' }
24700            elsif ( $expecting == UNKNOWN ) {
24701                my ( $next_nonblank_token, $i_next ) =
24702                  find_next_nonblank_token( $i, $rtokens, $max_token_index );
24703                if ( $next_nonblank_token eq '$' ) { $type = 'pp' }
24704            }
24705        },
24706
24707        '=>' => sub {
24708            if ( $last_nonblank_type eq $tok ) {
24709                complain("Repeated '=>'s \n");
24710            }
24711
24712            # patch for operator_expected: note if we are in the list (use.t)
24713            # TODO: make version numbers a new token type
24714            if ( $statement_type eq 'use' ) { $statement_type = '_use' }
24715        },
24716
24717        # type = 'mm' for pre-decrement, '--' for post-decrement
24718        '--' => sub {
24719
24720            if ( $expecting == TERM ) { $type = 'mm' }
24721            elsif ( $expecting == UNKNOWN ) {
24722                my ( $next_nonblank_token, $i_next ) =
24723                  find_next_nonblank_token( $i, $rtokens, $max_token_index );
24724                if ( $next_nonblank_token eq '$' ) { $type = 'mm' }
24725            }
24726        },
24727
24728        '&&' => sub {
24729            error_if_expecting_TERM()
24730              if ( $expecting == TERM );
24731        },
24732
24733        '||' => sub {
24734            error_if_expecting_TERM()
24735              if ( $expecting == TERM );
24736        },
24737
24738        '//' => sub {
24739            error_if_expecting_TERM()
24740              if ( $expecting == TERM );
24741        },
24742    };
24743
24744    # ------------------------------------------------------------
24745    # end hash of code for handling individual token types
24746    # ------------------------------------------------------------
24747
24748    my %matching_start_token = ( '}' => '{', ']' => '[', ')' => '(' );
24749
24750    # These block types terminate statements and do not need a trailing
24751    # semicolon
24752    # patched for SWITCH/CASE/
24753    my %is_zero_continuation_block_type;
24754    @_ = qw( } { BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue ;
24755      if elsif else unless while until for foreach switch case given when);
24756    @is_zero_continuation_block_type{@_} = (1) x scalar(@_);
24757
24758    my %is_not_zero_continuation_block_type;
24759    @_ = qw(sort grep map do eval);
24760    @is_not_zero_continuation_block_type{@_} = (1) x scalar(@_);
24761
24762    my %is_logical_container;
24763    @_ = qw(if elsif unless while and or err not && !  || for foreach);
24764    @is_logical_container{@_} = (1) x scalar(@_);
24765
24766    my %is_binary_type;
24767    @_ = qw(|| &&);
24768    @is_binary_type{@_} = (1) x scalar(@_);
24769
24770    my %is_binary_keyword;
24771    @_ = qw(and or err eq ne cmp);
24772    @is_binary_keyword{@_} = (1) x scalar(@_);
24773
24774    # 'L' is token for opening { at hash key
24775    my %is_opening_type;
24776    @_ = qw" L { ( [ ";
24777    @is_opening_type{@_} = (1) x scalar(@_);
24778
24779    # 'R' is token for closing } at hash key
24780    my %is_closing_type;
24781    @_ = qw" R } ) ] ";
24782    @is_closing_type{@_} = (1) x scalar(@_);
24783
24784    my %is_redo_last_next_goto;
24785    @_ = qw(redo last next goto);
24786    @is_redo_last_next_goto{@_} = (1) x scalar(@_);
24787
24788    my %is_use_require;
24789    @_ = qw(use require);
24790    @is_use_require{@_} = (1) x scalar(@_);
24791
24792    my %is_sub_package;
24793    @_ = qw(sub package);
24794    @is_sub_package{@_} = (1) x scalar(@_);
24795
24796    # This hash holds the hash key in $tokenizer_self for these keywords:
24797    my %is_format_END_DATA = (
24798        'format'   => '_in_format',
24799        '__END__'  => '_in_end',
24800        '__DATA__' => '_in_data',
24801    );
24802
24803    # ref: camel 3 p 147,
24804    # but perl may accept undocumented flags
24805    # perl 5.10 adds 'p' (preserve)
24806    # Perl version 5.16, http://perldoc.perl.org/perlop.html,  has these:
24807    # /PATTERN/msixpodualgc or m?PATTERN?msixpodualgc
24808    # s/PATTERN/REPLACEMENT/msixpodualgcer
24809    # y/SEARCHLIST/REPLACEMENTLIST/cdsr
24810    # tr/SEARCHLIST/REPLACEMENTLIST/cdsr
24811    # qr/STRING/msixpodual
24812    my %quote_modifiers = (
24813        's'  => '[msixpodualgcer]',
24814        'y'  => '[cdsr]',
24815        'tr' => '[cdsr]',
24816        'm'  => '[msixpodualgc]',
24817        'qr' => '[msixpodual]',
24818        'q'  => "",
24819        'qq' => "",
24820        'qw' => "",
24821        'qx' => "",
24822    );
24823
24824    # table showing how many quoted things to look for after quote operator..
24825    # s, y, tr have 2 (pattern and replacement)
24826    # others have 1 (pattern only)
24827    my %quote_items = (
24828        's'  => 2,
24829        'y'  => 2,
24830        'tr' => 2,
24831        'm'  => 1,
24832        'qr' => 1,
24833        'q'  => 1,
24834        'qq' => 1,
24835        'qw' => 1,
24836        'qx' => 1,
24837    );
24838
24839    sub tokenize_this_line {
24840
24841  # This routine breaks a line of perl code into tokens which are of use in
24842  # indentation and reformatting.  One of my goals has been to define tokens
24843  # such that a newline may be inserted between any pair of tokens without
24844  # changing or invalidating the program. This version comes close to this,
24845  # although there are necessarily a few exceptions which must be caught by
24846  # the formatter.  Many of these involve the treatment of bare words.
24847  #
24848  # The tokens and their types are returned in arrays.  See previous
24849  # routine for their names.
24850  #
24851  # See also the array "valid_token_types" in the BEGIN section for an
24852  # up-to-date list.
24853  #
24854  # To simplify things, token types are either a single character, or they
24855  # are identical to the tokens themselves.
24856  #
24857  # As a debugging aid, the -D flag creates a file containing a side-by-side
24858  # comparison of the input string and its tokenization for each line of a file.
24859  # This is an invaluable debugging aid.
24860  #
24861  # In addition to tokens, and some associated quantities, the tokenizer
24862  # also returns flags indication any special line types.  These include
24863  # quotes, here_docs, formats.
24864  #
24865  # -----------------------------------------------------------------------
24866  #
24867  # How to add NEW_TOKENS:
24868  #
24869  # New token types will undoubtedly be needed in the future both to keep up
24870  # with changes in perl and to help adapt the tokenizer to other applications.
24871  #
24872  # Here are some notes on the minimal steps.  I wrote these notes while
24873  # adding the 'v' token type for v-strings, which are things like version
24874  # numbers 5.6.0, and ip addresses, and will use that as an example.  ( You
24875  # can use your editor to search for the string "NEW_TOKENS" to find the
24876  # appropriate sections to change):
24877  #
24878  # *. Try to talk somebody else into doing it!  If not, ..
24879  #
24880  # *. Make a backup of your current version in case things don't work out!
24881  #
24882  # *. Think of a new, unused character for the token type, and add to
24883  # the array @valid_token_types in the BEGIN section of this package.
24884  # For example, I used 'v' for v-strings.
24885  #
24886  # *. Implement coding to recognize the $type of the token in this routine.
24887  # This is the hardest part, and is best done by imitating or modifying
24888  # some of the existing coding.  For example, to recognize v-strings, I
24889  # patched 'sub scan_bare_identifier' to recognize v-strings beginning with
24890  # 'v' and 'sub scan_number' to recognize v-strings without the leading 'v'.
24891  #
24892  # *. Update sub operator_expected.  This update is critically important but
24893  # the coding is trivial.  Look at the comments in that routine for help.
24894  # For v-strings, which should behave like numbers, I just added 'v' to the
24895  # regex used to handle numbers and strings (types 'n' and 'Q').
24896  #
24897  # *. Implement a 'bond strength' rule in sub set_bond_strengths in
24898  # Perl::Tidy::Formatter for breaking lines around this token type.  You can
24899  # skip this step and take the default at first, then adjust later to get
24900  # desired results.  For adding type 'v', I looked at sub bond_strength and
24901  # saw that number type 'n' was using default strengths, so I didn't do
24902  # anything.  I may tune it up someday if I don't like the way line
24903  # breaks with v-strings look.
24904  #
24905  # *. Implement a 'whitespace' rule in sub set_white_space_flag in
24906  # Perl::Tidy::Formatter.  For adding type 'v', I looked at this routine
24907  # and saw that type 'n' used spaces on both sides, so I just added 'v'
24908  # to the array @spaces_both_sides.
24909  #
24910  # *. Update HtmlWriter package so that users can colorize the token as
24911  # desired.  This is quite easy; see comments identified by 'NEW_TOKENS' in
24912  # that package.  For v-strings, I initially chose to use a default color
24913  # equal to the default for numbers, but it might be nice to change that
24914  # eventually.
24915  #
24916  # *. Update comments in Perl::Tidy::Tokenizer::dump_token_types.
24917  #
24918  # *. Run lots and lots of debug tests.  Start with special files designed
24919  # to test the new token type.  Run with the -D flag to create a .DEBUG
24920  # file which shows the tokenization.  When these work ok, test as many old
24921  # scripts as possible.  Start with all of the '.t' files in the 'test'
24922  # directory of the distribution file.  Compare .tdy output with previous
24923  # version and updated version to see the differences.  Then include as
24924  # many more files as possible. My own technique has been to collect a huge
24925  # number of perl scripts (thousands!) into one directory and run perltidy
24926  # *, then run diff between the output of the previous version and the
24927  # current version.
24928  #
24929  # *. For another example, search for the smartmatch operator '~~'
24930  # with your editor to see where updates were made for it.
24931  #
24932  # -----------------------------------------------------------------------
24933
24934        my $line_of_tokens = shift;
24935        my ($untrimmed_input_line) = $line_of_tokens->{_line_text};
24936
24937        # patch while coding change is underway
24938        # make callers private data to allow access
24939        # $tokenizer_self = $caller_tokenizer_self;
24940
24941        # extract line number for use in error messages
24942        $input_line_number = $line_of_tokens->{_line_number};
24943
24944        # reinitialize for multi-line quote
24945        $line_of_tokens->{_starting_in_quote} = $in_quote && $quote_type eq 'Q';
24946
24947        # check for pod documentation
24948        if ( ( $untrimmed_input_line =~ /^=[A-Za-z_]/ ) ) {
24949
24950            # must not be in multi-line quote
24951            # and must not be in an eqn
24952            if ( !$in_quote and ( operator_expected( 'b', '=', 'b' ) == TERM ) )
24953            {
24954                $tokenizer_self->{_in_pod} = 1;
24955                return;
24956            }
24957        }
24958
24959        $input_line = $untrimmed_input_line;
24960
24961        chomp $input_line;
24962
24963        # trim start of this line unless we are continuing a quoted line
24964        # do not trim end because we might end in a quote (test: deken4.pl)
24965        # Perl::Tidy::Formatter will delete needless trailing blanks
24966        unless ( $in_quote && ( $quote_type eq 'Q' ) ) {
24967            $input_line =~ s/^\s*//;    # trim left end
24968        }
24969
24970        # update the copy of the line for use in error messages
24971        # This must be exactly what we give the pre_tokenizer
24972        $tokenizer_self->{_line_text} = $input_line;
24973
24974        # re-initialize for the main loop
24975        $routput_token_list     = [];    # stack of output token indexes
24976        $routput_token_type     = [];    # token types
24977        $routput_block_type     = [];    # types of code block
24978        $routput_container_type = [];    # paren types, such as if, elsif, ..
24979        $routput_type_sequence  = [];    # nesting sequential number
24980
24981        $rhere_target_list = [];
24982
24983        $tok             = $last_nonblank_token;
24984        $type            = $last_nonblank_type;
24985        $prototype       = $last_nonblank_prototype;
24986        $last_nonblank_i = -1;
24987        $block_type      = $last_nonblank_block_type;
24988        $container_type  = $last_nonblank_container_type;
24989        $type_sequence   = $last_nonblank_type_sequence;
24990        $indent_flag     = 0;
24991        $peeked_ahead    = 0;
24992
24993        # tokenization is done in two stages..
24994        # stage 1 is a very simple pre-tokenization
24995        my $max_tokens_wanted = 0; # this signals pre_tokenize to get all tokens
24996
24997        # a little optimization for a full-line comment
24998        if ( !$in_quote && ( $input_line =~ /^#/ ) ) {
24999            $max_tokens_wanted = 1    # no use tokenizing a comment
25000        }
25001
25002        # start by breaking the line into pre-tokens
25003        ( $rtokens, $rtoken_map, $rtoken_type ) =
25004          pre_tokenize( $input_line, $max_tokens_wanted );
25005
25006        $max_token_index = scalar(@$rtokens) - 1;
25007        push( @$rtokens,    ' ', ' ', ' ' ); # extra whitespace simplifies logic
25008        push( @$rtoken_map, 0,   0,   0 );   # shouldn't be referenced
25009        push( @$rtoken_type, 'b', 'b', 'b' );
25010
25011        # initialize for main loop
25012        for $i ( 0 .. $max_token_index + 3 ) {
25013            $routput_token_type->[$i]     = "";
25014            $routput_block_type->[$i]     = "";
25015            $routput_container_type->[$i] = "";
25016            $routput_type_sequence->[$i]  = "";
25017            $routput_indent_flag->[$i]    = 0;
25018        }
25019        $i     = -1;
25020        $i_tok = -1;
25021
25022        # ------------------------------------------------------------
25023        # begin main tokenization loop
25024        # ------------------------------------------------------------
25025
25026        # we are looking at each pre-token of one line and combining them
25027        # into tokens
25028        while ( ++$i <= $max_token_index ) {
25029
25030            if ($in_quote) {    # continue looking for end of a quote
25031                $type = $quote_type;
25032
25033                unless ( @{$routput_token_list} )
25034                {               # initialize if continuation line
25035                    push( @{$routput_token_list}, $i );
25036                    $routput_token_type->[$i] = $type;
25037
25038                }
25039                $tok = $quote_character unless ( $quote_character =~ /^\s*$/ );
25040
25041                # scan for the end of the quote or pattern
25042                (
25043                    $i, $in_quote, $quote_character, $quote_pos, $quote_depth,
25044                    $quoted_string_1, $quoted_string_2
25045                  )
25046                  = do_quote(
25047                    $i,               $in_quote,    $quote_character,
25048                    $quote_pos,       $quote_depth, $quoted_string_1,
25049                    $quoted_string_2, $rtokens,     $rtoken_map,
25050                    $max_token_index
25051                  );
25052
25053                # all done if we didn't find it
25054                last if ($in_quote);
25055
25056                # save pattern and replacement text for rescanning
25057                my $qs1 = $quoted_string_1;
25058                my $qs2 = $quoted_string_2;
25059
25060                # re-initialize for next search
25061                $quote_character = '';
25062                $quote_pos       = 0;
25063                $quote_type      = 'Q';
25064                $quoted_string_1 = "";
25065                $quoted_string_2 = "";
25066                last if ( ++$i > $max_token_index );
25067
25068                # look for any modifiers
25069                if ($allowed_quote_modifiers) {
25070
25071                    # check for exact quote modifiers
25072                    if ( $$rtokens[$i] =~ /^[A-Za-z_]/ ) {
25073                        my $str = $$rtokens[$i];
25074                        my $saw_modifier_e;
25075                        while ( $str =~ /\G$allowed_quote_modifiers/gc ) {
25076                            my $pos = pos($str);
25077                            my $char = substr( $str, $pos - 1, 1 );
25078                            $saw_modifier_e ||= ( $char eq 'e' );
25079                        }
25080
25081                        # For an 'e' quote modifier we must scan the replacement
25082                        # text for here-doc targets.
25083                        if ($saw_modifier_e) {
25084
25085                            my $rht = scan_replacement_text($qs1);
25086
25087                            # Change type from 'Q' to 'h' for quotes with
25088                            # here-doc targets so that the formatter (see sub
25089                            # print_line_of_tokens) will not make any line
25090                            # breaks after this point.
25091                            if ($rht) {
25092                                push @{$rhere_target_list}, @{$rht};
25093                                $type = 'h';
25094                                if ( $i_tok < 0 ) {
25095                                    my $ilast = $routput_token_list->[-1];
25096                                    $routput_token_type->[$ilast] = $type;
25097                                }
25098                            }
25099                        }
25100
25101                        if ( defined( pos($str) ) ) {
25102
25103                            # matched
25104                            if ( pos($str) == length($str) ) {
25105                                last if ( ++$i > $max_token_index );
25106                            }
25107
25108                            # Looks like a joined quote modifier
25109                            # and keyword, maybe something like
25110                            # s/xxx/yyy/gefor @k=...
25111                            # Example is "galgen.pl".  Would have to split
25112                            # the word and insert a new token in the
25113                            # pre-token list.  This is so rare that I haven't
25114                            # done it.  Will just issue a warning citation.
25115
25116                            # This error might also be triggered if my quote
25117                            # modifier characters are incomplete
25118                            else {
25119                                warning(<<EOM);
25120
25121Partial match to quote modifier $allowed_quote_modifiers at word: '$str'
25122Please put a space between quote modifiers and trailing keywords.
25123EOM
25124
25125                           # print "token $$rtokens[$i]\n";
25126                           # my $num = length($str) - pos($str);
25127                           # $$rtokens[$i]=substr($$rtokens[$i],pos($str),$num);
25128                           # print "continuing with new token $$rtokens[$i]\n";
25129
25130                                # skipping past this token does least damage
25131                                last if ( ++$i > $max_token_index );
25132                            }
25133                        }
25134                        else {
25135
25136                            # example file: rokicki4.pl
25137                            # This error might also be triggered if my quote
25138                            # modifier characters are incomplete
25139                            write_logfile_entry(
25140"Note: found word $str at quote modifier location\n"
25141                            );
25142                        }
25143                    }
25144
25145                    # re-initialize
25146                    $allowed_quote_modifiers = "";
25147                }
25148            }
25149
25150            unless ( $tok =~ /^\s*$/ || $tok eq 'CORE::' ) {
25151
25152                # try to catch some common errors
25153                if ( ( $type eq 'n' ) && ( $tok ne '0' ) ) {
25154
25155                    if ( $last_nonblank_token eq 'eq' ) {
25156                        complain("Should 'eq' be '==' here ?\n");
25157                    }
25158                    elsif ( $last_nonblank_token eq 'ne' ) {
25159                        complain("Should 'ne' be '!=' here ?\n");
25160                    }
25161                }
25162
25163                $last_last_nonblank_token      = $last_nonblank_token;
25164                $last_last_nonblank_type       = $last_nonblank_type;
25165                $last_last_nonblank_block_type = $last_nonblank_block_type;
25166                $last_last_nonblank_container_type =
25167                  $last_nonblank_container_type;
25168                $last_last_nonblank_type_sequence =
25169                  $last_nonblank_type_sequence;
25170                $last_nonblank_token          = $tok;
25171                $last_nonblank_type           = $type;
25172                $last_nonblank_prototype      = $prototype;
25173                $last_nonblank_block_type     = $block_type;
25174                $last_nonblank_container_type = $container_type;
25175                $last_nonblank_type_sequence  = $type_sequence;
25176                $last_nonblank_i              = $i_tok;
25177            }
25178
25179            # store previous token type
25180            if ( $i_tok >= 0 ) {
25181                $routput_token_type->[$i_tok]     = $type;
25182                $routput_block_type->[$i_tok]     = $block_type;
25183                $routput_container_type->[$i_tok] = $container_type;
25184                $routput_type_sequence->[$i_tok]  = $type_sequence;
25185                $routput_indent_flag->[$i_tok]    = $indent_flag;
25186            }
25187            my $pre_tok  = $$rtokens[$i];        # get the next pre-token
25188            my $pre_type = $$rtoken_type[$i];    # and type
25189            $tok  = $pre_tok;
25190            $type = $pre_type;                   # to be modified as necessary
25191            $block_type = "";    # blank for all tokens except code block braces
25192            $container_type = "";    # blank for all tokens except some parens
25193            $type_sequence  = "";    # blank for all tokens except ?/:
25194            $indent_flag    = 0;
25195            $prototype = "";    # blank for all tokens except user defined subs
25196            $i_tok     = $i;
25197
25198            # this pre-token will start an output token
25199            push( @{$routput_token_list}, $i_tok );
25200
25201            # continue gathering identifier if necessary
25202            # but do not start on blanks and comments
25203            if ( $id_scan_state && $pre_type !~ /[b#]/ ) {
25204
25205                if ( $id_scan_state =~ /^(sub|package)/ ) {
25206                    scan_id();
25207                }
25208                else {
25209                    scan_identifier();
25210                }
25211
25212                last if ($id_scan_state);
25213                next if ( ( $i > 0 ) || $type );
25214
25215                # didn't find any token; start over
25216                $type = $pre_type;
25217                $tok  = $pre_tok;
25218            }
25219
25220            # handle whitespace tokens..
25221            next if ( $type eq 'b' );
25222            my $prev_tok  = $i > 0 ? $$rtokens[ $i - 1 ]     : ' ';
25223            my $prev_type = $i > 0 ? $$rtoken_type[ $i - 1 ] : 'b';
25224
25225            # Build larger tokens where possible, since we are not in a quote.
25226            #
25227            # First try to assemble digraphs.  The following tokens are
25228            # excluded and handled specially:
25229            # '/=' is excluded because the / might start a pattern.
25230            # 'x=' is excluded since it might be $x=, with $ on previous line
25231            # '**' and *= might be typeglobs of punctuation variables
25232            # I have allowed tokens starting with <, such as <=,
25233            # because I don't think these could be valid angle operators.
25234            # test file: storrs4.pl
25235            my $test_tok   = $tok . $$rtokens[ $i + 1 ];
25236            my $combine_ok = $is_digraph{$test_tok};
25237
25238            # check for special cases which cannot be combined
25239            if ($combine_ok) {
25240
25241                # '//' must be defined_or operator if an operator is expected.
25242                # TODO: Code for other ambiguous digraphs (/=, x=, **, *=)
25243                # could be migrated here for clarity
25244                if ( $test_tok eq '//' ) {
25245                    my $next_type = $$rtokens[ $i + 1 ];
25246                    my $expecting =
25247                      operator_expected( $prev_type, $tok, $next_type );
25248                    $combine_ok = 0 unless ( $expecting == OPERATOR );
25249                }
25250            }
25251
25252            if (
25253                $combine_ok
25254                && ( $test_tok ne '/=' )    # might be pattern
25255                && ( $test_tok ne 'x=' )    # might be $x
25256                && ( $test_tok ne '**' )    # typeglob?
25257                && ( $test_tok ne '*=' )    # typeglob?
25258              )
25259            {
25260                $tok = $test_tok;
25261                $i++;
25262
25263                # Now try to assemble trigraphs.  Note that all possible
25264                # perl trigraphs can be constructed by appending a character
25265                # to a digraph.
25266                $test_tok = $tok . $$rtokens[ $i + 1 ];
25267
25268                if ( $is_trigraph{$test_tok} ) {
25269                    $tok = $test_tok;
25270                    $i++;
25271                }
25272            }
25273
25274            $type      = $tok;
25275            $next_tok  = $$rtokens[ $i + 1 ];
25276            $next_type = $$rtoken_type[ $i + 1 ];
25277
25278            TOKENIZER_DEBUG_FLAG_TOKENIZE && do {
25279                local $" = ')(';
25280                my @debug_list = (
25281                    $last_nonblank_token,      $tok,
25282                    $next_tok,                 $brace_depth,
25283                    $brace_type[$brace_depth], $paren_depth,
25284                    $paren_type[$paren_depth]
25285                );
25286                print STDOUT "TOKENIZE:(@debug_list)\n";
25287            };
25288
25289            # turn off attribute list on first non-blank, non-bareword
25290            if ( $pre_type ne 'w' ) { $in_attribute_list = 0 }
25291
25292            ###############################################################
25293            # We have the next token, $tok.
25294            # Now we have to examine this token and decide what it is
25295            # and define its $type
25296            #
25297            # section 1: bare words
25298            ###############################################################
25299
25300            if ( $pre_type eq 'w' ) {
25301                $expecting = operator_expected( $prev_type, $tok, $next_type );
25302                my ( $next_nonblank_token, $i_next ) =
25303                  find_next_nonblank_token( $i, $rtokens, $max_token_index );
25304
25305                # ATTRS: handle sub and variable attributes
25306                if ($in_attribute_list) {
25307
25308                    # treat bare word followed by open paren like qw(
25309                    if ( $next_nonblank_token eq '(' ) {
25310                        $in_quote                = $quote_items{'q'};
25311                        $allowed_quote_modifiers = $quote_modifiers{'q'};
25312                        $type                    = 'q';
25313                        $quote_type              = 'q';
25314                        next;
25315                    }
25316
25317                    # handle bareword not followed by open paren
25318                    else {
25319                        $type = 'w';
25320                        next;
25321                    }
25322                }
25323
25324                # quote a word followed by => operator
25325                if ( $next_nonblank_token eq '=' ) {
25326
25327                    if ( $$rtokens[ $i_next + 1 ] eq '>' ) {
25328                        if ( $is_constant{$current_package}{$tok} ) {
25329                            $type = 'C';
25330                        }
25331                        elsif ( $is_user_function{$current_package}{$tok} ) {
25332                            $type = 'U';
25333                            $prototype =
25334                              $user_function_prototype{$current_package}{$tok};
25335                        }
25336                        elsif ( $tok =~ /^v\d+$/ ) {
25337                            $type = 'v';
25338                            report_v_string($tok);
25339                        }
25340                        else { $type = 'w' }
25341
25342                        next;
25343                    }
25344                }
25345
25346     # quote a bare word within braces..like xxx->{s}; note that we
25347     # must be sure this is not a structural brace, to avoid
25348     # mistaking {s} in the following for a quoted bare word:
25349     #     for(@[){s}bla}BLA}
25350     # Also treat q in something like var{-q} as a bare word, not qoute operator
25351                if (
25352                    $next_nonblank_token eq '}'
25353                    && (
25354                        $last_nonblank_type eq 'L'
25355                        || (   $last_nonblank_type eq 'm'
25356                            && $last_last_nonblank_type eq 'L' )
25357                    )
25358                  )
25359                {
25360                    $type = 'w';
25361                    next;
25362                }
25363
25364                # a bare word immediately followed by :: is not a keyword;
25365                # use $tok_kw when testing for keywords to avoid a mistake
25366                my $tok_kw = $tok;
25367                if ( $$rtokens[ $i + 1 ] eq ':' && $$rtokens[ $i + 2 ] eq ':' )
25368                {
25369                    $tok_kw .= '::';
25370                }
25371
25372                # handle operator x (now we know it isn't $x=)
25373                if ( ( $tok =~ /^x\d*$/ ) && ( $expecting == OPERATOR ) ) {
25374                    if ( $tok eq 'x' ) {
25375
25376                        if ( $$rtokens[ $i + 1 ] eq '=' ) {    # x=
25377                            $tok  = 'x=';
25378                            $type = $tok;
25379                            $i++;
25380                        }
25381                        else {
25382                            $type = 'x';
25383                        }
25384                    }
25385
25386                    # FIXME: Patch: mark something like x4 as an integer for now
25387                    # It gets fixed downstream.  This is easier than
25388                    # splitting the pretoken.
25389                    else {
25390                        $type = 'n';
25391                    }
25392                }
25393                elsif ( $tok_kw eq 'CORE::' ) {
25394                    $type = $tok = $tok_kw;
25395                    $i += 2;
25396                }
25397                elsif ( ( $tok eq 'strict' )
25398                    and ( $last_nonblank_token eq 'use' ) )
25399                {
25400                    $tokenizer_self->{_saw_use_strict} = 1;
25401                    scan_bare_identifier();
25402                }
25403
25404                elsif ( ( $tok eq 'warnings' )
25405                    and ( $last_nonblank_token eq 'use' ) )
25406                {
25407                    $tokenizer_self->{_saw_perl_dash_w} = 1;
25408
25409                    # scan as identifier, so that we pick up something like:
25410                    # use warnings::register
25411                    scan_bare_identifier();
25412                }
25413
25414                elsif (
25415                       $tok eq 'AutoLoader'
25416                    && $tokenizer_self->{_look_for_autoloader}
25417                    && (
25418                        $last_nonblank_token eq 'use'
25419
25420                        # these regexes are from AutoSplit.pm, which we want
25421                        # to mimic
25422                        || $input_line =~ /^\s*(use|require)\s+AutoLoader\b/
25423                        || $input_line =~ /\bISA\s*=.*\bAutoLoader\b/
25424                    )
25425                  )
25426                {
25427                    write_logfile_entry("AutoLoader seen, -nlal deactivates\n");
25428                    $tokenizer_self->{_saw_autoloader}      = 1;
25429                    $tokenizer_self->{_look_for_autoloader} = 0;
25430                    scan_bare_identifier();
25431                }
25432
25433                elsif (
25434                       $tok eq 'SelfLoader'
25435                    && $tokenizer_self->{_look_for_selfloader}
25436                    && (   $last_nonblank_token eq 'use'
25437                        || $input_line =~ /^\s*(use|require)\s+SelfLoader\b/
25438                        || $input_line =~ /\bISA\s*=.*\bSelfLoader\b/ )
25439                  )
25440                {
25441                    write_logfile_entry("SelfLoader seen, -nlsl deactivates\n");
25442                    $tokenizer_self->{_saw_selfloader}      = 1;
25443                    $tokenizer_self->{_look_for_selfloader} = 0;
25444                    scan_bare_identifier();
25445                }
25446
25447                elsif ( ( $tok eq 'constant' )
25448                    and ( $last_nonblank_token eq 'use' ) )
25449                {
25450                    scan_bare_identifier();
25451                    my ( $next_nonblank_token, $i_next ) =
25452                      find_next_nonblank_token( $i, $rtokens,
25453                        $max_token_index );
25454
25455                    if ($next_nonblank_token) {
25456
25457                        if ( $is_keyword{$next_nonblank_token} ) {
25458
25459                            # Assume qw is used as a quote and okay, as in:
25460                            #  use constant qw{ DEBUG 0 };
25461                            # Not worth trying to parse for just a warning
25462
25463                            # NOTE: This warning is deactivated because recent
25464                            # versions of perl do not complain here, but
25465                            # the coding is retained for reference.
25466                            if ( 0 && $next_nonblank_token ne 'qw' ) {
25467                                warning(
25468"Attempting to define constant '$next_nonblank_token' which is a perl keyword\n"
25469                                );
25470                            }
25471                        }
25472
25473                        # FIXME: could check for error in which next token is
25474                        # not a word (number, punctuation, ..)
25475                        else {
25476                            $is_constant{$current_package}{$next_nonblank_token}
25477                              = 1;
25478                        }
25479                    }
25480                }
25481
25482                # various quote operators
25483                elsif ( $is_q_qq_qw_qx_qr_s_y_tr_m{$tok} ) {
25484                    if ( $expecting == OPERATOR ) {
25485
25486                        # patch for paren-less for/foreach glitch, part 1
25487                        # perl will accept this construct as valid:
25488                        #
25489                        #    foreach my $key qw\Uno Due Tres Quadro\ {
25490                        #        print "Set $key\n";
25491                        #    }
25492                        unless ( $tok eq 'qw' && $is_for_foreach{$want_paren} )
25493                        {
25494                            error_if_expecting_OPERATOR();
25495                        }
25496                    }
25497                    $in_quote                = $quote_items{$tok};
25498                    $allowed_quote_modifiers = $quote_modifiers{$tok};
25499
25500                   # All quote types are 'Q' except possibly qw quotes.
25501                   # qw quotes are special in that they may generally be trimmed
25502                   # of leading and trailing whitespace.  So they are given a
25503                   # separate type, 'q', unless requested otherwise.
25504                    $type =
25505                      ( $tok eq 'qw' && $tokenizer_self->{_trim_qw} )
25506                      ? 'q'
25507                      : 'Q';
25508                    $quote_type = $type;
25509                }
25510
25511                # check for a statement label
25512                elsif (
25513                       ( $next_nonblank_token eq ':' )
25514                    && ( $$rtokens[ $i_next + 1 ] ne ':' )
25515                    && ( $i_next <= $max_token_index )      # colon on same line
25516                    && label_ok()
25517                  )
25518                {
25519                    if ( $tok !~ /[A-Z]/ ) {
25520                        push @{ $tokenizer_self->{_rlower_case_labels_at} },
25521                          $input_line_number;
25522                    }
25523                    $type = 'J';
25524                    $tok .= ':';
25525                    $i = $i_next;
25526                    next;
25527                }
25528
25529                #      'sub' || 'package'
25530                elsif ( $is_sub_package{$tok_kw} ) {
25531                    error_if_expecting_OPERATOR()
25532                      if ( $expecting == OPERATOR );
25533                    scan_id();
25534                }
25535
25536                # Note on token types for format, __DATA__, __END__:
25537                # It simplifies things to give these type ';', so that when we
25538                # start rescanning we will be expecting a token of type TERM.
25539                # We will switch to type 'k' before outputting the tokens.
25540                elsif ( $is_format_END_DATA{$tok_kw} ) {
25541                    $type = ';';    # make tokenizer look for TERM next
25542                    $tokenizer_self->{ $is_format_END_DATA{$tok_kw} } = 1;
25543                    last;
25544                }
25545
25546                elsif ( $is_keyword{$tok_kw} ) {
25547                    $type = 'k';
25548
25549                    # Since for and foreach may not be followed immediately
25550                    # by an opening paren, we have to remember which keyword
25551                    # is associated with the next '('
25552                    if ( $is_for_foreach{$tok} ) {
25553                        if ( new_statement_ok() ) {
25554                            $want_paren = $tok;
25555                        }
25556                    }
25557
25558                    # recognize 'use' statements, which are special
25559                    elsif ( $is_use_require{$tok} ) {
25560                        $statement_type = $tok;
25561                        error_if_expecting_OPERATOR()
25562                          if ( $expecting == OPERATOR );
25563                    }
25564
25565                    # remember my and our to check for trailing ": shared"
25566                    elsif ( $is_my_our{$tok} ) {
25567                        $statement_type = $tok;
25568                    }
25569
25570                    # Check for misplaced 'elsif' and 'else', but allow isolated
25571                    # else or elsif blocks to be formatted.  This is indicated
25572                    # by a last noblank token of ';'
25573                    elsif ( $tok eq 'elsif' ) {
25574                        if (   $last_nonblank_token ne ';'
25575                            && $last_nonblank_block_type !~
25576                            /^(if|elsif|unless)$/ )
25577                        {
25578                            warning(
25579"expecting '$tok' to follow one of 'if|elsif|unless'\n"
25580                            );
25581                        }
25582                    }
25583                    elsif ( $tok eq 'else' ) {
25584
25585                        # patched for SWITCH/CASE
25586                        if (   $last_nonblank_token ne ';'
25587                            && $last_nonblank_block_type !~
25588                            /^(if|elsif|unless|case|when)$/ )
25589                        {
25590                            warning(
25591"expecting '$tok' to follow one of 'if|elsif|unless|case|when'\n"
25592                            );
25593                        }
25594                    }
25595                    elsif ( $tok eq 'continue' ) {
25596                        if (   $last_nonblank_token ne ';'
25597                            && $last_nonblank_block_type !~
25598                            /(^(\{|\}|;|while|until|for|foreach)|:$)/ )
25599                        {
25600
25601                            # note: ';' '{' and '}' in list above
25602                            # because continues can follow bare blocks;
25603                            # ':' is labeled block
25604                            #
25605                            ############################################
25606                            # NOTE: This check has been deactivated because
25607                            # continue has an alternative usage for given/when
25608                            # blocks in perl 5.10
25609                            ## warning("'$tok' should follow a block\n");
25610                            ############################################
25611                        }
25612                    }
25613
25614                    # patch for SWITCH/CASE if 'case' and 'when are
25615                    # treated as keywords.
25616                    elsif ( $tok eq 'when' || $tok eq 'case' ) {
25617                        $statement_type = $tok;    # next '{' is block
25618                    }
25619
25620                    #
25621                    # indent trailing if/unless/while/until
25622                    # outdenting will be handled by later indentation loop
25623## DEACTIVATED: unfortunately this can cause some unwanted indentation like:
25624##$opt_o = 1
25625##  if !(
25626##             $opt_b
25627##          || $opt_c
25628##          || $opt_d
25629##          || $opt_f
25630##          || $opt_i
25631##          || $opt_l
25632##          || $opt_o
25633##          || $opt_x
25634##  );
25635##                    if (   $tok =~ /^(if|unless|while|until)$/
25636##                        && $next_nonblank_token ne '(' )
25637##                    {
25638##                        $indent_flag = 1;
25639##                    }
25640                }
25641
25642                # check for inline label following
25643                #         /^(redo|last|next|goto)$/
25644                elsif (( $last_nonblank_type eq 'k' )
25645                    && ( $is_redo_last_next_goto{$last_nonblank_token} ) )
25646                {
25647                    $type = 'j';
25648                    next;
25649                }
25650
25651                # something else --
25652                else {
25653
25654                    scan_bare_identifier();
25655                    if ( $type eq 'w' ) {
25656
25657                        if ( $expecting == OPERATOR ) {
25658
25659                            # don't complain about possible indirect object
25660                            # notation.
25661                            # For example:
25662                            #   package main;
25663                            #   sub new($) { ... }
25664                            #   $b = new A::;  # calls A::new
25665                            #   $c = new A;    # same thing but suspicious
25666                            # This will call A::new but we have a 'new' in
25667                            # main:: which looks like a constant.
25668                            #
25669                            if ( $last_nonblank_type eq 'C' ) {
25670                                if ( $tok !~ /::$/ ) {
25671                                    complain(<<EOM);
25672Expecting operator after '$last_nonblank_token' but found bare word '$tok'
25673       Maybe indirectet object notation?
25674EOM
25675                                }
25676                            }
25677                            else {
25678                                error_if_expecting_OPERATOR("bareword");
25679                            }
25680                        }
25681
25682                        # mark bare words immediately followed by a paren as
25683                        # functions
25684                        $next_tok = $$rtokens[ $i + 1 ];
25685                        if ( $next_tok eq '(' ) {
25686                            $type = 'U';
25687                        }
25688
25689                        # underscore after file test operator is file handle
25690                        if ( $tok eq '_' && $last_nonblank_type eq 'F' ) {
25691                            $type = 'Z';
25692                        }
25693
25694                        # patch for SWITCH/CASE if 'case' and 'when are
25695                        # not treated as keywords:
25696                        if (
25697                            (
25698                                   $tok eq 'case'
25699                                && $brace_type[$brace_depth] eq 'switch'
25700                            )
25701                            || (   $tok eq 'when'
25702                                && $brace_type[$brace_depth] eq 'given' )
25703                          )
25704                        {
25705                            $statement_type = $tok;    # next '{' is block
25706                            $type = 'k';    # for keyword syntax coloring
25707                        }
25708
25709                        # patch for SWITCH/CASE if switch and given not keywords
25710                        # Switch is not a perl 5 keyword, but we will gamble
25711                        # and mark switch followed by paren as a keyword.  This
25712                        # is only necessary to get html syntax coloring nice,
25713                        # and does not commit this as being a switch/case.
25714                        if ( $next_nonblank_token eq '('
25715                            && ( $tok eq 'switch' || $tok eq 'given' ) )
25716                        {
25717                            $type = 'k';    # for keyword syntax coloring
25718                        }
25719                    }
25720                }
25721            }
25722
25723            ###############################################################
25724            # section 2: strings of digits
25725            ###############################################################
25726            elsif ( $pre_type eq 'd' ) {
25727                $expecting = operator_expected( $prev_type, $tok, $next_type );
25728                error_if_expecting_OPERATOR("Number")
25729                  if ( $expecting == OPERATOR );
25730                my $number = scan_number();
25731                if ( !defined($number) ) {
25732
25733                    # shouldn't happen - we should always get a number
25734                    warning("non-number beginning with digit--program bug\n");
25735                    report_definite_bug();
25736                }
25737            }
25738
25739            ###############################################################
25740            # section 3: all other tokens
25741            ###############################################################
25742
25743            else {
25744                last if ( $tok eq '#' );
25745                my $code = $tokenization_code->{$tok};
25746                if ($code) {
25747                    $expecting =
25748                      operator_expected( $prev_type, $tok, $next_type );
25749                    $code->();
25750                    redo if $in_quote;
25751                }
25752            }
25753        }
25754
25755        # -----------------------------
25756        # end of main tokenization loop
25757        # -----------------------------
25758
25759        if ( $i_tok >= 0 ) {
25760            $routput_token_type->[$i_tok]     = $type;
25761            $routput_block_type->[$i_tok]     = $block_type;
25762            $routput_container_type->[$i_tok] = $container_type;
25763            $routput_type_sequence->[$i_tok]  = $type_sequence;
25764            $routput_indent_flag->[$i_tok]    = $indent_flag;
25765        }
25766
25767        unless ( ( $type eq 'b' ) || ( $type eq '#' ) ) {
25768            $last_last_nonblank_token          = $last_nonblank_token;
25769            $last_last_nonblank_type           = $last_nonblank_type;
25770            $last_last_nonblank_block_type     = $last_nonblank_block_type;
25771            $last_last_nonblank_container_type = $last_nonblank_container_type;
25772            $last_last_nonblank_type_sequence  = $last_nonblank_type_sequence;
25773            $last_nonblank_token               = $tok;
25774            $last_nonblank_type                = $type;
25775            $last_nonblank_block_type          = $block_type;
25776            $last_nonblank_container_type      = $container_type;
25777            $last_nonblank_type_sequence       = $type_sequence;
25778            $last_nonblank_prototype           = $prototype;
25779        }
25780
25781        # reset indentation level if necessary at a sub or package
25782        # in an attempt to recover from a nesting error
25783        if ( $level_in_tokenizer < 0 ) {
25784            if ( $input_line =~ /^\s*(sub|package)\s+(\w+)/ ) {
25785                reset_indentation_level(0);
25786                brace_warning("resetting level to 0 at $1 $2\n");
25787            }
25788        }
25789
25790        # all done tokenizing this line ...
25791        # now prepare the final list of tokens and types
25792
25793        my @token_type     = ();   # stack of output token types
25794        my @block_type     = ();   # stack of output code block types
25795        my @container_type = ();   # stack of output code container types
25796        my @type_sequence  = ();   # stack of output type sequence numbers
25797        my @tokens         = ();   # output tokens
25798        my @levels         = ();   # structural brace levels of output tokens
25799        my @slevels        = ();   # secondary nesting levels of output tokens
25800        my @nesting_tokens = ();   # string of tokens leading to this depth
25801        my @nesting_types  = ();   # string of token types leading to this depth
25802        my @nesting_blocks = ();   # string of block types leading to this depth
25803        my @nesting_lists  = ();   # string of list types leading to this depth
25804        my @ci_string = ();  # string needed to compute continuation indentation
25805        my @container_environment = ();    # BLOCK or LIST
25806        my $container_environment = '';
25807        my $im                    = -1;    # previous $i value
25808        my $num;
25809        my $ci_string_sum = ones_count($ci_string_in_tokenizer);
25810
25811# Computing Token Indentation
25812#
25813#     The final section of the tokenizer forms tokens and also computes
25814#     parameters needed to find indentation.  It is much easier to do it
25815#     in the tokenizer than elsewhere.  Here is a brief description of how
25816#     indentation is computed.  Perl::Tidy computes indentation as the sum
25817#     of 2 terms:
25818#
25819#     (1) structural indentation, such as if/else/elsif blocks
25820#     (2) continuation indentation, such as long parameter call lists.
25821#
25822#     These are occasionally called primary and secondary indentation.
25823#
25824#     Structural indentation is introduced by tokens of type '{', although
25825#     the actual tokens might be '{', '(', or '['.  Structural indentation
25826#     is of two types: BLOCK and non-BLOCK.  Default structural indentation
25827#     is 4 characters if the standard indentation scheme is used.
25828#
25829#     Continuation indentation is introduced whenever a line at BLOCK level
25830#     is broken before its termination.  Default continuation indentation
25831#     is 2 characters in the standard indentation scheme.
25832#
25833#     Both types of indentation may be nested arbitrarily deep and
25834#     interlaced.  The distinction between the two is somewhat arbitrary.
25835#
25836#     For each token, we will define two variables which would apply if
25837#     the current statement were broken just before that token, so that
25838#     that token started a new line:
25839#
25840#     $level = the structural indentation level,
25841#     $ci_level = the continuation indentation level
25842#
25843#     The total indentation will be $level * (4 spaces) + $ci_level * (2 spaces),
25844#     assuming defaults.  However, in some special cases it is customary
25845#     to modify $ci_level from this strict value.
25846#
25847#     The total structural indentation is easy to compute by adding and
25848#     subtracting 1 from a saved value as types '{' and '}' are seen.  The
25849#     running value of this variable is $level_in_tokenizer.
25850#
25851#     The total continuation is much more difficult to compute, and requires
25852#     several variables.  These veriables are:
25853#
25854#     $ci_string_in_tokenizer = a string of 1's and 0's indicating, for
25855#       each indentation level, if there are intervening open secondary
25856#       structures just prior to that level.
25857#     $continuation_string_in_tokenizer = a string of 1's and 0's indicating
25858#       if the last token at that level is "continued", meaning that it
25859#       is not the first token of an expression.
25860#     $nesting_block_string = a string of 1's and 0's indicating, for each
25861#       indentation level, if the level is of type BLOCK or not.
25862#     $nesting_block_flag = the most recent 1 or 0 of $nesting_block_string
25863#     $nesting_list_string = a string of 1's and 0's indicating, for each
25864#       indentation level, if it is is appropriate for list formatting.
25865#       If so, continuation indentation is used to indent long list items.
25866#     $nesting_list_flag = the most recent 1 or 0 of $nesting_list_string
25867#     @{$rslevel_stack} = a stack of total nesting depths at each
25868#       structural indentation level, where "total nesting depth" means
25869#       the nesting depth that would occur if every nesting token -- '{', '[',
25870#       and '(' -- , regardless of context, is used to compute a nesting
25871#       depth.
25872
25873        #my $nesting_block_flag = ($nesting_block_string =~ /1$/);
25874        #my $nesting_list_flag = ($nesting_list_string =~ /1$/);
25875
25876        my ( $ci_string_i, $level_i, $nesting_block_string_i,
25877            $nesting_list_string_i, $nesting_token_string_i,
25878            $nesting_type_string_i, );
25879
25880        foreach $i ( @{$routput_token_list} )
25881        {    # scan the list of pre-tokens indexes
25882
25883            # self-checking for valid token types
25884            my $type                    = $routput_token_type->[$i];
25885            my $forced_indentation_flag = $routput_indent_flag->[$i];
25886
25887            # See if we should undo the $forced_indentation_flag.
25888            # Forced indentation after 'if', 'unless', 'while' and 'until'
25889            # expressions without trailing parens is optional and doesn't
25890            # always look good.  It is usually okay for a trailing logical
25891            # expression, but if the expression is a function call, code block,
25892            # or some kind of list it puts in an unwanted extra indentation
25893            # level which is hard to remove.
25894            #
25895            # Example where extra indentation looks ok:
25896            # return 1
25897            #   if $det_a < 0 and $det_b > 0
25898            #       or $det_a > 0 and $det_b < 0;
25899            #
25900            # Example where extra indentation is not needed because
25901            # the eval brace also provides indentation:
25902            # print "not " if defined eval {
25903            #     reduce { die if $b > 2; $a + $b } 0, 1, 2, 3, 4;
25904            # };
25905            #
25906            # The following rule works fairly well:
25907            #   Undo the flag if the end of this line, or start of the next
25908            #   line, is an opening container token or a comma.
25909            # This almost always works, but if not after another pass it will
25910            # be stable.
25911            if ( $forced_indentation_flag && $type eq 'k' ) {
25912                my $ixlast  = -1;
25913                my $ilast   = $routput_token_list->[$ixlast];
25914                my $toklast = $routput_token_type->[$ilast];
25915                if ( $toklast eq '#' ) {
25916                    $ixlast--;
25917                    $ilast   = $routput_token_list->[$ixlast];
25918                    $toklast = $routput_token_type->[$ilast];
25919                }
25920                if ( $toklast eq 'b' ) {
25921                    $ixlast--;
25922                    $ilast   = $routput_token_list->[$ixlast];
25923                    $toklast = $routput_token_type->[$ilast];
25924                }
25925                if ( $toklast =~ /^[\{,]$/ ) {
25926                    $forced_indentation_flag = 0;
25927                }
25928                else {
25929                    ( $toklast, my $i_next ) =
25930                      find_next_nonblank_token( $max_token_index, $rtokens,
25931                        $max_token_index );
25932                    if ( $toklast =~ /^[\{,]$/ ) {
25933                        $forced_indentation_flag = 0;
25934                    }
25935                }
25936            }
25937
25938            # if we are already in an indented if, see if we should outdent
25939            if ($indented_if_level) {
25940
25941                # don't try to nest trailing if's - shouldn't happen
25942                if ( $type eq 'k' ) {
25943                    $forced_indentation_flag = 0;
25944                }
25945
25946                # check for the normal case - outdenting at next ';'
25947                elsif ( $type eq ';' ) {
25948                    if ( $level_in_tokenizer == $indented_if_level ) {
25949                        $forced_indentation_flag = -1;
25950                        $indented_if_level       = 0;
25951                    }
25952                }
25953
25954                # handle case of missing semicolon
25955                elsif ( $type eq '}' ) {
25956                    if ( $level_in_tokenizer == $indented_if_level ) {
25957                        $indented_if_level = 0;
25958
25959                        # TBD: This could be a subroutine call
25960                        $level_in_tokenizer--;
25961                        if ( @{$rslevel_stack} > 1 ) {
25962                            pop( @{$rslevel_stack} );
25963                        }
25964                        if ( length($nesting_block_string) > 1 )
25965                        {    # true for valid script
25966                            chop $nesting_block_string;
25967                            chop $nesting_list_string;
25968                        }
25969
25970                    }
25971                }
25972            }
25973
25974            my $tok = $$rtokens[$i];   # the token, but ONLY if same as pretoken
25975            $level_i = $level_in_tokenizer;
25976
25977            # This can happen by running perltidy on non-scripts
25978            # although it could also be bug introduced by programming change.
25979            # Perl silently accepts a 032 (^Z) and takes it as the end
25980            if ( !$is_valid_token_type{$type} ) {
25981                my $val = ord($type);
25982                warning(
25983                    "unexpected character decimal $val ($type) in script\n");
25984                $tokenizer_self->{_in_error} = 1;
25985            }
25986
25987            # ----------------------------------------------------------------
25988            # TOKEN TYPE PATCHES
25989            #  output __END__, __DATA__, and format as type 'k' instead of ';'
25990            # to make html colors correct, etc.
25991            my $fix_type = $type;
25992            if ( $type eq ';' && $tok =~ /\w/ ) { $fix_type = 'k' }
25993
25994            # output anonymous 'sub' as keyword
25995            if ( $type eq 't' && $tok eq 'sub' ) { $fix_type = 'k' }
25996
25997            # -----------------------------------------------------------------
25998
25999            $nesting_token_string_i = $nesting_token_string;
26000            $nesting_type_string_i  = $nesting_type_string;
26001            $nesting_block_string_i = $nesting_block_string;
26002            $nesting_list_string_i  = $nesting_list_string;
26003
26004            # set primary indentation levels based on structural braces
26005            # Note: these are set so that the leading braces have a HIGHER
26006            # level than their CONTENTS, which is convenient for indentation
26007            # Also, define continuation indentation for each token.
26008            if ( $type eq '{' || $type eq 'L' || $forced_indentation_flag > 0 )
26009            {
26010
26011                # use environment before updating
26012                $container_environment =
26013                    $nesting_block_flag ? 'BLOCK'
26014                  : $nesting_list_flag  ? 'LIST'
26015                  :                       "";
26016
26017                # if the difference between total nesting levels is not 1,
26018                # there are intervening non-structural nesting types between
26019                # this '{' and the previous unclosed '{'
26020                my $intervening_secondary_structure = 0;
26021                if ( @{$rslevel_stack} ) {
26022                    $intervening_secondary_structure =
26023                      $slevel_in_tokenizer - $rslevel_stack->[-1];
26024                }
26025
26026     # Continuation Indentation
26027     #
26028     # Having tried setting continuation indentation both in the formatter and
26029     # in the tokenizer, I can say that setting it in the tokenizer is much,
26030     # much easier.  The formatter already has too much to do, and can't
26031     # make decisions on line breaks without knowing what 'ci' will be at
26032     # arbitrary locations.
26033     #
26034     # But a problem with setting the continuation indentation (ci) here
26035     # in the tokenizer is that we do not know where line breaks will actually
26036     # be.  As a result, we don't know if we should propagate continuation
26037     # indentation to higher levels of structure.
26038     #
26039     # For nesting of only structural indentation, we never need to do this.
26040     # For example, in a long if statement, like this
26041     #
26042     #   if ( !$output_block_type[$i]
26043     #     && ($in_statement_continuation) )
26044     #   {           <--outdented
26045     #       do_something();
26046     #   }
26047     #
26048     # the second line has ci but we do normally give the lines within the BLOCK
26049     # any ci.  This would be true if we had blocks nested arbitrarily deeply.
26050     #
26051     # But consider something like this, where we have created a break after
26052     # an opening paren on line 1, and the paren is not (currently) a
26053     # structural indentation token:
26054     #
26055     # my $file = $menubar->Menubutton(
26056     #   qw/-text File -underline 0 -menuitems/ => [
26057     #       [
26058     #           Cascade    => '~View',
26059     #           -menuitems => [
26060     #           ...
26061     #
26062     # The second line has ci, so it would seem reasonable to propagate it
26063     # down, giving the third line 1 ci + 1 indentation.  This suggests the
26064     # following rule, which is currently used to propagating ci down: if there
26065     # are any non-structural opening parens (or brackets, or braces), before
26066     # an opening structural brace, then ci is propagated down, and otherwise
26067     # not.  The variable $intervening_secondary_structure contains this
26068     # information for the current token, and the string
26069     # "$ci_string_in_tokenizer" is a stack of previous values of this
26070     # variable.
26071
26072                # save the current states
26073                push( @{$rslevel_stack}, 1 + $slevel_in_tokenizer );
26074                $level_in_tokenizer++;
26075
26076                if ($forced_indentation_flag) {
26077
26078                    # break BEFORE '?' when there is forced indentation
26079                    if ( $type eq '?' ) { $level_i = $level_in_tokenizer; }
26080                    if ( $type eq 'k' ) {
26081                        $indented_if_level = $level_in_tokenizer;
26082                    }
26083
26084                    # do not change container environement here if we are not
26085                    # at a real list. Adding this check prevents "blinkers"
26086                    # often near 'unless" clauses, such as in the following
26087                    # code:
26088##          next
26089##            unless -e (
26090##                    $archive =
26091##                      File::Spec->catdir( $_, "auto", $root, "$sub$lib_ext" )
26092##            );
26093
26094                    $nesting_block_string .= "$nesting_block_flag";
26095                }
26096                else {
26097
26098                    if ( $routput_block_type->[$i] ) {
26099                        $nesting_block_flag = 1;
26100                        $nesting_block_string .= '1';
26101                    }
26102                    else {
26103                        $nesting_block_flag = 0;
26104                        $nesting_block_string .= '0';
26105                    }
26106                }
26107
26108                # we will use continuation indentation within containers
26109                # which are not blocks and not logical expressions
26110                my $bit = 0;
26111                if ( !$routput_block_type->[$i] ) {
26112
26113                    # propagate flag down at nested open parens
26114                    if ( $routput_container_type->[$i] eq '(' ) {
26115                        $bit = 1 if $nesting_list_flag;
26116                    }
26117
26118                  # use list continuation if not a logical grouping
26119                  # /^(if|elsif|unless|while|and|or|not|&&|!|\|\||for|foreach)$/
26120                    else {
26121                        $bit = 1
26122                          unless
26123                          $is_logical_container{ $routput_container_type->[$i]
26124                          };
26125                    }
26126                }
26127                $nesting_list_string .= $bit;
26128                $nesting_list_flag = $bit;
26129
26130                $ci_string_in_tokenizer .=
26131                  ( $intervening_secondary_structure != 0 ) ? '1' : '0';
26132                $ci_string_sum = ones_count($ci_string_in_tokenizer);
26133                $continuation_string_in_tokenizer .=
26134                  ( $in_statement_continuation > 0 ) ? '1' : '0';
26135
26136   #  Sometimes we want to give an opening brace continuation indentation,
26137   #  and sometimes not.  For code blocks, we don't do it, so that the leading
26138   #  '{' gets outdented, like this:
26139   #
26140   #   if ( !$output_block_type[$i]
26141   #     && ($in_statement_continuation) )
26142   #   {           <--outdented
26143   #
26144   #  For other types, we will give them continuation indentation.  For example,
26145   #  here is how a list looks with the opening paren indented:
26146   #
26147   #     @LoL =
26148   #       ( [ "fred", "barney" ], [ "george", "jane", "elroy" ],
26149   #         [ "homer", "marge", "bart" ], );
26150   #
26151   #  This looks best when 'ci' is one-half of the indentation  (i.e., 2 and 4)
26152
26153                my $total_ci = $ci_string_sum;
26154                if (
26155                    !$routput_block_type->[$i]    # patch: skip for BLOCK
26156                    && ($in_statement_continuation)
26157                    && !( $forced_indentation_flag && $type eq ':' )
26158                  )
26159                {
26160                    $total_ci += $in_statement_continuation
26161                      unless ( $ci_string_in_tokenizer =~ /1$/ );
26162                }
26163
26164                $ci_string_i               = $total_ci;
26165                $in_statement_continuation = 0;
26166            }
26167
26168            elsif ($type eq '}'
26169                || $type eq 'R'
26170                || $forced_indentation_flag < 0 )
26171            {
26172
26173                # only a nesting error in the script would prevent popping here
26174                if ( @{$rslevel_stack} > 1 ) { pop( @{$rslevel_stack} ); }
26175
26176                $level_i = --$level_in_tokenizer;
26177
26178                # restore previous level values
26179                if ( length($nesting_block_string) > 1 )
26180                {    # true for valid script
26181                    chop $nesting_block_string;
26182                    $nesting_block_flag = ( $nesting_block_string =~ /1$/ );
26183                    chop $nesting_list_string;
26184                    $nesting_list_flag = ( $nesting_list_string =~ /1$/ );
26185
26186                    chop $ci_string_in_tokenizer;
26187                    $ci_string_sum = ones_count($ci_string_in_tokenizer);
26188
26189                    $in_statement_continuation =
26190                      chop $continuation_string_in_tokenizer;
26191
26192                    # zero continuation flag at terminal BLOCK '}' which
26193                    # ends a statement.
26194                    if ( $routput_block_type->[$i] ) {
26195
26196                        # ...These include non-anonymous subs
26197                        # note: could be sub ::abc { or sub 'abc
26198                        if ( $routput_block_type->[$i] =~ m/^sub\s*/gc ) {
26199
26200                         # note: older versions of perl require the /gc modifier
26201                         # here or else the \G does not work.
26202                            if ( $routput_block_type->[$i] =~ /\G('|::|\w)/gc )
26203                            {
26204                                $in_statement_continuation = 0;
26205                            }
26206                        }
26207
26208# ...and include all block types except user subs with
26209# block prototypes and these: (sort|grep|map|do|eval)
26210# /^(\}|\{|BEGIN|END|CHECK|INIT|AUTOLOAD|DESTROY|UNITCHECK|continue|;|if|elsif|else|unless|while|until|for|foreach)$/
26211                        elsif (
26212                            $is_zero_continuation_block_type{
26213                                $routput_block_type->[$i]
26214                            } )
26215                        {
26216                            $in_statement_continuation = 0;
26217                        }
26218
26219                        # ..but these are not terminal types:
26220                        #     /^(sort|grep|map|do|eval)$/ )
26221                        elsif (
26222                            $is_not_zero_continuation_block_type{
26223                                $routput_block_type->[$i]
26224                            } )
26225                        {
26226                        }
26227
26228                        # ..and a block introduced by a label
26229                        # /^\w+\s*:$/gc ) {
26230                        elsif ( $routput_block_type->[$i] =~ /:$/ ) {
26231                            $in_statement_continuation = 0;
26232                        }
26233
26234                        # user function with block prototype
26235                        else {
26236                            $in_statement_continuation = 0;
26237                        }
26238                    }
26239
26240                    # If we are in a list, then
26241                    # we must set continuatoin indentation at the closing
26242                    # paren of something like this (paren after $check):
26243                    #     assert(
26244                    #         __LINE__,
26245                    #         ( not defined $check )
26246                    #           or ref $check
26247                    #           or $check eq "new"
26248                    #           or $check eq "old",
26249                    #     );
26250                    elsif ( $tok eq ')' ) {
26251                        $in_statement_continuation = 1
26252                          if $routput_container_type->[$i] =~ /^[;,\{\}]$/;
26253                    }
26254
26255                    elsif ( $tok eq ';' ) { $in_statement_continuation = 0 }
26256                }
26257
26258                # use environment after updating
26259                $container_environment =
26260                    $nesting_block_flag ? 'BLOCK'
26261                  : $nesting_list_flag  ? 'LIST'
26262                  :                       "";
26263                $ci_string_i = $ci_string_sum + $in_statement_continuation;
26264                $nesting_block_string_i = $nesting_block_string;
26265                $nesting_list_string_i  = $nesting_list_string;
26266            }
26267
26268            # not a structural indentation type..
26269            else {
26270
26271                $container_environment =
26272                    $nesting_block_flag ? 'BLOCK'
26273                  : $nesting_list_flag  ? 'LIST'
26274                  :                       "";
26275
26276                # zero the continuation indentation at certain tokens so
26277                # that they will be at the same level as its container.  For
26278                # commas, this simplifies the -lp indentation logic, which
26279                # counts commas.  For ?: it makes them stand out.
26280                if ($nesting_list_flag) {
26281                    if ( $type =~ /^[,\?\:]$/ ) {
26282                        $in_statement_continuation = 0;
26283                    }
26284                }
26285
26286                # be sure binary operators get continuation indentation
26287                if (
26288                    $container_environment
26289                    && (   $type eq 'k' && $is_binary_keyword{$tok}
26290                        || $is_binary_type{$type} )
26291                  )
26292                {
26293                    $in_statement_continuation = 1;
26294                }
26295
26296                # continuation indentation is sum of any open ci from previous
26297                # levels plus the current level
26298                $ci_string_i = $ci_string_sum + $in_statement_continuation;
26299
26300                # update continuation flag ...
26301                # if this isn't a blank or comment..
26302                if ( $type ne 'b' && $type ne '#' ) {
26303
26304                    # and we are in a BLOCK
26305                    if ($nesting_block_flag) {
26306
26307                        # the next token after a ';' and label starts a new stmt
26308                        if ( $type eq ';' || $type eq 'J' ) {
26309                            $in_statement_continuation = 0;
26310                        }
26311
26312                        # otherwise, we are continuing the current statement
26313                        else {
26314                            $in_statement_continuation = 1;
26315                        }
26316                    }
26317
26318                    # if we are not in a BLOCK..
26319                    else {
26320
26321                        # do not use continuation indentation if not list
26322                        # environment (could be within if/elsif clause)
26323                        if ( !$nesting_list_flag ) {
26324                            $in_statement_continuation = 0;
26325                        }
26326
26327                       # otherwise, the next token after a ',' starts a new term
26328                        elsif ( $type eq ',' ) {
26329                            $in_statement_continuation = 0;
26330                        }
26331
26332                        # otherwise, we are continuing the current term
26333                        else {
26334                            $in_statement_continuation = 1;
26335                        }
26336                    }
26337                }
26338            }
26339
26340            if ( $level_in_tokenizer < 0 ) {
26341                unless ( $tokenizer_self->{_saw_negative_indentation} ) {
26342                    $tokenizer_self->{_saw_negative_indentation} = 1;
26343                    warning("Starting negative indentation\n");
26344                }
26345            }
26346
26347            # set secondary nesting levels based on all continment token types
26348            # Note: these are set so that the nesting depth is the depth
26349            # of the PREVIOUS TOKEN, which is convenient for setting
26350            # the stength of token bonds
26351            my $slevel_i = $slevel_in_tokenizer;
26352
26353            #    /^[L\{\(\[]$/
26354            if ( $is_opening_type{$type} ) {
26355                $slevel_in_tokenizer++;
26356                $nesting_token_string .= $tok;
26357                $nesting_type_string  .= $type;
26358            }
26359
26360            #       /^[R\}\)\]]$/
26361            elsif ( $is_closing_type{$type} ) {
26362                $slevel_in_tokenizer--;
26363                my $char = chop $nesting_token_string;
26364
26365                if ( $char ne $matching_start_token{$tok} ) {
26366                    $nesting_token_string .= $char . $tok;
26367                    $nesting_type_string  .= $type;
26368                }
26369                else {
26370                    chop $nesting_type_string;
26371                }
26372            }
26373
26374            push( @block_type,            $routput_block_type->[$i] );
26375            push( @ci_string,             $ci_string_i );
26376            push( @container_environment, $container_environment );
26377            push( @container_type,        $routput_container_type->[$i] );
26378            push( @levels,                $level_i );
26379            push( @nesting_tokens,        $nesting_token_string_i );
26380            push( @nesting_types,         $nesting_type_string_i );
26381            push( @slevels,               $slevel_i );
26382            push( @token_type,            $fix_type );
26383            push( @type_sequence,         $routput_type_sequence->[$i] );
26384            push( @nesting_blocks,        $nesting_block_string );
26385            push( @nesting_lists,         $nesting_list_string );
26386
26387            # now form the previous token
26388            if ( $im >= 0 ) {
26389                $num =
26390                  $$rtoken_map[$i] - $$rtoken_map[$im];    # how many characters
26391
26392                if ( $num > 0 ) {
26393                    push( @tokens,
26394                        substr( $input_line, $$rtoken_map[$im], $num ) );
26395                }
26396            }
26397            $im = $i;
26398        }
26399
26400        $num = length($input_line) - $$rtoken_map[$im];    # make the last token
26401        if ( $num > 0 ) {
26402            push( @tokens, substr( $input_line, $$rtoken_map[$im], $num ) );
26403        }
26404
26405        $tokenizer_self->{_in_attribute_list} = $in_attribute_list;
26406        $tokenizer_self->{_in_quote}          = $in_quote;
26407        $tokenizer_self->{_quote_target} =
26408          $in_quote ? matching_end_token($quote_character) : "";
26409        $tokenizer_self->{_rhere_target_list} = $rhere_target_list;
26410
26411        $line_of_tokens->{_rtoken_type}            = \@token_type;
26412        $line_of_tokens->{_rtokens}                = \@tokens;
26413        $line_of_tokens->{_rblock_type}            = \@block_type;
26414        $line_of_tokens->{_rcontainer_type}        = \@container_type;
26415        $line_of_tokens->{_rcontainer_environment} = \@container_environment;
26416        $line_of_tokens->{_rtype_sequence}         = \@type_sequence;
26417        $line_of_tokens->{_rlevels}                = \@levels;
26418        $line_of_tokens->{_rslevels}               = \@slevels;
26419        $line_of_tokens->{_rnesting_tokens}        = \@nesting_tokens;
26420        $line_of_tokens->{_rci_levels}             = \@ci_string;
26421        $line_of_tokens->{_rnesting_blocks}        = \@nesting_blocks;
26422
26423        return;
26424    }
26425}    # end tokenize_this_line
26426
26427#########i#############################################################
26428# Tokenizer routines which assist in identifying token types
26429#######################################################################
26430
26431sub operator_expected {
26432
26433    # Many perl symbols have two or more meanings.  For example, '<<'
26434    # can be a shift operator or a here-doc operator.  The
26435    # interpretation of these symbols depends on the current state of
26436    # the tokenizer, which may either be expecting a term or an
26437    # operator.  For this example, a << would be a shift if an operator
26438    # is expected, and a here-doc if a term is expected.  This routine
26439    # is called to make this decision for any current token.  It returns
26440    # one of three possible values:
26441    #
26442    #     OPERATOR - operator expected (or at least, not a term)
26443    #     UNKNOWN  - can't tell
26444    #     TERM     - a term is expected (or at least, not an operator)
26445    #
26446    # The decision is based on what has been seen so far.  This
26447    # information is stored in the "$last_nonblank_type" and
26448    # "$last_nonblank_token" variables.  For example, if the
26449    # $last_nonblank_type is '=~', then we are expecting a TERM, whereas
26450    # if $last_nonblank_type is 'n' (numeric), we are expecting an
26451    # OPERATOR.
26452    #
26453    # If a UNKNOWN is returned, the calling routine must guess. A major
26454    # goal of this tokenizer is to minimize the possibility of returning
26455    # UNKNOWN, because a wrong guess can spoil the formatting of a
26456    # script.
26457    #
26458    # adding NEW_TOKENS: it is critically important that this routine be
26459    # updated to allow it to determine if an operator or term is to be
26460    # expected after the new token.  Doing this simply involves adding
26461    # the new token character to one of the regexes in this routine or
26462    # to one of the hash lists
26463    # that it uses, which are initialized in the BEGIN section.
26464    # USES GLOBAL VARIABLES: $last_nonblank_type, $last_nonblank_token,
26465    # $statement_type
26466
26467    my ( $prev_type, $tok, $next_type ) = @_;
26468
26469    my $op_expected = UNKNOWN;
26470
26471##print "tok=$tok last type=$last_nonblank_type last tok=$last_nonblank_token\n";
26472
26473# Note: function prototype is available for token type 'U' for future
26474# program development.  It contains the leading and trailing parens,
26475# and no blanks.  It might be used to eliminate token type 'C', for
26476# example (prototype = '()'). Thus:
26477# if ($last_nonblank_type eq 'U') {
26478#     print "previous token=$last_nonblank_token  type=$last_nonblank_type prototype=$last_nonblank_prototype\n";
26479# }
26480
26481    # A possible filehandle (or object) requires some care...
26482    if ( $last_nonblank_type eq 'Z' ) {
26483
26484        # angle.t
26485        if ( $last_nonblank_token =~ /^[A-Za-z_]/ ) {
26486            $op_expected = UNKNOWN;
26487        }
26488
26489        # For possible file handle like "$a", Perl uses weird parsing rules.
26490        # For example:
26491        # print $a/2,"/hi";   - division
26492        # print $a / 2,"/hi"; - division
26493        # print $a/ 2,"/hi";  - division
26494        # print $a /2,"/hi";  - pattern (and error)!
26495        elsif ( ( $prev_type eq 'b' ) && ( $next_type ne 'b' ) ) {
26496            $op_expected = TERM;
26497        }
26498
26499        # Note when an operation is being done where a
26500        # filehandle might be expected, since a change in whitespace
26501        # could change the interpretation of the statement.
26502        else {
26503            if ( $tok =~ /^([x\/\+\-\*\%\&\.\?\<]|\>\>)$/ ) {
26504                complain("operator in print statement not recommended\n");
26505                $op_expected = OPERATOR;
26506            }
26507        }
26508    }
26509
26510    # Check for smartmatch operator before preceding brace or square bracket.
26511    # For example, at the ? after the ] in the following expressions we are
26512    # expecting an operator:
26513    #
26514    # qr/3/ ~~ ['1234'] ? 1 : 0;
26515    # map { $_ ~~ [ '0', '1' ] ? 'x' : 'o' } @a;
26516    elsif ( $last_nonblank_type eq '}' && $last_nonblank_token eq '~~' ) {
26517        $op_expected = OPERATOR;
26518    }
26519
26520    # handle something after 'do' and 'eval'
26521    elsif ( $is_block_operator{$last_nonblank_token} ) {
26522
26523        # something like $a = eval "expression";
26524        #                          ^
26525        if ( $last_nonblank_type eq 'k' ) {
26526            $op_expected = TERM;    # expression or list mode following keyword
26527        }
26528
26529        # something like $a = do { BLOCK } / 2;
26530        # or this ? after a smartmatch anonynomous hash or array reference:
26531        #   qr/3/ ~~ ['1234'] ? 1 : 0;
26532        #                                  ^
26533        else {
26534            $op_expected = OPERATOR;    # block mode following }
26535        }
26536    }
26537
26538    # handle bare word..
26539    elsif ( $last_nonblank_type eq 'w' ) {
26540
26541        # unfortunately, we can't tell what type of token to expect next
26542        # after most bare words
26543        $op_expected = UNKNOWN;
26544    }
26545
26546    # operator, but not term possible after these types
26547    # Note: moved ')' from type to token because parens in list context
26548    # get marked as '{' '}' now.  This is a minor glitch in the following:
26549    #    my %opts = (ref $_[0] eq 'HASH') ? %{shift()} : ();
26550    #
26551    elsif (( $last_nonblank_type =~ /^[\]RnviQh]$/ )
26552        || ( $last_nonblank_token =~ /^(\)|\$|\-\>)/ ) )
26553    {
26554        $op_expected = OPERATOR;
26555
26556        # in a 'use' statement, numbers and v-strings are not true
26557        # numbers, so to avoid incorrect error messages, we will
26558        # mark them as unknown for now (use.t)
26559        # TODO: it would be much nicer to create a new token V for VERSION
26560        # number in a use statement.  Then this could be a check on type V
26561        # and related patches which change $statement_type for '=>'
26562        # and ',' could be removed.  Further, it would clean things up to
26563        # scan the 'use' statement with a separate subroutine.
26564        if (   ( $statement_type eq 'use' )
26565            && ( $last_nonblank_type =~ /^[nv]$/ ) )
26566        {
26567            $op_expected = UNKNOWN;
26568        }
26569
26570        # expecting VERSION or {} after package NAMESPACE
26571        elsif ($statement_type =~ /^package\b/
26572            && $last_nonblank_token =~ /^package\b/ )
26573        {
26574            $op_expected = TERM;
26575        }
26576    }
26577
26578    # no operator after many keywords, such as "die", "warn", etc
26579    elsif ( $expecting_term_token{$last_nonblank_token} ) {
26580
26581        # patch for dor.t (defined or).
26582        # perl functions which may be unary operators
26583        # TODO: This list is incomplete, and these should be put
26584        # into a hash.
26585        if (   $tok eq '/'
26586            && $next_type eq '/'
26587            && $last_nonblank_type eq 'k'
26588            && $last_nonblank_token =~ /^eof|undef|shift|pop$/ )
26589        {
26590            $op_expected = OPERATOR;
26591        }
26592        else {
26593            $op_expected = TERM;
26594        }
26595    }
26596
26597    # no operator after things like + - **  (i.e., other operators)
26598    elsif ( $expecting_term_types{$last_nonblank_type} ) {
26599        $op_expected = TERM;
26600    }
26601
26602    # a few operators, like "time", have an empty prototype () and so
26603    # take no parameters but produce a value to operate on
26604    elsif ( $expecting_operator_token{$last_nonblank_token} ) {
26605        $op_expected = OPERATOR;
26606    }
26607
26608    # post-increment and decrement produce values to be operated on
26609    elsif ( $expecting_operator_types{$last_nonblank_type} ) {
26610        $op_expected = OPERATOR;
26611    }
26612
26613    # no value to operate on after sub block
26614    elsif ( $last_nonblank_token =~ /^sub\s/ ) { $op_expected = TERM; }
26615
26616    # a right brace here indicates the end of a simple block.
26617    # all non-structural right braces have type 'R'
26618    # all braces associated with block operator keywords have been given those
26619    # keywords as "last_nonblank_token" and caught above.
26620    # (This statement is order dependent, and must come after checking
26621    # $last_nonblank_token).
26622    elsif ( $last_nonblank_type eq '}' ) {
26623
26624        # patch for dor.t (defined or).
26625        if (   $tok eq '/'
26626            && $next_type eq '/'
26627            && $last_nonblank_token eq ']' )
26628        {
26629            $op_expected = OPERATOR;
26630        }
26631        else {
26632            $op_expected = TERM;
26633        }
26634    }
26635
26636    # something else..what did I forget?
26637    else {
26638
26639        # collecting diagnostics on unknown operator types..see what was missed
26640        $op_expected = UNKNOWN;
26641        write_diagnostics(
26642"OP: unknown after type=$last_nonblank_type  token=$last_nonblank_token\n"
26643        );
26644    }
26645
26646    TOKENIZER_DEBUG_FLAG_EXPECT && do {
26647        print STDOUT
26648"EXPECT: returns $op_expected for last type $last_nonblank_type token $last_nonblank_token\n";
26649    };
26650    return $op_expected;
26651}
26652
26653sub new_statement_ok {
26654
26655    # return true if the current token can start a new statement
26656    # USES GLOBAL VARIABLES: $last_nonblank_type
26657
26658    return label_ok()    # a label would be ok here
26659
26660      || $last_nonblank_type eq 'J';    # or we follow a label
26661
26662}
26663
26664sub label_ok {
26665
26666    # Decide if a bare word followed by a colon here is a label
26667    # USES GLOBAL VARIABLES: $last_nonblank_token, $last_nonblank_type,
26668    # $brace_depth, @brace_type
26669
26670    # if it follows an opening or closing code block curly brace..
26671    if ( ( $last_nonblank_token eq '{' || $last_nonblank_token eq '}' )
26672        && $last_nonblank_type eq $last_nonblank_token )
26673    {
26674
26675        # it is a label if and only if the curly encloses a code block
26676        return $brace_type[$brace_depth];
26677    }
26678
26679    # otherwise, it is a label if and only if it follows a ';' (real or fake)
26680    # or another label
26681    else {
26682        return ( $last_nonblank_type eq ';' || $last_nonblank_type eq 'J' );
26683    }
26684}
26685
26686sub code_block_type {
26687
26688    # Decide if this is a block of code, and its type.
26689    # Must be called only when $type = $token = '{'
26690    # The problem is to distinguish between the start of a block of code
26691    # and the start of an anonymous hash reference
26692    # Returns "" if not code block, otherwise returns 'last_nonblank_token'
26693    # to indicate the type of code block.  (For example, 'last_nonblank_token'
26694    # might be 'if' for an if block, 'else' for an else block, etc).
26695    # USES GLOBAL VARIABLES: $last_nonblank_token, $last_nonblank_type,
26696    # $last_nonblank_block_type, $brace_depth, @brace_type
26697
26698    # handle case of multiple '{'s
26699
26700# print "BLOCK_TYPE EXAMINING: type=$last_nonblank_type tok=$last_nonblank_token\n";
26701
26702    my ( $i, $rtokens, $rtoken_type, $max_token_index ) = @_;
26703    if (   $last_nonblank_token eq '{'
26704        && $last_nonblank_type eq $last_nonblank_token )
26705    {
26706
26707        # opening brace where a statement may appear is probably
26708        # a code block but might be and anonymous hash reference
26709        if ( $brace_type[$brace_depth] ) {
26710            return decide_if_code_block( $i, $rtokens, $rtoken_type,
26711                $max_token_index );
26712        }
26713
26714        # cannot start a code block within an anonymous hash
26715        else {
26716            return "";
26717        }
26718    }
26719
26720    elsif ( $last_nonblank_token eq ';' ) {
26721
26722        # an opening brace where a statement may appear is probably
26723        # a code block but might be and anonymous hash reference
26724        return decide_if_code_block( $i, $rtokens, $rtoken_type,
26725            $max_token_index );
26726    }
26727
26728    # handle case of '}{'
26729    elsif ($last_nonblank_token eq '}'
26730        && $last_nonblank_type eq $last_nonblank_token )
26731    {
26732
26733        # a } { situation ...
26734        # could be hash reference after code block..(blktype1.t)
26735        if ($last_nonblank_block_type) {
26736            return decide_if_code_block( $i, $rtokens, $rtoken_type,
26737                $max_token_index );
26738        }
26739
26740        # must be a block if it follows a closing hash reference
26741        else {
26742            return $last_nonblank_token;
26743        }
26744    }
26745
26746    # NOTE: braces after type characters start code blocks, but for
26747    # simplicity these are not identified as such.  See also
26748    # sub is_non_structural_brace.
26749    # elsif ( $last_nonblank_type eq 't' ) {
26750    #    return $last_nonblank_token;
26751    # }
26752
26753    # brace after label:
26754    elsif ( $last_nonblank_type eq 'J' ) {
26755        return $last_nonblank_token;
26756    }
26757
26758# otherwise, look at previous token.  This must be a code block if
26759# it follows any of these:
26760# /^(BEGIN|END|CHECK|INIT|AUTOLOAD|DESTROY|UNITCHECK|continue|if|elsif|else|unless|do|while|until|eval|for|foreach|map|grep|sort)$/
26761    elsif ( $is_code_block_token{$last_nonblank_token} ) {
26762
26763        # Bug Patch: Note that the opening brace after the 'if' in the following
26764        # snippet is an anonymous hash ref and not a code block!
26765        #   print 'hi' if { x => 1, }->{x};
26766        # We can identify this situation because the last nonblank type
26767        # will be a keyword (instead of a closing peren)
26768        if (   $last_nonblank_token =~ /^(if|unless)$/
26769            && $last_nonblank_type eq 'k' )
26770        {
26771            return "";
26772        }
26773        else {
26774            return $last_nonblank_token;
26775        }
26776    }
26777
26778    # or a sub or package BLOCK
26779    elsif ( ( $last_nonblank_type eq 'i' || $last_nonblank_type eq 't' )
26780        && $last_nonblank_token =~ /^(sub|package)\b/ )
26781    {
26782        return $last_nonblank_token;
26783    }
26784
26785    elsif ( $statement_type =~ /^(sub|package)\b/ ) {
26786        return $statement_type;
26787    }
26788
26789    # user-defined subs with block parameters (like grep/map/eval)
26790    elsif ( $last_nonblank_type eq 'G' ) {
26791        return $last_nonblank_token;
26792    }
26793
26794    # check bareword
26795    elsif ( $last_nonblank_type eq 'w' ) {
26796        return decide_if_code_block( $i, $rtokens, $rtoken_type,
26797            $max_token_index );
26798    }
26799
26800    # anything else must be anonymous hash reference
26801    else {
26802        return "";
26803    }
26804}
26805
26806sub decide_if_code_block {
26807
26808    # USES GLOBAL VARIABLES: $last_nonblank_token
26809    my ( $i, $rtokens, $rtoken_type, $max_token_index ) = @_;
26810    my ( $next_nonblank_token, $i_next ) =
26811      find_next_nonblank_token( $i, $rtokens, $max_token_index );
26812
26813    # we are at a '{' where a statement may appear.
26814    # We must decide if this brace starts an anonymous hash or a code
26815    # block.
26816    # return "" if anonymous hash, and $last_nonblank_token otherwise
26817
26818    # initialize to be code BLOCK
26819    my $code_block_type = $last_nonblank_token;
26820
26821    # Check for the common case of an empty anonymous hash reference:
26822    # Maybe something like sub { { } }
26823    if ( $next_nonblank_token eq '}' ) {
26824        $code_block_type = "";
26825    }
26826
26827    else {
26828
26829        # To guess if this '{' is an anonymous hash reference, look ahead
26830        # and test as follows:
26831        #
26832        # it is a hash reference if next come:
26833        #   - a string or digit followed by a comma or =>
26834        #   - bareword followed by =>
26835        # otherwise it is a code block
26836        #
26837        # Examples of anonymous hash ref:
26838        # {'aa',};
26839        # {1,2}
26840        #
26841        # Examples of code blocks:
26842        # {1; print "hello\n", 1;}
26843        # {$a,1};
26844
26845        # We are only going to look ahead one more (nonblank/comment) line.
26846        # Strange formatting could cause a bad guess, but that's unlikely.
26847        my @pre_types  = @$rtoken_type[ $i + 1 .. $max_token_index ];
26848        my @pre_tokens = @$rtokens[ $i + 1 .. $max_token_index ];
26849        my ( $rpre_tokens, $rpre_types ) =
26850          peek_ahead_for_n_nonblank_pre_tokens(20);    # 20 is arbitrary but
26851                                                       # generous, and prevents
26852                                                       # wasting lots of
26853                                                       # time in mangled files
26854        if ( defined($rpre_types) && @$rpre_types ) {
26855            push @pre_types,  @$rpre_types;
26856            push @pre_tokens, @$rpre_tokens;
26857        }
26858
26859        # put a sentinal token to simplify stopping the search
26860        push @pre_types, '}';
26861
26862        my $jbeg = 0;
26863        $jbeg = 1 if $pre_types[0] eq 'b';
26864
26865        # first look for one of these
26866        #  - bareword
26867        #  - bareword with leading -
26868        #  - digit
26869        #  - quoted string
26870        my $j = $jbeg;
26871        if ( $pre_types[$j] =~ /^[\'\"]/ ) {
26872
26873            # find the closing quote; don't worry about escapes
26874            my $quote_mark = $pre_types[$j];
26875            for ( my $k = $j + 1 ; $k < $#pre_types ; $k++ ) {
26876                if ( $pre_types[$k] eq $quote_mark ) {
26877                    $j = $k + 1;
26878                    my $next = $pre_types[$j];
26879                    last;
26880                }
26881            }
26882        }
26883        elsif ( $pre_types[$j] eq 'd' ) {
26884            $j++;
26885        }
26886        elsif ( $pre_types[$j] eq 'w' ) {
26887            unless ( $is_keyword{ $pre_tokens[$j] } ) {
26888                $j++;
26889            }
26890        }
26891        elsif ( $pre_types[$j] eq '-' && $pre_types[ ++$j ] eq 'w' ) {
26892            $j++;
26893        }
26894        if ( $j > $jbeg ) {
26895
26896            $j++ if $pre_types[$j] eq 'b';
26897
26898            # it's a hash ref if a comma or => follow next
26899            if ( $pre_types[$j] eq ','
26900                || ( $pre_types[$j] eq '=' && $pre_types[ ++$j ] eq '>' ) )
26901            {
26902                $code_block_type = "";
26903            }
26904        }
26905    }
26906
26907    return $code_block_type;
26908}
26909
26910sub unexpected {
26911
26912    # report unexpected token type and show where it is
26913    # USES GLOBAL VARIABLES: $tokenizer_self
26914    my ( $found, $expecting, $i_tok, $last_nonblank_i, $rpretoken_map,
26915        $rpretoken_type, $input_line )
26916      = @_;
26917
26918    if ( ++$tokenizer_self->{_unexpected_error_count} <= MAX_NAG_MESSAGES ) {
26919        my $msg = "found $found where $expecting expected";
26920        my $pos = $$rpretoken_map[$i_tok];
26921        interrupt_logfile();
26922        my $input_line_number = $tokenizer_self->{_last_line_number};
26923        my ( $offset, $numbered_line, $underline ) =
26924          make_numbered_line( $input_line_number, $input_line, $pos );
26925        $underline = write_on_underline( $underline, $pos - $offset, '^' );
26926
26927        my $trailer = "";
26928        if ( ( $i_tok > 0 ) && ( $last_nonblank_i >= 0 ) ) {
26929            my $pos_prev = $$rpretoken_map[$last_nonblank_i];
26930            my $num;
26931            if ( $$rpretoken_type[ $i_tok - 1 ] eq 'b' ) {
26932                $num = $$rpretoken_map[ $i_tok - 1 ] - $pos_prev;
26933            }
26934            else {
26935                $num = $pos - $pos_prev;
26936            }
26937            if ( $num > 40 ) { $num = 40; $pos_prev = $pos - 40; }
26938
26939            $underline =
26940              write_on_underline( $underline, $pos_prev - $offset, '-' x $num );
26941            $trailer = " (previous token underlined)";
26942        }
26943        warning( $numbered_line . "\n" );
26944        warning( $underline . "\n" );
26945        warning( $msg . $trailer . "\n" );
26946        resume_logfile();
26947    }
26948}
26949
26950sub is_non_structural_brace {
26951
26952    # Decide if a brace or bracket is structural or non-structural
26953    # by looking at the previous token and type
26954    # USES GLOBAL VARIABLES: $last_nonblank_type, $last_nonblank_token
26955
26956    # EXPERIMENTAL: Mark slices as structural; idea was to improve formatting.
26957    # Tentatively deactivated because it caused the wrong operator expectation
26958    # for this code:
26959    #      $user = @vars[1] / 100;
26960    # Must update sub operator_expected before re-implementing.
26961    # if ( $last_nonblank_type eq 'i' && $last_nonblank_token =~ /^@/ ) {
26962    #    return 0;
26963    # }
26964
26965    # NOTE: braces after type characters start code blocks, but for
26966    # simplicity these are not identified as such.  See also
26967    # sub code_block_type
26968    # if ($last_nonblank_type eq 't') {return 0}
26969
26970    # otherwise, it is non-structural if it is decorated
26971    # by type information.
26972    # For example, the '{' here is non-structural:   ${xxx}
26973    (
26974        $last_nonblank_token =~ /^([\$\@\*\&\%\)]|->|::)/
26975
26976          # or if we follow a hash or array closing curly brace or bracket
26977          # For example, the second '{' in this is non-structural: $a{'x'}{'y'}
26978          # because the first '}' would have been given type 'R'
26979          || $last_nonblank_type =~ /^([R\]])$/
26980    );
26981}
26982
26983#########i#############################################################
26984# Tokenizer routines for tracking container nesting depths
26985#######################################################################
26986
26987# The following routines keep track of nesting depths of the nesting
26988# types, ( [ { and ?.  This is necessary for determining the indentation
26989# level, and also for debugging programs.  Not only do they keep track of
26990# nesting depths of the individual brace types, but they check that each
26991# of the other brace types is balanced within matching pairs.  For
26992# example, if the program sees this sequence:
26993#
26994#         {  ( ( ) }
26995#
26996# then it can determine that there is an extra left paren somewhere
26997# between the { and the }.  And so on with every other possible
26998# combination of outer and inner brace types.  For another
26999# example:
27000#
27001#         ( [ ..... ]  ] )
27002#
27003# which has an extra ] within the parens.
27004#
27005# The brace types have indexes 0 .. 3 which are indexes into
27006# the matrices.
27007#
27008# The pair ? : are treated as just another nesting type, with ? acting
27009# as the opening brace and : acting as the closing brace.
27010#
27011# The matrix
27012#
27013#         $depth_array[$a][$b][ $current_depth[$a] ] = $current_depth[$b];
27014#
27015# saves the nesting depth of brace type $b (where $b is either of the other
27016# nesting types) when brace type $a enters a new depth.  When this depth
27017# decreases, a check is made that the current depth of brace types $b is
27018# unchanged, or otherwise there must have been an error.  This can
27019# be very useful for localizing errors, particularly when perl runs to
27020# the end of a large file (such as this one) and announces that there
27021# is a problem somewhere.
27022#
27023# A numerical sequence number is maintained for every nesting type,
27024# so that each matching pair can be uniquely identified in a simple
27025# way.
27026
27027sub increase_nesting_depth {
27028    my ( $aa, $pos ) = @_;
27029
27030    # USES GLOBAL VARIABLES: $tokenizer_self, @current_depth,
27031    # @current_sequence_number, @depth_array, @starting_line_of_current_depth,
27032    # $statement_type
27033    my $bb;
27034    $current_depth[$aa]++;
27035    $total_depth++;
27036    $total_depth[$aa][ $current_depth[$aa] ] = $total_depth;
27037    my $input_line_number = $tokenizer_self->{_last_line_number};
27038    my $input_line        = $tokenizer_self->{_line_text};
27039
27040    # Sequence numbers increment by number of items.  This keeps
27041    # a unique set of numbers but still allows the relative location
27042    # of any type to be determined.
27043    $nesting_sequence_number[$aa] += scalar(@closing_brace_names);
27044    my $seqno = $nesting_sequence_number[$aa];
27045    $current_sequence_number[$aa][ $current_depth[$aa] ] = $seqno;
27046
27047    $starting_line_of_current_depth[$aa][ $current_depth[$aa] ] =
27048      [ $input_line_number, $input_line, $pos ];
27049
27050    for $bb ( 0 .. $#closing_brace_names ) {
27051        next if ( $bb == $aa );
27052        $depth_array[$aa][$bb][ $current_depth[$aa] ] = $current_depth[$bb];
27053    }
27054
27055    # set a flag for indenting a nested ternary statement
27056    my $indent = 0;
27057    if ( $aa == QUESTION_COLON ) {
27058        $nested_ternary_flag[ $current_depth[$aa] ] = 0;
27059        if ( $current_depth[$aa] > 1 ) {
27060            if ( $nested_ternary_flag[ $current_depth[$aa] - 1 ] == 0 ) {
27061                my $pdepth = $total_depth[$aa][ $current_depth[$aa] - 1 ];
27062                if ( $pdepth == $total_depth - 1 ) {
27063                    $indent = 1;
27064                    $nested_ternary_flag[ $current_depth[$aa] - 1 ] = -1;
27065                }
27066            }
27067        }
27068    }
27069    $nested_statement_type[$aa][ $current_depth[$aa] ] = $statement_type;
27070    $statement_type = "";
27071    return ( $seqno, $indent );
27072}
27073
27074sub decrease_nesting_depth {
27075
27076    my ( $aa, $pos ) = @_;
27077
27078    # USES GLOBAL VARIABLES: $tokenizer_self, @current_depth,
27079    # @current_sequence_number, @depth_array, @starting_line_of_current_depth
27080    # $statement_type
27081    my $bb;
27082    my $seqno             = 0;
27083    my $input_line_number = $tokenizer_self->{_last_line_number};
27084    my $input_line        = $tokenizer_self->{_line_text};
27085
27086    my $outdent = 0;
27087    $total_depth--;
27088    if ( $current_depth[$aa] > 0 ) {
27089
27090        # set a flag for un-indenting after seeing a nested ternary statement
27091        $seqno = $current_sequence_number[$aa][ $current_depth[$aa] ];
27092        if ( $aa == QUESTION_COLON ) {
27093            $outdent = $nested_ternary_flag[ $current_depth[$aa] ];
27094        }
27095        $statement_type = $nested_statement_type[$aa][ $current_depth[$aa] ];
27096
27097        # check that any brace types $bb contained within are balanced
27098        for $bb ( 0 .. $#closing_brace_names ) {
27099            next if ( $bb == $aa );
27100
27101            unless ( $depth_array[$aa][$bb][ $current_depth[$aa] ] ==
27102                $current_depth[$bb] )
27103            {
27104                my $diff =
27105                  $current_depth[$bb] -
27106                  $depth_array[$aa][$bb][ $current_depth[$aa] ];
27107
27108                # don't whine too many times
27109                my $saw_brace_error = get_saw_brace_error();
27110                if (
27111                    $saw_brace_error <= MAX_NAG_MESSAGES
27112
27113                    # if too many closing types have occurred, we probably
27114                    # already caught this error
27115                    && ( ( $diff > 0 ) || ( $saw_brace_error <= 0 ) )
27116                  )
27117                {
27118                    interrupt_logfile();
27119                    my $rsl =
27120                      $starting_line_of_current_depth[$aa]
27121                      [ $current_depth[$aa] ];
27122                    my $sl  = $$rsl[0];
27123                    my $rel = [ $input_line_number, $input_line, $pos ];
27124                    my $el  = $$rel[0];
27125                    my ($ess);
27126
27127                    if ( $diff == 1 || $diff == -1 ) {
27128                        $ess = '';
27129                    }
27130                    else {
27131                        $ess = 's';
27132                    }
27133                    my $bname =
27134                      ( $diff > 0 )
27135                      ? $opening_brace_names[$bb]
27136                      : $closing_brace_names[$bb];
27137                    write_error_indicator_pair( @$rsl, '^' );
27138                    my $msg = <<"EOM";
27139Found $diff extra $bname$ess between $opening_brace_names[$aa] on line $sl and $closing_brace_names[$aa] on line $el
27140EOM
27141
27142                    if ( $diff > 0 ) {
27143                        my $rml =
27144                          $starting_line_of_current_depth[$bb]
27145                          [ $current_depth[$bb] ];
27146                        my $ml = $$rml[0];
27147                        $msg .=
27148"    The most recent un-matched $bname is on line $ml\n";
27149                        write_error_indicator_pair( @$rml, '^' );
27150                    }
27151                    write_error_indicator_pair( @$rel, '^' );
27152                    warning($msg);
27153                    resume_logfile();
27154                }
27155                increment_brace_error();
27156            }
27157        }
27158        $current_depth[$aa]--;
27159    }
27160    else {
27161
27162        my $saw_brace_error = get_saw_brace_error();
27163        if ( $saw_brace_error <= MAX_NAG_MESSAGES ) {
27164            my $msg = <<"EOM";
27165There is no previous $opening_brace_names[$aa] to match a $closing_brace_names[$aa] on line $input_line_number
27166EOM
27167            indicate_error( $msg, $input_line_number, $input_line, $pos, '^' );
27168        }
27169        increment_brace_error();
27170    }
27171    return ( $seqno, $outdent );
27172}
27173
27174sub check_final_nesting_depths {
27175    my ($aa);
27176
27177    # USES GLOBAL VARIABLES: @current_depth, @starting_line_of_current_depth
27178
27179    for $aa ( 0 .. $#closing_brace_names ) {
27180
27181        if ( $current_depth[$aa] ) {
27182            my $rsl =
27183              $starting_line_of_current_depth[$aa][ $current_depth[$aa] ];
27184            my $sl  = $$rsl[0];
27185            my $msg = <<"EOM";
27186Final nesting depth of $opening_brace_names[$aa]s is $current_depth[$aa]
27187The most recent un-matched $opening_brace_names[$aa] is on line $sl
27188EOM
27189            indicate_error( $msg, @$rsl, '^' );
27190            increment_brace_error();
27191        }
27192    }
27193}
27194
27195#########i#############################################################
27196# Tokenizer routines for looking ahead in input stream
27197#######################################################################
27198
27199sub peek_ahead_for_n_nonblank_pre_tokens {
27200
27201    # returns next n pretokens if they exist
27202    # returns undef's if hits eof without seeing any pretokens
27203    # USES GLOBAL VARIABLES: $tokenizer_self
27204    my $max_pretokens = shift;
27205    my $line;
27206    my $i = 0;
27207    my ( $rpre_tokens, $rmap, $rpre_types );
27208
27209    while ( $line = $tokenizer_self->{_line_buffer_object}->peek_ahead( $i++ ) )
27210    {
27211        $line =~ s/^\s*//;    # trim leading blanks
27212        next if ( length($line) <= 0 );    # skip blank
27213        next if ( $line =~ /^#/ );         # skip comment
27214        ( $rpre_tokens, $rmap, $rpre_types ) =
27215          pre_tokenize( $line, $max_pretokens );
27216        last;
27217    }
27218    return ( $rpre_tokens, $rpre_types );
27219}
27220
27221# look ahead for next non-blank, non-comment line of code
27222sub peek_ahead_for_nonblank_token {
27223
27224    # USES GLOBAL VARIABLES: $tokenizer_self
27225    my ( $rtokens, $max_token_index ) = @_;
27226    my $line;
27227    my $i = 0;
27228
27229    while ( $line = $tokenizer_self->{_line_buffer_object}->peek_ahead( $i++ ) )
27230    {
27231        $line =~ s/^\s*//;    # trim leading blanks
27232        next if ( length($line) <= 0 );    # skip blank
27233        next if ( $line =~ /^#/ );         # skip comment
27234        my ( $rtok, $rmap, $rtype ) =
27235          pre_tokenize( $line, 2 );        # only need 2 pre-tokens
27236        my $j = $max_token_index + 1;
27237        my $tok;
27238
27239        foreach $tok (@$rtok) {
27240            last if ( $tok =~ "\n" );
27241            $$rtokens[ ++$j ] = $tok;
27242        }
27243        last;
27244    }
27245    return $rtokens;
27246}
27247
27248#########i#############################################################
27249# Tokenizer guessing routines for ambiguous situations
27250#######################################################################
27251
27252sub guess_if_pattern_or_conditional {
27253
27254    # this routine is called when we have encountered a ? following an
27255    # unknown bareword, and we must decide if it starts a pattern or not
27256    # input parameters:
27257    #   $i - token index of the ? starting possible pattern
27258    # output parameters:
27259    #   $is_pattern = 0 if probably not pattern,  =1 if probably a pattern
27260    #   msg = a warning or diagnostic message
27261    # USES GLOBAL VARIABLES: $last_nonblank_token
27262    my ( $i, $rtokens, $rtoken_map, $max_token_index ) = @_;
27263    my $is_pattern = 0;
27264    my $msg        = "guessing that ? after $last_nonblank_token starts a ";
27265
27266    if ( $i >= $max_token_index ) {
27267        $msg .= "conditional (no end to pattern found on the line)\n";
27268    }
27269    else {
27270        my $ibeg = $i;
27271        $i = $ibeg + 1;
27272        my $next_token = $$rtokens[$i];    # first token after ?
27273
27274        # look for a possible ending ? on this line..
27275        my $in_quote        = 1;
27276        my $quote_depth     = 0;
27277        my $quote_character = '';
27278        my $quote_pos       = 0;
27279        my $quoted_string;
27280        (
27281            $i, $in_quote, $quote_character, $quote_pos, $quote_depth,
27282            $quoted_string
27283          )
27284          = follow_quoted_string( $ibeg, $in_quote, $rtokens, $quote_character,
27285            $quote_pos, $quote_depth, $max_token_index );
27286
27287        if ($in_quote) {
27288
27289            # we didn't find an ending ? on this line,
27290            # so we bias towards conditional
27291            $is_pattern = 0;
27292            $msg .= "conditional (no ending ? on this line)\n";
27293
27294            # we found an ending ?, so we bias towards a pattern
27295        }
27296        else {
27297
27298            if ( pattern_expected( $i, $rtokens, $max_token_index ) >= 0 ) {
27299                $is_pattern = 1;
27300                $msg .= "pattern (found ending ? and pattern expected)\n";
27301            }
27302            else {
27303                $msg .= "pattern (uncertain, but found ending ?)\n";
27304            }
27305        }
27306    }
27307    return ( $is_pattern, $msg );
27308}
27309
27310sub guess_if_pattern_or_division {
27311
27312    # this routine is called when we have encountered a / following an
27313    # unknown bareword, and we must decide if it starts a pattern or is a
27314    # division
27315    # input parameters:
27316    #   $i - token index of the / starting possible pattern
27317    # output parameters:
27318    #   $is_pattern = 0 if probably division,  =1 if probably a pattern
27319    #   msg = a warning or diagnostic message
27320    # USES GLOBAL VARIABLES: $last_nonblank_token
27321    my ( $i, $rtokens, $rtoken_map, $max_token_index ) = @_;
27322    my $is_pattern = 0;
27323    my $msg        = "guessing that / after $last_nonblank_token starts a ";
27324
27325    if ( $i >= $max_token_index ) {
27326        "division (no end to pattern found on the line)\n";
27327    }
27328    else {
27329        my $ibeg = $i;
27330        my $divide_expected =
27331          numerator_expected( $i, $rtokens, $max_token_index );
27332        $i = $ibeg + 1;
27333        my $next_token = $$rtokens[$i];    # first token after slash
27334
27335        # look for a possible ending / on this line..
27336        my $in_quote        = 1;
27337        my $quote_depth     = 0;
27338        my $quote_character = '';
27339        my $quote_pos       = 0;
27340        my $quoted_string;
27341        (
27342            $i, $in_quote, $quote_character, $quote_pos, $quote_depth,
27343            $quoted_string
27344          )
27345          = follow_quoted_string( $ibeg, $in_quote, $rtokens, $quote_character,
27346            $quote_pos, $quote_depth, $max_token_index );
27347
27348        if ($in_quote) {
27349
27350            # we didn't find an ending / on this line,
27351            # so we bias towards division
27352            if ( $divide_expected >= 0 ) {
27353                $is_pattern = 0;
27354                $msg .= "division (no ending / on this line)\n";
27355            }
27356            else {
27357                $msg        = "multi-line pattern (division not possible)\n";
27358                $is_pattern = 1;
27359            }
27360
27361        }
27362
27363        # we found an ending /, so we bias towards a pattern
27364        else {
27365
27366            if ( pattern_expected( $i, $rtokens, $max_token_index ) >= 0 ) {
27367
27368                if ( $divide_expected >= 0 ) {
27369
27370                    if ( $i - $ibeg > 60 ) {
27371                        $msg .= "division (matching / too distant)\n";
27372                        $is_pattern = 0;
27373                    }
27374                    else {
27375                        $msg .= "pattern (but division possible too)\n";
27376                        $is_pattern = 1;
27377                    }
27378                }
27379                else {
27380                    $is_pattern = 1;
27381                    $msg .= "pattern (division not possible)\n";
27382                }
27383            }
27384            else {
27385
27386                if ( $divide_expected >= 0 ) {
27387                    $is_pattern = 0;
27388                    $msg .= "division (pattern not possible)\n";
27389                }
27390                else {
27391                    $is_pattern = 1;
27392                    $msg .=
27393                      "pattern (uncertain, but division would not work here)\n";
27394                }
27395            }
27396        }
27397    }
27398    return ( $is_pattern, $msg );
27399}
27400
27401# try to resolve here-doc vs. shift by looking ahead for
27402# non-code or the end token (currently only looks for end token)
27403# returns 1 if it is probably a here doc, 0 if not
27404sub guess_if_here_doc {
27405
27406    # This is how many lines we will search for a target as part of the
27407    # guessing strategy.  It is a constant because there is probably
27408    # little reason to change it.
27409    # USES GLOBAL VARIABLES: $tokenizer_self, $current_package
27410    # %is_constant,
27411    use constant HERE_DOC_WINDOW => 40;
27412
27413    my $next_token        = shift;
27414    my $here_doc_expected = 0;
27415    my $line;
27416    my $k   = 0;
27417    my $msg = "checking <<";
27418
27419    while ( $line = $tokenizer_self->{_line_buffer_object}->peek_ahead( $k++ ) )
27420    {
27421        chomp $line;
27422
27423        if ( $line =~ /^$next_token$/ ) {
27424            $msg .= " -- found target $next_token ahead $k lines\n";
27425            $here_doc_expected = 1;    # got it
27426            last;
27427        }
27428        last if ( $k >= HERE_DOC_WINDOW );
27429    }
27430
27431    unless ($here_doc_expected) {
27432
27433        if ( !defined($line) ) {
27434            $here_doc_expected = -1;    # hit eof without seeing target
27435            $msg .= " -- must be shift; target $next_token not in file\n";
27436
27437        }
27438        else {                          # still unsure..taking a wild guess
27439
27440            if ( !$is_constant{$current_package}{$next_token} ) {
27441                $here_doc_expected = 1;
27442                $msg .=
27443                  " -- guessing it's a here-doc ($next_token not a constant)\n";
27444            }
27445            else {
27446                $msg .=
27447                  " -- guessing it's a shift ($next_token is a constant)\n";
27448            }
27449        }
27450    }
27451    write_logfile_entry($msg);
27452    return $here_doc_expected;
27453}
27454
27455#########i#############################################################
27456# Tokenizer Routines for scanning identifiers and related items
27457#######################################################################
27458
27459sub scan_bare_identifier_do {
27460
27461    # this routine is called to scan a token starting with an alphanumeric
27462    # variable or package separator, :: or '.
27463    # USES GLOBAL VARIABLES: $current_package, $last_nonblank_token,
27464    # $last_nonblank_type,@paren_type, $paren_depth
27465
27466    my ( $input_line, $i, $tok, $type, $prototype, $rtoken_map,
27467        $max_token_index )
27468      = @_;
27469    my $i_begin = $i;
27470    my $package = undef;
27471
27472    my $i_beg = $i;
27473
27474    # we have to back up one pretoken at a :: since each : is one pretoken
27475    if ( $tok eq '::' ) { $i_beg-- }
27476    if ( $tok eq '->' ) { $i_beg-- }
27477    my $pos_beg = $$rtoken_map[$i_beg];
27478    pos($input_line) = $pos_beg;
27479
27480    #  Examples:
27481    #   A::B::C
27482    #   A::
27483    #   ::A
27484    #   A'B
27485    if ( $input_line =~ m/\G\s*((?:\w*(?:'|::)))*(?:(?:->)?(\w+))?/gc ) {
27486
27487        my $pos  = pos($input_line);
27488        my $numc = $pos - $pos_beg;
27489        $tok = substr( $input_line, $pos_beg, $numc );
27490
27491        # type 'w' includes anything without leading type info
27492        # ($,%,@,*) including something like abc::def::ghi
27493        $type = 'w';
27494
27495        my $sub_name = "";
27496        if ( defined($2) ) { $sub_name = $2; }
27497        if ( defined($1) ) {
27498            $package = $1;
27499
27500            # patch: don't allow isolated package name which just ends
27501            # in the old style package separator (single quote).  Example:
27502            #   use CGI':all';
27503            if ( !($sub_name) && substr( $package, -1, 1 ) eq '\'' ) {
27504                $pos--;
27505            }
27506
27507            $package =~ s/\'/::/g;
27508            if ( $package =~ /^\:/ ) { $package = 'main' . $package }
27509            $package =~ s/::$//;
27510        }
27511        else {
27512            $package = $current_package;
27513
27514            if ( $is_keyword{$tok} ) {
27515                $type = 'k';
27516            }
27517        }
27518
27519        # if it is a bareword..
27520        if ( $type eq 'w' ) {
27521
27522            # check for v-string with leading 'v' type character
27523            # (This seems to have presidence over filehandle, type 'Y')
27524            if ( $tok =~ /^v\d[_\d]*$/ ) {
27525
27526                # we only have the first part - something like 'v101' -
27527                # look for more
27528                if ( $input_line =~ m/\G(\.\d[_\d]*)+/gc ) {
27529                    $pos  = pos($input_line);
27530                    $numc = $pos - $pos_beg;
27531                    $tok  = substr( $input_line, $pos_beg, $numc );
27532                }
27533                $type = 'v';
27534
27535                # warn if this version can't handle v-strings
27536                report_v_string($tok);
27537            }
27538
27539            elsif ( $is_constant{$package}{$sub_name} ) {
27540                $type = 'C';
27541            }
27542
27543            # bareword after sort has implied empty prototype; for example:
27544            # @sorted = sort numerically ( 53, 29, 11, 32, 7 );
27545            # This has priority over whatever the user has specified.
27546            elsif ($last_nonblank_token eq 'sort'
27547                && $last_nonblank_type eq 'k' )
27548            {
27549                $type = 'Z';
27550            }
27551
27552            # Note: strangely, perl does not seem to really let you create
27553            # functions which act like eval and do, in the sense that eval
27554            # and do may have operators following the final }, but any operators
27555            # that you create with prototype (&) apparently do not allow
27556            # trailing operators, only terms.  This seems strange.
27557            # If this ever changes, here is the update
27558            # to make perltidy behave accordingly:
27559
27560            # elsif ( $is_block_function{$package}{$tok} ) {
27561            #    $tok='eval'; # patch to do braces like eval  - doesn't work
27562            #    $type = 'k';
27563            #}
27564            # FIXME: This could become a separate type to allow for different
27565            # future behavior:
27566            elsif ( $is_block_function{$package}{$sub_name} ) {
27567                $type = 'G';
27568            }
27569
27570            elsif ( $is_block_list_function{$package}{$sub_name} ) {
27571                $type = 'G';
27572            }
27573            elsif ( $is_user_function{$package}{$sub_name} ) {
27574                $type      = 'U';
27575                $prototype = $user_function_prototype{$package}{$sub_name};
27576            }
27577
27578            # check for indirect object
27579            elsif (
27580
27581                # added 2001-03-27: must not be followed immediately by '('
27582                # see fhandle.t
27583                ( $input_line !~ m/\G\(/gc )
27584
27585                # and
27586                && (
27587
27588                    # preceded by keyword like 'print', 'printf' and friends
27589                    $is_indirect_object_taker{$last_nonblank_token}
27590
27591                    # or preceded by something like 'print(' or 'printf('
27592                    || (
27593                        ( $last_nonblank_token eq '(' )
27594                        && $is_indirect_object_taker{ $paren_type[$paren_depth]
27595                        }
27596
27597                    )
27598                )
27599              )
27600            {
27601
27602                # may not be indirect object unless followed by a space
27603                if ( $input_line =~ m/\G\s+/gc ) {
27604                    $type = 'Y';
27605
27606                    # Abandon Hope ...
27607                    # Perl's indirect object notation is a very bad
27608                    # thing and can cause subtle bugs, especially for
27609                    # beginning programmers.  And I haven't even been
27610                    # able to figure out a sane warning scheme which
27611                    # doesn't get in the way of good scripts.
27612
27613                    # Complain if a filehandle has any lower case
27614                    # letters.  This is suggested good practice.
27615                    # Use 'sub_name' because something like
27616                    # main::MYHANDLE is ok for filehandle
27617                    if ( $sub_name =~ /[a-z]/ ) {
27618
27619                        # could be bug caused by older perltidy if
27620                        # followed by '('
27621                        if ( $input_line =~ m/\G\s*\(/gc ) {
27622                            complain(
27623"Caution: unknown word '$tok' in indirect object slot\n"
27624                            );
27625                        }
27626                    }
27627                }
27628
27629                # bareword not followed by a space -- may not be filehandle
27630                # (may be function call defined in a 'use' statement)
27631                else {
27632                    $type = 'Z';
27633                }
27634            }
27635        }
27636
27637        # Now we must convert back from character position
27638        # to pre_token index.
27639        # I don't think an error flag can occur here ..but who knows
27640        my $error;
27641        ( $i, $error ) =
27642          inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index );
27643        if ($error) {
27644            warning("scan_bare_identifier: Possibly invalid tokenization\n");
27645        }
27646    }
27647
27648    # no match but line not blank - could be syntax error
27649    # perl will take '::' alone without complaint
27650    else {
27651        $type = 'w';
27652
27653        # change this warning to log message if it becomes annoying
27654        warning("didn't find identifier after leading ::\n");
27655    }
27656    return ( $i, $tok, $type, $prototype );
27657}
27658
27659sub scan_id_do {
27660
27661# This is the new scanner and will eventually replace scan_identifier.
27662# Only type 'sub' and 'package' are implemented.
27663# Token types $ * % @ & -> are not yet implemented.
27664#
27665# Scan identifier following a type token.
27666# The type of call depends on $id_scan_state: $id_scan_state = ''
27667# for starting call, in which case $tok must be the token defining
27668# the type.
27669#
27670# If the type token is the last nonblank token on the line, a value
27671# of $id_scan_state = $tok is returned, indicating that further
27672# calls must be made to get the identifier.  If the type token is
27673# not the last nonblank token on the line, the identifier is
27674# scanned and handled and a value of '' is returned.
27675# USES GLOBAL VARIABLES: $current_package, $last_nonblank_token, $in_attribute_list,
27676# $statement_type, $tokenizer_self
27677
27678    my ( $input_line, $i, $tok, $rtokens, $rtoken_map, $id_scan_state,
27679        $max_token_index )
27680      = @_;
27681    my $type = '';
27682    my ( $i_beg, $pos_beg );
27683
27684    #print "NSCAN:entering i=$i, tok=$tok, type=$type, state=$id_scan_state\n";
27685    #my ($a,$b,$c) = caller;
27686    #print "NSCAN: scan_id called with tok=$tok $a $b $c\n";
27687
27688    # on re-entry, start scanning at first token on the line
27689    if ($id_scan_state) {
27690        $i_beg = $i;
27691        $type  = '';
27692    }
27693
27694    # on initial entry, start scanning just after type token
27695    else {
27696        $i_beg         = $i + 1;
27697        $id_scan_state = $tok;
27698        $type          = 't';
27699    }
27700
27701    # find $i_beg = index of next nonblank token,
27702    # and handle empty lines
27703    my $blank_line          = 0;
27704    my $next_nonblank_token = $$rtokens[$i_beg];
27705    if ( $i_beg > $max_token_index ) {
27706        $blank_line = 1;
27707    }
27708    else {
27709
27710        # only a '#' immediately after a '$' is not a comment
27711        if ( $next_nonblank_token eq '#' ) {
27712            unless ( $tok eq '$' ) {
27713                $blank_line = 1;
27714            }
27715        }
27716
27717        if ( $next_nonblank_token =~ /^\s/ ) {
27718            ( $next_nonblank_token, $i_beg ) =
27719              find_next_nonblank_token_on_this_line( $i_beg, $rtokens,
27720                $max_token_index );
27721            if ( $next_nonblank_token =~ /(^#|^\s*$)/ ) {
27722                $blank_line = 1;
27723            }
27724        }
27725    }
27726
27727    # handle non-blank line; identifier, if any, must follow
27728    unless ($blank_line) {
27729
27730        if ( $id_scan_state eq 'sub' ) {
27731            ( $i, $tok, $type, $id_scan_state ) = do_scan_sub(
27732                $input_line, $i,             $i_beg,
27733                $tok,        $type,          $rtokens,
27734                $rtoken_map, $id_scan_state, $max_token_index
27735            );
27736        }
27737
27738        elsif ( $id_scan_state eq 'package' ) {
27739            ( $i, $tok, $type ) =
27740              do_scan_package( $input_line, $i, $i_beg, $tok, $type, $rtokens,
27741                $rtoken_map, $max_token_index );
27742            $id_scan_state = '';
27743        }
27744
27745        else {
27746            warning("invalid token in scan_id: $tok\n");
27747            $id_scan_state = '';
27748        }
27749    }
27750
27751    if ( $id_scan_state && ( !defined($type) || !$type ) ) {
27752
27753        # shouldn't happen:
27754        warning(
27755"Program bug in scan_id: undefined type but scan_state=$id_scan_state\n"
27756        );
27757        report_definite_bug();
27758    }
27759
27760    TOKENIZER_DEBUG_FLAG_NSCAN && do {
27761        print STDOUT
27762          "NSCAN: returns i=$i, tok=$tok, type=$type, state=$id_scan_state\n";
27763    };
27764    return ( $i, $tok, $type, $id_scan_state );
27765}
27766
27767sub check_prototype {
27768    my ( $proto, $package, $subname ) = @_;
27769    return unless ( defined($package) && defined($subname) );
27770    if ( defined($proto) ) {
27771        $proto =~ s/^\s*\(\s*//;
27772        $proto =~ s/\s*\)$//;
27773        if ($proto) {
27774            $is_user_function{$package}{$subname}        = 1;
27775            $user_function_prototype{$package}{$subname} = "($proto)";
27776
27777            # prototypes containing '&' must be treated specially..
27778            if ( $proto =~ /\&/ ) {
27779
27780                # right curly braces of prototypes ending in
27781                # '&' may be followed by an operator
27782                if ( $proto =~ /\&$/ ) {
27783                    $is_block_function{$package}{$subname} = 1;
27784                }
27785
27786                # right curly braces of prototypes NOT ending in
27787                # '&' may NOT be followed by an operator
27788                elsif ( $proto !~ /\&$/ ) {
27789                    $is_block_list_function{$package}{$subname} = 1;
27790                }
27791            }
27792        }
27793        else {
27794            $is_constant{$package}{$subname} = 1;
27795        }
27796    }
27797    else {
27798        $is_user_function{$package}{$subname} = 1;
27799    }
27800}
27801
27802sub do_scan_package {
27803
27804    # do_scan_package parses a package name
27805    # it is called with $i_beg equal to the index of the first nonblank
27806    # token following a 'package' token.
27807    # USES GLOBAL VARIABLES: $current_package,
27808
27809    # package NAMESPACE
27810    # package NAMESPACE VERSION
27811    # package NAMESPACE BLOCK
27812    # package NAMESPACE VERSION BLOCK
27813    #
27814    # If VERSION is provided, package sets the $VERSION variable in the given
27815    # namespace to a version object with the VERSION provided. VERSION must be
27816    # a "strict" style version number as defined by the version module: a
27817    # positive decimal number (integer or decimal-fraction) without
27818    # exponentiation or else a dotted-decimal v-string with a leading 'v'
27819    # character and at least three components.
27820    # reference http://perldoc.perl.org/functions/package.html
27821
27822    my ( $input_line, $i, $i_beg, $tok, $type, $rtokens, $rtoken_map,
27823        $max_token_index )
27824      = @_;
27825    my $package = undef;
27826    my $pos_beg = $$rtoken_map[$i_beg];
27827    pos($input_line) = $pos_beg;
27828
27829    # handle non-blank line; package name, if any, must follow
27830    if ( $input_line =~ m/\G\s*((?:\w*(?:'|::))*\w+)/gc ) {
27831        $package = $1;
27832        $package = ( defined($1) && $1 ) ? $1 : 'main';
27833        $package =~ s/\'/::/g;
27834        if ( $package =~ /^\:/ ) { $package = 'main' . $package }
27835        $package =~ s/::$//;
27836        my $pos  = pos($input_line);
27837        my $numc = $pos - $pos_beg;
27838        $tok = 'package ' . substr( $input_line, $pos_beg, $numc );
27839        $type = 'i';
27840
27841        # Now we must convert back from character position
27842        # to pre_token index.
27843        # I don't think an error flag can occur here ..but ?
27844        my $error;
27845        ( $i, $error ) =
27846          inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index );
27847        if ($error) { warning("Possibly invalid package\n") }
27848        $current_package = $package;
27849
27850        # we should now have package NAMESPACE
27851        # now expecting VERSION, BLOCK, or ; to follow ...
27852        # package NAMESPACE VERSION
27853        # package NAMESPACE BLOCK
27854        # package NAMESPACE VERSION BLOCK
27855        my ( $next_nonblank_token, $i_next ) =
27856          find_next_nonblank_token( $i, $rtokens, $max_token_index );
27857        if ( $next_nonblank_token =~ /^[v\.\d;\{\}]$/ ) {
27858            $statement_type = $tok;
27859        }
27860        else {
27861            warning(
27862                "Unexpected '$next_nonblank_token' after package name '$tok'\n"
27863            );
27864        }
27865    }
27866
27867    # no match but line not blank --
27868    # could be a label with name package, like package:  , for example.
27869    else {
27870        $type = 'k';
27871    }
27872
27873    return ( $i, $tok, $type );
27874}
27875
27876sub scan_identifier_do {
27877
27878    # This routine assembles tokens into identifiers.  It maintains a
27879    # scan state, id_scan_state.  It updates id_scan_state based upon
27880    # current id_scan_state and token, and returns an updated
27881    # id_scan_state and the next index after the identifier.
27882    # USES GLOBAL VARIABLES: $context, $last_nonblank_token,
27883    # $last_nonblank_type
27884
27885    my ( $i, $id_scan_state, $identifier, $rtokens, $max_token_index,
27886        $expecting )
27887      = @_;
27888    my $i_begin   = $i;
27889    my $type      = '';
27890    my $tok_begin = $$rtokens[$i_begin];
27891    if ( $tok_begin eq ':' ) { $tok_begin = '::' }
27892    my $id_scan_state_begin = $id_scan_state;
27893    my $identifier_begin    = $identifier;
27894    my $tok                 = $tok_begin;
27895    my $message             = "";
27896
27897    # these flags will be used to help figure out the type:
27898    my $saw_alpha = ( $tok =~ /^[A-Za-z_]/ );
27899    my $saw_type;
27900
27901    # allow old package separator (') except in 'use' statement
27902    my $allow_tick = ( $last_nonblank_token ne 'use' );
27903
27904    # get started by defining a type and a state if necessary
27905    unless ($id_scan_state) {
27906        $context = UNKNOWN_CONTEXT;
27907
27908        # fixup for digraph
27909        if ( $tok eq '>' ) {
27910            $tok       = '->';
27911            $tok_begin = $tok;
27912        }
27913        $identifier = $tok;
27914
27915        if ( $tok eq '$' || $tok eq '*' ) {
27916            $id_scan_state = '$';
27917            $context       = SCALAR_CONTEXT;
27918        }
27919        elsif ( $tok eq '%' || $tok eq '@' ) {
27920            $id_scan_state = '$';
27921            $context       = LIST_CONTEXT;
27922        }
27923        elsif ( $tok eq '&' ) {
27924            $id_scan_state = '&';
27925        }
27926        elsif ( $tok eq 'sub' or $tok eq 'package' ) {
27927            $saw_alpha     = 0;     # 'sub' is considered type info here
27928            $id_scan_state = '$';
27929            $identifier .= ' ';     # need a space to separate sub from sub name
27930        }
27931        elsif ( $tok eq '::' ) {
27932            $id_scan_state = 'A';
27933        }
27934        elsif ( $tok =~ /^[A-Za-z_]/ ) {
27935            $id_scan_state = ':';
27936        }
27937        elsif ( $tok eq '->' ) {
27938            $id_scan_state = '$';
27939        }
27940        else {
27941
27942            # shouldn't happen
27943            my ( $a, $b, $c ) = caller;
27944            warning("Program Bug: scan_identifier given bad token = $tok \n");
27945            warning("   called from sub $a  line: $c\n");
27946            report_definite_bug();
27947        }
27948        $saw_type = !$saw_alpha;
27949    }
27950    else {
27951        $i--;
27952        $saw_type = ( $tok =~ /([\$\%\@\*\&])/ );
27953    }
27954
27955    # now loop to gather the identifier
27956    my $i_save = $i;
27957
27958    while ( $i < $max_token_index ) {
27959        $i_save = $i unless ( $tok =~ /^\s*$/ );
27960        $tok = $$rtokens[ ++$i ];
27961
27962        if ( ( $tok eq ':' ) && ( $$rtokens[ $i + 1 ] eq ':' ) ) {
27963            $tok = '::';
27964            $i++;
27965        }
27966
27967        if ( $id_scan_state eq '$' ) {    # starting variable name
27968
27969            if ( $tok eq '$' ) {
27970
27971                $identifier .= $tok;
27972
27973                # we've got a punctuation variable if end of line (punct.t)
27974                if ( $i == $max_token_index ) {
27975                    $type          = 'i';
27976                    $id_scan_state = '';
27977                    last;
27978                }
27979            }
27980            elsif ( $tok =~ /^[A-Za-z_]/ ) {    # alphanumeric ..
27981                $saw_alpha     = 1;
27982                $id_scan_state = ':';           # now need ::
27983                $identifier .= $tok;
27984            }
27985            elsif ( $tok eq "'" && $allow_tick ) {    # alphanumeric ..
27986                $saw_alpha     = 1;
27987                $id_scan_state = ':';                 # now need ::
27988                $identifier .= $tok;
27989
27990                # Perl will accept leading digits in identifiers,
27991                # although they may not always produce useful results.
27992                # Something like $main::0 is ok.  But this also works:
27993                #
27994                #  sub howdy::123::bubba{ print "bubba $54321!\n" }
27995                #  howdy::123::bubba();
27996                #
27997            }
27998            elsif ( $tok =~ /^[0-9]/ ) {    # numeric
27999                $saw_alpha     = 1;
28000                $id_scan_state = ':';       # now need ::
28001                $identifier .= $tok;
28002            }
28003            elsif ( $tok eq '::' ) {
28004                $id_scan_state = 'A';
28005                $identifier .= $tok;
28006            }
28007            elsif ( ( $tok eq '#' ) && ( $identifier eq '$' ) ) {    # $#array
28008                $identifier .= $tok;    # keep same state, a $ could follow
28009            }
28010            elsif ( $tok eq '{' ) {
28011
28012                # check for something like ${#} or ${�}
28013                ##if (   $identifier eq '$'
28014                if (
28015                    (
28016                           $identifier eq '$'
28017                        || $identifier eq '@'
28018                        || $identifier eq '$#'
28019                    )
28020                    && $i + 2 <= $max_token_index
28021                    && $$rtokens[ $i + 2 ] eq '}'
28022                    && $$rtokens[ $i + 1 ] !~ /[\s\w]/
28023                  )
28024                {
28025                    my $next2 = $$rtokens[ $i + 2 ];
28026                    my $next1 = $$rtokens[ $i + 1 ];
28027                    $identifier .= $tok . $next1 . $next2;
28028                    $i += 2;
28029                    $id_scan_state = '';
28030                    last;
28031                }
28032
28033                # skip something like ${xxx} or ->{
28034                $id_scan_state = '';
28035
28036                # if this is the first token of a line, any tokens for this
28037                # identifier have already been accumulated
28038                if ( $identifier eq '$' || $i == 0 ) { $identifier = ''; }
28039                $i = $i_save;
28040                last;
28041            }
28042
28043            # space ok after leading $ % * & @
28044            elsif ( $tok =~ /^\s*$/ ) {
28045
28046                if ( $identifier =~ /^[\$\%\*\&\@]/ ) {
28047
28048                    if ( length($identifier) > 1 ) {
28049                        $id_scan_state = '';
28050                        $i             = $i_save;
28051                        $type          = 'i';    # probably punctuation variable
28052                        last;
28053                    }
28054                    else {
28055
28056                        # spaces after $'s are common, and space after @
28057                        # is harmless, so only complain about space
28058                        # after other type characters. Space after $ and
28059                        # @ will be removed in formatting.  Report space
28060                        # after % and * because they might indicate a
28061                        # parsing error.  In other words '% ' might be a
28062                        # modulo operator.  Delete this warning if it
28063                        # gets annoying.
28064                        if ( $identifier !~ /^[\@\$]$/ ) {
28065                            $message =
28066                              "Space in identifier, following $identifier\n";
28067                        }
28068                    }
28069                }
28070
28071                # else:
28072                # space after '->' is ok
28073            }
28074            elsif ( $tok eq '^' ) {
28075
28076                # check for some special variables like $^W
28077                if ( $identifier =~ /^[\$\*\@\%]$/ ) {
28078                    $identifier .= $tok;
28079                    $id_scan_state = 'A';
28080
28081                    # Perl accepts '$^]' or '@^]', but
28082                    # there must not be a space before the ']'.
28083                    my $next1 = $$rtokens[ $i + 1 ];
28084                    if ( $next1 eq ']' ) {
28085                        $i++;
28086                        $identifier .= $next1;
28087                        $id_scan_state = "";
28088                        last;
28089                    }
28090                }
28091                else {
28092                    $id_scan_state = '';
28093                }
28094            }
28095            else {    # something else
28096
28097                # check for various punctuation variables
28098                if ( $identifier =~ /^[\$\*\@\%]$/ ) {
28099                    $identifier .= $tok;
28100                }
28101
28102                elsif ( $identifier eq '$#' ) {
28103
28104                    if ( $tok eq '{' ) { $type = 'i'; $i = $i_save }
28105
28106                    # perl seems to allow just these: $#: $#- $#+
28107                    elsif ( $tok =~ /^[\:\-\+]$/ ) {
28108                        $type = 'i';
28109                        $identifier .= $tok;
28110                    }
28111                    else {
28112                        $i = $i_save;
28113                        write_logfile_entry( 'Use of $# is deprecated' . "\n" );
28114                    }
28115                }
28116                elsif ( $identifier eq '$$' ) {
28117
28118                    # perl does not allow references to punctuation
28119                    # variables without braces.  For example, this
28120                    # won't work:
28121                    #  $:=\4;
28122                    #  $a = $$:;
28123                    # You would have to use
28124                    #  $a = ${$:};
28125
28126                    $i = $i_save;
28127                    if   ( $tok eq '{' ) { $type = 't' }
28128                    else                 { $type = 'i' }
28129                }
28130                elsif ( $identifier eq '->' ) {
28131                    $i = $i_save;
28132                }
28133                else {
28134                    $i = $i_save;
28135                    if ( length($identifier) == 1 ) { $identifier = ''; }
28136                }
28137                $id_scan_state = '';
28138                last;
28139            }
28140        }
28141        elsif ( $id_scan_state eq '&' ) {    # starting sub call?
28142
28143            if ( $tok =~ /^[\$A-Za-z_]/ ) {    # alphanumeric ..
28144                $id_scan_state = ':';          # now need ::
28145                $saw_alpha     = 1;
28146                $identifier .= $tok;
28147            }
28148            elsif ( $tok eq "'" && $allow_tick ) {    # alphanumeric ..
28149                $id_scan_state = ':';                 # now need ::
28150                $saw_alpha     = 1;
28151                $identifier .= $tok;
28152            }
28153            elsif ( $tok =~ /^[0-9]/ ) {    # numeric..see comments above
28154                $id_scan_state = ':';       # now need ::
28155                $saw_alpha     = 1;
28156                $identifier .= $tok;
28157            }
28158            elsif ( $tok =~ /^\s*$/ ) {     # allow space
28159            }
28160            elsif ( $tok eq '::' ) {        # leading ::
28161                $id_scan_state = 'A';       # accept alpha next
28162                $identifier .= $tok;
28163            }
28164            elsif ( $tok eq '{' ) {
28165                if ( $identifier eq '&' || $i == 0 ) { $identifier = ''; }
28166                $i             = $i_save;
28167                $id_scan_state = '';
28168                last;
28169            }
28170            else {
28171
28172                # punctuation variable?
28173                # testfile: cunningham4.pl
28174                #
28175                # We have to be careful here.  If we are in an unknown state,
28176                # we will reject the punctuation variable.  In the following
28177                # example the '&' is a binary opeator but we are in an unknown
28178                # state because there is no sigil on 'Prima', so we don't
28179                # know what it is.  But it is a bad guess that
28180                # '&~' is a punction variable.
28181                # $self->{text}->{colorMap}->[
28182                #   Prima::PodView::COLOR_CODE_FOREGROUND
28183                #   & ~tb::COLOR_INDEX ] =
28184                #   $sec->{ColorCode}
28185                if ( $identifier eq '&' && $expecting ) {
28186                    $identifier .= $tok;
28187                }
28188                else {
28189                    $identifier = '';
28190                    $i          = $i_save;
28191                    $type       = '&';
28192                }
28193                $id_scan_state = '';
28194                last;
28195            }
28196        }
28197        elsif ( $id_scan_state eq 'A' ) {    # looking for alpha (after ::)
28198
28199            if ( $tok =~ /^[A-Za-z_]/ ) {    # found it
28200                $identifier .= $tok;
28201                $id_scan_state = ':';        # now need ::
28202                $saw_alpha     = 1;
28203            }
28204            elsif ( $tok eq "'" && $allow_tick ) {
28205                $identifier .= $tok;
28206                $id_scan_state = ':';        # now need ::
28207                $saw_alpha     = 1;
28208            }
28209            elsif ( $tok =~ /^[0-9]/ ) {     # numeric..see comments above
28210                $identifier .= $tok;
28211                $id_scan_state = ':';        # now need ::
28212                $saw_alpha     = 1;
28213            }
28214            elsif ( ( $identifier =~ /^sub / ) && ( $tok =~ /^\s*$/ ) ) {
28215                $id_scan_state = '(';
28216                $identifier .= $tok;
28217            }
28218            elsif ( ( $identifier =~ /^sub / ) && ( $tok eq '(' ) ) {
28219                $id_scan_state = ')';
28220                $identifier .= $tok;
28221            }
28222            else {
28223                $id_scan_state = '';
28224                $i             = $i_save;
28225                last;
28226            }
28227        }
28228        elsif ( $id_scan_state eq ':' ) {    # looking for :: after alpha
28229
28230            if ( $tok eq '::' ) {            # got it
28231                $identifier .= $tok;
28232                $id_scan_state = 'A';        # now require alpha
28233            }
28234            elsif ( $tok =~ /^[A-Za-z_]/ ) {    # more alphanumeric is ok here
28235                $identifier .= $tok;
28236                $id_scan_state = ':';           # now need ::
28237                $saw_alpha     = 1;
28238            }
28239            elsif ( $tok =~ /^[0-9]/ ) {        # numeric..see comments above
28240                $identifier .= $tok;
28241                $id_scan_state = ':';           # now need ::
28242                $saw_alpha     = 1;
28243            }
28244            elsif ( $tok eq "'" && $allow_tick ) {    # tick
28245
28246                if ( $is_keyword{$identifier} ) {
28247                    $id_scan_state = '';              # that's all
28248                    $i             = $i_save;
28249                }
28250                else {
28251                    $identifier .= $tok;
28252                }
28253            }
28254            elsif ( ( $identifier =~ /^sub / ) && ( $tok =~ /^\s*$/ ) ) {
28255                $id_scan_state = '(';
28256                $identifier .= $tok;
28257            }
28258            elsif ( ( $identifier =~ /^sub / ) && ( $tok eq '(' ) ) {
28259                $id_scan_state = ')';
28260                $identifier .= $tok;
28261            }
28262            else {
28263                $id_scan_state = '';        # that's all
28264                $i             = $i_save;
28265                last;
28266            }
28267        }
28268        elsif ( $id_scan_state eq '(' ) {    # looking for ( of prototype
28269
28270            if ( $tok eq '(' ) {             # got it
28271                $identifier .= $tok;
28272                $id_scan_state = ')';        # now find the end of it
28273            }
28274            elsif ( $tok =~ /^\s*$/ ) {      # blank - keep going
28275                $identifier .= $tok;
28276            }
28277            else {
28278                $id_scan_state = '';         # that's all - no prototype
28279                $i             = $i_save;
28280                last;
28281            }
28282        }
28283        elsif ( $id_scan_state eq ')' ) {    # looking for ) to end
28284
28285            if ( $tok eq ')' ) {             # got it
28286                $identifier .= $tok;
28287                $id_scan_state = '';         # all done
28288                last;
28289            }
28290            elsif ( $tok =~ /^[\s\$\%\\\*\@\&\;]/ ) {
28291                $identifier .= $tok;
28292            }
28293            else {    # probable error in script, but keep going
28294                warning("Unexpected '$tok' while seeking end of prototype\n");
28295                $identifier .= $tok;
28296            }
28297        }
28298        else {        # can get here due to error in initialization
28299            $id_scan_state = '';
28300            $i             = $i_save;
28301            last;
28302        }
28303    }
28304
28305    if ( $id_scan_state eq ')' ) {
28306        warning("Hit end of line while seeking ) to end prototype\n");
28307    }
28308
28309    # once we enter the actual identifier, it may not extend beyond
28310    # the end of the current line
28311    if ( $id_scan_state =~ /^[A\:\(\)]/ ) {
28312        $id_scan_state = '';
28313    }
28314    if ( $i < 0 ) { $i = 0 }
28315
28316    unless ($type) {
28317
28318        if ($saw_type) {
28319
28320            if ($saw_alpha) {
28321                if ( $identifier =~ /^->/ && $last_nonblank_type eq 'w' ) {
28322                    $type = 'w';
28323                }
28324                else { $type = 'i' }
28325            }
28326            elsif ( $identifier eq '->' ) {
28327                $type = '->';
28328            }
28329            elsif (
28330                ( length($identifier) > 1 )
28331
28332                # In something like '@$=' we have an identifier '@$'
28333                # In something like '$${' we have type '$$' (and only
28334                # part of an identifier)
28335                && !( $identifier =~ /\$$/ && $tok eq '{' )
28336                && ( $identifier !~ /^(sub |package )$/ )
28337              )
28338            {
28339                $type = 'i';
28340            }
28341            else { $type = 't' }
28342        }
28343        elsif ($saw_alpha) {
28344
28345            # type 'w' includes anything without leading type info
28346            # ($,%,@,*) including something like abc::def::ghi
28347            $type = 'w';
28348        }
28349        else {
28350            $type = '';
28351        }    # this can happen on a restart
28352    }
28353
28354    if ($identifier) {
28355        $tok = $identifier;
28356        if ($message) { write_logfile_entry($message) }
28357    }
28358    else {
28359        $tok = $tok_begin;
28360        $i   = $i_begin;
28361    }
28362
28363    TOKENIZER_DEBUG_FLAG_SCAN_ID && do {
28364        my ( $a, $b, $c ) = caller;
28365        print STDOUT
28366"SCANID: called from $a $b $c with tok, i, state, identifier =$tok_begin, $i_begin, $id_scan_state_begin, $identifier_begin\n";
28367        print STDOUT
28368"SCANID: returned with tok, i, state, identifier =$tok, $i, $id_scan_state, $identifier\n";
28369    };
28370    return ( $i, $tok, $type, $id_scan_state, $identifier );
28371}
28372
28373{
28374
28375    # saved package and subnames in case prototype is on separate line
28376    my ( $package_saved, $subname_saved );
28377
28378    sub do_scan_sub {
28379
28380        # do_scan_sub parses a sub name and prototype
28381        # it is called with $i_beg equal to the index of the first nonblank
28382        # token following a 'sub' token.
28383
28384        # TODO: add future error checks to be sure we have a valid
28385        # sub name.  For example, 'sub &doit' is wrong.  Also, be sure
28386        # a name is given if and only if a non-anonymous sub is
28387        # appropriate.
28388        # USES GLOBAL VARS: $current_package, $last_nonblank_token,
28389        # $in_attribute_list, %saw_function_definition,
28390        # $statement_type
28391
28392        my (
28393            $input_line, $i,             $i_beg,
28394            $tok,        $type,          $rtokens,
28395            $rtoken_map, $id_scan_state, $max_token_index
28396        ) = @_;
28397        $id_scan_state = "";    # normally we get everything in one call
28398        my $subname = undef;
28399        my $package = undef;
28400        my $proto   = undef;
28401        my $attrs   = undef;
28402        my $match;
28403
28404        my $pos_beg = $$rtoken_map[$i_beg];
28405        pos($input_line) = $pos_beg;
28406
28407        # sub NAME PROTO ATTRS
28408        if (
28409            $input_line =~ m/\G\s*
28410        ((?:\w*(?:'|::))*)  # package - something that ends in :: or '
28411        (\w+)               # NAME    - required
28412        (\s*\([^){]*\))?    # PROTO   - something in parens
28413        (\s*:)?             # ATTRS   - leading : of attribute list
28414        /gcx
28415          )
28416        {
28417            $match   = 1;
28418            $subname = $2;
28419            $proto   = $3;
28420            $attrs   = $4;
28421
28422            $package = ( defined($1) && $1 ) ? $1 : $current_package;
28423            $package =~ s/\'/::/g;
28424            if ( $package =~ /^\:/ ) { $package = 'main' . $package }
28425            $package =~ s/::$//;
28426            my $pos  = pos($input_line);
28427            my $numc = $pos - $pos_beg;
28428            $tok = 'sub ' . substr( $input_line, $pos_beg, $numc );
28429            $type = 'i';
28430        }
28431
28432        # Look for prototype/attributes not preceded on this line by subname;
28433        # This might be an anonymous sub with attributes,
28434        # or a prototype on a separate line from its sub name
28435        elsif (
28436            $input_line =~ m/\G(\s*\([^){]*\))?  # PROTO
28437            (\s*:)?                              # ATTRS leading ':'
28438            /gcx
28439            && ( $1 || $2 )
28440          )
28441        {
28442            $match = 1;
28443            $proto = $1;
28444            $attrs = $2;
28445
28446            # Handle prototype on separate line from subname
28447            if ($subname_saved) {
28448                $package = $package_saved;
28449                $subname = $subname_saved;
28450                $tok     = $last_nonblank_token;
28451            }
28452            $type = 'i';
28453        }
28454
28455        if ($match) {
28456
28457            # ATTRS: if there are attributes, back up and let the ':' be
28458            # found later by the scanner.
28459            my $pos = pos($input_line);
28460            if ($attrs) {
28461                $pos -= length($attrs);
28462            }
28463
28464            my $next_nonblank_token = $tok;
28465
28466            # catch case of line with leading ATTR ':' after anonymous sub
28467            if ( $pos == $pos_beg && $tok eq ':' ) {
28468                $type              = 'A';
28469                $in_attribute_list = 1;
28470            }
28471
28472            # We must convert back from character position
28473            # to pre_token index.
28474            else {
28475
28476                # I don't think an error flag can occur here ..but ?
28477                my $error;
28478                ( $i, $error ) = inverse_pretoken_map( $i, $pos, $rtoken_map,
28479                    $max_token_index );
28480                if ($error) { warning("Possibly invalid sub\n") }
28481
28482                # check for multiple definitions of a sub
28483                ( $next_nonblank_token, my $i_next ) =
28484                  find_next_nonblank_token_on_this_line( $i, $rtokens,
28485                    $max_token_index );
28486            }
28487
28488            if ( $next_nonblank_token =~ /^(\s*|#)$/ )
28489            {    # skip blank or side comment
28490                my ( $rpre_tokens, $rpre_types ) =
28491                  peek_ahead_for_n_nonblank_pre_tokens(1);
28492                if ( defined($rpre_tokens) && @$rpre_tokens ) {
28493                    $next_nonblank_token = $rpre_tokens->[0];
28494                }
28495                else {
28496                    $next_nonblank_token = '}';
28497                }
28498            }
28499            $package_saved = "";
28500            $subname_saved = "";
28501            if ( $next_nonblank_token eq '{' ) {
28502                if ($subname) {
28503
28504                    # Check for multiple definitions of a sub, but
28505                    # it is ok to have multiple sub BEGIN, etc,
28506                    # so we do not complain if name is all caps
28507                    if (   $saw_function_definition{$package}{$subname}
28508                        && $subname !~ /^[A-Z]+$/ )
28509                    {
28510                        my $lno = $saw_function_definition{$package}{$subname};
28511                        warning(
28512"already saw definition of 'sub $subname' in package '$package' at line $lno\n"
28513                        );
28514                    }
28515                    $saw_function_definition{$package}{$subname} =
28516                      $tokenizer_self->{_last_line_number};
28517                }
28518            }
28519            elsif ( $next_nonblank_token eq ';' ) {
28520            }
28521            elsif ( $next_nonblank_token eq '}' ) {
28522            }
28523
28524            # ATTRS - if an attribute list follows, remember the name
28525            # of the sub so the next opening brace can be labeled.
28526            # Setting 'statement_type' causes any ':'s to introduce
28527            # attributes.
28528            elsif ( $next_nonblank_token eq ':' ) {
28529                $statement_type = $tok;
28530            }
28531
28532            # see if PROTO follows on another line:
28533            elsif ( $next_nonblank_token eq '(' ) {
28534                if ( $attrs || $proto ) {
28535                    warning(
28536"unexpected '(' after definition or declaration of sub '$subname'\n"
28537                    );
28538                }
28539                else {
28540                    $id_scan_state  = 'sub';    # we must come back to get proto
28541                    $statement_type = $tok;
28542                    $package_saved  = $package;
28543                    $subname_saved  = $subname;
28544                }
28545            }
28546            elsif ($next_nonblank_token) {      # EOF technically ok
28547                warning(
28548"expecting ':' or ';' or '{' after definition or declaration of sub '$subname' but saw '$next_nonblank_token'\n"
28549                );
28550            }
28551            check_prototype( $proto, $package, $subname );
28552        }
28553
28554        # no match but line not blank
28555        else {
28556        }
28557        return ( $i, $tok, $type, $id_scan_state );
28558    }
28559}
28560
28561#########i###############################################################
28562# Tokenizer utility routines which may use CONSTANTS but no other GLOBALS
28563#########################################################################
28564
28565sub find_next_nonblank_token {
28566    my ( $i, $rtokens, $max_token_index ) = @_;
28567
28568    if ( $i >= $max_token_index ) {
28569        if ( !peeked_ahead() ) {
28570            peeked_ahead(1);
28571            $rtokens =
28572              peek_ahead_for_nonblank_token( $rtokens, $max_token_index );
28573        }
28574    }
28575    my $next_nonblank_token = $$rtokens[ ++$i ];
28576
28577    if ( $next_nonblank_token =~ /^\s*$/ ) {
28578        $next_nonblank_token = $$rtokens[ ++$i ];
28579    }
28580    return ( $next_nonblank_token, $i );
28581}
28582
28583sub numerator_expected {
28584
28585    # this is a filter for a possible numerator, in support of guessing
28586    # for the / pattern delimiter token.
28587    # returns -
28588    #   1 - yes
28589    #   0 - can't tell
28590    #  -1 - no
28591    # Note: I am using the convention that variables ending in
28592    # _expected have these 3 possible values.
28593    my ( $i, $rtokens, $max_token_index ) = @_;
28594    my $next_token = $$rtokens[ $i + 1 ];
28595    if ( $next_token eq '=' ) { $i++; }    # handle /=
28596    my ( $next_nonblank_token, $i_next ) =
28597      find_next_nonblank_token( $i, $rtokens, $max_token_index );
28598
28599    if ( $next_nonblank_token =~ /(\(|\$|\w|\.|\@)/ ) {
28600        1;
28601    }
28602    else {
28603
28604        if ( $next_nonblank_token =~ /^\s*$/ ) {
28605            0;
28606        }
28607        else {
28608            -1;
28609        }
28610    }
28611}
28612
28613sub pattern_expected {
28614
28615    # This is the start of a filter for a possible pattern.
28616    # It looks at the token after a possbible pattern and tries to
28617    # determine if that token could end a pattern.
28618    # returns -
28619    #   1 - yes
28620    #   0 - can't tell
28621    #  -1 - no
28622    my ( $i, $rtokens, $max_token_index ) = @_;
28623    my $next_token = $$rtokens[ $i + 1 ];
28624    if ( $next_token =~ /^[msixpodualgc]/ ) { $i++; }   # skip possible modifier
28625    my ( $next_nonblank_token, $i_next ) =
28626      find_next_nonblank_token( $i, $rtokens, $max_token_index );
28627
28628    # list of tokens which may follow a pattern
28629    # (can probably be expanded)
28630    if ( $next_nonblank_token =~ /(\)|\}|\;|\&\&|\|\||and|or|while|if|unless)/ )
28631    {
28632        1;
28633    }
28634    else {
28635
28636        if ( $next_nonblank_token =~ /^\s*$/ ) {
28637            0;
28638        }
28639        else {
28640            -1;
28641        }
28642    }
28643}
28644
28645sub find_next_nonblank_token_on_this_line {
28646    my ( $i, $rtokens, $max_token_index ) = @_;
28647    my $next_nonblank_token;
28648
28649    if ( $i < $max_token_index ) {
28650        $next_nonblank_token = $$rtokens[ ++$i ];
28651
28652        if ( $next_nonblank_token =~ /^\s*$/ ) {
28653
28654            if ( $i < $max_token_index ) {
28655                $next_nonblank_token = $$rtokens[ ++$i ];
28656            }
28657        }
28658    }
28659    else {
28660        $next_nonblank_token = "";
28661    }
28662    return ( $next_nonblank_token, $i );
28663}
28664
28665sub find_angle_operator_termination {
28666
28667    # We are looking at a '<' and want to know if it is an angle operator.
28668    # We are to return:
28669    #   $i = pretoken index of ending '>' if found, current $i otherwise
28670    #   $type = 'Q' if found, '>' otherwise
28671    my ( $input_line, $i_beg, $rtoken_map, $expecting, $max_token_index ) = @_;
28672    my $i    = $i_beg;
28673    my $type = '<';
28674    pos($input_line) = 1 + $$rtoken_map[$i];
28675
28676    my $filter;
28677
28678    # we just have to find the next '>' if a term is expected
28679    if ( $expecting == TERM ) { $filter = '[\>]' }
28680
28681    # we have to guess if we don't know what is expected
28682    elsif ( $expecting == UNKNOWN ) { $filter = '[\>\;\=\#\|\<]' }
28683
28684    # shouldn't happen - we shouldn't be here if operator is expected
28685    else { warning("Program Bug in find_angle_operator_termination\n") }
28686
28687    # To illustrate what we might be looking at, in case we are
28688    # guessing, here are some examples of valid angle operators
28689    # (or file globs):
28690    #  <tmp_imp/*>
28691    #  <FH>
28692    #  <$fh>
28693    #  <*.c *.h>
28694    #  <_>
28695    #  <jskdfjskdfj* op/* jskdjfjkosvk*> ( glob.t)
28696    #  <${PREFIX}*img*.$IMAGE_TYPE>
28697    #  <img*.$IMAGE_TYPE>
28698    #  <Timg*.$IMAGE_TYPE>
28699    #  <$LATEX2HTMLVERSIONS${dd}html[1-9].[0-9].pl>
28700    #
28701    # Here are some examples of lines which do not have angle operators:
28702    #  return undef unless $self->[2]++ < $#{$self->[1]};
28703    #  < 2  || @$t >
28704    #
28705    # the following line from dlister.pl caused trouble:
28706    #  print'~'x79,"\n",$D<1024?"0.$D":$D>>10,"K, $C files\n\n\n";
28707    #
28708    # If the '<' starts an angle operator, it must end on this line and
28709    # it must not have certain characters like ';' and '=' in it.  I use
28710    # this to limit the testing.  This filter should be improved if
28711    # possible.
28712
28713    if ( $input_line =~ /($filter)/g ) {
28714
28715        if ( $1 eq '>' ) {
28716
28717            # We MAY have found an angle operator termination if we get
28718            # here, but we need to do more to be sure we haven't been
28719            # fooled.
28720            my $pos = pos($input_line);
28721
28722            my $pos_beg = $$rtoken_map[$i];
28723            my $str = substr( $input_line, $pos_beg, ( $pos - $pos_beg ) );
28724
28725            # Reject if the closing '>' follows a '-' as in:
28726            # if ( VERSION < 5.009 && $op-> name eq 'aassign' ) { }
28727            if ( $expecting eq UNKNOWN ) {
28728                my $check = substr( $input_line, $pos - 2, 1 );
28729                if ( $check eq '-' ) {
28730                    return ( $i, $type );
28731                }
28732            }
28733
28734            ######################################debug#####
28735            #write_diagnostics( "ANGLE? :$str\n");
28736            #print "ANGLE: found $1 at pos=$pos str=$str check=$check\n";
28737            ######################################debug#####
28738            $type = 'Q';
28739            my $error;
28740            ( $i, $error ) =
28741              inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index );
28742
28743            # It may be possible that a quote ends midway in a pretoken.
28744            # If this happens, it may be necessary to split the pretoken.
28745            if ($error) {
28746                warning(
28747                    "Possible tokinization error..please check this line\n");
28748                report_possible_bug();
28749            }
28750
28751            # Now let's see where we stand....
28752            # OK if math op not possible
28753            if ( $expecting == TERM ) {
28754            }
28755
28756            # OK if there are no more than 2 pre-tokens inside
28757            # (not possible to write 2 token math between < and >)
28758            # This catches most common cases
28759            elsif ( $i <= $i_beg + 3 ) {
28760                write_diagnostics("ANGLE(1 or 2 tokens): $str\n");
28761            }
28762
28763            # Not sure..
28764            else {
28765
28766                # Let's try a Brace Test: any braces inside must balance
28767                my $br = 0;
28768                while ( $str =~ /\{/g ) { $br++ }
28769                while ( $str =~ /\}/g ) { $br-- }
28770                my $sb = 0;
28771                while ( $str =~ /\[/g ) { $sb++ }
28772                while ( $str =~ /\]/g ) { $sb-- }
28773                my $pr = 0;
28774                while ( $str =~ /\(/g ) { $pr++ }
28775                while ( $str =~ /\)/g ) { $pr-- }
28776
28777                # if braces do not balance - not angle operator
28778                if ( $br || $sb || $pr ) {
28779                    $i    = $i_beg;
28780                    $type = '<';
28781                    write_diagnostics(
28782                        "NOT ANGLE (BRACE={$br ($pr [$sb ):$str\n");
28783                }
28784
28785                # we should keep doing more checks here...to be continued
28786                # Tentatively accepting this as a valid angle operator.
28787                # There are lots more things that can be checked.
28788                else {
28789                    write_diagnostics(
28790                        "ANGLE-Guessing yes: $str expecting=$expecting\n");
28791                    write_logfile_entry("Guessing angle operator here: $str\n");
28792                }
28793            }
28794        }
28795
28796        # didn't find ending >
28797        else {
28798            if ( $expecting == TERM ) {
28799                warning("No ending > for angle operator\n");
28800            }
28801        }
28802    }
28803    return ( $i, $type );
28804}
28805
28806sub scan_number_do {
28807
28808    #  scan a number in any of the formats that Perl accepts
28809    #  Underbars (_) are allowed in decimal numbers.
28810    #  input parameters -
28811    #      $input_line  - the string to scan
28812    #      $i           - pre_token index to start scanning
28813    #    $rtoken_map    - reference to the pre_token map giving starting
28814    #                    character position in $input_line of token $i
28815    #  output parameters -
28816    #    $i            - last pre_token index of the number just scanned
28817    #    number        - the number (characters); or undef if not a number
28818
28819    my ( $input_line, $i, $rtoken_map, $input_type, $max_token_index ) = @_;
28820    my $pos_beg = $$rtoken_map[$i];
28821    my $pos;
28822    my $i_begin = $i;
28823    my $number  = undef;
28824    my $type    = $input_type;
28825
28826    my $first_char = substr( $input_line, $pos_beg, 1 );
28827
28828    # Look for bad starting characters; Shouldn't happen..
28829    if ( $first_char !~ /[\d\.\+\-Ee]/ ) {
28830        warning("Program bug - scan_number given character $first_char\n");
28831        report_definite_bug();
28832        return ( $i, $type, $number );
28833    }
28834
28835    # handle v-string without leading 'v' character ('Two Dot' rule)
28836    # (vstring.t)
28837    # TODO: v-strings may contain underscores
28838    pos($input_line) = $pos_beg;
28839    if ( $input_line =~ /\G((\d+)?\.\d+(\.\d+)+)/g ) {
28840        $pos = pos($input_line);
28841        my $numc = $pos - $pos_beg;
28842        $number = substr( $input_line, $pos_beg, $numc );
28843        $type = 'v';
28844        report_v_string($number);
28845    }
28846
28847    # handle octal, hex, binary
28848    if ( !defined($number) ) {
28849        pos($input_line) = $pos_beg;
28850        if ( $input_line =~
28851            /\G[+-]?0(([xX][0-9a-fA-F_]+)|([0-7_]+)|([bB][01_]+))/g )
28852        {
28853            $pos = pos($input_line);
28854            my $numc = $pos - $pos_beg;
28855            $number = substr( $input_line, $pos_beg, $numc );
28856            $type = 'n';
28857        }
28858    }
28859
28860    # handle decimal
28861    if ( !defined($number) ) {
28862        pos($input_line) = $pos_beg;
28863
28864        if ( $input_line =~ /\G([+-]?[\d_]*(\.[\d_]*)?([Ee][+-]?(\d+))?)/g ) {
28865            $pos = pos($input_line);
28866
28867            # watch out for things like 0..40 which would give 0. by this;
28868            if (   ( substr( $input_line, $pos - 1, 1 ) eq '.' )
28869                && ( substr( $input_line, $pos, 1 ) eq '.' ) )
28870            {
28871                $pos--;
28872            }
28873            my $numc = $pos - $pos_beg;
28874            $number = substr( $input_line, $pos_beg, $numc );
28875            $type = 'n';
28876        }
28877    }
28878
28879    # filter out non-numbers like e + - . e2  .e3 +e6
28880    # the rule: at least one digit, and any 'e' must be preceded by a digit
28881    if (
28882        $number !~ /\d/    # no digits
28883        || (   $number =~ /^(.*)[eE]/
28884            && $1 !~ /\d/ )    # or no digits before the 'e'
28885      )
28886    {
28887        $number = undef;
28888        $type   = $input_type;
28889        return ( $i, $type, $number );
28890    }
28891
28892    # Found a number; now we must convert back from character position
28893    # to pre_token index. An error here implies user syntax error.
28894    # An example would be an invalid octal number like '009'.
28895    my $error;
28896    ( $i, $error ) =
28897      inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index );
28898    if ($error) { warning("Possibly invalid number\n") }
28899
28900    return ( $i, $type, $number );
28901}
28902
28903sub inverse_pretoken_map {
28904
28905    # Starting with the current pre_token index $i, scan forward until
28906    # finding the index of the next pre_token whose position is $pos.
28907    my ( $i, $pos, $rtoken_map, $max_token_index ) = @_;
28908    my $error = 0;
28909
28910    while ( ++$i <= $max_token_index ) {
28911
28912        if ( $pos <= $$rtoken_map[$i] ) {
28913
28914            # Let the calling routine handle errors in which we do not
28915            # land on a pre-token boundary.  It can happen by running
28916            # perltidy on some non-perl scripts, for example.
28917            if ( $pos < $$rtoken_map[$i] ) { $error = 1 }
28918            $i--;
28919            last;
28920        }
28921    }
28922    return ( $i, $error );
28923}
28924
28925sub find_here_doc {
28926
28927    # find the target of a here document, if any
28928    # input parameters:
28929    #   $i - token index of the second < of <<
28930    #   ($i must be less than the last token index if this is called)
28931    # output parameters:
28932    #   $found_target = 0 didn't find target; =1 found target
28933    #   HERE_TARGET - the target string (may be empty string)
28934    #   $i - unchanged if not here doc,
28935    #    or index of the last token of the here target
28936    #   $saw_error - flag noting unbalanced quote on here target
28937    my ( $expecting, $i, $rtokens, $rtoken_map, $max_token_index ) = @_;
28938    my $ibeg                 = $i;
28939    my $found_target         = 0;
28940    my $here_doc_target      = '';
28941    my $here_quote_character = '';
28942    my $saw_error            = 0;
28943    my ( $next_nonblank_token, $i_next_nonblank, $next_token );
28944    $next_token = $$rtokens[ $i + 1 ];
28945
28946    # perl allows a backslash before the target string (heredoc.t)
28947    my $backslash = 0;
28948    if ( $next_token eq '\\' ) {
28949        $backslash  = 1;
28950        $next_token = $$rtokens[ $i + 2 ];
28951    }
28952
28953    ( $next_nonblank_token, $i_next_nonblank ) =
28954      find_next_nonblank_token_on_this_line( $i, $rtokens, $max_token_index );
28955
28956    if ( $next_nonblank_token =~ /[\'\"\`]/ ) {
28957
28958        my $in_quote    = 1;
28959        my $quote_depth = 0;
28960        my $quote_pos   = 0;
28961        my $quoted_string;
28962
28963        (
28964            $i, $in_quote, $here_quote_character, $quote_pos, $quote_depth,
28965            $quoted_string
28966          )
28967          = follow_quoted_string( $i_next_nonblank, $in_quote, $rtokens,
28968            $here_quote_character, $quote_pos, $quote_depth, $max_token_index );
28969
28970        if ($in_quote) {    # didn't find end of quote, so no target found
28971            $i = $ibeg;
28972            if ( $expecting == TERM ) {
28973                warning(
28974"Did not find here-doc string terminator ($here_quote_character) before end of line \n"
28975                );
28976                $saw_error = 1;
28977            }
28978        }
28979        else {              # found ending quote
28980            my $j;
28981            $found_target = 1;
28982
28983            my $tokj;
28984            for ( $j = $i_next_nonblank + 1 ; $j < $i ; $j++ ) {
28985                $tokj = $$rtokens[$j];
28986
28987                # we have to remove any backslash before the quote character
28988                # so that the here-doc-target exactly matches this string
28989                next
28990                  if ( $tokj eq "\\"
28991                    && $j < $i - 1
28992                    && $$rtokens[ $j + 1 ] eq $here_quote_character );
28993                $here_doc_target .= $tokj;
28994            }
28995        }
28996    }
28997
28998    elsif ( ( $next_token =~ /^\s*$/ ) and ( $expecting == TERM ) ) {
28999        $found_target = 1;
29000        write_logfile_entry(
29001            "found blank here-target after <<; suggest using \"\"\n");
29002        $i = $ibeg;
29003    }
29004    elsif ( $next_token =~ /^\w/ ) {    # simple bareword or integer after <<
29005
29006        my $here_doc_expected;
29007        if ( $expecting == UNKNOWN ) {
29008            $here_doc_expected = guess_if_here_doc($next_token);
29009        }
29010        else {
29011            $here_doc_expected = 1;
29012        }
29013
29014        if ($here_doc_expected) {
29015            $found_target    = 1;
29016            $here_doc_target = $next_token;
29017            $i               = $ibeg + 1;
29018        }
29019
29020    }
29021    else {
29022
29023        if ( $expecting == TERM ) {
29024            $found_target = 1;
29025            write_logfile_entry("Note: bare here-doc operator <<\n");
29026        }
29027        else {
29028            $i = $ibeg;
29029        }
29030    }
29031
29032    # patch to neglect any prepended backslash
29033    if ( $found_target && $backslash ) { $i++ }
29034
29035    return ( $found_target, $here_doc_target, $here_quote_character, $i,
29036        $saw_error );
29037}
29038
29039sub do_quote {
29040
29041    # follow (or continue following) quoted string(s)
29042    # $in_quote return code:
29043    #   0 - ok, found end
29044    #   1 - still must find end of quote whose target is $quote_character
29045    #   2 - still looking for end of first of two quotes
29046    #
29047    # Returns updated strings:
29048    #  $quoted_string_1 = quoted string seen while in_quote=1
29049    #  $quoted_string_2 = quoted string seen while in_quote=2
29050    my (
29051        $i,               $in_quote,    $quote_character,
29052        $quote_pos,       $quote_depth, $quoted_string_1,
29053        $quoted_string_2, $rtokens,     $rtoken_map,
29054        $max_token_index
29055    ) = @_;
29056
29057    my $in_quote_starting = $in_quote;
29058
29059    my $quoted_string;
29060    if ( $in_quote == 2 ) {    # two quotes/quoted_string_1s to follow
29061        my $ibeg = $i;
29062        (
29063            $i, $in_quote, $quote_character, $quote_pos, $quote_depth,
29064            $quoted_string
29065          )
29066          = follow_quoted_string( $i, $in_quote, $rtokens, $quote_character,
29067            $quote_pos, $quote_depth, $max_token_index );
29068        $quoted_string_2 .= $quoted_string;
29069        if ( $in_quote == 1 ) {
29070            if ( $quote_character =~ /[\{\[\<\(]/ ) { $i++; }
29071            $quote_character = '';
29072        }
29073        else {
29074            $quoted_string_2 .= "\n";
29075        }
29076    }
29077
29078    if ( $in_quote == 1 ) {    # one (more) quote to follow
29079        my $ibeg = $i;
29080        (
29081            $i, $in_quote, $quote_character, $quote_pos, $quote_depth,
29082            $quoted_string
29083          )
29084          = follow_quoted_string( $ibeg, $in_quote, $rtokens, $quote_character,
29085            $quote_pos, $quote_depth, $max_token_index );
29086        $quoted_string_1 .= $quoted_string;
29087        if ( $in_quote == 1 ) {
29088            $quoted_string_1 .= "\n";
29089        }
29090    }
29091    return ( $i, $in_quote, $quote_character, $quote_pos, $quote_depth,
29092        $quoted_string_1, $quoted_string_2 );
29093}
29094
29095sub follow_quoted_string {
29096
29097    # scan for a specific token, skipping escaped characters
29098    # if the quote character is blank, use the first non-blank character
29099    # input parameters:
29100    #   $rtokens = reference to the array of tokens
29101    #   $i = the token index of the first character to search
29102    #   $in_quote = number of quoted strings being followed
29103    #   $beginning_tok = the starting quote character
29104    #   $quote_pos = index to check next for alphanumeric delimiter
29105    # output parameters:
29106    #   $i = the token index of the ending quote character
29107    #   $in_quote = decremented if found end, unchanged if not
29108    #   $beginning_tok = the starting quote character
29109    #   $quote_pos = index to check next for alphanumeric delimiter
29110    #   $quote_depth = nesting depth, since delimiters '{ ( [ <' can be nested.
29111    #   $quoted_string = the text of the quote (without quotation tokens)
29112    my ( $i_beg, $in_quote, $rtokens, $beginning_tok, $quote_pos, $quote_depth,
29113        $max_token_index )
29114      = @_;
29115    my ( $tok, $end_tok );
29116    my $i             = $i_beg - 1;
29117    my $quoted_string = "";
29118
29119    TOKENIZER_DEBUG_FLAG_QUOTE && do {
29120        print STDOUT
29121"QUOTE entering with quote_pos = $quote_pos i=$i beginning_tok =$beginning_tok\n";
29122    };
29123
29124    # get the corresponding end token
29125    if ( $beginning_tok !~ /^\s*$/ ) {
29126        $end_tok = matching_end_token($beginning_tok);
29127    }
29128
29129    # a blank token means we must find and use the first non-blank one
29130    else {
29131        my $allow_quote_comments = ( $i < 0 ) ? 1 : 0; # i<0 means we saw a <cr>
29132
29133        while ( $i < $max_token_index ) {
29134            $tok = $$rtokens[ ++$i ];
29135
29136            if ( $tok !~ /^\s*$/ ) {
29137
29138                if ( ( $tok eq '#' ) && ($allow_quote_comments) ) {
29139                    $i = $max_token_index;
29140                }
29141                else {
29142
29143                    if ( length($tok) > 1 ) {
29144                        if ( $quote_pos <= 0 ) { $quote_pos = 1 }
29145                        $beginning_tok = substr( $tok, $quote_pos - 1, 1 );
29146                    }
29147                    else {
29148                        $beginning_tok = $tok;
29149                        $quote_pos     = 0;
29150                    }
29151                    $end_tok     = matching_end_token($beginning_tok);
29152                    $quote_depth = 1;
29153                    last;
29154                }
29155            }
29156            else {
29157                $allow_quote_comments = 1;
29158            }
29159        }
29160    }
29161
29162    # There are two different loops which search for the ending quote
29163    # character.  In the rare case of an alphanumeric quote delimiter, we
29164    # have to look through alphanumeric tokens character-by-character, since
29165    # the pre-tokenization process combines multiple alphanumeric
29166    # characters, whereas for a non-alphanumeric delimiter, only tokens of
29167    # length 1 can match.
29168
29169    ###################################################################
29170    # Case 1 (rare): loop for case of alphanumeric quote delimiter..
29171    # "quote_pos" is the position the current word to begin searching
29172    ###################################################################
29173    if ( $beginning_tok =~ /\w/ ) {
29174
29175        # Note this because it is not recommended practice except
29176        # for obfuscated perl contests
29177        if ( $in_quote == 1 ) {
29178            write_logfile_entry(
29179                "Note: alphanumeric quote delimiter ($beginning_tok) \n");
29180        }
29181
29182        while ( $i < $max_token_index ) {
29183
29184            if ( $quote_pos == 0 || ( $i < 0 ) ) {
29185                $tok = $$rtokens[ ++$i ];
29186
29187                if ( $tok eq '\\' ) {
29188
29189                    # retain backslash unless it hides the end token
29190                    $quoted_string .= $tok
29191                      unless $$rtokens[ $i + 1 ] eq $end_tok;
29192                    $quote_pos++;
29193                    last if ( $i >= $max_token_index );
29194                    $tok = $$rtokens[ ++$i ];
29195                }
29196            }
29197            my $old_pos = $quote_pos;
29198
29199            unless ( defined($tok) && defined($end_tok) && defined($quote_pos) )
29200            {
29201
29202            }
29203            $quote_pos = 1 + index( $tok, $end_tok, $quote_pos );
29204
29205            if ( $quote_pos > 0 ) {
29206
29207                $quoted_string .=
29208                  substr( $tok, $old_pos, $quote_pos - $old_pos - 1 );
29209
29210                $quote_depth--;
29211
29212                if ( $quote_depth == 0 ) {
29213                    $in_quote--;
29214                    last;
29215                }
29216            }
29217            else {
29218                $quoted_string .= substr( $tok, $old_pos );
29219            }
29220        }
29221    }
29222
29223    ########################################################################
29224    # Case 2 (normal): loop for case of a non-alphanumeric quote delimiter..
29225    ########################################################################
29226    else {
29227
29228        while ( $i < $max_token_index ) {
29229            $tok = $$rtokens[ ++$i ];
29230
29231            if ( $tok eq $end_tok ) {
29232                $quote_depth--;
29233
29234                if ( $quote_depth == 0 ) {
29235                    $in_quote--;
29236                    last;
29237                }
29238            }
29239            elsif ( $tok eq $beginning_tok ) {
29240                $quote_depth++;
29241            }
29242            elsif ( $tok eq '\\' ) {
29243
29244                # retain backslash unless it hides the beginning or end token
29245                $tok = $$rtokens[ ++$i ];
29246                $quoted_string .= '\\'
29247                  unless ( $tok eq $end_tok || $tok eq $beginning_tok );
29248            }
29249            $quoted_string .= $tok;
29250        }
29251    }
29252    if ( $i > $max_token_index ) { $i = $max_token_index }
29253    return ( $i, $in_quote, $beginning_tok, $quote_pos, $quote_depth,
29254        $quoted_string );
29255}
29256
29257sub indicate_error {
29258    my ( $msg, $line_number, $input_line, $pos, $carrat ) = @_;
29259    interrupt_logfile();
29260    warning($msg);
29261    write_error_indicator_pair( $line_number, $input_line, $pos, $carrat );
29262    resume_logfile();
29263}
29264
29265sub write_error_indicator_pair {
29266    my ( $line_number, $input_line, $pos, $carrat ) = @_;
29267    my ( $offset, $numbered_line, $underline ) =
29268      make_numbered_line( $line_number, $input_line, $pos );
29269    $underline = write_on_underline( $underline, $pos - $offset, $carrat );
29270    warning( $numbered_line . "\n" );
29271    $underline =~ s/\s*$//;
29272    warning( $underline . "\n" );
29273}
29274
29275sub make_numbered_line {
29276
29277    #  Given an input line, its line number, and a character position of
29278    #  interest, create a string not longer than 80 characters of the form
29279    #     $lineno: sub_string
29280    #  such that the sub_string of $str contains the position of interest
29281    #
29282    #  Here is an example of what we want, in this case we add trailing
29283    #  '...' because the line is long.
29284    #
29285    # 2: (One of QAML 2.0's authors is a member of the World Wide Web Con ...
29286    #
29287    #  Here is another example, this time in which we used leading '...'
29288    #  because of excessive length:
29289    #
29290    # 2: ... er of the World Wide Web Consortium's
29291    #
29292    #  input parameters are:
29293    #   $lineno = line number
29294    #   $str = the text of the line
29295    #   $pos = position of interest (the error) : 0 = first character
29296    #
29297    #   We return :
29298    #     - $offset = an offset which corrects the position in case we only
29299    #       display part of a line, such that $pos-$offset is the effective
29300    #       position from the start of the displayed line.
29301    #     - $numbered_line = the numbered line as above,
29302    #     - $underline = a blank 'underline' which is all spaces with the same
29303    #       number of characters as the numbered line.
29304
29305    my ( $lineno, $str, $pos ) = @_;
29306    my $offset = ( $pos < 60 ) ? 0 : $pos - 40;
29307    my $excess = length($str) - $offset - 68;
29308    my $numc   = ( $excess > 0 ) ? 68 : undef;
29309
29310    if ( defined($numc) ) {
29311        if ( $offset == 0 ) {
29312            $str = substr( $str, $offset, $numc - 4 ) . " ...";
29313        }
29314        else {
29315            $str = "... " . substr( $str, $offset + 4, $numc - 4 ) . " ...";
29316        }
29317    }
29318    else {
29319
29320        if ( $offset == 0 ) {
29321        }
29322        else {
29323            $str = "... " . substr( $str, $offset + 4 );
29324        }
29325    }
29326
29327    my $numbered_line = sprintf( "%d: ", $lineno );
29328    $offset -= length($numbered_line);
29329    $numbered_line .= $str;
29330    my $underline = " " x length($numbered_line);
29331    return ( $offset, $numbered_line, $underline );
29332}
29333
29334sub write_on_underline {
29335
29336    # The "underline" is a string that shows where an error is; it starts
29337    # out as a string of blanks with the same length as the numbered line of
29338    # code above it, and we have to add marking to show where an error is.
29339    # In the example below, we want to write the string '--^' just below
29340    # the line of bad code:
29341    #
29342    # 2: (One of QAML 2.0's authors is a member of the World Wide Web Con ...
29343    #                 ---^
29344    # We are given the current underline string, plus a position and a
29345    # string to write on it.
29346    #
29347    # In the above example, there will be 2 calls to do this:
29348    # First call:  $pos=19, pos_chr=^
29349    # Second call: $pos=16, pos_chr=---
29350    #
29351    # This is a trivial thing to do with substr, but there is some
29352    # checking to do.
29353
29354    my ( $underline, $pos, $pos_chr ) = @_;
29355
29356    # check for error..shouldn't happen
29357    unless ( ( $pos >= 0 ) && ( $pos <= length($underline) ) ) {
29358        return $underline;
29359    }
29360    my $excess = length($pos_chr) + $pos - length($underline);
29361    if ( $excess > 0 ) {
29362        $pos_chr = substr( $pos_chr, 0, length($pos_chr) - $excess );
29363    }
29364    substr( $underline, $pos, length($pos_chr) ) = $pos_chr;
29365    return ($underline);
29366}
29367
29368sub pre_tokenize {
29369
29370    # Break a string, $str, into a sequence of preliminary tokens.  We
29371    # are interested in these types of tokens:
29372    #   words       (type='w'),            example: 'max_tokens_wanted'
29373    #   digits      (type = 'd'),          example: '0755'
29374    #   whitespace  (type = 'b'),          example: '   '
29375    #   any other single character (i.e. punct; type = the character itself).
29376    # We cannot do better than this yet because we might be in a quoted
29377    # string or pattern.  Caller sets $max_tokens_wanted to 0 to get all
29378    # tokens.
29379    my ( $str, $max_tokens_wanted ) = @_;
29380
29381    # we return references to these 3 arrays:
29382    my @tokens    = ();     # array of the tokens themselves
29383    my @token_map = (0);    # string position of start of each token
29384    my @type      = ();     # 'b'=whitespace, 'd'=digits, 'w'=alpha, or punct
29385
29386    do {
29387
29388        # whitespace
29389        if ( $str =~ /\G(\s+)/gc ) { push @type, 'b'; }
29390
29391        # numbers
29392        # note that this must come before words!
29393        elsif ( $str =~ /\G(\d+)/gc ) { push @type, 'd'; }
29394
29395        # words
29396        elsif ( $str =~ /\G(\w+)/gc ) { push @type, 'w'; }
29397
29398        # single-character punctuation
29399        elsif ( $str =~ /\G(\W)/gc ) { push @type, $1; }
29400
29401        # that's all..
29402        else {
29403            return ( \@tokens, \@token_map, \@type );
29404        }
29405
29406        push @tokens,    $1;
29407        push @token_map, pos($str);
29408
29409    } while ( --$max_tokens_wanted != 0 );
29410
29411    return ( \@tokens, \@token_map, \@type );
29412}
29413
29414sub show_tokens {
29415
29416    # this is an old debug routine
29417    my ( $rtokens, $rtoken_map ) = @_;
29418    my $num = scalar(@$rtokens);
29419    my $i;
29420
29421    for ( $i = 0 ; $i < $num ; $i++ ) {
29422        my $len = length( $$rtokens[$i] );
29423        print STDOUT "$i:$len:$$rtoken_map[$i]:$$rtokens[$i]:\n";
29424    }
29425}
29426
29427sub matching_end_token {
29428
29429    # find closing character for a pattern
29430    my $beginning_token = shift;
29431
29432    if ( $beginning_token eq '{' ) {
29433        '}';
29434    }
29435    elsif ( $beginning_token eq '[' ) {
29436        ']';
29437    }
29438    elsif ( $beginning_token eq '<' ) {
29439        '>';
29440    }
29441    elsif ( $beginning_token eq '(' ) {
29442        ')';
29443    }
29444    else {
29445        $beginning_token;
29446    }
29447}
29448
29449sub dump_token_types {
29450    my $class = shift;
29451    my $fh    = shift;
29452
29453    # This should be the latest list of token types in use
29454    # adding NEW_TOKENS: add a comment here
29455    print $fh <<'END_OF_LIST';
29456
29457Here is a list of the token types currently used for lines of type 'CODE'.
29458For the following tokens, the "type" of a token is just the token itself.
29459
29460.. :: << >> ** && .. || // -> => += -= .= %= &= |= ^= *= <>
29461( ) <= >= == =~ !~ != ++ -- /= x=
29462... **= <<= >>= &&= ||= //= <=>
29463, + - / * | % ! x ~ = \ ? : . < > ^ &
29464
29465The following additional token types are defined:
29466
29467 type    meaning
29468    b    blank (white space)
29469    {    indent: opening structural curly brace or square bracket or paren
29470         (code block, anonymous hash reference, or anonymous array reference)
29471    }    outdent: right structural curly brace or square bracket or paren
29472    [    left non-structural square bracket (enclosing an array index)
29473    ]    right non-structural square bracket
29474    (    left non-structural paren (all but a list right of an =)
29475    )    right non-structural paren
29476    L    left non-structural curly brace (enclosing a key)
29477    R    right non-structural curly brace
29478    ;    terminal semicolon
29479    f    indicates a semicolon in a "for" statement
29480    h    here_doc operator <<
29481    #    a comment
29482    Q    indicates a quote or pattern
29483    q    indicates a qw quote block
29484    k    a perl keyword
29485    C    user-defined constant or constant function (with void prototype = ())
29486    U    user-defined function taking parameters
29487    G    user-defined function taking block parameter (like grep/map/eval)
29488    M    (unused, but reserved for subroutine definition name)
29489    P    (unused, but -html uses it to label pod text)
29490    t    type indicater such as %,$,@,*,&,sub
29491    w    bare word (perhaps a subroutine call)
29492    i    identifier of some type (with leading %, $, @, *, &, sub, -> )
29493    n    a number
29494    v    a v-string
29495    F    a file test operator (like -e)
29496    Y    File handle
29497    Z    identifier in indirect object slot: may be file handle, object
29498    J    LABEL:  code block label
29499    j    LABEL after next, last, redo, goto
29500    p    unary +
29501    m    unary -
29502    pp   pre-increment operator ++
29503    mm   pre-decrement operator --
29504    A    : used as attribute separator
29505
29506    Here are the '_line_type' codes used internally:
29507    SYSTEM         - system-specific code before hash-bang line
29508    CODE           - line of perl code (including comments)
29509    POD_START      - line starting pod, such as '=head'
29510    POD            - pod documentation text
29511    POD_END        - last line of pod section, '=cut'
29512    HERE           - text of here-document
29513    HERE_END       - last line of here-doc (target word)
29514    FORMAT         - format section
29515    FORMAT_END     - last line of format section, '.'
29516    DATA_START     - __DATA__ line
29517    DATA           - unidentified text following __DATA__
29518    END_START      - __END__ line
29519    END            - unidentified text following __END__
29520    ERROR          - we are in big trouble, probably not a perl script
29521END_OF_LIST
29522}
29523
29524BEGIN {
29525
29526    # These names are used in error messages
29527    @opening_brace_names = qw# '{' '[' '(' '?' #;
29528    @closing_brace_names = qw# '}' ']' ')' ':' #;
29529
29530    my @digraphs = qw(
29531      .. :: << >> ** && .. || // -> => += -= .= %= &= |= ^= *= <>
29532      <= >= == =~ !~ != ++ -- /= x= ~~
29533    );
29534    @is_digraph{@digraphs} = (1) x scalar(@digraphs);
29535
29536    my @trigraphs = qw( ... **= <<= >>= &&= ||= //= <=> !~~ );
29537    @is_trigraph{@trigraphs} = (1) x scalar(@trigraphs);
29538
29539    # make a hash of all valid token types for self-checking the tokenizer
29540    # (adding NEW_TOKENS : select a new character and add to this list)
29541    my @valid_token_types = qw#
29542      A b C G L R f h Q k t w i q n p m F pp mm U j J Y Z v
29543      { } ( ) [ ] ; + - / * | % ! x ~ = \ ? : . < > ^ &
29544      #;
29545    push( @valid_token_types, @digraphs );
29546    push( @valid_token_types, @trigraphs );
29547    push( @valid_token_types, ( '#', ',', 'CORE::' ) );
29548    @is_valid_token_type{@valid_token_types} = (1) x scalar(@valid_token_types);
29549
29550    # a list of file test letters, as in -e (Table 3-4 of 'camel 3')
29551    my @file_test_operators =
29552      qw( A B C M O R S T W X b c d e f g k l o p r s t u w x z);
29553    @is_file_test_operator{@file_test_operators} =
29554      (1) x scalar(@file_test_operators);
29555
29556    # these functions have prototypes of the form (&), so when they are
29557    # followed by a block, that block MAY BE followed by an operator.
29558    # Smartmatch operator ~~ may be followed by anonomous hash or array ref
29559    @_ = qw( do eval );
29560    @is_block_operator{@_} = (1) x scalar(@_);
29561
29562    # these functions allow an identifier in the indirect object slot
29563    @_ = qw( print printf sort exec system say);
29564    @is_indirect_object_taker{@_} = (1) x scalar(@_);
29565
29566    # These tokens may precede a code block
29567    # patched for SWITCH/CASE
29568    @_ =
29569      qw( BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue if elsif else
29570      unless do while until eval for foreach map grep sort
29571      switch case given when);
29572    @is_code_block_token{@_} = (1) x scalar(@_);
29573
29574    # I'll build the list of keywords incrementally
29575    my @Keywords = ();
29576
29577    # keywords and tokens after which a value or pattern is expected,
29578    # but not an operator.  In other words, these should consume terms
29579    # to their right, or at least they are not expected to be followed
29580    # immediately by operators.
29581    my @value_requestor = qw(
29582      AUTOLOAD
29583      BEGIN
29584      CHECK
29585      DESTROY
29586      END
29587      EQ
29588      GE
29589      GT
29590      INIT
29591      LE
29592      LT
29593      NE
29594      UNITCHECK
29595      abs
29596      accept
29597      alarm
29598      and
29599      atan2
29600      bind
29601      binmode
29602      bless
29603      break
29604      caller
29605      chdir
29606      chmod
29607      chomp
29608      chop
29609      chown
29610      chr
29611      chroot
29612      close
29613      closedir
29614      cmp
29615      connect
29616      continue
29617      cos
29618      crypt
29619      dbmclose
29620      dbmopen
29621      defined
29622      delete
29623      die
29624      dump
29625      each
29626      else
29627      elsif
29628      eof
29629      eq
29630      exec
29631      exists
29632      exit
29633      exp
29634      fcntl
29635      fileno
29636      flock
29637      for
29638      foreach
29639      formline
29640      ge
29641      getc
29642      getgrgid
29643      getgrnam
29644      gethostbyaddr
29645      gethostbyname
29646      getnetbyaddr
29647      getnetbyname
29648      getpeername
29649      getpgrp
29650      getpriority
29651      getprotobyname
29652      getprotobynumber
29653      getpwnam
29654      getpwuid
29655      getservbyname
29656      getservbyport
29657      getsockname
29658      getsockopt
29659      glob
29660      gmtime
29661      goto
29662      grep
29663      gt
29664      hex
29665      if
29666      index
29667      int
29668      ioctl
29669      join
29670      keys
29671      kill
29672      last
29673      lc
29674      lcfirst
29675      le
29676      length
29677      link
29678      listen
29679      local
29680      localtime
29681      lock
29682      log
29683      lstat
29684      lt
29685      map
29686      mkdir
29687      msgctl
29688      msgget
29689      msgrcv
29690      msgsnd
29691      my
29692      ne
29693      next
29694      no
29695      not
29696      oct
29697      open
29698      opendir
29699      or
29700      ord
29701      our
29702      pack
29703      pipe
29704      pop
29705      pos
29706      print
29707      printf
29708      prototype
29709      push
29710      quotemeta
29711      rand
29712      read
29713      readdir
29714      readlink
29715      readline
29716      readpipe
29717      recv
29718      redo
29719      ref
29720      rename
29721      require
29722      reset
29723      return
29724      reverse
29725      rewinddir
29726      rindex
29727      rmdir
29728      scalar
29729      seek
29730      seekdir
29731      select
29732      semctl
29733      semget
29734      semop
29735      send
29736      sethostent
29737      setnetent
29738      setpgrp
29739      setpriority
29740      setprotoent
29741      setservent
29742      setsockopt
29743      shift
29744      shmctl
29745      shmget
29746      shmread
29747      shmwrite
29748      shutdown
29749      sin
29750      sleep
29751      socket
29752      socketpair
29753      sort
29754      splice
29755      split
29756      sprintf
29757      sqrt
29758      srand
29759      stat
29760      study
29761      substr
29762      symlink
29763      syscall
29764      sysopen
29765      sysread
29766      sysseek
29767      system
29768      syswrite
29769      tell
29770      telldir
29771      tie
29772      tied
29773      truncate
29774      uc
29775      ucfirst
29776      umask
29777      undef
29778      unless
29779      unlink
29780      unpack
29781      unshift
29782      untie
29783      until
29784      use
29785      utime
29786      values
29787      vec
29788      waitpid
29789      warn
29790      while
29791      write
29792      xor
29793
29794      switch
29795      case
29796      given
29797      when
29798      err
29799      say
29800    );
29801
29802    # patched above for SWITCH/CASE given/when err say
29803    # 'err' is a fairly safe addition.
29804    # TODO: 'default' still needed if appropriate
29805    # 'use feature' seen, but perltidy works ok without it.
29806    # Concerned that 'default' could break code.
29807    push( @Keywords, @value_requestor );
29808
29809    # These are treated the same but are not keywords:
29810    my @extra_vr = qw(
29811      constant
29812      vars
29813    );
29814    push( @value_requestor, @extra_vr );
29815
29816    @expecting_term_token{@value_requestor} = (1) x scalar(@value_requestor);
29817
29818    # this list contains keywords which do not look for arguments,
29819    # so that they might be followed by an operator, or at least
29820    # not a term.
29821    my @operator_requestor = qw(
29822      endgrent
29823      endhostent
29824      endnetent
29825      endprotoent
29826      endpwent
29827      endservent
29828      fork
29829      getgrent
29830      gethostent
29831      getlogin
29832      getnetent
29833      getppid
29834      getprotoent
29835      getpwent
29836      getservent
29837      setgrent
29838      setpwent
29839      time
29840      times
29841      wait
29842      wantarray
29843    );
29844
29845    push( @Keywords, @operator_requestor );
29846
29847    # These are treated the same but are not considered keywords:
29848    my @extra_or = qw(
29849      STDERR
29850      STDIN
29851      STDOUT
29852    );
29853
29854    push( @operator_requestor, @extra_or );
29855
29856    @expecting_operator_token{@operator_requestor} =
29857      (1) x scalar(@operator_requestor);
29858
29859    # these token TYPES expect trailing operator but not a term
29860    # note: ++ and -- are post-increment and decrement, 'C' = constant
29861    my @operator_requestor_types = qw( ++ -- C <> q );
29862    @expecting_operator_types{@operator_requestor_types} =
29863      (1) x scalar(@operator_requestor_types);
29864
29865    # these token TYPES consume values (terms)
29866    # note: pp and mm are pre-increment and decrement
29867    # f=semicolon in for,  F=file test operator
29868    my @value_requestor_type = qw#
29869      L { ( [ ~ !~ =~ ; . .. ... A : && ! || // = + - x
29870      **= += -= .= /= *= %= x= &= |= ^= <<= >>= &&= ||= //=
29871      <= >= == != => \ > < % * / ? & | ** <=> ~~ !~~
29872      f F pp mm Y p m U J G j >> << ^ t
29873      #;
29874    push( @value_requestor_type, ',' )
29875      ;    # (perl doesn't like a ',' in a qw block)
29876    @expecting_term_types{@value_requestor_type} =
29877      (1) x scalar(@value_requestor_type);
29878
29879    # Note: the following valid token types are not assigned here to
29880    # hashes requesting to be followed by values or terms, but are
29881    # instead currently hard-coded into sub operator_expected:
29882    # ) -> :: Q R Z ] b h i k n v w } #
29883
29884    # For simple syntax checking, it is nice to have a list of operators which
29885    # will really be unhappy if not followed by a term.  This includes most
29886    # of the above...
29887    %really_want_term = %expecting_term_types;
29888
29889    # with these exceptions...
29890    delete $really_want_term{'U'}; # user sub, depends on prototype
29891    delete $really_want_term{'F'}; # file test works on $_ if no following term
29892    delete $really_want_term{'Y'}; # indirect object, too risky to check syntax;
29893                                   # let perl do it
29894
29895    @_ = qw(q qq qw qx qr s y tr m);
29896    @is_q_qq_qw_qx_qr_s_y_tr_m{@_} = (1) x scalar(@_);
29897
29898    # These keywords are handled specially in the tokenizer code:
29899    my @special_keywords = qw(
29900      do
29901      eval
29902      format
29903      m
29904      package
29905      q
29906      qq
29907      qr
29908      qw
29909      qx
29910      s
29911      sub
29912      tr
29913      y
29914    );
29915    push( @Keywords, @special_keywords );
29916
29917    # Keywords after which list formatting may be used
29918    # WARNING: do not include |map|grep|eval or perl may die on
29919    # syntax errors (map1.t).
29920    my @keyword_taking_list = qw(
29921      and
29922      chmod
29923      chomp
29924      chop
29925      chown
29926      dbmopen
29927      die
29928      elsif
29929      exec
29930      fcntl
29931      for
29932      foreach
29933      formline
29934      getsockopt
29935      if
29936      index
29937      ioctl
29938      join
29939      kill
29940      local
29941      msgctl
29942      msgrcv
29943      msgsnd
29944      my
29945      open
29946      or
29947      our
29948      pack
29949      print
29950      printf
29951      push
29952      read
29953      readpipe
29954      recv
29955      return
29956      reverse
29957      rindex
29958      seek
29959      select
29960      semctl
29961      semget
29962      send
29963      setpriority
29964      setsockopt
29965      shmctl
29966      shmget
29967      shmread
29968      shmwrite
29969      socket
29970      socketpair
29971      sort
29972      splice
29973      split
29974      sprintf
29975      substr
29976      syscall
29977      sysopen
29978      sysread
29979      sysseek
29980      system
29981      syswrite
29982      tie
29983      unless
29984      unlink
29985      unpack
29986      unshift
29987      until
29988      vec
29989      warn
29990      while
29991      given
29992      when
29993    );
29994    @is_keyword_taking_list{@keyword_taking_list} =
29995      (1) x scalar(@keyword_taking_list);
29996
29997    # These are not used in any way yet
29998    #    my @unused_keywords = qw(
29999    #     __FILE__
30000    #     __LINE__
30001    #     __PACKAGE__
30002    #     );
30003
30004    #  The list of keywords was originally extracted from function 'keyword' in
30005    #  perl file toke.c version 5.005.03, using this utility, plus a
30006    #  little editing: (file getkwd.pl):
30007    #  while (<>) { while (/\"(.*)\"/g) { print "$1\n"; } }
30008    #  Add 'get' prefix where necessary, then split into the above lists.
30009    #  This list should be updated as necessary.
30010    #  The list should not contain these special variables:
30011    #  ARGV DATA ENV SIG STDERR STDIN STDOUT
30012    #  __DATA__ __END__
30013
30014    @is_keyword{@Keywords} = (1) x scalar(@Keywords);
30015}
300161;
30017__END__
30018
30019=head1 NAME
30020
30021Perl::Tidy - Parses and beautifies perl source
30022
30023=head1 SYNOPSIS
30024
30025    use Perl::Tidy;
30026
30027    my $error_flag = Perl::Tidy::perltidy(
30028        source            => $source,
30029        destination       => $destination,
30030        stderr            => $stderr,
30031        argv              => $argv,
30032        perltidyrc        => $perltidyrc,
30033        logfile           => $logfile,
30034        errorfile         => $errorfile,
30035        formatter         => $formatter,           # callback object (see below)
30036        dump_options      => $dump_options,
30037        dump_options_type => $dump_options_type,
30038        prefilter         => $prefilter_coderef,
30039        postfilter        => $postfilter_coderef,
30040    );
30041
30042=head1 DESCRIPTION
30043
30044This module makes the functionality of the perltidy utility available to perl
30045scripts.  Any or all of the input parameters may be omitted, in which case the
30046@ARGV array will be used to provide input parameters as described
30047in the perltidy(1) man page.
30048
30049For example, the perltidy script is basically just this:
30050
30051    use Perl::Tidy;
30052    Perl::Tidy::perltidy();
30053
30054The call to B<perltidy> returns a scalar B<$error_flag> which is TRUE if an
30055error caused premature termination, and FALSE if the process ran to normal
30056completion.  Additional discuss of errors is contained below in the L<ERROR
30057HANDLING> section.
30058
30059The module accepts input and output streams by a variety of methods.
30060The following list of parameters may be any of the following: a
30061filename, an ARRAY reference, a SCALAR reference, or an object with
30062either a B<getline> or B<print> method, as appropriate.
30063
30064        source            - the source of the script to be formatted
30065        destination       - the destination of the formatted output
30066        stderr            - standard error output
30067        perltidyrc        - the .perltidyrc file
30068        logfile           - the .LOG file stream, if any
30069        errorfile         - the .ERR file stream, if any
30070        dump_options      - ref to a hash to receive parameters (see below),
30071        dump_options_type - controls contents of dump_options
30072        dump_getopt_flags - ref to a hash to receive Getopt flags
30073        dump_options_category - ref to a hash giving category of options
30074        dump_abbreviations    - ref to a hash giving all abbreviations
30075
30076The following chart illustrates the logic used to decide how to
30077treat a parameter.
30078
30079   ref($param)  $param is assumed to be:
30080   -----------  ---------------------
30081   undef        a filename
30082   SCALAR       ref to string
30083   ARRAY        ref to array
30084   (other)      object with getline (if source) or print method
30085
30086If the parameter is an object, and the object has a B<close> method, that
30087close method will be called at the end of the stream.
30088
30089=over 4
30090
30091=item source
30092
30093If the B<source> parameter is given, it defines the source of the input stream.
30094If an input stream is defined with the B<source> parameter then no other source
30095filenames may be specified in the @ARGV array or B<argv> parameter.
30096
30097=item destination
30098
30099If the B<destination> parameter is given, it will be used to define the
30100file or memory location to receive output of perltidy.
30101
30102=item stderr
30103
30104The B<stderr> parameter allows the calling program to redirect the stream that
30105would otherwise go to the standard error output device to any of the stream
30106types listed above.  This stream contains important warnings and errors
30107related to the parameters passed to perltidy.
30108
30109=item perltidyrc
30110
30111If the B<perltidyrc> file is given, it will be used instead of any
30112F<.perltidyrc> configuration file that would otherwise be used.
30113
30114=item errorfile
30115
30116The B<errorfile> parameter allows the calling program to capture
30117the stream that would otherwise go to either a .ERR file.  This
30118stream contains warnings or errors related to the contents of one
30119source file or stream.
30120
30121The reason that this is different from the stderr stream is that when perltidy
30122is called to process multiple files there will be up to one .ERR file created
30123for each file and it would be very confusing if they were combined.
30124
30125However if perltidy is called to process just a single perl script then it may
30126be more conveninent to combine the B<errorfile> stream with the B<stderr>
30127stream.  This can be done by setting the B<-se> parameter, in which case this
30128parameter is ignored.
30129
30130=item logfile
30131
30132The B<logfile> parameter allows the calling program to capture
30133the stream that would otherwise go to a .LOG file.  This
30134stream is only created if requested with a B<-g> parameter.  It
30135contains detailed diagnostic information about a script
30136which may be useful for debugging.
30137
30138=item argv
30139
30140If the B<argv> parameter is given, it will be used instead of the
30141B<@ARGV> array.  The B<argv> parameter may be a string, a reference to a
30142string, or a reference to an array.  If it is a string or reference to a
30143string, it will be parsed into an array of items just as if it were a
30144command line string.
30145
30146=item dump_options
30147
30148If the B<dump_options> parameter is given, it must be the reference to a hash.
30149In this case, the parameters contained in any perltidyrc configuration file
30150will be placed in this hash and perltidy will return immediately.  This is
30151equivalent to running perltidy with --dump-options, except that the perameters
30152are returned in a hash rather than dumped to standard output.  Also, by default
30153only the parameters in the perltidyrc file are returned, but this can be
30154changed (see the next parameter).  This parameter provides a convenient method
30155for external programs to read a perltidyrc file.  An example program using
30156this feature, F<perltidyrc_dump.pl>, is included in the distribution.
30157
30158Any combination of the B<dump_> parameters may be used together.
30159
30160=item dump_options_type
30161
30162This parameter is a string which can be used to control the parameters placed
30163in the hash reference supplied by B<dump_options>.  The possible values are
30164'perltidyrc' (default) and 'full'.  The 'full' parameter causes both the
30165default options plus any options found in a perltidyrc file to be returned.
30166
30167=item dump_getopt_flags
30168
30169If the B<dump_getopt_flags> parameter is given, it must be the reference to a
30170hash.  This hash will receive all of the parameters that perltidy understands
30171and flags that are passed to Getopt::Long.  This parameter may be
30172used alone or with the B<dump_options> flag.  Perltidy will
30173exit immediately after filling this hash.  See the demo program
30174F<perltidyrc_dump.pl> for example usage.
30175
30176=item dump_options_category
30177
30178If the B<dump_options_category> parameter is given, it must be the reference to a
30179hash.  This hash will receive a hash with keys equal to all long parameter names
30180and values equal to the title of the corresponding section of the perltidy manual.
30181See the demo program F<perltidyrc_dump.pl> for example usage.
30182
30183=item dump_abbreviations
30184
30185If the B<dump_abbreviations> parameter is given, it must be the reference to a
30186hash.  This hash will receive all abbreviations used by Perl::Tidy.  See the
30187demo program F<perltidyrc_dump.pl> for example usage.
30188
30189=item prefilter
30190
30191A code reference that will be applied to the source before tidying. It is
30192expected to take the full content as a string in its input, and output the
30193transformed content.
30194
30195=item postfilter
30196
30197A code reference that will be applied to the tidied result before outputting.
30198It is expected to take the full content as a string in its input, and output
30199the transformed content.
30200
30201Note: A convenient way to check the function of your custom prefilter and
30202postfilter code is to use the --notidy option, first with just the prefilter
30203and then with both the prefilter and postfilter.  See also the file
30204B<filter_example.pl> in the perltidy distribution.
30205
30206=back
30207
30208=head1 ERROR HANDLING
30209
30210Perltidy will return with an error flag indicating if the process had to be
30211terminated early due to errors in the input parameters.  This can happen for
30212example if a parameter is misspelled or given an invalid value.  The calling
30213program should check this flag because if it is set the destination stream will
30214be empty or incomplete and should be ignored.  Error messages in the B<stderr>
30215stream will indicate the cause of any problem.
30216
30217If the error flag is not set then perltidy ran to completion.   However there
30218may still be warning messages in the B<stderr> stream related to control
30219parameters, and there may be warning messages in the B<errorfile> stream
30220relating to possible syntax errors in the source code being tidied.
30221
30222In the event of a catastrophic error for which recovery is not possible
30223B<perltidy> terminates by making calls to B<croak> or B<confess> to help the
30224programmer localize the problem.  These should normally only occur during
30225program development.
30226
30227=head1 NOTES ON FORMATTING PARAMETERS
30228
30229Parameters which control formatting may be passed in several ways: in a
30230F<.perltidyrc> configuration file, in the B<perltidyrc> parameter, and in the
30231B<argv> parameter.
30232
30233The B<-syn> (B<--check-syntax>) flag may be used with all source and
30234destination streams except for standard input and output.  However
30235data streams which are not associated with a filename will
30236be copied to a temporary file before being be passed to Perl.  This
30237use of temporary files can cause somewhat confusing output from Perl.
30238
30239If the B<-pbp> style is used it will typically be necessary to also
30240specify a B<-nst> flag.  This is necessary to turn off the B<-st> flag
30241contained in the B<-pbp> parameter set which otherwise would direct
30242the output stream to the standard output.
30243
30244=head1 EXAMPLES
30245
30246The following example uses string references to hold the input and output
30247code and error streams, and illustrates checking for errors.
30248
30249  use Perl::Tidy;
30250
30251  my $source_string = <<'EOT';
30252  my$error=Perl::Tidy::perltidy(argv=>$argv,source=>\$source_string,
30253    destination=>\$dest_string,stderr=>\$stderr_string,
30254  errorfile=>\$errorfile_string,);
30255  EOT
30256
30257  my $dest_string;
30258  my $stderr_string;
30259  my $errorfile_string;
30260  my $argv = "-npro";   # Ignore any .perltidyrc at this site
30261  $argv .= " -pbp";     # Format according to perl best practices
30262  $argv .= " -nst";     # Must turn off -st in case -pbp is specified
30263  $argv .= " -se";      # -se appends the errorfile to stderr
30264  ## $argv .= " --spell-check";  # uncomment to trigger an error
30265
30266  print "<<RAW SOURCE>>\n$source_string\n";
30267
30268  my $error = Perl::Tidy::perltidy(
30269      argv        => $argv,
30270      source      => \$source_string,
30271      destination => \$dest_string,
30272      stderr      => \$stderr_string,
30273      errorfile   => \$errorfile_string,    # ignored when -se flag is set
30274      ##phasers   => 'stun',                # uncomment to trigger an error
30275  );
30276
30277  if ($error) {
30278
30279      # serious error in input parameters, no tidied output
30280      print "<<STDERR>>\n$stderr_string\n";
30281      die "Exiting because of serious errors\n";
30282  }
30283
30284  if ($dest_string)      { print "<<TIDIED SOURCE>>\n$dest_string\n" }
30285  if ($stderr_string)    { print "<<STDERR>>\n$stderr_string\n" }
30286  if ($errorfile_string) { print "<<.ERR file>>\n$errorfile_string\n" }
30287
30288Additional examples are given in examples section of the perltidy distribution.
30289
30290=head1 Using the B<formatter> Callback Object
30291
30292The B<formatter> parameter is an optional callback object which allows
30293the calling program to receive tokenized lines directly from perltidy for
30294further specialized processing.  When this parameter is used, the two
30295formatting options which are built into perltidy (beautification or
30296html) are ignored.  The following diagram illustrates the logical flow:
30297
30298                    |-- (normal route)   -> code beautification
30299  caller->perltidy->|-- (-html flag )    -> create html
30300                    |-- (formatter given)-> callback to write_line
30301
30302This can be useful for processing perl scripts in some way.  The
30303parameter C<$formatter> in the perltidy call,
30304
30305        formatter   => $formatter,
30306
30307is an object created by the caller with a C<write_line> method which
30308will accept and process tokenized lines, one line per call.  Here is
30309a simple example of a C<write_line> which merely prints the line number,
30310the line type (as determined by perltidy), and the text of the line:
30311
30312 sub write_line {
30313
30314     # This is called from perltidy line-by-line
30315     my $self              = shift;
30316     my $line_of_tokens    = shift;
30317     my $line_type         = $line_of_tokens->{_line_type};
30318     my $input_line_number = $line_of_tokens->{_line_number};
30319     my $input_line        = $line_of_tokens->{_line_text};
30320     print "$input_line_number:$line_type:$input_line";
30321 }
30322
30323The complete program, B<perllinetype>, is contained in the examples section of
30324the source distribution.  As this example shows, the callback method
30325receives a parameter B<$line_of_tokens>, which is a reference to a hash
30326of other useful information.  This example uses these hash entries:
30327
30328 $line_of_tokens->{_line_number} - the line number (1,2,...)
30329 $line_of_tokens->{_line_text}   - the text of the line
30330 $line_of_tokens->{_line_type}   - the type of the line, one of:
30331
30332    SYSTEM         - system-specific code before hash-bang line
30333    CODE           - line of perl code (including comments)
30334    POD_START      - line starting pod, such as '=head'
30335    POD            - pod documentation text
30336    POD_END        - last line of pod section, '=cut'
30337    HERE           - text of here-document
30338    HERE_END       - last line of here-doc (target word)
30339    FORMAT         - format section
30340    FORMAT_END     - last line of format section, '.'
30341    DATA_START     - __DATA__ line
30342    DATA           - unidentified text following __DATA__
30343    END_START      - __END__ line
30344    END            - unidentified text following __END__
30345    ERROR          - we are in big trouble, probably not a perl script
30346
30347Most applications will be only interested in lines of type B<CODE>.  For
30348another example, let's write a program which checks for one of the
30349so-called I<naughty matching variables> C<&`>, C<$&>, and C<$'>, which
30350can slow down processing.  Here is a B<write_line>, from the example
30351program B<find_naughty.pl>, which does that:
30352
30353 sub write_line {
30354
30355     # This is called back from perltidy line-by-line
30356     # We're looking for $`, $&, and $'
30357     my ( $self, $line_of_tokens ) = @_;
30358
30359     # pull out some stuff we might need
30360     my $line_type         = $line_of_tokens->{_line_type};
30361     my $input_line_number = $line_of_tokens->{_line_number};
30362     my $input_line        = $line_of_tokens->{_line_text};
30363     my $rtoken_type       = $line_of_tokens->{_rtoken_type};
30364     my $rtokens           = $line_of_tokens->{_rtokens};
30365     chomp $input_line;
30366
30367     # skip comments, pod, etc
30368     return if ( $line_type ne 'CODE' );
30369
30370     # loop over tokens looking for $`, $&, and $'
30371     for ( my $j = 0 ; $j < @$rtoken_type ; $j++ ) {
30372
30373         # we only want to examine token types 'i' (identifier)
30374         next unless $$rtoken_type[$j] eq 'i';
30375
30376         # pull out the actual token text
30377         my $token = $$rtokens[$j];
30378
30379         # and check it
30380         if ( $token =~ /^\$[\`\&\']$/ ) {
30381             print STDERR
30382               "$input_line_number: $token\n";
30383         }
30384     }
30385 }
30386
30387This example pulls out these tokenization variables from the $line_of_tokens
30388hash reference:
30389
30390     $rtoken_type = $line_of_tokens->{_rtoken_type};
30391     $rtokens     = $line_of_tokens->{_rtokens};
30392
30393The variable C<$rtoken_type> is a reference to an array of token type codes,
30394and C<$rtokens> is a reference to a corresponding array of token text.
30395These are obviously only defined for lines of type B<CODE>.
30396Perltidy classifies tokens into types, and has a brief code for each type.
30397You can get a complete list at any time by running perltidy from the
30398command line with
30399
30400     perltidy --dump-token-types
30401
30402In the present example, we are only looking for tokens of type B<i>
30403(identifiers), so the for loop skips past all other types.  When an
30404identifier is found, its actual text is checked to see if it is one
30405being sought.  If so, the above write_line prints the token and its
30406line number.
30407
30408The B<formatter> feature is relatively new in perltidy, and further
30409documentation needs to be written to complete its description.  However,
30410several example programs have been written and can be found in the
30411B<examples> section of the source distribution.  Probably the best way
30412to get started is to find one of the examples which most closely matches
30413your application and start modifying it.
30414
30415For help with perltidy's pecular way of breaking lines into tokens, you
30416might run, from the command line,
30417
30418 perltidy -D filename
30419
30420where F<filename> is a short script of interest.  This will produce
30421F<filename.DEBUG> with interleaved lines of text and their token types.
30422The B<-D> flag has been in perltidy from the beginning for this purpose.
30423If you want to see the code which creates this file, it is
30424C<write_debug_entry> in Tidy.pm.
30425
30426=head1 EXPORT
30427
30428  &perltidy
30429
30430=head1 CREDITS
30431
30432Thanks to Hugh Myers who developed the initial modular interface
30433to perltidy.
30434
30435=head1 VERSION
30436
30437This man page documents Perl::Tidy version 20121207.
30438
30439=head1 LICENSE
30440
30441This package is free software; you can redistribute it and/or modify it
30442under the terms of the "GNU General Public License".
30443
30444Please refer to the file "COPYING" for details.
30445
30446=head1 AUTHOR
30447
30448 Steve Hancock
30449 perltidy at users.sourceforge.net
30450
30451=head1 SEE ALSO
30452
30453The perltidy(1) man page describes all of the features of perltidy.  It
30454can be found at http://perltidy.sourceforge.net.
30455
30456=cut
30457