1# 2############################################################ 3# 4# perltidy - a perl script indenter and formatter 5# 6# Copyright (c) 2000-2012 by Steve Hancock 7# Distributed under the GPL license agreement; see file COPYING 8# 9# This program is free software; you can redistribute it and/or modify 10# it under the terms of the GNU General Public License as published by 11# the Free Software Foundation; either version 2 of the License, or 12# (at your option) any later version. 13# 14# This program is distributed in the hope that it will be useful, 15# but WITHOUT ANY WARRANTY; without even the implied warranty of 16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17# GNU General Public License for more details. 18# 19# You should have received a copy of the GNU General Public License along 20# with this program; if not, write to the Free Software Foundation, Inc., 21# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 22# 23# For brief instructions instructions, try 'perltidy -h'. 24# For more complete documentation, try 'man perltidy' 25# or visit http://perltidy.sourceforge.net 26# 27# This script is an example of the default style. It was formatted with: 28# 29# perltidy Tidy.pm 30# 31# Code Contributions: See ChangeLog.html for a complete history. 32# Michael Cartmell supplied code for adaptation to VMS and helped with 33# v-strings. 34# Hugh S. Myers supplied sub streamhandle and the supporting code to 35# create a Perl::Tidy module which can operate on strings, arrays, etc. 36# Yves Orton supplied coding to help detect Windows versions. 37# Axel Rose supplied a patch for MacPerl. 38# Sebastien Aperghis-Tramoni supplied a patch for the defined or operator. 39# Dan Tyrell contributed a patch for binary I/O. 40# Ueli Hugenschmidt contributed a patch for -fpsc 41# Sam Kington supplied a patch to identify the initial indentation of 42# entabbed code. 43# jonathan swartz supplied patches for: 44# * .../ pattern, which looks upwards from directory 45# * --notidy, to be used in directories where we want to avoid 46# accidentally tidying 47# * prefilter and postfilter 48# * iterations option 49# 50# Many others have supplied key ideas, suggestions, and bug reports; 51# see the CHANGES file. 52# 53############################################################ 54 55package Perl::Tidy; 56use 5.004; # need IO::File from 5.004 or later 57BEGIN { $^W = 1; } # turn on warnings 58 59use strict; 60use Exporter; 61use Carp; 62$|++; 63 64use vars qw{ 65 $VERSION 66 @ISA 67 @EXPORT 68 $missing_file_spec 69 $fh_stderr 70}; 71 72@ISA = qw( Exporter ); 73@EXPORT = qw( &perltidy ); 74 75use Cwd; 76use IO::File; 77use File::Basename; 78use File::Copy; 79 80BEGIN { 81 ( $VERSION = q($Id: Tidy.pm,v 1.74 2012/12/07 13:56:49 perltidy Exp $) ) =~ s/^.*\s+(\d+)\/(\d+)\/(\d+).*$/$1$2$3/; # all one line for MakeMaker 82} 83 84sub streamhandle { 85 86 # given filename and mode (r or w), create an object which: 87 # has a 'getline' method if mode='r', and 88 # has a 'print' method if mode='w'. 89 # The objects also need a 'close' method. 90 # 91 # How the object is made: 92 # 93 # if $filename is: Make object using: 94 # ---------------- ----------------- 95 # '-' (STDIN if mode = 'r', STDOUT if mode='w') 96 # string IO::File 97 # ARRAY ref Perl::Tidy::IOScalarArray (formerly IO::ScalarArray) 98 # STRING ref Perl::Tidy::IOScalar (formerly IO::Scalar) 99 # object object 100 # (check for 'print' method for 'w' mode) 101 # (check for 'getline' method for 'r' mode) 102 my $ref = ref( my $filename = shift ); 103 my $mode = shift; 104 my $New; 105 my $fh; 106 107 # handle a reference 108 if ($ref) { 109 if ( $ref eq 'ARRAY' ) { 110 $New = sub { Perl::Tidy::IOScalarArray->new(@_) }; 111 } 112 elsif ( $ref eq 'SCALAR' ) { 113 $New = sub { Perl::Tidy::IOScalar->new(@_) }; 114 } 115 else { 116 117 # Accept an object with a getline method for reading. Note: 118 # IO::File is built-in and does not respond to the defined 119 # operator. If this causes trouble, the check can be 120 # skipped and we can just let it crash if there is no 121 # getline. 122 if ( $mode =~ /[rR]/ ) { 123 if ( $ref eq 'IO::File' || defined &{ $ref . "::getline" } ) { 124 $New = sub { $filename }; 125 } 126 else { 127 $New = sub { undef }; 128 confess <<EOM; 129------------------------------------------------------------------------ 130No 'getline' method is defined for object of class $ref 131Please check your call to Perl::Tidy::perltidy. Trace follows. 132------------------------------------------------------------------------ 133EOM 134 } 135 } 136 137 # Accept an object with a print method for writing. 138 # See note above about IO::File 139 if ( $mode =~ /[wW]/ ) { 140 if ( $ref eq 'IO::File' || defined &{ $ref . "::print" } ) { 141 $New = sub { $filename }; 142 } 143 else { 144 $New = sub { undef }; 145 confess <<EOM; 146------------------------------------------------------------------------ 147No 'print' method is defined for object of class $ref 148Please check your call to Perl::Tidy::perltidy. Trace follows. 149------------------------------------------------------------------------ 150EOM 151 } 152 } 153 } 154 } 155 156 # handle a string 157 else { 158 if ( $filename eq '-' ) { 159 $New = sub { $mode eq 'w' ? *STDOUT : *STDIN } 160 } 161 else { 162 $New = sub { IO::File->new(@_) }; 163 } 164 } 165 $fh = $New->( $filename, $mode ) 166 or Warn("Couldn't open file:$filename in mode:$mode : $!\n"); 167 return $fh, ( $ref or $filename ); 168} 169 170sub find_input_line_ending { 171 172 # Peek at a file and return first line ending character. 173 # Quietly return undef in case of any trouble. 174 my ($input_file) = @_; 175 my $ending; 176 177 # silently ignore input from object or stdin 178 if ( ref($input_file) || $input_file eq '-' ) { 179 return $ending; 180 } 181 open( INFILE, $input_file ) || return $ending; 182 183 binmode INFILE; 184 my $buf; 185 read( INFILE, $buf, 1024 ); 186 close INFILE; 187 if ( $buf && $buf =~ /([\012\015]+)/ ) { 188 my $test = $1; 189 190 # dos 191 if ( $test =~ /^(\015\012)+$/ ) { $ending = "\015\012" } 192 193 # mac 194 elsif ( $test =~ /^\015+$/ ) { $ending = "\015" } 195 196 # unix 197 elsif ( $test =~ /^\012+$/ ) { $ending = "\012" } 198 199 # unknown 200 else { } 201 } 202 203 # no ending seen 204 else { } 205 206 return $ending; 207} 208 209sub catfile { 210 211 # concatenate a path and file basename 212 # returns undef in case of error 213 214 BEGIN { eval "require File::Spec"; $missing_file_spec = $@; } 215 216 # use File::Spec if we can 217 unless ($missing_file_spec) { 218 return File::Spec->catfile(@_); 219 } 220 221 # Perl 5.004 systems may not have File::Spec so we'll make 222 # a simple try. We assume File::Basename is available. 223 # return undef if not successful. 224 my $name = pop @_; 225 my $path = join '/', @_; 226 my $test_file = $path . $name; 227 my ( $test_name, $test_path ) = fileparse($test_file); 228 return $test_file if ( $test_name eq $name ); 229 return undef if ( $^O eq 'VMS' ); 230 231 # this should work at least for Windows and Unix: 232 $test_file = $path . '/' . $name; 233 ( $test_name, $test_path ) = fileparse($test_file); 234 return $test_file if ( $test_name eq $name ); 235 return undef; 236} 237 238sub make_temporary_filename { 239 240 # Make a temporary filename. 241 # The POSIX tmpnam() function has been unreliable for non-unix systems 242 # (at least for the win32 systems that I've tested), so use a pre-defined 243 # name for them. A disadvantage of this is that two perltidy 244 # runs in the same working directory may conflict. However, the chance of 245 # that is small and manageable by the user, especially on systems for which 246 # the POSIX tmpnam function doesn't work. 247 my $name = "perltidy.TMP"; 248 if ( $^O =~ /win32|dos/i || $^O eq 'VMS' || $^O eq 'MacOs' ) { 249 return $name; 250 } 251 eval "use POSIX qw(tmpnam)"; 252 if ($@) { return $name } 253 use IO::File; 254 255 # just make a couple of tries before giving up and using the default 256 for ( 0 .. 3 ) { 257 my $tmpname = tmpnam(); 258 my $fh = IO::File->new( $tmpname, O_RDWR | O_CREAT | O_EXCL ); 259 if ($fh) { 260 $fh->close(); 261 return ($tmpname); 262 last; 263 } 264 } 265 return ($name); 266} 267 268# Here is a map of the flow of data from the input source to the output 269# line sink: 270# 271# LineSource-->Tokenizer-->Formatter-->VerticalAligner-->FileWriter--> 272# input groups output 273# lines tokens lines of lines lines 274# lines 275# 276# The names correspond to the package names responsible for the unit processes. 277# 278# The overall process is controlled by the "main" package. 279# 280# LineSource is the stream of input lines 281# 282# Tokenizer analyzes a line and breaks it into tokens, peeking ahead 283# if necessary. A token is any section of the input line which should be 284# manipulated as a single entity during formatting. For example, a single 285# ',' character is a token, and so is an entire side comment. It handles 286# the complexities of Perl syntax, such as distinguishing between '<<' as 287# a shift operator and as a here-document, or distinguishing between '/' 288# as a divide symbol and as a pattern delimiter. 289# 290# Formatter inserts and deletes whitespace between tokens, and breaks 291# sequences of tokens at appropriate points as output lines. It bases its 292# decisions on the default rules as modified by any command-line options. 293# 294# VerticalAligner collects groups of lines together and tries to line up 295# certain tokens, such as '=>', '#', and '=' by adding whitespace. 296# 297# FileWriter simply writes lines to the output stream. 298# 299# The Logger package, not shown, records significant events and warning 300# messages. It writes a .LOG file, which may be saved with a 301# '-log' or a '-g' flag. 302 303sub perltidy { 304 305 my %defaults = ( 306 argv => undef, 307 destination => undef, 308 formatter => undef, 309 logfile => undef, 310 errorfile => undef, 311 perltidyrc => undef, 312 source => undef, 313 stderr => undef, 314 dump_options => undef, 315 dump_options_type => undef, 316 dump_getopt_flags => undef, 317 dump_options_category => undef, 318 dump_options_range => undef, 319 dump_abbreviations => undef, 320 prefilter => undef, 321 postfilter => undef, 322 ); 323 324 # don't overwrite callers ARGV 325 local @ARGV = @ARGV; 326 local *STDERR = *STDERR; 327 328 my %input_hash = @_; 329 330 if ( my @bad_keys = grep { !exists $defaults{$_} } keys %input_hash ) { 331 local $" = ')('; 332 my @good_keys = sort keys %defaults; 333 @bad_keys = sort @bad_keys; 334 confess <<EOM; 335------------------------------------------------------------------------ 336Unknown perltidy parameter : (@bad_keys) 337perltidy only understands : (@good_keys) 338------------------------------------------------------------------------ 339 340EOM 341 } 342 343 my $get_hash_ref = sub { 344 my ($key) = @_; 345 my $hash_ref = $input_hash{$key}; 346 if ( defined($hash_ref) ) { 347 unless ( ref($hash_ref) eq 'HASH' ) { 348 my $what = ref($hash_ref); 349 my $but_is = 350 $what ? "but is ref to $what" : "but is not a reference"; 351 croak <<EOM; 352------------------------------------------------------------------------ 353error in call to perltidy: 354-$key must be reference to HASH $but_is 355------------------------------------------------------------------------ 356EOM 357 } 358 } 359 return $hash_ref; 360 }; 361 362 %input_hash = ( %defaults, %input_hash ); 363 my $argv = $input_hash{'argv'}; 364 my $destination_stream = $input_hash{'destination'}; 365 my $errorfile_stream = $input_hash{'errorfile'}; 366 my $logfile_stream = $input_hash{'logfile'}; 367 my $perltidyrc_stream = $input_hash{'perltidyrc'}; 368 my $source_stream = $input_hash{'source'}; 369 my $stderr_stream = $input_hash{'stderr'}; 370 my $user_formatter = $input_hash{'formatter'}; 371 my $prefilter = $input_hash{'prefilter'}; 372 my $postfilter = $input_hash{'postfilter'}; 373 374 if ($stderr_stream) { 375 ( $fh_stderr, my $stderr_file ) = 376 Perl::Tidy::streamhandle( $stderr_stream, 'w' ); 377 if ( !$fh_stderr ) { 378 croak <<EOM; 379------------------------------------------------------------------------ 380Unable to redirect STDERR to $stderr_stream 381Please check value of -stderr in call to perltidy 382------------------------------------------------------------------------ 383EOM 384 } 385 } 386 else { 387 $fh_stderr = *STDERR; 388 } 389 390 sub Warn ($) { $fh_stderr->print( $_[0] ); } 391 392 sub Exit ($) { 393 if ( $_[0] ) { goto ERROR_EXIT } 394 else { goto NORMAL_EXIT } 395 } 396 397 sub Die ($) { Warn $_[0]; Exit(1); } 398 399 # extract various dump parameters 400 my $dump_options_type = $input_hash{'dump_options_type'}; 401 my $dump_options = $get_hash_ref->('dump_options'); 402 my $dump_getopt_flags = $get_hash_ref->('dump_getopt_flags'); 403 my $dump_options_category = $get_hash_ref->('dump_options_category'); 404 my $dump_abbreviations = $get_hash_ref->('dump_abbreviations'); 405 my $dump_options_range = $get_hash_ref->('dump_options_range'); 406 407 # validate dump_options_type 408 if ( defined($dump_options) ) { 409 unless ( defined($dump_options_type) ) { 410 $dump_options_type = 'perltidyrc'; 411 } 412 unless ( $dump_options_type =~ /^(perltidyrc|full)$/ ) { 413 croak <<EOM; 414------------------------------------------------------------------------ 415Please check value of -dump_options_type in call to perltidy; 416saw: '$dump_options_type' 417expecting: 'perltidyrc' or 'full' 418------------------------------------------------------------------------ 419EOM 420 421 } 422 } 423 else { 424 $dump_options_type = ""; 425 } 426 427 if ($user_formatter) { 428 429 # if the user defines a formatter, there is no output stream, 430 # but we need a null stream to keep coding simple 431 $destination_stream = Perl::Tidy::DevNull->new(); 432 } 433 434 # see if ARGV is overridden 435 if ( defined($argv) ) { 436 437 my $rargv = ref $argv; 438 if ( $rargv eq 'SCALAR' ) { $argv = $$argv; $rargv = undef } 439 440 # ref to ARRAY 441 if ($rargv) { 442 if ( $rargv eq 'ARRAY' ) { 443 @ARGV = @$argv; 444 } 445 else { 446 croak <<EOM; 447------------------------------------------------------------------------ 448Please check value of -argv in call to perltidy; 449it must be a string or ref to ARRAY but is: $rargv 450------------------------------------------------------------------------ 451EOM 452 } 453 } 454 455 # string 456 else { 457 my ( $rargv, $msg ) = parse_args($argv); 458 if ($msg) { 459 Die <<EOM; 460Error parsing this string passed to to perltidy with 'argv': 461$msg 462EOM 463 } 464 @ARGV = @{$rargv}; 465 } 466 } 467 468 my $rpending_complaint; 469 $$rpending_complaint = ""; 470 my $rpending_logfile_message; 471 $$rpending_logfile_message = ""; 472 473 my ( $is_Windows, $Windows_type ) = look_for_Windows($rpending_complaint); 474 475 # VMS file names are restricted to a 40.40 format, so we append _tdy 476 # instead of .tdy, etc. (but see also sub check_vms_filename) 477 my $dot; 478 my $dot_pattern; 479 if ( $^O eq 'VMS' ) { 480 $dot = '_'; 481 $dot_pattern = '_'; 482 } 483 else { 484 $dot = '.'; 485 $dot_pattern = '\.'; # must escape for use in regex 486 } 487 488 #--------------------------------------------------------------- 489 # get command line options 490 #--------------------------------------------------------------- 491 my ( 492 $rOpts, $config_file, $rraw_options, 493 $saw_extrude, $saw_pbp, $roption_string, 494 $rexpansion, $roption_category, $roption_range 495 ) 496 = process_command_line( 497 $perltidyrc_stream, $is_Windows, $Windows_type, 498 $rpending_complaint, $dump_options_type, 499 ); 500 501 #--------------------------------------------------------------- 502 # Handle requests to dump information 503 #--------------------------------------------------------------- 504 505 # return or exit immediately after all dumps 506 my $quit_now = 0; 507 508 # Getopt parameters and their flags 509 if ( defined($dump_getopt_flags) ) { 510 $quit_now = 1; 511 foreach my $op ( @{$roption_string} ) { 512 my $opt = $op; 513 my $flag = ""; 514 515 # Examples: 516 # some-option=s 517 # some-option=i 518 # some-option:i 519 # some-option! 520 if ( $opt =~ /(.*)(!|=.*|:.*)$/ ) { 521 $opt = $1; 522 $flag = $2; 523 } 524 $dump_getopt_flags->{$opt} = $flag; 525 } 526 } 527 528 if ( defined($dump_options_category) ) { 529 $quit_now = 1; 530 %{$dump_options_category} = %{$roption_category}; 531 } 532 533 if ( defined($dump_options_range) ) { 534 $quit_now = 1; 535 %{$dump_options_range} = %{$roption_range}; 536 } 537 538 if ( defined($dump_abbreviations) ) { 539 $quit_now = 1; 540 %{$dump_abbreviations} = %{$rexpansion}; 541 } 542 543 if ( defined($dump_options) ) { 544 $quit_now = 1; 545 %{$dump_options} = %{$rOpts}; 546 } 547 548 Exit 0 if ($quit_now); 549 550 # make printable string of options for this run as possible diagnostic 551 my $readable_options = readable_options( $rOpts, $roption_string ); 552 553 # dump from command line 554 if ( $rOpts->{'dump-options'} ) { 555 print STDOUT $readable_options; 556 Exit 0; 557 } 558 559 #--------------------------------------------------------------- 560 # check parameters and their interactions 561 #--------------------------------------------------------------- 562 my $tabsize = 563 check_options( $rOpts, $is_Windows, $Windows_type, $rpending_complaint ); 564 565 if ($user_formatter) { 566 $rOpts->{'format'} = 'user'; 567 } 568 569 # there must be one entry here for every possible format 570 my %default_file_extension = ( 571 tidy => 'tdy', 572 html => 'html', 573 user => '', 574 ); 575 576 # be sure we have a valid output format 577 unless ( exists $default_file_extension{ $rOpts->{'format'} } ) { 578 my $formats = join ' ', 579 sort map { "'" . $_ . "'" } keys %default_file_extension; 580 my $fmt = $rOpts->{'format'}; 581 Die "-format='$fmt' but must be one of: $formats\n"; 582 } 583 584 my $output_extension = make_extension( $rOpts->{'output-file-extension'}, 585 $default_file_extension{ $rOpts->{'format'} }, $dot ); 586 587 # If the backup extension contains a / character then the backup should 588 # be deleted when the -b option is used. On older versions of 589 # perltidy this will generate an error message due to an illegal 590 # file name. 591 # 592 # A backup file will still be generated but will be deleted 593 # at the end. If -bext='/' then this extension will be 594 # the default 'bak'. Otherwise it will be whatever characters 595 # remains after all '/' characters are removed. For example: 596 # -bext extension slashes 597 # '/' bak 1 598 # '/delete' delete 1 599 # 'delete/' delete 1 600 # '/dev/null' devnull 2 (Currently not allowed) 601 my $bext = $rOpts->{'backup-file-extension'}; 602 my $delete_backup = ( $rOpts->{'backup-file-extension'} =~ s/\///g ); 603 604 # At present only one forward slash is allowed. In the future multiple 605 # slashes may be allowed to allow for other options 606 if ( $delete_backup > 1 ) { 607 Die "-bext=$bext contains more than one '/'\n"; 608 } 609 610 my $backup_extension = 611 make_extension( $rOpts->{'backup-file-extension'}, 'bak', $dot ); 612 613 my $html_toc_extension = 614 make_extension( $rOpts->{'html-toc-extension'}, 'toc', $dot ); 615 616 my $html_src_extension = 617 make_extension( $rOpts->{'html-src-extension'}, 'src', $dot ); 618 619 # check for -b option; 620 # silently ignore unless beautify mode 621 my $in_place_modify = $rOpts->{'backup-and-modify-in-place'} 622 && $rOpts->{'format'} eq 'tidy'; 623 624 # turn off -b with warnings in case of conflicts with other options 625 if ($in_place_modify) { 626 if ( $rOpts->{'standard-output'} ) { 627 my $msg = "Ignoring -b; you may not use -b and -st together"; 628 $msg .= " (-pbp contains -st; see manual)" if ($saw_pbp); 629 Warn "$msg\n"; 630 $in_place_modify = 0; 631 } 632 if ($destination_stream) { 633 Warn 634"Ignoring -b; you may not specify a destination stream and -b together\n"; 635 $in_place_modify = 0; 636 } 637 if ( ref($source_stream) ) { 638 Warn 639"Ignoring -b; you may not specify a source array and -b together\n"; 640 $in_place_modify = 0; 641 } 642 if ( $rOpts->{'outfile'} ) { 643 Warn "Ignoring -b; you may not use -b and -o together\n"; 644 $in_place_modify = 0; 645 } 646 if ( defined( $rOpts->{'output-path'} ) ) { 647 Warn "Ignoring -b; you may not use -b and -opath together\n"; 648 $in_place_modify = 0; 649 } 650 } 651 652 Perl::Tidy::Formatter::check_options($rOpts); 653 if ( $rOpts->{'format'} eq 'html' ) { 654 Perl::Tidy::HtmlWriter->check_options($rOpts); 655 } 656 657 # make the pattern of file extensions that we shouldn't touch 658 my $forbidden_file_extensions = "(($dot_pattern)(LOG|DEBUG|ERR|TEE)"; 659 if ($output_extension) { 660 my $ext = quotemeta($output_extension); 661 $forbidden_file_extensions .= "|$ext"; 662 } 663 if ( $in_place_modify && $backup_extension ) { 664 my $ext = quotemeta($backup_extension); 665 $forbidden_file_extensions .= "|$ext"; 666 } 667 $forbidden_file_extensions .= ')$'; 668 669 # Create a diagnostics object if requested; 670 # This is only useful for code development 671 my $diagnostics_object = undef; 672 if ( $rOpts->{'DIAGNOSTICS'} ) { 673 $diagnostics_object = Perl::Tidy::Diagnostics->new(); 674 } 675 676 # no filenames should be given if input is from an array 677 if ($source_stream) { 678 if ( @ARGV > 0 ) { 679 Die 680"You may not specify any filenames when a source array is given\n"; 681 } 682 683 # we'll stuff the source array into ARGV 684 unshift( @ARGV, $source_stream ); 685 686 # No special treatment for source stream which is a filename. 687 # This will enable checks for binary files and other bad stuff. 688 $source_stream = undef unless ref($source_stream); 689 } 690 691 # use stdin by default if no source array and no args 692 else { 693 unshift( @ARGV, '-' ) unless @ARGV; 694 } 695 696 #--------------------------------------------------------------- 697 # Ready to go... 698 # main loop to process all files in argument list 699 #--------------------------------------------------------------- 700 my $number_of_files = @ARGV; 701 my $formatter = undef; 702 my $tokenizer = undef; 703 while ( my $input_file = shift @ARGV ) { 704 my $fileroot; 705 my $input_file_permissions; 706 707 #--------------------------------------------------------------- 708 # prepare this input stream 709 #--------------------------------------------------------------- 710 if ($source_stream) { 711 $fileroot = "perltidy"; 712 } 713 elsif ( $input_file eq '-' ) { # '-' indicates input from STDIN 714 $fileroot = "perltidy"; # root name to use for .ERR, .LOG, etc 715 $in_place_modify = 0; 716 } 717 else { 718 $fileroot = $input_file; 719 unless ( -e $input_file ) { 720 721 # file doesn't exist - check for a file glob 722 if ( $input_file =~ /([\?\*\[\{])/ ) { 723 724 # Windows shell may not remove quotes, so do it 725 my $input_file = $input_file; 726 if ( $input_file =~ /^\'(.+)\'$/ ) { $input_file = $1 } 727 if ( $input_file =~ /^\"(.+)\"$/ ) { $input_file = $1 } 728 my $pattern = fileglob_to_re($input_file); 729 ##eval "/$pattern/"; 730 if ( !$@ && opendir( DIR, './' ) ) { 731 my @files = 732 grep { /$pattern/ && !-d $_ } readdir(DIR); 733 closedir(DIR); 734 if (@files) { 735 unshift @ARGV, @files; 736 next; 737 } 738 } 739 } 740 Warn "skipping file: '$input_file': no matches found\n"; 741 next; 742 } 743 744 unless ( -f $input_file ) { 745 Warn "skipping file: $input_file: not a regular file\n"; 746 next; 747 } 748 749 # As a safety precaution, skip zero length files. 750 # If for example a source file got clobberred somehow, 751 # the old .tdy or .bak files might still exist so we 752 # shouldn't overwrite them with zero length files. 753 unless ( -s $input_file ) { 754 Warn "skipping file: $input_file: Zero size\n"; 755 next; 756 } 757 758 unless ( ( -T $input_file ) || $rOpts->{'force-read-binary'} ) { 759 Warn 760 "skipping file: $input_file: Non-text (override with -f)\n"; 761 next; 762 } 763 764 # we should have a valid filename now 765 $fileroot = $input_file; 766 $input_file_permissions = ( stat $input_file )[2] & 07777; 767 768 if ( $^O eq 'VMS' ) { 769 ( $fileroot, $dot ) = check_vms_filename($fileroot); 770 } 771 772 # add option to change path here 773 if ( defined( $rOpts->{'output-path'} ) ) { 774 775 my ( $base, $old_path ) = fileparse($fileroot); 776 my $new_path = $rOpts->{'output-path'}; 777 unless ( -d $new_path ) { 778 unless ( mkdir $new_path, 0777 ) { 779 Die "unable to create directory $new_path: $!\n"; 780 } 781 } 782 my $path = $new_path; 783 $fileroot = catfile( $path, $base ); 784 unless ($fileroot) { 785 Die <<EOM; 786------------------------------------------------------------------------ 787Problem combining $new_path and $base to make a filename; check -opath 788------------------------------------------------------------------------ 789EOM 790 } 791 } 792 } 793 794 # Skip files with same extension as the output files because 795 # this can lead to a messy situation with files like 796 # script.tdy.tdy.tdy ... or worse problems ... when you 797 # rerun perltidy over and over with wildcard input. 798 if ( 799 !$source_stream 800 && ( $input_file =~ /$forbidden_file_extensions/o 801 || $input_file eq 'DIAGNOSTICS' ) 802 ) 803 { 804 Warn "skipping file: $input_file: wrong extension\n"; 805 next; 806 } 807 808 # the 'source_object' supplies a method to read the input file 809 my $source_object = 810 Perl::Tidy::LineSource->new( $input_file, $rOpts, 811 $rpending_logfile_message ); 812 next unless ($source_object); 813 814 # Prefilters and postfilters: The prefilter is a code reference 815 # that will be applied to the source before tidying, and the 816 # postfilter is a code reference to the result before outputting. 817 if ($prefilter) { 818 my $buf = ''; 819 while ( my $line = $source_object->get_line() ) { 820 $buf .= $line; 821 } 822 $buf = $prefilter->($buf); 823 824 $source_object = Perl::Tidy::LineSource->new( \$buf, $rOpts, 825 $rpending_logfile_message ); 826 } 827 828 # register this file name with the Diagnostics package 829 $diagnostics_object->set_input_file($input_file) 830 if $diagnostics_object; 831 832 #--------------------------------------------------------------- 833 # prepare the output stream 834 #--------------------------------------------------------------- 835 my $output_file = undef; 836 my $actual_output_extension; 837 838 if ( $rOpts->{'outfile'} ) { 839 840 if ( $number_of_files <= 1 ) { 841 842 if ( $rOpts->{'standard-output'} ) { 843 my $msg = "You may not use -o and -st together"; 844 $msg .= " (-pbp contains -st; see manual)" if ($saw_pbp); 845 Die "$msg\n"; 846 } 847 elsif ($destination_stream) { 848 Die 849"You may not specify a destination array and -o together\n"; 850 } 851 elsif ( defined( $rOpts->{'output-path'} ) ) { 852 Die "You may not specify -o and -opath together\n"; 853 } 854 elsif ( defined( $rOpts->{'output-file-extension'} ) ) { 855 Die "You may not specify -o and -oext together\n"; 856 } 857 $output_file = $rOpts->{outfile}; 858 859 # make sure user gives a file name after -o 860 if ( $output_file =~ /^-/ ) { 861 Die "You must specify a valid filename after -o\n"; 862 } 863 864 # do not overwrite input file with -o 865 if ( defined($input_file_permissions) 866 && ( $output_file eq $input_file ) ) 867 { 868 Die "Use 'perltidy -b $input_file' to modify in-place\n"; 869 } 870 } 871 else { 872 Die "You may not use -o with more than one input file\n"; 873 } 874 } 875 elsif ( $rOpts->{'standard-output'} ) { 876 if ($destination_stream) { 877 my $msg = 878 "You may not specify a destination array and -st together\n"; 879 $msg .= " (-pbp contains -st; see manual)" if ($saw_pbp); 880 Die "$msg\n"; 881 } 882 $output_file = '-'; 883 884 if ( $number_of_files <= 1 ) { 885 } 886 else { 887 Die "You may not use -st with more than one input file\n"; 888 } 889 } 890 elsif ($destination_stream) { 891 $output_file = $destination_stream; 892 } 893 elsif ($source_stream) { # source but no destination goes to stdout 894 $output_file = '-'; 895 } 896 elsif ( $input_file eq '-' ) { 897 $output_file = '-'; 898 } 899 else { 900 if ($in_place_modify) { 901 $output_file = IO::File->new_tmpfile() 902 or Die "cannot open temp file for -b option: $!\n"; 903 } 904 else { 905 $actual_output_extension = $output_extension; 906 $output_file = $fileroot . $output_extension; 907 } 908 } 909 910 # the 'sink_object' knows how to write the output file 911 my $tee_file = $fileroot . $dot . "TEE"; 912 913 my $line_separator = $rOpts->{'output-line-ending'}; 914 if ( $rOpts->{'preserve-line-endings'} ) { 915 $line_separator = find_input_line_ending($input_file); 916 } 917 918 # Eventually all I/O may be done with binmode, but for now it is 919 # only done when a user requests a particular line separator 920 # through the -ple or -ole flags 921 my $binmode = 0; 922 if ( defined($line_separator) ) { $binmode = 1 } 923 else { $line_separator = "\n" } 924 925 my ( $sink_object, $postfilter_buffer ); 926 if ($postfilter) { 927 $sink_object = 928 Perl::Tidy::LineSink->new( \$postfilter_buffer, $tee_file, 929 $line_separator, $rOpts, $rpending_logfile_message, $binmode ); 930 } 931 else { 932 $sink_object = 933 Perl::Tidy::LineSink->new( $output_file, $tee_file, 934 $line_separator, $rOpts, $rpending_logfile_message, $binmode ); 935 } 936 937 #--------------------------------------------------------------- 938 # initialize the error logger for this file 939 #--------------------------------------------------------------- 940 my $warning_file = $fileroot . $dot . "ERR"; 941 if ($errorfile_stream) { $warning_file = $errorfile_stream } 942 my $log_file = $fileroot . $dot . "LOG"; 943 if ($logfile_stream) { $log_file = $logfile_stream } 944 945 my $logger_object = 946 Perl::Tidy::Logger->new( $rOpts, $log_file, $warning_file, 947 $fh_stderr, $saw_extrude ); 948 write_logfile_header( 949 $rOpts, $logger_object, $config_file, 950 $rraw_options, $Windows_type, $readable_options, 951 ); 952 if ($$rpending_logfile_message) { 953 $logger_object->write_logfile_entry($$rpending_logfile_message); 954 } 955 if ($$rpending_complaint) { 956 $logger_object->complain($$rpending_complaint); 957 } 958 959 #--------------------------------------------------------------- 960 # initialize the debug object, if any 961 #--------------------------------------------------------------- 962 my $debugger_object = undef; 963 if ( $rOpts->{DEBUG} ) { 964 $debugger_object = 965 Perl::Tidy::Debugger->new( $fileroot . $dot . "DEBUG" ); 966 } 967 968 #--------------------------------------------------------------- 969 # loop over iterations for one source stream 970 #--------------------------------------------------------------- 971 972 # We will do a convergence test if 3 or more iterations are allowed. 973 # It would be pointless for fewer because we have to make at least 974 # two passes before we can see if we are converged, and the test 975 # would just slow things down. 976 my $max_iterations = $rOpts->{'iterations'}; 977 my $convergence_log_message; 978 my %saw_md5; 979 my $do_convergence_test = $max_iterations > 2; 980 if ($do_convergence_test) { 981 eval "use Digest::MD5 qw(md5_hex)"; 982 $do_convergence_test = !$@; 983 } 984 985 # save objects to allow redirecting output during iterations 986 my $sink_object_final = $sink_object; 987 my $debugger_object_final = $debugger_object; 988 my $logger_object_final = $logger_object; 989 990 for ( my $iter = 1 ; $iter <= $max_iterations ; $iter++ ) { 991 992 # send output stream to temp buffers until last iteration 993 my $sink_buffer; 994 if ( $iter < $max_iterations ) { 995 $sink_object = 996 Perl::Tidy::LineSink->new( \$sink_buffer, $tee_file, 997 $line_separator, $rOpts, $rpending_logfile_message, 998 $binmode ); 999 } 1000 else { 1001 $sink_object = $sink_object_final; 1002 } 1003 1004 # Save logger, debugger output only on pass 1 because: 1005 # (1) line number references must be to the starting 1006 # source, not an intermediate result, and 1007 # (2) we need to know if there are errors so we can stop the 1008 # iterations early if necessary. 1009 if ( $iter > 1 ) { 1010 $debugger_object = undef; 1011 $logger_object = undef; 1012 } 1013 1014 #------------------------------------------------------------ 1015 # create a formatter for this file : html writer or 1016 # pretty printer 1017 #------------------------------------------------------------ 1018 1019 # we have to delete any old formatter because, for safety, 1020 # the formatter will check to see that there is only one. 1021 $formatter = undef; 1022 1023 if ($user_formatter) { 1024 $formatter = $user_formatter; 1025 } 1026 elsif ( $rOpts->{'format'} eq 'html' ) { 1027 $formatter = 1028 Perl::Tidy::HtmlWriter->new( $fileroot, $output_file, 1029 $actual_output_extension, $html_toc_extension, 1030 $html_src_extension ); 1031 } 1032 elsif ( $rOpts->{'format'} eq 'tidy' ) { 1033 $formatter = Perl::Tidy::Formatter->new( 1034 logger_object => $logger_object, 1035 diagnostics_object => $diagnostics_object, 1036 sink_object => $sink_object, 1037 ); 1038 } 1039 else { 1040 Die "I don't know how to do -format=$rOpts->{'format'}\n"; 1041 } 1042 1043 unless ($formatter) { 1044 Die "Unable to continue with $rOpts->{'format'} formatting\n"; 1045 } 1046 1047 #--------------------------------------------------------------- 1048 # create the tokenizer for this file 1049 #--------------------------------------------------------------- 1050 $tokenizer = undef; # must destroy old tokenizer 1051 $tokenizer = Perl::Tidy::Tokenizer->new( 1052 source_object => $source_object, 1053 logger_object => $logger_object, 1054 debugger_object => $debugger_object, 1055 diagnostics_object => $diagnostics_object, 1056 tabsize => $tabsize, 1057 1058 starting_level => $rOpts->{'starting-indentation-level'}, 1059 indent_columns => $rOpts->{'indent-columns'}, 1060 look_for_hash_bang => $rOpts->{'look-for-hash-bang'}, 1061 look_for_autoloader => $rOpts->{'look-for-autoloader'}, 1062 look_for_selfloader => $rOpts->{'look-for-selfloader'}, 1063 trim_qw => $rOpts->{'trim-qw'}, 1064 1065 continuation_indentation => 1066 $rOpts->{'continuation-indentation'}, 1067 outdent_labels => $rOpts->{'outdent-labels'}, 1068 ); 1069 1070 #--------------------------------------------------------------- 1071 # now we can do it 1072 #--------------------------------------------------------------- 1073 process_this_file( $tokenizer, $formatter ); 1074 1075 #--------------------------------------------------------------- 1076 # close the input source and report errors 1077 #--------------------------------------------------------------- 1078 $source_object->close_input_file(); 1079 1080 # line source for next iteration (if any) comes from the current 1081 # temporary output buffer 1082 if ( $iter < $max_iterations ) { 1083 1084 $sink_object->close_output_file(); 1085 $source_object = 1086 Perl::Tidy::LineSource->new( \$sink_buffer, $rOpts, 1087 $rpending_logfile_message ); 1088 1089 # stop iterations if errors or converged 1090 my $stop_now = $logger_object->{_warning_count}; 1091 if ($stop_now) { 1092 $convergence_log_message = <<EOM; 1093Stopping iterations because of errors. 1094EOM 1095 } 1096 elsif ($do_convergence_test) { 1097 my $digest = md5_hex($sink_buffer); 1098 if ( !$saw_md5{$digest} ) { 1099 $saw_md5{$digest} = $iter; 1100 } 1101 else { 1102 1103 # Deja vu, stop iterating 1104 $stop_now = 1; 1105 my $iterm = $iter - 1; 1106 if ( $saw_md5{$digest} != $iterm ) { 1107 1108 # Blinking (oscillating) between two stable 1109 # end states. This has happened in the past 1110 # but at present there are no known instances. 1111 $convergence_log_message = <<EOM; 1112Blinking. Output for iteration $iter same as for $saw_md5{$digest}. 1113EOM 1114 $diagnostics_object->write_diagnostics( 1115 $convergence_log_message) 1116 if $diagnostics_object; 1117 } 1118 else { 1119 $convergence_log_message = <<EOM; 1120Converged. Output for iteration $iter same as for iter $iterm. 1121EOM 1122 $diagnostics_object->write_diagnostics( 1123 $convergence_log_message) 1124 if $diagnostics_object && $iterm > 2; 1125 } 1126 } 1127 } ## end if ($do_convergence_test) 1128 1129 if ($stop_now) { 1130 1131 # we are stopping the iterations early; 1132 # copy the output stream to its final destination 1133 $sink_object = $sink_object_final; 1134 while ( my $line = $source_object->get_line() ) { 1135 $sink_object->write_line($line); 1136 } 1137 $source_object->close_input_file(); 1138 last; 1139 } 1140 } ## end if ( $iter < $max_iterations) 1141 } # end loop over iterations for one source file 1142 1143 # restore objects which have been temporarily undefined 1144 # for second and higher iterations 1145 $debugger_object = $debugger_object_final; 1146 $logger_object = $logger_object_final; 1147 1148 $logger_object->write_logfile_entry($convergence_log_message) 1149 if $convergence_log_message; 1150 1151 #--------------------------------------------------------------- 1152 # Perform any postfilter operation 1153 #--------------------------------------------------------------- 1154 if ($postfilter) { 1155 $sink_object->close_output_file(); 1156 $sink_object = 1157 Perl::Tidy::LineSink->new( $output_file, $tee_file, 1158 $line_separator, $rOpts, $rpending_logfile_message, $binmode ); 1159 my $buf = $postfilter->($postfilter_buffer); 1160 $source_object = 1161 Perl::Tidy::LineSource->new( \$buf, $rOpts, 1162 $rpending_logfile_message ); 1163 while ( my $line = $source_object->get_line() ) { 1164 $sink_object->write_line($line); 1165 } 1166 $source_object->close_input_file(); 1167 } 1168 1169 # Save names of the input and output files for syntax check 1170 my $ifname = $input_file; 1171 my $ofname = $output_file; 1172 1173 #--------------------------------------------------------------- 1174 # handle the -b option (backup and modify in-place) 1175 #--------------------------------------------------------------- 1176 if ($in_place_modify) { 1177 unless ( -f $input_file ) { 1178 1179 # oh, oh, no real file to backup .. 1180 # shouldn't happen because of numerous preliminary checks 1181 Die 1182"problem with -b backing up input file '$input_file': not a file\n"; 1183 } 1184 my $backup_name = $input_file . $backup_extension; 1185 if ( -f $backup_name ) { 1186 unlink($backup_name) 1187 or Die 1188"unable to remove previous '$backup_name' for -b option; check permissions: $!\n"; 1189 } 1190 1191 # backup the input file 1192 # we use copy for symlinks, move for regular files 1193 if ( -l $input_file ) { 1194 File::Copy::copy( $input_file, $backup_name ) 1195 or Die "File::Copy failed trying to backup source: $!"; 1196 } 1197 else { 1198 rename( $input_file, $backup_name ) 1199 or Die 1200"problem renaming $input_file to $backup_name for -b option: $!\n"; 1201 } 1202 $ifname = $backup_name; 1203 1204 # copy the output to the original input file 1205 # NOTE: it would be nice to just close $output_file and use 1206 # File::Copy::copy here, but in this case $output_file is the 1207 # handle of an open nameless temporary file so we would lose 1208 # everything if we closed it. 1209 seek( $output_file, 0, 0 ) 1210 or Die "unable to rewind a temporary file for -b option: $!\n"; 1211 my $fout = IO::File->new("> $input_file") 1212 or Die 1213"problem re-opening $input_file for write for -b option; check file and directory permissions: $!\n"; 1214 binmode $fout; 1215 my $line; 1216 while ( $line = $output_file->getline() ) { 1217 $fout->print($line); 1218 } 1219 $fout->close(); 1220 $output_file = $input_file; 1221 $ofname = $input_file; 1222 } 1223 1224 #--------------------------------------------------------------- 1225 # clean up and report errors 1226 #--------------------------------------------------------------- 1227 $sink_object->close_output_file() if $sink_object; 1228 $debugger_object->close_debug_file() if $debugger_object; 1229 1230 # set output file permissions 1231 if ( $output_file && -f $output_file && !-l $output_file ) { 1232 if ($input_file_permissions) { 1233 1234 # give output script same permissions as input script, but 1235 # make it user-writable or else we can't run perltidy again. 1236 # Thus we retain whatever executable flags were set. 1237 if ( $rOpts->{'format'} eq 'tidy' ) { 1238 chmod( $input_file_permissions | 0600, $output_file ); 1239 } 1240 1241 # else use default permissions for html and any other format 1242 } 1243 } 1244 1245 #--------------------------------------------------------------- 1246 # Do syntax check if requested and possible 1247 #--------------------------------------------------------------- 1248 my $infile_syntax_ok = 0; # -1 no 0=don't know 1 yes 1249 if ( $logger_object 1250 && $rOpts->{'check-syntax'} 1251 && $ifname 1252 && $ofname ) 1253 { 1254 $infile_syntax_ok = 1255 check_syntax( $ifname, $ofname, $logger_object, $rOpts ); 1256 } 1257 1258 #--------------------------------------------------------------- 1259 # remove the original file for in-place modify as follows: 1260 # $delete_backup=0 never 1261 # $delete_backup=1 only if no errors 1262 # $delete_backup>1 always : NOT ALLOWED, too risky, see above 1263 #--------------------------------------------------------------- 1264 if ( $in_place_modify 1265 && $delete_backup 1266 && -f $ifname 1267 && ( $delete_backup > 1 || !$logger_object->{_warning_count} ) ) 1268 { 1269 1270 # As an added safety precaution, do not delete the source file 1271 # if its size has dropped from positive to zero, since this 1272 # could indicate a disaster of some kind, including a hardware 1273 # failure. Actually, this could happen if you had a file of 1274 # all comments (or pod) and deleted everything with -dac (-dap) 1275 # for some reason. 1276 if ( !-s $output_file && -s $ifname && $delete_backup == 1 ) { 1277 Warn( 1278"output file '$output_file' missing or zero length; original '$ifname' not deleted\n" 1279 ); 1280 } 1281 else { 1282 unlink($ifname) 1283 or Die 1284"unable to remove previous '$ifname' for -b option; check permissions: $!\n"; 1285 } 1286 } 1287 1288 $logger_object->finish( $infile_syntax_ok, $formatter ) 1289 if $logger_object; 1290 } # end of main loop to process all files 1291 1292 NORMAL_EXIT: 1293 return 0; 1294 1295 ERROR_EXIT: 1296 return 1; 1297} # end of main program perltidy 1298 1299sub get_stream_as_named_file { 1300 1301 # Return the name of a file containing a stream of data, creating 1302 # a temporary file if necessary. 1303 # Given: 1304 # $stream - the name of a file or stream 1305 # Returns: 1306 # $fname = name of file if possible, or undef 1307 # $if_tmpfile = true if temp file, undef if not temp file 1308 # 1309 # This routine is needed for passing actual files to Perl for 1310 # a syntax check. 1311 my ($stream) = @_; 1312 my $is_tmpfile; 1313 my $fname; 1314 if ($stream) { 1315 if ( ref($stream) ) { 1316 my ( $fh_stream, $fh_name ) = 1317 Perl::Tidy::streamhandle( $stream, 'r' ); 1318 if ($fh_stream) { 1319 my ( $fout, $tmpnam ); 1320 1321 # TODO: fix the tmpnam routine to return an open filehandle 1322 $tmpnam = Perl::Tidy::make_temporary_filename(); 1323 $fout = IO::File->new( $tmpnam, 'w' ); 1324 1325 if ($fout) { 1326 $fname = $tmpnam; 1327 $is_tmpfile = 1; 1328 binmode $fout; 1329 while ( my $line = $fh_stream->getline() ) { 1330 $fout->print($line); 1331 } 1332 $fout->close(); 1333 } 1334 $fh_stream->close(); 1335 } 1336 } 1337 elsif ( $stream ne '-' && -f $stream ) { 1338 $fname = $stream; 1339 } 1340 } 1341 return ( $fname, $is_tmpfile ); 1342} 1343 1344sub fileglob_to_re { 1345 1346 # modified (corrected) from version in find2perl 1347 my $x = shift; 1348 $x =~ s#([./^\$()])#\\$1#g; # escape special characters 1349 $x =~ s#\*#.*#g; # '*' -> '.*' 1350 $x =~ s#\?#.#g; # '?' -> '.' 1351 "^$x\\z"; # match whole word 1352} 1353 1354sub make_extension { 1355 1356 # Make a file extension, including any leading '.' if necessary 1357 # The '.' may actually be an '_' under VMS 1358 my ( $extension, $default, $dot ) = @_; 1359 1360 # Use the default if none specified 1361 $extension = $default unless ($extension); 1362 1363 # Only extensions with these leading characters get a '.' 1364 # This rule gives the user some freedom 1365 if ( $extension =~ /^[a-zA-Z0-9]/ ) { 1366 $extension = $dot . $extension; 1367 } 1368 return $extension; 1369} 1370 1371sub write_logfile_header { 1372 my ( 1373 $rOpts, $logger_object, $config_file, 1374 $rraw_options, $Windows_type, $readable_options 1375 ) = @_; 1376 $logger_object->write_logfile_entry( 1377"perltidy version $VERSION log file on a $^O system, OLD_PERL_VERSION=$]\n" 1378 ); 1379 if ($Windows_type) { 1380 $logger_object->write_logfile_entry("Windows type is $Windows_type\n"); 1381 } 1382 my $options_string = join( ' ', @$rraw_options ); 1383 1384 if ($config_file) { 1385 $logger_object->write_logfile_entry( 1386 "Found Configuration File >>> $config_file \n"); 1387 } 1388 $logger_object->write_logfile_entry( 1389 "Configuration and command line parameters for this run:\n"); 1390 $logger_object->write_logfile_entry("$options_string\n"); 1391 1392 if ( $rOpts->{'DEBUG'} || $rOpts->{'show-options'} ) { 1393 $rOpts->{'logfile'} = 1; # force logfile to be saved 1394 $logger_object->write_logfile_entry( 1395 "Final parameter set for this run\n"); 1396 $logger_object->write_logfile_entry( 1397 "------------------------------------\n"); 1398 1399 $logger_object->write_logfile_entry($readable_options); 1400 1401 $logger_object->write_logfile_entry( 1402 "------------------------------------\n"); 1403 } 1404 $logger_object->write_logfile_entry( 1405 "To find error messages search for 'WARNING' with your editor\n"); 1406} 1407 1408sub generate_options { 1409 1410 ###################################################################### 1411 # Generate and return references to: 1412 # @option_string - the list of options to be passed to Getopt::Long 1413 # @defaults - the list of default options 1414 # %expansion - a hash showing how all abbreviations are expanded 1415 # %category - a hash giving the general category of each option 1416 # %option_range - a hash giving the valid ranges of certain options 1417 1418 # Note: a few options are not documented in the man page and usage 1419 # message. This is because these are experimental or debug options and 1420 # may or may not be retained in future versions. 1421 # 1422 # Here are the undocumented flags as far as I know. Any of them 1423 # may disappear at any time. They are mainly for fine-tuning 1424 # and debugging. 1425 # 1426 # fll --> fuzzy-line-length # a trivial parameter which gets 1427 # turned off for the extrude option 1428 # which is mainly for debugging 1429 # scl --> short-concatenation-item-length # helps break at '.' 1430 # recombine # for debugging line breaks 1431 # valign # for debugging vertical alignment 1432 # I --> DIAGNOSTICS # for debugging 1433 ###################################################################### 1434 1435 # here is a summary of the Getopt codes: 1436 # <none> does not take an argument 1437 # =s takes a mandatory string 1438 # :s takes an optional string (DO NOT USE - filenames will get eaten up) 1439 # =i takes a mandatory integer 1440 # :i takes an optional integer (NOT RECOMMENDED - can cause trouble) 1441 # ! does not take an argument and may be negated 1442 # i.e., -foo and -nofoo are allowed 1443 # a double dash signals the end of the options list 1444 # 1445 #--------------------------------------------------------------- 1446 # Define the option string passed to GetOptions. 1447 #--------------------------------------------------------------- 1448 1449 my @option_string = (); 1450 my %expansion = (); 1451 my %option_category = (); 1452 my %option_range = (); 1453 my $rexpansion = \%expansion; 1454 1455 # names of categories in manual 1456 # leading integers will allow sorting 1457 my @category_name = ( 1458 '0. I/O control', 1459 '1. Basic formatting options', 1460 '2. Code indentation control', 1461 '3. Whitespace control', 1462 '4. Comment controls', 1463 '5. Linebreak controls', 1464 '6. Controlling list formatting', 1465 '7. Retaining or ignoring existing line breaks', 1466 '8. Blank line control', 1467 '9. Other controls', 1468 '10. HTML options', 1469 '11. pod2html options', 1470 '12. Controlling HTML properties', 1471 '13. Debugging', 1472 ); 1473 1474 # These options are parsed directly by perltidy: 1475 # help h 1476 # version v 1477 # However, they are included in the option set so that they will 1478 # be seen in the options dump. 1479 1480 # These long option names have no abbreviations or are treated specially 1481 @option_string = qw( 1482 html! 1483 noprofile 1484 no-profile 1485 npro 1486 recombine! 1487 valign! 1488 notidy 1489 ); 1490 1491 my $category = 13; # Debugging 1492 foreach (@option_string) { 1493 my $opt = $_; # must avoid changing the actual flag 1494 $opt =~ s/!$//; 1495 $option_category{$opt} = $category_name[$category]; 1496 } 1497 1498 $category = 11; # HTML 1499 $option_category{html} = $category_name[$category]; 1500 1501 # routine to install and check options 1502 my $add_option = sub { 1503 my ( $long_name, $short_name, $flag ) = @_; 1504 push @option_string, $long_name . $flag; 1505 $option_category{$long_name} = $category_name[$category]; 1506 if ($short_name) { 1507 if ( $expansion{$short_name} ) { 1508 my $existing_name = $expansion{$short_name}[0]; 1509 Die 1510"redefining abbreviation $short_name for $long_name; already used for $existing_name\n"; 1511 } 1512 $expansion{$short_name} = [$long_name]; 1513 if ( $flag eq '!' ) { 1514 my $nshort_name = 'n' . $short_name; 1515 my $nolong_name = 'no' . $long_name; 1516 if ( $expansion{$nshort_name} ) { 1517 my $existing_name = $expansion{$nshort_name}[0]; 1518 Die 1519"attempting to redefine abbreviation $nshort_name for $nolong_name; already used for $existing_name\n"; 1520 } 1521 $expansion{$nshort_name} = [$nolong_name]; 1522 } 1523 } 1524 }; 1525 1526 # Install long option names which have a simple abbreviation. 1527 # Options with code '!' get standard negation ('no' for long names, 1528 # 'n' for abbreviations). Categories follow the manual. 1529 1530 ########################### 1531 $category = 0; # I/O_Control 1532 ########################### 1533 $add_option->( 'backup-and-modify-in-place', 'b', '!' ); 1534 $add_option->( 'backup-file-extension', 'bext', '=s' ); 1535 $add_option->( 'force-read-binary', 'f', '!' ); 1536 $add_option->( 'format', 'fmt', '=s' ); 1537 $add_option->( 'iterations', 'it', '=i' ); 1538 $add_option->( 'logfile', 'log', '!' ); 1539 $add_option->( 'logfile-gap', 'g', ':i' ); 1540 $add_option->( 'outfile', 'o', '=s' ); 1541 $add_option->( 'output-file-extension', 'oext', '=s' ); 1542 $add_option->( 'output-path', 'opath', '=s' ); 1543 $add_option->( 'profile', 'pro', '=s' ); 1544 $add_option->( 'quiet', 'q', '!' ); 1545 $add_option->( 'standard-error-output', 'se', '!' ); 1546 $add_option->( 'standard-output', 'st', '!' ); 1547 $add_option->( 'warning-output', 'w', '!' ); 1548 1549 # options which are both toggle switches and values moved here 1550 # to hide from tidyview (which does not show category 0 flags): 1551 # -ole moved here from category 1 1552 # -sil moved here from category 2 1553 $add_option->( 'output-line-ending', 'ole', '=s' ); 1554 $add_option->( 'starting-indentation-level', 'sil', '=i' ); 1555 1556 ######################################## 1557 $category = 1; # Basic formatting options 1558 ######################################## 1559 $add_option->( 'check-syntax', 'syn', '!' ); 1560 $add_option->( 'entab-leading-whitespace', 'et', '=i' ); 1561 $add_option->( 'indent-columns', 'i', '=i' ); 1562 $add_option->( 'maximum-line-length', 'l', '=i' ); 1563 $add_option->( 'variable-maximum-line-length', 'vmll', '!' ); 1564 $add_option->( 'whitespace-cycle', 'wc', '=i' ); 1565 $add_option->( 'perl-syntax-check-flags', 'pscf', '=s' ); 1566 $add_option->( 'preserve-line-endings', 'ple', '!' ); 1567 $add_option->( 'tabs', 't', '!' ); 1568 $add_option->( 'default-tabsize', 'dt', '=i' ); 1569 1570 ######################################## 1571 $category = 2; # Code indentation control 1572 ######################################## 1573 $add_option->( 'continuation-indentation', 'ci', '=i' ); 1574 $add_option->( 'line-up-parentheses', 'lp', '!' ); 1575 $add_option->( 'outdent-keyword-list', 'okwl', '=s' ); 1576 $add_option->( 'outdent-keywords', 'okw', '!' ); 1577 $add_option->( 'outdent-labels', 'ola', '!' ); 1578 $add_option->( 'outdent-long-quotes', 'olq', '!' ); 1579 $add_option->( 'indent-closing-brace', 'icb', '!' ); 1580 $add_option->( 'closing-token-indentation', 'cti', '=i' ); 1581 $add_option->( 'closing-paren-indentation', 'cpi', '=i' ); 1582 $add_option->( 'closing-brace-indentation', 'cbi', '=i' ); 1583 $add_option->( 'closing-square-bracket-indentation', 'csbi', '=i' ); 1584 $add_option->( 'brace-left-and-indent', 'bli', '!' ); 1585 $add_option->( 'brace-left-and-indent-list', 'blil', '=s' ); 1586 1587 ######################################## 1588 $category = 3; # Whitespace control 1589 ######################################## 1590 $add_option->( 'add-semicolons', 'asc', '!' ); 1591 $add_option->( 'add-whitespace', 'aws', '!' ); 1592 $add_option->( 'block-brace-tightness', 'bbt', '=i' ); 1593 $add_option->( 'brace-tightness', 'bt', '=i' ); 1594 $add_option->( 'delete-old-whitespace', 'dws', '!' ); 1595 $add_option->( 'delete-semicolons', 'dsm', '!' ); 1596 $add_option->( 'nospace-after-keyword', 'nsak', '=s' ); 1597 $add_option->( 'nowant-left-space', 'nwls', '=s' ); 1598 $add_option->( 'nowant-right-space', 'nwrs', '=s' ); 1599 $add_option->( 'paren-tightness', 'pt', '=i' ); 1600 $add_option->( 'space-after-keyword', 'sak', '=s' ); 1601 $add_option->( 'space-for-semicolon', 'sfs', '!' ); 1602 $add_option->( 'space-function-paren', 'sfp', '!' ); 1603 $add_option->( 'space-keyword-paren', 'skp', '!' ); 1604 $add_option->( 'space-terminal-semicolon', 'sts', '!' ); 1605 $add_option->( 'square-bracket-tightness', 'sbt', '=i' ); 1606 $add_option->( 'square-bracket-vertical-tightness', 'sbvt', '=i' ); 1607 $add_option->( 'square-bracket-vertical-tightness-closing', 'sbvtc', '=i' ); 1608 $add_option->( 'tight-secret-operators', 'tso', '!' ); 1609 $add_option->( 'trim-qw', 'tqw', '!' ); 1610 $add_option->( 'want-left-space', 'wls', '=s' ); 1611 $add_option->( 'want-right-space', 'wrs', '=s' ); 1612 1613 ######################################## 1614 $category = 4; # Comment controls 1615 ######################################## 1616 $add_option->( 'closing-side-comment-else-flag', 'csce', '=i' ); 1617 $add_option->( 'closing-side-comment-interval', 'csci', '=i' ); 1618 $add_option->( 'closing-side-comment-list', 'cscl', '=s' ); 1619 $add_option->( 'closing-side-comment-maximum-text', 'csct', '=i' ); 1620 $add_option->( 'closing-side-comment-prefix', 'cscp', '=s' ); 1621 $add_option->( 'closing-side-comment-warnings', 'cscw', '!' ); 1622 $add_option->( 'closing-side-comments', 'csc', '!' ); 1623 $add_option->( 'closing-side-comments-balanced', 'cscb', '!' ); 1624 $add_option->( 'format-skipping', 'fs', '!' ); 1625 $add_option->( 'format-skipping-begin', 'fsb', '=s' ); 1626 $add_option->( 'format-skipping-end', 'fse', '=s' ); 1627 $add_option->( 'hanging-side-comments', 'hsc', '!' ); 1628 $add_option->( 'indent-block-comments', 'ibc', '!' ); 1629 $add_option->( 'indent-spaced-block-comments', 'isbc', '!' ); 1630 $add_option->( 'fixed-position-side-comment', 'fpsc', '=i' ); 1631 $add_option->( 'minimum-space-to-comment', 'msc', '=i' ); 1632 $add_option->( 'outdent-long-comments', 'olc', '!' ); 1633 $add_option->( 'outdent-static-block-comments', 'osbc', '!' ); 1634 $add_option->( 'static-block-comment-prefix', 'sbcp', '=s' ); 1635 $add_option->( 'static-block-comments', 'sbc', '!' ); 1636 $add_option->( 'static-side-comment-prefix', 'sscp', '=s' ); 1637 $add_option->( 'static-side-comments', 'ssc', '!' ); 1638 $add_option->( 'ignore-side-comment-lengths', 'iscl', '!' ); 1639 1640 ######################################## 1641 $category = 5; # Linebreak controls 1642 ######################################## 1643 $add_option->( 'add-newlines', 'anl', '!' ); 1644 $add_option->( 'block-brace-vertical-tightness', 'bbvt', '=i' ); 1645 $add_option->( 'block-brace-vertical-tightness-list', 'bbvtl', '=s' ); 1646 $add_option->( 'brace-vertical-tightness', 'bvt', '=i' ); 1647 $add_option->( 'brace-vertical-tightness-closing', 'bvtc', '=i' ); 1648 $add_option->( 'cuddled-else', 'ce', '!' ); 1649 $add_option->( 'delete-old-newlines', 'dnl', '!' ); 1650 $add_option->( 'opening-brace-always-on-right', 'bar', '!' ); 1651 $add_option->( 'opening-brace-on-new-line', 'bl', '!' ); 1652 $add_option->( 'opening-hash-brace-right', 'ohbr', '!' ); 1653 $add_option->( 'opening-paren-right', 'opr', '!' ); 1654 $add_option->( 'opening-square-bracket-right', 'osbr', '!' ); 1655 $add_option->( 'opening-anonymous-sub-brace-on-new-line', 'asbl', '!' ); 1656 $add_option->( 'opening-sub-brace-on-new-line', 'sbl', '!' ); 1657 $add_option->( 'paren-vertical-tightness', 'pvt', '=i' ); 1658 $add_option->( 'paren-vertical-tightness-closing', 'pvtc', '=i' ); 1659 $add_option->( 'stack-closing-block-brace', 'scbb', '!' ); 1660 $add_option->( 'stack-closing-hash-brace', 'schb', '!' ); 1661 $add_option->( 'stack-closing-paren', 'scp', '!' ); 1662 $add_option->( 'stack-closing-square-bracket', 'scsb', '!' ); 1663 $add_option->( 'stack-opening-block-brace', 'sobb', '!' ); 1664 $add_option->( 'stack-opening-hash-brace', 'sohb', '!' ); 1665 $add_option->( 'stack-opening-paren', 'sop', '!' ); 1666 $add_option->( 'stack-opening-square-bracket', 'sosb', '!' ); 1667 $add_option->( 'vertical-tightness', 'vt', '=i' ); 1668 $add_option->( 'vertical-tightness-closing', 'vtc', '=i' ); 1669 $add_option->( 'want-break-after', 'wba', '=s' ); 1670 $add_option->( 'want-break-before', 'wbb', '=s' ); 1671 $add_option->( 'break-after-all-operators', 'baao', '!' ); 1672 $add_option->( 'break-before-all-operators', 'bbao', '!' ); 1673 $add_option->( 'keep-interior-semicolons', 'kis', '!' ); 1674 1675 ######################################## 1676 $category = 6; # Controlling list formatting 1677 ######################################## 1678 $add_option->( 'break-at-old-comma-breakpoints', 'boc', '!' ); 1679 $add_option->( 'comma-arrow-breakpoints', 'cab', '=i' ); 1680 $add_option->( 'maximum-fields-per-table', 'mft', '=i' ); 1681 1682 ######################################## 1683 $category = 7; # Retaining or ignoring existing line breaks 1684 ######################################## 1685 $add_option->( 'break-at-old-keyword-breakpoints', 'bok', '!' ); 1686 $add_option->( 'break-at-old-logical-breakpoints', 'bol', '!' ); 1687 $add_option->( 'break-at-old-ternary-breakpoints', 'bot', '!' ); 1688 $add_option->( 'break-at-old-attribute-breakpoints', 'boa', '!' ); 1689 $add_option->( 'ignore-old-breakpoints', 'iob', '!' ); 1690 1691 ######################################## 1692 $category = 8; # Blank line control 1693 ######################################## 1694 $add_option->( 'blanks-before-blocks', 'bbb', '!' ); 1695 $add_option->( 'blanks-before-comments', 'bbc', '!' ); 1696 $add_option->( 'blank-lines-before-subs', 'blbs', '=i' ); 1697 $add_option->( 'blank-lines-before-packages', 'blbp', '=i' ); 1698 $add_option->( 'long-block-line-count', 'lbl', '=i' ); 1699 $add_option->( 'maximum-consecutive-blank-lines', 'mbl', '=i' ); 1700 $add_option->( 'keep-old-blank-lines', 'kbl', '=i' ); 1701 1702 ######################################## 1703 $category = 9; # Other controls 1704 ######################################## 1705 $add_option->( 'delete-block-comments', 'dbc', '!' ); 1706 $add_option->( 'delete-closing-side-comments', 'dcsc', '!' ); 1707 $add_option->( 'delete-pod', 'dp', '!' ); 1708 $add_option->( 'delete-side-comments', 'dsc', '!' ); 1709 $add_option->( 'tee-block-comments', 'tbc', '!' ); 1710 $add_option->( 'tee-pod', 'tp', '!' ); 1711 $add_option->( 'tee-side-comments', 'tsc', '!' ); 1712 $add_option->( 'look-for-autoloader', 'lal', '!' ); 1713 $add_option->( 'look-for-hash-bang', 'x', '!' ); 1714 $add_option->( 'look-for-selfloader', 'lsl', '!' ); 1715 $add_option->( 'pass-version-line', 'pvl', '!' ); 1716 1717 ######################################## 1718 $category = 13; # Debugging 1719 ######################################## 1720 $add_option->( 'DEBUG', 'D', '!' ); 1721 $add_option->( 'DIAGNOSTICS', 'I', '!' ); 1722 $add_option->( 'dump-defaults', 'ddf', '!' ); 1723 $add_option->( 'dump-long-names', 'dln', '!' ); 1724 $add_option->( 'dump-options', 'dop', '!' ); 1725 $add_option->( 'dump-profile', 'dpro', '!' ); 1726 $add_option->( 'dump-short-names', 'dsn', '!' ); 1727 $add_option->( 'dump-token-types', 'dtt', '!' ); 1728 $add_option->( 'dump-want-left-space', 'dwls', '!' ); 1729 $add_option->( 'dump-want-right-space', 'dwrs', '!' ); 1730 $add_option->( 'fuzzy-line-length', 'fll', '!' ); 1731 $add_option->( 'help', 'h', '' ); 1732 $add_option->( 'short-concatenation-item-length', 'scl', '=i' ); 1733 $add_option->( 'show-options', 'opt', '!' ); 1734 $add_option->( 'version', 'v', '' ); 1735 $add_option->( 'memoize', 'mem', '!' ); 1736 1737 #--------------------------------------------------------------------- 1738 1739 # The Perl::Tidy::HtmlWriter will add its own options to the string 1740 Perl::Tidy::HtmlWriter->make_getopt_long_names( \@option_string ); 1741 1742 ######################################## 1743 # Set categories 10, 11, 12 1744 ######################################## 1745 # Based on their known order 1746 $category = 12; # HTML properties 1747 foreach my $opt (@option_string) { 1748 my $long_name = $opt; 1749 $long_name =~ s/(!|=.*|:.*)$//; 1750 unless ( defined( $option_category{$long_name} ) ) { 1751 if ( $long_name =~ /^html-linked/ ) { 1752 $category = 10; # HTML options 1753 } 1754 elsif ( $long_name =~ /^pod2html/ ) { 1755 $category = 11; # Pod2html 1756 } 1757 $option_category{$long_name} = $category_name[$category]; 1758 } 1759 } 1760 1761 #--------------------------------------------------------------- 1762 # Assign valid ranges to certain options 1763 #--------------------------------------------------------------- 1764 # In the future, these may be used to make preliminary checks 1765 # hash keys are long names 1766 # If key or value is undefined: 1767 # strings may have any value 1768 # integer ranges are >=0 1769 # If value is defined: 1770 # value is [qw(any valid words)] for strings 1771 # value is [min, max] for integers 1772 # if min is undefined, there is no lower limit 1773 # if max is undefined, there is no upper limit 1774 # Parameters not listed here have defaults 1775 %option_range = ( 1776 'format' => [ 'tidy', 'html', 'user' ], 1777 'output-line-ending' => [ 'dos', 'win', 'mac', 'unix' ], 1778 1779 'block-brace-tightness' => [ 0, 2 ], 1780 'brace-tightness' => [ 0, 2 ], 1781 'paren-tightness' => [ 0, 2 ], 1782 'square-bracket-tightness' => [ 0, 2 ], 1783 1784 'block-brace-vertical-tightness' => [ 0, 2 ], 1785 'brace-vertical-tightness' => [ 0, 2 ], 1786 'brace-vertical-tightness-closing' => [ 0, 2 ], 1787 'paren-vertical-tightness' => [ 0, 2 ], 1788 'paren-vertical-tightness-closing' => [ 0, 2 ], 1789 'square-bracket-vertical-tightness' => [ 0, 2 ], 1790 'square-bracket-vertical-tightness-closing' => [ 0, 2 ], 1791 'vertical-tightness' => [ 0, 2 ], 1792 'vertical-tightness-closing' => [ 0, 2 ], 1793 1794 'closing-brace-indentation' => [ 0, 3 ], 1795 'closing-paren-indentation' => [ 0, 3 ], 1796 'closing-square-bracket-indentation' => [ 0, 3 ], 1797 'closing-token-indentation' => [ 0, 3 ], 1798 1799 'closing-side-comment-else-flag' => [ 0, 2 ], 1800 'comma-arrow-breakpoints' => [ 0, 5 ], 1801 ); 1802 1803 # Note: we could actually allow negative ci if someone really wants it: 1804 # $option_range{'continuation-indentation'} = [ undef, undef ]; 1805 1806 #--------------------------------------------------------------- 1807 # Assign default values to the above options here, except 1808 # for 'outfile' and 'help'. 1809 # These settings should approximate the perlstyle(1) suggestions. 1810 #--------------------------------------------------------------- 1811 my @defaults = qw( 1812 add-newlines 1813 add-semicolons 1814 add-whitespace 1815 blanks-before-blocks 1816 blanks-before-comments 1817 blank-lines-before-subs=1 1818 blank-lines-before-packages=1 1819 block-brace-tightness=0 1820 block-brace-vertical-tightness=0 1821 brace-tightness=1 1822 brace-vertical-tightness-closing=0 1823 brace-vertical-tightness=0 1824 break-at-old-logical-breakpoints 1825 break-at-old-ternary-breakpoints 1826 break-at-old-attribute-breakpoints 1827 break-at-old-keyword-breakpoints 1828 comma-arrow-breakpoints=5 1829 nocheck-syntax 1830 closing-side-comment-interval=6 1831 closing-side-comment-maximum-text=20 1832 closing-side-comment-else-flag=0 1833 closing-side-comments-balanced 1834 closing-paren-indentation=0 1835 closing-brace-indentation=0 1836 closing-square-bracket-indentation=0 1837 continuation-indentation=2 1838 delete-old-newlines 1839 delete-semicolons 1840 fuzzy-line-length 1841 hanging-side-comments 1842 indent-block-comments 1843 indent-columns=4 1844 iterations=1 1845 keep-old-blank-lines=1 1846 long-block-line-count=8 1847 look-for-autoloader 1848 look-for-selfloader 1849 maximum-consecutive-blank-lines=1 1850 maximum-fields-per-table=0 1851 maximum-line-length=80 1852 memoize 1853 minimum-space-to-comment=4 1854 nobrace-left-and-indent 1855 nocuddled-else 1856 nodelete-old-whitespace 1857 nohtml 1858 nologfile 1859 noquiet 1860 noshow-options 1861 nostatic-side-comments 1862 notabs 1863 nowarning-output 1864 outdent-labels 1865 outdent-long-quotes 1866 outdent-long-comments 1867 paren-tightness=1 1868 paren-vertical-tightness-closing=0 1869 paren-vertical-tightness=0 1870 pass-version-line 1871 recombine 1872 valign 1873 short-concatenation-item-length=8 1874 space-for-semicolon 1875 square-bracket-tightness=1 1876 square-bracket-vertical-tightness-closing=0 1877 square-bracket-vertical-tightness=0 1878 static-block-comments 1879 trim-qw 1880 format=tidy 1881 backup-file-extension=bak 1882 format-skipping 1883 default-tabsize=8 1884 1885 pod2html 1886 html-table-of-contents 1887 html-entities 1888 ); 1889 1890 push @defaults, "perl-syntax-check-flags=-c -T"; 1891 1892 #--------------------------------------------------------------- 1893 # Define abbreviations which will be expanded into the above primitives. 1894 # These may be defined recursively. 1895 #--------------------------------------------------------------- 1896 %expansion = ( 1897 %expansion, 1898 'freeze-newlines' => [qw(noadd-newlines nodelete-old-newlines)], 1899 'fnl' => [qw(freeze-newlines)], 1900 'freeze-whitespace' => [qw(noadd-whitespace nodelete-old-whitespace)], 1901 'fws' => [qw(freeze-whitespace)], 1902 'freeze-blank-lines' => 1903 [qw(maximum-consecutive-blank-lines=0 keep-old-blank-lines=2)], 1904 'fbl' => [qw(freeze-blank-lines)], 1905 'indent-only' => [qw(freeze-newlines freeze-whitespace)], 1906 'outdent-long-lines' => [qw(outdent-long-quotes outdent-long-comments)], 1907 'nooutdent-long-lines' => 1908 [qw(nooutdent-long-quotes nooutdent-long-comments)], 1909 'noll' => [qw(nooutdent-long-lines)], 1910 'io' => [qw(indent-only)], 1911 'delete-all-comments' => 1912 [qw(delete-block-comments delete-side-comments delete-pod)], 1913 'nodelete-all-comments' => 1914 [qw(nodelete-block-comments nodelete-side-comments nodelete-pod)], 1915 'dac' => [qw(delete-all-comments)], 1916 'ndac' => [qw(nodelete-all-comments)], 1917 'gnu' => [qw(gnu-style)], 1918 'pbp' => [qw(perl-best-practices)], 1919 'tee-all-comments' => 1920 [qw(tee-block-comments tee-side-comments tee-pod)], 1921 'notee-all-comments' => 1922 [qw(notee-block-comments notee-side-comments notee-pod)], 1923 'tac' => [qw(tee-all-comments)], 1924 'ntac' => [qw(notee-all-comments)], 1925 'html' => [qw(format=html)], 1926 'nhtml' => [qw(format=tidy)], 1927 'tidy' => [qw(format=tidy)], 1928 1929 'swallow-optional-blank-lines' => [qw(kbl=0)], 1930 'noswallow-optional-blank-lines' => [qw(kbl=1)], 1931 'sob' => [qw(kbl=0)], 1932 'nsob' => [qw(kbl=1)], 1933 1934 'break-after-comma-arrows' => [qw(cab=0)], 1935 'nobreak-after-comma-arrows' => [qw(cab=1)], 1936 'baa' => [qw(cab=0)], 1937 'nbaa' => [qw(cab=1)], 1938 1939 'blanks-before-subs' => [qw(blbs=1 blbp=1)], 1940 'bbs' => [qw(blbs=1 blbp=1)], 1941 'noblanks-before-subs' => [qw(blbs=0 blbp=0)], 1942 'nbbs' => [qw(blbs=0 blbp=0)], 1943 1944 'break-at-old-trinary-breakpoints' => [qw(bot)], 1945 1946 'cti=0' => [qw(cpi=0 cbi=0 csbi=0)], 1947 'cti=1' => [qw(cpi=1 cbi=1 csbi=1)], 1948 'cti=2' => [qw(cpi=2 cbi=2 csbi=2)], 1949 'icp' => [qw(cpi=2 cbi=2 csbi=2)], 1950 'nicp' => [qw(cpi=0 cbi=0 csbi=0)], 1951 1952 'closing-token-indentation=0' => [qw(cpi=0 cbi=0 csbi=0)], 1953 'closing-token-indentation=1' => [qw(cpi=1 cbi=1 csbi=1)], 1954 'closing-token-indentation=2' => [qw(cpi=2 cbi=2 csbi=2)], 1955 'indent-closing-paren' => [qw(cpi=2 cbi=2 csbi=2)], 1956 'noindent-closing-paren' => [qw(cpi=0 cbi=0 csbi=0)], 1957 1958 'vt=0' => [qw(pvt=0 bvt=0 sbvt=0)], 1959 'vt=1' => [qw(pvt=1 bvt=1 sbvt=1)], 1960 'vt=2' => [qw(pvt=2 bvt=2 sbvt=2)], 1961 1962 'vertical-tightness=0' => [qw(pvt=0 bvt=0 sbvt=0)], 1963 'vertical-tightness=1' => [qw(pvt=1 bvt=1 sbvt=1)], 1964 'vertical-tightness=2' => [qw(pvt=2 bvt=2 sbvt=2)], 1965 1966 'vtc=0' => [qw(pvtc=0 bvtc=0 sbvtc=0)], 1967 'vtc=1' => [qw(pvtc=1 bvtc=1 sbvtc=1)], 1968 'vtc=2' => [qw(pvtc=2 bvtc=2 sbvtc=2)], 1969 1970 'vertical-tightness-closing=0' => [qw(pvtc=0 bvtc=0 sbvtc=0)], 1971 'vertical-tightness-closing=1' => [qw(pvtc=1 bvtc=1 sbvtc=1)], 1972 'vertical-tightness-closing=2' => [qw(pvtc=2 bvtc=2 sbvtc=2)], 1973 1974 'otr' => [qw(opr ohbr osbr)], 1975 'opening-token-right' => [qw(opr ohbr osbr)], 1976 'notr' => [qw(nopr nohbr nosbr)], 1977 'noopening-token-right' => [qw(nopr nohbr nosbr)], 1978 1979 'sot' => [qw(sop sohb sosb)], 1980 'nsot' => [qw(nsop nsohb nsosb)], 1981 'stack-opening-tokens' => [qw(sop sohb sosb)], 1982 'nostack-opening-tokens' => [qw(nsop nsohb nsosb)], 1983 1984 'sct' => [qw(scp schb scsb)], 1985 'stack-closing-tokens' => => [qw(scp schb scsb)], 1986 'nsct' => [qw(nscp nschb nscsb)], 1987 'nostack-opening-tokens' => [qw(nscp nschb nscsb)], 1988 1989 'sac' => [qw(sot sct)], 1990 'nsac' => [qw(nsot nsct)], 1991 'stack-all-containers' => [qw(sot sct)], 1992 'nostack-all-containers' => [qw(nsot nsct)], 1993 1994 'act=0' => [qw(pt=0 sbt=0 bt=0 bbt=0)], 1995 'act=1' => [qw(pt=1 sbt=1 bt=1 bbt=1)], 1996 'act=2' => [qw(pt=2 sbt=2 bt=2 bbt=2)], 1997 'all-containers-tightness=0' => [qw(pt=0 sbt=0 bt=0 bbt=0)], 1998 'all-containers-tightness=1' => [qw(pt=1 sbt=1 bt=1 bbt=1)], 1999 'all-containers-tightness=2' => [qw(pt=2 sbt=2 bt=2 bbt=2)], 2000 2001 'stack-opening-block-brace' => [qw(bbvt=2 bbvtl=*)], 2002 'sobb' => [qw(bbvt=2 bbvtl=*)], 2003 'nostack-opening-block-brace' => [qw(bbvt=0)], 2004 'nsobb' => [qw(bbvt=0)], 2005 2006 'converge' => [qw(it=4)], 2007 'noconverge' => [qw(it=1)], 2008 'conv' => [qw(it=4)], 2009 'nconv' => [qw(it=1)], 2010 2011 # 'mangle' originally deleted pod and comments, but to keep it 2012 # reversible, it no longer does. But if you really want to 2013 # delete them, just use: 2014 # -mangle -dac 2015 2016 # An interesting use for 'mangle' is to do this: 2017 # perltidy -mangle myfile.pl -st | perltidy -o myfile.pl.new 2018 # which will form as many one-line blocks as possible 2019 2020 'mangle' => [ 2021 qw( 2022 check-syntax 2023 keep-old-blank-lines=0 2024 delete-old-newlines 2025 delete-old-whitespace 2026 delete-semicolons 2027 indent-columns=0 2028 maximum-consecutive-blank-lines=0 2029 maximum-line-length=100000 2030 noadd-newlines 2031 noadd-semicolons 2032 noadd-whitespace 2033 noblanks-before-blocks 2034 blank-lines-before-subs=0 2035 blank-lines-before-packages=0 2036 notabs 2037 ) 2038 ], 2039 2040 # 'extrude' originally deleted pod and comments, but to keep it 2041 # reversible, it no longer does. But if you really want to 2042 # delete them, just use 2043 # extrude -dac 2044 # 2045 # An interesting use for 'extrude' is to do this: 2046 # perltidy -extrude myfile.pl -st | perltidy -o myfile.pl.new 2047 # which will break up all one-line blocks. 2048 2049 'extrude' => [ 2050 qw( 2051 check-syntax 2052 ci=0 2053 delete-old-newlines 2054 delete-old-whitespace 2055 delete-semicolons 2056 indent-columns=0 2057 maximum-consecutive-blank-lines=0 2058 maximum-line-length=1 2059 noadd-semicolons 2060 noadd-whitespace 2061 noblanks-before-blocks 2062 blank-lines-before-subs=0 2063 blank-lines-before-packages=0 2064 nofuzzy-line-length 2065 notabs 2066 norecombine 2067 ) 2068 ], 2069 2070 # this style tries to follow the GNU Coding Standards (which do 2071 # not really apply to perl but which are followed by some perl 2072 # programmers). 2073 'gnu-style' => [ 2074 qw( 2075 lp bl noll pt=2 bt=2 sbt=2 cpi=1 csbi=1 cbi=1 2076 ) 2077 ], 2078 2079 # Style suggested in Damian Conway's Perl Best Practices 2080 'perl-best-practices' => [ 2081 qw(l=78 i=4 ci=4 st se vt=2 cti=0 pt=1 bt=1 sbt=1 bbt=1 nsfs nolq), 2082q(wbb=% + - * / x != == >= <= =~ !~ < > | & = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x=) 2083 ], 2084 2085 # Additional styles can be added here 2086 ); 2087 2088 Perl::Tidy::HtmlWriter->make_abbreviated_names( \%expansion ); 2089 2090 # Uncomment next line to dump all expansions for debugging: 2091 # dump_short_names(\%expansion); 2092 return ( 2093 \@option_string, \@defaults, \%expansion, 2094 \%option_category, \%option_range 2095 ); 2096 2097} # end of generate_options 2098 2099# Memoize process_command_line. Given same @ARGV passed in, return same 2100# values and same @ARGV back. 2101# This patch was supplied by Jonathan Swartz Nov 2012 and significantly speeds 2102# up masontidy (https://metacpan.org/module/masontidy) 2103 2104my %process_command_line_cache; 2105 2106sub process_command_line { 2107 2108 my ( 2109 $perltidyrc_stream, $is_Windows, $Windows_type, 2110 $rpending_complaint, $dump_options_type 2111 ) = @_; 2112 2113 my $use_cache = !defined($perltidyrc_stream) && !$dump_options_type; 2114 if ($use_cache) { 2115 my $cache_key = join( chr(28), @ARGV ); 2116 if ( my $result = $process_command_line_cache{$cache_key} ) { 2117 my ( $argv, @retvals ) = @$result; 2118 @ARGV = @$argv; 2119 return @retvals; 2120 } 2121 else { 2122 my @retvals = _process_command_line(@_); 2123 $process_command_line_cache{$cache_key} = [ \@ARGV, @retvals ] 2124 if $retvals[0]->{'memoize'}; 2125 return @retvals; 2126 } 2127 } 2128 else { 2129 return _process_command_line(@_); 2130 } 2131} 2132 2133# (note the underscore here) 2134sub _process_command_line { 2135 2136 my ( 2137 $perltidyrc_stream, $is_Windows, $Windows_type, 2138 $rpending_complaint, $dump_options_type 2139 ) = @_; 2140 2141 use Getopt::Long; 2142 2143 my ( 2144 $roption_string, $rdefaults, $rexpansion, 2145 $roption_category, $roption_range 2146 ) = generate_options(); 2147 2148 #--------------------------------------------------------------- 2149 # set the defaults by passing the above list through GetOptions 2150 #--------------------------------------------------------------- 2151 my %Opts = (); 2152 { 2153 local @ARGV; 2154 my $i; 2155 2156 # do not load the defaults if we are just dumping perltidyrc 2157 unless ( $dump_options_type eq 'perltidyrc' ) { 2158 for $i (@$rdefaults) { push @ARGV, "--" . $i } 2159 } 2160 2161 # Patch to save users Getopt::Long configuration 2162 # and set to Getopt::Long defaults. Use eval to avoid 2163 # breaking old versions of Perl without these routines. 2164 my $glc; 2165 eval { $glc = Getopt::Long::Configure() }; 2166 unless ($@) { 2167 eval { Getopt::Long::ConfigDefaults() }; 2168 } 2169 else { $glc = undef } 2170 2171 if ( !GetOptions( \%Opts, @$roption_string ) ) { 2172 Die "Programming Bug: error in setting default options"; 2173 } 2174 2175 # Patch to put the previous Getopt::Long configuration back 2176 eval { Getopt::Long::Configure($glc) } if defined $glc; 2177 } 2178 2179 my $word; 2180 my @raw_options = (); 2181 my $config_file = ""; 2182 my $saw_ignore_profile = 0; 2183 my $saw_extrude = 0; 2184 my $saw_pbp = 0; 2185 my $saw_dump_profile = 0; 2186 my $i; 2187 2188 #--------------------------------------------------------------- 2189 # Take a first look at the command-line parameters. Do as many 2190 # immediate dumps as possible, which can avoid confusion if the 2191 # perltidyrc file has an error. 2192 #--------------------------------------------------------------- 2193 foreach $i (@ARGV) { 2194 2195 $i =~ s/^--/-/; 2196 if ( $i =~ /^-(npro|noprofile|no-profile)$/ ) { 2197 $saw_ignore_profile = 1; 2198 } 2199 2200 # note: this must come before -pro and -profile, below: 2201 elsif ( $i =~ /^-(dump-profile|dpro)$/ ) { 2202 $saw_dump_profile = 1; 2203 } 2204 elsif ( $i =~ /^-(pro|profile)=(.+)/ ) { 2205 if ($config_file) { 2206 Warn 2207"Only one -pro=filename allowed, using '$2' instead of '$config_file'\n"; 2208 } 2209 $config_file = $2; 2210 2211 # resolve <dir>/.../<file>, meaning look upwards from directory 2212 if ( defined($config_file) ) { 2213 if ( my ( $start_dir, $search_file ) = 2214 ( $config_file =~ m{^(.*)\.\.\./(.*)$} ) ) 2215 { 2216 $start_dir = '.' if !$start_dir; 2217 $start_dir = Cwd::realpath($start_dir); 2218 if ( my $found_file = 2219 find_file_upwards( $start_dir, $search_file ) ) 2220 { 2221 $config_file = $found_file; 2222 } 2223 } 2224 } 2225 unless ( -e $config_file ) { 2226 Warn "cannot find file given with -pro=$config_file: $!\n"; 2227 $config_file = ""; 2228 } 2229 } 2230 elsif ( $i =~ /^-(pro|profile)=?$/ ) { 2231 Die "usage: -pro=filename or --profile=filename, no spaces\n"; 2232 } 2233 elsif ( $i =~ /^-extrude$/ ) { 2234 $saw_extrude = 1; 2235 } 2236 elsif ( $i =~ /^-(pbp|perl-best-practices)$/ ) { 2237 $saw_pbp = 1; 2238 } 2239 elsif ( $i =~ /^-(help|h|HELP|H|\?)$/ ) { 2240 usage(); 2241 Exit 0; 2242 } 2243 elsif ( $i =~ /^-(version|v)$/ ) { 2244 show_version(); 2245 Exit 0; 2246 } 2247 elsif ( $i =~ /^-(dump-defaults|ddf)$/ ) { 2248 dump_defaults(@$rdefaults); 2249 Exit 0; 2250 } 2251 elsif ( $i =~ /^-(dump-long-names|dln)$/ ) { 2252 dump_long_names(@$roption_string); 2253 Exit 0; 2254 } 2255 elsif ( $i =~ /^-(dump-short-names|dsn)$/ ) { 2256 dump_short_names($rexpansion); 2257 Exit 0; 2258 } 2259 elsif ( $i =~ /^-(dump-token-types|dtt)$/ ) { 2260 Perl::Tidy::Tokenizer->dump_token_types(*STDOUT); 2261 Exit 0; 2262 } 2263 } 2264 2265 if ( $saw_dump_profile && $saw_ignore_profile ) { 2266 Warn "No profile to dump because of -npro\n"; 2267 Exit 1; 2268 } 2269 2270 #--------------------------------------------------------------- 2271 # read any .perltidyrc configuration file 2272 #--------------------------------------------------------------- 2273 unless ($saw_ignore_profile) { 2274 2275 # resolve possible conflict between $perltidyrc_stream passed 2276 # as call parameter to perltidy and -pro=filename on command 2277 # line. 2278 if ($perltidyrc_stream) { 2279 if ($config_file) { 2280 Warn <<EOM; 2281 Conflict: a perltidyrc configuration file was specified both as this 2282 perltidy call parameter: $perltidyrc_stream 2283 and with this -profile=$config_file. 2284 Using -profile=$config_file. 2285EOM 2286 } 2287 else { 2288 $config_file = $perltidyrc_stream; 2289 } 2290 } 2291 2292 # look for a config file if we don't have one yet 2293 my $rconfig_file_chatter; 2294 $$rconfig_file_chatter = ""; 2295 $config_file = 2296 find_config_file( $is_Windows, $Windows_type, $rconfig_file_chatter, 2297 $rpending_complaint ) 2298 unless $config_file; 2299 2300 # open any config file 2301 my $fh_config; 2302 if ($config_file) { 2303 ( $fh_config, $config_file ) = 2304 Perl::Tidy::streamhandle( $config_file, 'r' ); 2305 unless ($fh_config) { 2306 $$rconfig_file_chatter .= 2307 "# $config_file exists but cannot be opened\n"; 2308 } 2309 } 2310 2311 if ($saw_dump_profile) { 2312 dump_config_file( $fh_config, $config_file, $rconfig_file_chatter ); 2313 Exit 0; 2314 } 2315 2316 if ($fh_config) { 2317 2318 my ( $rconfig_list, $death_message, $_saw_pbp ) = 2319 read_config_file( $fh_config, $config_file, $rexpansion ); 2320 Die $death_message if ($death_message); 2321 $saw_pbp ||= $_saw_pbp; 2322 2323 # process any .perltidyrc parameters right now so we can 2324 # localize errors 2325 if (@$rconfig_list) { 2326 local @ARGV = @$rconfig_list; 2327 2328 expand_command_abbreviations( $rexpansion, \@raw_options, 2329 $config_file ); 2330 2331 if ( !GetOptions( \%Opts, @$roption_string ) ) { 2332 Die 2333"Error in this config file: $config_file \nUse -npro to ignore this file, -h for help'\n"; 2334 } 2335 2336 # Anything left in this local @ARGV is an error and must be 2337 # invalid bare words from the configuration file. We cannot 2338 # check this earlier because bare words may have been valid 2339 # values for parameters. We had to wait for GetOptions to have 2340 # a look at @ARGV. 2341 if (@ARGV) { 2342 my $count = @ARGV; 2343 my $str = "\'" . pop(@ARGV) . "\'"; 2344 while ( my $param = pop(@ARGV) ) { 2345 if ( length($str) < 70 ) { 2346 $str .= ", '$param'"; 2347 } 2348 else { 2349 $str .= ", ..."; 2350 last; 2351 } 2352 } 2353 Die <<EOM; 2354There are $count unrecognized values in the configuration file '$config_file': 2355$str 2356Use leading dashes for parameters. Use -npro to ignore this file. 2357EOM 2358 } 2359 2360 # Undo any options which cause premature exit. They are not 2361 # appropriate for a config file, and it could be hard to 2362 # diagnose the cause of the premature exit. 2363 foreach ( 2364 qw{ 2365 dump-defaults 2366 dump-long-names 2367 dump-options 2368 dump-profile 2369 dump-short-names 2370 dump-token-types 2371 dump-want-left-space 2372 dump-want-right-space 2373 help 2374 stylesheet 2375 version 2376 } 2377 ) 2378 { 2379 2380 if ( defined( $Opts{$_} ) ) { 2381 delete $Opts{$_}; 2382 Warn "ignoring --$_ in config file: $config_file\n"; 2383 } 2384 } 2385 } 2386 } 2387 } 2388 2389 #--------------------------------------------------------------- 2390 # now process the command line parameters 2391 #--------------------------------------------------------------- 2392 expand_command_abbreviations( $rexpansion, \@raw_options, $config_file ); 2393 2394 local $SIG{'__WARN__'} = sub { Warn $_[0] }; 2395 if ( !GetOptions( \%Opts, @$roption_string ) ) { 2396 Die "Error on command line; for help try 'perltidy -h'\n"; 2397 } 2398 2399 return ( 2400 \%Opts, $config_file, \@raw_options, 2401 $saw_extrude, $saw_pbp, $roption_string, 2402 $rexpansion, $roption_category, $roption_range 2403 ); 2404} # end of process_command_line 2405 2406sub check_options { 2407 2408 my ( $rOpts, $is_Windows, $Windows_type, $rpending_complaint ) = @_; 2409 2410 #--------------------------------------------------------------- 2411 # check and handle any interactions among the basic options.. 2412 #--------------------------------------------------------------- 2413 2414 # Since -vt, -vtc, and -cti are abbreviations, but under 2415 # msdos, an unquoted input parameter like vtc=1 will be 2416 # seen as 2 parameters, vtc and 1, so the abbreviations 2417 # won't be seen. Therefore, we will catch them here if 2418 # they get through. 2419 2420 if ( defined $rOpts->{'vertical-tightness'} ) { 2421 my $vt = $rOpts->{'vertical-tightness'}; 2422 $rOpts->{'paren-vertical-tightness'} = $vt; 2423 $rOpts->{'square-bracket-vertical-tightness'} = $vt; 2424 $rOpts->{'brace-vertical-tightness'} = $vt; 2425 } 2426 2427 if ( defined $rOpts->{'vertical-tightness-closing'} ) { 2428 my $vtc = $rOpts->{'vertical-tightness-closing'}; 2429 $rOpts->{'paren-vertical-tightness-closing'} = $vtc; 2430 $rOpts->{'square-bracket-vertical-tightness-closing'} = $vtc; 2431 $rOpts->{'brace-vertical-tightness-closing'} = $vtc; 2432 } 2433 2434 if ( defined $rOpts->{'closing-token-indentation'} ) { 2435 my $cti = $rOpts->{'closing-token-indentation'}; 2436 $rOpts->{'closing-square-bracket-indentation'} = $cti; 2437 $rOpts->{'closing-brace-indentation'} = $cti; 2438 $rOpts->{'closing-paren-indentation'} = $cti; 2439 } 2440 2441 # In quiet mode, there is no log file and hence no way to report 2442 # results of syntax check, so don't do it. 2443 if ( $rOpts->{'quiet'} ) { 2444 $rOpts->{'check-syntax'} = 0; 2445 } 2446 2447 # can't check syntax if no output 2448 if ( $rOpts->{'format'} ne 'tidy' ) { 2449 $rOpts->{'check-syntax'} = 0; 2450 } 2451 2452 # Never let Windows 9x/Me systems run syntax check -- this will prevent a 2453 # wide variety of nasty problems on these systems, because they cannot 2454 # reliably run backticks. Don't even think about changing this! 2455 if ( $rOpts->{'check-syntax'} 2456 && $is_Windows 2457 && ( !$Windows_type || $Windows_type =~ /^(9|Me)/ ) ) 2458 { 2459 $rOpts->{'check-syntax'} = 0; 2460 } 2461 2462 # It's really a bad idea to check syntax as root unless you wrote 2463 # the script yourself. FIXME: not sure if this works with VMS 2464 unless ($is_Windows) { 2465 2466 if ( $< == 0 && $rOpts->{'check-syntax'} ) { 2467 $rOpts->{'check-syntax'} = 0; 2468 $$rpending_complaint .= 2469"Syntax check deactivated for safety; you shouldn't run this as root\n"; 2470 } 2471 } 2472 2473 # check iteration count and quietly fix if necessary: 2474 # - iterations option only applies to code beautification mode 2475 # - the convergence check should stop most runs on iteration 2, and 2476 # virtually all on iteration 3. But we'll allow up to 6. 2477 if ( $rOpts->{'format'} ne 'tidy' ) { 2478 $rOpts->{'iterations'} = 1; 2479 } 2480 elsif ( defined( $rOpts->{'iterations'} ) ) { 2481 if ( $rOpts->{'iterations'} <= 0 ) { $rOpts->{'iterations'} = 1 } 2482 elsif ( $rOpts->{'iterations'} > 6 ) { $rOpts->{'iterations'} = 6 } 2483 } 2484 else { 2485 $rOpts->{'iterations'} = 1; 2486 } 2487 2488 # check for reasonable number of blank lines and fix to avoid problems 2489 if ( $rOpts->{'blank-lines-before-subs'} ) { 2490 if ( $rOpts->{'blank-lines-before-subs'} < 0 ) { 2491 $rOpts->{'blank-lines-before-subs'} = 0; 2492 Warn "negative value of -blbs, setting 0\n"; 2493 } 2494 if ( $rOpts->{'blank-lines-before-subs'} > 100 ) { 2495 Warn "unreasonably large value of -blbs, reducing\n"; 2496 $rOpts->{'blank-lines-before-subs'} = 100; 2497 } 2498 } 2499 if ( $rOpts->{'blank-lines-before-packages'} ) { 2500 if ( $rOpts->{'blank-lines-before-packages'} < 0 ) { 2501 Warn "negative value of -blbp, setting 0\n"; 2502 $rOpts->{'blank-lines-before-packages'} = 0; 2503 } 2504 if ( $rOpts->{'blank-lines-before-packages'} > 100 ) { 2505 Warn "unreasonably large value of -blbp, reducing\n"; 2506 $rOpts->{'blank-lines-before-packages'} = 100; 2507 } 2508 } 2509 2510 # see if user set a non-negative logfile-gap 2511 if ( defined( $rOpts->{'logfile-gap'} ) && $rOpts->{'logfile-gap'} >= 0 ) { 2512 2513 # a zero gap will be taken as a 1 2514 if ( $rOpts->{'logfile-gap'} == 0 ) { 2515 $rOpts->{'logfile-gap'} = 1; 2516 } 2517 2518 # setting a non-negative logfile gap causes logfile to be saved 2519 $rOpts->{'logfile'} = 1; 2520 } 2521 2522 # not setting logfile gap, or setting it negative, causes default of 50 2523 else { 2524 $rOpts->{'logfile-gap'} = 50; 2525 } 2526 2527 # set short-cut flag when only indentation is to be done. 2528 # Note that the user may or may not have already set the 2529 # indent-only flag. 2530 if ( !$rOpts->{'add-whitespace'} 2531 && !$rOpts->{'delete-old-whitespace'} 2532 && !$rOpts->{'add-newlines'} 2533 && !$rOpts->{'delete-old-newlines'} ) 2534 { 2535 $rOpts->{'indent-only'} = 1; 2536 } 2537 2538 # -isbc implies -ibc 2539 if ( $rOpts->{'indent-spaced-block-comments'} ) { 2540 $rOpts->{'indent-block-comments'} = 1; 2541 } 2542 2543 # -bli flag implies -bl 2544 if ( $rOpts->{'brace-left-and-indent'} ) { 2545 $rOpts->{'opening-brace-on-new-line'} = 1; 2546 } 2547 2548 if ( $rOpts->{'opening-brace-always-on-right'} 2549 && $rOpts->{'opening-brace-on-new-line'} ) 2550 { 2551 Warn <<EOM; 2552 Conflict: you specified both 'opening-brace-always-on-right' (-bar) and 2553 'opening-brace-on-new-line' (-bl). Ignoring -bl. 2554EOM 2555 $rOpts->{'opening-brace-on-new-line'} = 0; 2556 } 2557 2558 # it simplifies things if -bl is 0 rather than undefined 2559 if ( !defined( $rOpts->{'opening-brace-on-new-line'} ) ) { 2560 $rOpts->{'opening-brace-on-new-line'} = 0; 2561 } 2562 2563 # -sbl defaults to -bl if not defined 2564 if ( !defined( $rOpts->{'opening-sub-brace-on-new-line'} ) ) { 2565 $rOpts->{'opening-sub-brace-on-new-line'} = 2566 $rOpts->{'opening-brace-on-new-line'}; 2567 } 2568 2569 if ( $rOpts->{'entab-leading-whitespace'} ) { 2570 if ( $rOpts->{'entab-leading-whitespace'} < 0 ) { 2571 Warn "-et=n must use a positive integer; ignoring -et\n"; 2572 $rOpts->{'entab-leading-whitespace'} = undef; 2573 } 2574 2575 # entab leading whitespace has priority over the older 'tabs' option 2576 if ( $rOpts->{'tabs'} ) { $rOpts->{'tabs'} = 0; } 2577 } 2578 2579 # set a default tabsize to be used in guessing the starting indentation 2580 # level if and only if this run does not use tabs and the old code does 2581 # use tabs 2582 if ( $rOpts->{'default-tabsize'} ) { 2583 if ( $rOpts->{'default-tabsize'} < 0 ) { 2584 Warn "negative value of -dt, setting 0\n"; 2585 $rOpts->{'default-tabsize'} = 0; 2586 } 2587 if ( $rOpts->{'default-tabsize'} > 20 ) { 2588 Warn "unreasonably large value of -dt, reducing\n"; 2589 $rOpts->{'default-tabsize'} = 20; 2590 } 2591 } 2592 else { 2593 $rOpts->{'default-tabsize'} = 8; 2594 } 2595 2596 # Define $tabsize, the number of spaces per tab for use in 2597 # guessing the indentation of source lines with leading tabs. 2598 # Assume same as for this run if tabs are used , otherwise assume 2599 # a default value, typically 8 2600 my $tabsize = 2601 $rOpts->{'entab-leading-whitespace'} 2602 ? $rOpts->{'entab-leading-whitespace'} 2603 : $rOpts->{'tabs'} ? $rOpts->{'indent-columns'} 2604 : $rOpts->{'default-tabsize'}; 2605 return $tabsize; 2606} 2607 2608sub find_file_upwards { 2609 my ( $search_dir, $search_file ) = @_; 2610 2611 $search_dir =~ s{/+$}{}; 2612 $search_file =~ s{^/+}{}; 2613 2614 while (1) { 2615 my $try_path = "$search_dir/$search_file"; 2616 if ( -f $try_path ) { 2617 return $try_path; 2618 } 2619 elsif ( $search_dir eq '/' ) { 2620 return undef; 2621 } 2622 else { 2623 $search_dir = dirname($search_dir); 2624 } 2625 } 2626} 2627 2628sub expand_command_abbreviations { 2629 2630 # go through @ARGV and expand any abbreviations 2631 2632 my ( $rexpansion, $rraw_options, $config_file ) = @_; 2633 my ($word); 2634 2635 # set a pass limit to prevent an infinite loop; 2636 # 10 should be plenty, but it may be increased to allow deeply 2637 # nested expansions. 2638 my $max_passes = 10; 2639 my @new_argv = (); 2640 2641 # keep looping until all expansions have been converted into actual 2642 # dash parameters.. 2643 for ( my $pass_count = 0 ; $pass_count <= $max_passes ; $pass_count++ ) { 2644 my @new_argv = (); 2645 my $abbrev_count = 0; 2646 2647 # loop over each item in @ARGV.. 2648 foreach $word (@ARGV) { 2649 2650 # convert any leading 'no-' to just 'no' 2651 if ( $word =~ /^(-[-]?no)-(.*)/ ) { $word = $1 . $2 } 2652 2653 # if it is a dash flag (instead of a file name).. 2654 if ( $word =~ /^-[-]?([\w\-]+)(.*)/ ) { 2655 2656 my $abr = $1; 2657 my $flags = $2; 2658 2659 # save the raw input for debug output in case of circular refs 2660 if ( $pass_count == 0 ) { 2661 push( @$rraw_options, $word ); 2662 } 2663 2664 # recombine abbreviation and flag, if necessary, 2665 # to allow abbreviations with arguments such as '-vt=1' 2666 if ( $rexpansion->{ $abr . $flags } ) { 2667 $abr = $abr . $flags; 2668 $flags = ""; 2669 } 2670 2671 # if we see this dash item in the expansion hash.. 2672 if ( $rexpansion->{$abr} ) { 2673 $abbrev_count++; 2674 2675 # stuff all of the words that it expands to into the 2676 # new arg list for the next pass 2677 foreach my $abbrev ( @{ $rexpansion->{$abr} } ) { 2678 next unless $abbrev; # for safety; shouldn't happen 2679 push( @new_argv, '--' . $abbrev . $flags ); 2680 } 2681 } 2682 2683 # not in expansion hash, must be actual long name 2684 else { 2685 push( @new_argv, $word ); 2686 } 2687 } 2688 2689 # not a dash item, so just save it for the next pass 2690 else { 2691 push( @new_argv, $word ); 2692 } 2693 } # end of this pass 2694 2695 # update parameter list @ARGV to the new one 2696 @ARGV = @new_argv; 2697 last unless ( $abbrev_count > 0 ); 2698 2699 # make sure we are not in an infinite loop 2700 if ( $pass_count == $max_passes ) { 2701 local $" = ')('; 2702 Warn <<EOM; 2703I'm tired. We seem to be in an infinite loop trying to expand aliases. 2704Here are the raw options; 2705(rraw_options) 2706EOM 2707 my $num = @new_argv; 2708 if ( $num < 50 ) { 2709 Warn <<EOM; 2710After $max_passes passes here is ARGV 2711(@new_argv) 2712EOM 2713 } 2714 else { 2715 Warn <<EOM; 2716After $max_passes passes ARGV has $num entries 2717EOM 2718 } 2719 2720 if ($config_file) { 2721 Die <<"DIE"; 2722Please check your configuration file $config_file for circular-references. 2723To deactivate it, use -npro. 2724DIE 2725 } 2726 else { 2727 Die <<'DIE'; 2728Program bug - circular-references in the %expansion hash, probably due to 2729a recent program change. 2730DIE 2731 } 2732 } # end of check for circular references 2733 } # end of loop over all passes 2734} 2735 2736# Debug routine -- this will dump the expansion hash 2737sub dump_short_names { 2738 my $rexpansion = shift; 2739 print STDOUT <<EOM; 2740List of short names. This list shows how all abbreviations are 2741translated into other abbreviations and, eventually, into long names. 2742New abbreviations may be defined in a .perltidyrc file. 2743For a list of all long names, use perltidy --dump-long-names (-dln). 2744-------------------------------------------------------------------------- 2745EOM 2746 foreach my $abbrev ( sort keys %$rexpansion ) { 2747 my @list = @{ $$rexpansion{$abbrev} }; 2748 print STDOUT "$abbrev --> @list\n"; 2749 } 2750} 2751 2752sub check_vms_filename { 2753 2754 # given a valid filename (the perltidy input file) 2755 # create a modified filename and separator character 2756 # suitable for VMS. 2757 # 2758 # Contributed by Michael Cartmell 2759 # 2760 my ( $base, $path ) = fileparse( $_[0] ); 2761 2762 # remove explicit ; version 2763 $base =~ s/;-?\d*$// 2764 2765 # remove explicit . version ie two dots in filename NB ^ escapes a dot 2766 or $base =~ s/( # begin capture $1 2767 (?:^|[^^])\. # match a dot not preceded by a caret 2768 (?: # followed by nothing 2769 | # or 2770 .*[^^] # anything ending in a non caret 2771 ) 2772 ) # end capture $1 2773 \.-?\d*$ # match . version number 2774 /$1/x; 2775 2776 # normalise filename, if there are no unescaped dots then append one 2777 $base .= '.' unless $base =~ /(?:^|[^^])\./; 2778 2779 # if we don't already have an extension then we just append the extension 2780 my $separator = ( $base =~ /\.$/ ) ? "" : "_"; 2781 return ( $path . $base, $separator ); 2782} 2783 2784sub Win_OS_Type { 2785 2786 # TODO: are these more standard names? 2787 # Win32s Win95 Win98 WinMe WinNT3.51 WinNT4 Win2000 WinXP/.Net Win2003 2788 2789 # Returns a string that determines what MS OS we are on. 2790 # Returns win32s,95,98,Me,NT3.51,NT4,2000,XP/.Net,Win2003 2791 # Returns blank string if not an MS system. 2792 # Original code contributed by: Yves Orton 2793 # We need to know this to decide where to look for config files 2794 2795 my $rpending_complaint = shift; 2796 my $os = ""; 2797 return $os unless $^O =~ /win32|dos/i; # is it a MS box? 2798 2799 # Systems built from Perl source may not have Win32.pm 2800 # But probably have Win32::GetOSVersion() anyway so the 2801 # following line is not 'required': 2802 # return $os unless eval('require Win32'); 2803 2804 # Use the standard API call to determine the version 2805 my ( $undef, $major, $minor, $build, $id ); 2806 eval { ( $undef, $major, $minor, $build, $id ) = Win32::GetOSVersion() }; 2807 2808 # 2809 # NAME ID MAJOR MINOR 2810 # Windows NT 4 2 4 0 2811 # Windows 2000 2 5 0 2812 # Windows XP 2 5 1 2813 # Windows Server 2003 2 5 2 2814 2815 return "win32s" unless $id; # If id==0 then its a win32s box. 2816 $os = { # Magic numbers from MSDN 2817 # documentation of GetOSVersion 2818 1 => { 2819 0 => "95", 2820 10 => "98", 2821 90 => "Me" 2822 }, 2823 2 => { 2824 0 => "2000", # or NT 4, see below 2825 1 => "XP/.Net", 2826 2 => "Win2003", 2827 51 => "NT3.51" 2828 } 2829 }->{$id}->{$minor}; 2830 2831 # If $os is undefined, the above code is out of date. Suggested updates 2832 # are welcome. 2833 unless ( defined $os ) { 2834 $os = ""; 2835 $$rpending_complaint .= <<EOS; 2836Error trying to discover Win_OS_Type: $id:$major:$minor Has no name of record! 2837We won't be able to look for a system-wide config file. 2838EOS 2839 } 2840 2841 # Unfortunately the logic used for the various versions isn't so clever.. 2842 # so we have to handle an outside case. 2843 return ( $os eq "2000" && $major != 5 ) ? "NT4" : $os; 2844} 2845 2846sub is_unix { 2847 return 2848 ( $^O !~ /win32|dos/i ) 2849 && ( $^O ne 'VMS' ) 2850 && ( $^O ne 'OS2' ) 2851 && ( $^O ne 'MacOS' ); 2852} 2853 2854sub look_for_Windows { 2855 2856 # determine Windows sub-type and location of 2857 # system-wide configuration files 2858 my $rpending_complaint = shift; 2859 my $is_Windows = ( $^O =~ /win32|dos/i ); 2860 my $Windows_type = Win_OS_Type($rpending_complaint) if $is_Windows; 2861 return ( $is_Windows, $Windows_type ); 2862} 2863 2864sub find_config_file { 2865 2866 # look for a .perltidyrc configuration file 2867 # For Windows also look for a file named perltidy.ini 2868 my ( $is_Windows, $Windows_type, $rconfig_file_chatter, 2869 $rpending_complaint ) = @_; 2870 2871 $$rconfig_file_chatter .= "# Config file search...system reported as:"; 2872 if ($is_Windows) { 2873 $$rconfig_file_chatter .= "Windows $Windows_type\n"; 2874 } 2875 else { 2876 $$rconfig_file_chatter .= " $^O\n"; 2877 } 2878 2879 # sub to check file existence and record all tests 2880 my $exists_config_file = sub { 2881 my $config_file = shift; 2882 return 0 unless $config_file; 2883 $$rconfig_file_chatter .= "# Testing: $config_file\n"; 2884 return -f $config_file; 2885 }; 2886 2887 my $config_file; 2888 2889 # look in current directory first 2890 $config_file = ".perltidyrc"; 2891 return $config_file if $exists_config_file->($config_file); 2892 if ($is_Windows) { 2893 $config_file = "perltidy.ini"; 2894 return $config_file if $exists_config_file->($config_file); 2895 } 2896 2897 # Default environment vars. 2898 my @envs = qw(PERLTIDY HOME); 2899 2900 # Check the NT/2k/XP locations, first a local machine def, then a 2901 # network def 2902 push @envs, qw(USERPROFILE HOMESHARE) if $^O =~ /win32/i; 2903 2904 # Now go through the environment ... 2905 foreach my $var (@envs) { 2906 $$rconfig_file_chatter .= "# Examining: \$ENV{$var}"; 2907 if ( defined( $ENV{$var} ) ) { 2908 $$rconfig_file_chatter .= " = $ENV{$var}\n"; 2909 2910 # test ENV{ PERLTIDY } as file: 2911 if ( $var eq 'PERLTIDY' ) { 2912 $config_file = "$ENV{$var}"; 2913 return $config_file if $exists_config_file->($config_file); 2914 } 2915 2916 # test ENV as directory: 2917 $config_file = catfile( $ENV{$var}, ".perltidyrc" ); 2918 return $config_file if $exists_config_file->($config_file); 2919 2920 if ($is_Windows) { 2921 $config_file = catfile( $ENV{$var}, "perltidy.ini" ); 2922 return $config_file if $exists_config_file->($config_file); 2923 } 2924 } 2925 else { 2926 $$rconfig_file_chatter .= "\n"; 2927 } 2928 } 2929 2930 # then look for a system-wide definition 2931 # where to look varies with OS 2932 if ($is_Windows) { 2933 2934 if ($Windows_type) { 2935 my ( $os, $system, $allusers ) = 2936 Win_Config_Locs( $rpending_complaint, $Windows_type ); 2937 2938 # Check All Users directory, if there is one. 2939 # i.e. C:\Documents and Settings\User\perltidy.ini 2940 if ($allusers) { 2941 2942 $config_file = catfile( $allusers, ".perltidyrc" ); 2943 return $config_file if $exists_config_file->($config_file); 2944 2945 $config_file = catfile( $allusers, "perltidy.ini" ); 2946 return $config_file if $exists_config_file->($config_file); 2947 } 2948 2949 # Check system directory. 2950 # retain old code in case someone has been able to create 2951 # a file with a leading period. 2952 $config_file = catfile( $system, ".perltidyrc" ); 2953 return $config_file if $exists_config_file->($config_file); 2954 2955 $config_file = catfile( $system, "perltidy.ini" ); 2956 return $config_file if $exists_config_file->($config_file); 2957 } 2958 } 2959 2960 # Place to add customization code for other systems 2961 elsif ( $^O eq 'OS2' ) { 2962 } 2963 elsif ( $^O eq 'MacOS' ) { 2964 } 2965 elsif ( $^O eq 'VMS' ) { 2966 } 2967 2968 # Assume some kind of Unix 2969 else { 2970 2971 $config_file = "/usr/local/etc/perltidyrc"; 2972 return $config_file if $exists_config_file->($config_file); 2973 2974 $config_file = "/etc/perltidyrc"; 2975 return $config_file if $exists_config_file->($config_file); 2976 } 2977 2978 # Couldn't find a config file 2979 return; 2980} 2981 2982sub Win_Config_Locs { 2983 2984 # In scalar context returns the OS name (95 98 ME NT3.51 NT4 2000 XP), 2985 # or undef if its not a win32 OS. In list context returns OS, System 2986 # Directory, and All Users Directory. All Users will be empty on a 2987 # 9x/Me box. Contributed by: Yves Orton. 2988 2989 my $rpending_complaint = shift; 2990 my $os = (@_) ? shift : Win_OS_Type(); 2991 return unless $os; 2992 2993 my $system = ""; 2994 my $allusers = ""; 2995 2996 if ( $os =~ /9[58]|Me/ ) { 2997 $system = "C:/Windows"; 2998 } 2999 elsif ( $os =~ /NT|XP|200?/ ) { 3000 $system = ( $os =~ /XP/ ) ? "C:/Windows/" : "C:/WinNT/"; 3001 $allusers = 3002 ( $os =~ /NT/ ) 3003 ? "C:/WinNT/profiles/All Users/" 3004 : "C:/Documents and Settings/All Users/"; 3005 } 3006 else { 3007 3008 # This currently would only happen on a win32s computer. I dont have 3009 # one to test, so I am unsure how to proceed. Suggestions welcome! 3010 $$rpending_complaint .= 3011"I dont know a sensible place to look for config files on an $os system.\n"; 3012 return; 3013 } 3014 return wantarray ? ( $os, $system, $allusers ) : $os; 3015} 3016 3017sub dump_config_file { 3018 my $fh = shift; 3019 my $config_file = shift; 3020 my $rconfig_file_chatter = shift; 3021 print STDOUT "$$rconfig_file_chatter"; 3022 if ($fh) { 3023 print STDOUT "# Dump of file: '$config_file'\n"; 3024 while ( my $line = $fh->getline() ) { print STDOUT $line } 3025 eval { $fh->close() }; 3026 } 3027 else { 3028 print STDOUT "# ...no config file found\n"; 3029 } 3030} 3031 3032sub read_config_file { 3033 3034 my ( $fh, $config_file, $rexpansion ) = @_; 3035 my @config_list = (); 3036 my $saw_pbp; 3037 3038 # file is bad if non-empty $death_message is returned 3039 my $death_message = ""; 3040 3041 my $name = undef; 3042 my $line_no; 3043 while ( my $line = $fh->getline() ) { 3044 $line_no++; 3045 chomp $line; 3046 ( $line, $death_message ) = 3047 strip_comment( $line, $config_file, $line_no ); 3048 last if ($death_message); 3049 next unless $line; 3050 $line =~ s/^\s*(.*?)\s*$/$1/; # trim both ends 3051 next unless $line; 3052 3053 # look for something of the general form 3054 # newname { body } 3055 # or just 3056 # body 3057 3058 my $body = $line; 3059 my ($newname); 3060 if ( $line =~ /^((\w+)\s*\{)(.*)\}$/ ) { 3061 ( $newname, $body ) = ( $2, $3, ); 3062 } 3063 if ($body) { 3064 3065 if ( !$saw_pbp && $body =~ /-(pbp|perl-best-practices)/ ) { 3066 $saw_pbp = 1; 3067 } 3068 3069 # handle a new alias definition 3070 if ($newname) { 3071 if ($name) { 3072 $death_message = 3073"No '}' seen after $name and before $newname in config file $config_file line $.\n"; 3074 last; 3075 } 3076 $name = $newname; 3077 3078 if ( ${$rexpansion}{$name} ) { 3079 local $" = ')('; 3080 my @names = sort keys %$rexpansion; 3081 $death_message = 3082 "Here is a list of all installed aliases\n(@names)\n" 3083 . "Attempting to redefine alias ($name) in config file $config_file line $.\n"; 3084 last; 3085 } 3086 ${$rexpansion}{$name} = []; 3087 } 3088 3089 # now do the body 3090 if ($body) { 3091 3092 my ( $rbody_parts, $msg ) = parse_args($body); 3093 if ($msg) { 3094 $death_message = <<EOM; 3095Error reading file '$config_file' at line number $line_no. 3096$msg 3097Please fix this line or use -npro to avoid reading this file 3098EOM 3099 last; 3100 } 3101 3102 if ($name) { 3103 3104 # remove leading dashes if this is an alias 3105 foreach (@$rbody_parts) { s/^\-+//; } 3106 push @{ ${$rexpansion}{$name} }, @$rbody_parts; 3107 } 3108 else { 3109 push( @config_list, @$rbody_parts ); 3110 } 3111 } 3112 } 3113 } 3114 eval { $fh->close() }; 3115 return ( \@config_list, $death_message, $saw_pbp ); 3116} 3117 3118sub strip_comment { 3119 3120 # Strip any comment from a command line 3121 my ( $instr, $config_file, $line_no ) = @_; 3122 my $msg = ""; 3123 3124 # check for full-line comment 3125 if ( $instr =~ /^\s*#/ ) { 3126 return ( "", $msg ); 3127 } 3128 3129 # nothing to do if no comments 3130 if ( $instr !~ /#/ ) { 3131 return ( $instr, $msg ); 3132 } 3133 3134 # handle case of no quotes 3135 elsif ( $instr !~ /['"]/ ) { 3136 3137 # We now require a space before the # of a side comment 3138 # this allows something like: 3139 # -sbcp=# 3140 # Otherwise, it would have to be quoted: 3141 # -sbcp='#' 3142 $instr =~ s/\s+\#.*$//; 3143 return ( $instr, $msg ); 3144 } 3145 3146 # handle comments and quotes 3147 my $outstr = ""; 3148 my $quote_char = ""; 3149 while (1) { 3150 3151 # looking for ending quote character 3152 if ($quote_char) { 3153 if ( $instr =~ /\G($quote_char)/gc ) { 3154 $quote_char = ""; 3155 $outstr .= $1; 3156 } 3157 elsif ( $instr =~ /\G(.)/gc ) { 3158 $outstr .= $1; 3159 } 3160 3161 # error..we reached the end without seeing the ending quote char 3162 else { 3163 $msg = <<EOM; 3164Error reading file $config_file at line number $line_no. 3165Did not see ending quote character <$quote_char> in this text: 3166$instr 3167Please fix this line or use -npro to avoid reading this file 3168EOM 3169 last; 3170 } 3171 } 3172 3173 # accumulating characters and looking for start of a quoted string 3174 else { 3175 if ( $instr =~ /\G([\"\'])/gc ) { 3176 $outstr .= $1; 3177 $quote_char = $1; 3178 } 3179 3180 # Note: not yet enforcing the space-before-hash rule for side 3181 # comments if the parameter is quoted. 3182 elsif ( $instr =~ /\G#/gc ) { 3183 last; 3184 } 3185 elsif ( $instr =~ /\G(.)/gc ) { 3186 $outstr .= $1; 3187 } 3188 else { 3189 last; 3190 } 3191 } 3192 } 3193 return ( $outstr, $msg ); 3194} 3195 3196sub parse_args { 3197 3198 # Parse a command string containing multiple string with possible 3199 # quotes, into individual commands. It might look like this, for example: 3200 # 3201 # -wba=" + - " -some-thing -wbb='. && ||' 3202 # 3203 # There is no need, at present, to handle escaped quote characters. 3204 # (They are not perltidy tokens, so needn't be in strings). 3205 3206 my ($body) = @_; 3207 my @body_parts = (); 3208 my $quote_char = ""; 3209 my $part = ""; 3210 my $msg = ""; 3211 while (1) { 3212 3213 # looking for ending quote character 3214 if ($quote_char) { 3215 if ( $body =~ /\G($quote_char)/gc ) { 3216 $quote_char = ""; 3217 } 3218 elsif ( $body =~ /\G(.)/gc ) { 3219 $part .= $1; 3220 } 3221 3222 # error..we reached the end without seeing the ending quote char 3223 else { 3224 if ( length($part) ) { push @body_parts, $part; } 3225 $msg = <<EOM; 3226Did not see ending quote character <$quote_char> in this text: 3227$body 3228EOM 3229 last; 3230 } 3231 } 3232 3233 # accumulating characters and looking for start of a quoted string 3234 else { 3235 if ( $body =~ /\G([\"\'])/gc ) { 3236 $quote_char = $1; 3237 } 3238 elsif ( $body =~ /\G(\s+)/gc ) { 3239 if ( length($part) ) { push @body_parts, $part; } 3240 $part = ""; 3241 } 3242 elsif ( $body =~ /\G(.)/gc ) { 3243 $part .= $1; 3244 } 3245 else { 3246 if ( length($part) ) { push @body_parts, $part; } 3247 last; 3248 } 3249 } 3250 } 3251 return ( \@body_parts, $msg ); 3252} 3253 3254sub dump_long_names { 3255 3256 my @names = sort @_; 3257 print STDOUT <<EOM; 3258# Command line long names (passed to GetOptions) 3259#--------------------------------------------------------------- 3260# here is a summary of the Getopt codes: 3261# <none> does not take an argument 3262# =s takes a mandatory string 3263# :s takes an optional string 3264# =i takes a mandatory integer 3265# :i takes an optional integer 3266# ! does not take an argument and may be negated 3267# i.e., -foo and -nofoo are allowed 3268# a double dash signals the end of the options list 3269# 3270#--------------------------------------------------------------- 3271EOM 3272 3273 foreach (@names) { print STDOUT "$_\n" } 3274} 3275 3276sub dump_defaults { 3277 my @defaults = sort @_; 3278 print STDOUT "Default command line options:\n"; 3279 foreach (@_) { print STDOUT "$_\n" } 3280} 3281 3282sub readable_options { 3283 3284 # return options for this run as a string which could be 3285 # put in a perltidyrc file 3286 my ( $rOpts, $roption_string ) = @_; 3287 my %Getopt_flags; 3288 my $rGetopt_flags = \%Getopt_flags; 3289 my $readable_options = "# Final parameter set for this run.\n"; 3290 $readable_options .= 3291 "# See utility 'perltidyrc_dump.pl' for nicer formatting.\n"; 3292 foreach my $opt ( @{$roption_string} ) { 3293 my $flag = ""; 3294 if ( $opt =~ /(.*)(!|=.*)$/ ) { 3295 $opt = $1; 3296 $flag = $2; 3297 } 3298 if ( defined( $rOpts->{$opt} ) ) { 3299 $rGetopt_flags->{$opt} = $flag; 3300 } 3301 } 3302 foreach my $key ( sort keys %{$rOpts} ) { 3303 my $flag = $rGetopt_flags->{$key}; 3304 my $value = $rOpts->{$key}; 3305 my $prefix = '--'; 3306 my $suffix = ""; 3307 if ($flag) { 3308 if ( $flag =~ /^=/ ) { 3309 if ( $value !~ /^\d+$/ ) { $value = '"' . $value . '"' } 3310 $suffix = "=" . $value; 3311 } 3312 elsif ( $flag =~ /^!/ ) { 3313 $prefix .= "no" unless ($value); 3314 } 3315 else { 3316 3317 # shouldn't happen 3318 $readable_options .= 3319 "# ERROR in dump_options: unrecognized flag $flag for $key\n"; 3320 } 3321 } 3322 $readable_options .= $prefix . $key . $suffix . "\n"; 3323 } 3324 return $readable_options; 3325} 3326 3327sub show_version { 3328 print STDOUT <<"EOM"; 3329This is perltidy, v$VERSION 3330 3331Copyright 2000-2012, Steve Hancock 3332 3333Perltidy is free software and may be copied under the terms of the GNU 3334General Public License, which is included in the distribution files. 3335 3336Complete documentation for perltidy can be found using 'man perltidy' 3337or on the internet at http://perltidy.sourceforge.net. 3338EOM 3339} 3340 3341sub usage { 3342 3343 print STDOUT <<EOF; 3344This is perltidy version $VERSION, a perl script indenter. Usage: 3345 3346 perltidy [ options ] file1 file2 file3 ... 3347 (output goes to file1.tdy, file2.tdy, file3.tdy, ...) 3348 perltidy [ options ] file1 -o outfile 3349 perltidy [ options ] file1 -st >outfile 3350 perltidy [ options ] <infile >outfile 3351 3352Options have short and long forms. Short forms are shown; see 3353man pages for long forms. Note: '=s' indicates a required string, 3354and '=n' indicates a required integer. 3355 3356I/O control 3357 -h show this help 3358 -o=file name of the output file (only if single input file) 3359 -oext=s change output extension from 'tdy' to s 3360 -opath=path change path to be 'path' for output files 3361 -b backup original to .bak and modify file in-place 3362 -bext=s change default backup extension from 'bak' to s 3363 -q deactivate error messages (for running under editor) 3364 -w include non-critical warning messages in the .ERR error output 3365 -syn run perl -c to check syntax (default under unix systems) 3366 -log save .LOG file, which has useful diagnostics 3367 -f force perltidy to read a binary file 3368 -g like -log but writes more detailed .LOG file, for debugging scripts 3369 -opt write the set of options actually used to a .LOG file 3370 -npro ignore .perltidyrc configuration command file 3371 -pro=file read configuration commands from file instead of .perltidyrc 3372 -st send output to standard output, STDOUT 3373 -se send all error output to standard error output, STDERR 3374 -v display version number to standard output and quit 3375 3376Basic Options: 3377 -i=n use n columns per indentation level (default n=4) 3378 -t tabs: use one tab character per indentation level, not recommeded 3379 -nt no tabs: use n spaces per indentation level (default) 3380 -et=n entab leading whitespace n spaces per tab; not recommended 3381 -io "indent only": just do indentation, no other formatting. 3382 -sil=n set starting indentation level to n; use if auto detection fails 3383 -ole=s specify output line ending (s=dos or win, mac, unix) 3384 -ple keep output line endings same as input (input must be filename) 3385 3386Whitespace Control 3387 -fws freeze whitespace; this disables all whitespace changes 3388 and disables the following switches: 3389 -bt=n sets brace tightness, n= (0 = loose, 1=default, 2 = tight) 3390 -bbt same as -bt but for code block braces; same as -bt if not given 3391 -bbvt block braces vertically tight; use with -bl or -bli 3392 -bbvtl=s make -bbvt to apply to selected list of block types 3393 -pt=n paren tightness (n=0, 1 or 2) 3394 -sbt=n square bracket tightness (n=0, 1, or 2) 3395 -bvt=n brace vertical tightness, 3396 n=(0=open, 1=close unless multiple steps on a line, 2=always close) 3397 -pvt=n paren vertical tightness (see -bvt for n) 3398 -sbvt=n square bracket vertical tightness (see -bvt for n) 3399 -bvtc=n closing brace vertical tightness: 3400 n=(0=open, 1=sometimes close, 2=always close) 3401 -pvtc=n closing paren vertical tightness, see -bvtc for n. 3402 -sbvtc=n closing square bracket vertical tightness, see -bvtc for n. 3403 -ci=n sets continuation indentation=n, default is n=2 spaces 3404 -lp line up parentheses, brackets, and non-BLOCK braces 3405 -sfs add space before semicolon in for( ; ; ) 3406 -aws allow perltidy to add whitespace (default) 3407 -dws delete all old non-essential whitespace 3408 -icb indent closing brace of a code block 3409 -cti=n closing indentation of paren, square bracket, or non-block brace: 3410 n=0 none, =1 align with opening, =2 one full indentation level 3411 -icp equivalent to -cti=2 3412 -wls=s want space left of tokens in string; i.e. -nwls='+ - * /' 3413 -wrs=s want space right of tokens in string; 3414 -sts put space before terminal semicolon of a statement 3415 -sak=s put space between keywords given in s and '('; 3416 -nsak=s no space between keywords in s and '('; i.e. -nsak='my our local' 3417 3418Line Break Control 3419 -fnl freeze newlines; this disables all line break changes 3420 and disables the following switches: 3421 -anl add newlines; ok to introduce new line breaks 3422 -bbs add blank line before subs and packages 3423 -bbc add blank line before block comments 3424 -bbb add blank line between major blocks 3425 -kbl=n keep old blank lines? 0=no, 1=some, 2=all 3426 -mbl=n maximum consecutive blank lines to output (default=1) 3427 -ce cuddled else; use this style: '} else {' 3428 -dnl delete old newlines (default) 3429 -l=n maximum line length; default n=80 3430 -bl opening brace on new line 3431 -sbl opening sub brace on new line. value of -bl is used if not given. 3432 -bli opening brace on new line and indented 3433 -bar opening brace always on right, even for long clauses 3434 -vt=n vertical tightness (requires -lp); n controls break after opening 3435 token: 0=never 1=no break if next line balanced 2=no break 3436 -vtc=n vertical tightness of closing container; n controls if closing 3437 token starts new line: 0=always 1=not unless list 1=never 3438 -wba=s want break after tokens in string; i.e. wba=': .' 3439 -wbb=s want break before tokens in string 3440 3441Following Old Breakpoints 3442 -kis keep interior semicolons. Allows multiple statements per line. 3443 -boc break at old comma breaks: turns off all automatic list formatting 3444 -bol break at old logical breakpoints: or, and, ||, && (default) 3445 -bok break at old list keyword breakpoints such as map, sort (default) 3446 -bot break at old conditional (ternary ?:) operator breakpoints (default) 3447 -boa break at old attribute breakpoints 3448 -cab=n break at commas after a comma-arrow (=>): 3449 n=0 break at all commas after => 3450 n=1 stable: break unless this breaks an existing one-line container 3451 n=2 break only if a one-line container cannot be formed 3452 n=3 do not treat commas after => specially at all 3453 3454Comment controls 3455 -ibc indent block comments (default) 3456 -isbc indent spaced block comments; may indent unless no leading space 3457 -msc=n minimum desired spaces to side comment, default 4 3458 -fpsc=n fix position for side comments; default 0; 3459 -csc add or update closing side comments after closing BLOCK brace 3460 -dcsc delete closing side comments created by a -csc command 3461 -cscp=s change closing side comment prefix to be other than '## end' 3462 -cscl=s change closing side comment to apply to selected list of blocks 3463 -csci=n minimum number of lines needed to apply a -csc tag, default n=6 3464 -csct=n maximum number of columns of appended text, default n=20 3465 -cscw causes warning if old side comment is overwritten with -csc 3466 3467 -sbc use 'static block comments' identified by leading '##' (default) 3468 -sbcp=s change static block comment identifier to be other than '##' 3469 -osbc outdent static block comments 3470 3471 -ssc use 'static side comments' identified by leading '##' (default) 3472 -sscp=s change static side comment identifier to be other than '##' 3473 3474Delete selected text 3475 -dac delete all comments AND pod 3476 -dbc delete block comments 3477 -dsc delete side comments 3478 -dp delete pod 3479 3480Send selected text to a '.TEE' file 3481 -tac tee all comments AND pod 3482 -tbc tee block comments 3483 -tsc tee side comments 3484 -tp tee pod 3485 3486Outdenting 3487 -olq outdent long quoted strings (default) 3488 -olc outdent a long block comment line 3489 -ola outdent statement labels 3490 -okw outdent control keywords (redo, next, last, goto, return) 3491 -okwl=s specify alternative keywords for -okw command 3492 3493Other controls 3494 -mft=n maximum fields per table; default n=40 3495 -x do not format lines before hash-bang line (i.e., for VMS) 3496 -asc allows perltidy to add a ';' when missing (default) 3497 -dsm allows perltidy to delete an unnecessary ';' (default) 3498 3499Combinations of other parameters 3500 -gnu attempt to follow GNU Coding Standards as applied to perl 3501 -mangle remove as many newlines as possible (but keep comments and pods) 3502 -extrude insert as many newlines as possible 3503 3504Dump and die, debugging 3505 -dop dump options used in this run to standard output and quit 3506 -ddf dump default options to standard output and quit 3507 -dsn dump all option short names to standard output and quit 3508 -dln dump option long names to standard output and quit 3509 -dpro dump whatever configuration file is in effect to standard output 3510 -dtt dump all token types to standard output and quit 3511 3512HTML 3513 -html write an html file (see 'man perl2web' for many options) 3514 Note: when -html is used, no indentation or formatting are done. 3515 Hint: try perltidy -html -css=mystyle.css filename.pl 3516 and edit mystyle.css to change the appearance of filename.html. 3517 -nnn gives line numbers 3518 -pre only writes out <pre>..</pre> code section 3519 -toc places a table of contents to subs at the top (default) 3520 -pod passes pod text through pod2html (default) 3521 -frm write html as a frame (3 files) 3522 -text=s extra extension for table of contents if -frm, default='toc' 3523 -sext=s extra extension for file content if -frm, default='src' 3524 3525A prefix of "n" negates short form toggle switches, and a prefix of "no" 3526negates the long forms. For example, -nasc means don't add missing 3527semicolons. 3528 3529If you are unable to see this entire text, try "perltidy -h | more" 3530For more detailed information, and additional options, try "man perltidy", 3531or go to the perltidy home page at http://perltidy.sourceforge.net 3532EOF 3533 3534} 3535 3536sub process_this_file { 3537 3538 my ( $truth, $beauty ) = @_; 3539 3540 # loop to process each line of this file 3541 while ( my $line_of_tokens = $truth->get_line() ) { 3542 $beauty->write_line($line_of_tokens); 3543 } 3544 3545 # finish up 3546 eval { $beauty->finish_formatting() }; 3547 $truth->report_tokenization_errors(); 3548} 3549 3550sub check_syntax { 3551 3552 # Use 'perl -c' to make sure that we did not create bad syntax 3553 # This is a very good independent check for programming errors 3554 # 3555 # Given names of the input and output files, ($istream, $ostream), 3556 # we do the following: 3557 # - check syntax of the input file 3558 # - if bad, all done (could be an incomplete code snippet) 3559 # - if infile syntax ok, then check syntax of the output file; 3560 # - if outfile syntax bad, issue warning; this implies a code bug! 3561 # - set and return flag "infile_syntax_ok" : =-1 bad 0 unknown 1 good 3562 3563 my ( $istream, $ostream, $logger_object, $rOpts ) = @_; 3564 my $infile_syntax_ok = 0; 3565 my $line_of_dashes = '-' x 42 . "\n"; 3566 3567 my $flags = $rOpts->{'perl-syntax-check-flags'}; 3568 3569 # be sure we invoke perl with -c 3570 # note: perl will accept repeated flags like '-c -c'. It is safest 3571 # to append another -c than try to find an interior bundled c, as 3572 # in -Tc, because such a 'c' might be in a quoted string, for example. 3573 if ( $flags !~ /(^-c|\s+-c)/ ) { $flags .= " -c" } 3574 3575 # be sure we invoke perl with -x if requested 3576 # same comments about repeated parameters applies 3577 if ( $rOpts->{'look-for-hash-bang'} ) { 3578 if ( $flags !~ /(^-x|\s+-x)/ ) { $flags .= " -x" } 3579 } 3580 3581 # this shouldn't happen unless a termporary file couldn't be made 3582 if ( $istream eq '-' ) { 3583 $logger_object->write_logfile_entry( 3584 "Cannot run perl -c on STDIN and STDOUT\n"); 3585 return $infile_syntax_ok; 3586 } 3587 3588 $logger_object->write_logfile_entry( 3589 "checking input file syntax with perl $flags\n"); 3590 3591 # Not all operating systems/shells support redirection of the standard 3592 # error output. 3593 my $error_redirection = ( $^O eq 'VMS' ) ? "" : '2>&1'; 3594 3595 my ( $istream_filename, $perl_output ) = 3596 do_syntax_check( $istream, $flags, $error_redirection ); 3597 $logger_object->write_logfile_entry( 3598 "Input stream passed to Perl as file $istream_filename\n"); 3599 $logger_object->write_logfile_entry($line_of_dashes); 3600 $logger_object->write_logfile_entry("$perl_output\n"); 3601 3602 if ( $perl_output =~ /syntax\s*OK/ ) { 3603 $infile_syntax_ok = 1; 3604 $logger_object->write_logfile_entry($line_of_dashes); 3605 $logger_object->write_logfile_entry( 3606 "checking output file syntax with perl $flags ...\n"); 3607 my ( $ostream_filename, $perl_output ) = 3608 do_syntax_check( $ostream, $flags, $error_redirection ); 3609 $logger_object->write_logfile_entry( 3610 "Output stream passed to Perl as file $ostream_filename\n"); 3611 $logger_object->write_logfile_entry($line_of_dashes); 3612 $logger_object->write_logfile_entry("$perl_output\n"); 3613 3614 unless ( $perl_output =~ /syntax\s*OK/ ) { 3615 $logger_object->write_logfile_entry($line_of_dashes); 3616 $logger_object->warning( 3617"The output file has a syntax error when tested with perl $flags $ostream !\n" 3618 ); 3619 $logger_object->warning( 3620 "This implies an error in perltidy; the file $ostream is bad\n" 3621 ); 3622 $logger_object->report_definite_bug(); 3623 3624 # the perl version number will be helpful for diagnosing the problem 3625 $logger_object->write_logfile_entry( 3626 qx/perl -v $error_redirection/ . "\n" ); 3627 } 3628 } 3629 else { 3630 3631 # Only warn of perl -c syntax errors. Other messages, 3632 # such as missing modules, are too common. They can be 3633 # seen by running with perltidy -w 3634 $logger_object->complain("A syntax check using perl $flags\n"); 3635 $logger_object->complain( 3636 "for the output in file $istream_filename gives:\n"); 3637 $logger_object->complain($line_of_dashes); 3638 $logger_object->complain("$perl_output\n"); 3639 $logger_object->complain($line_of_dashes); 3640 $infile_syntax_ok = -1; 3641 $logger_object->write_logfile_entry($line_of_dashes); 3642 $logger_object->write_logfile_entry( 3643"The output file will not be checked because of input file problems\n" 3644 ); 3645 } 3646 return $infile_syntax_ok; 3647} 3648 3649sub do_syntax_check { 3650 my ( $stream, $flags, $error_redirection ) = @_; 3651 3652 # We need a named input file for executing perl 3653 my ( $stream_filename, $is_tmpfile ) = get_stream_as_named_file($stream); 3654 3655 # TODO: Need to add name of file to log somewhere 3656 # otherwise Perl output is hard to read 3657 if ( !$stream_filename ) { return $stream_filename, "" } 3658 3659 # We have to quote the filename in case it has unusual characters 3660 # or spaces. Example: this filename #CM11.pm# gives trouble. 3661 my $quoted_stream_filename = '"' . $stream_filename . '"'; 3662 3663 # Under VMS something like -T will become -t (and an error) so we 3664 # will put quotes around the flags. Double quotes seem to work on 3665 # Unix/Windows/VMS, but this may not work on all systems. (Single 3666 # quotes do not work under Windows). It could become necessary to 3667 # put double quotes around each flag, such as: -"c" -"T" 3668 # We may eventually need some system-dependent coding here. 3669 $flags = '"' . $flags . '"'; 3670 3671 # now wish for luck... 3672 my $msg = qx/perl $flags $quoted_stream_filename $error_redirection/; 3673 3674 unlink $stream_filename if ($is_tmpfile); 3675 return $stream_filename, $msg; 3676} 3677 3678##################################################################### 3679# 3680# This is a stripped down version of IO::Scalar 3681# Given a reference to a scalar, it supplies either: 3682# a getline method which reads lines (mode='r'), or 3683# a print method which reads lines (mode='w') 3684# 3685##################################################################### 3686package Perl::Tidy::IOScalar; 3687use Carp; 3688 3689sub new { 3690 my ( $package, $rscalar, $mode ) = @_; 3691 my $ref = ref $rscalar; 3692 if ( $ref ne 'SCALAR' ) { 3693 confess <<EOM; 3694------------------------------------------------------------------------ 3695expecting ref to SCALAR but got ref to ($ref); trace follows: 3696------------------------------------------------------------------------ 3697EOM 3698 3699 } 3700 if ( $mode eq 'w' ) { 3701 $$rscalar = ""; 3702 return bless [ $rscalar, $mode ], $package; 3703 } 3704 elsif ( $mode eq 'r' ) { 3705 3706 # Convert a scalar to an array. 3707 # This avoids looking for "\n" on each call to getline 3708 # 3709 # NOTES: The -1 count is needed to avoid loss of trailing blank lines 3710 # (which might be important in a DATA section). 3711 my @array; 3712 if ( $rscalar && ${$rscalar} ) { 3713 @array = map { $_ .= "\n" } split /\n/, ${$rscalar}, -1; 3714 3715 # remove possible extra blank line introduced with split 3716 if ( @array && $array[-1] eq "\n" ) { pop @array } 3717 } 3718 my $i_next = 0; 3719 return bless [ \@array, $mode, $i_next ], $package; 3720 } 3721 else { 3722 confess <<EOM; 3723------------------------------------------------------------------------ 3724expecting mode = 'r' or 'w' but got mode ($mode); trace follows: 3725------------------------------------------------------------------------ 3726EOM 3727 } 3728} 3729 3730sub getline { 3731 my $self = shift; 3732 my $mode = $self->[1]; 3733 if ( $mode ne 'r' ) { 3734 confess <<EOM; 3735------------------------------------------------------------------------ 3736getline call requires mode = 'r' but mode = ($mode); trace follows: 3737------------------------------------------------------------------------ 3738EOM 3739 } 3740 my $i = $self->[2]++; 3741 return $self->[0]->[$i]; 3742} 3743 3744sub print { 3745 my $self = shift; 3746 my $mode = $self->[1]; 3747 if ( $mode ne 'w' ) { 3748 confess <<EOM; 3749------------------------------------------------------------------------ 3750print call requires mode = 'w' but mode = ($mode); trace follows: 3751------------------------------------------------------------------------ 3752EOM 3753 } 3754 ${ $self->[0] } .= $_[0]; 3755} 3756sub close { return } 3757 3758##################################################################### 3759# 3760# This is a stripped down version of IO::ScalarArray 3761# Given a reference to an array, it supplies either: 3762# a getline method which reads lines (mode='r'), or 3763# a print method which reads lines (mode='w') 3764# 3765# NOTE: this routine assumes that that there aren't any embedded 3766# newlines within any of the array elements. There are no checks 3767# for that. 3768# 3769##################################################################### 3770package Perl::Tidy::IOScalarArray; 3771use Carp; 3772 3773sub new { 3774 my ( $package, $rarray, $mode ) = @_; 3775 my $ref = ref $rarray; 3776 if ( $ref ne 'ARRAY' ) { 3777 confess <<EOM; 3778------------------------------------------------------------------------ 3779expecting ref to ARRAY but got ref to ($ref); trace follows: 3780------------------------------------------------------------------------ 3781EOM 3782 3783 } 3784 if ( $mode eq 'w' ) { 3785 @$rarray = (); 3786 return bless [ $rarray, $mode ], $package; 3787 } 3788 elsif ( $mode eq 'r' ) { 3789 my $i_next = 0; 3790 return bless [ $rarray, $mode, $i_next ], $package; 3791 } 3792 else { 3793 confess <<EOM; 3794------------------------------------------------------------------------ 3795expecting mode = 'r' or 'w' but got mode ($mode); trace follows: 3796------------------------------------------------------------------------ 3797EOM 3798 } 3799} 3800 3801sub getline { 3802 my $self = shift; 3803 my $mode = $self->[1]; 3804 if ( $mode ne 'r' ) { 3805 confess <<EOM; 3806------------------------------------------------------------------------ 3807getline requires mode = 'r' but mode = ($mode); trace follows: 3808------------------------------------------------------------------------ 3809EOM 3810 } 3811 my $i = $self->[2]++; 3812 return $self->[0]->[$i]; 3813} 3814 3815sub print { 3816 my $self = shift; 3817 my $mode = $self->[1]; 3818 if ( $mode ne 'w' ) { 3819 confess <<EOM; 3820------------------------------------------------------------------------ 3821print requires mode = 'w' but mode = ($mode); trace follows: 3822------------------------------------------------------------------------ 3823EOM 3824 } 3825 push @{ $self->[0] }, $_[0]; 3826} 3827sub close { return } 3828 3829##################################################################### 3830# 3831# the Perl::Tidy::LineSource class supplies an object with a 'get_line()' method 3832# which returns the next line to be parsed 3833# 3834##################################################################### 3835 3836package Perl::Tidy::LineSource; 3837 3838sub new { 3839 3840 my ( $class, $input_file, $rOpts, $rpending_logfile_message ) = @_; 3841 3842 my $input_line_ending; 3843 if ( $rOpts->{'preserve-line-endings'} ) { 3844 $input_line_ending = Perl::Tidy::find_input_line_ending($input_file); 3845 } 3846 3847 ( my $fh, $input_file ) = Perl::Tidy::streamhandle( $input_file, 'r' ); 3848 return undef unless $fh; 3849 3850 # in order to check output syntax when standard output is used, 3851 # or when it is an object, we have to make a copy of the file 3852 if ( ( $input_file eq '-' || ref $input_file ) && $rOpts->{'check-syntax'} ) 3853 { 3854 3855 # Turning off syntax check when input output is used. 3856 # The reason is that temporary files cause problems on 3857 # on many systems. 3858 $rOpts->{'check-syntax'} = 0; 3859 3860 $$rpending_logfile_message .= <<EOM; 3861Note: --syntax check will be skipped because standard input is used 3862EOM 3863 3864 } 3865 3866 return bless { 3867 _fh => $fh, 3868 _filename => $input_file, 3869 _input_line_ending => $input_line_ending, 3870 _rinput_buffer => [], 3871 _started => 0, 3872 }, $class; 3873} 3874 3875sub close_input_file { 3876 my $self = shift; 3877 3878 # Only close physical files, not STDIN and other objects 3879 my $filename = $self->{_filename}; 3880 if ( $filename ne '-' && !ref $filename ) { 3881 eval { $self->{_fh}->close() }; 3882 } 3883} 3884 3885sub get_line { 3886 my $self = shift; 3887 my $line = undef; 3888 my $fh = $self->{_fh}; 3889 my $rinput_buffer = $self->{_rinput_buffer}; 3890 3891 if ( scalar(@$rinput_buffer) ) { 3892 $line = shift @$rinput_buffer; 3893 } 3894 else { 3895 $line = $fh->getline(); 3896 3897 # patch to read raw mac files under unix, dos 3898 # see if the first line has embedded \r's 3899 if ( $line && !$self->{_started} ) { 3900 if ( $line =~ /[\015][^\015\012]/ ) { 3901 3902 # found one -- break the line up and store in a buffer 3903 @$rinput_buffer = map { $_ . "\n" } split /\015/, $line; 3904 my $count = @$rinput_buffer; 3905 $line = shift @$rinput_buffer; 3906 } 3907 $self->{_started}++; 3908 } 3909 } 3910 return $line; 3911} 3912 3913##################################################################### 3914# 3915# the Perl::Tidy::LineSink class supplies a write_line method for 3916# actual file writing 3917# 3918##################################################################### 3919 3920package Perl::Tidy::LineSink; 3921 3922sub new { 3923 3924 my ( $class, $output_file, $tee_file, $line_separator, $rOpts, 3925 $rpending_logfile_message, $binmode ) 3926 = @_; 3927 my $fh = undef; 3928 my $fh_tee = undef; 3929 3930 my $output_file_open = 0; 3931 3932 if ( $rOpts->{'format'} eq 'tidy' ) { 3933 ( $fh, $output_file ) = Perl::Tidy::streamhandle( $output_file, 'w' ); 3934 unless ($fh) { Perl::Tidy::Die "Cannot write to output stream\n"; } 3935 $output_file_open = 1; 3936 if ($binmode) { 3937 if ( ref($fh) eq 'IO::File' ) { 3938 binmode $fh; 3939 } 3940 if ( $output_file eq '-' ) { binmode STDOUT } 3941 } 3942 } 3943 3944 # in order to check output syntax when standard output is used, 3945 # or when it is an object, we have to make a copy of the file 3946 if ( $output_file eq '-' || ref $output_file ) { 3947 if ( $rOpts->{'check-syntax'} ) { 3948 3949 # Turning off syntax check when standard output is used. 3950 # The reason is that temporary files cause problems on 3951 # on many systems. 3952 $rOpts->{'check-syntax'} = 0; 3953 $$rpending_logfile_message .= <<EOM; 3954Note: --syntax check will be skipped because standard output is used 3955EOM 3956 3957 } 3958 } 3959 3960 bless { 3961 _fh => $fh, 3962 _fh_tee => $fh_tee, 3963 _output_file => $output_file, 3964 _output_file_open => $output_file_open, 3965 _tee_flag => 0, 3966 _tee_file => $tee_file, 3967 _tee_file_opened => 0, 3968 _line_separator => $line_separator, 3969 _binmode => $binmode, 3970 }, $class; 3971} 3972 3973sub write_line { 3974 3975 my $self = shift; 3976 my $fh = $self->{_fh}; 3977 3978 my $output_file_open = $self->{_output_file_open}; 3979 chomp $_[0]; 3980 $_[0] .= $self->{_line_separator}; 3981 3982 $fh->print( $_[0] ) if ( $self->{_output_file_open} ); 3983 3984 if ( $self->{_tee_flag} ) { 3985 unless ( $self->{_tee_file_opened} ) { $self->really_open_tee_file() } 3986 my $fh_tee = $self->{_fh_tee}; 3987 print $fh_tee $_[0]; 3988 } 3989} 3990 3991sub tee_on { 3992 my $self = shift; 3993 $self->{_tee_flag} = 1; 3994} 3995 3996sub tee_off { 3997 my $self = shift; 3998 $self->{_tee_flag} = 0; 3999} 4000 4001sub really_open_tee_file { 4002 my $self = shift; 4003 my $tee_file = $self->{_tee_file}; 4004 my $fh_tee; 4005 $fh_tee = IO::File->new(">$tee_file") 4006 or Perl::Tidy::Die("couldn't open TEE file $tee_file: $!\n"); 4007 binmode $fh_tee if $self->{_binmode}; 4008 $self->{_tee_file_opened} = 1; 4009 $self->{_fh_tee} = $fh_tee; 4010} 4011 4012sub close_output_file { 4013 my $self = shift; 4014 4015 # Only close physical files, not STDOUT and other objects 4016 my $output_file = $self->{_output_file}; 4017 if ( $output_file ne '-' && !ref $output_file ) { 4018 eval { $self->{_fh}->close() } if $self->{_output_file_open}; 4019 } 4020 $self->close_tee_file(); 4021} 4022 4023sub close_tee_file { 4024 my $self = shift; 4025 4026 # Only close physical files, not STDOUT and other objects 4027 if ( $self->{_tee_file_opened} ) { 4028 my $tee_file = $self->{_tee_file}; 4029 if ( $tee_file ne '-' && !ref $tee_file ) { 4030 eval { $self->{_fh_tee}->close() }; 4031 $self->{_tee_file_opened} = 0; 4032 } 4033 } 4034} 4035 4036##################################################################### 4037# 4038# The Perl::Tidy::Diagnostics class writes the DIAGNOSTICS file, which is 4039# useful for program development. 4040# 4041# Only one such file is created regardless of the number of input 4042# files processed. This allows the results of processing many files 4043# to be summarized in a single file. 4044# 4045##################################################################### 4046 4047package Perl::Tidy::Diagnostics; 4048 4049sub new { 4050 4051 my $class = shift; 4052 bless { 4053 _write_diagnostics_count => 0, 4054 _last_diagnostic_file => "", 4055 _input_file => "", 4056 _fh => undef, 4057 }, $class; 4058} 4059 4060sub set_input_file { 4061 my $self = shift; 4062 $self->{_input_file} = $_[0]; 4063} 4064 4065# This is a diagnostic routine which is useful for program development. 4066# Output from debug messages go to a file named DIAGNOSTICS, where 4067# they are labeled by file and line. This allows many files to be 4068# scanned at once for some particular condition of interest. 4069sub write_diagnostics { 4070 my $self = shift; 4071 4072 unless ( $self->{_write_diagnostics_count} ) { 4073 open DIAGNOSTICS, ">DIAGNOSTICS" 4074 or death("couldn't open DIAGNOSTICS: $!\n"); 4075 } 4076 4077 my $last_diagnostic_file = $self->{_last_diagnostic_file}; 4078 my $input_file = $self->{_input_file}; 4079 if ( $last_diagnostic_file ne $input_file ) { 4080 print DIAGNOSTICS "\nFILE:$input_file\n"; 4081 } 4082 $self->{_last_diagnostic_file} = $input_file; 4083 my $input_line_number = Perl::Tidy::Tokenizer::get_input_line_number(); 4084 print DIAGNOSTICS "$input_line_number:\t@_"; 4085 $self->{_write_diagnostics_count}++; 4086} 4087 4088##################################################################### 4089# 4090# The Perl::Tidy::Logger class writes the .LOG and .ERR files 4091# 4092##################################################################### 4093 4094package Perl::Tidy::Logger; 4095 4096sub new { 4097 my $class = shift; 4098 my $fh; 4099 my ( $rOpts, $log_file, $warning_file, $fh_stderr, $saw_extrude, ) = @_; 4100 4101 my $fh_warnings = $rOpts->{'standard-error-output'} ? $fh_stderr : undef; 4102 4103 # remove any old error output file if we might write a new one 4104 unless ( $fh_warnings || ref($warning_file) ) { 4105 if ( -e $warning_file ) { unlink($warning_file) } 4106 } 4107 4108 bless { 4109 _log_file => $log_file, 4110 _rOpts => $rOpts, 4111 _fh_warnings => $fh_warnings, 4112 _last_input_line_written => 0, 4113 _at_end_of_file => 0, 4114 _use_prefix => 1, 4115 _block_log_output => 0, 4116 _line_of_tokens => undef, 4117 _output_line_number => undef, 4118 _wrote_line_information_string => 0, 4119 _wrote_column_headings => 0, 4120 _warning_file => $warning_file, 4121 _warning_count => 0, 4122 _complaint_count => 0, 4123 _saw_code_bug => -1, # -1=no 0=maybe 1=for sure 4124 _saw_brace_error => 0, 4125 _saw_extrude => $saw_extrude, 4126 _output_array => [], 4127 }, $class; 4128} 4129 4130sub get_warning_count { 4131 my $self = shift; 4132 return $self->{_warning_count}; 4133} 4134 4135sub get_use_prefix { 4136 my $self = shift; 4137 return $self->{_use_prefix}; 4138} 4139 4140sub block_log_output { 4141 my $self = shift; 4142 $self->{_block_log_output} = 1; 4143} 4144 4145sub unblock_log_output { 4146 my $self = shift; 4147 $self->{_block_log_output} = 0; 4148} 4149 4150sub interrupt_logfile { 4151 my $self = shift; 4152 $self->{_use_prefix} = 0; 4153 $self->warning("\n"); 4154 $self->write_logfile_entry( '#' x 24 . " WARNING " . '#' x 25 . "\n" ); 4155} 4156 4157sub resume_logfile { 4158 my $self = shift; 4159 $self->write_logfile_entry( '#' x 60 . "\n" ); 4160 $self->{_use_prefix} = 1; 4161} 4162 4163sub we_are_at_the_last_line { 4164 my $self = shift; 4165 unless ( $self->{_wrote_line_information_string} ) { 4166 $self->write_logfile_entry("Last line\n\n"); 4167 } 4168 $self->{_at_end_of_file} = 1; 4169} 4170 4171# record some stuff in case we go down in flames 4172sub black_box { 4173 my $self = shift; 4174 my ( $line_of_tokens, $output_line_number ) = @_; 4175 my $input_line = $line_of_tokens->{_line_text}; 4176 my $input_line_number = $line_of_tokens->{_line_number}; 4177 4178 # save line information in case we have to write a logfile message 4179 $self->{_line_of_tokens} = $line_of_tokens; 4180 $self->{_output_line_number} = $output_line_number; 4181 $self->{_wrote_line_information_string} = 0; 4182 4183 my $last_input_line_written = $self->{_last_input_line_written}; 4184 my $rOpts = $self->{_rOpts}; 4185 if ( 4186 ( 4187 ( $input_line_number - $last_input_line_written ) >= 4188 $rOpts->{'logfile-gap'} 4189 ) 4190 || ( $input_line =~ /^\s*(sub|package)\s+(\w+)/ ) 4191 ) 4192 { 4193 my $rlevels = $line_of_tokens->{_rlevels}; 4194 my $structural_indentation_level = $$rlevels[0]; 4195 $self->{_last_input_line_written} = $input_line_number; 4196 ( my $out_str = $input_line ) =~ s/^\s*//; 4197 chomp $out_str; 4198 4199 $out_str = ( '.' x $structural_indentation_level ) . $out_str; 4200 4201 if ( length($out_str) > 35 ) { 4202 $out_str = substr( $out_str, 0, 35 ) . " ...."; 4203 } 4204 $self->logfile_output( "", "$out_str\n" ); 4205 } 4206} 4207 4208sub write_logfile_entry { 4209 my $self = shift; 4210 4211 # add leading >>> to avoid confusing error mesages and code 4212 $self->logfile_output( ">>>", "@_" ); 4213} 4214 4215sub write_column_headings { 4216 my $self = shift; 4217 4218 $self->{_wrote_column_headings} = 1; 4219 my $routput_array = $self->{_output_array}; 4220 push @{$routput_array}, <<EOM; 4221The nesting depths in the table below are at the start of the lines. 4222The indicated output line numbers are not always exact. 4223ci = levels of continuation indentation; bk = 1 if in BLOCK, 0 if not. 4224 4225in:out indent c b nesting code + messages; (messages begin with >>>) 4226lines levels i k (code begins with one '.' per indent level) 4227------ ----- - - -------- ------------------------------------------- 4228EOM 4229} 4230 4231sub make_line_information_string { 4232 4233 # make columns of information when a logfile message needs to go out 4234 my $self = shift; 4235 my $line_of_tokens = $self->{_line_of_tokens}; 4236 my $input_line_number = $line_of_tokens->{_line_number}; 4237 my $line_information_string = ""; 4238 if ($input_line_number) { 4239 4240 my $output_line_number = $self->{_output_line_number}; 4241 my $brace_depth = $line_of_tokens->{_curly_brace_depth}; 4242 my $paren_depth = $line_of_tokens->{_paren_depth}; 4243 my $square_bracket_depth = $line_of_tokens->{_square_bracket_depth}; 4244 my $guessed_indentation_level = 4245 $line_of_tokens->{_guessed_indentation_level}; 4246 my $rlevels = $line_of_tokens->{_rlevels}; 4247 my $rnesting_tokens = $line_of_tokens->{_rnesting_tokens}; 4248 my $rci_levels = $line_of_tokens->{_rci_levels}; 4249 my $rnesting_blocks = $line_of_tokens->{_rnesting_blocks}; 4250 4251 my $structural_indentation_level = $$rlevels[0]; 4252 4253 $self->write_column_headings() unless $self->{_wrote_column_headings}; 4254 4255 # keep logfile columns aligned for scripts up to 999 lines; 4256 # for longer scripts it doesn't really matter 4257 my $extra_space = ""; 4258 $extra_space .= 4259 ( $input_line_number < 10 ) ? " " 4260 : ( $input_line_number < 100 ) ? " " 4261 : ""; 4262 $extra_space .= 4263 ( $output_line_number < 10 ) ? " " 4264 : ( $output_line_number < 100 ) ? " " 4265 : ""; 4266 4267 # there are 2 possible nesting strings: 4268 # the original which looks like this: (0 [1 {2 4269 # the new one, which looks like this: {{[ 4270 # the new one is easier to read, and shows the order, but 4271 # could be arbitrarily long, so we use it unless it is too long 4272 my $nesting_string = 4273 "($paren_depth [$square_bracket_depth {$brace_depth"; 4274 my $nesting_string_new = $$rnesting_tokens[0]; 4275 4276 my $ci_level = $$rci_levels[0]; 4277 if ( $ci_level > 9 ) { $ci_level = '*' } 4278 my $bk = ( $$rnesting_blocks[0] =~ /1$/ ) ? '1' : '0'; 4279 4280 if ( length($nesting_string_new) <= 8 ) { 4281 $nesting_string = 4282 $nesting_string_new . " " x ( 8 - length($nesting_string_new) ); 4283 } 4284 $line_information_string = 4285"L$input_line_number:$output_line_number$extra_space i$guessed_indentation_level:$structural_indentation_level $ci_level $bk $nesting_string"; 4286 } 4287 return $line_information_string; 4288} 4289 4290sub logfile_output { 4291 my $self = shift; 4292 my ( $prompt, $msg ) = @_; 4293 return if ( $self->{_block_log_output} ); 4294 4295 my $routput_array = $self->{_output_array}; 4296 if ( $self->{_at_end_of_file} || !$self->{_use_prefix} ) { 4297 push @{$routput_array}, "$msg"; 4298 } 4299 else { 4300 my $line_information_string = $self->make_line_information_string(); 4301 $self->{_wrote_line_information_string} = 1; 4302 4303 if ($line_information_string) { 4304 push @{$routput_array}, "$line_information_string $prompt$msg"; 4305 } 4306 else { 4307 push @{$routput_array}, "$msg"; 4308 } 4309 } 4310} 4311 4312sub get_saw_brace_error { 4313 my $self = shift; 4314 return $self->{_saw_brace_error}; 4315} 4316 4317sub increment_brace_error { 4318 my $self = shift; 4319 $self->{_saw_brace_error}++; 4320} 4321 4322sub brace_warning { 4323 my $self = shift; 4324 use constant BRACE_WARNING_LIMIT => 10; 4325 my $saw_brace_error = $self->{_saw_brace_error}; 4326 4327 if ( $saw_brace_error < BRACE_WARNING_LIMIT ) { 4328 $self->warning(@_); 4329 } 4330 $saw_brace_error++; 4331 $self->{_saw_brace_error} = $saw_brace_error; 4332 4333 if ( $saw_brace_error == BRACE_WARNING_LIMIT ) { 4334 $self->warning("No further warnings of this type will be given\n"); 4335 } 4336} 4337 4338sub complain { 4339 4340 # handle non-critical warning messages based on input flag 4341 my $self = shift; 4342 my $rOpts = $self->{_rOpts}; 4343 4344 # these appear in .ERR output only if -w flag is used 4345 if ( $rOpts->{'warning-output'} ) { 4346 $self->warning(@_); 4347 } 4348 4349 # otherwise, they go to the .LOG file 4350 else { 4351 $self->{_complaint_count}++; 4352 $self->write_logfile_entry(@_); 4353 } 4354} 4355 4356sub warning { 4357 4358 # report errors to .ERR file (or stdout) 4359 my $self = shift; 4360 use constant WARNING_LIMIT => 50; 4361 4362 my $rOpts = $self->{_rOpts}; 4363 unless ( $rOpts->{'quiet'} ) { 4364 4365 my $warning_count = $self->{_warning_count}; 4366 my $fh_warnings = $self->{_fh_warnings}; 4367 if ( !$fh_warnings ) { 4368 my $warning_file = $self->{_warning_file}; 4369 ( $fh_warnings, my $filename ) = 4370 Perl::Tidy::streamhandle( $warning_file, 'w' ); 4371 $fh_warnings or Perl::Tidy::Die("couldn't open $filename $!\n"); 4372 Perl::Tidy::Warn "## Please see file $filename\n" 4373 unless ref($warning_file); 4374 $self->{_fh_warnings} = $fh_warnings; 4375 } 4376 4377 if ( $warning_count < WARNING_LIMIT ) { 4378 if ( $self->get_use_prefix() > 0 ) { 4379 my $input_line_number = 4380 Perl::Tidy::Tokenizer::get_input_line_number(); 4381 $fh_warnings->print("$input_line_number:\t@_"); 4382 $self->write_logfile_entry("WARNING: @_"); 4383 } 4384 else { 4385 $fh_warnings->print(@_); 4386 $self->write_logfile_entry(@_); 4387 } 4388 } 4389 $warning_count++; 4390 $self->{_warning_count} = $warning_count; 4391 4392 if ( $warning_count == WARNING_LIMIT ) { 4393 $fh_warnings->print("No further warnings will be given\n"); 4394 } 4395 } 4396} 4397 4398# programming bug codes: 4399# -1 = no bug 4400# 0 = maybe, not sure. 4401# 1 = definitely 4402sub report_possible_bug { 4403 my $self = shift; 4404 my $saw_code_bug = $self->{_saw_code_bug}; 4405 $self->{_saw_code_bug} = ( $saw_code_bug < 0 ) ? 0 : $saw_code_bug; 4406} 4407 4408sub report_definite_bug { 4409 my $self = shift; 4410 $self->{_saw_code_bug} = 1; 4411} 4412 4413sub ask_user_for_bug_report { 4414 my $self = shift; 4415 4416 my ( $infile_syntax_ok, $formatter ) = @_; 4417 my $saw_code_bug = $self->{_saw_code_bug}; 4418 if ( ( $saw_code_bug == 0 ) && ( $infile_syntax_ok == 1 ) ) { 4419 $self->warning(<<EOM); 4420 4421You may have encountered a code bug in perltidy. If you think so, and 4422the problem is not listed in the BUGS file at 4423http://perltidy.sourceforge.net, please report it so that it can be 4424corrected. Include the smallest possible script which has the problem, 4425along with the .LOG file. See the manual pages for contact information. 4426Thank you! 4427EOM 4428 4429 } 4430 elsif ( $saw_code_bug == 1 ) { 4431 if ( $self->{_saw_extrude} ) { 4432 $self->warning(<<EOM); 4433 4434You may have encountered a bug in perltidy. However, since you are using the 4435-extrude option, the problem may be with perl or one of its modules, which have 4436occasional problems with this type of file. If you believe that the 4437problem is with perltidy, and the problem is not listed in the BUGS file at 4438http://perltidy.sourceforge.net, please report it so that it can be corrected. 4439Include the smallest possible script which has the problem, along with the .LOG 4440file. See the manual pages for contact information. 4441Thank you! 4442EOM 4443 } 4444 else { 4445 $self->warning(<<EOM); 4446 4447Oops, you seem to have encountered a bug in perltidy. Please check the 4448BUGS file at http://perltidy.sourceforge.net. If the problem is not 4449listed there, please report it so that it can be corrected. Include the 4450smallest possible script which produces this message, along with the 4451.LOG file if appropriate. See the manual pages for contact information. 4452Your efforts are appreciated. 4453Thank you! 4454EOM 4455 my $added_semicolon_count = 0; 4456 eval { 4457 $added_semicolon_count = 4458 $formatter->get_added_semicolon_count(); 4459 }; 4460 if ( $added_semicolon_count > 0 ) { 4461 $self->warning(<<EOM); 4462 4463The log file shows that perltidy added $added_semicolon_count semicolons. 4464Please rerun with -nasc to see if that is the cause of the syntax error. Even 4465if that is the problem, please report it so that it can be fixed. 4466EOM 4467 4468 } 4469 } 4470 } 4471} 4472 4473sub finish { 4474 4475 # called after all formatting to summarize errors 4476 my $self = shift; 4477 my ( $infile_syntax_ok, $formatter ) = @_; 4478 4479 my $rOpts = $self->{_rOpts}; 4480 my $warning_count = $self->{_warning_count}; 4481 my $saw_code_bug = $self->{_saw_code_bug}; 4482 4483 my $save_logfile = 4484 ( $saw_code_bug == 0 && $infile_syntax_ok == 1 ) 4485 || $saw_code_bug == 1 4486 || $rOpts->{'logfile'}; 4487 my $log_file = $self->{_log_file}; 4488 if ($warning_count) { 4489 if ($save_logfile) { 4490 $self->block_log_output(); # avoid echoing this to the logfile 4491 $self->warning( 4492 "The logfile $log_file may contain useful information\n"); 4493 $self->unblock_log_output(); 4494 } 4495 4496 if ( $self->{_complaint_count} > 0 ) { 4497 $self->warning( 4498"To see $self->{_complaint_count} non-critical warnings rerun with -w\n" 4499 ); 4500 } 4501 4502 if ( $self->{_saw_brace_error} 4503 && ( $rOpts->{'logfile-gap'} > 1 || !$save_logfile ) ) 4504 { 4505 $self->warning("To save a full .LOG file rerun with -g\n"); 4506 } 4507 } 4508 $self->ask_user_for_bug_report( $infile_syntax_ok, $formatter ); 4509 4510 if ($save_logfile) { 4511 my $log_file = $self->{_log_file}; 4512 my ( $fh, $filename ) = Perl::Tidy::streamhandle( $log_file, 'w' ); 4513 if ($fh) { 4514 my $routput_array = $self->{_output_array}; 4515 foreach ( @{$routput_array} ) { $fh->print($_) } 4516 if ( $log_file ne '-' && !ref $log_file ) { 4517 eval { $fh->close() }; 4518 } 4519 } 4520 } 4521} 4522 4523##################################################################### 4524# 4525# The Perl::Tidy::DevNull class supplies a dummy print method 4526# 4527##################################################################### 4528 4529package Perl::Tidy::DevNull; 4530sub new { return bless {}, $_[0] } 4531sub print { return } 4532sub close { return } 4533 4534##################################################################### 4535# 4536# The Perl::Tidy::HtmlWriter class writes a copy of the input stream in html 4537# 4538##################################################################### 4539 4540package Perl::Tidy::HtmlWriter; 4541 4542use File::Basename; 4543 4544# class variables 4545use vars qw{ 4546 %html_color 4547 %html_bold 4548 %html_italic 4549 %token_short_names 4550 %short_to_long_names 4551 $rOpts 4552 $css_filename 4553 $css_linkname 4554 $missing_html_entities 4555}; 4556 4557# replace unsafe characters with HTML entity representation if HTML::Entities 4558# is available 4559{ eval "use HTML::Entities"; $missing_html_entities = $@; } 4560 4561sub new { 4562 4563 my ( $class, $input_file, $html_file, $extension, $html_toc_extension, 4564 $html_src_extension ) 4565 = @_; 4566 4567 my $html_file_opened = 0; 4568 my $html_fh; 4569 ( $html_fh, my $html_filename ) = 4570 Perl::Tidy::streamhandle( $html_file, 'w' ); 4571 unless ($html_fh) { 4572 Perl::Tidy::Warn("can't open $html_file: $!\n"); 4573 return undef; 4574 } 4575 $html_file_opened = 1; 4576 4577 if ( !$input_file || $input_file eq '-' || ref($input_file) ) { 4578 $input_file = "NONAME"; 4579 } 4580 4581 # write the table of contents to a string 4582 my $toc_string; 4583 my $html_toc_fh = Perl::Tidy::IOScalar->new( \$toc_string, 'w' ); 4584 4585 my $html_pre_fh; 4586 my @pre_string_stack; 4587 if ( $rOpts->{'html-pre-only'} ) { 4588 4589 # pre section goes directly to the output stream 4590 $html_pre_fh = $html_fh; 4591 $html_pre_fh->print( <<"PRE_END"); 4592<pre> 4593PRE_END 4594 } 4595 else { 4596 4597 # pre section go out to a temporary string 4598 my $pre_string; 4599 $html_pre_fh = Perl::Tidy::IOScalar->new( \$pre_string, 'w' ); 4600 push @pre_string_stack, \$pre_string; 4601 } 4602 4603 # pod text gets diverted if the 'pod2html' is used 4604 my $html_pod_fh; 4605 my $pod_string; 4606 if ( $rOpts->{'pod2html'} ) { 4607 if ( $rOpts->{'html-pre-only'} ) { 4608 undef $rOpts->{'pod2html'}; 4609 } 4610 else { 4611 eval "use Pod::Html"; 4612 if ($@) { 4613 Perl::Tidy::Warn 4614"unable to find Pod::Html; cannot use pod2html\n-npod disables this message\n"; 4615 undef $rOpts->{'pod2html'}; 4616 } 4617 else { 4618 $html_pod_fh = Perl::Tidy::IOScalar->new( \$pod_string, 'w' ); 4619 } 4620 } 4621 } 4622 4623 my $toc_filename; 4624 my $src_filename; 4625 if ( $rOpts->{'frames'} ) { 4626 unless ($extension) { 4627 Perl::Tidy::Warn 4628"cannot use frames without a specified output extension; ignoring -frm\n"; 4629 undef $rOpts->{'frames'}; 4630 } 4631 else { 4632 $toc_filename = $input_file . $html_toc_extension . $extension; 4633 $src_filename = $input_file . $html_src_extension . $extension; 4634 } 4635 } 4636 4637 # ---------------------------------------------------------- 4638 # Output is now directed as follows: 4639 # html_toc_fh <-- table of contents items 4640 # html_pre_fh <-- the <pre> section of formatted code, except: 4641 # html_pod_fh <-- pod goes here with the pod2html option 4642 # ---------------------------------------------------------- 4643 4644 my $title = $rOpts->{'title'}; 4645 unless ($title) { 4646 ( $title, my $path ) = fileparse($input_file); 4647 } 4648 my $toc_item_count = 0; 4649 my $in_toc_package = ""; 4650 my $last_level = 0; 4651 bless { 4652 _input_file => $input_file, # name of input file 4653 _title => $title, # title, unescaped 4654 _html_file => $html_file, # name of .html output file 4655 _toc_filename => $toc_filename, # for frames option 4656 _src_filename => $src_filename, # for frames option 4657 _html_file_opened => $html_file_opened, # a flag 4658 _html_fh => $html_fh, # the output stream 4659 _html_pre_fh => $html_pre_fh, # pre section goes here 4660 _rpre_string_stack => \@pre_string_stack, # stack of pre sections 4661 _html_pod_fh => $html_pod_fh, # pod goes here if pod2html 4662 _rpod_string => \$pod_string, # string holding pod 4663 _pod_cut_count => 0, # how many =cut's? 4664 _html_toc_fh => $html_toc_fh, # fh for table of contents 4665 _rtoc_string => \$toc_string, # string holding toc 4666 _rtoc_item_count => \$toc_item_count, # how many toc items 4667 _rin_toc_package => \$in_toc_package, # package name 4668 _rtoc_name_count => {}, # hash to track unique names 4669 _rpackage_stack => [], # stack to check for package 4670 # name changes 4671 _rlast_level => \$last_level, # brace indentation level 4672 }, $class; 4673} 4674 4675sub add_toc_item { 4676 4677 # Add an item to the html table of contents. 4678 # This is called even if no table of contents is written, 4679 # because we still want to put the anchors in the <pre> text. 4680 # We are given an anchor name and its type; types are: 4681 # 'package', 'sub', '__END__', '__DATA__', 'EOF' 4682 # There must be an 'EOF' call at the end to wrap things up. 4683 my $self = shift; 4684 my ( $name, $type ) = @_; 4685 my $html_toc_fh = $self->{_html_toc_fh}; 4686 my $html_pre_fh = $self->{_html_pre_fh}; 4687 my $rtoc_name_count = $self->{_rtoc_name_count}; 4688 my $rtoc_item_count = $self->{_rtoc_item_count}; 4689 my $rlast_level = $self->{_rlast_level}; 4690 my $rin_toc_package = $self->{_rin_toc_package}; 4691 my $rpackage_stack = $self->{_rpackage_stack}; 4692 4693 # packages contain sublists of subs, so to avoid errors all package 4694 # items are written and finished with the following routines 4695 my $end_package_list = sub { 4696 if ($$rin_toc_package) { 4697 $html_toc_fh->print("</ul>\n</li>\n"); 4698 $$rin_toc_package = ""; 4699 } 4700 }; 4701 4702 my $start_package_list = sub { 4703 my ( $unique_name, $package ) = @_; 4704 if ($$rin_toc_package) { $end_package_list->() } 4705 $html_toc_fh->print(<<EOM); 4706<li><a href=\"#$unique_name\">package $package</a> 4707<ul> 4708EOM 4709 $$rin_toc_package = $package; 4710 }; 4711 4712 # start the table of contents on the first item 4713 unless ($$rtoc_item_count) { 4714 4715 # but just quit if we hit EOF without any other entries 4716 # in this case, there will be no toc 4717 return if ( $type eq 'EOF' ); 4718 $html_toc_fh->print( <<"TOC_END"); 4719<!-- BEGIN CODE INDEX --><a name="code-index"></a> 4720<ul> 4721TOC_END 4722 } 4723 $$rtoc_item_count++; 4724 4725 # make a unique anchor name for this location: 4726 # - packages get a 'package-' prefix 4727 # - subs use their names 4728 my $unique_name = $name; 4729 if ( $type eq 'package' ) { $unique_name = "package-$name" } 4730 4731 # append '-1', '-2', etc if necessary to make unique; this will 4732 # be unique because subs and packages cannot have a '-' 4733 if ( my $count = $rtoc_name_count->{ lc $unique_name }++ ) { 4734 $unique_name .= "-$count"; 4735 } 4736 4737 # - all names get terminal '-' if pod2html is used, to avoid 4738 # conflicts with anchor names created by pod2html 4739 if ( $rOpts->{'pod2html'} ) { $unique_name .= '-' } 4740 4741 # start/stop lists of subs 4742 if ( $type eq 'sub' ) { 4743 my $package = $rpackage_stack->[$$rlast_level]; 4744 unless ($package) { $package = 'main' } 4745 4746 # if we're already in a package/sub list, be sure its the right 4747 # package or else close it 4748 if ( $$rin_toc_package && $$rin_toc_package ne $package ) { 4749 $end_package_list->(); 4750 } 4751 4752 # start a package/sub list if necessary 4753 unless ($$rin_toc_package) { 4754 $start_package_list->( $unique_name, $package ); 4755 } 4756 } 4757 4758 # now write an entry in the toc for this item 4759 if ( $type eq 'package' ) { 4760 $start_package_list->( $unique_name, $name ); 4761 } 4762 elsif ( $type eq 'sub' ) { 4763 $html_toc_fh->print("<li><a href=\"#$unique_name\">$name</a></li>\n"); 4764 } 4765 else { 4766 $end_package_list->(); 4767 $html_toc_fh->print("<li><a href=\"#$unique_name\">$name</a></li>\n"); 4768 } 4769 4770 # write the anchor in the <pre> section 4771 $html_pre_fh->print("<a name=\"$unique_name\"></a>"); 4772 4773 # end the table of contents, if any, on the end of file 4774 if ( $type eq 'EOF' ) { 4775 $html_toc_fh->print( <<"TOC_END"); 4776</ul> 4777<!-- END CODE INDEX --> 4778TOC_END 4779 } 4780} 4781 4782BEGIN { 4783 4784 # This is the official list of tokens which may be identified by the 4785 # user. Long names are used as getopt keys. Short names are 4786 # convenient short abbreviations for specifying input. Short names 4787 # somewhat resemble token type characters, but are often different 4788 # because they may only be alphanumeric, to allow command line 4789 # input. Also, note that because of case insensitivity of html, 4790 # this table must be in a single case only (I've chosen to use all 4791 # lower case). 4792 # When adding NEW_TOKENS: update this hash table 4793 # short names => long names 4794 %short_to_long_names = ( 4795 'n' => 'numeric', 4796 'p' => 'paren', 4797 'q' => 'quote', 4798 's' => 'structure', 4799 'c' => 'comment', 4800 'v' => 'v-string', 4801 'cm' => 'comma', 4802 'w' => 'bareword', 4803 'co' => 'colon', 4804 'pu' => 'punctuation', 4805 'i' => 'identifier', 4806 'j' => 'label', 4807 'h' => 'here-doc-target', 4808 'hh' => 'here-doc-text', 4809 'k' => 'keyword', 4810 'sc' => 'semicolon', 4811 'm' => 'subroutine', 4812 'pd' => 'pod-text', 4813 ); 4814 4815 # Now we have to map actual token types into one of the above short 4816 # names; any token types not mapped will get 'punctuation' 4817 # properties. 4818 4819 # The values of this hash table correspond to the keys of the 4820 # previous hash table. 4821 # The keys of this hash table are token types and can be seen 4822 # by running with --dump-token-types (-dtt). 4823 4824 # When adding NEW_TOKENS: update this hash table 4825 # $type => $short_name 4826 %token_short_names = ( 4827 '#' => 'c', 4828 'n' => 'n', 4829 'v' => 'v', 4830 'k' => 'k', 4831 'F' => 'k', 4832 'Q' => 'q', 4833 'q' => 'q', 4834 'J' => 'j', 4835 'j' => 'j', 4836 'h' => 'h', 4837 'H' => 'hh', 4838 'w' => 'w', 4839 ',' => 'cm', 4840 '=>' => 'cm', 4841 ';' => 'sc', 4842 ':' => 'co', 4843 'f' => 'sc', 4844 '(' => 'p', 4845 ')' => 'p', 4846 'M' => 'm', 4847 'P' => 'pd', 4848 'A' => 'co', 4849 ); 4850 4851 # These token types will all be called identifiers for now 4852 # FIXME: could separate user defined modules as separate type 4853 my @identifier = qw" i t U C Y Z G :: CORE::"; 4854 @token_short_names{@identifier} = ('i') x scalar(@identifier); 4855 4856 # These token types will be called 'structure' 4857 my @structure = qw" { } "; 4858 @token_short_names{@structure} = ('s') x scalar(@structure); 4859 4860 # OLD NOTES: save for reference 4861 # Any of these could be added later if it would be useful. 4862 # For now, they will by default become punctuation 4863 # my @list = qw" L R [ ] "; 4864 # @token_long_names{@list} = ('non-structure') x scalar(@list); 4865 # 4866 # my @list = qw" 4867 # / /= * *= ** **= + += - -= % %= = ++ -- << <<= >> >>= pp p m mm 4868 # "; 4869 # @token_long_names{@list} = ('math') x scalar(@list); 4870 # 4871 # my @list = qw" & &= ~ ~= ^ ^= | |= "; 4872 # @token_long_names{@list} = ('bit') x scalar(@list); 4873 # 4874 # my @list = qw" == != < > <= <=> "; 4875 # @token_long_names{@list} = ('numerical-comparison') x scalar(@list); 4876 # 4877 # my @list = qw" && || ! &&= ||= //= "; 4878 # @token_long_names{@list} = ('logical') x scalar(@list); 4879 # 4880 # my @list = qw" . .= =~ !~ x x= "; 4881 # @token_long_names{@list} = ('string-operators') x scalar(@list); 4882 # 4883 # # Incomplete.. 4884 # my @list = qw" .. -> <> ... \ ? "; 4885 # @token_long_names{@list} = ('misc-operators') x scalar(@list); 4886 4887} 4888 4889sub make_getopt_long_names { 4890 my $class = shift; 4891 my ($rgetopt_names) = @_; 4892 while ( my ( $short_name, $name ) = each %short_to_long_names ) { 4893 push @$rgetopt_names, "html-color-$name=s"; 4894 push @$rgetopt_names, "html-italic-$name!"; 4895 push @$rgetopt_names, "html-bold-$name!"; 4896 } 4897 push @$rgetopt_names, "html-color-background=s"; 4898 push @$rgetopt_names, "html-linked-style-sheet=s"; 4899 push @$rgetopt_names, "nohtml-style-sheets"; 4900 push @$rgetopt_names, "html-pre-only"; 4901 push @$rgetopt_names, "html-line-numbers"; 4902 push @$rgetopt_names, "html-entities!"; 4903 push @$rgetopt_names, "stylesheet"; 4904 push @$rgetopt_names, "html-table-of-contents!"; 4905 push @$rgetopt_names, "pod2html!"; 4906 push @$rgetopt_names, "frames!"; 4907 push @$rgetopt_names, "html-toc-extension=s"; 4908 push @$rgetopt_names, "html-src-extension=s"; 4909 4910 # Pod::Html parameters: 4911 push @$rgetopt_names, "backlink=s"; 4912 push @$rgetopt_names, "cachedir=s"; 4913 push @$rgetopt_names, "htmlroot=s"; 4914 push @$rgetopt_names, "libpods=s"; 4915 push @$rgetopt_names, "podpath=s"; 4916 push @$rgetopt_names, "podroot=s"; 4917 push @$rgetopt_names, "title=s"; 4918 4919 # Pod::Html parameters with leading 'pod' which will be removed 4920 # before the call to Pod::Html 4921 push @$rgetopt_names, "podquiet!"; 4922 push @$rgetopt_names, "podverbose!"; 4923 push @$rgetopt_names, "podrecurse!"; 4924 push @$rgetopt_names, "podflush"; 4925 push @$rgetopt_names, "podheader!"; 4926 push @$rgetopt_names, "podindex!"; 4927} 4928 4929sub make_abbreviated_names { 4930 4931 # We're appending things like this to the expansion list: 4932 # 'hcc' => [qw(html-color-comment)], 4933 # 'hck' => [qw(html-color-keyword)], 4934 # etc 4935 my $class = shift; 4936 my ($rexpansion) = @_; 4937 4938 # abbreviations for color/bold/italic properties 4939 while ( my ( $short_name, $long_name ) = each %short_to_long_names ) { 4940 ${$rexpansion}{"hc$short_name"} = ["html-color-$long_name"]; 4941 ${$rexpansion}{"hb$short_name"} = ["html-bold-$long_name"]; 4942 ${$rexpansion}{"hi$short_name"} = ["html-italic-$long_name"]; 4943 ${$rexpansion}{"nhb$short_name"} = ["nohtml-bold-$long_name"]; 4944 ${$rexpansion}{"nhi$short_name"} = ["nohtml-italic-$long_name"]; 4945 } 4946 4947 # abbreviations for all other html options 4948 ${$rexpansion}{"hcbg"} = ["html-color-background"]; 4949 ${$rexpansion}{"pre"} = ["html-pre-only"]; 4950 ${$rexpansion}{"toc"} = ["html-table-of-contents"]; 4951 ${$rexpansion}{"ntoc"} = ["nohtml-table-of-contents"]; 4952 ${$rexpansion}{"nnn"} = ["html-line-numbers"]; 4953 ${$rexpansion}{"hent"} = ["html-entities"]; 4954 ${$rexpansion}{"nhent"} = ["nohtml-entities"]; 4955 ${$rexpansion}{"css"} = ["html-linked-style-sheet"]; 4956 ${$rexpansion}{"nss"} = ["nohtml-style-sheets"]; 4957 ${$rexpansion}{"ss"} = ["stylesheet"]; 4958 ${$rexpansion}{"pod"} = ["pod2html"]; 4959 ${$rexpansion}{"npod"} = ["nopod2html"]; 4960 ${$rexpansion}{"frm"} = ["frames"]; 4961 ${$rexpansion}{"nfrm"} = ["noframes"]; 4962 ${$rexpansion}{"text"} = ["html-toc-extension"]; 4963 ${$rexpansion}{"sext"} = ["html-src-extension"]; 4964} 4965 4966sub check_options { 4967 4968 # This will be called once after options have been parsed 4969 my $class = shift; 4970 $rOpts = shift; 4971 4972 # X11 color names for default settings that seemed to look ok 4973 # (these color names are only used for programming clarity; the hex 4974 # numbers are actually written) 4975 use constant ForestGreen => "#228B22"; 4976 use constant SaddleBrown => "#8B4513"; 4977 use constant magenta4 => "#8B008B"; 4978 use constant IndianRed3 => "#CD5555"; 4979 use constant DeepSkyBlue4 => "#00688B"; 4980 use constant MediumOrchid3 => "#B452CD"; 4981 use constant black => "#000000"; 4982 use constant white => "#FFFFFF"; 4983 use constant red => "#FF0000"; 4984 4985 # set default color, bold, italic properties 4986 # anything not listed here will be given the default (punctuation) color -- 4987 # these types currently not listed and get default: ws pu s sc cm co p 4988 # When adding NEW_TOKENS: add an entry here if you don't want defaults 4989 4990 # set_default_properties( $short_name, default_color, bold?, italic? ); 4991 set_default_properties( 'c', ForestGreen, 0, 0 ); 4992 set_default_properties( 'pd', ForestGreen, 0, 1 ); 4993 set_default_properties( 'k', magenta4, 1, 0 ); # was SaddleBrown 4994 set_default_properties( 'q', IndianRed3, 0, 0 ); 4995 set_default_properties( 'hh', IndianRed3, 0, 1 ); 4996 set_default_properties( 'h', IndianRed3, 1, 0 ); 4997 set_default_properties( 'i', DeepSkyBlue4, 0, 0 ); 4998 set_default_properties( 'w', black, 0, 0 ); 4999 set_default_properties( 'n', MediumOrchid3, 0, 0 ); 5000 set_default_properties( 'v', MediumOrchid3, 0, 0 ); 5001 set_default_properties( 'j', IndianRed3, 1, 0 ); 5002 set_default_properties( 'm', red, 1, 0 ); 5003 5004 set_default_color( 'html-color-background', white ); 5005 set_default_color( 'html-color-punctuation', black ); 5006 5007 # setup property lookup tables for tokens based on their short names 5008 # every token type has a short name, and will use these tables 5009 # to do the html markup 5010 while ( my ( $short_name, $long_name ) = each %short_to_long_names ) { 5011 $html_color{$short_name} = $rOpts->{"html-color-$long_name"}; 5012 $html_bold{$short_name} = $rOpts->{"html-bold-$long_name"}; 5013 $html_italic{$short_name} = $rOpts->{"html-italic-$long_name"}; 5014 } 5015 5016 # write style sheet to STDOUT and die if requested 5017 if ( defined( $rOpts->{'stylesheet'} ) ) { 5018 write_style_sheet_file('-'); 5019 Perl::Tidy::Exit 0; 5020 } 5021 5022 # make sure user gives a file name after -css 5023 if ( defined( $rOpts->{'html-linked-style-sheet'} ) ) { 5024 $css_linkname = $rOpts->{'html-linked-style-sheet'}; 5025 if ( $css_linkname =~ /^-/ ) { 5026 Perl::Tidy::Die "You must specify a valid filename after -css\n"; 5027 } 5028 } 5029 5030 # check for conflict 5031 if ( $css_linkname && $rOpts->{'nohtml-style-sheets'} ) { 5032 $rOpts->{'nohtml-style-sheets'} = 0; 5033 warning("You can't specify both -css and -nss; -nss ignored\n"); 5034 } 5035 5036 # write a style sheet file if necessary 5037 if ($css_linkname) { 5038 5039 # if the selected filename exists, don't write, because user may 5040 # have done some work by hand to create it; use backup name instead 5041 # Also, this will avoid a potential disaster in which the user 5042 # forgets to specify the style sheet, like this: 5043 # perltidy -html -css myfile1.pl myfile2.pl 5044 # This would cause myfile1.pl to parsed as the style sheet by GetOpts 5045 my $css_filename = $css_linkname; 5046 unless ( -e $css_filename ) { 5047 write_style_sheet_file($css_filename); 5048 } 5049 } 5050 $missing_html_entities = 1 unless $rOpts->{'html-entities'}; 5051} 5052 5053sub write_style_sheet_file { 5054 5055 my $css_filename = shift; 5056 my $fh; 5057 unless ( $fh = IO::File->new("> $css_filename") ) { 5058 Perl::Tidy::Die "can't open $css_filename: $!\n"; 5059 } 5060 write_style_sheet_data($fh); 5061 eval { $fh->close }; 5062} 5063 5064sub write_style_sheet_data { 5065 5066 # write the style sheet data to an open file handle 5067 my $fh = shift; 5068 5069 my $bg_color = $rOpts->{'html-color-background'}; 5070 my $text_color = $rOpts->{'html-color-punctuation'}; 5071 5072 # pre-bgcolor is new, and may not be defined 5073 my $pre_bg_color = $rOpts->{'html-pre-color-background'}; 5074 $pre_bg_color = $bg_color unless $pre_bg_color; 5075 5076 $fh->print(<<"EOM"); 5077/* default style sheet generated by perltidy */ 5078body {background: $bg_color; color: $text_color} 5079pre { color: $text_color; 5080 background: $pre_bg_color; 5081 font-family: courier; 5082 } 5083 5084EOM 5085 5086 foreach my $short_name ( sort keys %short_to_long_names ) { 5087 my $long_name = $short_to_long_names{$short_name}; 5088 5089 my $abbrev = '.' . $short_name; 5090 if ( length($short_name) == 1 ) { $abbrev .= ' ' } # for alignment 5091 my $color = $html_color{$short_name}; 5092 if ( !defined($color) ) { $color = $text_color } 5093 $fh->print("$abbrev \{ color: $color;"); 5094 5095 if ( $html_bold{$short_name} ) { 5096 $fh->print(" font-weight:bold;"); 5097 } 5098 5099 if ( $html_italic{$short_name} ) { 5100 $fh->print(" font-style:italic;"); 5101 } 5102 $fh->print("} /* $long_name */\n"); 5103 } 5104} 5105 5106sub set_default_color { 5107 5108 # make sure that options hash $rOpts->{$key} contains a valid color 5109 my ( $key, $color ) = @_; 5110 if ( $rOpts->{$key} ) { $color = $rOpts->{$key} } 5111 $rOpts->{$key} = check_RGB($color); 5112} 5113 5114sub check_RGB { 5115 5116 # if color is a 6 digit hex RGB value, prepend a #, otherwise 5117 # assume that it is a valid ascii color name 5118 my ($color) = @_; 5119 if ( $color =~ /^[0-9a-fA-F]{6,6}$/ ) { $color = "#$color" } 5120 return $color; 5121} 5122 5123sub set_default_properties { 5124 my ( $short_name, $color, $bold, $italic ) = @_; 5125 5126 set_default_color( "html-color-$short_to_long_names{$short_name}", $color ); 5127 my $key; 5128 $key = "html-bold-$short_to_long_names{$short_name}"; 5129 $rOpts->{$key} = ( defined $rOpts->{$key} ) ? $rOpts->{$key} : $bold; 5130 $key = "html-italic-$short_to_long_names{$short_name}"; 5131 $rOpts->{$key} = ( defined $rOpts->{$key} ) ? $rOpts->{$key} : $italic; 5132} 5133 5134sub pod_to_html { 5135 5136 # Use Pod::Html to process the pod and make the page 5137 # then merge the perltidy code sections into it. 5138 # return 1 if success, 0 otherwise 5139 my $self = shift; 5140 my ( $pod_string, $css_string, $toc_string, $rpre_string_stack ) = @_; 5141 my $input_file = $self->{_input_file}; 5142 my $title = $self->{_title}; 5143 my $success_flag = 0; 5144 5145 # don't try to use pod2html if no pod 5146 unless ($pod_string) { 5147 return $success_flag; 5148 } 5149 5150 # Pod::Html requires a real temporary filename 5151 # If we are making a frame, we have a name available 5152 # Otherwise, we have to fine one 5153 my $tmpfile; 5154 if ( $rOpts->{'frames'} ) { 5155 $tmpfile = $self->{_toc_filename}; 5156 } 5157 else { 5158 $tmpfile = Perl::Tidy::make_temporary_filename(); 5159 } 5160 my $fh_tmp = IO::File->new( $tmpfile, 'w' ); 5161 unless ($fh_tmp) { 5162 Perl::Tidy::Warn 5163 "unable to open temporary file $tmpfile; cannot use pod2html\n"; 5164 return $success_flag; 5165 } 5166 5167 #------------------------------------------------------------------ 5168 # Warning: a temporary file is open; we have to clean up if 5169 # things go bad. From here on all returns should be by going to 5170 # RETURN so that the temporary file gets unlinked. 5171 #------------------------------------------------------------------ 5172 5173 # write the pod text to the temporary file 5174 $fh_tmp->print($pod_string); 5175 $fh_tmp->close(); 5176 5177 # Hand off the pod to pod2html. 5178 # Note that we can use the same temporary filename for input and output 5179 # because of the way pod2html works. 5180 { 5181 5182 my @args; 5183 push @args, "--infile=$tmpfile", "--outfile=$tmpfile", "--title=$title"; 5184 my $kw; 5185 5186 # Flags with string args: 5187 # "backlink=s", "cachedir=s", "htmlroot=s", "libpods=s", 5188 # "podpath=s", "podroot=s" 5189 # Note: -css=s is handled by perltidy itself 5190 foreach $kw (qw(backlink cachedir htmlroot libpods podpath podroot)) { 5191 if ( $rOpts->{$kw} ) { push @args, "--$kw=$rOpts->{$kw}" } 5192 } 5193 5194 # Toggle switches; these have extra leading 'pod' 5195 # "header!", "index!", "recurse!", "quiet!", "verbose!" 5196 foreach $kw (qw(podheader podindex podrecurse podquiet podverbose)) { 5197 my $kwd = $kw; # allows us to strip 'pod' 5198 if ( $rOpts->{$kw} ) { $kwd =~ s/^pod//; push @args, "--$kwd" } 5199 elsif ( defined( $rOpts->{$kw} ) ) { 5200 $kwd =~ s/^pod//; 5201 push @args, "--no$kwd"; 5202 } 5203 } 5204 5205 # "flush", 5206 $kw = 'podflush'; 5207 if ( $rOpts->{$kw} ) { $kw =~ s/^pod//; push @args, "--$kw" } 5208 5209 # Must clean up if pod2html dies (it can); 5210 # Be careful not to overwrite callers __DIE__ routine 5211 local $SIG{__DIE__} = sub { 5212 unlink $tmpfile if -e $tmpfile; 5213 Perl::Tidy::Die $_[0]; 5214 }; 5215 5216 pod2html(@args); 5217 } 5218 $fh_tmp = IO::File->new( $tmpfile, 'r' ); 5219 unless ($fh_tmp) { 5220 5221 # this error shouldn't happen ... we just used this filename 5222 Perl::Tidy::Warn 5223 "unable to open temporary file $tmpfile; cannot use pod2html\n"; 5224 goto RETURN; 5225 } 5226 5227 my $html_fh = $self->{_html_fh}; 5228 my @toc; 5229 my $in_toc; 5230 my $no_print; 5231 5232 # This routine will write the html selectively and store the toc 5233 my $html_print = sub { 5234 foreach (@_) { 5235 $html_fh->print($_) unless ($no_print); 5236 if ($in_toc) { push @toc, $_ } 5237 } 5238 }; 5239 5240 # loop over lines of html output from pod2html and merge in 5241 # the necessary perltidy html sections 5242 my ( $saw_body, $saw_index, $saw_body_end ); 5243 while ( my $line = $fh_tmp->getline() ) { 5244 5245 if ( $line =~ /^\s*<html>\s*$/i ) { 5246 my $date = localtime; 5247 $html_print->("<!-- Generated by perltidy on $date -->\n"); 5248 $html_print->($line); 5249 } 5250 5251 # Copy the perltidy css, if any, after <body> tag 5252 elsif ( $line =~ /^\s*<body.*>\s*$/i ) { 5253 $saw_body = 1; 5254 $html_print->($css_string) if $css_string; 5255 $html_print->($line); 5256 5257 # add a top anchor and heading 5258 $html_print->("<a name=\"-top-\"></a>\n"); 5259 $title = escape_html($title); 5260 $html_print->("<h1>$title</h1>\n"); 5261 } 5262 elsif ( $line =~ /^\s*<!-- INDEX BEGIN -->\s*$/i ) { 5263 $in_toc = 1; 5264 5265 # when frames are used, an extra table of contents in the 5266 # contents panel is confusing, so don't print it 5267 $no_print = $rOpts->{'frames'} 5268 || !$rOpts->{'html-table-of-contents'}; 5269 $html_print->("<h2>Doc Index:</h2>\n") if $rOpts->{'frames'}; 5270 $html_print->($line); 5271 } 5272 5273 # Copy the perltidy toc, if any, after the Pod::Html toc 5274 elsif ( $line =~ /^\s*<!-- INDEX END -->\s*$/i ) { 5275 $saw_index = 1; 5276 $html_print->($line); 5277 if ($toc_string) { 5278 $html_print->("<hr />\n") if $rOpts->{'frames'}; 5279 $html_print->("<h2>Code Index:</h2>\n"); 5280 my @toc = map { $_ .= "\n" } split /\n/, $toc_string; 5281 $html_print->(@toc); 5282 } 5283 $in_toc = 0; 5284 $no_print = 0; 5285 } 5286 5287 # Copy one perltidy section after each marker 5288 elsif ( $line =~ /^(.*)<!-- pERLTIDY sECTION -->(.*)$/ ) { 5289 $line = $2; 5290 $html_print->($1) if $1; 5291 5292 # Intermingle code and pod sections if we saw multiple =cut's. 5293 if ( $self->{_pod_cut_count} > 1 ) { 5294 my $rpre_string = shift(@$rpre_string_stack); 5295 if ($$rpre_string) { 5296 $html_print->('<pre>'); 5297 $html_print->($$rpre_string); 5298 $html_print->('</pre>'); 5299 } 5300 else { 5301 5302 # shouldn't happen: we stored a string before writing 5303 # each marker. 5304 Perl::Tidy::Warn 5305"Problem merging html stream with pod2html; order may be wrong\n"; 5306 } 5307 $html_print->($line); 5308 } 5309 5310 # If didn't see multiple =cut lines, we'll put the pod out first 5311 # and then the code, because it's less confusing. 5312 else { 5313 5314 # since we are not intermixing code and pod, we don't need 5315 # or want any <hr> lines which separated pod and code 5316 $html_print->($line) unless ( $line =~ /^\s*<hr>\s*$/i ); 5317 } 5318 } 5319 5320 # Copy any remaining code section before the </body> tag 5321 elsif ( $line =~ /^\s*<\/body>\s*$/i ) { 5322 $saw_body_end = 1; 5323 if (@$rpre_string_stack) { 5324 unless ( $self->{_pod_cut_count} > 1 ) { 5325 $html_print->('<hr />'); 5326 } 5327 while ( my $rpre_string = shift(@$rpre_string_stack) ) { 5328 $html_print->('<pre>'); 5329 $html_print->($$rpre_string); 5330 $html_print->('</pre>'); 5331 } 5332 } 5333 $html_print->($line); 5334 } 5335 else { 5336 $html_print->($line); 5337 } 5338 } 5339 5340 $success_flag = 1; 5341 unless ($saw_body) { 5342 Perl::Tidy::Warn "Did not see <body> in pod2html output\n"; 5343 $success_flag = 0; 5344 } 5345 unless ($saw_body_end) { 5346 Perl::Tidy::Warn "Did not see </body> in pod2html output\n"; 5347 $success_flag = 0; 5348 } 5349 unless ($saw_index) { 5350 Perl::Tidy::Warn "Did not find INDEX END in pod2html output\n"; 5351 $success_flag = 0; 5352 } 5353 5354 RETURN: 5355 eval { $html_fh->close() }; 5356 5357 # note that we have to unlink tmpfile before making frames 5358 # because the tmpfile may be one of the names used for frames 5359 unlink $tmpfile if -e $tmpfile; 5360 if ( $success_flag && $rOpts->{'frames'} ) { 5361 $self->make_frame( \@toc ); 5362 } 5363 return $success_flag; 5364} 5365 5366sub make_frame { 5367 5368 # Make a frame with table of contents in the left panel 5369 # and the text in the right panel. 5370 # On entry: 5371 # $html_filename contains the no-frames html output 5372 # $rtoc is a reference to an array with the table of contents 5373 my $self = shift; 5374 my ($rtoc) = @_; 5375 my $input_file = $self->{_input_file}; 5376 my $html_filename = $self->{_html_file}; 5377 my $toc_filename = $self->{_toc_filename}; 5378 my $src_filename = $self->{_src_filename}; 5379 my $title = $self->{_title}; 5380 $title = escape_html($title); 5381 5382 # FUTURE input parameter: 5383 my $top_basename = ""; 5384 5385 # We need to produce 3 html files: 5386 # 1. - the table of contents 5387 # 2. - the contents (source code) itself 5388 # 3. - the frame which contains them 5389 5390 # get basenames for relative links 5391 my ( $toc_basename, $toc_path ) = fileparse($toc_filename); 5392 my ( $src_basename, $src_path ) = fileparse($src_filename); 5393 5394 # 1. Make the table of contents panel, with appropriate changes 5395 # to the anchor names 5396 my $src_frame_name = 'SRC'; 5397 my $first_anchor = 5398 write_toc_html( $title, $toc_filename, $src_basename, $rtoc, 5399 $src_frame_name ); 5400 5401 # 2. The current .html filename is renamed to be the contents panel 5402 rename( $html_filename, $src_filename ) 5403 or Perl::Tidy::Die "Cannot rename $html_filename to $src_filename:$!\n"; 5404 5405 # 3. Then use the original html filename for the frame 5406 write_frame_html( 5407 $title, $html_filename, $top_basename, 5408 $toc_basename, $src_basename, $src_frame_name 5409 ); 5410} 5411 5412sub write_toc_html { 5413 5414 # write a separate html table of contents file for frames 5415 my ( $title, $toc_filename, $src_basename, $rtoc, $src_frame_name ) = @_; 5416 my $fh = IO::File->new( $toc_filename, 'w' ) 5417 or Perl::Tidy::Die "Cannot open $toc_filename:$!\n"; 5418 $fh->print(<<EOM); 5419<html> 5420<head> 5421<title>$title</title> 5422</head> 5423<body> 5424<h1><a href=\"$src_basename#-top-" target="$src_frame_name">$title</a></h1> 5425EOM 5426 5427 my $first_anchor = 5428 change_anchor_names( $rtoc, $src_basename, "$src_frame_name" ); 5429 $fh->print( join "", @$rtoc ); 5430 5431 $fh->print(<<EOM); 5432</body> 5433</html> 5434EOM 5435 5436} 5437 5438sub write_frame_html { 5439 5440 # write an html file to be the table of contents frame 5441 my ( 5442 $title, $frame_filename, $top_basename, 5443 $toc_basename, $src_basename, $src_frame_name 5444 ) = @_; 5445 5446 my $fh = IO::File->new( $frame_filename, 'w' ) 5447 or Perl::Tidy::Die "Cannot open $toc_basename:$!\n"; 5448 5449 $fh->print(<<EOM); 5450<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Frameset//EN" 5451 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd"> 5452<?xml version="1.0" encoding="iso-8859-1" ?> 5453<html xmlns="http://www.w3.org/1999/xhtml"> 5454<head> 5455<title>$title</title> 5456</head> 5457EOM 5458 5459 # two left panels, one right, if master index file 5460 if ($top_basename) { 5461 $fh->print(<<EOM); 5462<frameset cols="20%,80%"> 5463<frameset rows="30%,70%"> 5464<frame src = "$top_basename" /> 5465<frame src = "$toc_basename" /> 5466</frameset> 5467EOM 5468 } 5469 5470 # one left panels, one right, if no master index file 5471 else { 5472 $fh->print(<<EOM); 5473<frameset cols="20%,*"> 5474<frame src = "$toc_basename" /> 5475EOM 5476 } 5477 $fh->print(<<EOM); 5478<frame src = "$src_basename" name = "$src_frame_name" /> 5479<noframes> 5480<body> 5481<p>If you see this message, you are using a non-frame-capable web client.</p> 5482<p>This document contains:</p> 5483<ul> 5484<li><a href="$toc_basename">A table of contents</a></li> 5485<li><a href="$src_basename">The source code</a></li> 5486</ul> 5487</body> 5488</noframes> 5489</frameset> 5490</html> 5491EOM 5492} 5493 5494sub change_anchor_names { 5495 5496 # add a filename and target to anchors 5497 # also return the first anchor 5498 my ( $rlines, $filename, $target ) = @_; 5499 my $first_anchor; 5500 foreach my $line (@$rlines) { 5501 5502 # We're looking for lines like this: 5503 # <LI><A HREF="#synopsis">SYNOPSIS</A></LI> 5504 # ---- - -------- ----------------- 5505 # $1 $4 $5 5506 if ( $line =~ /^(.*)<a(.*)href\s*=\s*"([^#]*)#([^"]+)"[^>]*>(.*)$/i ) { 5507 my $pre = $1; 5508 my $name = $4; 5509 my $post = $5; 5510 my $href = "$filename#$name"; 5511 $line = "$pre<a href=\"$href\" target=\"$target\">$post\n"; 5512 unless ($first_anchor) { $first_anchor = $href } 5513 } 5514 } 5515 return $first_anchor; 5516} 5517 5518sub close_html_file { 5519 my $self = shift; 5520 return unless $self->{_html_file_opened}; 5521 5522 my $html_fh = $self->{_html_fh}; 5523 my $rtoc_string = $self->{_rtoc_string}; 5524 5525 # There are 3 basic paths to html output... 5526 5527 # --------------------------------- 5528 # Path 1: finish up if in -pre mode 5529 # --------------------------------- 5530 if ( $rOpts->{'html-pre-only'} ) { 5531 $html_fh->print( <<"PRE_END"); 5532</pre> 5533PRE_END 5534 eval { $html_fh->close() }; 5535 return; 5536 } 5537 5538 # Finish the index 5539 $self->add_toc_item( 'EOF', 'EOF' ); 5540 5541 my $rpre_string_stack = $self->{_rpre_string_stack}; 5542 5543 # Patch to darken the <pre> background color in case of pod2html and 5544 # interleaved code/documentation. Otherwise, the distinction 5545 # between code and documentation is blurred. 5546 if ( $rOpts->{pod2html} 5547 && $self->{_pod_cut_count} >= 1 5548 && $rOpts->{'html-color-background'} eq '#FFFFFF' ) 5549 { 5550 $rOpts->{'html-pre-color-background'} = '#F0F0F0'; 5551 } 5552 5553 # put the css or its link into a string, if used 5554 my $css_string; 5555 my $fh_css = Perl::Tidy::IOScalar->new( \$css_string, 'w' ); 5556 5557 # use css linked to another file 5558 if ( $rOpts->{'html-linked-style-sheet'} ) { 5559 $fh_css->print( 5560 qq(<link rel="stylesheet" href="$css_linkname" type="text/css" />) 5561 ); 5562 } 5563 5564 # use css embedded in this file 5565 elsif ( !$rOpts->{'nohtml-style-sheets'} ) { 5566 $fh_css->print( <<'ENDCSS'); 5567<style type="text/css"> 5568<!-- 5569ENDCSS 5570 write_style_sheet_data($fh_css); 5571 $fh_css->print( <<"ENDCSS"); 5572--> 5573</style> 5574ENDCSS 5575 } 5576 5577 # ----------------------------------------------------------- 5578 # path 2: use pod2html if requested 5579 # If we fail for some reason, continue on to path 3 5580 # ----------------------------------------------------------- 5581 if ( $rOpts->{'pod2html'} ) { 5582 my $rpod_string = $self->{_rpod_string}; 5583 $self->pod_to_html( $$rpod_string, $css_string, $$rtoc_string, 5584 $rpre_string_stack ) 5585 && return; 5586 } 5587 5588 # -------------------------------------------------- 5589 # path 3: write code in html, with pod only in italics 5590 # -------------------------------------------------- 5591 my $input_file = $self->{_input_file}; 5592 my $title = escape_html($input_file); 5593 my $date = localtime; 5594 $html_fh->print( <<"HTML_START"); 5595<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" 5596 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> 5597<!-- Generated by perltidy on $date --> 5598<html xmlns="http://www.w3.org/1999/xhtml"> 5599<head> 5600<title>$title</title> 5601HTML_START 5602 5603 # output the css, if used 5604 if ($css_string) { 5605 $html_fh->print($css_string); 5606 $html_fh->print( <<"ENDCSS"); 5607</head> 5608<body> 5609ENDCSS 5610 } 5611 else { 5612 5613 $html_fh->print( <<"HTML_START"); 5614</head> 5615<body bgcolor=\"$rOpts->{'html-color-background'}\" text=\"$rOpts->{'html-color-punctuation'}\"> 5616HTML_START 5617 } 5618 5619 $html_fh->print("<a name=\"-top-\"></a>\n"); 5620 $html_fh->print( <<"EOM"); 5621<h1>$title</h1> 5622EOM 5623 5624 # copy the table of contents 5625 if ( $$rtoc_string 5626 && !$rOpts->{'frames'} 5627 && $rOpts->{'html-table-of-contents'} ) 5628 { 5629 $html_fh->print($$rtoc_string); 5630 } 5631 5632 # copy the pre section(s) 5633 my $fname_comment = $input_file; 5634 $fname_comment =~ s/--+/-/g; # protect HTML comment tags 5635 $html_fh->print( <<"END_PRE"); 5636<hr /> 5637<!-- contents of filename: $fname_comment --> 5638<pre> 5639END_PRE 5640 5641 foreach my $rpre_string (@$rpre_string_stack) { 5642 $html_fh->print($$rpre_string); 5643 } 5644 5645 # and finish the html page 5646 $html_fh->print( <<"HTML_END"); 5647</pre> 5648</body> 5649</html> 5650HTML_END 5651 eval { $html_fh->close() }; # could be object without close method 5652 5653 if ( $rOpts->{'frames'} ) { 5654 my @toc = map { $_ .= "\n" } split /\n/, $$rtoc_string; 5655 $self->make_frame( \@toc ); 5656 } 5657} 5658 5659sub markup_tokens { 5660 my $self = shift; 5661 my ( $rtokens, $rtoken_type, $rlevels ) = @_; 5662 my ( @colored_tokens, $j, $string, $type, $token, $level ); 5663 my $rlast_level = $self->{_rlast_level}; 5664 my $rpackage_stack = $self->{_rpackage_stack}; 5665 5666 for ( $j = 0 ; $j < @$rtoken_type ; $j++ ) { 5667 $type = $$rtoken_type[$j]; 5668 $token = $$rtokens[$j]; 5669 $level = $$rlevels[$j]; 5670 $level = 0 if ( $level < 0 ); 5671 5672 #------------------------------------------------------- 5673 # Update the package stack. The package stack is needed to keep 5674 # the toc correct because some packages may be declared within 5675 # blocks and go out of scope when we leave the block. 5676 #------------------------------------------------------- 5677 if ( $level > $$rlast_level ) { 5678 unless ( $rpackage_stack->[ $level - 1 ] ) { 5679 $rpackage_stack->[ $level - 1 ] = 'main'; 5680 } 5681 $rpackage_stack->[$level] = $rpackage_stack->[ $level - 1 ]; 5682 } 5683 elsif ( $level < $$rlast_level ) { 5684 my $package = $rpackage_stack->[$level]; 5685 unless ($package) { $package = 'main' } 5686 5687 # if we change packages due to a nesting change, we 5688 # have to make an entry in the toc 5689 if ( $package ne $rpackage_stack->[ $level + 1 ] ) { 5690 $self->add_toc_item( $package, 'package' ); 5691 } 5692 } 5693 $$rlast_level = $level; 5694 5695 #------------------------------------------------------- 5696 # Intercept a sub name here; split it 5697 # into keyword 'sub' and sub name; and add an 5698 # entry in the toc 5699 #------------------------------------------------------- 5700 if ( $type eq 'i' && $token =~ /^(sub\s+)(\w.*)$/ ) { 5701 $token = $self->markup_html_element( $1, 'k' ); 5702 push @colored_tokens, $token; 5703 $token = $2; 5704 $type = 'M'; 5705 5706 # but don't include sub declarations in the toc; 5707 # these wlll have leading token types 'i;' 5708 my $signature = join "", @$rtoken_type; 5709 unless ( $signature =~ /^i;/ ) { 5710 my $subname = $token; 5711 $subname =~ s/[\s\(].*$//; # remove any attributes and prototype 5712 $self->add_toc_item( $subname, 'sub' ); 5713 } 5714 } 5715 5716 #------------------------------------------------------- 5717 # Intercept a package name here; split it 5718 # into keyword 'package' and name; add to the toc, 5719 # and update the package stack 5720 #------------------------------------------------------- 5721 if ( $type eq 'i' && $token =~ /^(package\s+)(\w.*)$/ ) { 5722 $token = $self->markup_html_element( $1, 'k' ); 5723 push @colored_tokens, $token; 5724 $token = $2; 5725 $type = 'i'; 5726 $self->add_toc_item( "$token", 'package' ); 5727 $rpackage_stack->[$level] = $token; 5728 } 5729 5730 $token = $self->markup_html_element( $token, $type ); 5731 push @colored_tokens, $token; 5732 } 5733 return ( \@colored_tokens ); 5734} 5735 5736sub markup_html_element { 5737 my $self = shift; 5738 my ( $token, $type ) = @_; 5739 5740 return $token if ( $type eq 'b' ); # skip a blank token 5741 return $token if ( $token =~ /^\s*$/ ); # skip a blank line 5742 $token = escape_html($token); 5743 5744 # get the short abbreviation for this token type 5745 my $short_name = $token_short_names{$type}; 5746 if ( !defined($short_name) ) { 5747 $short_name = "pu"; # punctuation is default 5748 } 5749 5750 # handle style sheets.. 5751 if ( !$rOpts->{'nohtml-style-sheets'} ) { 5752 if ( $short_name ne 'pu' ) { 5753 $token = qq(<span class="$short_name">) . $token . "</span>"; 5754 } 5755 } 5756 5757 # handle no style sheets.. 5758 else { 5759 my $color = $html_color{$short_name}; 5760 5761 if ( $color && ( $color ne $rOpts->{'html-color-punctuation'} ) ) { 5762 $token = qq(<font color="$color">) . $token . "</font>"; 5763 } 5764 if ( $html_italic{$short_name} ) { $token = "<i>$token</i>" } 5765 if ( $html_bold{$short_name} ) { $token = "<b>$token</b>" } 5766 } 5767 return $token; 5768} 5769 5770sub escape_html { 5771 5772 my $token = shift; 5773 if ($missing_html_entities) { 5774 $token =~ s/\&/&/g; 5775 $token =~ s/\</</g; 5776 $token =~ s/\>/>/g; 5777 $token =~ s/\"/"/g; 5778 } 5779 else { 5780 HTML::Entities::encode_entities($token); 5781 } 5782 return $token; 5783} 5784 5785sub finish_formatting { 5786 5787 # called after last line 5788 my $self = shift; 5789 $self->close_html_file(); 5790 return; 5791} 5792 5793sub write_line { 5794 5795 my $self = shift; 5796 return unless $self->{_html_file_opened}; 5797 my $html_pre_fh = $self->{_html_pre_fh}; 5798 my ($line_of_tokens) = @_; 5799 my $line_type = $line_of_tokens->{_line_type}; 5800 my $input_line = $line_of_tokens->{_line_text}; 5801 my $line_number = $line_of_tokens->{_line_number}; 5802 chomp $input_line; 5803 5804 # markup line of code.. 5805 my $html_line; 5806 if ( $line_type eq 'CODE' ) { 5807 my $rtoken_type = $line_of_tokens->{_rtoken_type}; 5808 my $rtokens = $line_of_tokens->{_rtokens}; 5809 my $rlevels = $line_of_tokens->{_rlevels}; 5810 5811 if ( $input_line =~ /(^\s*)/ ) { 5812 $html_line = $1; 5813 } 5814 else { 5815 $html_line = ""; 5816 } 5817 my ($rcolored_tokens) = 5818 $self->markup_tokens( $rtokens, $rtoken_type, $rlevels ); 5819 $html_line .= join '', @$rcolored_tokens; 5820 } 5821 5822 # markup line of non-code.. 5823 else { 5824 my $line_character; 5825 if ( $line_type eq 'HERE' ) { $line_character = 'H' } 5826 elsif ( $line_type eq 'HERE_END' ) { $line_character = 'h' } 5827 elsif ( $line_type eq 'FORMAT' ) { $line_character = 'H' } 5828 elsif ( $line_type eq 'FORMAT_END' ) { $line_character = 'h' } 5829 elsif ( $line_type eq 'SYSTEM' ) { $line_character = 'c' } 5830 elsif ( $line_type eq 'END_START' ) { 5831 $line_character = 'k'; 5832 $self->add_toc_item( '__END__', '__END__' ); 5833 } 5834 elsif ( $line_type eq 'DATA_START' ) { 5835 $line_character = 'k'; 5836 $self->add_toc_item( '__DATA__', '__DATA__' ); 5837 } 5838 elsif ( $line_type =~ /^POD/ ) { 5839 $line_character = 'P'; 5840 if ( $rOpts->{'pod2html'} ) { 5841 my $html_pod_fh = $self->{_html_pod_fh}; 5842 if ( $line_type eq 'POD_START' ) { 5843 5844 my $rpre_string_stack = $self->{_rpre_string_stack}; 5845 my $rpre_string = $rpre_string_stack->[-1]; 5846 5847 # if we have written any non-blank lines to the 5848 # current pre section, start writing to a new output 5849 # string 5850 if ( $$rpre_string =~ /\S/ ) { 5851 my $pre_string; 5852 $html_pre_fh = 5853 Perl::Tidy::IOScalar->new( \$pre_string, 'w' ); 5854 $self->{_html_pre_fh} = $html_pre_fh; 5855 push @$rpre_string_stack, \$pre_string; 5856 5857 # leave a marker in the pod stream so we know 5858 # where to put the pre section we just 5859 # finished. 5860 my $for_html = '=for html'; # don't confuse pod utils 5861 $html_pod_fh->print(<<EOM); 5862 5863$for_html 5864<!-- pERLTIDY sECTION --> 5865 5866EOM 5867 } 5868 5869 # otherwise, just clear the current string and start 5870 # over 5871 else { 5872 $$rpre_string = ""; 5873 $html_pod_fh->print("\n"); 5874 } 5875 } 5876 $html_pod_fh->print( $input_line . "\n" ); 5877 if ( $line_type eq 'POD_END' ) { 5878 $self->{_pod_cut_count}++; 5879 $html_pod_fh->print("\n"); 5880 } 5881 return; 5882 } 5883 } 5884 else { $line_character = 'Q' } 5885 $html_line = $self->markup_html_element( $input_line, $line_character ); 5886 } 5887 5888 # add the line number if requested 5889 if ( $rOpts->{'html-line-numbers'} ) { 5890 my $extra_space .= 5891 ( $line_number < 10 ) ? " " 5892 : ( $line_number < 100 ) ? " " 5893 : ( $line_number < 1000 ) ? " " 5894 : ""; 5895 $html_line = $extra_space . $line_number . " " . $html_line; 5896 } 5897 5898 # write the line 5899 $html_pre_fh->print("$html_line\n"); 5900} 5901 5902##################################################################### 5903# 5904# The Perl::Tidy::Formatter package adds indentation, whitespace, and 5905# line breaks to the token stream 5906# 5907# WARNING: This is not a real class for speed reasons. Only one 5908# Formatter may be used. 5909# 5910##################################################################### 5911 5912package Perl::Tidy::Formatter; 5913 5914BEGIN { 5915 5916 # Caution: these debug flags produce a lot of output 5917 # They should all be 0 except when debugging small scripts 5918 use constant FORMATTER_DEBUG_FLAG_RECOMBINE => 0; 5919 use constant FORMATTER_DEBUG_FLAG_BOND_TABLES => 0; 5920 use constant FORMATTER_DEBUG_FLAG_BOND => 0; 5921 use constant FORMATTER_DEBUG_FLAG_BREAK => 0; 5922 use constant FORMATTER_DEBUG_FLAG_CI => 0; 5923 use constant FORMATTER_DEBUG_FLAG_FLUSH => 0; 5924 use constant FORMATTER_DEBUG_FLAG_FORCE => 0; 5925 use constant FORMATTER_DEBUG_FLAG_LIST => 0; 5926 use constant FORMATTER_DEBUG_FLAG_NOBREAK => 0; 5927 use constant FORMATTER_DEBUG_FLAG_OUTPUT => 0; 5928 use constant FORMATTER_DEBUG_FLAG_SPARSE => 0; 5929 use constant FORMATTER_DEBUG_FLAG_STORE => 0; 5930 use constant FORMATTER_DEBUG_FLAG_UNDOBP => 0; 5931 use constant FORMATTER_DEBUG_FLAG_WHITE => 0; 5932 5933 my $debug_warning = sub { 5934 print STDOUT "FORMATTER_DEBUGGING with key $_[0]\n"; 5935 }; 5936 5937 FORMATTER_DEBUG_FLAG_RECOMBINE && $debug_warning->('RECOMBINE'); 5938 FORMATTER_DEBUG_FLAG_BOND_TABLES && $debug_warning->('BOND_TABLES'); 5939 FORMATTER_DEBUG_FLAG_BOND && $debug_warning->('BOND'); 5940 FORMATTER_DEBUG_FLAG_BREAK && $debug_warning->('BREAK'); 5941 FORMATTER_DEBUG_FLAG_CI && $debug_warning->('CI'); 5942 FORMATTER_DEBUG_FLAG_FLUSH && $debug_warning->('FLUSH'); 5943 FORMATTER_DEBUG_FLAG_FORCE && $debug_warning->('FORCE'); 5944 FORMATTER_DEBUG_FLAG_LIST && $debug_warning->('LIST'); 5945 FORMATTER_DEBUG_FLAG_NOBREAK && $debug_warning->('NOBREAK'); 5946 FORMATTER_DEBUG_FLAG_OUTPUT && $debug_warning->('OUTPUT'); 5947 FORMATTER_DEBUG_FLAG_SPARSE && $debug_warning->('SPARSE'); 5948 FORMATTER_DEBUG_FLAG_STORE && $debug_warning->('STORE'); 5949 FORMATTER_DEBUG_FLAG_UNDOBP && $debug_warning->('UNDOBP'); 5950 FORMATTER_DEBUG_FLAG_WHITE && $debug_warning->('WHITE'); 5951} 5952 5953use Carp; 5954use vars qw{ 5955 5956 @gnu_stack 5957 $max_gnu_stack_index 5958 $gnu_position_predictor 5959 $line_start_index_to_go 5960 $last_indentation_written 5961 $last_unadjusted_indentation 5962 $last_leading_token 5963 $last_output_short_opening_token 5964 5965 $saw_VERSION_in_this_file 5966 $saw_END_or_DATA_ 5967 5968 @gnu_item_list 5969 $max_gnu_item_index 5970 $gnu_sequence_number 5971 $last_output_indentation 5972 %last_gnu_equals 5973 %gnu_comma_count 5974 %gnu_arrow_count 5975 5976 @block_type_to_go 5977 @type_sequence_to_go 5978 @container_environment_to_go 5979 @bond_strength_to_go 5980 @forced_breakpoint_to_go 5981 @token_lengths_to_go 5982 @summed_lengths_to_go 5983 @levels_to_go 5984 @leading_spaces_to_go 5985 @reduced_spaces_to_go 5986 @matching_token_to_go 5987 @mate_index_to_go 5988 @nesting_blocks_to_go 5989 @ci_levels_to_go 5990 @nesting_depth_to_go 5991 @nobreak_to_go 5992 @old_breakpoint_to_go 5993 @tokens_to_go 5994 @types_to_go 5995 @inext_to_go 5996 @iprev_to_go 5997 5998 %saved_opening_indentation 5999 6000 $max_index_to_go 6001 $comma_count_in_batch 6002 $old_line_count_in_batch 6003 $last_nonblank_index_to_go 6004 $last_nonblank_type_to_go 6005 $last_nonblank_token_to_go 6006 $last_last_nonblank_index_to_go 6007 $last_last_nonblank_type_to_go 6008 $last_last_nonblank_token_to_go 6009 @nonblank_lines_at_depth 6010 $starting_in_quote 6011 $ending_in_quote 6012 @whitespace_level_stack 6013 $whitespace_last_level 6014 6015 $in_format_skipping_section 6016 $format_skipping_pattern_begin 6017 $format_skipping_pattern_end 6018 6019 $forced_breakpoint_count 6020 $forced_breakpoint_undo_count 6021 @forced_breakpoint_undo_stack 6022 %postponed_breakpoint 6023 6024 $tabbing 6025 $embedded_tab_count 6026 $first_embedded_tab_at 6027 $last_embedded_tab_at 6028 $deleted_semicolon_count 6029 $first_deleted_semicolon_at 6030 $last_deleted_semicolon_at 6031 $added_semicolon_count 6032 $first_added_semicolon_at 6033 $last_added_semicolon_at 6034 $first_tabbing_disagreement 6035 $last_tabbing_disagreement 6036 $in_tabbing_disagreement 6037 $tabbing_disagreement_count 6038 $input_line_tabbing 6039 6040 $last_line_type 6041 $last_line_leading_type 6042 $last_line_leading_level 6043 $last_last_line_leading_level 6044 6045 %block_leading_text 6046 %block_opening_line_number 6047 $csc_new_statement_ok 6048 $csc_last_label 6049 %csc_block_label 6050 $accumulating_text_for_block 6051 $leading_block_text 6052 $rleading_block_if_elsif_text 6053 $leading_block_text_level 6054 $leading_block_text_length_exceeded 6055 $leading_block_text_line_length 6056 $leading_block_text_line_number 6057 $closing_side_comment_prefix_pattern 6058 $closing_side_comment_list_pattern 6059 6060 $last_nonblank_token 6061 $last_nonblank_type 6062 $last_last_nonblank_token 6063 $last_last_nonblank_type 6064 $last_nonblank_block_type 6065 $last_output_level 6066 %is_do_follower 6067 %is_if_brace_follower 6068 %space_after_keyword 6069 $rbrace_follower 6070 $looking_for_else 6071 %is_last_next_redo_return 6072 %is_other_brace_follower 6073 %is_else_brace_follower 6074 %is_anon_sub_brace_follower 6075 %is_anon_sub_1_brace_follower 6076 %is_sort_map_grep 6077 %is_sort_map_grep_eval 6078 %is_sort_map_grep_eval_do 6079 %is_block_without_semicolon 6080 %is_if_unless 6081 %is_and_or 6082 %is_assignment 6083 %is_chain_operator 6084 %is_if_unless_and_or_last_next_redo_return 6085 6086 @has_broken_sublist 6087 @dont_align 6088 @want_comma_break 6089 6090 $is_static_block_comment 6091 $index_start_one_line_block 6092 $semicolons_before_block_self_destruct 6093 $index_max_forced_break 6094 $input_line_number 6095 $diagnostics_object 6096 $vertical_aligner_object 6097 $logger_object 6098 $file_writer_object 6099 $formatter_self 6100 @ci_stack 6101 $last_line_had_side_comment 6102 %want_break_before 6103 %outdent_keyword 6104 $static_block_comment_pattern 6105 $static_side_comment_pattern 6106 %opening_vertical_tightness 6107 %closing_vertical_tightness 6108 %closing_token_indentation 6109 $some_closing_token_indentation 6110 6111 %opening_token_right 6112 %stack_opening_token 6113 %stack_closing_token 6114 6115 $block_brace_vertical_tightness_pattern 6116 6117 $rOpts_add_newlines 6118 $rOpts_add_whitespace 6119 $rOpts_block_brace_tightness 6120 $rOpts_block_brace_vertical_tightness 6121 $rOpts_brace_left_and_indent 6122 $rOpts_comma_arrow_breakpoints 6123 $rOpts_break_at_old_keyword_breakpoints 6124 $rOpts_break_at_old_comma_breakpoints 6125 $rOpts_break_at_old_logical_breakpoints 6126 $rOpts_break_at_old_ternary_breakpoints 6127 $rOpts_break_at_old_attribute_breakpoints 6128 $rOpts_closing_side_comment_else_flag 6129 $rOpts_closing_side_comment_maximum_text 6130 $rOpts_continuation_indentation 6131 $rOpts_cuddled_else 6132 $rOpts_delete_old_whitespace 6133 $rOpts_fuzzy_line_length 6134 $rOpts_indent_columns 6135 $rOpts_line_up_parentheses 6136 $rOpts_maximum_fields_per_table 6137 $rOpts_maximum_line_length 6138 $rOpts_variable_maximum_line_length 6139 $rOpts_short_concatenation_item_length 6140 $rOpts_keep_old_blank_lines 6141 $rOpts_ignore_old_breakpoints 6142 $rOpts_format_skipping 6143 $rOpts_space_function_paren 6144 $rOpts_space_keyword_paren 6145 $rOpts_keep_interior_semicolons 6146 $rOpts_ignore_side_comment_lengths 6147 $rOpts_stack_closing_block_brace 6148 $rOpts_whitespace_cycle 6149 $rOpts_tight_secret_operators 6150 6151 %is_opening_type 6152 %is_closing_type 6153 %is_keyword_returning_list 6154 %tightness 6155 %matching_token 6156 $rOpts 6157 %right_bond_strength 6158 %left_bond_strength 6159 %binary_ws_rules 6160 %want_left_space 6161 %want_right_space 6162 %is_digraph 6163 %is_trigraph 6164 $bli_pattern 6165 $bli_list_string 6166 %is_closing_type 6167 %is_opening_type 6168 %is_closing_token 6169 %is_opening_token 6170}; 6171 6172BEGIN { 6173 6174 # default list of block types for which -bli would apply 6175 $bli_list_string = 'if else elsif unless while for foreach do : sub'; 6176 6177 @_ = qw( 6178 .. :: << >> ** && .. || // -> => += -= .= %= &= |= ^= *= <> 6179 <= >= == =~ !~ != ++ -- /= x= 6180 ); 6181 @is_digraph{@_} = (1) x scalar(@_); 6182 6183 @_ = qw( ... **= <<= >>= &&= ||= //= <=> ); 6184 @is_trigraph{@_} = (1) x scalar(@_); 6185 6186 @_ = qw( 6187 = **= += *= &= <<= &&= 6188 -= /= |= >>= ||= //= 6189 .= %= ^= 6190 x= 6191 ); 6192 @is_assignment{@_} = (1) x scalar(@_); 6193 6194 @_ = qw( 6195 grep 6196 keys 6197 map 6198 reverse 6199 sort 6200 split 6201 ); 6202 @is_keyword_returning_list{@_} = (1) x scalar(@_); 6203 6204 @_ = qw(is if unless and or err last next redo return); 6205 @is_if_unless_and_or_last_next_redo_return{@_} = (1) x scalar(@_); 6206 6207 @_ = qw(last next redo return); 6208 @is_last_next_redo_return{@_} = (1) x scalar(@_); 6209 6210 @_ = qw(sort map grep); 6211 @is_sort_map_grep{@_} = (1) x scalar(@_); 6212 6213 @_ = qw(sort map grep eval); 6214 @is_sort_map_grep_eval{@_} = (1) x scalar(@_); 6215 6216 @_ = qw(sort map grep eval do); 6217 @is_sort_map_grep_eval_do{@_} = (1) x scalar(@_); 6218 6219 @_ = qw(if unless); 6220 @is_if_unless{@_} = (1) x scalar(@_); 6221 6222 @_ = qw(and or err); 6223 @is_and_or{@_} = (1) x scalar(@_); 6224 6225 # Identify certain operators which often occur in chains. 6226 # Note: the minus (-) causes a side effect of padding of the first line in 6227 # something like this (by sub set_logical_padding): 6228 # Checkbutton => 'Transmission checked', 6229 # -variable => \$TRANS 6230 # This usually improves appearance so it seems ok. 6231 @_ = qw(&& || and or : ? . + - * /); 6232 @is_chain_operator{@_} = (1) x scalar(@_); 6233 6234 # We can remove semicolons after blocks preceded by these keywords 6235 @_ = 6236 qw(BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue if elsif else 6237 unless while until for foreach given when default); 6238 @is_block_without_semicolon{@_} = (1) x scalar(@_); 6239 6240 # 'L' is token for opening { at hash key 6241 @_ = qw" L { ( [ "; 6242 @is_opening_type{@_} = (1) x scalar(@_); 6243 6244 # 'R' is token for closing } at hash key 6245 @_ = qw" R } ) ] "; 6246 @is_closing_type{@_} = (1) x scalar(@_); 6247 6248 @_ = qw" { ( [ "; 6249 @is_opening_token{@_} = (1) x scalar(@_); 6250 6251 @_ = qw" } ) ] "; 6252 @is_closing_token{@_} = (1) x scalar(@_); 6253} 6254 6255# whitespace codes 6256use constant WS_YES => 1; 6257use constant WS_OPTIONAL => 0; 6258use constant WS_NO => -1; 6259 6260# Token bond strengths. 6261use constant NO_BREAK => 10000; 6262use constant VERY_STRONG => 100; 6263use constant STRONG => 2.1; 6264use constant NOMINAL => 1.1; 6265use constant WEAK => 0.8; 6266use constant VERY_WEAK => 0.55; 6267 6268# values for testing indexes in output array 6269use constant UNDEFINED_INDEX => -1; 6270 6271# Maximum number of little messages; probably need not be changed. 6272use constant MAX_NAG_MESSAGES => 6; 6273 6274# increment between sequence numbers for each type 6275# For example, ?: pairs might have numbers 7,11,15,... 6276use constant TYPE_SEQUENCE_INCREMENT => 4; 6277 6278{ 6279 6280 # methods to count instances 6281 my $_count = 0; 6282 sub get_count { $_count; } 6283 sub _increment_count { ++$_count } 6284 sub _decrement_count { --$_count } 6285} 6286 6287sub trim { 6288 6289 # trim leading and trailing whitespace from a string 6290 $_[0] =~ s/\s+$//; 6291 $_[0] =~ s/^\s+//; 6292 return $_[0]; 6293} 6294 6295sub max { 6296 my $max = shift; 6297 foreach (@_) { 6298 $max = ( $max < $_ ) ? $_ : $max; 6299 } 6300 return $max; 6301} 6302 6303sub min { 6304 my $min = shift; 6305 foreach (@_) { 6306 $min = ( $min > $_ ) ? $_ : $min; 6307 } 6308 return $min; 6309} 6310 6311sub split_words { 6312 6313 # given a string containing words separated by whitespace, 6314 # return the list of words 6315 my ($str) = @_; 6316 return unless $str; 6317 $str =~ s/\s+$//; 6318 $str =~ s/^\s+//; 6319 return split( /\s+/, $str ); 6320} 6321 6322# interface to Perl::Tidy::Logger routines 6323sub warning { 6324 if ($logger_object) { 6325 $logger_object->warning(@_); 6326 } 6327} 6328 6329sub complain { 6330 if ($logger_object) { 6331 $logger_object->complain(@_); 6332 } 6333} 6334 6335sub write_logfile_entry { 6336 if ($logger_object) { 6337 $logger_object->write_logfile_entry(@_); 6338 } 6339} 6340 6341sub black_box { 6342 if ($logger_object) { 6343 $logger_object->black_box(@_); 6344 } 6345} 6346 6347sub report_definite_bug { 6348 if ($logger_object) { 6349 $logger_object->report_definite_bug(); 6350 } 6351} 6352 6353sub get_saw_brace_error { 6354 if ($logger_object) { 6355 $logger_object->get_saw_brace_error(); 6356 } 6357} 6358 6359sub we_are_at_the_last_line { 6360 if ($logger_object) { 6361 $logger_object->we_are_at_the_last_line(); 6362 } 6363} 6364 6365# interface to Perl::Tidy::Diagnostics routine 6366sub write_diagnostics { 6367 6368 if ($diagnostics_object) { 6369 $diagnostics_object->write_diagnostics(@_); 6370 } 6371} 6372 6373sub get_added_semicolon_count { 6374 my $self = shift; 6375 return $added_semicolon_count; 6376} 6377 6378sub DESTROY { 6379 $_[0]->_decrement_count(); 6380} 6381 6382sub new { 6383 6384 my $class = shift; 6385 6386 # we are given an object with a write_line() method to take lines 6387 my %defaults = ( 6388 sink_object => undef, 6389 diagnostics_object => undef, 6390 logger_object => undef, 6391 ); 6392 my %args = ( %defaults, @_ ); 6393 6394 $logger_object = $args{logger_object}; 6395 $diagnostics_object = $args{diagnostics_object}; 6396 6397 # we create another object with a get_line() and peek_ahead() method 6398 my $sink_object = $args{sink_object}; 6399 $file_writer_object = 6400 Perl::Tidy::FileWriter->new( $sink_object, $rOpts, $logger_object ); 6401 6402 # initialize the leading whitespace stack to negative levels 6403 # so that we can never run off the end of the stack 6404 $gnu_position_predictor = 0; # where the current token is predicted to be 6405 $max_gnu_stack_index = 0; 6406 $max_gnu_item_index = -1; 6407 $gnu_stack[0] = new_lp_indentation_item( 0, -1, -1, 0, 0 ); 6408 @gnu_item_list = (); 6409 $last_output_indentation = 0; 6410 $last_indentation_written = 0; 6411 $last_unadjusted_indentation = 0; 6412 $last_leading_token = ""; 6413 $last_output_short_opening_token = 0; 6414 6415 $saw_VERSION_in_this_file = !$rOpts->{'pass-version-line'}; 6416 $saw_END_or_DATA_ = 0; 6417 6418 @block_type_to_go = (); 6419 @type_sequence_to_go = (); 6420 @container_environment_to_go = (); 6421 @bond_strength_to_go = (); 6422 @forced_breakpoint_to_go = (); 6423 @summed_lengths_to_go = (); # line length to start of ith token 6424 @token_lengths_to_go = (); 6425 @levels_to_go = (); 6426 @matching_token_to_go = (); 6427 @mate_index_to_go = (); 6428 @nesting_blocks_to_go = (); 6429 @ci_levels_to_go = (); 6430 @nesting_depth_to_go = (0); 6431 @nobreak_to_go = (); 6432 @old_breakpoint_to_go = (); 6433 @tokens_to_go = (); 6434 @types_to_go = (); 6435 @leading_spaces_to_go = (); 6436 @reduced_spaces_to_go = (); 6437 @inext_to_go = (); 6438 @iprev_to_go = (); 6439 6440 @whitespace_level_stack = (); 6441 $whitespace_last_level = -1; 6442 6443 @dont_align = (); 6444 @has_broken_sublist = (); 6445 @want_comma_break = (); 6446 6447 @ci_stack = (""); 6448 $first_tabbing_disagreement = 0; 6449 $last_tabbing_disagreement = 0; 6450 $tabbing_disagreement_count = 0; 6451 $in_tabbing_disagreement = 0; 6452 $input_line_tabbing = undef; 6453 6454 $last_line_type = ""; 6455 $last_last_line_leading_level = 0; 6456 $last_line_leading_level = 0; 6457 $last_line_leading_type = '#'; 6458 6459 $last_nonblank_token = ';'; 6460 $last_nonblank_type = ';'; 6461 $last_last_nonblank_token = ';'; 6462 $last_last_nonblank_type = ';'; 6463 $last_nonblank_block_type = ""; 6464 $last_output_level = 0; 6465 $looking_for_else = 0; 6466 $embedded_tab_count = 0; 6467 $first_embedded_tab_at = 0; 6468 $last_embedded_tab_at = 0; 6469 $deleted_semicolon_count = 0; 6470 $first_deleted_semicolon_at = 0; 6471 $last_deleted_semicolon_at = 0; 6472 $added_semicolon_count = 0; 6473 $first_added_semicolon_at = 0; 6474 $last_added_semicolon_at = 0; 6475 $last_line_had_side_comment = 0; 6476 $is_static_block_comment = 0; 6477 %postponed_breakpoint = (); 6478 6479 # variables for adding side comments 6480 %block_leading_text = (); 6481 %block_opening_line_number = (); 6482 $csc_new_statement_ok = 1; 6483 %csc_block_label = (); 6484 6485 %saved_opening_indentation = (); 6486 $in_format_skipping_section = 0; 6487 6488 reset_block_text_accumulator(); 6489 6490 prepare_for_new_input_lines(); 6491 6492 $vertical_aligner_object = 6493 Perl::Tidy::VerticalAligner->initialize( $rOpts, $file_writer_object, 6494 $logger_object, $diagnostics_object ); 6495 6496 if ( $rOpts->{'entab-leading-whitespace'} ) { 6497 write_logfile_entry( 6498"Leading whitespace will be entabbed with $rOpts->{'entab-leading-whitespace'} spaces per tab\n" 6499 ); 6500 } 6501 elsif ( $rOpts->{'tabs'} ) { 6502 write_logfile_entry("Indentation will be with a tab character\n"); 6503 } 6504 else { 6505 write_logfile_entry( 6506 "Indentation will be with $rOpts->{'indent-columns'} spaces\n"); 6507 } 6508 6509 # This was the start of a formatter referent, but object-oriented 6510 # coding has turned out to be too slow here. 6511 $formatter_self = {}; 6512 6513 bless $formatter_self, $class; 6514 6515 # Safety check..this is not a class yet 6516 if ( _increment_count() > 1 ) { 6517 confess 6518"Attempt to create more than 1 object in $class, which is not a true class yet\n"; 6519 } 6520 return $formatter_self; 6521} 6522 6523sub prepare_for_new_input_lines { 6524 6525 $gnu_sequence_number++; # increment output batch counter 6526 %last_gnu_equals = (); 6527 %gnu_comma_count = (); 6528 %gnu_arrow_count = (); 6529 $line_start_index_to_go = 0; 6530 $max_gnu_item_index = UNDEFINED_INDEX; 6531 $index_max_forced_break = UNDEFINED_INDEX; 6532 $max_index_to_go = UNDEFINED_INDEX; 6533 $last_nonblank_index_to_go = UNDEFINED_INDEX; 6534 $last_nonblank_type_to_go = ''; 6535 $last_nonblank_token_to_go = ''; 6536 $last_last_nonblank_index_to_go = UNDEFINED_INDEX; 6537 $last_last_nonblank_type_to_go = ''; 6538 $last_last_nonblank_token_to_go = ''; 6539 $forced_breakpoint_count = 0; 6540 $forced_breakpoint_undo_count = 0; 6541 $rbrace_follower = undef; 6542 $summed_lengths_to_go[0] = 0; 6543 $old_line_count_in_batch = 1; 6544 $comma_count_in_batch = 0; 6545 $starting_in_quote = 0; 6546 6547 destroy_one_line_block(); 6548} 6549 6550sub write_line { 6551 6552 my $self = shift; 6553 my ($line_of_tokens) = @_; 6554 6555 my $line_type = $line_of_tokens->{_line_type}; 6556 my $input_line = $line_of_tokens->{_line_text}; 6557 6558 if ( $rOpts->{notidy} ) { 6559 write_unindented_line($input_line); 6560 $last_line_type = $line_type; 6561 return; 6562 } 6563 6564 # _line_type codes are: 6565 # SYSTEM - system-specific code before hash-bang line 6566 # CODE - line of perl code (including comments) 6567 # POD_START - line starting pod, such as '=head' 6568 # POD - pod documentation text 6569 # POD_END - last line of pod section, '=cut' 6570 # HERE - text of here-document 6571 # HERE_END - last line of here-doc (target word) 6572 # FORMAT - format section 6573 # FORMAT_END - last line of format section, '.' 6574 # DATA_START - __DATA__ line 6575 # DATA - unidentified text following __DATA__ 6576 # END_START - __END__ line 6577 # END - unidentified text following __END__ 6578 # ERROR - we are in big trouble, probably not a perl script 6579 6580 # put a blank line after an =cut which comes before __END__ and __DATA__ 6581 # (required by podchecker) 6582 if ( $last_line_type eq 'POD_END' && !$saw_END_or_DATA_ ) { 6583 $file_writer_object->reset_consecutive_blank_lines(); 6584 if ( $input_line !~ /^\s*$/ ) { want_blank_line() } 6585 } 6586 6587 # handle line of code.. 6588 if ( $line_type eq 'CODE' ) { 6589 6590 # let logger see all non-blank lines of code 6591 if ( $input_line !~ /^\s*$/ ) { 6592 my $output_line_number = 6593 $vertical_aligner_object->get_output_line_number(); 6594 black_box( $line_of_tokens, $output_line_number ); 6595 } 6596 print_line_of_tokens($line_of_tokens); 6597 } 6598 6599 # handle line of non-code.. 6600 else { 6601 6602 # set special flags 6603 my $skip_line = 0; 6604 my $tee_line = 0; 6605 if ( $line_type =~ /^POD/ ) { 6606 6607 # Pod docs should have a preceding blank line. But stay 6608 # out of __END__ and __DATA__ sections, because 6609 # the user may be using this section for any purpose whatsoever 6610 if ( $rOpts->{'delete-pod'} ) { $skip_line = 1; } 6611 if ( $rOpts->{'tee-pod'} ) { $tee_line = 1; } 6612 if ( !$skip_line 6613 && $line_type eq 'POD_START' 6614 && !$saw_END_or_DATA_ ) 6615 { 6616 want_blank_line(); 6617 } 6618 } 6619 6620 # leave the blank counters in a predictable state 6621 # after __END__ or __DATA__ 6622 elsif ( $line_type =~ /^(END_START|DATA_START)$/ ) { 6623 $file_writer_object->reset_consecutive_blank_lines(); 6624 $saw_END_or_DATA_ = 1; 6625 } 6626 6627 # write unindented non-code line 6628 if ( !$skip_line ) { 6629 if ($tee_line) { $file_writer_object->tee_on() } 6630 write_unindented_line($input_line); 6631 if ($tee_line) { $file_writer_object->tee_off() } 6632 } 6633 } 6634 $last_line_type = $line_type; 6635} 6636 6637sub create_one_line_block { 6638 $index_start_one_line_block = $_[0]; 6639 $semicolons_before_block_self_destruct = $_[1]; 6640} 6641 6642sub destroy_one_line_block { 6643 $index_start_one_line_block = UNDEFINED_INDEX; 6644 $semicolons_before_block_self_destruct = 0; 6645} 6646 6647sub leading_spaces_to_go { 6648 6649 # return the number of indentation spaces for a token in the output stream; 6650 # these were previously stored by 'set_leading_whitespace'. 6651 6652 my $ii = shift; 6653 if ( $ii < 0 ) { $ii = 0 } 6654 return get_SPACES( $leading_spaces_to_go[$ii] ); 6655 6656} 6657 6658sub get_SPACES { 6659 6660 # return the number of leading spaces associated with an indentation 6661 # variable $indentation is either a constant number of spaces or an object 6662 # with a get_SPACES method. 6663 my $indentation = shift; 6664 return ref($indentation) ? $indentation->get_SPACES() : $indentation; 6665} 6666 6667sub get_RECOVERABLE_SPACES { 6668 6669 # return the number of spaces (+ means shift right, - means shift left) 6670 # that we would like to shift a group of lines with the same indentation 6671 # to get them to line up with their opening parens 6672 my $indentation = shift; 6673 return ref($indentation) ? $indentation->get_RECOVERABLE_SPACES() : 0; 6674} 6675 6676sub get_AVAILABLE_SPACES_to_go { 6677 6678 my $item = $leading_spaces_to_go[ $_[0] ]; 6679 6680 # return the number of available leading spaces associated with an 6681 # indentation variable. $indentation is either a constant number of 6682 # spaces or an object with a get_AVAILABLE_SPACES method. 6683 return ref($item) ? $item->get_AVAILABLE_SPACES() : 0; 6684} 6685 6686sub new_lp_indentation_item { 6687 6688 # this is an interface to the IndentationItem class 6689 my ( $spaces, $level, $ci_level, $available_spaces, $align_paren ) = @_; 6690 6691 # A negative level implies not to store the item in the item_list 6692 my $index = 0; 6693 if ( $level >= 0 ) { $index = ++$max_gnu_item_index; } 6694 6695 my $item = Perl::Tidy::IndentationItem->new( 6696 $spaces, $level, 6697 $ci_level, $available_spaces, 6698 $index, $gnu_sequence_number, 6699 $align_paren, $max_gnu_stack_index, 6700 $line_start_index_to_go, 6701 ); 6702 6703 if ( $level >= 0 ) { 6704 $gnu_item_list[$max_gnu_item_index] = $item; 6705 } 6706 6707 return $item; 6708} 6709 6710sub set_leading_whitespace { 6711 6712 # This routine defines leading whitespace 6713 # given: the level and continuation_level of a token, 6714 # define: space count of leading string which would apply if it 6715 # were the first token of a new line. 6716 6717 my ( $level_abs, $ci_level, $in_continued_quote ) = @_; 6718 6719 # Adjust levels if necessary to recycle whitespace: 6720 # given $level_abs, the absolute level 6721 # define $level, a possibly reduced level for whitespace 6722 my $level = $level_abs; 6723 if ( $rOpts_whitespace_cycle && $rOpts_whitespace_cycle > 0 ) { 6724 if ( $level_abs < $whitespace_last_level ) { 6725 pop(@whitespace_level_stack); 6726 } 6727 if ( !@whitespace_level_stack ) { 6728 push @whitespace_level_stack, $level_abs; 6729 } 6730 elsif ( $level_abs > $whitespace_last_level ) { 6731 $level = $whitespace_level_stack[-1] + 6732 ( $level_abs - $whitespace_last_level ); 6733 6734 if ( 6735 # 1 Try to break at a block brace 6736 ( 6737 $level > $rOpts_whitespace_cycle 6738 && $last_nonblank_type eq '{' 6739 && $last_nonblank_token eq '{' 6740 ) 6741 6742 # 2 Then either a brace or bracket 6743 || ( $level > $rOpts_whitespace_cycle + 1 6744 && $last_nonblank_token =~ /^[\{\[]$/ ) 6745 6746 # 3 Then a paren too 6747 || $level > $rOpts_whitespace_cycle + 2 6748 ) 6749 { 6750 $level = 1; 6751 } 6752 push @whitespace_level_stack, $level; 6753 } 6754 $level = $whitespace_level_stack[-1]; 6755 } 6756 $whitespace_last_level = $level_abs; 6757 6758 # modify for -bli, which adds one continuation indentation for 6759 # opening braces 6760 if ( $rOpts_brace_left_and_indent 6761 && $max_index_to_go == 0 6762 && $block_type_to_go[$max_index_to_go] =~ /$bli_pattern/o ) 6763 { 6764 $ci_level++; 6765 } 6766 6767 # patch to avoid trouble when input file has negative indentation. 6768 # other logic should catch this error. 6769 if ( $level < 0 ) { $level = 0 } 6770 6771 #------------------------------------------- 6772 # handle the standard indentation scheme 6773 #------------------------------------------- 6774 unless ($rOpts_line_up_parentheses) { 6775 my $space_count = 6776 $ci_level * $rOpts_continuation_indentation + 6777 $level * $rOpts_indent_columns; 6778 my $ci_spaces = 6779 ( $ci_level == 0 ) ? 0 : $rOpts_continuation_indentation; 6780 6781 if ($in_continued_quote) { 6782 $space_count = 0; 6783 $ci_spaces = 0; 6784 } 6785 $leading_spaces_to_go[$max_index_to_go] = $space_count; 6786 $reduced_spaces_to_go[$max_index_to_go] = $space_count - $ci_spaces; 6787 return; 6788 } 6789 6790 #------------------------------------------------------------- 6791 # handle case of -lp indentation.. 6792 #------------------------------------------------------------- 6793 6794 # The continued_quote flag means that this is the first token of a 6795 # line, and it is the continuation of some kind of multi-line quote 6796 # or pattern. It requires special treatment because it must have no 6797 # added leading whitespace. So we create a special indentation item 6798 # which is not in the stack. 6799 if ($in_continued_quote) { 6800 my $space_count = 0; 6801 my $available_space = 0; 6802 $level = -1; # flag to prevent storing in item_list 6803 $leading_spaces_to_go[$max_index_to_go] = 6804 $reduced_spaces_to_go[$max_index_to_go] = 6805 new_lp_indentation_item( $space_count, $level, $ci_level, 6806 $available_space, 0 ); 6807 return; 6808 } 6809 6810 # get the top state from the stack 6811 my $space_count = $gnu_stack[$max_gnu_stack_index]->get_SPACES(); 6812 my $current_level = $gnu_stack[$max_gnu_stack_index]->get_LEVEL(); 6813 my $current_ci_level = $gnu_stack[$max_gnu_stack_index]->get_CI_LEVEL(); 6814 6815 my $type = $types_to_go[$max_index_to_go]; 6816 my $token = $tokens_to_go[$max_index_to_go]; 6817 my $total_depth = $nesting_depth_to_go[$max_index_to_go]; 6818 6819 if ( $type eq '{' || $type eq '(' ) { 6820 6821 $gnu_comma_count{ $total_depth + 1 } = 0; 6822 $gnu_arrow_count{ $total_depth + 1 } = 0; 6823 6824 # If we come to an opening token after an '=' token of some type, 6825 # see if it would be helpful to 'break' after the '=' to save space 6826 my $last_equals = $last_gnu_equals{$total_depth}; 6827 if ( $last_equals && $last_equals > $line_start_index_to_go ) { 6828 6829 # find the position if we break at the '=' 6830 my $i_test = $last_equals; 6831 if ( $types_to_go[ $i_test + 1 ] eq 'b' ) { $i_test++ } 6832 6833 # TESTING 6834 ##my $too_close = ($i_test==$max_index_to_go-1); 6835 6836 my $test_position = total_line_length( $i_test, $max_index_to_go ); 6837 my $mll = maximum_line_length($i_test); 6838 6839 if ( 6840 6841 # the equals is not just before an open paren (testing) 6842 ##!$too_close && 6843 6844 # if we are beyond the midpoint 6845 $gnu_position_predictor > $mll - $rOpts_maximum_line_length / 2 6846 6847 # or we are beyont the 1/4 point and there was an old 6848 # break at the equals 6849 || ( 6850 $gnu_position_predictor > 6851 $mll - $rOpts_maximum_line_length * 3 / 4 6852 && ( 6853 $old_breakpoint_to_go[$last_equals] 6854 || ( $last_equals > 0 6855 && $old_breakpoint_to_go[ $last_equals - 1 ] ) 6856 || ( $last_equals > 1 6857 && $types_to_go[ $last_equals - 1 ] eq 'b' 6858 && $old_breakpoint_to_go[ $last_equals - 2 ] ) 6859 ) 6860 ) 6861 ) 6862 { 6863 6864 # then make the switch -- note that we do not set a real 6865 # breakpoint here because we may not really need one; sub 6866 # scan_list will do that if necessary 6867 $line_start_index_to_go = $i_test + 1; 6868 $gnu_position_predictor = $test_position; 6869 } 6870 } 6871 } 6872 6873 my $halfway = 6874 maximum_line_length_for_level($level) - $rOpts_maximum_line_length / 2; 6875 6876 # Check for decreasing depth .. 6877 # Note that one token may have both decreasing and then increasing 6878 # depth. For example, (level, ci) can go from (1,1) to (2,0). So, 6879 # in this example we would first go back to (1,0) then up to (2,0) 6880 # in a single call. 6881 if ( $level < $current_level || $ci_level < $current_ci_level ) { 6882 6883 # loop to find the first entry at or completely below this level 6884 my ( $lev, $ci_lev ); 6885 while (1) { 6886 if ($max_gnu_stack_index) { 6887 6888 # save index of token which closes this level 6889 $gnu_stack[$max_gnu_stack_index]->set_CLOSED($max_index_to_go); 6890 6891 # Undo any extra indentation if we saw no commas 6892 my $available_spaces = 6893 $gnu_stack[$max_gnu_stack_index]->get_AVAILABLE_SPACES(); 6894 6895 my $comma_count = 0; 6896 my $arrow_count = 0; 6897 if ( $type eq '}' || $type eq ')' ) { 6898 $comma_count = $gnu_comma_count{$total_depth}; 6899 $arrow_count = $gnu_arrow_count{$total_depth}; 6900 $comma_count = 0 unless $comma_count; 6901 $arrow_count = 0 unless $arrow_count; 6902 } 6903 $gnu_stack[$max_gnu_stack_index]->set_COMMA_COUNT($comma_count); 6904 $gnu_stack[$max_gnu_stack_index]->set_ARROW_COUNT($arrow_count); 6905 6906 if ( $available_spaces > 0 ) { 6907 6908 if ( $comma_count <= 0 || $arrow_count > 0 ) { 6909 6910 my $i = $gnu_stack[$max_gnu_stack_index]->get_INDEX(); 6911 my $seqno = 6912 $gnu_stack[$max_gnu_stack_index] 6913 ->get_SEQUENCE_NUMBER(); 6914 6915 # Be sure this item was created in this batch. This 6916 # should be true because we delete any available 6917 # space from open items at the end of each batch. 6918 if ( $gnu_sequence_number != $seqno 6919 || $i > $max_gnu_item_index ) 6920 { 6921 warning( 6922"Program bug with -lp. seqno=$seqno should be $gnu_sequence_number and i=$i should be less than max=$max_gnu_item_index\n" 6923 ); 6924 report_definite_bug(); 6925 } 6926 6927 else { 6928 if ( $arrow_count == 0 ) { 6929 $gnu_item_list[$i] 6930 ->permanently_decrease_AVAILABLE_SPACES( 6931 $available_spaces); 6932 } 6933 else { 6934 $gnu_item_list[$i] 6935 ->tentatively_decrease_AVAILABLE_SPACES( 6936 $available_spaces); 6937 } 6938 6939 my $j; 6940 for ( 6941 $j = $i + 1 ; 6942 $j <= $max_gnu_item_index ; 6943 $j++ 6944 ) 6945 { 6946 $gnu_item_list[$j] 6947 ->decrease_SPACES($available_spaces); 6948 } 6949 } 6950 } 6951 } 6952 6953 # go down one level 6954 --$max_gnu_stack_index; 6955 $lev = $gnu_stack[$max_gnu_stack_index]->get_LEVEL(); 6956 $ci_lev = $gnu_stack[$max_gnu_stack_index]->get_CI_LEVEL(); 6957 6958 # stop when we reach a level at or below the current level 6959 if ( $lev <= $level && $ci_lev <= $ci_level ) { 6960 $space_count = 6961 $gnu_stack[$max_gnu_stack_index]->get_SPACES(); 6962 $current_level = $lev; 6963 $current_ci_level = $ci_lev; 6964 last; 6965 } 6966 } 6967 6968 # reached bottom of stack .. should never happen because 6969 # only negative levels can get here, and $level was forced 6970 # to be positive above. 6971 else { 6972 warning( 6973"program bug with -lp: stack_error. level=$level; lev=$lev; ci_level=$ci_level; ci_lev=$ci_lev; rerun with -nlp\n" 6974 ); 6975 report_definite_bug(); 6976 last; 6977 } 6978 } 6979 } 6980 6981 # handle increasing depth 6982 if ( $level > $current_level || $ci_level > $current_ci_level ) { 6983 6984 # Compute the standard incremental whitespace. This will be 6985 # the minimum incremental whitespace that will be used. This 6986 # choice results in a smooth transition between the gnu-style 6987 # and the standard style. 6988 my $standard_increment = 6989 ( $level - $current_level ) * $rOpts_indent_columns + 6990 ( $ci_level - $current_ci_level ) * $rOpts_continuation_indentation; 6991 6992 # Now we have to define how much extra incremental space 6993 # ("$available_space") we want. This extra space will be 6994 # reduced as necessary when long lines are encountered or when 6995 # it becomes clear that we do not have a good list. 6996 my $available_space = 0; 6997 my $align_paren = 0; 6998 my $excess = 0; 6999 7000 # initialization on empty stack.. 7001 if ( $max_gnu_stack_index == 0 ) { 7002 $space_count = $level * $rOpts_indent_columns; 7003 } 7004 7005 # if this is a BLOCK, add the standard increment 7006 elsif ($last_nonblank_block_type) { 7007 $space_count += $standard_increment; 7008 } 7009 7010 # if last nonblank token was not structural indentation, 7011 # just use standard increment 7012 elsif ( $last_nonblank_type ne '{' ) { 7013 $space_count += $standard_increment; 7014 } 7015 7016 # otherwise use the space to the first non-blank level change token 7017 else { 7018 7019 $space_count = $gnu_position_predictor; 7020 7021 my $min_gnu_indentation = 7022 $gnu_stack[$max_gnu_stack_index]->get_SPACES(); 7023 7024 $available_space = $space_count - $min_gnu_indentation; 7025 if ( $available_space >= $standard_increment ) { 7026 $min_gnu_indentation += $standard_increment; 7027 } 7028 elsif ( $available_space > 1 ) { 7029 $min_gnu_indentation += $available_space + 1; 7030 } 7031 elsif ( $last_nonblank_token =~ /^[\{\[\(]$/ ) { 7032 if ( ( $tightness{$last_nonblank_token} < 2 ) ) { 7033 $min_gnu_indentation += 2; 7034 } 7035 else { 7036 $min_gnu_indentation += 1; 7037 } 7038 } 7039 else { 7040 $min_gnu_indentation += $standard_increment; 7041 } 7042 $available_space = $space_count - $min_gnu_indentation; 7043 7044 if ( $available_space < 0 ) { 7045 $space_count = $min_gnu_indentation; 7046 $available_space = 0; 7047 } 7048 $align_paren = 1; 7049 } 7050 7051 # update state, but not on a blank token 7052 if ( $types_to_go[$max_index_to_go] ne 'b' ) { 7053 7054 $gnu_stack[$max_gnu_stack_index]->set_HAVE_CHILD(1); 7055 7056 ++$max_gnu_stack_index; 7057 $gnu_stack[$max_gnu_stack_index] = 7058 new_lp_indentation_item( $space_count, $level, $ci_level, 7059 $available_space, $align_paren ); 7060 7061 # If the opening paren is beyond the half-line length, then 7062 # we will use the minimum (standard) indentation. This will 7063 # help avoid problems associated with running out of space 7064 # near the end of a line. As a result, in deeply nested 7065 # lists, there will be some indentations which are limited 7066 # to this minimum standard indentation. But the most deeply 7067 # nested container will still probably be able to shift its 7068 # parameters to the right for proper alignment, so in most 7069 # cases this will not be noticeable. 7070 if ( $available_space > 0 && $space_count > $halfway ) { 7071 $gnu_stack[$max_gnu_stack_index] 7072 ->tentatively_decrease_AVAILABLE_SPACES($available_space); 7073 } 7074 } 7075 } 7076 7077 # Count commas and look for non-list characters. Once we see a 7078 # non-list character, we give up and don't look for any more commas. 7079 if ( $type eq '=>' ) { 7080 $gnu_arrow_count{$total_depth}++; 7081 7082 # tentatively treating '=>' like '=' for estimating breaks 7083 # TODO: this could use some experimentation 7084 $last_gnu_equals{$total_depth} = $max_index_to_go; 7085 } 7086 7087 elsif ( $type eq ',' ) { 7088 $gnu_comma_count{$total_depth}++; 7089 } 7090 7091 elsif ( $is_assignment{$type} ) { 7092 $last_gnu_equals{$total_depth} = $max_index_to_go; 7093 } 7094 7095 # this token might start a new line 7096 # if this is a non-blank.. 7097 if ( $type ne 'b' ) { 7098 7099 # and if .. 7100 if ( 7101 7102 # this is the first nonblank token of the line 7103 $max_index_to_go == 1 && $types_to_go[0] eq 'b' 7104 7105 # or previous character was one of these: 7106 || $last_nonblank_type_to_go =~ /^([\:\?\,f])$/ 7107 7108 # or previous character was opening and this does not close it 7109 || ( $last_nonblank_type_to_go eq '{' && $type ne '}' ) 7110 || ( $last_nonblank_type_to_go eq '(' and $type ne ')' ) 7111 7112 # or this token is one of these: 7113 || $type =~ /^([\.]|\|\||\&\&)$/ 7114 7115 # or this is a closing structure 7116 || ( $last_nonblank_type_to_go eq '}' 7117 && $last_nonblank_token_to_go eq $last_nonblank_type_to_go ) 7118 7119 # or previous token was keyword 'return' 7120 || ( $last_nonblank_type_to_go eq 'k' 7121 && ( $last_nonblank_token_to_go eq 'return' && $type ne '{' ) ) 7122 7123 # or starting a new line at certain keywords is fine 7124 || ( $type eq 'k' 7125 && $is_if_unless_and_or_last_next_redo_return{$token} ) 7126 7127 # or this is after an assignment after a closing structure 7128 || ( 7129 $is_assignment{$last_nonblank_type_to_go} 7130 && ( 7131 $last_last_nonblank_type_to_go =~ /^[\}\)\]]$/ 7132 7133 # and it is significantly to the right 7134 || $gnu_position_predictor > $halfway 7135 ) 7136 ) 7137 ) 7138 { 7139 check_for_long_gnu_style_lines(); 7140 $line_start_index_to_go = $max_index_to_go; 7141 7142 # back up 1 token if we want to break before that type 7143 # otherwise, we may strand tokens like '?' or ':' on a line 7144 if ( $line_start_index_to_go > 0 ) { 7145 if ( $last_nonblank_type_to_go eq 'k' ) { 7146 7147 if ( $want_break_before{$last_nonblank_token_to_go} ) { 7148 $line_start_index_to_go--; 7149 } 7150 } 7151 elsif ( $want_break_before{$last_nonblank_type_to_go} ) { 7152 $line_start_index_to_go--; 7153 } 7154 } 7155 } 7156 } 7157 7158 # remember the predicted position of this token on the output line 7159 if ( $max_index_to_go > $line_start_index_to_go ) { 7160 $gnu_position_predictor = 7161 total_line_length( $line_start_index_to_go, $max_index_to_go ); 7162 } 7163 else { 7164 $gnu_position_predictor = 7165 $space_count + $token_lengths_to_go[$max_index_to_go]; 7166 } 7167 7168 # store the indentation object for this token 7169 # this allows us to manipulate the leading whitespace 7170 # (in case we have to reduce indentation to fit a line) without 7171 # having to change any token values 7172 $leading_spaces_to_go[$max_index_to_go] = $gnu_stack[$max_gnu_stack_index]; 7173 $reduced_spaces_to_go[$max_index_to_go] = 7174 ( $max_gnu_stack_index > 0 && $ci_level ) 7175 ? $gnu_stack[ $max_gnu_stack_index - 1 ] 7176 : $gnu_stack[$max_gnu_stack_index]; 7177 return; 7178} 7179 7180sub check_for_long_gnu_style_lines { 7181 7182 # look at the current estimated maximum line length, and 7183 # remove some whitespace if it exceeds the desired maximum 7184 7185 # this is only for the '-lp' style 7186 return unless ($rOpts_line_up_parentheses); 7187 7188 # nothing can be done if no stack items defined for this line 7189 return if ( $max_gnu_item_index == UNDEFINED_INDEX ); 7190 7191 # see if we have exceeded the maximum desired line length 7192 # keep 2 extra free because they are needed in some cases 7193 # (result of trial-and-error testing) 7194 my $spaces_needed = 7195 $gnu_position_predictor - maximum_line_length($max_index_to_go) + 2; 7196 7197 return if ( $spaces_needed <= 0 ); 7198 7199 # We are over the limit, so try to remove a requested number of 7200 # spaces from leading whitespace. We are only allowed to remove 7201 # from whitespace items created on this batch, since others have 7202 # already been used and cannot be undone. 7203 my @candidates = (); 7204 my $i; 7205 7206 # loop over all whitespace items created for the current batch 7207 for ( $i = 0 ; $i <= $max_gnu_item_index ; $i++ ) { 7208 my $item = $gnu_item_list[$i]; 7209 7210 # item must still be open to be a candidate (otherwise it 7211 # cannot influence the current token) 7212 next if ( $item->get_CLOSED() >= 0 ); 7213 7214 my $available_spaces = $item->get_AVAILABLE_SPACES(); 7215 7216 if ( $available_spaces > 0 ) { 7217 push( @candidates, [ $i, $available_spaces ] ); 7218 } 7219 } 7220 7221 return unless (@candidates); 7222 7223 # sort by available whitespace so that we can remove whitespace 7224 # from the maximum available first 7225 @candidates = sort { $b->[1] <=> $a->[1] } @candidates; 7226 7227 # keep removing whitespace until we are done or have no more 7228 my $candidate; 7229 foreach $candidate (@candidates) { 7230 my ( $i, $available_spaces ) = @{$candidate}; 7231 my $deleted_spaces = 7232 ( $available_spaces > $spaces_needed ) 7233 ? $spaces_needed 7234 : $available_spaces; 7235 7236 # remove the incremental space from this item 7237 $gnu_item_list[$i]->decrease_AVAILABLE_SPACES($deleted_spaces); 7238 7239 my $i_debug = $i; 7240 7241 # update the leading whitespace of this item and all items 7242 # that came after it 7243 for ( ; $i <= $max_gnu_item_index ; $i++ ) { 7244 7245 my $old_spaces = $gnu_item_list[$i]->get_SPACES(); 7246 if ( $old_spaces >= $deleted_spaces ) { 7247 $gnu_item_list[$i]->decrease_SPACES($deleted_spaces); 7248 } 7249 7250 # shouldn't happen except for code bug: 7251 else { 7252 my $level = $gnu_item_list[$i_debug]->get_LEVEL(); 7253 my $ci_level = $gnu_item_list[$i_debug]->get_CI_LEVEL(); 7254 my $old_level = $gnu_item_list[$i]->get_LEVEL(); 7255 my $old_ci_level = $gnu_item_list[$i]->get_CI_LEVEL(); 7256 warning( 7257"program bug with -lp: want to delete $deleted_spaces from item $i, but old=$old_spaces deleted: lev=$level ci=$ci_level deleted: level=$old_level ci=$ci_level\n" 7258 ); 7259 report_definite_bug(); 7260 } 7261 } 7262 $gnu_position_predictor -= $deleted_spaces; 7263 $spaces_needed -= $deleted_spaces; 7264 last unless ( $spaces_needed > 0 ); 7265 } 7266} 7267 7268sub finish_lp_batch { 7269 7270 # This routine is called once after each each output stream batch is 7271 # finished to undo indentation for all incomplete -lp 7272 # indentation levels. It is too risky to leave a level open, 7273 # because then we can't backtrack in case of a long line to follow. 7274 # This means that comments and blank lines will disrupt this 7275 # indentation style. But the vertical aligner may be able to 7276 # get the space back if there are side comments. 7277 7278 # this is only for the 'lp' style 7279 return unless ($rOpts_line_up_parentheses); 7280 7281 # nothing can be done if no stack items defined for this line 7282 return if ( $max_gnu_item_index == UNDEFINED_INDEX ); 7283 7284 # loop over all whitespace items created for the current batch 7285 my $i; 7286 for ( $i = 0 ; $i <= $max_gnu_item_index ; $i++ ) { 7287 my $item = $gnu_item_list[$i]; 7288 7289 # only look for open items 7290 next if ( $item->get_CLOSED() >= 0 ); 7291 7292 # Tentatively remove all of the available space 7293 # (The vertical aligner will try to get it back later) 7294 my $available_spaces = $item->get_AVAILABLE_SPACES(); 7295 if ( $available_spaces > 0 ) { 7296 7297 # delete incremental space for this item 7298 $gnu_item_list[$i] 7299 ->tentatively_decrease_AVAILABLE_SPACES($available_spaces); 7300 7301 # Reduce the total indentation space of any nodes that follow 7302 # Note that any such nodes must necessarily be dependents 7303 # of this node. 7304 foreach ( $i + 1 .. $max_gnu_item_index ) { 7305 $gnu_item_list[$_]->decrease_SPACES($available_spaces); 7306 } 7307 } 7308 } 7309 return; 7310} 7311 7312sub reduce_lp_indentation { 7313 7314 # reduce the leading whitespace at token $i if possible by $spaces_needed 7315 # (a large value of $spaces_needed will remove all excess space) 7316 # NOTE: to be called from scan_list only for a sequence of tokens 7317 # contained between opening and closing parens/braces/brackets 7318 7319 my ( $i, $spaces_wanted ) = @_; 7320 my $deleted_spaces = 0; 7321 7322 my $item = $leading_spaces_to_go[$i]; 7323 my $available_spaces = $item->get_AVAILABLE_SPACES(); 7324 7325 if ( 7326 $available_spaces > 0 7327 && ( ( $spaces_wanted <= $available_spaces ) 7328 || !$item->get_HAVE_CHILD() ) 7329 ) 7330 { 7331 7332 # we'll remove these spaces, but mark them as recoverable 7333 $deleted_spaces = 7334 $item->tentatively_decrease_AVAILABLE_SPACES($spaces_wanted); 7335 } 7336 7337 return $deleted_spaces; 7338} 7339 7340sub token_sequence_length { 7341 7342 # return length of tokens ($ibeg .. $iend) including $ibeg & $iend 7343 # returns 0 if $ibeg > $iend (shouldn't happen) 7344 my ( $ibeg, $iend ) = @_; 7345 return 0 if ( $iend < 0 || $ibeg > $iend ); 7346 return $summed_lengths_to_go[ $iend + 1 ] if ( $ibeg < 0 ); 7347 return $summed_lengths_to_go[ $iend + 1 ] - $summed_lengths_to_go[$ibeg]; 7348} 7349 7350sub total_line_length { 7351 7352 # return length of a line of tokens ($ibeg .. $iend) 7353 my ( $ibeg, $iend ) = @_; 7354 return leading_spaces_to_go($ibeg) + token_sequence_length( $ibeg, $iend ); 7355} 7356 7357sub maximum_line_length_for_level { 7358 7359 # return maximum line length for line starting with a given level 7360 my $maximum_line_length = $rOpts_maximum_line_length; 7361 7362 # Modify if -vmll option is selected 7363 if ($rOpts_variable_maximum_line_length) { 7364 my $level = shift; 7365 if ( $level < 0 ) { $level = 0 } 7366 $maximum_line_length += $level * $rOpts_indent_columns; 7367 } 7368 return $maximum_line_length; 7369} 7370 7371sub maximum_line_length { 7372 7373 # return maximum line length for line starting with the token at given index 7374 return maximum_line_length_for_level( $levels_to_go[ $_[0] ] ); 7375 7376} 7377 7378sub excess_line_length { 7379 7380 # return number of characters by which a line of tokens ($ibeg..$iend) 7381 # exceeds the allowable line length. 7382 my ( $ibeg, $iend ) = @_; 7383 return total_line_length( $ibeg, $iend ) - maximum_line_length($ibeg); 7384} 7385 7386sub finish_formatting { 7387 7388 # flush buffer and write any informative messages 7389 my $self = shift; 7390 7391 flush(); 7392 $file_writer_object->decrement_output_line_number() 7393 ; # fix up line number since it was incremented 7394 we_are_at_the_last_line(); 7395 if ( $added_semicolon_count > 0 ) { 7396 my $first = ( $added_semicolon_count > 1 ) ? "First" : ""; 7397 my $what = 7398 ( $added_semicolon_count > 1 ) ? "semicolons were" : "semicolon was"; 7399 write_logfile_entry("$added_semicolon_count $what added:\n"); 7400 write_logfile_entry( 7401 " $first at input line $first_added_semicolon_at\n"); 7402 7403 if ( $added_semicolon_count > 1 ) { 7404 write_logfile_entry( 7405 " Last at input line $last_added_semicolon_at\n"); 7406 } 7407 write_logfile_entry(" (Use -nasc to prevent semicolon addition)\n"); 7408 write_logfile_entry("\n"); 7409 } 7410 7411 if ( $deleted_semicolon_count > 0 ) { 7412 my $first = ( $deleted_semicolon_count > 1 ) ? "First" : ""; 7413 my $what = 7414 ( $deleted_semicolon_count > 1 ) 7415 ? "semicolons were" 7416 : "semicolon was"; 7417 write_logfile_entry( 7418 "$deleted_semicolon_count unnecessary $what deleted:\n"); 7419 write_logfile_entry( 7420 " $first at input line $first_deleted_semicolon_at\n"); 7421 7422 if ( $deleted_semicolon_count > 1 ) { 7423 write_logfile_entry( 7424 " Last at input line $last_deleted_semicolon_at\n"); 7425 } 7426 write_logfile_entry(" (Use -ndsc to prevent semicolon deletion)\n"); 7427 write_logfile_entry("\n"); 7428 } 7429 7430 if ( $embedded_tab_count > 0 ) { 7431 my $first = ( $embedded_tab_count > 1 ) ? "First" : ""; 7432 my $what = 7433 ( $embedded_tab_count > 1 ) 7434 ? "quotes or patterns" 7435 : "quote or pattern"; 7436 write_logfile_entry("$embedded_tab_count $what had embedded tabs:\n"); 7437 write_logfile_entry( 7438"This means the display of this script could vary with device or software\n" 7439 ); 7440 write_logfile_entry(" $first at input line $first_embedded_tab_at\n"); 7441 7442 if ( $embedded_tab_count > 1 ) { 7443 write_logfile_entry( 7444 " Last at input line $last_embedded_tab_at\n"); 7445 } 7446 write_logfile_entry("\n"); 7447 } 7448 7449 if ($first_tabbing_disagreement) { 7450 write_logfile_entry( 7451"First indentation disagreement seen at input line $first_tabbing_disagreement\n" 7452 ); 7453 } 7454 7455 if ($in_tabbing_disagreement) { 7456 write_logfile_entry( 7457"Ending with indentation disagreement which started at input line $in_tabbing_disagreement\n" 7458 ); 7459 } 7460 else { 7461 7462 if ($last_tabbing_disagreement) { 7463 7464 write_logfile_entry( 7465"Last indentation disagreement seen at input line $last_tabbing_disagreement\n" 7466 ); 7467 } 7468 else { 7469 write_logfile_entry("No indentation disagreement seen\n"); 7470 } 7471 } 7472 if ($first_tabbing_disagreement) { 7473 write_logfile_entry( 7474"Note: Indentation disagreement detection is not accurate for outdenting and -lp.\n" 7475 ); 7476 } 7477 write_logfile_entry("\n"); 7478 7479 $vertical_aligner_object->report_anything_unusual(); 7480 7481 $file_writer_object->report_line_length_errors(); 7482} 7483 7484sub check_options { 7485 7486 # This routine is called to check the Opts hash after it is defined 7487 7488 ($rOpts) = @_; 7489 7490 make_static_block_comment_pattern(); 7491 make_static_side_comment_pattern(); 7492 make_closing_side_comment_prefix(); 7493 make_closing_side_comment_list_pattern(); 7494 $format_skipping_pattern_begin = 7495 make_format_skipping_pattern( 'format-skipping-begin', '#<<<' ); 7496 $format_skipping_pattern_end = 7497 make_format_skipping_pattern( 'format-skipping-end', '#>>>' ); 7498 7499 # If closing side comments ARE selected, then we can safely 7500 # delete old closing side comments unless closing side comment 7501 # warnings are requested. This is a good idea because it will 7502 # eliminate any old csc's which fall below the line count threshold. 7503 # We cannot do this if warnings are turned on, though, because we 7504 # might delete some text which has been added. So that must 7505 # be handled when comments are created. 7506 if ( $rOpts->{'closing-side-comments'} ) { 7507 if ( !$rOpts->{'closing-side-comment-warnings'} ) { 7508 $rOpts->{'delete-closing-side-comments'} = 1; 7509 } 7510 } 7511 7512 # If closing side comments ARE NOT selected, but warnings ARE 7513 # selected and we ARE DELETING csc's, then we will pretend to be 7514 # adding with a huge interval. This will force the comments to be 7515 # generated for comparison with the old comments, but not added. 7516 elsif ( $rOpts->{'closing-side-comment-warnings'} ) { 7517 if ( $rOpts->{'delete-closing-side-comments'} ) { 7518 $rOpts->{'delete-closing-side-comments'} = 0; 7519 $rOpts->{'closing-side-comments'} = 1; 7520 $rOpts->{'closing-side-comment-interval'} = 100000000; 7521 } 7522 } 7523 7524 make_bli_pattern(); 7525 make_block_brace_vertical_tightness_pattern(); 7526 7527 if ( $rOpts->{'line-up-parentheses'} ) { 7528 7529 if ( $rOpts->{'indent-only'} 7530 || !$rOpts->{'add-newlines'} 7531 || !$rOpts->{'delete-old-newlines'} ) 7532 { 7533 Perl::Tidy::Warn <<EOM; 7534----------------------------------------------------------------------- 7535Conflict: -lp conflicts with -io, -fnl, -nanl, or -ndnl; ignoring -lp 7536 7537The -lp indentation logic requires that perltidy be able to coordinate 7538arbitrarily large numbers of line breakpoints. This isn't possible 7539with these flags. Sometimes an acceptable workaround is to use -wocb=3 7540----------------------------------------------------------------------- 7541EOM 7542 $rOpts->{'line-up-parentheses'} = 0; 7543 } 7544 } 7545 7546 # At present, tabs are not compatible with the line-up-parentheses style 7547 # (it would be possible to entab the total leading whitespace 7548 # just prior to writing the line, if desired). 7549 if ( $rOpts->{'line-up-parentheses'} && $rOpts->{'tabs'} ) { 7550 Perl::Tidy::Warn <<EOM; 7551Conflict: -t (tabs) cannot be used with the -lp option; ignoring -t; see -et. 7552EOM 7553 $rOpts->{'tabs'} = 0; 7554 } 7555 7556 # Likewise, tabs are not compatible with outdenting.. 7557 if ( $rOpts->{'outdent-keywords'} && $rOpts->{'tabs'} ) { 7558 Perl::Tidy::Warn <<EOM; 7559Conflict: -t (tabs) cannot be used with the -okw options; ignoring -t; see -et. 7560EOM 7561 $rOpts->{'tabs'} = 0; 7562 } 7563 7564 if ( $rOpts->{'outdent-labels'} && $rOpts->{'tabs'} ) { 7565 Perl::Tidy::Warn <<EOM; 7566Conflict: -t (tabs) cannot be used with the -ola option; ignoring -t; see -et. 7567EOM 7568 $rOpts->{'tabs'} = 0; 7569 } 7570 7571 if ( !$rOpts->{'space-for-semicolon'} ) { 7572 $want_left_space{'f'} = -1; 7573 } 7574 7575 if ( $rOpts->{'space-terminal-semicolon'} ) { 7576 $want_left_space{';'} = 1; 7577 } 7578 7579 # implement outdenting preferences for keywords 7580 %outdent_keyword = (); 7581 unless ( @_ = split_words( $rOpts->{'outdent-keyword-okl'} ) ) { 7582 @_ = qw(next last redo goto return); # defaults 7583 } 7584 7585 # FUTURE: if not a keyword, assume that it is an identifier 7586 foreach (@_) { 7587 if ( $Perl::Tidy::Tokenizer::is_keyword{$_} ) { 7588 $outdent_keyword{$_} = 1; 7589 } 7590 else { 7591 Perl::Tidy::Warn "ignoring '$_' in -okwl list; not a perl keyword"; 7592 } 7593 } 7594 7595 # implement user whitespace preferences 7596 if ( @_ = split_words( $rOpts->{'want-left-space'} ) ) { 7597 @want_left_space{@_} = (1) x scalar(@_); 7598 } 7599 7600 if ( @_ = split_words( $rOpts->{'want-right-space'} ) ) { 7601 @want_right_space{@_} = (1) x scalar(@_); 7602 } 7603 7604 if ( @_ = split_words( $rOpts->{'nowant-left-space'} ) ) { 7605 @want_left_space{@_} = (-1) x scalar(@_); 7606 } 7607 7608 if ( @_ = split_words( $rOpts->{'nowant-right-space'} ) ) { 7609 @want_right_space{@_} = (-1) x scalar(@_); 7610 } 7611 if ( $rOpts->{'dump-want-left-space'} ) { 7612 dump_want_left_space(*STDOUT); 7613 Perl::Tidy::Exit 0; 7614 } 7615 7616 if ( $rOpts->{'dump-want-right-space'} ) { 7617 dump_want_right_space(*STDOUT); 7618 Perl::Tidy::Exit 0; 7619 } 7620 7621 # default keywords for which space is introduced before an opening paren 7622 # (at present, including them messes up vertical alignment) 7623 @_ = qw(my local our and or err eq ne if else elsif until 7624 unless while for foreach return switch case given when); 7625 @space_after_keyword{@_} = (1) x scalar(@_); 7626 7627 # first remove any or all of these if desired 7628 if ( @_ = split_words( $rOpts->{'nospace-after-keyword'} ) ) { 7629 7630 # -nsak='*' selects all the above keywords 7631 if ( @_ == 1 && $_[0] eq '*' ) { @_ = keys(%space_after_keyword) } 7632 @space_after_keyword{@_} = (0) x scalar(@_); 7633 } 7634 7635 # then allow user to add to these defaults 7636 if ( @_ = split_words( $rOpts->{'space-after-keyword'} ) ) { 7637 @space_after_keyword{@_} = (1) x scalar(@_); 7638 } 7639 7640 # implement user break preferences 7641 my @all_operators = qw(% + - * / x != == >= <= =~ !~ < > | & 7642 = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x= 7643 . : ? && || and or err xor 7644 ); 7645 7646 my $break_after = sub { 7647 foreach my $tok (@_) { 7648 if ( $tok eq '?' ) { $tok = ':' } # patch to coordinate ?/: 7649 my $lbs = $left_bond_strength{$tok}; 7650 my $rbs = $right_bond_strength{$tok}; 7651 if ( defined($lbs) && defined($rbs) && $lbs < $rbs ) { 7652 ( $right_bond_strength{$tok}, $left_bond_strength{$tok} ) = 7653 ( $lbs, $rbs ); 7654 } 7655 } 7656 }; 7657 7658 my $break_before = sub { 7659 foreach my $tok (@_) { 7660 my $lbs = $left_bond_strength{$tok}; 7661 my $rbs = $right_bond_strength{$tok}; 7662 if ( defined($lbs) && defined($rbs) && $rbs < $lbs ) { 7663 ( $right_bond_strength{$tok}, $left_bond_strength{$tok} ) = 7664 ( $lbs, $rbs ); 7665 } 7666 } 7667 }; 7668 7669 $break_after->(@all_operators) if ( $rOpts->{'break-after-all-operators'} ); 7670 $break_before->(@all_operators) 7671 if ( $rOpts->{'break-before-all-operators'} ); 7672 7673 $break_after->( split_words( $rOpts->{'want-break-after'} ) ); 7674 $break_before->( split_words( $rOpts->{'want-break-before'} ) ); 7675 7676 # make note if breaks are before certain key types 7677 %want_break_before = (); 7678 foreach my $tok ( @all_operators, ',' ) { 7679 $want_break_before{$tok} = 7680 $left_bond_strength{$tok} < $right_bond_strength{$tok}; 7681 } 7682 7683 # Coordinate ?/: breaks, which must be similar 7684 if ( !$want_break_before{':'} ) { 7685 $want_break_before{'?'} = $want_break_before{':'}; 7686 $right_bond_strength{'?'} = $right_bond_strength{':'} + 0.01; 7687 $left_bond_strength{'?'} = NO_BREAK; 7688 } 7689 7690 # Define here tokens which may follow the closing brace of a do statement 7691 # on the same line, as in: 7692 # } while ( $something); 7693 @_ = qw(until while unless if ; : ); 7694 push @_, ','; 7695 @is_do_follower{@_} = (1) x scalar(@_); 7696 7697 # These tokens may follow the closing brace of an if or elsif block. 7698 # In other words, for cuddled else we want code to look like: 7699 # } elsif ( $something) { 7700 # } else { 7701 if ( $rOpts->{'cuddled-else'} ) { 7702 @_ = qw(else elsif); 7703 @is_if_brace_follower{@_} = (1) x scalar(@_); 7704 } 7705 else { 7706 %is_if_brace_follower = (); 7707 } 7708 7709 # nothing can follow the closing curly of an else { } block: 7710 %is_else_brace_follower = (); 7711 7712 # what can follow a multi-line anonymous sub definition closing curly: 7713 @_ = qw# ; : => or and && || ~~ !~~ ) #; 7714 push @_, ','; 7715 @is_anon_sub_brace_follower{@_} = (1) x scalar(@_); 7716 7717 # what can follow a one-line anonynomous sub closing curly: 7718 # one-line anonumous subs also have ']' here... 7719 # see tk3.t and PP.pm 7720 @_ = qw# ; : => or and && || ) ] ~~ !~~ #; 7721 push @_, ','; 7722 @is_anon_sub_1_brace_follower{@_} = (1) x scalar(@_); 7723 7724 # What can follow a closing curly of a block 7725 # which is not an if/elsif/else/do/sort/map/grep/eval/sub 7726 # Testfiles: 'Toolbar.pm', 'Menubar.pm', bless.t, '3rules.pl' 7727 @_ = qw# ; : => or and && || ) #; 7728 push @_, ','; 7729 7730 # allow cuddled continue if cuddled else is specified 7731 if ( $rOpts->{'cuddled-else'} ) { push @_, 'continue'; } 7732 7733 @is_other_brace_follower{@_} = (1) x scalar(@_); 7734 7735 $right_bond_strength{'{'} = WEAK; 7736 $left_bond_strength{'{'} = VERY_STRONG; 7737 7738 # make -l=0 equal to -l=infinite 7739 if ( !$rOpts->{'maximum-line-length'} ) { 7740 $rOpts->{'maximum-line-length'} = 1000000; 7741 } 7742 7743 # make -lbl=0 equal to -lbl=infinite 7744 if ( !$rOpts->{'long-block-line-count'} ) { 7745 $rOpts->{'long-block-line-count'} = 1000000; 7746 } 7747 7748 my $ole = $rOpts->{'output-line-ending'}; 7749 if ($ole) { 7750 my %endings = ( 7751 dos => "\015\012", 7752 win => "\015\012", 7753 mac => "\015", 7754 unix => "\012", 7755 ); 7756 $ole = lc $ole; 7757 unless ( $rOpts->{'output-line-ending'} = $endings{$ole} ) { 7758 my $str = join " ", keys %endings; 7759 Perl::Tidy::Die <<EOM; 7760Unrecognized line ending '$ole'; expecting one of: $str 7761EOM 7762 } 7763 if ( $rOpts->{'preserve-line-endings'} ) { 7764 Perl::Tidy::Warn "Ignoring -ple; conflicts with -ole\n"; 7765 $rOpts->{'preserve-line-endings'} = undef; 7766 } 7767 } 7768 7769 # hashes used to simplify setting whitespace 7770 %tightness = ( 7771 '{' => $rOpts->{'brace-tightness'}, 7772 '}' => $rOpts->{'brace-tightness'}, 7773 '(' => $rOpts->{'paren-tightness'}, 7774 ')' => $rOpts->{'paren-tightness'}, 7775 '[' => $rOpts->{'square-bracket-tightness'}, 7776 ']' => $rOpts->{'square-bracket-tightness'}, 7777 ); 7778 %matching_token = ( 7779 '{' => '}', 7780 '(' => ')', 7781 '[' => ']', 7782 '?' => ':', 7783 ); 7784 7785 # frequently used parameters 7786 $rOpts_add_newlines = $rOpts->{'add-newlines'}; 7787 $rOpts_add_whitespace = $rOpts->{'add-whitespace'}; 7788 $rOpts_block_brace_tightness = $rOpts->{'block-brace-tightness'}; 7789 $rOpts_block_brace_vertical_tightness = 7790 $rOpts->{'block-brace-vertical-tightness'}; 7791 $rOpts_brace_left_and_indent = $rOpts->{'brace-left-and-indent'}; 7792 $rOpts_comma_arrow_breakpoints = $rOpts->{'comma-arrow-breakpoints'}; 7793 $rOpts_break_at_old_ternary_breakpoints = 7794 $rOpts->{'break-at-old-ternary-breakpoints'}; 7795 $rOpts_break_at_old_attribute_breakpoints = 7796 $rOpts->{'break-at-old-attribute-breakpoints'}; 7797 $rOpts_break_at_old_comma_breakpoints = 7798 $rOpts->{'break-at-old-comma-breakpoints'}; 7799 $rOpts_break_at_old_keyword_breakpoints = 7800 $rOpts->{'break-at-old-keyword-breakpoints'}; 7801 $rOpts_break_at_old_logical_breakpoints = 7802 $rOpts->{'break-at-old-logical-breakpoints'}; 7803 $rOpts_closing_side_comment_else_flag = 7804 $rOpts->{'closing-side-comment-else-flag'}; 7805 $rOpts_closing_side_comment_maximum_text = 7806 $rOpts->{'closing-side-comment-maximum-text'}; 7807 $rOpts_continuation_indentation = $rOpts->{'continuation-indentation'}; 7808 $rOpts_cuddled_else = $rOpts->{'cuddled-else'}; 7809 $rOpts_delete_old_whitespace = $rOpts->{'delete-old-whitespace'}; 7810 $rOpts_fuzzy_line_length = $rOpts->{'fuzzy-line-length'}; 7811 $rOpts_indent_columns = $rOpts->{'indent-columns'}; 7812 $rOpts_line_up_parentheses = $rOpts->{'line-up-parentheses'}; 7813 $rOpts_maximum_fields_per_table = $rOpts->{'maximum-fields-per-table'}; 7814 $rOpts_maximum_line_length = $rOpts->{'maximum-line-length'}; 7815 $rOpts_whitespace_cycle = $rOpts->{'whitespace-cycle'}; 7816 7817 $rOpts_variable_maximum_line_length = 7818 $rOpts->{'variable-maximum-line-length'}; 7819 $rOpts_short_concatenation_item_length = 7820 $rOpts->{'short-concatenation-item-length'}; 7821 7822 $rOpts_keep_old_blank_lines = $rOpts->{'keep-old-blank-lines'}; 7823 $rOpts_ignore_old_breakpoints = $rOpts->{'ignore-old-breakpoints'}; 7824 $rOpts_format_skipping = $rOpts->{'format-skipping'}; 7825 $rOpts_space_function_paren = $rOpts->{'space-function-paren'}; 7826 $rOpts_space_keyword_paren = $rOpts->{'space-keyword-paren'}; 7827 $rOpts_keep_interior_semicolons = $rOpts->{'keep-interior-semicolons'}; 7828 $rOpts_ignore_side_comment_lengths = 7829 $rOpts->{'ignore-side-comment-lengths'}; 7830 7831 # Note that both opening and closing tokens can access the opening 7832 # and closing flags of their container types. 7833 %opening_vertical_tightness = ( 7834 '(' => $rOpts->{'paren-vertical-tightness'}, 7835 '{' => $rOpts->{'brace-vertical-tightness'}, 7836 '[' => $rOpts->{'square-bracket-vertical-tightness'}, 7837 ')' => $rOpts->{'paren-vertical-tightness'}, 7838 '}' => $rOpts->{'brace-vertical-tightness'}, 7839 ']' => $rOpts->{'square-bracket-vertical-tightness'}, 7840 ); 7841 7842 %closing_vertical_tightness = ( 7843 '(' => $rOpts->{'paren-vertical-tightness-closing'}, 7844 '{' => $rOpts->{'brace-vertical-tightness-closing'}, 7845 '[' => $rOpts->{'square-bracket-vertical-tightness-closing'}, 7846 ')' => $rOpts->{'paren-vertical-tightness-closing'}, 7847 '}' => $rOpts->{'brace-vertical-tightness-closing'}, 7848 ']' => $rOpts->{'square-bracket-vertical-tightness-closing'}, 7849 ); 7850 7851 $rOpts_tight_secret_operators = $rOpts->{'tight-secret-operators'}; 7852 7853 # assume flag for '>' same as ')' for closing qw quotes 7854 %closing_token_indentation = ( 7855 ')' => $rOpts->{'closing-paren-indentation'}, 7856 '}' => $rOpts->{'closing-brace-indentation'}, 7857 ']' => $rOpts->{'closing-square-bracket-indentation'}, 7858 '>' => $rOpts->{'closing-paren-indentation'}, 7859 ); 7860 7861 # flag indicating if any closing tokens are indented 7862 $some_closing_token_indentation = 7863 $rOpts->{'closing-paren-indentation'} 7864 || $rOpts->{'closing-brace-indentation'} 7865 || $rOpts->{'closing-square-bracket-indentation'} 7866 || $rOpts->{'indent-closing-brace'}; 7867 7868 %opening_token_right = ( 7869 '(' => $rOpts->{'opening-paren-right'}, 7870 '{' => $rOpts->{'opening-hash-brace-right'}, 7871 '[' => $rOpts->{'opening-square-bracket-right'}, 7872 ); 7873 7874 %stack_opening_token = ( 7875 '(' => $rOpts->{'stack-opening-paren'}, 7876 '{' => $rOpts->{'stack-opening-hash-brace'}, 7877 '[' => $rOpts->{'stack-opening-square-bracket'}, 7878 ); 7879 7880 %stack_closing_token = ( 7881 ')' => $rOpts->{'stack-closing-paren'}, 7882 '}' => $rOpts->{'stack-closing-hash-brace'}, 7883 ']' => $rOpts->{'stack-closing-square-bracket'}, 7884 ); 7885 $rOpts_stack_closing_block_brace = $rOpts->{'stack-closing-block-brace'}; 7886} 7887 7888sub make_static_block_comment_pattern { 7889 7890 # create the pattern used to identify static block comments 7891 $static_block_comment_pattern = '^\s*##'; 7892 7893 # allow the user to change it 7894 if ( $rOpts->{'static-block-comment-prefix'} ) { 7895 my $prefix = $rOpts->{'static-block-comment-prefix'}; 7896 $prefix =~ s/^\s*//; 7897 my $pattern = $prefix; 7898 7899 # user may give leading caret to force matching left comments only 7900 if ( $prefix !~ /^\^#/ ) { 7901 if ( $prefix !~ /^#/ ) { 7902 Perl::Tidy::Die 7903"ERROR: the -sbcp prefix is '$prefix' but must begin with '#' or '^#'\n"; 7904 } 7905 $pattern = '^\s*' . $prefix; 7906 } 7907 eval "'##'=~/$pattern/"; 7908 if ($@) { 7909 Perl::Tidy::Die 7910"ERROR: the -sbc prefix '$prefix' causes the invalid regex '$pattern'\n"; 7911 } 7912 $static_block_comment_pattern = $pattern; 7913 } 7914} 7915 7916sub make_format_skipping_pattern { 7917 my ( $opt_name, $default ) = @_; 7918 my $param = $rOpts->{$opt_name}; 7919 unless ($param) { $param = $default } 7920 $param =~ s/^\s*//; 7921 if ( $param !~ /^#/ ) { 7922 Perl::Tidy::Die 7923 "ERROR: the $opt_name parameter '$param' must begin with '#'\n"; 7924 } 7925 my $pattern = '^' . $param . '\s'; 7926 eval "'#'=~/$pattern/"; 7927 if ($@) { 7928 Perl::Tidy::Die 7929"ERROR: the $opt_name parameter '$param' causes the invalid regex '$pattern'\n"; 7930 } 7931 return $pattern; 7932} 7933 7934sub make_closing_side_comment_list_pattern { 7935 7936 # turn any input list into a regex for recognizing selected block types 7937 $closing_side_comment_list_pattern = '^\w+'; 7938 if ( defined( $rOpts->{'closing-side-comment-list'} ) 7939 && $rOpts->{'closing-side-comment-list'} ) 7940 { 7941 $closing_side_comment_list_pattern = 7942 make_block_pattern( '-cscl', $rOpts->{'closing-side-comment-list'} ); 7943 } 7944} 7945 7946sub make_bli_pattern { 7947 7948 if ( defined( $rOpts->{'brace-left-and-indent-list'} ) 7949 && $rOpts->{'brace-left-and-indent-list'} ) 7950 { 7951 $bli_list_string = $rOpts->{'brace-left-and-indent-list'}; 7952 } 7953 7954 $bli_pattern = make_block_pattern( '-blil', $bli_list_string ); 7955} 7956 7957sub make_block_brace_vertical_tightness_pattern { 7958 7959 # turn any input list into a regex for recognizing selected block types 7960 $block_brace_vertical_tightness_pattern = 7961 '^((if|else|elsif|unless|while|for|foreach|do|\w+:)$|sub)'; 7962 if ( defined( $rOpts->{'block-brace-vertical-tightness-list'} ) 7963 && $rOpts->{'block-brace-vertical-tightness-list'} ) 7964 { 7965 $block_brace_vertical_tightness_pattern = 7966 make_block_pattern( '-bbvtl', 7967 $rOpts->{'block-brace-vertical-tightness-list'} ); 7968 } 7969} 7970 7971sub make_block_pattern { 7972 7973 # given a string of block-type keywords, return a regex to match them 7974 # The only tricky part is that labels are indicated with a single ':' 7975 # and the 'sub' token text may have additional text after it (name of 7976 # sub). 7977 # 7978 # Example: 7979 # 7980 # input string: "if else elsif unless while for foreach do : sub"; 7981 # pattern: '^((if|else|elsif|unless|while|for|foreach|do|\w+:)$|sub)'; 7982 7983 my ( $abbrev, $string ) = @_; 7984 my @list = split_words($string); 7985 my @words = (); 7986 my %seen; 7987 for my $i (@list) { 7988 if ( $i eq '*' ) { my $pattern = '^.*'; return $pattern } 7989 next if $seen{$i}; 7990 $seen{$i} = 1; 7991 if ( $i eq 'sub' ) { 7992 } 7993 elsif ( $i eq ';' ) { 7994 push @words, ';'; 7995 } 7996 elsif ( $i eq '{' ) { 7997 push @words, '\{'; 7998 } 7999 elsif ( $i eq ':' ) { 8000 push @words, '\w+:'; 8001 } 8002 elsif ( $i =~ /^\w/ ) { 8003 push @words, $i; 8004 } 8005 else { 8006 Perl::Tidy::Warn 8007 "unrecognized block type $i after $abbrev, ignoring\n"; 8008 } 8009 } 8010 my $pattern = '(' . join( '|', @words ) . ')$'; 8011 if ( $seen{'sub'} ) { 8012 $pattern = '(' . $pattern . '|sub)'; 8013 } 8014 $pattern = '^' . $pattern; 8015 return $pattern; 8016} 8017 8018sub make_static_side_comment_pattern { 8019 8020 # create the pattern used to identify static side comments 8021 $static_side_comment_pattern = '^##'; 8022 8023 # allow the user to change it 8024 if ( $rOpts->{'static-side-comment-prefix'} ) { 8025 my $prefix = $rOpts->{'static-side-comment-prefix'}; 8026 $prefix =~ s/^\s*//; 8027 my $pattern = '^' . $prefix; 8028 eval "'##'=~/$pattern/"; 8029 if ($@) { 8030 Perl::Tidy::Die 8031"ERROR: the -sscp prefix '$prefix' causes the invalid regex '$pattern'\n"; 8032 } 8033 $static_side_comment_pattern = $pattern; 8034 } 8035} 8036 8037sub make_closing_side_comment_prefix { 8038 8039 # Be sure we have a valid closing side comment prefix 8040 my $csc_prefix = $rOpts->{'closing-side-comment-prefix'}; 8041 my $csc_prefix_pattern; 8042 if ( !defined($csc_prefix) ) { 8043 $csc_prefix = '## end'; 8044 $csc_prefix_pattern = '^##\s+end'; 8045 } 8046 else { 8047 my $test_csc_prefix = $csc_prefix; 8048 if ( $test_csc_prefix !~ /^#/ ) { 8049 $test_csc_prefix = '#' . $test_csc_prefix; 8050 } 8051 8052 # make a regex to recognize the prefix 8053 my $test_csc_prefix_pattern = $test_csc_prefix; 8054 8055 # escape any special characters 8056 $test_csc_prefix_pattern =~ s/([^#\s\w])/\\$1/g; 8057 8058 $test_csc_prefix_pattern = '^' . $test_csc_prefix_pattern; 8059 8060 # allow exact number of intermediate spaces to vary 8061 $test_csc_prefix_pattern =~ s/\s+/\\s\+/g; 8062 8063 # make sure we have a good pattern 8064 # if we fail this we probably have an error in escaping 8065 # characters. 8066 eval "'##'=~/$test_csc_prefix_pattern/"; 8067 if ($@) { 8068 8069 # shouldn't happen..must have screwed up escaping, above 8070 report_definite_bug(); 8071 Perl::Tidy::Warn 8072"Program Error: the -cscp prefix '$csc_prefix' caused the invalid regex '$csc_prefix_pattern'\n"; 8073 8074 # just warn and keep going with defaults 8075 Perl::Tidy::Warn "Please consider using a simpler -cscp prefix\n"; 8076 Perl::Tidy::Warn 8077 "Using default -cscp instead; please check output\n"; 8078 } 8079 else { 8080 $csc_prefix = $test_csc_prefix; 8081 $csc_prefix_pattern = $test_csc_prefix_pattern; 8082 } 8083 } 8084 $rOpts->{'closing-side-comment-prefix'} = $csc_prefix; 8085 $closing_side_comment_prefix_pattern = $csc_prefix_pattern; 8086} 8087 8088sub dump_want_left_space { 8089 my $fh = shift; 8090 local $" = "\n"; 8091 print $fh <<EOM; 8092These values are the main control of whitespace to the left of a token type; 8093They may be altered with the -wls parameter. 8094For a list of token types, use perltidy --dump-token-types (-dtt) 8095 1 means the token wants a space to its left 8096-1 means the token does not want a space to its left 8097------------------------------------------------------------------------ 8098EOM 8099 foreach ( sort keys %want_left_space ) { 8100 print $fh "$_\t$want_left_space{$_}\n"; 8101 } 8102} 8103 8104sub dump_want_right_space { 8105 my $fh = shift; 8106 local $" = "\n"; 8107 print $fh <<EOM; 8108These values are the main control of whitespace to the right of a token type; 8109They may be altered with the -wrs parameter. 8110For a list of token types, use perltidy --dump-token-types (-dtt) 8111 1 means the token wants a space to its right 8112-1 means the token does not want a space to its right 8113------------------------------------------------------------------------ 8114EOM 8115 foreach ( sort keys %want_right_space ) { 8116 print $fh "$_\t$want_right_space{$_}\n"; 8117 } 8118} 8119 8120{ # begin is_essential_whitespace 8121 8122 my %is_sort_grep_map; 8123 my %is_for_foreach; 8124 8125 BEGIN { 8126 8127 @_ = qw(sort grep map); 8128 @is_sort_grep_map{@_} = (1) x scalar(@_); 8129 8130 @_ = qw(for foreach); 8131 @is_for_foreach{@_} = (1) x scalar(@_); 8132 8133 } 8134 8135 sub is_essential_whitespace { 8136 8137 # Essential whitespace means whitespace which cannot be safely deleted 8138 # without risking the introduction of a syntax error. 8139 # We are given three tokens and their types: 8140 # ($tokenl, $typel) is the token to the left of the space in question 8141 # ($tokenr, $typer) is the token to the right of the space in question 8142 # ($tokenll, $typell) is previous nonblank token to the left of $tokenl 8143 # 8144 # This is a slow routine but is not needed too often except when -mangle 8145 # is used. 8146 # 8147 # Note: This routine should almost never need to be changed. It is 8148 # for avoiding syntax problems rather than for formatting. 8149 my ( $tokenll, $typell, $tokenl, $typel, $tokenr, $typer ) = @_; 8150 8151 my $result = 8152 8153 # never combine two bare words or numbers 8154 # examples: and ::ok(1) 8155 # return ::spw(...) 8156 # for bla::bla:: abc 8157 # example is "%overload:: and" in files Dumpvalue.pm or colonbug.pl 8158 # $input eq"quit" to make $inputeq"quit" 8159 # my $size=-s::SINK if $file; <==OK but we won't do it 8160 # don't join something like: for bla::bla:: abc 8161 # example is "%overload:: and" in files Dumpvalue.pm or colonbug.pl 8162 ( ( $tokenl =~ /([\'\w]|\:\:)$/ && $typel ne 'CORE::' ) 8163 && ( $tokenr =~ /^([\'\w]|\:\:)/ ) ) 8164 8165 # do not combine a number with a concatination dot 8166 # example: pom.caputo: 8167 # $vt100_compatible ? "\e[0;0H" : ('-' x 78 . "\n"); 8168 || ( ( $typel eq 'n' ) && ( $tokenr eq '.' ) ) 8169 || ( ( $typer eq 'n' ) && ( $tokenl eq '.' ) ) 8170 8171 # do not join a minus with a bare word, because you might form 8172 # a file test operator. Example from Complex.pm: 8173 # if (CORE::abs($z - i) < $eps); "z-i" would be taken as a file test. 8174 || ( ( $tokenl eq '-' ) && ( $tokenr =~ /^[_A-Za-z]$/ ) ) 8175 8176 # and something like this could become ambiguous without space 8177 # after the '-': 8178 # use constant III=>1; 8179 # $a = $b - III; 8180 # and even this: 8181 # $a = - III; 8182 || ( ( $tokenl eq '-' ) 8183 && ( $typer =~ /^[wC]$/ && $tokenr =~ /^[_A-Za-z]/ ) ) 8184 8185 # '= -' should not become =- or you will get a warning 8186 # about reversed -= 8187 # || ($tokenr eq '-') 8188 8189 # keep a space between a quote and a bareword to prevent the 8190 # bareword from becoming a quote modifier. 8191 || ( ( $typel eq 'Q' ) && ( $tokenr =~ /^[a-zA-Z_]/ ) ) 8192 8193 # keep a space between a token ending in '$' and any word; 8194 # this caused trouble: "die @$ if $@" 8195 || ( ( $typel eq 'i' && $tokenl =~ /\$$/ ) 8196 && ( $tokenr =~ /^[a-zA-Z_]/ ) ) 8197 8198 # perl is very fussy about spaces before << 8199 || ( $tokenr =~ /^\<\</ ) 8200 8201 # avoid combining tokens to create new meanings. Example: 8202 # $a+ +$b must not become $a++$b 8203 || ( $is_digraph{ $tokenl . $tokenr } ) 8204 || ( $is_trigraph{ $tokenl . $tokenr } ) 8205 8206 # another example: do not combine these two &'s: 8207 # allow_options & &OPT_EXECCGI 8208 || ( $is_digraph{ $tokenl . substr( $tokenr, 0, 1 ) } ) 8209 8210 # don't combine $$ or $# with any alphanumeric 8211 # (testfile mangle.t with --mangle) 8212 || ( ( $tokenl =~ /^\$[\$\#]$/ ) && ( $tokenr =~ /^\w/ ) ) 8213 8214 # retain any space after possible filehandle 8215 # (testfiles prnterr1.t with --extrude and mangle.t with --mangle) 8216 || ( $typel eq 'Z' ) 8217 8218 # Perl is sensitive to whitespace after the + here: 8219 # $b = xvals $a + 0.1 * yvals $a; 8220 || ( $typell eq 'Z' && $typel =~ /^[\/\?\+\-\*]$/ ) 8221 8222 # keep paren separate in 'use Foo::Bar ()' 8223 || ( $tokenr eq '(' 8224 && $typel eq 'w' 8225 && $typell eq 'k' 8226 && $tokenll eq 'use' ) 8227 8228 # keep any space between filehandle and paren: 8229 # file mangle.t with --mangle: 8230 || ( $typel eq 'Y' && $tokenr eq '(' ) 8231 8232 # retain any space after here doc operator ( hereerr.t) 8233 || ( $typel eq 'h' ) 8234 8235 # be careful with a space around ++ and --, to avoid ambiguity as to 8236 # which token it applies 8237 || ( ( $typer =~ /^(pp|mm)$/ ) && ( $tokenl !~ /^[\;\{\(\[]/ ) ) 8238 || ( ( $typel =~ /^(\+\+|\-\-)$/ ) && ( $tokenr !~ /^[\;\}\)\]]/ ) ) 8239 8240 # need space after foreach my; for example, this will fail in 8241 # older versions of Perl: 8242 # foreach my$ft(@filetypes)... 8243 || ( 8244 $tokenl eq 'my' 8245 8246 # /^(for|foreach)$/ 8247 && $is_for_foreach{$tokenll} 8248 && $tokenr =~ /^\$/ 8249 ) 8250 8251 # must have space between grep and left paren; "grep(" will fail 8252 || ( $tokenr eq '(' && $is_sort_grep_map{$tokenl} ) 8253 8254 # don't stick numbers next to left parens, as in: 8255 #use Mail::Internet 1.28 (); (see Entity.pm, Head.pm, Test.pm) 8256 || ( ( $typel eq 'n' ) && ( $tokenr eq '(' ) ) 8257 8258 # We must be sure that a space between a ? and a quoted string 8259 # remains if the space before the ? remains. [Loca.pm, lockarea] 8260 # ie, 8261 # $b=join $comma ? ',' : ':', @_; # ok 8262 # $b=join $comma?',' : ':', @_; # ok! 8263 # $b=join $comma ?',' : ':', @_; # error! 8264 # Not really required: 8265 ## || ( ( $typel eq '?' ) && ( $typer eq 'Q' ) ) 8266 8267 # do not remove space between an '&' and a bare word because 8268 # it may turn into a function evaluation, like here 8269 # between '&' and 'O_ACCMODE', producing a syntax error [File.pm] 8270 # $opts{rdonly} = (($opts{mode} & O_ACCMODE) == O_RDONLY); 8271 || ( ( $typel eq '&' ) && ( $tokenr =~ /^[a-zA-Z_]/ ) ) 8272 8273 # space stacked labels (TODO: check if really necessary) 8274 || ( $typel eq 'J' && $typer eq 'J' ) 8275 8276 ; # the value of this long logic sequence is the result we want 8277 return $result; 8278 } 8279} 8280 8281{ 8282 my %secret_operators; 8283 my %is_leading_secret_token; 8284 8285 BEGIN { 8286 8287 # token lists for perl secret operators as compiled by Philippe Bruhat 8288 # at: https://metacpan.org/module/perlsecret 8289 %secret_operators = ( 8290 'Goatse' => [qw#= ( ) =#], #=( )= 8291 'Venus1' => [qw#0 +#], # 0+ 8292 'Venus2' => [qw#+ 0#], # +0 8293 'Enterprise' => [qw#) x ! !#], # ()x!! 8294 'Kite1' => [qw#~ ~ <>#], # ~~<> 8295 'Kite2' => [qw#~~ <>#], # ~~<> 8296 'Winking Fat Comma' => [ ( ',', '=>' ) ], # ,=> 8297 ); 8298 8299 # The following operators and constants are not included because they 8300 # are normally kept tight by perltidy: 8301 # !! ~~ <~> 8302 # 8303 8304 # Make a lookup table indexed by the first token of each operator: 8305 # first token => [list, list, ...] 8306 foreach my $value ( values(%secret_operators) ) { 8307 my $tok = $value->[0]; 8308 push @{ $is_leading_secret_token{$tok} }, $value; 8309 } 8310 } 8311 8312 sub secret_operator_whitespace { 8313 8314 my ( $jmax, $rtokens, $rtoken_type, $rwhite_space_flag ) = @_; 8315 8316 # Loop over all tokens in this line 8317 my ( $j, $token, $type ); 8318 for ( $j = 0 ; $j <= $jmax ; $j++ ) { 8319 8320 $token = $$rtokens[$j]; 8321 $type = $$rtoken_type[$j]; 8322 8323 # Skip unless this token might start a secret operator 8324 next if ( $type eq 'b' ); 8325 next unless ( $is_leading_secret_token{$token} ); 8326 8327 # Loop over all secret operators with this leading token 8328 foreach my $rpattern ( @{ $is_leading_secret_token{$token} } ) { 8329 my $jend = $j - 1; 8330 foreach my $tok ( @{$rpattern} ) { 8331 $jend++; 8332 $jend++ 8333 8334 if ( $jend <= $jmax && $$rtoken_type[$jend] eq 'b' ); 8335 if ( $jend > $jmax || $tok ne $$rtokens[$jend] ) { 8336 $jend = undef; 8337 last; 8338 } 8339 } 8340 8341 if ($jend) { 8342 8343 # set flags to prevent spaces within this operator 8344 for ( my $jj = $j + 1 ; $jj <= $jend ; $jj++ ) { 8345 $rwhite_space_flag->[$jj] = WS_NO; 8346 } 8347 $j = $jend; 8348 last; 8349 } 8350 } ## End Loop over all operators 8351 } ## End loop over all tokens 8352 } # End sub 8353} 8354 8355sub set_white_space_flag { 8356 8357 # This routine examines each pair of nonblank tokens and 8358 # sets values for array @white_space_flag. 8359 # 8360 # $white_space_flag[$j] is a flag indicating whether a white space 8361 # BEFORE token $j is needed, with the following values: 8362 # 8363 # WS_NO = -1 do not want a space before token $j 8364 # WS_OPTIONAL= 0 optional space or $j is a whitespace 8365 # WS_YES = 1 want a space before token $j 8366 # 8367 # 8368 # The values for the first token will be defined based 8369 # upon the contents of the "to_go" output array. 8370 # 8371 # Note: retain debug print statements because they are usually 8372 # required after adding new token types. 8373 8374 BEGIN { 8375 8376 # initialize these global hashes, which control the use of 8377 # whitespace around tokens: 8378 # 8379 # %binary_ws_rules 8380 # %want_left_space 8381 # %want_right_space 8382 # %space_after_keyword 8383 # 8384 # Many token types are identical to the tokens themselves. 8385 # See the tokenizer for a complete list. Here are some special types: 8386 # k = perl keyword 8387 # f = semicolon in for statement 8388 # m = unary minus 8389 # p = unary plus 8390 # Note that :: is excluded since it should be contained in an identifier 8391 # Note that '->' is excluded because it never gets space 8392 # parentheses and brackets are excluded since they are handled specially 8393 # curly braces are included but may be overridden by logic, such as 8394 # newline logic. 8395 8396 # NEW_TOKENS: create a whitespace rule here. This can be as 8397 # simple as adding your new letter to @spaces_both_sides, for 8398 # example. 8399 8400 @_ = qw" L { ( [ "; 8401 @is_opening_type{@_} = (1) x scalar(@_); 8402 8403 @_ = qw" R } ) ] "; 8404 @is_closing_type{@_} = (1) x scalar(@_); 8405 8406 my @spaces_both_sides = qw" 8407 + - * / % ? = . : x < > | & ^ .. << >> ** && .. || // => += -= 8408 .= %= x= &= |= ^= *= <> <= >= == =~ !~ /= != ... <<= >>= ~~ !~~ 8409 &&= ||= //= <=> A k f w F n C Y U G v 8410 "; 8411 8412 my @spaces_left_side = qw" 8413 t ! ~ m p { \ h pp mm Z j 8414 "; 8415 push( @spaces_left_side, '#' ); # avoids warning message 8416 8417 my @spaces_right_side = qw" 8418 ; } ) ] R J ++ -- **= 8419 "; 8420 push( @spaces_right_side, ',' ); # avoids warning message 8421 8422 # Note that we are in a BEGIN block here. Later in processing 8423 # the values of %want_left_space and %want_right_space 8424 # may be overridden by any user settings specified by the 8425 # -wls and -wrs parameters. However the binary_whitespace_rules 8426 # are hardwired and have priority. 8427 @want_left_space{@spaces_both_sides} = (1) x scalar(@spaces_both_sides); 8428 @want_right_space{@spaces_both_sides} = 8429 (1) x scalar(@spaces_both_sides); 8430 @want_left_space{@spaces_left_side} = (1) x scalar(@spaces_left_side); 8431 @want_right_space{@spaces_left_side} = (-1) x scalar(@spaces_left_side); 8432 @want_left_space{@spaces_right_side} = 8433 (-1) x scalar(@spaces_right_side); 8434 @want_right_space{@spaces_right_side} = 8435 (1) x scalar(@spaces_right_side); 8436 $want_left_space{'->'} = WS_NO; 8437 $want_right_space{'->'} = WS_NO; 8438 $want_left_space{'**'} = WS_NO; 8439 $want_right_space{'**'} = WS_NO; 8440 $want_right_space{'CORE::'} = WS_NO; 8441 8442 # These binary_ws_rules are hardwired and have priority over the above 8443 # settings. It would be nice to allow adjustment by the user, 8444 # but it would be complicated to specify. 8445 # 8446 # hash type information must stay tightly bound 8447 # as in : ${xxxx} 8448 $binary_ws_rules{'i'}{'L'} = WS_NO; 8449 $binary_ws_rules{'i'}{'{'} = WS_YES; 8450 $binary_ws_rules{'k'}{'{'} = WS_YES; 8451 $binary_ws_rules{'U'}{'{'} = WS_YES; 8452 $binary_ws_rules{'i'}{'['} = WS_NO; 8453 $binary_ws_rules{'R'}{'L'} = WS_NO; 8454 $binary_ws_rules{'R'}{'{'} = WS_NO; 8455 $binary_ws_rules{'t'}{'L'} = WS_NO; 8456 $binary_ws_rules{'t'}{'{'} = WS_NO; 8457 $binary_ws_rules{'}'}{'L'} = WS_NO; 8458 $binary_ws_rules{'}'}{'{'} = WS_NO; 8459 $binary_ws_rules{'$'}{'L'} = WS_NO; 8460 $binary_ws_rules{'$'}{'{'} = WS_NO; 8461 $binary_ws_rules{'@'}{'L'} = WS_NO; 8462 $binary_ws_rules{'@'}{'{'} = WS_NO; 8463 $binary_ws_rules{'='}{'L'} = WS_YES; 8464 $binary_ws_rules{'J'}{'J'} = WS_YES; 8465 8466 # the following includes ') {' 8467 # as in : if ( xxx ) { yyy } 8468 $binary_ws_rules{']'}{'L'} = WS_NO; 8469 $binary_ws_rules{']'}{'{'} = WS_NO; 8470 $binary_ws_rules{')'}{'{'} = WS_YES; 8471 $binary_ws_rules{')'}{'['} = WS_NO; 8472 $binary_ws_rules{']'}{'['} = WS_NO; 8473 $binary_ws_rules{']'}{'{'} = WS_NO; 8474 $binary_ws_rules{'}'}{'['} = WS_NO; 8475 $binary_ws_rules{'R'}{'['} = WS_NO; 8476 8477 $binary_ws_rules{']'}{'++'} = WS_NO; 8478 $binary_ws_rules{']'}{'--'} = WS_NO; 8479 $binary_ws_rules{')'}{'++'} = WS_NO; 8480 $binary_ws_rules{')'}{'--'} = WS_NO; 8481 8482 $binary_ws_rules{'R'}{'++'} = WS_NO; 8483 $binary_ws_rules{'R'}{'--'} = WS_NO; 8484 8485 $binary_ws_rules{'i'}{'Q'} = WS_YES; 8486 $binary_ws_rules{'n'}{'('} = WS_YES; # occurs in 'use package n ()' 8487 8488 # FIXME: we could to split 'i' into variables and functions 8489 # and have no space for functions but space for variables. For now, 8490 # I have a special patch in the special rules below 8491 $binary_ws_rules{'i'}{'('} = WS_NO; 8492 8493 $binary_ws_rules{'w'}{'('} = WS_NO; 8494 $binary_ws_rules{'w'}{'{'} = WS_YES; 8495 } ## end BEGIN block 8496 8497 my ( $jmax, $rtokens, $rtoken_type, $rblock_type ) = @_; 8498 my ( $last_token, $last_type, $last_block_type, $token, $type, 8499 $block_type ); 8500 my (@white_space_flag); 8501 my $j_tight_closing_paren = -1; 8502 8503 if ( $max_index_to_go >= 0 ) { 8504 $token = $tokens_to_go[$max_index_to_go]; 8505 $type = $types_to_go[$max_index_to_go]; 8506 $block_type = $block_type_to_go[$max_index_to_go]; 8507 8508 #--------------------------------------------------------------- 8509 # Patch due to splitting of tokens with leading -> 8510 #--------------------------------------------------------------- 8511 # 8512 # This routine is dealing with the raw tokens from the tokenizer, 8513 # but to get started it needs the previous token, which will 8514 # have been stored in the '_to_go' arrays. 8515 # 8516 # This patch avoids requiring two iterations to 8517 # converge for cases such as the following, where a paren 8518 # comes in on a line following a variable with leading arrow: 8519 # $self->{main}->add_content_defer_opening 8520 # ($name, $wmkf, $self->{attrs}, $self); 8521 # In this case when we see the opening paren on line 2 we need 8522 # to know if the last token on the previous line had an arrow, 8523 # but it has already been split off so we have to add it back 8524 # in to avoid getting an unwanted space before the paren. 8525 if ( $type =~ /^[wi]$/ ) { 8526 my $im = $iprev_to_go[$max_index_to_go]; 8527 my $tm = ( $im >= 0 ) ? $types_to_go[$im] : ""; 8528 if ( $tm eq '->' ) { $token = $tm . $token } 8529 } 8530 8531 #--------------------------------------------------------------- 8532 # End patch due to splitting of tokens with leading -> 8533 #--------------------------------------------------------------- 8534 } 8535 else { 8536 $token = ' '; 8537 $type = 'b'; 8538 $block_type = ''; 8539 } 8540 8541 my ( $j, $ws ); 8542 8543 # main loop over all tokens to define the whitespace flags 8544 for ( $j = 0 ; $j <= $jmax ; $j++ ) { 8545 8546 if ( $$rtoken_type[$j] eq 'b' ) { 8547 $white_space_flag[$j] = WS_OPTIONAL; 8548 next; 8549 } 8550 8551 # set a default value, to be changed as needed 8552 $ws = undef; 8553 $last_token = $token; 8554 $last_type = $type; 8555 $last_block_type = $block_type; 8556 $token = $$rtokens[$j]; 8557 $type = $$rtoken_type[$j]; 8558 $block_type = $$rblock_type[$j]; 8559 8560 #--------------------------------------------------------------- 8561 # Whitespace Rules Section 1: 8562 # Handle space on the inside of opening braces. 8563 #--------------------------------------------------------------- 8564 8565 # /^[L\{\(\[]$/ 8566 if ( $is_opening_type{$last_type} ) { 8567 8568 $j_tight_closing_paren = -1; 8569 8570 # let's keep empty matched braces together: () {} [] 8571 # except for BLOCKS 8572 if ( $token eq $matching_token{$last_token} ) { 8573 if ($block_type) { 8574 $ws = WS_YES; 8575 } 8576 else { 8577 $ws = WS_NO; 8578 } 8579 } 8580 else { 8581 8582 # we're considering the right of an opening brace 8583 # tightness = 0 means always pad inside with space 8584 # tightness = 1 means pad inside if "complex" 8585 # tightness = 2 means never pad inside with space 8586 8587 my $tightness; 8588 if ( $last_type eq '{' 8589 && $last_token eq '{' 8590 && $last_block_type ) 8591 { 8592 $tightness = $rOpts_block_brace_tightness; 8593 } 8594 else { $tightness = $tightness{$last_token} } 8595 8596 #============================================================= 8597 # Patch for test problem fabrice_bug.pl 8598 # We must always avoid spaces around a bare word beginning 8599 # with ^ as in: 8600 # my $before = ${^PREMATCH}; 8601 # Because all of the following cause an error in perl: 8602 # my $before = ${ ^PREMATCH }; 8603 # my $before = ${ ^PREMATCH}; 8604 # my $before = ${^PREMATCH }; 8605 # So if brace tightness flag is -bt=0 we must temporarily reset 8606 # to bt=1. Note that here we must set tightness=1 and not 2 so 8607 # that the closing space 8608 # is also avoided (via the $j_tight_closing_paren flag in coding) 8609 if ( $type eq 'w' && $token =~ /^\^/ ) { $tightness = 1 } 8610 8611 #============================================================= 8612 8613 if ( $tightness <= 0 ) { 8614 $ws = WS_YES; 8615 } 8616 elsif ( $tightness > 1 ) { 8617 $ws = WS_NO; 8618 } 8619 else { 8620 8621 # Patch to count '-foo' as single token so that 8622 # each of $a{-foo} and $a{foo} and $a{'foo'} do 8623 # not get spaces with default formatting. 8624 my $j_here = $j; 8625 ++$j_here 8626 if ( $token eq '-' 8627 && $last_token eq '{' 8628 && $$rtoken_type[ $j + 1 ] eq 'w' ); 8629 8630 # $j_next is where a closing token should be if 8631 # the container has a single token 8632 my $j_next = 8633 ( $$rtoken_type[ $j_here + 1 ] eq 'b' ) 8634 ? $j_here + 2 8635 : $j_here + 1; 8636 my $tok_next = $$rtokens[$j_next]; 8637 my $type_next = $$rtoken_type[$j_next]; 8638 8639 # for tightness = 1, if there is just one token 8640 # within the matching pair, we will keep it tight 8641 if ( 8642 $tok_next eq $matching_token{$last_token} 8643 8644 # but watch out for this: [ [ ] (misc.t) 8645 && $last_token ne $token 8646 ) 8647 { 8648 8649 # remember where to put the space for the closing paren 8650 $j_tight_closing_paren = $j_next; 8651 $ws = WS_NO; 8652 } 8653 else { 8654 $ws = WS_YES; 8655 } 8656 } 8657 } 8658 } # end setting space flag inside opening tokens 8659 my $ws_1 = $ws 8660 if FORMATTER_DEBUG_FLAG_WHITE; 8661 8662 #--------------------------------------------------------------- 8663 # Whitespace Rules Section 2: 8664 # Handle space on inside of closing brace pairs. 8665 #--------------------------------------------------------------- 8666 8667 # /[\}\)\]R]/ 8668 if ( $is_closing_type{$type} ) { 8669 8670 if ( $j == $j_tight_closing_paren ) { 8671 8672 $j_tight_closing_paren = -1; 8673 $ws = WS_NO; 8674 } 8675 else { 8676 8677 if ( !defined($ws) ) { 8678 8679 my $tightness; 8680 if ( $type eq '}' && $token eq '}' && $block_type ) { 8681 $tightness = $rOpts_block_brace_tightness; 8682 } 8683 else { $tightness = $tightness{$token} } 8684 8685 $ws = ( $tightness > 1 ) ? WS_NO : WS_YES; 8686 } 8687 } 8688 } # end setting space flag inside closing tokens 8689 8690 my $ws_2 = $ws 8691 if FORMATTER_DEBUG_FLAG_WHITE; 8692 8693 #--------------------------------------------------------------- 8694 # Whitespace Rules Section 3: 8695 # Use the binary rule table. 8696 #--------------------------------------------------------------- 8697 if ( !defined($ws) ) { 8698 $ws = $binary_ws_rules{$last_type}{$type}; 8699 } 8700 my $ws_3 = $ws 8701 if FORMATTER_DEBUG_FLAG_WHITE; 8702 8703 #--------------------------------------------------------------- 8704 # Whitespace Rules Section 4: 8705 # Handle some special cases. 8706 #--------------------------------------------------------------- 8707 if ( $token eq '(' ) { 8708 8709 # This will have to be tweaked as tokenization changes. 8710 # We usually want a space at '} (', for example: 8711 # map { 1 * $_; } ( $y, $M, $w, $d, $h, $m, $s ); 8712 # 8713 # But not others: 8714 # &{ $_->[1] }( delete $_[$#_]{ $_->[0] } ); 8715 # At present, the above & block is marked as type L/R so this case 8716 # won't go through here. 8717 if ( $last_type eq '}' ) { $ws = WS_YES } 8718 8719 # NOTE: some older versions of Perl had occasional problems if 8720 # spaces are introduced between keywords or functions and opening 8721 # parens. So the default is not to do this except is certain 8722 # cases. The current Perl seems to tolerate spaces. 8723 8724 # Space between keyword and '(' 8725 elsif ( $last_type eq 'k' ) { 8726 $ws = WS_NO 8727 unless ( $rOpts_space_keyword_paren 8728 || $space_after_keyword{$last_token} ); 8729 } 8730 8731 # Space between function and '(' 8732 # ----------------------------------------------------- 8733 # 'w' and 'i' checks for something like: 8734 # myfun( &myfun( ->myfun( 8735 # ----------------------------------------------------- 8736 elsif (( $last_type =~ /^[wUG]$/ ) 8737 || ( $last_type =~ /^[wi]$/ && $last_token =~ /^(\&|->)/ ) ) 8738 { 8739 $ws = WS_NO unless ($rOpts_space_function_paren); 8740 } 8741 8742 # space between something like $i and ( in 8743 # for $i ( 0 .. 20 ) { 8744 # FIXME: eventually, type 'i' needs to be split into multiple 8745 # token types so this can be a hardwired rule. 8746 elsif ( $last_type eq 'i' && $last_token =~ /^[\$\%\@]/ ) { 8747 $ws = WS_YES; 8748 } 8749 8750 # allow constant function followed by '()' to retain no space 8751 elsif ( $last_type eq 'C' && $$rtokens[ $j + 1 ] eq ')' ) { 8752 $ws = WS_NO; 8753 } 8754 } 8755 8756 # patch for SWITCH/CASE: make space at ']{' optional 8757 # since the '{' might begin a case or when block 8758 elsif ( ( $token eq '{' && $type ne 'L' ) && $last_token eq ']' ) { 8759 $ws = WS_OPTIONAL; 8760 } 8761 8762 # keep space between 'sub' and '{' for anonymous sub definition 8763 if ( $type eq '{' ) { 8764 if ( $last_token eq 'sub' ) { 8765 $ws = WS_YES; 8766 } 8767 8768 # this is needed to avoid no space in '){' 8769 if ( $last_token eq ')' && $token eq '{' ) { $ws = WS_YES } 8770 8771 # avoid any space before the brace or bracket in something like 8772 # @opts{'a','b',...} 8773 if ( $last_type eq 'i' && $last_token =~ /^\@/ ) { 8774 $ws = WS_NO; 8775 } 8776 } 8777 8778 elsif ( $type eq 'i' ) { 8779 8780 # never a space before -> 8781 if ( $token =~ /^\-\>/ ) { 8782 $ws = WS_NO; 8783 } 8784 } 8785 8786 # retain any space between '-' and bare word 8787 elsif ( $type eq 'w' || $type eq 'C' ) { 8788 $ws = WS_OPTIONAL if $last_type eq '-'; 8789 8790 # never a space before -> 8791 if ( $token =~ /^\-\>/ ) { 8792 $ws = WS_NO; 8793 } 8794 } 8795 8796 # retain any space between '-' and bare word 8797 # example: avoid space between 'USER' and '-' here: 8798 # $myhash{USER-NAME}='steve'; 8799 elsif ( $type eq 'm' || $type eq '-' ) { 8800 $ws = WS_OPTIONAL if ( $last_type eq 'w' ); 8801 } 8802 8803 # always space before side comment 8804 elsif ( $type eq '#' ) { $ws = WS_YES if $j > 0 } 8805 8806 # always preserver whatever space was used after a possible 8807 # filehandle (except _) or here doc operator 8808 if ( 8809 $type ne '#' 8810 && ( ( $last_type eq 'Z' && $last_token ne '_' ) 8811 || $last_type eq 'h' ) 8812 ) 8813 { 8814 $ws = WS_OPTIONAL; 8815 } 8816 8817 my $ws_4 = $ws 8818 if FORMATTER_DEBUG_FLAG_WHITE; 8819 8820 #--------------------------------------------------------------- 8821 # Whitespace Rules Section 5: 8822 # Apply default rules not covered above. 8823 #--------------------------------------------------------------- 8824 8825 # If we fall through to here, look at the pre-defined hash tables for 8826 # the two tokens, and: 8827 # if (they are equal) use the common value 8828 # if (either is zero or undef) use the other 8829 # if (either is -1) use it 8830 # That is, 8831 # left vs right 8832 # 1 vs 1 --> 1 8833 # 0 vs 0 --> 0 8834 # -1 vs -1 --> -1 8835 # 8836 # 0 vs -1 --> -1 8837 # 0 vs 1 --> 1 8838 # 1 vs 0 --> 1 8839 # -1 vs 0 --> -1 8840 # 8841 # -1 vs 1 --> -1 8842 # 1 vs -1 --> -1 8843 if ( !defined($ws) ) { 8844 my $wl = $want_left_space{$type}; 8845 my $wr = $want_right_space{$last_type}; 8846 if ( !defined($wl) ) { $wl = 0 } 8847 if ( !defined($wr) ) { $wr = 0 } 8848 $ws = ( ( $wl == $wr ) || ( $wl == -1 ) || !$wr ) ? $wl : $wr; 8849 } 8850 8851 if ( !defined($ws) ) { 8852 $ws = 0; 8853 write_diagnostics( 8854 "WS flag is undefined for tokens $last_token $token\n"); 8855 } 8856 8857 # Treat newline as a whitespace. Otherwise, we might combine 8858 # 'Send' and '-recipients' here according to the above rules: 8859 # my $msg = new Fax::Send 8860 # -recipients => $to, 8861 # -data => $data; 8862 if ( $ws == 0 && $j == 0 ) { $ws = 1 } 8863 8864 if ( ( $ws == 0 ) 8865 && $j > 0 8866 && $j < $jmax 8867 && ( $last_type !~ /^[Zh]$/ ) ) 8868 { 8869 8870 # If this happens, we have a non-fatal but undesirable 8871 # hole in the above rules which should be patched. 8872 write_diagnostics( 8873 "WS flag is zero for tokens $last_token $token\n"); 8874 } 8875 $white_space_flag[$j] = $ws; 8876 8877 FORMATTER_DEBUG_FLAG_WHITE && do { 8878 my $str = substr( $last_token, 0, 15 ); 8879 $str .= ' ' x ( 16 - length($str) ); 8880 if ( !defined($ws_1) ) { $ws_1 = "*" } 8881 if ( !defined($ws_2) ) { $ws_2 = "*" } 8882 if ( !defined($ws_3) ) { $ws_3 = "*" } 8883 if ( !defined($ws_4) ) { $ws_4 = "*" } 8884 print STDOUT 8885"WHITE: i=$j $str $last_type $type $ws_1 : $ws_2 : $ws_3 : $ws_4 : $ws \n"; 8886 }; 8887 } ## end main loop 8888 8889 if ($rOpts_tight_secret_operators) { 8890 secret_operator_whitespace( $jmax, $rtokens, $rtoken_type, 8891 \@white_space_flag ); 8892 } 8893 8894 return \@white_space_flag; 8895} ## end sub set_white_space_flag 8896 8897{ # begin print_line_of_tokens 8898 8899 my $rtoken_type; 8900 my $rtokens; 8901 my $rlevels; 8902 my $rslevels; 8903 my $rblock_type; 8904 my $rcontainer_type; 8905 my $rcontainer_environment; 8906 my $rtype_sequence; 8907 my $input_line; 8908 my $rnesting_tokens; 8909 my $rci_levels; 8910 my $rnesting_blocks; 8911 8912 my $in_quote; 8913 my $guessed_indentation_level; 8914 8915 # These local token variables are stored by store_token_to_go: 8916 my $block_type; 8917 my $ci_level; 8918 my $container_environment; 8919 my $container_type; 8920 my $in_continued_quote; 8921 my $level; 8922 my $nesting_blocks; 8923 my $no_internal_newlines; 8924 my $slevel; 8925 my $token; 8926 my $type; 8927 my $type_sequence; 8928 8929 # routine to pull the jth token from the line of tokens 8930 sub extract_token { 8931 my $j = shift; 8932 $token = $$rtokens[$j]; 8933 $type = $$rtoken_type[$j]; 8934 $block_type = $$rblock_type[$j]; 8935 $container_type = $$rcontainer_type[$j]; 8936 $container_environment = $$rcontainer_environment[$j]; 8937 $type_sequence = $$rtype_sequence[$j]; 8938 $level = $$rlevels[$j]; 8939 $slevel = $$rslevels[$j]; 8940 $nesting_blocks = $$rnesting_blocks[$j]; 8941 $ci_level = $$rci_levels[$j]; 8942 } 8943 8944 { 8945 my @saved_token; 8946 8947 sub save_current_token { 8948 8949 @saved_token = ( 8950 $block_type, $ci_level, 8951 $container_environment, $container_type, 8952 $in_continued_quote, $level, 8953 $nesting_blocks, $no_internal_newlines, 8954 $slevel, $token, 8955 $type, $type_sequence, 8956 ); 8957 } 8958 8959 sub restore_current_token { 8960 ( 8961 $block_type, $ci_level, 8962 $container_environment, $container_type, 8963 $in_continued_quote, $level, 8964 $nesting_blocks, $no_internal_newlines, 8965 $slevel, $token, 8966 $type, $type_sequence, 8967 ) = @saved_token; 8968 } 8969 } 8970 8971 sub token_length { 8972 8973 # Returns the length of a token, given: 8974 # $token=text of the token 8975 # $type = type 8976 # $not_first_token = should be TRUE if this is not the first token of 8977 # the line. It might the index of this token in an array. It is 8978 # used to test for a side comment vs a block commment. 8979 # Note: Eventually this should be the only routine determining the 8980 # length of a token in this package. 8981 my ( $token, $type, $not_first_token ) = @_; 8982 my $token_length = length($token); 8983 8984 # We mark lengths of side comments as just 1 if we are 8985 # ignoring their lengths when setting line breaks. 8986 $token_length = 1 8987 if ( $rOpts_ignore_side_comment_lengths 8988 && $not_first_token 8989 && $type eq '#' ); 8990 return $token_length; 8991 } 8992 8993 sub rtoken_length { 8994 8995 # return length of ith token in @{$rtokens} 8996 my ($i) = @_; 8997 return token_length( $$rtokens[$i], $$rtoken_type[$i], $i ); 8998 } 8999 9000 # Routine to place the current token into the output stream. 9001 # Called once per output token. 9002 sub store_token_to_go { 9003 9004 my $flag = $no_internal_newlines; 9005 if ( $_[0] ) { $flag = 1 } 9006 9007 $tokens_to_go[ ++$max_index_to_go ] = $token; 9008 $types_to_go[$max_index_to_go] = $type; 9009 $nobreak_to_go[$max_index_to_go] = $flag; 9010 $old_breakpoint_to_go[$max_index_to_go] = 0; 9011 $forced_breakpoint_to_go[$max_index_to_go] = 0; 9012 $block_type_to_go[$max_index_to_go] = $block_type; 9013 $type_sequence_to_go[$max_index_to_go] = $type_sequence; 9014 $container_environment_to_go[$max_index_to_go] = $container_environment; 9015 $nesting_blocks_to_go[$max_index_to_go] = $nesting_blocks; 9016 $ci_levels_to_go[$max_index_to_go] = $ci_level; 9017 $mate_index_to_go[$max_index_to_go] = -1; 9018 $matching_token_to_go[$max_index_to_go] = ''; 9019 $bond_strength_to_go[$max_index_to_go] = 0; 9020 9021 # Note: negative levels are currently retained as a diagnostic so that 9022 # the 'final indentation level' is correctly reported for bad scripts. 9023 # But this means that every use of $level as an index must be checked. 9024 # If this becomes too much of a problem, we might give up and just clip 9025 # them at zero. 9026 ## $levels_to_go[$max_index_to_go] = ( $level > 0 ) ? $level : 0; 9027 $levels_to_go[$max_index_to_go] = $level; 9028 $nesting_depth_to_go[$max_index_to_go] = ( $slevel >= 0 ) ? $slevel : 0; 9029 9030 # link the non-blank tokens 9031 my $iprev = $max_index_to_go - 1; 9032 $iprev-- if ( $iprev >= 0 && $types_to_go[$iprev] eq 'b' ); 9033 $iprev_to_go[$max_index_to_go] = $iprev; 9034 $inext_to_go[$iprev] = $max_index_to_go 9035 if ( $iprev >= 0 && $type ne 'b' ); 9036 $inext_to_go[$max_index_to_go] = $max_index_to_go + 1; 9037 9038 $token_lengths_to_go[$max_index_to_go] = 9039 token_length( $token, $type, $max_index_to_go ); 9040 9041 # We keep a running sum of token lengths from the start of this batch: 9042 # summed_lengths_to_go[$i] = total length to just before token $i 9043 # summed_lengths_to_go[$i+1] = total length to just after token $i 9044 $summed_lengths_to_go[ $max_index_to_go + 1 ] = 9045 $summed_lengths_to_go[$max_index_to_go] + 9046 $token_lengths_to_go[$max_index_to_go]; 9047 9048 # Define the indentation that this token would have if it started 9049 # a new line. We have to do this now because we need to know this 9050 # when considering one-line blocks. 9051 set_leading_whitespace( $level, $ci_level, $in_continued_quote ); 9052 9053 # remember previous nonblank tokens seen 9054 if ( $type ne 'b' ) { 9055 $last_last_nonblank_index_to_go = $last_nonblank_index_to_go; 9056 $last_last_nonblank_type_to_go = $last_nonblank_type_to_go; 9057 $last_last_nonblank_token_to_go = $last_nonblank_token_to_go; 9058 $last_nonblank_index_to_go = $max_index_to_go; 9059 $last_nonblank_type_to_go = $type; 9060 $last_nonblank_token_to_go = $token; 9061 if ( $type eq ',' ) { 9062 $comma_count_in_batch++; 9063 } 9064 } 9065 9066 FORMATTER_DEBUG_FLAG_STORE && do { 9067 my ( $a, $b, $c ) = caller(); 9068 print STDOUT 9069"STORE: from $a $c: storing token $token type $type lev=$level slev=$slevel at $max_index_to_go\n"; 9070 }; 9071 } 9072 9073 sub insert_new_token_to_go { 9074 9075 # insert a new token into the output stream. use same level as 9076 # previous token; assumes a character at max_index_to_go. 9077 save_current_token(); 9078 ( $token, $type, $slevel, $no_internal_newlines ) = @_; 9079 9080 if ( $max_index_to_go == UNDEFINED_INDEX ) { 9081 warning("code bug: bad call to insert_new_token_to_go\n"); 9082 } 9083 $level = $levels_to_go[$max_index_to_go]; 9084 9085 # FIXME: it seems to be necessary to use the next, rather than 9086 # previous, value of this variable when creating a new blank (align.t) 9087 #my $slevel = $nesting_depth_to_go[$max_index_to_go]; 9088 $nesting_blocks = $nesting_blocks_to_go[$max_index_to_go]; 9089 $ci_level = $ci_levels_to_go[$max_index_to_go]; 9090 $container_environment = $container_environment_to_go[$max_index_to_go]; 9091 $in_continued_quote = 0; 9092 $block_type = ""; 9093 $type_sequence = ""; 9094 store_token_to_go(); 9095 restore_current_token(); 9096 return; 9097 } 9098 9099 sub print_line_of_tokens { 9100 9101 my $line_of_tokens = shift; 9102 9103 # This routine is called once per input line to process all of 9104 # the tokens on that line. This is the first stage of 9105 # beautification. 9106 # 9107 # Full-line comments and blank lines may be processed immediately. 9108 # 9109 # For normal lines of code, the tokens are stored one-by-one, 9110 # via calls to 'sub store_token_to_go', until a known line break 9111 # point is reached. Then, the batch of collected tokens is 9112 # passed along to 'sub output_line_to_go' for further 9113 # processing. This routine decides if there should be 9114 # whitespace between each pair of non-white tokens, so later 9115 # routines only need to decide on any additional line breaks. 9116 # Any whitespace is initially a single space character. Later, 9117 # the vertical aligner may expand that to be multiple space 9118 # characters if necessary for alignment. 9119 9120 # extract input line number for error messages 9121 $input_line_number = $line_of_tokens->{_line_number}; 9122 9123 $rtoken_type = $line_of_tokens->{_rtoken_type}; 9124 $rtokens = $line_of_tokens->{_rtokens}; 9125 $rlevels = $line_of_tokens->{_rlevels}; 9126 $rslevels = $line_of_tokens->{_rslevels}; 9127 $rblock_type = $line_of_tokens->{_rblock_type}; 9128 $rcontainer_type = $line_of_tokens->{_rcontainer_type}; 9129 $rcontainer_environment = $line_of_tokens->{_rcontainer_environment}; 9130 $rtype_sequence = $line_of_tokens->{_rtype_sequence}; 9131 $input_line = $line_of_tokens->{_line_text}; 9132 $rnesting_tokens = $line_of_tokens->{_rnesting_tokens}; 9133 $rci_levels = $line_of_tokens->{_rci_levels}; 9134 $rnesting_blocks = $line_of_tokens->{_rnesting_blocks}; 9135 9136 $in_continued_quote = $starting_in_quote = 9137 $line_of_tokens->{_starting_in_quote}; 9138 $in_quote = $line_of_tokens->{_ending_in_quote}; 9139 $ending_in_quote = $in_quote; 9140 $guessed_indentation_level = 9141 $line_of_tokens->{_guessed_indentation_level}; 9142 9143 my $j; 9144 my $j_next; 9145 my $jmax; 9146 my $next_nonblank_token; 9147 my $next_nonblank_token_type; 9148 my $rwhite_space_flag; 9149 9150 $jmax = @$rtokens - 1; 9151 $block_type = ""; 9152 $container_type = ""; 9153 $container_environment = ""; 9154 $type_sequence = ""; 9155 $no_internal_newlines = 1 - $rOpts_add_newlines; 9156 $is_static_block_comment = 0; 9157 9158 # Handle a continued quote.. 9159 if ($in_continued_quote) { 9160 9161 # A line which is entirely a quote or pattern must go out 9162 # verbatim. Note: the \n is contained in $input_line. 9163 if ( $jmax <= 0 ) { 9164 if ( ( $input_line =~ "\t" ) ) { 9165 note_embedded_tab(); 9166 } 9167 write_unindented_line("$input_line"); 9168 $last_line_had_side_comment = 0; 9169 return; 9170 } 9171 } 9172 9173 # Write line verbatim if we are in a formatting skip section 9174 if ($in_format_skipping_section) { 9175 write_unindented_line("$input_line"); 9176 $last_line_had_side_comment = 0; 9177 9178 # Note: extra space appended to comment simplifies pattern matching 9179 if ( $jmax == 0 9180 && $$rtoken_type[0] eq '#' 9181 && ( $$rtokens[0] . " " ) =~ /$format_skipping_pattern_end/o ) 9182 { 9183 $in_format_skipping_section = 0; 9184 write_logfile_entry("Exiting formatting skip section\n"); 9185 $file_writer_object->reset_consecutive_blank_lines(); 9186 } 9187 return; 9188 } 9189 9190 # See if we are entering a formatting skip section 9191 if ( $rOpts_format_skipping 9192 && $jmax == 0 9193 && $$rtoken_type[0] eq '#' 9194 && ( $$rtokens[0] . " " ) =~ /$format_skipping_pattern_begin/o ) 9195 { 9196 flush(); 9197 $in_format_skipping_section = 1; 9198 write_logfile_entry("Entering formatting skip section\n"); 9199 write_unindented_line("$input_line"); 9200 $last_line_had_side_comment = 0; 9201 return; 9202 } 9203 9204 # delete trailing blank tokens 9205 if ( $jmax > 0 && $$rtoken_type[$jmax] eq 'b' ) { $jmax-- } 9206 9207 # Handle a blank line.. 9208 if ( $jmax < 0 ) { 9209 9210 # If keep-old-blank-lines is zero, we delete all 9211 # old blank lines and let the blank line rules generate any 9212 # needed blanks. 9213 if ($rOpts_keep_old_blank_lines) { 9214 flush(); 9215 $file_writer_object->write_blank_code_line( 9216 $rOpts_keep_old_blank_lines == 2 ); 9217 $last_line_leading_type = 'b'; 9218 } 9219 $last_line_had_side_comment = 0; 9220 return; 9221 } 9222 9223 # see if this is a static block comment (starts with ## by default) 9224 my $is_static_block_comment_without_leading_space = 0; 9225 if ( $jmax == 0 9226 && $$rtoken_type[0] eq '#' 9227 && $rOpts->{'static-block-comments'} 9228 && $input_line =~ /$static_block_comment_pattern/o ) 9229 { 9230 $is_static_block_comment = 1; 9231 $is_static_block_comment_without_leading_space = 9232 substr( $input_line, 0, 1 ) eq '#'; 9233 } 9234 9235 # Check for comments which are line directives 9236 # Treat exactly as static block comments without leading space 9237 # reference: perlsyn, near end, section Plain Old Comments (Not!) 9238 # example: '# line 42 "new_filename.plx"' 9239 if ( 9240 $jmax == 0 9241 && $$rtoken_type[0] eq '#' 9242 && $input_line =~ /^\# \s* 9243 line \s+ (\d+) \s* 9244 (?:\s("?)([^"]+)\2)? \s* 9245 $/x 9246 ) 9247 { 9248 $is_static_block_comment = 1; 9249 $is_static_block_comment_without_leading_space = 1; 9250 } 9251 9252 # create a hanging side comment if appropriate 9253 my $is_hanging_side_comment; 9254 if ( 9255 $jmax == 0 9256 && $$rtoken_type[0] eq '#' # only token is a comment 9257 && $last_line_had_side_comment # last line had side comment 9258 && $input_line =~ /^\s/ # there is some leading space 9259 && !$is_static_block_comment # do not make static comment hanging 9260 && $rOpts->{'hanging-side-comments'} # user is allowing 9261 # hanging side comments 9262 # like this 9263 ) 9264 { 9265 9266 # We will insert an empty qw string at the start of the token list 9267 # to force this comment to be a side comment. The vertical aligner 9268 # should then line it up with the previous side comment. 9269 $is_hanging_side_comment = 1; 9270 unshift @$rtoken_type, 'q'; 9271 unshift @$rtokens, ''; 9272 unshift @$rlevels, $$rlevels[0]; 9273 unshift @$rslevels, $$rslevels[0]; 9274 unshift @$rblock_type, ''; 9275 unshift @$rcontainer_type, ''; 9276 unshift @$rcontainer_environment, ''; 9277 unshift @$rtype_sequence, ''; 9278 unshift @$rnesting_tokens, $$rnesting_tokens[0]; 9279 unshift @$rci_levels, $$rci_levels[0]; 9280 unshift @$rnesting_blocks, $$rnesting_blocks[0]; 9281 $jmax = 1; 9282 } 9283 9284 # remember if this line has a side comment 9285 $last_line_had_side_comment = 9286 ( $jmax > 0 && $$rtoken_type[$jmax] eq '#' ); 9287 9288 # Handle a block (full-line) comment.. 9289 if ( ( $jmax == 0 ) && ( $$rtoken_type[0] eq '#' ) ) { 9290 9291 if ( $rOpts->{'delete-block-comments'} ) { return } 9292 9293 if ( $rOpts->{'tee-block-comments'} ) { 9294 $file_writer_object->tee_on(); 9295 } 9296 9297 destroy_one_line_block(); 9298 output_line_to_go(); 9299 9300 # output a blank line before block comments 9301 if ( 9302 # unless we follow a blank or comment line 9303 $last_line_leading_type !~ /^[#b]$/ 9304 9305 # only if allowed 9306 && $rOpts->{'blanks-before-comments'} 9307 9308 # not if this is an empty comment line 9309 && $$rtokens[0] ne '#' 9310 9311 # not after a short line ending in an opening token 9312 # because we already have space above this comment. 9313 # Note that the first comment in this if block, after 9314 # the 'if (', does not get a blank line because of this. 9315 && !$last_output_short_opening_token 9316 9317 # never before static block comments 9318 && !$is_static_block_comment 9319 ) 9320 { 9321 flush(); # switching to new output stream 9322 $file_writer_object->write_blank_code_line(); 9323 $last_line_leading_type = 'b'; 9324 } 9325 9326 # TRIM COMMENTS -- This could be turned off as a option 9327 $$rtokens[0] =~ s/\s*$//; # trim right end 9328 9329 if ( 9330 $rOpts->{'indent-block-comments'} 9331 && ( !$rOpts->{'indent-spaced-block-comments'} 9332 || $input_line =~ /^\s+/ ) 9333 && !$is_static_block_comment_without_leading_space 9334 ) 9335 { 9336 extract_token(0); 9337 store_token_to_go(); 9338 output_line_to_go(); 9339 } 9340 else { 9341 flush(); # switching to new output stream 9342 $file_writer_object->write_code_line( $$rtokens[0] . "\n" ); 9343 $last_line_leading_type = '#'; 9344 } 9345 if ( $rOpts->{'tee-block-comments'} ) { 9346 $file_writer_object->tee_off(); 9347 } 9348 return; 9349 } 9350 9351 # compare input/output indentation except for continuation lines 9352 # (because they have an unknown amount of initial blank space) 9353 # and lines which are quotes (because they may have been outdented) 9354 # Note: this test is placed here because we know the continuation flag 9355 # at this point, which allows us to avoid non-meaningful checks. 9356 my $structural_indentation_level = $$rlevels[0]; 9357 compare_indentation_levels( $guessed_indentation_level, 9358 $structural_indentation_level ) 9359 unless ( $is_hanging_side_comment 9360 || $$rci_levels[0] > 0 9361 || $guessed_indentation_level == 0 && $$rtoken_type[0] eq 'Q' ); 9362 9363 # Patch needed for MakeMaker. Do not break a statement 9364 # in which $VERSION may be calculated. See MakeMaker.pm; 9365 # this is based on the coding in it. 9366 # The first line of a file that matches this will be eval'd: 9367 # /([\$*])(([\w\:\']*)\bVERSION)\b.*\=/ 9368 # Examples: 9369 # *VERSION = \'1.01'; 9370 # ( $VERSION ) = '$Revision: 1.74 $ ' =~ /\$Revision:\s+([^\s]+)/; 9371 # We will pass such a line straight through without breaking 9372 # it unless -npvl is used 9373 9374 my $is_VERSION_statement = 0; 9375 9376 if ( 9377 !$saw_VERSION_in_this_file 9378 && $input_line =~ /VERSION/ # quick check to reject most lines 9379 && $input_line =~ /([\$*])(([\w\:\']*)\bVERSION)\b.*\=/ 9380 ) 9381 { 9382 $saw_VERSION_in_this_file = 1; 9383 $is_VERSION_statement = 1; 9384 write_logfile_entry("passing VERSION line; -npvl deactivates\n"); 9385 $no_internal_newlines = 1; 9386 } 9387 9388 # take care of indentation-only 9389 # NOTE: In previous versions we sent all qw lines out immediately here. 9390 # No longer doing this: also write a line which is entirely a 'qw' list 9391 # to allow stacking of opening and closing tokens. Note that interior 9392 # qw lines will still go out at the end of this routine. 9393 if ( $rOpts->{'indent-only'} ) { 9394 flush(); 9395 trim($input_line); 9396 9397 extract_token(0); 9398 $token = $input_line; 9399 $type = 'q'; 9400 $block_type = ""; 9401 $container_type = ""; 9402 $container_environment = ""; 9403 $type_sequence = ""; 9404 store_token_to_go(); 9405 output_line_to_go(); 9406 return; 9407 } 9408 9409 push( @$rtokens, ' ', ' ' ); # making $j+2 valid simplifies coding 9410 push( @$rtoken_type, 'b', 'b' ); 9411 ($rwhite_space_flag) = 9412 set_white_space_flag( $jmax, $rtokens, $rtoken_type, $rblock_type ); 9413 9414 # if the buffer hasn't been flushed, add a leading space if 9415 # necessary to keep essential whitespace. This is really only 9416 # necessary if we are squeezing out all ws. 9417 if ( $max_index_to_go >= 0 ) { 9418 9419 $old_line_count_in_batch++; 9420 9421 if ( 9422 is_essential_whitespace( 9423 $last_last_nonblank_token, 9424 $last_last_nonblank_type, 9425 $tokens_to_go[$max_index_to_go], 9426 $types_to_go[$max_index_to_go], 9427 $$rtokens[0], 9428 $$rtoken_type[0] 9429 ) 9430 ) 9431 { 9432 my $slevel = $$rslevels[0]; 9433 insert_new_token_to_go( ' ', 'b', $slevel, 9434 $no_internal_newlines ); 9435 } 9436 } 9437 9438 # If we just saw the end of an elsif block, write nag message 9439 # if we do not see another elseif or an else. 9440 if ($looking_for_else) { 9441 9442 unless ( $$rtokens[0] =~ /^(elsif|else)$/ ) { 9443 write_logfile_entry("(No else block)\n"); 9444 } 9445 $looking_for_else = 0; 9446 } 9447 9448 # This is a good place to kill incomplete one-line blocks 9449 if ( ( $semicolons_before_block_self_destruct == 0 ) 9450 && ( $max_index_to_go >= 0 ) 9451 && ( $types_to_go[$max_index_to_go] eq ';' ) 9452 && ( $$rtokens[0] ne '}' ) ) 9453 { 9454 destroy_one_line_block(); 9455 output_line_to_go(); 9456 } 9457 9458 # loop to process the tokens one-by-one 9459 $type = 'b'; 9460 $token = ""; 9461 9462 foreach $j ( 0 .. $jmax ) { 9463 9464 # pull out the local values for this token 9465 extract_token($j); 9466 9467 if ( $type eq '#' ) { 9468 9469 # trim trailing whitespace 9470 # (there is no option at present to prevent this) 9471 $token =~ s/\s*$//; 9472 9473 if ( 9474 $rOpts->{'delete-side-comments'} 9475 9476 # delete closing side comments if necessary 9477 || ( $rOpts->{'delete-closing-side-comments'} 9478 && $token =~ /$closing_side_comment_prefix_pattern/o 9479 && $last_nonblank_block_type =~ 9480 /$closing_side_comment_list_pattern/o ) 9481 ) 9482 { 9483 if ( $types_to_go[$max_index_to_go] eq 'b' ) { 9484 unstore_token_to_go(); 9485 } 9486 last; 9487 } 9488 } 9489 9490 # If we are continuing after seeing a right curly brace, flush 9491 # buffer unless we see what we are looking for, as in 9492 # } else ... 9493 if ( $rbrace_follower && $type ne 'b' ) { 9494 9495 unless ( $rbrace_follower->{$token} ) { 9496 output_line_to_go(); 9497 } 9498 $rbrace_follower = undef; 9499 } 9500 9501 $j_next = ( $$rtoken_type[ $j + 1 ] eq 'b' ) ? $j + 2 : $j + 1; 9502 $next_nonblank_token = $$rtokens[$j_next]; 9503 $next_nonblank_token_type = $$rtoken_type[$j_next]; 9504 9505 #-------------------------------------------------------- 9506 # Start of section to patch token text 9507 #-------------------------------------------------------- 9508 9509 # Modify certain tokens here for whitespace 9510 # The following is not yet done, but could be: 9511 # sub (x x x) 9512 if ( $type =~ /^[wit]$/ ) { 9513 9514 # Examples: 9515 # change '$ var' to '$var' etc 9516 # '-> new' to '->new' 9517 if ( $token =~ /^([\$\&\%\*\@]|\-\>)\s/ ) { 9518 $token =~ s/\s*//g; 9519 } 9520 9521 # Split identifiers with leading arrows, inserting blanks if 9522 # necessary. It is easier and safer here than in the 9523 # tokenizer. For example '->new' becomes two tokens, '->' and 9524 # 'new' with a possible blank between. 9525 # 9526 # Note: there is a related patch in sub set_white_space_flag 9527 if ( $token =~ /^\-\>(.*)$/ && $1 ) { 9528 my $token_save = $1; 9529 my $type_save = $type; 9530 9531 # store a blank to left of arrow if necessary 9532 if ( $max_index_to_go >= 0 9533 && $types_to_go[$max_index_to_go] ne 'b' 9534 && $want_left_space{'->'} == WS_YES ) 9535 { 9536 insert_new_token_to_go( ' ', 'b', $slevel, 9537 $no_internal_newlines ); 9538 } 9539 9540 # then store the arrow 9541 $token = '->'; 9542 $type = $token; 9543 store_token_to_go(); 9544 9545 # then reset the current token to be the remainder, 9546 # and reset the whitespace flag according to the arrow 9547 $$rwhite_space_flag[$j] = $want_right_space{'->'}; 9548 $token = $token_save; 9549 $type = $type_save; 9550 } 9551 9552 if ( $token =~ /^sub/ ) { $token =~ s/\s+/ /g } 9553 9554 # trim identifiers of trailing blanks which can occur 9555 # under some unusual circumstances, such as if the 9556 # identifier 'witch' has trailing blanks on input here: 9557 # 9558 # sub 9559 # witch 9560 # () # prototype may be on new line ... 9561 # ... 9562 if ( $type eq 'i' ) { $token =~ s/\s+$//g } 9563 } 9564 9565 # change 'LABEL :' to 'LABEL:' 9566 elsif ( $type eq 'J' ) { $token =~ s/\s+//g } 9567 9568 # patch to add space to something like "x10" 9569 # This avoids having to split this token in the pre-tokenizer 9570 elsif ( $type eq 'n' ) { 9571 if ( $token =~ /^x\d+/ ) { $token =~ s/x/x / } 9572 } 9573 9574 elsif ( $type eq 'Q' ) { 9575 note_embedded_tab() if ( $token =~ "\t" ); 9576 9577 # make note of something like '$var = s/xxx/yyy/;' 9578 # in case it should have been '$var =~ s/xxx/yyy/;' 9579 if ( 9580 $token =~ /^(s|tr|y|m|\/)/ 9581 && $last_nonblank_token =~ /^(=|==|!=)$/ 9582 9583 # precededed by simple scalar 9584 && $last_last_nonblank_type eq 'i' 9585 && $last_last_nonblank_token =~ /^\$/ 9586 9587 # followed by some kind of termination 9588 # (but give complaint if we can's see far enough ahead) 9589 && $next_nonblank_token =~ /^[; \)\}]$/ 9590 9591 # scalar is not decleared 9592 && !( 9593 $types_to_go[0] eq 'k' 9594 && $tokens_to_go[0] =~ /^(my|our|local)$/ 9595 ) 9596 ) 9597 { 9598 my $guess = substr( $last_nonblank_token, 0, 1 ) . '~'; 9599 complain( 9600"Note: be sure you want '$last_nonblank_token' instead of '$guess' here\n" 9601 ); 9602 } 9603 } 9604 9605 # trim blanks from right of qw quotes 9606 # (To avoid trimming qw quotes use -ntqw; the tokenizer handles this) 9607 elsif ( $type eq 'q' ) { 9608 $token =~ s/\s*$//; 9609 note_embedded_tab() if ( $token =~ "\t" ); 9610 } 9611 9612 #-------------------------------------------------------- 9613 # End of section to patch token text 9614 #-------------------------------------------------------- 9615 9616 # insert any needed whitespace 9617 if ( ( $type ne 'b' ) 9618 && ( $max_index_to_go >= 0 ) 9619 && ( $types_to_go[$max_index_to_go] ne 'b' ) 9620 && $rOpts_add_whitespace ) 9621 { 9622 my $ws = $$rwhite_space_flag[$j]; 9623 9624 if ( $ws == 1 ) { 9625 insert_new_token_to_go( ' ', 'b', $slevel, 9626 $no_internal_newlines ); 9627 } 9628 } 9629 9630 # Do not allow breaks which would promote a side comment to a 9631 # block comment. In order to allow a break before an opening 9632 # or closing BLOCK, followed by a side comment, those sections 9633 # of code will handle this flag separately. 9634 my $side_comment_follows = ( $next_nonblank_token_type eq '#' ); 9635 my $is_opening_BLOCK = 9636 ( $type eq '{' 9637 && $token eq '{' 9638 && $block_type 9639 && $block_type ne 't' ); 9640 my $is_closing_BLOCK = 9641 ( $type eq '}' 9642 && $token eq '}' 9643 && $block_type 9644 && $block_type ne 't' ); 9645 9646 if ( $side_comment_follows 9647 && !$is_opening_BLOCK 9648 && !$is_closing_BLOCK ) 9649 { 9650 $no_internal_newlines = 1; 9651 } 9652 9653 # We're only going to handle breaking for code BLOCKS at this 9654 # (top) level. Other indentation breaks will be handled by 9655 # sub scan_list, which is better suited to dealing with them. 9656 if ($is_opening_BLOCK) { 9657 9658 # Tentatively output this token. This is required before 9659 # calling starting_one_line_block. We may have to unstore 9660 # it, though, if we have to break before it. 9661 store_token_to_go($side_comment_follows); 9662 9663 # Look ahead to see if we might form a one-line block 9664 my $too_long = 9665 starting_one_line_block( $j, $jmax, $level, $slevel, 9666 $ci_level, $rtokens, $rtoken_type, $rblock_type ); 9667 clear_breakpoint_undo_stack(); 9668 9669 # to simplify the logic below, set a flag to indicate if 9670 # this opening brace is far from the keyword which introduces it 9671 my $keyword_on_same_line = 1; 9672 if ( ( $max_index_to_go >= 0 ) 9673 && ( $last_nonblank_type eq ')' ) ) 9674 { 9675 if ( $block_type =~ /^(if|else|elsif)$/ 9676 && ( $tokens_to_go[0] eq '}' ) 9677 && $rOpts_cuddled_else ) 9678 { 9679 $keyword_on_same_line = 1; 9680 } 9681 elsif ( ( $slevel < $nesting_depth_to_go[0] ) || $too_long ) 9682 { 9683 $keyword_on_same_line = 0; 9684 } 9685 } 9686 9687 # decide if user requested break before '{' 9688 my $want_break = 9689 9690 # use -bl flag if not a sub block of any type 9691 $block_type !~ /^sub/ 9692 ? $rOpts->{'opening-brace-on-new-line'} 9693 9694 # use -sbl flag for a named sub block 9695 : $block_type !~ /^sub\W*$/ 9696 ? $rOpts->{'opening-sub-brace-on-new-line'} 9697 9698 # use -asbl flag for an anonymous sub block 9699 : $rOpts->{'opening-anonymous-sub-brace-on-new-line'}; 9700 9701 # Break before an opening '{' ... 9702 if ( 9703 9704 # if requested 9705 $want_break 9706 9707 # and we were unable to start looking for a block, 9708 && $index_start_one_line_block == UNDEFINED_INDEX 9709 9710 # or if it will not be on same line as its keyword, so that 9711 # it will be outdented (eval.t, overload.t), and the user 9712 # has not insisted on keeping it on the right 9713 || ( !$keyword_on_same_line 9714 && !$rOpts->{'opening-brace-always-on-right'} ) 9715 9716 ) 9717 { 9718 9719 # but only if allowed 9720 unless ($no_internal_newlines) { 9721 9722 # since we already stored this token, we must unstore it 9723 unstore_token_to_go(); 9724 9725 # then output the line 9726 output_line_to_go(); 9727 9728 # and now store this token at the start of a new line 9729 store_token_to_go($side_comment_follows); 9730 } 9731 } 9732 9733 # Now update for side comment 9734 if ($side_comment_follows) { $no_internal_newlines = 1 } 9735 9736 # now output this line 9737 unless ($no_internal_newlines) { 9738 output_line_to_go(); 9739 } 9740 } 9741 9742 elsif ($is_closing_BLOCK) { 9743 9744 # If there is a pending one-line block .. 9745 if ( $index_start_one_line_block != UNDEFINED_INDEX ) { 9746 9747 # we have to terminate it if.. 9748 if ( 9749 9750 # it is too long (final length may be different from 9751 # initial estimate). note: must allow 1 space for this token 9752 excess_line_length( $index_start_one_line_block, 9753 $max_index_to_go ) >= 0 9754 9755 # or if it has too many semicolons 9756 || ( $semicolons_before_block_self_destruct == 0 9757 && $last_nonblank_type ne ';' ) 9758 ) 9759 { 9760 destroy_one_line_block(); 9761 } 9762 } 9763 9764 # put a break before this closing curly brace if appropriate 9765 unless ( $no_internal_newlines 9766 || $index_start_one_line_block != UNDEFINED_INDEX ) 9767 { 9768 9769 # add missing semicolon if ... 9770 # there are some tokens 9771 if ( 9772 ( $max_index_to_go > 0 ) 9773 9774 # and we don't have one 9775 && ( $last_nonblank_type ne ';' ) 9776 9777 # patch until some block type issues are fixed: 9778 # Do not add semi-colon for block types '{', 9779 # '}', and ';' because we cannot be sure yet 9780 # that this is a block and not an anonomyous 9781 # hash (blktype.t, blktype1.t) 9782 && ( $block_type !~ /^[\{\};]$/ ) 9783 9784 # patch: and do not add semi-colons for recently 9785 # added block types (see tmp/semicolon.t) 9786 && ( $block_type !~ 9787 /^(switch|case|given|when|default)$/ ) 9788 9789 # it seems best not to add semicolons in these 9790 # special block types: sort|map|grep 9791 && ( !$is_sort_map_grep{$block_type} ) 9792 9793 # and we are allowed to do so. 9794 && $rOpts->{'add-semicolons'} 9795 ) 9796 { 9797 9798 save_current_token(); 9799 $token = ';'; 9800 $type = ';'; 9801 $level = $levels_to_go[$max_index_to_go]; 9802 $slevel = $nesting_depth_to_go[$max_index_to_go]; 9803 $nesting_blocks = 9804 $nesting_blocks_to_go[$max_index_to_go]; 9805 $ci_level = $ci_levels_to_go[$max_index_to_go]; 9806 $block_type = ""; 9807 $container_type = ""; 9808 $container_environment = ""; 9809 $type_sequence = ""; 9810 9811 # Note - we remove any blank AFTER extracting its 9812 # parameters such as level, etc, above 9813 if ( $types_to_go[$max_index_to_go] eq 'b' ) { 9814 unstore_token_to_go(); 9815 } 9816 store_token_to_go(); 9817 9818 note_added_semicolon(); 9819 restore_current_token(); 9820 } 9821 9822 # then write out everything before this closing curly brace 9823 output_line_to_go(); 9824 9825 } 9826 9827 # Now update for side comment 9828 if ($side_comment_follows) { $no_internal_newlines = 1 } 9829 9830 # store the closing curly brace 9831 store_token_to_go(); 9832 9833 # ok, we just stored a closing curly brace. Often, but 9834 # not always, we want to end the line immediately. 9835 # So now we have to check for special cases. 9836 9837 # if this '}' successfully ends a one-line block.. 9838 my $is_one_line_block = 0; 9839 my $keep_going = 0; 9840 if ( $index_start_one_line_block != UNDEFINED_INDEX ) { 9841 9842 # Remember the type of token just before the 9843 # opening brace. It would be more general to use 9844 # a stack, but this will work for one-line blocks. 9845 $is_one_line_block = 9846 $types_to_go[$index_start_one_line_block]; 9847 9848 # we have to actually make it by removing tentative 9849 # breaks that were set within it 9850 undo_forced_breakpoint_stack(0); 9851 set_nobreaks( $index_start_one_line_block, 9852 $max_index_to_go - 1 ); 9853 9854 # then re-initialize for the next one-line block 9855 destroy_one_line_block(); 9856 9857 # then decide if we want to break after the '}' .. 9858 # We will keep going to allow certain brace followers as in: 9859 # do { $ifclosed = 1; last } unless $losing; 9860 # 9861 # But make a line break if the curly ends a 9862 # significant block: 9863 if ( 9864 $is_block_without_semicolon{$block_type} 9865 9866 # if needless semicolon follows we handle it later 9867 && $next_nonblank_token ne ';' 9868 ) 9869 { 9870 output_line_to_go() unless ($no_internal_newlines); 9871 } 9872 } 9873 9874 # set string indicating what we need to look for brace follower 9875 # tokens 9876 if ( $block_type eq 'do' ) { 9877 $rbrace_follower = \%is_do_follower; 9878 } 9879 elsif ( $block_type =~ /^(if|elsif|unless)$/ ) { 9880 $rbrace_follower = \%is_if_brace_follower; 9881 } 9882 elsif ( $block_type eq 'else' ) { 9883 $rbrace_follower = \%is_else_brace_follower; 9884 } 9885 9886 # added eval for borris.t 9887 elsif ($is_sort_map_grep_eval{$block_type} 9888 || $is_one_line_block eq 'G' ) 9889 { 9890 $rbrace_follower = undef; 9891 $keep_going = 1; 9892 } 9893 9894 # anonymous sub 9895 elsif ( $block_type =~ /^sub\W*$/ ) { 9896 9897 if ($is_one_line_block) { 9898 $rbrace_follower = \%is_anon_sub_1_brace_follower; 9899 } 9900 else { 9901 $rbrace_follower = \%is_anon_sub_brace_follower; 9902 } 9903 } 9904 9905 # None of the above: specify what can follow a closing 9906 # brace of a block which is not an 9907 # if/elsif/else/do/sort/map/grep/eval 9908 # Testfiles: 9909 # 'Toolbar.pm', 'Menubar.pm', bless.t, '3rules.pl', 'break1.t 9910 else { 9911 $rbrace_follower = \%is_other_brace_follower; 9912 } 9913 9914 # See if an elsif block is followed by another elsif or else; 9915 # complain if not. 9916 if ( $block_type eq 'elsif' ) { 9917 9918 if ( $next_nonblank_token_type eq 'b' ) { # end of line? 9919 $looking_for_else = 1; # ok, check on next line 9920 } 9921 else { 9922 9923 unless ( $next_nonblank_token =~ /^(elsif|else)$/ ) { 9924 write_logfile_entry("No else block :(\n"); 9925 } 9926 } 9927 } 9928 9929 # keep going after certain block types (map,sort,grep,eval) 9930 # added eval for borris.t 9931 if ($keep_going) { 9932 9933 # keep going 9934 } 9935 9936 # if no more tokens, postpone decision until re-entring 9937 elsif ( ( $next_nonblank_token_type eq 'b' ) 9938 && $rOpts_add_newlines ) 9939 { 9940 unless ($rbrace_follower) { 9941 output_line_to_go() unless ($no_internal_newlines); 9942 } 9943 } 9944 9945 elsif ($rbrace_follower) { 9946 9947 unless ( $rbrace_follower->{$next_nonblank_token} ) { 9948 output_line_to_go() unless ($no_internal_newlines); 9949 } 9950 $rbrace_follower = undef; 9951 } 9952 9953 else { 9954 output_line_to_go() unless ($no_internal_newlines); 9955 } 9956 9957 } # end treatment of closing block token 9958 9959 # handle semicolon 9960 elsif ( $type eq ';' ) { 9961 9962 # kill one-line blocks with too many semicolons 9963 $semicolons_before_block_self_destruct--; 9964 if ( 9965 ( $semicolons_before_block_self_destruct < 0 ) 9966 || ( $semicolons_before_block_self_destruct == 0 9967 && $next_nonblank_token_type !~ /^[b\}]$/ ) 9968 ) 9969 { 9970 destroy_one_line_block(); 9971 } 9972 9973 # Remove unnecessary semicolons, but not after bare 9974 # blocks, where it could be unsafe if the brace is 9975 # mistokenized. 9976 if ( 9977 ( 9978 $last_nonblank_token eq '}' 9979 && ( 9980 $is_block_without_semicolon{ 9981 $last_nonblank_block_type} 9982 || $last_nonblank_block_type =~ /^sub\s+\w/ 9983 || $last_nonblank_block_type =~ /^\w+:$/ ) 9984 ) 9985 || $last_nonblank_type eq ';' 9986 ) 9987 { 9988 9989 if ( 9990 $rOpts->{'delete-semicolons'} 9991 9992 # don't delete ; before a # because it would promote it 9993 # to a block comment 9994 && ( $next_nonblank_token_type ne '#' ) 9995 ) 9996 { 9997 note_deleted_semicolon(); 9998 output_line_to_go() 9999 unless ( $no_internal_newlines 10000 || $index_start_one_line_block != UNDEFINED_INDEX ); 10001 next; 10002 } 10003 else { 10004 write_logfile_entry("Extra ';'\n"); 10005 } 10006 } 10007 store_token_to_go(); 10008 10009 output_line_to_go() 10010 unless ( $no_internal_newlines 10011 || ( $rOpts_keep_interior_semicolons && $j < $jmax ) 10012 || ( $next_nonblank_token eq '}' ) ); 10013 10014 } 10015 10016 # handle here_doc target string 10017 elsif ( $type eq 'h' ) { 10018 $no_internal_newlines = 10019 1; # no newlines after seeing here-target 10020 destroy_one_line_block(); 10021 store_token_to_go(); 10022 } 10023 10024 # handle all other token types 10025 else { 10026 10027 # if this is a blank... 10028 if ( $type eq 'b' ) { 10029 10030 # make it just one character 10031 $token = ' ' if $rOpts_add_whitespace; 10032 10033 # delete it if unwanted by whitespace rules 10034 # or we are deleting all whitespace 10035 my $ws = $$rwhite_space_flag[ $j + 1 ]; 10036 if ( ( defined($ws) && $ws == -1 ) 10037 || $rOpts_delete_old_whitespace ) 10038 { 10039 10040 # unless it might make a syntax error 10041 next 10042 unless is_essential_whitespace( 10043 $last_last_nonblank_token, 10044 $last_last_nonblank_type, 10045 $tokens_to_go[$max_index_to_go], 10046 $types_to_go[$max_index_to_go], 10047 $$rtokens[ $j + 1 ], 10048 $$rtoken_type[ $j + 1 ] 10049 ); 10050 } 10051 } 10052 store_token_to_go(); 10053 } 10054 10055 # remember two previous nonblank OUTPUT tokens 10056 if ( $type ne '#' && $type ne 'b' ) { 10057 $last_last_nonblank_token = $last_nonblank_token; 10058 $last_last_nonblank_type = $last_nonblank_type; 10059 $last_nonblank_token = $token; 10060 $last_nonblank_type = $type; 10061 $last_nonblank_block_type = $block_type; 10062 } 10063 10064 # unset the continued-quote flag since it only applies to the 10065 # first token, and we want to resume normal formatting if 10066 # there are additional tokens on the line 10067 $in_continued_quote = 0; 10068 10069 } # end of loop over all tokens in this 'line_of_tokens' 10070 10071 # we have to flush .. 10072 if ( 10073 10074 # if there is a side comment 10075 ( ( $type eq '#' ) && !$rOpts->{'delete-side-comments'} ) 10076 10077 # if this line ends in a quote 10078 # NOTE: This is critically important for insuring that quoted lines 10079 # do not get processed by things like -sot and -sct 10080 || $in_quote 10081 10082 # if this is a VERSION statement 10083 || $is_VERSION_statement 10084 10085 # to keep a label at the end of a line 10086 || $type eq 'J' 10087 10088 # if we are instructed to keep all old line breaks 10089 || !$rOpts->{'delete-old-newlines'} 10090 ) 10091 { 10092 destroy_one_line_block(); 10093 output_line_to_go(); 10094 } 10095 10096 # mark old line breakpoints in current output stream 10097 if ( $max_index_to_go >= 0 && !$rOpts_ignore_old_breakpoints ) { 10098 $old_breakpoint_to_go[$max_index_to_go] = 1; 10099 } 10100 } ## end sub print_line_of_tokens 10101} ## end block print_line_of_tokens 10102 10103# sub output_line_to_go sends one logical line of tokens on down the 10104# pipeline to the VerticalAligner package, breaking the line into continuation 10105# lines as necessary. The line of tokens is ready to go in the "to_go" 10106# arrays. 10107sub output_line_to_go { 10108 10109 # debug stuff; this routine can be called from many points 10110 FORMATTER_DEBUG_FLAG_OUTPUT && do { 10111 my ( $a, $b, $c ) = caller; 10112 write_diagnostics( 10113"OUTPUT: output_line_to_go called: $a $c $last_nonblank_type $last_nonblank_token, one_line=$index_start_one_line_block, tokens to write=$max_index_to_go\n" 10114 ); 10115 my $output_str = join "", @tokens_to_go[ 0 .. $max_index_to_go ]; 10116 write_diagnostics("$output_str\n"); 10117 }; 10118 10119 # just set a tentative breakpoint if we might be in a one-line block 10120 if ( $index_start_one_line_block != UNDEFINED_INDEX ) { 10121 set_forced_breakpoint($max_index_to_go); 10122 return; 10123 } 10124 10125 my $cscw_block_comment; 10126 $cscw_block_comment = add_closing_side_comment() 10127 if ( $rOpts->{'closing-side-comments'} && $max_index_to_go >= 0 ); 10128 10129 my $comma_arrow_count_contained = match_opening_and_closing_tokens(); 10130 10131 # tell the -lp option we are outputting a batch so it can close 10132 # any unfinished items in its stack 10133 finish_lp_batch(); 10134 10135 # If this line ends in a code block brace, set breaks at any 10136 # previous closing code block braces to breakup a chain of code 10137 # blocks on one line. This is very rare but can happen for 10138 # user-defined subs. For example we might be looking at this: 10139 # BOOL { $server_data{uptime} > 0; } NUM { $server_data{load}; } STR { 10140 my $saw_good_break = 0; # flag to force breaks even if short line 10141 if ( 10142 10143 # looking for opening or closing block brace 10144 $block_type_to_go[$max_index_to_go] 10145 10146 # but not one of these which are never duplicated on a line: 10147 # until|while|for|if|elsif|else 10148 && !$is_block_without_semicolon{ $block_type_to_go[$max_index_to_go] } 10149 ) 10150 { 10151 my $lev = $nesting_depth_to_go[$max_index_to_go]; 10152 10153 # Walk backwards from the end and 10154 # set break at any closing block braces at the same level. 10155 # But quit if we are not in a chain of blocks. 10156 for ( my $i = $max_index_to_go - 1 ; $i >= 0 ; $i-- ) { 10157 last if ( $levels_to_go[$i] < $lev ); # stop at a lower level 10158 next if ( $levels_to_go[$i] > $lev ); # skip past higher level 10159 10160 if ( $block_type_to_go[$i] ) { 10161 if ( $tokens_to_go[$i] eq '}' ) { 10162 set_forced_breakpoint($i); 10163 $saw_good_break = 1; 10164 } 10165 } 10166 10167 # quit if we see anything besides words, function, blanks 10168 # at this level 10169 elsif ( $types_to_go[$i] !~ /^[\(\)Gwib]$/ ) { last } 10170 } 10171 } 10172 10173 my $imin = 0; 10174 my $imax = $max_index_to_go; 10175 10176 # trim any blank tokens 10177 if ( $max_index_to_go >= 0 ) { 10178 if ( $types_to_go[$imin] eq 'b' ) { $imin++ } 10179 if ( $types_to_go[$imax] eq 'b' ) { $imax-- } 10180 } 10181 10182 # anything left to write? 10183 if ( $imin <= $imax ) { 10184 10185 # add a blank line before certain key types but not after a comment 10186 if ( $last_line_leading_type !~ /^[#]/ ) { 10187 my $want_blank = 0; 10188 my $leading_token = $tokens_to_go[$imin]; 10189 my $leading_type = $types_to_go[$imin]; 10190 10191 # blank lines before subs except declarations and one-liners 10192 # MCONVERSION LOCATION - for sub tokenization change 10193 if ( $leading_token =~ /^(sub\s)/ && $leading_type eq 'i' ) { 10194 $want_blank = $rOpts->{'blank-lines-before-subs'} 10195 if ( 10196 terminal_type( \@types_to_go, \@block_type_to_go, $imin, 10197 $imax ) !~ /^[\;\}]$/ 10198 ); 10199 } 10200 10201 # break before all package declarations 10202 # MCONVERSION LOCATION - for tokenizaton change 10203 elsif ($leading_token =~ /^(package\s)/ 10204 && $leading_type eq 'i' ) 10205 { 10206 $want_blank = $rOpts->{'blank-lines-before-packages'}; 10207 } 10208 10209 # break before certain key blocks except one-liners 10210 if ( $leading_token =~ /^(BEGIN|END)$/ && $leading_type eq 'k' ) { 10211 $want_blank = $rOpts->{'blank-lines-before-subs'} 10212 if ( 10213 terminal_type( \@types_to_go, \@block_type_to_go, $imin, 10214 $imax ) ne '}' 10215 ); 10216 } 10217 10218 # Break before certain block types if we haven't had a 10219 # break at this level for a while. This is the 10220 # difficult decision.. 10221 elsif ($leading_type eq 'k' 10222 && $last_line_leading_type ne 'b' 10223 && $leading_token =~ /^(unless|if|while|until|for|foreach)$/ ) 10224 { 10225 my $lc = $nonblank_lines_at_depth[$last_line_leading_level]; 10226 if ( !defined($lc) ) { $lc = 0 } 10227 10228 $want_blank = 10229 $rOpts->{'blanks-before-blocks'} 10230 && $lc >= $rOpts->{'long-block-line-count'} 10231 && $file_writer_object->get_consecutive_nonblank_lines() >= 10232 $rOpts->{'long-block-line-count'} 10233 && ( 10234 terminal_type( \@types_to_go, \@block_type_to_go, $imin, 10235 $imax ) ne '}' 10236 ); 10237 } 10238 10239 if ($want_blank) { 10240 10241 # future: send blank line down normal path to VerticalAligner 10242 Perl::Tidy::VerticalAligner::flush(); 10243 $file_writer_object->require_blank_code_lines($want_blank); 10244 } 10245 } 10246 10247 # update blank line variables and count number of consecutive 10248 # non-blank, non-comment lines at this level 10249 $last_last_line_leading_level = $last_line_leading_level; 10250 $last_line_leading_level = $levels_to_go[$imin]; 10251 if ( $last_line_leading_level < 0 ) { $last_line_leading_level = 0 } 10252 $last_line_leading_type = $types_to_go[$imin]; 10253 if ( $last_line_leading_level == $last_last_line_leading_level 10254 && $last_line_leading_type ne 'b' 10255 && $last_line_leading_type ne '#' 10256 && defined( $nonblank_lines_at_depth[$last_line_leading_level] ) ) 10257 { 10258 $nonblank_lines_at_depth[$last_line_leading_level]++; 10259 } 10260 else { 10261 $nonblank_lines_at_depth[$last_line_leading_level] = 1; 10262 } 10263 10264 FORMATTER_DEBUG_FLAG_FLUSH && do { 10265 my ( $package, $file, $line ) = caller; 10266 print STDOUT 10267"FLUSH: flushing from $package $file $line, types= $types_to_go[$imin] to $types_to_go[$imax]\n"; 10268 }; 10269 10270 # add a couple of extra terminal blank tokens 10271 pad_array_to_go(); 10272 10273 # set all forced breakpoints for good list formatting 10274 my $is_long_line = excess_line_length( $imin, $max_index_to_go ) > 0; 10275 10276 if ( 10277 $is_long_line 10278 || $old_line_count_in_batch > 1 10279 10280 # must always call scan_list() with unbalanced batches because it 10281 # is maintaining some stacks 10282 || is_unbalanced_batch() 10283 10284 # call scan_list if we might want to break at commas 10285 || ( 10286 $comma_count_in_batch 10287 && ( $rOpts_maximum_fields_per_table > 0 10288 || $rOpts_comma_arrow_breakpoints == 0 ) 10289 ) 10290 10291 # call scan_list if user may want to break open some one-line 10292 # hash references 10293 || ( $comma_arrow_count_contained 10294 && $rOpts_comma_arrow_breakpoints != 3 ) 10295 ) 10296 { 10297 ## This caused problems in one version of perl for unknown reasons: 10298 ## $saw_good_break ||= scan_list(); 10299 my $sgb = scan_list(); 10300 $saw_good_break ||= $sgb; 10301 } 10302 10303 # let $ri_first and $ri_last be references to lists of 10304 # first and last tokens of line fragments to output.. 10305 my ( $ri_first, $ri_last ); 10306 10307 # write a single line if.. 10308 if ( 10309 10310 # we aren't allowed to add any newlines 10311 !$rOpts_add_newlines 10312 10313 # or, we don't already have an interior breakpoint 10314 # and we didn't see a good breakpoint 10315 || ( 10316 !$forced_breakpoint_count 10317 && !$saw_good_break 10318 10319 # and this line is 'short' 10320 && !$is_long_line 10321 ) 10322 ) 10323 { 10324 @$ri_first = ($imin); 10325 @$ri_last = ($imax); 10326 } 10327 10328 # otherwise use multiple lines 10329 else { 10330 10331 ( $ri_first, $ri_last, my $colon_count ) = 10332 set_continuation_breaks($saw_good_break); 10333 10334 break_all_chain_tokens( $ri_first, $ri_last ); 10335 10336 break_equals( $ri_first, $ri_last ); 10337 10338 # now we do a correction step to clean this up a bit 10339 # (The only time we would not do this is for debugging) 10340 if ( $rOpts->{'recombine'} ) { 10341 ( $ri_first, $ri_last ) = 10342 recombine_breakpoints( $ri_first, $ri_last ); 10343 } 10344 10345 insert_final_breaks( $ri_first, $ri_last ) if $colon_count; 10346 } 10347 10348 # do corrector step if -lp option is used 10349 my $do_not_pad = 0; 10350 if ($rOpts_line_up_parentheses) { 10351 $do_not_pad = correct_lp_indentation( $ri_first, $ri_last ); 10352 } 10353 send_lines_to_vertical_aligner( $ri_first, $ri_last, $do_not_pad ); 10354 } 10355 prepare_for_new_input_lines(); 10356 10357 # output any new -cscw block comment 10358 if ($cscw_block_comment) { 10359 flush(); 10360 $file_writer_object->write_code_line( $cscw_block_comment . "\n" ); 10361 } 10362} 10363 10364sub note_added_semicolon { 10365 $last_added_semicolon_at = $input_line_number; 10366 if ( $added_semicolon_count == 0 ) { 10367 $first_added_semicolon_at = $last_added_semicolon_at; 10368 } 10369 $added_semicolon_count++; 10370 write_logfile_entry("Added ';' here\n"); 10371} 10372 10373sub note_deleted_semicolon { 10374 $last_deleted_semicolon_at = $input_line_number; 10375 if ( $deleted_semicolon_count == 0 ) { 10376 $first_deleted_semicolon_at = $last_deleted_semicolon_at; 10377 } 10378 $deleted_semicolon_count++; 10379 write_logfile_entry("Deleted unnecessary ';'\n"); # i hope ;) 10380} 10381 10382sub note_embedded_tab { 10383 $embedded_tab_count++; 10384 $last_embedded_tab_at = $input_line_number; 10385 if ( !$first_embedded_tab_at ) { 10386 $first_embedded_tab_at = $last_embedded_tab_at; 10387 } 10388 10389 if ( $embedded_tab_count <= MAX_NAG_MESSAGES ) { 10390 write_logfile_entry("Embedded tabs in quote or pattern\n"); 10391 } 10392} 10393 10394sub starting_one_line_block { 10395 10396 # after seeing an opening curly brace, look for the closing brace 10397 # and see if the entire block will fit on a line. This routine is 10398 # not always right because it uses the old whitespace, so a check 10399 # is made later (at the closing brace) to make sure we really 10400 # have a one-line block. We have to do this preliminary check, 10401 # though, because otherwise we would always break at a semicolon 10402 # within a one-line block if the block contains multiple statements. 10403 10404 my ( $j, $jmax, $level, $slevel, $ci_level, $rtokens, $rtoken_type, 10405 $rblock_type ) 10406 = @_; 10407 10408 # kill any current block - we can only go 1 deep 10409 destroy_one_line_block(); 10410 10411 # return value: 10412 # 1=distance from start of block to opening brace exceeds line length 10413 # 0=otherwise 10414 10415 my $i_start = 0; 10416 10417 # shouldn't happen: there must have been a prior call to 10418 # store_token_to_go to put the opening brace in the output stream 10419 if ( $max_index_to_go < 0 ) { 10420 warning("program bug: store_token_to_go called incorrectly\n"); 10421 report_definite_bug(); 10422 } 10423 else { 10424 10425 # cannot use one-line blocks with cuddled else else/elsif lines 10426 if ( ( $tokens_to_go[0] eq '}' ) && $rOpts_cuddled_else ) { 10427 return 0; 10428 } 10429 } 10430 10431 my $block_type = $$rblock_type[$j]; 10432 10433 # find the starting keyword for this block (such as 'if', 'else', ...) 10434 10435 if ( $block_type =~ /^[\{\}\;\:]$/ || $block_type =~ /^package/ ) { 10436 $i_start = $max_index_to_go; 10437 } 10438 10439 elsif ( $last_last_nonblank_token_to_go eq ')' ) { 10440 10441 # For something like "if (xxx) {", the keyword "if" will be 10442 # just after the most recent break. This will be 0 unless 10443 # we have just killed a one-line block and are starting another. 10444 # (doif.t) 10445 # Note: cannot use inext_index_to_go[] here because that array 10446 # is still being constructed. 10447 $i_start = $index_max_forced_break + 1; 10448 if ( $types_to_go[$i_start] eq 'b' ) { 10449 $i_start++; 10450 } 10451 10452 unless ( $tokens_to_go[$i_start] eq $block_type ) { 10453 return 0; 10454 } 10455 } 10456 10457 # the previous nonblank token should start these block types 10458 elsif (( $last_last_nonblank_token_to_go eq $block_type ) 10459 || ( $block_type =~ /^sub/ ) ) 10460 { 10461 $i_start = $last_last_nonblank_index_to_go; 10462 } 10463 10464 # patch for SWITCH/CASE to retain one-line case/when blocks 10465 elsif ( $block_type eq 'case' || $block_type eq 'when' ) { 10466 10467 # Note: cannot use inext_index_to_go[] here because that array 10468 # is still being constructed. 10469 $i_start = $index_max_forced_break + 1; 10470 if ( $types_to_go[$i_start] eq 'b' ) { 10471 $i_start++; 10472 } 10473 unless ( $tokens_to_go[$i_start] eq $block_type ) { 10474 return 0; 10475 } 10476 } 10477 10478 else { 10479 return 1; 10480 } 10481 10482 my $pos = total_line_length( $i_start, $max_index_to_go ) - 1; 10483 10484 my $i; 10485 10486 # see if length is too long to even start 10487 if ( $pos > maximum_line_length($i_start) ) { 10488 return 1; 10489 } 10490 10491 for ( $i = $j + 1 ; $i <= $jmax ; $i++ ) { 10492 10493 # old whitespace could be arbitrarily large, so don't use it 10494 if ( $$rtoken_type[$i] eq 'b' ) { $pos += 1 } 10495 else { $pos += rtoken_length($i) } 10496 10497 # Return false result if we exceed the maximum line length, 10498 if ( $pos > maximum_line_length($i_start) ) { 10499 return 0; 10500 } 10501 10502 # or encounter another opening brace before finding the closing brace. 10503 elsif ($$rtokens[$i] eq '{' 10504 && $$rtoken_type[$i] eq '{' 10505 && $$rblock_type[$i] ) 10506 { 10507 return 0; 10508 } 10509 10510 # if we find our closing brace.. 10511 elsif ($$rtokens[$i] eq '}' 10512 && $$rtoken_type[$i] eq '}' 10513 && $$rblock_type[$i] ) 10514 { 10515 10516 # be sure any trailing comment also fits on the line 10517 my $i_nonblank = 10518 ( $$rtoken_type[ $i + 1 ] eq 'b' ) ? $i + 2 : $i + 1; 10519 10520 # Patch for one-line sort/map/grep/eval blocks with side comments: 10521 # We will ignore the side comment length for sort/map/grep/eval 10522 # because this can lead to statements which change every time 10523 # perltidy is run. Here is an example from Denis Moskowitz which 10524 # oscillates between these two states without this patch: 10525 10526## -------- 10527## grep { $_->foo ne 'bar' } # asdfa asdf asdf asdf asdf asdf asdf asdf asdf asdf asdf 10528## @baz; 10529## 10530## grep { 10531## $_->foo ne 'bar' 10532## } # asdfa asdf asdf asdf asdf asdf asdf asdf asdf asdf asdf 10533## @baz; 10534## -------- 10535 10536 # When the first line is input it gets broken apart by the main 10537 # line break logic in sub print_line_of_tokens. 10538 # When the second line is input it gets recombined by 10539 # print_line_of_tokens and passed to the output routines. The 10540 # output routines (set_continuation_breaks) do not break it apart 10541 # because the bond strengths are set to the highest possible value 10542 # for grep/map/eval/sort blocks, so the first version gets output. 10543 # It would be possible to fix this by changing bond strengths, 10544 # but they are high to prevent errors in older versions of perl. 10545 10546 if ( $$rtoken_type[$i_nonblank] eq '#' 10547 && !$is_sort_map_grep{$block_type} ) 10548 { 10549 10550 $pos += rtoken_length($i_nonblank); 10551 10552 if ( $i_nonblank > $i + 1 ) { 10553 10554 # source whitespace could be anything, assume 10555 # at least one space before the hash on output 10556 if ( $$rtoken_type[ $i + 1 ] eq 'b' ) { $pos += 1 } 10557 else { $pos += rtoken_length( $i + 1 ) } 10558 } 10559 10560 if ( $pos >= maximum_line_length($i_start) ) { 10561 return 0; 10562 } 10563 } 10564 10565 # ok, it's a one-line block 10566 create_one_line_block( $i_start, 20 ); 10567 return 0; 10568 } 10569 10570 # just keep going for other characters 10571 else { 10572 } 10573 } 10574 10575 # Allow certain types of new one-line blocks to form by joining 10576 # input lines. These can be safely done, but for other block types, 10577 # we keep old one-line blocks but do not form new ones. It is not 10578 # always a good idea to make as many one-line blocks as possible, 10579 # so other types are not done. The user can always use -mangle. 10580 if ( $is_sort_map_grep_eval{$block_type} ) { 10581 create_one_line_block( $i_start, 1 ); 10582 } 10583 10584 return 0; 10585} 10586 10587sub unstore_token_to_go { 10588 10589 # remove most recent token from output stream 10590 if ( $max_index_to_go > 0 ) { 10591 $max_index_to_go--; 10592 } 10593 else { 10594 $max_index_to_go = UNDEFINED_INDEX; 10595 } 10596 10597} 10598 10599sub want_blank_line { 10600 flush(); 10601 $file_writer_object->want_blank_line(); 10602} 10603 10604sub write_unindented_line { 10605 flush(); 10606 $file_writer_object->write_line( $_[0] ); 10607} 10608 10609sub undo_ci { 10610 10611 # Undo continuation indentation in certain sequences 10612 # For example, we can undo continuation indation in sort/map/grep chains 10613 # my $dat1 = pack( "n*", 10614 # map { $_, $lookup->{$_} } 10615 # sort { $a <=> $b } 10616 # grep { $lookup->{$_} ne $default } keys %$lookup ); 10617 # To align the map/sort/grep keywords like this: 10618 # my $dat1 = pack( "n*", 10619 # map { $_, $lookup->{$_} } 10620 # sort { $a <=> $b } 10621 # grep { $lookup->{$_} ne $default } keys %$lookup ); 10622 my ( $ri_first, $ri_last ) = @_; 10623 my ( $line_1, $line_2, $lev_last ); 10624 my $this_line_is_semicolon_terminated; 10625 my $max_line = @$ri_first - 1; 10626 10627 # looking at each line of this batch.. 10628 # We are looking at leading tokens and looking for a sequence 10629 # all at the same level and higher level than enclosing lines. 10630 foreach my $line ( 0 .. $max_line ) { 10631 10632 my $ibeg = $$ri_first[$line]; 10633 my $lev = $levels_to_go[$ibeg]; 10634 if ( $line > 0 ) { 10635 10636 # if we have started a chain.. 10637 if ($line_1) { 10638 10639 # see if it continues.. 10640 if ( $lev == $lev_last ) { 10641 if ( $types_to_go[$ibeg] eq 'k' 10642 && $is_sort_map_grep{ $tokens_to_go[$ibeg] } ) 10643 { 10644 10645 # chain continues... 10646 # check for chain ending at end of a a statement 10647 if ( $line == $max_line ) { 10648 10649 # see of this line ends a statement 10650 my $iend = $$ri_last[$line]; 10651 $this_line_is_semicolon_terminated = 10652 $types_to_go[$iend] eq ';' 10653 10654 # with possible side comment 10655 || ( $types_to_go[$iend] eq '#' 10656 && $iend - $ibeg >= 2 10657 && $types_to_go[ $iend - 2 ] eq ';' 10658 && $types_to_go[ $iend - 1 ] eq 'b' ); 10659 } 10660 $line_2 = $line if ($this_line_is_semicolon_terminated); 10661 } 10662 else { 10663 10664 # kill chain 10665 $line_1 = undef; 10666 } 10667 } 10668 elsif ( $lev < $lev_last ) { 10669 10670 # chain ends with previous line 10671 $line_2 = $line - 1; 10672 } 10673 elsif ( $lev > $lev_last ) { 10674 10675 # kill chain 10676 $line_1 = undef; 10677 } 10678 10679 # undo the continuation indentation if a chain ends 10680 if ( defined($line_2) && defined($line_1) ) { 10681 my $continuation_line_count = $line_2 - $line_1 + 1; 10682 @ci_levels_to_go[ @$ri_first[ $line_1 .. $line_2 ] ] = 10683 (0) x ($continuation_line_count); 10684 @leading_spaces_to_go[ @$ri_first[ $line_1 .. $line_2 ] ] = 10685 @reduced_spaces_to_go[ @$ri_first[ $line_1 .. $line_2 ] ]; 10686 $line_1 = undef; 10687 } 10688 } 10689 10690 # not in a chain yet.. 10691 else { 10692 10693 # look for start of a new sort/map/grep chain 10694 if ( $lev > $lev_last ) { 10695 if ( $types_to_go[$ibeg] eq 'k' 10696 && $is_sort_map_grep{ $tokens_to_go[$ibeg] } ) 10697 { 10698 $line_1 = $line; 10699 } 10700 } 10701 } 10702 } 10703 $lev_last = $lev; 10704 } 10705} 10706 10707sub undo_lp_ci { 10708 10709 # If there is a single, long parameter within parens, like this: 10710 # 10711 # $self->command( "/msg " 10712 # . $infoline->chan 10713 # . " You said $1, but did you know that it's square was " 10714 # . $1 * $1 . " ?" ); 10715 # 10716 # we can remove the continuation indentation of the 2nd and higher lines 10717 # to achieve this effect, which is more pleasing: 10718 # 10719 # $self->command("/msg " 10720 # . $infoline->chan 10721 # . " You said $1, but did you know that it's square was " 10722 # . $1 * $1 . " ?"); 10723 10724 my ( $line_open, $i_start, $closing_index, $ri_first, $ri_last ) = @_; 10725 my $max_line = @$ri_first - 1; 10726 10727 # must be multiple lines 10728 return unless $max_line > $line_open; 10729 10730 my $lev_start = $levels_to_go[$i_start]; 10731 my $ci_start_plus = 1 + $ci_levels_to_go[$i_start]; 10732 10733 # see if all additional lines in this container have continuation 10734 # indentation 10735 my $n; 10736 my $line_1 = 1 + $line_open; 10737 for ( $n = $line_1 ; $n <= $max_line ; ++$n ) { 10738 my $ibeg = $$ri_first[$n]; 10739 my $iend = $$ri_last[$n]; 10740 if ( $ibeg eq $closing_index ) { $n--; last } 10741 return if ( $lev_start != $levels_to_go[$ibeg] ); 10742 return if ( $ci_start_plus != $ci_levels_to_go[$ibeg] ); 10743 last if ( $closing_index <= $iend ); 10744 } 10745 10746 # we can reduce the indentation of all continuation lines 10747 my $continuation_line_count = $n - $line_open; 10748 @ci_levels_to_go[ @$ri_first[ $line_1 .. $n ] ] = 10749 (0) x ($continuation_line_count); 10750 @leading_spaces_to_go[ @$ri_first[ $line_1 .. $n ] ] = 10751 @reduced_spaces_to_go[ @$ri_first[ $line_1 .. $n ] ]; 10752} 10753 10754sub pad_token { 10755 10756 # insert $pad_spaces before token number $ipad 10757 my ( $ipad, $pad_spaces ) = @_; 10758 if ( $pad_spaces > 0 ) { 10759 $tokens_to_go[$ipad] = ' ' x $pad_spaces . $tokens_to_go[$ipad]; 10760 } 10761 elsif ( $pad_spaces == -1 && $tokens_to_go[$ipad] eq ' ' ) { 10762 $tokens_to_go[$ipad] = ""; 10763 } 10764 else { 10765 10766 # shouldn't happen 10767 return; 10768 } 10769 10770 $token_lengths_to_go[$ipad] += $pad_spaces; 10771 for ( my $i = $ipad ; $i <= $max_index_to_go ; $i++ ) { 10772 $summed_lengths_to_go[ $i + 1 ] += $pad_spaces; 10773 } 10774} 10775 10776{ 10777 my %is_math_op; 10778 10779 BEGIN { 10780 10781 @_ = qw( + - * / ); 10782 @is_math_op{@_} = (1) x scalar(@_); 10783 } 10784 10785 sub set_logical_padding { 10786 10787 # Look at a batch of lines and see if extra padding can improve the 10788 # alignment when there are certain leading operators. Here is an 10789 # example, in which some extra space is introduced before 10790 # '( $year' to make it line up with the subsequent lines: 10791 # 10792 # if ( ( $Year < 1601 ) 10793 # || ( $Year > 2899 ) 10794 # || ( $EndYear < 1601 ) 10795 # || ( $EndYear > 2899 ) ) 10796 # { 10797 # &Error_OutOfRange; 10798 # } 10799 # 10800 my ( $ri_first, $ri_last ) = @_; 10801 my $max_line = @$ri_first - 1; 10802 10803 my ( $ibeg, $ibeg_next, $ibegm, $iend, $iendm, $ipad, $line, 10804 $pad_spaces, 10805 $tok_next, $type_next, $has_leading_op_next, $has_leading_op ); 10806 10807 # looking at each line of this batch.. 10808 foreach $line ( 0 .. $max_line - 1 ) { 10809 10810 # see if the next line begins with a logical operator 10811 $ibeg = $$ri_first[$line]; 10812 $iend = $$ri_last[$line]; 10813 $ibeg_next = $$ri_first[ $line + 1 ]; 10814 $tok_next = $tokens_to_go[$ibeg_next]; 10815 $type_next = $types_to_go[$ibeg_next]; 10816 10817 $has_leading_op_next = ( $tok_next =~ /^\w/ ) 10818 ? $is_chain_operator{$tok_next} # + - * / : ? && || 10819 : $is_chain_operator{$type_next}; # and, or 10820 10821 next unless ($has_leading_op_next); 10822 10823 # next line must not be at lesser depth 10824 next 10825 if ( $nesting_depth_to_go[$ibeg] > 10826 $nesting_depth_to_go[$ibeg_next] ); 10827 10828 # identify the token in this line to be padded on the left 10829 $ipad = undef; 10830 10831 # handle lines at same depth... 10832 if ( $nesting_depth_to_go[$ibeg] == 10833 $nesting_depth_to_go[$ibeg_next] ) 10834 { 10835 10836 # if this is not first line of the batch ... 10837 if ( $line > 0 ) { 10838 10839 # and we have leading operator.. 10840 next if $has_leading_op; 10841 10842 # Introduce padding if.. 10843 # 1. the previous line is at lesser depth, or 10844 # 2. the previous line ends in an assignment 10845 # 3. the previous line ends in a 'return' 10846 # 4. the previous line ends in a comma 10847 # Example 1: previous line at lesser depth 10848 # if ( ( $Year < 1601 ) # <- we are here but 10849 # || ( $Year > 2899 ) # list has not yet 10850 # || ( $EndYear < 1601 ) # collapsed vertically 10851 # || ( $EndYear > 2899 ) ) 10852 # { 10853 # 10854 # Example 2: previous line ending in assignment: 10855 # $leapyear = 10856 # $year % 4 ? 0 # <- We are here 10857 # : $year % 100 ? 1 10858 # : $year % 400 ? 0 10859 # : 1; 10860 # 10861 # Example 3: previous line ending in comma: 10862 # push @expr, 10863 # /test/ ? undef 10864 # : eval($_) ? 1 10865 # : eval($_) ? 1 10866 # : 0; 10867 10868 # be sure levels agree (do not indent after an indented 'if') 10869 next 10870 if ( $levels_to_go[$ibeg] ne $levels_to_go[$ibeg_next] ); 10871 10872 # allow padding on first line after a comma but only if: 10873 # (1) this is line 2 and 10874 # (2) there are at more than three lines and 10875 # (3) lines 3 and 4 have the same leading operator 10876 # These rules try to prevent padding within a long 10877 # comma-separated list. 10878 my $ok_comma; 10879 if ( $types_to_go[$iendm] eq ',' 10880 && $line == 1 10881 && $max_line > 2 ) 10882 { 10883 my $ibeg_next_next = $$ri_first[ $line + 2 ]; 10884 my $tok_next_next = $tokens_to_go[$ibeg_next_next]; 10885 $ok_comma = $tok_next_next eq $tok_next; 10886 } 10887 10888 next 10889 unless ( 10890 $is_assignment{ $types_to_go[$iendm] } 10891 || $ok_comma 10892 || ( $nesting_depth_to_go[$ibegm] < 10893 $nesting_depth_to_go[$ibeg] ) 10894 || ( $types_to_go[$iendm] eq 'k' 10895 && $tokens_to_go[$iendm] eq 'return' ) 10896 ); 10897 10898 # we will add padding before the first token 10899 $ipad = $ibeg; 10900 } 10901 10902 # for first line of the batch.. 10903 else { 10904 10905 # WARNING: Never indent if first line is starting in a 10906 # continued quote, which would change the quote. 10907 next if $starting_in_quote; 10908 10909 # if this is text after closing '}' 10910 # then look for an interior token to pad 10911 if ( $types_to_go[$ibeg] eq '}' ) { 10912 10913 } 10914 10915 # otherwise, we might pad if it looks really good 10916 else { 10917 10918 # we might pad token $ibeg, so be sure that it 10919 # is at the same depth as the next line. 10920 next 10921 if ( $nesting_depth_to_go[$ibeg] != 10922 $nesting_depth_to_go[$ibeg_next] ); 10923 10924 # We can pad on line 1 of a statement if at least 3 10925 # lines will be aligned. Otherwise, it 10926 # can look very confusing. 10927 10928 # We have to be careful not to pad if there are too few 10929 # lines. The current rule is: 10930 # (1) in general we require at least 3 consecutive lines 10931 # with the same leading chain operator token, 10932 # (2) but an exception is that we only require two lines 10933 # with leading colons if there are no more lines. For example, 10934 # the first $i in the following snippet would get padding 10935 # by the second rule: 10936 # 10937 # $i == 1 ? ( "First", "Color" ) 10938 # : $i == 2 ? ( "Then", "Rarity" ) 10939 # : ( "Then", "Name" ); 10940 10941 if ( $max_line > 1 ) { 10942 my $leading_token = $tokens_to_go[$ibeg_next]; 10943 my $tokens_differ; 10944 10945 # never indent line 1 of a '.' series because 10946 # previous line is most likely at same level. 10947 # TODO: we should also look at the leasing_spaces 10948 # of the last output line and skip if it is same 10949 # as this line. 10950 next if ( $leading_token eq '.' ); 10951 10952 my $count = 1; 10953 foreach my $l ( 2 .. 3 ) { 10954 last if ( $line + $l > $max_line ); 10955 my $ibeg_next_next = $$ri_first[ $line + $l ]; 10956 if ( $tokens_to_go[$ibeg_next_next] ne 10957 $leading_token ) 10958 { 10959 $tokens_differ = 1; 10960 last; 10961 } 10962 $count++; 10963 } 10964 next if ($tokens_differ); 10965 next if ( $count < 3 && $leading_token ne ':' ); 10966 $ipad = $ibeg; 10967 } 10968 else { 10969 next; 10970 } 10971 } 10972 } 10973 } 10974 10975 # find interior token to pad if necessary 10976 if ( !defined($ipad) ) { 10977 10978 for ( my $i = $ibeg ; ( $i < $iend ) && !$ipad ; $i++ ) { 10979 10980 # find any unclosed container 10981 next 10982 unless ( $type_sequence_to_go[$i] 10983 && $mate_index_to_go[$i] > $iend ); 10984 10985 # find next nonblank token to pad 10986 $ipad = $inext_to_go[$i]; 10987 last if ( $ipad > $iend ); 10988 } 10989 last unless $ipad; 10990 } 10991 10992 # We cannot pad a leading token at the lowest level because 10993 # it could cause a bug in which the starting indentation 10994 # level is guessed incorrectly each time the code is run 10995 # though perltidy, thus causing the code to march off to 10996 # the right. For example, the following snippet would have 10997 # this problem: 10998 10999## ov_method mycan( $package, '(""' ), $package 11000## or ov_method mycan( $package, '(0+' ), $package 11001## or ov_method mycan( $package, '(bool' ), $package 11002## or ov_method mycan( $package, '(nomethod' ), $package; 11003 11004 # If this snippet is within a block this won't happen 11005 # unless the user just processes the snippet alone within 11006 # an editor. In that case either the user will see and 11007 # fix the problem or it will be corrected next time the 11008 # entire file is processed with perltidy. 11009 next if ( $ipad == 0 && $levels_to_go[$ipad] == 0 ); 11010 11011## THIS PATCH REMOVES THE FOLLOWING POOR PADDING (math.t) with -pbp, BUT 11012## IT DID MORE HARM THAN GOOD 11013## ceil( 11014## $font->{'loca'}->{'glyphs'}[$x]->read->{'xMin'} * 1000 11015## / $upem 11016## ), 11017##? # do not put leading padding for just 2 lines of math 11018##? if ( $ipad == $ibeg 11019##? && $line > 0 11020##? && $levels_to_go[$ipad] > $levels_to_go[ $ipad - 1 ] 11021##? && $is_math_op{$type_next} 11022##? && $line + 2 <= $max_line ) 11023##? { 11024##? my $ibeg_next_next = $$ri_first[ $line + 2 ]; 11025##? my $type_next_next = $types_to_go[$ibeg_next_next]; 11026##? next if !$is_math_op{$type_next_next}; 11027##? } 11028 11029 # next line must not be at greater depth 11030 my $iend_next = $$ri_last[ $line + 1 ]; 11031 next 11032 if ( $nesting_depth_to_go[ $iend_next + 1 ] > 11033 $nesting_depth_to_go[$ipad] ); 11034 11035 # lines must be somewhat similar to be padded.. 11036 my $inext_next = $inext_to_go[$ibeg_next]; 11037 my $type = $types_to_go[$ipad]; 11038 my $type_next = $types_to_go[ $ipad + 1 ]; 11039 11040 # see if there are multiple continuation lines 11041 my $logical_continuation_lines = 1; 11042 if ( $line + 2 <= $max_line ) { 11043 my $leading_token = $tokens_to_go[$ibeg_next]; 11044 my $ibeg_next_next = $$ri_first[ $line + 2 ]; 11045 if ( $tokens_to_go[$ibeg_next_next] eq $leading_token 11046 && $nesting_depth_to_go[$ibeg_next] eq 11047 $nesting_depth_to_go[$ibeg_next_next] ) 11048 { 11049 $logical_continuation_lines++; 11050 } 11051 } 11052 11053 # see if leading types match 11054 my $types_match = $types_to_go[$inext_next] eq $type; 11055 my $matches_without_bang; 11056 11057 # if first line has leading ! then compare the following token 11058 if ( !$types_match && $type eq '!' ) { 11059 $types_match = $matches_without_bang = 11060 $types_to_go[$inext_next] eq $types_to_go[ $ipad + 1 ]; 11061 } 11062 11063 if ( 11064 11065 # either we have multiple continuation lines to follow 11066 # and we are not padding the first token 11067 ( $logical_continuation_lines > 1 && $ipad > 0 ) 11068 11069 # or.. 11070 || ( 11071 11072 # types must match 11073 $types_match 11074 11075 # and keywords must match if keyword 11076 && !( 11077 $type eq 'k' 11078 && $tokens_to_go[$ipad] ne $tokens_to_go[$inext_next] 11079 ) 11080 ) 11081 ) 11082 { 11083 11084 #----------------------begin special checks-------------- 11085 # 11086 # SPECIAL CHECK 1: 11087 # A check is needed before we can make the pad. 11088 # If we are in a list with some long items, we want each 11089 # item to stand out. So in the following example, the 11090 # first line beginning with '$casefold->' would look good 11091 # padded to align with the next line, but then it 11092 # would be indented more than the last line, so we 11093 # won't do it. 11094 # 11095 # ok( 11096 # $casefold->{code} eq '0041' 11097 # && $casefold->{status} eq 'C' 11098 # && $casefold->{mapping} eq '0061', 11099 # 'casefold 0x41' 11100 # ); 11101 # 11102 # Note: 11103 # It would be faster, and almost as good, to use a comma 11104 # count, and not pad if comma_count > 1 and the previous 11105 # line did not end with a comma. 11106 # 11107 my $ok_to_pad = 1; 11108 11109 my $ibg = $$ri_first[ $line + 1 ]; 11110 my $depth = $nesting_depth_to_go[ $ibg + 1 ]; 11111 11112 # just use simplified formula for leading spaces to avoid 11113 # needless sub calls 11114 my $lsp = $levels_to_go[$ibg] + $ci_levels_to_go[$ibg]; 11115 11116 # look at each line beyond the next .. 11117 my $l = $line + 1; 11118 foreach $l ( $line + 2 .. $max_line ) { 11119 my $ibg = $$ri_first[$l]; 11120 11121 # quit looking at the end of this container 11122 last 11123 if ( $nesting_depth_to_go[ $ibg + 1 ] < $depth ) 11124 || ( $nesting_depth_to_go[$ibg] < $depth ); 11125 11126 # cannot do the pad if a later line would be 11127 # outdented more 11128 if ( $levels_to_go[$ibg] + $ci_levels_to_go[$ibg] < $lsp ) { 11129 $ok_to_pad = 0; 11130 last; 11131 } 11132 } 11133 11134 # don't pad if we end in a broken list 11135 if ( $l == $max_line ) { 11136 my $i2 = $$ri_last[$l]; 11137 if ( $types_to_go[$i2] eq '#' ) { 11138 my $i1 = $$ri_first[$l]; 11139 next 11140 if ( 11141 terminal_type( \@types_to_go, \@block_type_to_go, 11142 $i1, $i2 ) eq ',' 11143 ); 11144 } 11145 } 11146 11147 # SPECIAL CHECK 2: 11148 # a minus may introduce a quoted variable, and we will 11149 # add the pad only if this line begins with a bare word, 11150 # such as for the word 'Button' here: 11151 # [ 11152 # Button => "Print letter \"~$_\"", 11153 # -command => [ sub { print "$_[0]\n" }, $_ ], 11154 # -accelerator => "Meta+$_" 11155 # ]; 11156 # 11157 # On the other hand, if 'Button' is quoted, it looks best 11158 # not to pad: 11159 # [ 11160 # 'Button' => "Print letter \"~$_\"", 11161 # -command => [ sub { print "$_[0]\n" }, $_ ], 11162 # -accelerator => "Meta+$_" 11163 # ]; 11164 if ( $types_to_go[$ibeg_next] eq 'm' ) { 11165 $ok_to_pad = 0 if $types_to_go[$ibeg] eq 'Q'; 11166 } 11167 11168 next unless $ok_to_pad; 11169 11170 #----------------------end special check--------------- 11171 11172 my $length_1 = total_line_length( $ibeg, $ipad - 1 ); 11173 my $length_2 = total_line_length( $ibeg_next, $inext_next - 1 ); 11174 $pad_spaces = $length_2 - $length_1; 11175 11176 # If the first line has a leading ! and the second does 11177 # not, then remove one space to try to align the next 11178 # leading characters, which are often the same. For example: 11179 # if ( !$ts 11180 # || $ts == $self->Holder 11181 # || $self->Holder->Type eq "Arena" ) 11182 # 11183 # This usually helps readability, but if there are subsequent 11184 # ! operators things will still get messed up. For example: 11185 # 11186 # if ( !exists $Net::DNS::typesbyname{$qtype} 11187 # && exists $Net::DNS::classesbyname{$qtype} 11188 # && !exists $Net::DNS::classesbyname{$qclass} 11189 # && exists $Net::DNS::typesbyname{$qclass} ) 11190 # We can't fix that. 11191 if ($matches_without_bang) { $pad_spaces-- } 11192 11193 # make sure this won't change if -lp is used 11194 my $indentation_1 = $leading_spaces_to_go[$ibeg]; 11195 if ( ref($indentation_1) ) { 11196 if ( $indentation_1->get_RECOVERABLE_SPACES() == 0 ) { 11197 my $indentation_2 = $leading_spaces_to_go[$ibeg_next]; 11198 unless ( $indentation_2->get_RECOVERABLE_SPACES() == 0 ) 11199 { 11200 $pad_spaces = 0; 11201 } 11202 } 11203 } 11204 11205 # we might be able to handle a pad of -1 by removing a blank 11206 # token 11207 if ( $pad_spaces < 0 ) { 11208 11209 if ( $pad_spaces == -1 ) { 11210 if ( $ipad > $ibeg && $types_to_go[ $ipad - 1 ] eq 'b' ) 11211 { 11212 pad_token( $ipad - 1, $pad_spaces ); 11213 } 11214 } 11215 $pad_spaces = 0; 11216 } 11217 11218 # now apply any padding for alignment 11219 if ( $ipad >= 0 && $pad_spaces ) { 11220 11221 my $length_t = total_line_length( $ibeg, $iend ); 11222 if ( $pad_spaces + $length_t <= maximum_line_length($ibeg) ) 11223 { 11224 pad_token( $ipad, $pad_spaces ); 11225 } 11226 } 11227 } 11228 } 11229 continue { 11230 $iendm = $iend; 11231 $ibegm = $ibeg; 11232 $has_leading_op = $has_leading_op_next; 11233 } # end of loop over lines 11234 return; 11235 } 11236} 11237 11238sub correct_lp_indentation { 11239 11240 # When the -lp option is used, we need to make a last pass through 11241 # each line to correct the indentation positions in case they differ 11242 # from the predictions. This is necessary because perltidy uses a 11243 # predictor/corrector method for aligning with opening parens. The 11244 # predictor is usually good, but sometimes stumbles. The corrector 11245 # tries to patch things up once the actual opening paren locations 11246 # are known. 11247 my ( $ri_first, $ri_last ) = @_; 11248 my $do_not_pad = 0; 11249 11250 # Note on flag '$do_not_pad': 11251 # We want to avoid a situation like this, where the aligner inserts 11252 # whitespace before the '=' to align it with a previous '=', because 11253 # otherwise the parens might become mis-aligned in a situation like 11254 # this, where the '=' has become aligned with the previous line, 11255 # pushing the opening '(' forward beyond where we want it. 11256 # 11257 # $mkFloor::currentRoom = ''; 11258 # $mkFloor::c_entry = $c->Entry( 11259 # -width => '10', 11260 # -relief => 'sunken', 11261 # ... 11262 # ); 11263 # 11264 # We leave it to the aligner to decide how to do this. 11265 11266 # first remove continuation indentation if appropriate 11267 my $max_line = @$ri_first - 1; 11268 11269 # looking at each line of this batch.. 11270 my ( $ibeg, $iend ); 11271 my $line; 11272 foreach $line ( 0 .. $max_line ) { 11273 $ibeg = $$ri_first[$line]; 11274 $iend = $$ri_last[$line]; 11275 11276 # looking at each token in this output line.. 11277 my $i; 11278 foreach $i ( $ibeg .. $iend ) { 11279 11280 # How many space characters to place before this token 11281 # for special alignment. Actual padding is done in the 11282 # continue block. 11283 11284 # looking for next unvisited indentation item 11285 my $indentation = $leading_spaces_to_go[$i]; 11286 if ( !$indentation->get_MARKED() ) { 11287 $indentation->set_MARKED(1); 11288 11289 # looking for indentation item for which we are aligning 11290 # with parens, braces, and brackets 11291 next unless ( $indentation->get_ALIGN_PAREN() ); 11292 11293 # skip closed container on this line 11294 if ( $i > $ibeg ) { 11295 my $im = max( $ibeg, $iprev_to_go[$i] ); 11296 if ( $type_sequence_to_go[$im] 11297 && $mate_index_to_go[$im] <= $iend ) 11298 { 11299 next; 11300 } 11301 } 11302 11303 if ( $line == 1 && $i == $ibeg ) { 11304 $do_not_pad = 1; 11305 } 11306 11307 # Ok, let's see what the error is and try to fix it 11308 my $actual_pos; 11309 my $predicted_pos = $indentation->get_SPACES(); 11310 if ( $i > $ibeg ) { 11311 11312 # token is mid-line - use length to previous token 11313 $actual_pos = total_line_length( $ibeg, $i - 1 ); 11314 11315 # for mid-line token, we must check to see if all 11316 # additional lines have continuation indentation, 11317 # and remove it if so. Otherwise, we do not get 11318 # good alignment. 11319 my $closing_index = $indentation->get_CLOSED(); 11320 if ( $closing_index > $iend ) { 11321 my $ibeg_next = $$ri_first[ $line + 1 ]; 11322 if ( $ci_levels_to_go[$ibeg_next] > 0 ) { 11323 undo_lp_ci( $line, $i, $closing_index, $ri_first, 11324 $ri_last ); 11325 } 11326 } 11327 } 11328 elsif ( $line > 0 ) { 11329 11330 # handle case where token starts a new line; 11331 # use length of previous line 11332 my $ibegm = $$ri_first[ $line - 1 ]; 11333 my $iendm = $$ri_last[ $line - 1 ]; 11334 $actual_pos = total_line_length( $ibegm, $iendm ); 11335 11336 # follow -pt style 11337 ++$actual_pos 11338 if ( $types_to_go[ $iendm + 1 ] eq 'b' ); 11339 } 11340 else { 11341 11342 # token is first character of first line of batch 11343 $actual_pos = $predicted_pos; 11344 } 11345 11346 my $move_right = $actual_pos - $predicted_pos; 11347 11348 # done if no error to correct (gnu2.t) 11349 if ( $move_right == 0 ) { 11350 $indentation->set_RECOVERABLE_SPACES($move_right); 11351 next; 11352 } 11353 11354 # if we have not seen closure for this indentation in 11355 # this batch, we can only pass on a request to the 11356 # vertical aligner 11357 my $closing_index = $indentation->get_CLOSED(); 11358 11359 if ( $closing_index < 0 ) { 11360 $indentation->set_RECOVERABLE_SPACES($move_right); 11361 next; 11362 } 11363 11364 # If necessary, look ahead to see if there is really any 11365 # leading whitespace dependent on this whitespace, and 11366 # also find the longest line using this whitespace. 11367 # Since it is always safe to move left if there are no 11368 # dependents, we only need to do this if we may have 11369 # dependent nodes or need to move right. 11370 11371 my $right_margin = 0; 11372 my $have_child = $indentation->get_HAVE_CHILD(); 11373 11374 my %saw_indentation; 11375 my $line_count = 1; 11376 $saw_indentation{$indentation} = $indentation; 11377 11378 if ( $have_child || $move_right > 0 ) { 11379 $have_child = 0; 11380 my $max_length = 0; 11381 if ( $i == $ibeg ) { 11382 $max_length = total_line_length( $ibeg, $iend ); 11383 } 11384 11385 # look ahead at the rest of the lines of this batch.. 11386 my $line_t; 11387 foreach $line_t ( $line + 1 .. $max_line ) { 11388 my $ibeg_t = $$ri_first[$line_t]; 11389 my $iend_t = $$ri_last[$line_t]; 11390 last if ( $closing_index <= $ibeg_t ); 11391 11392 # remember all different indentation objects 11393 my $indentation_t = $leading_spaces_to_go[$ibeg_t]; 11394 $saw_indentation{$indentation_t} = $indentation_t; 11395 $line_count++; 11396 11397 # remember longest line in the group 11398 my $length_t = total_line_length( $ibeg_t, $iend_t ); 11399 if ( $length_t > $max_length ) { 11400 $max_length = $length_t; 11401 } 11402 } 11403 $right_margin = maximum_line_length($ibeg) - $max_length; 11404 if ( $right_margin < 0 ) { $right_margin = 0 } 11405 } 11406 11407 my $first_line_comma_count = 11408 grep { $_ eq ',' } @types_to_go[ $ibeg .. $iend ]; 11409 my $comma_count = $indentation->get_COMMA_COUNT(); 11410 my $arrow_count = $indentation->get_ARROW_COUNT(); 11411 11412 # This is a simple approximate test for vertical alignment: 11413 # if we broke just after an opening paren, brace, bracket, 11414 # and there are 2 or more commas in the first line, 11415 # and there are no '=>'s, 11416 # then we are probably vertically aligned. We could set 11417 # an exact flag in sub scan_list, but this is good 11418 # enough. 11419 my $indentation_count = keys %saw_indentation; 11420 my $is_vertically_aligned = 11421 ( $i == $ibeg 11422 && $first_line_comma_count > 1 11423 && $indentation_count == 1 11424 && ( $arrow_count == 0 || $arrow_count == $line_count ) ); 11425 11426 # Make the move if possible .. 11427 if ( 11428 11429 # we can always move left 11430 $move_right < 0 11431 11432 # but we should only move right if we are sure it will 11433 # not spoil vertical alignment 11434 || ( $comma_count == 0 ) 11435 || ( $comma_count > 0 && !$is_vertically_aligned ) 11436 ) 11437 { 11438 my $move = 11439 ( $move_right <= $right_margin ) 11440 ? $move_right 11441 : $right_margin; 11442 11443 foreach ( keys %saw_indentation ) { 11444 $saw_indentation{$_} 11445 ->permanently_decrease_AVAILABLE_SPACES( -$move ); 11446 } 11447 } 11448 11449 # Otherwise, record what we want and the vertical aligner 11450 # will try to recover it. 11451 else { 11452 $indentation->set_RECOVERABLE_SPACES($move_right); 11453 } 11454 } 11455 } 11456 } 11457 return $do_not_pad; 11458} 11459 11460# flush is called to output any tokens in the pipeline, so that 11461# an alternate source of lines can be written in the correct order 11462 11463sub flush { 11464 destroy_one_line_block(); 11465 output_line_to_go(); 11466 Perl::Tidy::VerticalAligner::flush(); 11467} 11468 11469sub reset_block_text_accumulator { 11470 11471 # save text after 'if' and 'elsif' to append after 'else' 11472 if ($accumulating_text_for_block) { 11473 11474 if ( $accumulating_text_for_block =~ /^(if|elsif)$/ ) { 11475 push @{$rleading_block_if_elsif_text}, $leading_block_text; 11476 } 11477 } 11478 $accumulating_text_for_block = ""; 11479 $leading_block_text = ""; 11480 $leading_block_text_level = 0; 11481 $leading_block_text_length_exceeded = 0; 11482 $leading_block_text_line_number = 0; 11483 $leading_block_text_line_length = 0; 11484} 11485 11486sub set_block_text_accumulator { 11487 my $i = shift; 11488 $accumulating_text_for_block = $tokens_to_go[$i]; 11489 if ( $accumulating_text_for_block !~ /^els/ ) { 11490 $rleading_block_if_elsif_text = []; 11491 } 11492 $leading_block_text = ""; 11493 $leading_block_text_level = $levels_to_go[$i]; 11494 $leading_block_text_line_number = 11495 $vertical_aligner_object->get_output_line_number(); 11496 $leading_block_text_length_exceeded = 0; 11497 11498 # this will contain the column number of the last character 11499 # of the closing side comment 11500 $leading_block_text_line_length = 11501 length($csc_last_label) + 11502 length($accumulating_text_for_block) + 11503 length( $rOpts->{'closing-side-comment-prefix'} ) + 11504 $leading_block_text_level * $rOpts_indent_columns + 3; 11505} 11506 11507sub accumulate_block_text { 11508 my $i = shift; 11509 11510 # accumulate leading text for -csc, ignoring any side comments 11511 if ( $accumulating_text_for_block 11512 && !$leading_block_text_length_exceeded 11513 && $types_to_go[$i] ne '#' ) 11514 { 11515 11516 my $added_length = $token_lengths_to_go[$i]; 11517 $added_length += 1 if $i == 0; 11518 my $new_line_length = $leading_block_text_line_length + $added_length; 11519 11520 # we can add this text if we don't exceed some limits.. 11521 if ( 11522 11523 # we must not have already exceeded the text length limit 11524 length($leading_block_text) < 11525 $rOpts_closing_side_comment_maximum_text 11526 11527 # and either: 11528 # the new total line length must be below the line length limit 11529 # or the new length must be below the text length limit 11530 # (ie, we may allow one token to exceed the text length limit) 11531 && ( 11532 $new_line_length < 11533 maximum_line_length_for_level($leading_block_text_level) 11534 11535 || length($leading_block_text) + $added_length < 11536 $rOpts_closing_side_comment_maximum_text 11537 ) 11538 11539 # UNLESS: we are adding a closing paren before the brace we seek. 11540 # This is an attempt to avoid situations where the ... to be 11541 # added are longer than the omitted right paren, as in: 11542 11543 # foreach my $item (@a_rather_long_variable_name_here) { 11544 # &whatever; 11545 # } ## end foreach my $item (@a_rather_long_variable_name_here... 11546 11547 || ( 11548 $tokens_to_go[$i] eq ')' 11549 && ( 11550 ( 11551 $i + 1 <= $max_index_to_go 11552 && $block_type_to_go[ $i + 1 ] eq 11553 $accumulating_text_for_block 11554 ) 11555 || ( $i + 2 <= $max_index_to_go 11556 && $block_type_to_go[ $i + 2 ] eq 11557 $accumulating_text_for_block ) 11558 ) 11559 ) 11560 ) 11561 { 11562 11563 # add an extra space at each newline 11564 if ( $i == 0 ) { $leading_block_text .= ' ' } 11565 11566 # add the token text 11567 $leading_block_text .= $tokens_to_go[$i]; 11568 $leading_block_text_line_length = $new_line_length; 11569 } 11570 11571 # show that text was truncated if necessary 11572 elsif ( $types_to_go[$i] ne 'b' ) { 11573 $leading_block_text_length_exceeded = 1; 11574## Please see file perltidy.ERR 11575 $leading_block_text .= '...'; 11576 } 11577 } 11578} 11579 11580{ 11581 my %is_if_elsif_else_unless_while_until_for_foreach; 11582 11583 BEGIN { 11584 11585 # These block types may have text between the keyword and opening 11586 # curly. Note: 'else' does not, but must be included to allow trailing 11587 # if/elsif text to be appended. 11588 # patch for SWITCH/CASE: added 'case' and 'when' 11589 @_ = qw(if elsif else unless while until for foreach case when); 11590 @is_if_elsif_else_unless_while_until_for_foreach{@_} = 11591 (1) x scalar(@_); 11592 } 11593 11594 sub accumulate_csc_text { 11595 11596 # called once per output buffer when -csc is used. Accumulates 11597 # the text placed after certain closing block braces. 11598 # Defines and returns the following for this buffer: 11599 11600 my $block_leading_text = ""; # the leading text of the last '}' 11601 my $rblock_leading_if_elsif_text; 11602 my $i_block_leading_text = 11603 -1; # index of token owning block_leading_text 11604 my $block_line_count = 100; # how many lines the block spans 11605 my $terminal_type = 'b'; # type of last nonblank token 11606 my $i_terminal = 0; # index of last nonblank token 11607 my $terminal_block_type = ""; 11608 11609 # update most recent statement label 11610 $csc_last_label = "" unless ($csc_last_label); 11611 if ( $types_to_go[0] eq 'J' ) { $csc_last_label = $tokens_to_go[0] } 11612 my $block_label = $csc_last_label; 11613 11614 # Loop over all tokens of this batch 11615 for my $i ( 0 .. $max_index_to_go ) { 11616 my $type = $types_to_go[$i]; 11617 my $block_type = $block_type_to_go[$i]; 11618 my $token = $tokens_to_go[$i]; 11619 11620 # remember last nonblank token type 11621 if ( $type ne '#' && $type ne 'b' ) { 11622 $terminal_type = $type; 11623 $terminal_block_type = $block_type; 11624 $i_terminal = $i; 11625 } 11626 11627 my $type_sequence = $type_sequence_to_go[$i]; 11628 if ( $block_type && $type_sequence ) { 11629 11630 if ( $token eq '}' ) { 11631 11632 # restore any leading text saved when we entered this block 11633 if ( defined( $block_leading_text{$type_sequence} ) ) { 11634 ( $block_leading_text, $rblock_leading_if_elsif_text ) 11635 = @{ $block_leading_text{$type_sequence} }; 11636 $i_block_leading_text = $i; 11637 delete $block_leading_text{$type_sequence}; 11638 $rleading_block_if_elsif_text = 11639 $rblock_leading_if_elsif_text; 11640 } 11641 11642 if ( defined( $csc_block_label{$type_sequence} ) ) { 11643 $block_label = $csc_block_label{$type_sequence}; 11644 delete $csc_block_label{$type_sequence}; 11645 } 11646 11647 # if we run into a '}' then we probably started accumulating 11648 # at something like a trailing 'if' clause..no harm done. 11649 if ( $accumulating_text_for_block 11650 && $levels_to_go[$i] <= $leading_block_text_level ) 11651 { 11652 my $lev = $levels_to_go[$i]; 11653 reset_block_text_accumulator(); 11654 } 11655 11656 if ( defined( $block_opening_line_number{$type_sequence} ) ) 11657 { 11658 my $output_line_number = 11659 $vertical_aligner_object->get_output_line_number(); 11660 $block_line_count = 11661 $output_line_number - 11662 $block_opening_line_number{$type_sequence} + 1; 11663 delete $block_opening_line_number{$type_sequence}; 11664 } 11665 else { 11666 11667 # Error: block opening line undefined for this line.. 11668 # This shouldn't be possible, but it is not a 11669 # significant problem. 11670 } 11671 } 11672 11673 elsif ( $token eq '{' ) { 11674 11675 my $line_number = 11676 $vertical_aligner_object->get_output_line_number(); 11677 $block_opening_line_number{$type_sequence} = $line_number; 11678 11679 # set a label for this block, except for 11680 # a bare block which already has the label 11681 # A label can only be used on the next { 11682 if ( $block_type =~ /:$/ ) { $csc_last_label = "" } 11683 $csc_block_label{$type_sequence} = $csc_last_label; 11684 $csc_last_label = ""; 11685 11686 if ( $accumulating_text_for_block 11687 && $levels_to_go[$i] == $leading_block_text_level ) 11688 { 11689 11690 if ( $accumulating_text_for_block eq $block_type ) { 11691 11692 # save any leading text before we enter this block 11693 $block_leading_text{$type_sequence} = [ 11694 $leading_block_text, 11695 $rleading_block_if_elsif_text 11696 ]; 11697 $block_opening_line_number{$type_sequence} = 11698 $leading_block_text_line_number; 11699 reset_block_text_accumulator(); 11700 } 11701 else { 11702 11703 # shouldn't happen, but not a serious error. 11704 # We were accumulating -csc text for block type 11705 # $accumulating_text_for_block and unexpectedly 11706 # encountered a '{' for block type $block_type. 11707 } 11708 } 11709 } 11710 } 11711 11712 if ( $type eq 'k' 11713 && $csc_new_statement_ok 11714 && $is_if_elsif_else_unless_while_until_for_foreach{$token} 11715 && $token =~ /$closing_side_comment_list_pattern/o ) 11716 { 11717 set_block_text_accumulator($i); 11718 } 11719 else { 11720 11721 # note: ignoring type 'q' because of tricks being played 11722 # with 'q' for hanging side comments 11723 if ( $type ne 'b' && $type ne '#' && $type ne 'q' ) { 11724 $csc_new_statement_ok = 11725 ( $block_type || $type eq 'J' || $type eq ';' ); 11726 } 11727 if ( $type eq ';' 11728 && $accumulating_text_for_block 11729 && $levels_to_go[$i] == $leading_block_text_level ) 11730 { 11731 reset_block_text_accumulator(); 11732 } 11733 else { 11734 accumulate_block_text($i); 11735 } 11736 } 11737 } 11738 11739 # Treat an 'else' block specially by adding preceding 'if' and 11740 # 'elsif' text. Otherwise, the 'end else' is not helpful, 11741 # especially for cuddled-else formatting. 11742 if ( $terminal_block_type =~ /^els/ && $rblock_leading_if_elsif_text ) { 11743 $block_leading_text = 11744 make_else_csc_text( $i_terminal, $terminal_block_type, 11745 $block_leading_text, $rblock_leading_if_elsif_text ); 11746 } 11747 11748 # if this line ends in a label then remember it for the next pass 11749 $csc_last_label = ""; 11750 if ( $terminal_type eq 'J' ) { 11751 $csc_last_label = $tokens_to_go[$i_terminal]; 11752 } 11753 11754 return ( $terminal_type, $i_terminal, $i_block_leading_text, 11755 $block_leading_text, $block_line_count, $block_label ); 11756 } 11757} 11758 11759sub make_else_csc_text { 11760 11761 # create additional -csc text for an 'else' and optionally 'elsif', 11762 # depending on the value of switch 11763 # $rOpts_closing_side_comment_else_flag: 11764 # 11765 # = 0 add 'if' text to trailing else 11766 # = 1 same as 0 plus: 11767 # add 'if' to 'elsif's if can fit in line length 11768 # add last 'elsif' to trailing else if can fit in one line 11769 # = 2 same as 1 but do not check if exceed line length 11770 # 11771 # $rif_elsif_text = a reference to a list of all previous closing 11772 # side comments created for this if block 11773 # 11774 my ( $i_terminal, $block_type, $block_leading_text, $rif_elsif_text ) = @_; 11775 my $csc_text = $block_leading_text; 11776 11777 if ( $block_type eq 'elsif' 11778 && $rOpts_closing_side_comment_else_flag == 0 ) 11779 { 11780 return $csc_text; 11781 } 11782 11783 my $count = @{$rif_elsif_text}; 11784 return $csc_text unless ($count); 11785 11786 my $if_text = '[ if' . $rif_elsif_text->[0]; 11787 11788 # always show the leading 'if' text on 'else' 11789 if ( $block_type eq 'else' ) { 11790 $csc_text .= $if_text; 11791 } 11792 11793 # see if that's all 11794 if ( $rOpts_closing_side_comment_else_flag == 0 ) { 11795 return $csc_text; 11796 } 11797 11798 my $last_elsif_text = ""; 11799 if ( $count > 1 ) { 11800 $last_elsif_text = ' [elsif' . $rif_elsif_text->[ $count - 1 ]; 11801 if ( $count > 2 ) { $last_elsif_text = ' [...' . $last_elsif_text; } 11802 } 11803 11804 # tentatively append one more item 11805 my $saved_text = $csc_text; 11806 if ( $block_type eq 'else' ) { 11807 $csc_text .= $last_elsif_text; 11808 } 11809 else { 11810 $csc_text .= ' ' . $if_text; 11811 } 11812 11813 # all done if no length checks requested 11814 if ( $rOpts_closing_side_comment_else_flag == 2 ) { 11815 return $csc_text; 11816 } 11817 11818 # undo it if line length exceeded 11819 my $length = 11820 length($csc_text) + 11821 length($block_type) + 11822 length( $rOpts->{'closing-side-comment-prefix'} ) + 11823 $levels_to_go[$i_terminal] * $rOpts_indent_columns + 3; 11824 if ( $length > maximum_line_length_for_level($leading_block_text_level) ) { 11825 $csc_text = $saved_text; 11826 } 11827 return $csc_text; 11828} 11829 11830{ # sub balance_csc_text 11831 11832 my %matching_char; 11833 11834 BEGIN { 11835 %matching_char = ( 11836 '{' => '}', 11837 '(' => ')', 11838 '[' => ']', 11839 '}' => '{', 11840 ')' => '(', 11841 ']' => '[', 11842 ); 11843 } 11844 11845 sub balance_csc_text { 11846 11847 # Append characters to balance a closing side comment so that editors 11848 # such as vim can correctly jump through code. 11849 # Simple Example: 11850 # input = ## end foreach my $foo ( sort { $b ... 11851 # output = ## end foreach my $foo ( sort { $b ...}) 11852 11853 # NOTE: This routine does not currently filter out structures within 11854 # quoted text because the bounce algorithims in text editors do not 11855 # necessarily do this either (a version of vim was checked and 11856 # did not do this). 11857 11858 # Some complex examples which will cause trouble for some editors: 11859 # while ( $mask_string =~ /\{[^{]*?\}/g ) { 11860 # if ( $mask_str =~ /\}\s*els[^\{\}]+\{$/ ) { 11861 # if ( $1 eq '{' ) { 11862 # test file test1/braces.pl has many such examples. 11863 11864 my ($csc) = @_; 11865 11866 # loop to examine characters one-by-one, RIGHT to LEFT and 11867 # build a balancing ending, LEFT to RIGHT. 11868 for ( my $pos = length($csc) - 1 ; $pos >= 0 ; $pos-- ) { 11869 11870 my $char = substr( $csc, $pos, 1 ); 11871 11872 # ignore everything except structural characters 11873 next unless ( $matching_char{$char} ); 11874 11875 # pop most recently appended character 11876 my $top = chop($csc); 11877 11878 # push it back plus the mate to the newest character 11879 # unless they balance each other. 11880 $csc = $csc . $top . $matching_char{$char} unless $top eq $char; 11881 } 11882 11883 # return the balanced string 11884 return $csc; 11885 } 11886} 11887 11888sub add_closing_side_comment { 11889 11890 # add closing side comments after closing block braces if -csc used 11891 my $cscw_block_comment; 11892 11893 #--------------------------------------------------------------- 11894 # Step 1: loop through all tokens of this line to accumulate 11895 # the text needed to create the closing side comments. Also see 11896 # how the line ends. 11897 #--------------------------------------------------------------- 11898 11899 my ( $terminal_type, $i_terminal, $i_block_leading_text, 11900 $block_leading_text, $block_line_count, $block_label ) 11901 = accumulate_csc_text(); 11902 11903 #--------------------------------------------------------------- 11904 # Step 2: make the closing side comment if this ends a block 11905 #--------------------------------------------------------------- 11906 my $have_side_comment = $i_terminal != $max_index_to_go; 11907 11908 # if this line might end in a block closure.. 11909 if ( 11910 $terminal_type eq '}' 11911 11912 # ..and either 11913 && ( 11914 11915 # the block is long enough 11916 ( $block_line_count >= $rOpts->{'closing-side-comment-interval'} ) 11917 11918 # or there is an existing comment to check 11919 || ( $have_side_comment 11920 && $rOpts->{'closing-side-comment-warnings'} ) 11921 ) 11922 11923 # .. and if this is one of the types of interest 11924 && $block_type_to_go[$i_terminal] =~ 11925 /$closing_side_comment_list_pattern/o 11926 11927 # .. but not an anonymous sub 11928 # These are not normally of interest, and their closing braces are 11929 # often followed by commas or semicolons anyway. This also avoids 11930 # possible erratic output due to line numbering inconsistencies 11931 # in the cases where their closing braces terminate a line. 11932 && $block_type_to_go[$i_terminal] ne 'sub' 11933 11934 # ..and the corresponding opening brace must is not in this batch 11935 # (because we do not need to tag one-line blocks, although this 11936 # should also be caught with a positive -csci value) 11937 && $mate_index_to_go[$i_terminal] < 0 11938 11939 # ..and either 11940 && ( 11941 11942 # this is the last token (line doesn't have a side comment) 11943 !$have_side_comment 11944 11945 # or the old side comment is a closing side comment 11946 || $tokens_to_go[$max_index_to_go] =~ 11947 /$closing_side_comment_prefix_pattern/o 11948 ) 11949 ) 11950 { 11951 11952 # then make the closing side comment text 11953 if ($block_label) { $block_label .= " " } 11954 my $token = 11955"$rOpts->{'closing-side-comment-prefix'} $block_label$block_type_to_go[$i_terminal]"; 11956 11957 # append any extra descriptive text collected above 11958 if ( $i_block_leading_text == $i_terminal ) { 11959 $token .= $block_leading_text; 11960 } 11961 11962 $token = balance_csc_text($token) 11963 if $rOpts->{'closing-side-comments-balanced'}; 11964 11965 $token =~ s/\s*$//; # trim any trailing whitespace 11966 11967 # handle case of existing closing side comment 11968 if ($have_side_comment) { 11969 11970 # warn if requested and tokens differ significantly 11971 if ( $rOpts->{'closing-side-comment-warnings'} ) { 11972 my $old_csc = $tokens_to_go[$max_index_to_go]; 11973 my $new_csc = $token; 11974 $new_csc =~ s/\s+//g; # trim all whitespace 11975 $old_csc =~ s/\s+//g; # trim all whitespace 11976 $new_csc =~ s/[\]\)\}\s]*$//; # trim trailing structures 11977 $old_csc =~ s/[\]\)\}\s]*$//; # trim trailing structures 11978 $new_csc =~ s/(\.\.\.)$//; # trim trailing '...' 11979 my $new_trailing_dots = $1; 11980 $old_csc =~ s/(\.\.\.)\s*$//; # trim trailing '...' 11981 11982 # Patch to handle multiple closing side comments at 11983 # else and elsif's. These have become too complicated 11984 # to check, so if we see an indication of 11985 # '[ if' or '[ # elsif', then assume they were made 11986 # by perltidy. 11987 if ( $block_type_to_go[$i_terminal] eq 'else' ) { 11988 if ( $old_csc =~ /\[\s*elsif/ ) { $old_csc = $new_csc } 11989 } 11990 elsif ( $block_type_to_go[$i_terminal] eq 'elsif' ) { 11991 if ( $old_csc =~ /\[\s*if/ ) { $old_csc = $new_csc } 11992 } 11993 11994 # if old comment is contained in new comment, 11995 # only compare the common part. 11996 if ( length($new_csc) > length($old_csc) ) { 11997 $new_csc = substr( $new_csc, 0, length($old_csc) ); 11998 } 11999 12000 # if the new comment is shorter and has been limited, 12001 # only compare the common part. 12002 if ( length($new_csc) < length($old_csc) 12003 && $new_trailing_dots ) 12004 { 12005 $old_csc = substr( $old_csc, 0, length($new_csc) ); 12006 } 12007 12008 # any remaining difference? 12009 if ( $new_csc ne $old_csc ) { 12010 12011 # just leave the old comment if we are below the threshold 12012 # for creating side comments 12013 if ( $block_line_count < 12014 $rOpts->{'closing-side-comment-interval'} ) 12015 { 12016 $token = undef; 12017 } 12018 12019 # otherwise we'll make a note of it 12020 else { 12021 12022 warning( 12023"perltidy -cscw replaced: $tokens_to_go[$max_index_to_go]\n" 12024 ); 12025 12026 # save the old side comment in a new trailing block comment 12027 my ( $day, $month, $year ) = (localtime)[ 3, 4, 5 ]; 12028 $year += 1900; 12029 $month += 1; 12030 $cscw_block_comment = 12031"## perltidy -cscw $year-$month-$day: $tokens_to_go[$max_index_to_go]"; 12032 } 12033 } 12034 else { 12035 12036 # No differences.. we can safely delete old comment if we 12037 # are below the threshold 12038 if ( $block_line_count < 12039 $rOpts->{'closing-side-comment-interval'} ) 12040 { 12041 $token = undef; 12042 unstore_token_to_go() 12043 if ( $types_to_go[$max_index_to_go] eq '#' ); 12044 unstore_token_to_go() 12045 if ( $types_to_go[$max_index_to_go] eq 'b' ); 12046 } 12047 } 12048 } 12049 12050 # switch to the new csc (unless we deleted it!) 12051 $tokens_to_go[$max_index_to_go] = $token if $token; 12052 } 12053 12054 # handle case of NO existing closing side comment 12055 else { 12056 12057 # insert the new side comment into the output token stream 12058 my $type = '#'; 12059 my $block_type = ''; 12060 my $type_sequence = ''; 12061 my $container_environment = 12062 $container_environment_to_go[$max_index_to_go]; 12063 my $level = $levels_to_go[$max_index_to_go]; 12064 my $slevel = $nesting_depth_to_go[$max_index_to_go]; 12065 my $no_internal_newlines = 0; 12066 12067 my $nesting_blocks = $nesting_blocks_to_go[$max_index_to_go]; 12068 my $ci_level = $ci_levels_to_go[$max_index_to_go]; 12069 my $in_continued_quote = 0; 12070 12071 # first insert a blank token 12072 insert_new_token_to_go( ' ', 'b', $slevel, $no_internal_newlines ); 12073 12074 # then the side comment 12075 insert_new_token_to_go( $token, $type, $slevel, 12076 $no_internal_newlines ); 12077 } 12078 } 12079 return $cscw_block_comment; 12080} 12081 12082sub previous_nonblank_token { 12083 my ($i) = @_; 12084 my $name = ""; 12085 my $im = $i - 1; 12086 return "" if ( $im < 0 ); 12087 if ( $types_to_go[$im] eq 'b' ) { $im--; } 12088 return "" if ( $im < 0 ); 12089 $name = $tokens_to_go[$im]; 12090 12091 # prepend any sub name to an isolated -> to avoid unwanted alignments 12092 # [test case is test8/penco.pl] 12093 if ( $name eq '->' ) { 12094 $im--; 12095 if ( $im >= 0 && $types_to_go[$im] ne 'b' ) { 12096 $name = $tokens_to_go[$im] . $name; 12097 } 12098 } 12099 return $name; 12100} 12101 12102sub send_lines_to_vertical_aligner { 12103 12104 my ( $ri_first, $ri_last, $do_not_pad ) = @_; 12105 12106 my $rindentation_list = [0]; # ref to indentations for each line 12107 12108 # define the array @matching_token_to_go for the output tokens 12109 # which will be non-blank for each special token (such as =>) 12110 # for which alignment is required. 12111 set_vertical_alignment_markers( $ri_first, $ri_last ); 12112 12113 # flush if necessary to avoid unwanted alignment 12114 my $must_flush = 0; 12115 if ( @$ri_first > 1 ) { 12116 12117 # flush before a long if statement 12118 if ( $types_to_go[0] eq 'k' && $tokens_to_go[0] =~ /^(if|unless)$/ ) { 12119 $must_flush = 1; 12120 } 12121 } 12122 if ($must_flush) { 12123 Perl::Tidy::VerticalAligner::flush(); 12124 } 12125 12126 undo_ci( $ri_first, $ri_last ); 12127 12128 set_logical_padding( $ri_first, $ri_last ); 12129 12130 # loop to prepare each line for shipment 12131 my $n_last_line = @$ri_first - 1; 12132 my $in_comma_list; 12133 for my $n ( 0 .. $n_last_line ) { 12134 my $ibeg = $$ri_first[$n]; 12135 my $iend = $$ri_last[$n]; 12136 12137 my ( $rtokens, $rfields, $rpatterns ) = 12138 make_alignment_patterns( $ibeg, $iend ); 12139 12140 # Set flag to show how much level changes between this line 12141 # and the next line, if we have it. 12142 my $ljump = 0; 12143 if ( $n < $n_last_line ) { 12144 my $ibegp = $$ri_first[ $n + 1 ]; 12145 $ljump = $levels_to_go[$ibegp] - $levels_to_go[$iend]; 12146 } 12147 12148 my ( $indentation, $lev, $level_end, $terminal_type, 12149 $is_semicolon_terminated, $is_outdented_line ) 12150 = set_adjusted_indentation( $ibeg, $iend, $rfields, $rpatterns, 12151 $ri_first, $ri_last, $rindentation_list, $ljump ); 12152 12153 # we will allow outdenting of long lines.. 12154 my $outdent_long_lines = ( 12155 12156 # which are long quotes, if allowed 12157 ( $types_to_go[$ibeg] eq 'Q' && $rOpts->{'outdent-long-quotes'} ) 12158 12159 # which are long block comments, if allowed 12160 || ( 12161 $types_to_go[$ibeg] eq '#' 12162 && $rOpts->{'outdent-long-comments'} 12163 12164 # but not if this is a static block comment 12165 && !$is_static_block_comment 12166 ) 12167 ); 12168 12169 my $level_jump = 12170 $nesting_depth_to_go[ $iend + 1 ] - $nesting_depth_to_go[$ibeg]; 12171 12172 my $rvertical_tightness_flags = 12173 set_vertical_tightness_flags( $n, $n_last_line, $ibeg, $iend, 12174 $ri_first, $ri_last ); 12175 12176 # flush an outdented line to avoid any unwanted vertical alignment 12177 Perl::Tidy::VerticalAligner::flush() if ($is_outdented_line); 12178 12179 # Set a flag at the final ':' of a ternary chain to request 12180 # vertical alignment of the final term. Here is a 12181 # slightly complex example: 12182 # 12183 # $self->{_text} = ( 12184 # !$section ? '' 12185 # : $type eq 'item' ? "the $section entry" 12186 # : "the section on $section" 12187 # ) 12188 # . ( 12189 # $page 12190 # ? ( $section ? ' in ' : '' ) . "the $page$page_ext manpage" 12191 # : ' elsewhere in this document' 12192 # ); 12193 # 12194 my $is_terminal_ternary = 0; 12195 if ( $tokens_to_go[$ibeg] eq ':' 12196 || $n > 0 && $tokens_to_go[ $$ri_last[ $n - 1 ] ] eq ':' ) 12197 { 12198 my $last_leading_type = ":"; 12199 if ( $n > 0 ) { 12200 my $iprev = $$ri_first[ $n - 1 ]; 12201 $last_leading_type = $types_to_go[$iprev]; 12202 } 12203 if ( $terminal_type ne ';' 12204 && $n_last_line > $n 12205 && $level_end == $lev ) 12206 { 12207 my $inext = $$ri_first[ $n + 1 ]; 12208 $level_end = $levels_to_go[$inext]; 12209 $terminal_type = $types_to_go[$inext]; 12210 } 12211 12212 $is_terminal_ternary = $last_leading_type eq ':' 12213 && ( ( $terminal_type eq ';' && $level_end <= $lev ) 12214 || ( $terminal_type ne ':' && $level_end < $lev ) ) 12215 12216 # the termainal term must not contain any ternary terms, as in 12217 # my $ECHO = ( 12218 # $Is_MSWin32 ? ".\\echo$$" 12219 # : $Is_MacOS ? ":echo$$" 12220 # : ( $Is_NetWare ? "echo$$" : "./echo$$" ) 12221 # ); 12222 && !grep /^[\?\:]$/, @types_to_go[ $ibeg + 1 .. $iend ]; 12223 } 12224 12225 # send this new line down the pipe 12226 my $forced_breakpoint = $forced_breakpoint_to_go[$iend]; 12227 Perl::Tidy::VerticalAligner::valign_input( 12228 $lev, 12229 $level_end, 12230 $indentation, 12231 $rfields, 12232 $rtokens, 12233 $rpatterns, 12234 $forced_breakpoint_to_go[$iend] || $in_comma_list, 12235 $outdent_long_lines, 12236 $is_terminal_ternary, 12237 $is_semicolon_terminated, 12238 $do_not_pad, 12239 $rvertical_tightness_flags, 12240 $level_jump, 12241 ); 12242 $in_comma_list = 12243 $tokens_to_go[$iend] eq ',' && $forced_breakpoint_to_go[$iend]; 12244 12245 # flush an outdented line to avoid any unwanted vertical alignment 12246 Perl::Tidy::VerticalAligner::flush() if ($is_outdented_line); 12247 12248 $do_not_pad = 0; 12249 12250 # Set flag indicating if this line ends in an opening 12251 # token and is very short, so that a blank line is not 12252 # needed if the subsequent line is a comment. 12253 # Examples of what we are looking for: 12254 # { 12255 # && ( 12256 # BEGIN { 12257 # default { 12258 # sub { 12259 $last_output_short_opening_token 12260 12261 # line ends in opening token 12262 = $types_to_go[$iend] =~ /^[\{\(\[L]$/ 12263 12264 # and either 12265 && ( 12266 # line has either single opening token 12267 $iend == $ibeg 12268 12269 # or is a single token followed by opening token. 12270 # Note that sub identifiers have blanks like 'sub doit' 12271 || ( $iend - $ibeg <= 2 && $tokens_to_go[$ibeg] !~ /\s+/ ) 12272 ) 12273 12274 # and limit total to 10 character widths 12275 && token_sequence_length( $ibeg, $iend ) <= 10; 12276 12277 } # end of loop to output each line 12278 12279 # remember indentation of lines containing opening containers for 12280 # later use by sub set_adjusted_indentation 12281 save_opening_indentation( $ri_first, $ri_last, $rindentation_list ); 12282} 12283 12284{ # begin make_alignment_patterns 12285 12286 my %block_type_map; 12287 my %keyword_map; 12288 12289 BEGIN { 12290 12291 # map related block names into a common name to 12292 # allow alignment 12293 %block_type_map = ( 12294 'unless' => 'if', 12295 'else' => 'if', 12296 'elsif' => 'if', 12297 'when' => 'if', 12298 'default' => 'if', 12299 'case' => 'if', 12300 'sort' => 'map', 12301 'grep' => 'map', 12302 ); 12303 12304 # map certain keywords to the same 'if' class to align 12305 # long if/elsif sequences. [elsif.pl] 12306 %keyword_map = ( 12307 'unless' => 'if', 12308 'else' => 'if', 12309 'elsif' => 'if', 12310 'when' => 'given', 12311 'default' => 'given', 12312 'case' => 'switch', 12313 12314 # treat an 'undef' similar to numbers and quotes 12315 'undef' => 'Q', 12316 ); 12317 } 12318 12319 sub make_alignment_patterns { 12320 12321 # Here we do some important preliminary work for the 12322 # vertical aligner. We create three arrays for one 12323 # output line. These arrays contain strings that can 12324 # be tested by the vertical aligner to see if 12325 # consecutive lines can be aligned vertically. 12326 # 12327 # The three arrays are indexed on the vertical 12328 # alignment fields and are: 12329 # @tokens - a list of any vertical alignment tokens for this line. 12330 # These are tokens, such as '=' '&&' '#' etc which 12331 # we want to might align vertically. These are 12332 # decorated with various information such as 12333 # nesting depth to prevent unwanted vertical 12334 # alignment matches. 12335 # @fields - the actual text of the line between the vertical alignment 12336 # tokens. 12337 # @patterns - a modified list of token types, one for each alignment 12338 # field. These should normally each match before alignment is 12339 # allowed, even when the alignment tokens match. 12340 my ( $ibeg, $iend ) = @_; 12341 my @tokens = (); 12342 my @fields = (); 12343 my @patterns = (); 12344 my $i_start = $ibeg; 12345 my $i; 12346 12347 my $depth = 0; 12348 my @container_name = (""); 12349 my @multiple_comma_arrows = (undef); 12350 12351 my $j = 0; # field index 12352 12353 $patterns[0] = ""; 12354 for $i ( $ibeg .. $iend ) { 12355 12356 # Keep track of containers balanced on this line only. 12357 # These are used below to prevent unwanted cross-line alignments. 12358 # Unbalanced containers already avoid aligning across 12359 # container boundaries. 12360 if ( $tokens_to_go[$i] eq '(' ) { 12361 12362 # if container is balanced on this line... 12363 my $i_mate = $mate_index_to_go[$i]; 12364 if ( $i_mate > $i && $i_mate <= $iend ) { 12365 $depth++; 12366 my $seqno = $type_sequence_to_go[$i]; 12367 my $count = comma_arrow_count($seqno); 12368 $multiple_comma_arrows[$depth] = $count && $count > 1; 12369 12370 # Append the previous token name to make the container name 12371 # more unique. This name will also be given to any commas 12372 # within this container, and it helps avoid undesirable 12373 # alignments of different types of containers. 12374 my $name = previous_nonblank_token($i); 12375 $name =~ s/^->//; 12376 $container_name[$depth] = "+" . $name; 12377 12378 # Make the container name even more unique if necessary. 12379 # If we are not vertically aligning this opening paren, 12380 # append a character count to avoid bad alignment because 12381 # it usually looks bad to align commas within continers 12382 # for which the opening parens do not align. Here 12383 # is an example very BAD alignment of commas (because 12384 # the atan2 functions are not all aligned): 12385 # $XY = 12386 # $X * $RTYSQP1 * atan2( $X, $RTYSQP1 ) + 12387 # $Y * $RTXSQP1 * atan2( $Y, $RTXSQP1 ) - 12388 # $X * atan2( $X, 1 ) - 12389 # $Y * atan2( $Y, 1 ); 12390 # 12391 # On the other hand, it is usually okay to align commas if 12392 # opening parens align, such as: 12393 # glVertex3d( $cx + $s * $xs, $cy, $z ); 12394 # glVertex3d( $cx, $cy + $s * $ys, $z ); 12395 # glVertex3d( $cx - $s * $xs, $cy, $z ); 12396 # glVertex3d( $cx, $cy - $s * $ys, $z ); 12397 # 12398 # To distinguish between these situations, we will 12399 # append the length of the line from the previous matching 12400 # token, or beginning of line, to the function name. This 12401 # will allow the vertical aligner to reject undesirable 12402 # matches. 12403 12404 # if we are not aligning on this paren... 12405 if ( $matching_token_to_go[$i] eq '' ) { 12406 12407 # Sum length from previous alignment, or start of line. 12408 my $len = 12409 ( $i_start == $ibeg ) 12410 ? total_line_length( $i_start, $i - 1 ) 12411 : token_sequence_length( $i_start, $i - 1 ); 12412 12413 # tack length onto the container name to make unique 12414 $container_name[$depth] .= "-" . $len; 12415 } 12416 } 12417 } 12418 elsif ( $tokens_to_go[$i] eq ')' ) { 12419 $depth-- if $depth > 0; 12420 } 12421 12422 # if we find a new synchronization token, we are done with 12423 # a field 12424 if ( $i > $i_start && $matching_token_to_go[$i] ne '' ) { 12425 12426 my $tok = my $raw_tok = $matching_token_to_go[$i]; 12427 12428 # make separators in different nesting depths unique 12429 # by appending the nesting depth digit. 12430 if ( $raw_tok ne '#' ) { 12431 $tok .= "$nesting_depth_to_go[$i]"; 12432 } 12433 12434 # also decorate commas with any container name to avoid 12435 # unwanted cross-line alignments. 12436 if ( $raw_tok eq ',' || $raw_tok eq '=>' ) { 12437 if ( $container_name[$depth] ) { 12438 $tok .= $container_name[$depth]; 12439 } 12440 } 12441 12442 # Patch to avoid aligning leading and trailing if, unless. 12443 # Mark trailing if, unless statements with container names. 12444 # This makes them different from leading if, unless which 12445 # are not so marked at present. If we ever need to name 12446 # them too, we could use ci to distinguish them. 12447 # Example problem to avoid: 12448 # return ( 2, "DBERROR" ) 12449 # if ( $retval == 2 ); 12450 # if ( scalar @_ ) { 12451 # my ( $a, $b, $c, $d, $e, $f ) = @_; 12452 # } 12453 if ( $raw_tok eq '(' ) { 12454 my $ci = $ci_levels_to_go[$ibeg]; 12455 if ( $container_name[$depth] =~ /^\+(if|unless)/ 12456 && $ci ) 12457 { 12458 $tok .= $container_name[$depth]; 12459 } 12460 } 12461 12462 # Decorate block braces with block types to avoid 12463 # unwanted alignments such as the following: 12464 # foreach ( @{$routput_array} ) { $fh->print($_) } 12465 # eval { $fh->close() }; 12466 if ( $raw_tok eq '{' && $block_type_to_go[$i] ) { 12467 my $block_type = $block_type_to_go[$i]; 12468 12469 # map certain related block types to allow 12470 # else blocks to align 12471 $block_type = $block_type_map{$block_type} 12472 if ( defined( $block_type_map{$block_type} ) ); 12473 12474 # remove sub names to allow one-line sub braces to align 12475 # regardless of name 12476 if ( $block_type =~ /^sub / ) { $block_type = 'sub' } 12477 12478 # allow all control-type blocks to align 12479 if ( $block_type =~ /^[A-Z]+$/ ) { $block_type = 'BEGIN' } 12480 12481 $tok .= $block_type; 12482 } 12483 12484 # concatenate the text of the consecutive tokens to form 12485 # the field 12486 push( @fields, 12487 join( '', @tokens_to_go[ $i_start .. $i - 1 ] ) ); 12488 12489 # store the alignment token for this field 12490 push( @tokens, $tok ); 12491 12492 # get ready for the next batch 12493 $i_start = $i; 12494 $j++; 12495 $patterns[$j] = ""; 12496 } 12497 12498 # continue accumulating tokens 12499 # handle non-keywords.. 12500 if ( $types_to_go[$i] ne 'k' ) { 12501 my $type = $types_to_go[$i]; 12502 12503 # Mark most things before arrows as a quote to 12504 # get them to line up. Testfile: mixed.pl. 12505 if ( ( $i < $iend - 1 ) && ( $type =~ /^[wnC]$/ ) ) { 12506 my $next_type = $types_to_go[ $i + 1 ]; 12507 my $i_next_nonblank = 12508 ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 ); 12509 12510 if ( $types_to_go[$i_next_nonblank] eq '=>' ) { 12511 $type = 'Q'; 12512 12513 # Patch to ignore leading minus before words, 12514 # by changing pattern 'mQ' into just 'Q', 12515 # so that we can align things like this: 12516 # Button => "Print letter \"~$_\"", 12517 # -command => [ sub { print "$_[0]\n" }, $_ ], 12518 if ( $patterns[$j] eq 'm' ) { $patterns[$j] = "" } 12519 } 12520 } 12521 12522 # patch to make numbers and quotes align 12523 if ( $type eq 'n' ) { $type = 'Q' } 12524 12525 # patch to ignore any ! in patterns 12526 if ( $type eq '!' ) { $type = '' } 12527 12528 $patterns[$j] .= $type; 12529 } 12530 12531 # for keywords we have to use the actual text 12532 else { 12533 12534 my $tok = $tokens_to_go[$i]; 12535 12536 # but map certain keywords to a common string to allow 12537 # alignment. 12538 $tok = $keyword_map{$tok} 12539 if ( defined( $keyword_map{$tok} ) ); 12540 $patterns[$j] .= $tok; 12541 } 12542 } 12543 12544 # done with this line .. join text of tokens to make the last field 12545 push( @fields, join( '', @tokens_to_go[ $i_start .. $iend ] ) ); 12546 return ( \@tokens, \@fields, \@patterns ); 12547 } 12548 12549} # end make_alignment_patterns 12550 12551{ # begin unmatched_indexes 12552 12553 # closure to keep track of unbalanced containers. 12554 # arrays shared by the routines in this block: 12555 my @unmatched_opening_indexes_in_this_batch; 12556 my @unmatched_closing_indexes_in_this_batch; 12557 my %comma_arrow_count; 12558 12559 sub is_unbalanced_batch { 12560 @unmatched_opening_indexes_in_this_batch + 12561 @unmatched_closing_indexes_in_this_batch; 12562 } 12563 12564 sub comma_arrow_count { 12565 my $seqno = $_[0]; 12566 return $comma_arrow_count{$seqno}; 12567 } 12568 12569 sub match_opening_and_closing_tokens { 12570 12571 # Match up indexes of opening and closing braces, etc, in this batch. 12572 # This has to be done after all tokens are stored because unstoring 12573 # of tokens would otherwise cause trouble. 12574 12575 @unmatched_opening_indexes_in_this_batch = (); 12576 @unmatched_closing_indexes_in_this_batch = (); 12577 %comma_arrow_count = (); 12578 my $comma_arrow_count_contained = 0; 12579 12580 my ( $i, $i_mate, $token ); 12581 foreach $i ( 0 .. $max_index_to_go ) { 12582 if ( $type_sequence_to_go[$i] ) { 12583 $token = $tokens_to_go[$i]; 12584 if ( $token =~ /^[\(\[\{\?]$/ ) { 12585 push @unmatched_opening_indexes_in_this_batch, $i; 12586 } 12587 elsif ( $token =~ /^[\)\]\}\:]$/ ) { 12588 12589 $i_mate = pop @unmatched_opening_indexes_in_this_batch; 12590 if ( defined($i_mate) && $i_mate >= 0 ) { 12591 if ( $type_sequence_to_go[$i_mate] == 12592 $type_sequence_to_go[$i] ) 12593 { 12594 $mate_index_to_go[$i] = $i_mate; 12595 $mate_index_to_go[$i_mate] = $i; 12596 my $seqno = $type_sequence_to_go[$i]; 12597 if ( $comma_arrow_count{$seqno} ) { 12598 $comma_arrow_count_contained += 12599 $comma_arrow_count{$seqno}; 12600 } 12601 } 12602 else { 12603 push @unmatched_opening_indexes_in_this_batch, 12604 $i_mate; 12605 push @unmatched_closing_indexes_in_this_batch, $i; 12606 } 12607 } 12608 else { 12609 push @unmatched_closing_indexes_in_this_batch, $i; 12610 } 12611 } 12612 } 12613 elsif ( $tokens_to_go[$i] eq '=>' ) { 12614 if (@unmatched_opening_indexes_in_this_batch) { 12615 my $j = $unmatched_opening_indexes_in_this_batch[-1]; 12616 my $seqno = $type_sequence_to_go[$j]; 12617 $comma_arrow_count{$seqno}++; 12618 } 12619 } 12620 } 12621 return $comma_arrow_count_contained; 12622 } 12623 12624 sub save_opening_indentation { 12625 12626 # This should be called after each batch of tokens is output. It 12627 # saves indentations of lines of all unmatched opening tokens. 12628 # These will be used by sub get_opening_indentation. 12629 12630 my ( $ri_first, $ri_last, $rindentation_list ) = @_; 12631 12632 # we no longer need indentations of any saved indentations which 12633 # are unmatched closing tokens in this batch, because we will 12634 # never encounter them again. So we can delete them to keep 12635 # the hash size down. 12636 foreach (@unmatched_closing_indexes_in_this_batch) { 12637 my $seqno = $type_sequence_to_go[$_]; 12638 delete $saved_opening_indentation{$seqno}; 12639 } 12640 12641 # we need to save indentations of any unmatched opening tokens 12642 # in this batch because we may need them in a subsequent batch. 12643 foreach (@unmatched_opening_indexes_in_this_batch) { 12644 my $seqno = $type_sequence_to_go[$_]; 12645 $saved_opening_indentation{$seqno} = [ 12646 lookup_opening_indentation( 12647 $_, $ri_first, $ri_last, $rindentation_list 12648 ) 12649 ]; 12650 } 12651 } 12652} # end unmatched_indexes 12653 12654sub get_opening_indentation { 12655 12656 # get the indentation of the line which output the opening token 12657 # corresponding to a given closing token in the current output batch. 12658 # 12659 # given: 12660 # $i_closing - index in this line of a closing token ')' '}' or ']' 12661 # 12662 # $ri_first - reference to list of the first index $i for each output 12663 # line in this batch 12664 # $ri_last - reference to list of the last index $i for each output line 12665 # in this batch 12666 # $rindentation_list - reference to a list containing the indentation 12667 # used for each line. 12668 # 12669 # return: 12670 # -the indentation of the line which contained the opening token 12671 # which matches the token at index $i_opening 12672 # -and its offset (number of columns) from the start of the line 12673 # 12674 my ( $i_closing, $ri_first, $ri_last, $rindentation_list ) = @_; 12675 12676 # first, see if the opening token is in the current batch 12677 my $i_opening = $mate_index_to_go[$i_closing]; 12678 my ( $indent, $offset, $is_leading, $exists ); 12679 $exists = 1; 12680 if ( $i_opening >= 0 ) { 12681 12682 # it is..look up the indentation 12683 ( $indent, $offset, $is_leading ) = 12684 lookup_opening_indentation( $i_opening, $ri_first, $ri_last, 12685 $rindentation_list ); 12686 } 12687 12688 # if not, it should have been stored in the hash by a previous batch 12689 else { 12690 my $seqno = $type_sequence_to_go[$i_closing]; 12691 if ($seqno) { 12692 if ( $saved_opening_indentation{$seqno} ) { 12693 ( $indent, $offset, $is_leading ) = 12694 @{ $saved_opening_indentation{$seqno} }; 12695 } 12696 12697 # some kind of serious error 12698 # (example is badfile.t) 12699 else { 12700 $indent = 0; 12701 $offset = 0; 12702 $is_leading = 0; 12703 $exists = 0; 12704 } 12705 } 12706 12707 # if no sequence number it must be an unbalanced container 12708 else { 12709 $indent = 0; 12710 $offset = 0; 12711 $is_leading = 0; 12712 $exists = 0; 12713 } 12714 } 12715 return ( $indent, $offset, $is_leading, $exists ); 12716} 12717 12718sub lookup_opening_indentation { 12719 12720 # get the indentation of the line in the current output batch 12721 # which output a selected opening token 12722 # 12723 # given: 12724 # $i_opening - index of an opening token in the current output batch 12725 # whose line indentation we need 12726 # $ri_first - reference to list of the first index $i for each output 12727 # line in this batch 12728 # $ri_last - reference to list of the last index $i for each output line 12729 # in this batch 12730 # $rindentation_list - reference to a list containing the indentation 12731 # used for each line. (NOTE: the first slot in 12732 # this list is the last returned line number, and this is 12733 # followed by the list of indentations). 12734 # 12735 # return 12736 # -the indentation of the line which contained token $i_opening 12737 # -and its offset (number of columns) from the start of the line 12738 12739 my ( $i_opening, $ri_start, $ri_last, $rindentation_list ) = @_; 12740 12741 my $nline = $rindentation_list->[0]; # line number of previous lookup 12742 12743 # reset line location if necessary 12744 $nline = 0 if ( $i_opening < $ri_start->[$nline] ); 12745 12746 # find the correct line 12747 unless ( $i_opening > $ri_last->[-1] ) { 12748 while ( $i_opening > $ri_last->[$nline] ) { $nline++; } 12749 } 12750 12751 # error - token index is out of bounds - shouldn't happen 12752 else { 12753 warning( 12754"non-fatal program bug in lookup_opening_indentation - index out of range\n" 12755 ); 12756 report_definite_bug(); 12757 $nline = $#{$ri_last}; 12758 } 12759 12760 $rindentation_list->[0] = 12761 $nline; # save line number to start looking next call 12762 my $ibeg = $ri_start->[$nline]; 12763 my $offset = token_sequence_length( $ibeg, $i_opening ) - 1; 12764 my $is_leading = ( $ibeg == $i_opening ); 12765 return ( $rindentation_list->[ $nline + 1 ], $offset, $is_leading ); 12766} 12767 12768{ 12769 my %is_if_elsif_else_unless_while_until_for_foreach; 12770 12771 BEGIN { 12772 12773 # These block types may have text between the keyword and opening 12774 # curly. Note: 'else' does not, but must be included to allow trailing 12775 # if/elsif text to be appended. 12776 # patch for SWITCH/CASE: added 'case' and 'when' 12777 @_ = qw(if elsif else unless while until for foreach case when); 12778 @is_if_elsif_else_unless_while_until_for_foreach{@_} = 12779 (1) x scalar(@_); 12780 } 12781 12782 sub set_adjusted_indentation { 12783 12784 # This routine has the final say regarding the actual indentation of 12785 # a line. It starts with the basic indentation which has been 12786 # defined for the leading token, and then takes into account any 12787 # options that the user has set regarding special indenting and 12788 # outdenting. 12789 12790 my ( $ibeg, $iend, $rfields, $rpatterns, $ri_first, $ri_last, 12791 $rindentation_list, $level_jump ) 12792 = @_; 12793 12794 # we need to know the last token of this line 12795 my ( $terminal_type, $i_terminal ) = 12796 terminal_type( \@types_to_go, \@block_type_to_go, $ibeg, $iend ); 12797 12798 my $is_outdented_line = 0; 12799 12800 my $is_semicolon_terminated = $terminal_type eq ';' 12801 && $nesting_depth_to_go[$iend] < $nesting_depth_to_go[$ibeg]; 12802 12803 ########################################################## 12804 # Section 1: set a flag and a default indentation 12805 # 12806 # Most lines are indented according to the initial token. 12807 # But it is common to outdent to the level just after the 12808 # terminal token in certain cases... 12809 # adjust_indentation flag: 12810 # 0 - do not adjust 12811 # 1 - outdent 12812 # 2 - vertically align with opening token 12813 # 3 - indent 12814 ########################################################## 12815 my $adjust_indentation = 0; 12816 my $default_adjust_indentation = $adjust_indentation; 12817 12818 my ( 12819 $opening_indentation, $opening_offset, 12820 $is_leading, $opening_exists 12821 ); 12822 12823 # if we are at a closing token of some type.. 12824 if ( $types_to_go[$ibeg] =~ /^[\)\}\]R]$/ ) { 12825 12826 # get the indentation of the line containing the corresponding 12827 # opening token 12828 ( 12829 $opening_indentation, $opening_offset, 12830 $is_leading, $opening_exists 12831 ) 12832 = get_opening_indentation( $ibeg, $ri_first, $ri_last, 12833 $rindentation_list ); 12834 12835 # First set the default behavior: 12836 if ( 12837 12838 # default behavior is to outdent closing lines 12839 # of the form: "); }; ]; )->xxx;" 12840 $is_semicolon_terminated 12841 12842 # and 'cuddled parens' of the form: ")->pack(" 12843 || ( 12844 $terminal_type eq '(' 12845 && $types_to_go[$ibeg] eq ')' 12846 && ( $nesting_depth_to_go[$iend] + 1 == 12847 $nesting_depth_to_go[$ibeg] ) 12848 ) 12849 12850 # and when the next line is at a lower indentation level 12851 # PATCH: and only if the style allows undoing continuation 12852 # for all closing token types. We should really wait until 12853 # the indentation of the next line is known and then make 12854 # a decision, but that would require another pass. 12855 || ( $level_jump < 0 && !$some_closing_token_indentation ) 12856 ) 12857 { 12858 $adjust_indentation = 1; 12859 } 12860 12861 # outdent something like '),' 12862 if ( 12863 $terminal_type eq ',' 12864 12865 # allow just one character before the comma 12866 && $i_terminal == $ibeg + 1 12867 12868 # requre LIST environment; otherwise, we may outdent too much -- 12869 # this can happen in calls without parentheses (overload.t); 12870 && $container_environment_to_go[$i_terminal] eq 'LIST' 12871 ) 12872 { 12873 $adjust_indentation = 1; 12874 } 12875 12876 # undo continuation indentation of a terminal closing token if 12877 # it is the last token before a level decrease. This will allow 12878 # a closing token to line up with its opening counterpart, and 12879 # avoids a indentation jump larger than 1 level. 12880 if ( $types_to_go[$i_terminal] =~ /^[\}\]\)R]$/ 12881 && $i_terminal == $ibeg ) 12882 { 12883 my $ci = $ci_levels_to_go[$ibeg]; 12884 my $lev = $levels_to_go[$ibeg]; 12885 my $next_type = $types_to_go[ $ibeg + 1 ]; 12886 my $i_next_nonblank = 12887 ( ( $next_type eq 'b' ) ? $ibeg + 2 : $ibeg + 1 ); 12888 if ( $i_next_nonblank <= $max_index_to_go 12889 && $levels_to_go[$i_next_nonblank] < $lev ) 12890 { 12891 $adjust_indentation = 1; 12892 } 12893 } 12894 12895 # YVES patch 1 of 2: 12896 # Undo ci of line with leading closing eval brace, 12897 # but not beyond the indention of the line with 12898 # the opening brace. 12899 if ( $block_type_to_go[$ibeg] eq 'eval' 12900 && !$rOpts->{'line-up-parentheses'} 12901 && !$rOpts->{'indent-closing-brace'} ) 12902 { 12903 ( 12904 $opening_indentation, $opening_offset, 12905 $is_leading, $opening_exists 12906 ) 12907 = get_opening_indentation( $ibeg, $ri_first, $ri_last, 12908 $rindentation_list ); 12909 my $indentation = $leading_spaces_to_go[$ibeg]; 12910 if ( defined($opening_indentation) 12911 && $indentation > $opening_indentation ) 12912 { 12913 $adjust_indentation = 1; 12914 } 12915 } 12916 12917 $default_adjust_indentation = $adjust_indentation; 12918 12919 # Now modify default behavior according to user request: 12920 # handle option to indent non-blocks of the form ); }; ]; 12921 # But don't do special indentation to something like ')->pack(' 12922 if ( !$block_type_to_go[$ibeg] ) { 12923 my $cti = $closing_token_indentation{ $tokens_to_go[$ibeg] }; 12924 if ( $cti == 1 ) { 12925 if ( $i_terminal <= $ibeg + 1 12926 || $is_semicolon_terminated ) 12927 { 12928 $adjust_indentation = 2; 12929 } 12930 else { 12931 $adjust_indentation = 0; 12932 } 12933 } 12934 elsif ( $cti == 2 ) { 12935 if ($is_semicolon_terminated) { 12936 $adjust_indentation = 3; 12937 } 12938 else { 12939 $adjust_indentation = 0; 12940 } 12941 } 12942 elsif ( $cti == 3 ) { 12943 $adjust_indentation = 3; 12944 } 12945 } 12946 12947 # handle option to indent blocks 12948 else { 12949 if ( 12950 $rOpts->{'indent-closing-brace'} 12951 && ( 12952 $i_terminal == $ibeg # isolated terminal '}' 12953 || $is_semicolon_terminated 12954 ) 12955 ) # } xxxx ; 12956 { 12957 $adjust_indentation = 3; 12958 } 12959 } 12960 } 12961 12962 # if at ');', '};', '>;', and '];' of a terminal qw quote 12963 elsif ($$rpatterns[0] =~ /^qb*;$/ 12964 && $$rfields[0] =~ /^([\)\}\]\>]);$/ ) 12965 { 12966 if ( $closing_token_indentation{$1} == 0 ) { 12967 $adjust_indentation = 1; 12968 } 12969 else { 12970 $adjust_indentation = 3; 12971 } 12972 } 12973 12974 # if line begins with a ':', align it with any 12975 # previous line leading with corresponding ? 12976 elsif ( $types_to_go[$ibeg] eq ':' ) { 12977 ( 12978 $opening_indentation, $opening_offset, 12979 $is_leading, $opening_exists 12980 ) 12981 = get_opening_indentation( $ibeg, $ri_first, $ri_last, 12982 $rindentation_list ); 12983 if ($is_leading) { $adjust_indentation = 2; } 12984 } 12985 12986 ########################################################## 12987 # Section 2: set indentation according to flag set above 12988 # 12989 # Select the indentation object to define leading 12990 # whitespace. If we are outdenting something like '} } );' 12991 # then we want to use one level below the last token 12992 # ($i_terminal) in order to get it to fully outdent through 12993 # all levels. 12994 ########################################################## 12995 my $indentation; 12996 my $lev; 12997 my $level_end = $levels_to_go[$iend]; 12998 12999 if ( $adjust_indentation == 0 ) { 13000 $indentation = $leading_spaces_to_go[$ibeg]; 13001 $lev = $levels_to_go[$ibeg]; 13002 } 13003 elsif ( $adjust_indentation == 1 ) { 13004 $indentation = $reduced_spaces_to_go[$i_terminal]; 13005 $lev = $levels_to_go[$i_terminal]; 13006 } 13007 13008 # handle indented closing token which aligns with opening token 13009 elsif ( $adjust_indentation == 2 ) { 13010 13011 # handle option to align closing token with opening token 13012 $lev = $levels_to_go[$ibeg]; 13013 13014 # calculate spaces needed to align with opening token 13015 my $space_count = 13016 get_SPACES($opening_indentation) + $opening_offset; 13017 13018 # Indent less than the previous line. 13019 # 13020 # Problem: For -lp we don't exactly know what it was if there 13021 # were recoverable spaces sent to the aligner. A good solution 13022 # would be to force a flush of the vertical alignment buffer, so 13023 # that we would know. For now, this rule is used for -lp: 13024 # 13025 # When the last line did not start with a closing token we will 13026 # be optimistic that the aligner will recover everything wanted. 13027 # 13028 # This rule will prevent us from breaking a hierarchy of closing 13029 # tokens, and in a worst case will leave a closing paren too far 13030 # indented, but this is better than frequently leaving it not 13031 # indented enough. 13032 my $last_spaces = get_SPACES($last_indentation_written); 13033 if ( $last_leading_token !~ /^[\}\]\)]$/ ) { 13034 $last_spaces += 13035 get_RECOVERABLE_SPACES($last_indentation_written); 13036 } 13037 13038 # reset the indentation to the new space count if it works 13039 # only options are all or none: nothing in-between looks good 13040 $lev = $levels_to_go[$ibeg]; 13041 if ( $space_count < $last_spaces ) { 13042 if ($rOpts_line_up_parentheses) { 13043 my $lev = $levels_to_go[$ibeg]; 13044 $indentation = 13045 new_lp_indentation_item( $space_count, $lev, 0, 0, 0 ); 13046 } 13047 else { 13048 $indentation = $space_count; 13049 } 13050 } 13051 13052 # revert to default if it doesn't work 13053 else { 13054 $space_count = leading_spaces_to_go($ibeg); 13055 if ( $default_adjust_indentation == 0 ) { 13056 $indentation = $leading_spaces_to_go[$ibeg]; 13057 } 13058 elsif ( $default_adjust_indentation == 1 ) { 13059 $indentation = $reduced_spaces_to_go[$i_terminal]; 13060 $lev = $levels_to_go[$i_terminal]; 13061 } 13062 } 13063 } 13064 13065 # Full indentaion of closing tokens (-icb and -icp or -cti=2) 13066 else { 13067 13068 # handle -icb (indented closing code block braces) 13069 # Updated method for indented block braces: indent one full level if 13070 # there is no continuation indentation. This will occur for major 13071 # structures such as sub, if, else, but not for things like map 13072 # blocks. 13073 # 13074 # Note: only code blocks without continuation indentation are 13075 # handled here (if, else, unless, ..). In the following snippet, 13076 # the terminal brace of the sort block will have continuation 13077 # indentation as shown so it will not be handled by the coding 13078 # here. We would have to undo the continuation indentation to do 13079 # this, but it probably looks ok as is. This is a possible future 13080 # update for semicolon terminated lines. 13081 # 13082 # if ($sortby eq 'date' or $sortby eq 'size') { 13083 # @files = sort { 13084 # $file_data{$a}{$sortby} <=> $file_data{$b}{$sortby} 13085 # or $a cmp $b 13086 # } @files; 13087 # } 13088 # 13089 if ( $block_type_to_go[$ibeg] 13090 && $ci_levels_to_go[$i_terminal] == 0 ) 13091 { 13092 my $spaces = get_SPACES( $leading_spaces_to_go[$i_terminal] ); 13093 $indentation = $spaces + $rOpts_indent_columns; 13094 13095 # NOTE: for -lp we could create a new indentation object, but 13096 # there is probably no need to do it 13097 } 13098 13099 # handle -icp and any -icb block braces which fall through above 13100 # test such as the 'sort' block mentioned above. 13101 else { 13102 13103 # There are currently two ways to handle -icp... 13104 # One way is to use the indentation of the previous line: 13105 # $indentation = $last_indentation_written; 13106 13107 # The other way is to use the indentation that the previous line 13108 # would have had if it hadn't been adjusted: 13109 $indentation = $last_unadjusted_indentation; 13110 13111 # Current method: use the minimum of the two. This avoids 13112 # inconsistent indentation. 13113 if ( get_SPACES($last_indentation_written) < 13114 get_SPACES($indentation) ) 13115 { 13116 $indentation = $last_indentation_written; 13117 } 13118 } 13119 13120 # use previous indentation but use own level 13121 # to cause list to be flushed properly 13122 $lev = $levels_to_go[$ibeg]; 13123 } 13124 13125 # remember indentation except for multi-line quotes, which get 13126 # no indentation 13127 unless ( $ibeg == 0 && $starting_in_quote ) { 13128 $last_indentation_written = $indentation; 13129 $last_unadjusted_indentation = $leading_spaces_to_go[$ibeg]; 13130 $last_leading_token = $tokens_to_go[$ibeg]; 13131 } 13132 13133 # be sure lines with leading closing tokens are not outdented more 13134 # than the line which contained the corresponding opening token. 13135 13136 ############################################################# 13137 # updated per bug report in alex_bug.pl: we must not 13138 # mess with the indentation of closing logical braces so 13139 # we must treat something like '} else {' as if it were 13140 # an isolated brace my $is_isolated_block_brace = ( 13141 # $iend == $ibeg ) && $block_type_to_go[$ibeg]; 13142 ############################################################# 13143 my $is_isolated_block_brace = $block_type_to_go[$ibeg] 13144 && ( $iend == $ibeg 13145 || $is_if_elsif_else_unless_while_until_for_foreach{ 13146 $block_type_to_go[$ibeg] 13147 } ); 13148 13149 # only do this for a ':; which is aligned with its leading '?' 13150 my $is_unaligned_colon = $types_to_go[$ibeg] eq ':' && !$is_leading; 13151 if ( defined($opening_indentation) 13152 && !$is_isolated_block_brace 13153 && !$is_unaligned_colon ) 13154 { 13155 if ( get_SPACES($opening_indentation) > get_SPACES($indentation) ) { 13156 $indentation = $opening_indentation; 13157 } 13158 } 13159 13160 # remember the indentation of each line of this batch 13161 push @{$rindentation_list}, $indentation; 13162 13163 # outdent lines with certain leading tokens... 13164 if ( 13165 13166 # must be first word of this batch 13167 $ibeg == 0 13168 13169 # and ... 13170 && ( 13171 13172 # certain leading keywords if requested 13173 ( 13174 $rOpts->{'outdent-keywords'} 13175 && $types_to_go[$ibeg] eq 'k' 13176 && $outdent_keyword{ $tokens_to_go[$ibeg] } 13177 ) 13178 13179 # or labels if requested 13180 || ( $rOpts->{'outdent-labels'} && $types_to_go[$ibeg] eq 'J' ) 13181 13182 # or static block comments if requested 13183 || ( $types_to_go[$ibeg] eq '#' 13184 && $rOpts->{'outdent-static-block-comments'} 13185 && $is_static_block_comment ) 13186 ) 13187 ) 13188 13189 { 13190 my $space_count = leading_spaces_to_go($ibeg); 13191 if ( $space_count > 0 ) { 13192 $space_count -= $rOpts_continuation_indentation; 13193 $is_outdented_line = 1; 13194 if ( $space_count < 0 ) { $space_count = 0 } 13195 13196 # do not promote a spaced static block comment to non-spaced; 13197 # this is not normally necessary but could be for some 13198 # unusual user inputs (such as -ci = -i) 13199 if ( $types_to_go[$ibeg] eq '#' && $space_count == 0 ) { 13200 $space_count = 1; 13201 } 13202 13203 if ($rOpts_line_up_parentheses) { 13204 $indentation = 13205 new_lp_indentation_item( $space_count, $lev, 0, 0, 0 ); 13206 } 13207 else { 13208 $indentation = $space_count; 13209 } 13210 } 13211 } 13212 13213 return ( $indentation, $lev, $level_end, $terminal_type, 13214 $is_semicolon_terminated, $is_outdented_line ); 13215 } 13216} 13217 13218sub set_vertical_tightness_flags { 13219 13220 my ( $n, $n_last_line, $ibeg, $iend, $ri_first, $ri_last ) = @_; 13221 13222 # Define vertical tightness controls for the nth line of a batch. 13223 # We create an array of parameters which tell the vertical aligner 13224 # if we should combine this line with the next line to achieve the 13225 # desired vertical tightness. The array of parameters contains: 13226 # 13227 # [0] type: 1=opening non-block 2=closing non-block 13228 # 3=opening block brace 4=closing block brace 13229 # 13230 # [1] flag: if opening: 1=no multiple steps, 2=multiple steps ok 13231 # if closing: spaces of padding to use 13232 # [2] sequence number of container 13233 # [3] valid flag: do not append if this flag is false. Will be 13234 # true if appropriate -vt flag is set. Otherwise, Will be 13235 # made true only for 2 line container in parens with -lp 13236 # 13237 # These flags are used by sub set_leading_whitespace in 13238 # the vertical aligner 13239 13240 my $rvertical_tightness_flags = [ 0, 0, 0, 0, 0, 0 ]; 13241 13242 #-------------------------------------------------------------- 13243 # Vertical Tightness Flags Section 1: 13244 # Handle Lines 1 .. n-1 but not the last line 13245 # For non-BLOCK tokens, we will need to examine the next line 13246 # too, so we won't consider the last line. 13247 #-------------------------------------------------------------- 13248 if ( $n < $n_last_line ) { 13249 13250 #-------------------------------------------------------------- 13251 # Vertical Tightness Flags Section 1a: 13252 # Look for Type 1, last token of this line is a non-block opening token 13253 #-------------------------------------------------------------- 13254 my $ibeg_next = $$ri_first[ $n + 1 ]; 13255 my $token_end = $tokens_to_go[$iend]; 13256 my $iend_next = $$ri_last[ $n + 1 ]; 13257 if ( 13258 $type_sequence_to_go[$iend] 13259 && !$block_type_to_go[$iend] 13260 && $is_opening_token{$token_end} 13261 && ( 13262 $opening_vertical_tightness{$token_end} > 0 13263 13264 # allow 2-line method call to be closed up 13265 || ( $rOpts_line_up_parentheses 13266 && $token_end eq '(' 13267 && $iend > $ibeg 13268 && $types_to_go[ $iend - 1 ] ne 'b' ) 13269 ) 13270 ) 13271 { 13272 13273 # avoid multiple jumps in nesting depth in one line if 13274 # requested 13275 my $ovt = $opening_vertical_tightness{$token_end}; 13276 my $iend_next = $$ri_last[ $n + 1 ]; 13277 unless ( 13278 $ovt < 2 13279 && ( $nesting_depth_to_go[ $iend_next + 1 ] != 13280 $nesting_depth_to_go[$ibeg_next] ) 13281 ) 13282 { 13283 13284 # If -vt flag has not been set, mark this as invalid 13285 # and aligner will validate it if it sees the closing paren 13286 # within 2 lines. 13287 my $valid_flag = $ovt; 13288 @{$rvertical_tightness_flags} = 13289 ( 1, $ovt, $type_sequence_to_go[$iend], $valid_flag ); 13290 } 13291 } 13292 13293 #-------------------------------------------------------------- 13294 # Vertical Tightness Flags Section 1b: 13295 # Look for Type 2, first token of next line is a non-block closing 13296 # token .. and be sure this line does not have a side comment 13297 #-------------------------------------------------------------- 13298 my $token_next = $tokens_to_go[$ibeg_next]; 13299 if ( $type_sequence_to_go[$ibeg_next] 13300 && !$block_type_to_go[$ibeg_next] 13301 && $is_closing_token{$token_next} 13302 && $types_to_go[$iend] !~ '#' ) # for safety, shouldn't happen! 13303 { 13304 my $ovt = $opening_vertical_tightness{$token_next}; 13305 my $cvt = $closing_vertical_tightness{$token_next}; 13306 if ( 13307 13308 # never append a trailing line like )->pack( 13309 # because it will throw off later alignment 13310 ( 13311 $nesting_depth_to_go[$ibeg_next] == 13312 $nesting_depth_to_go[ $iend_next + 1 ] + 1 13313 ) 13314 && ( 13315 $cvt == 2 13316 || ( 13317 $container_environment_to_go[$ibeg_next] ne 'LIST' 13318 && ( 13319 $cvt == 1 13320 13321 # allow closing up 2-line method calls 13322 || ( $rOpts_line_up_parentheses 13323 && $token_next eq ')' ) 13324 ) 13325 ) 13326 ) 13327 ) 13328 { 13329 13330 # decide which trailing closing tokens to append.. 13331 my $ok = 0; 13332 if ( $cvt == 2 || $iend_next == $ibeg_next ) { $ok = 1 } 13333 else { 13334 my $str = join( '', 13335 @types_to_go[ $ibeg_next + 1 .. $ibeg_next + 2 ] ); 13336 13337 # append closing token if followed by comment or ';' 13338 if ( $str =~ /^b?[#;]/ ) { $ok = 1 } 13339 } 13340 13341 if ($ok) { 13342 my $valid_flag = $cvt; 13343 @{$rvertical_tightness_flags} = ( 13344 2, 13345 $tightness{$token_next} == 2 ? 0 : 1, 13346 $type_sequence_to_go[$ibeg_next], $valid_flag, 13347 ); 13348 } 13349 } 13350 } 13351 13352 #-------------------------------------------------------------- 13353 # Vertical Tightness Flags Section 1c: 13354 # Implement the Opening Token Right flag (Type 2).. 13355 # If requested, move an isolated trailing opening token to the end of 13356 # the previous line which ended in a comma. We could do this 13357 # in sub recombine_breakpoints but that would cause problems 13358 # with -lp formatting. The problem is that indentation will 13359 # quickly move far to the right in nested expressions. By 13360 # doing it after indentation has been set, we avoid changes 13361 # to the indentation. Actual movement of the token takes place 13362 # in sub valign_output_step_B. 13363 #-------------------------------------------------------------- 13364 if ( 13365 $opening_token_right{ $tokens_to_go[$ibeg_next] } 13366 13367 # previous line is not opening 13368 # (use -sot to combine with it) 13369 && !$is_opening_token{$token_end} 13370 13371 # previous line ended in one of these 13372 # (add other cases if necessary; '=>' and '.' are not necessary 13373 && !$block_type_to_go[$ibeg_next] 13374 13375 # this is a line with just an opening token 13376 && ( $iend_next == $ibeg_next 13377 || $iend_next == $ibeg_next + 2 13378 && $types_to_go[$iend_next] eq '#' ) 13379 13380 # looks bad if we align vertically with the wrong container 13381 && $tokens_to_go[$ibeg] ne $tokens_to_go[$ibeg_next] 13382 ) 13383 { 13384 my $valid_flag = 1; 13385 my $spaces = ( $types_to_go[ $ibeg_next - 1 ] eq 'b' ) ? 1 : 0; 13386 @{$rvertical_tightness_flags} = 13387 ( 2, $spaces, $type_sequence_to_go[$ibeg_next], $valid_flag, ); 13388 } 13389 13390 #-------------------------------------------------------------- 13391 # Vertical Tightness Flags Section 1d: 13392 # Stacking of opening and closing tokens (Type 2) 13393 #-------------------------------------------------------------- 13394 my $stackable; 13395 my $token_beg_next = $tokens_to_go[$ibeg_next]; 13396 13397 # patch to make something like 'qw(' behave like an opening paren 13398 # (aran.t) 13399 if ( $types_to_go[$ibeg_next] eq 'q' ) { 13400 if ( $token_beg_next =~ /^qw\s*([\[\(\{])$/ ) { 13401 $token_beg_next = $1; 13402 } 13403 } 13404 13405 if ( $is_closing_token{$token_end} 13406 && $is_closing_token{$token_beg_next} ) 13407 { 13408 $stackable = $stack_closing_token{$token_beg_next} 13409 unless ( $block_type_to_go[$ibeg_next] ) 13410 ; # shouldn't happen; just checking 13411 } 13412 elsif ($is_opening_token{$token_end} 13413 && $is_opening_token{$token_beg_next} ) 13414 { 13415 $stackable = $stack_opening_token{$token_beg_next} 13416 unless ( $block_type_to_go[$ibeg_next] ) 13417 ; # shouldn't happen; just checking 13418 } 13419 13420 if ($stackable) { 13421 13422 my $is_semicolon_terminated; 13423 if ( $n + 1 == $n_last_line ) { 13424 my ( $terminal_type, $i_terminal ) = terminal_type( 13425 \@types_to_go, \@block_type_to_go, 13426 $ibeg_next, $iend_next 13427 ); 13428 $is_semicolon_terminated = $terminal_type eq ';' 13429 && $nesting_depth_to_go[$iend_next] < 13430 $nesting_depth_to_go[$ibeg_next]; 13431 } 13432 13433 # this must be a line with just an opening token 13434 # or end in a semicolon 13435 if ( 13436 $is_semicolon_terminated 13437 || ( $iend_next == $ibeg_next 13438 || $iend_next == $ibeg_next + 2 13439 && $types_to_go[$iend_next] eq '#' ) 13440 ) 13441 { 13442 my $valid_flag = 1; 13443 my $spaces = ( $types_to_go[ $ibeg_next - 1 ] eq 'b' ) ? 1 : 0; 13444 @{$rvertical_tightness_flags} = 13445 ( 2, $spaces, $type_sequence_to_go[$ibeg_next], $valid_flag, 13446 ); 13447 } 13448 } 13449 } 13450 13451 #-------------------------------------------------------------- 13452 # Vertical Tightness Flags Section 2: 13453 # Handle type 3, opening block braces on last line of the batch 13454 # Check for a last line with isolated opening BLOCK curly 13455 #-------------------------------------------------------------- 13456 elsif ($rOpts_block_brace_vertical_tightness 13457 && $ibeg eq $iend 13458 && $types_to_go[$iend] eq '{' 13459 && $block_type_to_go[$iend] =~ 13460 /$block_brace_vertical_tightness_pattern/o ) 13461 { 13462 @{$rvertical_tightness_flags} = 13463 ( 3, $rOpts_block_brace_vertical_tightness, 0, 1 ); 13464 } 13465 13466 #-------------------------------------------------------------- 13467 # Vertical Tightness Flags Section 3: 13468 # Handle type 4, a closing block brace on the last line of the batch Check 13469 # for a last line with isolated closing BLOCK curly 13470 #-------------------------------------------------------------- 13471 elsif ($rOpts_stack_closing_block_brace 13472 && $ibeg eq $iend 13473 && $block_type_to_go[$iend] 13474 && $types_to_go[$iend] eq '}' ) 13475 { 13476 my $spaces = $rOpts_block_brace_tightness == 2 ? 0 : 1; 13477 @{$rvertical_tightness_flags} = 13478 ( 4, $spaces, $type_sequence_to_go[$iend], 1 ); 13479 } 13480 13481 # pack in the sequence numbers of the ends of this line 13482 $rvertical_tightness_flags->[4] = get_seqno($ibeg); 13483 $rvertical_tightness_flags->[5] = get_seqno($iend); 13484 return $rvertical_tightness_flags; 13485} 13486 13487sub get_seqno { 13488 13489 # get opening and closing sequence numbers of a token for the vertical 13490 # aligner. Assign qw quotes a value to allow qw opening and closing tokens 13491 # to be treated somewhat like opening and closing tokens for stacking 13492 # tokens by the vertical aligner. 13493 my ($ii) = @_; 13494 my $seqno = $type_sequence_to_go[$ii]; 13495 if ( $types_to_go[$ii] eq 'q' ) { 13496 my $SEQ_QW = -1; 13497 if ( $ii > 0 ) { 13498 $seqno = $SEQ_QW if ( $tokens_to_go[$ii] =~ /^qw\s*[\(\{\[]/ ); 13499 } 13500 else { 13501 if ( !$ending_in_quote ) { 13502 $seqno = $SEQ_QW if ( $tokens_to_go[$ii] =~ /[\)\}\]]$/ ); 13503 } 13504 } 13505 } 13506 return ($seqno); 13507} 13508 13509{ 13510 my %is_vertical_alignment_type; 13511 my %is_vertical_alignment_keyword; 13512 my %is_terminal_alignment_type; 13513 13514 BEGIN { 13515 13516 # Removed =~ from list to improve chances of alignment 13517 @_ = qw# 13518 = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x= 13519 { ? : => && || // ~~ !~~ 13520 #; 13521 @is_vertical_alignment_type{@_} = (1) x scalar(@_); 13522 13523 # only align these at end of line 13524 @_ = qw(&& ||); 13525 @is_terminal_alignment_type{@_} = (1) x scalar(@_); 13526 13527 # eq and ne were removed from this list to improve alignment chances 13528 @_ = qw(if unless and or err for foreach while until); 13529 @is_vertical_alignment_keyword{@_} = (1) x scalar(@_); 13530 } 13531 13532 sub set_vertical_alignment_markers { 13533 13534 # This routine takes the first step toward vertical alignment of the 13535 # lines of output text. It looks for certain tokens which can serve as 13536 # vertical alignment markers (such as an '='). 13537 # 13538 # Method: We look at each token $i in this output batch and set 13539 # $matching_token_to_go[$i] equal to those tokens at which we would 13540 # accept vertical alignment. 13541 13542 # nothing to do if we aren't allowed to change whitespace 13543 if ( !$rOpts_add_whitespace ) { 13544 for my $i ( 0 .. $max_index_to_go ) { 13545 $matching_token_to_go[$i] = ''; 13546 } 13547 return; 13548 } 13549 13550 my ( $ri_first, $ri_last ) = @_; 13551 13552 # remember the index of last nonblank token before any sidecomment 13553 my $i_terminal = $max_index_to_go; 13554 if ( $types_to_go[$i_terminal] eq '#' ) { 13555 if ( $i_terminal > 0 && $types_to_go[ --$i_terminal ] eq 'b' ) { 13556 if ( $i_terminal > 0 ) { --$i_terminal } 13557 } 13558 } 13559 13560 # look at each line of this batch.. 13561 my $last_vertical_alignment_before_index; 13562 my $vert_last_nonblank_type; 13563 my $vert_last_nonblank_token; 13564 my $vert_last_nonblank_block_type; 13565 my $max_line = @$ri_first - 1; 13566 my ( $i, $type, $token, $block_type, $alignment_type ); 13567 my ( $ibeg, $iend, $line ); 13568 13569 foreach $line ( 0 .. $max_line ) { 13570 $ibeg = $$ri_first[$line]; 13571 $iend = $$ri_last[$line]; 13572 $last_vertical_alignment_before_index = -1; 13573 $vert_last_nonblank_type = ''; 13574 $vert_last_nonblank_token = ''; 13575 $vert_last_nonblank_block_type = ''; 13576 13577 # look at each token in this output line.. 13578 foreach $i ( $ibeg .. $iend ) { 13579 $alignment_type = ''; 13580 $type = $types_to_go[$i]; 13581 $block_type = $block_type_to_go[$i]; 13582 $token = $tokens_to_go[$i]; 13583 13584 # check for flag indicating that we should not align 13585 # this token 13586 if ( $matching_token_to_go[$i] ) { 13587 $matching_token_to_go[$i] = ''; 13588 next; 13589 } 13590 13591 #-------------------------------------------------------- 13592 # First see if we want to align BEFORE this token 13593 #-------------------------------------------------------- 13594 13595 # The first possible token that we can align before 13596 # is index 2 because: 1) it doesn't normally make sense to 13597 # align before the first token and 2) the second 13598 # token must be a blank if we are to align before 13599 # the third 13600 if ( $i < $ibeg + 2 ) { } 13601 13602 # must follow a blank token 13603 elsif ( $types_to_go[ $i - 1 ] ne 'b' ) { } 13604 13605 # align a side comment -- 13606 elsif ( $type eq '#' ) { 13607 13608 unless ( 13609 13610 # it is a static side comment 13611 ( 13612 $rOpts->{'static-side-comments'} 13613 && $token =~ /$static_side_comment_pattern/o 13614 ) 13615 13616 # or a closing side comment 13617 || ( $vert_last_nonblank_block_type 13618 && $token =~ 13619 /$closing_side_comment_prefix_pattern/o ) 13620 ) 13621 { 13622 $alignment_type = $type; 13623 } ## Example of a static side comment 13624 } 13625 13626 # otherwise, do not align two in a row to create a 13627 # blank field 13628 elsif ( $last_vertical_alignment_before_index == $i - 2 ) { } 13629 13630 # align before one of these keywords 13631 # (within a line, since $i>1) 13632 elsif ( $type eq 'k' ) { 13633 13634 # /^(if|unless|and|or|eq|ne)$/ 13635 if ( $is_vertical_alignment_keyword{$token} ) { 13636 $alignment_type = $token; 13637 } 13638 } 13639 13640 # align before one of these types.. 13641 # Note: add '.' after new vertical aligner is operational 13642 elsif ( $is_vertical_alignment_type{$type} ) { 13643 $alignment_type = $token; 13644 13645 # Do not align a terminal token. Although it might 13646 # occasionally look ok to do this, this has been found to be 13647 # a good general rule. The main problems are: 13648 # (1) that the terminal token (such as an = or :) might get 13649 # moved far to the right where it is hard to see because 13650 # nothing follows it, and 13651 # (2) doing so may prevent other good alignments. 13652 # Current exceptions are && and || 13653 if ( $i == $iend || $i >= $i_terminal ) { 13654 $alignment_type = "" 13655 unless ( $is_terminal_alignment_type{$type} ); 13656 } 13657 13658 # Do not align leading ': (' or '. ('. This would prevent 13659 # alignment in something like the following: 13660 # $extra_space .= 13661 # ( $input_line_number < 10 ) ? " " 13662 # : ( $input_line_number < 100 ) ? " " 13663 # : ""; 13664 # or 13665 # $code = 13666 # ( $case_matters ? $accessor : " lc($accessor) " ) 13667 # . ( $yesno ? " eq " : " ne " ) 13668 if ( $i == $ibeg + 2 13669 && $types_to_go[$ibeg] =~ /^[\.\:]$/ 13670 && $types_to_go[ $i - 1 ] eq 'b' ) 13671 { 13672 $alignment_type = ""; 13673 } 13674 13675 # For a paren after keyword, only align something like this: 13676 # if ( $a ) { &a } 13677 # elsif ( $b ) { &b } 13678 if ( $token eq '(' && $vert_last_nonblank_type eq 'k' ) { 13679 $alignment_type = "" 13680 unless $vert_last_nonblank_token =~ 13681 /^(if|unless|elsif)$/; 13682 } 13683 13684 # be sure the alignment tokens are unique 13685 # This didn't work well: reason not determined 13686 # if ($token ne $type) {$alignment_type .= $type} 13687 } 13688 13689 # NOTE: This is deactivated because it causes the previous 13690 # if/elsif alignment to fail 13691 #elsif ( $type eq '}' && $token eq '}' && $block_type_to_go[$i]) 13692 #{ $alignment_type = $type; } 13693 13694 if ($alignment_type) { 13695 $last_vertical_alignment_before_index = $i; 13696 } 13697 13698 #-------------------------------------------------------- 13699 # Next see if we want to align AFTER the previous nonblank 13700 #-------------------------------------------------------- 13701 13702 # We want to line up ',' and interior ';' tokens, with the added 13703 # space AFTER these tokens. (Note: interior ';' is included 13704 # because it may occur in short blocks). 13705 if ( 13706 13707 # we haven't already set it 13708 !$alignment_type 13709 13710 # and its not the first token of the line 13711 && ( $i > $ibeg ) 13712 13713 # and it follows a blank 13714 && $types_to_go[ $i - 1 ] eq 'b' 13715 13716 # and previous token IS one of these: 13717 && ( $vert_last_nonblank_type =~ /^[\,\;]$/ ) 13718 13719 # and it's NOT one of these 13720 && ( $type !~ /^[b\#\)\]\}]$/ ) 13721 13722 # then go ahead and align 13723 ) 13724 13725 { 13726 $alignment_type = $vert_last_nonblank_type; 13727 } 13728 13729 #-------------------------------------------------------- 13730 # then store the value 13731 #-------------------------------------------------------- 13732 $matching_token_to_go[$i] = $alignment_type; 13733 if ( $type ne 'b' ) { 13734 $vert_last_nonblank_type = $type; 13735 $vert_last_nonblank_token = $token; 13736 $vert_last_nonblank_block_type = $block_type; 13737 } 13738 } 13739 } 13740 } 13741} 13742 13743sub terminal_type { 13744 13745 # returns type of last token on this line (terminal token), as follows: 13746 # returns # for a full-line comment 13747 # returns ' ' for a blank line 13748 # otherwise returns final token type 13749 13750 my ( $rtype, $rblock_type, $ibeg, $iend ) = @_; 13751 13752 # check for full-line comment.. 13753 if ( $$rtype[$ibeg] eq '#' ) { 13754 return wantarray ? ( $$rtype[$ibeg], $ibeg ) : $$rtype[$ibeg]; 13755 } 13756 else { 13757 13758 # start at end and walk bakwards.. 13759 for ( my $i = $iend ; $i >= $ibeg ; $i-- ) { 13760 13761 # skip past any side comment and blanks 13762 next if ( $$rtype[$i] eq 'b' ); 13763 next if ( $$rtype[$i] eq '#' ); 13764 13765 # found it..make sure it is a BLOCK termination, 13766 # but hide a terminal } after sort/grep/map because it is not 13767 # necessarily the end of the line. (terminal.t) 13768 my $terminal_type = $$rtype[$i]; 13769 if ( 13770 $terminal_type eq '}' 13771 && ( !$$rblock_type[$i] 13772 || ( $is_sort_map_grep_eval_do{ $$rblock_type[$i] } ) ) 13773 ) 13774 { 13775 $terminal_type = 'b'; 13776 } 13777 return wantarray ? ( $terminal_type, $i ) : $terminal_type; 13778 } 13779 13780 # empty line 13781 return wantarray ? ( ' ', $ibeg ) : ' '; 13782 } 13783} 13784 13785{ # set_bond_strengths 13786 13787 my %is_good_keyword_breakpoint; 13788 my %is_lt_gt_le_ge; 13789 13790 my %binary_bond_strength; 13791 my %nobreak_lhs; 13792 my %nobreak_rhs; 13793 13794 my @bias_tokens; 13795 my $delta_bias; 13796 13797 sub bias_table_key { 13798 my ( $type, $token ) = @_; 13799 my $bias_table_key = $type; 13800 if ( $type eq 'k' ) { 13801 $bias_table_key = $token; 13802 if ( $token eq 'err' ) { $bias_table_key = 'or' } 13803 } 13804 return $bias_table_key; 13805 } 13806 13807 sub set_bond_strengths { 13808 13809 BEGIN { 13810 13811 @_ = qw(if unless while until for foreach); 13812 @is_good_keyword_breakpoint{@_} = (1) x scalar(@_); 13813 13814 @_ = qw(lt gt le ge); 13815 @is_lt_gt_le_ge{@_} = (1) x scalar(@_); 13816 # 13817 # The decision about where to break a line depends upon a "bond 13818 # strength" between tokens. The LOWER the bond strength, the MORE 13819 # likely a break. A bond strength may be any value but to simplify 13820 # things there are several pre-defined strength levels: 13821 13822 # NO_BREAK => 10000; 13823 # VERY_STRONG => 100; 13824 # STRONG => 2.1; 13825 # NOMINAL => 1.1; 13826 # WEAK => 0.8; 13827 # VERY_WEAK => 0.55; 13828 13829 # The strength values are based on trial-and-error, and need to be 13830 # tweaked occasionally to get desired results. Some comments: 13831 # 13832 # 1. Only relative strengths are important. small differences 13833 # in strengths can make big formatting differences. 13834 # 2. Each indentation level adds one unit of bond strength. 13835 # 3. A value of NO_BREAK makes an unbreakable bond 13836 # 4. A value of VERY_WEAK is the strength of a ',' 13837 # 5. Values below NOMINAL are considered ok break points. 13838 # 6. Values above NOMINAL are considered poor break points. 13839 # 13840 # The bond strengths should roughly follow precenence order where 13841 # possible. If you make changes, please check the results very 13842 # carefully on a variety of scripts. Testing with the -extrude 13843 # options is particularly helpful in exercising all of the rules. 13844 13845 # Wherever possible, bond strengths are defined in the following 13846 # tables. There are two main stages to setting bond strengths and 13847 # two types of tables: 13848 # 13849 # The first stage involves looking at each token individually and 13850 # defining left and right bond strengths, according to if we want 13851 # to break to the left or right side, and how good a break point it 13852 # is. For example tokens like =, ||, && make good break points and 13853 # will have low strengths, but one might want to break on either 13854 # side to put them at the end of one line or beginning of the next. 13855 # 13856 # The second stage involves looking at certain pairs of tokens and 13857 # defining a bond strength for that particular pair. This second 13858 # stage has priority. 13859 13860 #--------------------------------------------------------------- 13861 # Bond Strength BEGIN Section 1. 13862 # Set left and right bond strengths of individual tokens. 13863 #--------------------------------------------------------------- 13864 13865 # NOTE: NO_BREAK's set in this section first are HINTS which will 13866 # probably not be honored. Essential NO_BREAKS's should be set in 13867 # BEGIN Section 2 or hardwired in the NO_BREAK coding near the end 13868 # of this subroutine. 13869 13870 # Note that we are setting defaults in this section. The user 13871 # cannot change bond strengths but can cause the left and right 13872 # bond strengths of any token type to be swapped through the use of 13873 # the -wba and -wbb flags. In this way the user can determine if a 13874 # breakpoint token should appear at the end of one line or the 13875 # beginning of the next line. 13876 13877 # The hash keys in this section are token types, plus the text of 13878 # certain keywords like 'or', 'and'. 13879 13880 # no break around possible filehandle 13881 $left_bond_strength{'Z'} = NO_BREAK; 13882 $right_bond_strength{'Z'} = NO_BREAK; 13883 13884 # never put a bare word on a new line: 13885 # example print (STDERR, "bla"); will fail with break after ( 13886 $left_bond_strength{'w'} = NO_BREAK; 13887 13888 # blanks always have infinite strength to force breaks after 13889 # real tokens 13890 $right_bond_strength{'b'} = NO_BREAK; 13891 13892 # try not to break on exponentation 13893 @_ = qw" ** .. ... <=> "; 13894 @left_bond_strength{@_} = (STRONG) x scalar(@_); 13895 @right_bond_strength{@_} = (STRONG) x scalar(@_); 13896 13897 # The comma-arrow has very low precedence but not a good break point 13898 $left_bond_strength{'=>'} = NO_BREAK; 13899 $right_bond_strength{'=>'} = NOMINAL; 13900 13901 # ok to break after label 13902 $left_bond_strength{'J'} = NO_BREAK; 13903 $right_bond_strength{'J'} = NOMINAL; 13904 $left_bond_strength{'j'} = STRONG; 13905 $right_bond_strength{'j'} = STRONG; 13906 $left_bond_strength{'A'} = STRONG; 13907 $right_bond_strength{'A'} = STRONG; 13908 13909 $left_bond_strength{'->'} = STRONG; 13910 $right_bond_strength{'->'} = VERY_STRONG; 13911 13912 $left_bond_strength{'CORE::'} = NOMINAL; 13913 $right_bond_strength{'CORE::'} = NO_BREAK; 13914 13915 # breaking AFTER modulus operator is ok: 13916 @_ = qw" % "; 13917 @left_bond_strength{@_} = (STRONG) x scalar(@_); 13918 @right_bond_strength{@_} = 13919 ( 0.1 * NOMINAL + 0.9 * STRONG ) x scalar(@_); 13920 13921 # Break AFTER math operators * and / 13922 @_ = qw" * / x "; 13923 @left_bond_strength{@_} = (STRONG) x scalar(@_); 13924 @right_bond_strength{@_} = (NOMINAL) x scalar(@_); 13925 13926 # Break AFTER weakest math operators + and - 13927 # Make them weaker than * but a bit stronger than '.' 13928 @_ = qw" + - "; 13929 @left_bond_strength{@_} = (STRONG) x scalar(@_); 13930 @right_bond_strength{@_} = 13931 ( 0.91 * NOMINAL + 0.09 * WEAK ) x scalar(@_); 13932 13933 # breaking BEFORE these is just ok: 13934 @_ = qw" >> << "; 13935 @right_bond_strength{@_} = (STRONG) x scalar(@_); 13936 @left_bond_strength{@_} = (NOMINAL) x scalar(@_); 13937 13938 # breaking before the string concatenation operator seems best 13939 # because it can be hard to see at the end of a line 13940 $right_bond_strength{'.'} = STRONG; 13941 $left_bond_strength{'.'} = 0.9 * NOMINAL + 0.1 * WEAK; 13942 13943 @_ = qw"} ] ) R"; 13944 @left_bond_strength{@_} = (STRONG) x scalar(@_); 13945 @right_bond_strength{@_} = (NOMINAL) x scalar(@_); 13946 13947 # make these a little weaker than nominal so that they get 13948 # favored for end-of-line characters 13949 @_ = qw"!= == =~ !~ ~~ !~~"; 13950 @left_bond_strength{@_} = (STRONG) x scalar(@_); 13951 @right_bond_strength{@_} = 13952 ( 0.9 * NOMINAL + 0.1 * WEAK ) x scalar(@_); 13953 13954 # break AFTER these 13955 @_ = qw" < > | & >= <="; 13956 @left_bond_strength{@_} = (VERY_STRONG) x scalar(@_); 13957 @right_bond_strength{@_} = 13958 ( 0.8 * NOMINAL + 0.2 * WEAK ) x scalar(@_); 13959 13960 # breaking either before or after a quote is ok 13961 # but bias for breaking before a quote 13962 $left_bond_strength{'Q'} = NOMINAL; 13963 $right_bond_strength{'Q'} = NOMINAL + 0.02; 13964 $left_bond_strength{'q'} = NOMINAL; 13965 $right_bond_strength{'q'} = NOMINAL; 13966 13967 # starting a line with a keyword is usually ok 13968 $left_bond_strength{'k'} = NOMINAL; 13969 13970 # we usually want to bond a keyword strongly to what immediately 13971 # follows, rather than leaving it stranded at the end of a line 13972 $right_bond_strength{'k'} = STRONG; 13973 13974 $left_bond_strength{'G'} = NOMINAL; 13975 $right_bond_strength{'G'} = STRONG; 13976 13977 # assignment operators 13978 @_ = qw( 13979 = **= += *= &= <<= &&= 13980 -= /= |= >>= ||= //= 13981 .= %= ^= 13982 x= 13983 ); 13984 13985 # Default is is to break AFTER various assignment operators 13986 @left_bond_strength{@_} = (STRONG) x scalar(@_); 13987 @right_bond_strength{@_} = 13988 ( 0.4 * WEAK + 0.6 * VERY_WEAK ) x scalar(@_); 13989 13990 # Default is to break BEFORE '&&' and '||' and '//' 13991 # set strength of '||' to same as '=' so that chains like 13992 # $a = $b || $c || $d will break before the first '||' 13993 $right_bond_strength{'||'} = NOMINAL; 13994 $left_bond_strength{'||'} = $right_bond_strength{'='}; 13995 13996 # same thing for '//' 13997 $right_bond_strength{'//'} = NOMINAL; 13998 $left_bond_strength{'//'} = $right_bond_strength{'='}; 13999 14000 # set strength of && a little higher than || 14001 $right_bond_strength{'&&'} = NOMINAL; 14002 $left_bond_strength{'&&'} = $left_bond_strength{'||'} + 0.1; 14003 14004 $left_bond_strength{';'} = VERY_STRONG; 14005 $right_bond_strength{';'} = VERY_WEAK; 14006 $left_bond_strength{'f'} = VERY_STRONG; 14007 14008 # make right strength of for ';' a little less than '=' 14009 # to make for contents break after the ';' to avoid this: 14010 # for ( $j = $number_of_fields - 1 ; $j < $item_count ; $j += 14011 # $number_of_fields ) 14012 # and make it weaker than ',' and 'and' too 14013 $right_bond_strength{'f'} = VERY_WEAK - 0.03; 14014 14015 # The strengths of ?/: should be somewhere between 14016 # an '=' and a quote (NOMINAL), 14017 # make strength of ':' slightly less than '?' to help 14018 # break long chains of ? : after the colons 14019 $left_bond_strength{':'} = 0.4 * WEAK + 0.6 * NOMINAL; 14020 $right_bond_strength{':'} = NO_BREAK; 14021 $left_bond_strength{'?'} = $left_bond_strength{':'} + 0.01; 14022 $right_bond_strength{'?'} = NO_BREAK; 14023 14024 $left_bond_strength{','} = VERY_STRONG; 14025 $right_bond_strength{','} = VERY_WEAK; 14026 14027 # remaining digraphs and trigraphs not defined above 14028 @_ = qw( :: <> ++ --); 14029 @left_bond_strength{@_} = (WEAK) x scalar(@_); 14030 @right_bond_strength{@_} = (STRONG) x scalar(@_); 14031 14032 # Set bond strengths of certain keywords 14033 # make 'or', 'err', 'and' slightly weaker than a ',' 14034 $left_bond_strength{'and'} = VERY_WEAK - 0.01; 14035 $left_bond_strength{'or'} = VERY_WEAK - 0.02; 14036 $left_bond_strength{'err'} = VERY_WEAK - 0.02; 14037 $left_bond_strength{'xor'} = NOMINAL; 14038 $right_bond_strength{'and'} = NOMINAL; 14039 $right_bond_strength{'or'} = NOMINAL; 14040 $right_bond_strength{'err'} = NOMINAL; 14041 $right_bond_strength{'xor'} = STRONG; 14042 14043 #--------------------------------------------------------------- 14044 # Bond Strength BEGIN Section 2. 14045 # Set binary rules for bond strengths between certain token types. 14046 #--------------------------------------------------------------- 14047 14048 # We have a little problem making tables which apply to the 14049 # container tokens. Here is a list of container tokens and 14050 # their types: 14051 # 14052 # type tokens // meaning 14053 # { {, [, ( // indent 14054 # } }, ], ) // outdent 14055 # [ [ // left non-structural [ (enclosing an array index) 14056 # ] ] // right non-structural square bracket 14057 # ( ( // left non-structural paren 14058 # ) ) // right non-structural paren 14059 # L { // left non-structural curly brace (enclosing a key) 14060 # R } // right non-structural curly brace 14061 # 14062 # Some rules apply to token types and some to just the token 14063 # itself. We solve the problem by combining type and token into a 14064 # new hash key for the container types. 14065 # 14066 # If a rule applies to a token 'type' then we need to make rules 14067 # for each of these 'type.token' combinations: 14068 # Type Type.Token 14069 # { {{, {[, {( 14070 # [ [[ 14071 # ( (( 14072 # L L{ 14073 # } }}, }], }) 14074 # ] ]] 14075 # ) )) 14076 # R R} 14077 # 14078 # If a rule applies to a token then we need to make rules for 14079 # these 'type.token' combinations: 14080 # Token Type.Token 14081 # { {{, L{ 14082 # [ {[, [[ 14083 # ( {(, (( 14084 # } }}, R} 14085 # ] }], ]] 14086 # ) }), )) 14087 14088 # allow long lines before final { in an if statement, as in: 14089 # if (.......... 14090 # ..........) 14091 # { 14092 # 14093 # Otherwise, the line before the { tends to be too short. 14094 14095 $binary_bond_strength{'))'}{'{{'} = VERY_WEAK + 0.03; 14096 $binary_bond_strength{'(('}{'{{'} = NOMINAL; 14097 14098 # break on something like '} (', but keep this stronger than a ',' 14099 # example is in 'howe.pl' 14100 $binary_bond_strength{'R}'}{'(('} = 0.8 * VERY_WEAK + 0.2 * WEAK; 14101 $binary_bond_strength{'}}'}{'(('} = 0.8 * VERY_WEAK + 0.2 * WEAK; 14102 14103 # keep matrix and hash indices together 14104 # but make them a little below STRONG to allow breaking open 14105 # something like {'some-word'}{'some-very-long-word'} at the }{ 14106 # (bracebrk.t) 14107 $binary_bond_strength{']]'}{'[['} = 0.9 * STRONG + 0.1 * NOMINAL; 14108 $binary_bond_strength{']]'}{'L{'} = 0.9 * STRONG + 0.1 * NOMINAL; 14109 $binary_bond_strength{'R}'}{'[['} = 0.9 * STRONG + 0.1 * NOMINAL; 14110 $binary_bond_strength{'R}'}{'L{'} = 0.9 * STRONG + 0.1 * NOMINAL; 14111 14112 # increase strength to the point where a break in the following 14113 # will be after the opening paren rather than at the arrow: 14114 # $a->$b($c); 14115 $binary_bond_strength{'i'}{'->'} = 1.45 * STRONG; 14116 14117 $binary_bond_strength{'))'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL; 14118 $binary_bond_strength{']]'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL; 14119 $binary_bond_strength{'})'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL; 14120 $binary_bond_strength{'}]'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL; 14121 $binary_bond_strength{'}}'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL; 14122 $binary_bond_strength{'R}'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL; 14123 14124 $binary_bond_strength{'))'}{'[['} = 0.2 * STRONG + 0.8 * NOMINAL; 14125 $binary_bond_strength{'})'}{'[['} = 0.2 * STRONG + 0.8 * NOMINAL; 14126 $binary_bond_strength{'))'}{'{['} = 0.2 * STRONG + 0.8 * NOMINAL; 14127 $binary_bond_strength{'})'}{'{['} = 0.2 * STRONG + 0.8 * NOMINAL; 14128 14129 #--------------------------------------------------------------- 14130 # Binary NO_BREAK rules 14131 #--------------------------------------------------------------- 14132 14133 # use strict requires that bare word and => not be separated 14134 $binary_bond_strength{'C'}{'=>'} = NO_BREAK; 14135 $binary_bond_strength{'U'}{'=>'} = NO_BREAK; 14136 14137 # Never break between a bareword and a following paren because 14138 # perl may give an error. For example, if a break is placed 14139 # between 'to_filehandle' and its '(' the following line will 14140 # give a syntax error [Carp.pm]: my( $no) =fileno( 14141 # to_filehandle( $in)) ; 14142 $binary_bond_strength{'C'}{'(('} = NO_BREAK; 14143 $binary_bond_strength{'C'}{'{('} = NO_BREAK; 14144 $binary_bond_strength{'U'}{'(('} = NO_BREAK; 14145 $binary_bond_strength{'U'}{'{('} = NO_BREAK; 14146 14147 # use strict requires that bare word within braces not start new 14148 # line 14149 $binary_bond_strength{'L{'}{'w'} = NO_BREAK; 14150 14151 $binary_bond_strength{'w'}{'R}'} = NO_BREAK; 14152 14153 # use strict requires that bare word and => not be separated 14154 $binary_bond_strength{'w'}{'=>'} = NO_BREAK; 14155 14156 # use strict does not allow separating type info from trailing { } 14157 # testfile is readmail.pl 14158 $binary_bond_strength{'t'}{'L{'} = NO_BREAK; 14159 $binary_bond_strength{'i'}{'L{'} = NO_BREAK; 14160 14161 # As a defensive measure, do not break between a '(' and a 14162 # filehandle. In some cases, this can cause an error. For 14163 # example, the following program works: 14164 # my $msg="hi!\n"; 14165 # print 14166 # ( STDOUT 14167 # $msg 14168 # ); 14169 # 14170 # But this program fails: 14171 # my $msg="hi!\n"; 14172 # print 14173 # ( 14174 # STDOUT 14175 # $msg 14176 # ); 14177 # 14178 # This is normally only a problem with the 'extrude' option 14179 $binary_bond_strength{'(('}{'Y'} = NO_BREAK; 14180 $binary_bond_strength{'{('}{'Y'} = NO_BREAK; 14181 14182 # never break between sub name and opening paren 14183 $binary_bond_strength{'w'}{'(('} = NO_BREAK; 14184 $binary_bond_strength{'w'}{'{('} = NO_BREAK; 14185 14186 # keep '}' together with ';' 14187 $binary_bond_strength{'}}'}{';'} = NO_BREAK; 14188 14189 # Breaking before a ++ can cause perl to guess wrong. For 14190 # example the following line will cause a syntax error 14191 # with -extrude if we break between '$i' and '++' [fixstyle2] 14192 # print( ( $i++ & 1 ) ? $_ : ( $change{$_} || $_ ) ); 14193 $nobreak_lhs{'++'} = NO_BREAK; 14194 14195 # Do not break before a possible file handle 14196 $nobreak_lhs{'Z'} = NO_BREAK; 14197 14198 # use strict hates bare words on any new line. For 14199 # example, a break before the underscore here provokes the 14200 # wrath of use strict: 14201 # if ( -r $fn && ( -s _ || $AllowZeroFilesize)) { 14202 $nobreak_rhs{'F'} = NO_BREAK; 14203 $nobreak_rhs{'CORE::'} = NO_BREAK; 14204 14205 #--------------------------------------------------------------- 14206 # Bond Strength BEGIN Section 3. 14207 # Define tables and values for applying a small bias to the above 14208 # values. 14209 #--------------------------------------------------------------- 14210 # Adding a small 'bias' to strengths is a simple way to make a line 14211 # break at the first of a sequence of identical terms. For 14212 # example, to force long string of conditional operators to break 14213 # with each line ending in a ':', we can add a small number to the 14214 # bond strength of each ':' (colon.t) 14215 @bias_tokens = qw( : && || f and or . ); # tokens which get bias 14216 $delta_bias = 0.0001; # a very small strength level 14217 14218 } ## end BEGIN 14219 14220 # patch-its always ok to break at end of line 14221 $nobreak_to_go[$max_index_to_go] = 0; 14222 14223 # we start a new set of bias values for each line 14224 my %bias; 14225 @bias{@bias_tokens} = (0) x scalar(@bias_tokens); 14226 my $code_bias = -.01; # bias for closing block braces 14227 14228 my $type = 'b'; 14229 my $token = ' '; 14230 my $last_type; 14231 my $last_nonblank_type = $type; 14232 my $last_nonblank_token = $token; 14233 my $list_str = $left_bond_strength{'?'}; 14234 14235 my ( $block_type, $i_next, $i_next_nonblank, $next_nonblank_token, 14236 $next_nonblank_type, $next_token, $next_type, $total_nesting_depth, 14237 ); 14238 14239 # main loop to compute bond strengths between each pair of tokens 14240 for ( my $i = 0 ; $i <= $max_index_to_go ; $i++ ) { 14241 $last_type = $type; 14242 if ( $type ne 'b' ) { 14243 $last_nonblank_type = $type; 14244 $last_nonblank_token = $token; 14245 } 14246 $type = $types_to_go[$i]; 14247 14248 # strength on both sides of a blank is the same 14249 if ( $type eq 'b' && $last_type ne 'b' ) { 14250 $bond_strength_to_go[$i] = $bond_strength_to_go[ $i - 1 ]; 14251 next; 14252 } 14253 14254 $token = $tokens_to_go[$i]; 14255 $block_type = $block_type_to_go[$i]; 14256 $i_next = $i + 1; 14257 $next_type = $types_to_go[$i_next]; 14258 $next_token = $tokens_to_go[$i_next]; 14259 $total_nesting_depth = $nesting_depth_to_go[$i_next]; 14260 $i_next_nonblank = ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 ); 14261 $next_nonblank_type = $types_to_go[$i_next_nonblank]; 14262 $next_nonblank_token = $tokens_to_go[$i_next_nonblank]; 14263 14264 # We are computing the strength of the bond between the current 14265 # token and the NEXT token. 14266 14267 #--------------------------------------------------------------- 14268 # Bond Strength Section 1: 14269 # First Approximation. 14270 # Use minimum of individual left and right tabulated bond 14271 # strengths. 14272 #--------------------------------------------------------------- 14273 my $bsr = $right_bond_strength{$type}; 14274 my $bsl = $left_bond_strength{$next_nonblank_type}; 14275 14276 # define right bond strengths of certain keywords 14277 if ( $type eq 'k' && defined( $right_bond_strength{$token} ) ) { 14278 $bsr = $right_bond_strength{$token}; 14279 } 14280 elsif ( $token eq 'ne' or $token eq 'eq' ) { 14281 $bsr = NOMINAL; 14282 } 14283 14284 # set terminal bond strength to the nominal value 14285 # this will cause good preceding breaks to be retained 14286 if ( $i_next_nonblank > $max_index_to_go ) { 14287 $bsl = NOMINAL; 14288 } 14289 14290 # define right bond strengths of certain keywords 14291 if ( $next_nonblank_type eq 'k' 14292 && defined( $left_bond_strength{$next_nonblank_token} ) ) 14293 { 14294 $bsl = $left_bond_strength{$next_nonblank_token}; 14295 } 14296 elsif ($next_nonblank_token eq 'ne' 14297 or $next_nonblank_token eq 'eq' ) 14298 { 14299 $bsl = NOMINAL; 14300 } 14301 elsif ( $is_lt_gt_le_ge{$next_nonblank_token} ) { 14302 $bsl = 0.9 * NOMINAL + 0.1 * STRONG; 14303 } 14304 14305 # Use the minimum of the left and right strengths. Note: it might 14306 # seem that we would want to keep a NO_BREAK if either token has 14307 # this value. This didn't work, for example because in an arrow 14308 # list, it prevents the comma from separating from the following 14309 # bare word (which is probably quoted by its arrow). So necessary 14310 # NO_BREAK's have to be handled as special cases in the final 14311 # section. 14312 if ( !defined($bsr) ) { $bsr = VERY_STRONG } 14313 if ( !defined($bsl) ) { $bsl = VERY_STRONG } 14314 my $bond_str = ( $bsr < $bsl ) ? $bsr : $bsl; 14315 my $bond_str_1 = $bond_str; 14316 14317 #--------------------------------------------------------------- 14318 # Bond Strength Section 2: 14319 # Apply hardwired rules.. 14320 #--------------------------------------------------------------- 14321 14322 # Patch to put terminal or clauses on a new line: Weaken the bond 14323 # at an || followed by die or similar keyword to make the terminal 14324 # or clause fall on a new line, like this: 14325 # 14326 # my $class = shift 14327 # || die "Cannot add broadcast: No class identifier found"; 14328 # 14329 # Otherwise the break will be at the previous '=' since the || and 14330 # = have the same starting strength and the or is biased, like 14331 # this: 14332 # 14333 # my $class = 14334 # shift || die "Cannot add broadcast: No class identifier found"; 14335 # 14336 # In any case if the user places a break at either the = or the || 14337 # it should remain there. 14338 if ( $type eq '||' || $type eq 'k' && $token eq 'or' ) { 14339 if ( $next_nonblank_token =~ /^(die|confess|croak|warn)$/ ) { 14340 if ( $want_break_before{$token} && $i > 0 ) { 14341 $bond_strength_to_go[ $i - 1 ] -= $delta_bias; 14342 } 14343 else { 14344 $bond_str -= $delta_bias; 14345 } 14346 } 14347 } 14348 14349 # good to break after end of code blocks 14350 if ( $type eq '}' && $block_type && $next_nonblank_type ne ';' ) { 14351 14352 $bond_str = 0.5 * WEAK + 0.5 * VERY_WEAK + $code_bias; 14353 $code_bias += $delta_bias; 14354 } 14355 14356 if ( $type eq 'k' ) { 14357 14358 # allow certain control keywords to stand out 14359 if ( $next_nonblank_type eq 'k' 14360 && $is_last_next_redo_return{$token} ) 14361 { 14362 $bond_str = 0.45 * WEAK + 0.55 * VERY_WEAK; 14363 } 14364 14365 # Don't break after keyword my. This is a quick fix for a 14366 # rare problem with perl. An example is this line from file 14367 # Container.pm: 14368 14369 # foreach my $question( Debian::DebConf::ConfigDb::gettree( 14370 # $this->{'question'} ) ) 14371 14372 if ( $token eq 'my' ) { 14373 $bond_str = NO_BREAK; 14374 } 14375 14376 } 14377 14378 # good to break before 'if', 'unless', etc 14379 if ( $is_if_brace_follower{$next_nonblank_token} ) { 14380 $bond_str = VERY_WEAK; 14381 } 14382 14383 if ( $next_nonblank_type eq 'k' && $type ne 'CORE::' ) { 14384 14385 # FIXME: needs more testing 14386 if ( $is_keyword_returning_list{$next_nonblank_token} ) { 14387 $bond_str = $list_str if ( $bond_str > $list_str ); 14388 } 14389 14390 # keywords like 'unless', 'if', etc, within statements 14391 # make good breaks 14392 if ( $is_good_keyword_breakpoint{$next_nonblank_token} ) { 14393 $bond_str = VERY_WEAK / 1.05; 14394 } 14395 } 14396 14397 # try not to break before a comma-arrow 14398 elsif ( $next_nonblank_type eq '=>' ) { 14399 if ( $bond_str < STRONG ) { $bond_str = STRONG } 14400 } 14401 14402 #--------------------------------------------------------------- 14403 # Additional hardwired NOBREAK rules 14404 #--------------------------------------------------------------- 14405 14406 # map1.t -- correct for a quirk in perl 14407 if ( $token eq '(' 14408 && $next_nonblank_type eq 'i' 14409 && $last_nonblank_type eq 'k' 14410 && $is_sort_map_grep{$last_nonblank_token} ) 14411 14412 # /^(sort|map|grep)$/ ) 14413 { 14414 $bond_str = NO_BREAK; 14415 } 14416 14417 # extrude.t: do not break before paren at: 14418 # -l pid_filename( 14419 if ( $last_nonblank_type eq 'F' && $next_nonblank_token eq '(' ) { 14420 $bond_str = NO_BREAK; 14421 } 14422 14423 # in older version of perl, use strict can cause problems with 14424 # breaks before bare words following opening parens. For example, 14425 # this will fail under older versions if a break is made between 14426 # '(' and 'MAIL': use strict; open( MAIL, "a long filename or 14427 # command"); close MAIL; 14428 if ( $type eq '{' ) { 14429 14430 if ( $token eq '(' && $next_nonblank_type eq 'w' ) { 14431 14432 # but it's fine to break if the word is followed by a '=>' 14433 # or if it is obviously a sub call 14434 my $i_next_next_nonblank = $i_next_nonblank + 1; 14435 my $next_next_type = $types_to_go[$i_next_next_nonblank]; 14436 if ( $next_next_type eq 'b' 14437 && $i_next_nonblank < $max_index_to_go ) 14438 { 14439 $i_next_next_nonblank++; 14440 $next_next_type = $types_to_go[$i_next_next_nonblank]; 14441 } 14442 14443 # We'll check for an old breakpoint and keep a leading 14444 # bareword if it was that way in the input file. 14445 # Presumably it was ok that way. For example, the 14446 # following would remain unchanged: 14447 # 14448 # @months = ( 14449 # January, February, March, April, 14450 # May, June, July, August, 14451 # September, October, November, December, 14452 # ); 14453 # 14454 # This should be sufficient: 14455 if ( 14456 !$old_breakpoint_to_go[$i] 14457 && ( $next_next_type eq ',' 14458 || $next_next_type eq '}' ) 14459 ) 14460 { 14461 $bond_str = NO_BREAK; 14462 } 14463 } 14464 } 14465 14466 # Do not break between a possible filehandle and a ? or / and do 14467 # not introduce a break after it if there is no blank 14468 # (extrude.t) 14469 elsif ( $type eq 'Z' ) { 14470 14471 # dont break.. 14472 if ( 14473 14474 # if there is no blank and we do not want one. Examples: 14475 # print $x++ # do not break after $x 14476 # print HTML"HELLO" # break ok after HTML 14477 ( 14478 $next_type ne 'b' 14479 && defined( $want_left_space{$next_type} ) 14480 && $want_left_space{$next_type} == WS_NO 14481 ) 14482 14483 # or we might be followed by the start of a quote 14484 || $next_nonblank_type =~ /^[\/\?]$/ 14485 ) 14486 { 14487 $bond_str = NO_BREAK; 14488 } 14489 } 14490 14491 # Breaking before a ? before a quote can cause trouble if 14492 # they are not separated by a blank. 14493 # Example: a syntax error occurs if you break before the ? here 14494 # my$logic=join$all?' && ':' || ',@regexps; 14495 # From: Professional_Perl_Programming_Code/multifind.pl 14496 if ( $next_nonblank_type eq '?' ) { 14497 $bond_str = NO_BREAK 14498 if ( $types_to_go[ $i_next_nonblank + 1 ] eq 'Q' ); 14499 } 14500 14501 # Breaking before a . followed by a number 14502 # can cause trouble if there is no intervening space 14503 # Example: a syntax error occurs if you break before the .2 here 14504 # $str .= pack($endian.2, ensurrogate($ord)); 14505 # From: perl58/Unicode.pm 14506 elsif ( $next_nonblank_type eq '.' ) { 14507 $bond_str = NO_BREAK 14508 if ( $types_to_go[ $i_next_nonblank + 1 ] eq 'n' ); 14509 } 14510 14511 # patch to put cuddled elses back together when on multiple 14512 # lines, as in: } \n else \n { \n 14513 if ($rOpts_cuddled_else) { 14514 14515 if ( ( $token eq 'else' ) && ( $next_nonblank_type eq '{' ) 14516 || ( $type eq '}' ) && ( $next_nonblank_token eq 'else' ) ) 14517 { 14518 $bond_str = NO_BREAK; 14519 } 14520 } 14521 my $bond_str_2 = $bond_str; 14522 14523 #--------------------------------------------------------------- 14524 # End of hardwired rules 14525 #--------------------------------------------------------------- 14526 14527 #--------------------------------------------------------------- 14528 # Bond Strength Section 3: 14529 # Apply table rules. These have priority over the above 14530 # hardwired rules. 14531 #--------------------------------------------------------------- 14532 14533 my $tabulated_bond_str; 14534 my $ltype = $type; 14535 my $rtype = $next_nonblank_type; 14536 if ( $token =~ /^[\(\[\{\)\]\}]/ ) { $ltype = $type . $token } 14537 if ( $next_nonblank_token =~ /^[\(\[\{\)\]\}]/ ) { 14538 $rtype = $next_nonblank_type . $next_nonblank_token; 14539 } 14540 14541 if ( $binary_bond_strength{$ltype}{$rtype} ) { 14542 $bond_str = $binary_bond_strength{$ltype}{$rtype}; 14543 $tabulated_bond_str = $bond_str; 14544 } 14545 14546 if ( $nobreak_rhs{$ltype} || $nobreak_lhs{$rtype} ) { 14547 $bond_str = NO_BREAK; 14548 $tabulated_bond_str = $bond_str; 14549 } 14550 my $bond_str_3 = $bond_str; 14551 14552 # If the hardwired rules conflict with the tabulated bond 14553 # strength then there is an inconsistency that should be fixed 14554 FORMATTER_DEBUG_FLAG_BOND_TABLES 14555 && $tabulated_bond_str 14556 && $bond_str_1 14557 && $bond_str_1 != $bond_str_2 14558 && $bond_str_2 != $tabulated_bond_str 14559 && do { 14560 print STDERR 14561"BOND_TABLES: ltype=$ltype rtype=$rtype $bond_str_1->$bond_str_2->$bond_str_3\n"; 14562 }; 14563 14564 #----------------------------------------------------------------- 14565 # Bond Strength Section 4: 14566 # Modify strengths of certain tokens which often occur in sequence 14567 # by adding a small bias to each one in turn so that the breaks 14568 # occur from left to right. 14569 # 14570 # Note that we only changing strengths by small amounts here, 14571 # and usually increasing, so we should not be altering any NO_BREAKs. 14572 # Other routines which check for NO_BREAKs will use a tolerance 14573 # of one to avoid any problem. 14574 #----------------------------------------------------------------- 14575 14576 # The bias tables use special keys 14577 my $left_key = bias_table_key( $type, $token ); 14578 my $right_key = 14579 bias_table_key( $next_nonblank_type, $next_nonblank_token ); 14580 14581 # add any bias set by sub scan_list at old comma break points. 14582 if ( $type eq ',' ) { $bond_str += $bond_strength_to_go[$i] } 14583 14584 # bias left token 14585 elsif ( defined( $bias{$left_key} ) ) { 14586 if ( !$want_break_before{$left_key} ) { 14587 $bias{$left_key} += $delta_bias; 14588 $bond_str += $bias{$left_key}; 14589 } 14590 } 14591 14592 # bias right token 14593 if ( defined( $bias{$right_key} ) ) { 14594 if ( $want_break_before{$right_key} ) { 14595 14596 # for leading '.' align all but 'short' quotes; the idea 14597 # is to not place something like "\n" on a single line. 14598 if ( $right_key eq '.' ) { 14599 unless ( 14600 $last_nonblank_type eq '.' 14601 && ( 14602 length($token) <= 14603 $rOpts_short_concatenation_item_length ) 14604 && ( $token !~ /^[\)\]\}]$/ ) 14605 ) 14606 { 14607 $bias{$right_key} += $delta_bias; 14608 } 14609 } 14610 else { 14611 $bias{$right_key} += $delta_bias; 14612 } 14613 $bond_str += $bias{$right_key}; 14614 } 14615 } 14616 my $bond_str_4 = $bond_str; 14617 14618 #--------------------------------------------------------------- 14619 # Bond Strength Section 5: 14620 # Fifth Approximation. 14621 # Take nesting depth into account by adding the nesting depth 14622 # to the bond strength. 14623 #--------------------------------------------------------------- 14624 my $strength; 14625 14626 if ( defined($bond_str) && !$nobreak_to_go[$i] ) { 14627 if ( $total_nesting_depth > 0 ) { 14628 $strength = $bond_str + $total_nesting_depth; 14629 } 14630 else { 14631 $strength = $bond_str; 14632 } 14633 } 14634 else { 14635 $strength = NO_BREAK; 14636 } 14637 14638 # always break after side comment 14639 if ( $type eq '#' ) { $strength = 0 } 14640 14641 $bond_strength_to_go[$i] = $strength; 14642 14643 FORMATTER_DEBUG_FLAG_BOND && do { 14644 my $str = substr( $token, 0, 15 ); 14645 $str .= ' ' x ( 16 - length($str) ); 14646 print STDOUT 14647"BOND: i=$i $str $type $next_nonblank_type depth=$total_nesting_depth strength=$bond_str_1 -> $bond_str_2 -> $bond_str_3 -> $bond_str_4 $bond_str -> $strength \n"; 14648 }; 14649 } ## end main loop 14650 } ## end sub set_bond_strengths 14651} 14652 14653sub pad_array_to_go { 14654 14655 # to simplify coding in scan_list and set_bond_strengths, it helps 14656 # to create some extra blank tokens at the end of the arrays 14657 $tokens_to_go[ $max_index_to_go + 1 ] = ''; 14658 $tokens_to_go[ $max_index_to_go + 2 ] = ''; 14659 $types_to_go[ $max_index_to_go + 1 ] = 'b'; 14660 $types_to_go[ $max_index_to_go + 2 ] = 'b'; 14661 $nesting_depth_to_go[ $max_index_to_go + 1 ] = 14662 $nesting_depth_to_go[$max_index_to_go]; 14663 14664 # /^[R\}\)\]]$/ 14665 if ( $is_closing_type{ $types_to_go[$max_index_to_go] } ) { 14666 if ( $nesting_depth_to_go[$max_index_to_go] <= 0 ) { 14667 14668 # shouldn't happen: 14669 unless ( get_saw_brace_error() ) { 14670 warning( 14671"Program bug in scan_list: hit nesting error which should have been caught\n" 14672 ); 14673 report_definite_bug(); 14674 } 14675 } 14676 else { 14677 $nesting_depth_to_go[ $max_index_to_go + 1 ] -= 1; 14678 } 14679 } 14680 14681 # /^[L\{\(\[]$/ 14682 elsif ( $is_opening_type{ $types_to_go[$max_index_to_go] } ) { 14683 $nesting_depth_to_go[ $max_index_to_go + 1 ] += 1; 14684 } 14685} 14686 14687{ # begin scan_list 14688 14689 my ( 14690 $block_type, $current_depth, 14691 $depth, $i, 14692 $i_last_nonblank_token, $last_colon_sequence_number, 14693 $last_nonblank_token, $last_nonblank_type, 14694 $last_nonblank_block_type, $last_old_breakpoint_count, 14695 $minimum_depth, $next_nonblank_block_type, 14696 $next_nonblank_token, $next_nonblank_type, 14697 $old_breakpoint_count, $starting_breakpoint_count, 14698 $starting_depth, $token, 14699 $type, $type_sequence, 14700 ); 14701 14702 my ( 14703 @breakpoint_stack, @breakpoint_undo_stack, 14704 @comma_index, @container_type, 14705 @identifier_count_stack, @index_before_arrow, 14706 @interrupted_list, @item_count_stack, 14707 @last_comma_index, @last_dot_index, 14708 @last_nonblank_type, @old_breakpoint_count_stack, 14709 @opening_structure_index_stack, @rfor_semicolon_list, 14710 @has_old_logical_breakpoints, @rand_or_list, 14711 @i_equals, 14712 ); 14713 14714 # routine to define essential variables when we go 'up' to 14715 # a new depth 14716 sub check_for_new_minimum_depth { 14717 my $depth = shift; 14718 if ( $depth < $minimum_depth ) { 14719 14720 $minimum_depth = $depth; 14721 14722 # these arrays need not retain values between calls 14723 $breakpoint_stack[$depth] = $starting_breakpoint_count; 14724 $container_type[$depth] = ""; 14725 $identifier_count_stack[$depth] = 0; 14726 $index_before_arrow[$depth] = -1; 14727 $interrupted_list[$depth] = 1; 14728 $item_count_stack[$depth] = 0; 14729 $last_nonblank_type[$depth] = ""; 14730 $opening_structure_index_stack[$depth] = -1; 14731 14732 $breakpoint_undo_stack[$depth] = undef; 14733 $comma_index[$depth] = undef; 14734 $last_comma_index[$depth] = undef; 14735 $last_dot_index[$depth] = undef; 14736 $old_breakpoint_count_stack[$depth] = undef; 14737 $has_old_logical_breakpoints[$depth] = 0; 14738 $rand_or_list[$depth] = []; 14739 $rfor_semicolon_list[$depth] = []; 14740 $i_equals[$depth] = -1; 14741 14742 # these arrays must retain values between calls 14743 if ( !defined( $has_broken_sublist[$depth] ) ) { 14744 $dont_align[$depth] = 0; 14745 $has_broken_sublist[$depth] = 0; 14746 $want_comma_break[$depth] = 0; 14747 } 14748 } 14749 } 14750 14751 # routine to decide which commas to break at within a container; 14752 # returns: 14753 # $bp_count = number of comma breakpoints set 14754 # $do_not_break_apart = a flag indicating if container need not 14755 # be broken open 14756 sub set_comma_breakpoints { 14757 14758 my $dd = shift; 14759 my $bp_count = 0; 14760 my $do_not_break_apart = 0; 14761 14762 # anything to do? 14763 if ( $item_count_stack[$dd] ) { 14764 14765 # handle commas not in containers... 14766 if ( $dont_align[$dd] ) { 14767 do_uncontained_comma_breaks($dd); 14768 } 14769 14770 # handle commas within containers... 14771 else { 14772 my $fbc = $forced_breakpoint_count; 14773 14774 # always open comma lists not preceded by keywords, 14775 # barewords, identifiers (that is, anything that doesn't 14776 # look like a function call) 14777 my $must_break_open = $last_nonblank_type[$dd] !~ /^[kwiU]$/; 14778 14779 set_comma_breakpoints_do( 14780 $dd, 14781 $opening_structure_index_stack[$dd], 14782 $i, 14783 $item_count_stack[$dd], 14784 $identifier_count_stack[$dd], 14785 $comma_index[$dd], 14786 $next_nonblank_type, 14787 $container_type[$dd], 14788 $interrupted_list[$dd], 14789 \$do_not_break_apart, 14790 $must_break_open, 14791 ); 14792 $bp_count = $forced_breakpoint_count - $fbc; 14793 $do_not_break_apart = 0 if $must_break_open; 14794 } 14795 } 14796 return ( $bp_count, $do_not_break_apart ); 14797 } 14798 14799 sub do_uncontained_comma_breaks { 14800 14801 # Handle commas not in containers... 14802 # This is a catch-all routine for commas that we 14803 # don't know what to do with because the don't fall 14804 # within containers. We will bias the bond strength 14805 # to break at commas which ended lines in the input 14806 # file. This usually works better than just trying 14807 # to put as many items on a line as possible. A 14808 # downside is that if the input file is garbage it 14809 # won't work very well. However, the user can always 14810 # prevent following the old breakpoints with the 14811 # -iob flag. 14812 my $dd = shift; 14813 my $bias = -.01; 14814 my $old_comma_break_count = 0; 14815 foreach my $ii ( @{ $comma_index[$dd] } ) { 14816 if ( $old_breakpoint_to_go[$ii] ) { 14817 $old_comma_break_count++; 14818 $bond_strength_to_go[$ii] = $bias; 14819 14820 # reduce bias magnitude to force breaks in order 14821 $bias *= 0.99; 14822 } 14823 } 14824 14825 # Also put a break before the first comma if 14826 # (1) there was a break there in the input, and 14827 # (2) there was exactly one old break before the first comma break 14828 # (3) OLD: there are multiple old comma breaks 14829 # (3) NEW: there are one or more old comma breaks (see return example) 14830 # 14831 # For example, we will follow the user and break after 14832 # 'print' in this snippet: 14833 # print 14834 # "conformability (Not the same dimension)\n", 14835 # "\t", $have, " is ", text_unit($hu), "\n", 14836 # "\t", $want, " is ", text_unit($wu), "\n", 14837 # ; 14838 # 14839 # Another example, just one comma, where we will break after 14840 # the return: 14841 # return 14842 # $x * cos($a) - $y * sin($a), 14843 # $x * sin($a) + $y * cos($a); 14844 14845 # Breaking a print statement: 14846 # print SAVEOUT 14847 # ( $? & 127 ) ? " (SIG#" . ( $? & 127 ) . ")" : "", 14848 # ( $? & 128 ) ? " -- core dumped" : "", "\n"; 14849 # 14850 # But we will not force a break after the opening paren here 14851 # (causes a blinker): 14852 # $heap->{stream}->set_output_filter( 14853 # poe::filter::reference->new('myotherfreezer') ), 14854 # ; 14855 # 14856 my $i_first_comma = $comma_index[$dd]->[0]; 14857 if ( $old_breakpoint_to_go[$i_first_comma] ) { 14858 my $level_comma = $levels_to_go[$i_first_comma]; 14859 my $ibreak = -1; 14860 my $obp_count = 0; 14861 for ( my $ii = $i_first_comma - 1 ; $ii >= 0 ; $ii -= 1 ) { 14862 if ( $old_breakpoint_to_go[$ii] ) { 14863 $obp_count++; 14864 last if ( $obp_count > 1 ); 14865 $ibreak = $ii 14866 if ( $levels_to_go[$ii] == $level_comma ); 14867 } 14868 } 14869 14870 # Changed rule from multiple old commas to just one here: 14871 if ( $ibreak >= 0 && $obp_count == 1 && $old_comma_break_count > 0 ) 14872 { 14873 # Do not to break before an opening token because 14874 # it can lead to "blinkers". 14875 my $ibreakm = $ibreak; 14876 $ibreakm-- if ( $types_to_go[$ibreakm] eq 'b' ); 14877 if ( $ibreakm >= 0 && $types_to_go[$ibreakm] !~ /^[\(\{\[L]$/ ) 14878 { 14879 set_forced_breakpoint($ibreak); 14880 } 14881 } 14882 } 14883 } 14884 14885 my %is_logical_container; 14886 14887 BEGIN { 14888 @_ = qw# if elsif unless while and or err not && | || ? : ! #; 14889 @is_logical_container{@_} = (1) x scalar(@_); 14890 } 14891 14892 sub set_for_semicolon_breakpoints { 14893 my $dd = shift; 14894 foreach ( @{ $rfor_semicolon_list[$dd] } ) { 14895 set_forced_breakpoint($_); 14896 } 14897 } 14898 14899 sub set_logical_breakpoints { 14900 my $dd = shift; 14901 if ( 14902 $item_count_stack[$dd] == 0 14903 && $is_logical_container{ $container_type[$dd] } 14904 14905 || $has_old_logical_breakpoints[$dd] 14906 ) 14907 { 14908 14909 # Look for breaks in this order: 14910 # 0 1 2 3 14911 # or and || && 14912 foreach my $i ( 0 .. 3 ) { 14913 if ( $rand_or_list[$dd][$i] ) { 14914 foreach ( @{ $rand_or_list[$dd][$i] } ) { 14915 set_forced_breakpoint($_); 14916 } 14917 14918 # break at any 'if' and 'unless' too 14919 foreach ( @{ $rand_or_list[$dd][4] } ) { 14920 set_forced_breakpoint($_); 14921 } 14922 $rand_or_list[$dd] = []; 14923 last; 14924 } 14925 } 14926 } 14927 } 14928 14929 sub is_unbreakable_container { 14930 14931 # never break a container of one of these types 14932 # because bad things can happen (map1.t) 14933 my $dd = shift; 14934 $is_sort_map_grep{ $container_type[$dd] }; 14935 } 14936 14937 sub scan_list { 14938 14939 # This routine is responsible for setting line breaks for all lists, 14940 # so that hierarchical structure can be displayed and so that list 14941 # items can be vertically aligned. The output of this routine is 14942 # stored in the array @forced_breakpoint_to_go, which is used to set 14943 # final breakpoints. 14944 14945 $starting_depth = $nesting_depth_to_go[0]; 14946 14947 $block_type = ' '; 14948 $current_depth = $starting_depth; 14949 $i = -1; 14950 $last_colon_sequence_number = -1; 14951 $last_nonblank_token = ';'; 14952 $last_nonblank_type = ';'; 14953 $last_nonblank_block_type = ' '; 14954 $last_old_breakpoint_count = 0; 14955 $minimum_depth = $current_depth + 1; # forces update in check below 14956 $old_breakpoint_count = 0; 14957 $starting_breakpoint_count = $forced_breakpoint_count; 14958 $token = ';'; 14959 $type = ';'; 14960 $type_sequence = ''; 14961 14962 my $total_depth_variation = 0; 14963 my $i_old_assignment_break; 14964 my $depth_last = $starting_depth; 14965 14966 check_for_new_minimum_depth($current_depth); 14967 14968 my $is_long_line = excess_line_length( 0, $max_index_to_go ) > 0; 14969 my $want_previous_breakpoint = -1; 14970 14971 my $saw_good_breakpoint; 14972 my $i_line_end = -1; 14973 my $i_line_start = -1; 14974 14975 # loop over all tokens in this batch 14976 while ( ++$i <= $max_index_to_go ) { 14977 if ( $type ne 'b' ) { 14978 $i_last_nonblank_token = $i - 1; 14979 $last_nonblank_type = $type; 14980 $last_nonblank_token = $token; 14981 $last_nonblank_block_type = $block_type; 14982 } ## end if ( $type ne 'b' ) 14983 $type = $types_to_go[$i]; 14984 $block_type = $block_type_to_go[$i]; 14985 $token = $tokens_to_go[$i]; 14986 $type_sequence = $type_sequence_to_go[$i]; 14987 my $next_type = $types_to_go[ $i + 1 ]; 14988 my $next_token = $tokens_to_go[ $i + 1 ]; 14989 my $i_next_nonblank = ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 ); 14990 $next_nonblank_type = $types_to_go[$i_next_nonblank]; 14991 $next_nonblank_token = $tokens_to_go[$i_next_nonblank]; 14992 $next_nonblank_block_type = $block_type_to_go[$i_next_nonblank]; 14993 14994 # set break if flag was set 14995 if ( $want_previous_breakpoint >= 0 ) { 14996 set_forced_breakpoint($want_previous_breakpoint); 14997 $want_previous_breakpoint = -1; 14998 } 14999 15000 $last_old_breakpoint_count = $old_breakpoint_count; 15001 if ( $old_breakpoint_to_go[$i] ) { 15002 $i_line_end = $i; 15003 $i_line_start = $i_next_nonblank; 15004 15005 $old_breakpoint_count++; 15006 15007 # Break before certain keywords if user broke there and 15008 # this is a 'safe' break point. The idea is to retain 15009 # any preferred breaks for sequential list operations, 15010 # like a schwartzian transform. 15011 if ($rOpts_break_at_old_keyword_breakpoints) { 15012 if ( 15013 $next_nonblank_type eq 'k' 15014 && $is_keyword_returning_list{$next_nonblank_token} 15015 && ( $type =~ /^[=\)\]\}Riw]$/ 15016 || $type eq 'k' 15017 && $is_keyword_returning_list{$token} ) 15018 ) 15019 { 15020 15021 # we actually have to set this break next time through 15022 # the loop because if we are at a closing token (such 15023 # as '}') which forms a one-line block, this break might 15024 # get undone. 15025 $want_previous_breakpoint = $i; 15026 } ## end if ( $next_nonblank_type...) 15027 } ## end if ($rOpts_break_at_old_keyword_breakpoints) 15028 15029 # Break before attributes if user broke there 15030 if ($rOpts_break_at_old_attribute_breakpoints) { 15031 if ( $next_nonblank_type eq 'A' ) { 15032 $want_previous_breakpoint = $i; 15033 } 15034 } 15035 15036 # remember an = break as possible good break point 15037 if ( $is_assignment{$type} ) { 15038 $i_old_assignment_break = $i; 15039 } 15040 elsif ( $is_assignment{$next_nonblank_type} ) { 15041 $i_old_assignment_break = $i_next_nonblank; 15042 } 15043 } ## end if ( $old_breakpoint_to_go...) 15044 next if ( $type eq 'b' ); 15045 $depth = $nesting_depth_to_go[ $i + 1 ]; 15046 15047 $total_depth_variation += abs( $depth - $depth_last ); 15048 $depth_last = $depth; 15049 15050 # safety check - be sure we always break after a comment 15051 # Shouldn't happen .. an error here probably means that the 15052 # nobreak flag did not get turned off correctly during 15053 # formatting. 15054 if ( $type eq '#' ) { 15055 if ( $i != $max_index_to_go ) { 15056 warning( 15057"Non-fatal program bug: backup logic needed to break after a comment\n" 15058 ); 15059 report_definite_bug(); 15060 $nobreak_to_go[$i] = 0; 15061 set_forced_breakpoint($i); 15062 } ## end if ( $i != $max_index_to_go) 15063 } ## end if ( $type eq '#' ) 15064 15065 # Force breakpoints at certain tokens in long lines. 15066 # Note that such breakpoints will be undone later if these tokens 15067 # are fully contained within parens on a line. 15068 if ( 15069 15070 # break before a keyword within a line 15071 $type eq 'k' 15072 && $i > 0 15073 15074 # if one of these keywords: 15075 && $token =~ /^(if|unless|while|until|for)$/ 15076 15077 # but do not break at something like '1 while' 15078 && ( $last_nonblank_type ne 'n' || $i > 2 ) 15079 15080 # and let keywords follow a closing 'do' brace 15081 && $last_nonblank_block_type ne 'do' 15082 15083 && ( 15084 $is_long_line 15085 15086 # or container is broken (by side-comment, etc) 15087 || ( $next_nonblank_token eq '(' 15088 && $mate_index_to_go[$i_next_nonblank] < $i ) 15089 ) 15090 ) 15091 { 15092 set_forced_breakpoint( $i - 1 ); 15093 } ## end if ( $type eq 'k' && $i...) 15094 15095 # remember locations of '||' and '&&' for possible breaks if we 15096 # decide this is a long logical expression. 15097 if ( $type eq '||' ) { 15098 push @{ $rand_or_list[$depth][2] }, $i; 15099 ++$has_old_logical_breakpoints[$depth] 15100 if ( ( $i == $i_line_start || $i == $i_line_end ) 15101 && $rOpts_break_at_old_logical_breakpoints ); 15102 } ## end if ( $type eq '||' ) 15103 elsif ( $type eq '&&' ) { 15104 push @{ $rand_or_list[$depth][3] }, $i; 15105 ++$has_old_logical_breakpoints[$depth] 15106 if ( ( $i == $i_line_start || $i == $i_line_end ) 15107 && $rOpts_break_at_old_logical_breakpoints ); 15108 } ## end elsif ( $type eq '&&' ) 15109 elsif ( $type eq 'f' ) { 15110 push @{ $rfor_semicolon_list[$depth] }, $i; 15111 } 15112 elsif ( $type eq 'k' ) { 15113 if ( $token eq 'and' ) { 15114 push @{ $rand_or_list[$depth][1] }, $i; 15115 ++$has_old_logical_breakpoints[$depth] 15116 if ( ( $i == $i_line_start || $i == $i_line_end ) 15117 && $rOpts_break_at_old_logical_breakpoints ); 15118 } ## end if ( $token eq 'and' ) 15119 15120 # break immediately at 'or's which are probably not in a logical 15121 # block -- but we will break in logical breaks below so that 15122 # they do not add to the forced_breakpoint_count 15123 elsif ( $token eq 'or' ) { 15124 push @{ $rand_or_list[$depth][0] }, $i; 15125 ++$has_old_logical_breakpoints[$depth] 15126 if ( ( $i == $i_line_start || $i == $i_line_end ) 15127 && $rOpts_break_at_old_logical_breakpoints ); 15128 if ( $is_logical_container{ $container_type[$depth] } ) { 15129 } 15130 else { 15131 if ($is_long_line) { set_forced_breakpoint($i) } 15132 elsif ( ( $i == $i_line_start || $i == $i_line_end ) 15133 && $rOpts_break_at_old_logical_breakpoints ) 15134 { 15135 $saw_good_breakpoint = 1; 15136 } 15137 } ## end else [ if ( $is_logical_container...)] 15138 } ## end elsif ( $token eq 'or' ) 15139 elsif ( $token eq 'if' || $token eq 'unless' ) { 15140 push @{ $rand_or_list[$depth][4] }, $i; 15141 if ( ( $i == $i_line_start || $i == $i_line_end ) 15142 && $rOpts_break_at_old_logical_breakpoints ) 15143 { 15144 set_forced_breakpoint($i); 15145 } 15146 } ## end elsif ( $token eq 'if' ||...) 15147 } ## end elsif ( $type eq 'k' ) 15148 elsif ( $is_assignment{$type} ) { 15149 $i_equals[$depth] = $i; 15150 } 15151 15152 if ($type_sequence) { 15153 15154 # handle any postponed closing breakpoints 15155 if ( $token =~ /^[\)\]\}\:]$/ ) { 15156 if ( $type eq ':' ) { 15157 $last_colon_sequence_number = $type_sequence; 15158 15159 # retain break at a ':' line break 15160 if ( ( $i == $i_line_start || $i == $i_line_end ) 15161 && $rOpts_break_at_old_ternary_breakpoints ) 15162 { 15163 15164 set_forced_breakpoint($i); 15165 15166 # break at previous '=' 15167 if ( $i_equals[$depth] > 0 ) { 15168 set_forced_breakpoint( $i_equals[$depth] ); 15169 $i_equals[$depth] = -1; 15170 } 15171 } ## end if ( ( $i == $i_line_start...)) 15172 } ## end if ( $type eq ':' ) 15173 if ( defined( $postponed_breakpoint{$type_sequence} ) ) { 15174 my $inc = ( $type eq ':' ) ? 0 : 1; 15175 set_forced_breakpoint( $i - $inc ); 15176 delete $postponed_breakpoint{$type_sequence}; 15177 } 15178 } ## end if ( $token =~ /^[\)\]\}\:]$/[{[(]) 15179 15180 # set breaks at ?/: if they will get separated (and are 15181 # not a ?/: chain), or if the '?' is at the end of the 15182 # line 15183 elsif ( $token eq '?' ) { 15184 my $i_colon = $mate_index_to_go[$i]; 15185 if ( 15186 $i_colon <= 0 # the ':' is not in this batch 15187 || $i == 0 # this '?' is the first token of the line 15188 || $i == 15189 $max_index_to_go # or this '?' is the last token 15190 ) 15191 { 15192 15193 # don't break at a '?' if preceded by ':' on 15194 # this line of previous ?/: pair on this line. 15195 # This is an attempt to preserve a chain of ?/: 15196 # expressions (elsif2.t). And don't break if 15197 # this has a side comment. 15198 set_forced_breakpoint($i) 15199 unless ( 15200 $type_sequence == ( 15201 $last_colon_sequence_number + 15202 TYPE_SEQUENCE_INCREMENT 15203 ) 15204 || $tokens_to_go[$max_index_to_go] eq '#' 15205 ); 15206 set_closing_breakpoint($i); 15207 } ## end if ( $i_colon <= 0 ||...) 15208 } ## end elsif ( $token eq '?' ) 15209 } ## end if ($type_sequence) 15210 15211#print "LISTX sees: i=$i type=$type tok=$token block=$block_type depth=$depth\n"; 15212 15213 #------------------------------------------------------------ 15214 # Handle Increasing Depth.. 15215 # 15216 # prepare for a new list when depth increases 15217 # token $i is a '(','{', or '[' 15218 #------------------------------------------------------------ 15219 if ( $depth > $current_depth ) { 15220 15221 $breakpoint_stack[$depth] = $forced_breakpoint_count; 15222 $breakpoint_undo_stack[$depth] = $forced_breakpoint_undo_count; 15223 $has_broken_sublist[$depth] = 0; 15224 $identifier_count_stack[$depth] = 0; 15225 $index_before_arrow[$depth] = -1; 15226 $interrupted_list[$depth] = 0; 15227 $item_count_stack[$depth] = 0; 15228 $last_comma_index[$depth] = undef; 15229 $last_dot_index[$depth] = undef; 15230 $last_nonblank_type[$depth] = $last_nonblank_type; 15231 $old_breakpoint_count_stack[$depth] = $old_breakpoint_count; 15232 $opening_structure_index_stack[$depth] = $i; 15233 $rand_or_list[$depth] = []; 15234 $rfor_semicolon_list[$depth] = []; 15235 $i_equals[$depth] = -1; 15236 $want_comma_break[$depth] = 0; 15237 $container_type[$depth] = 15238 ( $last_nonblank_type =~ /^(k|=>|&&|\|\||\?|\:|\.)$/ ) 15239 ? $last_nonblank_token 15240 : ""; 15241 $has_old_logical_breakpoints[$depth] = 0; 15242 15243 # if line ends here then signal closing token to break 15244 if ( $next_nonblank_type eq 'b' || $next_nonblank_type eq '#' ) 15245 { 15246 set_closing_breakpoint($i); 15247 } 15248 15249 # Not all lists of values should be vertically aligned.. 15250 $dont_align[$depth] = 15251 15252 # code BLOCKS are handled at a higher level 15253 ( $block_type ne "" ) 15254 15255 # certain paren lists 15256 || ( $type eq '(' ) && ( 15257 15258 # it does not usually look good to align a list of 15259 # identifiers in a parameter list, as in: 15260 # my($var1, $var2, ...) 15261 # (This test should probably be refined, for now I'm just 15262 # testing for any keyword) 15263 ( $last_nonblank_type eq 'k' ) 15264 15265 # a trailing '(' usually indicates a non-list 15266 || ( $next_nonblank_type eq '(' ) 15267 ); 15268 15269 # patch to outdent opening brace of long if/for/.. 15270 # statements (like this one). See similar coding in 15271 # set_continuation breaks. We have also catch it here for 15272 # short line fragments which otherwise will not go through 15273 # set_continuation_breaks. 15274 if ( 15275 $block_type 15276 15277 # if we have the ')' but not its '(' in this batch.. 15278 && ( $last_nonblank_token eq ')' ) 15279 && $mate_index_to_go[$i_last_nonblank_token] < 0 15280 15281 # and user wants brace to left 15282 && !$rOpts->{'opening-brace-always-on-right'} 15283 15284 && ( $type eq '{' ) # should be true 15285 && ( $token eq '{' ) # should be true 15286 ) 15287 { 15288 set_forced_breakpoint( $i - 1 ); 15289 } ## end if ( $block_type && ( ...)) 15290 } ## end if ( $depth > $current_depth) 15291 15292 #------------------------------------------------------------ 15293 # Handle Decreasing Depth.. 15294 # 15295 # finish off any old list when depth decreases 15296 # token $i is a ')','}', or ']' 15297 #------------------------------------------------------------ 15298 elsif ( $depth < $current_depth ) { 15299 15300 check_for_new_minimum_depth($depth); 15301 15302 # force all outer logical containers to break after we see on 15303 # old breakpoint 15304 $has_old_logical_breakpoints[$depth] ||= 15305 $has_old_logical_breakpoints[$current_depth]; 15306 15307 # Patch to break between ') {' if the paren list is broken. 15308 # There is similar logic in set_continuation_breaks for 15309 # non-broken lists. 15310 if ( $token eq ')' 15311 && $next_nonblank_block_type 15312 && $interrupted_list[$current_depth] 15313 && $next_nonblank_type eq '{' 15314 && !$rOpts->{'opening-brace-always-on-right'} ) 15315 { 15316 set_forced_breakpoint($i); 15317 } ## end if ( $token eq ')' && ... 15318 15319#print "LISTY sees: i=$i type=$type tok=$token block=$block_type depth=$depth next=$next_nonblank_type next_block=$next_nonblank_block_type inter=$interrupted_list[$current_depth]\n"; 15320 15321 # set breaks at commas if necessary 15322 my ( $bp_count, $do_not_break_apart ) = 15323 set_comma_breakpoints($current_depth); 15324 15325 my $i_opening = $opening_structure_index_stack[$current_depth]; 15326 my $saw_opening_structure = ( $i_opening >= 0 ); 15327 15328 # this term is long if we had to break at interior commas.. 15329 my $is_long_term = $bp_count > 0; 15330 15331 # If this is a short container with one or more comma arrows, 15332 # then we will mark it as a long term to open it if requested. 15333 # $rOpts_comma_arrow_breakpoints = 15334 # 0 - open only if comma precedes closing brace 15335 # 1 - stable: except for one line blocks 15336 # 2 - try to form 1 line blocks 15337 # 3 - ignore => 15338 # 4 - always open up if vt=0 15339 # 5 - stable: even for one line blocks if vt=0 15340 if ( !$is_long_term 15341 && $tokens_to_go[$i_opening] =~ /^[\(\{\]L]$/ 15342 && $index_before_arrow[ $depth + 1 ] > 0 15343 && !$opening_vertical_tightness{ $tokens_to_go[$i_opening] } 15344 ) 15345 { 15346 $is_long_term = $rOpts_comma_arrow_breakpoints == 4 15347 || ( $rOpts_comma_arrow_breakpoints == 0 15348 && $last_nonblank_token eq ',' ) 15349 || ( $rOpts_comma_arrow_breakpoints == 5 15350 && $old_breakpoint_to_go[$i_opening] ); 15351 } ## end if ( !$is_long_term &&...) 15352 15353 # mark term as long if the length between opening and closing 15354 # parens exceeds allowed line length 15355 if ( !$is_long_term && $saw_opening_structure ) { 15356 my $i_opening_minus = find_token_starting_list($i_opening); 15357 15358 # Note: we have to allow for one extra space after a 15359 # closing token so that we do not strand a comma or 15360 # semicolon, hence the '>=' here (oneline.t) 15361 $is_long_term = 15362 excess_line_length( $i_opening_minus, $i ) >= 0; 15363 } ## end if ( !$is_long_term &&...) 15364 15365 # We've set breaks after all comma-arrows. Now we have to 15366 # undo them if this can be a one-line block 15367 # (the only breakpoints set will be due to comma-arrows) 15368 if ( 15369 15370 # user doesn't require breaking after all comma-arrows 15371 ( $rOpts_comma_arrow_breakpoints != 0 ) 15372 && ( $rOpts_comma_arrow_breakpoints != 4 ) 15373 15374 # and if the opening structure is in this batch 15375 && $saw_opening_structure 15376 15377 # and either on the same old line 15378 && ( 15379 $old_breakpoint_count_stack[$current_depth] == 15380 $last_old_breakpoint_count 15381 15382 # or user wants to form long blocks with arrows 15383 || $rOpts_comma_arrow_breakpoints == 2 15384 ) 15385 15386 # and we made some breakpoints between the opening and closing 15387 && ( $breakpoint_undo_stack[$current_depth] < 15388 $forced_breakpoint_undo_count ) 15389 15390 # and this block is short enough to fit on one line 15391 # Note: use < because need 1 more space for possible comma 15392 && !$is_long_term 15393 15394 ) 15395 { 15396 undo_forced_breakpoint_stack( 15397 $breakpoint_undo_stack[$current_depth] ); 15398 } ## end if ( ( $rOpts_comma_arrow_breakpoints...)) 15399 15400 # now see if we have any comma breakpoints left 15401 my $has_comma_breakpoints = 15402 ( $breakpoint_stack[$current_depth] != 15403 $forced_breakpoint_count ); 15404 15405 # update broken-sublist flag of the outer container 15406 $has_broken_sublist[$depth] = 15407 $has_broken_sublist[$depth] 15408 || $has_broken_sublist[$current_depth] 15409 || $is_long_term 15410 || $has_comma_breakpoints; 15411 15412# Having come to the closing ')', '}', or ']', now we have to decide if we 15413# should 'open up' the structure by placing breaks at the opening and 15414# closing containers. This is a tricky decision. Here are some of the 15415# basic considerations: 15416# 15417# -If this is a BLOCK container, then any breakpoints will have already 15418# been set (and according to user preferences), so we need do nothing here. 15419# 15420# -If we have a comma-separated list for which we can align the list items, 15421# then we need to do so because otherwise the vertical aligner cannot 15422# currently do the alignment. 15423# 15424# -If this container does itself contain a container which has been broken 15425# open, then it should be broken open to properly show the structure. 15426# 15427# -If there is nothing to align, and no other reason to break apart, 15428# then do not do it. 15429# 15430# We will not break open the parens of a long but 'simple' logical expression. 15431# For example: 15432# 15433# This is an example of a simple logical expression and its formatting: 15434# 15435# if ( $bigwasteofspace1 && $bigwasteofspace2 15436# || $bigwasteofspace3 && $bigwasteofspace4 ) 15437# 15438# Most people would prefer this than the 'spacey' version: 15439# 15440# if ( 15441# $bigwasteofspace1 && $bigwasteofspace2 15442# || $bigwasteofspace3 && $bigwasteofspace4 15443# ) 15444# 15445# To illustrate the rules for breaking logical expressions, consider: 15446# 15447# FULLY DENSE: 15448# if ( $opt_excl 15449# and ( exists $ids_excl_uc{$id_uc} 15450# or grep $id_uc =~ /$_/, @ids_excl_uc )) 15451# 15452# This is on the verge of being difficult to read. The current default is to 15453# open it up like this: 15454# 15455# DEFAULT: 15456# if ( 15457# $opt_excl 15458# and ( exists $ids_excl_uc{$id_uc} 15459# or grep $id_uc =~ /$_/, @ids_excl_uc ) 15460# ) 15461# 15462# This is a compromise which tries to avoid being too dense and to spacey. 15463# A more spaced version would be: 15464# 15465# SPACEY: 15466# if ( 15467# $opt_excl 15468# and ( 15469# exists $ids_excl_uc{$id_uc} 15470# or grep $id_uc =~ /$_/, @ids_excl_uc 15471# ) 15472# ) 15473# 15474# Some people might prefer the spacey version -- an option could be added. The 15475# innermost expression contains a long block '( exists $ids_... ')'. 15476# 15477# Here is how the logic goes: We will force a break at the 'or' that the 15478# innermost expression contains, but we will not break apart its opening and 15479# closing containers because (1) it contains no multi-line sub-containers itself, 15480# and (2) there is no alignment to be gained by breaking it open like this 15481# 15482# and ( 15483# exists $ids_excl_uc{$id_uc} 15484# or grep $id_uc =~ /$_/, @ids_excl_uc 15485# ) 15486# 15487# (although this looks perfectly ok and might be good for long expressions). The 15488# outer 'if' container, though, contains a broken sub-container, so it will be 15489# broken open to avoid too much density. Also, since it contains no 'or's, there 15490# will be a forced break at its 'and'. 15491 15492 # set some flags telling something about this container.. 15493 my $is_simple_logical_expression = 0; 15494 if ( $item_count_stack[$current_depth] == 0 15495 && $saw_opening_structure 15496 && $tokens_to_go[$i_opening] eq '(' 15497 && $is_logical_container{ $container_type[$current_depth] } 15498 ) 15499 { 15500 15501 # This seems to be a simple logical expression with 15502 # no existing breakpoints. Set a flag to prevent 15503 # opening it up. 15504 if ( !$has_comma_breakpoints ) { 15505 $is_simple_logical_expression = 1; 15506 } 15507 15508 # This seems to be a simple logical expression with 15509 # breakpoints (broken sublists, for example). Break 15510 # at all 'or's and '||'s. 15511 else { 15512 set_logical_breakpoints($current_depth); 15513 } 15514 } ## end if ( $item_count_stack...) 15515 15516 if ( $is_long_term 15517 && @{ $rfor_semicolon_list[$current_depth] } ) 15518 { 15519 set_for_semicolon_breakpoints($current_depth); 15520 15521 # open up a long 'for' or 'foreach' container to allow 15522 # leading term alignment unless -lp is used. 15523 $has_comma_breakpoints = 1 15524 unless $rOpts_line_up_parentheses; 15525 } ## end if ( $is_long_term && ...) 15526 15527 if ( 15528 15529 # breaks for code BLOCKS are handled at a higher level 15530 !$block_type 15531 15532 # we do not need to break at the top level of an 'if' 15533 # type expression 15534 && !$is_simple_logical_expression 15535 15536 ## modification to keep ': (' containers vertically tight; 15537 ## but probably better to let user set -vt=1 to avoid 15538 ## inconsistency with other paren types 15539 ## && ($container_type[$current_depth] ne ':') 15540 15541 # otherwise, we require one of these reasons for breaking: 15542 && ( 15543 15544 # - this term has forced line breaks 15545 $has_comma_breakpoints 15546 15547 # - the opening container is separated from this batch 15548 # for some reason (comment, blank line, code block) 15549 # - this is a non-paren container spanning multiple lines 15550 || !$saw_opening_structure 15551 15552 # - this is a long block contained in another breakable 15553 # container 15554 || ( $is_long_term 15555 && $container_environment_to_go[$i_opening] ne 15556 'BLOCK' ) 15557 ) 15558 ) 15559 { 15560 15561 # For -lp option, we must put a breakpoint before 15562 # the token which has been identified as starting 15563 # this indentation level. This is necessary for 15564 # proper alignment. 15565 if ( $rOpts_line_up_parentheses && $saw_opening_structure ) 15566 { 15567 my $item = $leading_spaces_to_go[ $i_opening + 1 ]; 15568 if ( $i_opening + 1 < $max_index_to_go 15569 && $types_to_go[ $i_opening + 1 ] eq 'b' ) 15570 { 15571 $item = $leading_spaces_to_go[ $i_opening + 2 ]; 15572 } 15573 if ( defined($item) ) { 15574 my $i_start_2 = $item->get_STARTING_INDEX(); 15575 if ( 15576 defined($i_start_2) 15577 15578 # we are breaking after an opening brace, paren, 15579 # so don't break before it too 15580 && $i_start_2 ne $i_opening 15581 ) 15582 { 15583 15584 # Only break for breakpoints at the same 15585 # indentation level as the opening paren 15586 my $test1 = $nesting_depth_to_go[$i_opening]; 15587 my $test2 = $nesting_depth_to_go[$i_start_2]; 15588 if ( $test2 == $test1 ) { 15589 set_forced_breakpoint( $i_start_2 - 1 ); 15590 } 15591 } ## end if ( defined($i_start_2...)) 15592 } ## end if ( defined($item) ) 15593 } ## end if ( $rOpts_line_up_parentheses...) 15594 15595 # break after opening structure. 15596 # note: break before closing structure will be automatic 15597 if ( $minimum_depth <= $current_depth ) { 15598 15599 set_forced_breakpoint($i_opening) 15600 unless ( $do_not_break_apart 15601 || is_unbreakable_container($current_depth) ); 15602 15603 # break at ',' of lower depth level before opening token 15604 if ( $last_comma_index[$depth] ) { 15605 set_forced_breakpoint( $last_comma_index[$depth] ); 15606 } 15607 15608 # break at '.' of lower depth level before opening token 15609 if ( $last_dot_index[$depth] ) { 15610 set_forced_breakpoint( $last_dot_index[$depth] ); 15611 } 15612 15613 # break before opening structure if preeced by another 15614 # closing structure and a comma. This is normally 15615 # done by the previous closing brace, but not 15616 # if it was a one-line block. 15617 if ( $i_opening > 2 ) { 15618 my $i_prev = 15619 ( $types_to_go[ $i_opening - 1 ] eq 'b' ) 15620 ? $i_opening - 2 15621 : $i_opening - 1; 15622 15623 if ( $types_to_go[$i_prev] eq ',' 15624 && $types_to_go[ $i_prev - 1 ] =~ /^[\)\}]$/ ) 15625 { 15626 set_forced_breakpoint($i_prev); 15627 } 15628 15629 # also break before something like ':(' or '?(' 15630 # if appropriate. 15631 elsif ( 15632 $types_to_go[$i_prev] =~ /^([k\:\?]|&&|\|\|)$/ ) 15633 { 15634 my $token_prev = $tokens_to_go[$i_prev]; 15635 if ( $want_break_before{$token_prev} ) { 15636 set_forced_breakpoint($i_prev); 15637 } 15638 } ## end elsif ( $types_to_go[$i_prev...]) 15639 } ## end if ( $i_opening > 2 ) 15640 } ## end if ( $minimum_depth <=...) 15641 15642 # break after comma following closing structure 15643 if ( $next_type eq ',' ) { 15644 set_forced_breakpoint( $i + 1 ); 15645 } 15646 15647 # break before an '=' following closing structure 15648 if ( 15649 $is_assignment{$next_nonblank_type} 15650 && ( $breakpoint_stack[$current_depth] != 15651 $forced_breakpoint_count ) 15652 ) 15653 { 15654 set_forced_breakpoint($i); 15655 } ## end if ( $is_assignment{$next_nonblank_type...}) 15656 15657 # break at any comma before the opening structure Added 15658 # for -lp, but seems to be good in general. It isn't 15659 # obvious how far back to look; the '5' below seems to 15660 # work well and will catch the comma in something like 15661 # push @list, myfunc( $param, $param, .. 15662 15663 my $icomma = $last_comma_index[$depth]; 15664 if ( defined($icomma) && ( $i_opening - $icomma ) < 5 ) { 15665 unless ( $forced_breakpoint_to_go[$icomma] ) { 15666 set_forced_breakpoint($icomma); 15667 } 15668 } 15669 } # end logic to open up a container 15670 15671 # Break open a logical container open if it was already open 15672 elsif ($is_simple_logical_expression 15673 && $has_old_logical_breakpoints[$current_depth] ) 15674 { 15675 set_logical_breakpoints($current_depth); 15676 } 15677 15678 # Handle long container which does not get opened up 15679 elsif ($is_long_term) { 15680 15681 # must set fake breakpoint to alert outer containers that 15682 # they are complex 15683 set_fake_breakpoint(); 15684 } ## end elsif ($is_long_term) 15685 15686 } ## end elsif ( $depth < $current_depth) 15687 15688 #------------------------------------------------------------ 15689 # Handle this token 15690 #------------------------------------------------------------ 15691 15692 $current_depth = $depth; 15693 15694 # handle comma-arrow 15695 if ( $type eq '=>' ) { 15696 next if ( $last_nonblank_type eq '=>' ); 15697 next if $rOpts_break_at_old_comma_breakpoints; 15698 next if $rOpts_comma_arrow_breakpoints == 3; 15699 $want_comma_break[$depth] = 1; 15700 $index_before_arrow[$depth] = $i_last_nonblank_token; 15701 next; 15702 } ## end if ( $type eq '=>' ) 15703 15704 elsif ( $type eq '.' ) { 15705 $last_dot_index[$depth] = $i; 15706 } 15707 15708 # Turn off alignment if we are sure that this is not a list 15709 # environment. To be safe, we will do this if we see certain 15710 # non-list tokens, such as ';', and also the environment is 15711 # not a list. Note that '=' could be in any of the = operators 15712 # (lextest.t). We can't just use the reported environment 15713 # because it can be incorrect in some cases. 15714 elsif ( ( $type =~ /^[\;\<\>\~]$/ || $is_assignment{$type} ) 15715 && $container_environment_to_go[$i] ne 'LIST' ) 15716 { 15717 $dont_align[$depth] = 1; 15718 $want_comma_break[$depth] = 0; 15719 $index_before_arrow[$depth] = -1; 15720 } ## end elsif ( ( $type =~ /^[\;\<\>\~]$/...)) 15721 15722 # now just handle any commas 15723 next unless ( $type eq ',' ); 15724 15725 $last_dot_index[$depth] = undef; 15726 $last_comma_index[$depth] = $i; 15727 15728 # break here if this comma follows a '=>' 15729 # but not if there is a side comment after the comma 15730 if ( $want_comma_break[$depth] ) { 15731 15732 if ( $next_nonblank_type =~ /^[\)\}\]R]$/ ) { 15733 if ($rOpts_comma_arrow_breakpoints) { 15734 $want_comma_break[$depth] = 0; 15735 ##$index_before_arrow[$depth] = -1; 15736 next; 15737 } 15738 } 15739 15740 set_forced_breakpoint($i) unless ( $next_nonblank_type eq '#' ); 15741 15742 # break before the previous token if it looks safe 15743 # Example of something that we will not try to break before: 15744 # DBI::SQL_SMALLINT() => $ado_consts->{adSmallInt}, 15745 # Also we don't want to break at a binary operator (like +): 15746 # $c->createOval( 15747 # $x + $R, $y + 15748 # $R => $x - $R, 15749 # $y - $R, -fill => 'black', 15750 # ); 15751 my $ibreak = $index_before_arrow[$depth] - 1; 15752 if ( $ibreak > 0 15753 && $tokens_to_go[ $ibreak + 1 ] !~ /^[\)\}\]]$/ ) 15754 { 15755 if ( $tokens_to_go[$ibreak] eq '-' ) { $ibreak-- } 15756 if ( $types_to_go[$ibreak] eq 'b' ) { $ibreak-- } 15757 if ( $types_to_go[$ibreak] =~ /^[,wiZCUG\(\{\[]$/ ) { 15758 15759 # don't break pointer calls, such as the following: 15760 # File::Spec->curdir => 1, 15761 # (This is tokenized as adjacent 'w' tokens) 15762 if ( $tokens_to_go[ $ibreak + 1 ] !~ /^->/ ) { 15763 set_forced_breakpoint($ibreak); 15764 } 15765 } ## end if ( $types_to_go[$ibreak...]) 15766 } ## end if ( $ibreak > 0 && $tokens_to_go...) 15767 15768 $want_comma_break[$depth] = 0; 15769 $index_before_arrow[$depth] = -1; 15770 15771 # handle list which mixes '=>'s and ','s: 15772 # treat any list items so far as an interrupted list 15773 $interrupted_list[$depth] = 1; 15774 next; 15775 } ## end if ( $want_comma_break...) 15776 15777 # break after all commas above starting depth 15778 if ( $depth < $starting_depth && !$dont_align[$depth] ) { 15779 set_forced_breakpoint($i) unless ( $next_nonblank_type eq '#' ); 15780 next; 15781 } 15782 15783 # add this comma to the list.. 15784 my $item_count = $item_count_stack[$depth]; 15785 if ( $item_count == 0 ) { 15786 15787 # but do not form a list with no opening structure 15788 # for example: 15789 15790 # open INFILE_COPY, ">$input_file_copy" 15791 # or die ("very long message"); 15792 15793 if ( ( $opening_structure_index_stack[$depth] < 0 ) 15794 && $container_environment_to_go[$i] eq 'BLOCK' ) 15795 { 15796 $dont_align[$depth] = 1; 15797 } 15798 } ## end if ( $item_count == 0 ) 15799 15800 $comma_index[$depth][$item_count] = $i; 15801 ++$item_count_stack[$depth]; 15802 if ( $last_nonblank_type =~ /^[iR\]]$/ ) { 15803 $identifier_count_stack[$depth]++; 15804 } 15805 } ## end while ( ++$i <= $max_index_to_go) 15806 15807 #------------------------------------------- 15808 # end of loop over all tokens in this batch 15809 #------------------------------------------- 15810 15811 # set breaks for any unfinished lists .. 15812 for ( my $dd = $current_depth ; $dd >= $minimum_depth ; $dd-- ) { 15813 15814 $interrupted_list[$dd] = 1; 15815 $has_broken_sublist[$dd] = 1 if ( $dd < $current_depth ); 15816 set_comma_breakpoints($dd); 15817 set_logical_breakpoints($dd) 15818 if ( $has_old_logical_breakpoints[$dd] ); 15819 set_for_semicolon_breakpoints($dd); 15820 15821 # break open container... 15822 my $i_opening = $opening_structure_index_stack[$dd]; 15823 set_forced_breakpoint($i_opening) 15824 unless ( 15825 is_unbreakable_container($dd) 15826 15827 # Avoid a break which would place an isolated ' or " 15828 # on a line 15829 || ( $type eq 'Q' 15830 && $i_opening >= $max_index_to_go - 2 15831 && $token =~ /^['"]$/ ) 15832 ); 15833 } ## end for ( my $dd = $current_depth...) 15834 15835 # Return a flag indicating if the input file had some good breakpoints. 15836 # This flag will be used to force a break in a line shorter than the 15837 # allowed line length. 15838 if ( $has_old_logical_breakpoints[$current_depth] ) { 15839 $saw_good_breakpoint = 1; 15840 } 15841 15842 # A complex line with one break at an = has a good breakpoint. 15843 # This is not complex ($total_depth_variation=0): 15844 # $res1 15845 # = 10; 15846 # 15847 # This is complex ($total_depth_variation=6): 15848 # $res2 = 15849 # (is_boundp("a", 'self-insert') && is_boundp("b", 'self-insert')); 15850 elsif ($i_old_assignment_break 15851 && $total_depth_variation > 4 15852 && $old_breakpoint_count == 1 ) 15853 { 15854 $saw_good_breakpoint = 1; 15855 } ## end elsif ( $i_old_assignment_break...) 15856 15857 return $saw_good_breakpoint; 15858 } ## end sub scan_list 15859} # end scan_list 15860 15861sub find_token_starting_list { 15862 15863 # When testing to see if a block will fit on one line, some 15864 # previous token(s) may also need to be on the line; particularly 15865 # if this is a sub call. So we will look back at least one 15866 # token. NOTE: This isn't perfect, but not critical, because 15867 # if we mis-identify a block, it will be wrapped and therefore 15868 # fixed the next time it is formatted. 15869 my $i_opening_paren = shift; 15870 my $i_opening_minus = $i_opening_paren; 15871 my $im1 = $i_opening_paren - 1; 15872 my $im2 = $i_opening_paren - 2; 15873 my $im3 = $i_opening_paren - 3; 15874 my $typem1 = $types_to_go[$im1]; 15875 my $typem2 = $im2 >= 0 ? $types_to_go[$im2] : 'b'; 15876 if ( $typem1 eq ',' || ( $typem1 eq 'b' && $typem2 eq ',' ) ) { 15877 $i_opening_minus = $i_opening_paren; 15878 } 15879 elsif ( $tokens_to_go[$i_opening_paren] eq '(' ) { 15880 $i_opening_minus = $im1 if $im1 >= 0; 15881 15882 # walk back to improve length estimate 15883 for ( my $j = $im1 ; $j >= 0 ; $j-- ) { 15884 last if ( $types_to_go[$j] =~ /^[\(\[\{L\}\]\)Rb,]$/ ); 15885 $i_opening_minus = $j; 15886 } 15887 if ( $types_to_go[$i_opening_minus] eq 'b' ) { $i_opening_minus++ } 15888 } 15889 elsif ( $typem1 eq 'k' ) { $i_opening_minus = $im1 } 15890 elsif ( $typem1 eq 'b' && $im2 >= 0 && $types_to_go[$im2] eq 'k' ) { 15891 $i_opening_minus = $im2; 15892 } 15893 return $i_opening_minus; 15894} 15895 15896{ # begin set_comma_breakpoints_do 15897 15898 my %is_keyword_with_special_leading_term; 15899 15900 BEGIN { 15901 15902 # These keywords have prototypes which allow a special leading item 15903 # followed by a list 15904 @_ = 15905 qw(formline grep kill map printf sprintf push chmod join pack unshift); 15906 @is_keyword_with_special_leading_term{@_} = (1) x scalar(@_); 15907 } 15908 15909 sub set_comma_breakpoints_do { 15910 15911 # Given a list with some commas, set breakpoints at some of the 15912 # commas, if necessary, to make it easy to read. This list is 15913 # an example: 15914 my ( 15915 $depth, $i_opening_paren, $i_closing_paren, 15916 $item_count, $identifier_count, $rcomma_index, 15917 $next_nonblank_type, $list_type, $interrupted, 15918 $rdo_not_break_apart, $must_break_open, 15919 ) = @_; 15920 15921 # nothing to do if no commas seen 15922 return if ( $item_count < 1 ); 15923 my $i_first_comma = $$rcomma_index[0]; 15924 my $i_true_last_comma = $$rcomma_index[ $item_count - 1 ]; 15925 my $i_last_comma = $i_true_last_comma; 15926 if ( $i_last_comma >= $max_index_to_go ) { 15927 $i_last_comma = $$rcomma_index[ --$item_count - 1 ]; 15928 return if ( $item_count < 1 ); 15929 } 15930 15931 #--------------------------------------------------------------- 15932 # find lengths of all items in the list to calculate page layout 15933 #--------------------------------------------------------------- 15934 my $comma_count = $item_count; 15935 my @item_lengths; 15936 my @i_term_begin; 15937 my @i_term_end; 15938 my @i_term_comma; 15939 my $i_prev_plus; 15940 my @max_length = ( 0, 0 ); 15941 my $first_term_length; 15942 my $i = $i_opening_paren; 15943 my $is_odd = 1; 15944 15945 for ( my $j = 0 ; $j < $comma_count ; $j++ ) { 15946 $is_odd = 1 - $is_odd; 15947 $i_prev_plus = $i + 1; 15948 $i = $$rcomma_index[$j]; 15949 15950 my $i_term_end = 15951 ( $types_to_go[ $i - 1 ] eq 'b' ) ? $i - 2 : $i - 1; 15952 my $i_term_begin = 15953 ( $types_to_go[$i_prev_plus] eq 'b' ) 15954 ? $i_prev_plus + 1 15955 : $i_prev_plus; 15956 push @i_term_begin, $i_term_begin; 15957 push @i_term_end, $i_term_end; 15958 push @i_term_comma, $i; 15959 15960 # note: currently adding 2 to all lengths (for comma and space) 15961 my $length = 15962 2 + token_sequence_length( $i_term_begin, $i_term_end ); 15963 push @item_lengths, $length; 15964 15965 if ( $j == 0 ) { 15966 $first_term_length = $length; 15967 } 15968 else { 15969 15970 if ( $length > $max_length[$is_odd] ) { 15971 $max_length[$is_odd] = $length; 15972 } 15973 } 15974 } 15975 15976 # now we have to make a distinction between the comma count and item 15977 # count, because the item count will be one greater than the comma 15978 # count if the last item is not terminated with a comma 15979 my $i_b = 15980 ( $types_to_go[ $i_last_comma + 1 ] eq 'b' ) 15981 ? $i_last_comma + 1 15982 : $i_last_comma; 15983 my $i_e = 15984 ( $types_to_go[ $i_closing_paren - 1 ] eq 'b' ) 15985 ? $i_closing_paren - 2 15986 : $i_closing_paren - 1; 15987 my $i_effective_last_comma = $i_last_comma; 15988 15989 my $last_item_length = token_sequence_length( $i_b + 1, $i_e ); 15990 15991 if ( $last_item_length > 0 ) { 15992 15993 # add 2 to length because other lengths include a comma and a blank 15994 $last_item_length += 2; 15995 push @item_lengths, $last_item_length; 15996 push @i_term_begin, $i_b + 1; 15997 push @i_term_end, $i_e; 15998 push @i_term_comma, undef; 15999 16000 my $i_odd = $item_count % 2; 16001 16002 if ( $last_item_length > $max_length[$i_odd] ) { 16003 $max_length[$i_odd] = $last_item_length; 16004 } 16005 16006 $item_count++; 16007 $i_effective_last_comma = $i_e + 1; 16008 16009 if ( $types_to_go[ $i_b + 1 ] =~ /^[iR\]]$/ ) { 16010 $identifier_count++; 16011 } 16012 } 16013 16014 #--------------------------------------------------------------- 16015 # End of length calculations 16016 #--------------------------------------------------------------- 16017 16018 #--------------------------------------------------------------- 16019 # Compound List Rule 1: 16020 # Break at (almost) every comma for a list containing a broken 16021 # sublist. This has higher priority than the Interrupted List 16022 # Rule. 16023 #--------------------------------------------------------------- 16024 if ( $has_broken_sublist[$depth] ) { 16025 16026 # Break at every comma except for a comma between two 16027 # simple, small terms. This prevents long vertical 16028 # columns of, say, just 0's. 16029 my $small_length = 10; # 2 + actual maximum length wanted 16030 16031 # We'll insert a break in long runs of small terms to 16032 # allow alignment in uniform tables. 16033 my $skipped_count = 0; 16034 my $columns = table_columns_available($i_first_comma); 16035 my $fields = int( $columns / $small_length ); 16036 if ( $rOpts_maximum_fields_per_table 16037 && $fields > $rOpts_maximum_fields_per_table ) 16038 { 16039 $fields = $rOpts_maximum_fields_per_table; 16040 } 16041 my $max_skipped_count = $fields - 1; 16042 16043 my $is_simple_last_term = 0; 16044 my $is_simple_next_term = 0; 16045 foreach my $j ( 0 .. $item_count ) { 16046 $is_simple_last_term = $is_simple_next_term; 16047 $is_simple_next_term = 0; 16048 if ( $j < $item_count 16049 && $i_term_end[$j] == $i_term_begin[$j] 16050 && $item_lengths[$j] <= $small_length ) 16051 { 16052 $is_simple_next_term = 1; 16053 } 16054 next if $j == 0; 16055 if ( $is_simple_last_term 16056 && $is_simple_next_term 16057 && $skipped_count < $max_skipped_count ) 16058 { 16059 $skipped_count++; 16060 } 16061 else { 16062 $skipped_count = 0; 16063 my $i = $i_term_comma[ $j - 1 ]; 16064 last unless defined $i; 16065 set_forced_breakpoint($i); 16066 } 16067 } 16068 16069 # always break at the last comma if this list is 16070 # interrupted; we wouldn't want to leave a terminal '{', for 16071 # example. 16072 if ($interrupted) { set_forced_breakpoint($i_true_last_comma) } 16073 return; 16074 } 16075 16076#my ( $a, $b, $c ) = caller(); 16077#print "LISTX: in set_list $a $c interupt=$interrupted count=$item_count 16078#i_first = $i_first_comma i_last=$i_last_comma max=$max_index_to_go\n"; 16079#print "depth=$depth has_broken=$has_broken_sublist[$depth] is_multi=$is_multiline opening_paren=($i_opening_paren) \n"; 16080 16081 #--------------------------------------------------------------- 16082 # Interrupted List Rule: 16083 # A list is is forced to use old breakpoints if it was interrupted 16084 # by side comments or blank lines, or requested by user. 16085 #--------------------------------------------------------------- 16086 if ( $rOpts_break_at_old_comma_breakpoints 16087 || $interrupted 16088 || $i_opening_paren < 0 ) 16089 { 16090 copy_old_breakpoints( $i_first_comma, $i_true_last_comma ); 16091 return; 16092 } 16093 16094 #--------------------------------------------------------------- 16095 # Looks like a list of items. We have to look at it and size it up. 16096 #--------------------------------------------------------------- 16097 16098 my $opening_token = $tokens_to_go[$i_opening_paren]; 16099 my $opening_environment = 16100 $container_environment_to_go[$i_opening_paren]; 16101 16102 #------------------------------------------------------------------- 16103 # Return if this will fit on one line 16104 #------------------------------------------------------------------- 16105 16106 my $i_opening_minus = find_token_starting_list($i_opening_paren); 16107 return 16108 unless excess_line_length( $i_opening_minus, $i_closing_paren ) > 0; 16109 16110 #------------------------------------------------------------------- 16111 # Now we know that this block spans multiple lines; we have to set 16112 # at least one breakpoint -- real or fake -- as a signal to break 16113 # open any outer containers. 16114 #------------------------------------------------------------------- 16115 set_fake_breakpoint(); 16116 16117 # be sure we do not extend beyond the current list length 16118 if ( $i_effective_last_comma >= $max_index_to_go ) { 16119 $i_effective_last_comma = $max_index_to_go - 1; 16120 } 16121 16122 # Set a flag indicating if we need to break open to keep -lp 16123 # items aligned. This is necessary if any of the list terms 16124 # exceeds the available space after the '('. 16125 my $need_lp_break_open = $must_break_open; 16126 if ( $rOpts_line_up_parentheses && !$must_break_open ) { 16127 my $columns_if_unbroken = 16128 maximum_line_length($i_opening_minus) - 16129 total_line_length( $i_opening_minus, $i_opening_paren ); 16130 $need_lp_break_open = 16131 ( $max_length[0] > $columns_if_unbroken ) 16132 || ( $max_length[1] > $columns_if_unbroken ) 16133 || ( $first_term_length > $columns_if_unbroken ); 16134 } 16135 16136 # Specify if the list must have an even number of fields or not. 16137 # It is generally safest to assume an even number, because the 16138 # list items might be a hash list. But if we can be sure that 16139 # it is not a hash, then we can allow an odd number for more 16140 # flexibility. 16141 my $odd_or_even = 2; # 1 = odd field count ok, 2 = want even count 16142 16143 if ( $identifier_count >= $item_count - 1 16144 || $is_assignment{$next_nonblank_type} 16145 || ( $list_type && $list_type ne '=>' && $list_type !~ /^[\:\?]$/ ) 16146 ) 16147 { 16148 $odd_or_even = 1; 16149 } 16150 16151 # do we have a long first term which should be 16152 # left on a line by itself? 16153 my $use_separate_first_term = ( 16154 $odd_or_even == 1 # only if we can use 1 field/line 16155 && $item_count > 3 # need several items 16156 && $first_term_length > 16157 2 * $max_length[0] - 2 # need long first term 16158 && $first_term_length > 16159 2 * $max_length[1] - 2 # need long first term 16160 ); 16161 16162 # or do we know from the type of list that the first term should 16163 # be placed alone? 16164 if ( !$use_separate_first_term ) { 16165 if ( $is_keyword_with_special_leading_term{$list_type} ) { 16166 $use_separate_first_term = 1; 16167 16168 # should the container be broken open? 16169 if ( $item_count < 3 ) { 16170 if ( $i_first_comma - $i_opening_paren < 4 ) { 16171 $$rdo_not_break_apart = 1; 16172 } 16173 } 16174 elsif ($first_term_length < 20 16175 && $i_first_comma - $i_opening_paren < 4 ) 16176 { 16177 my $columns = table_columns_available($i_first_comma); 16178 if ( $first_term_length < $columns ) { 16179 $$rdo_not_break_apart = 1; 16180 } 16181 } 16182 } 16183 } 16184 16185 # if so, 16186 if ($use_separate_first_term) { 16187 16188 # ..set a break and update starting values 16189 $use_separate_first_term = 1; 16190 set_forced_breakpoint($i_first_comma); 16191 $i_opening_paren = $i_first_comma; 16192 $i_first_comma = $$rcomma_index[1]; 16193 $item_count--; 16194 return if $comma_count == 1; 16195 shift @item_lengths; 16196 shift @i_term_begin; 16197 shift @i_term_end; 16198 shift @i_term_comma; 16199 } 16200 16201 # if not, update the metrics to include the first term 16202 else { 16203 if ( $first_term_length > $max_length[0] ) { 16204 $max_length[0] = $first_term_length; 16205 } 16206 } 16207 16208 # Field width parameters 16209 my $pair_width = ( $max_length[0] + $max_length[1] ); 16210 my $max_width = 16211 ( $max_length[0] > $max_length[1] ) ? $max_length[0] : $max_length[1]; 16212 16213 # Number of free columns across the page width for laying out tables 16214 my $columns = table_columns_available($i_first_comma); 16215 16216 # Estimated maximum number of fields which fit this space 16217 # This will be our first guess 16218 my $number_of_fields_max = 16219 maximum_number_of_fields( $columns, $odd_or_even, $max_width, 16220 $pair_width ); 16221 my $number_of_fields = $number_of_fields_max; 16222 16223 # Find the best-looking number of fields 16224 # and make this our second guess if possible 16225 my ( $number_of_fields_best, $ri_ragged_break_list, 16226 $new_identifier_count ) 16227 = study_list_complexity( \@i_term_begin, \@i_term_end, \@item_lengths, 16228 $max_width ); 16229 16230 if ( $number_of_fields_best != 0 16231 && $number_of_fields_best < $number_of_fields_max ) 16232 { 16233 $number_of_fields = $number_of_fields_best; 16234 } 16235 16236 # ---------------------------------------------------------------------- 16237 # If we are crowded and the -lp option is being used, try to 16238 # undo some indentation 16239 # ---------------------------------------------------------------------- 16240 if ( 16241 $rOpts_line_up_parentheses 16242 && ( 16243 $number_of_fields == 0 16244 || ( $number_of_fields == 1 16245 && $number_of_fields != $number_of_fields_best ) 16246 ) 16247 ) 16248 { 16249 my $available_spaces = get_AVAILABLE_SPACES_to_go($i_first_comma); 16250 if ( $available_spaces > 0 ) { 16251 16252 my $spaces_wanted = $max_width - $columns; # for 1 field 16253 16254 if ( $number_of_fields_best == 0 ) { 16255 $number_of_fields_best = 16256 get_maximum_fields_wanted( \@item_lengths ); 16257 } 16258 16259 if ( $number_of_fields_best != 1 ) { 16260 my $spaces_wanted_2 = 16261 1 + $pair_width - $columns; # for 2 fields 16262 if ( $available_spaces > $spaces_wanted_2 ) { 16263 $spaces_wanted = $spaces_wanted_2; 16264 } 16265 } 16266 16267 if ( $spaces_wanted > 0 ) { 16268 my $deleted_spaces = 16269 reduce_lp_indentation( $i_first_comma, $spaces_wanted ); 16270 16271 # redo the math 16272 if ( $deleted_spaces > 0 ) { 16273 $columns = table_columns_available($i_first_comma); 16274 $number_of_fields_max = 16275 maximum_number_of_fields( $columns, $odd_or_even, 16276 $max_width, $pair_width ); 16277 $number_of_fields = $number_of_fields_max; 16278 16279 if ( $number_of_fields_best == 1 16280 && $number_of_fields >= 1 ) 16281 { 16282 $number_of_fields = $number_of_fields_best; 16283 } 16284 } 16285 } 16286 } 16287 } 16288 16289 # try for one column if two won't work 16290 if ( $number_of_fields <= 0 ) { 16291 $number_of_fields = int( $columns / $max_width ); 16292 } 16293 16294 # The user can place an upper bound on the number of fields, 16295 # which can be useful for doing maintenance on tables 16296 if ( $rOpts_maximum_fields_per_table 16297 && $number_of_fields > $rOpts_maximum_fields_per_table ) 16298 { 16299 $number_of_fields = $rOpts_maximum_fields_per_table; 16300 } 16301 16302 # How many columns (characters) and lines would this container take 16303 # if no additional whitespace were added? 16304 my $packed_columns = token_sequence_length( $i_opening_paren + 1, 16305 $i_effective_last_comma + 1 ); 16306 if ( $columns <= 0 ) { $columns = 1 } # avoid divide by zero 16307 my $packed_lines = 1 + int( $packed_columns / $columns ); 16308 16309 # are we an item contained in an outer list? 16310 my $in_hierarchical_list = $next_nonblank_type =~ /^[\}\,]$/; 16311 16312 if ( $number_of_fields <= 0 ) { 16313 16314# #--------------------------------------------------------------- 16315# # We're in trouble. We can't find a single field width that works. 16316# # There is no simple answer here; we may have a single long list 16317# # item, or many. 16318# #--------------------------------------------------------------- 16319# 16320# In many cases, it may be best to not force a break if there is just one 16321# comma, because the standard continuation break logic will do a better 16322# job without it. 16323# 16324# In the common case that all but one of the terms can fit 16325# on a single line, it may look better not to break open the 16326# containing parens. Consider, for example 16327# 16328# $color = 16329# join ( '/', 16330# sort { $color_value{$::a} <=> $color_value{$::b}; } 16331# keys %colors ); 16332# 16333# which will look like this with the container broken: 16334# 16335# $color = join ( 16336# '/', 16337# sort { $color_value{$::a} <=> $color_value{$::b}; } keys %colors 16338# ); 16339# 16340# Here is an example of this rule for a long last term: 16341# 16342# log_message( 0, 256, 128, 16343# "Number of routes in adj-RIB-in to be considered: $peercount" ); 16344# 16345# And here is an example with a long first term: 16346# 16347# $s = sprintf( 16348# "%2d wallclock secs (%$f usr %$f sys + %$f cusr %$f csys = %$f CPU)", 16349# $r, $pu, $ps, $cu, $cs, $tt 16350# ) 16351# if $style eq 'all'; 16352 16353 my $i_last_comma = $$rcomma_index[ $comma_count - 1 ]; 16354 my $long_last_term = excess_line_length( 0, $i_last_comma ) <= 0; 16355 my $long_first_term = 16356 excess_line_length( $i_first_comma + 1, $max_index_to_go ) <= 0; 16357 16358 # break at every comma ... 16359 if ( 16360 16361 # if requested by user or is best looking 16362 $number_of_fields_best == 1 16363 16364 # or if this is a sublist of a larger list 16365 || $in_hierarchical_list 16366 16367 # or if multiple commas and we dont have a long first or last 16368 # term 16369 || ( $comma_count > 1 16370 && !( $long_last_term || $long_first_term ) ) 16371 ) 16372 { 16373 foreach ( 0 .. $comma_count - 1 ) { 16374 set_forced_breakpoint( $$rcomma_index[$_] ); 16375 } 16376 } 16377 elsif ($long_last_term) { 16378 16379 set_forced_breakpoint($i_last_comma); 16380 $$rdo_not_break_apart = 1 unless $must_break_open; 16381 } 16382 elsif ($long_first_term) { 16383 16384 set_forced_breakpoint($i_first_comma); 16385 } 16386 else { 16387 16388 # let breaks be defined by default bond strength logic 16389 } 16390 return; 16391 } 16392 16393 # -------------------------------------------------------- 16394 # We have a tentative field count that seems to work. 16395 # How many lines will this require? 16396 # -------------------------------------------------------- 16397 my $formatted_lines = $item_count / ($number_of_fields); 16398 if ( $formatted_lines != int $formatted_lines ) { 16399 $formatted_lines = 1 + int $formatted_lines; 16400 } 16401 16402 # So far we've been trying to fill out to the right margin. But 16403 # compact tables are easier to read, so let's see if we can use fewer 16404 # fields without increasing the number of lines. 16405 $number_of_fields = 16406 compactify_table( $item_count, $number_of_fields, $formatted_lines, 16407 $odd_or_even ); 16408 16409 # How many spaces across the page will we fill? 16410 my $columns_per_line = 16411 ( int $number_of_fields / 2 ) * $pair_width + 16412 ( $number_of_fields % 2 ) * $max_width; 16413 16414 my $formatted_columns; 16415 16416 if ( $number_of_fields > 1 ) { 16417 $formatted_columns = 16418 ( $pair_width * ( int( $item_count / 2 ) ) + 16419 ( $item_count % 2 ) * $max_width ); 16420 } 16421 else { 16422 $formatted_columns = $max_width * $item_count; 16423 } 16424 if ( $formatted_columns < $packed_columns ) { 16425 $formatted_columns = $packed_columns; 16426 } 16427 16428 my $unused_columns = $formatted_columns - $packed_columns; 16429 16430 # set some empirical parameters to help decide if we should try to 16431 # align; high sparsity does not look good, especially with few lines 16432 my $sparsity = ($unused_columns) / ($formatted_columns); 16433 my $max_allowed_sparsity = 16434 ( $item_count < 3 ) ? 0.1 16435 : ( $packed_lines == 1 ) ? 0.15 16436 : ( $packed_lines == 2 ) ? 0.4 16437 : 0.7; 16438 16439 # Begin check for shortcut methods, which avoid treating a list 16440 # as a table for relatively small parenthesized lists. These 16441 # are usually easier to read if not formatted as tables. 16442 if ( 16443 $packed_lines <= 2 # probably can fit in 2 lines 16444 && $item_count < 9 # doesn't have too many items 16445 && $opening_environment eq 'BLOCK' # not a sub-container 16446 && $opening_token eq '(' # is paren list 16447 ) 16448 { 16449 16450 # Shortcut method 1: for -lp and just one comma: 16451 # This is a no-brainer, just break at the comma. 16452 if ( 16453 $rOpts_line_up_parentheses # -lp 16454 && $item_count == 2 # two items, one comma 16455 && !$must_break_open 16456 ) 16457 { 16458 my $i_break = $$rcomma_index[0]; 16459 set_forced_breakpoint($i_break); 16460 $$rdo_not_break_apart = 1; 16461 set_non_alignment_flags( $comma_count, $rcomma_index ); 16462 return; 16463 16464 } 16465 16466 # method 2 is for most small ragged lists which might look 16467 # best if not displayed as a table. 16468 if ( 16469 ( $number_of_fields == 2 && $item_count == 3 ) 16470 || ( 16471 $new_identifier_count > 0 # isn't all quotes 16472 && $sparsity > 0.15 16473 ) # would be fairly spaced gaps if aligned 16474 ) 16475 { 16476 16477 my $break_count = set_ragged_breakpoints( \@i_term_comma, 16478 $ri_ragged_break_list ); 16479 ++$break_count if ($use_separate_first_term); 16480 16481 # NOTE: we should really use the true break count here, 16482 # which can be greater if there are large terms and 16483 # little space, but usually this will work well enough. 16484 unless ($must_break_open) { 16485 16486 if ( $break_count <= 1 ) { 16487 $$rdo_not_break_apart = 1; 16488 } 16489 elsif ( $rOpts_line_up_parentheses && !$need_lp_break_open ) 16490 { 16491 $$rdo_not_break_apart = 1; 16492 } 16493 } 16494 set_non_alignment_flags( $comma_count, $rcomma_index ); 16495 return; 16496 } 16497 16498 } # end shortcut methods 16499 16500 # debug stuff 16501 16502 FORMATTER_DEBUG_FLAG_SPARSE && do { 16503 print STDOUT 16504"SPARSE:cols=$columns commas=$comma_count items:$item_count ids=$identifier_count pairwidth=$pair_width fields=$number_of_fields lines packed: $packed_lines packed_cols=$packed_columns fmtd:$formatted_lines cols /line:$columns_per_line unused:$unused_columns fmtd:$formatted_columns sparsity=$sparsity allow=$max_allowed_sparsity\n"; 16505 16506 }; 16507 16508 #--------------------------------------------------------------- 16509 # Compound List Rule 2: 16510 # If this list is too long for one line, and it is an item of a 16511 # larger list, then we must format it, regardless of sparsity 16512 # (ian.t). One reason that we have to do this is to trigger 16513 # Compound List Rule 1, above, which causes breaks at all commas of 16514 # all outer lists. In this way, the structure will be properly 16515 # displayed. 16516 #--------------------------------------------------------------- 16517 16518 # Decide if this list is too long for one line unless broken 16519 my $total_columns = table_columns_available($i_opening_paren); 16520 my $too_long = $packed_columns > $total_columns; 16521 16522 # For a paren list, include the length of the token just before the 16523 # '(' because this is likely a sub call, and we would have to 16524 # include the sub name on the same line as the list. This is still 16525 # imprecise, but not too bad. (steve.t) 16526 if ( !$too_long && $i_opening_paren > 0 && $opening_token eq '(' ) { 16527 16528 $too_long = excess_line_length( $i_opening_minus, 16529 $i_effective_last_comma + 1 ) > 0; 16530 } 16531 16532 # FIXME: For an item after a '=>', try to include the length of the 16533 # thing before the '=>'. This is crude and should be improved by 16534 # actually looking back token by token. 16535 if ( !$too_long && $i_opening_paren > 0 && $list_type eq '=>' ) { 16536 my $i_opening_minus = $i_opening_paren - 4; 16537 if ( $i_opening_minus >= 0 ) { 16538 $too_long = excess_line_length( $i_opening_minus, 16539 $i_effective_last_comma + 1 ) > 0; 16540 } 16541 } 16542 16543 # Always break lists contained in '[' and '{' if too long for 1 line, 16544 # and always break lists which are too long and part of a more complex 16545 # structure. 16546 my $must_break_open_container = $must_break_open 16547 || ( $too_long 16548 && ( $in_hierarchical_list || $opening_token ne '(' ) ); 16549 16550#print "LISTX: next=$next_nonblank_type avail cols=$columns packed=$packed_columns must format = $must_break_open_container too-long=$too_long opening=$opening_token list_type=$list_type formatted_lines=$formatted_lines packed=$packed_lines max_sparsity= $max_allowed_sparsity sparsity=$sparsity \n"; 16551 16552 #--------------------------------------------------------------- 16553 # The main decision: 16554 # Now decide if we will align the data into aligned columns. Do not 16555 # attempt to align columns if this is a tiny table or it would be 16556 # too spaced. It seems that the more packed lines we have, the 16557 # sparser the list that can be allowed and still look ok. 16558 #--------------------------------------------------------------- 16559 16560 if ( ( $formatted_lines < 3 && $packed_lines < $formatted_lines ) 16561 || ( $formatted_lines < 2 ) 16562 || ( $unused_columns > $max_allowed_sparsity * $formatted_columns ) 16563 ) 16564 { 16565 16566 #--------------------------------------------------------------- 16567 # too sparse: would look ugly if aligned in a table; 16568 #--------------------------------------------------------------- 16569 16570 # use old breakpoints if this is a 'big' list 16571 # FIXME: goal is to improve set_ragged_breakpoints so that 16572 # this is not necessary. 16573 if ( $packed_lines > 2 && $item_count > 10 ) { 16574 write_logfile_entry("List sparse: using old breakpoints\n"); 16575 copy_old_breakpoints( $i_first_comma, $i_last_comma ); 16576 } 16577 16578 # let the continuation logic handle it if 2 lines 16579 else { 16580 16581 my $break_count = set_ragged_breakpoints( \@i_term_comma, 16582 $ri_ragged_break_list ); 16583 ++$break_count if ($use_separate_first_term); 16584 16585 unless ($must_break_open_container) { 16586 if ( $break_count <= 1 ) { 16587 $$rdo_not_break_apart = 1; 16588 } 16589 elsif ( $rOpts_line_up_parentheses && !$need_lp_break_open ) 16590 { 16591 $$rdo_not_break_apart = 1; 16592 } 16593 } 16594 set_non_alignment_flags( $comma_count, $rcomma_index ); 16595 } 16596 return; 16597 } 16598 16599 #--------------------------------------------------------------- 16600 # go ahead and format as a table 16601 #--------------------------------------------------------------- 16602 write_logfile_entry( 16603 "List: auto formatting with $number_of_fields fields/row\n"); 16604 16605 my $j_first_break = 16606 $use_separate_first_term ? $number_of_fields : $number_of_fields - 1; 16607 16608 for ( 16609 my $j = $j_first_break ; 16610 $j < $comma_count ; 16611 $j += $number_of_fields 16612 ) 16613 { 16614 my $i = $$rcomma_index[$j]; 16615 set_forced_breakpoint($i); 16616 } 16617 return; 16618 } 16619} 16620 16621sub set_non_alignment_flags { 16622 16623 # set flag which indicates that these commas should not be 16624 # aligned 16625 my ( $comma_count, $rcomma_index ) = @_; 16626 foreach ( 0 .. $comma_count - 1 ) { 16627 $matching_token_to_go[ $$rcomma_index[$_] ] = 1; 16628 } 16629} 16630 16631sub study_list_complexity { 16632 16633 # Look for complex tables which should be formatted with one term per line. 16634 # Returns the following: 16635 # 16636 # \@i_ragged_break_list = list of good breakpoints to avoid lines 16637 # which are hard to read 16638 # $number_of_fields_best = suggested number of fields based on 16639 # complexity; = 0 if any number may be used. 16640 # 16641 my ( $ri_term_begin, $ri_term_end, $ritem_lengths, $max_width ) = @_; 16642 my $item_count = @{$ri_term_begin}; 16643 my $complex_item_count = 0; 16644 my $number_of_fields_best = $rOpts_maximum_fields_per_table; 16645 my $i_max = @{$ritem_lengths} - 1; 16646 ##my @item_complexity; 16647 16648 my $i_last_last_break = -3; 16649 my $i_last_break = -2; 16650 my @i_ragged_break_list; 16651 16652 my $definitely_complex = 30; 16653 my $definitely_simple = 12; 16654 my $quote_count = 0; 16655 16656 for my $i ( 0 .. $i_max ) { 16657 my $ib = $ri_term_begin->[$i]; 16658 my $ie = $ri_term_end->[$i]; 16659 16660 # define complexity: start with the actual term length 16661 my $weighted_length = ( $ritem_lengths->[$i] - 2 ); 16662 16663 ##TBD: join types here and check for variations 16664 ##my $str=join "", @tokens_to_go[$ib..$ie]; 16665 16666 my $is_quote = 0; 16667 if ( $types_to_go[$ib] =~ /^[qQ]$/ ) { 16668 $is_quote = 1; 16669 $quote_count++; 16670 } 16671 elsif ( $types_to_go[$ib] =~ /^[w\-]$/ ) { 16672 $quote_count++; 16673 } 16674 16675 if ( $ib eq $ie ) { 16676 if ( $is_quote && $tokens_to_go[$ib] =~ /\s/ ) { 16677 $complex_item_count++; 16678 $weighted_length *= 2; 16679 } 16680 else { 16681 } 16682 } 16683 else { 16684 if ( grep { $_ eq 'b' } @types_to_go[ $ib .. $ie ] ) { 16685 $complex_item_count++; 16686 $weighted_length *= 2; 16687 } 16688 if ( grep { $_ eq '..' } @types_to_go[ $ib .. $ie ] ) { 16689 $weighted_length += 4; 16690 } 16691 } 16692 16693 # add weight for extra tokens. 16694 $weighted_length += 2 * ( $ie - $ib ); 16695 16696## my $BUB = join '', @tokens_to_go[$ib..$ie]; 16697## print "# COMPLEXITY:$weighted_length $BUB\n"; 16698 16699##push @item_complexity, $weighted_length; 16700 16701 # now mark a ragged break after this item it if it is 'long and 16702 # complex': 16703 if ( $weighted_length >= $definitely_complex ) { 16704 16705 # if we broke after the previous term 16706 # then break before it too 16707 if ( $i_last_break == $i - 1 16708 && $i > 1 16709 && $i_last_last_break != $i - 2 ) 16710 { 16711 16712 ## FIXME: don't strand a small term 16713 pop @i_ragged_break_list; 16714 push @i_ragged_break_list, $i - 2; 16715 push @i_ragged_break_list, $i - 1; 16716 } 16717 16718 push @i_ragged_break_list, $i; 16719 $i_last_last_break = $i_last_break; 16720 $i_last_break = $i; 16721 } 16722 16723 # don't break before a small last term -- it will 16724 # not look good on a line by itself. 16725 elsif ($i == $i_max 16726 && $i_last_break == $i - 1 16727 && $weighted_length <= $definitely_simple ) 16728 { 16729 pop @i_ragged_break_list; 16730 } 16731 } 16732 16733 my $identifier_count = $i_max + 1 - $quote_count; 16734 16735 # Need more tuning here.. 16736 if ( $max_width > 12 16737 && $complex_item_count > $item_count / 2 16738 && $number_of_fields_best != 2 ) 16739 { 16740 $number_of_fields_best = 1; 16741 } 16742 16743 return ( $number_of_fields_best, \@i_ragged_break_list, $identifier_count ); 16744} 16745 16746sub get_maximum_fields_wanted { 16747 16748 # Not all tables look good with more than one field of items. 16749 # This routine looks at a table and decides if it should be 16750 # formatted with just one field or not. 16751 # This coding is still under development. 16752 my ($ritem_lengths) = @_; 16753 16754 my $number_of_fields_best = 0; 16755 16756 # For just a few items, we tentatively assume just 1 field. 16757 my $item_count = @{$ritem_lengths}; 16758 if ( $item_count <= 5 ) { 16759 $number_of_fields_best = 1; 16760 } 16761 16762 # For larger tables, look at it both ways and see what looks best 16763 else { 16764 16765 my $is_odd = 1; 16766 my @max_length = ( 0, 0 ); 16767 my @last_length_2 = ( undef, undef ); 16768 my @first_length_2 = ( undef, undef ); 16769 my $last_length = undef; 16770 my $total_variation_1 = 0; 16771 my $total_variation_2 = 0; 16772 my @total_variation_2 = ( 0, 0 ); 16773 for ( my $j = 0 ; $j < $item_count ; $j++ ) { 16774 16775 $is_odd = 1 - $is_odd; 16776 my $length = $ritem_lengths->[$j]; 16777 if ( $length > $max_length[$is_odd] ) { 16778 $max_length[$is_odd] = $length; 16779 } 16780 16781 if ( defined($last_length) ) { 16782 my $dl = abs( $length - $last_length ); 16783 $total_variation_1 += $dl; 16784 } 16785 $last_length = $length; 16786 16787 my $ll = $last_length_2[$is_odd]; 16788 if ( defined($ll) ) { 16789 my $dl = abs( $length - $ll ); 16790 $total_variation_2[$is_odd] += $dl; 16791 } 16792 else { 16793 $first_length_2[$is_odd] = $length; 16794 } 16795 $last_length_2[$is_odd] = $length; 16796 } 16797 $total_variation_2 = $total_variation_2[0] + $total_variation_2[1]; 16798 16799 my $factor = ( $item_count > 10 ) ? 1 : ( $item_count > 5 ) ? 0.75 : 0; 16800 unless ( $total_variation_2 < $factor * $total_variation_1 ) { 16801 $number_of_fields_best = 1; 16802 } 16803 } 16804 return ($number_of_fields_best); 16805} 16806 16807sub table_columns_available { 16808 my $i_first_comma = shift; 16809 my $columns = 16810 maximum_line_length($i_first_comma) - 16811 leading_spaces_to_go($i_first_comma); 16812 16813 # Patch: the vertical formatter does not line up lines whose lengths 16814 # exactly equal the available line length because of allowances 16815 # that must be made for side comments. Therefore, the number of 16816 # available columns is reduced by 1 character. 16817 $columns -= 1; 16818 return $columns; 16819} 16820 16821sub maximum_number_of_fields { 16822 16823 # how many fields will fit in the available space? 16824 my ( $columns, $odd_or_even, $max_width, $pair_width ) = @_; 16825 my $max_pairs = int( $columns / $pair_width ); 16826 my $number_of_fields = $max_pairs * 2; 16827 if ( $odd_or_even == 1 16828 && $max_pairs * $pair_width + $max_width <= $columns ) 16829 { 16830 $number_of_fields++; 16831 } 16832 return $number_of_fields; 16833} 16834 16835sub compactify_table { 16836 16837 # given a table with a certain number of fields and a certain number 16838 # of lines, see if reducing the number of fields will make it look 16839 # better. 16840 my ( $item_count, $number_of_fields, $formatted_lines, $odd_or_even ) = @_; 16841 if ( $number_of_fields >= $odd_or_even * 2 && $formatted_lines > 0 ) { 16842 my $min_fields; 16843 16844 for ( 16845 $min_fields = $number_of_fields ; 16846 $min_fields >= $odd_or_even 16847 && $min_fields * $formatted_lines >= $item_count ; 16848 $min_fields -= $odd_or_even 16849 ) 16850 { 16851 $number_of_fields = $min_fields; 16852 } 16853 } 16854 return $number_of_fields; 16855} 16856 16857sub set_ragged_breakpoints { 16858 16859 # Set breakpoints in a list that cannot be formatted nicely as a 16860 # table. 16861 my ( $ri_term_comma, $ri_ragged_break_list ) = @_; 16862 16863 my $break_count = 0; 16864 foreach (@$ri_ragged_break_list) { 16865 my $j = $ri_term_comma->[$_]; 16866 if ($j) { 16867 set_forced_breakpoint($j); 16868 $break_count++; 16869 } 16870 } 16871 return $break_count; 16872} 16873 16874sub copy_old_breakpoints { 16875 my ( $i_first_comma, $i_last_comma ) = @_; 16876 for my $i ( $i_first_comma .. $i_last_comma ) { 16877 if ( $old_breakpoint_to_go[$i] ) { 16878 set_forced_breakpoint($i); 16879 } 16880 } 16881} 16882 16883sub set_nobreaks { 16884 my ( $i, $j ) = @_; 16885 if ( $i >= 0 && $i <= $j && $j <= $max_index_to_go ) { 16886 16887 FORMATTER_DEBUG_FLAG_NOBREAK && do { 16888 my ( $a, $b, $c ) = caller(); 16889 print STDOUT 16890"NOBREAK: forced_breakpoint $forced_breakpoint_count from $a $c with i=$i max=$max_index_to_go type=$types_to_go[$i]\n"; 16891 }; 16892 16893 @nobreak_to_go[ $i .. $j ] = (1) x ( $j - $i + 1 ); 16894 } 16895 16896 # shouldn't happen; non-critical error 16897 else { 16898 FORMATTER_DEBUG_FLAG_NOBREAK && do { 16899 my ( $a, $b, $c ) = caller(); 16900 print STDOUT 16901 "NOBREAK ERROR: from $a $c with i=$i j=$j max=$max_index_to_go\n"; 16902 }; 16903 } 16904} 16905 16906sub set_fake_breakpoint { 16907 16908 # Just bump up the breakpoint count as a signal that there are breaks. 16909 # This is useful if we have breaks but may want to postpone deciding where 16910 # to make them. 16911 $forced_breakpoint_count++; 16912} 16913 16914sub set_forced_breakpoint { 16915 my $i = shift; 16916 16917 return unless defined $i && $i >= 0; 16918 16919 # when called with certain tokens, use bond strengths to decide 16920 # if we break before or after it 16921 my $token = $tokens_to_go[$i]; 16922 16923 if ( $token =~ /^([\=\.\,\:\?]|and|or|xor|&&|\|\|)$/ ) { 16924 if ( $want_break_before{$token} && $i >= 0 ) { $i-- } 16925 } 16926 16927 # breaks are forced before 'if' and 'unless' 16928 elsif ( $is_if_unless{$token} ) { $i-- } 16929 16930 if ( $i >= 0 && $i <= $max_index_to_go ) { 16931 my $i_nonblank = ( $types_to_go[$i] ne 'b' ) ? $i : $i - 1; 16932 16933 FORMATTER_DEBUG_FLAG_FORCE && do { 16934 my ( $a, $b, $c ) = caller(); 16935 print STDOUT 16936"FORCE $forced_breakpoint_count from $a $c with i=$i_nonblank max=$max_index_to_go tok=$tokens_to_go[$i_nonblank] type=$types_to_go[$i_nonblank] nobr=$nobreak_to_go[$i_nonblank]\n"; 16937 }; 16938 16939 if ( $i_nonblank >= 0 && $nobreak_to_go[$i_nonblank] == 0 ) { 16940 $forced_breakpoint_to_go[$i_nonblank] = 1; 16941 16942 if ( $i_nonblank > $index_max_forced_break ) { 16943 $index_max_forced_break = $i_nonblank; 16944 } 16945 $forced_breakpoint_count++; 16946 $forced_breakpoint_undo_stack[ $forced_breakpoint_undo_count++ ] = 16947 $i_nonblank; 16948 16949 # if we break at an opening container..break at the closing 16950 if ( $tokens_to_go[$i_nonblank] =~ /^[\{\[\(\?]$/ ) { 16951 set_closing_breakpoint($i_nonblank); 16952 } 16953 } 16954 } 16955} 16956 16957sub clear_breakpoint_undo_stack { 16958 $forced_breakpoint_undo_count = 0; 16959} 16960 16961sub undo_forced_breakpoint_stack { 16962 16963 my $i_start = shift; 16964 if ( $i_start < 0 ) { 16965 $i_start = 0; 16966 my ( $a, $b, $c ) = caller(); 16967 warning( 16968"Program Bug: undo_forced_breakpoint_stack from $a $c has i=$i_start " 16969 ); 16970 } 16971 16972 while ( $forced_breakpoint_undo_count > $i_start ) { 16973 my $i = 16974 $forced_breakpoint_undo_stack[ --$forced_breakpoint_undo_count ]; 16975 if ( $i >= 0 && $i <= $max_index_to_go ) { 16976 $forced_breakpoint_to_go[$i] = 0; 16977 $forced_breakpoint_count--; 16978 16979 FORMATTER_DEBUG_FLAG_UNDOBP && do { 16980 my ( $a, $b, $c ) = caller(); 16981 print STDOUT 16982"UNDOBP: undo forced_breakpoint i=$i $forced_breakpoint_undo_count from $a $c max=$max_index_to_go\n"; 16983 }; 16984 } 16985 16986 # shouldn't happen, but not a critical error 16987 else { 16988 FORMATTER_DEBUG_FLAG_UNDOBP && do { 16989 my ( $a, $b, $c ) = caller(); 16990 print STDOUT 16991"Program Bug: undo_forced_breakpoint from $a $c has i=$i but max=$max_index_to_go"; 16992 }; 16993 } 16994 } 16995} 16996 16997{ # begin recombine_breakpoints 16998 16999 my %is_amp_amp; 17000 my %is_ternary; 17001 my %is_math_op; 17002 my %is_plus_minus; 17003 my %is_mult_div; 17004 17005 BEGIN { 17006 17007 @_ = qw( && || ); 17008 @is_amp_amp{@_} = (1) x scalar(@_); 17009 17010 @_ = qw( ? : ); 17011 @is_ternary{@_} = (1) x scalar(@_); 17012 17013 @_ = qw( + - * / ); 17014 @is_math_op{@_} = (1) x scalar(@_); 17015 17016 @_ = qw( + - ); 17017 @is_plus_minus{@_} = (1) x scalar(@_); 17018 17019 @_ = qw( * / ); 17020 @is_mult_div{@_} = (1) x scalar(@_); 17021 } 17022 17023 sub recombine_breakpoints { 17024 17025 # sub set_continuation_breaks is very liberal in setting line breaks 17026 # for long lines, always setting breaks at good breakpoints, even 17027 # when that creates small lines. Sometimes small line fragments 17028 # are produced which would look better if they were combined. 17029 # That's the task of this routine. 17030 # 17031 # We are given indexes to the current lines: 17032 # $ri_beg = ref to array of BEGinning indexes of each line 17033 # $ri_end = ref to array of ENDing indexes of each line 17034 my ( $ri_beg, $ri_end ) = @_; 17035 17036 # Make a list of all good joining tokens between the lines 17037 # n-1 and n. 17038 my @joint; 17039 my $nmax = @$ri_end - 1; 17040 for my $n ( 1 .. $nmax ) { 17041 my $ibeg_1 = $$ri_beg[ $n - 1 ]; 17042 my $iend_1 = $$ri_end[ $n - 1 ]; 17043 my $iend_2 = $$ri_end[$n]; 17044 my $ibeg_2 = $$ri_beg[$n]; 17045 17046 my ( $itok, $itokp, $itokm ); 17047 17048 foreach my $itest ( $iend_1, $ibeg_2 ) { 17049 my $type = $types_to_go[$itest]; 17050 if ( $is_math_op{$type} 17051 || $is_amp_amp{$type} 17052 || $is_assignment{$type} 17053 || $type eq ':' ) 17054 { 17055 $itok = $itest; 17056 } 17057 } 17058 $joint[$n] = [$itok]; 17059 } 17060 17061 my $more_to_do = 1; 17062 17063 # We keep looping over all of the lines of this batch 17064 # until there are no more possible recombinations 17065 my $nmax_last = @$ri_end; 17066 while ($more_to_do) { 17067 my $n_best = 0; 17068 my $bs_best; 17069 my $n; 17070 my $nmax = @$ri_end - 1; 17071 17072 # Safety check for infinite loop 17073 unless ( $nmax < $nmax_last ) { 17074 17075 # Shouldn't happen because splice below decreases nmax on each 17076 # pass. 17077 Perl::Tidy::Die 17078 "Program bug-infinite loop in recombine breakpoints\n"; 17079 } 17080 $nmax_last = $nmax; 17081 $more_to_do = 0; 17082 my $previous_outdentable_closing_paren; 17083 my $leading_amp_count = 0; 17084 my $this_line_is_semicolon_terminated; 17085 17086 # loop over all remaining lines in this batch 17087 for $n ( 1 .. $nmax ) { 17088 17089 #---------------------------------------------------------- 17090 # If we join the current pair of lines, 17091 # line $n-1 will become the left part of the joined line 17092 # line $n will become the right part of the joined line 17093 # 17094 # Here are Indexes of the endpoint tokens of the two lines: 17095 # 17096 # -----line $n-1--- | -----line $n----- 17097 # $ibeg_1 $iend_1 | $ibeg_2 $iend_2 17098 # ^ 17099 # | 17100 # We want to decide if we should remove the line break 17101 # between the tokens at $iend_1 and $ibeg_2 17102 # 17103 # We will apply a number of ad-hoc tests to see if joining 17104 # here will look ok. The code will just issue a 'next' 17105 # command if the join doesn't look good. If we get through 17106 # the gauntlet of tests, the lines will be recombined. 17107 #---------------------------------------------------------- 17108 # 17109 # beginning and ending tokens of the lines we are working on 17110 my $ibeg_1 = $$ri_beg[ $n - 1 ]; 17111 my $iend_1 = $$ri_end[ $n - 1 ]; 17112 my $iend_2 = $$ri_end[$n]; 17113 my $ibeg_2 = $$ri_beg[$n]; 17114 my $ibeg_nmax = $$ri_beg[$nmax]; 17115 17116 my $type_iend_1 = $types_to_go[$iend_1]; 17117 my $type_iend_2 = $types_to_go[$iend_2]; 17118 my $type_ibeg_1 = $types_to_go[$ibeg_1]; 17119 my $type_ibeg_2 = $types_to_go[$ibeg_2]; 17120 17121 # some beginning indexes of other lines, which may not exist 17122 my $ibeg_0 = $n > 1 ? $$ri_beg[ $n - 2 ] : -1; 17123 my $ibeg_3 = $n < $nmax ? $$ri_beg[ $n + 1 ] : -1; 17124 my $ibeg_4 = $n + 2 <= $nmax ? $$ri_beg[ $n + 2 ] : -1; 17125 17126 my $bs_tweak = 0; 17127 17128 #my $depth_increase=( $nesting_depth_to_go[$ibeg_2] - 17129 # $nesting_depth_to_go[$ibeg_1] ); 17130 17131 FORMATTER_DEBUG_FLAG_RECOMBINE && do { 17132 print STDERR 17133"RECOMBINE: n=$n imid=$iend_1 if=$ibeg_1 type=$type_ibeg_1 =$tokens_to_go[$ibeg_1] next_type=$type_ibeg_2 next_tok=$tokens_to_go[$ibeg_2]\n"; 17134 }; 17135 17136 # If line $n is the last line, we set some flags and 17137 # do any special checks for it 17138 if ( $n == $nmax ) { 17139 17140 # a terminal '{' should stay where it is 17141 next if $type_ibeg_2 eq '{'; 17142 17143 # set flag if statement $n ends in ';' 17144 $this_line_is_semicolon_terminated = $type_iend_2 eq ';' 17145 17146 # with possible side comment 17147 || ( $type_iend_2 eq '#' 17148 && $iend_2 - $ibeg_2 >= 2 17149 && $types_to_go[ $iend_2 - 2 ] eq ';' 17150 && $types_to_go[ $iend_2 - 1 ] eq 'b' ); 17151 } 17152 17153 #---------------------------------------------------------- 17154 # Recombine Section 1: 17155 # Examine the special token joining this line pair, if any. 17156 # Put as many tests in this section to avoid duplicate code and 17157 # to make formatting independent of whether breaks are to the 17158 # left or right of an operator. 17159 #---------------------------------------------------------- 17160 17161 my ($itok) = @{ $joint[$n] }; 17162 if ($itok) { 17163 17164 # FIXME: Patch - may not be necessary 17165 my $iend_1 = 17166 $type_iend_1 eq 'b' 17167 ? $iend_1 - 1 17168 : $iend_1; 17169 17170 my $iend_2 = 17171 $type_iend_2 eq 'b' 17172 ? $iend_2 - 1 17173 : $iend_2; 17174 ## END PATCH 17175 17176 my $type = $types_to_go[$itok]; 17177 17178 if ( $type eq ':' ) { 17179 17180 # do not join at a colon unless it disobeys the break request 17181 if ( $itok eq $iend_1 ) { 17182 next unless $want_break_before{$type}; 17183 } 17184 else { 17185 $leading_amp_count++; 17186 next if $want_break_before{$type}; 17187 } 17188 } ## end if ':' 17189 17190 # handle math operators + - * / 17191 elsif ( $is_math_op{$type} ) { 17192 17193 # Combine these lines if this line is a single 17194 # number, or if it is a short term with same 17195 # operator as the previous line. For example, in 17196 # the following code we will combine all of the 17197 # short terms $A, $B, $C, $D, $E, $F, together 17198 # instead of leaving them one per line: 17199 # my $time = 17200 # $A * $B * $C * $D * $E * $F * 17201 # ( 2. * $eps * $sigma * $area ) * 17202 # ( 1. / $tcold**3 - 1. / $thot**3 ); 17203 17204 # This can be important in math-intensive code. 17205 17206 my $good_combo; 17207 17208 my $itokp = min( $inext_to_go[$itok], $iend_2 ); 17209 my $itokpp = min( $inext_to_go[$itokp], $iend_2 ); 17210 my $itokm = max( $iprev_to_go[$itok], $ibeg_1 ); 17211 my $itokmm = max( $iprev_to_go[$itokm], $ibeg_1 ); 17212 17213 # check for a number on the right 17214 if ( $types_to_go[$itokp] eq 'n' ) { 17215 17216 # ok if nothing else on right 17217 if ( $itokp == $iend_2 ) { 17218 $good_combo = 1; 17219 } 17220 else { 17221 17222 # look one more token to right.. 17223 # okay if math operator or some termination 17224 $good_combo = 17225 ( ( $itokpp == $iend_2 ) 17226 && $is_math_op{ $types_to_go[$itokpp] } ) 17227 || $types_to_go[$itokpp] =~ /^[#,;]$/; 17228 } 17229 } 17230 17231 # check for a number on the left 17232 if ( !$good_combo && $types_to_go[$itokm] eq 'n' ) { 17233 17234 # okay if nothing else to left 17235 if ( $itokm == $ibeg_1 ) { 17236 $good_combo = 1; 17237 } 17238 17239 # otherwise look one more token to left 17240 else { 17241 17242 # okay if math operator, comma, or assignment 17243 $good_combo = ( $itokmm == $ibeg_1 ) 17244 && ( $is_math_op{ $types_to_go[$itokmm] } 17245 || $types_to_go[$itokmm] =~ /^[,]$/ 17246 || $is_assignment{ $types_to_go[$itokmm] } 17247 ); 17248 } 17249 } 17250 17251 # look for a single short token either side of the 17252 # operator 17253 if ( !$good_combo ) { 17254 17255 # Slight adjustment factor to make results 17256 # independent of break before or after operator in 17257 # long summed lists. (An operator and a space make 17258 # two spaces). 17259 my $two = ( $itok eq $iend_1 ) ? 2 : 0; 17260 17261 $good_combo = 17262 17263 # numbers or id's on both sides of this joint 17264 $types_to_go[$itokp] =~ /^[in]$/ 17265 && $types_to_go[$itokm] =~ /^[in]$/ 17266 17267 # one of the two lines must be short: 17268 && ( 17269 ( 17270 # no more than 2 nonblank tokens right of 17271 # joint 17272 $itokpp == $iend_2 17273 17274 # short 17275 && token_sequence_length( $itokp, $iend_2 ) 17276 < $two + 17277 $rOpts_short_concatenation_item_length 17278 ) 17279 || ( 17280 # no more than 2 nonblank tokens left of 17281 # joint 17282 $itokmm == $ibeg_1 17283 17284 # short 17285 && token_sequence_length( $ibeg_1, $itokm ) 17286 < 2 - $two + 17287 $rOpts_short_concatenation_item_length 17288 ) 17289 17290 ) 17291 17292 # keep pure terms; don't mix +- with */ 17293 && !( 17294 $is_plus_minus{$type} 17295 && ( $is_mult_div{ $types_to_go[$itokmm] } 17296 || $is_mult_div{ $types_to_go[$itokpp] } ) 17297 ) 17298 && !( 17299 $is_mult_div{$type} 17300 && ( $is_plus_minus{ $types_to_go[$itokmm] } 17301 || $is_plus_minus{ $types_to_go[$itokpp] } ) 17302 ) 17303 17304 ; 17305 } 17306 17307 # it is also good to combine if we can reduce to 2 lines 17308 if ( !$good_combo ) { 17309 17310 # index on other line where same token would be in a 17311 # long chain. 17312 my $iother = 17313 ( $itok == $iend_1 ) ? $iend_2 : $ibeg_1; 17314 17315 $good_combo = 17316 $n == 2 17317 && $n == $nmax 17318 && $types_to_go[$iother] ne $type; 17319 } 17320 17321 next unless ($good_combo); 17322 17323 } ## end math 17324 17325 elsif ( $is_amp_amp{$type} ) { 17326 ##TBD 17327 } ## end &&, || 17328 17329 elsif ( $is_assignment{$type} ) { 17330 ##TBD 17331 } ## end assignment 17332 } 17333 17334 #---------------------------------------------------------- 17335 # Recombine Section 2: 17336 # Examine token at $iend_1 (right end of first line of pair) 17337 #---------------------------------------------------------- 17338 17339 # an isolated '}' may join with a ';' terminated segment 17340 if ( $type_iend_1 eq '}' ) { 17341 17342 # Check for cases where combining a semicolon terminated 17343 # statement with a previous isolated closing paren will 17344 # allow the combined line to be outdented. This is 17345 # generally a good move. For example, we can join up 17346 # the last two lines here: 17347 # ( 17348 # $dev, $ino, $mode, $nlink, $uid, $gid, $rdev, 17349 # $size, $atime, $mtime, $ctime, $blksize, $blocks 17350 # ) 17351 # = stat($file); 17352 # 17353 # to get: 17354 # ( 17355 # $dev, $ino, $mode, $nlink, $uid, $gid, $rdev, 17356 # $size, $atime, $mtime, $ctime, $blksize, $blocks 17357 # ) = stat($file); 17358 # 17359 # which makes the parens line up. 17360 # 17361 # Another example, from Joe Matarazzo, probably looks best 17362 # with the 'or' clause appended to the trailing paren: 17363 # $self->some_method( 17364 # PARAM1 => 'foo', 17365 # PARAM2 => 'bar' 17366 # ) or die "Some_method didn't work"; 17367 # 17368 # But we do not want to do this for something like the -lp 17369 # option where the paren is not outdentable because the 17370 # trailing clause will be far to the right. 17371 # 17372 # The logic here is synchronized with the logic in sub 17373 # sub set_adjusted_indentation, which actually does 17374 # the outdenting. 17375 # 17376 $previous_outdentable_closing_paren = 17377 $this_line_is_semicolon_terminated 17378 17379 # only one token on last line 17380 && $ibeg_1 == $iend_1 17381 17382 # must be structural paren 17383 && $tokens_to_go[$iend_1] eq ')' 17384 17385 # style must allow outdenting, 17386 && !$closing_token_indentation{')'} 17387 17388 # only leading '&&', '||', and ':' if no others seen 17389 # (but note: our count made below could be wrong 17390 # due to intervening comments) 17391 && ( $leading_amp_count == 0 17392 || $type_ibeg_2 !~ /^(:|\&\&|\|\|)$/ ) 17393 17394 # but leading colons probably line up with with a 17395 # previous colon or question (count could be wrong). 17396 && $type_ibeg_2 ne ':' 17397 17398 # only one step in depth allowed. this line must not 17399 # begin with a ')' itself. 17400 && ( $nesting_depth_to_go[$iend_1] == 17401 $nesting_depth_to_go[$iend_2] + 1 ); 17402 17403 # YVES patch 2 of 2: 17404 # Allow cuddled eval chains, like this: 17405 # eval { 17406 # #STUFF; 17407 # 1; # return true 17408 # } or do { 17409 # #handle error 17410 # }; 17411 # This patch works together with a patch in 17412 # setting adjusted indentation (where the closing eval 17413 # brace is outdented if possible). 17414 # The problem is that an 'eval' block has continuation 17415 # indentation and it looks better to undo it in some 17416 # cases. If we do not use this patch we would get: 17417 # eval { 17418 # #STUFF; 17419 # 1; # return true 17420 # } 17421 # or do { 17422 # #handle error 17423 # }; 17424 # The alternative, for uncuddled style, is to create 17425 # a patch in set_adjusted_indentation which undoes 17426 # the indentation of a leading line like 'or do {'. 17427 # This doesn't work well with -icb through 17428 if ( 17429 $block_type_to_go[$iend_1] eq 'eval' 17430 && !$rOpts->{'line-up-parentheses'} 17431 && !$rOpts->{'indent-closing-brace'} 17432 && $tokens_to_go[$iend_2] eq '{' 17433 && ( 17434 ( $type_ibeg_2 =~ /^(|\&\&|\|\|)$/ ) 17435 || ( $type_ibeg_2 eq 'k' 17436 && $is_and_or{ $tokens_to_go[$ibeg_2] } ) 17437 || $is_if_unless{ $tokens_to_go[$ibeg_2] } 17438 ) 17439 ) 17440 { 17441 $previous_outdentable_closing_paren ||= 1; 17442 } 17443 17444 next 17445 unless ( 17446 $previous_outdentable_closing_paren 17447 17448 # handle '.' and '?' specially below 17449 || ( $type_ibeg_2 =~ /^[\.\?]$/ ) 17450 ); 17451 } 17452 17453 # YVES 17454 # honor breaks at opening brace 17455 # Added to prevent recombining something like this: 17456 # } || eval { package main; 17457 elsif ( $type_iend_1 eq '{' ) { 17458 next if $forced_breakpoint_to_go[$iend_1]; 17459 } 17460 17461 # do not recombine lines with ending &&, ||, 17462 elsif ( $is_amp_amp{$type_iend_1} ) { 17463 next unless $want_break_before{$type_iend_1}; 17464 } 17465 17466 # Identify and recombine a broken ?/: chain 17467 elsif ( $type_iend_1 eq '?' ) { 17468 17469 # Do not recombine different levels 17470 next 17471 if ( $levels_to_go[$ibeg_1] ne $levels_to_go[$ibeg_2] ); 17472 17473 # do not recombine unless next line ends in : 17474 next unless $type_iend_2 eq ':'; 17475 } 17476 17477 # for lines ending in a comma... 17478 elsif ( $type_iend_1 eq ',' ) { 17479 17480 # Do not recombine at comma which is following the 17481 # input bias. 17482 # TODO: might be best to make a special flag 17483 next if ( $old_breakpoint_to_go[$iend_1] ); 17484 17485 # an isolated '},' may join with an identifier + ';' 17486 # this is useful for the class of a 'bless' statement (bless.t) 17487 if ( $type_ibeg_1 eq '}' 17488 && $type_ibeg_2 eq 'i' ) 17489 { 17490 next 17491 unless ( ( $ibeg_1 == ( $iend_1 - 1 ) ) 17492 && ( $iend_2 == ( $ibeg_2 + 1 ) ) 17493 && $this_line_is_semicolon_terminated ); 17494 17495 # override breakpoint 17496 $forced_breakpoint_to_go[$iend_1] = 0; 17497 } 17498 17499 # but otherwise .. 17500 else { 17501 17502 # do not recombine after a comma unless this will leave 17503 # just 1 more line 17504 next unless ( $n + 1 >= $nmax ); 17505 17506 # do not recombine if there is a change in indentation depth 17507 next 17508 if ( 17509 $levels_to_go[$iend_1] != $levels_to_go[$iend_2] ); 17510 17511 # do not recombine a "complex expression" after a 17512 # comma. "complex" means no parens. 17513 my $saw_paren; 17514 foreach my $ii ( $ibeg_2 .. $iend_2 ) { 17515 if ( $tokens_to_go[$ii] eq '(' ) { 17516 $saw_paren = 1; 17517 last; 17518 } 17519 } 17520 next if $saw_paren; 17521 } 17522 } 17523 17524 # opening paren.. 17525 elsif ( $type_iend_1 eq '(' ) { 17526 17527 # No longer doing this 17528 } 17529 17530 elsif ( $type_iend_1 eq ')' ) { 17531 17532 # No longer doing this 17533 } 17534 17535 # keep a terminal for-semicolon 17536 elsif ( $type_iend_1 eq 'f' ) { 17537 next; 17538 } 17539 17540 # if '=' at end of line ... 17541 elsif ( $is_assignment{$type_iend_1} ) { 17542 17543 # keep break after = if it was in input stream 17544 # this helps prevent 'blinkers' 17545 next if $old_breakpoint_to_go[$iend_1] 17546 17547 # don't strand an isolated '=' 17548 && $iend_1 != $ibeg_1; 17549 17550 my $is_short_quote = 17551 ( $type_ibeg_2 eq 'Q' 17552 && $ibeg_2 == $iend_2 17553 && token_sequence_length( $ibeg_2, $ibeg_2 ) < 17554 $rOpts_short_concatenation_item_length ); 17555 my $is_ternary = 17556 ( $type_ibeg_1 eq '?' 17557 && ( $ibeg_3 >= 0 && $types_to_go[$ibeg_3] eq ':' ) ); 17558 17559 # always join an isolated '=', a short quote, or if this 17560 # will put ?/: at start of adjacent lines 17561 if ( $ibeg_1 != $iend_1 17562 && !$is_short_quote 17563 && !$is_ternary ) 17564 { 17565 next 17566 unless ( 17567 ( 17568 17569 # unless we can reduce this to two lines 17570 $nmax < $n + 2 17571 17572 # or three lines, the last with a leading semicolon 17573 || ( $nmax == $n + 2 17574 && $types_to_go[$ibeg_nmax] eq ';' ) 17575 17576 # or the next line ends with a here doc 17577 || $type_iend_2 eq 'h' 17578 17579 # or the next line ends in an open paren or brace 17580 # and the break hasn't been forced [dima.t] 17581 || ( !$forced_breakpoint_to_go[$iend_1] 17582 && $type_iend_2 eq '{' ) 17583 ) 17584 17585 # do not recombine if the two lines might align well 17586 # this is a very approximate test for this 17587 && ( $ibeg_3 >= 0 17588 && $type_ibeg_2 ne $types_to_go[$ibeg_3] ) 17589 ); 17590 17591 if ( 17592 17593 # Recombine if we can make two lines 17594 $nmax >= $n + 2 17595 17596 # -lp users often prefer this: 17597 # my $title = function($env, $env, $sysarea, 17598 # "bubba Borrower Entry"); 17599 # so we will recombine if -lp is used we have 17600 # ending comma 17601 && ( !$rOpts_line_up_parentheses 17602 || $type_iend_2 ne ',' ) 17603 ) 17604 { 17605 17606 # otherwise, scan the rhs line up to last token for 17607 # complexity. Note that we are not counting the last 17608 # token in case it is an opening paren. 17609 my $tv = 0; 17610 my $depth = $nesting_depth_to_go[$ibeg_2]; 17611 for ( my $i = $ibeg_2 + 1 ; $i < $iend_2 ; $i++ ) { 17612 if ( $nesting_depth_to_go[$i] != $depth ) { 17613 $tv++; 17614 last if ( $tv > 1 ); 17615 } 17616 $depth = $nesting_depth_to_go[$i]; 17617 } 17618 17619 # ok to recombine if no level changes before last token 17620 if ( $tv > 0 ) { 17621 17622 # otherwise, do not recombine if more than two 17623 # level changes. 17624 next if ( $tv > 1 ); 17625 17626 # check total complexity of the two adjacent lines 17627 # that will occur if we do this join 17628 my $istop = 17629 ( $n < $nmax ) ? $$ri_end[ $n + 1 ] : $iend_2; 17630 for ( my $i = $iend_2 ; $i <= $istop ; $i++ ) { 17631 if ( $nesting_depth_to_go[$i] != $depth ) { 17632 $tv++; 17633 last if ( $tv > 2 ); 17634 } 17635 $depth = $nesting_depth_to_go[$i]; 17636 } 17637 17638 # do not recombine if total is more than 2 level changes 17639 next if ( $tv > 2 ); 17640 } 17641 } 17642 } 17643 17644 unless ( $tokens_to_go[$ibeg_2] =~ /^[\{\(\[]$/ ) { 17645 $forced_breakpoint_to_go[$iend_1] = 0; 17646 } 17647 } 17648 17649 # for keywords.. 17650 elsif ( $type_iend_1 eq 'k' ) { 17651 17652 # make major control keywords stand out 17653 # (recombine.t) 17654 next 17655 if ( 17656 17657 #/^(last|next|redo|return)$/ 17658 $is_last_next_redo_return{ $tokens_to_go[$iend_1] } 17659 17660 # but only if followed by multiple lines 17661 && $n < $nmax 17662 ); 17663 17664 if ( $is_and_or{ $tokens_to_go[$iend_1] } ) { 17665 next 17666 unless $want_break_before{ $tokens_to_go[$iend_1] }; 17667 } 17668 } 17669 17670 #---------------------------------------------------------- 17671 # Recombine Section 3: 17672 # Examine token at $ibeg_2 (left end of second line of pair) 17673 #---------------------------------------------------------- 17674 17675 # join lines identified above as capable of 17676 # causing an outdented line with leading closing paren 17677 # Note that we are skipping the rest of this section 17678 if ($previous_outdentable_closing_paren) { 17679 $forced_breakpoint_to_go[$iend_1] = 0; 17680 } 17681 17682 # handle lines with leading &&, || 17683 elsif ( $is_amp_amp{$type_ibeg_2} ) { 17684 17685 $leading_amp_count++; 17686 17687 # ok to recombine if it follows a ? or : 17688 # and is followed by an open paren.. 17689 my $ok = 17690 ( $is_ternary{$type_ibeg_1} 17691 && $tokens_to_go[$iend_2] eq '(' ) 17692 17693 # or is followed by a ? or : at same depth 17694 # 17695 # We are looking for something like this. We can 17696 # recombine the && line with the line above to make the 17697 # structure more clear: 17698 # return 17699 # exists $G->{Attr}->{V} 17700 # && exists $G->{Attr}->{V}->{$u} 17701 # ? %{ $G->{Attr}->{V}->{$u} } 17702 # : (); 17703 # 17704 # We should probably leave something like this alone: 17705 # return 17706 # exists $G->{Attr}->{E} 17707 # && exists $G->{Attr}->{E}->{$u} 17708 # && exists $G->{Attr}->{E}->{$u}->{$v} 17709 # ? %{ $G->{Attr}->{E}->{$u}->{$v} } 17710 # : (); 17711 # so that we either have all of the &&'s (or ||'s) 17712 # on one line, as in the first example, or break at 17713 # each one as in the second example. However, it 17714 # sometimes makes things worse to check for this because 17715 # it prevents multiple recombinations. So this is not done. 17716 || ( $ibeg_3 >= 0 17717 && $is_ternary{ $types_to_go[$ibeg_3] } 17718 && $nesting_depth_to_go[$ibeg_3] == 17719 $nesting_depth_to_go[$ibeg_2] ); 17720 17721 next if !$ok && $want_break_before{$type_ibeg_2}; 17722 $forced_breakpoint_to_go[$iend_1] = 0; 17723 17724 # tweak the bond strength to give this joint priority 17725 # over ? and : 17726 $bs_tweak = 0.25; 17727 } 17728 17729 # Identify and recombine a broken ?/: chain 17730 elsif ( $type_ibeg_2 eq '?' ) { 17731 17732 # Do not recombine different levels 17733 my $lev = $levels_to_go[$ibeg_2]; 17734 next if ( $lev ne $levels_to_go[$ibeg_1] ); 17735 17736 # Do not recombine a '?' if either next line or 17737 # previous line does not start with a ':'. The reasons 17738 # are that (1) no alignment of the ? will be possible 17739 # and (2) the expression is somewhat complex, so the 17740 # '?' is harder to see in the interior of the line. 17741 my $follows_colon = $ibeg_1 >= 0 && $type_ibeg_1 eq ':'; 17742 my $precedes_colon = 17743 $ibeg_3 >= 0 && $types_to_go[$ibeg_3] eq ':'; 17744 next unless ( $follows_colon || $precedes_colon ); 17745 17746 # we will always combining a ? line following a : line 17747 if ( !$follows_colon ) { 17748 17749 # ...otherwise recombine only if it looks like a chain. 17750 # we will just look at a few nearby lines to see if 17751 # this looks like a chain. 17752 my $local_count = 0; 17753 foreach my $ii ( $ibeg_0, $ibeg_1, $ibeg_3, $ibeg_4 ) { 17754 $local_count++ 17755 if $ii >= 0 17756 && $types_to_go[$ii] eq ':' 17757 && $levels_to_go[$ii] == $lev; 17758 } 17759 next unless ( $local_count > 1 ); 17760 } 17761 $forced_breakpoint_to_go[$iend_1] = 0; 17762 } 17763 17764 # do not recombine lines with leading '.' 17765 elsif ( $type_ibeg_2 eq '.' ) { 17766 my $i_next_nonblank = min( $inext_to_go[$ibeg_2], $iend_2 ); 17767 next 17768 unless ( 17769 17770 # ... unless there is just one and we can reduce 17771 # this to two lines if we do. For example, this 17772 # 17773 # 17774 # $bodyA .= 17775 # '($dummy, $pat) = &get_next_tex_cmd;' . '$args .= $pat;' 17776 # 17777 # looks better than this: 17778 # $bodyA .= '($dummy, $pat) = &get_next_tex_cmd;' 17779 # . '$args .= $pat;' 17780 17781 ( 17782 $n == 2 17783 && $n == $nmax 17784 && $type_ibeg_1 ne $type_ibeg_2 17785 ) 17786 17787 # ... or this would strand a short quote , like this 17788 # . "some long qoute" 17789 # . "\n"; 17790 17791 || ( $types_to_go[$i_next_nonblank] eq 'Q' 17792 && $i_next_nonblank >= $iend_2 - 1 17793 && $token_lengths_to_go[$i_next_nonblank] < 17794 $rOpts_short_concatenation_item_length ) 17795 ); 17796 } 17797 17798 # handle leading keyword.. 17799 elsif ( $type_ibeg_2 eq 'k' ) { 17800 17801 # handle leading "or" 17802 if ( $tokens_to_go[$ibeg_2] eq 'or' ) { 17803 next 17804 unless ( 17805 $this_line_is_semicolon_terminated 17806 && ( 17807 17808 # following 'if' or 'unless' or 'or' 17809 $type_ibeg_1 eq 'k' 17810 && $is_if_unless{ $tokens_to_go[$ibeg_1] } 17811 17812 # important: only combine a very simple or 17813 # statement because the step below may have 17814 # combined a trailing 'and' with this or, 17815 # and we do not want to then combine 17816 # everything together 17817 && ( $iend_2 - $ibeg_2 <= 7 ) 17818 ) 17819 ); 17820 } 17821 17822 # handle leading 'and' 17823 elsif ( $tokens_to_go[$ibeg_2] eq 'and' ) { 17824 17825 # Decide if we will combine a single terminal 'and' 17826 # after an 'if' or 'unless'. 17827 17828 # This looks best with the 'and' on the same 17829 # line as the 'if': 17830 # 17831 # $a = 1 17832 # if $seconds and $nu < 2; 17833 # 17834 # But this looks better as shown: 17835 # 17836 # $a = 1 17837 # if !$this->{Parents}{$_} 17838 # or $this->{Parents}{$_} eq $_; 17839 # 17840 next 17841 unless ( 17842 $this_line_is_semicolon_terminated 17843 && ( 17844 17845 # following 'if' or 'unless' or 'or' 17846 $type_ibeg_1 eq 'k' 17847 && ( $is_if_unless{ $tokens_to_go[$ibeg_1] } 17848 || $tokens_to_go[$ibeg_1] eq 'or' ) 17849 ) 17850 ); 17851 } 17852 17853 # handle leading "if" and "unless" 17854 elsif ( $is_if_unless{ $tokens_to_go[$ibeg_2] } ) { 17855 17856 # FIXME: This is still experimental..may not be too useful 17857 next 17858 unless ( 17859 $this_line_is_semicolon_terminated 17860 17861 # previous line begins with 'and' or 'or' 17862 && $type_ibeg_1 eq 'k' 17863 && $is_and_or{ $tokens_to_go[$ibeg_1] } 17864 17865 ); 17866 } 17867 17868 # handle all other leading keywords 17869 else { 17870 17871 # keywords look best at start of lines, 17872 # but combine things like "1 while" 17873 unless ( $is_assignment{$type_iend_1} ) { 17874 next 17875 if ( ( $type_iend_1 ne 'k' ) 17876 && ( $tokens_to_go[$ibeg_2] ne 'while' ) ); 17877 } 17878 } 17879 } 17880 17881 # similar treatment of && and || as above for 'and' and 'or': 17882 # NOTE: This block of code is currently bypassed because 17883 # of a previous block but is retained for possible future use. 17884 elsif ( $is_amp_amp{$type_ibeg_2} ) { 17885 17886 # maybe looking at something like: 17887 # unless $TEXTONLY || $item =~ m%</?(hr>|p>|a|img)%i; 17888 17889 next 17890 unless ( 17891 $this_line_is_semicolon_terminated 17892 17893 # previous line begins with an 'if' or 'unless' keyword 17894 && $type_ibeg_1 eq 'k' 17895 && $is_if_unless{ $tokens_to_go[$ibeg_1] } 17896 17897 ); 17898 } 17899 17900 # handle line with leading = or similar 17901 elsif ( $is_assignment{$type_ibeg_2} ) { 17902 next unless ( $n == 1 || $n == $nmax ); 17903 next if $old_breakpoint_to_go[$iend_1]; 17904 next 17905 unless ( 17906 17907 # unless we can reduce this to two lines 17908 $nmax == 2 17909 17910 # or three lines, the last with a leading semicolon 17911 || ( $nmax == 3 && $types_to_go[$ibeg_nmax] eq ';' ) 17912 17913 # or the next line ends with a here doc 17914 || $type_iend_2 eq 'h' 17915 17916 # or this is a short line ending in ; 17917 || ( $n == $nmax && $this_line_is_semicolon_terminated ) 17918 ); 17919 $forced_breakpoint_to_go[$iend_1] = 0; 17920 } 17921 17922 #---------------------------------------------------------- 17923 # Recombine Section 4: 17924 # Combine the lines if we arrive here and it is possible 17925 #---------------------------------------------------------- 17926 17927 # honor hard breakpoints 17928 next if ( $forced_breakpoint_to_go[$iend_1] > 0 ); 17929 17930 my $bs = $bond_strength_to_go[$iend_1] + $bs_tweak; 17931 17932 # combined line cannot be too long 17933 my $excess = excess_line_length( $ibeg_1, $iend_2 ); 17934 next if ( $excess > 0 ); 17935 17936 # Require a few extra spaces before recombining lines if we are 17937 # at an old breakpoint unless this is a simple list or terminal 17938 # line. The goal is to avoid oscillating between two 17939 # quasi-stable end states. For example this snippet caused 17940 # problems: 17941## my $this = 17942## bless { 17943## TText => "[" . ( join ',', map { "\"$_\"" } split "\n", $_ ) . "]" 17944## }, 17945## $type; 17946 next 17947 if ( $old_breakpoint_to_go[$iend_1] 17948 && !$this_line_is_semicolon_terminated 17949 && $n < $nmax 17950 && $excess + 4 > 0 17951 && $type_iend_2 ne ',' ); 17952 17953 # do not recombine if we would skip in indentation levels 17954 if ( $n < $nmax ) { 17955 my $if_next = $$ri_beg[ $n + 1 ]; 17956 next 17957 if ( 17958 $levels_to_go[$ibeg_1] < $levels_to_go[$ibeg_2] 17959 && $levels_to_go[$ibeg_2] < $levels_to_go[$if_next] 17960 17961 # but an isolated 'if (' is undesirable 17962 && !( 17963 $n == 1 17964 && $iend_1 - $ibeg_1 <= 2 17965 && $type_ibeg_1 eq 'k' 17966 && $tokens_to_go[$ibeg_1] eq 'if' 17967 && $tokens_to_go[$iend_1] ne '(' 17968 ) 17969 ); 17970 } 17971 17972 # honor no-break's 17973 next if ( $bs >= NO_BREAK - 1 ); 17974 17975 # remember the pair with the greatest bond strength 17976 if ( !$n_best ) { 17977 $n_best = $n; 17978 $bs_best = $bs; 17979 } 17980 else { 17981 17982 if ( $bs > $bs_best ) { 17983 $n_best = $n; 17984 $bs_best = $bs; 17985 } 17986 } 17987 } 17988 17989 # recombine the pair with the greatest bond strength 17990 if ($n_best) { 17991 splice @$ri_beg, $n_best, 1; 17992 splice @$ri_end, $n_best - 1, 1; 17993 splice @joint, $n_best, 1; 17994 17995 # keep going if we are still making progress 17996 $more_to_do++; 17997 } 17998 } 17999 return ( $ri_beg, $ri_end ); 18000 } 18001} # end recombine_breakpoints 18002 18003sub break_all_chain_tokens { 18004 18005 # scan the current breakpoints looking for breaks at certain "chain 18006 # operators" (. : && || + etc) which often occur repeatedly in a long 18007 # statement. If we see a break at any one, break at all similar tokens 18008 # within the same container. 18009 # 18010 my ( $ri_left, $ri_right ) = @_; 18011 18012 my %saw_chain_type; 18013 my %left_chain_type; 18014 my %right_chain_type; 18015 my %interior_chain_type; 18016 my $nmax = @$ri_right - 1; 18017 18018 # scan the left and right end tokens of all lines 18019 my $count = 0; 18020 for my $n ( 0 .. $nmax ) { 18021 my $il = $$ri_left[$n]; 18022 my $ir = $$ri_right[$n]; 18023 my $typel = $types_to_go[$il]; 18024 my $typer = $types_to_go[$ir]; 18025 $typel = '+' if ( $typel eq '-' ); # treat + and - the same 18026 $typer = '+' if ( $typer eq '-' ); 18027 $typel = '*' if ( $typel eq '/' ); # treat * and / the same 18028 $typer = '*' if ( $typer eq '/' ); 18029 my $tokenl = $tokens_to_go[$il]; 18030 my $tokenr = $tokens_to_go[$ir]; 18031 18032 if ( $is_chain_operator{$tokenl} && $want_break_before{$typel} ) { 18033 next if ( $typel eq '?' ); 18034 push @{ $left_chain_type{$typel} }, $il; 18035 $saw_chain_type{$typel} = 1; 18036 $count++; 18037 } 18038 if ( $is_chain_operator{$tokenr} && !$want_break_before{$typer} ) { 18039 next if ( $typer eq '?' ); 18040 push @{ $right_chain_type{$typer} }, $ir; 18041 $saw_chain_type{$typer} = 1; 18042 $count++; 18043 } 18044 } 18045 return unless $count; 18046 18047 # now look for any interior tokens of the same types 18048 $count = 0; 18049 for my $n ( 0 .. $nmax ) { 18050 my $il = $$ri_left[$n]; 18051 my $ir = $$ri_right[$n]; 18052 for ( my $i = $il + 1 ; $i < $ir ; $i++ ) { 18053 my $type = $types_to_go[$i]; 18054 $type = '+' if ( $type eq '-' ); 18055 $type = '*' if ( $type eq '/' ); 18056 if ( $saw_chain_type{$type} ) { 18057 push @{ $interior_chain_type{$type} }, $i; 18058 $count++; 18059 } 18060 } 18061 } 18062 return unless $count; 18063 18064 # now make a list of all new break points 18065 my @insert_list; 18066 18067 # loop over all chain types 18068 foreach my $type ( keys %saw_chain_type ) { 18069 18070 # quit if just ONE continuation line with leading . For example-- 18071 # print LATEXFILE '\framebox{\parbox[c][' . $h . '][t]{' . $w . '}{' 18072 # . $contents; 18073 last if ( $nmax == 1 && $type =~ /^[\.\+]$/ ); 18074 18075 # loop over all interior chain tokens 18076 foreach my $itest ( @{ $interior_chain_type{$type} } ) { 18077 18078 # loop over all left end tokens of same type 18079 if ( $left_chain_type{$type} ) { 18080 next if $nobreak_to_go[ $itest - 1 ]; 18081 foreach my $i ( @{ $left_chain_type{$type} } ) { 18082 next unless in_same_container( $i, $itest ); 18083 push @insert_list, $itest - 1; 18084 18085 # Break at matching ? if this : is at a different level. 18086 # For example, the ? before $THRf_DEAD in the following 18087 # should get a break if its : gets a break. 18088 # 18089 # my $flags = 18090 # ( $_ & 1 ) ? ( $_ & 4 ) ? $THRf_DEAD : $THRf_ZOMBIE 18091 # : ( $_ & 4 ) ? $THRf_R_DETACHED 18092 # : $THRf_R_JOINABLE; 18093 if ( $type eq ':' 18094 && $levels_to_go[$i] != $levels_to_go[$itest] ) 18095 { 18096 my $i_question = $mate_index_to_go[$itest]; 18097 if ( $i_question > 0 ) { 18098 push @insert_list, $i_question - 1; 18099 } 18100 } 18101 last; 18102 } 18103 } 18104 18105 # loop over all right end tokens of same type 18106 if ( $right_chain_type{$type} ) { 18107 next if $nobreak_to_go[$itest]; 18108 foreach my $i ( @{ $right_chain_type{$type} } ) { 18109 next unless in_same_container( $i, $itest ); 18110 push @insert_list, $itest; 18111 18112 # break at matching ? if this : is at a different level 18113 if ( $type eq ':' 18114 && $levels_to_go[$i] != $levels_to_go[$itest] ) 18115 { 18116 my $i_question = $mate_index_to_go[$itest]; 18117 if ( $i_question >= 0 ) { 18118 push @insert_list, $i_question; 18119 } 18120 } 18121 last; 18122 } 18123 } 18124 } 18125 } 18126 18127 # insert any new break points 18128 if (@insert_list) { 18129 insert_additional_breaks( \@insert_list, $ri_left, $ri_right ); 18130 } 18131} 18132 18133sub break_equals { 18134 18135 # Look for assignment operators that could use a breakpoint. 18136 # For example, in the following snippet 18137 # 18138 # $HOME = $ENV{HOME} 18139 # || $ENV{LOGDIR} 18140 # || $pw[7] 18141 # || die "no home directory for user $<"; 18142 # 18143 # we could break at the = to get this, which is a little nicer: 18144 # $HOME = 18145 # $ENV{HOME} 18146 # || $ENV{LOGDIR} 18147 # || $pw[7] 18148 # || die "no home directory for user $<"; 18149 # 18150 # The logic here follows the logic in set_logical_padding, which 18151 # will add the padding in the second line to improve alignment. 18152 # 18153 my ( $ri_left, $ri_right ) = @_; 18154 my $nmax = @$ri_right - 1; 18155 return unless ( $nmax >= 2 ); 18156 18157 # scan the left ends of first two lines 18158 my $tokbeg = ""; 18159 my $depth_beg; 18160 for my $n ( 1 .. 2 ) { 18161 my $il = $$ri_left[$n]; 18162 my $typel = $types_to_go[$il]; 18163 my $tokenl = $tokens_to_go[$il]; 18164 18165 my $has_leading_op = ( $tokenl =~ /^\w/ ) 18166 ? $is_chain_operator{$tokenl} # + - * / : ? && || 18167 : $is_chain_operator{$typel}; # and, or 18168 return unless ($has_leading_op); 18169 if ( $n > 1 ) { 18170 return 18171 unless ( $tokenl eq $tokbeg 18172 && $nesting_depth_to_go[$il] eq $depth_beg ); 18173 } 18174 $tokbeg = $tokenl; 18175 $depth_beg = $nesting_depth_to_go[$il]; 18176 } 18177 18178 # now look for any interior tokens of the same types 18179 my $il = $$ri_left[0]; 18180 my $ir = $$ri_right[0]; 18181 18182 # now make a list of all new break points 18183 my @insert_list; 18184 for ( my $i = $ir - 1 ; $i > $il ; $i-- ) { 18185 my $type = $types_to_go[$i]; 18186 if ( $is_assignment{$type} 18187 && $nesting_depth_to_go[$i] eq $depth_beg ) 18188 { 18189 if ( $want_break_before{$type} ) { 18190 push @insert_list, $i - 1; 18191 } 18192 else { 18193 push @insert_list, $i; 18194 } 18195 } 18196 } 18197 18198 # Break after a 'return' followed by a chain of operators 18199 # return ( $^O !~ /win32|dos/i ) 18200 # && ( $^O ne 'VMS' ) 18201 # && ( $^O ne 'OS2' ) 18202 # && ( $^O ne 'MacOS' ); 18203 # To give: 18204 # return 18205 # ( $^O !~ /win32|dos/i ) 18206 # && ( $^O ne 'VMS' ) 18207 # && ( $^O ne 'OS2' ) 18208 # && ( $^O ne 'MacOS' ); 18209 my $i = 0; 18210 if ( $types_to_go[$i] eq 'k' 18211 && $tokens_to_go[$i] eq 'return' 18212 && $ir > $il 18213 && $nesting_depth_to_go[$i] eq $depth_beg ) 18214 { 18215 push @insert_list, $i; 18216 } 18217 18218 return unless (@insert_list); 18219 18220 # One final check... 18221 # scan second and third lines and be sure there are no assignments 18222 # we want to avoid breaking at an = to make something like this: 18223 # unless ( $icon = 18224 # $html_icons{"$type-$state"} 18225 # or $icon = $html_icons{$type} 18226 # or $icon = $html_icons{$state} ) 18227 for my $n ( 1 .. 2 ) { 18228 my $il = $$ri_left[$n]; 18229 my $ir = $$ri_right[$n]; 18230 for ( my $i = $il + 1 ; $i <= $ir ; $i++ ) { 18231 my $type = $types_to_go[$i]; 18232 return 18233 if ( $is_assignment{$type} 18234 && $nesting_depth_to_go[$i] eq $depth_beg ); 18235 } 18236 } 18237 18238 # ok, insert any new break point 18239 if (@insert_list) { 18240 insert_additional_breaks( \@insert_list, $ri_left, $ri_right ); 18241 } 18242} 18243 18244sub insert_final_breaks { 18245 18246 my ( $ri_left, $ri_right ) = @_; 18247 18248 my $nmax = @$ri_right - 1; 18249 18250 # scan the left and right end tokens of all lines 18251 my $count = 0; 18252 my $i_first_colon = -1; 18253 for my $n ( 0 .. $nmax ) { 18254 my $il = $$ri_left[$n]; 18255 my $ir = $$ri_right[$n]; 18256 my $typel = $types_to_go[$il]; 18257 my $typer = $types_to_go[$ir]; 18258 return if ( $typel eq '?' ); 18259 return if ( $typer eq '?' ); 18260 if ( $typel eq ':' ) { $i_first_colon = $il; last; } 18261 elsif ( $typer eq ':' ) { $i_first_colon = $ir; last; } 18262 } 18263 18264 # For long ternary chains, 18265 # if the first : we see has its # ? is in the interior 18266 # of a preceding line, then see if there are any good 18267 # breakpoints before the ?. 18268 if ( $i_first_colon > 0 ) { 18269 my $i_question = $mate_index_to_go[$i_first_colon]; 18270 if ( $i_question > 0 ) { 18271 my @insert_list; 18272 for ( my $ii = $i_question - 1 ; $ii >= 0 ; $ii -= 1 ) { 18273 my $token = $tokens_to_go[$ii]; 18274 my $type = $types_to_go[$ii]; 18275 18276 # For now, a good break is either a comma or a 'return'. 18277 if ( ( $type eq ',' || $type eq 'k' && $token eq 'return' ) 18278 && in_same_container( $ii, $i_question ) ) 18279 { 18280 push @insert_list, $ii; 18281 last; 18282 } 18283 } 18284 18285 # insert any new break points 18286 if (@insert_list) { 18287 insert_additional_breaks( \@insert_list, $ri_left, $ri_right ); 18288 } 18289 } 18290 } 18291} 18292 18293sub in_same_container { 18294 18295 # check to see if tokens at i1 and i2 are in the 18296 # same container, and not separated by a comma, ? or : 18297 my ( $i1, $i2 ) = @_; 18298 my $type = $types_to_go[$i1]; 18299 my $depth = $nesting_depth_to_go[$i1]; 18300 return unless ( $nesting_depth_to_go[$i2] == $depth ); 18301 if ( $i2 < $i1 ) { ( $i1, $i2 ) = ( $i2, $i1 ) } 18302 18303 ########################################################### 18304 # This is potentially a very slow routine and not critical. 18305 # For safety just give up for large differences. 18306 # See test file 'infinite_loop.txt' 18307 # TODO: replace this loop with a data structure 18308 ########################################################### 18309 return if ( $i2 - $i1 > 200 ); 18310 18311 for ( my $i = $i1 + 1 ; $i < $i2 ; $i++ ) { 18312 next if ( $nesting_depth_to_go[$i] > $depth ); 18313 return if ( $nesting_depth_to_go[$i] < $depth ); 18314 18315 my $tok = $tokens_to_go[$i]; 18316 $tok = ',' if $tok eq '=>'; # treat => same as , 18317 18318 # Example: we would not want to break at any of these .'s 18319 # : "<A HREF=\"#item_" . htmlify( 0, $s2 ) . "\">$str</A>" 18320 if ( $type ne ':' ) { 18321 return if ( $tok =~ /^[\,\:\?]$/ ) || $tok eq '||' || $tok eq 'or'; 18322 } 18323 else { 18324 return if ( $tok =~ /^[\,]$/ ); 18325 } 18326 } 18327 return 1; 18328} 18329 18330sub set_continuation_breaks { 18331 18332 # Define an array of indexes for inserting newline characters to 18333 # keep the line lengths below the maximum desired length. There is 18334 # an implied break after the last token, so it need not be included. 18335 18336 # Method: 18337 # This routine is part of series of routines which adjust line 18338 # lengths. It is only called if a statement is longer than the 18339 # maximum line length, or if a preliminary scanning located 18340 # desirable break points. Sub scan_list has already looked at 18341 # these tokens and set breakpoints (in array 18342 # $forced_breakpoint_to_go[$i]) where it wants breaks (for example 18343 # after commas, after opening parens, and before closing parens). 18344 # This routine will honor these breakpoints and also add additional 18345 # breakpoints as necessary to keep the line length below the maximum 18346 # requested. It bases its decision on where the 'bond strength' is 18347 # lowest. 18348 18349 # Output: returns references to the arrays: 18350 # @i_first 18351 # @i_last 18352 # which contain the indexes $i of the first and last tokens on each 18353 # line. 18354 18355 # In addition, the array: 18356 # $forced_breakpoint_to_go[$i] 18357 # may be updated to be =1 for any index $i after which there must be 18358 # a break. This signals later routines not to undo the breakpoint. 18359 18360 my $saw_good_break = shift; 18361 my @i_first = (); # the first index to output 18362 my @i_last = (); # the last index to output 18363 my @i_colon_breaks = (); # needed to decide if we have to break at ?'s 18364 if ( $types_to_go[0] eq ':' ) { push @i_colon_breaks, 0 } 18365 18366 set_bond_strengths(); 18367 18368 my $imin = 0; 18369 my $imax = $max_index_to_go; 18370 if ( $types_to_go[$imin] eq 'b' ) { $imin++ } 18371 if ( $types_to_go[$imax] eq 'b' ) { $imax-- } 18372 my $i_begin = $imin; # index for starting next iteration 18373 18374 my $leading_spaces = leading_spaces_to_go($imin); 18375 my $line_count = 0; 18376 my $last_break_strength = NO_BREAK; 18377 my $i_last_break = -1; 18378 my $max_bias = 0.001; 18379 my $tiny_bias = 0.0001; 18380 my $leading_alignment_token = ""; 18381 my $leading_alignment_type = ""; 18382 18383 # see if any ?/:'s are in order 18384 my $colons_in_order = 1; 18385 my $last_tok = ""; 18386 my @colon_list = grep /^[\?\:]$/, @types_to_go[ 0 .. $max_index_to_go ]; 18387 my $colon_count = @colon_list; 18388 foreach (@colon_list) { 18389 if ( $_ eq $last_tok ) { $colons_in_order = 0; last } 18390 $last_tok = $_; 18391 } 18392 18393 # This is a sufficient but not necessary condition for colon chain 18394 my $is_colon_chain = ( $colons_in_order && @colon_list > 2 ); 18395 18396 #------------------------------------------------------- 18397 # BEGINNING of main loop to set continuation breakpoints 18398 # Keep iterating until we reach the end 18399 #------------------------------------------------------- 18400 while ( $i_begin <= $imax ) { 18401 my $lowest_strength = NO_BREAK; 18402 my $starting_sum = $summed_lengths_to_go[$i_begin]; 18403 my $i_lowest = -1; 18404 my $i_test = -1; 18405 my $lowest_next_token = ''; 18406 my $lowest_next_type = 'b'; 18407 my $i_lowest_next_nonblank = -1; 18408 18409 #------------------------------------------------------- 18410 # BEGINNING of inner loop to find the best next breakpoint 18411 #------------------------------------------------------- 18412 for ( $i_test = $i_begin ; $i_test <= $imax ; $i_test++ ) { 18413 my $type = $types_to_go[$i_test]; 18414 my $token = $tokens_to_go[$i_test]; 18415 my $next_type = $types_to_go[ $i_test + 1 ]; 18416 my $next_token = $tokens_to_go[ $i_test + 1 ]; 18417 my $i_next_nonblank = $inext_to_go[$i_test]; 18418 my $next_nonblank_type = $types_to_go[$i_next_nonblank]; 18419 my $next_nonblank_token = $tokens_to_go[$i_next_nonblank]; 18420 my $next_nonblank_block_type = $block_type_to_go[$i_next_nonblank]; 18421 my $strength = $bond_strength_to_go[$i_test]; 18422 my $maximum_line_length = maximum_line_length($i_begin); 18423 18424 # use old breaks as a tie-breaker. For example to 18425 # prevent blinkers with -pbp in this code: 18426 18427##@keywords{ 18428## qw/ARG OUTPUT PROTO CONSTRUCTOR RETURNS DESC PARAMS SEEALSO EXAMPLE/} 18429## = (); 18430 18431 # At the same time try to prevent a leading * in this code 18432 # with the default formatting: 18433 # 18434## return 18435## factorial( $a + $b - 1 ) / factorial( $a - 1 ) / factorial( $b - 1 ) 18436## * ( $x**( $a - 1 ) ) 18437## * ( ( 1 - $x )**( $b - 1 ) ); 18438 18439 # reduce strength a bit to break ties at an old breakpoint ... 18440 if ( 18441 $old_breakpoint_to_go[$i_test] 18442 18443 # which is a 'good' breakpoint, meaning ... 18444 # we don't want to break before it 18445 && !$want_break_before{$type} 18446 18447 # and either we want to break before the next token 18448 # or the next token is not short (i.e. not a '*', '/' etc.) 18449 && $i_next_nonblank <= $imax 18450 && ( $want_break_before{$next_nonblank_type} 18451 || $token_lengths_to_go[$i_next_nonblank] > 2 18452 || $next_nonblank_type =~ /^[\,\(\[\{L]$/ ) 18453 ) 18454 { 18455 $strength -= $tiny_bias; 18456 } 18457 18458 # otherwise increase strength a bit if this token would be at the 18459 # maximum line length. This is necessary to avoid blinking 18460 # in the above example when the -iob flag is added. 18461 else { 18462 my $len = 18463 $leading_spaces + 18464 $summed_lengths_to_go[ $i_test + 1 ] - 18465 $starting_sum; 18466 if ( $len >= $maximum_line_length ) { 18467 $strength += $tiny_bias; 18468 } 18469 } 18470 18471 my $must_break = 0; 18472 18473 # Force an immediate break at certain operators 18474 # with lower level than the start of the line, 18475 # unless we've already seen a better break. 18476 # 18477 ############################################## 18478 # Note on an issue with a preceding ? 18479 ############################################## 18480 # We don't include a ? in the above list, but there may 18481 # be a break at a previous ? if the line is long. 18482 # Because of this we do not want to force a break if 18483 # there is a previous ? on this line. For now the best way 18484 # to do this is to not break if we have seen a lower strength 18485 # point, which is probably a ?. 18486 # 18487 # Example of unwanted breaks we are avoiding at a '.' following a ? 18488 # from pod2html using perltidy -gnu: 18489 # ) 18490 # ? "\n<A NAME=\"" 18491 # . $value 18492 # . "\">\n$text</A>\n" 18493 # : "\n$type$pod2.html\#" . $value . "\">$text<\/A>\n"; 18494 if ( 18495 ( 18496 $next_nonblank_type =~ /^(\.|\&\&|\|\|)$/ 18497 || ( $next_nonblank_type eq 'k' 18498 && $next_nonblank_token =~ /^(and|or)$/ ) 18499 ) 18500 && ( $nesting_depth_to_go[$i_begin] > 18501 $nesting_depth_to_go[$i_next_nonblank] ) 18502 && ( $strength <= $lowest_strength ) 18503 ) 18504 { 18505 set_forced_breakpoint($i_next_nonblank); 18506 } 18507 18508 if ( 18509 18510 # Try to put a break where requested by scan_list 18511 $forced_breakpoint_to_go[$i_test] 18512 18513 # break between ) { in a continued line so that the '{' can 18514 # be outdented 18515 # See similar logic in scan_list which catches instances 18516 # where a line is just something like ') {'. We have to 18517 # be careful because the corresponding block keyword might 18518 # not be on the first line, such as 'for' here: 18519 # 18520 # eval { 18521 # for ("a") { 18522 # for $x ( 1, 2 ) { local $_ = "b"; s/(.*)/+$1/ } 18523 # } 18524 # }; 18525 # 18526 || ( $line_count 18527 && ( $token eq ')' ) 18528 && ( $next_nonblank_type eq '{' ) 18529 && ($next_nonblank_block_type) 18530 && ( $next_nonblank_block_type ne $tokens_to_go[$i_begin] ) 18531 && !$rOpts->{'opening-brace-always-on-right'} ) 18532 18533 # There is an implied forced break at a terminal opening brace 18534 || ( ( $type eq '{' ) && ( $i_test == $imax ) ) 18535 ) 18536 { 18537 18538 # Forced breakpoints must sometimes be overridden, for example 18539 # because of a side comment causing a NO_BREAK. It is easier 18540 # to catch this here than when they are set. 18541 if ( $strength < NO_BREAK - 1 ) { 18542 $strength = $lowest_strength - $tiny_bias; 18543 $must_break = 1; 18544 } 18545 } 18546 18547 # quit if a break here would put a good terminal token on 18548 # the next line and we already have a possible break 18549 if ( 18550 !$must_break 18551 && ( $next_nonblank_type =~ /^[\;\,]$/ ) 18552 && ( 18553 ( 18554 $leading_spaces + 18555 $summed_lengths_to_go[ $i_next_nonblank + 1 ] - 18556 $starting_sum 18557 ) > $maximum_line_length 18558 ) 18559 ) 18560 { 18561 last if ( $i_lowest >= 0 ); 18562 } 18563 18564 # Avoid a break which would strand a single punctuation 18565 # token. For example, we do not want to strand a leading 18566 # '.' which is followed by a long quoted string. 18567 # But note that we do want to do this with -extrude (l=1) 18568 # so please test any changes to this code on -extrude. 18569 if ( 18570 !$must_break 18571 && ( $i_test == $i_begin ) 18572 && ( $i_test < $imax ) 18573 && ( $token eq $type ) 18574 && ( 18575 ( 18576 $leading_spaces + 18577 $summed_lengths_to_go[ $i_test + 1 ] - 18578 $starting_sum 18579 ) < $maximum_line_length 18580 ) 18581 ) 18582 { 18583 $i_test = min( $imax, $inext_to_go[$i_test] ); 18584 redo; 18585 } 18586 18587 if ( ( $strength <= $lowest_strength ) && ( $strength < NO_BREAK ) ) 18588 { 18589 18590 # break at previous best break if it would have produced 18591 # a leading alignment of certain common tokens, and it 18592 # is different from the latest candidate break 18593 last 18594 if ($leading_alignment_type); 18595 18596 # Force at least one breakpoint if old code had good 18597 # break It is only called if a breakpoint is required or 18598 # desired. This will probably need some adjustments 18599 # over time. A goal is to try to be sure that, if a new 18600 # side comment is introduced into formated text, then 18601 # the same breakpoints will occur. scbreak.t 18602 last 18603 if ( 18604 $i_test == $imax # we are at the end 18605 && !$forced_breakpoint_count # 18606 && $saw_good_break # old line had good break 18607 && $type =~ /^[#;\{]$/ # and this line ends in 18608 # ';' or side comment 18609 && $i_last_break < 0 # and we haven't made a break 18610 && $i_lowest >= 0 # and we saw a possible break 18611 && $i_lowest < $imax - 1 # (but not just before this ;) 18612 && $strength - $lowest_strength < 0.5 * WEAK # and it's good 18613 ); 18614 18615 # Do not skip past an important break point in a short final 18616 # segment. For example, without this check we would miss the 18617 # break at the final / in the following code: 18618 # 18619 # $depth_stop = 18620 # ( $tau * $mass_pellet * $q_0 * 18621 # ( 1. - exp( -$t_stop / $tau ) ) - 18622 # 4. * $pi * $factor * $k_ice * 18623 # ( $t_melt - $t_ice ) * 18624 # $r_pellet * 18625 # $t_stop ) / 18626 # ( $rho_ice * $Qs * $pi * $r_pellet**2 ); 18627 # 18628 if ( $line_count > 2 18629 && $i_lowest < $i_test 18630 && $i_test > $imax - 2 18631 && $nesting_depth_to_go[$i_begin] > 18632 $nesting_depth_to_go[$i_lowest] 18633 && $lowest_strength < $last_break_strength - .5 * WEAK ) 18634 { 18635 # Make this break for math operators for now 18636 my $ir = $inext_to_go[$i_lowest]; 18637 my $il = $iprev_to_go[$ir]; 18638 last 18639 if ( $types_to_go[$il] =~ /^[\/\*\+\-\%]$/ 18640 || $types_to_go[$ir] =~ /^[\/\*\+\-\%]$/ ); 18641 } 18642 18643 # Update the minimum bond strength location 18644 $lowest_strength = $strength; 18645 $i_lowest = $i_test; 18646 $lowest_next_token = $next_nonblank_token; 18647 $lowest_next_type = $next_nonblank_type; 18648 $i_lowest_next_nonblank = $i_next_nonblank; 18649 last if $must_break; 18650 18651 # set flags to remember if a break here will produce a 18652 # leading alignment of certain common tokens 18653 if ( $line_count > 0 18654 && $i_test < $imax 18655 && ( $lowest_strength - $last_break_strength <= $max_bias ) 18656 ) 18657 { 18658 my $i_last_end = $iprev_to_go[$i_begin]; 18659 my $tok_beg = $tokens_to_go[$i_begin]; 18660 my $type_beg = $types_to_go[$i_begin]; 18661 if ( 18662 18663 # check for leading alignment of certain tokens 18664 ( 18665 $tok_beg eq $next_nonblank_token 18666 && $is_chain_operator{$tok_beg} 18667 && ( $type_beg eq 'k' 18668 || $type_beg eq $tok_beg ) 18669 && $nesting_depth_to_go[$i_begin] >= 18670 $nesting_depth_to_go[$i_next_nonblank] 18671 ) 18672 18673 || ( $tokens_to_go[$i_last_end] eq $token 18674 && $is_chain_operator{$token} 18675 && ( $type eq 'k' || $type eq $token ) 18676 && $nesting_depth_to_go[$i_last_end] >= 18677 $nesting_depth_to_go[$i_test] ) 18678 ) 18679 { 18680 $leading_alignment_token = $next_nonblank_token; 18681 $leading_alignment_type = $next_nonblank_type; 18682 } 18683 } 18684 } 18685 18686 my $too_long = ( $i_test >= $imax ); 18687 if ( !$too_long ) { 18688 my $next_length = 18689 $leading_spaces + 18690 $summed_lengths_to_go[ $i_test + 2 ] - 18691 $starting_sum; 18692 $too_long = $next_length > $maximum_line_length; 18693 18694 # To prevent blinkers we will avoid leaving a token exactly at 18695 # the line length limit unless it is the last token or one of 18696 # several "good" types. 18697 # 18698 # The following code was a blinker with -pbp before this 18699 # modification: 18700## $last_nonblank_token eq '(' 18701## && $is_indirect_object_taker{ $paren_type 18702## [$paren_depth] } 18703 # The issue causing the problem is that if the 18704 # term [$paren_depth] gets broken across a line then 18705 # the whitespace routine doesn't see both opening and closing 18706 # brackets and will format like '[ $paren_depth ]'. This 18707 # leads to an oscillation in length depending if we break 18708 # before the closing bracket or not. 18709 if ( !$too_long 18710 && $i_test + 1 < $imax 18711 && $next_nonblank_type !~ /^[,\}\]\)R]$/ ) 18712 { 18713 $too_long = $next_length >= $maximum_line_length; 18714 } 18715 } 18716 18717 FORMATTER_DEBUG_FLAG_BREAK 18718 && do { 18719 my $ltok = $token; 18720 my $rtok = $next_nonblank_token ? $next_nonblank_token : ""; 18721 my $i_testp2 = $i_test + 2; 18722 if ( $i_testp2 > $max_index_to_go + 1 ) { 18723 $i_testp2 = $max_index_to_go + 1; 18724 } 18725 if ( length($ltok) > 6 ) { $ltok = substr( $ltok, 0, 8 ) } 18726 if ( length($rtok) > 6 ) { $rtok = substr( $rtok, 0, 8 ) } 18727 print STDOUT 18728"BREAK: i=$i_test imax=$imax $types_to_go[$i_test] $next_nonblank_type sp=($leading_spaces) lnext= $summed_lengths_to_go[$i_testp2] 2long=$too_long str=$strength $ltok $rtok\n"; 18729 }; 18730 18731 # allow one extra terminal token after exceeding line length 18732 # if it would strand this token. 18733 if ( $rOpts_fuzzy_line_length 18734 && $too_long 18735 && $i_lowest == $i_test 18736 && $token_lengths_to_go[$i_test] > 1 18737 && $next_nonblank_type =~ /^[\;\,]$/ ) 18738 { 18739 $too_long = 0; 18740 } 18741 18742 last 18743 if ( 18744 ( $i_test == $imax ) # we're done if no more tokens, 18745 || ( 18746 ( $i_lowest >= 0 ) # or no more space and we have a break 18747 && $too_long 18748 ) 18749 ); 18750 } 18751 18752 #------------------------------------------------------- 18753 # END of inner loop to find the best next breakpoint 18754 # Now decide exactly where to put the breakpoint 18755 #------------------------------------------------------- 18756 18757 # it's always ok to break at imax if no other break was found 18758 if ( $i_lowest < 0 ) { $i_lowest = $imax } 18759 18760 # semi-final index calculation 18761 my $i_next_nonblank = $inext_to_go[$i_lowest]; 18762 my $next_nonblank_type = $types_to_go[$i_next_nonblank]; 18763 my $next_nonblank_token = $tokens_to_go[$i_next_nonblank]; 18764 18765 #------------------------------------------------------- 18766 # ?/: rule 1 : if a break here will separate a '?' on this 18767 # line from its closing ':', then break at the '?' instead. 18768 #------------------------------------------------------- 18769 my $i; 18770 foreach $i ( $i_begin + 1 .. $i_lowest - 1 ) { 18771 next unless ( $tokens_to_go[$i] eq '?' ); 18772 18773 # do not break if probable sequence of ?/: statements 18774 next if ($is_colon_chain); 18775 18776 # do not break if statement is broken by side comment 18777 next 18778 if ( 18779 $tokens_to_go[$max_index_to_go] eq '#' 18780 && terminal_type( \@types_to_go, \@block_type_to_go, 0, 18781 $max_index_to_go ) !~ /^[\;\}]$/ 18782 ); 18783 18784 # no break needed if matching : is also on the line 18785 next 18786 if ( $mate_index_to_go[$i] >= 0 18787 && $mate_index_to_go[$i] <= $i_next_nonblank ); 18788 18789 $i_lowest = $i; 18790 if ( $want_break_before{'?'} ) { $i_lowest-- } 18791 last; 18792 } 18793 18794 #------------------------------------------------------- 18795 # END of inner loop to find the best next breakpoint: 18796 # Break the line after the token with index i=$i_lowest 18797 #------------------------------------------------------- 18798 18799 # final index calculation 18800 $i_next_nonblank = $inext_to_go[$i_lowest]; 18801 $next_nonblank_type = $types_to_go[$i_next_nonblank]; 18802 $next_nonblank_token = $tokens_to_go[$i_next_nonblank]; 18803 18804 FORMATTER_DEBUG_FLAG_BREAK 18805 && print STDOUT 18806 "BREAK: best is i = $i_lowest strength = $lowest_strength\n"; 18807 18808 #------------------------------------------------------- 18809 # ?/: rule 2 : if we break at a '?', then break at its ':' 18810 # 18811 # Note: this rule is also in sub scan_list to handle a break 18812 # at the start and end of a line (in case breaks are dictated 18813 # by side comments). 18814 #------------------------------------------------------- 18815 if ( $next_nonblank_type eq '?' ) { 18816 set_closing_breakpoint($i_next_nonblank); 18817 } 18818 elsif ( $types_to_go[$i_lowest] eq '?' ) { 18819 set_closing_breakpoint($i_lowest); 18820 } 18821 18822 #------------------------------------------------------- 18823 # ?/: rule 3 : if we break at a ':' then we save 18824 # its location for further work below. We may need to go 18825 # back and break at its '?'. 18826 #------------------------------------------------------- 18827 if ( $next_nonblank_type eq ':' ) { 18828 push @i_colon_breaks, $i_next_nonblank; 18829 } 18830 elsif ( $types_to_go[$i_lowest] eq ':' ) { 18831 push @i_colon_breaks, $i_lowest; 18832 } 18833 18834 # here we should set breaks for all '?'/':' pairs which are 18835 # separated by this line 18836 18837 $line_count++; 18838 18839 # save this line segment, after trimming blanks at the ends 18840 push( @i_first, 18841 ( $types_to_go[$i_begin] eq 'b' ) ? $i_begin + 1 : $i_begin ); 18842 push( @i_last, 18843 ( $types_to_go[$i_lowest] eq 'b' ) ? $i_lowest - 1 : $i_lowest ); 18844 18845 # set a forced breakpoint at a container opening, if necessary, to 18846 # signal a break at a closing container. Excepting '(' for now. 18847 if ( $tokens_to_go[$i_lowest] =~ /^[\{\[]$/ 18848 && !$forced_breakpoint_to_go[$i_lowest] ) 18849 { 18850 set_closing_breakpoint($i_lowest); 18851 } 18852 18853 # get ready to go again 18854 $i_begin = $i_lowest + 1; 18855 $last_break_strength = $lowest_strength; 18856 $i_last_break = $i_lowest; 18857 $leading_alignment_token = ""; 18858 $leading_alignment_type = ""; 18859 $lowest_next_token = ''; 18860 $lowest_next_type = 'b'; 18861 18862 if ( ( $i_begin <= $imax ) && ( $types_to_go[$i_begin] eq 'b' ) ) { 18863 $i_begin++; 18864 } 18865 18866 # update indentation size 18867 if ( $i_begin <= $imax ) { 18868 $leading_spaces = leading_spaces_to_go($i_begin); 18869 } 18870 } 18871 18872 #------------------------------------------------------- 18873 # END of main loop to set continuation breakpoints 18874 # Now go back and make any necessary corrections 18875 #------------------------------------------------------- 18876 18877 #------------------------------------------------------- 18878 # ?/: rule 4 -- if we broke at a ':', then break at 18879 # corresponding '?' unless this is a chain of ?: expressions 18880 #------------------------------------------------------- 18881 if (@i_colon_breaks) { 18882 18883 # using a simple method for deciding if we are in a ?/: chain -- 18884 # this is a chain if it has multiple ?/: pairs all in order; 18885 # otherwise not. 18886 # Note that if line starts in a ':' we count that above as a break 18887 my $is_chain = ( $colons_in_order && @i_colon_breaks > 1 ); 18888 18889 unless ($is_chain) { 18890 my @insert_list = (); 18891 foreach (@i_colon_breaks) { 18892 my $i_question = $mate_index_to_go[$_]; 18893 if ( $i_question >= 0 ) { 18894 if ( $want_break_before{'?'} ) { 18895 $i_question = $iprev_to_go[$i_question]; 18896 } 18897 18898 if ( $i_question >= 0 ) { 18899 push @insert_list, $i_question; 18900 } 18901 } 18902 insert_additional_breaks( \@insert_list, \@i_first, \@i_last ); 18903 } 18904 } 18905 } 18906 return ( \@i_first, \@i_last, $colon_count ); 18907} 18908 18909sub insert_additional_breaks { 18910 18911 # this routine will add line breaks at requested locations after 18912 # sub set_continuation_breaks has made preliminary breaks. 18913 18914 my ( $ri_break_list, $ri_first, $ri_last ) = @_; 18915 my $i_f; 18916 my $i_l; 18917 my $line_number = 0; 18918 my $i_break_left; 18919 foreach $i_break_left ( sort { $a <=> $b } @$ri_break_list ) { 18920 18921 $i_f = $$ri_first[$line_number]; 18922 $i_l = $$ri_last[$line_number]; 18923 while ( $i_break_left >= $i_l ) { 18924 $line_number++; 18925 18926 # shouldn't happen unless caller passes bad indexes 18927 if ( $line_number >= @$ri_last ) { 18928 warning( 18929"Non-fatal program bug: couldn't set break at $i_break_left\n" 18930 ); 18931 report_definite_bug(); 18932 return; 18933 } 18934 $i_f = $$ri_first[$line_number]; 18935 $i_l = $$ri_last[$line_number]; 18936 } 18937 18938 # Do not leave a blank at the end of a line; back up if necessary 18939 if ( $types_to_go[$i_break_left] eq 'b' ) { $i_break_left-- } 18940 18941 my $i_break_right = $inext_to_go[$i_break_left]; 18942 if ( $i_break_left >= $i_f 18943 && $i_break_left < $i_l 18944 && $i_break_right > $i_f 18945 && $i_break_right <= $i_l ) 18946 { 18947 splice( @$ri_first, $line_number, 1, ( $i_f, $i_break_right ) ); 18948 splice( @$ri_last, $line_number, 1, ( $i_break_left, $i_l ) ); 18949 } 18950 } 18951} 18952 18953sub set_closing_breakpoint { 18954 18955 # set a breakpoint at a matching closing token 18956 # at present, this is only used to break at a ':' which matches a '?' 18957 my $i_break = shift; 18958 18959 if ( $mate_index_to_go[$i_break] >= 0 ) { 18960 18961 # CAUTION: infinite recursion possible here: 18962 # set_closing_breakpoint calls set_forced_breakpoint, and 18963 # set_forced_breakpoint call set_closing_breakpoint 18964 # ( test files attrib.t, BasicLyx.pm.html). 18965 # Don't reduce the '2' in the statement below 18966 if ( $mate_index_to_go[$i_break] > $i_break + 2 ) { 18967 18968 # break before } ] and ), but sub set_forced_breakpoint will decide 18969 # to break before or after a ? and : 18970 my $inc = ( $tokens_to_go[$i_break] eq '?' ) ? 0 : 1; 18971 set_forced_breakpoint( $mate_index_to_go[$i_break] - $inc ); 18972 } 18973 } 18974 else { 18975 my $type_sequence = $type_sequence_to_go[$i_break]; 18976 if ($type_sequence) { 18977 my $closing_token = $matching_token{ $tokens_to_go[$i_break] }; 18978 $postponed_breakpoint{$type_sequence} = 1; 18979 } 18980 } 18981} 18982 18983sub compare_indentation_levels { 18984 18985 # check to see if output line tabbing agrees with input line 18986 # this can be very useful for debugging a script which has an extra 18987 # or missing brace 18988 my ( $guessed_indentation_level, $structural_indentation_level ) = @_; 18989 if ( $guessed_indentation_level ne $structural_indentation_level ) { 18990 $last_tabbing_disagreement = $input_line_number; 18991 18992 if ($in_tabbing_disagreement) { 18993 } 18994 else { 18995 $tabbing_disagreement_count++; 18996 18997 if ( $tabbing_disagreement_count <= MAX_NAG_MESSAGES ) { 18998 write_logfile_entry( 18999"Start indentation disagreement: input=$guessed_indentation_level; output=$structural_indentation_level\n" 19000 ); 19001 } 19002 $in_tabbing_disagreement = $input_line_number; 19003 $first_tabbing_disagreement = $in_tabbing_disagreement 19004 unless ($first_tabbing_disagreement); 19005 } 19006 } 19007 else { 19008 19009 if ($in_tabbing_disagreement) { 19010 19011 if ( $tabbing_disagreement_count <= MAX_NAG_MESSAGES ) { 19012 write_logfile_entry( 19013"End indentation disagreement from input line $in_tabbing_disagreement\n" 19014 ); 19015 19016 if ( $tabbing_disagreement_count == MAX_NAG_MESSAGES ) { 19017 write_logfile_entry( 19018 "No further tabbing disagreements will be noted\n"); 19019 } 19020 } 19021 $in_tabbing_disagreement = 0; 19022 } 19023 } 19024} 19025 19026##################################################################### 19027# 19028# the Perl::Tidy::IndentationItem class supplies items which contain 19029# how much whitespace should be used at the start of a line 19030# 19031##################################################################### 19032 19033package Perl::Tidy::IndentationItem; 19034 19035# Indexes for indentation items 19036use constant SPACES => 0; # total leading white spaces 19037use constant LEVEL => 1; # the indentation 'level' 19038use constant CI_LEVEL => 2; # the 'continuation level' 19039use constant AVAILABLE_SPACES => 3; # how many left spaces available 19040 # for this level 19041use constant CLOSED => 4; # index where we saw closing '}' 19042use constant COMMA_COUNT => 5; # how many commas at this level? 19043use constant SEQUENCE_NUMBER => 6; # output batch number 19044use constant INDEX => 7; # index in output batch list 19045use constant HAVE_CHILD => 8; # any dependents? 19046use constant RECOVERABLE_SPACES => 9; # how many spaces to the right 19047 # we would like to move to get 19048 # alignment (negative if left) 19049use constant ALIGN_PAREN => 10; # do we want to try to align 19050 # with an opening structure? 19051use constant MARKED => 11; # if visited by corrector logic 19052use constant STACK_DEPTH => 12; # indentation nesting depth 19053use constant STARTING_INDEX => 13; # first token index of this level 19054use constant ARROW_COUNT => 14; # how many =>'s 19055 19056sub new { 19057 19058 # Create an 'indentation_item' which describes one level of leading 19059 # whitespace when the '-lp' indentation is used. We return 19060 # a reference to an anonymous array of associated variables. 19061 # See above constants for storage scheme. 19062 my ( 19063 $class, $spaces, $level, 19064 $ci_level, $available_spaces, $index, 19065 $gnu_sequence_number, $align_paren, $stack_depth, 19066 $starting_index, 19067 ) = @_; 19068 my $closed = -1; 19069 my $arrow_count = 0; 19070 my $comma_count = 0; 19071 my $have_child = 0; 19072 my $want_right_spaces = 0; 19073 my $marked = 0; 19074 bless [ 19075 $spaces, $level, $ci_level, 19076 $available_spaces, $closed, $comma_count, 19077 $gnu_sequence_number, $index, $have_child, 19078 $want_right_spaces, $align_paren, $marked, 19079 $stack_depth, $starting_index, $arrow_count, 19080 ], $class; 19081} 19082 19083sub permanently_decrease_AVAILABLE_SPACES { 19084 19085 # make a permanent reduction in the available indentation spaces 19086 # at one indentation item. NOTE: if there are child nodes, their 19087 # total SPACES must be reduced by the caller. 19088 19089 my ( $item, $spaces_needed ) = @_; 19090 my $available_spaces = $item->get_AVAILABLE_SPACES(); 19091 my $deleted_spaces = 19092 ( $available_spaces > $spaces_needed ) 19093 ? $spaces_needed 19094 : $available_spaces; 19095 $item->decrease_AVAILABLE_SPACES($deleted_spaces); 19096 $item->decrease_SPACES($deleted_spaces); 19097 $item->set_RECOVERABLE_SPACES(0); 19098 19099 return $deleted_spaces; 19100} 19101 19102sub tentatively_decrease_AVAILABLE_SPACES { 19103 19104 # We are asked to tentatively delete $spaces_needed of indentation 19105 # for a indentation item. We may want to undo this later. NOTE: if 19106 # there are child nodes, their total SPACES must be reduced by the 19107 # caller. 19108 my ( $item, $spaces_needed ) = @_; 19109 my $available_spaces = $item->get_AVAILABLE_SPACES(); 19110 my $deleted_spaces = 19111 ( $available_spaces > $spaces_needed ) 19112 ? $spaces_needed 19113 : $available_spaces; 19114 $item->decrease_AVAILABLE_SPACES($deleted_spaces); 19115 $item->decrease_SPACES($deleted_spaces); 19116 $item->increase_RECOVERABLE_SPACES($deleted_spaces); 19117 return $deleted_spaces; 19118} 19119 19120sub get_STACK_DEPTH { 19121 my $self = shift; 19122 return $self->[STACK_DEPTH]; 19123} 19124 19125sub get_SPACES { 19126 my $self = shift; 19127 return $self->[SPACES]; 19128} 19129 19130sub get_MARKED { 19131 my $self = shift; 19132 return $self->[MARKED]; 19133} 19134 19135sub set_MARKED { 19136 my ( $self, $value ) = @_; 19137 if ( defined($value) ) { 19138 $self->[MARKED] = $value; 19139 } 19140 return $self->[MARKED]; 19141} 19142 19143sub get_AVAILABLE_SPACES { 19144 my $self = shift; 19145 return $self->[AVAILABLE_SPACES]; 19146} 19147 19148sub decrease_SPACES { 19149 my ( $self, $value ) = @_; 19150 if ( defined($value) ) { 19151 $self->[SPACES] -= $value; 19152 } 19153 return $self->[SPACES]; 19154} 19155 19156sub decrease_AVAILABLE_SPACES { 19157 my ( $self, $value ) = @_; 19158 if ( defined($value) ) { 19159 $self->[AVAILABLE_SPACES] -= $value; 19160 } 19161 return $self->[AVAILABLE_SPACES]; 19162} 19163 19164sub get_ALIGN_PAREN { 19165 my $self = shift; 19166 return $self->[ALIGN_PAREN]; 19167} 19168 19169sub get_RECOVERABLE_SPACES { 19170 my $self = shift; 19171 return $self->[RECOVERABLE_SPACES]; 19172} 19173 19174sub set_RECOVERABLE_SPACES { 19175 my ( $self, $value ) = @_; 19176 if ( defined($value) ) { 19177 $self->[RECOVERABLE_SPACES] = $value; 19178 } 19179 return $self->[RECOVERABLE_SPACES]; 19180} 19181 19182sub increase_RECOVERABLE_SPACES { 19183 my ( $self, $value ) = @_; 19184 if ( defined($value) ) { 19185 $self->[RECOVERABLE_SPACES] += $value; 19186 } 19187 return $self->[RECOVERABLE_SPACES]; 19188} 19189 19190sub get_CI_LEVEL { 19191 my $self = shift; 19192 return $self->[CI_LEVEL]; 19193} 19194 19195sub get_LEVEL { 19196 my $self = shift; 19197 return $self->[LEVEL]; 19198} 19199 19200sub get_SEQUENCE_NUMBER { 19201 my $self = shift; 19202 return $self->[SEQUENCE_NUMBER]; 19203} 19204 19205sub get_INDEX { 19206 my $self = shift; 19207 return $self->[INDEX]; 19208} 19209 19210sub get_STARTING_INDEX { 19211 my $self = shift; 19212 return $self->[STARTING_INDEX]; 19213} 19214 19215sub set_HAVE_CHILD { 19216 my ( $self, $value ) = @_; 19217 if ( defined($value) ) { 19218 $self->[HAVE_CHILD] = $value; 19219 } 19220 return $self->[HAVE_CHILD]; 19221} 19222 19223sub get_HAVE_CHILD { 19224 my $self = shift; 19225 return $self->[HAVE_CHILD]; 19226} 19227 19228sub set_ARROW_COUNT { 19229 my ( $self, $value ) = @_; 19230 if ( defined($value) ) { 19231 $self->[ARROW_COUNT] = $value; 19232 } 19233 return $self->[ARROW_COUNT]; 19234} 19235 19236sub get_ARROW_COUNT { 19237 my $self = shift; 19238 return $self->[ARROW_COUNT]; 19239} 19240 19241sub set_COMMA_COUNT { 19242 my ( $self, $value ) = @_; 19243 if ( defined($value) ) { 19244 $self->[COMMA_COUNT] = $value; 19245 } 19246 return $self->[COMMA_COUNT]; 19247} 19248 19249sub get_COMMA_COUNT { 19250 my $self = shift; 19251 return $self->[COMMA_COUNT]; 19252} 19253 19254sub set_CLOSED { 19255 my ( $self, $value ) = @_; 19256 if ( defined($value) ) { 19257 $self->[CLOSED] = $value; 19258 } 19259 return $self->[CLOSED]; 19260} 19261 19262sub get_CLOSED { 19263 my $self = shift; 19264 return $self->[CLOSED]; 19265} 19266 19267##################################################################### 19268# 19269# the Perl::Tidy::VerticalAligner::Line class supplies an object to 19270# contain a single output line 19271# 19272##################################################################### 19273 19274package Perl::Tidy::VerticalAligner::Line; 19275 19276{ 19277 19278 use strict; 19279 use Carp; 19280 19281 use constant JMAX => 0; 19282 use constant JMAX_ORIGINAL_LINE => 1; 19283 use constant RTOKENS => 2; 19284 use constant RFIELDS => 3; 19285 use constant RPATTERNS => 4; 19286 use constant INDENTATION => 5; 19287 use constant LEADING_SPACE_COUNT => 6; 19288 use constant OUTDENT_LONG_LINES => 7; 19289 use constant LIST_TYPE => 8; 19290 use constant IS_HANGING_SIDE_COMMENT => 9; 19291 use constant RALIGNMENTS => 10; 19292 use constant MAXIMUM_LINE_LENGTH => 11; 19293 use constant RVERTICAL_TIGHTNESS_FLAGS => 12; 19294 19295 my %_index_map; 19296 $_index_map{jmax} = JMAX; 19297 $_index_map{jmax_original_line} = JMAX_ORIGINAL_LINE; 19298 $_index_map{rtokens} = RTOKENS; 19299 $_index_map{rfields} = RFIELDS; 19300 $_index_map{rpatterns} = RPATTERNS; 19301 $_index_map{indentation} = INDENTATION; 19302 $_index_map{leading_space_count} = LEADING_SPACE_COUNT; 19303 $_index_map{outdent_long_lines} = OUTDENT_LONG_LINES; 19304 $_index_map{list_type} = LIST_TYPE; 19305 $_index_map{is_hanging_side_comment} = IS_HANGING_SIDE_COMMENT; 19306 $_index_map{ralignments} = RALIGNMENTS; 19307 $_index_map{maximum_line_length} = MAXIMUM_LINE_LENGTH; 19308 $_index_map{rvertical_tightness_flags} = RVERTICAL_TIGHTNESS_FLAGS; 19309 19310 my @_default_data = (); 19311 $_default_data[JMAX] = undef; 19312 $_default_data[JMAX_ORIGINAL_LINE] = undef; 19313 $_default_data[RTOKENS] = undef; 19314 $_default_data[RFIELDS] = undef; 19315 $_default_data[RPATTERNS] = undef; 19316 $_default_data[INDENTATION] = undef; 19317 $_default_data[LEADING_SPACE_COUNT] = undef; 19318 $_default_data[OUTDENT_LONG_LINES] = undef; 19319 $_default_data[LIST_TYPE] = undef; 19320 $_default_data[IS_HANGING_SIDE_COMMENT] = undef; 19321 $_default_data[RALIGNMENTS] = []; 19322 $_default_data[MAXIMUM_LINE_LENGTH] = undef; 19323 $_default_data[RVERTICAL_TIGHTNESS_FLAGS] = undef; 19324 19325 { 19326 19327 # methods to count object population 19328 my $_count = 0; 19329 sub get_count { $_count; } 19330 sub _increment_count { ++$_count } 19331 sub _decrement_count { --$_count } 19332 } 19333 19334 # Constructor may be called as a class method 19335 sub new { 19336 my ( $caller, %arg ) = @_; 19337 my $caller_is_obj = ref($caller); 19338 my $class = $caller_is_obj || $caller; 19339 no strict "refs"; 19340 my $self = bless [], $class; 19341 19342 $self->[RALIGNMENTS] = []; 19343 19344 my $index; 19345 foreach ( keys %_index_map ) { 19346 $index = $_index_map{$_}; 19347 if ( exists $arg{$_} ) { $self->[$index] = $arg{$_} } 19348 elsif ($caller_is_obj) { $self->[$index] = $caller->[$index] } 19349 else { $self->[$index] = $_default_data[$index] } 19350 } 19351 19352 $self->_increment_count(); 19353 return $self; 19354 } 19355 19356 sub DESTROY { 19357 $_[0]->_decrement_count(); 19358 } 19359 19360 sub get_jmax { $_[0]->[JMAX] } 19361 sub get_jmax_original_line { $_[0]->[JMAX_ORIGINAL_LINE] } 19362 sub get_rtokens { $_[0]->[RTOKENS] } 19363 sub get_rfields { $_[0]->[RFIELDS] } 19364 sub get_rpatterns { $_[0]->[RPATTERNS] } 19365 sub get_indentation { $_[0]->[INDENTATION] } 19366 sub get_leading_space_count { $_[0]->[LEADING_SPACE_COUNT] } 19367 sub get_outdent_long_lines { $_[0]->[OUTDENT_LONG_LINES] } 19368 sub get_list_type { $_[0]->[LIST_TYPE] } 19369 sub get_is_hanging_side_comment { $_[0]->[IS_HANGING_SIDE_COMMENT] } 19370 sub get_rvertical_tightness_flags { $_[0]->[RVERTICAL_TIGHTNESS_FLAGS] } 19371 19372 sub set_column { $_[0]->[RALIGNMENTS]->[ $_[1] ]->set_column( $_[2] ) } 19373 sub get_alignment { $_[0]->[RALIGNMENTS]->[ $_[1] ] } 19374 sub get_alignments { @{ $_[0]->[RALIGNMENTS] } } 19375 sub get_column { $_[0]->[RALIGNMENTS]->[ $_[1] ]->get_column() } 19376 19377 sub get_starting_column { 19378 $_[0]->[RALIGNMENTS]->[ $_[1] ]->get_starting_column(); 19379 } 19380 19381 sub increment_column { 19382 $_[0]->[RALIGNMENTS]->[ $_[1] ]->increment_column( $_[2] ); 19383 } 19384 sub set_alignments { my $self = shift; @{ $self->[RALIGNMENTS] } = @_; } 19385 19386 sub current_field_width { 19387 my $self = shift; 19388 my ($j) = @_; 19389 if ( $j == 0 ) { 19390 return $self->get_column($j); 19391 } 19392 else { 19393 return $self->get_column($j) - $self->get_column( $j - 1 ); 19394 } 19395 } 19396 19397 sub field_width_growth { 19398 my $self = shift; 19399 my $j = shift; 19400 return $self->get_column($j) - $self->get_starting_column($j); 19401 } 19402 19403 sub starting_field_width { 19404 my $self = shift; 19405 my $j = shift; 19406 if ( $j == 0 ) { 19407 return $self->get_starting_column($j); 19408 } 19409 else { 19410 return $self->get_starting_column($j) - 19411 $self->get_starting_column( $j - 1 ); 19412 } 19413 } 19414 19415 sub increase_field_width { 19416 19417 my $self = shift; 19418 my ( $j, $pad ) = @_; 19419 my $jmax = $self->get_jmax(); 19420 for my $k ( $j .. $jmax ) { 19421 $self->increment_column( $k, $pad ); 19422 } 19423 } 19424 19425 sub get_available_space_on_right { 19426 my $self = shift; 19427 my $jmax = $self->get_jmax(); 19428 return $self->[MAXIMUM_LINE_LENGTH] - $self->get_column($jmax); 19429 } 19430 19431 sub set_jmax { $_[0]->[JMAX] = $_[1] } 19432 sub set_jmax_original_line { $_[0]->[JMAX_ORIGINAL_LINE] = $_[1] } 19433 sub set_rtokens { $_[0]->[RTOKENS] = $_[1] } 19434 sub set_rfields { $_[0]->[RFIELDS] = $_[1] } 19435 sub set_rpatterns { $_[0]->[RPATTERNS] = $_[1] } 19436 sub set_indentation { $_[0]->[INDENTATION] = $_[1] } 19437 sub set_leading_space_count { $_[0]->[LEADING_SPACE_COUNT] = $_[1] } 19438 sub set_outdent_long_lines { $_[0]->[OUTDENT_LONG_LINES] = $_[1] } 19439 sub set_list_type { $_[0]->[LIST_TYPE] = $_[1] } 19440 sub set_is_hanging_side_comment { $_[0]->[IS_HANGING_SIDE_COMMENT] = $_[1] } 19441 sub set_alignment { $_[0]->[RALIGNMENTS]->[ $_[1] ] = $_[2] } 19442 19443} 19444 19445##################################################################### 19446# 19447# the Perl::Tidy::VerticalAligner::Alignment class holds information 19448# on a single column being aligned 19449# 19450##################################################################### 19451package Perl::Tidy::VerticalAligner::Alignment; 19452 19453{ 19454 19455 use strict; 19456 19457 #use Carp; 19458 19459 # Symbolic array indexes 19460 use constant COLUMN => 0; # the current column number 19461 use constant STARTING_COLUMN => 1; # column number when created 19462 use constant MATCHING_TOKEN => 2; # what token we are matching 19463 use constant STARTING_LINE => 3; # the line index of creation 19464 use constant ENDING_LINE => 4; # the most recent line to use it 19465 use constant SAVED_COLUMN => 5; # the most recent line to use it 19466 use constant SERIAL_NUMBER => 6; # unique number for this alignment 19467 # (just its index in an array) 19468 19469 # Correspondence between variables and array indexes 19470 my %_index_map; 19471 $_index_map{column} = COLUMN; 19472 $_index_map{starting_column} = STARTING_COLUMN; 19473 $_index_map{matching_token} = MATCHING_TOKEN; 19474 $_index_map{starting_line} = STARTING_LINE; 19475 $_index_map{ending_line} = ENDING_LINE; 19476 $_index_map{saved_column} = SAVED_COLUMN; 19477 $_index_map{serial_number} = SERIAL_NUMBER; 19478 19479 my @_default_data = (); 19480 $_default_data[COLUMN] = undef; 19481 $_default_data[STARTING_COLUMN] = undef; 19482 $_default_data[MATCHING_TOKEN] = undef; 19483 $_default_data[STARTING_LINE] = undef; 19484 $_default_data[ENDING_LINE] = undef; 19485 $_default_data[SAVED_COLUMN] = undef; 19486 $_default_data[SERIAL_NUMBER] = undef; 19487 19488 # class population count 19489 { 19490 my $_count = 0; 19491 sub get_count { $_count; } 19492 sub _increment_count { ++$_count } 19493 sub _decrement_count { --$_count } 19494 } 19495 19496 # constructor 19497 sub new { 19498 my ( $caller, %arg ) = @_; 19499 my $caller_is_obj = ref($caller); 19500 my $class = $caller_is_obj || $caller; 19501 no strict "refs"; 19502 my $self = bless [], $class; 19503 19504 foreach ( keys %_index_map ) { 19505 my $index = $_index_map{$_}; 19506 if ( exists $arg{$_} ) { $self->[$index] = $arg{$_} } 19507 elsif ($caller_is_obj) { $self->[$index] = $caller->[$index] } 19508 else { $self->[$index] = $_default_data[$index] } 19509 } 19510 $self->_increment_count(); 19511 return $self; 19512 } 19513 19514 sub DESTROY { 19515 $_[0]->_decrement_count(); 19516 } 19517 19518 sub get_column { return $_[0]->[COLUMN] } 19519 sub get_starting_column { return $_[0]->[STARTING_COLUMN] } 19520 sub get_matching_token { return $_[0]->[MATCHING_TOKEN] } 19521 sub get_starting_line { return $_[0]->[STARTING_LINE] } 19522 sub get_ending_line { return $_[0]->[ENDING_LINE] } 19523 sub get_serial_number { return $_[0]->[SERIAL_NUMBER] } 19524 19525 sub set_column { $_[0]->[COLUMN] = $_[1] } 19526 sub set_starting_column { $_[0]->[STARTING_COLUMN] = $_[1] } 19527 sub set_matching_token { $_[0]->[MATCHING_TOKEN] = $_[1] } 19528 sub set_starting_line { $_[0]->[STARTING_LINE] = $_[1] } 19529 sub set_ending_line { $_[0]->[ENDING_LINE] = $_[1] } 19530 sub increment_column { $_[0]->[COLUMN] += $_[1] } 19531 19532 sub save_column { $_[0]->[SAVED_COLUMN] = $_[0]->[COLUMN] } 19533 sub restore_column { $_[0]->[COLUMN] = $_[0]->[SAVED_COLUMN] } 19534 19535} 19536 19537package Perl::Tidy::VerticalAligner; 19538 19539# The Perl::Tidy::VerticalAligner package collects output lines and 19540# attempts to line up certain common tokens, such as => and #, which are 19541# identified by the calling routine. 19542# 19543# There are two main routines: valign_input and flush. Append acts as a 19544# storage buffer, collecting lines into a group which can be vertically 19545# aligned. When alignment is no longer possible or desirable, it dumps 19546# the group to flush. 19547# 19548# valign_input -----> flush 19549# 19550# collects writes 19551# vertical one 19552# groups group 19553 19554BEGIN { 19555 19556 # Caution: these debug flags produce a lot of output 19557 # They should all be 0 except when debugging small scripts 19558 19559 use constant VALIGN_DEBUG_FLAG_APPEND => 0; 19560 use constant VALIGN_DEBUG_FLAG_APPEND0 => 0; 19561 use constant VALIGN_DEBUG_FLAG_TERNARY => 0; 19562 use constant VALIGN_DEBUG_FLAG_TABS => 0; 19563 19564 my $debug_warning = sub { 19565 print STDOUT "VALIGN_DEBUGGING with key $_[0]\n"; 19566 }; 19567 19568 VALIGN_DEBUG_FLAG_APPEND && $debug_warning->('APPEND'); 19569 VALIGN_DEBUG_FLAG_APPEND0 && $debug_warning->('APPEND0'); 19570 VALIGN_DEBUG_FLAG_TERNARY && $debug_warning->('TERNARY'); 19571 VALIGN_DEBUG_FLAG_TABS && $debug_warning->('TABS'); 19572 19573} 19574 19575use vars qw( 19576 $vertical_aligner_self 19577 $current_line 19578 $maximum_alignment_index 19579 $ralignment_list 19580 $maximum_jmax_seen 19581 $minimum_jmax_seen 19582 $previous_minimum_jmax_seen 19583 $previous_maximum_jmax_seen 19584 $maximum_line_index 19585 $group_level 19586 $group_type 19587 $group_maximum_gap 19588 $marginal_match 19589 $last_level_written 19590 $last_leading_space_count 19591 $extra_indent_ok 19592 $zero_count 19593 @group_lines 19594 $last_comment_column 19595 $last_side_comment_line_number 19596 $last_side_comment_length 19597 $last_side_comment_level 19598 $outdented_line_count 19599 $first_outdented_line_at 19600 $last_outdented_line_at 19601 $diagnostics_object 19602 $logger_object 19603 $file_writer_object 19604 @side_comment_history 19605 $comment_leading_space_count 19606 $is_matching_terminal_line 19607 $consecutive_block_comments 19608 19609 $cached_line_text 19610 $cached_line_type 19611 $cached_line_flag 19612 $cached_seqno 19613 $cached_line_valid 19614 $cached_line_leading_space_count 19615 $cached_seqno_string 19616 19617 $valign_buffer_filling 19618 @valign_buffer 19619 19620 $seqno_string 19621 $last_nonblank_seqno_string 19622 19623 $rOpts 19624 19625 $rOpts_maximum_line_length 19626 $rOpts_variable_maximum_line_length 19627 $rOpts_continuation_indentation 19628 $rOpts_indent_columns 19629 $rOpts_tabs 19630 $rOpts_entab_leading_whitespace 19631 $rOpts_valign 19632 19633 $rOpts_fixed_position_side_comment 19634 $rOpts_minimum_space_to_comment 19635 19636); 19637 19638sub initialize { 19639 19640 my $class; 19641 19642 ( $class, $rOpts, $file_writer_object, $logger_object, $diagnostics_object ) 19643 = @_; 19644 19645 # variables describing the entire space group: 19646 $ralignment_list = []; 19647 $group_level = 0; 19648 $last_level_written = -1; 19649 $extra_indent_ok = 0; # can we move all lines to the right? 19650 $last_side_comment_length = 0; 19651 $maximum_jmax_seen = 0; 19652 $minimum_jmax_seen = 0; 19653 $previous_minimum_jmax_seen = 0; 19654 $previous_maximum_jmax_seen = 0; 19655 19656 # variables describing each line of the group 19657 @group_lines = (); # list of all lines in group 19658 19659 $outdented_line_count = 0; 19660 $first_outdented_line_at = 0; 19661 $last_outdented_line_at = 0; 19662 $last_side_comment_line_number = 0; 19663 $last_side_comment_level = -1; 19664 $is_matching_terminal_line = 0; 19665 19666 # most recent 3 side comments; [ line number, column ] 19667 $side_comment_history[0] = [ -300, 0 ]; 19668 $side_comment_history[1] = [ -200, 0 ]; 19669 $side_comment_history[2] = [ -100, 0 ]; 19670 19671 # valign_output_step_B cache: 19672 $cached_line_text = ""; 19673 $cached_line_type = 0; 19674 $cached_line_flag = 0; 19675 $cached_seqno = 0; 19676 $cached_line_valid = 0; 19677 $cached_line_leading_space_count = 0; 19678 $cached_seqno_string = ""; 19679 19680 # string of sequence numbers joined together 19681 $seqno_string = ""; 19682 $last_nonblank_seqno_string = ""; 19683 19684 # frequently used parameters 19685 $rOpts_indent_columns = $rOpts->{'indent-columns'}; 19686 $rOpts_tabs = $rOpts->{'tabs'}; 19687 $rOpts_entab_leading_whitespace = $rOpts->{'entab-leading-whitespace'}; 19688 $rOpts_fixed_position_side_comment = 19689 $rOpts->{'fixed-position-side-comment'}; 19690 $rOpts_minimum_space_to_comment = $rOpts->{'minimum-space-to-comment'}; 19691 $rOpts_maximum_line_length = $rOpts->{'maximum-line-length'}; 19692 $rOpts_variable_maximum_line_length = 19693 $rOpts->{'variable-maximum-line-length'}; 19694 $rOpts_valign = $rOpts->{'valign'}; 19695 19696 $consecutive_block_comments = 0; 19697 forget_side_comment(); 19698 19699 initialize_for_new_group(); 19700 19701 $vertical_aligner_self = {}; 19702 bless $vertical_aligner_self, $class; 19703 return $vertical_aligner_self; 19704} 19705 19706sub initialize_for_new_group { 19707 $maximum_line_index = -1; # lines in the current group 19708 $maximum_alignment_index = -1; # alignments in current group 19709 $zero_count = 0; # count consecutive lines without tokens 19710 $current_line = undef; # line being matched for alignment 19711 $group_maximum_gap = 0; # largest gap introduced 19712 $group_type = ""; 19713 $marginal_match = 0; 19714 $comment_leading_space_count = 0; 19715 $last_leading_space_count = 0; 19716} 19717 19718# interface to Perl::Tidy::Diagnostics routines 19719sub write_diagnostics { 19720 if ($diagnostics_object) { 19721 $diagnostics_object->write_diagnostics(@_); 19722 } 19723} 19724 19725# interface to Perl::Tidy::Logger routines 19726sub warning { 19727 if ($logger_object) { 19728 $logger_object->warning(@_); 19729 } 19730} 19731 19732sub write_logfile_entry { 19733 if ($logger_object) { 19734 $logger_object->write_logfile_entry(@_); 19735 } 19736} 19737 19738sub report_definite_bug { 19739 if ($logger_object) { 19740 $logger_object->report_definite_bug(); 19741 } 19742} 19743 19744sub get_SPACES { 19745 19746 # return the number of leading spaces associated with an indentation 19747 # variable $indentation is either a constant number of spaces or an 19748 # object with a get_SPACES method. 19749 my $indentation = shift; 19750 return ref($indentation) ? $indentation->get_SPACES() : $indentation; 19751} 19752 19753sub get_RECOVERABLE_SPACES { 19754 19755 # return the number of spaces (+ means shift right, - means shift left) 19756 # that we would like to shift a group of lines with the same indentation 19757 # to get them to line up with their opening parens 19758 my $indentation = shift; 19759 return ref($indentation) ? $indentation->get_RECOVERABLE_SPACES() : 0; 19760} 19761 19762sub get_STACK_DEPTH { 19763 19764 my $indentation = shift; 19765 return ref($indentation) ? $indentation->get_STACK_DEPTH() : 0; 19766} 19767 19768sub make_alignment { 19769 my ( $col, $token ) = @_; 19770 19771 # make one new alignment at column $col which aligns token $token 19772 ++$maximum_alignment_index; 19773 my $alignment = new Perl::Tidy::VerticalAligner::Alignment( 19774 column => $col, 19775 starting_column => $col, 19776 matching_token => $token, 19777 starting_line => $maximum_line_index, 19778 ending_line => $maximum_line_index, 19779 serial_number => $maximum_alignment_index, 19780 ); 19781 $ralignment_list->[$maximum_alignment_index] = $alignment; 19782 return $alignment; 19783} 19784 19785sub dump_alignments { 19786 print STDOUT 19787"Current Alignments:\ni\ttoken\tstarting_column\tcolumn\tstarting_line\tending_line\n"; 19788 for my $i ( 0 .. $maximum_alignment_index ) { 19789 my $column = $ralignment_list->[$i]->get_column(); 19790 my $starting_column = $ralignment_list->[$i]->get_starting_column(); 19791 my $matching_token = $ralignment_list->[$i]->get_matching_token(); 19792 my $starting_line = $ralignment_list->[$i]->get_starting_line(); 19793 my $ending_line = $ralignment_list->[$i]->get_ending_line(); 19794 print STDOUT 19795"$i\t$matching_token\t$starting_column\t$column\t$starting_line\t$ending_line\n"; 19796 } 19797} 19798 19799sub save_alignment_columns { 19800 for my $i ( 0 .. $maximum_alignment_index ) { 19801 $ralignment_list->[$i]->save_column(); 19802 } 19803} 19804 19805sub restore_alignment_columns { 19806 for my $i ( 0 .. $maximum_alignment_index ) { 19807 $ralignment_list->[$i]->restore_column(); 19808 } 19809} 19810 19811sub forget_side_comment { 19812 $last_comment_column = 0; 19813} 19814 19815sub maximum_line_length_for_level { 19816 19817 # return maximum line length for line starting with a given level 19818 my $maximum_line_length = $rOpts_maximum_line_length; 19819 if ($rOpts_variable_maximum_line_length) { 19820 my $level = shift; 19821 if ( $level < 0 ) { $level = 0 } 19822 $maximum_line_length += $level * $rOpts_indent_columns; 19823 } 19824 return $maximum_line_length; 19825} 19826 19827sub valign_input { 19828 19829 # Place one line in the current vertical group. 19830 # 19831 # The input parameters are: 19832 # $level = indentation level of this line 19833 # $rfields = reference to array of fields 19834 # $rpatterns = reference to array of patterns, one per field 19835 # $rtokens = reference to array of tokens starting fields 1,2,.. 19836 # 19837 # Here is an example of what this package does. In this example, 19838 # we are trying to line up both the '=>' and the '#'. 19839 # 19840 # '18' => 'grave', # \` 19841 # '19' => 'acute', # `' 19842 # '20' => 'caron', # \v 19843 # <-tabs-><f1-><--field 2 ---><-f3-> 19844 # | | | | 19845 # | | | | 19846 # col1 col2 col3 col4 19847 # 19848 # The calling routine has already broken the entire line into 3 fields as 19849 # indicated. (So the work of identifying promising common tokens has 19850 # already been done). 19851 # 19852 # In this example, there will be 2 tokens being matched: '=>' and '#'. 19853 # They are the leading parts of fields 2 and 3, but we do need to know 19854 # what they are so that we can dump a group of lines when these tokens 19855 # change. 19856 # 19857 # The fields contain the actual characters of each field. The patterns 19858 # are like the fields, but they contain mainly token types instead 19859 # of tokens, so they have fewer characters. They are used to be 19860 # sure we are matching fields of similar type. 19861 # 19862 # In this example, there will be 4 column indexes being adjusted. The 19863 # first one is always at zero. The interior columns are at the start of 19864 # the matching tokens, and the last one tracks the maximum line length. 19865 # 19866 # Each time a new line comes in, it joins the current vertical 19867 # group if possible. Otherwise it causes the current group to be dumped 19868 # and a new group is started. 19869 # 19870 # For each new group member, the column locations are increased, as 19871 # necessary, to make room for the new fields. When the group is finally 19872 # output, these column numbers are used to compute the amount of spaces of 19873 # padding needed for each field. 19874 # 19875 # Programming note: the fields are assumed not to have any tab characters. 19876 # Tabs have been previously removed except for tabs in quoted strings and 19877 # side comments. Tabs in these fields can mess up the column counting. 19878 # The log file warns the user if there are any such tabs. 19879 19880 my ( 19881 $level, $level_end, 19882 $indentation, $rfields, 19883 $rtokens, $rpatterns, 19884 $is_forced_break, $outdent_long_lines, 19885 $is_terminal_ternary, $is_terminal_statement, 19886 $do_not_pad, $rvertical_tightness_flags, 19887 $level_jump, 19888 ) = @_; 19889 19890 # number of fields is $jmax 19891 # number of tokens between fields is $jmax-1 19892 my $jmax = $#{$rfields}; 19893 19894 my $leading_space_count = get_SPACES($indentation); 19895 19896 # set outdented flag to be sure we either align within statements or 19897 # across statement boundaries, but not both. 19898 my $is_outdented = $last_leading_space_count > $leading_space_count; 19899 $last_leading_space_count = $leading_space_count; 19900 19901 # Patch: undo for hanging side comment 19902 my $is_hanging_side_comment = 19903 ( $jmax == 1 && $rtokens->[0] eq '#' && $rfields->[0] =~ /^\s*$/ ); 19904 $is_outdented = 0 if $is_hanging_side_comment; 19905 19906 # Forget side comment alignment after seeing 2 or more block comments 19907 my $is_block_comment = ( $jmax == 0 && $rfields->[0] =~ /^#/ ); 19908 if ($is_block_comment) { 19909 $consecutive_block_comments++; 19910 } 19911 else { 19912 if ( $consecutive_block_comments > 1 ) { forget_side_comment() } 19913 $consecutive_block_comments = 0; 19914 } 19915 19916 VALIGN_DEBUG_FLAG_APPEND0 && do { 19917 print STDOUT 19918"APPEND0: entering lines=$maximum_line_index new #fields= $jmax, leading_count=$leading_space_count last_cmt=$last_comment_column force=$is_forced_break\n"; 19919 }; 19920 19921 # Validate cached line if necessary: If we can produce a container 19922 # with just 2 lines total by combining an existing cached opening 19923 # token with the closing token to follow, then we will mark both 19924 # cached flags as valid. 19925 if ($rvertical_tightness_flags) { 19926 if ( $maximum_line_index <= 0 19927 && $cached_line_type 19928 && $cached_seqno 19929 && $rvertical_tightness_flags->[2] 19930 && $rvertical_tightness_flags->[2] == $cached_seqno ) 19931 { 19932 $rvertical_tightness_flags->[3] ||= 1; 19933 $cached_line_valid ||= 1; 19934 } 19935 } 19936 19937 # do not join an opening block brace with an unbalanced line 19938 # unless requested with a flag value of 2 19939 if ( $cached_line_type == 3 19940 && $maximum_line_index < 0 19941 && $cached_line_flag < 2 19942 && $level_jump != 0 ) 19943 { 19944 $cached_line_valid = 0; 19945 } 19946 19947 # patch until new aligner is finished 19948 if ($do_not_pad) { my_flush() } 19949 19950 # shouldn't happen: 19951 if ( $level < 0 ) { $level = 0 } 19952 19953 # do not align code across indentation level changes 19954 # or if vertical alignment is turned off for debugging 19955 if ( $level != $group_level || $is_outdented || !$rOpts_valign ) { 19956 19957 # we are allowed to shift a group of lines to the right if its 19958 # level is greater than the previous and next group 19959 $extra_indent_ok = 19960 ( $level < $group_level && $last_level_written < $group_level ); 19961 19962 my_flush(); 19963 19964 # If we know that this line will get flushed out by itself because 19965 # of level changes, we can leave the extra_indent_ok flag set. 19966 # That way, if we get an external flush call, we will still be 19967 # able to do some -lp alignment if necessary. 19968 $extra_indent_ok = ( $is_terminal_statement && $level > $group_level ); 19969 19970 $group_level = $level; 19971 19972 # wait until after the above flush to get the leading space 19973 # count because it may have been changed if the -icp flag is in 19974 # effect 19975 $leading_space_count = get_SPACES($indentation); 19976 19977 } 19978 19979 # -------------------------------------------------------------------- 19980 # Patch to collect outdentable block COMMENTS 19981 # -------------------------------------------------------------------- 19982 my $is_blank_line = ""; 19983 if ( $group_type eq 'COMMENT' ) { 19984 if ( 19985 ( 19986 $is_block_comment 19987 && $outdent_long_lines 19988 && $leading_space_count == $comment_leading_space_count 19989 ) 19990 || $is_blank_line 19991 ) 19992 { 19993 $group_lines[ ++$maximum_line_index ] = $rfields->[0]; 19994 return; 19995 } 19996 else { 19997 my_flush(); 19998 } 19999 } 20000 20001 # -------------------------------------------------------------------- 20002 # add dummy fields for terminal ternary 20003 # -------------------------------------------------------------------- 20004 my $j_terminal_match; 20005 if ( $is_terminal_ternary && $current_line ) { 20006 $j_terminal_match = 20007 fix_terminal_ternary( $rfields, $rtokens, $rpatterns ); 20008 $jmax = @{$rfields} - 1; 20009 } 20010 20011 # -------------------------------------------------------------------- 20012 # add dummy fields for else statement 20013 # -------------------------------------------------------------------- 20014 if ( $rfields->[0] =~ /^else\s*$/ 20015 && $current_line 20016 && $level_jump == 0 ) 20017 { 20018 $j_terminal_match = fix_terminal_else( $rfields, $rtokens, $rpatterns ); 20019 $jmax = @{$rfields} - 1; 20020 } 20021 20022 # -------------------------------------------------------------------- 20023 # Step 1. Handle simple line of code with no fields to match. 20024 # -------------------------------------------------------------------- 20025 if ( $jmax <= 0 ) { 20026 $zero_count++; 20027 20028 if ( $maximum_line_index >= 0 20029 && !get_RECOVERABLE_SPACES( $group_lines[0]->get_indentation() ) ) 20030 { 20031 20032 # flush the current group if it has some aligned columns.. 20033 if ( $group_lines[0]->get_jmax() > 1 ) { my_flush() } 20034 20035 # flush current group if we are just collecting side comments.. 20036 elsif ( 20037 20038 # ...and we haven't seen a comment lately 20039 ( $zero_count > 3 ) 20040 20041 # ..or if this new line doesn't fit to the left of the comments 20042 || ( ( $leading_space_count + length( $$rfields[0] ) ) > 20043 $group_lines[0]->get_column(0) ) 20044 ) 20045 { 20046 my_flush(); 20047 } 20048 } 20049 20050 # patch to start new COMMENT group if this comment may be outdented 20051 if ( $is_block_comment 20052 && $outdent_long_lines 20053 && $maximum_line_index < 0 ) 20054 { 20055 $group_type = 'COMMENT'; 20056 $comment_leading_space_count = $leading_space_count; 20057 $group_lines[ ++$maximum_line_index ] = $rfields->[0]; 20058 return; 20059 } 20060 20061 # just write this line directly if no current group, no side comment, 20062 # and no space recovery is needed. 20063 if ( $maximum_line_index < 0 && !get_RECOVERABLE_SPACES($indentation) ) 20064 { 20065 valign_output_step_B( $leading_space_count, $$rfields[0], 0, 20066 $outdent_long_lines, $rvertical_tightness_flags, $level ); 20067 return; 20068 } 20069 } 20070 else { 20071 $zero_count = 0; 20072 } 20073 20074 # programming check: (shouldn't happen) 20075 # an error here implies an incorrect call was made 20076 if ( $jmax > 0 && ( $#{$rtokens} != ( $jmax - 1 ) ) ) { 20077 warning( 20078"Program bug in Perl::Tidy::VerticalAligner - number of tokens = $#{$rtokens} should be one less than number of fields: $#{$rfields})\n" 20079 ); 20080 report_definite_bug(); 20081 } 20082 20083 # -------------------------------------------------------------------- 20084 # create an object to hold this line 20085 # -------------------------------------------------------------------- 20086 my $new_line = new Perl::Tidy::VerticalAligner::Line( 20087 jmax => $jmax, 20088 jmax_original_line => $jmax, 20089 rtokens => $rtokens, 20090 rfields => $rfields, 20091 rpatterns => $rpatterns, 20092 indentation => $indentation, 20093 leading_space_count => $leading_space_count, 20094 outdent_long_lines => $outdent_long_lines, 20095 list_type => "", 20096 is_hanging_side_comment => $is_hanging_side_comment, 20097 maximum_line_length => maximum_line_length_for_level($level), 20098 rvertical_tightness_flags => $rvertical_tightness_flags, 20099 ); 20100 20101 # Initialize a global flag saying if the last line of the group should 20102 # match end of group and also terminate the group. There should be no 20103 # returns between here and where the flag is handled at the bottom. 20104 my $col_matching_terminal = 0; 20105 if ( defined($j_terminal_match) ) { 20106 20107 # remember the column of the terminal ? or { to match with 20108 $col_matching_terminal = $current_line->get_column($j_terminal_match); 20109 20110 # set global flag for sub decide_if_aligned 20111 $is_matching_terminal_line = 1; 20112 } 20113 20114 # -------------------------------------------------------------------- 20115 # It simplifies things to create a zero length side comment 20116 # if none exists. 20117 # -------------------------------------------------------------------- 20118 make_side_comment( $new_line, $level_end ); 20119 20120 # -------------------------------------------------------------------- 20121 # Decide if this is a simple list of items. 20122 # There are 3 list types: none, comma, comma-arrow. 20123 # We use this below to be less restrictive in deciding what to align. 20124 # -------------------------------------------------------------------- 20125 if ($is_forced_break) { 20126 decide_if_list($new_line); 20127 } 20128 20129 if ($current_line) { 20130 20131 # -------------------------------------------------------------------- 20132 # Allow hanging side comment to join current group, if any 20133 # This will help keep side comments aligned, because otherwise we 20134 # will have to start a new group, making alignment less likely. 20135 # -------------------------------------------------------------------- 20136 join_hanging_comment( $new_line, $current_line ) 20137 if $is_hanging_side_comment; 20138 20139 # -------------------------------------------------------------------- 20140 # If there is just one previous line, and it has more fields 20141 # than the new line, try to join fields together to get a match with 20142 # the new line. At the present time, only a single leading '=' is 20143 # allowed to be compressed out. This is useful in rare cases where 20144 # a table is forced to use old breakpoints because of side comments, 20145 # and the table starts out something like this: 20146 # my %MonthChars = ('0', 'Jan', # side comment 20147 # '1', 'Feb', 20148 # '2', 'Mar', 20149 # Eliminating the '=' field will allow the remaining fields to line up. 20150 # This situation does not occur if there are no side comments 20151 # because scan_list would put a break after the opening '('. 20152 # -------------------------------------------------------------------- 20153 eliminate_old_fields( $new_line, $current_line ); 20154 20155 # -------------------------------------------------------------------- 20156 # If the new line has more fields than the current group, 20157 # see if we can match the first fields and combine the remaining 20158 # fields of the new line. 20159 # -------------------------------------------------------------------- 20160 eliminate_new_fields( $new_line, $current_line ); 20161 20162 # -------------------------------------------------------------------- 20163 # Flush previous group unless all common tokens and patterns match.. 20164 # -------------------------------------------------------------------- 20165 check_match( $new_line, $current_line ); 20166 20167 # -------------------------------------------------------------------- 20168 # See if there is space for this line in the current group (if any) 20169 # -------------------------------------------------------------------- 20170 if ($current_line) { 20171 check_fit( $new_line, $current_line ); 20172 } 20173 } 20174 20175 # -------------------------------------------------------------------- 20176 # Append this line to the current group (or start new group) 20177 # -------------------------------------------------------------------- 20178 add_to_group($new_line); 20179 20180 # Future update to allow this to vary: 20181 $current_line = $new_line if ( $maximum_line_index == 0 ); 20182 20183 # output this group if it ends in a terminal else or ternary line 20184 if ( defined($j_terminal_match) ) { 20185 20186 # if there is only one line in the group (maybe due to failure to match 20187 # perfectly with previous lines), then align the ? or { of this 20188 # terminal line with the previous one unless that would make the line 20189 # too long 20190 if ( $maximum_line_index == 0 ) { 20191 my $col_now = $current_line->get_column($j_terminal_match); 20192 my $pad = $col_matching_terminal - $col_now; 20193 my $padding_available = 20194 $current_line->get_available_space_on_right(); 20195 if ( $pad > 0 && $pad <= $padding_available ) { 20196 $current_line->increase_field_width( $j_terminal_match, $pad ); 20197 } 20198 } 20199 my_flush(); 20200 $is_matching_terminal_line = 0; 20201 } 20202 20203 # -------------------------------------------------------------------- 20204 # Step 8. Some old debugging stuff 20205 # -------------------------------------------------------------------- 20206 VALIGN_DEBUG_FLAG_APPEND && do { 20207 print STDOUT "APPEND fields:"; 20208 dump_array(@$rfields); 20209 print STDOUT "APPEND tokens:"; 20210 dump_array(@$rtokens); 20211 print STDOUT "APPEND patterns:"; 20212 dump_array(@$rpatterns); 20213 dump_alignments(); 20214 }; 20215 20216 return; 20217} 20218 20219sub join_hanging_comment { 20220 20221 my $line = shift; 20222 my $jmax = $line->get_jmax(); 20223 return 0 unless $jmax == 1; # must be 2 fields 20224 my $rtokens = $line->get_rtokens(); 20225 return 0 unless $$rtokens[0] eq '#'; # the second field is a comment.. 20226 my $rfields = $line->get_rfields(); 20227 return 0 unless $$rfields[0] =~ /^\s*$/; # the first field is empty... 20228 my $old_line = shift; 20229 my $maximum_field_index = $old_line->get_jmax(); 20230 return 0 20231 unless $maximum_field_index > $jmax; # the current line has more fields 20232 my $rpatterns = $line->get_rpatterns(); 20233 20234 $line->set_is_hanging_side_comment(1); 20235 $jmax = $maximum_field_index; 20236 $line->set_jmax($jmax); 20237 $$rfields[$jmax] = $$rfields[1]; 20238 $$rtokens[ $jmax - 1 ] = $$rtokens[0]; 20239 $$rpatterns[ $jmax - 1 ] = $$rpatterns[0]; 20240 for ( my $j = 1 ; $j < $jmax ; $j++ ) { 20241 $$rfields[$j] = " "; # NOTE: caused glitch unless 1 blank, why? 20242 $$rtokens[ $j - 1 ] = ""; 20243 $$rpatterns[ $j - 1 ] = ""; 20244 } 20245 return 1; 20246} 20247 20248sub eliminate_old_fields { 20249 20250 my $new_line = shift; 20251 my $jmax = $new_line->get_jmax(); 20252 if ( $jmax > $maximum_jmax_seen ) { $maximum_jmax_seen = $jmax } 20253 if ( $jmax < $minimum_jmax_seen ) { $minimum_jmax_seen = $jmax } 20254 20255 # there must be one previous line 20256 return unless ( $maximum_line_index == 0 ); 20257 20258 my $old_line = shift; 20259 my $maximum_field_index = $old_line->get_jmax(); 20260 20261 ############################################### 20262 # this line must have fewer fields 20263 return unless $maximum_field_index > $jmax; 20264 ############################################### 20265 20266 # Identify specific cases where field elimination is allowed: 20267 # case=1: both lines have comma-separated lists, and the first 20268 # line has an equals 20269 # case=2: both lines have leading equals 20270 20271 # case 1 is the default 20272 my $case = 1; 20273 20274 # See if case 2: both lines have leading '=' 20275 # We'll require smiliar leading patterns in this case 20276 my $old_rtokens = $old_line->get_rtokens(); 20277 my $rtokens = $new_line->get_rtokens(); 20278 my $rpatterns = $new_line->get_rpatterns(); 20279 my $old_rpatterns = $old_line->get_rpatterns(); 20280 if ( $rtokens->[0] =~ /^=\d*$/ 20281 && $old_rtokens->[0] eq $rtokens->[0] 20282 && $old_rpatterns->[0] eq $rpatterns->[0] ) 20283 { 20284 $case = 2; 20285 } 20286 20287 # not too many fewer fields in new line for case 1 20288 return unless ( $case != 1 || $maximum_field_index - 2 <= $jmax ); 20289 20290 # case 1 must have side comment 20291 my $old_rfields = $old_line->get_rfields(); 20292 return 20293 if ( $case == 1 20294 && length( $$old_rfields[$maximum_field_index] ) == 0 ); 20295 20296 my $rfields = $new_line->get_rfields(); 20297 20298 my $hid_equals = 0; 20299 20300 my @new_alignments = (); 20301 my @new_fields = (); 20302 my @new_matching_patterns = (); 20303 my @new_matching_tokens = (); 20304 20305 my $j = 0; 20306 my $k; 20307 my $current_field = ''; 20308 my $current_pattern = ''; 20309 20310 # loop over all old tokens 20311 my $in_match = 0; 20312 for ( $k = 0 ; $k < $maximum_field_index ; $k++ ) { 20313 $current_field .= $$old_rfields[$k]; 20314 $current_pattern .= $$old_rpatterns[$k]; 20315 last if ( $j > $jmax - 1 ); 20316 20317 if ( $$old_rtokens[$k] eq $$rtokens[$j] ) { 20318 $in_match = 1; 20319 $new_fields[$j] = $current_field; 20320 $new_matching_patterns[$j] = $current_pattern; 20321 $current_field = ''; 20322 $current_pattern = ''; 20323 $new_matching_tokens[$j] = $$old_rtokens[$k]; 20324 $new_alignments[$j] = $old_line->get_alignment($k); 20325 $j++; 20326 } 20327 else { 20328 20329 if ( $$old_rtokens[$k] =~ /^\=\d*$/ ) { 20330 last if ( $case == 2 ); # avoid problems with stuff 20331 # like: $a=$b=$c=$d; 20332 $hid_equals = 1; 20333 } 20334 last 20335 if ( $in_match && $case == 1 ) 20336 ; # disallow gaps in matching field types in case 1 20337 } 20338 } 20339 20340 # Modify the current state if we are successful. 20341 # We must exactly reach the ends of both lists for success. 20342 if ( ( $j == $jmax ) 20343 && ( $current_field eq '' ) 20344 && ( $case != 1 || $hid_equals ) ) 20345 { 20346 $k = $maximum_field_index; 20347 $current_field .= $$old_rfields[$k]; 20348 $current_pattern .= $$old_rpatterns[$k]; 20349 $new_fields[$j] = $current_field; 20350 $new_matching_patterns[$j] = $current_pattern; 20351 20352 $new_alignments[$j] = $old_line->get_alignment($k); 20353 $maximum_field_index = $j; 20354 20355 $old_line->set_alignments(@new_alignments); 20356 $old_line->set_jmax($jmax); 20357 $old_line->set_rtokens( \@new_matching_tokens ); 20358 $old_line->set_rfields( \@new_fields ); 20359 $old_line->set_rpatterns( \@$rpatterns ); 20360 } 20361} 20362 20363# create an empty side comment if none exists 20364sub make_side_comment { 20365 my $new_line = shift; 20366 my $level_end = shift; 20367 my $jmax = $new_line->get_jmax(); 20368 my $rtokens = $new_line->get_rtokens(); 20369 20370 # if line does not have a side comment... 20371 if ( ( $jmax == 0 ) || ( $$rtokens[ $jmax - 1 ] ne '#' ) ) { 20372 my $rfields = $new_line->get_rfields(); 20373 my $rpatterns = $new_line->get_rpatterns(); 20374 $$rtokens[$jmax] = '#'; 20375 $$rfields[ ++$jmax ] = ''; 20376 $$rpatterns[$jmax] = '#'; 20377 $new_line->set_jmax($jmax); 20378 $new_line->set_jmax_original_line($jmax); 20379 } 20380 20381 # line has a side comment.. 20382 else { 20383 20384 # don't remember old side comment location for very long 20385 my $line_number = $vertical_aligner_self->get_output_line_number(); 20386 my $rfields = $new_line->get_rfields(); 20387 if ( 20388 $line_number - $last_side_comment_line_number > 12 20389 20390 # and don't remember comment location across block level changes 20391 || ( $level_end < $last_side_comment_level && $$rfields[0] =~ /^}/ ) 20392 ) 20393 { 20394 forget_side_comment(); 20395 } 20396 $last_side_comment_line_number = $line_number; 20397 $last_side_comment_level = $level_end; 20398 } 20399} 20400 20401sub decide_if_list { 20402 20403 my $line = shift; 20404 20405 # A list will be taken to be a line with a forced break in which all 20406 # of the field separators are commas or comma-arrows (except for the 20407 # trailing #) 20408 20409 # List separator tokens are things like ',3' or '=>2', 20410 # where the trailing digit is the nesting depth. Allow braces 20411 # to allow nested list items. 20412 my $rtokens = $line->get_rtokens(); 20413 my $test_token = $$rtokens[0]; 20414 if ( $test_token =~ /^(\,|=>)/ ) { 20415 my $list_type = $test_token; 20416 my $jmax = $line->get_jmax(); 20417 20418 foreach ( 1 .. $jmax - 2 ) { 20419 if ( $$rtokens[$_] !~ /^(\,|=>|\{)/ ) { 20420 $list_type = ""; 20421 last; 20422 } 20423 } 20424 $line->set_list_type($list_type); 20425 } 20426} 20427 20428sub eliminate_new_fields { 20429 20430 return unless ( $maximum_line_index >= 0 ); 20431 my ( $new_line, $old_line ) = @_; 20432 my $jmax = $new_line->get_jmax(); 20433 20434 my $old_rtokens = $old_line->get_rtokens(); 20435 my $rtokens = $new_line->get_rtokens(); 20436 my $is_assignment = 20437 ( $rtokens->[0] =~ /^=\d*$/ && ( $old_rtokens->[0] eq $rtokens->[0] ) ); 20438 20439 # must be monotonic variation 20440 return unless ( $is_assignment || $previous_maximum_jmax_seen <= $jmax ); 20441 20442 # must be more fields in the new line 20443 my $maximum_field_index = $old_line->get_jmax(); 20444 return unless ( $maximum_field_index < $jmax ); 20445 20446 unless ($is_assignment) { 20447 return 20448 unless ( $old_line->get_jmax_original_line() == $minimum_jmax_seen ) 20449 ; # only if monotonic 20450 20451 # never combine fields of a comma list 20452 return 20453 unless ( $maximum_field_index > 1 ) 20454 && ( $new_line->get_list_type() !~ /^,/ ); 20455 } 20456 20457 my $rfields = $new_line->get_rfields(); 20458 my $rpatterns = $new_line->get_rpatterns(); 20459 my $old_rpatterns = $old_line->get_rpatterns(); 20460 20461 # loop over all OLD tokens except comment and check match 20462 my $match = 1; 20463 my $k; 20464 for ( $k = 0 ; $k < $maximum_field_index - 1 ; $k++ ) { 20465 if ( ( $$old_rtokens[$k] ne $$rtokens[$k] ) 20466 || ( $$old_rpatterns[$k] ne $$rpatterns[$k] ) ) 20467 { 20468 $match = 0; 20469 last; 20470 } 20471 } 20472 20473 # first tokens agree, so combine extra new tokens 20474 if ($match) { 20475 for $k ( $maximum_field_index .. $jmax - 1 ) { 20476 20477 $$rfields[ $maximum_field_index - 1 ] .= $$rfields[$k]; 20478 $$rfields[$k] = ""; 20479 $$rpatterns[ $maximum_field_index - 1 ] .= $$rpatterns[$k]; 20480 $$rpatterns[$k] = ""; 20481 } 20482 20483 $$rtokens[ $maximum_field_index - 1 ] = '#'; 20484 $$rfields[$maximum_field_index] = $$rfields[$jmax]; 20485 $$rpatterns[$maximum_field_index] = $$rpatterns[$jmax]; 20486 $jmax = $maximum_field_index; 20487 } 20488 $new_line->set_jmax($jmax); 20489} 20490 20491sub fix_terminal_ternary { 20492 20493 # Add empty fields as necessary to align a ternary term 20494 # like this: 20495 # 20496 # my $leapyear = 20497 # $year % 4 ? 0 20498 # : $year % 100 ? 1 20499 # : $year % 400 ? 0 20500 # : 1; 20501 # 20502 # returns 1 if the terminal item should be indented 20503 20504 my ( $rfields, $rtokens, $rpatterns ) = @_; 20505 20506 my $jmax = @{$rfields} - 1; 20507 my $old_line = $group_lines[$maximum_line_index]; 20508 my $rfields_old = $old_line->get_rfields(); 20509 20510 my $rpatterns_old = $old_line->get_rpatterns(); 20511 my $rtokens_old = $old_line->get_rtokens(); 20512 my $maximum_field_index = $old_line->get_jmax(); 20513 20514 # look for the question mark after the : 20515 my ($jquestion); 20516 my $depth_question; 20517 my $pad = ""; 20518 for ( my $j = 0 ; $j < $maximum_field_index ; $j++ ) { 20519 my $tok = $rtokens_old->[$j]; 20520 if ( $tok =~ /^\?(\d+)$/ ) { 20521 $depth_question = $1; 20522 20523 # depth must be correct 20524 next unless ( $depth_question eq $group_level ); 20525 20526 $jquestion = $j; 20527 if ( $rfields_old->[ $j + 1 ] =~ /^(\?\s*)/ ) { 20528 $pad = " " x length($1); 20529 } 20530 else { 20531 return; # shouldn't happen 20532 } 20533 last; 20534 } 20535 } 20536 return unless ( defined($jquestion) ); # shouldn't happen 20537 20538 # Now splice the tokens and patterns of the previous line 20539 # into the else line to insure a match. Add empty fields 20540 # as necessary. 20541 my $jadd = $jquestion; 20542 20543 # Work on copies of the actual arrays in case we have 20544 # to return due to an error 20545 my @fields = @{$rfields}; 20546 my @patterns = @{$rpatterns}; 20547 my @tokens = @{$rtokens}; 20548 20549 VALIGN_DEBUG_FLAG_TERNARY && do { 20550 local $" = '><'; 20551 print STDOUT "CURRENT FIELDS=<@{$rfields_old}>\n"; 20552 print STDOUT "CURRENT TOKENS=<@{$rtokens_old}>\n"; 20553 print STDOUT "CURRENT PATTERNS=<@{$rpatterns_old}>\n"; 20554 print STDOUT "UNMODIFIED FIELDS=<@{$rfields}>\n"; 20555 print STDOUT "UNMODIFIED TOKENS=<@{$rtokens}>\n"; 20556 print STDOUT "UNMODIFIED PATTERNS=<@{$rpatterns}>\n"; 20557 }; 20558 20559 # handle cases of leading colon on this line 20560 if ( $fields[0] =~ /^(:\s*)(.*)$/ ) { 20561 20562 my ( $colon, $therest ) = ( $1, $2 ); 20563 20564 # Handle sub-case of first field with leading colon plus additional code 20565 # This is the usual situation as at the '1' below: 20566 # ... 20567 # : $year % 400 ? 0 20568 # : 1; 20569 if ($therest) { 20570 20571 # Split the first field after the leading colon and insert padding. 20572 # Note that this padding will remain even if the terminal value goes 20573 # out on a separate line. This does not seem to look to bad, so no 20574 # mechanism has been included to undo it. 20575 my $field1 = shift @fields; 20576 unshift @fields, ( $colon, $pad . $therest ); 20577 20578 # change the leading pattern from : to ? 20579 return unless ( $patterns[0] =~ s/^\:/?/ ); 20580 20581 # install leading tokens and patterns of existing line 20582 unshift( @tokens, @{$rtokens_old}[ 0 .. $jquestion ] ); 20583 unshift( @patterns, @{$rpatterns_old}[ 0 .. $jquestion ] ); 20584 20585 # insert appropriate number of empty fields 20586 splice( @fields, 1, 0, ('') x $jadd ) if $jadd; 20587 } 20588 20589 # handle sub-case of first field just equal to leading colon. 20590 # This can happen for example in the example below where 20591 # the leading '(' would create a new alignment token 20592 # : ( $name =~ /[]}]$/ ) ? ( $mname = $name ) 20593 # : ( $mname = $name . '->' ); 20594 else { 20595 20596 return unless ( $jmax > 0 && $tokens[0] ne '#' ); # shouldn't happen 20597 20598 # prepend a leading ? onto the second pattern 20599 $patterns[1] = "?b" . $patterns[1]; 20600 20601 # pad the second field 20602 $fields[1] = $pad . $fields[1]; 20603 20604 # install leading tokens and patterns of existing line, replacing 20605 # leading token and inserting appropriate number of empty fields 20606 splice( @tokens, 0, 1, @{$rtokens_old}[ 0 .. $jquestion ] ); 20607 splice( @patterns, 1, 0, @{$rpatterns_old}[ 1 .. $jquestion ] ); 20608 splice( @fields, 1, 0, ('') x $jadd ) if $jadd; 20609 } 20610 } 20611 20612 # Handle case of no leading colon on this line. This will 20613 # be the case when -wba=':' is used. For example, 20614 # $year % 400 ? 0 : 20615 # 1; 20616 else { 20617 20618 # install leading tokens and patterns of existing line 20619 $patterns[0] = '?' . 'b' . $patterns[0]; 20620 unshift( @tokens, @{$rtokens_old}[ 0 .. $jquestion ] ); 20621 unshift( @patterns, @{$rpatterns_old}[ 0 .. $jquestion ] ); 20622 20623 # insert appropriate number of empty fields 20624 $jadd = $jquestion + 1; 20625 $fields[0] = $pad . $fields[0]; 20626 splice( @fields, 0, 0, ('') x $jadd ) if $jadd; 20627 } 20628 20629 VALIGN_DEBUG_FLAG_TERNARY && do { 20630 local $" = '><'; 20631 print STDOUT "MODIFIED TOKENS=<@tokens>\n"; 20632 print STDOUT "MODIFIED PATTERNS=<@patterns>\n"; 20633 print STDOUT "MODIFIED FIELDS=<@fields>\n"; 20634 }; 20635 20636 # all ok .. update the arrays 20637 @{$rfields} = @fields; 20638 @{$rtokens} = @tokens; 20639 @{$rpatterns} = @patterns; 20640 20641 # force a flush after this line 20642 return $jquestion; 20643} 20644 20645sub fix_terminal_else { 20646 20647 # Add empty fields as necessary to align a balanced terminal 20648 # else block to a previous if/elsif/unless block, 20649 # like this: 20650 # 20651 # if ( 1 || $x ) { print "ok 13\n"; } 20652 # else { print "not ok 13\n"; } 20653 # 20654 # returns 1 if the else block should be indented 20655 # 20656 my ( $rfields, $rtokens, $rpatterns ) = @_; 20657 my $jmax = @{$rfields} - 1; 20658 return unless ( $jmax > 0 ); 20659 20660 # check for balanced else block following if/elsif/unless 20661 my $rfields_old = $current_line->get_rfields(); 20662 20663 # TBD: add handling for 'case' 20664 return unless ( $rfields_old->[0] =~ /^(if|elsif|unless)\s*$/ ); 20665 20666 # look for the opening brace after the else, and extrace the depth 20667 my $tok_brace = $rtokens->[0]; 20668 my $depth_brace; 20669 if ( $tok_brace =~ /^\{(\d+)/ ) { $depth_brace = $1; } 20670 20671 # probably: "else # side_comment" 20672 else { return } 20673 20674 my $rpatterns_old = $current_line->get_rpatterns(); 20675 my $rtokens_old = $current_line->get_rtokens(); 20676 my $maximum_field_index = $current_line->get_jmax(); 20677 20678 # be sure the previous if/elsif is followed by an opening paren 20679 my $jparen = 0; 20680 my $tok_paren = '(' . $depth_brace; 20681 my $tok_test = $rtokens_old->[$jparen]; 20682 return unless ( $tok_test eq $tok_paren ); # shouldn't happen 20683 20684 # Now find the opening block brace 20685 my ($jbrace); 20686 for ( my $j = 1 ; $j < $maximum_field_index ; $j++ ) { 20687 my $tok = $rtokens_old->[$j]; 20688 if ( $tok eq $tok_brace ) { 20689 $jbrace = $j; 20690 last; 20691 } 20692 } 20693 return unless ( defined($jbrace) ); # shouldn't happen 20694 20695 # Now splice the tokens and patterns of the previous line 20696 # into the else line to insure a match. Add empty fields 20697 # as necessary. 20698 my $jadd = $jbrace - $jparen; 20699 splice( @{$rtokens}, 0, 0, @{$rtokens_old}[ $jparen .. $jbrace - 1 ] ); 20700 splice( @{$rpatterns}, 1, 0, @{$rpatterns_old}[ $jparen + 1 .. $jbrace ] ); 20701 splice( @{$rfields}, 1, 0, ('') x $jadd ); 20702 20703 # force a flush after this line if it does not follow a case 20704 return $jbrace 20705 unless ( $rfields_old->[0] =~ /^case\s*$/ ); 20706} 20707 20708{ # sub check_match 20709 my %is_good_alignment; 20710 20711 BEGIN { 20712 20713 # Vertically aligning on certain "good" tokens is usually okay 20714 # so we can be less restrictive in marginal cases. 20715 @_ = qw( { ? => = ); 20716 push @_, (','); 20717 @is_good_alignment{@_} = (1) x scalar(@_); 20718 } 20719 20720 sub check_match { 20721 20722 # See if the current line matches the current vertical alignment group. 20723 # If not, flush the current group. 20724 my $new_line = shift; 20725 my $old_line = shift; 20726 20727 # uses global variables: 20728 # $previous_minimum_jmax_seen 20729 # $maximum_jmax_seen 20730 # $maximum_line_index 20731 # $marginal_match 20732 my $jmax = $new_line->get_jmax(); 20733 my $maximum_field_index = $old_line->get_jmax(); 20734 20735 # flush if this line has too many fields 20736 if ( $jmax > $maximum_field_index ) { goto NO_MATCH } 20737 20738 # flush if adding this line would make a non-monotonic field count 20739 if ( 20740 ( $maximum_field_index > $jmax ) # this has too few fields 20741 && ( 20742 ( $previous_minimum_jmax_seen < 20743 $jmax ) # and wouldn't be monotonic 20744 || ( $old_line->get_jmax_original_line() != $maximum_jmax_seen ) 20745 ) 20746 ) 20747 { 20748 goto NO_MATCH; 20749 } 20750 20751 # otherwise see if this line matches the current group 20752 my $jmax_original_line = $new_line->get_jmax_original_line(); 20753 my $is_hanging_side_comment = $new_line->get_is_hanging_side_comment(); 20754 my $rtokens = $new_line->get_rtokens(); 20755 my $rfields = $new_line->get_rfields(); 20756 my $rpatterns = $new_line->get_rpatterns(); 20757 my $list_type = $new_line->get_list_type(); 20758 20759 my $group_list_type = $old_line->get_list_type(); 20760 my $old_rpatterns = $old_line->get_rpatterns(); 20761 my $old_rtokens = $old_line->get_rtokens(); 20762 20763 my $jlimit = $jmax - 1; 20764 if ( $maximum_field_index > $jmax ) { 20765 $jlimit = $jmax_original_line; 20766 --$jlimit unless ( length( $new_line->get_rfields()->[$jmax] ) ); 20767 } 20768 20769 # handle comma-separated lists .. 20770 if ( $group_list_type && ( $list_type eq $group_list_type ) ) { 20771 for my $j ( 0 .. $jlimit ) { 20772 my $old_tok = $$old_rtokens[$j]; 20773 next unless $old_tok; 20774 my $new_tok = $$rtokens[$j]; 20775 next unless $new_tok; 20776 20777 # lists always match ... 20778 # unless they would align any '=>'s with ','s 20779 goto NO_MATCH 20780 if ( $old_tok =~ /^=>/ && $new_tok =~ /^,/ 20781 || $new_tok =~ /^=>/ && $old_tok =~ /^,/ ); 20782 } 20783 } 20784 20785 # do detailed check for everything else except hanging side comments 20786 elsif ( !$is_hanging_side_comment ) { 20787 20788 my $leading_space_count = $new_line->get_leading_space_count(); 20789 20790 my $max_pad = 0; 20791 my $min_pad = 0; 20792 my $saw_good_alignment; 20793 20794 for my $j ( 0 .. $jlimit ) { 20795 20796 my $old_tok = $$old_rtokens[$j]; 20797 my $new_tok = $$rtokens[$j]; 20798 20799 # Note on encoding used for alignment tokens: 20800 # ------------------------------------------- 20801 # Tokens are "decorated" with information which can help 20802 # prevent unwanted alignments. Consider for example the 20803 # following two lines: 20804 # local ( $xn, $xd ) = split( '/', &'rnorm(@_) ); 20805 # local ( $i, $f ) = &'bdiv( $xn, $xd ); 20806 # There are three alignment tokens in each line, a comma, 20807 # an =, and a comma. In the first line these three tokens 20808 # are encoded as: 20809 # ,4+local-18 =3 ,4+split-7 20810 # and in the second line they are encoded as 20811 # ,4+local-18 =3 ,4+&'bdiv-8 20812 # Tokens always at least have token name and nesting 20813 # depth. So in this example the ='s are at depth 3 and 20814 # the ,'s are at depth 4. This prevents aligning tokens 20815 # of different depths. Commas contain additional 20816 # information, as follows: 20817 # , {depth} + {container name} - {spaces to opening paren} 20818 # This allows us to reject matching the rightmost commas 20819 # in the above two lines, since they are for different 20820 # function calls. This encoding is done in 20821 # 'sub send_lines_to_vertical_aligner'. 20822 20823 # Pick off actual token. 20824 # Everything up to the first digit is the actual token. 20825 my $alignment_token = $new_tok; 20826 if ( $alignment_token =~ /^([^\d]+)/ ) { $alignment_token = $1 } 20827 20828 # see if the decorated tokens match 20829 my $tokens_match = $new_tok eq $old_tok 20830 20831 # Exception for matching terminal : of ternary statement.. 20832 # consider containers prefixed by ? and : a match 20833 || ( $new_tok =~ /^,\d*\+\:/ && $old_tok =~ /^,\d*\+\?/ ); 20834 20835 # No match if the alignment tokens differ... 20836 if ( !$tokens_match ) { 20837 20838 # ...Unless this is a side comment 20839 if ( 20840 $j == $jlimit 20841 20842 # and there is either at least one alignment token 20843 # or this is a single item following a list. This 20844 # latter rule is required for 'December' to join 20845 # the following list: 20846 # my (@months) = ( 20847 # '', 'January', 'February', 'March', 20848 # 'April', 'May', 'June', 'July', 20849 # 'August', 'September', 'October', 'November', 20850 # 'December' 20851 # ); 20852 # If it doesn't then the -lp formatting will fail. 20853 && ( $j > 0 || $old_tok =~ /^,/ ) 20854 ) 20855 { 20856 $marginal_match = 1 20857 if ( $marginal_match == 0 20858 && $maximum_line_index == 0 ); 20859 last; 20860 } 20861 20862 goto NO_MATCH; 20863 } 20864 20865 # Calculate amount of padding required to fit this in. 20866 # $pad is the number of spaces by which we must increase 20867 # the current field to squeeze in this field. 20868 my $pad = 20869 length( $$rfields[$j] ) - $old_line->current_field_width($j); 20870 if ( $j == 0 ) { $pad += $leading_space_count; } 20871 20872 # remember max pads to limit marginal cases 20873 if ( $alignment_token ne '#' ) { 20874 if ( $pad > $max_pad ) { $max_pad = $pad } 20875 if ( $pad < $min_pad ) { $min_pad = $pad } 20876 } 20877 if ( $is_good_alignment{$alignment_token} ) { 20878 $saw_good_alignment = 1; 20879 } 20880 20881 # If patterns don't match, we have to be careful... 20882 if ( $$old_rpatterns[$j] ne $$rpatterns[$j] ) { 20883 20884 # flag this as a marginal match since patterns differ 20885 $marginal_match = 1 20886 if ( $marginal_match == 0 && $maximum_line_index == 0 ); 20887 20888 # We have to be very careful about aligning commas 20889 # when the pattern's don't match, because it can be 20890 # worse to create an alignment where none is needed 20891 # than to omit one. Here's an example where the ','s 20892 # are not in named continers. The first line below 20893 # should not match the next two: 20894 # ( $a, $b ) = ( $b, $r ); 20895 # ( $x1, $x2 ) = ( $x2 - $q * $x1, $x1 ); 20896 # ( $y1, $y2 ) = ( $y2 - $q * $y1, $y1 ); 20897 if ( $alignment_token eq ',' ) { 20898 20899 # do not align commas unless they are in named containers 20900 goto NO_MATCH unless ( $new_tok =~ /[A-Za-z]/ ); 20901 } 20902 20903 # do not align parens unless patterns match; 20904 # large ugly spaces can occur in math expressions. 20905 elsif ( $alignment_token eq '(' ) { 20906 20907 # But we can allow a match if the parens don't 20908 # require any padding. 20909 if ( $pad != 0 ) { goto NO_MATCH } 20910 } 20911 20912 # Handle an '=' alignment with different patterns to 20913 # the left. 20914 elsif ( $alignment_token eq '=' ) { 20915 20916 # It is best to be a little restrictive when 20917 # aligning '=' tokens. Here is an example of 20918 # two lines that we will not align: 20919 # my $variable=6; 20920 # $bb=4; 20921 # The problem is that one is a 'my' declaration, 20922 # and the other isn't, so they're not very similar. 20923 # We will filter these out by comparing the first 20924 # letter of the pattern. This is crude, but works 20925 # well enough. 20926 if ( 20927 substr( $$old_rpatterns[$j], 0, 1 ) ne 20928 substr( $$rpatterns[$j], 0, 1 ) ) 20929 { 20930 goto NO_MATCH; 20931 } 20932 20933 # If we pass that test, we'll call it a marginal match. 20934 # Here is an example of a marginal match: 20935 # $done{$$op} = 1; 20936 # $op = compile_bblock($op); 20937 # The left tokens are both identifiers, but 20938 # one accesses a hash and the other doesn't. 20939 # We'll let this be a tentative match and undo 20940 # it later if we don't find more than 2 lines 20941 # in the group. 20942 elsif ( $maximum_line_index == 0 ) { 20943 $marginal_match = 20944 2; # =2 prevents being undone below 20945 } 20946 } 20947 } 20948 20949 # Don't let line with fewer fields increase column widths 20950 # ( align3.t ) 20951 if ( $maximum_field_index > $jmax ) { 20952 20953 # Exception: suspend this rule to allow last lines to join 20954 if ( $pad > 0 ) { goto NO_MATCH; } 20955 } 20956 } ## end for my $j ( 0 .. $jlimit) 20957 20958 # Turn off the "marginal match" flag in some cases... 20959 # A "marginal match" occurs when the alignment tokens agree 20960 # but there are differences in the other tokens (patterns). 20961 # If we leave the marginal match flag set, then the rule is that we 20962 # will align only if there are more than two lines in the group. 20963 # We will turn of the flag if we almost have a match 20964 # and either we have seen a good alignment token or we 20965 # just need a small pad (2 spaces) to fit. These rules are 20966 # the result of experimentation. Tokens which misaligned by just 20967 # one or two characters are annoying. On the other hand, 20968 # large gaps to less important alignment tokens are also annoying. 20969 if ( $marginal_match == 1 20970 && $jmax == $maximum_field_index 20971 && ( $saw_good_alignment || ( $max_pad < 3 && $min_pad > -3 ) ) 20972 ) 20973 { 20974 $marginal_match = 0; 20975 } 20976 ##print "marginal=$marginal_match saw=$saw_good_alignment jmax=$jmax max=$maximum_field_index maxpad=$max_pad minpad=$min_pad\n"; 20977 } 20978 20979 # We have a match (even if marginal). 20980 # If the current line has fewer fields than the current group 20981 # but otherwise matches, copy the remaining group fields to 20982 # make it a perfect match. 20983 if ( $maximum_field_index > $jmax ) { 20984 my $comment = $$rfields[$jmax]; 20985 for $jmax ( $jlimit .. $maximum_field_index ) { 20986 $$rtokens[$jmax] = $$old_rtokens[$jmax]; 20987 $$rfields[ ++$jmax ] = ''; 20988 $$rpatterns[$jmax] = $$old_rpatterns[$jmax]; 20989 } 20990 $$rfields[$jmax] = $comment; 20991 $new_line->set_jmax($jmax); 20992 } 20993 return; 20994 20995 NO_MATCH: 20996 ##print "BUBBA: no match jmax=$jmax max=$maximum_field_index $group_list_type lines=$maximum_line_index token=$$old_rtokens[0]\n"; 20997 my_flush(); 20998 return; 20999 } 21000} 21001 21002sub check_fit { 21003 21004 return unless ( $maximum_line_index >= 0 ); 21005 my $new_line = shift; 21006 my $old_line = shift; 21007 21008 my $jmax = $new_line->get_jmax(); 21009 my $leading_space_count = $new_line->get_leading_space_count(); 21010 my $is_hanging_side_comment = $new_line->get_is_hanging_side_comment(); 21011 my $rtokens = $new_line->get_rtokens(); 21012 my $rfields = $new_line->get_rfields(); 21013 my $rpatterns = $new_line->get_rpatterns(); 21014 21015 my $group_list_type = $group_lines[0]->get_list_type(); 21016 21017 my $padding_so_far = 0; 21018 my $padding_available = $old_line->get_available_space_on_right(); 21019 21020 # save current columns in case this doesn't work 21021 save_alignment_columns(); 21022 21023 my ( $j, $pad, $eight ); 21024 my $maximum_field_index = $old_line->get_jmax(); 21025 for $j ( 0 .. $jmax ) { 21026 21027 $pad = length( $$rfields[$j] ) - $old_line->current_field_width($j); 21028 21029 if ( $j == 0 ) { 21030 $pad += $leading_space_count; 21031 } 21032 21033 # remember largest gap of the group, excluding gap to side comment 21034 if ( $pad < 0 21035 && $group_maximum_gap < -$pad 21036 && $j > 0 21037 && $j < $jmax - 1 ) 21038 { 21039 $group_maximum_gap = -$pad; 21040 } 21041 21042 next if $pad < 0; 21043 21044 ## This patch helps sometimes, but it doesn't check to see if 21045 ## the line is too long even without the side comment. It needs 21046 ## to be reworked. 21047 ##don't let a long token with no trailing side comment push 21048 ##side comments out, or end a group. (sidecmt1.t) 21049 ##next if ($j==$jmax-1 && length($$rfields[$jmax])==0); 21050 21051 # This line will need space; lets see if we want to accept it.. 21052 if ( 21053 21054 # not if this won't fit 21055 ( $pad > $padding_available ) 21056 21057 # previously, there were upper bounds placed on padding here 21058 # (maximum_whitespace_columns), but they were not really helpful 21059 21060 ) 21061 { 21062 21063 # revert to starting state then flush; things didn't work out 21064 restore_alignment_columns(); 21065 my_flush(); 21066 last; 21067 } 21068 21069 # patch to avoid excessive gaps in previous lines, 21070 # due to a line of fewer fields. 21071 # return join( ".", 21072 # $self->{"dfi"}, $self->{"aa"}, $self->rsvd, $self->{"rd"}, 21073 # $self->{"area"}, $self->{"id"}, $self->{"sel"} ); 21074 next if ( $jmax < $maximum_field_index && $j == $jmax - 1 ); 21075 21076 # looks ok, squeeze this field in 21077 $old_line->increase_field_width( $j, $pad ); 21078 $padding_available -= $pad; 21079 21080 # remember largest gap of the group, excluding gap to side comment 21081 if ( $pad > $group_maximum_gap && $j > 0 && $j < $jmax - 1 ) { 21082 $group_maximum_gap = $pad; 21083 } 21084 } 21085} 21086 21087sub add_to_group { 21088 21089 # The current line either starts a new alignment group or is 21090 # accepted into the current alignment group. 21091 my $new_line = shift; 21092 $group_lines[ ++$maximum_line_index ] = $new_line; 21093 21094 # initialize field lengths if starting new group 21095 if ( $maximum_line_index == 0 ) { 21096 21097 my $jmax = $new_line->get_jmax(); 21098 my $rfields = $new_line->get_rfields(); 21099 my $rtokens = $new_line->get_rtokens(); 21100 my $j; 21101 my $col = $new_line->get_leading_space_count(); 21102 21103 for $j ( 0 .. $jmax ) { 21104 $col += length( $$rfields[$j] ); 21105 21106 # create initial alignments for the new group 21107 my $token = ""; 21108 if ( $j < $jmax ) { $token = $$rtokens[$j] } 21109 my $alignment = make_alignment( $col, $token ); 21110 $new_line->set_alignment( $j, $alignment ); 21111 } 21112 21113 $maximum_jmax_seen = $jmax; 21114 $minimum_jmax_seen = $jmax; 21115 } 21116 21117 # use previous alignments otherwise 21118 else { 21119 my @new_alignments = 21120 $group_lines[ $maximum_line_index - 1 ]->get_alignments(); 21121 $new_line->set_alignments(@new_alignments); 21122 } 21123 21124 # remember group jmax extremes for next call to valign_input 21125 $previous_minimum_jmax_seen = $minimum_jmax_seen; 21126 $previous_maximum_jmax_seen = $maximum_jmax_seen; 21127} 21128 21129sub dump_array { 21130 21131 # debug routine to dump array contents 21132 local $" = ')('; 21133 print STDOUT "(@_)\n"; 21134} 21135 21136# flush() sends the current Perl::Tidy::VerticalAligner group down the 21137# pipeline to Perl::Tidy::FileWriter. 21138 21139# This is the external flush, which also empties the buffer and cache 21140sub flush { 21141 21142 # the buffer must be emptied first, then any cached text 21143 dump_valign_buffer(); 21144 21145 if ( $maximum_line_index < 0 ) { 21146 if ($cached_line_type) { 21147 $seqno_string = $cached_seqno_string; 21148 valign_output_step_C( $cached_line_text, 21149 $cached_line_leading_space_count, 21150 $last_level_written ); 21151 $cached_line_type = 0; 21152 $cached_line_text = ""; 21153 $cached_seqno_string = ""; 21154 } 21155 } 21156 else { 21157 my_flush(); 21158 } 21159} 21160 21161sub reduce_valign_buffer_indentation { 21162 21163 my ($diff) = @_; 21164 if ( $valign_buffer_filling && $diff ) { 21165 my $max_valign_buffer = @valign_buffer; 21166 for ( my $i = 0 ; $i < $max_valign_buffer ; $i++ ) { 21167 my ( $line, $leading_space_count, $level ) = 21168 @{ $valign_buffer[$i] }; 21169 my $ws = substr( $line, 0, $diff ); 21170 if ( ( length($ws) == $diff ) && $ws =~ /^\s+$/ ) { 21171 $line = substr( $line, $diff ); 21172 } 21173 if ( $leading_space_count >= $diff ) { 21174 $leading_space_count -= $diff; 21175 $level = level_change( $leading_space_count, $diff, $level ); 21176 } 21177 $valign_buffer[$i] = [ $line, $leading_space_count, $level ]; 21178 } 21179 } 21180} 21181 21182sub level_change { 21183 21184 # compute decrease in level when we remove $diff spaces from the 21185 # leading spaces 21186 my ( $leading_space_count, $diff, $level ) = @_; 21187 if ($rOpts_indent_columns) { 21188 my $olev = 21189 int( ( $leading_space_count + $diff ) / $rOpts_indent_columns ); 21190 my $nlev = int( $leading_space_count / $rOpts_indent_columns ); 21191 $level -= ( $olev - $nlev ); 21192 if ( $level < 0 ) { $level = 0 } 21193 } 21194 return $level; 21195} 21196 21197sub dump_valign_buffer { 21198 if (@valign_buffer) { 21199 foreach (@valign_buffer) { 21200 valign_output_step_D( @{$_} ); 21201 } 21202 @valign_buffer = (); 21203 } 21204 $valign_buffer_filling = ""; 21205} 21206 21207# This is the internal flush, which leaves the cache intact 21208sub my_flush { 21209 21210 return if ( $maximum_line_index < 0 ); 21211 21212 # handle a group of comment lines 21213 if ( $group_type eq 'COMMENT' ) { 21214 21215 VALIGN_DEBUG_FLAG_APPEND0 && do { 21216 my ( $a, $b, $c ) = caller(); 21217 print STDOUT 21218"APPEND0: Flush called from $a $b $c for COMMENT group: lines=$maximum_line_index \n"; 21219 21220 }; 21221 my $leading_space_count = $comment_leading_space_count; 21222 my $leading_string = get_leading_string($leading_space_count); 21223 21224 # zero leading space count if any lines are too long 21225 my $max_excess = 0; 21226 for my $i ( 0 .. $maximum_line_index ) { 21227 my $str = $group_lines[$i]; 21228 my $excess = 21229 length($str) + 21230 $leading_space_count - 21231 maximum_line_length_for_level($group_level); 21232 if ( $excess > $max_excess ) { 21233 $max_excess = $excess; 21234 } 21235 } 21236 21237 if ( $max_excess > 0 ) { 21238 $leading_space_count -= $max_excess; 21239 if ( $leading_space_count < 0 ) { $leading_space_count = 0 } 21240 $last_outdented_line_at = 21241 $file_writer_object->get_output_line_number(); 21242 unless ($outdented_line_count) { 21243 $first_outdented_line_at = $last_outdented_line_at; 21244 } 21245 $outdented_line_count += ( $maximum_line_index + 1 ); 21246 } 21247 21248 # write the group of lines 21249 my $outdent_long_lines = 0; 21250 for my $i ( 0 .. $maximum_line_index ) { 21251 valign_output_step_B( $leading_space_count, $group_lines[$i], 0, 21252 $outdent_long_lines, "", $group_level ); 21253 } 21254 } 21255 21256 # handle a group of code lines 21257 else { 21258 21259 VALIGN_DEBUG_FLAG_APPEND0 && do { 21260 my $group_list_type = $group_lines[0]->get_list_type(); 21261 my ( $a, $b, $c ) = caller(); 21262 my $maximum_field_index = $group_lines[0]->get_jmax(); 21263 print STDOUT 21264"APPEND0: Flush called from $a $b $c fields=$maximum_field_index list=$group_list_type lines=$maximum_line_index extra=$extra_indent_ok\n"; 21265 21266 }; 21267 21268 # some small groups are best left unaligned 21269 my $do_not_align = decide_if_aligned(); 21270 21271 # optimize side comment location 21272 $do_not_align = adjust_side_comment($do_not_align); 21273 21274 # recover spaces for -lp option if possible 21275 my $extra_leading_spaces = get_extra_leading_spaces(); 21276 21277 # all lines of this group have the same basic leading spacing 21278 my $group_leader_length = $group_lines[0]->get_leading_space_count(); 21279 21280 # add extra leading spaces if helpful 21281 my $min_ci_gap = improve_continuation_indentation( $do_not_align, 21282 $group_leader_length ); 21283 21284 # loop to output all lines 21285 for my $i ( 0 .. $maximum_line_index ) { 21286 my $line = $group_lines[$i]; 21287 valign_output_step_A( $line, $min_ci_gap, $do_not_align, 21288 $group_leader_length, $extra_leading_spaces ); 21289 } 21290 } 21291 initialize_for_new_group(); 21292} 21293 21294sub decide_if_aligned { 21295 21296 # Do not try to align two lines which are not really similar 21297 return unless $maximum_line_index == 1; 21298 return if ($is_matching_terminal_line); 21299 21300 my $group_list_type = $group_lines[0]->get_list_type(); 21301 21302 my $do_not_align = ( 21303 21304 # always align lists 21305 !$group_list_type 21306 21307 && ( 21308 21309 # don't align if it was just a marginal match 21310 $marginal_match 21311 21312 # don't align two lines with big gap 21313 || $group_maximum_gap > 12 21314 21315 # or lines with differing number of alignment tokens 21316 # TODO: this could be improved. It occasionally rejects 21317 # good matches. 21318 || $previous_maximum_jmax_seen != $previous_minimum_jmax_seen 21319 ) 21320 ); 21321 21322 # But try to convert them into a simple comment group if the first line 21323 # a has side comment 21324 my $rfields = $group_lines[0]->get_rfields(); 21325 my $maximum_field_index = $group_lines[0]->get_jmax(); 21326 if ( $do_not_align 21327 && ( $maximum_line_index > 0 ) 21328 && ( length( $$rfields[$maximum_field_index] ) > 0 ) ) 21329 { 21330 combine_fields(); 21331 $do_not_align = 0; 21332 } 21333 return $do_not_align; 21334} 21335 21336sub adjust_side_comment { 21337 21338 my $do_not_align = shift; 21339 21340 # let's see if we can move the side comment field out a little 21341 # to improve readability (the last field is always a side comment field) 21342 my $have_side_comment = 0; 21343 my $first_side_comment_line = -1; 21344 my $maximum_field_index = $group_lines[0]->get_jmax(); 21345 for my $i ( 0 .. $maximum_line_index ) { 21346 my $line = $group_lines[$i]; 21347 21348 if ( length( $line->get_rfields()->[$maximum_field_index] ) ) { 21349 $have_side_comment = 1; 21350 $first_side_comment_line = $i; 21351 last; 21352 } 21353 } 21354 21355 my $kmax = $maximum_field_index + 1; 21356 21357 if ($have_side_comment) { 21358 21359 my $line = $group_lines[0]; 21360 21361 # the maximum space without exceeding the line length: 21362 my $avail = $line->get_available_space_on_right(); 21363 21364 # try to use the previous comment column 21365 my $side_comment_column = $line->get_column( $kmax - 2 ); 21366 my $move = $last_comment_column - $side_comment_column; 21367 21368## my $sc_line0 = $side_comment_history[0]->[0]; 21369## my $sc_col0 = $side_comment_history[0]->[1]; 21370## my $sc_line1 = $side_comment_history[1]->[0]; 21371## my $sc_col1 = $side_comment_history[1]->[1]; 21372## my $sc_line2 = $side_comment_history[2]->[0]; 21373## my $sc_col2 = $side_comment_history[2]->[1]; 21374## 21375## # FUTURE UPDATES: 21376## # Be sure to ignore 'do not align' and '} # end comments' 21377## # Find first $move > 0 and $move <= $avail as follows: 21378## # 1. try sc_col1 if sc_col1 == sc_col0 && (line-sc_line0) < 12 21379## # 2. try sc_col2 if (line-sc_line2) < 12 21380## # 3. try min possible space, plus up to 8, 21381## # 4. try min possible space 21382 21383 if ( $kmax > 0 && !$do_not_align ) { 21384 21385 # but if this doesn't work, give up and use the minimum space 21386 if ( $move > $avail ) { 21387 $move = $rOpts_minimum_space_to_comment - 1; 21388 } 21389 21390 # but we want some minimum space to the comment 21391 my $min_move = $rOpts_minimum_space_to_comment - 1; 21392 if ( $move >= 0 21393 && $last_side_comment_length > 0 21394 && ( $first_side_comment_line == 0 ) 21395 && $group_level == $last_level_written ) 21396 { 21397 $min_move = 0; 21398 } 21399 21400 if ( $move < $min_move ) { 21401 $move = $min_move; 21402 } 21403 21404 # prevously, an upper bound was placed on $move here, 21405 # (maximum_space_to_comment), but it was not helpful 21406 21407 # don't exceed the available space 21408 if ( $move > $avail ) { $move = $avail } 21409 21410 # we can only increase space, never decrease 21411 if ( $move > 0 ) { 21412 $line->increase_field_width( $maximum_field_index - 1, $move ); 21413 } 21414 21415 # remember this column for the next group 21416 $last_comment_column = $line->get_column( $kmax - 2 ); 21417 } 21418 else { 21419 21420 # try to at least line up the existing side comment location 21421 if ( $kmax > 0 && $move > 0 && $move < $avail ) { 21422 $line->increase_field_width( $maximum_field_index - 1, $move ); 21423 $do_not_align = 0; 21424 } 21425 21426 # reset side comment column if we can't align 21427 else { 21428 forget_side_comment(); 21429 } 21430 } 21431 } 21432 return $do_not_align; 21433} 21434 21435sub improve_continuation_indentation { 21436 my ( $do_not_align, $group_leader_length ) = @_; 21437 21438 # See if we can increase the continuation indentation 21439 # to move all continuation lines closer to the next field 21440 # (unless it is a comment). 21441 # 21442 # '$min_ci_gap'is the extra indentation that we may need to introduce. 21443 # We will only introduce this to fields which already have some ci. 21444 # Without this variable, we would occasionally get something like this 21445 # (Complex.pm): 21446 # 21447 # use overload '+' => \&plus, 21448 # '-' => \&minus, 21449 # '*' => \&multiply, 21450 # ... 21451 # 'tan' => \&tan, 21452 # 'atan2' => \&atan2, 21453 # 21454 # Whereas with this variable, we can shift variables over to get this: 21455 # 21456 # use overload '+' => \&plus, 21457 # '-' => \&minus, 21458 # '*' => \&multiply, 21459 # ... 21460 # 'tan' => \&tan, 21461 # 'atan2' => \&atan2, 21462 21463 ## Deactivated#################### 21464 # The trouble with this patch is that it may, for example, 21465 # move in some 'or's or ':'s, and leave some out, so that the 21466 # left edge alignment suffers. 21467 return 0; 21468 ########################################### 21469 21470 my $maximum_field_index = $group_lines[0]->get_jmax(); 21471 21472 my $min_ci_gap = maximum_line_length_for_level($group_level); 21473 if ( $maximum_field_index > 1 && !$do_not_align ) { 21474 21475 for my $i ( 0 .. $maximum_line_index ) { 21476 my $line = $group_lines[$i]; 21477 my $leading_space_count = $line->get_leading_space_count(); 21478 my $rfields = $line->get_rfields(); 21479 21480 my $gap = 21481 $line->get_column(0) - 21482 $leading_space_count - 21483 length( $$rfields[0] ); 21484 21485 if ( $leading_space_count > $group_leader_length ) { 21486 if ( $gap < $min_ci_gap ) { $min_ci_gap = $gap } 21487 } 21488 } 21489 21490 if ( $min_ci_gap >= maximum_line_length_for_level($group_level) ) { 21491 $min_ci_gap = 0; 21492 } 21493 } 21494 else { 21495 $min_ci_gap = 0; 21496 } 21497 return $min_ci_gap; 21498} 21499 21500sub valign_output_step_A { 21501 21502 ############################################################### 21503 # This is Step A in writing vertically aligned lines. 21504 # The line is prepared according to the alignments which have 21505 # been found and shipped to the next step. 21506 ############################################################### 21507 21508 my ( $line, $min_ci_gap, $do_not_align, $group_leader_length, 21509 $extra_leading_spaces ) 21510 = @_; 21511 my $rfields = $line->get_rfields(); 21512 my $leading_space_count = $line->get_leading_space_count(); 21513 my $outdent_long_lines = $line->get_outdent_long_lines(); 21514 my $maximum_field_index = $line->get_jmax(); 21515 my $rvertical_tightness_flags = $line->get_rvertical_tightness_flags(); 21516 21517 # add any extra spaces 21518 if ( $leading_space_count > $group_leader_length ) { 21519 $leading_space_count += $min_ci_gap; 21520 } 21521 21522 my $str = $$rfields[0]; 21523 21524 # loop to concatenate all fields of this line and needed padding 21525 my $total_pad_count = 0; 21526 my ( $j, $pad ); 21527 for $j ( 1 .. $maximum_field_index ) { 21528 21529 # skip zero-length side comments 21530 last 21531 if ( ( $j == $maximum_field_index ) 21532 && ( !defined( $$rfields[$j] ) || ( length( $$rfields[$j] ) == 0 ) ) 21533 ); 21534 21535 # compute spaces of padding before this field 21536 my $col = $line->get_column( $j - 1 ); 21537 $pad = $col - ( length($str) + $leading_space_count ); 21538 21539 if ($do_not_align) { 21540 $pad = 21541 ( $j < $maximum_field_index ) 21542 ? 0 21543 : $rOpts_minimum_space_to_comment - 1; 21544 } 21545 21546 # if the -fpsc flag is set, move the side comment to the selected 21547 # column if and only if it is possible, ignoring constraints on 21548 # line length and minimum space to comment 21549 if ( $rOpts_fixed_position_side_comment && $j == $maximum_field_index ) 21550 { 21551 my $newpad = $pad + $rOpts_fixed_position_side_comment - $col - 1; 21552 if ( $newpad >= 0 ) { $pad = $newpad; } 21553 } 21554 21555 # accumulate the padding 21556 if ( $pad > 0 ) { $total_pad_count += $pad; } 21557 21558 # add this field 21559 if ( !defined $$rfields[$j] ) { 21560 write_diagnostics("UNDEFined field at j=$j\n"); 21561 } 21562 21563 # only add padding when we have a finite field; 21564 # this avoids extra terminal spaces if we have empty fields 21565 if ( length( $$rfields[$j] ) > 0 ) { 21566 $str .= ' ' x $total_pad_count; 21567 $total_pad_count = 0; 21568 $str .= $$rfields[$j]; 21569 } 21570 else { 21571 $total_pad_count = 0; 21572 } 21573 21574 # update side comment history buffer 21575 if ( $j == $maximum_field_index ) { 21576 my $lineno = $file_writer_object->get_output_line_number(); 21577 shift @side_comment_history; 21578 push @side_comment_history, [ $lineno, $col ]; 21579 } 21580 } 21581 21582 my $side_comment_length = ( length( $$rfields[$maximum_field_index] ) ); 21583 21584 # ship this line off 21585 valign_output_step_B( $leading_space_count + $extra_leading_spaces, 21586 $str, $side_comment_length, $outdent_long_lines, 21587 $rvertical_tightness_flags, $group_level ); 21588} 21589 21590sub get_extra_leading_spaces { 21591 21592 #---------------------------------------------------------- 21593 # Define any extra indentation space (for the -lp option). 21594 # Here is why: 21595 # If a list has side comments, sub scan_list must dump the 21596 # list before it sees everything. When this happens, it sets 21597 # the indentation to the standard scheme, but notes how 21598 # many spaces it would have liked to use. We may be able 21599 # to recover that space here in the event that that all of the 21600 # lines of a list are back together again. 21601 #---------------------------------------------------------- 21602 21603 my $extra_leading_spaces = 0; 21604 if ($extra_indent_ok) { 21605 my $object = $group_lines[0]->get_indentation(); 21606 if ( ref($object) ) { 21607 my $extra_indentation_spaces_wanted = 21608 get_RECOVERABLE_SPACES($object); 21609 21610 # all indentation objects must be the same 21611 my $i; 21612 for $i ( 1 .. $maximum_line_index ) { 21613 if ( $object != $group_lines[$i]->get_indentation() ) { 21614 $extra_indentation_spaces_wanted = 0; 21615 last; 21616 } 21617 } 21618 21619 if ($extra_indentation_spaces_wanted) { 21620 21621 # the maximum space without exceeding the line length: 21622 my $avail = $group_lines[0]->get_available_space_on_right(); 21623 $extra_leading_spaces = 21624 ( $avail > $extra_indentation_spaces_wanted ) 21625 ? $extra_indentation_spaces_wanted 21626 : $avail; 21627 21628 # update the indentation object because with -icp the terminal 21629 # ');' will use the same adjustment. 21630 $object->permanently_decrease_AVAILABLE_SPACES( 21631 -$extra_leading_spaces ); 21632 } 21633 } 21634 } 21635 return $extra_leading_spaces; 21636} 21637 21638sub combine_fields { 21639 21640 # combine all fields except for the comment field ( sidecmt.t ) 21641 # Uses global variables: 21642 # @group_lines 21643 # $maximum_line_index 21644 my ( $j, $k ); 21645 my $maximum_field_index = $group_lines[0]->get_jmax(); 21646 for ( $j = 0 ; $j <= $maximum_line_index ; $j++ ) { 21647 my $line = $group_lines[$j]; 21648 my $rfields = $line->get_rfields(); 21649 foreach ( 1 .. $maximum_field_index - 1 ) { 21650 $$rfields[0] .= $$rfields[$_]; 21651 } 21652 $$rfields[1] = $$rfields[$maximum_field_index]; 21653 21654 $line->set_jmax(1); 21655 $line->set_column( 0, 0 ); 21656 $line->set_column( 1, 0 ); 21657 21658 } 21659 $maximum_field_index = 1; 21660 21661 for $j ( 0 .. $maximum_line_index ) { 21662 my $line = $group_lines[$j]; 21663 my $rfields = $line->get_rfields(); 21664 for $k ( 0 .. $maximum_field_index ) { 21665 my $pad = length( $$rfields[$k] ) - $line->current_field_width($k); 21666 if ( $k == 0 ) { 21667 $pad += $group_lines[$j]->get_leading_space_count(); 21668 } 21669 21670 if ( $pad > 0 ) { $line->increase_field_width( $k, $pad ) } 21671 21672 } 21673 } 21674} 21675 21676sub get_output_line_number { 21677 21678 # the output line number reported to a caller is the number of items 21679 # written plus the number of items in the buffer 21680 my $self = shift; 21681 1 + $maximum_line_index + $file_writer_object->get_output_line_number(); 21682} 21683 21684sub valign_output_step_B { 21685 21686 ############################################################### 21687 # This is Step B in writing vertically aligned lines. 21688 # Vertical tightness is applied according to preset flags. 21689 # In particular this routine handles stacking of opening 21690 # and closing tokens. 21691 ############################################################### 21692 21693 my ( $leading_space_count, $str, $side_comment_length, $outdent_long_lines, 21694 $rvertical_tightness_flags, $level ) 21695 = @_; 21696 21697 # handle outdenting of long lines: 21698 if ($outdent_long_lines) { 21699 my $excess = 21700 length($str) - 21701 $side_comment_length + 21702 $leading_space_count - 21703 maximum_line_length_for_level($level); 21704 if ( $excess > 0 ) { 21705 $leading_space_count = 0; 21706 $last_outdented_line_at = 21707 $file_writer_object->get_output_line_number(); 21708 21709 unless ($outdented_line_count) { 21710 $first_outdented_line_at = $last_outdented_line_at; 21711 } 21712 $outdented_line_count++; 21713 } 21714 } 21715 21716 # Make preliminary leading whitespace. It could get changed 21717 # later by entabbing, so we have to keep track of any changes 21718 # to the leading_space_count from here on. 21719 my $leading_string = 21720 $leading_space_count > 0 ? ( ' ' x $leading_space_count ) : ""; 21721 21722 # Unpack any recombination data; it was packed by 21723 # sub send_lines_to_vertical_aligner. Contents: 21724 # 21725 # [0] type: 1=opening non-block 2=closing non-block 21726 # 3=opening block brace 4=closing block brace 21727 # [1] flag: if opening: 1=no multiple steps, 2=multiple steps ok 21728 # if closing: spaces of padding to use 21729 # [2] sequence number of container 21730 # [3] valid flag: do not append if this flag is false 21731 # 21732 my ( $open_or_close, $tightness_flag, $seqno, $valid, $seqno_beg, 21733 $seqno_end ); 21734 if ($rvertical_tightness_flags) { 21735 ( 21736 $open_or_close, $tightness_flag, $seqno, $valid, $seqno_beg, 21737 $seqno_end 21738 ) = @{$rvertical_tightness_flags}; 21739 } 21740 21741 $seqno_string = $seqno_end; 21742 21743 # handle any cached line .. 21744 # either append this line to it or write it out 21745 if ( length($cached_line_text) ) { 21746 21747 # Dump an invalid cached line 21748 if ( !$cached_line_valid ) { 21749 valign_output_step_C( $cached_line_text, 21750 $cached_line_leading_space_count, 21751 $last_level_written ); 21752 } 21753 21754 # Handle cached line ending in OPENING tokens 21755 elsif ( $cached_line_type == 1 || $cached_line_type == 3 ) { 21756 21757 my $gap = $leading_space_count - length($cached_line_text); 21758 21759 # handle option of just one tight opening per line: 21760 if ( $cached_line_flag == 1 ) { 21761 if ( defined($open_or_close) && $open_or_close == 1 ) { 21762 $gap = -1; 21763 } 21764 } 21765 21766 if ( $gap >= 0 && defined($seqno_beg) ) { 21767 $leading_string = $cached_line_text . ' ' x $gap; 21768 $leading_space_count = $cached_line_leading_space_count; 21769 $seqno_string = $cached_seqno_string . ':' . $seqno_beg; 21770 $level = $last_level_written; 21771 } 21772 else { 21773 valign_output_step_C( $cached_line_text, 21774 $cached_line_leading_space_count, 21775 $last_level_written ); 21776 } 21777 } 21778 21779 # Handle cached line ending in CLOSING tokens 21780 else { 21781 my $test_line = $cached_line_text . ' ' x $cached_line_flag . $str; 21782 if ( 21783 21784 # The new line must start with container 21785 $seqno_beg 21786 21787 # The container combination must be okay.. 21788 && ( 21789 21790 # okay to combine like types 21791 ( $open_or_close == $cached_line_type ) 21792 21793 # closing block brace may append to non-block 21794 || ( $cached_line_type == 2 && $open_or_close == 4 ) 21795 21796 # something like ');' 21797 || ( !$open_or_close && $cached_line_type == 2 ) 21798 21799 ) 21800 21801 # The combined line must fit 21802 && ( 21803 length($test_line) <= 21804 maximum_line_length_for_level($last_level_written) ) 21805 ) 21806 { 21807 21808 $seqno_string = $cached_seqno_string . ':' . $seqno_beg; 21809 21810 # Patch to outdent closing tokens ending # in ');' 21811 # If we are joining a line like ');' to a previous stacked 21812 # set of closing tokens, then decide if we may outdent the 21813 # combined stack to the indentation of the ');'. Since we 21814 # should not normally outdent any of the other tokens more than 21815 # the indentation of the lines that contained them, we will 21816 # only do this if all of the corresponding opening 21817 # tokens were on the same line. This can happen with 21818 # -sot and -sct. For example, it is ok here: 21819 # __PACKAGE__->load_components( qw( 21820 # PK::Auto 21821 # Core 21822 # )); 21823 # 21824 # But, for example, we do not outdent in this example because 21825 # that would put the closing sub brace out farther than the 21826 # opening sub brace: 21827 # 21828 # perltidy -sot -sct 21829 # $c->Tk::bind( 21830 # '<Control-f>' => sub { 21831 # my ($c) = @_; 21832 # my $e = $c->XEvent; 21833 # itemsUnderArea $c; 21834 # } ); 21835 # 21836 if ( $str =~ /^\);/ && $cached_line_text =~ /^[\)\}\]\s]*$/ ) { 21837 21838 # The way to tell this is if the stacked sequence numbers 21839 # of this output line are the reverse of the stacked 21840 # sequence numbers of the previous non-blank line of 21841 # sequence numbers. So we can join if the previous 21842 # nonblank string of tokens is the mirror image. For 21843 # example if stack )}] is 13:8:6 then we are looking for a 21844 # leading stack like [{( which is 6:8:13 We only need to 21845 # check the two ends, because the intermediate tokens must 21846 # fall in order. Note on speed: having to split on colons 21847 # and eliminate multiple colons might appear to be slow, 21848 # but it's not an issue because we almost never come 21849 # through here. In a typical file we don't. 21850 $seqno_string =~ s/^:+//; 21851 $last_nonblank_seqno_string =~ s/^:+//; 21852 $seqno_string =~ s/:+/:/g; 21853 $last_nonblank_seqno_string =~ s/:+/:/g; 21854 21855 # how many spaces can we outdent? 21856 my $diff = 21857 $cached_line_leading_space_count - $leading_space_count; 21858 if ( $diff > 0 21859 && length($seqno_string) 21860 && length($last_nonblank_seqno_string) == 21861 length($seqno_string) ) 21862 { 21863 my @seqno_last = 21864 ( split ':', $last_nonblank_seqno_string ); 21865 my @seqno_now = ( split ':', $seqno_string ); 21866 if ( $seqno_now[-1] == $seqno_last[0] 21867 && $seqno_now[0] == $seqno_last[-1] ) 21868 { 21869 21870 # OK to outdent .. 21871 # for absolute safety, be sure we only remove 21872 # whitespace 21873 my $ws = substr( $test_line, 0, $diff ); 21874 if ( ( length($ws) == $diff ) && $ws =~ /^\s+$/ ) { 21875 21876 $test_line = substr( $test_line, $diff ); 21877 $cached_line_leading_space_count -= $diff; 21878 $last_level_written = 21879 level_change( 21880 $cached_line_leading_space_count, 21881 $diff, $last_level_written ); 21882 reduce_valign_buffer_indentation($diff); 21883 } 21884 21885 # shouldn't happen, but not critical: 21886 ##else { 21887 ## ERROR transferring indentation here 21888 ##} 21889 } 21890 } 21891 } 21892 21893 $str = $test_line; 21894 $leading_string = ""; 21895 $leading_space_count = $cached_line_leading_space_count; 21896 $level = $last_level_written; 21897 } 21898 else { 21899 valign_output_step_C( $cached_line_text, 21900 $cached_line_leading_space_count, 21901 $last_level_written ); 21902 } 21903 } 21904 } 21905 $cached_line_type = 0; 21906 $cached_line_text = ""; 21907 21908 # make the line to be written 21909 my $line = $leading_string . $str; 21910 21911 # write or cache this line 21912 if ( !$open_or_close || $side_comment_length > 0 ) { 21913 valign_output_step_C( $line, $leading_space_count, $level ); 21914 } 21915 else { 21916 $cached_line_text = $line; 21917 $cached_line_type = $open_or_close; 21918 $cached_line_flag = $tightness_flag; 21919 $cached_seqno = $seqno; 21920 $cached_line_valid = $valid; 21921 $cached_line_leading_space_count = $leading_space_count; 21922 $cached_seqno_string = $seqno_string; 21923 } 21924 21925 $last_level_written = $level; 21926 $last_side_comment_length = $side_comment_length; 21927 $extra_indent_ok = 0; 21928} 21929 21930sub valign_output_step_C { 21931 21932 ############################################################### 21933 # This is Step C in writing vertically aligned lines. 21934 # Lines are either stored in a buffer or passed along to the next step. 21935 # The reason for storing lines is that we may later want to reduce their 21936 # indentation when -sot and -sct are both used. 21937 ############################################################### 21938 my @args = @_; 21939 21940 # Dump any saved lines if we see a line with an unbalanced opening or 21941 # closing token. 21942 dump_valign_buffer() if ( $seqno_string && $valign_buffer_filling ); 21943 21944 # Either store or write this line 21945 if ($valign_buffer_filling) { 21946 push @valign_buffer, [@args]; 21947 } 21948 else { 21949 valign_output_step_D(@args); 21950 } 21951 21952 # For lines starting or ending with opening or closing tokens.. 21953 if ($seqno_string) { 21954 $last_nonblank_seqno_string = $seqno_string; 21955 21956 # Start storing lines when we see a line with multiple stacked opening 21957 # tokens. 21958 if ( $args[0] =~ /[\{\(\[]\s*[\{\(\[]$/ ) { 21959 $valign_buffer_filling = $seqno_string; 21960 } 21961 } 21962} 21963 21964sub valign_output_step_D { 21965 21966 ############################################################### 21967 # This is Step D in writing vertically aligned lines. 21968 # Write one vertically aligned line of code to the output object. 21969 ############################################################### 21970 21971 my ( $line, $leading_space_count, $level ) = @_; 21972 21973 # The line is currently correct if there is no tabbing (recommended!) 21974 # We may have to lop off some leading spaces and replace with tabs. 21975 if ( $leading_space_count > 0 ) { 21976 21977 # Nothing to do if no tabs 21978 if ( !( $rOpts_tabs || $rOpts_entab_leading_whitespace ) 21979 || $rOpts_indent_columns <= 0 ) 21980 { 21981 21982 # nothing to do 21983 } 21984 21985 # Handle entab option 21986 elsif ($rOpts_entab_leading_whitespace) { 21987 my $space_count = 21988 $leading_space_count % $rOpts_entab_leading_whitespace; 21989 my $tab_count = 21990 int( $leading_space_count / $rOpts_entab_leading_whitespace ); 21991 my $leading_string = "\t" x $tab_count . ' ' x $space_count; 21992 if ( $line =~ /^\s{$leading_space_count,$leading_space_count}/ ) { 21993 substr( $line, 0, $leading_space_count ) = $leading_string; 21994 } 21995 else { 21996 21997 # shouldn't happen - program error counting whitespace 21998 # - skip entabbing 21999 VALIGN_DEBUG_FLAG_TABS 22000 && warning( 22001"Error entabbing in valign_output_step_D: expected count=$leading_space_count\n" 22002 ); 22003 } 22004 } 22005 22006 # Handle option of one tab per level 22007 else { 22008 my $leading_string = ( "\t" x $level ); 22009 my $space_count = 22010 $leading_space_count - $level * $rOpts_indent_columns; 22011 22012 # shouldn't happen: 22013 if ( $space_count < 0 ) { 22014 22015 # But it could be an outdented comment 22016 if ( $line !~ /^\s*#/ ) { 22017 VALIGN_DEBUG_FLAG_TABS 22018 && warning( 22019"Error entabbing in valign_output_step_D: for level=$group_level count=$leading_space_count\n" 22020 ); 22021 } 22022 $leading_string = ( ' ' x $leading_space_count ); 22023 } 22024 else { 22025 $leading_string .= ( ' ' x $space_count ); 22026 } 22027 if ( $line =~ /^\s{$leading_space_count,$leading_space_count}/ ) { 22028 substr( $line, 0, $leading_space_count ) = $leading_string; 22029 } 22030 else { 22031 22032 # shouldn't happen - program error counting whitespace 22033 # we'll skip entabbing 22034 VALIGN_DEBUG_FLAG_TABS 22035 && warning( 22036"Error entabbing in valign_output_step_D: expected count=$leading_space_count\n" 22037 ); 22038 } 22039 } 22040 } 22041 $file_writer_object->write_code_line( $line . "\n" ); 22042} 22043 22044{ # begin get_leading_string 22045 22046 my @leading_string_cache; 22047 22048 sub get_leading_string { 22049 22050 # define the leading whitespace string for this line.. 22051 my $leading_whitespace_count = shift; 22052 22053 # Handle case of zero whitespace, which includes multi-line quotes 22054 # (which may have a finite level; this prevents tab problems) 22055 if ( $leading_whitespace_count <= 0 ) { 22056 return ""; 22057 } 22058 22059 # look for previous result 22060 elsif ( $leading_string_cache[$leading_whitespace_count] ) { 22061 return $leading_string_cache[$leading_whitespace_count]; 22062 } 22063 22064 # must compute a string for this number of spaces 22065 my $leading_string; 22066 22067 # Handle simple case of no tabs 22068 if ( !( $rOpts_tabs || $rOpts_entab_leading_whitespace ) 22069 || $rOpts_indent_columns <= 0 ) 22070 { 22071 $leading_string = ( ' ' x $leading_whitespace_count ); 22072 } 22073 22074 # Handle entab option 22075 elsif ($rOpts_entab_leading_whitespace) { 22076 my $space_count = 22077 $leading_whitespace_count % $rOpts_entab_leading_whitespace; 22078 my $tab_count = int( 22079 $leading_whitespace_count / $rOpts_entab_leading_whitespace ); 22080 $leading_string = "\t" x $tab_count . ' ' x $space_count; 22081 } 22082 22083 # Handle option of one tab per level 22084 else { 22085 $leading_string = ( "\t" x $group_level ); 22086 my $space_count = 22087 $leading_whitespace_count - $group_level * $rOpts_indent_columns; 22088 22089 # shouldn't happen: 22090 if ( $space_count < 0 ) { 22091 VALIGN_DEBUG_FLAG_TABS 22092 && warning( 22093"Error in get_leading_string: for level=$group_level count=$leading_whitespace_count\n" 22094 ); 22095 22096 # -- skip entabbing 22097 $leading_string = ( ' ' x $leading_whitespace_count ); 22098 } 22099 else { 22100 $leading_string .= ( ' ' x $space_count ); 22101 } 22102 } 22103 $leading_string_cache[$leading_whitespace_count] = $leading_string; 22104 return $leading_string; 22105 } 22106} # end get_leading_string 22107 22108sub report_anything_unusual { 22109 my $self = shift; 22110 if ( $outdented_line_count > 0 ) { 22111 write_logfile_entry( 22112 "$outdented_line_count long lines were outdented:\n"); 22113 write_logfile_entry( 22114 " First at output line $first_outdented_line_at\n"); 22115 22116 if ( $outdented_line_count > 1 ) { 22117 write_logfile_entry( 22118 " Last at output line $last_outdented_line_at\n"); 22119 } 22120 write_logfile_entry( 22121 " use -noll to prevent outdenting, -l=n to increase line length\n" 22122 ); 22123 write_logfile_entry("\n"); 22124 } 22125} 22126 22127##################################################################### 22128# 22129# the Perl::Tidy::FileWriter class writes the output file 22130# 22131##################################################################### 22132 22133package Perl::Tidy::FileWriter; 22134 22135# Maximum number of little messages; probably need not be changed. 22136use constant MAX_NAG_MESSAGES => 6; 22137 22138sub write_logfile_entry { 22139 my $self = shift; 22140 my $logger_object = $self->{_logger_object}; 22141 if ($logger_object) { 22142 $logger_object->write_logfile_entry(@_); 22143 } 22144} 22145 22146sub new { 22147 my $class = shift; 22148 my ( $line_sink_object, $rOpts, $logger_object ) = @_; 22149 22150 bless { 22151 _line_sink_object => $line_sink_object, 22152 _logger_object => $logger_object, 22153 _rOpts => $rOpts, 22154 _output_line_number => 1, 22155 _consecutive_blank_lines => 0, 22156 _consecutive_nonblank_lines => 0, 22157 _first_line_length_error => 0, 22158 _max_line_length_error => 0, 22159 _last_line_length_error => 0, 22160 _first_line_length_error_at => 0, 22161 _max_line_length_error_at => 0, 22162 _last_line_length_error_at => 0, 22163 _line_length_error_count => 0, 22164 _max_output_line_length => 0, 22165 _max_output_line_length_at => 0, 22166 }, $class; 22167} 22168 22169sub tee_on { 22170 my $self = shift; 22171 $self->{_line_sink_object}->tee_on(); 22172} 22173 22174sub tee_off { 22175 my $self = shift; 22176 $self->{_line_sink_object}->tee_off(); 22177} 22178 22179sub get_output_line_number { 22180 my $self = shift; 22181 return $self->{_output_line_number}; 22182} 22183 22184sub decrement_output_line_number { 22185 my $self = shift; 22186 $self->{_output_line_number}--; 22187} 22188 22189sub get_consecutive_nonblank_lines { 22190 my $self = shift; 22191 return $self->{_consecutive_nonblank_lines}; 22192} 22193 22194sub reset_consecutive_blank_lines { 22195 my $self = shift; 22196 $self->{_consecutive_blank_lines} = 0; 22197} 22198 22199sub want_blank_line { 22200 my $self = shift; 22201 unless ( $self->{_consecutive_blank_lines} ) { 22202 $self->write_blank_code_line(); 22203 } 22204} 22205 22206sub require_blank_code_lines { 22207 22208 # write out the requested number of blanks regardless of the value of -mbl 22209 # unless -mbl=0. This allows extra blank lines to be written for subs and 22210 # packages even with the default -mbl=1 22211 my $self = shift; 22212 my $count = shift; 22213 my $need = $count - $self->{_consecutive_blank_lines}; 22214 my $rOpts = $self->{_rOpts}; 22215 my $forced = $rOpts->{'maximum-consecutive-blank-lines'} > 0; 22216 for ( my $i = 0 ; $i < $need ; $i++ ) { 22217 $self->write_blank_code_line($forced); 22218 } 22219} 22220 22221sub write_blank_code_line { 22222 my $self = shift; 22223 my $forced = shift; 22224 my $rOpts = $self->{_rOpts}; 22225 return 22226 if (!$forced 22227 && $self->{_consecutive_blank_lines} >= 22228 $rOpts->{'maximum-consecutive-blank-lines'} ); 22229 $self->{_consecutive_blank_lines}++; 22230 $self->{_consecutive_nonblank_lines} = 0; 22231 $self->write_line("\n"); 22232} 22233 22234sub write_code_line { 22235 my $self = shift; 22236 my $a = shift; 22237 22238 if ( $a =~ /^\s*$/ ) { 22239 my $rOpts = $self->{_rOpts}; 22240 return 22241 if ( $self->{_consecutive_blank_lines} >= 22242 $rOpts->{'maximum-consecutive-blank-lines'} ); 22243 $self->{_consecutive_blank_lines}++; 22244 $self->{_consecutive_nonblank_lines} = 0; 22245 } 22246 else { 22247 $self->{_consecutive_blank_lines} = 0; 22248 $self->{_consecutive_nonblank_lines}++; 22249 } 22250 $self->write_line($a); 22251} 22252 22253sub write_line { 22254 my $self = shift; 22255 my $a = shift; 22256 22257 # TODO: go through and see if the test is necessary here 22258 if ( $a =~ /\n$/ ) { $self->{_output_line_number}++; } 22259 22260 $self->{_line_sink_object}->write_line($a); 22261 22262 # This calculation of excess line length ignores any internal tabs 22263 my $rOpts = $self->{_rOpts}; 22264 my $exceed = length($a) - $rOpts->{'maximum-line-length'} - 1; 22265 if ( $a =~ /^\t+/g ) { 22266 $exceed += pos($a) * ( $rOpts->{'indent-columns'} - 1 ); 22267 } 22268 22269 # Note that we just incremented output line number to future value 22270 # so we must subtract 1 for current line number 22271 if ( length($a) > 1 + $self->{_max_output_line_length} ) { 22272 $self->{_max_output_line_length} = length($a) - 1; 22273 $self->{_max_output_line_length_at} = $self->{_output_line_number} - 1; 22274 } 22275 22276 if ( $exceed > 0 ) { 22277 my $output_line_number = $self->{_output_line_number}; 22278 $self->{_last_line_length_error} = $exceed; 22279 $self->{_last_line_length_error_at} = $output_line_number - 1; 22280 if ( $self->{_line_length_error_count} == 0 ) { 22281 $self->{_first_line_length_error} = $exceed; 22282 $self->{_first_line_length_error_at} = $output_line_number - 1; 22283 } 22284 22285 if ( 22286 $self->{_last_line_length_error} > $self->{_max_line_length_error} ) 22287 { 22288 $self->{_max_line_length_error} = $exceed; 22289 $self->{_max_line_length_error_at} = $output_line_number - 1; 22290 } 22291 22292 if ( $self->{_line_length_error_count} < MAX_NAG_MESSAGES ) { 22293 $self->write_logfile_entry( 22294 "Line length exceeded by $exceed characters\n"); 22295 } 22296 $self->{_line_length_error_count}++; 22297 } 22298 22299} 22300 22301sub report_line_length_errors { 22302 my $self = shift; 22303 my $rOpts = $self->{_rOpts}; 22304 my $line_length_error_count = $self->{_line_length_error_count}; 22305 if ( $line_length_error_count == 0 ) { 22306 $self->write_logfile_entry( 22307 "No lines exceeded $rOpts->{'maximum-line-length'} characters\n"); 22308 my $max_output_line_length = $self->{_max_output_line_length}; 22309 my $max_output_line_length_at = $self->{_max_output_line_length_at}; 22310 $self->write_logfile_entry( 22311" Maximum output line length was $max_output_line_length at line $max_output_line_length_at\n" 22312 ); 22313 22314 } 22315 else { 22316 22317 my $word = ( $line_length_error_count > 1 ) ? "s" : ""; 22318 $self->write_logfile_entry( 22319"$line_length_error_count output line$word exceeded $rOpts->{'maximum-line-length'} characters:\n" 22320 ); 22321 22322 $word = ( $line_length_error_count > 1 ) ? "First" : ""; 22323 my $first_line_length_error = $self->{_first_line_length_error}; 22324 my $first_line_length_error_at = $self->{_first_line_length_error_at}; 22325 $self->write_logfile_entry( 22326" $word at line $first_line_length_error_at by $first_line_length_error characters\n" 22327 ); 22328 22329 if ( $line_length_error_count > 1 ) { 22330 my $max_line_length_error = $self->{_max_line_length_error}; 22331 my $max_line_length_error_at = $self->{_max_line_length_error_at}; 22332 my $last_line_length_error = $self->{_last_line_length_error}; 22333 my $last_line_length_error_at = $self->{_last_line_length_error_at}; 22334 $self->write_logfile_entry( 22335" Maximum at line $max_line_length_error_at by $max_line_length_error characters\n" 22336 ); 22337 $self->write_logfile_entry( 22338" Last at line $last_line_length_error_at by $last_line_length_error characters\n" 22339 ); 22340 } 22341 } 22342} 22343 22344##################################################################### 22345# 22346# The Perl::Tidy::Debugger class shows line tokenization 22347# 22348##################################################################### 22349 22350package Perl::Tidy::Debugger; 22351 22352sub new { 22353 22354 my ( $class, $filename ) = @_; 22355 22356 bless { 22357 _debug_file => $filename, 22358 _debug_file_opened => 0, 22359 _fh => undef, 22360 }, $class; 22361} 22362 22363sub really_open_debug_file { 22364 22365 my $self = shift; 22366 my $debug_file = $self->{_debug_file}; 22367 my $fh; 22368 unless ( $fh = IO::File->new("> $debug_file") ) { 22369 Perl::Tidy::Warn("can't open $debug_file: $!\n"); 22370 } 22371 $self->{_debug_file_opened} = 1; 22372 $self->{_fh} = $fh; 22373 print $fh 22374 "Use -dump-token-types (-dtt) to get a list of token type codes\n"; 22375} 22376 22377sub close_debug_file { 22378 22379 my $self = shift; 22380 my $fh = $self->{_fh}; 22381 if ( $self->{_debug_file_opened} ) { 22382 22383 eval { $self->{_fh}->close() }; 22384 } 22385} 22386 22387sub write_debug_entry { 22388 22389 # This is a debug dump routine which may be modified as necessary 22390 # to dump tokens on a line-by-line basis. The output will be written 22391 # to the .DEBUG file when the -D flag is entered. 22392 my $self = shift; 22393 my $line_of_tokens = shift; 22394 22395 my $input_line = $line_of_tokens->{_line_text}; 22396 my $rtoken_type = $line_of_tokens->{_rtoken_type}; 22397 my $rtokens = $line_of_tokens->{_rtokens}; 22398 my $rlevels = $line_of_tokens->{_rlevels}; 22399 my $rslevels = $line_of_tokens->{_rslevels}; 22400 my $rblock_type = $line_of_tokens->{_rblock_type}; 22401 my $input_line_number = $line_of_tokens->{_line_number}; 22402 my $line_type = $line_of_tokens->{_line_type}; 22403 22404 my ( $j, $num ); 22405 22406 my $token_str = "$input_line_number: "; 22407 my $reconstructed_original = "$input_line_number: "; 22408 my $block_str = "$input_line_number: "; 22409 22410 #$token_str .= "$line_type: "; 22411 #$reconstructed_original .= "$line_type: "; 22412 22413 my $pattern = ""; 22414 my @next_char = ( '"', '"' ); 22415 my $i_next = 0; 22416 unless ( $self->{_debug_file_opened} ) { $self->really_open_debug_file() } 22417 my $fh = $self->{_fh}; 22418 22419 for ( $j = 0 ; $j < @$rtoken_type ; $j++ ) { 22420 22421 # testing patterns 22422 if ( $$rtoken_type[$j] eq 'k' ) { 22423 $pattern .= $$rtokens[$j]; 22424 } 22425 else { 22426 $pattern .= $$rtoken_type[$j]; 22427 } 22428 $reconstructed_original .= $$rtokens[$j]; 22429 $block_str .= "($$rblock_type[$j])"; 22430 $num = length( $$rtokens[$j] ); 22431 my $type_str = $$rtoken_type[$j]; 22432 22433 # be sure there are no blank tokens (shouldn't happen) 22434 # This can only happen if a programming error has been made 22435 # because all valid tokens are non-blank 22436 if ( $type_str eq ' ' ) { 22437 print $fh "BLANK TOKEN on the next line\n"; 22438 $type_str = $next_char[$i_next]; 22439 $i_next = 1 - $i_next; 22440 } 22441 22442 if ( length($type_str) == 1 ) { 22443 $type_str = $type_str x $num; 22444 } 22445 $token_str .= $type_str; 22446 } 22447 22448 # Write what you want here ... 22449 # print $fh "$input_line\n"; 22450 # print $fh "$pattern\n"; 22451 print $fh "$reconstructed_original\n"; 22452 print $fh "$token_str\n"; 22453 22454 #print $fh "$block_str\n"; 22455} 22456 22457##################################################################### 22458# 22459# The Perl::Tidy::LineBuffer class supplies a 'get_line()' 22460# method for returning the next line to be parsed, as well as a 22461# 'peek_ahead()' method 22462# 22463# The input parameter is an object with a 'get_line()' method 22464# which returns the next line to be parsed 22465# 22466##################################################################### 22467 22468package Perl::Tidy::LineBuffer; 22469 22470sub new { 22471 22472 my $class = shift; 22473 my $line_source_object = shift; 22474 22475 return bless { 22476 _line_source_object => $line_source_object, 22477 _rlookahead_buffer => [], 22478 }, $class; 22479} 22480 22481sub peek_ahead { 22482 my $self = shift; 22483 my $buffer_index = shift; 22484 my $line = undef; 22485 my $line_source_object = $self->{_line_source_object}; 22486 my $rlookahead_buffer = $self->{_rlookahead_buffer}; 22487 if ( $buffer_index < scalar(@$rlookahead_buffer) ) { 22488 $line = $$rlookahead_buffer[$buffer_index]; 22489 } 22490 else { 22491 $line = $line_source_object->get_line(); 22492 push( @$rlookahead_buffer, $line ); 22493 } 22494 return $line; 22495} 22496 22497sub get_line { 22498 my $self = shift; 22499 my $line = undef; 22500 my $line_source_object = $self->{_line_source_object}; 22501 my $rlookahead_buffer = $self->{_rlookahead_buffer}; 22502 22503 if ( scalar(@$rlookahead_buffer) ) { 22504 $line = shift @$rlookahead_buffer; 22505 } 22506 else { 22507 $line = $line_source_object->get_line(); 22508 } 22509 return $line; 22510} 22511 22512######################################################################## 22513# 22514# the Perl::Tidy::Tokenizer package is essentially a filter which 22515# reads lines of perl source code from a source object and provides 22516# corresponding tokenized lines through its get_line() method. Lines 22517# flow from the source_object to the caller like this: 22518# 22519# source_object --> LineBuffer_object --> Tokenizer --> calling routine 22520# get_line() get_line() get_line() line_of_tokens 22521# 22522# The source object can be any object with a get_line() method which 22523# supplies one line (a character string) perl call. 22524# The LineBuffer object is created by the Tokenizer. 22525# The Tokenizer returns a reference to a data structure 'line_of_tokens' 22526# containing one tokenized line for each call to its get_line() method. 22527# 22528# WARNING: This is not a real class yet. Only one tokenizer my be used. 22529# 22530######################################################################## 22531 22532package Perl::Tidy::Tokenizer; 22533 22534BEGIN { 22535 22536 # Caution: these debug flags produce a lot of output 22537 # They should all be 0 except when debugging small scripts 22538 22539 use constant TOKENIZER_DEBUG_FLAG_EXPECT => 0; 22540 use constant TOKENIZER_DEBUG_FLAG_NSCAN => 0; 22541 use constant TOKENIZER_DEBUG_FLAG_QUOTE => 0; 22542 use constant TOKENIZER_DEBUG_FLAG_SCAN_ID => 0; 22543 use constant TOKENIZER_DEBUG_FLAG_TOKENIZE => 0; 22544 22545 my $debug_warning = sub { 22546 print STDOUT "TOKENIZER_DEBUGGING with key $_[0]\n"; 22547 }; 22548 22549 TOKENIZER_DEBUG_FLAG_EXPECT && $debug_warning->('EXPECT'); 22550 TOKENIZER_DEBUG_FLAG_NSCAN && $debug_warning->('NSCAN'); 22551 TOKENIZER_DEBUG_FLAG_QUOTE && $debug_warning->('QUOTE'); 22552 TOKENIZER_DEBUG_FLAG_SCAN_ID && $debug_warning->('SCAN_ID'); 22553 TOKENIZER_DEBUG_FLAG_TOKENIZE && $debug_warning->('TOKENIZE'); 22554 22555} 22556 22557use Carp; 22558 22559# PACKAGE VARIABLES for for processing an entire FILE. 22560use vars qw{ 22561 $tokenizer_self 22562 22563 $last_nonblank_token 22564 $last_nonblank_type 22565 $last_nonblank_block_type 22566 $statement_type 22567 $in_attribute_list 22568 $current_package 22569 $context 22570 22571 %is_constant 22572 %is_user_function 22573 %user_function_prototype 22574 %is_block_function 22575 %is_block_list_function 22576 %saw_function_definition 22577 22578 $brace_depth 22579 $paren_depth 22580 $square_bracket_depth 22581 22582 @current_depth 22583 @total_depth 22584 $total_depth 22585 @nesting_sequence_number 22586 @current_sequence_number 22587 @paren_type 22588 @paren_semicolon_count 22589 @paren_structural_type 22590 @brace_type 22591 @brace_structural_type 22592 @brace_context 22593 @brace_package 22594 @square_bracket_type 22595 @square_bracket_structural_type 22596 @depth_array 22597 @nested_ternary_flag 22598 @nested_statement_type 22599 @starting_line_of_current_depth 22600}; 22601 22602# GLOBAL CONSTANTS for routines in this package 22603use vars qw{ 22604 %is_indirect_object_taker 22605 %is_block_operator 22606 %expecting_operator_token 22607 %expecting_operator_types 22608 %expecting_term_types 22609 %expecting_term_token 22610 %is_digraph 22611 %is_file_test_operator 22612 %is_trigraph 22613 %is_valid_token_type 22614 %is_keyword 22615 %is_code_block_token 22616 %really_want_term 22617 @opening_brace_names 22618 @closing_brace_names 22619 %is_keyword_taking_list 22620 %is_q_qq_qw_qx_qr_s_y_tr_m 22621}; 22622 22623# possible values of operator_expected() 22624use constant TERM => -1; 22625use constant UNKNOWN => 0; 22626use constant OPERATOR => 1; 22627 22628# possible values of context 22629use constant SCALAR_CONTEXT => -1; 22630use constant UNKNOWN_CONTEXT => 0; 22631use constant LIST_CONTEXT => 1; 22632 22633# Maximum number of little messages; probably need not be changed. 22634use constant MAX_NAG_MESSAGES => 6; 22635 22636{ 22637 22638 # methods to count instances 22639 my $_count = 0; 22640 sub get_count { $_count; } 22641 sub _increment_count { ++$_count } 22642 sub _decrement_count { --$_count } 22643} 22644 22645sub DESTROY { 22646 $_[0]->_decrement_count(); 22647} 22648 22649sub new { 22650 22651 my $class = shift; 22652 22653 # Note: 'tabs' and 'indent_columns' are temporary and should be 22654 # removed asap 22655 my %defaults = ( 22656 source_object => undef, 22657 debugger_object => undef, 22658 diagnostics_object => undef, 22659 logger_object => undef, 22660 starting_level => undef, 22661 indent_columns => 4, 22662 tabsize => 8, 22663 look_for_hash_bang => 0, 22664 trim_qw => 1, 22665 look_for_autoloader => 1, 22666 look_for_selfloader => 1, 22667 starting_line_number => 1, 22668 ); 22669 my %args = ( %defaults, @_ ); 22670 22671 # we are given an object with a get_line() method to supply source lines 22672 my $source_object = $args{source_object}; 22673 22674 # we create another object with a get_line() and peek_ahead() method 22675 my $line_buffer_object = Perl::Tidy::LineBuffer->new($source_object); 22676 22677 # Tokenizer state data is as follows: 22678 # _rhere_target_list reference to list of here-doc targets 22679 # _here_doc_target the target string for a here document 22680 # _here_quote_character the type of here-doc quoting (" ' ` or none) 22681 # to determine if interpolation is done 22682 # _quote_target character we seek if chasing a quote 22683 # _line_start_quote line where we started looking for a long quote 22684 # _in_here_doc flag indicating if we are in a here-doc 22685 # _in_pod flag set if we are in pod documentation 22686 # _in_error flag set if we saw severe error (binary in script) 22687 # _in_data flag set if we are in __DATA__ section 22688 # _in_end flag set if we are in __END__ section 22689 # _in_format flag set if we are in a format description 22690 # _in_attribute_list flag telling if we are looking for attributes 22691 # _in_quote flag telling if we are chasing a quote 22692 # _starting_level indentation level of first line 22693 # _line_buffer_object object with get_line() method to supply source code 22694 # _diagnostics_object place to write debugging information 22695 # _unexpected_error_count error count used to limit output 22696 # _lower_case_labels_at line numbers where lower case labels seen 22697 $tokenizer_self = { 22698 _rhere_target_list => [], 22699 _in_here_doc => 0, 22700 _here_doc_target => "", 22701 _here_quote_character => "", 22702 _in_data => 0, 22703 _in_end => 0, 22704 _in_format => 0, 22705 _in_error => 0, 22706 _in_pod => 0, 22707 _in_attribute_list => 0, 22708 _in_quote => 0, 22709 _quote_target => "", 22710 _line_start_quote => -1, 22711 _starting_level => $args{starting_level}, 22712 _know_starting_level => defined( $args{starting_level} ), 22713 _tabsize => $args{tabsize}, 22714 _indent_columns => $args{indent_columns}, 22715 _look_for_hash_bang => $args{look_for_hash_bang}, 22716 _trim_qw => $args{trim_qw}, 22717 _continuation_indentation => $args{continuation_indentation}, 22718 _outdent_labels => $args{outdent_labels}, 22719 _last_line_number => $args{starting_line_number} - 1, 22720 _saw_perl_dash_P => 0, 22721 _saw_perl_dash_w => 0, 22722 _saw_use_strict => 0, 22723 _saw_v_string => 0, 22724 _look_for_autoloader => $args{look_for_autoloader}, 22725 _look_for_selfloader => $args{look_for_selfloader}, 22726 _saw_autoloader => 0, 22727 _saw_selfloader => 0, 22728 _saw_hash_bang => 0, 22729 _saw_end => 0, 22730 _saw_data => 0, 22731 _saw_negative_indentation => 0, 22732 _started_tokenizing => 0, 22733 _line_buffer_object => $line_buffer_object, 22734 _debugger_object => $args{debugger_object}, 22735 _diagnostics_object => $args{diagnostics_object}, 22736 _logger_object => $args{logger_object}, 22737 _unexpected_error_count => 0, 22738 _started_looking_for_here_target_at => 0, 22739 _nearly_matched_here_target_at => undef, 22740 _line_text => "", 22741 _rlower_case_labels_at => undef, 22742 }; 22743 22744 prepare_for_a_new_file(); 22745 find_starting_indentation_level(); 22746 22747 bless $tokenizer_self, $class; 22748 22749 # This is not a full class yet, so die if an attempt is made to 22750 # create more than one object. 22751 22752 if ( _increment_count() > 1 ) { 22753 confess 22754"Attempt to create more than 1 object in $class, which is not a true class yet\n"; 22755 } 22756 22757 return $tokenizer_self; 22758 22759} 22760 22761# interface to Perl::Tidy::Logger routines 22762sub warning { 22763 my $logger_object = $tokenizer_self->{_logger_object}; 22764 if ($logger_object) { 22765 $logger_object->warning(@_); 22766 } 22767} 22768 22769sub complain { 22770 my $logger_object = $tokenizer_self->{_logger_object}; 22771 if ($logger_object) { 22772 $logger_object->complain(@_); 22773 } 22774} 22775 22776sub write_logfile_entry { 22777 my $logger_object = $tokenizer_self->{_logger_object}; 22778 if ($logger_object) { 22779 $logger_object->write_logfile_entry(@_); 22780 } 22781} 22782 22783sub interrupt_logfile { 22784 my $logger_object = $tokenizer_self->{_logger_object}; 22785 if ($logger_object) { 22786 $logger_object->interrupt_logfile(); 22787 } 22788} 22789 22790sub resume_logfile { 22791 my $logger_object = $tokenizer_self->{_logger_object}; 22792 if ($logger_object) { 22793 $logger_object->resume_logfile(); 22794 } 22795} 22796 22797sub increment_brace_error { 22798 my $logger_object = $tokenizer_self->{_logger_object}; 22799 if ($logger_object) { 22800 $logger_object->increment_brace_error(); 22801 } 22802} 22803 22804sub report_definite_bug { 22805 my $logger_object = $tokenizer_self->{_logger_object}; 22806 if ($logger_object) { 22807 $logger_object->report_definite_bug(); 22808 } 22809} 22810 22811sub brace_warning { 22812 my $logger_object = $tokenizer_self->{_logger_object}; 22813 if ($logger_object) { 22814 $logger_object->brace_warning(@_); 22815 } 22816} 22817 22818sub get_saw_brace_error { 22819 my $logger_object = $tokenizer_self->{_logger_object}; 22820 if ($logger_object) { 22821 $logger_object->get_saw_brace_error(); 22822 } 22823 else { 22824 0; 22825 } 22826} 22827 22828# interface to Perl::Tidy::Diagnostics routines 22829sub write_diagnostics { 22830 if ( $tokenizer_self->{_diagnostics_object} ) { 22831 $tokenizer_self->{_diagnostics_object}->write_diagnostics(@_); 22832 } 22833} 22834 22835sub report_tokenization_errors { 22836 22837 my $self = shift; 22838 22839 my $level = get_indentation_level(); 22840 if ( $level != $tokenizer_self->{_starting_level} ) { 22841 warning("final indentation level: $level\n"); 22842 } 22843 22844 check_final_nesting_depths(); 22845 22846 if ( $tokenizer_self->{_look_for_hash_bang} 22847 && !$tokenizer_self->{_saw_hash_bang} ) 22848 { 22849 warning( 22850 "hit EOF without seeing hash-bang line; maybe don't need -x?\n"); 22851 } 22852 22853 if ( $tokenizer_self->{_in_format} ) { 22854 warning("hit EOF while in format description\n"); 22855 } 22856 22857 if ( $tokenizer_self->{_in_pod} ) { 22858 22859 # Just write log entry if this is after __END__ or __DATA__ 22860 # because this happens to often, and it is not likely to be 22861 # a parsing error. 22862 if ( $tokenizer_self->{_saw_data} || $tokenizer_self->{_saw_end} ) { 22863 write_logfile_entry( 22864"hit eof while in pod documentation (no =cut seen)\n\tthis can cause trouble with some pod utilities\n" 22865 ); 22866 } 22867 22868 else { 22869 complain( 22870"hit eof while in pod documentation (no =cut seen)\n\tthis can cause trouble with some pod utilities\n" 22871 ); 22872 } 22873 22874 } 22875 22876 if ( $tokenizer_self->{_in_here_doc} ) { 22877 my $here_doc_target = $tokenizer_self->{_here_doc_target}; 22878 my $started_looking_for_here_target_at = 22879 $tokenizer_self->{_started_looking_for_here_target_at}; 22880 if ($here_doc_target) { 22881 warning( 22882"hit EOF in here document starting at line $started_looking_for_here_target_at with target: $here_doc_target\n" 22883 ); 22884 } 22885 else { 22886 warning( 22887"hit EOF in here document starting at line $started_looking_for_here_target_at with empty target string\n" 22888 ); 22889 } 22890 my $nearly_matched_here_target_at = 22891 $tokenizer_self->{_nearly_matched_here_target_at}; 22892 if ($nearly_matched_here_target_at) { 22893 warning( 22894"NOTE: almost matched at input line $nearly_matched_here_target_at except for whitespace\n" 22895 ); 22896 } 22897 } 22898 22899 if ( $tokenizer_self->{_in_quote} ) { 22900 my $line_start_quote = $tokenizer_self->{_line_start_quote}; 22901 my $quote_target = $tokenizer_self->{_quote_target}; 22902 my $what = 22903 ( $tokenizer_self->{_in_attribute_list} ) 22904 ? "attribute list" 22905 : "quote/pattern"; 22906 warning( 22907"hit EOF seeking end of $what starting at line $line_start_quote ending in $quote_target\n" 22908 ); 22909 } 22910 22911 unless ( $tokenizer_self->{_saw_perl_dash_w} ) { 22912 if ( $] < 5.006 ) { 22913 write_logfile_entry("Suggest including '-w parameter'\n"); 22914 } 22915 else { 22916 write_logfile_entry("Suggest including 'use warnings;'\n"); 22917 } 22918 } 22919 22920 if ( $tokenizer_self->{_saw_perl_dash_P} ) { 22921 write_logfile_entry("Use of -P parameter for defines is discouraged\n"); 22922 } 22923 22924 unless ( $tokenizer_self->{_saw_use_strict} ) { 22925 write_logfile_entry("Suggest including 'use strict;'\n"); 22926 } 22927 22928 # it is suggested that lables have at least one upper case character 22929 # for legibility and to avoid code breakage as new keywords are introduced 22930 if ( $tokenizer_self->{_rlower_case_labels_at} ) { 22931 my @lower_case_labels_at = 22932 @{ $tokenizer_self->{_rlower_case_labels_at} }; 22933 write_logfile_entry( 22934 "Suggest using upper case characters in label(s)\n"); 22935 local $" = ')('; 22936 write_logfile_entry(" defined at line(s): (@lower_case_labels_at)\n"); 22937 } 22938} 22939 22940sub report_v_string { 22941 22942 # warn if this version can't handle v-strings 22943 my $tok = shift; 22944 unless ( $tokenizer_self->{_saw_v_string} ) { 22945 $tokenizer_self->{_saw_v_string} = $tokenizer_self->{_last_line_number}; 22946 } 22947 if ( $] < 5.006 ) { 22948 warning( 22949"Found v-string '$tok' but v-strings are not implemented in your version of perl; see Camel 3 book ch 2\n" 22950 ); 22951 } 22952} 22953 22954sub get_input_line_number { 22955 return $tokenizer_self->{_last_line_number}; 22956} 22957 22958# returns the next tokenized line 22959sub get_line { 22960 22961 my $self = shift; 22962 22963 # USES GLOBAL VARIABLES: $tokenizer_self, $brace_depth, 22964 # $square_bracket_depth, $paren_depth 22965 22966 my $input_line = $tokenizer_self->{_line_buffer_object}->get_line(); 22967 $tokenizer_self->{_line_text} = $input_line; 22968 22969 return undef unless ($input_line); 22970 22971 my $input_line_number = ++$tokenizer_self->{_last_line_number}; 22972 22973 # Find and remove what characters terminate this line, including any 22974 # control r 22975 my $input_line_separator = ""; 22976 if ( chomp($input_line) ) { $input_line_separator = $/ } 22977 22978 # TODO: what other characters should be included here? 22979 if ( $input_line =~ s/((\r|\035|\032)+)$// ) { 22980 $input_line_separator = $2 . $input_line_separator; 22981 } 22982 22983 # for backwards compatibility we keep the line text terminated with 22984 # a newline character 22985 $input_line .= "\n"; 22986 $tokenizer_self->{_line_text} = $input_line; # update 22987 22988 # create a data structure describing this line which will be 22989 # returned to the caller. 22990 22991 # _line_type codes are: 22992 # SYSTEM - system-specific code before hash-bang line 22993 # CODE - line of perl code (including comments) 22994 # POD_START - line starting pod, such as '=head' 22995 # POD - pod documentation text 22996 # POD_END - last line of pod section, '=cut' 22997 # HERE - text of here-document 22998 # HERE_END - last line of here-doc (target word) 22999 # FORMAT - format section 23000 # FORMAT_END - last line of format section, '.' 23001 # DATA_START - __DATA__ line 23002 # DATA - unidentified text following __DATA__ 23003 # END_START - __END__ line 23004 # END - unidentified text following __END__ 23005 # ERROR - we are in big trouble, probably not a perl script 23006 23007 # Other variables: 23008 # _curly_brace_depth - depth of curly braces at start of line 23009 # _square_bracket_depth - depth of square brackets at start of line 23010 # _paren_depth - depth of parens at start of line 23011 # _starting_in_quote - this line continues a multi-line quote 23012 # (so don't trim leading blanks!) 23013 # _ending_in_quote - this line ends in a multi-line quote 23014 # (so don't trim trailing blanks!) 23015 my $line_of_tokens = { 23016 _line_type => 'EOF', 23017 _line_text => $input_line, 23018 _line_number => $input_line_number, 23019 _rtoken_type => undef, 23020 _rtokens => undef, 23021 _rlevels => undef, 23022 _rslevels => undef, 23023 _rblock_type => undef, 23024 _rcontainer_type => undef, 23025 _rcontainer_environment => undef, 23026 _rtype_sequence => undef, 23027 _rnesting_tokens => undef, 23028 _rci_levels => undef, 23029 _rnesting_blocks => undef, 23030 _guessed_indentation_level => 0, 23031 _starting_in_quote => 0, # to be set by subroutine 23032 _ending_in_quote => 0, 23033 _curly_brace_depth => $brace_depth, 23034 _square_bracket_depth => $square_bracket_depth, 23035 _paren_depth => $paren_depth, 23036 _quote_character => '', 23037 }; 23038 23039 # must print line unchanged if we are in a here document 23040 if ( $tokenizer_self->{_in_here_doc} ) { 23041 23042 $line_of_tokens->{_line_type} = 'HERE'; 23043 my $here_doc_target = $tokenizer_self->{_here_doc_target}; 23044 my $here_quote_character = $tokenizer_self->{_here_quote_character}; 23045 my $candidate_target = $input_line; 23046 chomp $candidate_target; 23047 if ( $candidate_target eq $here_doc_target ) { 23048 $tokenizer_self->{_nearly_matched_here_target_at} = undef; 23049 $line_of_tokens->{_line_type} = 'HERE_END'; 23050 write_logfile_entry("Exiting HERE document $here_doc_target\n"); 23051 23052 my $rhere_target_list = $tokenizer_self->{_rhere_target_list}; 23053 if (@$rhere_target_list) { # there can be multiple here targets 23054 ( $here_doc_target, $here_quote_character ) = 23055 @{ shift @$rhere_target_list }; 23056 $tokenizer_self->{_here_doc_target} = $here_doc_target; 23057 $tokenizer_self->{_here_quote_character} = 23058 $here_quote_character; 23059 write_logfile_entry( 23060 "Entering HERE document $here_doc_target\n"); 23061 $tokenizer_self->{_nearly_matched_here_target_at} = undef; 23062 $tokenizer_self->{_started_looking_for_here_target_at} = 23063 $input_line_number; 23064 } 23065 else { 23066 $tokenizer_self->{_in_here_doc} = 0; 23067 $tokenizer_self->{_here_doc_target} = ""; 23068 $tokenizer_self->{_here_quote_character} = ""; 23069 } 23070 } 23071 23072 # check for error of extra whitespace 23073 # note for PERL6: leading whitespace is allowed 23074 else { 23075 $candidate_target =~ s/\s*$//; 23076 $candidate_target =~ s/^\s*//; 23077 if ( $candidate_target eq $here_doc_target ) { 23078 $tokenizer_self->{_nearly_matched_here_target_at} = 23079 $input_line_number; 23080 } 23081 } 23082 return $line_of_tokens; 23083 } 23084 23085 # must print line unchanged if we are in a format section 23086 elsif ( $tokenizer_self->{_in_format} ) { 23087 23088 if ( $input_line =~ /^\.[\s#]*$/ ) { 23089 write_logfile_entry("Exiting format section\n"); 23090 $tokenizer_self->{_in_format} = 0; 23091 $line_of_tokens->{_line_type} = 'FORMAT_END'; 23092 } 23093 else { 23094 $line_of_tokens->{_line_type} = 'FORMAT'; 23095 } 23096 return $line_of_tokens; 23097 } 23098 23099 # must print line unchanged if we are in pod documentation 23100 elsif ( $tokenizer_self->{_in_pod} ) { 23101 23102 $line_of_tokens->{_line_type} = 'POD'; 23103 if ( $input_line =~ /^=cut/ ) { 23104 $line_of_tokens->{_line_type} = 'POD_END'; 23105 write_logfile_entry("Exiting POD section\n"); 23106 $tokenizer_self->{_in_pod} = 0; 23107 } 23108 if ( $input_line =~ /^\#\!.*perl\b/ ) { 23109 warning( 23110 "Hash-bang in pod can cause older versions of perl to fail! \n" 23111 ); 23112 } 23113 23114 return $line_of_tokens; 23115 } 23116 23117 # must print line unchanged if we have seen a severe error (i.e., we 23118 # are seeing illegal tokens and connot continue. Syntax errors do 23119 # not pass this route). Calling routine can decide what to do, but 23120 # the default can be to just pass all lines as if they were after __END__ 23121 elsif ( $tokenizer_self->{_in_error} ) { 23122 $line_of_tokens->{_line_type} = 'ERROR'; 23123 return $line_of_tokens; 23124 } 23125 23126 # print line unchanged if we are __DATA__ section 23127 elsif ( $tokenizer_self->{_in_data} ) { 23128 23129 # ...but look for POD 23130 # Note that the _in_data and _in_end flags remain set 23131 # so that we return to that state after seeing the 23132 # end of a pod section 23133 if ( $input_line =~ /^=(?!cut)/ ) { 23134 $line_of_tokens->{_line_type} = 'POD_START'; 23135 write_logfile_entry("Entering POD section\n"); 23136 $tokenizer_self->{_in_pod} = 1; 23137 return $line_of_tokens; 23138 } 23139 else { 23140 $line_of_tokens->{_line_type} = 'DATA'; 23141 return $line_of_tokens; 23142 } 23143 } 23144 23145 # print line unchanged if we are in __END__ section 23146 elsif ( $tokenizer_self->{_in_end} ) { 23147 23148 # ...but look for POD 23149 # Note that the _in_data and _in_end flags remain set 23150 # so that we return to that state after seeing the 23151 # end of a pod section 23152 if ( $input_line =~ /^=(?!cut)/ ) { 23153 $line_of_tokens->{_line_type} = 'POD_START'; 23154 write_logfile_entry("Entering POD section\n"); 23155 $tokenizer_self->{_in_pod} = 1; 23156 return $line_of_tokens; 23157 } 23158 else { 23159 $line_of_tokens->{_line_type} = 'END'; 23160 return $line_of_tokens; 23161 } 23162 } 23163 23164 # check for a hash-bang line if we haven't seen one 23165 if ( !$tokenizer_self->{_saw_hash_bang} ) { 23166 if ( $input_line =~ /^\#\!.*perl\b/ ) { 23167 $tokenizer_self->{_saw_hash_bang} = $input_line_number; 23168 23169 # check for -w and -P flags 23170 if ( $input_line =~ /^\#\!.*perl\s.*-.*P/ ) { 23171 $tokenizer_self->{_saw_perl_dash_P} = 1; 23172 } 23173 23174 if ( $input_line =~ /^\#\!.*perl\s.*-.*w/ ) { 23175 $tokenizer_self->{_saw_perl_dash_w} = 1; 23176 } 23177 23178 if ( ( $input_line_number > 1 ) 23179 && ( !$tokenizer_self->{_look_for_hash_bang} ) ) 23180 { 23181 23182 # this is helpful for VMS systems; we may have accidentally 23183 # tokenized some DCL commands 23184 if ( $tokenizer_self->{_started_tokenizing} ) { 23185 warning( 23186"There seems to be a hash-bang after line 1; do you need to run with -x ?\n" 23187 ); 23188 } 23189 else { 23190 complain("Useless hash-bang after line 1\n"); 23191 } 23192 } 23193 23194 # Report the leading hash-bang as a system line 23195 # This will prevent -dac from deleting it 23196 else { 23197 $line_of_tokens->{_line_type} = 'SYSTEM'; 23198 return $line_of_tokens; 23199 } 23200 } 23201 } 23202 23203 # wait for a hash-bang before parsing if the user invoked us with -x 23204 if ( $tokenizer_self->{_look_for_hash_bang} 23205 && !$tokenizer_self->{_saw_hash_bang} ) 23206 { 23207 $line_of_tokens->{_line_type} = 'SYSTEM'; 23208 return $line_of_tokens; 23209 } 23210 23211 # a first line of the form ': #' will be marked as SYSTEM 23212 # since lines of this form may be used by tcsh 23213 if ( $input_line_number == 1 && $input_line =~ /^\s*\:\s*\#/ ) { 23214 $line_of_tokens->{_line_type} = 'SYSTEM'; 23215 return $line_of_tokens; 23216 } 23217 23218 # now we know that it is ok to tokenize the line... 23219 # the line tokenizer will modify any of these private variables: 23220 # _rhere_target_list 23221 # _in_data 23222 # _in_end 23223 # _in_format 23224 # _in_error 23225 # _in_pod 23226 # _in_quote 23227 my $ending_in_quote_last = $tokenizer_self->{_in_quote}; 23228 tokenize_this_line($line_of_tokens); 23229 23230 # Now finish defining the return structure and return it 23231 $line_of_tokens->{_ending_in_quote} = $tokenizer_self->{_in_quote}; 23232 23233 # handle severe error (binary data in script) 23234 if ( $tokenizer_self->{_in_error} ) { 23235 $tokenizer_self->{_in_quote} = 0; # to avoid any more messages 23236 warning("Giving up after error\n"); 23237 $line_of_tokens->{_line_type} = 'ERROR'; 23238 reset_indentation_level(0); # avoid error messages 23239 return $line_of_tokens; 23240 } 23241 23242 # handle start of pod documentation 23243 if ( $tokenizer_self->{_in_pod} ) { 23244 23245 # This gets tricky..above a __DATA__ or __END__ section, perl 23246 # accepts '=cut' as the start of pod section. But afterwards, 23247 # only pod utilities see it and they may ignore an =cut without 23248 # leading =head. In any case, this isn't good. 23249 if ( $input_line =~ /^=cut\b/ ) { 23250 if ( $tokenizer_self->{_saw_data} || $tokenizer_self->{_saw_end} ) { 23251 complain("=cut while not in pod ignored\n"); 23252 $tokenizer_self->{_in_pod} = 0; 23253 $line_of_tokens->{_line_type} = 'POD_END'; 23254 } 23255 else { 23256 $line_of_tokens->{_line_type} = 'POD_START'; 23257 complain( 23258"=cut starts a pod section .. this can fool pod utilities.\n" 23259 ); 23260 write_logfile_entry("Entering POD section\n"); 23261 } 23262 } 23263 23264 else { 23265 $line_of_tokens->{_line_type} = 'POD_START'; 23266 write_logfile_entry("Entering POD section\n"); 23267 } 23268 23269 return $line_of_tokens; 23270 } 23271 23272 # update indentation levels for log messages 23273 if ( $input_line !~ /^\s*$/ ) { 23274 my $rlevels = $line_of_tokens->{_rlevels}; 23275 $line_of_tokens->{_guessed_indentation_level} = 23276 guess_old_indentation_level($input_line); 23277 } 23278 23279 # see if this line contains here doc targets 23280 my $rhere_target_list = $tokenizer_self->{_rhere_target_list}; 23281 if (@$rhere_target_list) { 23282 23283 my ( $here_doc_target, $here_quote_character ) = 23284 @{ shift @$rhere_target_list }; 23285 $tokenizer_self->{_in_here_doc} = 1; 23286 $tokenizer_self->{_here_doc_target} = $here_doc_target; 23287 $tokenizer_self->{_here_quote_character} = $here_quote_character; 23288 write_logfile_entry("Entering HERE document $here_doc_target\n"); 23289 $tokenizer_self->{_started_looking_for_here_target_at} = 23290 $input_line_number; 23291 } 23292 23293 # NOTE: __END__ and __DATA__ statements are written unformatted 23294 # because they can theoretically contain additional characters 23295 # which are not tokenized (and cannot be read with <DATA> either!). 23296 if ( $tokenizer_self->{_in_data} ) { 23297 $line_of_tokens->{_line_type} = 'DATA_START'; 23298 write_logfile_entry("Starting __DATA__ section\n"); 23299 $tokenizer_self->{_saw_data} = 1; 23300 23301 # keep parsing after __DATA__ if use SelfLoader was seen 23302 if ( $tokenizer_self->{_saw_selfloader} ) { 23303 $tokenizer_self->{_in_data} = 0; 23304 write_logfile_entry( 23305 "SelfLoader seen, continuing; -nlsl deactivates\n"); 23306 } 23307 23308 return $line_of_tokens; 23309 } 23310 23311 elsif ( $tokenizer_self->{_in_end} ) { 23312 $line_of_tokens->{_line_type} = 'END_START'; 23313 write_logfile_entry("Starting __END__ section\n"); 23314 $tokenizer_self->{_saw_end} = 1; 23315 23316 # keep parsing after __END__ if use AutoLoader was seen 23317 if ( $tokenizer_self->{_saw_autoloader} ) { 23318 $tokenizer_self->{_in_end} = 0; 23319 write_logfile_entry( 23320 "AutoLoader seen, continuing; -nlal deactivates\n"); 23321 } 23322 return $line_of_tokens; 23323 } 23324 23325 # now, finally, we know that this line is type 'CODE' 23326 $line_of_tokens->{_line_type} = 'CODE'; 23327 23328 # remember if we have seen any real code 23329 if ( !$tokenizer_self->{_started_tokenizing} 23330 && $input_line !~ /^\s*$/ 23331 && $input_line !~ /^\s*#/ ) 23332 { 23333 $tokenizer_self->{_started_tokenizing} = 1; 23334 } 23335 23336 if ( $tokenizer_self->{_debugger_object} ) { 23337 $tokenizer_self->{_debugger_object}->write_debug_entry($line_of_tokens); 23338 } 23339 23340 # Note: if keyword 'format' occurs in this line code, it is still CODE 23341 # (keyword 'format' need not start a line) 23342 if ( $tokenizer_self->{_in_format} ) { 23343 write_logfile_entry("Entering format section\n"); 23344 } 23345 23346 if ( $tokenizer_self->{_in_quote} 23347 and ( $tokenizer_self->{_line_start_quote} < 0 ) ) 23348 { 23349 23350 #if ( ( my $quote_target = get_quote_target() ) !~ /^\s*$/ ) { 23351 if ( 23352 ( my $quote_target = $tokenizer_self->{_quote_target} ) !~ /^\s*$/ ) 23353 { 23354 $tokenizer_self->{_line_start_quote} = $input_line_number; 23355 write_logfile_entry( 23356 "Start multi-line quote or pattern ending in $quote_target\n"); 23357 } 23358 } 23359 elsif ( ( $tokenizer_self->{_line_start_quote} >= 0 ) 23360 and !$tokenizer_self->{_in_quote} ) 23361 { 23362 $tokenizer_self->{_line_start_quote} = -1; 23363 write_logfile_entry("End of multi-line quote or pattern\n"); 23364 } 23365 23366 # we are returning a line of CODE 23367 return $line_of_tokens; 23368} 23369 23370sub find_starting_indentation_level { 23371 23372 # We need to find the indentation level of the first line of the 23373 # script being formatted. Often it will be zero for an entire file, 23374 # but if we are formatting a local block of code (within an editor for 23375 # example) it may not be zero. The user may specify this with the 23376 # -sil=n parameter but normally doesn't so we have to guess. 23377 # 23378 # USES GLOBAL VARIABLES: $tokenizer_self 23379 my $starting_level = 0; 23380 23381 # use value if given as parameter 23382 if ( $tokenizer_self->{_know_starting_level} ) { 23383 $starting_level = $tokenizer_self->{_starting_level}; 23384 } 23385 23386 # if we know there is a hash_bang line, the level must be zero 23387 elsif ( $tokenizer_self->{_look_for_hash_bang} ) { 23388 $tokenizer_self->{_know_starting_level} = 1; 23389 } 23390 23391 # otherwise figure it out from the input file 23392 else { 23393 my $line; 23394 my $i = 0; 23395 23396 # keep looking at lines until we find a hash bang or piece of code 23397 my $msg = ""; 23398 while ( $line = 23399 $tokenizer_self->{_line_buffer_object}->peek_ahead( $i++ ) ) 23400 { 23401 23402 # if first line is #! then assume starting level is zero 23403 if ( $i == 1 && $line =~ /^\#\!/ ) { 23404 $starting_level = 0; 23405 last; 23406 } 23407 next if ( $line =~ /^\s*#/ ); # skip past comments 23408 next if ( $line =~ /^\s*$/ ); # skip past blank lines 23409 $starting_level = guess_old_indentation_level($line); 23410 last; 23411 } 23412 $msg = "Line $i implies starting-indentation-level = $starting_level\n"; 23413 write_logfile_entry("$msg"); 23414 } 23415 $tokenizer_self->{_starting_level} = $starting_level; 23416 reset_indentation_level($starting_level); 23417} 23418 23419sub guess_old_indentation_level { 23420 my ($line) = @_; 23421 23422 # Guess the indentation level of an input line. 23423 # 23424 # For the first line of code this result will define the starting 23425 # indentation level. It will mainly be non-zero when perltidy is applied 23426 # within an editor to a local block of code. 23427 # 23428 # This is an impossible task in general because we can't know what tabs 23429 # meant for the old script and how many spaces were used for one 23430 # indentation level in the given input script. For example it may have 23431 # been previously formatted with -i=7 -et=3. But we can at least try to 23432 # make sure that perltidy guesses correctly if it is applied repeatedly to 23433 # a block of code within an editor, so that the block stays at the same 23434 # level when perltidy is applied repeatedly. 23435 # 23436 # USES GLOBAL VARIABLES: $tokenizer_self 23437 my $level = 0; 23438 23439 # find leading tabs, spaces, and any statement label 23440 my $spaces = 0; 23441 if ( $line =~ /^(\t+)?(\s+)?(\w+:[^:])?/ ) { 23442 23443 # If there are leading tabs, we use the tab scheme for this run, if 23444 # any, so that the code will remain stable when editing. 23445 if ($1) { $spaces += length($1) * $tokenizer_self->{_tabsize} } 23446 23447 if ($2) { $spaces += length($2) } 23448 23449 # correct for outdented labels 23450 if ( $3 && $tokenizer_self->{'_outdent_labels'} ) { 23451 $spaces += $tokenizer_self->{_continuation_indentation}; 23452 } 23453 } 23454 23455 # compute indentation using the value of -i for this run. 23456 # If -i=0 is used for this run (which is possible) it doesn't matter 23457 # what we do here but we'll guess that the old run used 4 spaces per level. 23458 my $indent_columns = $tokenizer_self->{_indent_columns}; 23459 $indent_columns = 4 if ( !$indent_columns ); 23460 $level = int( $spaces / $indent_columns ); 23461 return ($level); 23462} 23463 23464# This is a currently unused debug routine 23465sub dump_functions { 23466 23467 my $fh = *STDOUT; 23468 my ( $pkg, $sub ); 23469 foreach $pkg ( keys %is_user_function ) { 23470 print $fh "\nnon-constant subs in package $pkg\n"; 23471 23472 foreach $sub ( keys %{ $is_user_function{$pkg} } ) { 23473 my $msg = ""; 23474 if ( $is_block_list_function{$pkg}{$sub} ) { 23475 $msg = 'block_list'; 23476 } 23477 23478 if ( $is_block_function{$pkg}{$sub} ) { 23479 $msg = 'block'; 23480 } 23481 print $fh "$sub $msg\n"; 23482 } 23483 } 23484 23485 foreach $pkg ( keys %is_constant ) { 23486 print $fh "\nconstants and constant subs in package $pkg\n"; 23487 23488 foreach $sub ( keys %{ $is_constant{$pkg} } ) { 23489 print $fh "$sub\n"; 23490 } 23491 } 23492} 23493 23494sub ones_count { 23495 23496 # count number of 1's in a string of 1's and 0's 23497 # example: ones_count("010101010101") gives 6 23498 return ( my $cis = $_[0] ) =~ tr/1/0/; 23499} 23500 23501sub prepare_for_a_new_file { 23502 23503 # previous tokens needed to determine what to expect next 23504 $last_nonblank_token = ';'; # the only possible starting state which 23505 $last_nonblank_type = ';'; # will make a leading brace a code block 23506 $last_nonblank_block_type = ''; 23507 23508 # scalars for remembering statement types across multiple lines 23509 $statement_type = ''; # '' or 'use' or 'sub..' or 'case..' 23510 $in_attribute_list = 0; 23511 23512 # scalars for remembering where we are in the file 23513 $current_package = "main"; 23514 $context = UNKNOWN_CONTEXT; 23515 23516 # hashes used to remember function information 23517 %is_constant = (); # user-defined constants 23518 %is_user_function = (); # user-defined functions 23519 %user_function_prototype = (); # their prototypes 23520 %is_block_function = (); 23521 %is_block_list_function = (); 23522 %saw_function_definition = (); 23523 23524 # variables used to track depths of various containers 23525 # and report nesting errors 23526 $paren_depth = 0; 23527 $brace_depth = 0; 23528 $square_bracket_depth = 0; 23529 @current_depth[ 0 .. $#closing_brace_names ] = 23530 (0) x scalar @closing_brace_names; 23531 $total_depth = 0; 23532 @total_depth = (); 23533 @nesting_sequence_number[ 0 .. $#closing_brace_names ] = 23534 ( 0 .. $#closing_brace_names ); 23535 @current_sequence_number = (); 23536 $paren_type[$paren_depth] = ''; 23537 $paren_semicolon_count[$paren_depth] = 0; 23538 $paren_structural_type[$brace_depth] = ''; 23539 $brace_type[$brace_depth] = ';'; # identify opening brace as code block 23540 $brace_structural_type[$brace_depth] = ''; 23541 $brace_context[$brace_depth] = UNKNOWN_CONTEXT; 23542 $brace_package[$paren_depth] = $current_package; 23543 $square_bracket_type[$square_bracket_depth] = ''; 23544 $square_bracket_structural_type[$square_bracket_depth] = ''; 23545 23546 initialize_tokenizer_state(); 23547} 23548 23549{ # begin tokenize_this_line 23550 23551 use constant BRACE => 0; 23552 use constant SQUARE_BRACKET => 1; 23553 use constant PAREN => 2; 23554 use constant QUESTION_COLON => 3; 23555 23556 # TV1: scalars for processing one LINE. 23557 # Re-initialized on each entry to sub tokenize_this_line. 23558 my ( 23559 $block_type, $container_type, $expecting, 23560 $i, $i_tok, $input_line, 23561 $input_line_number, $last_nonblank_i, $max_token_index, 23562 $next_tok, $next_type, $peeked_ahead, 23563 $prototype, $rhere_target_list, $rtoken_map, 23564 $rtoken_type, $rtokens, $tok, 23565 $type, $type_sequence, $indent_flag, 23566 ); 23567 23568 # TV2: refs to ARRAYS for processing one LINE 23569 # Re-initialized on each call. 23570 my $routput_token_list = []; # stack of output token indexes 23571 my $routput_token_type = []; # token types 23572 my $routput_block_type = []; # types of code block 23573 my $routput_container_type = []; # paren types, such as if, elsif, .. 23574 my $routput_type_sequence = []; # nesting sequential number 23575 my $routput_indent_flag = []; # 23576 23577 # TV3: SCALARS for quote variables. These are initialized with a 23578 # subroutine call and continually updated as lines are processed. 23579 my ( $in_quote, $quote_type, $quote_character, $quote_pos, $quote_depth, 23580 $quoted_string_1, $quoted_string_2, $allowed_quote_modifiers, ); 23581 23582 # TV4: SCALARS for multi-line identifiers and 23583 # statements. These are initialized with a subroutine call 23584 # and continually updated as lines are processed. 23585 my ( $id_scan_state, $identifier, $want_paren, $indented_if_level ); 23586 23587 # TV5: SCALARS for tracking indentation level. 23588 # Initialized once and continually updated as lines are 23589 # processed. 23590 my ( 23591 $nesting_token_string, $nesting_type_string, 23592 $nesting_block_string, $nesting_block_flag, 23593 $nesting_list_string, $nesting_list_flag, 23594 $ci_string_in_tokenizer, $continuation_string_in_tokenizer, 23595 $in_statement_continuation, $level_in_tokenizer, 23596 $slevel_in_tokenizer, $rslevel_stack, 23597 ); 23598 23599 # TV6: SCALARS for remembering several previous 23600 # tokens. Initialized once and continually updated as 23601 # lines are processed. 23602 my ( 23603 $last_nonblank_container_type, $last_nonblank_type_sequence, 23604 $last_last_nonblank_token, $last_last_nonblank_type, 23605 $last_last_nonblank_block_type, $last_last_nonblank_container_type, 23606 $last_last_nonblank_type_sequence, $last_nonblank_prototype, 23607 ); 23608 23609 # ---------------------------------------------------------------- 23610 # beginning of tokenizer variable access and manipulation routines 23611 # ---------------------------------------------------------------- 23612 23613 sub initialize_tokenizer_state { 23614 23615 # TV1: initialized on each call 23616 # TV2: initialized on each call 23617 # TV3: 23618 $in_quote = 0; 23619 $quote_type = 'Q'; 23620 $quote_character = ""; 23621 $quote_pos = 0; 23622 $quote_depth = 0; 23623 $quoted_string_1 = ""; 23624 $quoted_string_2 = ""; 23625 $allowed_quote_modifiers = ""; 23626 23627 # TV4: 23628 $id_scan_state = ''; 23629 $identifier = ''; 23630 $want_paren = ""; 23631 $indented_if_level = 0; 23632 23633 # TV5: 23634 $nesting_token_string = ""; 23635 $nesting_type_string = ""; 23636 $nesting_block_string = '1'; # initially in a block 23637 $nesting_block_flag = 1; 23638 $nesting_list_string = '0'; # initially not in a list 23639 $nesting_list_flag = 0; # initially not in a list 23640 $ci_string_in_tokenizer = ""; 23641 $continuation_string_in_tokenizer = "0"; 23642 $in_statement_continuation = 0; 23643 $level_in_tokenizer = 0; 23644 $slevel_in_tokenizer = 0; 23645 $rslevel_stack = []; 23646 23647 # TV6: 23648 $last_nonblank_container_type = ''; 23649 $last_nonblank_type_sequence = ''; 23650 $last_last_nonblank_token = ';'; 23651 $last_last_nonblank_type = ';'; 23652 $last_last_nonblank_block_type = ''; 23653 $last_last_nonblank_container_type = ''; 23654 $last_last_nonblank_type_sequence = ''; 23655 $last_nonblank_prototype = ""; 23656 } 23657 23658 sub save_tokenizer_state { 23659 23660 my $rTV1 = [ 23661 $block_type, $container_type, $expecting, 23662 $i, $i_tok, $input_line, 23663 $input_line_number, $last_nonblank_i, $max_token_index, 23664 $next_tok, $next_type, $peeked_ahead, 23665 $prototype, $rhere_target_list, $rtoken_map, 23666 $rtoken_type, $rtokens, $tok, 23667 $type, $type_sequence, $indent_flag, 23668 ]; 23669 23670 my $rTV2 = [ 23671 $routput_token_list, $routput_token_type, 23672 $routput_block_type, $routput_container_type, 23673 $routput_type_sequence, $routput_indent_flag, 23674 ]; 23675 23676 my $rTV3 = [ 23677 $in_quote, $quote_type, 23678 $quote_character, $quote_pos, 23679 $quote_depth, $quoted_string_1, 23680 $quoted_string_2, $allowed_quote_modifiers, 23681 ]; 23682 23683 my $rTV4 = 23684 [ $id_scan_state, $identifier, $want_paren, $indented_if_level ]; 23685 23686 my $rTV5 = [ 23687 $nesting_token_string, $nesting_type_string, 23688 $nesting_block_string, $nesting_block_flag, 23689 $nesting_list_string, $nesting_list_flag, 23690 $ci_string_in_tokenizer, $continuation_string_in_tokenizer, 23691 $in_statement_continuation, $level_in_tokenizer, 23692 $slevel_in_tokenizer, $rslevel_stack, 23693 ]; 23694 23695 my $rTV6 = [ 23696 $last_nonblank_container_type, 23697 $last_nonblank_type_sequence, 23698 $last_last_nonblank_token, 23699 $last_last_nonblank_type, 23700 $last_last_nonblank_block_type, 23701 $last_last_nonblank_container_type, 23702 $last_last_nonblank_type_sequence, 23703 $last_nonblank_prototype, 23704 ]; 23705 return [ $rTV1, $rTV2, $rTV3, $rTV4, $rTV5, $rTV6 ]; 23706 } 23707 23708 sub restore_tokenizer_state { 23709 my ($rstate) = @_; 23710 my ( $rTV1, $rTV2, $rTV3, $rTV4, $rTV5, $rTV6 ) = @{$rstate}; 23711 ( 23712 $block_type, $container_type, $expecting, 23713 $i, $i_tok, $input_line, 23714 $input_line_number, $last_nonblank_i, $max_token_index, 23715 $next_tok, $next_type, $peeked_ahead, 23716 $prototype, $rhere_target_list, $rtoken_map, 23717 $rtoken_type, $rtokens, $tok, 23718 $type, $type_sequence, $indent_flag, 23719 ) = @{$rTV1}; 23720 23721 ( 23722 $routput_token_list, $routput_token_type, 23723 $routput_block_type, $routput_container_type, 23724 $routput_type_sequence, $routput_type_sequence, 23725 ) = @{$rTV2}; 23726 23727 ( 23728 $in_quote, $quote_type, $quote_character, $quote_pos, $quote_depth, 23729 $quoted_string_1, $quoted_string_2, $allowed_quote_modifiers, 23730 ) = @{$rTV3}; 23731 23732 ( $id_scan_state, $identifier, $want_paren, $indented_if_level ) = 23733 @{$rTV4}; 23734 23735 ( 23736 $nesting_token_string, $nesting_type_string, 23737 $nesting_block_string, $nesting_block_flag, 23738 $nesting_list_string, $nesting_list_flag, 23739 $ci_string_in_tokenizer, $continuation_string_in_tokenizer, 23740 $in_statement_continuation, $level_in_tokenizer, 23741 $slevel_in_tokenizer, $rslevel_stack, 23742 ) = @{$rTV5}; 23743 23744 ( 23745 $last_nonblank_container_type, 23746 $last_nonblank_type_sequence, 23747 $last_last_nonblank_token, 23748 $last_last_nonblank_type, 23749 $last_last_nonblank_block_type, 23750 $last_last_nonblank_container_type, 23751 $last_last_nonblank_type_sequence, 23752 $last_nonblank_prototype, 23753 ) = @{$rTV6}; 23754 } 23755 23756 sub get_indentation_level { 23757 23758 # patch to avoid reporting error if indented if is not terminated 23759 if ($indented_if_level) { return $level_in_tokenizer - 1 } 23760 return $level_in_tokenizer; 23761 } 23762 23763 sub reset_indentation_level { 23764 $level_in_tokenizer = $_[0]; 23765 $slevel_in_tokenizer = $_[0]; 23766 push @{$rslevel_stack}, $slevel_in_tokenizer; 23767 } 23768 23769 sub peeked_ahead { 23770 $peeked_ahead = defined( $_[0] ) ? $_[0] : $peeked_ahead; 23771 } 23772 23773 # ------------------------------------------------------------ 23774 # end of tokenizer variable access and manipulation routines 23775 # ------------------------------------------------------------ 23776 23777 # ------------------------------------------------------------ 23778 # beginning of various scanner interface routines 23779 # ------------------------------------------------------------ 23780 sub scan_replacement_text { 23781 23782 # check for here-docs in replacement text invoked by 23783 # a substitution operator with executable modifier 'e'. 23784 # 23785 # given: 23786 # $replacement_text 23787 # return: 23788 # $rht = reference to any here-doc targets 23789 my ($replacement_text) = @_; 23790 23791 # quick check 23792 return undef unless ( $replacement_text =~ /<</ ); 23793 23794 write_logfile_entry("scanning replacement text for here-doc targets\n"); 23795 23796 # save the logger object for error messages 23797 my $logger_object = $tokenizer_self->{_logger_object}; 23798 23799 # localize all package variables 23800 local ( 23801 $tokenizer_self, $last_nonblank_token, 23802 $last_nonblank_type, $last_nonblank_block_type, 23803 $statement_type, $in_attribute_list, 23804 $current_package, $context, 23805 %is_constant, %is_user_function, 23806 %user_function_prototype, %is_block_function, 23807 %is_block_list_function, %saw_function_definition, 23808 $brace_depth, $paren_depth, 23809 $square_bracket_depth, @current_depth, 23810 @total_depth, $total_depth, 23811 @nesting_sequence_number, @current_sequence_number, 23812 @paren_type, @paren_semicolon_count, 23813 @paren_structural_type, @brace_type, 23814 @brace_structural_type, @brace_context, 23815 @brace_package, @square_bracket_type, 23816 @square_bracket_structural_type, @depth_array, 23817 @starting_line_of_current_depth, @nested_ternary_flag, 23818 @nested_statement_type, 23819 ); 23820 23821 # save all lexical variables 23822 my $rstate = save_tokenizer_state(); 23823 _decrement_count(); # avoid error check for multiple tokenizers 23824 23825 # make a new tokenizer 23826 my $rOpts = {}; 23827 my $rpending_logfile_message; 23828 my $source_object = 23829 Perl::Tidy::LineSource->new( \$replacement_text, $rOpts, 23830 $rpending_logfile_message ); 23831 my $tokenizer = Perl::Tidy::Tokenizer->new( 23832 source_object => $source_object, 23833 logger_object => $logger_object, 23834 starting_line_number => $input_line_number, 23835 ); 23836 23837 # scan the replacement text 23838 1 while ( $tokenizer->get_line() ); 23839 23840 # remove any here doc targets 23841 my $rht = undef; 23842 if ( $tokenizer_self->{_in_here_doc} ) { 23843 $rht = []; 23844 push @{$rht}, 23845 [ 23846 $tokenizer_self->{_here_doc_target}, 23847 $tokenizer_self->{_here_quote_character} 23848 ]; 23849 if ( $tokenizer_self->{_rhere_target_list} ) { 23850 push @{$rht}, @{ $tokenizer_self->{_rhere_target_list} }; 23851 $tokenizer_self->{_rhere_target_list} = undef; 23852 } 23853 $tokenizer_self->{_in_here_doc} = undef; 23854 } 23855 23856 # now its safe to report errors 23857 $tokenizer->report_tokenization_errors(); 23858 23859 # restore all tokenizer lexical variables 23860 restore_tokenizer_state($rstate); 23861 23862 # return the here doc targets 23863 return $rht; 23864 } 23865 23866 sub scan_bare_identifier { 23867 ( $i, $tok, $type, $prototype ) = 23868 scan_bare_identifier_do( $input_line, $i, $tok, $type, $prototype, 23869 $rtoken_map, $max_token_index ); 23870 } 23871 23872 sub scan_identifier { 23873 ( $i, $tok, $type, $id_scan_state, $identifier ) = 23874 scan_identifier_do( $i, $id_scan_state, $identifier, $rtokens, 23875 $max_token_index, $expecting ); 23876 } 23877 23878 sub scan_id { 23879 ( $i, $tok, $type, $id_scan_state ) = 23880 scan_id_do( $input_line, $i, $tok, $rtokens, $rtoken_map, 23881 $id_scan_state, $max_token_index ); 23882 } 23883 23884 sub scan_number { 23885 my $number; 23886 ( $i, $type, $number ) = 23887 scan_number_do( $input_line, $i, $rtoken_map, $type, 23888 $max_token_index ); 23889 return $number; 23890 } 23891 23892 # a sub to warn if token found where term expected 23893 sub error_if_expecting_TERM { 23894 if ( $expecting == TERM ) { 23895 if ( $really_want_term{$last_nonblank_type} ) { 23896 unexpected( $tok, "term", $i_tok, $last_nonblank_i, $rtoken_map, 23897 $rtoken_type, $input_line ); 23898 1; 23899 } 23900 } 23901 } 23902 23903 # a sub to warn if token found where operator expected 23904 sub error_if_expecting_OPERATOR { 23905 if ( $expecting == OPERATOR ) { 23906 my $thing = defined $_[0] ? $_[0] : $tok; 23907 unexpected( $thing, "operator", $i_tok, $last_nonblank_i, 23908 $rtoken_map, $rtoken_type, $input_line ); 23909 if ( $i_tok == 0 ) { 23910 interrupt_logfile(); 23911 warning("Missing ';' above?\n"); 23912 resume_logfile(); 23913 } 23914 1; 23915 } 23916 } 23917 23918 # ------------------------------------------------------------ 23919 # end scanner interfaces 23920 # ------------------------------------------------------------ 23921 23922 my %is_for_foreach; 23923 @_ = qw(for foreach); 23924 @is_for_foreach{@_} = (1) x scalar(@_); 23925 23926 my %is_my_our; 23927 @_ = qw(my our); 23928 @is_my_our{@_} = (1) x scalar(@_); 23929 23930 # These keywords may introduce blocks after parenthesized expressions, 23931 # in the form: 23932 # keyword ( .... ) { BLOCK } 23933 # patch for SWITCH/CASE: added 'switch' 'case' 'given' 'when' 23934 my %is_blocktype_with_paren; 23935 @_ = qw(if elsif unless while until for foreach switch case given when); 23936 @is_blocktype_with_paren{@_} = (1) x scalar(@_); 23937 23938 # ------------------------------------------------------------ 23939 # begin hash of code for handling most token types 23940 # ------------------------------------------------------------ 23941 my $tokenization_code = { 23942 23943 # no special code for these types yet, but syntax checks 23944 # could be added 23945 23946## '!' => undef, 23947## '!=' => undef, 23948## '!~' => undef, 23949## '%=' => undef, 23950## '&&=' => undef, 23951## '&=' => undef, 23952## '+=' => undef, 23953## '-=' => undef, 23954## '..' => undef, 23955## '..' => undef, 23956## '...' => undef, 23957## '.=' => undef, 23958## '<<=' => undef, 23959## '<=' => undef, 23960## '<=>' => undef, 23961## '<>' => undef, 23962## '=' => undef, 23963## '==' => undef, 23964## '=~' => undef, 23965## '>=' => undef, 23966## '>>' => undef, 23967## '>>=' => undef, 23968## '\\' => undef, 23969## '^=' => undef, 23970## '|=' => undef, 23971## '||=' => undef, 23972## '//=' => undef, 23973## '~' => undef, 23974## '~~' => undef, 23975## '!~~' => undef, 23976 23977 '>' => sub { 23978 error_if_expecting_TERM() 23979 if ( $expecting == TERM ); 23980 }, 23981 '|' => sub { 23982 error_if_expecting_TERM() 23983 if ( $expecting == TERM ); 23984 }, 23985 '$' => sub { 23986 23987 # start looking for a scalar 23988 error_if_expecting_OPERATOR("Scalar") 23989 if ( $expecting == OPERATOR ); 23990 scan_identifier(); 23991 23992 if ( $identifier eq '$^W' ) { 23993 $tokenizer_self->{_saw_perl_dash_w} = 1; 23994 } 23995 23996 # Check for indentifier in indirect object slot 23997 # (vorboard.pl, sort.t). Something like: 23998 # /^(print|printf|sort|exec|system)$/ 23999 if ( 24000 $is_indirect_object_taker{$last_nonblank_token} 24001 24002 || ( ( $last_nonblank_token eq '(' ) 24003 && $is_indirect_object_taker{ $paren_type[$paren_depth] } ) 24004 || ( $last_nonblank_type =~ /^[Uw]$/ ) # possible object 24005 ) 24006 { 24007 $type = 'Z'; 24008 } 24009 }, 24010 '(' => sub { 24011 24012 ++$paren_depth; 24013 $paren_semicolon_count[$paren_depth] = 0; 24014 if ($want_paren) { 24015 $container_type = $want_paren; 24016 $want_paren = ""; 24017 } 24018 else { 24019 $container_type = $last_nonblank_token; 24020 24021 # We can check for a syntax error here of unexpected '(', 24022 # but this is going to get messy... 24023 if ( 24024 $expecting == OPERATOR 24025 24026 # be sure this is not a method call of the form 24027 # &method(...), $method->(..), &{method}(...), 24028 # $ref[2](list) is ok & short for $ref[2]->(list) 24029 # NOTE: at present, braces in something like &{ xxx } 24030 # are not marked as a block, we might have a method call 24031 && $last_nonblank_token !~ /^([\]\}\&]|\-\>)/ 24032 24033 ) 24034 { 24035 24036 # ref: camel 3 p 703. 24037 if ( $last_last_nonblank_token eq 'do' ) { 24038 complain( 24039"do SUBROUTINE is deprecated; consider & or -> notation\n" 24040 ); 24041 } 24042 else { 24043 24044 # if this is an empty list, (), then it is not an 24045 # error; for example, we might have a constant pi and 24046 # invoke it with pi() or just pi; 24047 my ( $next_nonblank_token, $i_next ) = 24048 find_next_nonblank_token( $i, $rtokens, 24049 $max_token_index ); 24050 if ( $next_nonblank_token ne ')' ) { 24051 my $hint; 24052 error_if_expecting_OPERATOR('('); 24053 24054 if ( $last_nonblank_type eq 'C' ) { 24055 $hint = 24056 "$last_nonblank_token has a void prototype\n"; 24057 } 24058 elsif ( $last_nonblank_type eq 'i' ) { 24059 if ( $i_tok > 0 24060 && $last_nonblank_token =~ /^\$/ ) 24061 { 24062 $hint = 24063"Do you mean '$last_nonblank_token->(' ?\n"; 24064 } 24065 } 24066 if ($hint) { 24067 interrupt_logfile(); 24068 warning($hint); 24069 resume_logfile(); 24070 } 24071 } ## end if ( $next_nonblank_token... 24072 } ## end else [ if ( $last_last_nonblank_token... 24073 } ## end if ( $expecting == OPERATOR... 24074 } 24075 $paren_type[$paren_depth] = $container_type; 24076 ( $type_sequence, $indent_flag ) = 24077 increase_nesting_depth( PAREN, $$rtoken_map[$i_tok] ); 24078 24079 # propagate types down through nested parens 24080 # for example: the second paren in 'if ((' would be structural 24081 # since the first is. 24082 24083 if ( $last_nonblank_token eq '(' ) { 24084 $type = $last_nonblank_type; 24085 } 24086 24087 # We exclude parens as structural after a ',' because it 24088 # causes subtle problems with continuation indentation for 24089 # something like this, where the first 'or' will not get 24090 # indented. 24091 # 24092 # assert( 24093 # __LINE__, 24094 # ( not defined $check ) 24095 # or ref $check 24096 # or $check eq "new" 24097 # or $check eq "old", 24098 # ); 24099 # 24100 # Likewise, we exclude parens where a statement can start 24101 # because of problems with continuation indentation, like 24102 # these: 24103 # 24104 # ($firstline =~ /^#\!.*perl/) 24105 # and (print $File::Find::name, "\n") 24106 # and (return 1); 24107 # 24108 # (ref($usage_fref) =~ /CODE/) 24109 # ? &$usage_fref 24110 # : (&blast_usage, &blast_params, &blast_general_params); 24111 24112 else { 24113 $type = '{'; 24114 } 24115 24116 if ( $last_nonblank_type eq ')' ) { 24117 warning( 24118 "Syntax error? found token '$last_nonblank_type' then '('\n" 24119 ); 24120 } 24121 $paren_structural_type[$paren_depth] = $type; 24122 24123 }, 24124 ')' => sub { 24125 ( $type_sequence, $indent_flag ) = 24126 decrease_nesting_depth( PAREN, $$rtoken_map[$i_tok] ); 24127 24128 if ( $paren_structural_type[$paren_depth] eq '{' ) { 24129 $type = '}'; 24130 } 24131 24132 $container_type = $paren_type[$paren_depth]; 24133 24134 # /^(for|foreach)$/ 24135 if ( $is_for_foreach{ $paren_type[$paren_depth] } ) { 24136 my $num_sc = $paren_semicolon_count[$paren_depth]; 24137 if ( $num_sc > 0 && $num_sc != 2 ) { 24138 warning("Expected 2 ';' in 'for(;;)' but saw $num_sc\n"); 24139 } 24140 } 24141 24142 if ( $paren_depth > 0 ) { $paren_depth-- } 24143 }, 24144 ',' => sub { 24145 if ( $last_nonblank_type eq ',' ) { 24146 complain("Repeated ','s \n"); 24147 } 24148 24149 # patch for operator_expected: note if we are in the list (use.t) 24150 if ( $statement_type eq 'use' ) { $statement_type = '_use' } 24151## FIXME: need to move this elsewhere, perhaps check after a '(' 24152## elsif ($last_nonblank_token eq '(') { 24153## warning("Leading ','s illegal in some versions of perl\n"); 24154## } 24155 }, 24156 ';' => sub { 24157 $context = UNKNOWN_CONTEXT; 24158 $statement_type = ''; 24159 24160 # /^(for|foreach)$/ 24161 if ( $is_for_foreach{ $paren_type[$paren_depth] } ) 24162 { # mark ; in for loop 24163 24164 # Be careful: we do not want a semicolon such as the 24165 # following to be included: 24166 # 24167 # for (sort {strcoll($a,$b);} keys %investments) { 24168 24169 if ( $brace_depth == $depth_array[PAREN][BRACE][$paren_depth] 24170 && $square_bracket_depth == 24171 $depth_array[PAREN][SQUARE_BRACKET][$paren_depth] ) 24172 { 24173 24174 $type = 'f'; 24175 $paren_semicolon_count[$paren_depth]++; 24176 } 24177 } 24178 24179 }, 24180 '"' => sub { 24181 error_if_expecting_OPERATOR("String") 24182 if ( $expecting == OPERATOR ); 24183 $in_quote = 1; 24184 $type = 'Q'; 24185 $allowed_quote_modifiers = ""; 24186 }, 24187 "'" => sub { 24188 error_if_expecting_OPERATOR("String") 24189 if ( $expecting == OPERATOR ); 24190 $in_quote = 1; 24191 $type = 'Q'; 24192 $allowed_quote_modifiers = ""; 24193 }, 24194 '`' => sub { 24195 error_if_expecting_OPERATOR("String") 24196 if ( $expecting == OPERATOR ); 24197 $in_quote = 1; 24198 $type = 'Q'; 24199 $allowed_quote_modifiers = ""; 24200 }, 24201 '/' => sub { 24202 my $is_pattern; 24203 24204 if ( $expecting == UNKNOWN ) { # indeterminte, must guess.. 24205 my $msg; 24206 ( $is_pattern, $msg ) = 24207 guess_if_pattern_or_division( $i, $rtokens, $rtoken_map, 24208 $max_token_index ); 24209 24210 if ($msg) { 24211 write_diagnostics("DIVIDE:$msg\n"); 24212 write_logfile_entry($msg); 24213 } 24214 } 24215 else { $is_pattern = ( $expecting == TERM ) } 24216 24217 if ($is_pattern) { 24218 $in_quote = 1; 24219 $type = 'Q'; 24220 $allowed_quote_modifiers = '[msixpodualgc]'; 24221 } 24222 else { # not a pattern; check for a /= token 24223 24224 if ( $$rtokens[ $i + 1 ] eq '=' ) { # form token /= 24225 $i++; 24226 $tok = '/='; 24227 $type = $tok; 24228 } 24229 24230 #DEBUG - collecting info on what tokens follow a divide 24231 # for development of guessing algorithm 24232 #if ( numerator_expected( $i, $rtokens, $max_token_index ) < 0 ) { 24233 # #write_diagnostics( "DIVIDE? $input_line\n" ); 24234 #} 24235 } 24236 }, 24237 '{' => sub { 24238 24239 # if we just saw a ')', we will label this block with 24240 # its type. We need to do this to allow sub 24241 # code_block_type to determine if this brace starts a 24242 # code block or anonymous hash. (The type of a paren 24243 # pair is the preceding token, such as 'if', 'else', 24244 # etc). 24245 $container_type = ""; 24246 24247 # ATTRS: for a '{' following an attribute list, reset 24248 # things to look like we just saw the sub name 24249 if ( $statement_type =~ /^sub/ ) { 24250 $last_nonblank_token = $statement_type; 24251 $last_nonblank_type = 'i'; 24252 $statement_type = ""; 24253 } 24254 24255 # patch for SWITCH/CASE: hide these keywords from an immediately 24256 # following opening brace 24257 elsif ( ( $statement_type eq 'case' || $statement_type eq 'when' ) 24258 && $statement_type eq $last_nonblank_token ) 24259 { 24260 $last_nonblank_token = ";"; 24261 } 24262 24263 elsif ( $last_nonblank_token eq ')' ) { 24264 $last_nonblank_token = $paren_type[ $paren_depth + 1 ]; 24265 24266 # defensive move in case of a nesting error (pbug.t) 24267 # in which this ')' had no previous '(' 24268 # this nesting error will have been caught 24269 if ( !defined($last_nonblank_token) ) { 24270 $last_nonblank_token = 'if'; 24271 } 24272 24273 # check for syntax error here; 24274 unless ( $is_blocktype_with_paren{$last_nonblank_token} ) { 24275 my $list = join( ' ', sort keys %is_blocktype_with_paren ); 24276 warning( 24277 "syntax error at ') {', didn't see one of: $list\n"); 24278 } 24279 } 24280 24281 # patch for paren-less for/foreach glitch, part 2. 24282 # see note below under 'qw' 24283 elsif ($last_nonblank_token eq 'qw' 24284 && $is_for_foreach{$want_paren} ) 24285 { 24286 $last_nonblank_token = $want_paren; 24287 if ( $last_last_nonblank_token eq $want_paren ) { 24288 warning( 24289"syntax error at '$want_paren .. {' -- missing \$ loop variable\n" 24290 ); 24291 24292 } 24293 $want_paren = ""; 24294 } 24295 24296 # now identify which of the three possible types of 24297 # curly braces we have: hash index container, anonymous 24298 # hash reference, or code block. 24299 24300 # non-structural (hash index) curly brace pair 24301 # get marked 'L' and 'R' 24302 if ( is_non_structural_brace() ) { 24303 $type = 'L'; 24304 24305 # patch for SWITCH/CASE: 24306 # allow paren-less identifier after 'when' 24307 # if the brace is preceded by a space 24308 if ( $statement_type eq 'when' 24309 && $last_nonblank_type eq 'i' 24310 && $last_last_nonblank_type eq 'k' 24311 && ( $i_tok == 0 || $rtoken_type->[ $i_tok - 1 ] eq 'b' ) ) 24312 { 24313 $type = '{'; 24314 $block_type = $statement_type; 24315 } 24316 } 24317 24318 # code and anonymous hash have the same type, '{', but are 24319 # distinguished by 'block_type', 24320 # which will be blank for an anonymous hash 24321 else { 24322 24323 $block_type = code_block_type( $i_tok, $rtokens, $rtoken_type, 24324 $max_token_index ); 24325 24326 # remember a preceding smartmatch operator 24327 if ( $last_nonblank_type eq '~~' ) { 24328 $block_type = $last_nonblank_type; 24329 } 24330 24331 # patch to promote bareword type to function taking block 24332 if ( $block_type 24333 && $last_nonblank_type eq 'w' 24334 && $last_nonblank_i >= 0 ) 24335 { 24336 if ( $routput_token_type->[$last_nonblank_i] eq 'w' ) { 24337 $routput_token_type->[$last_nonblank_i] = 'G'; 24338 } 24339 } 24340 24341 # patch for SWITCH/CASE: if we find a stray opening block brace 24342 # where we might accept a 'case' or 'when' block, then take it 24343 if ( $statement_type eq 'case' 24344 || $statement_type eq 'when' ) 24345 { 24346 if ( !$block_type || $block_type eq '}' ) { 24347 $block_type = $statement_type; 24348 } 24349 } 24350 } 24351 $brace_type[ ++$brace_depth ] = $block_type; 24352 $brace_package[$brace_depth] = $current_package; 24353 $brace_structural_type[$brace_depth] = $type; 24354 $brace_context[$brace_depth] = $context; 24355 ( $type_sequence, $indent_flag ) = 24356 increase_nesting_depth( BRACE, $$rtoken_map[$i_tok] ); 24357 }, 24358 '}' => sub { 24359 $block_type = $brace_type[$brace_depth]; 24360 if ($block_type) { $statement_type = '' } 24361 if ( defined( $brace_package[$brace_depth] ) ) { 24362 $current_package = $brace_package[$brace_depth]; 24363 } 24364 24365 # can happen on brace error (caught elsewhere) 24366 else { 24367 } 24368 ( $type_sequence, $indent_flag ) = 24369 decrease_nesting_depth( BRACE, $$rtoken_map[$i_tok] ); 24370 24371 if ( $brace_structural_type[$brace_depth] eq 'L' ) { 24372 $type = 'R'; 24373 } 24374 24375 # propagate type information for 'do' and 'eval' blocks, and also 24376 # for smartmatch operator. This is necessary to enable us to know 24377 # if an operator or term is expected next. 24378 if ( $is_block_operator{$block_type} || $block_type eq '~~' ) { 24379 $tok = $block_type; 24380 } 24381 24382 $context = $brace_context[$brace_depth]; 24383 if ( $brace_depth > 0 ) { $brace_depth--; } 24384 }, 24385 '&' => sub { # maybe sub call? start looking 24386 24387 # We have to check for sub call unless we are sure we 24388 # are expecting an operator. This example from s2p 24389 # got mistaken as a q operator in an early version: 24390 # print BODY &q(<<'EOT'); 24391 if ( $expecting != OPERATOR ) { 24392 24393 # But only look for a sub call if we are expecting a term or 24394 # if there is no existing space after the &. 24395 # For example we probably don't want & as sub call here: 24396 # Fcntl::S_IRUSR & $mode; 24397 if ( $expecting == TERM || $next_type ne 'b' ) { 24398 scan_identifier(); 24399 } 24400 } 24401 else { 24402 } 24403 }, 24404 '<' => sub { # angle operator or less than? 24405 24406 if ( $expecting != OPERATOR ) { 24407 ( $i, $type ) = 24408 find_angle_operator_termination( $input_line, $i, $rtoken_map, 24409 $expecting, $max_token_index ); 24410 24411 if ( $type eq '<' && $expecting == TERM ) { 24412 error_if_expecting_TERM(); 24413 interrupt_logfile(); 24414 warning("Unterminated <> operator?\n"); 24415 resume_logfile(); 24416 } 24417 } 24418 else { 24419 } 24420 }, 24421 '?' => sub { # ?: conditional or starting pattern? 24422 24423 my $is_pattern; 24424 24425 if ( $expecting == UNKNOWN ) { 24426 24427 my $msg; 24428 ( $is_pattern, $msg ) = 24429 guess_if_pattern_or_conditional( $i, $rtokens, $rtoken_map, 24430 $max_token_index ); 24431 24432 if ($msg) { write_logfile_entry($msg) } 24433 } 24434 else { $is_pattern = ( $expecting == TERM ) } 24435 24436 if ($is_pattern) { 24437 $in_quote = 1; 24438 $type = 'Q'; 24439 $allowed_quote_modifiers = '[msixpodualgc]'; 24440 } 24441 else { 24442 ( $type_sequence, $indent_flag ) = 24443 increase_nesting_depth( QUESTION_COLON, 24444 $$rtoken_map[$i_tok] ); 24445 } 24446 }, 24447 '*' => sub { # typeglob, or multiply? 24448 24449 if ( $expecting == TERM ) { 24450 scan_identifier(); 24451 } 24452 else { 24453 24454 if ( $$rtokens[ $i + 1 ] eq '=' ) { 24455 $tok = '*='; 24456 $type = $tok; 24457 $i++; 24458 } 24459 elsif ( $$rtokens[ $i + 1 ] eq '*' ) { 24460 $tok = '**'; 24461 $type = $tok; 24462 $i++; 24463 if ( $$rtokens[ $i + 1 ] eq '=' ) { 24464 $tok = '**='; 24465 $type = $tok; 24466 $i++; 24467 } 24468 } 24469 } 24470 }, 24471 '.' => sub { # what kind of . ? 24472 24473 if ( $expecting != OPERATOR ) { 24474 scan_number(); 24475 if ( $type eq '.' ) { 24476 error_if_expecting_TERM() 24477 if ( $expecting == TERM ); 24478 } 24479 } 24480 else { 24481 } 24482 }, 24483 ':' => sub { 24484 24485 # if this is the first nonblank character, call it a label 24486 # since perl seems to just swallow it 24487 if ( $input_line_number == 1 && $last_nonblank_i == -1 ) { 24488 $type = 'J'; 24489 } 24490 24491 # ATTRS: check for a ':' which introduces an attribute list 24492 # (this might eventually get its own token type) 24493 elsif ( $statement_type =~ /^sub/ ) { 24494 $type = 'A'; 24495 $in_attribute_list = 1; 24496 } 24497 24498 # check for scalar attribute, such as 24499 # my $foo : shared = 1; 24500 elsif ($is_my_our{$statement_type} 24501 && $current_depth[QUESTION_COLON] == 0 ) 24502 { 24503 $type = 'A'; 24504 $in_attribute_list = 1; 24505 } 24506 24507 # otherwise, it should be part of a ?/: operator 24508 else { 24509 ( $type_sequence, $indent_flag ) = 24510 decrease_nesting_depth( QUESTION_COLON, 24511 $$rtoken_map[$i_tok] ); 24512 if ( $last_nonblank_token eq '?' ) { 24513 warning("Syntax error near ? :\n"); 24514 } 24515 } 24516 }, 24517 '+' => sub { # what kind of plus? 24518 24519 if ( $expecting == TERM ) { 24520 my $number = scan_number(); 24521 24522 # unary plus is safest assumption if not a number 24523 if ( !defined($number) ) { $type = 'p'; } 24524 } 24525 elsif ( $expecting == OPERATOR ) { 24526 } 24527 else { 24528 if ( $next_type eq 'w' ) { $type = 'p' } 24529 } 24530 }, 24531 '@' => sub { 24532 24533 error_if_expecting_OPERATOR("Array") 24534 if ( $expecting == OPERATOR ); 24535 scan_identifier(); 24536 }, 24537 '%' => sub { # hash or modulo? 24538 24539 # first guess is hash if no following blank 24540 if ( $expecting == UNKNOWN ) { 24541 if ( $next_type ne 'b' ) { $expecting = TERM } 24542 } 24543 if ( $expecting == TERM ) { 24544 scan_identifier(); 24545 } 24546 }, 24547 '[' => sub { 24548 $square_bracket_type[ ++$square_bracket_depth ] = 24549 $last_nonblank_token; 24550 ( $type_sequence, $indent_flag ) = 24551 increase_nesting_depth( SQUARE_BRACKET, $$rtoken_map[$i_tok] ); 24552 24553 # It may seem odd, but structural square brackets have 24554 # type '{' and '}'. This simplifies the indentation logic. 24555 if ( !is_non_structural_brace() ) { 24556 $type = '{'; 24557 } 24558 $square_bracket_structural_type[$square_bracket_depth] = $type; 24559 }, 24560 ']' => sub { 24561 ( $type_sequence, $indent_flag ) = 24562 decrease_nesting_depth( SQUARE_BRACKET, $$rtoken_map[$i_tok] ); 24563 24564 if ( $square_bracket_structural_type[$square_bracket_depth] eq '{' ) 24565 { 24566 $type = '}'; 24567 } 24568 24569 # propagate type information for smartmatch operator. This is 24570 # necessary to enable us to know if an operator or term is expected 24571 # next. 24572 if ( $square_bracket_type[$square_bracket_depth] eq '~~' ) { 24573 $tok = $square_bracket_type[$square_bracket_depth]; 24574 } 24575 24576 if ( $square_bracket_depth > 0 ) { $square_bracket_depth--; } 24577 }, 24578 '-' => sub { # what kind of minus? 24579 24580 if ( ( $expecting != OPERATOR ) 24581 && $is_file_test_operator{$next_tok} ) 24582 { 24583 my ( $next_nonblank_token, $i_next ) = 24584 find_next_nonblank_token( $i + 1, $rtokens, 24585 $max_token_index ); 24586 24587 # check for a quoted word like "-w=>xx"; 24588 # it is sufficient to just check for a following '=' 24589 if ( $next_nonblank_token eq '=' ) { 24590 $type = 'm'; 24591 } 24592 else { 24593 $i++; 24594 $tok .= $next_tok; 24595 $type = 'F'; 24596 } 24597 } 24598 elsif ( $expecting == TERM ) { 24599 my $number = scan_number(); 24600 24601 # maybe part of bareword token? unary is safest 24602 if ( !defined($number) ) { $type = 'm'; } 24603 24604 } 24605 elsif ( $expecting == OPERATOR ) { 24606 } 24607 else { 24608 24609 if ( $next_type eq 'w' ) { 24610 $type = 'm'; 24611 } 24612 } 24613 }, 24614 24615 '^' => sub { 24616 24617 # check for special variables like ${^WARNING_BITS} 24618 if ( $expecting == TERM ) { 24619 24620 # FIXME: this should work but will not catch errors 24621 # because we also have to be sure that previous token is 24622 # a type character ($,@,%). 24623 if ( $last_nonblank_token eq '{' 24624 && ( $next_tok =~ /^[A-Za-z_]/ ) ) 24625 { 24626 24627 if ( $next_tok eq 'W' ) { 24628 $tokenizer_self->{_saw_perl_dash_w} = 1; 24629 } 24630 $tok = $tok . $next_tok; 24631 $i = $i + 1; 24632 $type = 'w'; 24633 } 24634 24635 else { 24636 unless ( error_if_expecting_TERM() ) { 24637 24638 # Something like this is valid but strange: 24639 # undef ^I; 24640 complain("The '^' seems unusual here\n"); 24641 } 24642 } 24643 } 24644 }, 24645 24646 '::' => sub { # probably a sub call 24647 scan_bare_identifier(); 24648 }, 24649 '<<' => sub { # maybe a here-doc? 24650 return 24651 unless ( $i < $max_token_index ) 24652 ; # here-doc not possible if end of line 24653 24654 if ( $expecting != OPERATOR ) { 24655 my ( $found_target, $here_doc_target, $here_quote_character, 24656 $saw_error ); 24657 ( 24658 $found_target, $here_doc_target, $here_quote_character, $i, 24659 $saw_error 24660 ) 24661 = find_here_doc( $expecting, $i, $rtokens, $rtoken_map, 24662 $max_token_index ); 24663 24664 if ($found_target) { 24665 push @{$rhere_target_list}, 24666 [ $here_doc_target, $here_quote_character ]; 24667 $type = 'h'; 24668 if ( length($here_doc_target) > 80 ) { 24669 my $truncated = substr( $here_doc_target, 0, 80 ); 24670 complain("Long here-target: '$truncated' ...\n"); 24671 } 24672 elsif ( $here_doc_target !~ /^[A-Z_]\w+$/ ) { 24673 complain( 24674 "Unconventional here-target: '$here_doc_target'\n" 24675 ); 24676 } 24677 } 24678 elsif ( $expecting == TERM ) { 24679 unless ($saw_error) { 24680 24681 # shouldn't happen.. 24682 warning("Program bug; didn't find here doc target\n"); 24683 report_definite_bug(); 24684 } 24685 } 24686 } 24687 else { 24688 } 24689 }, 24690 '->' => sub { 24691 24692 # if -> points to a bare word, we must scan for an identifier, 24693 # otherwise something like ->y would look like the y operator 24694 scan_identifier(); 24695 }, 24696 24697 # type = 'pp' for pre-increment, '++' for post-increment 24698 '++' => sub { 24699 if ( $expecting == TERM ) { $type = 'pp' } 24700 elsif ( $expecting == UNKNOWN ) { 24701 my ( $next_nonblank_token, $i_next ) = 24702 find_next_nonblank_token( $i, $rtokens, $max_token_index ); 24703 if ( $next_nonblank_token eq '$' ) { $type = 'pp' } 24704 } 24705 }, 24706 24707 '=>' => sub { 24708 if ( $last_nonblank_type eq $tok ) { 24709 complain("Repeated '=>'s \n"); 24710 } 24711 24712 # patch for operator_expected: note if we are in the list (use.t) 24713 # TODO: make version numbers a new token type 24714 if ( $statement_type eq 'use' ) { $statement_type = '_use' } 24715 }, 24716 24717 # type = 'mm' for pre-decrement, '--' for post-decrement 24718 '--' => sub { 24719 24720 if ( $expecting == TERM ) { $type = 'mm' } 24721 elsif ( $expecting == UNKNOWN ) { 24722 my ( $next_nonblank_token, $i_next ) = 24723 find_next_nonblank_token( $i, $rtokens, $max_token_index ); 24724 if ( $next_nonblank_token eq '$' ) { $type = 'mm' } 24725 } 24726 }, 24727 24728 '&&' => sub { 24729 error_if_expecting_TERM() 24730 if ( $expecting == TERM ); 24731 }, 24732 24733 '||' => sub { 24734 error_if_expecting_TERM() 24735 if ( $expecting == TERM ); 24736 }, 24737 24738 '//' => sub { 24739 error_if_expecting_TERM() 24740 if ( $expecting == TERM ); 24741 }, 24742 }; 24743 24744 # ------------------------------------------------------------ 24745 # end hash of code for handling individual token types 24746 # ------------------------------------------------------------ 24747 24748 my %matching_start_token = ( '}' => '{', ']' => '[', ')' => '(' ); 24749 24750 # These block types terminate statements and do not need a trailing 24751 # semicolon 24752 # patched for SWITCH/CASE/ 24753 my %is_zero_continuation_block_type; 24754 @_ = qw( } { BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue ; 24755 if elsif else unless while until for foreach switch case given when); 24756 @is_zero_continuation_block_type{@_} = (1) x scalar(@_); 24757 24758 my %is_not_zero_continuation_block_type; 24759 @_ = qw(sort grep map do eval); 24760 @is_not_zero_continuation_block_type{@_} = (1) x scalar(@_); 24761 24762 my %is_logical_container; 24763 @_ = qw(if elsif unless while and or err not && ! || for foreach); 24764 @is_logical_container{@_} = (1) x scalar(@_); 24765 24766 my %is_binary_type; 24767 @_ = qw(|| &&); 24768 @is_binary_type{@_} = (1) x scalar(@_); 24769 24770 my %is_binary_keyword; 24771 @_ = qw(and or err eq ne cmp); 24772 @is_binary_keyword{@_} = (1) x scalar(@_); 24773 24774 # 'L' is token for opening { at hash key 24775 my %is_opening_type; 24776 @_ = qw" L { ( [ "; 24777 @is_opening_type{@_} = (1) x scalar(@_); 24778 24779 # 'R' is token for closing } at hash key 24780 my %is_closing_type; 24781 @_ = qw" R } ) ] "; 24782 @is_closing_type{@_} = (1) x scalar(@_); 24783 24784 my %is_redo_last_next_goto; 24785 @_ = qw(redo last next goto); 24786 @is_redo_last_next_goto{@_} = (1) x scalar(@_); 24787 24788 my %is_use_require; 24789 @_ = qw(use require); 24790 @is_use_require{@_} = (1) x scalar(@_); 24791 24792 my %is_sub_package; 24793 @_ = qw(sub package); 24794 @is_sub_package{@_} = (1) x scalar(@_); 24795 24796 # This hash holds the hash key in $tokenizer_self for these keywords: 24797 my %is_format_END_DATA = ( 24798 'format' => '_in_format', 24799 '__END__' => '_in_end', 24800 '__DATA__' => '_in_data', 24801 ); 24802 24803 # ref: camel 3 p 147, 24804 # but perl may accept undocumented flags 24805 # perl 5.10 adds 'p' (preserve) 24806 # Perl version 5.16, http://perldoc.perl.org/perlop.html, has these: 24807 # /PATTERN/msixpodualgc or m?PATTERN?msixpodualgc 24808 # s/PATTERN/REPLACEMENT/msixpodualgcer 24809 # y/SEARCHLIST/REPLACEMENTLIST/cdsr 24810 # tr/SEARCHLIST/REPLACEMENTLIST/cdsr 24811 # qr/STRING/msixpodual 24812 my %quote_modifiers = ( 24813 's' => '[msixpodualgcer]', 24814 'y' => '[cdsr]', 24815 'tr' => '[cdsr]', 24816 'm' => '[msixpodualgc]', 24817 'qr' => '[msixpodual]', 24818 'q' => "", 24819 'qq' => "", 24820 'qw' => "", 24821 'qx' => "", 24822 ); 24823 24824 # table showing how many quoted things to look for after quote operator.. 24825 # s, y, tr have 2 (pattern and replacement) 24826 # others have 1 (pattern only) 24827 my %quote_items = ( 24828 's' => 2, 24829 'y' => 2, 24830 'tr' => 2, 24831 'm' => 1, 24832 'qr' => 1, 24833 'q' => 1, 24834 'qq' => 1, 24835 'qw' => 1, 24836 'qx' => 1, 24837 ); 24838 24839 sub tokenize_this_line { 24840 24841 # This routine breaks a line of perl code into tokens which are of use in 24842 # indentation and reformatting. One of my goals has been to define tokens 24843 # such that a newline may be inserted between any pair of tokens without 24844 # changing or invalidating the program. This version comes close to this, 24845 # although there are necessarily a few exceptions which must be caught by 24846 # the formatter. Many of these involve the treatment of bare words. 24847 # 24848 # The tokens and their types are returned in arrays. See previous 24849 # routine for their names. 24850 # 24851 # See also the array "valid_token_types" in the BEGIN section for an 24852 # up-to-date list. 24853 # 24854 # To simplify things, token types are either a single character, or they 24855 # are identical to the tokens themselves. 24856 # 24857 # As a debugging aid, the -D flag creates a file containing a side-by-side 24858 # comparison of the input string and its tokenization for each line of a file. 24859 # This is an invaluable debugging aid. 24860 # 24861 # In addition to tokens, and some associated quantities, the tokenizer 24862 # also returns flags indication any special line types. These include 24863 # quotes, here_docs, formats. 24864 # 24865 # ----------------------------------------------------------------------- 24866 # 24867 # How to add NEW_TOKENS: 24868 # 24869 # New token types will undoubtedly be needed in the future both to keep up 24870 # with changes in perl and to help adapt the tokenizer to other applications. 24871 # 24872 # Here are some notes on the minimal steps. I wrote these notes while 24873 # adding the 'v' token type for v-strings, which are things like version 24874 # numbers 5.6.0, and ip addresses, and will use that as an example. ( You 24875 # can use your editor to search for the string "NEW_TOKENS" to find the 24876 # appropriate sections to change): 24877 # 24878 # *. Try to talk somebody else into doing it! If not, .. 24879 # 24880 # *. Make a backup of your current version in case things don't work out! 24881 # 24882 # *. Think of a new, unused character for the token type, and add to 24883 # the array @valid_token_types in the BEGIN section of this package. 24884 # For example, I used 'v' for v-strings. 24885 # 24886 # *. Implement coding to recognize the $type of the token in this routine. 24887 # This is the hardest part, and is best done by imitating or modifying 24888 # some of the existing coding. For example, to recognize v-strings, I 24889 # patched 'sub scan_bare_identifier' to recognize v-strings beginning with 24890 # 'v' and 'sub scan_number' to recognize v-strings without the leading 'v'. 24891 # 24892 # *. Update sub operator_expected. This update is critically important but 24893 # the coding is trivial. Look at the comments in that routine for help. 24894 # For v-strings, which should behave like numbers, I just added 'v' to the 24895 # regex used to handle numbers and strings (types 'n' and 'Q'). 24896 # 24897 # *. Implement a 'bond strength' rule in sub set_bond_strengths in 24898 # Perl::Tidy::Formatter for breaking lines around this token type. You can 24899 # skip this step and take the default at first, then adjust later to get 24900 # desired results. For adding type 'v', I looked at sub bond_strength and 24901 # saw that number type 'n' was using default strengths, so I didn't do 24902 # anything. I may tune it up someday if I don't like the way line 24903 # breaks with v-strings look. 24904 # 24905 # *. Implement a 'whitespace' rule in sub set_white_space_flag in 24906 # Perl::Tidy::Formatter. For adding type 'v', I looked at this routine 24907 # and saw that type 'n' used spaces on both sides, so I just added 'v' 24908 # to the array @spaces_both_sides. 24909 # 24910 # *. Update HtmlWriter package so that users can colorize the token as 24911 # desired. This is quite easy; see comments identified by 'NEW_TOKENS' in 24912 # that package. For v-strings, I initially chose to use a default color 24913 # equal to the default for numbers, but it might be nice to change that 24914 # eventually. 24915 # 24916 # *. Update comments in Perl::Tidy::Tokenizer::dump_token_types. 24917 # 24918 # *. Run lots and lots of debug tests. Start with special files designed 24919 # to test the new token type. Run with the -D flag to create a .DEBUG 24920 # file which shows the tokenization. When these work ok, test as many old 24921 # scripts as possible. Start with all of the '.t' files in the 'test' 24922 # directory of the distribution file. Compare .tdy output with previous 24923 # version and updated version to see the differences. Then include as 24924 # many more files as possible. My own technique has been to collect a huge 24925 # number of perl scripts (thousands!) into one directory and run perltidy 24926 # *, then run diff between the output of the previous version and the 24927 # current version. 24928 # 24929 # *. For another example, search for the smartmatch operator '~~' 24930 # with your editor to see where updates were made for it. 24931 # 24932 # ----------------------------------------------------------------------- 24933 24934 my $line_of_tokens = shift; 24935 my ($untrimmed_input_line) = $line_of_tokens->{_line_text}; 24936 24937 # patch while coding change is underway 24938 # make callers private data to allow access 24939 # $tokenizer_self = $caller_tokenizer_self; 24940 24941 # extract line number for use in error messages 24942 $input_line_number = $line_of_tokens->{_line_number}; 24943 24944 # reinitialize for multi-line quote 24945 $line_of_tokens->{_starting_in_quote} = $in_quote && $quote_type eq 'Q'; 24946 24947 # check for pod documentation 24948 if ( ( $untrimmed_input_line =~ /^=[A-Za-z_]/ ) ) { 24949 24950 # must not be in multi-line quote 24951 # and must not be in an eqn 24952 if ( !$in_quote and ( operator_expected( 'b', '=', 'b' ) == TERM ) ) 24953 { 24954 $tokenizer_self->{_in_pod} = 1; 24955 return; 24956 } 24957 } 24958 24959 $input_line = $untrimmed_input_line; 24960 24961 chomp $input_line; 24962 24963 # trim start of this line unless we are continuing a quoted line 24964 # do not trim end because we might end in a quote (test: deken4.pl) 24965 # Perl::Tidy::Formatter will delete needless trailing blanks 24966 unless ( $in_quote && ( $quote_type eq 'Q' ) ) { 24967 $input_line =~ s/^\s*//; # trim left end 24968 } 24969 24970 # update the copy of the line for use in error messages 24971 # This must be exactly what we give the pre_tokenizer 24972 $tokenizer_self->{_line_text} = $input_line; 24973 24974 # re-initialize for the main loop 24975 $routput_token_list = []; # stack of output token indexes 24976 $routput_token_type = []; # token types 24977 $routput_block_type = []; # types of code block 24978 $routput_container_type = []; # paren types, such as if, elsif, .. 24979 $routput_type_sequence = []; # nesting sequential number 24980 24981 $rhere_target_list = []; 24982 24983 $tok = $last_nonblank_token; 24984 $type = $last_nonblank_type; 24985 $prototype = $last_nonblank_prototype; 24986 $last_nonblank_i = -1; 24987 $block_type = $last_nonblank_block_type; 24988 $container_type = $last_nonblank_container_type; 24989 $type_sequence = $last_nonblank_type_sequence; 24990 $indent_flag = 0; 24991 $peeked_ahead = 0; 24992 24993 # tokenization is done in two stages.. 24994 # stage 1 is a very simple pre-tokenization 24995 my $max_tokens_wanted = 0; # this signals pre_tokenize to get all tokens 24996 24997 # a little optimization for a full-line comment 24998 if ( !$in_quote && ( $input_line =~ /^#/ ) ) { 24999 $max_tokens_wanted = 1 # no use tokenizing a comment 25000 } 25001 25002 # start by breaking the line into pre-tokens 25003 ( $rtokens, $rtoken_map, $rtoken_type ) = 25004 pre_tokenize( $input_line, $max_tokens_wanted ); 25005 25006 $max_token_index = scalar(@$rtokens) - 1; 25007 push( @$rtokens, ' ', ' ', ' ' ); # extra whitespace simplifies logic 25008 push( @$rtoken_map, 0, 0, 0 ); # shouldn't be referenced 25009 push( @$rtoken_type, 'b', 'b', 'b' ); 25010 25011 # initialize for main loop 25012 for $i ( 0 .. $max_token_index + 3 ) { 25013 $routput_token_type->[$i] = ""; 25014 $routput_block_type->[$i] = ""; 25015 $routput_container_type->[$i] = ""; 25016 $routput_type_sequence->[$i] = ""; 25017 $routput_indent_flag->[$i] = 0; 25018 } 25019 $i = -1; 25020 $i_tok = -1; 25021 25022 # ------------------------------------------------------------ 25023 # begin main tokenization loop 25024 # ------------------------------------------------------------ 25025 25026 # we are looking at each pre-token of one line and combining them 25027 # into tokens 25028 while ( ++$i <= $max_token_index ) { 25029 25030 if ($in_quote) { # continue looking for end of a quote 25031 $type = $quote_type; 25032 25033 unless ( @{$routput_token_list} ) 25034 { # initialize if continuation line 25035 push( @{$routput_token_list}, $i ); 25036 $routput_token_type->[$i] = $type; 25037 25038 } 25039 $tok = $quote_character unless ( $quote_character =~ /^\s*$/ ); 25040 25041 # scan for the end of the quote or pattern 25042 ( 25043 $i, $in_quote, $quote_character, $quote_pos, $quote_depth, 25044 $quoted_string_1, $quoted_string_2 25045 ) 25046 = do_quote( 25047 $i, $in_quote, $quote_character, 25048 $quote_pos, $quote_depth, $quoted_string_1, 25049 $quoted_string_2, $rtokens, $rtoken_map, 25050 $max_token_index 25051 ); 25052 25053 # all done if we didn't find it 25054 last if ($in_quote); 25055 25056 # save pattern and replacement text for rescanning 25057 my $qs1 = $quoted_string_1; 25058 my $qs2 = $quoted_string_2; 25059 25060 # re-initialize for next search 25061 $quote_character = ''; 25062 $quote_pos = 0; 25063 $quote_type = 'Q'; 25064 $quoted_string_1 = ""; 25065 $quoted_string_2 = ""; 25066 last if ( ++$i > $max_token_index ); 25067 25068 # look for any modifiers 25069 if ($allowed_quote_modifiers) { 25070 25071 # check for exact quote modifiers 25072 if ( $$rtokens[$i] =~ /^[A-Za-z_]/ ) { 25073 my $str = $$rtokens[$i]; 25074 my $saw_modifier_e; 25075 while ( $str =~ /\G$allowed_quote_modifiers/gc ) { 25076 my $pos = pos($str); 25077 my $char = substr( $str, $pos - 1, 1 ); 25078 $saw_modifier_e ||= ( $char eq 'e' ); 25079 } 25080 25081 # For an 'e' quote modifier we must scan the replacement 25082 # text for here-doc targets. 25083 if ($saw_modifier_e) { 25084 25085 my $rht = scan_replacement_text($qs1); 25086 25087 # Change type from 'Q' to 'h' for quotes with 25088 # here-doc targets so that the formatter (see sub 25089 # print_line_of_tokens) will not make any line 25090 # breaks after this point. 25091 if ($rht) { 25092 push @{$rhere_target_list}, @{$rht}; 25093 $type = 'h'; 25094 if ( $i_tok < 0 ) { 25095 my $ilast = $routput_token_list->[-1]; 25096 $routput_token_type->[$ilast] = $type; 25097 } 25098 } 25099 } 25100 25101 if ( defined( pos($str) ) ) { 25102 25103 # matched 25104 if ( pos($str) == length($str) ) { 25105 last if ( ++$i > $max_token_index ); 25106 } 25107 25108 # Looks like a joined quote modifier 25109 # and keyword, maybe something like 25110 # s/xxx/yyy/gefor @k=... 25111 # Example is "galgen.pl". Would have to split 25112 # the word and insert a new token in the 25113 # pre-token list. This is so rare that I haven't 25114 # done it. Will just issue a warning citation. 25115 25116 # This error might also be triggered if my quote 25117 # modifier characters are incomplete 25118 else { 25119 warning(<<EOM); 25120 25121Partial match to quote modifier $allowed_quote_modifiers at word: '$str' 25122Please put a space between quote modifiers and trailing keywords. 25123EOM 25124 25125 # print "token $$rtokens[$i]\n"; 25126 # my $num = length($str) - pos($str); 25127 # $$rtokens[$i]=substr($$rtokens[$i],pos($str),$num); 25128 # print "continuing with new token $$rtokens[$i]\n"; 25129 25130 # skipping past this token does least damage 25131 last if ( ++$i > $max_token_index ); 25132 } 25133 } 25134 else { 25135 25136 # example file: rokicki4.pl 25137 # This error might also be triggered if my quote 25138 # modifier characters are incomplete 25139 write_logfile_entry( 25140"Note: found word $str at quote modifier location\n" 25141 ); 25142 } 25143 } 25144 25145 # re-initialize 25146 $allowed_quote_modifiers = ""; 25147 } 25148 } 25149 25150 unless ( $tok =~ /^\s*$/ || $tok eq 'CORE::' ) { 25151 25152 # try to catch some common errors 25153 if ( ( $type eq 'n' ) && ( $tok ne '0' ) ) { 25154 25155 if ( $last_nonblank_token eq 'eq' ) { 25156 complain("Should 'eq' be '==' here ?\n"); 25157 } 25158 elsif ( $last_nonblank_token eq 'ne' ) { 25159 complain("Should 'ne' be '!=' here ?\n"); 25160 } 25161 } 25162 25163 $last_last_nonblank_token = $last_nonblank_token; 25164 $last_last_nonblank_type = $last_nonblank_type; 25165 $last_last_nonblank_block_type = $last_nonblank_block_type; 25166 $last_last_nonblank_container_type = 25167 $last_nonblank_container_type; 25168 $last_last_nonblank_type_sequence = 25169 $last_nonblank_type_sequence; 25170 $last_nonblank_token = $tok; 25171 $last_nonblank_type = $type; 25172 $last_nonblank_prototype = $prototype; 25173 $last_nonblank_block_type = $block_type; 25174 $last_nonblank_container_type = $container_type; 25175 $last_nonblank_type_sequence = $type_sequence; 25176 $last_nonblank_i = $i_tok; 25177 } 25178 25179 # store previous token type 25180 if ( $i_tok >= 0 ) { 25181 $routput_token_type->[$i_tok] = $type; 25182 $routput_block_type->[$i_tok] = $block_type; 25183 $routput_container_type->[$i_tok] = $container_type; 25184 $routput_type_sequence->[$i_tok] = $type_sequence; 25185 $routput_indent_flag->[$i_tok] = $indent_flag; 25186 } 25187 my $pre_tok = $$rtokens[$i]; # get the next pre-token 25188 my $pre_type = $$rtoken_type[$i]; # and type 25189 $tok = $pre_tok; 25190 $type = $pre_type; # to be modified as necessary 25191 $block_type = ""; # blank for all tokens except code block braces 25192 $container_type = ""; # blank for all tokens except some parens 25193 $type_sequence = ""; # blank for all tokens except ?/: 25194 $indent_flag = 0; 25195 $prototype = ""; # blank for all tokens except user defined subs 25196 $i_tok = $i; 25197 25198 # this pre-token will start an output token 25199 push( @{$routput_token_list}, $i_tok ); 25200 25201 # continue gathering identifier if necessary 25202 # but do not start on blanks and comments 25203 if ( $id_scan_state && $pre_type !~ /[b#]/ ) { 25204 25205 if ( $id_scan_state =~ /^(sub|package)/ ) { 25206 scan_id(); 25207 } 25208 else { 25209 scan_identifier(); 25210 } 25211 25212 last if ($id_scan_state); 25213 next if ( ( $i > 0 ) || $type ); 25214 25215 # didn't find any token; start over 25216 $type = $pre_type; 25217 $tok = $pre_tok; 25218 } 25219 25220 # handle whitespace tokens.. 25221 next if ( $type eq 'b' ); 25222 my $prev_tok = $i > 0 ? $$rtokens[ $i - 1 ] : ' '; 25223 my $prev_type = $i > 0 ? $$rtoken_type[ $i - 1 ] : 'b'; 25224 25225 # Build larger tokens where possible, since we are not in a quote. 25226 # 25227 # First try to assemble digraphs. The following tokens are 25228 # excluded and handled specially: 25229 # '/=' is excluded because the / might start a pattern. 25230 # 'x=' is excluded since it might be $x=, with $ on previous line 25231 # '**' and *= might be typeglobs of punctuation variables 25232 # I have allowed tokens starting with <, such as <=, 25233 # because I don't think these could be valid angle operators. 25234 # test file: storrs4.pl 25235 my $test_tok = $tok . $$rtokens[ $i + 1 ]; 25236 my $combine_ok = $is_digraph{$test_tok}; 25237 25238 # check for special cases which cannot be combined 25239 if ($combine_ok) { 25240 25241 # '//' must be defined_or operator if an operator is expected. 25242 # TODO: Code for other ambiguous digraphs (/=, x=, **, *=) 25243 # could be migrated here for clarity 25244 if ( $test_tok eq '//' ) { 25245 my $next_type = $$rtokens[ $i + 1 ]; 25246 my $expecting = 25247 operator_expected( $prev_type, $tok, $next_type ); 25248 $combine_ok = 0 unless ( $expecting == OPERATOR ); 25249 } 25250 } 25251 25252 if ( 25253 $combine_ok 25254 && ( $test_tok ne '/=' ) # might be pattern 25255 && ( $test_tok ne 'x=' ) # might be $x 25256 && ( $test_tok ne '**' ) # typeglob? 25257 && ( $test_tok ne '*=' ) # typeglob? 25258 ) 25259 { 25260 $tok = $test_tok; 25261 $i++; 25262 25263 # Now try to assemble trigraphs. Note that all possible 25264 # perl trigraphs can be constructed by appending a character 25265 # to a digraph. 25266 $test_tok = $tok . $$rtokens[ $i + 1 ]; 25267 25268 if ( $is_trigraph{$test_tok} ) { 25269 $tok = $test_tok; 25270 $i++; 25271 } 25272 } 25273 25274 $type = $tok; 25275 $next_tok = $$rtokens[ $i + 1 ]; 25276 $next_type = $$rtoken_type[ $i + 1 ]; 25277 25278 TOKENIZER_DEBUG_FLAG_TOKENIZE && do { 25279 local $" = ')('; 25280 my @debug_list = ( 25281 $last_nonblank_token, $tok, 25282 $next_tok, $brace_depth, 25283 $brace_type[$brace_depth], $paren_depth, 25284 $paren_type[$paren_depth] 25285 ); 25286 print STDOUT "TOKENIZE:(@debug_list)\n"; 25287 }; 25288 25289 # turn off attribute list on first non-blank, non-bareword 25290 if ( $pre_type ne 'w' ) { $in_attribute_list = 0 } 25291 25292 ############################################################### 25293 # We have the next token, $tok. 25294 # Now we have to examine this token and decide what it is 25295 # and define its $type 25296 # 25297 # section 1: bare words 25298 ############################################################### 25299 25300 if ( $pre_type eq 'w' ) { 25301 $expecting = operator_expected( $prev_type, $tok, $next_type ); 25302 my ( $next_nonblank_token, $i_next ) = 25303 find_next_nonblank_token( $i, $rtokens, $max_token_index ); 25304 25305 # ATTRS: handle sub and variable attributes 25306 if ($in_attribute_list) { 25307 25308 # treat bare word followed by open paren like qw( 25309 if ( $next_nonblank_token eq '(' ) { 25310 $in_quote = $quote_items{'q'}; 25311 $allowed_quote_modifiers = $quote_modifiers{'q'}; 25312 $type = 'q'; 25313 $quote_type = 'q'; 25314 next; 25315 } 25316 25317 # handle bareword not followed by open paren 25318 else { 25319 $type = 'w'; 25320 next; 25321 } 25322 } 25323 25324 # quote a word followed by => operator 25325 if ( $next_nonblank_token eq '=' ) { 25326 25327 if ( $$rtokens[ $i_next + 1 ] eq '>' ) { 25328 if ( $is_constant{$current_package}{$tok} ) { 25329 $type = 'C'; 25330 } 25331 elsif ( $is_user_function{$current_package}{$tok} ) { 25332 $type = 'U'; 25333 $prototype = 25334 $user_function_prototype{$current_package}{$tok}; 25335 } 25336 elsif ( $tok =~ /^v\d+$/ ) { 25337 $type = 'v'; 25338 report_v_string($tok); 25339 } 25340 else { $type = 'w' } 25341 25342 next; 25343 } 25344 } 25345 25346 # quote a bare word within braces..like xxx->{s}; note that we 25347 # must be sure this is not a structural brace, to avoid 25348 # mistaking {s} in the following for a quoted bare word: 25349 # for(@[){s}bla}BLA} 25350 # Also treat q in something like var{-q} as a bare word, not qoute operator 25351 if ( 25352 $next_nonblank_token eq '}' 25353 && ( 25354 $last_nonblank_type eq 'L' 25355 || ( $last_nonblank_type eq 'm' 25356 && $last_last_nonblank_type eq 'L' ) 25357 ) 25358 ) 25359 { 25360 $type = 'w'; 25361 next; 25362 } 25363 25364 # a bare word immediately followed by :: is not a keyword; 25365 # use $tok_kw when testing for keywords to avoid a mistake 25366 my $tok_kw = $tok; 25367 if ( $$rtokens[ $i + 1 ] eq ':' && $$rtokens[ $i + 2 ] eq ':' ) 25368 { 25369 $tok_kw .= '::'; 25370 } 25371 25372 # handle operator x (now we know it isn't $x=) 25373 if ( ( $tok =~ /^x\d*$/ ) && ( $expecting == OPERATOR ) ) { 25374 if ( $tok eq 'x' ) { 25375 25376 if ( $$rtokens[ $i + 1 ] eq '=' ) { # x= 25377 $tok = 'x='; 25378 $type = $tok; 25379 $i++; 25380 } 25381 else { 25382 $type = 'x'; 25383 } 25384 } 25385 25386 # FIXME: Patch: mark something like x4 as an integer for now 25387 # It gets fixed downstream. This is easier than 25388 # splitting the pretoken. 25389 else { 25390 $type = 'n'; 25391 } 25392 } 25393 elsif ( $tok_kw eq 'CORE::' ) { 25394 $type = $tok = $tok_kw; 25395 $i += 2; 25396 } 25397 elsif ( ( $tok eq 'strict' ) 25398 and ( $last_nonblank_token eq 'use' ) ) 25399 { 25400 $tokenizer_self->{_saw_use_strict} = 1; 25401 scan_bare_identifier(); 25402 } 25403 25404 elsif ( ( $tok eq 'warnings' ) 25405 and ( $last_nonblank_token eq 'use' ) ) 25406 { 25407 $tokenizer_self->{_saw_perl_dash_w} = 1; 25408 25409 # scan as identifier, so that we pick up something like: 25410 # use warnings::register 25411 scan_bare_identifier(); 25412 } 25413 25414 elsif ( 25415 $tok eq 'AutoLoader' 25416 && $tokenizer_self->{_look_for_autoloader} 25417 && ( 25418 $last_nonblank_token eq 'use' 25419 25420 # these regexes are from AutoSplit.pm, which we want 25421 # to mimic 25422 || $input_line =~ /^\s*(use|require)\s+AutoLoader\b/ 25423 || $input_line =~ /\bISA\s*=.*\bAutoLoader\b/ 25424 ) 25425 ) 25426 { 25427 write_logfile_entry("AutoLoader seen, -nlal deactivates\n"); 25428 $tokenizer_self->{_saw_autoloader} = 1; 25429 $tokenizer_self->{_look_for_autoloader} = 0; 25430 scan_bare_identifier(); 25431 } 25432 25433 elsif ( 25434 $tok eq 'SelfLoader' 25435 && $tokenizer_self->{_look_for_selfloader} 25436 && ( $last_nonblank_token eq 'use' 25437 || $input_line =~ /^\s*(use|require)\s+SelfLoader\b/ 25438 || $input_line =~ /\bISA\s*=.*\bSelfLoader\b/ ) 25439 ) 25440 { 25441 write_logfile_entry("SelfLoader seen, -nlsl deactivates\n"); 25442 $tokenizer_self->{_saw_selfloader} = 1; 25443 $tokenizer_self->{_look_for_selfloader} = 0; 25444 scan_bare_identifier(); 25445 } 25446 25447 elsif ( ( $tok eq 'constant' ) 25448 and ( $last_nonblank_token eq 'use' ) ) 25449 { 25450 scan_bare_identifier(); 25451 my ( $next_nonblank_token, $i_next ) = 25452 find_next_nonblank_token( $i, $rtokens, 25453 $max_token_index ); 25454 25455 if ($next_nonblank_token) { 25456 25457 if ( $is_keyword{$next_nonblank_token} ) { 25458 25459 # Assume qw is used as a quote and okay, as in: 25460 # use constant qw{ DEBUG 0 }; 25461 # Not worth trying to parse for just a warning 25462 25463 # NOTE: This warning is deactivated because recent 25464 # versions of perl do not complain here, but 25465 # the coding is retained for reference. 25466 if ( 0 && $next_nonblank_token ne 'qw' ) { 25467 warning( 25468"Attempting to define constant '$next_nonblank_token' which is a perl keyword\n" 25469 ); 25470 } 25471 } 25472 25473 # FIXME: could check for error in which next token is 25474 # not a word (number, punctuation, ..) 25475 else { 25476 $is_constant{$current_package}{$next_nonblank_token} 25477 = 1; 25478 } 25479 } 25480 } 25481 25482 # various quote operators 25483 elsif ( $is_q_qq_qw_qx_qr_s_y_tr_m{$tok} ) { 25484 if ( $expecting == OPERATOR ) { 25485 25486 # patch for paren-less for/foreach glitch, part 1 25487 # perl will accept this construct as valid: 25488 # 25489 # foreach my $key qw\Uno Due Tres Quadro\ { 25490 # print "Set $key\n"; 25491 # } 25492 unless ( $tok eq 'qw' && $is_for_foreach{$want_paren} ) 25493 { 25494 error_if_expecting_OPERATOR(); 25495 } 25496 } 25497 $in_quote = $quote_items{$tok}; 25498 $allowed_quote_modifiers = $quote_modifiers{$tok}; 25499 25500 # All quote types are 'Q' except possibly qw quotes. 25501 # qw quotes are special in that they may generally be trimmed 25502 # of leading and trailing whitespace. So they are given a 25503 # separate type, 'q', unless requested otherwise. 25504 $type = 25505 ( $tok eq 'qw' && $tokenizer_self->{_trim_qw} ) 25506 ? 'q' 25507 : 'Q'; 25508 $quote_type = $type; 25509 } 25510 25511 # check for a statement label 25512 elsif ( 25513 ( $next_nonblank_token eq ':' ) 25514 && ( $$rtokens[ $i_next + 1 ] ne ':' ) 25515 && ( $i_next <= $max_token_index ) # colon on same line 25516 && label_ok() 25517 ) 25518 { 25519 if ( $tok !~ /[A-Z]/ ) { 25520 push @{ $tokenizer_self->{_rlower_case_labels_at} }, 25521 $input_line_number; 25522 } 25523 $type = 'J'; 25524 $tok .= ':'; 25525 $i = $i_next; 25526 next; 25527 } 25528 25529 # 'sub' || 'package' 25530 elsif ( $is_sub_package{$tok_kw} ) { 25531 error_if_expecting_OPERATOR() 25532 if ( $expecting == OPERATOR ); 25533 scan_id(); 25534 } 25535 25536 # Note on token types for format, __DATA__, __END__: 25537 # It simplifies things to give these type ';', so that when we 25538 # start rescanning we will be expecting a token of type TERM. 25539 # We will switch to type 'k' before outputting the tokens. 25540 elsif ( $is_format_END_DATA{$tok_kw} ) { 25541 $type = ';'; # make tokenizer look for TERM next 25542 $tokenizer_self->{ $is_format_END_DATA{$tok_kw} } = 1; 25543 last; 25544 } 25545 25546 elsif ( $is_keyword{$tok_kw} ) { 25547 $type = 'k'; 25548 25549 # Since for and foreach may not be followed immediately 25550 # by an opening paren, we have to remember which keyword 25551 # is associated with the next '(' 25552 if ( $is_for_foreach{$tok} ) { 25553 if ( new_statement_ok() ) { 25554 $want_paren = $tok; 25555 } 25556 } 25557 25558 # recognize 'use' statements, which are special 25559 elsif ( $is_use_require{$tok} ) { 25560 $statement_type = $tok; 25561 error_if_expecting_OPERATOR() 25562 if ( $expecting == OPERATOR ); 25563 } 25564 25565 # remember my and our to check for trailing ": shared" 25566 elsif ( $is_my_our{$tok} ) { 25567 $statement_type = $tok; 25568 } 25569 25570 # Check for misplaced 'elsif' and 'else', but allow isolated 25571 # else or elsif blocks to be formatted. This is indicated 25572 # by a last noblank token of ';' 25573 elsif ( $tok eq 'elsif' ) { 25574 if ( $last_nonblank_token ne ';' 25575 && $last_nonblank_block_type !~ 25576 /^(if|elsif|unless)$/ ) 25577 { 25578 warning( 25579"expecting '$tok' to follow one of 'if|elsif|unless'\n" 25580 ); 25581 } 25582 } 25583 elsif ( $tok eq 'else' ) { 25584 25585 # patched for SWITCH/CASE 25586 if ( $last_nonblank_token ne ';' 25587 && $last_nonblank_block_type !~ 25588 /^(if|elsif|unless|case|when)$/ ) 25589 { 25590 warning( 25591"expecting '$tok' to follow one of 'if|elsif|unless|case|when'\n" 25592 ); 25593 } 25594 } 25595 elsif ( $tok eq 'continue' ) { 25596 if ( $last_nonblank_token ne ';' 25597 && $last_nonblank_block_type !~ 25598 /(^(\{|\}|;|while|until|for|foreach)|:$)/ ) 25599 { 25600 25601 # note: ';' '{' and '}' in list above 25602 # because continues can follow bare blocks; 25603 # ':' is labeled block 25604 # 25605 ############################################ 25606 # NOTE: This check has been deactivated because 25607 # continue has an alternative usage for given/when 25608 # blocks in perl 5.10 25609 ## warning("'$tok' should follow a block\n"); 25610 ############################################ 25611 } 25612 } 25613 25614 # patch for SWITCH/CASE if 'case' and 'when are 25615 # treated as keywords. 25616 elsif ( $tok eq 'when' || $tok eq 'case' ) { 25617 $statement_type = $tok; # next '{' is block 25618 } 25619 25620 # 25621 # indent trailing if/unless/while/until 25622 # outdenting will be handled by later indentation loop 25623## DEACTIVATED: unfortunately this can cause some unwanted indentation like: 25624##$opt_o = 1 25625## if !( 25626## $opt_b 25627## || $opt_c 25628## || $opt_d 25629## || $opt_f 25630## || $opt_i 25631## || $opt_l 25632## || $opt_o 25633## || $opt_x 25634## ); 25635## if ( $tok =~ /^(if|unless|while|until)$/ 25636## && $next_nonblank_token ne '(' ) 25637## { 25638## $indent_flag = 1; 25639## } 25640 } 25641 25642 # check for inline label following 25643 # /^(redo|last|next|goto)$/ 25644 elsif (( $last_nonblank_type eq 'k' ) 25645 && ( $is_redo_last_next_goto{$last_nonblank_token} ) ) 25646 { 25647 $type = 'j'; 25648 next; 25649 } 25650 25651 # something else -- 25652 else { 25653 25654 scan_bare_identifier(); 25655 if ( $type eq 'w' ) { 25656 25657 if ( $expecting == OPERATOR ) { 25658 25659 # don't complain about possible indirect object 25660 # notation. 25661 # For example: 25662 # package main; 25663 # sub new($) { ... } 25664 # $b = new A::; # calls A::new 25665 # $c = new A; # same thing but suspicious 25666 # This will call A::new but we have a 'new' in 25667 # main:: which looks like a constant. 25668 # 25669 if ( $last_nonblank_type eq 'C' ) { 25670 if ( $tok !~ /::$/ ) { 25671 complain(<<EOM); 25672Expecting operator after '$last_nonblank_token' but found bare word '$tok' 25673 Maybe indirectet object notation? 25674EOM 25675 } 25676 } 25677 else { 25678 error_if_expecting_OPERATOR("bareword"); 25679 } 25680 } 25681 25682 # mark bare words immediately followed by a paren as 25683 # functions 25684 $next_tok = $$rtokens[ $i + 1 ]; 25685 if ( $next_tok eq '(' ) { 25686 $type = 'U'; 25687 } 25688 25689 # underscore after file test operator is file handle 25690 if ( $tok eq '_' && $last_nonblank_type eq 'F' ) { 25691 $type = 'Z'; 25692 } 25693 25694 # patch for SWITCH/CASE if 'case' and 'when are 25695 # not treated as keywords: 25696 if ( 25697 ( 25698 $tok eq 'case' 25699 && $brace_type[$brace_depth] eq 'switch' 25700 ) 25701 || ( $tok eq 'when' 25702 && $brace_type[$brace_depth] eq 'given' ) 25703 ) 25704 { 25705 $statement_type = $tok; # next '{' is block 25706 $type = 'k'; # for keyword syntax coloring 25707 } 25708 25709 # patch for SWITCH/CASE if switch and given not keywords 25710 # Switch is not a perl 5 keyword, but we will gamble 25711 # and mark switch followed by paren as a keyword. This 25712 # is only necessary to get html syntax coloring nice, 25713 # and does not commit this as being a switch/case. 25714 if ( $next_nonblank_token eq '(' 25715 && ( $tok eq 'switch' || $tok eq 'given' ) ) 25716 { 25717 $type = 'k'; # for keyword syntax coloring 25718 } 25719 } 25720 } 25721 } 25722 25723 ############################################################### 25724 # section 2: strings of digits 25725 ############################################################### 25726 elsif ( $pre_type eq 'd' ) { 25727 $expecting = operator_expected( $prev_type, $tok, $next_type ); 25728 error_if_expecting_OPERATOR("Number") 25729 if ( $expecting == OPERATOR ); 25730 my $number = scan_number(); 25731 if ( !defined($number) ) { 25732 25733 # shouldn't happen - we should always get a number 25734 warning("non-number beginning with digit--program bug\n"); 25735 report_definite_bug(); 25736 } 25737 } 25738 25739 ############################################################### 25740 # section 3: all other tokens 25741 ############################################################### 25742 25743 else { 25744 last if ( $tok eq '#' ); 25745 my $code = $tokenization_code->{$tok}; 25746 if ($code) { 25747 $expecting = 25748 operator_expected( $prev_type, $tok, $next_type ); 25749 $code->(); 25750 redo if $in_quote; 25751 } 25752 } 25753 } 25754 25755 # ----------------------------- 25756 # end of main tokenization loop 25757 # ----------------------------- 25758 25759 if ( $i_tok >= 0 ) { 25760 $routput_token_type->[$i_tok] = $type; 25761 $routput_block_type->[$i_tok] = $block_type; 25762 $routput_container_type->[$i_tok] = $container_type; 25763 $routput_type_sequence->[$i_tok] = $type_sequence; 25764 $routput_indent_flag->[$i_tok] = $indent_flag; 25765 } 25766 25767 unless ( ( $type eq 'b' ) || ( $type eq '#' ) ) { 25768 $last_last_nonblank_token = $last_nonblank_token; 25769 $last_last_nonblank_type = $last_nonblank_type; 25770 $last_last_nonblank_block_type = $last_nonblank_block_type; 25771 $last_last_nonblank_container_type = $last_nonblank_container_type; 25772 $last_last_nonblank_type_sequence = $last_nonblank_type_sequence; 25773 $last_nonblank_token = $tok; 25774 $last_nonblank_type = $type; 25775 $last_nonblank_block_type = $block_type; 25776 $last_nonblank_container_type = $container_type; 25777 $last_nonblank_type_sequence = $type_sequence; 25778 $last_nonblank_prototype = $prototype; 25779 } 25780 25781 # reset indentation level if necessary at a sub or package 25782 # in an attempt to recover from a nesting error 25783 if ( $level_in_tokenizer < 0 ) { 25784 if ( $input_line =~ /^\s*(sub|package)\s+(\w+)/ ) { 25785 reset_indentation_level(0); 25786 brace_warning("resetting level to 0 at $1 $2\n"); 25787 } 25788 } 25789 25790 # all done tokenizing this line ... 25791 # now prepare the final list of tokens and types 25792 25793 my @token_type = (); # stack of output token types 25794 my @block_type = (); # stack of output code block types 25795 my @container_type = (); # stack of output code container types 25796 my @type_sequence = (); # stack of output type sequence numbers 25797 my @tokens = (); # output tokens 25798 my @levels = (); # structural brace levels of output tokens 25799 my @slevels = (); # secondary nesting levels of output tokens 25800 my @nesting_tokens = (); # string of tokens leading to this depth 25801 my @nesting_types = (); # string of token types leading to this depth 25802 my @nesting_blocks = (); # string of block types leading to this depth 25803 my @nesting_lists = (); # string of list types leading to this depth 25804 my @ci_string = (); # string needed to compute continuation indentation 25805 my @container_environment = (); # BLOCK or LIST 25806 my $container_environment = ''; 25807 my $im = -1; # previous $i value 25808 my $num; 25809 my $ci_string_sum = ones_count($ci_string_in_tokenizer); 25810 25811# Computing Token Indentation 25812# 25813# The final section of the tokenizer forms tokens and also computes 25814# parameters needed to find indentation. It is much easier to do it 25815# in the tokenizer than elsewhere. Here is a brief description of how 25816# indentation is computed. Perl::Tidy computes indentation as the sum 25817# of 2 terms: 25818# 25819# (1) structural indentation, such as if/else/elsif blocks 25820# (2) continuation indentation, such as long parameter call lists. 25821# 25822# These are occasionally called primary and secondary indentation. 25823# 25824# Structural indentation is introduced by tokens of type '{', although 25825# the actual tokens might be '{', '(', or '['. Structural indentation 25826# is of two types: BLOCK and non-BLOCK. Default structural indentation 25827# is 4 characters if the standard indentation scheme is used. 25828# 25829# Continuation indentation is introduced whenever a line at BLOCK level 25830# is broken before its termination. Default continuation indentation 25831# is 2 characters in the standard indentation scheme. 25832# 25833# Both types of indentation may be nested arbitrarily deep and 25834# interlaced. The distinction between the two is somewhat arbitrary. 25835# 25836# For each token, we will define two variables which would apply if 25837# the current statement were broken just before that token, so that 25838# that token started a new line: 25839# 25840# $level = the structural indentation level, 25841# $ci_level = the continuation indentation level 25842# 25843# The total indentation will be $level * (4 spaces) + $ci_level * (2 spaces), 25844# assuming defaults. However, in some special cases it is customary 25845# to modify $ci_level from this strict value. 25846# 25847# The total structural indentation is easy to compute by adding and 25848# subtracting 1 from a saved value as types '{' and '}' are seen. The 25849# running value of this variable is $level_in_tokenizer. 25850# 25851# The total continuation is much more difficult to compute, and requires 25852# several variables. These veriables are: 25853# 25854# $ci_string_in_tokenizer = a string of 1's and 0's indicating, for 25855# each indentation level, if there are intervening open secondary 25856# structures just prior to that level. 25857# $continuation_string_in_tokenizer = a string of 1's and 0's indicating 25858# if the last token at that level is "continued", meaning that it 25859# is not the first token of an expression. 25860# $nesting_block_string = a string of 1's and 0's indicating, for each 25861# indentation level, if the level is of type BLOCK or not. 25862# $nesting_block_flag = the most recent 1 or 0 of $nesting_block_string 25863# $nesting_list_string = a string of 1's and 0's indicating, for each 25864# indentation level, if it is is appropriate for list formatting. 25865# If so, continuation indentation is used to indent long list items. 25866# $nesting_list_flag = the most recent 1 or 0 of $nesting_list_string 25867# @{$rslevel_stack} = a stack of total nesting depths at each 25868# structural indentation level, where "total nesting depth" means 25869# the nesting depth that would occur if every nesting token -- '{', '[', 25870# and '(' -- , regardless of context, is used to compute a nesting 25871# depth. 25872 25873 #my $nesting_block_flag = ($nesting_block_string =~ /1$/); 25874 #my $nesting_list_flag = ($nesting_list_string =~ /1$/); 25875 25876 my ( $ci_string_i, $level_i, $nesting_block_string_i, 25877 $nesting_list_string_i, $nesting_token_string_i, 25878 $nesting_type_string_i, ); 25879 25880 foreach $i ( @{$routput_token_list} ) 25881 { # scan the list of pre-tokens indexes 25882 25883 # self-checking for valid token types 25884 my $type = $routput_token_type->[$i]; 25885 my $forced_indentation_flag = $routput_indent_flag->[$i]; 25886 25887 # See if we should undo the $forced_indentation_flag. 25888 # Forced indentation after 'if', 'unless', 'while' and 'until' 25889 # expressions without trailing parens is optional and doesn't 25890 # always look good. It is usually okay for a trailing logical 25891 # expression, but if the expression is a function call, code block, 25892 # or some kind of list it puts in an unwanted extra indentation 25893 # level which is hard to remove. 25894 # 25895 # Example where extra indentation looks ok: 25896 # return 1 25897 # if $det_a < 0 and $det_b > 0 25898 # or $det_a > 0 and $det_b < 0; 25899 # 25900 # Example where extra indentation is not needed because 25901 # the eval brace also provides indentation: 25902 # print "not " if defined eval { 25903 # reduce { die if $b > 2; $a + $b } 0, 1, 2, 3, 4; 25904 # }; 25905 # 25906 # The following rule works fairly well: 25907 # Undo the flag if the end of this line, or start of the next 25908 # line, is an opening container token or a comma. 25909 # This almost always works, but if not after another pass it will 25910 # be stable. 25911 if ( $forced_indentation_flag && $type eq 'k' ) { 25912 my $ixlast = -1; 25913 my $ilast = $routput_token_list->[$ixlast]; 25914 my $toklast = $routput_token_type->[$ilast]; 25915 if ( $toklast eq '#' ) { 25916 $ixlast--; 25917 $ilast = $routput_token_list->[$ixlast]; 25918 $toklast = $routput_token_type->[$ilast]; 25919 } 25920 if ( $toklast eq 'b' ) { 25921 $ixlast--; 25922 $ilast = $routput_token_list->[$ixlast]; 25923 $toklast = $routput_token_type->[$ilast]; 25924 } 25925 if ( $toklast =~ /^[\{,]$/ ) { 25926 $forced_indentation_flag = 0; 25927 } 25928 else { 25929 ( $toklast, my $i_next ) = 25930 find_next_nonblank_token( $max_token_index, $rtokens, 25931 $max_token_index ); 25932 if ( $toklast =~ /^[\{,]$/ ) { 25933 $forced_indentation_flag = 0; 25934 } 25935 } 25936 } 25937 25938 # if we are already in an indented if, see if we should outdent 25939 if ($indented_if_level) { 25940 25941 # don't try to nest trailing if's - shouldn't happen 25942 if ( $type eq 'k' ) { 25943 $forced_indentation_flag = 0; 25944 } 25945 25946 # check for the normal case - outdenting at next ';' 25947 elsif ( $type eq ';' ) { 25948 if ( $level_in_tokenizer == $indented_if_level ) { 25949 $forced_indentation_flag = -1; 25950 $indented_if_level = 0; 25951 } 25952 } 25953 25954 # handle case of missing semicolon 25955 elsif ( $type eq '}' ) { 25956 if ( $level_in_tokenizer == $indented_if_level ) { 25957 $indented_if_level = 0; 25958 25959 # TBD: This could be a subroutine call 25960 $level_in_tokenizer--; 25961 if ( @{$rslevel_stack} > 1 ) { 25962 pop( @{$rslevel_stack} ); 25963 } 25964 if ( length($nesting_block_string) > 1 ) 25965 { # true for valid script 25966 chop $nesting_block_string; 25967 chop $nesting_list_string; 25968 } 25969 25970 } 25971 } 25972 } 25973 25974 my $tok = $$rtokens[$i]; # the token, but ONLY if same as pretoken 25975 $level_i = $level_in_tokenizer; 25976 25977 # This can happen by running perltidy on non-scripts 25978 # although it could also be bug introduced by programming change. 25979 # Perl silently accepts a 032 (^Z) and takes it as the end 25980 if ( !$is_valid_token_type{$type} ) { 25981 my $val = ord($type); 25982 warning( 25983 "unexpected character decimal $val ($type) in script\n"); 25984 $tokenizer_self->{_in_error} = 1; 25985 } 25986 25987 # ---------------------------------------------------------------- 25988 # TOKEN TYPE PATCHES 25989 # output __END__, __DATA__, and format as type 'k' instead of ';' 25990 # to make html colors correct, etc. 25991 my $fix_type = $type; 25992 if ( $type eq ';' && $tok =~ /\w/ ) { $fix_type = 'k' } 25993 25994 # output anonymous 'sub' as keyword 25995 if ( $type eq 't' && $tok eq 'sub' ) { $fix_type = 'k' } 25996 25997 # ----------------------------------------------------------------- 25998 25999 $nesting_token_string_i = $nesting_token_string; 26000 $nesting_type_string_i = $nesting_type_string; 26001 $nesting_block_string_i = $nesting_block_string; 26002 $nesting_list_string_i = $nesting_list_string; 26003 26004 # set primary indentation levels based on structural braces 26005 # Note: these are set so that the leading braces have a HIGHER 26006 # level than their CONTENTS, which is convenient for indentation 26007 # Also, define continuation indentation for each token. 26008 if ( $type eq '{' || $type eq 'L' || $forced_indentation_flag > 0 ) 26009 { 26010 26011 # use environment before updating 26012 $container_environment = 26013 $nesting_block_flag ? 'BLOCK' 26014 : $nesting_list_flag ? 'LIST' 26015 : ""; 26016 26017 # if the difference between total nesting levels is not 1, 26018 # there are intervening non-structural nesting types between 26019 # this '{' and the previous unclosed '{' 26020 my $intervening_secondary_structure = 0; 26021 if ( @{$rslevel_stack} ) { 26022 $intervening_secondary_structure = 26023 $slevel_in_tokenizer - $rslevel_stack->[-1]; 26024 } 26025 26026 # Continuation Indentation 26027 # 26028 # Having tried setting continuation indentation both in the formatter and 26029 # in the tokenizer, I can say that setting it in the tokenizer is much, 26030 # much easier. The formatter already has too much to do, and can't 26031 # make decisions on line breaks without knowing what 'ci' will be at 26032 # arbitrary locations. 26033 # 26034 # But a problem with setting the continuation indentation (ci) here 26035 # in the tokenizer is that we do not know where line breaks will actually 26036 # be. As a result, we don't know if we should propagate continuation 26037 # indentation to higher levels of structure. 26038 # 26039 # For nesting of only structural indentation, we never need to do this. 26040 # For example, in a long if statement, like this 26041 # 26042 # if ( !$output_block_type[$i] 26043 # && ($in_statement_continuation) ) 26044 # { <--outdented 26045 # do_something(); 26046 # } 26047 # 26048 # the second line has ci but we do normally give the lines within the BLOCK 26049 # any ci. This would be true if we had blocks nested arbitrarily deeply. 26050 # 26051 # But consider something like this, where we have created a break after 26052 # an opening paren on line 1, and the paren is not (currently) a 26053 # structural indentation token: 26054 # 26055 # my $file = $menubar->Menubutton( 26056 # qw/-text File -underline 0 -menuitems/ => [ 26057 # [ 26058 # Cascade => '~View', 26059 # -menuitems => [ 26060 # ... 26061 # 26062 # The second line has ci, so it would seem reasonable to propagate it 26063 # down, giving the third line 1 ci + 1 indentation. This suggests the 26064 # following rule, which is currently used to propagating ci down: if there 26065 # are any non-structural opening parens (or brackets, or braces), before 26066 # an opening structural brace, then ci is propagated down, and otherwise 26067 # not. The variable $intervening_secondary_structure contains this 26068 # information for the current token, and the string 26069 # "$ci_string_in_tokenizer" is a stack of previous values of this 26070 # variable. 26071 26072 # save the current states 26073 push( @{$rslevel_stack}, 1 + $slevel_in_tokenizer ); 26074 $level_in_tokenizer++; 26075 26076 if ($forced_indentation_flag) { 26077 26078 # break BEFORE '?' when there is forced indentation 26079 if ( $type eq '?' ) { $level_i = $level_in_tokenizer; } 26080 if ( $type eq 'k' ) { 26081 $indented_if_level = $level_in_tokenizer; 26082 } 26083 26084 # do not change container environement here if we are not 26085 # at a real list. Adding this check prevents "blinkers" 26086 # often near 'unless" clauses, such as in the following 26087 # code: 26088## next 26089## unless -e ( 26090## $archive = 26091## File::Spec->catdir( $_, "auto", $root, "$sub$lib_ext" ) 26092## ); 26093 26094 $nesting_block_string .= "$nesting_block_flag"; 26095 } 26096 else { 26097 26098 if ( $routput_block_type->[$i] ) { 26099 $nesting_block_flag = 1; 26100 $nesting_block_string .= '1'; 26101 } 26102 else { 26103 $nesting_block_flag = 0; 26104 $nesting_block_string .= '0'; 26105 } 26106 } 26107 26108 # we will use continuation indentation within containers 26109 # which are not blocks and not logical expressions 26110 my $bit = 0; 26111 if ( !$routput_block_type->[$i] ) { 26112 26113 # propagate flag down at nested open parens 26114 if ( $routput_container_type->[$i] eq '(' ) { 26115 $bit = 1 if $nesting_list_flag; 26116 } 26117 26118 # use list continuation if not a logical grouping 26119 # /^(if|elsif|unless|while|and|or|not|&&|!|\|\||for|foreach)$/ 26120 else { 26121 $bit = 1 26122 unless 26123 $is_logical_container{ $routput_container_type->[$i] 26124 }; 26125 } 26126 } 26127 $nesting_list_string .= $bit; 26128 $nesting_list_flag = $bit; 26129 26130 $ci_string_in_tokenizer .= 26131 ( $intervening_secondary_structure != 0 ) ? '1' : '0'; 26132 $ci_string_sum = ones_count($ci_string_in_tokenizer); 26133 $continuation_string_in_tokenizer .= 26134 ( $in_statement_continuation > 0 ) ? '1' : '0'; 26135 26136 # Sometimes we want to give an opening brace continuation indentation, 26137 # and sometimes not. For code blocks, we don't do it, so that the leading 26138 # '{' gets outdented, like this: 26139 # 26140 # if ( !$output_block_type[$i] 26141 # && ($in_statement_continuation) ) 26142 # { <--outdented 26143 # 26144 # For other types, we will give them continuation indentation. For example, 26145 # here is how a list looks with the opening paren indented: 26146 # 26147 # @LoL = 26148 # ( [ "fred", "barney" ], [ "george", "jane", "elroy" ], 26149 # [ "homer", "marge", "bart" ], ); 26150 # 26151 # This looks best when 'ci' is one-half of the indentation (i.e., 2 and 4) 26152 26153 my $total_ci = $ci_string_sum; 26154 if ( 26155 !$routput_block_type->[$i] # patch: skip for BLOCK 26156 && ($in_statement_continuation) 26157 && !( $forced_indentation_flag && $type eq ':' ) 26158 ) 26159 { 26160 $total_ci += $in_statement_continuation 26161 unless ( $ci_string_in_tokenizer =~ /1$/ ); 26162 } 26163 26164 $ci_string_i = $total_ci; 26165 $in_statement_continuation = 0; 26166 } 26167 26168 elsif ($type eq '}' 26169 || $type eq 'R' 26170 || $forced_indentation_flag < 0 ) 26171 { 26172 26173 # only a nesting error in the script would prevent popping here 26174 if ( @{$rslevel_stack} > 1 ) { pop( @{$rslevel_stack} ); } 26175 26176 $level_i = --$level_in_tokenizer; 26177 26178 # restore previous level values 26179 if ( length($nesting_block_string) > 1 ) 26180 { # true for valid script 26181 chop $nesting_block_string; 26182 $nesting_block_flag = ( $nesting_block_string =~ /1$/ ); 26183 chop $nesting_list_string; 26184 $nesting_list_flag = ( $nesting_list_string =~ /1$/ ); 26185 26186 chop $ci_string_in_tokenizer; 26187 $ci_string_sum = ones_count($ci_string_in_tokenizer); 26188 26189 $in_statement_continuation = 26190 chop $continuation_string_in_tokenizer; 26191 26192 # zero continuation flag at terminal BLOCK '}' which 26193 # ends a statement. 26194 if ( $routput_block_type->[$i] ) { 26195 26196 # ...These include non-anonymous subs 26197 # note: could be sub ::abc { or sub 'abc 26198 if ( $routput_block_type->[$i] =~ m/^sub\s*/gc ) { 26199 26200 # note: older versions of perl require the /gc modifier 26201 # here or else the \G does not work. 26202 if ( $routput_block_type->[$i] =~ /\G('|::|\w)/gc ) 26203 { 26204 $in_statement_continuation = 0; 26205 } 26206 } 26207 26208# ...and include all block types except user subs with 26209# block prototypes and these: (sort|grep|map|do|eval) 26210# /^(\}|\{|BEGIN|END|CHECK|INIT|AUTOLOAD|DESTROY|UNITCHECK|continue|;|if|elsif|else|unless|while|until|for|foreach)$/ 26211 elsif ( 26212 $is_zero_continuation_block_type{ 26213 $routput_block_type->[$i] 26214 } ) 26215 { 26216 $in_statement_continuation = 0; 26217 } 26218 26219 # ..but these are not terminal types: 26220 # /^(sort|grep|map|do|eval)$/ ) 26221 elsif ( 26222 $is_not_zero_continuation_block_type{ 26223 $routput_block_type->[$i] 26224 } ) 26225 { 26226 } 26227 26228 # ..and a block introduced by a label 26229 # /^\w+\s*:$/gc ) { 26230 elsif ( $routput_block_type->[$i] =~ /:$/ ) { 26231 $in_statement_continuation = 0; 26232 } 26233 26234 # user function with block prototype 26235 else { 26236 $in_statement_continuation = 0; 26237 } 26238 } 26239 26240 # If we are in a list, then 26241 # we must set continuatoin indentation at the closing 26242 # paren of something like this (paren after $check): 26243 # assert( 26244 # __LINE__, 26245 # ( not defined $check ) 26246 # or ref $check 26247 # or $check eq "new" 26248 # or $check eq "old", 26249 # ); 26250 elsif ( $tok eq ')' ) { 26251 $in_statement_continuation = 1 26252 if $routput_container_type->[$i] =~ /^[;,\{\}]$/; 26253 } 26254 26255 elsif ( $tok eq ';' ) { $in_statement_continuation = 0 } 26256 } 26257 26258 # use environment after updating 26259 $container_environment = 26260 $nesting_block_flag ? 'BLOCK' 26261 : $nesting_list_flag ? 'LIST' 26262 : ""; 26263 $ci_string_i = $ci_string_sum + $in_statement_continuation; 26264 $nesting_block_string_i = $nesting_block_string; 26265 $nesting_list_string_i = $nesting_list_string; 26266 } 26267 26268 # not a structural indentation type.. 26269 else { 26270 26271 $container_environment = 26272 $nesting_block_flag ? 'BLOCK' 26273 : $nesting_list_flag ? 'LIST' 26274 : ""; 26275 26276 # zero the continuation indentation at certain tokens so 26277 # that they will be at the same level as its container. For 26278 # commas, this simplifies the -lp indentation logic, which 26279 # counts commas. For ?: it makes them stand out. 26280 if ($nesting_list_flag) { 26281 if ( $type =~ /^[,\?\:]$/ ) { 26282 $in_statement_continuation = 0; 26283 } 26284 } 26285 26286 # be sure binary operators get continuation indentation 26287 if ( 26288 $container_environment 26289 && ( $type eq 'k' && $is_binary_keyword{$tok} 26290 || $is_binary_type{$type} ) 26291 ) 26292 { 26293 $in_statement_continuation = 1; 26294 } 26295 26296 # continuation indentation is sum of any open ci from previous 26297 # levels plus the current level 26298 $ci_string_i = $ci_string_sum + $in_statement_continuation; 26299 26300 # update continuation flag ... 26301 # if this isn't a blank or comment.. 26302 if ( $type ne 'b' && $type ne '#' ) { 26303 26304 # and we are in a BLOCK 26305 if ($nesting_block_flag) { 26306 26307 # the next token after a ';' and label starts a new stmt 26308 if ( $type eq ';' || $type eq 'J' ) { 26309 $in_statement_continuation = 0; 26310 } 26311 26312 # otherwise, we are continuing the current statement 26313 else { 26314 $in_statement_continuation = 1; 26315 } 26316 } 26317 26318 # if we are not in a BLOCK.. 26319 else { 26320 26321 # do not use continuation indentation if not list 26322 # environment (could be within if/elsif clause) 26323 if ( !$nesting_list_flag ) { 26324 $in_statement_continuation = 0; 26325 } 26326 26327 # otherwise, the next token after a ',' starts a new term 26328 elsif ( $type eq ',' ) { 26329 $in_statement_continuation = 0; 26330 } 26331 26332 # otherwise, we are continuing the current term 26333 else { 26334 $in_statement_continuation = 1; 26335 } 26336 } 26337 } 26338 } 26339 26340 if ( $level_in_tokenizer < 0 ) { 26341 unless ( $tokenizer_self->{_saw_negative_indentation} ) { 26342 $tokenizer_self->{_saw_negative_indentation} = 1; 26343 warning("Starting negative indentation\n"); 26344 } 26345 } 26346 26347 # set secondary nesting levels based on all continment token types 26348 # Note: these are set so that the nesting depth is the depth 26349 # of the PREVIOUS TOKEN, which is convenient for setting 26350 # the stength of token bonds 26351 my $slevel_i = $slevel_in_tokenizer; 26352 26353 # /^[L\{\(\[]$/ 26354 if ( $is_opening_type{$type} ) { 26355 $slevel_in_tokenizer++; 26356 $nesting_token_string .= $tok; 26357 $nesting_type_string .= $type; 26358 } 26359 26360 # /^[R\}\)\]]$/ 26361 elsif ( $is_closing_type{$type} ) { 26362 $slevel_in_tokenizer--; 26363 my $char = chop $nesting_token_string; 26364 26365 if ( $char ne $matching_start_token{$tok} ) { 26366 $nesting_token_string .= $char . $tok; 26367 $nesting_type_string .= $type; 26368 } 26369 else { 26370 chop $nesting_type_string; 26371 } 26372 } 26373 26374 push( @block_type, $routput_block_type->[$i] ); 26375 push( @ci_string, $ci_string_i ); 26376 push( @container_environment, $container_environment ); 26377 push( @container_type, $routput_container_type->[$i] ); 26378 push( @levels, $level_i ); 26379 push( @nesting_tokens, $nesting_token_string_i ); 26380 push( @nesting_types, $nesting_type_string_i ); 26381 push( @slevels, $slevel_i ); 26382 push( @token_type, $fix_type ); 26383 push( @type_sequence, $routput_type_sequence->[$i] ); 26384 push( @nesting_blocks, $nesting_block_string ); 26385 push( @nesting_lists, $nesting_list_string ); 26386 26387 # now form the previous token 26388 if ( $im >= 0 ) { 26389 $num = 26390 $$rtoken_map[$i] - $$rtoken_map[$im]; # how many characters 26391 26392 if ( $num > 0 ) { 26393 push( @tokens, 26394 substr( $input_line, $$rtoken_map[$im], $num ) ); 26395 } 26396 } 26397 $im = $i; 26398 } 26399 26400 $num = length($input_line) - $$rtoken_map[$im]; # make the last token 26401 if ( $num > 0 ) { 26402 push( @tokens, substr( $input_line, $$rtoken_map[$im], $num ) ); 26403 } 26404 26405 $tokenizer_self->{_in_attribute_list} = $in_attribute_list; 26406 $tokenizer_self->{_in_quote} = $in_quote; 26407 $tokenizer_self->{_quote_target} = 26408 $in_quote ? matching_end_token($quote_character) : ""; 26409 $tokenizer_self->{_rhere_target_list} = $rhere_target_list; 26410 26411 $line_of_tokens->{_rtoken_type} = \@token_type; 26412 $line_of_tokens->{_rtokens} = \@tokens; 26413 $line_of_tokens->{_rblock_type} = \@block_type; 26414 $line_of_tokens->{_rcontainer_type} = \@container_type; 26415 $line_of_tokens->{_rcontainer_environment} = \@container_environment; 26416 $line_of_tokens->{_rtype_sequence} = \@type_sequence; 26417 $line_of_tokens->{_rlevels} = \@levels; 26418 $line_of_tokens->{_rslevels} = \@slevels; 26419 $line_of_tokens->{_rnesting_tokens} = \@nesting_tokens; 26420 $line_of_tokens->{_rci_levels} = \@ci_string; 26421 $line_of_tokens->{_rnesting_blocks} = \@nesting_blocks; 26422 26423 return; 26424 } 26425} # end tokenize_this_line 26426 26427#########i############################################################# 26428# Tokenizer routines which assist in identifying token types 26429####################################################################### 26430 26431sub operator_expected { 26432 26433 # Many perl symbols have two or more meanings. For example, '<<' 26434 # can be a shift operator or a here-doc operator. The 26435 # interpretation of these symbols depends on the current state of 26436 # the tokenizer, which may either be expecting a term or an 26437 # operator. For this example, a << would be a shift if an operator 26438 # is expected, and a here-doc if a term is expected. This routine 26439 # is called to make this decision for any current token. It returns 26440 # one of three possible values: 26441 # 26442 # OPERATOR - operator expected (or at least, not a term) 26443 # UNKNOWN - can't tell 26444 # TERM - a term is expected (or at least, not an operator) 26445 # 26446 # The decision is based on what has been seen so far. This 26447 # information is stored in the "$last_nonblank_type" and 26448 # "$last_nonblank_token" variables. For example, if the 26449 # $last_nonblank_type is '=~', then we are expecting a TERM, whereas 26450 # if $last_nonblank_type is 'n' (numeric), we are expecting an 26451 # OPERATOR. 26452 # 26453 # If a UNKNOWN is returned, the calling routine must guess. A major 26454 # goal of this tokenizer is to minimize the possibility of returning 26455 # UNKNOWN, because a wrong guess can spoil the formatting of a 26456 # script. 26457 # 26458 # adding NEW_TOKENS: it is critically important that this routine be 26459 # updated to allow it to determine if an operator or term is to be 26460 # expected after the new token. Doing this simply involves adding 26461 # the new token character to one of the regexes in this routine or 26462 # to one of the hash lists 26463 # that it uses, which are initialized in the BEGIN section. 26464 # USES GLOBAL VARIABLES: $last_nonblank_type, $last_nonblank_token, 26465 # $statement_type 26466 26467 my ( $prev_type, $tok, $next_type ) = @_; 26468 26469 my $op_expected = UNKNOWN; 26470 26471##print "tok=$tok last type=$last_nonblank_type last tok=$last_nonblank_token\n"; 26472 26473# Note: function prototype is available for token type 'U' for future 26474# program development. It contains the leading and trailing parens, 26475# and no blanks. It might be used to eliminate token type 'C', for 26476# example (prototype = '()'). Thus: 26477# if ($last_nonblank_type eq 'U') { 26478# print "previous token=$last_nonblank_token type=$last_nonblank_type prototype=$last_nonblank_prototype\n"; 26479# } 26480 26481 # A possible filehandle (or object) requires some care... 26482 if ( $last_nonblank_type eq 'Z' ) { 26483 26484 # angle.t 26485 if ( $last_nonblank_token =~ /^[A-Za-z_]/ ) { 26486 $op_expected = UNKNOWN; 26487 } 26488 26489 # For possible file handle like "$a", Perl uses weird parsing rules. 26490 # For example: 26491 # print $a/2,"/hi"; - division 26492 # print $a / 2,"/hi"; - division 26493 # print $a/ 2,"/hi"; - division 26494 # print $a /2,"/hi"; - pattern (and error)! 26495 elsif ( ( $prev_type eq 'b' ) && ( $next_type ne 'b' ) ) { 26496 $op_expected = TERM; 26497 } 26498 26499 # Note when an operation is being done where a 26500 # filehandle might be expected, since a change in whitespace 26501 # could change the interpretation of the statement. 26502 else { 26503 if ( $tok =~ /^([x\/\+\-\*\%\&\.\?\<]|\>\>)$/ ) { 26504 complain("operator in print statement not recommended\n"); 26505 $op_expected = OPERATOR; 26506 } 26507 } 26508 } 26509 26510 # Check for smartmatch operator before preceding brace or square bracket. 26511 # For example, at the ? after the ] in the following expressions we are 26512 # expecting an operator: 26513 # 26514 # qr/3/ ~~ ['1234'] ? 1 : 0; 26515 # map { $_ ~~ [ '0', '1' ] ? 'x' : 'o' } @a; 26516 elsif ( $last_nonblank_type eq '}' && $last_nonblank_token eq '~~' ) { 26517 $op_expected = OPERATOR; 26518 } 26519 26520 # handle something after 'do' and 'eval' 26521 elsif ( $is_block_operator{$last_nonblank_token} ) { 26522 26523 # something like $a = eval "expression"; 26524 # ^ 26525 if ( $last_nonblank_type eq 'k' ) { 26526 $op_expected = TERM; # expression or list mode following keyword 26527 } 26528 26529 # something like $a = do { BLOCK } / 2; 26530 # or this ? after a smartmatch anonynomous hash or array reference: 26531 # qr/3/ ~~ ['1234'] ? 1 : 0; 26532 # ^ 26533 else { 26534 $op_expected = OPERATOR; # block mode following } 26535 } 26536 } 26537 26538 # handle bare word.. 26539 elsif ( $last_nonblank_type eq 'w' ) { 26540 26541 # unfortunately, we can't tell what type of token to expect next 26542 # after most bare words 26543 $op_expected = UNKNOWN; 26544 } 26545 26546 # operator, but not term possible after these types 26547 # Note: moved ')' from type to token because parens in list context 26548 # get marked as '{' '}' now. This is a minor glitch in the following: 26549 # my %opts = (ref $_[0] eq 'HASH') ? %{shift()} : (); 26550 # 26551 elsif (( $last_nonblank_type =~ /^[\]RnviQh]$/ ) 26552 || ( $last_nonblank_token =~ /^(\)|\$|\-\>)/ ) ) 26553 { 26554 $op_expected = OPERATOR; 26555 26556 # in a 'use' statement, numbers and v-strings are not true 26557 # numbers, so to avoid incorrect error messages, we will 26558 # mark them as unknown for now (use.t) 26559 # TODO: it would be much nicer to create a new token V for VERSION 26560 # number in a use statement. Then this could be a check on type V 26561 # and related patches which change $statement_type for '=>' 26562 # and ',' could be removed. Further, it would clean things up to 26563 # scan the 'use' statement with a separate subroutine. 26564 if ( ( $statement_type eq 'use' ) 26565 && ( $last_nonblank_type =~ /^[nv]$/ ) ) 26566 { 26567 $op_expected = UNKNOWN; 26568 } 26569 26570 # expecting VERSION or {} after package NAMESPACE 26571 elsif ($statement_type =~ /^package\b/ 26572 && $last_nonblank_token =~ /^package\b/ ) 26573 { 26574 $op_expected = TERM; 26575 } 26576 } 26577 26578 # no operator after many keywords, such as "die", "warn", etc 26579 elsif ( $expecting_term_token{$last_nonblank_token} ) { 26580 26581 # patch for dor.t (defined or). 26582 # perl functions which may be unary operators 26583 # TODO: This list is incomplete, and these should be put 26584 # into a hash. 26585 if ( $tok eq '/' 26586 && $next_type eq '/' 26587 && $last_nonblank_type eq 'k' 26588 && $last_nonblank_token =~ /^eof|undef|shift|pop$/ ) 26589 { 26590 $op_expected = OPERATOR; 26591 } 26592 else { 26593 $op_expected = TERM; 26594 } 26595 } 26596 26597 # no operator after things like + - ** (i.e., other operators) 26598 elsif ( $expecting_term_types{$last_nonblank_type} ) { 26599 $op_expected = TERM; 26600 } 26601 26602 # a few operators, like "time", have an empty prototype () and so 26603 # take no parameters but produce a value to operate on 26604 elsif ( $expecting_operator_token{$last_nonblank_token} ) { 26605 $op_expected = OPERATOR; 26606 } 26607 26608 # post-increment and decrement produce values to be operated on 26609 elsif ( $expecting_operator_types{$last_nonblank_type} ) { 26610 $op_expected = OPERATOR; 26611 } 26612 26613 # no value to operate on after sub block 26614 elsif ( $last_nonblank_token =~ /^sub\s/ ) { $op_expected = TERM; } 26615 26616 # a right brace here indicates the end of a simple block. 26617 # all non-structural right braces have type 'R' 26618 # all braces associated with block operator keywords have been given those 26619 # keywords as "last_nonblank_token" and caught above. 26620 # (This statement is order dependent, and must come after checking 26621 # $last_nonblank_token). 26622 elsif ( $last_nonblank_type eq '}' ) { 26623 26624 # patch for dor.t (defined or). 26625 if ( $tok eq '/' 26626 && $next_type eq '/' 26627 && $last_nonblank_token eq ']' ) 26628 { 26629 $op_expected = OPERATOR; 26630 } 26631 else { 26632 $op_expected = TERM; 26633 } 26634 } 26635 26636 # something else..what did I forget? 26637 else { 26638 26639 # collecting diagnostics on unknown operator types..see what was missed 26640 $op_expected = UNKNOWN; 26641 write_diagnostics( 26642"OP: unknown after type=$last_nonblank_type token=$last_nonblank_token\n" 26643 ); 26644 } 26645 26646 TOKENIZER_DEBUG_FLAG_EXPECT && do { 26647 print STDOUT 26648"EXPECT: returns $op_expected for last type $last_nonblank_type token $last_nonblank_token\n"; 26649 }; 26650 return $op_expected; 26651} 26652 26653sub new_statement_ok { 26654 26655 # return true if the current token can start a new statement 26656 # USES GLOBAL VARIABLES: $last_nonblank_type 26657 26658 return label_ok() # a label would be ok here 26659 26660 || $last_nonblank_type eq 'J'; # or we follow a label 26661 26662} 26663 26664sub label_ok { 26665 26666 # Decide if a bare word followed by a colon here is a label 26667 # USES GLOBAL VARIABLES: $last_nonblank_token, $last_nonblank_type, 26668 # $brace_depth, @brace_type 26669 26670 # if it follows an opening or closing code block curly brace.. 26671 if ( ( $last_nonblank_token eq '{' || $last_nonblank_token eq '}' ) 26672 && $last_nonblank_type eq $last_nonblank_token ) 26673 { 26674 26675 # it is a label if and only if the curly encloses a code block 26676 return $brace_type[$brace_depth]; 26677 } 26678 26679 # otherwise, it is a label if and only if it follows a ';' (real or fake) 26680 # or another label 26681 else { 26682 return ( $last_nonblank_type eq ';' || $last_nonblank_type eq 'J' ); 26683 } 26684} 26685 26686sub code_block_type { 26687 26688 # Decide if this is a block of code, and its type. 26689 # Must be called only when $type = $token = '{' 26690 # The problem is to distinguish between the start of a block of code 26691 # and the start of an anonymous hash reference 26692 # Returns "" if not code block, otherwise returns 'last_nonblank_token' 26693 # to indicate the type of code block. (For example, 'last_nonblank_token' 26694 # might be 'if' for an if block, 'else' for an else block, etc). 26695 # USES GLOBAL VARIABLES: $last_nonblank_token, $last_nonblank_type, 26696 # $last_nonblank_block_type, $brace_depth, @brace_type 26697 26698 # handle case of multiple '{'s 26699 26700# print "BLOCK_TYPE EXAMINING: type=$last_nonblank_type tok=$last_nonblank_token\n"; 26701 26702 my ( $i, $rtokens, $rtoken_type, $max_token_index ) = @_; 26703 if ( $last_nonblank_token eq '{' 26704 && $last_nonblank_type eq $last_nonblank_token ) 26705 { 26706 26707 # opening brace where a statement may appear is probably 26708 # a code block but might be and anonymous hash reference 26709 if ( $brace_type[$brace_depth] ) { 26710 return decide_if_code_block( $i, $rtokens, $rtoken_type, 26711 $max_token_index ); 26712 } 26713 26714 # cannot start a code block within an anonymous hash 26715 else { 26716 return ""; 26717 } 26718 } 26719 26720 elsif ( $last_nonblank_token eq ';' ) { 26721 26722 # an opening brace where a statement may appear is probably 26723 # a code block but might be and anonymous hash reference 26724 return decide_if_code_block( $i, $rtokens, $rtoken_type, 26725 $max_token_index ); 26726 } 26727 26728 # handle case of '}{' 26729 elsif ($last_nonblank_token eq '}' 26730 && $last_nonblank_type eq $last_nonblank_token ) 26731 { 26732 26733 # a } { situation ... 26734 # could be hash reference after code block..(blktype1.t) 26735 if ($last_nonblank_block_type) { 26736 return decide_if_code_block( $i, $rtokens, $rtoken_type, 26737 $max_token_index ); 26738 } 26739 26740 # must be a block if it follows a closing hash reference 26741 else { 26742 return $last_nonblank_token; 26743 } 26744 } 26745 26746 # NOTE: braces after type characters start code blocks, but for 26747 # simplicity these are not identified as such. See also 26748 # sub is_non_structural_brace. 26749 # elsif ( $last_nonblank_type eq 't' ) { 26750 # return $last_nonblank_token; 26751 # } 26752 26753 # brace after label: 26754 elsif ( $last_nonblank_type eq 'J' ) { 26755 return $last_nonblank_token; 26756 } 26757 26758# otherwise, look at previous token. This must be a code block if 26759# it follows any of these: 26760# /^(BEGIN|END|CHECK|INIT|AUTOLOAD|DESTROY|UNITCHECK|continue|if|elsif|else|unless|do|while|until|eval|for|foreach|map|grep|sort)$/ 26761 elsif ( $is_code_block_token{$last_nonblank_token} ) { 26762 26763 # Bug Patch: Note that the opening brace after the 'if' in the following 26764 # snippet is an anonymous hash ref and not a code block! 26765 # print 'hi' if { x => 1, }->{x}; 26766 # We can identify this situation because the last nonblank type 26767 # will be a keyword (instead of a closing peren) 26768 if ( $last_nonblank_token =~ /^(if|unless)$/ 26769 && $last_nonblank_type eq 'k' ) 26770 { 26771 return ""; 26772 } 26773 else { 26774 return $last_nonblank_token; 26775 } 26776 } 26777 26778 # or a sub or package BLOCK 26779 elsif ( ( $last_nonblank_type eq 'i' || $last_nonblank_type eq 't' ) 26780 && $last_nonblank_token =~ /^(sub|package)\b/ ) 26781 { 26782 return $last_nonblank_token; 26783 } 26784 26785 elsif ( $statement_type =~ /^(sub|package)\b/ ) { 26786 return $statement_type; 26787 } 26788 26789 # user-defined subs with block parameters (like grep/map/eval) 26790 elsif ( $last_nonblank_type eq 'G' ) { 26791 return $last_nonblank_token; 26792 } 26793 26794 # check bareword 26795 elsif ( $last_nonblank_type eq 'w' ) { 26796 return decide_if_code_block( $i, $rtokens, $rtoken_type, 26797 $max_token_index ); 26798 } 26799 26800 # anything else must be anonymous hash reference 26801 else { 26802 return ""; 26803 } 26804} 26805 26806sub decide_if_code_block { 26807 26808 # USES GLOBAL VARIABLES: $last_nonblank_token 26809 my ( $i, $rtokens, $rtoken_type, $max_token_index ) = @_; 26810 my ( $next_nonblank_token, $i_next ) = 26811 find_next_nonblank_token( $i, $rtokens, $max_token_index ); 26812 26813 # we are at a '{' where a statement may appear. 26814 # We must decide if this brace starts an anonymous hash or a code 26815 # block. 26816 # return "" if anonymous hash, and $last_nonblank_token otherwise 26817 26818 # initialize to be code BLOCK 26819 my $code_block_type = $last_nonblank_token; 26820 26821 # Check for the common case of an empty anonymous hash reference: 26822 # Maybe something like sub { { } } 26823 if ( $next_nonblank_token eq '}' ) { 26824 $code_block_type = ""; 26825 } 26826 26827 else { 26828 26829 # To guess if this '{' is an anonymous hash reference, look ahead 26830 # and test as follows: 26831 # 26832 # it is a hash reference if next come: 26833 # - a string or digit followed by a comma or => 26834 # - bareword followed by => 26835 # otherwise it is a code block 26836 # 26837 # Examples of anonymous hash ref: 26838 # {'aa',}; 26839 # {1,2} 26840 # 26841 # Examples of code blocks: 26842 # {1; print "hello\n", 1;} 26843 # {$a,1}; 26844 26845 # We are only going to look ahead one more (nonblank/comment) line. 26846 # Strange formatting could cause a bad guess, but that's unlikely. 26847 my @pre_types = @$rtoken_type[ $i + 1 .. $max_token_index ]; 26848 my @pre_tokens = @$rtokens[ $i + 1 .. $max_token_index ]; 26849 my ( $rpre_tokens, $rpre_types ) = 26850 peek_ahead_for_n_nonblank_pre_tokens(20); # 20 is arbitrary but 26851 # generous, and prevents 26852 # wasting lots of 26853 # time in mangled files 26854 if ( defined($rpre_types) && @$rpre_types ) { 26855 push @pre_types, @$rpre_types; 26856 push @pre_tokens, @$rpre_tokens; 26857 } 26858 26859 # put a sentinal token to simplify stopping the search 26860 push @pre_types, '}'; 26861 26862 my $jbeg = 0; 26863 $jbeg = 1 if $pre_types[0] eq 'b'; 26864 26865 # first look for one of these 26866 # - bareword 26867 # - bareword with leading - 26868 # - digit 26869 # - quoted string 26870 my $j = $jbeg; 26871 if ( $pre_types[$j] =~ /^[\'\"]/ ) { 26872 26873 # find the closing quote; don't worry about escapes 26874 my $quote_mark = $pre_types[$j]; 26875 for ( my $k = $j + 1 ; $k < $#pre_types ; $k++ ) { 26876 if ( $pre_types[$k] eq $quote_mark ) { 26877 $j = $k + 1; 26878 my $next = $pre_types[$j]; 26879 last; 26880 } 26881 } 26882 } 26883 elsif ( $pre_types[$j] eq 'd' ) { 26884 $j++; 26885 } 26886 elsif ( $pre_types[$j] eq 'w' ) { 26887 unless ( $is_keyword{ $pre_tokens[$j] } ) { 26888 $j++; 26889 } 26890 } 26891 elsif ( $pre_types[$j] eq '-' && $pre_types[ ++$j ] eq 'w' ) { 26892 $j++; 26893 } 26894 if ( $j > $jbeg ) { 26895 26896 $j++ if $pre_types[$j] eq 'b'; 26897 26898 # it's a hash ref if a comma or => follow next 26899 if ( $pre_types[$j] eq ',' 26900 || ( $pre_types[$j] eq '=' && $pre_types[ ++$j ] eq '>' ) ) 26901 { 26902 $code_block_type = ""; 26903 } 26904 } 26905 } 26906 26907 return $code_block_type; 26908} 26909 26910sub unexpected { 26911 26912 # report unexpected token type and show where it is 26913 # USES GLOBAL VARIABLES: $tokenizer_self 26914 my ( $found, $expecting, $i_tok, $last_nonblank_i, $rpretoken_map, 26915 $rpretoken_type, $input_line ) 26916 = @_; 26917 26918 if ( ++$tokenizer_self->{_unexpected_error_count} <= MAX_NAG_MESSAGES ) { 26919 my $msg = "found $found where $expecting expected"; 26920 my $pos = $$rpretoken_map[$i_tok]; 26921 interrupt_logfile(); 26922 my $input_line_number = $tokenizer_self->{_last_line_number}; 26923 my ( $offset, $numbered_line, $underline ) = 26924 make_numbered_line( $input_line_number, $input_line, $pos ); 26925 $underline = write_on_underline( $underline, $pos - $offset, '^' ); 26926 26927 my $trailer = ""; 26928 if ( ( $i_tok > 0 ) && ( $last_nonblank_i >= 0 ) ) { 26929 my $pos_prev = $$rpretoken_map[$last_nonblank_i]; 26930 my $num; 26931 if ( $$rpretoken_type[ $i_tok - 1 ] eq 'b' ) { 26932 $num = $$rpretoken_map[ $i_tok - 1 ] - $pos_prev; 26933 } 26934 else { 26935 $num = $pos - $pos_prev; 26936 } 26937 if ( $num > 40 ) { $num = 40; $pos_prev = $pos - 40; } 26938 26939 $underline = 26940 write_on_underline( $underline, $pos_prev - $offset, '-' x $num ); 26941 $trailer = " (previous token underlined)"; 26942 } 26943 warning( $numbered_line . "\n" ); 26944 warning( $underline . "\n" ); 26945 warning( $msg . $trailer . "\n" ); 26946 resume_logfile(); 26947 } 26948} 26949 26950sub is_non_structural_brace { 26951 26952 # Decide if a brace or bracket is structural or non-structural 26953 # by looking at the previous token and type 26954 # USES GLOBAL VARIABLES: $last_nonblank_type, $last_nonblank_token 26955 26956 # EXPERIMENTAL: Mark slices as structural; idea was to improve formatting. 26957 # Tentatively deactivated because it caused the wrong operator expectation 26958 # for this code: 26959 # $user = @vars[1] / 100; 26960 # Must update sub operator_expected before re-implementing. 26961 # if ( $last_nonblank_type eq 'i' && $last_nonblank_token =~ /^@/ ) { 26962 # return 0; 26963 # } 26964 26965 # NOTE: braces after type characters start code blocks, but for 26966 # simplicity these are not identified as such. See also 26967 # sub code_block_type 26968 # if ($last_nonblank_type eq 't') {return 0} 26969 26970 # otherwise, it is non-structural if it is decorated 26971 # by type information. 26972 # For example, the '{' here is non-structural: ${xxx} 26973 ( 26974 $last_nonblank_token =~ /^([\$\@\*\&\%\)]|->|::)/ 26975 26976 # or if we follow a hash or array closing curly brace or bracket 26977 # For example, the second '{' in this is non-structural: $a{'x'}{'y'} 26978 # because the first '}' would have been given type 'R' 26979 || $last_nonblank_type =~ /^([R\]])$/ 26980 ); 26981} 26982 26983#########i############################################################# 26984# Tokenizer routines for tracking container nesting depths 26985####################################################################### 26986 26987# The following routines keep track of nesting depths of the nesting 26988# types, ( [ { and ?. This is necessary for determining the indentation 26989# level, and also for debugging programs. Not only do they keep track of 26990# nesting depths of the individual brace types, but they check that each 26991# of the other brace types is balanced within matching pairs. For 26992# example, if the program sees this sequence: 26993# 26994# { ( ( ) } 26995# 26996# then it can determine that there is an extra left paren somewhere 26997# between the { and the }. And so on with every other possible 26998# combination of outer and inner brace types. For another 26999# example: 27000# 27001# ( [ ..... ] ] ) 27002# 27003# which has an extra ] within the parens. 27004# 27005# The brace types have indexes 0 .. 3 which are indexes into 27006# the matrices. 27007# 27008# The pair ? : are treated as just another nesting type, with ? acting 27009# as the opening brace and : acting as the closing brace. 27010# 27011# The matrix 27012# 27013# $depth_array[$a][$b][ $current_depth[$a] ] = $current_depth[$b]; 27014# 27015# saves the nesting depth of brace type $b (where $b is either of the other 27016# nesting types) when brace type $a enters a new depth. When this depth 27017# decreases, a check is made that the current depth of brace types $b is 27018# unchanged, or otherwise there must have been an error. This can 27019# be very useful for localizing errors, particularly when perl runs to 27020# the end of a large file (such as this one) and announces that there 27021# is a problem somewhere. 27022# 27023# A numerical sequence number is maintained for every nesting type, 27024# so that each matching pair can be uniquely identified in a simple 27025# way. 27026 27027sub increase_nesting_depth { 27028 my ( $aa, $pos ) = @_; 27029 27030 # USES GLOBAL VARIABLES: $tokenizer_self, @current_depth, 27031 # @current_sequence_number, @depth_array, @starting_line_of_current_depth, 27032 # $statement_type 27033 my $bb; 27034 $current_depth[$aa]++; 27035 $total_depth++; 27036 $total_depth[$aa][ $current_depth[$aa] ] = $total_depth; 27037 my $input_line_number = $tokenizer_self->{_last_line_number}; 27038 my $input_line = $tokenizer_self->{_line_text}; 27039 27040 # Sequence numbers increment by number of items. This keeps 27041 # a unique set of numbers but still allows the relative location 27042 # of any type to be determined. 27043 $nesting_sequence_number[$aa] += scalar(@closing_brace_names); 27044 my $seqno = $nesting_sequence_number[$aa]; 27045 $current_sequence_number[$aa][ $current_depth[$aa] ] = $seqno; 27046 27047 $starting_line_of_current_depth[$aa][ $current_depth[$aa] ] = 27048 [ $input_line_number, $input_line, $pos ]; 27049 27050 for $bb ( 0 .. $#closing_brace_names ) { 27051 next if ( $bb == $aa ); 27052 $depth_array[$aa][$bb][ $current_depth[$aa] ] = $current_depth[$bb]; 27053 } 27054 27055 # set a flag for indenting a nested ternary statement 27056 my $indent = 0; 27057 if ( $aa == QUESTION_COLON ) { 27058 $nested_ternary_flag[ $current_depth[$aa] ] = 0; 27059 if ( $current_depth[$aa] > 1 ) { 27060 if ( $nested_ternary_flag[ $current_depth[$aa] - 1 ] == 0 ) { 27061 my $pdepth = $total_depth[$aa][ $current_depth[$aa] - 1 ]; 27062 if ( $pdepth == $total_depth - 1 ) { 27063 $indent = 1; 27064 $nested_ternary_flag[ $current_depth[$aa] - 1 ] = -1; 27065 } 27066 } 27067 } 27068 } 27069 $nested_statement_type[$aa][ $current_depth[$aa] ] = $statement_type; 27070 $statement_type = ""; 27071 return ( $seqno, $indent ); 27072} 27073 27074sub decrease_nesting_depth { 27075 27076 my ( $aa, $pos ) = @_; 27077 27078 # USES GLOBAL VARIABLES: $tokenizer_self, @current_depth, 27079 # @current_sequence_number, @depth_array, @starting_line_of_current_depth 27080 # $statement_type 27081 my $bb; 27082 my $seqno = 0; 27083 my $input_line_number = $tokenizer_self->{_last_line_number}; 27084 my $input_line = $tokenizer_self->{_line_text}; 27085 27086 my $outdent = 0; 27087 $total_depth--; 27088 if ( $current_depth[$aa] > 0 ) { 27089 27090 # set a flag for un-indenting after seeing a nested ternary statement 27091 $seqno = $current_sequence_number[$aa][ $current_depth[$aa] ]; 27092 if ( $aa == QUESTION_COLON ) { 27093 $outdent = $nested_ternary_flag[ $current_depth[$aa] ]; 27094 } 27095 $statement_type = $nested_statement_type[$aa][ $current_depth[$aa] ]; 27096 27097 # check that any brace types $bb contained within are balanced 27098 for $bb ( 0 .. $#closing_brace_names ) { 27099 next if ( $bb == $aa ); 27100 27101 unless ( $depth_array[$aa][$bb][ $current_depth[$aa] ] == 27102 $current_depth[$bb] ) 27103 { 27104 my $diff = 27105 $current_depth[$bb] - 27106 $depth_array[$aa][$bb][ $current_depth[$aa] ]; 27107 27108 # don't whine too many times 27109 my $saw_brace_error = get_saw_brace_error(); 27110 if ( 27111 $saw_brace_error <= MAX_NAG_MESSAGES 27112 27113 # if too many closing types have occurred, we probably 27114 # already caught this error 27115 && ( ( $diff > 0 ) || ( $saw_brace_error <= 0 ) ) 27116 ) 27117 { 27118 interrupt_logfile(); 27119 my $rsl = 27120 $starting_line_of_current_depth[$aa] 27121 [ $current_depth[$aa] ]; 27122 my $sl = $$rsl[0]; 27123 my $rel = [ $input_line_number, $input_line, $pos ]; 27124 my $el = $$rel[0]; 27125 my ($ess); 27126 27127 if ( $diff == 1 || $diff == -1 ) { 27128 $ess = ''; 27129 } 27130 else { 27131 $ess = 's'; 27132 } 27133 my $bname = 27134 ( $diff > 0 ) 27135 ? $opening_brace_names[$bb] 27136 : $closing_brace_names[$bb]; 27137 write_error_indicator_pair( @$rsl, '^' ); 27138 my $msg = <<"EOM"; 27139Found $diff extra $bname$ess between $opening_brace_names[$aa] on line $sl and $closing_brace_names[$aa] on line $el 27140EOM 27141 27142 if ( $diff > 0 ) { 27143 my $rml = 27144 $starting_line_of_current_depth[$bb] 27145 [ $current_depth[$bb] ]; 27146 my $ml = $$rml[0]; 27147 $msg .= 27148" The most recent un-matched $bname is on line $ml\n"; 27149 write_error_indicator_pair( @$rml, '^' ); 27150 } 27151 write_error_indicator_pair( @$rel, '^' ); 27152 warning($msg); 27153 resume_logfile(); 27154 } 27155 increment_brace_error(); 27156 } 27157 } 27158 $current_depth[$aa]--; 27159 } 27160 else { 27161 27162 my $saw_brace_error = get_saw_brace_error(); 27163 if ( $saw_brace_error <= MAX_NAG_MESSAGES ) { 27164 my $msg = <<"EOM"; 27165There is no previous $opening_brace_names[$aa] to match a $closing_brace_names[$aa] on line $input_line_number 27166EOM 27167 indicate_error( $msg, $input_line_number, $input_line, $pos, '^' ); 27168 } 27169 increment_brace_error(); 27170 } 27171 return ( $seqno, $outdent ); 27172} 27173 27174sub check_final_nesting_depths { 27175 my ($aa); 27176 27177 # USES GLOBAL VARIABLES: @current_depth, @starting_line_of_current_depth 27178 27179 for $aa ( 0 .. $#closing_brace_names ) { 27180 27181 if ( $current_depth[$aa] ) { 27182 my $rsl = 27183 $starting_line_of_current_depth[$aa][ $current_depth[$aa] ]; 27184 my $sl = $$rsl[0]; 27185 my $msg = <<"EOM"; 27186Final nesting depth of $opening_brace_names[$aa]s is $current_depth[$aa] 27187The most recent un-matched $opening_brace_names[$aa] is on line $sl 27188EOM 27189 indicate_error( $msg, @$rsl, '^' ); 27190 increment_brace_error(); 27191 } 27192 } 27193} 27194 27195#########i############################################################# 27196# Tokenizer routines for looking ahead in input stream 27197####################################################################### 27198 27199sub peek_ahead_for_n_nonblank_pre_tokens { 27200 27201 # returns next n pretokens if they exist 27202 # returns undef's if hits eof without seeing any pretokens 27203 # USES GLOBAL VARIABLES: $tokenizer_self 27204 my $max_pretokens = shift; 27205 my $line; 27206 my $i = 0; 27207 my ( $rpre_tokens, $rmap, $rpre_types ); 27208 27209 while ( $line = $tokenizer_self->{_line_buffer_object}->peek_ahead( $i++ ) ) 27210 { 27211 $line =~ s/^\s*//; # trim leading blanks 27212 next if ( length($line) <= 0 ); # skip blank 27213 next if ( $line =~ /^#/ ); # skip comment 27214 ( $rpre_tokens, $rmap, $rpre_types ) = 27215 pre_tokenize( $line, $max_pretokens ); 27216 last; 27217 } 27218 return ( $rpre_tokens, $rpre_types ); 27219} 27220 27221# look ahead for next non-blank, non-comment line of code 27222sub peek_ahead_for_nonblank_token { 27223 27224 # USES GLOBAL VARIABLES: $tokenizer_self 27225 my ( $rtokens, $max_token_index ) = @_; 27226 my $line; 27227 my $i = 0; 27228 27229 while ( $line = $tokenizer_self->{_line_buffer_object}->peek_ahead( $i++ ) ) 27230 { 27231 $line =~ s/^\s*//; # trim leading blanks 27232 next if ( length($line) <= 0 ); # skip blank 27233 next if ( $line =~ /^#/ ); # skip comment 27234 my ( $rtok, $rmap, $rtype ) = 27235 pre_tokenize( $line, 2 ); # only need 2 pre-tokens 27236 my $j = $max_token_index + 1; 27237 my $tok; 27238 27239 foreach $tok (@$rtok) { 27240 last if ( $tok =~ "\n" ); 27241 $$rtokens[ ++$j ] = $tok; 27242 } 27243 last; 27244 } 27245 return $rtokens; 27246} 27247 27248#########i############################################################# 27249# Tokenizer guessing routines for ambiguous situations 27250####################################################################### 27251 27252sub guess_if_pattern_or_conditional { 27253 27254 # this routine is called when we have encountered a ? following an 27255 # unknown bareword, and we must decide if it starts a pattern or not 27256 # input parameters: 27257 # $i - token index of the ? starting possible pattern 27258 # output parameters: 27259 # $is_pattern = 0 if probably not pattern, =1 if probably a pattern 27260 # msg = a warning or diagnostic message 27261 # USES GLOBAL VARIABLES: $last_nonblank_token 27262 my ( $i, $rtokens, $rtoken_map, $max_token_index ) = @_; 27263 my $is_pattern = 0; 27264 my $msg = "guessing that ? after $last_nonblank_token starts a "; 27265 27266 if ( $i >= $max_token_index ) { 27267 $msg .= "conditional (no end to pattern found on the line)\n"; 27268 } 27269 else { 27270 my $ibeg = $i; 27271 $i = $ibeg + 1; 27272 my $next_token = $$rtokens[$i]; # first token after ? 27273 27274 # look for a possible ending ? on this line.. 27275 my $in_quote = 1; 27276 my $quote_depth = 0; 27277 my $quote_character = ''; 27278 my $quote_pos = 0; 27279 my $quoted_string; 27280 ( 27281 $i, $in_quote, $quote_character, $quote_pos, $quote_depth, 27282 $quoted_string 27283 ) 27284 = follow_quoted_string( $ibeg, $in_quote, $rtokens, $quote_character, 27285 $quote_pos, $quote_depth, $max_token_index ); 27286 27287 if ($in_quote) { 27288 27289 # we didn't find an ending ? on this line, 27290 # so we bias towards conditional 27291 $is_pattern = 0; 27292 $msg .= "conditional (no ending ? on this line)\n"; 27293 27294 # we found an ending ?, so we bias towards a pattern 27295 } 27296 else { 27297 27298 if ( pattern_expected( $i, $rtokens, $max_token_index ) >= 0 ) { 27299 $is_pattern = 1; 27300 $msg .= "pattern (found ending ? and pattern expected)\n"; 27301 } 27302 else { 27303 $msg .= "pattern (uncertain, but found ending ?)\n"; 27304 } 27305 } 27306 } 27307 return ( $is_pattern, $msg ); 27308} 27309 27310sub guess_if_pattern_or_division { 27311 27312 # this routine is called when we have encountered a / following an 27313 # unknown bareword, and we must decide if it starts a pattern or is a 27314 # division 27315 # input parameters: 27316 # $i - token index of the / starting possible pattern 27317 # output parameters: 27318 # $is_pattern = 0 if probably division, =1 if probably a pattern 27319 # msg = a warning or diagnostic message 27320 # USES GLOBAL VARIABLES: $last_nonblank_token 27321 my ( $i, $rtokens, $rtoken_map, $max_token_index ) = @_; 27322 my $is_pattern = 0; 27323 my $msg = "guessing that / after $last_nonblank_token starts a "; 27324 27325 if ( $i >= $max_token_index ) { 27326 "division (no end to pattern found on the line)\n"; 27327 } 27328 else { 27329 my $ibeg = $i; 27330 my $divide_expected = 27331 numerator_expected( $i, $rtokens, $max_token_index ); 27332 $i = $ibeg + 1; 27333 my $next_token = $$rtokens[$i]; # first token after slash 27334 27335 # look for a possible ending / on this line.. 27336 my $in_quote = 1; 27337 my $quote_depth = 0; 27338 my $quote_character = ''; 27339 my $quote_pos = 0; 27340 my $quoted_string; 27341 ( 27342 $i, $in_quote, $quote_character, $quote_pos, $quote_depth, 27343 $quoted_string 27344 ) 27345 = follow_quoted_string( $ibeg, $in_quote, $rtokens, $quote_character, 27346 $quote_pos, $quote_depth, $max_token_index ); 27347 27348 if ($in_quote) { 27349 27350 # we didn't find an ending / on this line, 27351 # so we bias towards division 27352 if ( $divide_expected >= 0 ) { 27353 $is_pattern = 0; 27354 $msg .= "division (no ending / on this line)\n"; 27355 } 27356 else { 27357 $msg = "multi-line pattern (division not possible)\n"; 27358 $is_pattern = 1; 27359 } 27360 27361 } 27362 27363 # we found an ending /, so we bias towards a pattern 27364 else { 27365 27366 if ( pattern_expected( $i, $rtokens, $max_token_index ) >= 0 ) { 27367 27368 if ( $divide_expected >= 0 ) { 27369 27370 if ( $i - $ibeg > 60 ) { 27371 $msg .= "division (matching / too distant)\n"; 27372 $is_pattern = 0; 27373 } 27374 else { 27375 $msg .= "pattern (but division possible too)\n"; 27376 $is_pattern = 1; 27377 } 27378 } 27379 else { 27380 $is_pattern = 1; 27381 $msg .= "pattern (division not possible)\n"; 27382 } 27383 } 27384 else { 27385 27386 if ( $divide_expected >= 0 ) { 27387 $is_pattern = 0; 27388 $msg .= "division (pattern not possible)\n"; 27389 } 27390 else { 27391 $is_pattern = 1; 27392 $msg .= 27393 "pattern (uncertain, but division would not work here)\n"; 27394 } 27395 } 27396 } 27397 } 27398 return ( $is_pattern, $msg ); 27399} 27400 27401# try to resolve here-doc vs. shift by looking ahead for 27402# non-code or the end token (currently only looks for end token) 27403# returns 1 if it is probably a here doc, 0 if not 27404sub guess_if_here_doc { 27405 27406 # This is how many lines we will search for a target as part of the 27407 # guessing strategy. It is a constant because there is probably 27408 # little reason to change it. 27409 # USES GLOBAL VARIABLES: $tokenizer_self, $current_package 27410 # %is_constant, 27411 use constant HERE_DOC_WINDOW => 40; 27412 27413 my $next_token = shift; 27414 my $here_doc_expected = 0; 27415 my $line; 27416 my $k = 0; 27417 my $msg = "checking <<"; 27418 27419 while ( $line = $tokenizer_self->{_line_buffer_object}->peek_ahead( $k++ ) ) 27420 { 27421 chomp $line; 27422 27423 if ( $line =~ /^$next_token$/ ) { 27424 $msg .= " -- found target $next_token ahead $k lines\n"; 27425 $here_doc_expected = 1; # got it 27426 last; 27427 } 27428 last if ( $k >= HERE_DOC_WINDOW ); 27429 } 27430 27431 unless ($here_doc_expected) { 27432 27433 if ( !defined($line) ) { 27434 $here_doc_expected = -1; # hit eof without seeing target 27435 $msg .= " -- must be shift; target $next_token not in file\n"; 27436 27437 } 27438 else { # still unsure..taking a wild guess 27439 27440 if ( !$is_constant{$current_package}{$next_token} ) { 27441 $here_doc_expected = 1; 27442 $msg .= 27443 " -- guessing it's a here-doc ($next_token not a constant)\n"; 27444 } 27445 else { 27446 $msg .= 27447 " -- guessing it's a shift ($next_token is a constant)\n"; 27448 } 27449 } 27450 } 27451 write_logfile_entry($msg); 27452 return $here_doc_expected; 27453} 27454 27455#########i############################################################# 27456# Tokenizer Routines for scanning identifiers and related items 27457####################################################################### 27458 27459sub scan_bare_identifier_do { 27460 27461 # this routine is called to scan a token starting with an alphanumeric 27462 # variable or package separator, :: or '. 27463 # USES GLOBAL VARIABLES: $current_package, $last_nonblank_token, 27464 # $last_nonblank_type,@paren_type, $paren_depth 27465 27466 my ( $input_line, $i, $tok, $type, $prototype, $rtoken_map, 27467 $max_token_index ) 27468 = @_; 27469 my $i_begin = $i; 27470 my $package = undef; 27471 27472 my $i_beg = $i; 27473 27474 # we have to back up one pretoken at a :: since each : is one pretoken 27475 if ( $tok eq '::' ) { $i_beg-- } 27476 if ( $tok eq '->' ) { $i_beg-- } 27477 my $pos_beg = $$rtoken_map[$i_beg]; 27478 pos($input_line) = $pos_beg; 27479 27480 # Examples: 27481 # A::B::C 27482 # A:: 27483 # ::A 27484 # A'B 27485 if ( $input_line =~ m/\G\s*((?:\w*(?:'|::)))*(?:(?:->)?(\w+))?/gc ) { 27486 27487 my $pos = pos($input_line); 27488 my $numc = $pos - $pos_beg; 27489 $tok = substr( $input_line, $pos_beg, $numc ); 27490 27491 # type 'w' includes anything without leading type info 27492 # ($,%,@,*) including something like abc::def::ghi 27493 $type = 'w'; 27494 27495 my $sub_name = ""; 27496 if ( defined($2) ) { $sub_name = $2; } 27497 if ( defined($1) ) { 27498 $package = $1; 27499 27500 # patch: don't allow isolated package name which just ends 27501 # in the old style package separator (single quote). Example: 27502 # use CGI':all'; 27503 if ( !($sub_name) && substr( $package, -1, 1 ) eq '\'' ) { 27504 $pos--; 27505 } 27506 27507 $package =~ s/\'/::/g; 27508 if ( $package =~ /^\:/ ) { $package = 'main' . $package } 27509 $package =~ s/::$//; 27510 } 27511 else { 27512 $package = $current_package; 27513 27514 if ( $is_keyword{$tok} ) { 27515 $type = 'k'; 27516 } 27517 } 27518 27519 # if it is a bareword.. 27520 if ( $type eq 'w' ) { 27521 27522 # check for v-string with leading 'v' type character 27523 # (This seems to have presidence over filehandle, type 'Y') 27524 if ( $tok =~ /^v\d[_\d]*$/ ) { 27525 27526 # we only have the first part - something like 'v101' - 27527 # look for more 27528 if ( $input_line =~ m/\G(\.\d[_\d]*)+/gc ) { 27529 $pos = pos($input_line); 27530 $numc = $pos - $pos_beg; 27531 $tok = substr( $input_line, $pos_beg, $numc ); 27532 } 27533 $type = 'v'; 27534 27535 # warn if this version can't handle v-strings 27536 report_v_string($tok); 27537 } 27538 27539 elsif ( $is_constant{$package}{$sub_name} ) { 27540 $type = 'C'; 27541 } 27542 27543 # bareword after sort has implied empty prototype; for example: 27544 # @sorted = sort numerically ( 53, 29, 11, 32, 7 ); 27545 # This has priority over whatever the user has specified. 27546 elsif ($last_nonblank_token eq 'sort' 27547 && $last_nonblank_type eq 'k' ) 27548 { 27549 $type = 'Z'; 27550 } 27551 27552 # Note: strangely, perl does not seem to really let you create 27553 # functions which act like eval and do, in the sense that eval 27554 # and do may have operators following the final }, but any operators 27555 # that you create with prototype (&) apparently do not allow 27556 # trailing operators, only terms. This seems strange. 27557 # If this ever changes, here is the update 27558 # to make perltidy behave accordingly: 27559 27560 # elsif ( $is_block_function{$package}{$tok} ) { 27561 # $tok='eval'; # patch to do braces like eval - doesn't work 27562 # $type = 'k'; 27563 #} 27564 # FIXME: This could become a separate type to allow for different 27565 # future behavior: 27566 elsif ( $is_block_function{$package}{$sub_name} ) { 27567 $type = 'G'; 27568 } 27569 27570 elsif ( $is_block_list_function{$package}{$sub_name} ) { 27571 $type = 'G'; 27572 } 27573 elsif ( $is_user_function{$package}{$sub_name} ) { 27574 $type = 'U'; 27575 $prototype = $user_function_prototype{$package}{$sub_name}; 27576 } 27577 27578 # check for indirect object 27579 elsif ( 27580 27581 # added 2001-03-27: must not be followed immediately by '(' 27582 # see fhandle.t 27583 ( $input_line !~ m/\G\(/gc ) 27584 27585 # and 27586 && ( 27587 27588 # preceded by keyword like 'print', 'printf' and friends 27589 $is_indirect_object_taker{$last_nonblank_token} 27590 27591 # or preceded by something like 'print(' or 'printf(' 27592 || ( 27593 ( $last_nonblank_token eq '(' ) 27594 && $is_indirect_object_taker{ $paren_type[$paren_depth] 27595 } 27596 27597 ) 27598 ) 27599 ) 27600 { 27601 27602 # may not be indirect object unless followed by a space 27603 if ( $input_line =~ m/\G\s+/gc ) { 27604 $type = 'Y'; 27605 27606 # Abandon Hope ... 27607 # Perl's indirect object notation is a very bad 27608 # thing and can cause subtle bugs, especially for 27609 # beginning programmers. And I haven't even been 27610 # able to figure out a sane warning scheme which 27611 # doesn't get in the way of good scripts. 27612 27613 # Complain if a filehandle has any lower case 27614 # letters. This is suggested good practice. 27615 # Use 'sub_name' because something like 27616 # main::MYHANDLE is ok for filehandle 27617 if ( $sub_name =~ /[a-z]/ ) { 27618 27619 # could be bug caused by older perltidy if 27620 # followed by '(' 27621 if ( $input_line =~ m/\G\s*\(/gc ) { 27622 complain( 27623"Caution: unknown word '$tok' in indirect object slot\n" 27624 ); 27625 } 27626 } 27627 } 27628 27629 # bareword not followed by a space -- may not be filehandle 27630 # (may be function call defined in a 'use' statement) 27631 else { 27632 $type = 'Z'; 27633 } 27634 } 27635 } 27636 27637 # Now we must convert back from character position 27638 # to pre_token index. 27639 # I don't think an error flag can occur here ..but who knows 27640 my $error; 27641 ( $i, $error ) = 27642 inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index ); 27643 if ($error) { 27644 warning("scan_bare_identifier: Possibly invalid tokenization\n"); 27645 } 27646 } 27647 27648 # no match but line not blank - could be syntax error 27649 # perl will take '::' alone without complaint 27650 else { 27651 $type = 'w'; 27652 27653 # change this warning to log message if it becomes annoying 27654 warning("didn't find identifier after leading ::\n"); 27655 } 27656 return ( $i, $tok, $type, $prototype ); 27657} 27658 27659sub scan_id_do { 27660 27661# This is the new scanner and will eventually replace scan_identifier. 27662# Only type 'sub' and 'package' are implemented. 27663# Token types $ * % @ & -> are not yet implemented. 27664# 27665# Scan identifier following a type token. 27666# The type of call depends on $id_scan_state: $id_scan_state = '' 27667# for starting call, in which case $tok must be the token defining 27668# the type. 27669# 27670# If the type token is the last nonblank token on the line, a value 27671# of $id_scan_state = $tok is returned, indicating that further 27672# calls must be made to get the identifier. If the type token is 27673# not the last nonblank token on the line, the identifier is 27674# scanned and handled and a value of '' is returned. 27675# USES GLOBAL VARIABLES: $current_package, $last_nonblank_token, $in_attribute_list, 27676# $statement_type, $tokenizer_self 27677 27678 my ( $input_line, $i, $tok, $rtokens, $rtoken_map, $id_scan_state, 27679 $max_token_index ) 27680 = @_; 27681 my $type = ''; 27682 my ( $i_beg, $pos_beg ); 27683 27684 #print "NSCAN:entering i=$i, tok=$tok, type=$type, state=$id_scan_state\n"; 27685 #my ($a,$b,$c) = caller; 27686 #print "NSCAN: scan_id called with tok=$tok $a $b $c\n"; 27687 27688 # on re-entry, start scanning at first token on the line 27689 if ($id_scan_state) { 27690 $i_beg = $i; 27691 $type = ''; 27692 } 27693 27694 # on initial entry, start scanning just after type token 27695 else { 27696 $i_beg = $i + 1; 27697 $id_scan_state = $tok; 27698 $type = 't'; 27699 } 27700 27701 # find $i_beg = index of next nonblank token, 27702 # and handle empty lines 27703 my $blank_line = 0; 27704 my $next_nonblank_token = $$rtokens[$i_beg]; 27705 if ( $i_beg > $max_token_index ) { 27706 $blank_line = 1; 27707 } 27708 else { 27709 27710 # only a '#' immediately after a '$' is not a comment 27711 if ( $next_nonblank_token eq '#' ) { 27712 unless ( $tok eq '$' ) { 27713 $blank_line = 1; 27714 } 27715 } 27716 27717 if ( $next_nonblank_token =~ /^\s/ ) { 27718 ( $next_nonblank_token, $i_beg ) = 27719 find_next_nonblank_token_on_this_line( $i_beg, $rtokens, 27720 $max_token_index ); 27721 if ( $next_nonblank_token =~ /(^#|^\s*$)/ ) { 27722 $blank_line = 1; 27723 } 27724 } 27725 } 27726 27727 # handle non-blank line; identifier, if any, must follow 27728 unless ($blank_line) { 27729 27730 if ( $id_scan_state eq 'sub' ) { 27731 ( $i, $tok, $type, $id_scan_state ) = do_scan_sub( 27732 $input_line, $i, $i_beg, 27733 $tok, $type, $rtokens, 27734 $rtoken_map, $id_scan_state, $max_token_index 27735 ); 27736 } 27737 27738 elsif ( $id_scan_state eq 'package' ) { 27739 ( $i, $tok, $type ) = 27740 do_scan_package( $input_line, $i, $i_beg, $tok, $type, $rtokens, 27741 $rtoken_map, $max_token_index ); 27742 $id_scan_state = ''; 27743 } 27744 27745 else { 27746 warning("invalid token in scan_id: $tok\n"); 27747 $id_scan_state = ''; 27748 } 27749 } 27750 27751 if ( $id_scan_state && ( !defined($type) || !$type ) ) { 27752 27753 # shouldn't happen: 27754 warning( 27755"Program bug in scan_id: undefined type but scan_state=$id_scan_state\n" 27756 ); 27757 report_definite_bug(); 27758 } 27759 27760 TOKENIZER_DEBUG_FLAG_NSCAN && do { 27761 print STDOUT 27762 "NSCAN: returns i=$i, tok=$tok, type=$type, state=$id_scan_state\n"; 27763 }; 27764 return ( $i, $tok, $type, $id_scan_state ); 27765} 27766 27767sub check_prototype { 27768 my ( $proto, $package, $subname ) = @_; 27769 return unless ( defined($package) && defined($subname) ); 27770 if ( defined($proto) ) { 27771 $proto =~ s/^\s*\(\s*//; 27772 $proto =~ s/\s*\)$//; 27773 if ($proto) { 27774 $is_user_function{$package}{$subname} = 1; 27775 $user_function_prototype{$package}{$subname} = "($proto)"; 27776 27777 # prototypes containing '&' must be treated specially.. 27778 if ( $proto =~ /\&/ ) { 27779 27780 # right curly braces of prototypes ending in 27781 # '&' may be followed by an operator 27782 if ( $proto =~ /\&$/ ) { 27783 $is_block_function{$package}{$subname} = 1; 27784 } 27785 27786 # right curly braces of prototypes NOT ending in 27787 # '&' may NOT be followed by an operator 27788 elsif ( $proto !~ /\&$/ ) { 27789 $is_block_list_function{$package}{$subname} = 1; 27790 } 27791 } 27792 } 27793 else { 27794 $is_constant{$package}{$subname} = 1; 27795 } 27796 } 27797 else { 27798 $is_user_function{$package}{$subname} = 1; 27799 } 27800} 27801 27802sub do_scan_package { 27803 27804 # do_scan_package parses a package name 27805 # it is called with $i_beg equal to the index of the first nonblank 27806 # token following a 'package' token. 27807 # USES GLOBAL VARIABLES: $current_package, 27808 27809 # package NAMESPACE 27810 # package NAMESPACE VERSION 27811 # package NAMESPACE BLOCK 27812 # package NAMESPACE VERSION BLOCK 27813 # 27814 # If VERSION is provided, package sets the $VERSION variable in the given 27815 # namespace to a version object with the VERSION provided. VERSION must be 27816 # a "strict" style version number as defined by the version module: a 27817 # positive decimal number (integer or decimal-fraction) without 27818 # exponentiation or else a dotted-decimal v-string with a leading 'v' 27819 # character and at least three components. 27820 # reference http://perldoc.perl.org/functions/package.html 27821 27822 my ( $input_line, $i, $i_beg, $tok, $type, $rtokens, $rtoken_map, 27823 $max_token_index ) 27824 = @_; 27825 my $package = undef; 27826 my $pos_beg = $$rtoken_map[$i_beg]; 27827 pos($input_line) = $pos_beg; 27828 27829 # handle non-blank line; package name, if any, must follow 27830 if ( $input_line =~ m/\G\s*((?:\w*(?:'|::))*\w+)/gc ) { 27831 $package = $1; 27832 $package = ( defined($1) && $1 ) ? $1 : 'main'; 27833 $package =~ s/\'/::/g; 27834 if ( $package =~ /^\:/ ) { $package = 'main' . $package } 27835 $package =~ s/::$//; 27836 my $pos = pos($input_line); 27837 my $numc = $pos - $pos_beg; 27838 $tok = 'package ' . substr( $input_line, $pos_beg, $numc ); 27839 $type = 'i'; 27840 27841 # Now we must convert back from character position 27842 # to pre_token index. 27843 # I don't think an error flag can occur here ..but ? 27844 my $error; 27845 ( $i, $error ) = 27846 inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index ); 27847 if ($error) { warning("Possibly invalid package\n") } 27848 $current_package = $package; 27849 27850 # we should now have package NAMESPACE 27851 # now expecting VERSION, BLOCK, or ; to follow ... 27852 # package NAMESPACE VERSION 27853 # package NAMESPACE BLOCK 27854 # package NAMESPACE VERSION BLOCK 27855 my ( $next_nonblank_token, $i_next ) = 27856 find_next_nonblank_token( $i, $rtokens, $max_token_index ); 27857 if ( $next_nonblank_token =~ /^[v\.\d;\{\}]$/ ) { 27858 $statement_type = $tok; 27859 } 27860 else { 27861 warning( 27862 "Unexpected '$next_nonblank_token' after package name '$tok'\n" 27863 ); 27864 } 27865 } 27866 27867 # no match but line not blank -- 27868 # could be a label with name package, like package: , for example. 27869 else { 27870 $type = 'k'; 27871 } 27872 27873 return ( $i, $tok, $type ); 27874} 27875 27876sub scan_identifier_do { 27877 27878 # This routine assembles tokens into identifiers. It maintains a 27879 # scan state, id_scan_state. It updates id_scan_state based upon 27880 # current id_scan_state and token, and returns an updated 27881 # id_scan_state and the next index after the identifier. 27882 # USES GLOBAL VARIABLES: $context, $last_nonblank_token, 27883 # $last_nonblank_type 27884 27885 my ( $i, $id_scan_state, $identifier, $rtokens, $max_token_index, 27886 $expecting ) 27887 = @_; 27888 my $i_begin = $i; 27889 my $type = ''; 27890 my $tok_begin = $$rtokens[$i_begin]; 27891 if ( $tok_begin eq ':' ) { $tok_begin = '::' } 27892 my $id_scan_state_begin = $id_scan_state; 27893 my $identifier_begin = $identifier; 27894 my $tok = $tok_begin; 27895 my $message = ""; 27896 27897 # these flags will be used to help figure out the type: 27898 my $saw_alpha = ( $tok =~ /^[A-Za-z_]/ ); 27899 my $saw_type; 27900 27901 # allow old package separator (') except in 'use' statement 27902 my $allow_tick = ( $last_nonblank_token ne 'use' ); 27903 27904 # get started by defining a type and a state if necessary 27905 unless ($id_scan_state) { 27906 $context = UNKNOWN_CONTEXT; 27907 27908 # fixup for digraph 27909 if ( $tok eq '>' ) { 27910 $tok = '->'; 27911 $tok_begin = $tok; 27912 } 27913 $identifier = $tok; 27914 27915 if ( $tok eq '$' || $tok eq '*' ) { 27916 $id_scan_state = '$'; 27917 $context = SCALAR_CONTEXT; 27918 } 27919 elsif ( $tok eq '%' || $tok eq '@' ) { 27920 $id_scan_state = '$'; 27921 $context = LIST_CONTEXT; 27922 } 27923 elsif ( $tok eq '&' ) { 27924 $id_scan_state = '&'; 27925 } 27926 elsif ( $tok eq 'sub' or $tok eq 'package' ) { 27927 $saw_alpha = 0; # 'sub' is considered type info here 27928 $id_scan_state = '$'; 27929 $identifier .= ' '; # need a space to separate sub from sub name 27930 } 27931 elsif ( $tok eq '::' ) { 27932 $id_scan_state = 'A'; 27933 } 27934 elsif ( $tok =~ /^[A-Za-z_]/ ) { 27935 $id_scan_state = ':'; 27936 } 27937 elsif ( $tok eq '->' ) { 27938 $id_scan_state = '$'; 27939 } 27940 else { 27941 27942 # shouldn't happen 27943 my ( $a, $b, $c ) = caller; 27944 warning("Program Bug: scan_identifier given bad token = $tok \n"); 27945 warning(" called from sub $a line: $c\n"); 27946 report_definite_bug(); 27947 } 27948 $saw_type = !$saw_alpha; 27949 } 27950 else { 27951 $i--; 27952 $saw_type = ( $tok =~ /([\$\%\@\*\&])/ ); 27953 } 27954 27955 # now loop to gather the identifier 27956 my $i_save = $i; 27957 27958 while ( $i < $max_token_index ) { 27959 $i_save = $i unless ( $tok =~ /^\s*$/ ); 27960 $tok = $$rtokens[ ++$i ]; 27961 27962 if ( ( $tok eq ':' ) && ( $$rtokens[ $i + 1 ] eq ':' ) ) { 27963 $tok = '::'; 27964 $i++; 27965 } 27966 27967 if ( $id_scan_state eq '$' ) { # starting variable name 27968 27969 if ( $tok eq '$' ) { 27970 27971 $identifier .= $tok; 27972 27973 # we've got a punctuation variable if end of line (punct.t) 27974 if ( $i == $max_token_index ) { 27975 $type = 'i'; 27976 $id_scan_state = ''; 27977 last; 27978 } 27979 } 27980 elsif ( $tok =~ /^[A-Za-z_]/ ) { # alphanumeric .. 27981 $saw_alpha = 1; 27982 $id_scan_state = ':'; # now need :: 27983 $identifier .= $tok; 27984 } 27985 elsif ( $tok eq "'" && $allow_tick ) { # alphanumeric .. 27986 $saw_alpha = 1; 27987 $id_scan_state = ':'; # now need :: 27988 $identifier .= $tok; 27989 27990 # Perl will accept leading digits in identifiers, 27991 # although they may not always produce useful results. 27992 # Something like $main::0 is ok. But this also works: 27993 # 27994 # sub howdy::123::bubba{ print "bubba $54321!\n" } 27995 # howdy::123::bubba(); 27996 # 27997 } 27998 elsif ( $tok =~ /^[0-9]/ ) { # numeric 27999 $saw_alpha = 1; 28000 $id_scan_state = ':'; # now need :: 28001 $identifier .= $tok; 28002 } 28003 elsif ( $tok eq '::' ) { 28004 $id_scan_state = 'A'; 28005 $identifier .= $tok; 28006 } 28007 elsif ( ( $tok eq '#' ) && ( $identifier eq '$' ) ) { # $#array 28008 $identifier .= $tok; # keep same state, a $ could follow 28009 } 28010 elsif ( $tok eq '{' ) { 28011 28012 # check for something like ${#} or ${�} 28013 ##if ( $identifier eq '$' 28014 if ( 28015 ( 28016 $identifier eq '$' 28017 || $identifier eq '@' 28018 || $identifier eq '$#' 28019 ) 28020 && $i + 2 <= $max_token_index 28021 && $$rtokens[ $i + 2 ] eq '}' 28022 && $$rtokens[ $i + 1 ] !~ /[\s\w]/ 28023 ) 28024 { 28025 my $next2 = $$rtokens[ $i + 2 ]; 28026 my $next1 = $$rtokens[ $i + 1 ]; 28027 $identifier .= $tok . $next1 . $next2; 28028 $i += 2; 28029 $id_scan_state = ''; 28030 last; 28031 } 28032 28033 # skip something like ${xxx} or ->{ 28034 $id_scan_state = ''; 28035 28036 # if this is the first token of a line, any tokens for this 28037 # identifier have already been accumulated 28038 if ( $identifier eq '$' || $i == 0 ) { $identifier = ''; } 28039 $i = $i_save; 28040 last; 28041 } 28042 28043 # space ok after leading $ % * & @ 28044 elsif ( $tok =~ /^\s*$/ ) { 28045 28046 if ( $identifier =~ /^[\$\%\*\&\@]/ ) { 28047 28048 if ( length($identifier) > 1 ) { 28049 $id_scan_state = ''; 28050 $i = $i_save; 28051 $type = 'i'; # probably punctuation variable 28052 last; 28053 } 28054 else { 28055 28056 # spaces after $'s are common, and space after @ 28057 # is harmless, so only complain about space 28058 # after other type characters. Space after $ and 28059 # @ will be removed in formatting. Report space 28060 # after % and * because they might indicate a 28061 # parsing error. In other words '% ' might be a 28062 # modulo operator. Delete this warning if it 28063 # gets annoying. 28064 if ( $identifier !~ /^[\@\$]$/ ) { 28065 $message = 28066 "Space in identifier, following $identifier\n"; 28067 } 28068 } 28069 } 28070 28071 # else: 28072 # space after '->' is ok 28073 } 28074 elsif ( $tok eq '^' ) { 28075 28076 # check for some special variables like $^W 28077 if ( $identifier =~ /^[\$\*\@\%]$/ ) { 28078 $identifier .= $tok; 28079 $id_scan_state = 'A'; 28080 28081 # Perl accepts '$^]' or '@^]', but 28082 # there must not be a space before the ']'. 28083 my $next1 = $$rtokens[ $i + 1 ]; 28084 if ( $next1 eq ']' ) { 28085 $i++; 28086 $identifier .= $next1; 28087 $id_scan_state = ""; 28088 last; 28089 } 28090 } 28091 else { 28092 $id_scan_state = ''; 28093 } 28094 } 28095 else { # something else 28096 28097 # check for various punctuation variables 28098 if ( $identifier =~ /^[\$\*\@\%]$/ ) { 28099 $identifier .= $tok; 28100 } 28101 28102 elsif ( $identifier eq '$#' ) { 28103 28104 if ( $tok eq '{' ) { $type = 'i'; $i = $i_save } 28105 28106 # perl seems to allow just these: $#: $#- $#+ 28107 elsif ( $tok =~ /^[\:\-\+]$/ ) { 28108 $type = 'i'; 28109 $identifier .= $tok; 28110 } 28111 else { 28112 $i = $i_save; 28113 write_logfile_entry( 'Use of $# is deprecated' . "\n" ); 28114 } 28115 } 28116 elsif ( $identifier eq '$$' ) { 28117 28118 # perl does not allow references to punctuation 28119 # variables without braces. For example, this 28120 # won't work: 28121 # $:=\4; 28122 # $a = $$:; 28123 # You would have to use 28124 # $a = ${$:}; 28125 28126 $i = $i_save; 28127 if ( $tok eq '{' ) { $type = 't' } 28128 else { $type = 'i' } 28129 } 28130 elsif ( $identifier eq '->' ) { 28131 $i = $i_save; 28132 } 28133 else { 28134 $i = $i_save; 28135 if ( length($identifier) == 1 ) { $identifier = ''; } 28136 } 28137 $id_scan_state = ''; 28138 last; 28139 } 28140 } 28141 elsif ( $id_scan_state eq '&' ) { # starting sub call? 28142 28143 if ( $tok =~ /^[\$A-Za-z_]/ ) { # alphanumeric .. 28144 $id_scan_state = ':'; # now need :: 28145 $saw_alpha = 1; 28146 $identifier .= $tok; 28147 } 28148 elsif ( $tok eq "'" && $allow_tick ) { # alphanumeric .. 28149 $id_scan_state = ':'; # now need :: 28150 $saw_alpha = 1; 28151 $identifier .= $tok; 28152 } 28153 elsif ( $tok =~ /^[0-9]/ ) { # numeric..see comments above 28154 $id_scan_state = ':'; # now need :: 28155 $saw_alpha = 1; 28156 $identifier .= $tok; 28157 } 28158 elsif ( $tok =~ /^\s*$/ ) { # allow space 28159 } 28160 elsif ( $tok eq '::' ) { # leading :: 28161 $id_scan_state = 'A'; # accept alpha next 28162 $identifier .= $tok; 28163 } 28164 elsif ( $tok eq '{' ) { 28165 if ( $identifier eq '&' || $i == 0 ) { $identifier = ''; } 28166 $i = $i_save; 28167 $id_scan_state = ''; 28168 last; 28169 } 28170 else { 28171 28172 # punctuation variable? 28173 # testfile: cunningham4.pl 28174 # 28175 # We have to be careful here. If we are in an unknown state, 28176 # we will reject the punctuation variable. In the following 28177 # example the '&' is a binary opeator but we are in an unknown 28178 # state because there is no sigil on 'Prima', so we don't 28179 # know what it is. But it is a bad guess that 28180 # '&~' is a punction variable. 28181 # $self->{text}->{colorMap}->[ 28182 # Prima::PodView::COLOR_CODE_FOREGROUND 28183 # & ~tb::COLOR_INDEX ] = 28184 # $sec->{ColorCode} 28185 if ( $identifier eq '&' && $expecting ) { 28186 $identifier .= $tok; 28187 } 28188 else { 28189 $identifier = ''; 28190 $i = $i_save; 28191 $type = '&'; 28192 } 28193 $id_scan_state = ''; 28194 last; 28195 } 28196 } 28197 elsif ( $id_scan_state eq 'A' ) { # looking for alpha (after ::) 28198 28199 if ( $tok =~ /^[A-Za-z_]/ ) { # found it 28200 $identifier .= $tok; 28201 $id_scan_state = ':'; # now need :: 28202 $saw_alpha = 1; 28203 } 28204 elsif ( $tok eq "'" && $allow_tick ) { 28205 $identifier .= $tok; 28206 $id_scan_state = ':'; # now need :: 28207 $saw_alpha = 1; 28208 } 28209 elsif ( $tok =~ /^[0-9]/ ) { # numeric..see comments above 28210 $identifier .= $tok; 28211 $id_scan_state = ':'; # now need :: 28212 $saw_alpha = 1; 28213 } 28214 elsif ( ( $identifier =~ /^sub / ) && ( $tok =~ /^\s*$/ ) ) { 28215 $id_scan_state = '('; 28216 $identifier .= $tok; 28217 } 28218 elsif ( ( $identifier =~ /^sub / ) && ( $tok eq '(' ) ) { 28219 $id_scan_state = ')'; 28220 $identifier .= $tok; 28221 } 28222 else { 28223 $id_scan_state = ''; 28224 $i = $i_save; 28225 last; 28226 } 28227 } 28228 elsif ( $id_scan_state eq ':' ) { # looking for :: after alpha 28229 28230 if ( $tok eq '::' ) { # got it 28231 $identifier .= $tok; 28232 $id_scan_state = 'A'; # now require alpha 28233 } 28234 elsif ( $tok =~ /^[A-Za-z_]/ ) { # more alphanumeric is ok here 28235 $identifier .= $tok; 28236 $id_scan_state = ':'; # now need :: 28237 $saw_alpha = 1; 28238 } 28239 elsif ( $tok =~ /^[0-9]/ ) { # numeric..see comments above 28240 $identifier .= $tok; 28241 $id_scan_state = ':'; # now need :: 28242 $saw_alpha = 1; 28243 } 28244 elsif ( $tok eq "'" && $allow_tick ) { # tick 28245 28246 if ( $is_keyword{$identifier} ) { 28247 $id_scan_state = ''; # that's all 28248 $i = $i_save; 28249 } 28250 else { 28251 $identifier .= $tok; 28252 } 28253 } 28254 elsif ( ( $identifier =~ /^sub / ) && ( $tok =~ /^\s*$/ ) ) { 28255 $id_scan_state = '('; 28256 $identifier .= $tok; 28257 } 28258 elsif ( ( $identifier =~ /^sub / ) && ( $tok eq '(' ) ) { 28259 $id_scan_state = ')'; 28260 $identifier .= $tok; 28261 } 28262 else { 28263 $id_scan_state = ''; # that's all 28264 $i = $i_save; 28265 last; 28266 } 28267 } 28268 elsif ( $id_scan_state eq '(' ) { # looking for ( of prototype 28269 28270 if ( $tok eq '(' ) { # got it 28271 $identifier .= $tok; 28272 $id_scan_state = ')'; # now find the end of it 28273 } 28274 elsif ( $tok =~ /^\s*$/ ) { # blank - keep going 28275 $identifier .= $tok; 28276 } 28277 else { 28278 $id_scan_state = ''; # that's all - no prototype 28279 $i = $i_save; 28280 last; 28281 } 28282 } 28283 elsif ( $id_scan_state eq ')' ) { # looking for ) to end 28284 28285 if ( $tok eq ')' ) { # got it 28286 $identifier .= $tok; 28287 $id_scan_state = ''; # all done 28288 last; 28289 } 28290 elsif ( $tok =~ /^[\s\$\%\\\*\@\&\;]/ ) { 28291 $identifier .= $tok; 28292 } 28293 else { # probable error in script, but keep going 28294 warning("Unexpected '$tok' while seeking end of prototype\n"); 28295 $identifier .= $tok; 28296 } 28297 } 28298 else { # can get here due to error in initialization 28299 $id_scan_state = ''; 28300 $i = $i_save; 28301 last; 28302 } 28303 } 28304 28305 if ( $id_scan_state eq ')' ) { 28306 warning("Hit end of line while seeking ) to end prototype\n"); 28307 } 28308 28309 # once we enter the actual identifier, it may not extend beyond 28310 # the end of the current line 28311 if ( $id_scan_state =~ /^[A\:\(\)]/ ) { 28312 $id_scan_state = ''; 28313 } 28314 if ( $i < 0 ) { $i = 0 } 28315 28316 unless ($type) { 28317 28318 if ($saw_type) { 28319 28320 if ($saw_alpha) { 28321 if ( $identifier =~ /^->/ && $last_nonblank_type eq 'w' ) { 28322 $type = 'w'; 28323 } 28324 else { $type = 'i' } 28325 } 28326 elsif ( $identifier eq '->' ) { 28327 $type = '->'; 28328 } 28329 elsif ( 28330 ( length($identifier) > 1 ) 28331 28332 # In something like '@$=' we have an identifier '@$' 28333 # In something like '$${' we have type '$$' (and only 28334 # part of an identifier) 28335 && !( $identifier =~ /\$$/ && $tok eq '{' ) 28336 && ( $identifier !~ /^(sub |package )$/ ) 28337 ) 28338 { 28339 $type = 'i'; 28340 } 28341 else { $type = 't' } 28342 } 28343 elsif ($saw_alpha) { 28344 28345 # type 'w' includes anything without leading type info 28346 # ($,%,@,*) including something like abc::def::ghi 28347 $type = 'w'; 28348 } 28349 else { 28350 $type = ''; 28351 } # this can happen on a restart 28352 } 28353 28354 if ($identifier) { 28355 $tok = $identifier; 28356 if ($message) { write_logfile_entry($message) } 28357 } 28358 else { 28359 $tok = $tok_begin; 28360 $i = $i_begin; 28361 } 28362 28363 TOKENIZER_DEBUG_FLAG_SCAN_ID && do { 28364 my ( $a, $b, $c ) = caller; 28365 print STDOUT 28366"SCANID: called from $a $b $c with tok, i, state, identifier =$tok_begin, $i_begin, $id_scan_state_begin, $identifier_begin\n"; 28367 print STDOUT 28368"SCANID: returned with tok, i, state, identifier =$tok, $i, $id_scan_state, $identifier\n"; 28369 }; 28370 return ( $i, $tok, $type, $id_scan_state, $identifier ); 28371} 28372 28373{ 28374 28375 # saved package and subnames in case prototype is on separate line 28376 my ( $package_saved, $subname_saved ); 28377 28378 sub do_scan_sub { 28379 28380 # do_scan_sub parses a sub name and prototype 28381 # it is called with $i_beg equal to the index of the first nonblank 28382 # token following a 'sub' token. 28383 28384 # TODO: add future error checks to be sure we have a valid 28385 # sub name. For example, 'sub &doit' is wrong. Also, be sure 28386 # a name is given if and only if a non-anonymous sub is 28387 # appropriate. 28388 # USES GLOBAL VARS: $current_package, $last_nonblank_token, 28389 # $in_attribute_list, %saw_function_definition, 28390 # $statement_type 28391 28392 my ( 28393 $input_line, $i, $i_beg, 28394 $tok, $type, $rtokens, 28395 $rtoken_map, $id_scan_state, $max_token_index 28396 ) = @_; 28397 $id_scan_state = ""; # normally we get everything in one call 28398 my $subname = undef; 28399 my $package = undef; 28400 my $proto = undef; 28401 my $attrs = undef; 28402 my $match; 28403 28404 my $pos_beg = $$rtoken_map[$i_beg]; 28405 pos($input_line) = $pos_beg; 28406 28407 # sub NAME PROTO ATTRS 28408 if ( 28409 $input_line =~ m/\G\s* 28410 ((?:\w*(?:'|::))*) # package - something that ends in :: or ' 28411 (\w+) # NAME - required 28412 (\s*\([^){]*\))? # PROTO - something in parens 28413 (\s*:)? # ATTRS - leading : of attribute list 28414 /gcx 28415 ) 28416 { 28417 $match = 1; 28418 $subname = $2; 28419 $proto = $3; 28420 $attrs = $4; 28421 28422 $package = ( defined($1) && $1 ) ? $1 : $current_package; 28423 $package =~ s/\'/::/g; 28424 if ( $package =~ /^\:/ ) { $package = 'main' . $package } 28425 $package =~ s/::$//; 28426 my $pos = pos($input_line); 28427 my $numc = $pos - $pos_beg; 28428 $tok = 'sub ' . substr( $input_line, $pos_beg, $numc ); 28429 $type = 'i'; 28430 } 28431 28432 # Look for prototype/attributes not preceded on this line by subname; 28433 # This might be an anonymous sub with attributes, 28434 # or a prototype on a separate line from its sub name 28435 elsif ( 28436 $input_line =~ m/\G(\s*\([^){]*\))? # PROTO 28437 (\s*:)? # ATTRS leading ':' 28438 /gcx 28439 && ( $1 || $2 ) 28440 ) 28441 { 28442 $match = 1; 28443 $proto = $1; 28444 $attrs = $2; 28445 28446 # Handle prototype on separate line from subname 28447 if ($subname_saved) { 28448 $package = $package_saved; 28449 $subname = $subname_saved; 28450 $tok = $last_nonblank_token; 28451 } 28452 $type = 'i'; 28453 } 28454 28455 if ($match) { 28456 28457 # ATTRS: if there are attributes, back up and let the ':' be 28458 # found later by the scanner. 28459 my $pos = pos($input_line); 28460 if ($attrs) { 28461 $pos -= length($attrs); 28462 } 28463 28464 my $next_nonblank_token = $tok; 28465 28466 # catch case of line with leading ATTR ':' after anonymous sub 28467 if ( $pos == $pos_beg && $tok eq ':' ) { 28468 $type = 'A'; 28469 $in_attribute_list = 1; 28470 } 28471 28472 # We must convert back from character position 28473 # to pre_token index. 28474 else { 28475 28476 # I don't think an error flag can occur here ..but ? 28477 my $error; 28478 ( $i, $error ) = inverse_pretoken_map( $i, $pos, $rtoken_map, 28479 $max_token_index ); 28480 if ($error) { warning("Possibly invalid sub\n") } 28481 28482 # check for multiple definitions of a sub 28483 ( $next_nonblank_token, my $i_next ) = 28484 find_next_nonblank_token_on_this_line( $i, $rtokens, 28485 $max_token_index ); 28486 } 28487 28488 if ( $next_nonblank_token =~ /^(\s*|#)$/ ) 28489 { # skip blank or side comment 28490 my ( $rpre_tokens, $rpre_types ) = 28491 peek_ahead_for_n_nonblank_pre_tokens(1); 28492 if ( defined($rpre_tokens) && @$rpre_tokens ) { 28493 $next_nonblank_token = $rpre_tokens->[0]; 28494 } 28495 else { 28496 $next_nonblank_token = '}'; 28497 } 28498 } 28499 $package_saved = ""; 28500 $subname_saved = ""; 28501 if ( $next_nonblank_token eq '{' ) { 28502 if ($subname) { 28503 28504 # Check for multiple definitions of a sub, but 28505 # it is ok to have multiple sub BEGIN, etc, 28506 # so we do not complain if name is all caps 28507 if ( $saw_function_definition{$package}{$subname} 28508 && $subname !~ /^[A-Z]+$/ ) 28509 { 28510 my $lno = $saw_function_definition{$package}{$subname}; 28511 warning( 28512"already saw definition of 'sub $subname' in package '$package' at line $lno\n" 28513 ); 28514 } 28515 $saw_function_definition{$package}{$subname} = 28516 $tokenizer_self->{_last_line_number}; 28517 } 28518 } 28519 elsif ( $next_nonblank_token eq ';' ) { 28520 } 28521 elsif ( $next_nonblank_token eq '}' ) { 28522 } 28523 28524 # ATTRS - if an attribute list follows, remember the name 28525 # of the sub so the next opening brace can be labeled. 28526 # Setting 'statement_type' causes any ':'s to introduce 28527 # attributes. 28528 elsif ( $next_nonblank_token eq ':' ) { 28529 $statement_type = $tok; 28530 } 28531 28532 # see if PROTO follows on another line: 28533 elsif ( $next_nonblank_token eq '(' ) { 28534 if ( $attrs || $proto ) { 28535 warning( 28536"unexpected '(' after definition or declaration of sub '$subname'\n" 28537 ); 28538 } 28539 else { 28540 $id_scan_state = 'sub'; # we must come back to get proto 28541 $statement_type = $tok; 28542 $package_saved = $package; 28543 $subname_saved = $subname; 28544 } 28545 } 28546 elsif ($next_nonblank_token) { # EOF technically ok 28547 warning( 28548"expecting ':' or ';' or '{' after definition or declaration of sub '$subname' but saw '$next_nonblank_token'\n" 28549 ); 28550 } 28551 check_prototype( $proto, $package, $subname ); 28552 } 28553 28554 # no match but line not blank 28555 else { 28556 } 28557 return ( $i, $tok, $type, $id_scan_state ); 28558 } 28559} 28560 28561#########i############################################################### 28562# Tokenizer utility routines which may use CONSTANTS but no other GLOBALS 28563######################################################################### 28564 28565sub find_next_nonblank_token { 28566 my ( $i, $rtokens, $max_token_index ) = @_; 28567 28568 if ( $i >= $max_token_index ) { 28569 if ( !peeked_ahead() ) { 28570 peeked_ahead(1); 28571 $rtokens = 28572 peek_ahead_for_nonblank_token( $rtokens, $max_token_index ); 28573 } 28574 } 28575 my $next_nonblank_token = $$rtokens[ ++$i ]; 28576 28577 if ( $next_nonblank_token =~ /^\s*$/ ) { 28578 $next_nonblank_token = $$rtokens[ ++$i ]; 28579 } 28580 return ( $next_nonblank_token, $i ); 28581} 28582 28583sub numerator_expected { 28584 28585 # this is a filter for a possible numerator, in support of guessing 28586 # for the / pattern delimiter token. 28587 # returns - 28588 # 1 - yes 28589 # 0 - can't tell 28590 # -1 - no 28591 # Note: I am using the convention that variables ending in 28592 # _expected have these 3 possible values. 28593 my ( $i, $rtokens, $max_token_index ) = @_; 28594 my $next_token = $$rtokens[ $i + 1 ]; 28595 if ( $next_token eq '=' ) { $i++; } # handle /= 28596 my ( $next_nonblank_token, $i_next ) = 28597 find_next_nonblank_token( $i, $rtokens, $max_token_index ); 28598 28599 if ( $next_nonblank_token =~ /(\(|\$|\w|\.|\@)/ ) { 28600 1; 28601 } 28602 else { 28603 28604 if ( $next_nonblank_token =~ /^\s*$/ ) { 28605 0; 28606 } 28607 else { 28608 -1; 28609 } 28610 } 28611} 28612 28613sub pattern_expected { 28614 28615 # This is the start of a filter for a possible pattern. 28616 # It looks at the token after a possbible pattern and tries to 28617 # determine if that token could end a pattern. 28618 # returns - 28619 # 1 - yes 28620 # 0 - can't tell 28621 # -1 - no 28622 my ( $i, $rtokens, $max_token_index ) = @_; 28623 my $next_token = $$rtokens[ $i + 1 ]; 28624 if ( $next_token =~ /^[msixpodualgc]/ ) { $i++; } # skip possible modifier 28625 my ( $next_nonblank_token, $i_next ) = 28626 find_next_nonblank_token( $i, $rtokens, $max_token_index ); 28627 28628 # list of tokens which may follow a pattern 28629 # (can probably be expanded) 28630 if ( $next_nonblank_token =~ /(\)|\}|\;|\&\&|\|\||and|or|while|if|unless)/ ) 28631 { 28632 1; 28633 } 28634 else { 28635 28636 if ( $next_nonblank_token =~ /^\s*$/ ) { 28637 0; 28638 } 28639 else { 28640 -1; 28641 } 28642 } 28643} 28644 28645sub find_next_nonblank_token_on_this_line { 28646 my ( $i, $rtokens, $max_token_index ) = @_; 28647 my $next_nonblank_token; 28648 28649 if ( $i < $max_token_index ) { 28650 $next_nonblank_token = $$rtokens[ ++$i ]; 28651 28652 if ( $next_nonblank_token =~ /^\s*$/ ) { 28653 28654 if ( $i < $max_token_index ) { 28655 $next_nonblank_token = $$rtokens[ ++$i ]; 28656 } 28657 } 28658 } 28659 else { 28660 $next_nonblank_token = ""; 28661 } 28662 return ( $next_nonblank_token, $i ); 28663} 28664 28665sub find_angle_operator_termination { 28666 28667 # We are looking at a '<' and want to know if it is an angle operator. 28668 # We are to return: 28669 # $i = pretoken index of ending '>' if found, current $i otherwise 28670 # $type = 'Q' if found, '>' otherwise 28671 my ( $input_line, $i_beg, $rtoken_map, $expecting, $max_token_index ) = @_; 28672 my $i = $i_beg; 28673 my $type = '<'; 28674 pos($input_line) = 1 + $$rtoken_map[$i]; 28675 28676 my $filter; 28677 28678 # we just have to find the next '>' if a term is expected 28679 if ( $expecting == TERM ) { $filter = '[\>]' } 28680 28681 # we have to guess if we don't know what is expected 28682 elsif ( $expecting == UNKNOWN ) { $filter = '[\>\;\=\#\|\<]' } 28683 28684 # shouldn't happen - we shouldn't be here if operator is expected 28685 else { warning("Program Bug in find_angle_operator_termination\n") } 28686 28687 # To illustrate what we might be looking at, in case we are 28688 # guessing, here are some examples of valid angle operators 28689 # (or file globs): 28690 # <tmp_imp/*> 28691 # <FH> 28692 # <$fh> 28693 # <*.c *.h> 28694 # <_> 28695 # <jskdfjskdfj* op/* jskdjfjkosvk*> ( glob.t) 28696 # <${PREFIX}*img*.$IMAGE_TYPE> 28697 # <img*.$IMAGE_TYPE> 28698 # <Timg*.$IMAGE_TYPE> 28699 # <$LATEX2HTMLVERSIONS${dd}html[1-9].[0-9].pl> 28700 # 28701 # Here are some examples of lines which do not have angle operators: 28702 # return undef unless $self->[2]++ < $#{$self->[1]}; 28703 # < 2 || @$t > 28704 # 28705 # the following line from dlister.pl caused trouble: 28706 # print'~'x79,"\n",$D<1024?"0.$D":$D>>10,"K, $C files\n\n\n"; 28707 # 28708 # If the '<' starts an angle operator, it must end on this line and 28709 # it must not have certain characters like ';' and '=' in it. I use 28710 # this to limit the testing. This filter should be improved if 28711 # possible. 28712 28713 if ( $input_line =~ /($filter)/g ) { 28714 28715 if ( $1 eq '>' ) { 28716 28717 # We MAY have found an angle operator termination if we get 28718 # here, but we need to do more to be sure we haven't been 28719 # fooled. 28720 my $pos = pos($input_line); 28721 28722 my $pos_beg = $$rtoken_map[$i]; 28723 my $str = substr( $input_line, $pos_beg, ( $pos - $pos_beg ) ); 28724 28725 # Reject if the closing '>' follows a '-' as in: 28726 # if ( VERSION < 5.009 && $op-> name eq 'aassign' ) { } 28727 if ( $expecting eq UNKNOWN ) { 28728 my $check = substr( $input_line, $pos - 2, 1 ); 28729 if ( $check eq '-' ) { 28730 return ( $i, $type ); 28731 } 28732 } 28733 28734 ######################################debug##### 28735 #write_diagnostics( "ANGLE? :$str\n"); 28736 #print "ANGLE: found $1 at pos=$pos str=$str check=$check\n"; 28737 ######################################debug##### 28738 $type = 'Q'; 28739 my $error; 28740 ( $i, $error ) = 28741 inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index ); 28742 28743 # It may be possible that a quote ends midway in a pretoken. 28744 # If this happens, it may be necessary to split the pretoken. 28745 if ($error) { 28746 warning( 28747 "Possible tokinization error..please check this line\n"); 28748 report_possible_bug(); 28749 } 28750 28751 # Now let's see where we stand.... 28752 # OK if math op not possible 28753 if ( $expecting == TERM ) { 28754 } 28755 28756 # OK if there are no more than 2 pre-tokens inside 28757 # (not possible to write 2 token math between < and >) 28758 # This catches most common cases 28759 elsif ( $i <= $i_beg + 3 ) { 28760 write_diagnostics("ANGLE(1 or 2 tokens): $str\n"); 28761 } 28762 28763 # Not sure.. 28764 else { 28765 28766 # Let's try a Brace Test: any braces inside must balance 28767 my $br = 0; 28768 while ( $str =~ /\{/g ) { $br++ } 28769 while ( $str =~ /\}/g ) { $br-- } 28770 my $sb = 0; 28771 while ( $str =~ /\[/g ) { $sb++ } 28772 while ( $str =~ /\]/g ) { $sb-- } 28773 my $pr = 0; 28774 while ( $str =~ /\(/g ) { $pr++ } 28775 while ( $str =~ /\)/g ) { $pr-- } 28776 28777 # if braces do not balance - not angle operator 28778 if ( $br || $sb || $pr ) { 28779 $i = $i_beg; 28780 $type = '<'; 28781 write_diagnostics( 28782 "NOT ANGLE (BRACE={$br ($pr [$sb ):$str\n"); 28783 } 28784 28785 # we should keep doing more checks here...to be continued 28786 # Tentatively accepting this as a valid angle operator. 28787 # There are lots more things that can be checked. 28788 else { 28789 write_diagnostics( 28790 "ANGLE-Guessing yes: $str expecting=$expecting\n"); 28791 write_logfile_entry("Guessing angle operator here: $str\n"); 28792 } 28793 } 28794 } 28795 28796 # didn't find ending > 28797 else { 28798 if ( $expecting == TERM ) { 28799 warning("No ending > for angle operator\n"); 28800 } 28801 } 28802 } 28803 return ( $i, $type ); 28804} 28805 28806sub scan_number_do { 28807 28808 # scan a number in any of the formats that Perl accepts 28809 # Underbars (_) are allowed in decimal numbers. 28810 # input parameters - 28811 # $input_line - the string to scan 28812 # $i - pre_token index to start scanning 28813 # $rtoken_map - reference to the pre_token map giving starting 28814 # character position in $input_line of token $i 28815 # output parameters - 28816 # $i - last pre_token index of the number just scanned 28817 # number - the number (characters); or undef if not a number 28818 28819 my ( $input_line, $i, $rtoken_map, $input_type, $max_token_index ) = @_; 28820 my $pos_beg = $$rtoken_map[$i]; 28821 my $pos; 28822 my $i_begin = $i; 28823 my $number = undef; 28824 my $type = $input_type; 28825 28826 my $first_char = substr( $input_line, $pos_beg, 1 ); 28827 28828 # Look for bad starting characters; Shouldn't happen.. 28829 if ( $first_char !~ /[\d\.\+\-Ee]/ ) { 28830 warning("Program bug - scan_number given character $first_char\n"); 28831 report_definite_bug(); 28832 return ( $i, $type, $number ); 28833 } 28834 28835 # handle v-string without leading 'v' character ('Two Dot' rule) 28836 # (vstring.t) 28837 # TODO: v-strings may contain underscores 28838 pos($input_line) = $pos_beg; 28839 if ( $input_line =~ /\G((\d+)?\.\d+(\.\d+)+)/g ) { 28840 $pos = pos($input_line); 28841 my $numc = $pos - $pos_beg; 28842 $number = substr( $input_line, $pos_beg, $numc ); 28843 $type = 'v'; 28844 report_v_string($number); 28845 } 28846 28847 # handle octal, hex, binary 28848 if ( !defined($number) ) { 28849 pos($input_line) = $pos_beg; 28850 if ( $input_line =~ 28851 /\G[+-]?0(([xX][0-9a-fA-F_]+)|([0-7_]+)|([bB][01_]+))/g ) 28852 { 28853 $pos = pos($input_line); 28854 my $numc = $pos - $pos_beg; 28855 $number = substr( $input_line, $pos_beg, $numc ); 28856 $type = 'n'; 28857 } 28858 } 28859 28860 # handle decimal 28861 if ( !defined($number) ) { 28862 pos($input_line) = $pos_beg; 28863 28864 if ( $input_line =~ /\G([+-]?[\d_]*(\.[\d_]*)?([Ee][+-]?(\d+))?)/g ) { 28865 $pos = pos($input_line); 28866 28867 # watch out for things like 0..40 which would give 0. by this; 28868 if ( ( substr( $input_line, $pos - 1, 1 ) eq '.' ) 28869 && ( substr( $input_line, $pos, 1 ) eq '.' ) ) 28870 { 28871 $pos--; 28872 } 28873 my $numc = $pos - $pos_beg; 28874 $number = substr( $input_line, $pos_beg, $numc ); 28875 $type = 'n'; 28876 } 28877 } 28878 28879 # filter out non-numbers like e + - . e2 .e3 +e6 28880 # the rule: at least one digit, and any 'e' must be preceded by a digit 28881 if ( 28882 $number !~ /\d/ # no digits 28883 || ( $number =~ /^(.*)[eE]/ 28884 && $1 !~ /\d/ ) # or no digits before the 'e' 28885 ) 28886 { 28887 $number = undef; 28888 $type = $input_type; 28889 return ( $i, $type, $number ); 28890 } 28891 28892 # Found a number; now we must convert back from character position 28893 # to pre_token index. An error here implies user syntax error. 28894 # An example would be an invalid octal number like '009'. 28895 my $error; 28896 ( $i, $error ) = 28897 inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index ); 28898 if ($error) { warning("Possibly invalid number\n") } 28899 28900 return ( $i, $type, $number ); 28901} 28902 28903sub inverse_pretoken_map { 28904 28905 # Starting with the current pre_token index $i, scan forward until 28906 # finding the index of the next pre_token whose position is $pos. 28907 my ( $i, $pos, $rtoken_map, $max_token_index ) = @_; 28908 my $error = 0; 28909 28910 while ( ++$i <= $max_token_index ) { 28911 28912 if ( $pos <= $$rtoken_map[$i] ) { 28913 28914 # Let the calling routine handle errors in which we do not 28915 # land on a pre-token boundary. It can happen by running 28916 # perltidy on some non-perl scripts, for example. 28917 if ( $pos < $$rtoken_map[$i] ) { $error = 1 } 28918 $i--; 28919 last; 28920 } 28921 } 28922 return ( $i, $error ); 28923} 28924 28925sub find_here_doc { 28926 28927 # find the target of a here document, if any 28928 # input parameters: 28929 # $i - token index of the second < of << 28930 # ($i must be less than the last token index if this is called) 28931 # output parameters: 28932 # $found_target = 0 didn't find target; =1 found target 28933 # HERE_TARGET - the target string (may be empty string) 28934 # $i - unchanged if not here doc, 28935 # or index of the last token of the here target 28936 # $saw_error - flag noting unbalanced quote on here target 28937 my ( $expecting, $i, $rtokens, $rtoken_map, $max_token_index ) = @_; 28938 my $ibeg = $i; 28939 my $found_target = 0; 28940 my $here_doc_target = ''; 28941 my $here_quote_character = ''; 28942 my $saw_error = 0; 28943 my ( $next_nonblank_token, $i_next_nonblank, $next_token ); 28944 $next_token = $$rtokens[ $i + 1 ]; 28945 28946 # perl allows a backslash before the target string (heredoc.t) 28947 my $backslash = 0; 28948 if ( $next_token eq '\\' ) { 28949 $backslash = 1; 28950 $next_token = $$rtokens[ $i + 2 ]; 28951 } 28952 28953 ( $next_nonblank_token, $i_next_nonblank ) = 28954 find_next_nonblank_token_on_this_line( $i, $rtokens, $max_token_index ); 28955 28956 if ( $next_nonblank_token =~ /[\'\"\`]/ ) { 28957 28958 my $in_quote = 1; 28959 my $quote_depth = 0; 28960 my $quote_pos = 0; 28961 my $quoted_string; 28962 28963 ( 28964 $i, $in_quote, $here_quote_character, $quote_pos, $quote_depth, 28965 $quoted_string 28966 ) 28967 = follow_quoted_string( $i_next_nonblank, $in_quote, $rtokens, 28968 $here_quote_character, $quote_pos, $quote_depth, $max_token_index ); 28969 28970 if ($in_quote) { # didn't find end of quote, so no target found 28971 $i = $ibeg; 28972 if ( $expecting == TERM ) { 28973 warning( 28974"Did not find here-doc string terminator ($here_quote_character) before end of line \n" 28975 ); 28976 $saw_error = 1; 28977 } 28978 } 28979 else { # found ending quote 28980 my $j; 28981 $found_target = 1; 28982 28983 my $tokj; 28984 for ( $j = $i_next_nonblank + 1 ; $j < $i ; $j++ ) { 28985 $tokj = $$rtokens[$j]; 28986 28987 # we have to remove any backslash before the quote character 28988 # so that the here-doc-target exactly matches this string 28989 next 28990 if ( $tokj eq "\\" 28991 && $j < $i - 1 28992 && $$rtokens[ $j + 1 ] eq $here_quote_character ); 28993 $here_doc_target .= $tokj; 28994 } 28995 } 28996 } 28997 28998 elsif ( ( $next_token =~ /^\s*$/ ) and ( $expecting == TERM ) ) { 28999 $found_target = 1; 29000 write_logfile_entry( 29001 "found blank here-target after <<; suggest using \"\"\n"); 29002 $i = $ibeg; 29003 } 29004 elsif ( $next_token =~ /^\w/ ) { # simple bareword or integer after << 29005 29006 my $here_doc_expected; 29007 if ( $expecting == UNKNOWN ) { 29008 $here_doc_expected = guess_if_here_doc($next_token); 29009 } 29010 else { 29011 $here_doc_expected = 1; 29012 } 29013 29014 if ($here_doc_expected) { 29015 $found_target = 1; 29016 $here_doc_target = $next_token; 29017 $i = $ibeg + 1; 29018 } 29019 29020 } 29021 else { 29022 29023 if ( $expecting == TERM ) { 29024 $found_target = 1; 29025 write_logfile_entry("Note: bare here-doc operator <<\n"); 29026 } 29027 else { 29028 $i = $ibeg; 29029 } 29030 } 29031 29032 # patch to neglect any prepended backslash 29033 if ( $found_target && $backslash ) { $i++ } 29034 29035 return ( $found_target, $here_doc_target, $here_quote_character, $i, 29036 $saw_error ); 29037} 29038 29039sub do_quote { 29040 29041 # follow (or continue following) quoted string(s) 29042 # $in_quote return code: 29043 # 0 - ok, found end 29044 # 1 - still must find end of quote whose target is $quote_character 29045 # 2 - still looking for end of first of two quotes 29046 # 29047 # Returns updated strings: 29048 # $quoted_string_1 = quoted string seen while in_quote=1 29049 # $quoted_string_2 = quoted string seen while in_quote=2 29050 my ( 29051 $i, $in_quote, $quote_character, 29052 $quote_pos, $quote_depth, $quoted_string_1, 29053 $quoted_string_2, $rtokens, $rtoken_map, 29054 $max_token_index 29055 ) = @_; 29056 29057 my $in_quote_starting = $in_quote; 29058 29059 my $quoted_string; 29060 if ( $in_quote == 2 ) { # two quotes/quoted_string_1s to follow 29061 my $ibeg = $i; 29062 ( 29063 $i, $in_quote, $quote_character, $quote_pos, $quote_depth, 29064 $quoted_string 29065 ) 29066 = follow_quoted_string( $i, $in_quote, $rtokens, $quote_character, 29067 $quote_pos, $quote_depth, $max_token_index ); 29068 $quoted_string_2 .= $quoted_string; 29069 if ( $in_quote == 1 ) { 29070 if ( $quote_character =~ /[\{\[\<\(]/ ) { $i++; } 29071 $quote_character = ''; 29072 } 29073 else { 29074 $quoted_string_2 .= "\n"; 29075 } 29076 } 29077 29078 if ( $in_quote == 1 ) { # one (more) quote to follow 29079 my $ibeg = $i; 29080 ( 29081 $i, $in_quote, $quote_character, $quote_pos, $quote_depth, 29082 $quoted_string 29083 ) 29084 = follow_quoted_string( $ibeg, $in_quote, $rtokens, $quote_character, 29085 $quote_pos, $quote_depth, $max_token_index ); 29086 $quoted_string_1 .= $quoted_string; 29087 if ( $in_quote == 1 ) { 29088 $quoted_string_1 .= "\n"; 29089 } 29090 } 29091 return ( $i, $in_quote, $quote_character, $quote_pos, $quote_depth, 29092 $quoted_string_1, $quoted_string_2 ); 29093} 29094 29095sub follow_quoted_string { 29096 29097 # scan for a specific token, skipping escaped characters 29098 # if the quote character is blank, use the first non-blank character 29099 # input parameters: 29100 # $rtokens = reference to the array of tokens 29101 # $i = the token index of the first character to search 29102 # $in_quote = number of quoted strings being followed 29103 # $beginning_tok = the starting quote character 29104 # $quote_pos = index to check next for alphanumeric delimiter 29105 # output parameters: 29106 # $i = the token index of the ending quote character 29107 # $in_quote = decremented if found end, unchanged if not 29108 # $beginning_tok = the starting quote character 29109 # $quote_pos = index to check next for alphanumeric delimiter 29110 # $quote_depth = nesting depth, since delimiters '{ ( [ <' can be nested. 29111 # $quoted_string = the text of the quote (without quotation tokens) 29112 my ( $i_beg, $in_quote, $rtokens, $beginning_tok, $quote_pos, $quote_depth, 29113 $max_token_index ) 29114 = @_; 29115 my ( $tok, $end_tok ); 29116 my $i = $i_beg - 1; 29117 my $quoted_string = ""; 29118 29119 TOKENIZER_DEBUG_FLAG_QUOTE && do { 29120 print STDOUT 29121"QUOTE entering with quote_pos = $quote_pos i=$i beginning_tok =$beginning_tok\n"; 29122 }; 29123 29124 # get the corresponding end token 29125 if ( $beginning_tok !~ /^\s*$/ ) { 29126 $end_tok = matching_end_token($beginning_tok); 29127 } 29128 29129 # a blank token means we must find and use the first non-blank one 29130 else { 29131 my $allow_quote_comments = ( $i < 0 ) ? 1 : 0; # i<0 means we saw a <cr> 29132 29133 while ( $i < $max_token_index ) { 29134 $tok = $$rtokens[ ++$i ]; 29135 29136 if ( $tok !~ /^\s*$/ ) { 29137 29138 if ( ( $tok eq '#' ) && ($allow_quote_comments) ) { 29139 $i = $max_token_index; 29140 } 29141 else { 29142 29143 if ( length($tok) > 1 ) { 29144 if ( $quote_pos <= 0 ) { $quote_pos = 1 } 29145 $beginning_tok = substr( $tok, $quote_pos - 1, 1 ); 29146 } 29147 else { 29148 $beginning_tok = $tok; 29149 $quote_pos = 0; 29150 } 29151 $end_tok = matching_end_token($beginning_tok); 29152 $quote_depth = 1; 29153 last; 29154 } 29155 } 29156 else { 29157 $allow_quote_comments = 1; 29158 } 29159 } 29160 } 29161 29162 # There are two different loops which search for the ending quote 29163 # character. In the rare case of an alphanumeric quote delimiter, we 29164 # have to look through alphanumeric tokens character-by-character, since 29165 # the pre-tokenization process combines multiple alphanumeric 29166 # characters, whereas for a non-alphanumeric delimiter, only tokens of 29167 # length 1 can match. 29168 29169 ################################################################### 29170 # Case 1 (rare): loop for case of alphanumeric quote delimiter.. 29171 # "quote_pos" is the position the current word to begin searching 29172 ################################################################### 29173 if ( $beginning_tok =~ /\w/ ) { 29174 29175 # Note this because it is not recommended practice except 29176 # for obfuscated perl contests 29177 if ( $in_quote == 1 ) { 29178 write_logfile_entry( 29179 "Note: alphanumeric quote delimiter ($beginning_tok) \n"); 29180 } 29181 29182 while ( $i < $max_token_index ) { 29183 29184 if ( $quote_pos == 0 || ( $i < 0 ) ) { 29185 $tok = $$rtokens[ ++$i ]; 29186 29187 if ( $tok eq '\\' ) { 29188 29189 # retain backslash unless it hides the end token 29190 $quoted_string .= $tok 29191 unless $$rtokens[ $i + 1 ] eq $end_tok; 29192 $quote_pos++; 29193 last if ( $i >= $max_token_index ); 29194 $tok = $$rtokens[ ++$i ]; 29195 } 29196 } 29197 my $old_pos = $quote_pos; 29198 29199 unless ( defined($tok) && defined($end_tok) && defined($quote_pos) ) 29200 { 29201 29202 } 29203 $quote_pos = 1 + index( $tok, $end_tok, $quote_pos ); 29204 29205 if ( $quote_pos > 0 ) { 29206 29207 $quoted_string .= 29208 substr( $tok, $old_pos, $quote_pos - $old_pos - 1 ); 29209 29210 $quote_depth--; 29211 29212 if ( $quote_depth == 0 ) { 29213 $in_quote--; 29214 last; 29215 } 29216 } 29217 else { 29218 $quoted_string .= substr( $tok, $old_pos ); 29219 } 29220 } 29221 } 29222 29223 ######################################################################## 29224 # Case 2 (normal): loop for case of a non-alphanumeric quote delimiter.. 29225 ######################################################################## 29226 else { 29227 29228 while ( $i < $max_token_index ) { 29229 $tok = $$rtokens[ ++$i ]; 29230 29231 if ( $tok eq $end_tok ) { 29232 $quote_depth--; 29233 29234 if ( $quote_depth == 0 ) { 29235 $in_quote--; 29236 last; 29237 } 29238 } 29239 elsif ( $tok eq $beginning_tok ) { 29240 $quote_depth++; 29241 } 29242 elsif ( $tok eq '\\' ) { 29243 29244 # retain backslash unless it hides the beginning or end token 29245 $tok = $$rtokens[ ++$i ]; 29246 $quoted_string .= '\\' 29247 unless ( $tok eq $end_tok || $tok eq $beginning_tok ); 29248 } 29249 $quoted_string .= $tok; 29250 } 29251 } 29252 if ( $i > $max_token_index ) { $i = $max_token_index } 29253 return ( $i, $in_quote, $beginning_tok, $quote_pos, $quote_depth, 29254 $quoted_string ); 29255} 29256 29257sub indicate_error { 29258 my ( $msg, $line_number, $input_line, $pos, $carrat ) = @_; 29259 interrupt_logfile(); 29260 warning($msg); 29261 write_error_indicator_pair( $line_number, $input_line, $pos, $carrat ); 29262 resume_logfile(); 29263} 29264 29265sub write_error_indicator_pair { 29266 my ( $line_number, $input_line, $pos, $carrat ) = @_; 29267 my ( $offset, $numbered_line, $underline ) = 29268 make_numbered_line( $line_number, $input_line, $pos ); 29269 $underline = write_on_underline( $underline, $pos - $offset, $carrat ); 29270 warning( $numbered_line . "\n" ); 29271 $underline =~ s/\s*$//; 29272 warning( $underline . "\n" ); 29273} 29274 29275sub make_numbered_line { 29276 29277 # Given an input line, its line number, and a character position of 29278 # interest, create a string not longer than 80 characters of the form 29279 # $lineno: sub_string 29280 # such that the sub_string of $str contains the position of interest 29281 # 29282 # Here is an example of what we want, in this case we add trailing 29283 # '...' because the line is long. 29284 # 29285 # 2: (One of QAML 2.0's authors is a member of the World Wide Web Con ... 29286 # 29287 # Here is another example, this time in which we used leading '...' 29288 # because of excessive length: 29289 # 29290 # 2: ... er of the World Wide Web Consortium's 29291 # 29292 # input parameters are: 29293 # $lineno = line number 29294 # $str = the text of the line 29295 # $pos = position of interest (the error) : 0 = first character 29296 # 29297 # We return : 29298 # - $offset = an offset which corrects the position in case we only 29299 # display part of a line, such that $pos-$offset is the effective 29300 # position from the start of the displayed line. 29301 # - $numbered_line = the numbered line as above, 29302 # - $underline = a blank 'underline' which is all spaces with the same 29303 # number of characters as the numbered line. 29304 29305 my ( $lineno, $str, $pos ) = @_; 29306 my $offset = ( $pos < 60 ) ? 0 : $pos - 40; 29307 my $excess = length($str) - $offset - 68; 29308 my $numc = ( $excess > 0 ) ? 68 : undef; 29309 29310 if ( defined($numc) ) { 29311 if ( $offset == 0 ) { 29312 $str = substr( $str, $offset, $numc - 4 ) . " ..."; 29313 } 29314 else { 29315 $str = "... " . substr( $str, $offset + 4, $numc - 4 ) . " ..."; 29316 } 29317 } 29318 else { 29319 29320 if ( $offset == 0 ) { 29321 } 29322 else { 29323 $str = "... " . substr( $str, $offset + 4 ); 29324 } 29325 } 29326 29327 my $numbered_line = sprintf( "%d: ", $lineno ); 29328 $offset -= length($numbered_line); 29329 $numbered_line .= $str; 29330 my $underline = " " x length($numbered_line); 29331 return ( $offset, $numbered_line, $underline ); 29332} 29333 29334sub write_on_underline { 29335 29336 # The "underline" is a string that shows where an error is; it starts 29337 # out as a string of blanks with the same length as the numbered line of 29338 # code above it, and we have to add marking to show where an error is. 29339 # In the example below, we want to write the string '--^' just below 29340 # the line of bad code: 29341 # 29342 # 2: (One of QAML 2.0's authors is a member of the World Wide Web Con ... 29343 # ---^ 29344 # We are given the current underline string, plus a position and a 29345 # string to write on it. 29346 # 29347 # In the above example, there will be 2 calls to do this: 29348 # First call: $pos=19, pos_chr=^ 29349 # Second call: $pos=16, pos_chr=--- 29350 # 29351 # This is a trivial thing to do with substr, but there is some 29352 # checking to do. 29353 29354 my ( $underline, $pos, $pos_chr ) = @_; 29355 29356 # check for error..shouldn't happen 29357 unless ( ( $pos >= 0 ) && ( $pos <= length($underline) ) ) { 29358 return $underline; 29359 } 29360 my $excess = length($pos_chr) + $pos - length($underline); 29361 if ( $excess > 0 ) { 29362 $pos_chr = substr( $pos_chr, 0, length($pos_chr) - $excess ); 29363 } 29364 substr( $underline, $pos, length($pos_chr) ) = $pos_chr; 29365 return ($underline); 29366} 29367 29368sub pre_tokenize { 29369 29370 # Break a string, $str, into a sequence of preliminary tokens. We 29371 # are interested in these types of tokens: 29372 # words (type='w'), example: 'max_tokens_wanted' 29373 # digits (type = 'd'), example: '0755' 29374 # whitespace (type = 'b'), example: ' ' 29375 # any other single character (i.e. punct; type = the character itself). 29376 # We cannot do better than this yet because we might be in a quoted 29377 # string or pattern. Caller sets $max_tokens_wanted to 0 to get all 29378 # tokens. 29379 my ( $str, $max_tokens_wanted ) = @_; 29380 29381 # we return references to these 3 arrays: 29382 my @tokens = (); # array of the tokens themselves 29383 my @token_map = (0); # string position of start of each token 29384 my @type = (); # 'b'=whitespace, 'd'=digits, 'w'=alpha, or punct 29385 29386 do { 29387 29388 # whitespace 29389 if ( $str =~ /\G(\s+)/gc ) { push @type, 'b'; } 29390 29391 # numbers 29392 # note that this must come before words! 29393 elsif ( $str =~ /\G(\d+)/gc ) { push @type, 'd'; } 29394 29395 # words 29396 elsif ( $str =~ /\G(\w+)/gc ) { push @type, 'w'; } 29397 29398 # single-character punctuation 29399 elsif ( $str =~ /\G(\W)/gc ) { push @type, $1; } 29400 29401 # that's all.. 29402 else { 29403 return ( \@tokens, \@token_map, \@type ); 29404 } 29405 29406 push @tokens, $1; 29407 push @token_map, pos($str); 29408 29409 } while ( --$max_tokens_wanted != 0 ); 29410 29411 return ( \@tokens, \@token_map, \@type ); 29412} 29413 29414sub show_tokens { 29415 29416 # this is an old debug routine 29417 my ( $rtokens, $rtoken_map ) = @_; 29418 my $num = scalar(@$rtokens); 29419 my $i; 29420 29421 for ( $i = 0 ; $i < $num ; $i++ ) { 29422 my $len = length( $$rtokens[$i] ); 29423 print STDOUT "$i:$len:$$rtoken_map[$i]:$$rtokens[$i]:\n"; 29424 } 29425} 29426 29427sub matching_end_token { 29428 29429 # find closing character for a pattern 29430 my $beginning_token = shift; 29431 29432 if ( $beginning_token eq '{' ) { 29433 '}'; 29434 } 29435 elsif ( $beginning_token eq '[' ) { 29436 ']'; 29437 } 29438 elsif ( $beginning_token eq '<' ) { 29439 '>'; 29440 } 29441 elsif ( $beginning_token eq '(' ) { 29442 ')'; 29443 } 29444 else { 29445 $beginning_token; 29446 } 29447} 29448 29449sub dump_token_types { 29450 my $class = shift; 29451 my $fh = shift; 29452 29453 # This should be the latest list of token types in use 29454 # adding NEW_TOKENS: add a comment here 29455 print $fh <<'END_OF_LIST'; 29456 29457Here is a list of the token types currently used for lines of type 'CODE'. 29458For the following tokens, the "type" of a token is just the token itself. 29459 29460.. :: << >> ** && .. || // -> => += -= .= %= &= |= ^= *= <> 29461( ) <= >= == =~ !~ != ++ -- /= x= 29462... **= <<= >>= &&= ||= //= <=> 29463, + - / * | % ! x ~ = \ ? : . < > ^ & 29464 29465The following additional token types are defined: 29466 29467 type meaning 29468 b blank (white space) 29469 { indent: opening structural curly brace or square bracket or paren 29470 (code block, anonymous hash reference, or anonymous array reference) 29471 } outdent: right structural curly brace or square bracket or paren 29472 [ left non-structural square bracket (enclosing an array index) 29473 ] right non-structural square bracket 29474 ( left non-structural paren (all but a list right of an =) 29475 ) right non-structural paren 29476 L left non-structural curly brace (enclosing a key) 29477 R right non-structural curly brace 29478 ; terminal semicolon 29479 f indicates a semicolon in a "for" statement 29480 h here_doc operator << 29481 # a comment 29482 Q indicates a quote or pattern 29483 q indicates a qw quote block 29484 k a perl keyword 29485 C user-defined constant or constant function (with void prototype = ()) 29486 U user-defined function taking parameters 29487 G user-defined function taking block parameter (like grep/map/eval) 29488 M (unused, but reserved for subroutine definition name) 29489 P (unused, but -html uses it to label pod text) 29490 t type indicater such as %,$,@,*,&,sub 29491 w bare word (perhaps a subroutine call) 29492 i identifier of some type (with leading %, $, @, *, &, sub, -> ) 29493 n a number 29494 v a v-string 29495 F a file test operator (like -e) 29496 Y File handle 29497 Z identifier in indirect object slot: may be file handle, object 29498 J LABEL: code block label 29499 j LABEL after next, last, redo, goto 29500 p unary + 29501 m unary - 29502 pp pre-increment operator ++ 29503 mm pre-decrement operator -- 29504 A : used as attribute separator 29505 29506 Here are the '_line_type' codes used internally: 29507 SYSTEM - system-specific code before hash-bang line 29508 CODE - line of perl code (including comments) 29509 POD_START - line starting pod, such as '=head' 29510 POD - pod documentation text 29511 POD_END - last line of pod section, '=cut' 29512 HERE - text of here-document 29513 HERE_END - last line of here-doc (target word) 29514 FORMAT - format section 29515 FORMAT_END - last line of format section, '.' 29516 DATA_START - __DATA__ line 29517 DATA - unidentified text following __DATA__ 29518 END_START - __END__ line 29519 END - unidentified text following __END__ 29520 ERROR - we are in big trouble, probably not a perl script 29521END_OF_LIST 29522} 29523 29524BEGIN { 29525 29526 # These names are used in error messages 29527 @opening_brace_names = qw# '{' '[' '(' '?' #; 29528 @closing_brace_names = qw# '}' ']' ')' ':' #; 29529 29530 my @digraphs = qw( 29531 .. :: << >> ** && .. || // -> => += -= .= %= &= |= ^= *= <> 29532 <= >= == =~ !~ != ++ -- /= x= ~~ 29533 ); 29534 @is_digraph{@digraphs} = (1) x scalar(@digraphs); 29535 29536 my @trigraphs = qw( ... **= <<= >>= &&= ||= //= <=> !~~ ); 29537 @is_trigraph{@trigraphs} = (1) x scalar(@trigraphs); 29538 29539 # make a hash of all valid token types for self-checking the tokenizer 29540 # (adding NEW_TOKENS : select a new character and add to this list) 29541 my @valid_token_types = qw# 29542 A b C G L R f h Q k t w i q n p m F pp mm U j J Y Z v 29543 { } ( ) [ ] ; + - / * | % ! x ~ = \ ? : . < > ^ & 29544 #; 29545 push( @valid_token_types, @digraphs ); 29546 push( @valid_token_types, @trigraphs ); 29547 push( @valid_token_types, ( '#', ',', 'CORE::' ) ); 29548 @is_valid_token_type{@valid_token_types} = (1) x scalar(@valid_token_types); 29549 29550 # a list of file test letters, as in -e (Table 3-4 of 'camel 3') 29551 my @file_test_operators = 29552 qw( A B C M O R S T W X b c d e f g k l o p r s t u w x z); 29553 @is_file_test_operator{@file_test_operators} = 29554 (1) x scalar(@file_test_operators); 29555 29556 # these functions have prototypes of the form (&), so when they are 29557 # followed by a block, that block MAY BE followed by an operator. 29558 # Smartmatch operator ~~ may be followed by anonomous hash or array ref 29559 @_ = qw( do eval ); 29560 @is_block_operator{@_} = (1) x scalar(@_); 29561 29562 # these functions allow an identifier in the indirect object slot 29563 @_ = qw( print printf sort exec system say); 29564 @is_indirect_object_taker{@_} = (1) x scalar(@_); 29565 29566 # These tokens may precede a code block 29567 # patched for SWITCH/CASE 29568 @_ = 29569 qw( BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue if elsif else 29570 unless do while until eval for foreach map grep sort 29571 switch case given when); 29572 @is_code_block_token{@_} = (1) x scalar(@_); 29573 29574 # I'll build the list of keywords incrementally 29575 my @Keywords = (); 29576 29577 # keywords and tokens after which a value or pattern is expected, 29578 # but not an operator. In other words, these should consume terms 29579 # to their right, or at least they are not expected to be followed 29580 # immediately by operators. 29581 my @value_requestor = qw( 29582 AUTOLOAD 29583 BEGIN 29584 CHECK 29585 DESTROY 29586 END 29587 EQ 29588 GE 29589 GT 29590 INIT 29591 LE 29592 LT 29593 NE 29594 UNITCHECK 29595 abs 29596 accept 29597 alarm 29598 and 29599 atan2 29600 bind 29601 binmode 29602 bless 29603 break 29604 caller 29605 chdir 29606 chmod 29607 chomp 29608 chop 29609 chown 29610 chr 29611 chroot 29612 close 29613 closedir 29614 cmp 29615 connect 29616 continue 29617 cos 29618 crypt 29619 dbmclose 29620 dbmopen 29621 defined 29622 delete 29623 die 29624 dump 29625 each 29626 else 29627 elsif 29628 eof 29629 eq 29630 exec 29631 exists 29632 exit 29633 exp 29634 fcntl 29635 fileno 29636 flock 29637 for 29638 foreach 29639 formline 29640 ge 29641 getc 29642 getgrgid 29643 getgrnam 29644 gethostbyaddr 29645 gethostbyname 29646 getnetbyaddr 29647 getnetbyname 29648 getpeername 29649 getpgrp 29650 getpriority 29651 getprotobyname 29652 getprotobynumber 29653 getpwnam 29654 getpwuid 29655 getservbyname 29656 getservbyport 29657 getsockname 29658 getsockopt 29659 glob 29660 gmtime 29661 goto 29662 grep 29663 gt 29664 hex 29665 if 29666 index 29667 int 29668 ioctl 29669 join 29670 keys 29671 kill 29672 last 29673 lc 29674 lcfirst 29675 le 29676 length 29677 link 29678 listen 29679 local 29680 localtime 29681 lock 29682 log 29683 lstat 29684 lt 29685 map 29686 mkdir 29687 msgctl 29688 msgget 29689 msgrcv 29690 msgsnd 29691 my 29692 ne 29693 next 29694 no 29695 not 29696 oct 29697 open 29698 opendir 29699 or 29700 ord 29701 our 29702 pack 29703 pipe 29704 pop 29705 pos 29706 print 29707 printf 29708 prototype 29709 push 29710 quotemeta 29711 rand 29712 read 29713 readdir 29714 readlink 29715 readline 29716 readpipe 29717 recv 29718 redo 29719 ref 29720 rename 29721 require 29722 reset 29723 return 29724 reverse 29725 rewinddir 29726 rindex 29727 rmdir 29728 scalar 29729 seek 29730 seekdir 29731 select 29732 semctl 29733 semget 29734 semop 29735 send 29736 sethostent 29737 setnetent 29738 setpgrp 29739 setpriority 29740 setprotoent 29741 setservent 29742 setsockopt 29743 shift 29744 shmctl 29745 shmget 29746 shmread 29747 shmwrite 29748 shutdown 29749 sin 29750 sleep 29751 socket 29752 socketpair 29753 sort 29754 splice 29755 split 29756 sprintf 29757 sqrt 29758 srand 29759 stat 29760 study 29761 substr 29762 symlink 29763 syscall 29764 sysopen 29765 sysread 29766 sysseek 29767 system 29768 syswrite 29769 tell 29770 telldir 29771 tie 29772 tied 29773 truncate 29774 uc 29775 ucfirst 29776 umask 29777 undef 29778 unless 29779 unlink 29780 unpack 29781 unshift 29782 untie 29783 until 29784 use 29785 utime 29786 values 29787 vec 29788 waitpid 29789 warn 29790 while 29791 write 29792 xor 29793 29794 switch 29795 case 29796 given 29797 when 29798 err 29799 say 29800 ); 29801 29802 # patched above for SWITCH/CASE given/when err say 29803 # 'err' is a fairly safe addition. 29804 # TODO: 'default' still needed if appropriate 29805 # 'use feature' seen, but perltidy works ok without it. 29806 # Concerned that 'default' could break code. 29807 push( @Keywords, @value_requestor ); 29808 29809 # These are treated the same but are not keywords: 29810 my @extra_vr = qw( 29811 constant 29812 vars 29813 ); 29814 push( @value_requestor, @extra_vr ); 29815 29816 @expecting_term_token{@value_requestor} = (1) x scalar(@value_requestor); 29817 29818 # this list contains keywords which do not look for arguments, 29819 # so that they might be followed by an operator, or at least 29820 # not a term. 29821 my @operator_requestor = qw( 29822 endgrent 29823 endhostent 29824 endnetent 29825 endprotoent 29826 endpwent 29827 endservent 29828 fork 29829 getgrent 29830 gethostent 29831 getlogin 29832 getnetent 29833 getppid 29834 getprotoent 29835 getpwent 29836 getservent 29837 setgrent 29838 setpwent 29839 time 29840 times 29841 wait 29842 wantarray 29843 ); 29844 29845 push( @Keywords, @operator_requestor ); 29846 29847 # These are treated the same but are not considered keywords: 29848 my @extra_or = qw( 29849 STDERR 29850 STDIN 29851 STDOUT 29852 ); 29853 29854 push( @operator_requestor, @extra_or ); 29855 29856 @expecting_operator_token{@operator_requestor} = 29857 (1) x scalar(@operator_requestor); 29858 29859 # these token TYPES expect trailing operator but not a term 29860 # note: ++ and -- are post-increment and decrement, 'C' = constant 29861 my @operator_requestor_types = qw( ++ -- C <> q ); 29862 @expecting_operator_types{@operator_requestor_types} = 29863 (1) x scalar(@operator_requestor_types); 29864 29865 # these token TYPES consume values (terms) 29866 # note: pp and mm are pre-increment and decrement 29867 # f=semicolon in for, F=file test operator 29868 my @value_requestor_type = qw# 29869 L { ( [ ~ !~ =~ ; . .. ... A : && ! || // = + - x 29870 **= += -= .= /= *= %= x= &= |= ^= <<= >>= &&= ||= //= 29871 <= >= == != => \ > < % * / ? & | ** <=> ~~ !~~ 29872 f F pp mm Y p m U J G j >> << ^ t 29873 #; 29874 push( @value_requestor_type, ',' ) 29875 ; # (perl doesn't like a ',' in a qw block) 29876 @expecting_term_types{@value_requestor_type} = 29877 (1) x scalar(@value_requestor_type); 29878 29879 # Note: the following valid token types are not assigned here to 29880 # hashes requesting to be followed by values or terms, but are 29881 # instead currently hard-coded into sub operator_expected: 29882 # ) -> :: Q R Z ] b h i k n v w } # 29883 29884 # For simple syntax checking, it is nice to have a list of operators which 29885 # will really be unhappy if not followed by a term. This includes most 29886 # of the above... 29887 %really_want_term = %expecting_term_types; 29888 29889 # with these exceptions... 29890 delete $really_want_term{'U'}; # user sub, depends on prototype 29891 delete $really_want_term{'F'}; # file test works on $_ if no following term 29892 delete $really_want_term{'Y'}; # indirect object, too risky to check syntax; 29893 # let perl do it 29894 29895 @_ = qw(q qq qw qx qr s y tr m); 29896 @is_q_qq_qw_qx_qr_s_y_tr_m{@_} = (1) x scalar(@_); 29897 29898 # These keywords are handled specially in the tokenizer code: 29899 my @special_keywords = qw( 29900 do 29901 eval 29902 format 29903 m 29904 package 29905 q 29906 qq 29907 qr 29908 qw 29909 qx 29910 s 29911 sub 29912 tr 29913 y 29914 ); 29915 push( @Keywords, @special_keywords ); 29916 29917 # Keywords after which list formatting may be used 29918 # WARNING: do not include |map|grep|eval or perl may die on 29919 # syntax errors (map1.t). 29920 my @keyword_taking_list = qw( 29921 and 29922 chmod 29923 chomp 29924 chop 29925 chown 29926 dbmopen 29927 die 29928 elsif 29929 exec 29930 fcntl 29931 for 29932 foreach 29933 formline 29934 getsockopt 29935 if 29936 index 29937 ioctl 29938 join 29939 kill 29940 local 29941 msgctl 29942 msgrcv 29943 msgsnd 29944 my 29945 open 29946 or 29947 our 29948 pack 29949 print 29950 printf 29951 push 29952 read 29953 readpipe 29954 recv 29955 return 29956 reverse 29957 rindex 29958 seek 29959 select 29960 semctl 29961 semget 29962 send 29963 setpriority 29964 setsockopt 29965 shmctl 29966 shmget 29967 shmread 29968 shmwrite 29969 socket 29970 socketpair 29971 sort 29972 splice 29973 split 29974 sprintf 29975 substr 29976 syscall 29977 sysopen 29978 sysread 29979 sysseek 29980 system 29981 syswrite 29982 tie 29983 unless 29984 unlink 29985 unpack 29986 unshift 29987 until 29988 vec 29989 warn 29990 while 29991 given 29992 when 29993 ); 29994 @is_keyword_taking_list{@keyword_taking_list} = 29995 (1) x scalar(@keyword_taking_list); 29996 29997 # These are not used in any way yet 29998 # my @unused_keywords = qw( 29999 # __FILE__ 30000 # __LINE__ 30001 # __PACKAGE__ 30002 # ); 30003 30004 # The list of keywords was originally extracted from function 'keyword' in 30005 # perl file toke.c version 5.005.03, using this utility, plus a 30006 # little editing: (file getkwd.pl): 30007 # while (<>) { while (/\"(.*)\"/g) { print "$1\n"; } } 30008 # Add 'get' prefix where necessary, then split into the above lists. 30009 # This list should be updated as necessary. 30010 # The list should not contain these special variables: 30011 # ARGV DATA ENV SIG STDERR STDIN STDOUT 30012 # __DATA__ __END__ 30013 30014 @is_keyword{@Keywords} = (1) x scalar(@Keywords); 30015} 300161; 30017__END__ 30018 30019=head1 NAME 30020 30021Perl::Tidy - Parses and beautifies perl source 30022 30023=head1 SYNOPSIS 30024 30025 use Perl::Tidy; 30026 30027 my $error_flag = Perl::Tidy::perltidy( 30028 source => $source, 30029 destination => $destination, 30030 stderr => $stderr, 30031 argv => $argv, 30032 perltidyrc => $perltidyrc, 30033 logfile => $logfile, 30034 errorfile => $errorfile, 30035 formatter => $formatter, # callback object (see below) 30036 dump_options => $dump_options, 30037 dump_options_type => $dump_options_type, 30038 prefilter => $prefilter_coderef, 30039 postfilter => $postfilter_coderef, 30040 ); 30041 30042=head1 DESCRIPTION 30043 30044This module makes the functionality of the perltidy utility available to perl 30045scripts. Any or all of the input parameters may be omitted, in which case the 30046@ARGV array will be used to provide input parameters as described 30047in the perltidy(1) man page. 30048 30049For example, the perltidy script is basically just this: 30050 30051 use Perl::Tidy; 30052 Perl::Tidy::perltidy(); 30053 30054The call to B<perltidy> returns a scalar B<$error_flag> which is TRUE if an 30055error caused premature termination, and FALSE if the process ran to normal 30056completion. Additional discuss of errors is contained below in the L<ERROR 30057HANDLING> section. 30058 30059The module accepts input and output streams by a variety of methods. 30060The following list of parameters may be any of the following: a 30061filename, an ARRAY reference, a SCALAR reference, or an object with 30062either a B<getline> or B<print> method, as appropriate. 30063 30064 source - the source of the script to be formatted 30065 destination - the destination of the formatted output 30066 stderr - standard error output 30067 perltidyrc - the .perltidyrc file 30068 logfile - the .LOG file stream, if any 30069 errorfile - the .ERR file stream, if any 30070 dump_options - ref to a hash to receive parameters (see below), 30071 dump_options_type - controls contents of dump_options 30072 dump_getopt_flags - ref to a hash to receive Getopt flags 30073 dump_options_category - ref to a hash giving category of options 30074 dump_abbreviations - ref to a hash giving all abbreviations 30075 30076The following chart illustrates the logic used to decide how to 30077treat a parameter. 30078 30079 ref($param) $param is assumed to be: 30080 ----------- --------------------- 30081 undef a filename 30082 SCALAR ref to string 30083 ARRAY ref to array 30084 (other) object with getline (if source) or print method 30085 30086If the parameter is an object, and the object has a B<close> method, that 30087close method will be called at the end of the stream. 30088 30089=over 4 30090 30091=item source 30092 30093If the B<source> parameter is given, it defines the source of the input stream. 30094If an input stream is defined with the B<source> parameter then no other source 30095filenames may be specified in the @ARGV array or B<argv> parameter. 30096 30097=item destination 30098 30099If the B<destination> parameter is given, it will be used to define the 30100file or memory location to receive output of perltidy. 30101 30102=item stderr 30103 30104The B<stderr> parameter allows the calling program to redirect the stream that 30105would otherwise go to the standard error output device to any of the stream 30106types listed above. This stream contains important warnings and errors 30107related to the parameters passed to perltidy. 30108 30109=item perltidyrc 30110 30111If the B<perltidyrc> file is given, it will be used instead of any 30112F<.perltidyrc> configuration file that would otherwise be used. 30113 30114=item errorfile 30115 30116The B<errorfile> parameter allows the calling program to capture 30117the stream that would otherwise go to either a .ERR file. This 30118stream contains warnings or errors related to the contents of one 30119source file or stream. 30120 30121The reason that this is different from the stderr stream is that when perltidy 30122is called to process multiple files there will be up to one .ERR file created 30123for each file and it would be very confusing if they were combined. 30124 30125However if perltidy is called to process just a single perl script then it may 30126be more conveninent to combine the B<errorfile> stream with the B<stderr> 30127stream. This can be done by setting the B<-se> parameter, in which case this 30128parameter is ignored. 30129 30130=item logfile 30131 30132The B<logfile> parameter allows the calling program to capture 30133the stream that would otherwise go to a .LOG file. This 30134stream is only created if requested with a B<-g> parameter. It 30135contains detailed diagnostic information about a script 30136which may be useful for debugging. 30137 30138=item argv 30139 30140If the B<argv> parameter is given, it will be used instead of the 30141B<@ARGV> array. The B<argv> parameter may be a string, a reference to a 30142string, or a reference to an array. If it is a string or reference to a 30143string, it will be parsed into an array of items just as if it were a 30144command line string. 30145 30146=item dump_options 30147 30148If the B<dump_options> parameter is given, it must be the reference to a hash. 30149In this case, the parameters contained in any perltidyrc configuration file 30150will be placed in this hash and perltidy will return immediately. This is 30151equivalent to running perltidy with --dump-options, except that the perameters 30152are returned in a hash rather than dumped to standard output. Also, by default 30153only the parameters in the perltidyrc file are returned, but this can be 30154changed (see the next parameter). This parameter provides a convenient method 30155for external programs to read a perltidyrc file. An example program using 30156this feature, F<perltidyrc_dump.pl>, is included in the distribution. 30157 30158Any combination of the B<dump_> parameters may be used together. 30159 30160=item dump_options_type 30161 30162This parameter is a string which can be used to control the parameters placed 30163in the hash reference supplied by B<dump_options>. The possible values are 30164'perltidyrc' (default) and 'full'. The 'full' parameter causes both the 30165default options plus any options found in a perltidyrc file to be returned. 30166 30167=item dump_getopt_flags 30168 30169If the B<dump_getopt_flags> parameter is given, it must be the reference to a 30170hash. This hash will receive all of the parameters that perltidy understands 30171and flags that are passed to Getopt::Long. This parameter may be 30172used alone or with the B<dump_options> flag. Perltidy will 30173exit immediately after filling this hash. See the demo program 30174F<perltidyrc_dump.pl> for example usage. 30175 30176=item dump_options_category 30177 30178If the B<dump_options_category> parameter is given, it must be the reference to a 30179hash. This hash will receive a hash with keys equal to all long parameter names 30180and values equal to the title of the corresponding section of the perltidy manual. 30181See the demo program F<perltidyrc_dump.pl> for example usage. 30182 30183=item dump_abbreviations 30184 30185If the B<dump_abbreviations> parameter is given, it must be the reference to a 30186hash. This hash will receive all abbreviations used by Perl::Tidy. See the 30187demo program F<perltidyrc_dump.pl> for example usage. 30188 30189=item prefilter 30190 30191A code reference that will be applied to the source before tidying. It is 30192expected to take the full content as a string in its input, and output the 30193transformed content. 30194 30195=item postfilter 30196 30197A code reference that will be applied to the tidied result before outputting. 30198It is expected to take the full content as a string in its input, and output 30199the transformed content. 30200 30201Note: A convenient way to check the function of your custom prefilter and 30202postfilter code is to use the --notidy option, first with just the prefilter 30203and then with both the prefilter and postfilter. See also the file 30204B<filter_example.pl> in the perltidy distribution. 30205 30206=back 30207 30208=head1 ERROR HANDLING 30209 30210Perltidy will return with an error flag indicating if the process had to be 30211terminated early due to errors in the input parameters. This can happen for 30212example if a parameter is misspelled or given an invalid value. The calling 30213program should check this flag because if it is set the destination stream will 30214be empty or incomplete and should be ignored. Error messages in the B<stderr> 30215stream will indicate the cause of any problem. 30216 30217If the error flag is not set then perltidy ran to completion. However there 30218may still be warning messages in the B<stderr> stream related to control 30219parameters, and there may be warning messages in the B<errorfile> stream 30220relating to possible syntax errors in the source code being tidied. 30221 30222In the event of a catastrophic error for which recovery is not possible 30223B<perltidy> terminates by making calls to B<croak> or B<confess> to help the 30224programmer localize the problem. These should normally only occur during 30225program development. 30226 30227=head1 NOTES ON FORMATTING PARAMETERS 30228 30229Parameters which control formatting may be passed in several ways: in a 30230F<.perltidyrc> configuration file, in the B<perltidyrc> parameter, and in the 30231B<argv> parameter. 30232 30233The B<-syn> (B<--check-syntax>) flag may be used with all source and 30234destination streams except for standard input and output. However 30235data streams which are not associated with a filename will 30236be copied to a temporary file before being be passed to Perl. This 30237use of temporary files can cause somewhat confusing output from Perl. 30238 30239If the B<-pbp> style is used it will typically be necessary to also 30240specify a B<-nst> flag. This is necessary to turn off the B<-st> flag 30241contained in the B<-pbp> parameter set which otherwise would direct 30242the output stream to the standard output. 30243 30244=head1 EXAMPLES 30245 30246The following example uses string references to hold the input and output 30247code and error streams, and illustrates checking for errors. 30248 30249 use Perl::Tidy; 30250 30251 my $source_string = <<'EOT'; 30252 my$error=Perl::Tidy::perltidy(argv=>$argv,source=>\$source_string, 30253 destination=>\$dest_string,stderr=>\$stderr_string, 30254 errorfile=>\$errorfile_string,); 30255 EOT 30256 30257 my $dest_string; 30258 my $stderr_string; 30259 my $errorfile_string; 30260 my $argv = "-npro"; # Ignore any .perltidyrc at this site 30261 $argv .= " -pbp"; # Format according to perl best practices 30262 $argv .= " -nst"; # Must turn off -st in case -pbp is specified 30263 $argv .= " -se"; # -se appends the errorfile to stderr 30264 ## $argv .= " --spell-check"; # uncomment to trigger an error 30265 30266 print "<<RAW SOURCE>>\n$source_string\n"; 30267 30268 my $error = Perl::Tidy::perltidy( 30269 argv => $argv, 30270 source => \$source_string, 30271 destination => \$dest_string, 30272 stderr => \$stderr_string, 30273 errorfile => \$errorfile_string, # ignored when -se flag is set 30274 ##phasers => 'stun', # uncomment to trigger an error 30275 ); 30276 30277 if ($error) { 30278 30279 # serious error in input parameters, no tidied output 30280 print "<<STDERR>>\n$stderr_string\n"; 30281 die "Exiting because of serious errors\n"; 30282 } 30283 30284 if ($dest_string) { print "<<TIDIED SOURCE>>\n$dest_string\n" } 30285 if ($stderr_string) { print "<<STDERR>>\n$stderr_string\n" } 30286 if ($errorfile_string) { print "<<.ERR file>>\n$errorfile_string\n" } 30287 30288Additional examples are given in examples section of the perltidy distribution. 30289 30290=head1 Using the B<formatter> Callback Object 30291 30292The B<formatter> parameter is an optional callback object which allows 30293the calling program to receive tokenized lines directly from perltidy for 30294further specialized processing. When this parameter is used, the two 30295formatting options which are built into perltidy (beautification or 30296html) are ignored. The following diagram illustrates the logical flow: 30297 30298 |-- (normal route) -> code beautification 30299 caller->perltidy->|-- (-html flag ) -> create html 30300 |-- (formatter given)-> callback to write_line 30301 30302This can be useful for processing perl scripts in some way. The 30303parameter C<$formatter> in the perltidy call, 30304 30305 formatter => $formatter, 30306 30307is an object created by the caller with a C<write_line> method which 30308will accept and process tokenized lines, one line per call. Here is 30309a simple example of a C<write_line> which merely prints the line number, 30310the line type (as determined by perltidy), and the text of the line: 30311 30312 sub write_line { 30313 30314 # This is called from perltidy line-by-line 30315 my $self = shift; 30316 my $line_of_tokens = shift; 30317 my $line_type = $line_of_tokens->{_line_type}; 30318 my $input_line_number = $line_of_tokens->{_line_number}; 30319 my $input_line = $line_of_tokens->{_line_text}; 30320 print "$input_line_number:$line_type:$input_line"; 30321 } 30322 30323The complete program, B<perllinetype>, is contained in the examples section of 30324the source distribution. As this example shows, the callback method 30325receives a parameter B<$line_of_tokens>, which is a reference to a hash 30326of other useful information. This example uses these hash entries: 30327 30328 $line_of_tokens->{_line_number} - the line number (1,2,...) 30329 $line_of_tokens->{_line_text} - the text of the line 30330 $line_of_tokens->{_line_type} - the type of the line, one of: 30331 30332 SYSTEM - system-specific code before hash-bang line 30333 CODE - line of perl code (including comments) 30334 POD_START - line starting pod, such as '=head' 30335 POD - pod documentation text 30336 POD_END - last line of pod section, '=cut' 30337 HERE - text of here-document 30338 HERE_END - last line of here-doc (target word) 30339 FORMAT - format section 30340 FORMAT_END - last line of format section, '.' 30341 DATA_START - __DATA__ line 30342 DATA - unidentified text following __DATA__ 30343 END_START - __END__ line 30344 END - unidentified text following __END__ 30345 ERROR - we are in big trouble, probably not a perl script 30346 30347Most applications will be only interested in lines of type B<CODE>. For 30348another example, let's write a program which checks for one of the 30349so-called I<naughty matching variables> C<&`>, C<$&>, and C<$'>, which 30350can slow down processing. Here is a B<write_line>, from the example 30351program B<find_naughty.pl>, which does that: 30352 30353 sub write_line { 30354 30355 # This is called back from perltidy line-by-line 30356 # We're looking for $`, $&, and $' 30357 my ( $self, $line_of_tokens ) = @_; 30358 30359 # pull out some stuff we might need 30360 my $line_type = $line_of_tokens->{_line_type}; 30361 my $input_line_number = $line_of_tokens->{_line_number}; 30362 my $input_line = $line_of_tokens->{_line_text}; 30363 my $rtoken_type = $line_of_tokens->{_rtoken_type}; 30364 my $rtokens = $line_of_tokens->{_rtokens}; 30365 chomp $input_line; 30366 30367 # skip comments, pod, etc 30368 return if ( $line_type ne 'CODE' ); 30369 30370 # loop over tokens looking for $`, $&, and $' 30371 for ( my $j = 0 ; $j < @$rtoken_type ; $j++ ) { 30372 30373 # we only want to examine token types 'i' (identifier) 30374 next unless $$rtoken_type[$j] eq 'i'; 30375 30376 # pull out the actual token text 30377 my $token = $$rtokens[$j]; 30378 30379 # and check it 30380 if ( $token =~ /^\$[\`\&\']$/ ) { 30381 print STDERR 30382 "$input_line_number: $token\n"; 30383 } 30384 } 30385 } 30386 30387This example pulls out these tokenization variables from the $line_of_tokens 30388hash reference: 30389 30390 $rtoken_type = $line_of_tokens->{_rtoken_type}; 30391 $rtokens = $line_of_tokens->{_rtokens}; 30392 30393The variable C<$rtoken_type> is a reference to an array of token type codes, 30394and C<$rtokens> is a reference to a corresponding array of token text. 30395These are obviously only defined for lines of type B<CODE>. 30396Perltidy classifies tokens into types, and has a brief code for each type. 30397You can get a complete list at any time by running perltidy from the 30398command line with 30399 30400 perltidy --dump-token-types 30401 30402In the present example, we are only looking for tokens of type B<i> 30403(identifiers), so the for loop skips past all other types. When an 30404identifier is found, its actual text is checked to see if it is one 30405being sought. If so, the above write_line prints the token and its 30406line number. 30407 30408The B<formatter> feature is relatively new in perltidy, and further 30409documentation needs to be written to complete its description. However, 30410several example programs have been written and can be found in the 30411B<examples> section of the source distribution. Probably the best way 30412to get started is to find one of the examples which most closely matches 30413your application and start modifying it. 30414 30415For help with perltidy's pecular way of breaking lines into tokens, you 30416might run, from the command line, 30417 30418 perltidy -D filename 30419 30420where F<filename> is a short script of interest. This will produce 30421F<filename.DEBUG> with interleaved lines of text and their token types. 30422The B<-D> flag has been in perltidy from the beginning for this purpose. 30423If you want to see the code which creates this file, it is 30424C<write_debug_entry> in Tidy.pm. 30425 30426=head1 EXPORT 30427 30428 &perltidy 30429 30430=head1 CREDITS 30431 30432Thanks to Hugh Myers who developed the initial modular interface 30433to perltidy. 30434 30435=head1 VERSION 30436 30437This man page documents Perl::Tidy version 20121207. 30438 30439=head1 LICENSE 30440 30441This package is free software; you can redistribute it and/or modify it 30442under the terms of the "GNU General Public License". 30443 30444Please refer to the file "COPYING" for details. 30445 30446=head1 AUTHOR 30447 30448 Steve Hancock 30449 perltidy at users.sourceforge.net 30450 30451=head1 SEE ALSO 30452 30453The perltidy(1) man page describes all of the features of perltidy. It 30454can be found at http://perltidy.sourceforge.net. 30455 30456=cut 30457