############################################################
#
#    perltidy - a perl script indenter and formatter
#
#    Copyright (c) 2000-2007 by Steve Hancock
#    Distributed under the GPL license agreement; see file COPYING
#
#    This program is free software; you can redistribute it and/or modify
#    it under the terms of the GNU General Public License as published by
#    the Free Software Foundation; either version 2 of the License, or
#    (at your option) any later version.
#
#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.
#
#    You should have received a copy of the GNU General Public License
#    along with this program; if not, write to the Free Software
#    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
#
#    For brief instructions, try 'perltidy -h'.
#    For more complete documentation, try 'man perltidy'
#    or visit http://perltidy.sourceforge.net
#
#    This script is an example of the default style.  It was formatted with:
#
#      perltidy Tidy.pm
#
#    Code Contributions:
#      Michael Cartmell supplied code for adaptation to VMS and helped with
#        v-strings.
#      Hugh S. Myers supplied sub streamhandle and the supporting code to
#        create a Perl::Tidy module which can operate on strings, arrays, etc.
#      Yves Orton supplied coding to help detect Windows versions.
#      Axel Rose supplied a patch for MacPerl.
#      Sebastien Aperghis-Tramoni supplied a patch for the defined or operator.
#      Dan Tyrell contributed a patch for binary I/O.
#      Ueli Hugenschmidt contributed a patch for -fpsc
#      Many others have supplied key ideas, suggestions, and bug reports;
#        see the CHANGES file.
#
############################################################

package Perl::Tidy;
use 5.004;    # need IO::File from 5.004 or later
BEGIN { $^W = 1; }    # turn on warnings

use strict;
use Exporter;
use Carp;
$|++;

use vars qw{
  $VERSION
  @ISA
  @EXPORT
  $missing_file_spec
};

@ISA    = qw( Exporter );
@EXPORT = qw( &perltidy );

use IO::File;
use File::Basename;

BEGIN {
    ( $VERSION = q($Id: Tidy.pm,v 1.73 2007/12/05 17:51:17 perltidy Exp $) ) =~ s/^.*\s+(\d+)\/(\d+)\/(\d+).*$/$1$2$3/; # all one line for MakeMaker
}

sub streamhandle {

    # given filename and mode (r or w), create an object which:
    #   has a 'getline' method if mode='r', and
    #   has a 'print' method if mode='w'.
    # The objects also need a 'close' method.
    #
    # How the object is made:
    #
    # if $filename is:     Make object using:
    # ----------------     -----------------
    # '-'                  (STDIN if mode = 'r', STDOUT if mode='w')
    # string               IO::File
    # ARRAY  ref           Perl::Tidy::IOScalarArray (formerly IO::ScalarArray)
    # STRING ref           Perl::Tidy::IOScalar      (formerly IO::Scalar)
    # object               object
    #                      (check for 'print' method for 'w' mode)
    #                      (check for 'getline' method for 'r' mode)
    #
    # Returns the list ( $fh, $name ):
    #   $fh   - the stream object; false (after a warning) if it could
    #           not be created
    #   $name - the reference type if a reference was given, otherwise
    #           the filename
    # Dies (confess) if an object lacks the method required by $mode.
    my $ref = ref( my $filename = shift );
    my $mode = shift;
    my $New;
    my $fh;

    # handle a reference
    if ($ref) {
        if ( $ref eq 'ARRAY' ) {
            $New = sub { Perl::Tidy::IOScalarArray->new(@_) };
        }
        elsif ( $ref eq 'SCALAR' ) {
            $New = sub { Perl::Tidy::IOScalar->new(@_) };
        }
        else {

            # Accept an object with a getline method for reading.  The
            # method is looked up with 'can' rather than by testing for a
            # sub in the object's own package, so that methods inherited
            # from a parent class are found too.  The explicit IO::File
            # test is retained from the original code as a fast path.
            if ( $mode =~ /[rR]/ ) {
                if ( $ref eq 'IO::File' || $ref->can('getline') ) {
                    $New = sub { $filename };
                }
                else {
                    $New = sub { undef };
                    confess <<EOM;
------------------------------------------------------------------------
No 'getline' method is defined for object of class $ref
Please check your call to Perl::Tidy::perltidy.  Trace follows.
------------------------------------------------------------------------
EOM
                }
            }

            # Accept an object with a print method for writing.
            # See note above about inherited methods
            if ( $mode =~ /[wW]/ ) {
                if ( $ref eq 'IO::File' || $ref->can('print') ) {
                    $New = sub { $filename };
                }
                else {
                    $New = sub { undef };
                    confess <<EOM;
------------------------------------------------------------------------
No 'print' method is defined for object of class $ref
Please check your call to Perl::Tidy::perltidy.  Trace follows.
------------------------------------------------------------------------
EOM
                }
            }
        }
    }

    # handle a string
    else {
        if ( $filename eq '-' ) {
            $New = sub { $mode eq 'w' ? *STDOUT : *STDIN }
        }
        else {
            $New = sub { IO::File->new(@_) };
        }
    }
    $fh = $New->( $filename, $mode )
      or warn "Couldn't open file:$filename in mode:$mode : $!\n";
    return $fh, ( $ref or $filename );
}

sub find_input_line_ending {

    # Peek at a file and return first line ending character.
    # Quietly return undef in case of any trouble.
    #
    # Only the first 1024 bytes are examined.  Returns "\015\012" (dos),
    # "\015" (mac) or "\012" (unix); returns undef for a reference, for
    # '-', for an unreadable file, or for an unrecognized mix of endings.
    my ($input_file) = @_;
    my $ending;

    # silently ignore input from object or stdin
    if ( ref($input_file) || $input_file eq '-' ) {
        return $ending;
    }

    # Open through IO::File with an explicit read mode.  A two-argument
    # open would interpret characters such as '>' or '|' in the file
    # name as a mode or a command.
    my $fh = IO::File->new( $input_file, 'r' ) or return $ending;

    binmode $fh;
    my $buf;
    read( $fh, $buf, 1024 );
    $fh->close();
    if ( $buf && $buf =~ /([\012\015]+)/ ) {
        my $test = $1;

        # dos
        if ( $test =~ /^(\015\012)+$/ ) { $ending = "\015\012" }

        # mac
        elsif ( $test =~ /^\015+$/ ) { $ending = "\015" }

        # unix
        elsif ( $test =~ /^\012+$/ ) { $ending = "\012" }

        # unknown
        else { }
    }

    # no ending seen
    else { }

    return $ending;
}

sub catfile {

    # concatenate a path and file basename
    # returns undef in case of error
    #
    # The last argument is the file basename; any preceding arguments
    # are path components.

    BEGIN { eval "require File::Spec"; $missing_file_spec = $@; }

    # use File::Spec if we can
    unless ($missing_file_spec) {
        return File::Spec->catfile(@_);
    }

    # Perl 5.004 systems may not have File::Spec so we'll make
    # a simple try.  We assume File::Basename is available.
    # return undef if not successful.
    my $name      = pop @_;
    my $path      = join '/', @_;
    my $test_file = $path . $name;
    my ( $test_name, $test_path ) = fileparse($test_file);
    return $test_file if ( $test_name eq $name );
    return undef      if ( $^O eq 'VMS' );

    # this should work at least for Windows and Unix:
    $test_file = $path . '/' . $name;
    ( $test_name, $test_path ) = fileparse($test_file);
    return $test_file if ( $test_name eq $name );
    return undef;
}

sub make_temporary_filename {

    # Make a temporary filename.
    #
    # The POSIX tmpnam() function tends to be unreliable for non-unix
    # systems (at least for the win32 systems that I've tested), so use
    # a pre-defined name.  A slight disadvantage of this is that two
    # perltidy runs in the same working directory may conflict.
    # However, the chance of that is small and manageable by the user.
    # An alternative would be to check for the file's existence and use,
    # say .TMP0, .TMP1, etc, but that scheme has its own problems.  So,
    # keep it simple.
    #
    # Returns the name of a newly created empty file when tmpnam() is
    # usable, otherwise the fixed name "perltidy.TMP".
    my $name = "perltidy.TMP";

    # Classic Mac OS reports itself as 'MacOS'; match without regard to
    # case so the historical spelling 'MacOs' is still accepted.
    if ( $^O =~ /win32|dos/i || $^O eq 'VMS' || $^O =~ /^MacOS$/i ) {
        return $name;
    }

    # fall back on the fixed name if POSIX cannot supply tmpnam
    eval "use POSIX qw(tmpnam)";
    if ($@) { return $name }
    use IO::File;

    # just make a couple of tries before giving up and using the default
    for ( 0 .. 1 ) {
        my $tmpname = tmpnam();

        # O_EXCL makes the open fail if the name is already in use
        my $fh = IO::File->new( $tmpname, O_RDWR | O_CREAT | O_EXCL );
        if ($fh) {
            $fh->close();
            return ($tmpname);
        }
    }
    return ($name);
}

# Here is a map of the flow of data from the input source to the output
# line sink:
#
# LineSource-->Tokenizer-->Formatter-->VerticalAligner-->FileWriter-->
#       input         groups                 output
#       lines   tokens      lines       of          lines
#                                      lines
#
# The names correspond to the package names responsible for the unit processes.
#
# The overall process is controlled by the "main" package.
#
# LineSource is the stream of input lines
#
# Tokenizer analyzes a line and breaks it into tokens, peeking ahead
# if necessary.  A token is any section of the input line which should be
# manipulated as a single entity during formatting.  For example, a single
# ',' character is a token, and so is an entire side comment.  It handles
# the complexities of Perl syntax, such as distinguishing between '<<' as
# a shift operator and as a here-document, or distinguishing between '/'
# as a divide symbol and as a pattern delimiter.
#
# Formatter inserts and deletes whitespace between tokens, and breaks
# sequences of tokens at appropriate points as output lines.  It bases its
# decisions on the default rules as modified by any command-line options.
#
# VerticalAligner collects groups of lines together and tries to line up
# certain tokens, such as '=>', '#', and '=' by adding whitespace.
#
# FileWriter simply writes lines to the output stream.
#
# The Logger package, not shown, records significant events and warning
# messages.  It writes a .LOG file, which may be saved with a
# '-log' or a '-g' flag.

{

    # variables needed by interrupt handler:
    my $tokenizer;
    my $input_file;

    # this routine may be called to give a status report if interrupted.  If a
    # parameter is given, it will call exit with that parameter.  This is no
    # longer used because it works under Unix but not under Windows.
    sub interrupt_handler {

        my $exit_flag = shift;
        print STDERR "perltidy interrupted";
        if ($tokenizer) {
            my $input_line_number =
              Perl::Tidy::Tokenizer::get_input_line_number();
            print STDERR " at line $input_line_number";
        }
        if ($input_file) {

            if   ( ref $input_file ) { print STDERR " of reference to:" }
            else                     { print STDERR " of file:" }
            print STDERR " $input_file";
        }
        print STDERR "\n";
        exit $exit_flag if defined($exit_flag);
    }

    # Main entry point.  Called with a list of key => value pairs; the
    # recognized keys are those of %defaults below.  Processes every file
    # named in @ARGV (or in the 'argv' parameter), or the 'source' stream,
    # or standard input if nothing else is given.  Returns nothing useful;
    # fatal problems are reported with die/croak/confess.
    sub perltidy {

        my %defaults = (
            argv                  => undef,
            destination           => undef,
            formatter             => undef,
            logfile               => undef,
            errorfile             => undef,
            perltidyrc            => undef,
            source                => undef,
            stderr                => undef,
            dump_options          => undef,
            dump_options_type     => undef,
            dump_getopt_flags     => undef,
            dump_options_category => undef,
            dump_options_range    => undef,
            dump_abbreviations    => undef,
        );

        # don't overwrite callers ARGV
        local @ARGV = @ARGV;

        my %input_hash = @_;

        if ( my @bad_keys = grep { !exists $defaults{$_} } keys %input_hash ) {
            local $" = ')(';
            my @good_keys = sort keys %defaults;
            @bad_keys = sort @bad_keys;
            confess <<EOM;
------------------------------------------------------------------------
Unknown perltidy parameter : (@bad_keys)
perltidy only understands : (@good_keys)
------------------------------------------------------------------------

EOM
        }

        # fetch a call parameter which, if defined, must be a hash ref
        my $get_hash_ref = sub {
            my ($key) = @_;
            my $hash_ref = $input_hash{$key};
            if ( defined($hash_ref) ) {
                unless ( ref($hash_ref) eq 'HASH' ) {
                    my $what = ref($hash_ref);
                    my $but_is =
                      $what ? "but is ref to $what" : "but is not a reference";
                    croak <<EOM;
------------------------------------------------------------------------
error in call to perltidy:
-$key must be reference to HASH $but_is
------------------------------------------------------------------------
EOM
                }
            }
            return $hash_ref;
        };

        %input_hash = ( %defaults, %input_hash );
        my $argv               = $input_hash{'argv'};
        my $destination_stream = $input_hash{'destination'};
        my $errorfile_stream   = $input_hash{'errorfile'};
        my $logfile_stream     = $input_hash{'logfile'};
        my $perltidyrc_stream  = $input_hash{'perltidyrc'};
        my $source_stream      = $input_hash{'source'};
        my $stderr_stream      = $input_hash{'stderr'};
        my $user_formatter     = $input_hash{'formatter'};

        # various dump parameters
        my $dump_options_type     = $input_hash{'dump_options_type'};
        my $dump_options          = $get_hash_ref->('dump_options');
        my $dump_getopt_flags     = $get_hash_ref->('dump_getopt_flags');
        my $dump_options_category = $get_hash_ref->('dump_options_category');
        my $dump_abbreviations    = $get_hash_ref->('dump_abbreviations');
        my $dump_options_range    = $get_hash_ref->('dump_options_range');

        # validate dump_options_type
        if ( defined($dump_options) ) {
            unless ( defined($dump_options_type) ) {
                $dump_options_type = 'perltidyrc';
            }
            unless ( $dump_options_type =~ /^(perltidyrc|full)$/ ) {
                croak <<EOM;
------------------------------------------------------------------------
Please check value of -dump_options_type in call to perltidy;
saw: '$dump_options_type' 
expecting: 'perltidyrc' or 'full'
------------------------------------------------------------------------
EOM

            }
        }
        else {
            $dump_options_type = "";
        }

        if ($user_formatter) {

            # if the user defines a formatter, there is no output stream,
            # but we need a null stream to keep coding simple
            $destination_stream = Perl::Tidy::DevNull->new();
        }

        # see if ARGV is overridden
        if ( defined($argv) ) {

            my $rargv = ref $argv;
            if ( $rargv eq 'SCALAR' ) { $argv = $$argv; $rargv = undef }

            # ref to ARRAY
            if ($rargv) {
                if ( $rargv eq 'ARRAY' ) {
                    @ARGV = @$argv;
                }
                else {
                    croak <<EOM;
------------------------------------------------------------------------
Please check value of -argv in call to perltidy;
it must be a string or ref to ARRAY but is: $rargv
------------------------------------------------------------------------
EOM
                }
            }

            # string
            else {
                my ( $rargv, $msg ) = parse_args($argv);
                if ($msg) {
                    die <<EOM;
Error parsing this string passed to to perltidy with 'argv': 
$msg
EOM
                }
                @ARGV = @{$rargv};
            }
        }

        # redirect STDERR if requested
        if ($stderr_stream) {
            my ( $fh_stderr, $stderr_file ) =
              Perl::Tidy::streamhandle( $stderr_stream, 'w' );
            if ($fh_stderr) { *STDERR = $fh_stderr }
            else {
                croak <<EOM;
------------------------------------------------------------------------
Unable to redirect STDERR to $stderr_stream
Please check value of -stderr in call to perltidy
------------------------------------------------------------------------
EOM
            }
        }

        # messages which arise before a logger exists are saved here
        my $rpending_complaint;
        $$rpending_complaint = "";
        my $rpending_logfile_message;
        $$rpending_logfile_message = "";

        my ( $is_Windows, $Windows_type ) =
          look_for_Windows($rpending_complaint);

        # VMS file names are restricted to a 40.40 format, so we append _tdy
        # instead of .tdy, etc. (but see also sub check_vms_filename)
        my $dot;
        my $dot_pattern;
        if ( $^O eq 'VMS' ) {
            $dot         = '_';
            $dot_pattern = '_';
        }
        else {
            $dot         = '.';
            $dot_pattern = '\.';    # must escape for use in regex
        }

        # handle command line options
        my ( $rOpts, $config_file, $rraw_options, $saw_extrude, $roption_string,
            $rexpansion, $roption_category, $roption_range )
          = process_command_line(
            $perltidyrc_stream,  $is_Windows, $Windows_type,
            $rpending_complaint, $dump_options_type,
          );

        # return or exit immediately after all dumps
        my $quit_now = 0;

        # Getopt parameters and their flags
        if ( defined($dump_getopt_flags) ) {
            $quit_now = 1;
            foreach my $op ( @{$roption_string} ) {
                my $opt  = $op;
                my $flag = "";

                # Examples:
                #  some-option=s
                #  some-option=i
                #  some-option:i
                #  some-option!
                if ( $opt =~ /(.*)(!|=.*|:.*)$/ ) {
                    $opt  = $1;
                    $flag = $2;
                }
                $dump_getopt_flags->{$opt} = $flag;
            }
        }

        if ( defined($dump_options_category) ) {
            $quit_now = 1;
            %{$dump_options_category} = %{$roption_category};
        }

        if ( defined($dump_options_range) ) {
            $quit_now = 1;
            %{$dump_options_range} = %{$roption_range};
        }

        if ( defined($dump_abbreviations) ) {
            $quit_now = 1;
            %{$dump_abbreviations} = %{$rexpansion};
        }

        if ( defined($dump_options) ) {
            $quit_now = 1;
            %{$dump_options} = %{$rOpts};
        }

        return if ($quit_now);

        # make printable string of options for this run as possible diagnostic
        my $readable_options = readable_options( $rOpts, $roption_string );

        # dump from command line
        if ( $rOpts->{'dump-options'} ) {
            print STDOUT $readable_options;
            exit 1;
        }

        check_options( $rOpts, $is_Windows, $Windows_type,
            $rpending_complaint );

        if ($user_formatter) {
            $rOpts->{'format'} = 'user';
        }

        # there must be one entry here for every possible format
        my %default_file_extension = (
            tidy => 'tdy',
            html => 'html',
            user => '',
        );

        # be sure we have a valid output format
        unless ( exists $default_file_extension{ $rOpts->{'format'} } ) {
            my $formats = join ' ',
              sort map { "'" . $_ . "'" } keys %default_file_extension;
            my $fmt = $rOpts->{'format'};
            die "-format='$fmt' but must be one of: $formats\n";
        }

        my $output_extension =
          make_extension( $rOpts->{'output-file-extension'},
            $default_file_extension{ $rOpts->{'format'} }, $dot );

        my $backup_extension =
          make_extension( $rOpts->{'backup-file-extension'}, 'bak', $dot );

        my $html_toc_extension =
          make_extension( $rOpts->{'html-toc-extension'}, 'toc', $dot );

        my $html_src_extension =
          make_extension( $rOpts->{'html-src-extension'}, 'src', $dot );

        # check for -b option;
        my $in_place_modify = $rOpts->{'backup-and-modify-in-place'}
          && $rOpts->{'format'} eq 'tidy' # silently ignore unless beautify mode
          && @ARGV > 0;    # silently ignore if standard input;
                           # this allows -b to be in a .perltidyrc file
                           # without error messages when running from an editor

        # turn off -b with warnings in case of conflicts with other options
        if ($in_place_modify) {
            if ( $rOpts->{'standard-output'} ) {
                warn "Ignoring -b; you may not use -b and -st together\n";
                $in_place_modify = 0;
            }
            if ($destination_stream) {
                warn
"Ignoring -b; you may not specify a destination array and -b together\n";
                $in_place_modify = 0;
            }
            if ($source_stream) {
                warn
"Ignoring -b; you may not specify a source array and -b together\n";
                $in_place_modify = 0;
            }
            if ( $rOpts->{'outfile'} ) {
                warn "Ignoring -b; you may not use -b and -o together\n";
                $in_place_modify = 0;
            }
            if ( defined( $rOpts->{'output-path'} ) ) {
                warn "Ignoring -b; you may not use -b and -opath together\n";
                $in_place_modify = 0;
            }
        }

        Perl::Tidy::Formatter::check_options($rOpts);
        if ( $rOpts->{'format'} eq 'html' ) {
            Perl::Tidy::HtmlWriter->check_options($rOpts);
        }

        # make the pattern of file extensions that we shouldn't touch
        my $forbidden_file_extensions = "(($dot_pattern)(LOG|DEBUG|ERR|TEE)";
        if ($output_extension) {
            my $ext = quotemeta($output_extension);
            $forbidden_file_extensions .= "|$ext";
        }
        if ( $in_place_modify && $backup_extension ) {
            my $ext = quotemeta($backup_extension);
            $forbidden_file_extensions .= "|$ext";
        }
        $forbidden_file_extensions .= ')$';

        # Create a diagnostics object if requested;
        # This is only useful for code development
        my $diagnostics_object = undef;
        if ( $rOpts->{'DIAGNOSTICS'} ) {
            $diagnostics_object = Perl::Tidy::Diagnostics->new();
        }

        # no filenames should be given if input is from an array
        if ($source_stream) {
            if ( @ARGV > 0 ) {
                die
"You may not specify any filenames when a source array is given\n";
            }

            # we'll stuff the source array into ARGV
            unshift( @ARGV, $source_stream );

            # No special treatment for source stream which is a filename.
            # This will enable checks for binary files and other bad stuff.
            $source_stream = undef unless ref($source_stream);
        }

        # use stdin by default if no source array and no args
        else {
            unshift( @ARGV, '-' ) unless @ARGV;
        }

        # loop to process all files in argument list
        my $number_of_files = @ARGV;
        my $formatter       = undef;
        $tokenizer = undef;

        # test for definedness, not truth, so that a file named '0' does
        # not silently end the loop
        while ( defined( $input_file = shift @ARGV ) ) {
            my $fileroot;
            my $input_file_permissions;

            #---------------------------------------------------------------
            # determine the input file name
            #---------------------------------------------------------------
            if ($source_stream) {
                $fileroot = "perltidy";
            }
            elsif ( $input_file eq '-' ) {    # '-' indicates input from STDIN
                $fileroot = "perltidy";   # root name to use for .ERR, .LOG, etc
                $in_place_modify = 0;
            }
            else {
                $fileroot = $input_file;
                unless ( -e $input_file ) {

                    # file doesn't exist - check for a file glob
                    if ( $input_file =~ /([\?\*\[\{])/ ) {

                        # Windows shell may not remove quotes, so do it
                        my $input_file = $input_file;
                        if ( $input_file =~ /^\'(.+)\'$/ ) { $input_file = $1 }
                        if ( $input_file =~ /^\"(.+)\"$/ ) { $input_file = $1 }
                        my $pattern = fileglob_to_re($input_file);

                        # trial compile; a bad pattern leaves a message in $@
                        eval "/$pattern/";
                        if ( !$@ && opendir( DIR, './' ) ) {
                            my @files =
                              grep { /$pattern/ && !-d $_ } readdir(DIR);
                            closedir(DIR);
                            if (@files) {
                                unshift @ARGV, @files;
                                next;
                            }
                        }
                    }
                    print "skipping file: '$input_file': no matches found\n";
                    next;
                }

                unless ( -f $input_file ) {
                    print "skipping file: $input_file: not a regular file\n";
                    next;
                }

                unless ( ( -T $input_file ) || $rOpts->{'force-read-binary'} ) {
                    print
"skipping file: $input_file: Non-text (override with -f)\n";
                    next;
                }

                # we should have a valid filename now
                $fileroot               = $input_file;
                $input_file_permissions = ( stat $input_file )[2] & 07777;

                if ( $^O eq 'VMS' ) {
                    ( $fileroot, $dot ) = check_vms_filename($fileroot);
                }

                # add option to change path here
                if ( defined( $rOpts->{'output-path'} ) ) {

                    my ( $base, $old_path ) = fileparse($fileroot);
                    my $new_path = $rOpts->{'output-path'};
                    unless ( -d $new_path ) {
                        unless ( mkdir $new_path, 0777 ) {
                            die "unable to create directory $new_path: $!\n";
                        }
                    }
                    my $path = $new_path;
                    $fileroot = catfile( $path, $base );
                    unless ($fileroot) {
                        die <<EOM;
------------------------------------------------------------------------
Problem combining $new_path and $base to make a filename; check -opath
------------------------------------------------------------------------
EOM
                    }
                }
            }

            # Skip files with same extension as the output files because
            # this can lead to a messy situation with files like
            # script.tdy.tdy.tdy ... or worse problems ...  when you
            # rerun perltidy over and over with wildcard input.
            #
            # Note: the pattern is built from the options of this call, so
            # it must not be frozen with the /o modifier; otherwise a later
            # call with different extensions would reuse the first pattern.
            if (
                !$source_stream
                && (   $input_file =~ /$forbidden_file_extensions/
                    || $input_file eq 'DIAGNOSTICS' )
              )
            {
                print "skipping file: $input_file: wrong extension\n";
                next;
            }

            # the 'source_object' supplies a method to read the input file
            my $source_object =
              Perl::Tidy::LineSource->new( $input_file, $rOpts,
                $rpending_logfile_message );
            next unless ($source_object);

            # register this file name with the Diagnostics package
            $diagnostics_object->set_input_file($input_file)
              if $diagnostics_object;

            #---------------------------------------------------------------
            # determine the output file name
            #---------------------------------------------------------------
            my $output_file = undef;
            my $actual_output_extension;

            if ( $rOpts->{'outfile'} ) {

                if ( $number_of_files <= 1 ) {

                    if ( $rOpts->{'standard-output'} ) {
                        die "You may not use -o and -st together\n";
                    }
                    elsif ($destination_stream) {
                        die
"You may not specify a destination array and -o together\n";
                    }
                    elsif ( defined( $rOpts->{'output-path'} ) ) {
                        die "You may not specify -o and -opath together\n";
                    }
                    elsif ( defined( $rOpts->{'output-file-extension'} ) ) {
                        die "You may not specify -o and -oext together\n";
                    }
                    $output_file = $rOpts->{outfile};

                    # make sure user gives a file name after -o
                    if ( $output_file =~ /^-/ ) {
                        die "You must specify a valid filename after -o\n";
                    }

                    # do not overwrite input file with -o
                    if ( defined($input_file_permissions)
                        && ( $output_file eq $input_file ) )
                    {
                        die
                          "Use 'perltidy -b $input_file' to modify in-place\n";
                    }
                }
                else {
                    die "You may not use -o with more than one input file\n";
                }
            }
            elsif ( $rOpts->{'standard-output'} ) {
                if ($destination_stream) {
                    die
"You may not specify a destination array and -st together\n";
                }
                $output_file = '-';

                if ( $number_of_files <= 1 ) {
                }
                else {
                    die "You may not use -st with more than one input file\n";
                }
            }
            elsif ($destination_stream) {
                $output_file = $destination_stream;
            }
            elsif ($source_stream) {  # source but no destination goes to stdout
                $output_file = '-';
            }
            elsif ( $input_file eq '-' ) {
                $output_file = '-';
            }
            else {
                if ($in_place_modify) {

                    # with -b the output is collected in an anonymous
                    # temporary file and copied back after formatting
                    $output_file = IO::File->new_tmpfile()
                      or die "cannot open temp file for -b option: $!\n";
                }
                else {
                    $actual_output_extension = $output_extension;
                    $output_file             = $fileroot . $output_extension;
                }
            }

            # the 'sink_object' knows how to write the output file
            my $tee_file = $fileroot . $dot . "TEE";

            my $line_separator = $rOpts->{'output-line-ending'};
            if ( $rOpts->{'preserve-line-endings'} ) {
                $line_separator = find_input_line_ending($input_file);
            }

            # Eventually all I/O may be done with binmode, but for now it is
            # only done when a user requests a particular line separator
            # through the -ple or -ole flags
            my $binmode = 0;
            if   ( defined($line_separator) ) { $binmode        = 1 }
            else                              { $line_separator = "\n" }

            my $sink_object =
              Perl::Tidy::LineSink->new( $output_file, $tee_file,
                $line_separator, $rOpts, $rpending_logfile_message, $binmode );

            #---------------------------------------------------------------
            # initialize the error logger
            #---------------------------------------------------------------
            my $warning_file = $fileroot . $dot . "ERR";
            if ($errorfile_stream) { $warning_file = $errorfile_stream }
            my $log_file = $fileroot . $dot . "LOG";
            if ($logfile_stream) { $log_file = $logfile_stream }

            my $logger_object =
              Perl::Tidy::Logger->new( $rOpts, $log_file, $warning_file,
                $saw_extrude );
            write_logfile_header(
                $rOpts,        $logger_object, $config_file,
                $rraw_options, $Windows_type,  $readable_options,
            );
            if ($$rpending_logfile_message) {
                $logger_object->write_logfile_entry($$rpending_logfile_message);
            }
            if ($$rpending_complaint) {
                $logger_object->complain($$rpending_complaint);
            }

            #---------------------------------------------------------------
            # initialize the debug object, if any
            #---------------------------------------------------------------
            my $debugger_object = undef;
            if ( $rOpts->{DEBUG} ) {
                $debugger_object =
                  Perl::Tidy::Debugger->new( $fileroot . $dot . "DEBUG" );
            }

            #---------------------------------------------------------------
            # create a formatter for this file : html writer or pretty printer
            #---------------------------------------------------------------

            # we have to delete any old formatter because, for safety,
            # the formatter will check to see that there is only one.
            $formatter = undef;

            if ($user_formatter) {
                $formatter = $user_formatter;
            }
            elsif ( $rOpts->{'format'} eq 'html' ) {
                $formatter =
                  Perl::Tidy::HtmlWriter->new( $fileroot, $output_file,
                    $actual_output_extension, $html_toc_extension,
                    $html_src_extension );
            }
            elsif ( $rOpts->{'format'} eq 'tidy' ) {
                $formatter = Perl::Tidy::Formatter->new(
                    logger_object      => $logger_object,
                    diagnostics_object => $diagnostics_object,
                    sink_object        => $sink_object,
                );
            }
            else {
                die "I don't know how to do -format=$rOpts->{'format'}\n";
            }

            unless ($formatter) {
                die "Unable to continue with $rOpts->{'format'} formatting\n";
            }

            #---------------------------------------------------------------
            # create the tokenizer for this file
            #---------------------------------------------------------------
            $tokenizer = undef;    # must destroy old tokenizer
            $tokenizer = Perl::Tidy::Tokenizer->new(
                source_object       => $source_object,
                logger_object       => $logger_object,
                debugger_object     => $debugger_object,
                diagnostics_object  => $diagnostics_object,
                starting_level      => $rOpts->{'starting-indentation-level'},
                tabs                => $rOpts->{'tabs'},
                indent_columns      => $rOpts->{'indent-columns'},
                look_for_hash_bang  => $rOpts->{'look-for-hash-bang'},
                look_for_autoloader => $rOpts->{'look-for-autoloader'},
                look_for_selfloader => $rOpts->{'look-for-selfloader'},
                trim_qw             => $rOpts->{'trim-qw'},
            );

            #---------------------------------------------------------------
            # now we can do it
            #---------------------------------------------------------------
            process_this_file( $tokenizer, $formatter );

            #---------------------------------------------------------------
            # close the input source and report errors
            #---------------------------------------------------------------
            $source_object->close_input_file();

            # get file names to use for syntax check
            my $ifname = $source_object->get_input_file_copy_name();
            my $ofname = $sink_object->get_output_file_copy();

            #---------------------------------------------------------------
            # handle the -b option (backup and modify in-place)
            #---------------------------------------------------------------
            if ($in_place_modify) {
                unless ( -f $input_file ) {

                    # oh, oh, no real file to backup ..
                    # shouldn't happen because of numerous preliminary checks
                    # (the message is passed to die directly; 'die print'
                    # would write it to STDOUT and die with the text "1")
                    die
"problem with -b backing up input file '$input_file': not a file\n";
                }
                my $backup_name = $input_file . $backup_extension;
                if ( -f $backup_name ) {
                    unlink($backup_name)
                      or die
"unable to remove previous '$backup_name' for -b option; check permissions: $!\n";
                }
                rename( $input_file, $backup_name )
                  or die
"problem renaming $input_file to $backup_name for -b option: $!\n";
                $ifname = $backup_name;

                seek( $output_file, 0, 0 )
                  or die "unable to rewind tmp file for -b option: $!\n";

                # open with an explicit mode argument so that the file
                # name is never parsed as part of a mode string
                my $fout = IO::File->new( $input_file, 'w' )
                  or die
"problem opening $input_file for write for -b option; check directory permissions: $!\n";
                binmode $fout;
                my $line;
                while ( $line = $output_file->getline() ) {
                    $fout->print($line);
                }
                $fout->close();
                $output_file = $input_file;
                $ofname      = $input_file;
            }

            #---------------------------------------------------------------
            # clean up and report errors
            #---------------------------------------------------------------
            $sink_object->close_output_file()    if $sink_object;
            $debugger_object->close_debug_file() if $debugger_object;

            my $infile_syntax_ok = 0;    # -1 no  0=don't know   1 yes
            if ($output_file) {

                if ($input_file_permissions) {

                    # give output script same permissions as input script, but
                    # make it user-writable or else we can't run perltidy again.
                    # Thus we retain whatever executable flags were set.
                    if ( $rOpts->{'format'} eq 'tidy' ) {
                        chmod( $input_file_permissions | 0600, $output_file );
                    }

                    # else use default permissions for html and any other format

                }
                if ( $logger_object && $rOpts->{'check-syntax'} ) {
                    $infile_syntax_ok =
                      check_syntax( $ifname, $ofname, $logger_object, $rOpts );
                }
            }

            $logger_object->finish( $infile_syntax_ok, $formatter )
              if $logger_object;
        }    # end of loop to process all files
    }    # end of main program
}

sub fileglob_to_re {

    # modified (corrected) from version in find2perl
    # Given a simple file glob, return the text of an anchored regular
    # expression which matches the same names.
    my $x = shift;
    $x =~ s#([./^\$()])#\\$1#g;    # escape special characters
    $x =~ s#\*#.*#g;               # '*' -> '.*'
    $x =~ s#\?#.#g;                # '?' -> '.'
    "^$x\\z";                      # match whole word
}

sub make_extension {

    # Make a file extension, including any leading '.' if necessary
    # The '.' may actually be an '_' under VMS
    #
    # $extension - extension requested by the user (may be undefined)
    # $default   - extension to use if none was requested
    # $dot       - separator to prepend
    my ( $extension, $default, $dot ) = @_;

    # Use the default if none specified
    $extension = $default unless ($extension);

    # Only extensions with these leading characters get a '.'
    # This rule gives the user some freedom
    if ( $extension =~ /^[a-zA-Z0-9]/ ) {
        $extension = $dot . $extension;
    }
    return $extension;
}

sub write_logfile_header {

    # Write the opening entries of the log file: version and system,
    # the configuration file found (if any), the raw options, and, for
    # -DEBUG or --show-options, the complete final parameter set.
    my (
        $rOpts,        $logger_object, $config_file,
        $rraw_options, $Windows_type,  $readable_options
    ) = @_;
    $logger_object->write_logfile_entry(
"perltidy version $VERSION log file on a $^O system, OLD_PERL_VERSION=$]\n"
    );
    if ($Windows_type) {
        $logger_object->write_logfile_entry("Windows type is $Windows_type\n");
    }
    my $options_string = join( ' ', @$rraw_options );

    if ($config_file) {
        $logger_object->write_logfile_entry(
            "Found Configuration File >>> $config_file \n");
    }
    $logger_object->write_logfile_entry(
        "Configuration and command line parameters for this run:\n");
    $logger_object->write_logfile_entry("$options_string\n");

    if ( $rOpts->{'DEBUG'} || $rOpts->{'show-options'} ) {
        $rOpts->{'logfile'} = 1;    # force logfile to be saved
        $logger_object->write_logfile_entry(
            "Final parameter set for this run\n");
        $logger_object->write_logfile_entry(
            "------------------------------------\n");

        $logger_object->write_logfile_entry($readable_options);

        $logger_object->write_logfile_entry(
            "------------------------------------\n");
    }
    $logger_object->write_logfile_entry(
        "To find error messages search for 'WARNING' with your editor\n");
}

sub generate_options {

    ######################################################################
    # Generate and return references to:
    #  @option_string - the list of options to be passed to Getopt::Long
    #  @defaults - the list of default options
    #  %expansion - a hash showing how all abbreviations are expanded
    #  %category - a hash giving the general category of each option
    #  %option_range - a hash giving the valid ranges of certain options

    # Note: a few options are not documented in the man page and usage
    # message.
# This is because these are experimental or debug options and
    # may or may not be retained in future versions.
    #
    # Here are the undocumented flags as far as I know. Any of them
    # may disappear at any time. They are mainly for fine-tuning
    # and debugging.
    #
    # fll --> fuzzy-line-length # a trivial parameter which gets
    #                             turned off for the extrude option
    #                             which is mainly for debugging
    # chk --> check-multiline-quotes # check for old bug; to be deleted
    # scl --> short-concatenation-item-length # helps break at '.'
    # recombine # for debugging line breaks
    # valign # for debugging vertical alignment
    # I --> DIAGNOSTICS # for debugging
    ######################################################################

    # here is a summary of the Getopt codes:
    # <none> does not take an argument
    # =s takes a mandatory string
    # :s takes an optional string (DO NOT USE - filenames will get eaten up)
    # =i takes a mandatory integer
    # :i takes an optional integer (NOT RECOMMENDED - can cause trouble)
    # ! does not take an argument and may be negated
    # i.e., -foo and -nofoo are allowed
    # a double dash signals the end of the options list
    #
    #---------------------------------------------------------------
    # Define the option string passed to GetOptions.
    #---------------------------------------------------------------

    my @option_string   = ();
    my %expansion       = ();
    my %option_category = ();
    my %option_range    = ();

    # names of categories in manual
    # leading integers will allow sorting
    my @category_name = (
        '0. I/O control',
        '1. Basic formatting options',
        '2. Code indentation control',
        '3. Whitespace control',
        '4. Comment controls',
        '5. Linebreak controls',
        '6. Controlling list formatting',
        '7. Retaining or ignoring existing line breaks',
        '8. Blank line control',
        '9. Other controls',
        '10. HTML options',
        '11. pod2html options',
        '12. Controlling HTML properties',
        '13. Debugging',
    );

    # These options are parsed directly by perltidy:
    # help h
    # version v
    # However, they are included in the option set so that they will
    # be seen in the options dump.

    # These long option names have no abbreviations or are treated specially
    @option_string = qw(
      html!
      noprofile
      no-profile
      npro
      recombine!
      valign!
    );

    my $category = 13;    # Debugging
    foreach (@option_string) {
        my $opt = $_;     # must avoid changing the actual flag
        $opt =~ s/!$//;
        $option_category{$opt} = $category_name[$category];
    }

    # NOTE(review): index 11 of @category_name is '11. pod2html options';
    # 'HTML options' is index 10. Left as found -- confirm which was meant.
    $category = 11;       # HTML
    $option_category{html} = $category_name[$category];

    # routine to install and check options
    #   $long_name  - the full option name
    #   $short_name - its abbreviation, or a false value for none
    #   $flag       - the Getopt code appended to the long name
    # The option is filed under the category currently held in $category.
    # Dies if the abbreviation, or its 'n' form for a '!' flag, is taken.
    my $add_option = sub {
        my ( $long_name, $short_name, $flag ) = @_;
        push @option_string, $long_name . $flag;
        $option_category{$long_name} = $category_name[$category];
        if ($short_name) {
            if ( $expansion{$short_name} ) {
                my $existing_name = $expansion{$short_name}[0];
                die
"redefining abbreviation $short_name for $long_name; already used for $existing_name\n";
            }
            $expansion{$short_name} = [$long_name];
            if ( $flag eq '!' ) {
                my $nshort_name = 'n' . $short_name;
                my $nolong_name = 'no' . $long_name;
                if ( $expansion{$nshort_name} ) {
                    my $existing_name = $expansion{$nshort_name}[0];
                    die
"attempting to redefine abbreviation $nshort_name for $nolong_name; already used for $existing_name\n";
                }
                $expansion{$nshort_name} = [$nolong_name];
            }
        }
    };

    # Install long option names which have a simple abbreviation.
    # Options with code '!' get standard negation ('no' for long names,
    # 'n' for abbreviations). Categories follow the manual.

    ###########################
    $category = 0;    # I/O_Control
    ###########################
    $add_option->( 'backup-and-modify-in-place', 'b',     '!' );
    $add_option->( 'backup-file-extension',      'bext',  '=s' );
    $add_option->( 'force-read-binary',          'f',     '!' );
    $add_option->( 'format',                     'fmt',   '=s' );
    $add_option->( 'logfile',                    'log',   '!' );
    $add_option->( 'logfile-gap',                'g',     ':i' );
    $add_option->( 'outfile',                    'o',     '=s' );
    $add_option->( 'output-file-extension',      'oext',  '=s' );
    $add_option->( 'output-path',                'opath', '=s' );
    $add_option->( 'profile',                    'pro',   '=s' );
    $add_option->( 'quiet',                      'q',     '!' );
    $add_option->( 'standard-error-output',      'se',    '!' );
    $add_option->( 'standard-output',            'st',    '!' );
    $add_option->( 'warning-output',             'w',     '!' );

    # options which are both toggle switches and values moved here
    # to hide from tidyview (which does not show category 0 flags):
    # -ole moved here from category 1
    # -sil moved here from category 2
    $add_option->( 'output-line-ending',         'ole', '=s' );
    $add_option->( 'starting-indentation-level', 'sil', '=i' );

    ########################################
    $category = 1;    # Basic formatting options
    ########################################
    $add_option->( 'check-syntax',             'syn',  '!' );
    $add_option->( 'entab-leading-whitespace', 'et',   '=i' );
    $add_option->( 'indent-columns',           'i',    '=i' );
    $add_option->( 'maximum-line-length',      'l',    '=i' );
    $add_option->( 'perl-syntax-check-flags',  'pscf', '=s' );
    $add_option->( 'preserve-line-endings',    'ple',  '!' );
    $add_option->( 'tabs',                     't',    '!' );

    ########################################
    $category = 2;    # Code indentation control
    ########################################
    $add_option->( 'continuation-indentation',           'ci',   '=i' );
    $add_option->( 'line-up-parentheses',                'lp',   '!' );
    $add_option->( 'outdent-keyword-list',               'okwl', '=s' );
    $add_option->( 'outdent-keywords',                   'okw',  '!' );
    $add_option->( 'outdent-labels',                     'ola',  '!' );
    $add_option->( 'outdent-long-quotes',                'olq',  '!' );
    $add_option->( 'indent-closing-brace',               'icb',  '!' );
    $add_option->( 'closing-token-indentation',          'cti',  '=i' );
    $add_option->( 'closing-paren-indentation',          'cpi',  '=i' );
    $add_option->( 'closing-brace-indentation',          'cbi',  '=i' );
    $add_option->( 'closing-square-bracket-indentation', 'csbi', '=i' );
    $add_option->( 'brace-left-and-indent',              'bli',  '!' );
    $add_option->( 'brace-left-and-indent-list',         'blil', '=s' );

    ########################################
    $category = 3;    # Whitespace control
    ########################################
    $add_option->( 'add-semicolons',                            'asc',   '!' );
    $add_option->( 'add-whitespace',                            'aws',   '!' );
    $add_option->( 'block-brace-tightness',                     'bbt',   '=i' );
    $add_option->( 'brace-tightness',                           'bt',    '=i' );
    $add_option->( 'delete-old-whitespace',                     'dws',   '!' );
    $add_option->( 'delete-semicolons',                         'dsm',   '!' );
    $add_option->( 'nospace-after-keyword',                     'nsak',  '=s' );
    $add_option->( 'nowant-left-space',                         'nwls',  '=s' );
    $add_option->( 'nowant-right-space',                        'nwrs',  '=s' );
    $add_option->( 'paren-tightness',                           'pt',    '=i' );
    $add_option->( 'space-after-keyword',                       'sak',   '=s' );
    $add_option->( 'space-for-semicolon',                       'sfs',   '!' );
    $add_option->( 'space-function-paren',                      'sfp',   '!' );
    $add_option->( 'space-keyword-paren',                       'skp',   '!' );
    $add_option->( 'space-terminal-semicolon',                  'sts',   '!' );
    $add_option->( 'square-bracket-tightness',                  'sbt',   '=i' );
    $add_option->( 'square-bracket-vertical-tightness',         'sbvt',  '=i' );
    $add_option->( 'square-bracket-vertical-tightness-closing', 'sbvtc', '=i' );
    $add_option->( 'trim-qw',                                   'tqw',   '!' );
    $add_option->( 'want-left-space',                           'wls',   '=s' );
    $add_option->( 'want-right-space',                          'wrs',   '=s' );

    ########################################
    $category = 4;    # Comment controls
    ########################################
    $add_option->( 'closing-side-comment-else-flag',    'csce', '=i' );
    $add_option->( 'closing-side-comment-interval',     'csci', '=i' );
    $add_option->( 'closing-side-comment-list',         'cscl', '=s' );
    $add_option->( 'closing-side-comment-maximum-text', 'csct', '=i' );
    $add_option->( 'closing-side-comment-prefix',       'cscp', '=s' );
    $add_option->( 'closing-side-comment-warnings',     'cscw', '!' );
    $add_option->( 'closing-side-comments',             'csc',  '!' );
    $add_option->( 'format-skipping',                   'fs',   '!' );
    $add_option->( 'format-skipping-begin',             'fsb',  '=s' );
    $add_option->( 'format-skipping-end',               'fse',  '=s' );
    $add_option->( 'hanging-side-comments',             'hsc',  '!' );
    $add_option->( 'indent-block-comments',             'ibc',  '!' );
    $add_option->( 'indent-spaced-block-comments',      'isbc', '!' );
    $add_option->( 'fixed-position-side-comment',       'fpsc', '=i' );
    $add_option->( 'minimum-space-to-comment',          'msc',  '=i' );
    $add_option->( 'outdent-long-comments',             'olc',  '!' );
    $add_option->( 'outdent-static-block-comments',     'osbc', '!' );
    $add_option->( 'static-block-comment-prefix',       'sbcp', '=s' );
    $add_option->( 'static-block-comments',             'sbc',  '!' );
    $add_option->( 'static-side-comment-prefix',        'sscp', '=s' );
    $add_option->( 'static-side-comments',              'ssc',  '!' );

    ########################################
    $category = 5;    # Linebreak controls
    ########################################
    $add_option->( 'add-newlines',                        'anl',   '!' );
    $add_option->( 'block-brace-vertical-tightness',      'bbvt',  '=i' );
    $add_option->( 'block-brace-vertical-tightness-list', 'bbvtl', '=s' );
    $add_option->( 'brace-vertical-tightness',            'bvt',   '=i' );
    $add_option->( 'brace-vertical-tightness-closing',    'bvtc',  '=i' );
    $add_option->( 'cuddled-else',                        'ce',    '!' );
    $add_option->( 'delete-old-newlines',                 'dnl',   '!' );
    $add_option->( 'opening-brace-always-on-right',       'bar',   '!' );
    $add_option->( 'opening-brace-on-new-line',           'bl',    '!' );
    $add_option->( 'opening-hash-brace-right',            'ohbr',  '!' );
    $add_option->( 'opening-paren-right',                 'opr',   '!' );
    $add_option->( 'opening-square-bracket-right',        'osbr',  '!' );
    $add_option->( 'opening-sub-brace-on-new-line',       'sbl',   '!' );
    $add_option->( 'paren-vertical-tightness',            'pvt',   '=i' );
    $add_option->( 'paren-vertical-tightness-closing',    'pvtc',  '=i' );
    $add_option->( 'stack-closing-hash-brace',            'schb',  '!' );
    $add_option->( 'stack-closing-paren',                 'scp',   '!' );
    $add_option->( 'stack-closing-square-bracket',        'scsb',  '!' );
    $add_option->( 'stack-opening-hash-brace',            'sohb',  '!' );
    $add_option->( 'stack-opening-paren',                 'sop',   '!' );
    $add_option->( 'stack-opening-square-bracket',        'sosb',  '!' );
    $add_option->( 'vertical-tightness',                  'vt',    '=i' );
    $add_option->( 'vertical-tightness-closing',          'vtc',   '=i' );
    $add_option->( 'want-break-after',                    'wba',   '=s' );
    $add_option->( 'want-break-before',                   'wbb',   '=s' );
    $add_option->( 'break-after-all-operators',           'baao',  '!' );
    $add_option->( 'break-before-all-operators',          'bbao',  '!' );
    $add_option->( 'keep-interior-semicolons',            'kis',   '!' );

    ########################################
    $category = 6;    # Controlling list formatting
    ########################################
    $add_option->( 'break-at-old-comma-breakpoints', 'boc', '!' );
    $add_option->( 'comma-arrow-breakpoints',        'cab', '=i' );
    $add_option->( 'maximum-fields-per-table',       'mft', '=i' );

    ########################################
    $category = 7;    # Retaining or ignoring existing line breaks
    ########################################
    $add_option->( 'break-at-old-keyword-breakpoints', 'bok', '!' );
    $add_option->( 'break-at-old-logical-breakpoints', 'bol', '!' );
    $add_option->( 'break-at-old-ternary-breakpoints', 'bot', '!' );
    $add_option->( 'ignore-old-breakpoints',           'iob', '!' );

    ########################################
    $category = 8;    # Blank line control
    ########################################
    $add_option->( 'blanks-before-blocks',            'bbb', '!' );
    $add_option->( 'blanks-before-comments',          'bbc', '!' );
    $add_option->( 'blanks-before-subs',              'bbs', '!' );
    $add_option->( 'long-block-line-count',           'lbl', '=i' );
    $add_option->( 'maximum-consecutive-blank-lines', 'mbl', '=i' );
    $add_option->( 'swallow-optional-blank-lines',    'sob', '!' );

    ########################################
    $category = 9;    # Other controls
    ########################################
    $add_option->( 'delete-block-comments',        'dbc',  '!' );
    $add_option->( 'delete-closing-side-comments', 'dcsc', '!' );
    $add_option->( 'delete-pod',                   'dp',   '!' );
    $add_option->( 'delete-side-comments',         'dsc',  '!' );
    $add_option->( 'tee-block-comments',           'tbc',  '!' );
    $add_option->( 'tee-pod',                      'tp',   '!' );
    $add_option->( 'tee-side-comments',            'tsc',  '!' );
    $add_option->( 'look-for-autoloader',          'lal',  '!' );
    $add_option->( 'look-for-hash-bang',           'x',    '!' );
    $add_option->( 'look-for-selfloader',          'lsl',  '!' );
    $add_option->( 'pass-version-line',            'pvl',  '!' );

    ########################################
    $category = 13;    # Debugging
    ########################################
    $add_option->( 'DEBUG',                           'D',    '!' );
    $add_option->( 'DIAGNOSTICS',                     'I',    '!' );
    $add_option->( 'check-multiline-quotes',          'chk',  '!' );
    $add_option->( 'dump-defaults',                   'ddf',  '!' );
    $add_option->( 'dump-long-names',                 'dln',  '!' );
    $add_option->( 'dump-options',                    'dop',  '!' );
    $add_option->( 'dump-profile',                    'dpro', '!' );
    $add_option->( 'dump-short-names',                'dsn',  '!' );
    $add_option->( 'dump-token-types',                'dtt',  '!' );
    $add_option->( 'dump-want-left-space',            'dwls', '!' );
    $add_option->( 'dump-want-right-space',           'dwrs', '!' );
    $add_option->( 'fuzzy-line-length',               'fll',  '!' );
    $add_option->( 'help',                            'h',    '' );
    $add_option->( 'short-concatenation-item-length', 'scl',  '=i' );
    $add_option->( 'show-options',                    'opt',  '!' );
    $add_option->( 'version',                         'v',    '' );

    #---------------------------------------------------------------------

    # The Perl::Tidy::HtmlWriter will add its own options to the string
    Perl::Tidy::HtmlWriter->make_getopt_long_names( \@option_string );

    ########################################
    # Set categories 10, 11, 12
    ########################################
    # Based on their known order
    # Options without a category yet are assigned the current one; the
    # current one changes when a name with one of the prefixes below is seen.
    $category = 12;    # HTML properties
    foreach my $opt (@option_string) {
        my $long_name = $opt;
        $long_name =~ s/(!|=.*|:.*)$//;
        unless ( defined( $option_category{$long_name} ) ) {
            if ( $long_name =~ /^html-linked/ ) {
                $category = 10;    # HTML options
            }
            elsif ( $long_name =~ /^pod2html/ ) {
                $category = 11;    # Pod2html
            }
            $option_category{$long_name} = $category_name[$category];
        }
    }

    #---------------------------------------------------------------
    # Assign valid ranges to certain options
    #---------------------------------------------------------------
    # In the future, these may be used to make preliminary checks
    # hash keys are long names
    # If key or value is undefined:
    # strings may have any value
    # integer ranges are >=0
    # If value is defined:
    # value is [qw(any valid words)] for strings
    # value is [min, max] for integers
    # if min is undefined, there is no lower limit
    # if max is undefined, there is no upper limit
    # Parameters not listed here have defaults
    %option_range = (
        'format'             => [ 'tidy', 'html', 'user' ],
        'output-line-ending' => [ 'dos',  'win',  'mac', 'unix' ],

        'block-brace-tightness'    => [ 0, 2 ],
        'brace-tightness'          => [ 0, 2 ],
        'paren-tightness'          => [ 0, 2 ],
        'square-bracket-tightness' => [ 0, 2 ],

        'block-brace-vertical-tightness'            => [ 0, 2 ],
        'brace-vertical-tightness'                  => [ 0, 2 ],
        'brace-vertical-tightness-closing'          => [ 0, 2 ],
        'paren-vertical-tightness'                  => [ 0, 2 ],
        'paren-vertical-tightness-closing'          => [ 0, 2 ],
        'square-bracket-vertical-tightness'         => [ 0, 2 ],
        'square-bracket-vertical-tightness-closing' => [ 0, 2 ],
        'vertical-tightness'                        => [ 0, 2 ],
        'vertical-tightness-closing'                => [ 0, 2 ],

        'closing-brace-indentation'          => [ 0, 3 ],
        'closing-paren-indentation'          => [ 0, 3 ],
        'closing-square-bracket-indentation' => [ 0, 3 ],
        'closing-token-indentation'          => [ 0, 3 ],

        'closing-side-comment-else-flag' => [ 0, 2 ],
        'comma-arrow-breakpoints'        => [ 0, 3 ],
    );

    # Note: we could actually allow negative ci if someone really wants it:
    # $option_range{'continuation-indentation'} = [ undef, undef ];

    #---------------------------------------------------------------
    # Assign default values to the above options here, except
    # for 'outfile' and 'help'.
    # These settings should approximate the perlstyle(1) suggestions.
    #---------------------------------------------------------------
    my @defaults = qw(
      add-newlines
      add-semicolons
      add-whitespace
      blanks-before-blocks
      blanks-before-comments
      blanks-before-subs
      block-brace-tightness=0
      block-brace-vertical-tightness=0
      brace-tightness=1
      brace-vertical-tightness-closing=0
      brace-vertical-tightness=0
      break-at-old-logical-breakpoints
      break-at-old-ternary-breakpoints
      break-at-old-keyword-breakpoints
      comma-arrow-breakpoints=1
      nocheck-syntax
      closing-side-comment-interval=6
      closing-side-comment-maximum-text=20
      closing-side-comment-else-flag=0
      closing-paren-indentation=0
      closing-brace-indentation=0
      closing-square-bracket-indentation=0
      continuation-indentation=2
      delete-old-newlines
      delete-semicolons
      fuzzy-line-length
      hanging-side-comments
      indent-block-comments
      indent-columns=4
      long-block-line-count=8
      look-for-autoloader
      look-for-selfloader
      maximum-consecutive-blank-lines=1
      maximum-fields-per-table=0
      maximum-line-length=80
      minimum-space-to-comment=4
      nobrace-left-and-indent
      nocuddled-else
      nodelete-old-whitespace
      nohtml
      nologfile
      noquiet
      noshow-options
      nostatic-side-comments
      noswallow-optional-blank-lines
      notabs
      nowarning-output
      outdent-labels
      outdent-long-quotes
      outdent-long-comments
      paren-tightness=1
      paren-vertical-tightness-closing=0
      paren-vertical-tightness=0
      pass-version-line
      recombine
      valign
      short-concatenation-item-length=8
      space-for-semicolon
      square-bracket-tightness=1
      square-bracket-vertical-tightness-closing=0
      square-bracket-vertical-tightness=0
      static-block-comments
      trim-qw
      format=tidy
      backup-file-extension=bak
      format-skipping

      pod2html
      html-table-of-contents
      html-entities
    );

    # this default contains a space, so it cannot go in the qw list above
    push @defaults, "perl-syntax-check-flags=-c -T";

    #---------------------------------------------------------------
    # Define abbreviations which will be expanded into the above primitives.
    # These may be defined recursively.
    #---------------------------------------------------------------
    %expansion = (
        %expansion,
        'freeze-newlines'   => [qw(noadd-newlines nodelete-old-newlines)],
        'fnl'               => [qw(freeze-newlines)],
        'freeze-whitespace' => [qw(noadd-whitespace nodelete-old-whitespace)],
        'fws'               => [qw(freeze-whitespace)],
        'indent-only'       => [qw(freeze-newlines freeze-whitespace)],
        'outdent-long-lines' => [qw(outdent-long-quotes outdent-long-comments)],
        'nooutdent-long-lines' =>
          [qw(nooutdent-long-quotes nooutdent-long-comments)],
        'noll' => [qw(nooutdent-long-lines)],
        'io'   => [qw(indent-only)],
        'delete-all-comments' =>
          [qw(delete-block-comments delete-side-comments delete-pod)],
        'nodelete-all-comments' =>
          [qw(nodelete-block-comments nodelete-side-comments nodelete-pod)],
        'dac'  => [qw(delete-all-comments)],
        'ndac' => [qw(nodelete-all-comments)],
        'gnu'  => [qw(gnu-style)],
        'pbp'  => [qw(perl-best-practices)],
        'tee-all-comments' =>
          [qw(tee-block-comments tee-side-comments tee-pod)],
        'notee-all-comments' =>
          [qw(notee-block-comments notee-side-comments notee-pod)],
        'tac'   => [qw(tee-all-comments)],
        'ntac'  => [qw(notee-all-comments)],
        'html'  => [qw(format=html)],
        'nhtml' => [qw(format=tidy)],
        'tidy'  => [qw(format=tidy)],

        'break-after-comma-arrows'   => [qw(cab=0)],
        'nobreak-after-comma-arrows' => [qw(cab=1)],
        'baa'                        => [qw(cab=0)],
        'nbaa'                       => [qw(cab=1)],

        'break-at-old-trinary-breakpoints' => [qw(bot)],

        'cti=0' => [qw(cpi=0 cbi=0 csbi=0)],
        'cti=1' => [qw(cpi=1 cbi=1 csbi=1)],
        'cti=2' => [qw(cpi=2 cbi=2 csbi=2)],
        'icp'   => [qw(cpi=2 cbi=2 csbi=2)],
        'nicp'  => [qw(cpi=0 cbi=0 csbi=0)],

        'closing-token-indentation=0' => [qw(cpi=0 cbi=0 csbi=0)],
        'closing-token-indentation=1' => [qw(cpi=1 cbi=1 csbi=1)],
        'closing-token-indentation=2' => [qw(cpi=2 cbi=2 csbi=2)],
        'indent-closing-paren'        => [qw(cpi=2 cbi=2 csbi=2)],
        'noindent-closing-paren'      => [qw(cpi=0 cbi=0 csbi=0)],

        'vt=0' => [qw(pvt=0 bvt=0 sbvt=0)],
        'vt=1' => [qw(pvt=1 bvt=1 sbvt=1)],
        'vt=2' => [qw(pvt=2 bvt=2 sbvt=2)],

        'vertical-tightness=0' => [qw(pvt=0 bvt=0 sbvt=0)],
        'vertical-tightness=1' => [qw(pvt=1 bvt=1 sbvt=1)],
        'vertical-tightness=2' => [qw(pvt=2 bvt=2 sbvt=2)],

        'vtc=0' => [qw(pvtc=0 bvtc=0 sbvtc=0)],
        'vtc=1' => [qw(pvtc=1 bvtc=1 sbvtc=1)],
        'vtc=2' => [qw(pvtc=2 bvtc=2 sbvtc=2)],

        'vertical-tightness-closing=0' => [qw(pvtc=0 bvtc=0 sbvtc=0)],
        'vertical-tightness-closing=1' => [qw(pvtc=1 bvtc=1 sbvtc=1)],
        'vertical-tightness-closing=2' => [qw(pvtc=2 bvtc=2 sbvtc=2)],

        'otr'                   => [qw(opr ohbr osbr)],
        'opening-token-right'   => [qw(opr ohbr osbr)],
        'notr'                  => [qw(nopr nohbr nosbr)],
        'noopening-token-right' => [qw(nopr nohbr nosbr)],

        'sot'                    => [qw(sop sohb sosb)],
        'nsot'                   => [qw(nsop nsohb nsosb)],
        'stack-opening-tokens'   => [qw(sop sohb sosb)],
        'nostack-opening-tokens' => [qw(nsop nsohb nsosb)],

        # The last key of this group must be 'nostack-closing-tokens'.
        # Spelled 'nostack-opening-tokens' it would replace the entry of
        # that name in the group above, because the later of two equal
        # keys in a hash list wins.
        'sct'                    => [qw(scp schb scsb)],
        'stack-closing-tokens'   => [qw(scp schb scsb)],
        'nsct'                   => [qw(nscp nschb nscsb)],
        'nostack-closing-tokens' => [qw(nscp nschb nscsb)],

        # 'mangle' originally deleted pod and comments, but to keep it
        # reversible, it no longer does. But if you really want to
        # delete them, just use:
        # -mangle -dac

        # An interesting use for 'mangle' is to do this:
        # perltidy -mangle myfile.pl -st | perltidy -o myfile.pl.new
        # which will form as many one-line blocks as possible

        'mangle' => [
            qw(
              check-syntax
              delete-old-newlines
              delete-old-whitespace
              delete-semicolons
              indent-columns=0
              maximum-consecutive-blank-lines=0
              maximum-line-length=100000
              noadd-newlines
              noadd-semicolons
              noadd-whitespace
              noblanks-before-blocks
              noblanks-before-subs
              notabs
              )
        ],

        # 'extrude' originally deleted pod and comments, but to keep it
        # reversible, it no longer does. But if you really want to
        # delete them, just use
        # extrude -dac
        #
        # An interesting use for 'extrude' is to do this:
        # perltidy -extrude myfile.pl -st | perltidy -o myfile.pl.new
        # which will break up all one-line blocks.

        'extrude' => [
            qw(
              check-syntax
              ci=0
              delete-old-newlines
              delete-old-whitespace
              delete-semicolons
              indent-columns=0
              maximum-consecutive-blank-lines=0
              maximum-line-length=1
              noadd-semicolons
              noadd-whitespace
              noblanks-before-blocks
              noblanks-before-subs
              nofuzzy-line-length
              notabs
              norecombine
              )
        ],

        # this style tries to follow the GNU Coding Standards (which do
        # not really apply to perl but which are followed by some perl
        # programmers).
'gnu-style' => [
            qw(
              lp bl noll pt=2 bt=2 sbt=2 cpi=1 csbi=1 cbi=1
              )
        ],

        # Style suggested in Damian Conway's Perl Best Practices
        'perl-best-practices' => [
            qw(l=78 i=4 ci=4 st se vt=2 cti=0 pt=1 bt=1 sbt=1 bbt=1 nsfs nolq),
q(wbb=% + - * / x != == >= <= =~ !~ < > | & = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x=)
        ],

        # Additional styles can be added here
    );

    Perl::Tidy::HtmlWriter->make_abbreviated_names( \%expansion );

    # Uncomment next line to dump all expansions for debugging:
    # dump_short_names(\%expansion);
    return (
        \@option_string,   \@defaults, \%expansion,
        \%option_category, \%option_range
    );

}    # end of generate_options

sub process_command_line {

    # Build the final option set from the defaults, any configuration
    # file, and the command line in @ARGV, in that order.
    #   $perltidyrc_stream  - configuration source given as a call
    #                         parameter; used when no -pro=file was given
    #   $is_Windows, $Windows_type, $rpending_complaint
    #                       - handed on to find_config_file
    #   $dump_options_type  - when 'perltidyrc' the defaults are not loaded
    # Returns the list
    #   ( \%Opts, $config_file, \@raw_options, $saw_extrude,
    #     $roption_string, $rexpansion, $roption_category, $roption_range )
    # The dump, help and version flags are handled here and exit at once.
    my (
        $perltidyrc_stream,  $is_Windows, $Windows_type,
        $rpending_complaint, $dump_options_type
    ) = @_;

    use Getopt::Long;

    my (
        $roption_string,   $rdefaults, $rexpansion,
        $roption_category, $roption_range
    ) = generate_options();

    #---------------------------------------------------------------
    # set the defaults by passing the above list through GetOptions
    #---------------------------------------------------------------
    my %Opts = ();
    {
        local @ARGV;
        my $i;

        # do not load the defaults if we are just dumping perltidyrc
        unless ( $dump_options_type eq 'perltidyrc' ) {
            for $i (@$rdefaults) { push @ARGV, "--" . $i }
        }

        # Patch to save users Getopt::Long configuration
        # and set to Getopt::Long defaults. Use eval to avoid
        # breaking old versions of Perl without these routines.
        my $glc;
        eval { $glc = Getopt::Long::Configure() };
        unless ($@) {
            eval { Getopt::Long::ConfigDefaults() };
        }
        else { $glc = undef }

        if ( !GetOptions( \%Opts, @$roption_string ) ) {
            die "Programming Bug: error in setting default options";
        }

        # Patch to put the previous Getopt::Long configuration back
        eval { Getopt::Long::Configure($glc) } if defined $glc;
    }

    my @raw_options        = ();
    my $config_file        = "";
    my $saw_ignore_profile = 0;
    my $saw_extrude        = 0;
    my $saw_dump_profile   = 0;
    my $i;

    #---------------------------------------------------------------
    # Take a first look at the command-line parameters. Do as many
    # immediate dumps as possible, which can avoid confusion if the
    # perltidyrc file has an error.
    #---------------------------------------------------------------
    foreach $i (@ARGV) {

        # $i aliases the element, so this rewrites @ARGV in place
        $i =~ s/^--/-/;
        if ( $i =~ /^-(npro|noprofile|no-profile)$/ ) {
            $saw_ignore_profile = 1;
        }

        # note: this must come before -pro and -profile, below:
        elsif ( $i =~ /^-(dump-profile|dpro)$/ ) {
            $saw_dump_profile = 1;
        }
        elsif ( $i =~ /^-(pro|profile)=(.+)/ ) {
            if ($config_file) {
                warn
"Only one -pro=filename allowed, using '$2' instead of '$config_file'\n";
            }
            $config_file = $2;
            unless ( -e $config_file ) {
                warn "cannot find file given with -pro=$config_file: $!\n";
                $config_file = "";
            }
        }
        elsif ( $i =~ /^-(pro|profile)=?$/ ) {
            die "usage: -pro=filename or --profile=filename, no spaces\n";
        }
        elsif ( $i =~ /^-extrude$/ ) {
            $saw_extrude = 1;
        }
        elsif ( $i =~ /^-(help|h|HELP|H)$/ ) {
            usage();
            exit 1;
        }
        elsif ( $i =~ /^-(version|v)$/ ) {
            show_version();
            exit 1;
        }
        elsif ( $i =~ /^-(dump-defaults|ddf)$/ ) {
            dump_defaults(@$rdefaults);
            exit 1;
        }
        elsif ( $i =~ /^-(dump-long-names|dln)$/ ) {
            dump_long_names(@$roption_string);
            exit 1;
        }
        elsif ( $i =~ /^-(dump-short-names|dsn)$/ ) {
            dump_short_names($rexpansion);
            exit 1;
        }
        elsif ( $i =~ /^-(dump-token-types|dtt)$/ ) {
            Perl::Tidy::Tokenizer->dump_token_types(*STDOUT);
            exit 1;
        }
    }

    if ( $saw_dump_profile && $saw_ignore_profile ) {
        warn "No profile to dump because of -npro\n";
        exit 1;
    }

    #---------------------------------------------------------------
    # read any .perltidyrc configuration file
    #---------------------------------------------------------------
    unless ($saw_ignore_profile) {

        # resolve possible conflict between $perltidyrc_stream passed
        # as call parameter to perltidy and -pro=filename on command
        # line.
        if ($perltidyrc_stream) {
            if ($config_file) {
                warn <<EOM;
 Conflict: a perltidyrc configuration file was specified both as this
             perltidy call parameter: $perltidyrc_stream
 and with this -profile=$config_file.
 Using -profile=$config_file.
EOM
            }
            else {
                $config_file = $perltidyrc_stream;
            }
        }

        # look for a config file if we don't have one yet
        my $rconfig_file_chatter;
        $$rconfig_file_chatter = "";
        $config_file =
          find_config_file( $is_Windows, $Windows_type, $rconfig_file_chatter,
            $rpending_complaint )
          unless $config_file;

        # open any config file
        my $fh_config;
        if ($config_file) {
            ( $fh_config, $config_file ) =
              Perl::Tidy::streamhandle( $config_file, 'r' );
            unless ($fh_config) {
                $$rconfig_file_chatter .=
                  "# $config_file exists but cannot be opened\n";
            }
        }

        # a request to dump the profile ends the run here
        if ($saw_dump_profile) {
            dump_config_file( $fh_config, $config_file,
                $rconfig_file_chatter );
            exit 1;
        }

        if ($fh_config) {

            my ( $rconfig_list, $death_message ) =
              read_config_file( $fh_config, $config_file, $rexpansion );
            die $death_message if ($death_message);

            # process any .perltidyrc parameters right now so we can
            # localize errors
            if (@$rconfig_list) {
                local @ARGV = @$rconfig_list;

                expand_command_abbreviations( $rexpansion, \@raw_options,
                    $config_file );

                if ( !GetOptions( \%Opts, @$roption_string ) ) {
                    die
"Error in this config file: $config_file \nUse -npro to ignore this file, -h for help'\n";
                }

                # Anything left in this local @ARGV is an error and must be
                # invalid bare words from the configuration file. We cannot
                # check this earlier because bare words may have been valid
                # values for parameters. We had to wait for GetOptions to have
                # a look at @ARGV.
                if (@ARGV) {
                    my $count = @ARGV;
                    my $str   = "\'" . pop(@ARGV) . "\'";

                    # Test with 'defined' so that a bare word of '0' does
                    # not end the listing early.
                    while ( defined( my $param = pop(@ARGV) ) ) {
                        if ( length($str) < 70 ) {
                            $str .= ", '$param'";
                        }
                        else {
                            $str .= ", ...";
                            last;
                        }
                    }
                    die <<EOM;
There are $count unrecognized values in the configuration file '$config_file':
$str
Use leading dashes for parameters. Use -npro to ignore this file.
EOM
                }

                # Undo any options which cause premature exit. They are not
                # appropriate for a config file, and it could be hard to
                # diagnose the cause of the premature exit.
                foreach (
                    qw{
                    dump-defaults
                    dump-long-names
                    dump-options
                    dump-profile
                    dump-short-names
                    dump-token-types
                    dump-want-left-space
                    dump-want-right-space
                    help
                    stylesheet
                    version
                    }
                  )
                {

                    if ( defined( $Opts{$_} ) ) {
                        delete $Opts{$_};
                        warn "ignoring --$_ in config file: $config_file\n";
                    }
                }
            }
        }
    }

    #---------------------------------------------------------------
    # now process the command line parameters
    #---------------------------------------------------------------
    expand_command_abbreviations( $rexpansion, \@raw_options, $config_file );

    if ( !GetOptions( \%Opts, @$roption_string ) ) {
        die "Error on command line; for help try 'perltidy -h'\n";
    }

    return ( \%Opts, $config_file, \@raw_options, $saw_extrude, $roption_string,
        $rexpansion, $roption_category, $roption_range );
}    # end of process_command_line

sub check_options {

    my ( $rOpts, $is_Windows, $Windows_type, $rpending_complaint ) = @_;

    #---------------------------------------------------------------
    # check and handle any interactions among the basic options..
1992 #--------------------------------------------------------------- 1993 1994 # Since -vt, -vtc, and -cti are abbreviations, but under 1995 # msdos, an unquoted input parameter like vtc=1 will be 1996 # seen as 2 parameters, vtc and 1, so the abbreviations 1997 # won't be seen. Therefore, we will catch them here if 1998 # they get through. 1999 2000 if ( defined $rOpts->{'vertical-tightness'} ) { 2001 my $vt = $rOpts->{'vertical-tightness'}; 2002 $rOpts->{'paren-vertical-tightness'} = $vt; 2003 $rOpts->{'square-bracket-vertical-tightness'} = $vt; 2004 $rOpts->{'brace-vertical-tightness'} = $vt; 2005 } 2006 2007 if ( defined $rOpts->{'vertical-tightness-closing'} ) { 2008 my $vtc = $rOpts->{'vertical-tightness-closing'}; 2009 $rOpts->{'paren-vertical-tightness-closing'} = $vtc; 2010 $rOpts->{'square-bracket-vertical-tightness-closing'} = $vtc; 2011 $rOpts->{'brace-vertical-tightness-closing'} = $vtc; 2012 } 2013 2014 if ( defined $rOpts->{'closing-token-indentation'} ) { 2015 my $cti = $rOpts->{'closing-token-indentation'}; 2016 $rOpts->{'closing-square-bracket-indentation'} = $cti; 2017 $rOpts->{'closing-brace-indentation'} = $cti; 2018 $rOpts->{'closing-paren-indentation'} = $cti; 2019 } 2020 2021 # In quiet mode, there is no log file and hence no way to report 2022 # results of syntax check, so don't do it. 2023 if ( $rOpts->{'quiet'} ) { 2024 $rOpts->{'check-syntax'} = 0; 2025 } 2026 2027 # can't check syntax if no output 2028 if ( $rOpts->{'format'} ne 'tidy' ) { 2029 $rOpts->{'check-syntax'} = 0; 2030 } 2031 2032 # Never let Windows 9x/Me systems run syntax check -- this will prevent a 2033 # wide variety of nasty problems on these systems, because they cannot 2034 # reliably run backticks. Don't even think about changing this! 
2035 if ( $rOpts->{'check-syntax'} 2036 && $is_Windows 2037 && ( !$Windows_type || $Windows_type =~ /^(9|Me)/ ) ) 2038 { 2039 $rOpts->{'check-syntax'} = 0; 2040 } 2041 2042 # It's really a bad idea to check syntax as root unless you wrote 2043 # the script yourself. FIXME: not sure if this works with VMS 2044 unless ($is_Windows) { 2045 2046 if ( $< == 0 && $rOpts->{'check-syntax'} ) { 2047 $rOpts->{'check-syntax'} = 0; 2048 $$rpending_complaint .= 2049"Syntax check deactivated for safety; you shouldn't run this as root\n"; 2050 } 2051 } 2052 2053 # see if user set a non-negative logfile-gap 2054 if ( defined( $rOpts->{'logfile-gap'} ) && $rOpts->{'logfile-gap'} >= 0 ) { 2055 2056 # a zero gap will be taken as a 1 2057 if ( $rOpts->{'logfile-gap'} == 0 ) { 2058 $rOpts->{'logfile-gap'} = 1; 2059 } 2060 2061 # setting a non-negative logfile gap causes logfile to be saved 2062 $rOpts->{'logfile'} = 1; 2063 } 2064 2065 # not setting logfile gap, or setting it negative, causes default of 50 2066 else { 2067 $rOpts->{'logfile-gap'} = 50; 2068 } 2069 2070 # set short-cut flag when only indentation is to be done. 2071 # Note that the user may or may not have already set the 2072 # indent-only flag. 2073 if ( !$rOpts->{'add-whitespace'} 2074 && !$rOpts->{'delete-old-whitespace'} 2075 && !$rOpts->{'add-newlines'} 2076 && !$rOpts->{'delete-old-newlines'} ) 2077 { 2078 $rOpts->{'indent-only'} = 1; 2079 } 2080 2081 # -isbc implies -ibc 2082 if ( $rOpts->{'indent-spaced-block-comments'} ) { 2083 $rOpts->{'indent-block-comments'} = 1; 2084 } 2085 2086 # -bli flag implies -bl 2087 if ( $rOpts->{'brace-left-and-indent'} ) { 2088 $rOpts->{'opening-brace-on-new-line'} = 1; 2089 } 2090 2091 if ( $rOpts->{'opening-brace-always-on-right'} 2092 && $rOpts->{'opening-brace-on-new-line'} ) 2093 { 2094 warn <<EOM; 2095 Conflict: you specified both 'opening-brace-always-on-right' (-bar) and 2096 'opening-brace-on-new-line' (-bl). Ignoring -bl. 
EOM
        $rOpts->{'opening-brace-on-new-line'} = 0;
    }

    # it simplifies things if -bl is 0 rather than undefined
    if ( !defined( $rOpts->{'opening-brace-on-new-line'} ) ) {
        $rOpts->{'opening-brace-on-new-line'} = 0;
    }

    # -sbl defaults to -bl if not defined
    if ( !defined( $rOpts->{'opening-sub-brace-on-new-line'} ) ) {
        $rOpts->{'opening-sub-brace-on-new-line'} =
          $rOpts->{'opening-brace-on-new-line'};
    }

    # set shortcut flag if no blanks to be written
    unless ( $rOpts->{'maximum-consecutive-blank-lines'} ) {
        $rOpts->{'swallow-optional-blank-lines'} = 1;
    }

    if ( $rOpts->{'entab-leading-whitespace'} ) {
        if ( $rOpts->{'entab-leading-whitespace'} < 0 ) {
            warn "-et=n must use a positive integer; ignoring -et\n";
            $rOpts->{'entab-leading-whitespace'} = undef;
        }

        # entab leading whitespace has priority over the older 'tabs' option
        if ( $rOpts->{'tabs'} ) { $rOpts->{'tabs'} = 0; }
    }
}

sub expand_command_abbreviations {

    # Go through @ARGV and expand any abbreviations, in place.
    #
    # Parameters:
    #   $rexpansion   - hash ref mapping an abbreviation to an array ref of
    #                   the words it expands to; each word is pushed back
    #                   with a '--' prefix
    #   $rraw_options - array ref; every dash option seen on the first pass
    #                   is appended to it so that it can be shown if the
    #                   expansion never terminates
    #   $config_file  - name of the config file being expanded, or a false
    #                   value; it only selects which message is used when
    #                   the expansion fails
    #
    # Side effects: @ARGV is replaced by the expanded list.  Dies if
    # abbreviations are still being expanded when the pass limit is reached.

    my ( $rexpansion, $rraw_options, $config_file ) = @_;

    # set a pass limit to prevent an infinite loop;
    # 10 should be plenty, but it may be increased to allow deeply
    # nested expansions.
    my $max_passes = 10;

    # keep looping until all expansions have been converted into actual
    # dash parameters..
    for ( my $pass_count = 0 ; $pass_count <= $max_passes ; $pass_count++ ) {

        # the argument list being built for the next pass
        my @new_argv     = ();
        my $abbrev_count = 0;

        # loop over each item in @ARGV..
        foreach my $word (@ARGV) {

            # convert any leading 'no-' to just 'no'
            if ( $word =~ /^(-[-]?no)-(.*)/ ) { $word = $1 . $2 }

            # if it is a dash flag (instead of a file name)..
            if ( $word =~ /^-[-]?([\w\-]+)(.*)/ ) {

                my $abr   = $1;
                my $flags = $2;

                # save the raw input for debug output in case of circular refs
                if ( $pass_count == 0 ) {
                    push( @$rraw_options, $word );
                }

                # recombine abbreviation and flag, if necessary,
                # to allow abbreviations with arguments such as '-vt=1'
                if ( $rexpansion->{ $abr . $flags } ) {
                    $abr   = $abr . $flags;
                    $flags = "";
                }

                # if we see this dash item in the expansion hash..
                if ( $rexpansion->{$abr} ) {
                    $abbrev_count++;

                    # stuff all of the words that it expands to into the
                    # new arg list for the next pass
                    foreach my $abbrev ( @{ $rexpansion->{$abr} } ) {
                        next unless $abbrev;    # for safety; shouldn't happen
                        push( @new_argv, '--' . $abbrev . $flags );
                    }
                }

                # not in expansion hash, must be actual long name
                else {
                    push( @new_argv, $word );
                }
            }

            # not a dash item, so just save it for the next pass
            else {
                push( @new_argv, $word );
            }
        }    # end of this pass

        # update parameter list @ARGV to the new one
        @ARGV = @new_argv;
        last unless ( $abbrev_count > 0 );

        # make sure we are not in an infinite loop
        if ( $pass_count == $max_passes ) {
            print STDERR
"I'm tired. We seem to be in an infinite loop trying to expand aliases.\n";
            print STDERR "Here are the raw options\n";
            local $" = ')(';
            print STDERR "(@$rraw_options)\n";
            my $num = @new_argv;

            if ( $num < 50 ) {
                print STDERR "After $max_passes passes here is ARGV\n";
                print STDERR "(@new_argv)\n";
            }
            else {
                print STDERR "After $max_passes passes ARGV has $num entries\n";
            }

            if ($config_file) {
                die <<"DIE";
Please check your configuration file $config_file for circular-references.
To deactivate it, use -npro.
DIE
            }
            else {
                die <<'DIE';
Program bug - circular-references in the %expansion hash, probably due to
a recent program change.
DIE
            }
        }    # end of check for circular references
    }    # end of loop over all passes
}

# Debug routine -- this will dump the expansion hash
sub dump_short_names {

    # Print to STDOUT one "abbreviation --> expansion words" line for every
    # key of the expansion hash, in sorted key order.
    my $rexpansion = shift;
    print STDOUT <<EOM;
List of short names. This list shows how all abbreviations are
translated into other abbreviations and, eventually, into long names.
New abbreviations may be defined in a .perltidyrc file.
For a list of all long names, use perltidy --dump-long-names (-dln).
--------------------------------------------------------------------------
EOM
    foreach my $abbrev ( sort keys %$rexpansion ) {
        my @list = @{ $$rexpansion{$abbrev} };
        print STDOUT "$abbrev --> @list\n";
    }
}

sub check_vms_filename {

    # given a valid filename (the perltidy input file)
    # create a modified filename and separator character
    # suitable for VMS.
    #
    # Returns the list ( $path . $base, $separator ), where $separator is
    # the empty string when the normalised name ends in a dot and "_"
    # otherwise.
    #
    # Contributed by Michael Cartmell
    #
    my ( $base, $path ) = fileparse( $_[0] );

    # remove explicit ; version
    $base =~ s/;-?\d*$//

      # remove explicit . version ie two dots in filename NB ^ escapes a dot
      or $base =~ s/(          # begin capture $1
                  (?:^|[^^])\. # match a dot not preceded by a caret
                  (?:          # followed by nothing
                    |          # or
                    .*[^^]     # anything ending in a non caret
                  )
                )              # end capture $1
                \.-?\d*$       # match . version number
              /$1/x;

    # normalise filename, if there are no unescaped dots then append one
    $base .= '.' unless $base =~ /(?:^|[^^])\./;

    # if we don't already have an extension then we just append the extension
    my $separator = ( $base =~ /\.$/ ) ? "" : "_";
    return ( $path . $base, $separator );
}

sub Win_OS_Type {

    # TODO: are these more standard names?
# Win32s Win95 Win98 WinMe WinNT3.51 WinNT4 Win2000 WinXP/.Net Win2003

    # Returns a string that determines what MS OS we are on.
    # Returns win32s,95,98,Me,NT3.51,NT4,2000,XP/.Net,Win2003
    # Returns blank string if not an MS system.
    # Original code contributed by: Yves Orton
    # We need to know this to decide where to look for config files

    my $rpending_complaint = shift;
    my $os                 = "";
    return $os unless $^O =~ /win32|dos/i;    # is it a MS box?

    # Systems built from Perl source may not have Win32.pm
    # But probably have Win32::GetOSVersion() anyway so the
    # following line is not 'required':
    # return $os unless eval('require Win32');

    # Use the standard API call to determine the version
    my ( $undef, $major, $minor, $build, $id );
    eval { ( $undef, $major, $minor, $build, $id ) = Win32::GetOSVersion() };

    #
    #    NAME                   ID   MAJOR  MINOR
    #    Windows NT 4           2      4       0
    #    Windows 2000           2      5       0
    #    Windows XP             2      5       1
    #    Windows Server 2003    2      5       2

    return "win32s" unless $id;    # If id==0 then its a win32s box.
    $os = {                        # Magic numbers from MSDN
                                   # documentation of GetOSVersion
        1 => {
            0  => "95",
            10 => "98",
            90 => "Me"
        },
        2 => {
            0  => "2000",          # or NT 4, see below
            1  => "XP/.Net",
            2  => "Win2003",
            51 => "NT3.51"
        }
    }->{$id}->{$minor};

    # If $os is undefined, the above code is out of date.  Suggested updates
    # are welcome.
    unless ( defined $os ) {
        $os = "";
        $$rpending_complaint .= <<EOS;
Error trying to discover Win_OS_Type: $id:$major:$minor Has no name of record!
We won't be able to look for a system-wide config file.
EOS
    }

    # Unfortunately the logic used for the various versions isn't so clever..
    # so we have to handle an outside case.
    return ( $os eq "2000" && $major != 5 ) ? "NT4" : $os;
}

sub is_unix {

    # True unless $^O names a Windows/DOS, VMS, OS2 or MacOS system.
    return
         ( $^O !~ /win32|dos/i )
      && ( $^O ne 'VMS' )
      && ( $^O ne 'OS2' )
      && ( $^O ne 'MacOS' );
}

sub look_for_Windows {

    # determine Windows sub-type and location of
    # system-wide configuration files
    #
    # Returns ( $is_Windows, $Windows_type ); $Windows_type is undefined
    # when we are not on a Windows system.
    my $rpending_complaint = shift;
    my $is_Windows         = ( $^O =~ /win32|dos/i );

    # Declare first, assign conditionally: the behaviour of
    # 'my $x = ... if COND' is undefined according to perlsyn.
    my $Windows_type;
    $Windows_type = Win_OS_Type($rpending_complaint) if $is_Windows;
    return ( $is_Windows, $Windows_type );
}

sub find_config_file {

    # look for a .perltidyrc configuration file
    #
    # Search order: the current directory, then the environment variables
    # listed below, then a system-wide location which depends on the OS.
    # Every step is appended to $$rconfig_file_chatter.
    #
    # Returns the name of the first candidate that exists as a plain file,
    # or nothing if none is found.
    my ( $is_Windows, $Windows_type, $rconfig_file_chatter,
        $rpending_complaint ) = @_;

    $$rconfig_file_chatter .= "# Config file search...system reported as:";
    if ($is_Windows) {
        $$rconfig_file_chatter .= "Windows $Windows_type\n";
    }
    else {
        $$rconfig_file_chatter .= " $^O\n";
    }

    # sub to check file existence and record all tests
    my $exists_config_file = sub {
        my $config_file = shift;
        return 0 unless $config_file;
        $$rconfig_file_chatter .= "# Testing: $config_file\n";
        return -f $config_file;
    };

    my $config_file;

    # look in current directory first
    $config_file = ".perltidyrc";
    return $config_file if $exists_config_file->($config_file);

    # Default environment vars.
    my @envs = qw(PERLTIDY HOME);

    # Check the NT/2k/XP locations, first a local machine def, then a
    # network def
    push @envs, qw(USERPROFILE HOMESHARE) if $^O =~ /win32/i;

    # Now go through the environment ...
    foreach my $var (@envs) {
        $$rconfig_file_chatter .= "# Examining: \$ENV{$var}";
        if ( defined( $ENV{$var} ) ) {
            $$rconfig_file_chatter .= " = $ENV{$var}\n";

            # test ENV{ PERLTIDY } as file:
            if ( $var eq 'PERLTIDY' ) {
                $config_file = "$ENV{$var}";
                return $config_file if $exists_config_file->($config_file);
            }

            # test ENV as directory:
            $config_file = catfile( $ENV{$var}, ".perltidyrc" );
            return $config_file if $exists_config_file->($config_file);
        }
        else {
            $$rconfig_file_chatter .= "\n";
        }
    }

    # then look for a system-wide definition
    # where to look varies with OS
    if ($is_Windows) {

        if ($Windows_type) {
            my ( $os, $system, $allusers ) =
              Win_Config_Locs( $rpending_complaint, $Windows_type );

            # Check All Users directory, if there is one.
            if ($allusers) {
                $config_file = catfile( $allusers, ".perltidyrc" );
                return $config_file if $exists_config_file->($config_file);
            }

            # Check system directory.
            $config_file = catfile( $system, ".perltidyrc" );
            return $config_file if $exists_config_file->($config_file);
        }
    }

    # Place to add customization code for other systems
    elsif ( $^O eq 'OS2' ) {
    }
    elsif ( $^O eq 'MacOS' ) {
    }
    elsif ( $^O eq 'VMS' ) {
    }

    # Assume some kind of Unix
    else {

        $config_file = "/usr/local/etc/perltidyrc";
        return $config_file if $exists_config_file->($config_file);

        $config_file = "/etc/perltidyrc";
        return $config_file if $exists_config_file->($config_file);
    }

    # Couldn't find a config file
    return;
}

sub Win_Config_Locs {

    # In scalar context returns the OS name (95 98 ME NT3.51 NT4 2000 XP),
    # or undef if its not a win32 OS.  In list context returns OS, System
    # Directory, and All Users Directory.  All Users will be empty on a
    # 9x/Me box.  Contributed by: Yves Orton.
    #
    # The first argument is a reference to the pending-complaint string;
    # the optional second argument is an already determined OS name.

    my $rpending_complaint = shift;

    # hand the complaint buffer on, so that a message produced while
    # detecting the OS is not lost
    my $os = (@_) ? shift : Win_OS_Type($rpending_complaint);
    return unless $os;

    my $system   = "";
    my $allusers = "";

    if ( $os =~ /9[58]|Me/ ) {
        $system = "C:/Windows";
    }
    elsif ( $os =~ /NT|XP|200?/ ) {
        $system = ( $os =~ /XP/ ) ? "C:/Windows/" : "C:/WinNT/";
        $allusers =
          ( $os =~ /NT/ )
          ? "C:/WinNT/profiles/All Users/"
          : "C:/Documents and Settings/All Users/";
    }
    else {

        # This currently would only happen on a win32s computer.  I don't
        # have one to test, so I am unsure how to proceed.  Suggestions
        # welcome!
        $$rpending_complaint .=
"I dont know a sensible place to look for config files on an $os system.\n";
        return;
    }
    return wantarray ? ( $os, $system, $allusers ) : $os;
}

sub dump_config_file {

    # Print the accumulated search chatter and then, if a config stream
    # was opened, its contents, to STDOUT.
    #
    #   $fh                   - object with getline and close methods, or
    #                           a false value if no config file was opened
    #   $config_file          - name shown in the dump header
    #   $rconfig_file_chatter - reference to the chatter string
    my $fh                   = shift;
    my $config_file          = shift;
    my $rconfig_file_chatter = shift;
    print STDOUT "$$rconfig_file_chatter";
    if ($fh) {
        print STDOUT "# Dump of file: '$config_file'\n";

        # test definedness, not truth: a final line of just "0" (or an
        # empty string from a non-file stream) must not end the dump
        while ( defined( my $line = $fh->getline() ) ) { print STDOUT $line }
        eval { $fh->close() };
    }
    else {
        print STDOUT "# ...no config file found\n";
    }
}

sub read_config_file {

    # Read an open configuration stream and split it into parameters.
    #
    #   $fh          - object with getline and close methods
    #   $config_file - name of the stream, used in messages only
    #   $rexpansion  - reference to the abbreviation hash; any alias
    #                  definition of the form 'newname { body }' found in
    #                  the stream is added to it
    #
    # Returns ( \@config_list, $death_message ): the parameters found
    # outside of alias definitions, and an error text which is empty
    # unless the stream is bad.

    my ( $fh, $config_file, $rexpansion ) = @_;
    my @config_list = ();

    # file is bad if non-empty $death_message is returned
    my $death_message = "";

    my $name    = undef;
    my $line_no = 0;

    # test definedness, not truth, so that a false-looking line cannot
    # end the read early
    while ( defined( my $line = $fh->getline() ) ) {
        $line_no++;
        chomp $line;
        next if $line =~ /^\s*#/;    # skip full-line comment
        ( $line, $death_message ) =
          strip_comment( $line, $config_file, $line_no );
        last if ($death_message);
        $line =~ s/^\s*(.*?)\s*$/$1/;    # trim both ends
        next unless $line;

        # look for something of the general form
        #    newname { body }
        # or just
        #    body

        if ( $line =~ /^((\w+)\s*\{)?([^}]*)(\})?$/ ) {
            my ( $newname, $body, $curly ) = ( $2, $3, $4 );

            # handle a new alias definition
            if ($newname) {
                if ($name) {

                    # report our own line count; $. belongs to whichever
                    # real filehandle was read last, which need not be
                    # this stream
                    $death_message =
"No '}' seen after $name and before $newname in config file $config_file line $line_no\n";
                    last;
                }
                $name = $newname;

                if ( ${$rexpansion}{$name} ) {
                    local $" = ')(';
                    my @names = sort keys %$rexpansion;
                    $death_message =
                        "Here is a list of all installed aliases\n(@names)\n"
                      . "Attempting to redefine alias ($name) in config file $config_file line $line_no\n";
                    last;
                }
                ${$rexpansion}{$name} = [];
            }

            # now do the body
            if ($body) {

                my ( $rbody_parts, $msg ) = parse_args($body);
                if ($msg) {
                    $death_message = <<EOM;
Error reading file '$config_file' at line number $line_no.
$msg
Please fix this line or use -npro to avoid reading this file
EOM
                    last;
                }

                if ($name) {

                    # remove leading dashes if this is an alias
                    foreach (@$rbody_parts) { s/^\-+//; }
                    push @{ ${$rexpansion}{$name} }, @$rbody_parts;
                }
                else {
                    push( @config_list, @$rbody_parts );
                }
            }

            if ($curly) {
                unless ($name) {
                    $death_message =
"Unexpected '}' seen in config file $config_file line $line_no\n";
                    last;
                }
                $name = undef;
            }
        }
    }
    eval { $fh->close() };
    return ( \@config_list, $death_message );
}

sub strip_comment {

    my ( $instr, $config_file, $line_no ) = @_;
    my $msg = "";

    # nothing to do if no comments
    if ( $instr !~ /#/ ) {
        return ( $instr, $msg );
    }

    # use simple method of no quotes
    elsif ( $instr !~ /['"]/ ) {
        $instr =~ s/\s*\#.*$//;    # simple trim
        return ( $instr, $msg );
    }

    # handle comments and quotes
    my $outstr     = "";
    my $quote_char = "";
    while (1) {

        # looking for ending quote character
        if ($quote_char) {
            if (
$instr =~ /\G($quote_char)/gc ) {
                $quote_char = "";
                $outstr .= $1;
            }
            elsif ( $instr =~ /\G(.)/gc ) {
                $outstr .= $1;
            }

            # error..we reached the end without seeing the ending quote char
            else {
                $msg = <<EOM;
Error reading file $config_file at line number $line_no.
Did not see ending quote character <$quote_char> in this text:
$instr
Please fix this line or use -npro to avoid reading this file
EOM
                last;
            }
        }

        # accumulating characters and looking for start of a quoted string
        else {
            if ( $instr =~ /\G([\"\'])/gc ) {
                $outstr .= $1;
                $quote_char = $1;
            }
            elsif ( $instr =~ /\G#/gc ) {
                last;
            }
            elsif ( $instr =~ /\G(.)/gc ) {
                $outstr .= $1;
            }
            else {
                last;
            }
        }
    }
    return ( $outstr, $msg );
}

sub parse_args {

    # Split one command string into its individual words.  Words are
    # separated by whitespace; a single or double quote starts a run in
    # which whitespace is kept, and the quote characters themselves are
    # dropped.  For example
    #
    #    -wba=" + - "  -some-thing -wbb='. && ||'
    #
    # yields three words.  Escaped quote characters are not handled; there
    # is no need for them at present (they are not perltidy tokens, so
    # needn't be in strings).
    #
    # Returns ( \@words, $msg ); $msg is empty unless a quoted run was
    # still open at the end of the string.

    my ($body) = @_;
    my @words      = ();
    my $word       = "";
    my $open_quote = "";
    my $msg        = "";

    # The scan position is carried in pos($body) by the \G anchors; the /c
    # flag keeps it in place when an alternative fails to match.
  SCAN:
    while (1) {

        # inside a quoted run: look for the closing quote
        if ($open_quote) {
            if ( $body =~ /\G($open_quote)/gc ) {
                $open_quote = "";
                next SCAN;
            }
            if ( $body =~ /\G(.)/gc ) {
                $word .= $1;
                next SCAN;
            }

            # end of text reached with the quote still open
            push @words, $word if ( length($word) );
            $msg = <<EOM;
Did not see ending quote character <$open_quote> in this text:
$body
EOM
            last SCAN;
        }

        # outside quotes: a quote opens a run
        if ( $body =~ /\G([\"\'])/gc ) {
            $open_quote = $1;
            next SCAN;
        }

        # whitespace ends the current word
        if ( $body =~ /\G(\s+)/gc ) {
            push @words, $word if ( length($word) );
            $word = "";
            next SCAN;
        }

        # any other character extends the current word
        if ( $body =~ /\G(.)/gc ) {
            $word .= $1;
            next SCAN;
        }

        # nothing left to scan
        push @words, $word if ( length($word) );
        last SCAN;
    }
    return ( \@words, $msg );
}

sub dump_long_names {

    my @names = sort @_;
    print STDOUT <<EOM;
# Command line long names (passed to GetOptions)
#---------------------------------------------------------------
# here is a summary of the Getopt codes:
# <none> does not take an argument
# =s takes a mandatory string
# :s takes an optional string
# =i takes a mandatory integer
# :i takes an optional integer
# !
does not take an argument and may be negated
# i.e., -foo and -nofoo are allowed
# a double dash signals the end of the options list
#
#---------------------------------------------------------------
EOM

    foreach (@names) { print STDOUT "$_\n" }
}

sub dump_defaults {

    # Print the given default option strings to STDOUT, one per line and
    # in sorted order, below a heading line.
    my @defaults = sort @_;
    print STDOUT "Default command line options:\n";

    # print the sorted copy (the unsorted argument list used to be printed)
    foreach (@defaults) { print STDOUT "$_\n" }
}

sub readable_options {

    # return options for this run as a string which could be
    # put in a perltidyrc file
    #
    #   $rOpts          - hash ref of option name => value
    #   $roption_string - array ref of option specifications in the form
    #                     passed to GetOptions, such as 'name!' or 'name=i';
    #                     it is only read
    my ( $rOpts, $roption_string ) = @_;
    my %Getopt_flags;
    my $rGetopt_flags    = \%Getopt_flags;
    my $readable_options = "# Final parameter set for this run.\n";
    $readable_options .=
      "# See utility 'perltidyrc_dump.pl' for nicer formatting.\n";
    foreach my $spec ( @{$roption_string} ) {

        # Work on a copy.  The loop variable is an alias for the element
        # of the caller's array, so assigning to it would strip the flag
        # from the specification itself.
        my $opt  = $spec;
        my $flag = "";
        if ( $opt =~ /(.*)(!|=.*)$/ ) {
            $opt  = $1;
            $flag = $2;
        }
        if ( defined( $rOpts->{$opt} ) ) {
            $rGetopt_flags->{$opt} = $flag;
        }
    }
    foreach my $key ( sort keys %{$rOpts} ) {
        my $flag   = $rGetopt_flags->{$key};
        my $value  = $rOpts->{$key};
        my $prefix = '--';
        my $suffix = "";
        if ($flag) {
            if ( $flag =~ /^=/ ) {

                # anything other than a plain integer is quoted
                if ( $value !~ /^\d+$/ ) { $value = '"' . $value . '"' }
                $suffix = "=" . $value;
            }
            elsif ( $flag =~ /^!/ ) {
                $prefix .= "no" unless ($value);
            }
            else {

                # shouldn't happen
                $readable_options .=
                  "# ERROR in dump_options: unrecognized flag $flag for $key\n";
            }
        }
        $readable_options .= $prefix . $key . $suffix . "\n";
    }
    return $readable_options;
}

sub show_version {
    print <<"EOM";
This is perltidy, v$VERSION

Copyright 2000-2007, Steve Hancock

Perltidy is free software and may be copied under the terms of the GNU
General Public License, which is included in the distribution files.
2796 2797Complete documentation for perltidy can be found using 'man perltidy' 2798or on the internet at http://perltidy.sourceforge.net. 2799EOM 2800} 2801 2802sub usage { 2803 2804 print STDOUT <<EOF; 2805This is perltidy version $VERSION, a perl script indenter. Usage: 2806 2807 perltidy [ options ] file1 file2 file3 ... 2808 (output goes to file1.tdy, file2.tdy, file3.tdy, ...) 2809 perltidy [ options ] file1 -o outfile 2810 perltidy [ options ] file1 -st >outfile 2811 perltidy [ options ] <infile >outfile 2812 2813Options have short and long forms. Short forms are shown; see 2814man pages for long forms. Note: '=s' indicates a required string, 2815and '=n' indicates a required integer. 2816 2817I/O control 2818 -h show this help 2819 -o=file name of the output file (only if single input file) 2820 -oext=s change output extension from 'tdy' to s 2821 -opath=path change path to be 'path' for output files 2822 -b backup original to .bak and modify file in-place 2823 -bext=s change default backup extension from 'bak' to s 2824 -q deactivate error messages (for running under editor) 2825 -w include non-critical warning messages in the .ERR error output 2826 -syn run perl -c to check syntax (default under unix systems) 2827 -log save .LOG file, which has useful diagnostics 2828 -f force perltidy to read a binary file 2829 -g like -log but writes more detailed .LOG file, for debugging scripts 2830 -opt write the set of options actually used to a .LOG file 2831 -npro ignore .perltidyrc configuration command file 2832 -pro=file read configuration commands from file instead of .perltidyrc 2833 -st send output to standard output, STDOUT 2834 -se send error output to standard error output, STDERR 2835 -v display version number to standard output and quit 2836 2837Basic Options: 2838 -i=n use n columns per indentation level (default n=4) 2839 -t tabs: use one tab character per indentation level, not recommeded 2840 -nt no tabs: use n spaces per indentation level (default) 
2841 -et=n entab leading whitespace n spaces per tab; not recommended 2842 -io "indent only": just do indentation, no other formatting. 2843 -sil=n set starting indentation level to n; use if auto detection fails 2844 -ole=s specify output line ending (s=dos or win, mac, unix) 2845 -ple keep output line endings same as input (input must be filename) 2846 2847Whitespace Control 2848 -fws freeze whitespace; this disables all whitespace changes 2849 and disables the following switches: 2850 -bt=n sets brace tightness, n= (0 = loose, 1=default, 2 = tight) 2851 -bbt same as -bt but for code block braces; same as -bt if not given 2852 -bbvt block braces vertically tight; use with -bl or -bli 2853 -bbvtl=s make -bbvt to apply to selected list of block types 2854 -pt=n paren tightness (n=0, 1 or 2) 2855 -sbt=n square bracket tightness (n=0, 1, or 2) 2856 -bvt=n brace vertical tightness, 2857 n=(0=open, 1=close unless multiple steps on a line, 2=always close) 2858 -pvt=n paren vertical tightness (see -bvt for n) 2859 -sbvt=n square bracket vertical tightness (see -bvt for n) 2860 -bvtc=n closing brace vertical tightness: 2861 n=(0=open, 1=sometimes close, 2=always close) 2862 -pvtc=n closing paren vertical tightness, see -bvtc for n. 2863 -sbvtc=n closing square bracket vertical tightness, see -bvtc for n. 2864 -ci=n sets continuation indentation=n, default is n=2 spaces 2865 -lp line up parentheses, brackets, and non-BLOCK braces 2866 -sfs add space before semicolon in for( ; ; ) 2867 -aws allow perltidy to add whitespace (default) 2868 -dws delete all old non-essential whitespace 2869 -icb indent closing brace of a code block 2870 -cti=n closing indentation of paren, square bracket, or non-block brace: 2871 n=0 none, =1 align with opening, =2 one full indentation level 2872 -icp equivalent to -cti=2 2873 -wls=s want space left of tokens in string; i.e. 
-nwls='+ - * /' 2874 -wrs=s want space right of tokens in string; 2875 -sts put space before terminal semicolon of a statement 2876 -sak=s put space between keywords given in s and '('; 2877 -nsak=s no space between keywords in s and '('; i.e. -nsak='my our local' 2878 2879Line Break Control 2880 -fnl freeze newlines; this disables all line break changes 2881 and disables the following switches: 2882 -anl add newlines; ok to introduce new line breaks 2883 -bbs add blank line before subs and packages 2884 -bbc add blank line before block comments 2885 -bbb add blank line between major blocks 2886 -sob swallow optional blank lines 2887 -ce cuddled else; use this style: '} else {' 2888 -dnl delete old newlines (default) 2889 -mbl=n maximum consecutive blank lines (default=1) 2890 -l=n maximum line length; default n=80 2891 -bl opening brace on new line 2892 -sbl opening sub brace on new line. value of -bl is used if not given. 2893 -bli opening brace on new line and indented 2894 -bar opening brace always on right, even for long clauses 2895 -vt=n vertical tightness (requires -lp); n controls break after opening 2896 token: 0=never 1=no break if next line balanced 2=no break 2897 -vtc=n vertical tightness of closing container; n controls if closing 2898 token starts new line: 0=always 1=not unless list 1=never 2899 -wba=s want break after tokens in string; i.e. wba=': .' 2900 -wbb=s want break before tokens in string 2901 2902Following Old Breakpoints 2903 -kis keep interior semicolons. Allows multiple statements per line. 
2904 -boc break at old comma breaks: turns off all automatic list formatting 2905 -bol break at old logical breakpoints: or, and, ||, && (default) 2906 -bok break at old list keyword breakpoints such as map, sort (default) 2907 -bot break at old conditional (ternary ?:) operator breakpoints (default) 2908 -cab=n break at commas after a comma-arrow (=>): 2909 n=0 break at all commas after => 2910 n=1 stable: break unless this breaks an existing one-line container 2911 n=2 break only if a one-line container cannot be formed 2912 n=3 do not treat commas after => specially at all 2913 2914Comment controls 2915 -ibc indent block comments (default) 2916 -isbc indent spaced block comments; may indent unless no leading space 2917 -msc=n minimum desired spaces to side comment, default 4 2918 -fpsc=n fix position for side comments; default 0; 2919 -csc add or update closing side comments after closing BLOCK brace 2920 -dcsc delete closing side comments created by a -csc command 2921 -cscp=s change closing side comment prefix to be other than '## end' 2922 -cscl=s change closing side comment to apply to selected list of blocks 2923 -csci=n minimum number of lines needed to apply a -csc tag, default n=6 2924 -csct=n maximum number of columns of appended text, default n=20 2925 -cscw causes warning if old side comment is overwritten with -csc 2926 2927 -sbc use 'static block comments' identified by leading '##' (default) 2928 -sbcp=s change static block comment identifier to be other than '##' 2929 -osbc outdent static block comments 2930 2931 -ssc use 'static side comments' identified by leading '##' (default) 2932 -sscp=s change static side comment identifier to be other than '##' 2933 2934Delete selected text 2935 -dac delete all comments AND pod 2936 -dbc delete block comments 2937 -dsc delete side comments 2938 -dp delete pod 2939 2940Send selected text to a '.TEE' file 2941 -tac tee all comments AND pod 2942 -tbc tee block comments 2943 -tsc tee side comments 2944 -tp tee 
pod 2945 2946Outdenting 2947 -olq outdent long quoted strings (default) 2948 -olc outdent a long block comment line 2949 -ola outdent statement labels 2950 -okw outdent control keywords (redo, next, last, goto, return) 2951 -okwl=s specify alternative keywords for -okw command 2952 2953Other controls 2954 -mft=n maximum fields per table; default n=40 2955 -x do not format lines before hash-bang line (i.e., for VMS) 2956 -asc allows perltidy to add a ';' when missing (default) 2957 -dsm allows perltidy to delete an unnecessary ';' (default) 2958 2959Combinations of other parameters 2960 -gnu attempt to follow GNU Coding Standards as applied to perl 2961 -mangle remove as many newlines as possible (but keep comments and pods) 2962 -extrude insert as many newlines as possible 2963 2964Dump and die, debugging 2965 -dop dump options used in this run to standard output and quit 2966 -ddf dump default options to standard output and quit 2967 -dsn dump all option short names to standard output and quit 2968 -dln dump option long names to standard output and quit 2969 -dpro dump whatever configuration file is in effect to standard output 2970 -dtt dump all token types to standard output and quit 2971 2972HTML 2973 -html write an html file (see 'man perl2web' for many options) 2974 Note: when -html is used, no indentation or formatting are done. 2975 Hint: try perltidy -html -css=mystyle.css filename.pl 2976 and edit mystyle.css to change the appearance of filename.html. 2977 -nnn gives line numbers 2978 -pre only writes out <pre>..</pre> code section 2979 -toc places a table of contents to subs at the top (default) 2980 -pod passes pod text through pod2html (default) 2981 -frm write html as a frame (3 files) 2982 -text=s extra extension for table of contents if -frm, default='toc' 2983 -sext=s extra extension for file content if -frm, default='src' 2984 2985A prefix of "n" negates short form toggle switches, and a prefix of "no" 2986negates the long forms. 
For example, -nasc means don't add missing
semicolons.

If you are unable to see this entire text, try "perltidy -h | more"
For more detailed information, and additional options, try "man perltidy",
or go to the perltidy home page at http://perltidy.sourceforge.net
EOF

}

sub process_this_file {

    # Pump every line from a line source into a line sink.
    #   $truth  - object supplying get_line() and
    #             report_tokenization_errors()
    #   $beauty - object supplying write_line(); its finish_formatting()
    #             method is called inside an eval, so an exception raised
    #             there (or a missing method) is silently ignored
    my ( $truth, $beauty ) = @_;

    # loop to process each line of this file
    while ( my $line_of_tokens = $truth->get_line() ) {
        $beauty->write_line($line_of_tokens);
    }

    # finish up
    eval { $beauty->finish_formatting() };
    $truth->report_tokenization_errors();
}

sub check_syntax {

    # Use 'perl -c' to make sure that we did not create bad syntax
    # This is a very good independent check for programming errors
    #
    # Given names of the input and output files, ($ifname, $ofname),
    # we do the following:
    # - check syntax of the input file
    # - if bad, all done (could be an incomplete code snippet)
    # - if infile syntax ok, then check syntax of the output file;
    #   - if outfile syntax bad, issue warning; this implies a code bug!
    # - set and return flag "infile_syntax_ok" : =-1 bad 0 unknown 1 good
    #
    # $logger_object receives all messages (write_logfile_entry, complain,
    # warning, report_definite_bug); $rOpts is the options hash, from which
    # 'perl-syntax-check-flags' and 'look-for-hash-bang' are read.

    my ( $ifname, $ofname, $logger_object, $rOpts ) = @_;
    my $infile_syntax_ok = 0;
    my $line_of_dashes   = '-' x 42 . "\n";

    my $flags = $rOpts->{'perl-syntax-check-flags'};

    # be sure we invoke perl with -c
    # note: perl will accept repeated flags like '-c -c'. It is safest
    # to append another -c than try to find an interior bundled c, as
    # in -Tc, because such a 'c' might be in a quoted string, for example.
    if ( $flags !~ /(^-c|\s+-c)/ ) { $flags .= " -c" }

    # be sure we invoke perl with -x if requested
    # same comments about repeated parameters applies
    if ( $rOpts->{'look-for-hash-bang'} ) {
        if ( $flags !~ /(^-x|\s+-x)/ ) { $flags .= " -x" }
    }

    # this shouldn't happen unless a temporary file couldn't be made
    if ( $ifname eq '-' ) {
        $logger_object->write_logfile_entry(
            "Cannot run perl -c on STDIN and STDOUT\n");
        return $infile_syntax_ok;
    }

    $logger_object->write_logfile_entry(
        "checking input file syntax with perl $flags\n");
    $logger_object->write_logfile_entry($line_of_dashes);

    # Not all operating systems/shells support redirection of the standard
    # error output.
    my $error_redirection = ( $^O eq 'VMS' ) ? "" : '2>&1';

    my $perl_output = do_syntax_check( $ifname, $flags, $error_redirection );
    $logger_object->write_logfile_entry("$perl_output\n");

    if ( $perl_output =~ /syntax\s*OK/ ) {
        $infile_syntax_ok = 1;
        $logger_object->write_logfile_entry($line_of_dashes);
        $logger_object->write_logfile_entry(
            "checking output file syntax with perl $flags ...\n");
        $logger_object->write_logfile_entry($line_of_dashes);

        # note: this inner $perl_output (output file result) deliberately
        # hides the outer one (input file result) within this branch
        my $perl_output =
          do_syntax_check( $ofname, $flags, $error_redirection );
        $logger_object->write_logfile_entry("$perl_output\n");

        unless ( $perl_output =~ /syntax\s*OK/ ) {
            $logger_object->write_logfile_entry($line_of_dashes);
            $logger_object->warning(
"The output file has a syntax error when tested with perl $flags $ofname !\n"
            );
            $logger_object->warning(
                "This implies an error in perltidy; the file $ofname is bad\n");
            $logger_object->report_definite_bug();

            # the perl version number will be helpful for diagnosing the problem
            $logger_object->write_logfile_entry(
                qx/perl -v $error_redirection/ . "\n" );
        }
    }
    else {

        # Only warn of perl -c syntax errors. Other messages,
        # such as missing modules, are too common. They can be
        # seen by running with perltidy -w
        $logger_object->complain("A syntax check using perl $flags gives: \n");
        $logger_object->complain($line_of_dashes);
        $logger_object->complain("$perl_output\n");
        $logger_object->complain($line_of_dashes);
        $infile_syntax_ok = -1;
        $logger_object->write_logfile_entry($line_of_dashes);
        $logger_object->write_logfile_entry(
"The output file will not be checked because of input file problems\n"
        );
    }
    return $infile_syntax_ok;
}

sub do_syntax_check {

    # Run 'perl $flags $fname' through the shell and return everything the
    # command printed (standard error is included when $error_redirection
    # is '2>&1').
    my ( $fname, $flags, $error_redirection ) = @_;

    # We have to quote the filename in case it has unusual characters
    # or spaces. Example: this filename #CM11.pm# gives trouble.
    # NOTE(review): the name is only wrapped in double quotes; a double
    # quote or other shell metacharacter inside the name is not escaped.
    $fname = '"' . $fname . '"';

    # Under VMS something like -T will become -t (and an error) so we
    # will put quotes around the flags. Double quotes seem to work on
    # Unix/Windows/VMS, but this may not work on all systems. (Single
    # quotes do not work under Windows). It could become necessary to
    # put double quotes around each flag, such as: -"c" -"T"
    # We may eventually need some system-dependent coding here.
    $flags = '"' . $flags . '"';

    # now wish for luck...
    return qx/perl $flags $fname $error_redirection/;
}

#####################################################################
#
# This is a stripped down version of IO::Scalar
# Given a reference to a scalar, it supplies either:
# a getline method which reads lines (mode='r'), or
# a print method which appends text (mode='w')
#
#####################################################################
package Perl::Tidy::IOScalar;
use Carp;

sub new {

    # Constructor.
    #   $rscalar - reference to the scalar holding (or receiving) the text
    #   $mode    - 'w' to write (the scalar is emptied first), 'r' to read
    # Any other reference type or mode is fatal (confess).
    # The object is an array ref:
    #   write mode: [ $rscalar, $mode ]
    #   read mode:  [ \@lines, $mode, $i_next ]
    my ( $package, $rscalar, $mode ) = @_;
    my $ref = ref $rscalar;
    if ( $ref ne 'SCALAR' ) {
        confess <<EOM;
------------------------------------------------------------------------
expecting ref to SCALAR but got ref to ($ref); trace follows:
------------------------------------------------------------------------
EOM

    }
    if ( $mode eq 'w' ) {
        $$rscalar = "";
        return bless [ $rscalar, $mode ], $package;
    }
    elsif ( $mode eq 'r' ) {

        # Convert a scalar to an array.
        # This avoids looking for "\n" on each call to getline
        # (every piece produced by the split gets a "\n" appended, so the
        # last line ends in a newline even if the input text did not)
        my @array = map { $_ .= "\n" } split /\n/, ${$rscalar};
        my $i_next = 0;
        return bless [ \@array, $mode, $i_next ], $package;
    }
    else {
        confess <<EOM;
------------------------------------------------------------------------
expecting mode = 'r' or 'w' but got mode ($mode); trace follows:
------------------------------------------------------------------------
EOM
    }
}

sub getline {

    # Return the next stored line, or undef when the lines are used up.
    # Fatal (confess) unless the object was created in mode 'r'.
    my $self = shift;
    my $mode = $self->[1];
    if ( $mode ne 'r' ) {
        confess <<EOM;
------------------------------------------------------------------------
getline call requires mode = 'r' but mode = ($mode); trace follows:
------------------------------------------------------------------------
EOM
    }
    my $i = $self->[2]++;
    ##my $line = $self->[0]->[$i];
    return $self->[0]->[$i];
}

sub print {

    # Append the first argument to the target scalar (any further
    # arguments are ignored).
    # Fatal (confess) unless the object was created in mode 'w'.
    my $self = shift;
    my $mode = $self->[1];
    if ( $mode ne 'w' ) {
        confess <<EOM;
------------------------------------------------------------------------
print call requires mode = 'w' but mode = ($mode); trace follows:
------------------------------------------------------------------------
EOM
    }
    ${ $self->[0] } .= $_[0];
}

# nothing to release; present so the object can be closed like a filehandle
sub close { return }

#####################################################################
#
# This is a stripped down version of IO::ScalarArray
# Given a reference to an array, it supplies either:
# a getline method which reads lines (mode='r'), or
# a print method which writes lines (mode='w')
#
# NOTE: this routine assumes that there aren't any embedded
# newlines within any of the array elements. There are no checks
# for that.
#
#####################################################################
package Perl::Tidy::IOScalarArray;
use Carp;

sub new {

    # Constructor.
    #   $rarray - reference to the array which holds the lines
    #   $mode   - 'w' to write (the array is emptied first), 'r' to read
    # Any other reference type or mode is fatal (confess).
    # The object is an array ref: [ $rarray, $mode, $i_next ], where
    # $i_next (read mode only) is the index of the next line to return.
    my ( $package, $rarray, $mode ) = @_;
    my $ref = ref $rarray;
    if ( $ref ne 'ARRAY' ) {
        confess <<EOM;
------------------------------------------------------------------------
expecting ref to ARRAY but got ref to ($ref); trace follows:
------------------------------------------------------------------------
EOM

    }
    if ( $mode eq 'w' ) {
        @$rarray = ();
        return bless [ $rarray, $mode ], $package;
    }
    elsif ( $mode eq 'r' ) {
        my $i_next = 0;
        return bless [ $rarray, $mode, $i_next ], $package;
    }
    else {
        confess <<EOM;
------------------------------------------------------------------------
expecting mode = 'r' or 'w' but got mode ($mode); trace follows:
------------------------------------------------------------------------
EOM
    }
}

sub getline {

    # Return the next array element, or undef when the array is used up.
    # Fatal (confess) unless the object was created in mode 'r'.
    my $self = shift;
    my $mode = $self->[1];
    if ( $mode ne 'r' ) {
        confess <<EOM;
------------------------------------------------------------------------
getline requires mode = 'r' but mode = ($mode); trace follows:
------------------------------------------------------------------------
EOM
    }
    my $i = $self->[2]++;
    return $self->[0]->[$i];
}

sub print {

    # Push the first argument onto the target array (any further
    # arguments are ignored).
    # Fatal (confess) unless the object was created in mode 'w'.
    my $self = shift;
    my $mode = $self->[1];
    if ( $mode ne 'w' ) {
        confess <<EOM;
------------------------------------------------------------------------
print requires mode = 'w' but mode = ($mode); trace follows:
------------------------------------------------------------------------
EOM
    }
    push @{ $self->[0] }, $_[0];
}

# nothing to release; present so the object can be closed like a filehandle
sub close { return }

#####################################################################
#
# the Perl::Tidy::LineSource class supplies an object with a 'get_line()' method
# which returns the next line to be parsed
#
#####################################################################

package Perl::Tidy::LineSource;

sub new {

    # Constructor.
    #   $input_file               - whatever Perl::Tidy::streamhandle
    #                               accepts as an input in mode 'r'
    #   $rOpts                    - options hash; 'check-syntax' may be
    #                               switched off here (see below)
    #   $rpending_logfile_message - ref to a string to which notes for the
    #                               logfile are appended
    # Returns undef if no input handle could be made.
    my ( $class, $input_file, $rOpts, $rpending_logfile_message ) = @_;
    my $input_file_copy = undef;
    my $fh_copy;

    my $input_line_ending;
    if ( $rOpts->{'preserve-line-endings'} ) {
        $input_line_ending = Perl::Tidy::find_input_line_ending($input_file);
    }

    ( my $fh, $input_file ) = Perl::Tidy::streamhandle( $input_file, 'r' );
    return undef unless $fh;

    # A syntax check needs a real file to hand to perl.  When the input
    # is standard input or a reference there is no such file.
    if ( ( $input_file eq '-' || ref $input_file ) && $rOpts->{'check-syntax'} )
    {

        # Turning off syntax check when standard input is used.
        # The reason is that temporary files cause problems
        # on many systems.
        $rOpts->{'check-syntax'} = 0;
        $input_file_copy = '-';

        $$rpending_logfile_message .= <<EOM;
Note: --syntax check will be skipped because standard input is used
EOM

    }

    return bless {
        _fh                => $fh,
        _fh_copy           => $fh_copy,
        _filename          => $input_file,
        _input_file_copy   => $input_file_copy,
        _input_line_ending => $input_line_ending,
        _rinput_buffer     => [],
        _started           => 0,
    }, $class;
}

sub get_input_file_copy_name {

    # Return the name recorded for the input copy if there is one,
    # otherwise the input file name itself.
    my $self   = shift;
    my $ifname = $self->{_input_file_copy};
    unless ($ifname) {
        $ifname = $self->{_filename};
    }
    return $ifname;
}

sub close_input_file {

    # Close the input handle and, if present, the copy handle.  Errors
    # from close are deliberately ignored.
    my $self = shift;
    eval { $self->{_fh}->close() };
    eval { $self->{_fh_copy}->close() } if $self->{_fh_copy};
}

sub get_line {

    # Return the next input line, or undef at end of input.  Lines come
    # from the internal buffer first (see the mac patch below) and then
    # from the input handle.  Each line returned is echoed to the copy
    # handle when there is one.
    my $self          = shift;
    my $line          = undef;
    my $fh            = $self->{_fh};
    my $fh_copy       = $self->{_fh_copy};
    my $rinput_buffer = $self->{_rinput_buffer};

    if ( scalar(@$rinput_buffer) ) {
        $line = shift @$rinput_buffer;
    }
    else {
        $line = $fh->getline();

        # patch to read raw mac files under unix, dos
        # see if the first line has embedded \r's
        if ( $line && !$self->{_started} ) {
            if ( $line =~ /[\015][^\015\012]/ ) {

                # found one -- break the line up and store in a buffer
                @$rinput_buffer = map { $_ . "\n" } split /\015/, $line;
                $line = shift @$rinput_buffer;
            }
            $self->{_started}++;
        }
    }
    if ( $line && $fh_copy ) { $fh_copy->print($line); }
    return $line;
}

#####################################################################
#
# the Perl::Tidy::LineSink class supplies a write_line method for
# actual file writing
#
#####################################################################

package Perl::Tidy::LineSink;

sub new {

    # Constructor.
    #   $output_file    - output destination; opened through
    #                     Perl::Tidy::streamhandle only when the 'format'
    #                     option is 'tidy'
    #   $tee_file       - name of the tee file; it is not opened until the
    #                     first line is written with the tee switched on
    #   $line_separator - appended to every line written
    #   $rOpts          - options hash; 'check-syntax' may be switched off
    #                     here (see below)
    #   $rpending_logfile_message - ref to a string to which notes for the
    #                     logfile are appended
    #   $binmode        - true to put the output and tee handles in binmode
    my ( $class, $output_file, $tee_file, $line_separator, $rOpts,
        $rpending_logfile_message, $binmode )
      = @_;
    my $fh               = undef;
    my $fh_copy          = undef;
    my $fh_tee           = undef;
    my $output_file_copy = "";
    my $output_file_open = 0;

    if ( $rOpts->{'format'} eq 'tidy' ) {
        ( $fh, $output_file ) = Perl::Tidy::streamhandle( $output_file, 'w' );
        unless ($fh) { die "Cannot write to output stream\n"; }
        $output_file_open = 1;
        if ($binmode) {
            if ( ref($fh) eq 'IO::File' ) {
                binmode $fh;
            }
            if ( $output_file eq '-' ) { binmode STDOUT }
        }
    }

    # A syntax check needs a real file to hand to perl.  When the output
    # is standard output or a reference there is no such file.
    if ( $output_file eq '-' || ref $output_file ) {
        if ( $rOpts->{'check-syntax'} ) {

            # Turning off syntax check when standard output is used.
            # The reason is that temporary files cause problems
            # on many systems.
            $rOpts->{'check-syntax'} = 0;
            $output_file_copy = '-';
            $$rpending_logfile_message .= <<EOM;
Note: --syntax check will be skipped because standard output is used
EOM

        }
    }

    bless {
        _fh               => $fh,
        _fh_copy          => $fh_copy,
        _fh_tee           => $fh_tee,
        _output_file      => $output_file,
        _output_file_open => $output_file_open,
        _output_file_copy => $output_file_copy,
        _tee_flag         => 0,
        _tee_file         => $tee_file,
        _tee_file_opened  => 0,
        _line_separator   => $line_separator,
        _binmode          => $binmode,
    }, $class;
}

sub write_line {

    # Write one line to the output stream, to the copy handle (if any)
    # and, while the tee is switched on, to the tee file.
    #
    # NOTE: the line is taken from $_[0] and modified in place: it is
    # chomped and the line separator is appended, so the caller's
    # variable is changed too.
    my $self    = shift;
    my $fh      = $self->{_fh};
    my $fh_copy = $self->{_fh_copy};

    my $output_file_open = $self->{_output_file_open};
    chomp $_[0];
    $_[0] .= $self->{_line_separator};

    $fh->print( $_[0] ) if ( $self->{_output_file_open} );
    print $fh_copy $_[0] if ( $fh_copy && $self->{_output_file_copy} );

    if ( $self->{_tee_flag} ) {

        # the tee file is opened lazily, on the first line that needs it
        unless ( $self->{_tee_file_opened} ) { $self->really_open_tee_file() }
        my $fh_tee = $self->{_fh_tee};
        print $fh_tee $_[0];
    }
}

sub get_output_file_copy {

    # Return the name recorded for the output copy if there is one,
    # otherwise the output file itself.
    my $self   = shift;
    my $ofname = $self->{_output_file_copy};
    unless ($ofname) {
        $ofname = $self->{_output_file};
    }
    return $ofname;
}

# start sending written lines to the tee file as well
sub tee_on {
    my $self = shift;
    $self->{_tee_flag} = 1;
}

# stop sending written lines to the tee file
sub tee_off {
    my $self = shift;
    $self->{_tee_flag} = 0;
}

sub really_open_tee_file {

    # Open the tee file for writing; dies if that is not possible.
    my $self     = shift;
    my $tee_file = $self->{_tee_file};

    # Pass the mode separately instead of gluing '>' onto the name.  With
    # a single ">name" string the name goes through 2-argument open,
    # which strips surrounding whitespace from it and gives special
    # meaning to some leading/trailing characters.
    my $fh_tee = IO::File->new( $tee_file, 'w' )
      or die("couldn't open TEE file $tee_file: $!\n");
    binmode $fh_tee if $self->{_binmode};
    $self->{_tee_file_opened} = 1;
    $self->{_fh_tee}          = $fh_tee;
}

sub close_output_file {

    # Close the output handle, the copy handle and the tee file.  Errors
    # from close are deliberately ignored.
    my $self = shift;
    eval { $self->{_fh}->close() } if $self->{_output_file_open};

    # only try to close a copy handle which actually exists, as is done
    # in Perl::Tidy::LineSource::close_input_file
    eval { $self->{_fh_copy}->close() }
      if ( $self->{_fh_copy} && $self->{_output_file_copy} );
    $self->close_tee_file();
}

sub close_tee_file {

    # Close the tee file if it was opened; a later write with the tee on
    # will open it again.
    my $self = shift;

    if ( $self->{_tee_file_opened} ) {
        eval { $self->{_fh_tee}->close() };
        $self->{_tee_file_opened} = 0;
    }
}

#####################################################################
#
# The Perl::Tidy::Diagnostics class writes the DIAGNOSTICS file, which is
# useful for program development.
#
# Only one such file is created regardless of the number of input
# files processed. This allows the results of processing many files
# to be summarized in a single file.
#
#####################################################################

package Perl::Tidy::Diagnostics;

sub new {

    # Constructor; takes no arguments besides the class name.
    my $class = shift;
    bless {
        _write_diagnostics_count => 0,
        _last_diagnostic_file    => "",
        _input_file              => "",
        _fh                      => undef,
    }, $class;
}

# remember the name of the file now being processed; it is used to label
# the messages written by write_diagnostics
sub set_input_file {
    my $self = shift;
    $self->{_input_file} = $_[0];
}

# This is a diagnostic routine which is useful for program development.
# Output from debug messages go to a file named DIAGNOSTICS, where
# they are labeled by file and line. This allows many files to be
# scanned at once for some particular condition of interest.
sub write_diagnostics {

    # Append a message (the arguments, joined with blanks) to the file
    # DIAGNOSTICS in the current directory, prefixed with the current
    # input line number.  A "FILE:" heading is written whenever the input
    # file differs from the one of the previous message.  The file is
    # opened (and truncated) on the first call only.
    my $self = shift;

    unless ( $self->{_write_diagnostics_count} ) {

        # report an open failure with die; no sub named 'death' is
        # defined in this package
        open DIAGNOSTICS, ">DIAGNOSTICS"
          or die("couldn't open DIAGNOSTICS: $!\n");
    }

    my $last_diagnostic_file = $self->{_last_diagnostic_file};
    my $input_file           = $self->{_input_file};
    if ( $last_diagnostic_file ne $input_file ) {
        print DIAGNOSTICS "\nFILE:$input_file\n";
    }
    $self->{_last_diagnostic_file} = $input_file;
    my $input_line_number = Perl::Tidy::Tokenizer::get_input_line_number();
    print DIAGNOSTICS "$input_line_number:\t@_";
    $self->{_write_diagnostics_count}++;
}

#####################################################################
#
# The Perl::Tidy::Logger class writes the .LOG and .ERR files
#
#####################################################################

package Perl::Tidy::Logger;

sub new {

    # Constructor.
    #   $rOpts        - options hash
    #   $log_file     - destination of the log, written by finish()
    #   $warning_file - destination of warnings; when it is a plain file
    #                   name an existing file of that name is removed
    #   $saw_extrude  - stored; selects the wording of the bug report
    #                   request in ask_user_for_bug_report()
    my $class = shift;
    my ( $rOpts, $log_file, $warning_file, $saw_extrude ) = @_;

    # remove any old error output file
    unless ( ref($warning_file) ) {
        if ( -e $warning_file ) { unlink($warning_file) }
    }

    bless {
        _log_file                      => $log_file,
        _fh_warnings                   => undef,
        _rOpts                         => $rOpts,
        _last_input_line_written       => 0,
        _at_end_of_file                => 0,
        _use_prefix                    => 1,
        _block_log_output              => 0,
        _line_of_tokens                => undef,
        _output_line_number            => undef,
        _wrote_line_information_string => 0,
        _wrote_column_headings         => 0,
        _warning_file                  => $warning_file,
        _warning_count                 => 0,
        _complaint_count               => 0,
        _saw_code_bug    => -1,             # -1=no 0=maybe 1=for sure
        _saw_brace_error => 0,
        _saw_extrude     => $saw_extrude,
        _output_array    => [],
    }, $class;
}

sub close_log_file {

    # Close the warnings handle, if one was opened.  Errors from close
    # are ignored.
    my $self = shift;
    if ( $self->{_fh_warnings} ) {
        eval { $self->{_fh_warnings}->close() };
        $self->{_fh_warnings} = undef;
    }
}

# number of calls made to warning() while not in quiet mode
sub get_warning_count {
    my $self = shift;
    return $self->{_warning_count};
}

# true if log entries are currently prefixed with line information
sub get_use_prefix {
    my $self = shift;
    return $self->{_use_prefix};
}

# discard all log output until unblock_log_output is called
sub block_log_output {
    my $self = shift;
    $self->{_block_log_output} = 1;
}

# accept log output again
sub unblock_log_output {
    my $self = shift;
    $self->{_block_log_output} = 0;
}

sub interrupt_logfile {

    # Switch off the line information prefix and write a WARNING banner;
    # resume_logfile ends the interruption.
    my $self = shift;
    $self->{_use_prefix} = 0;
    $self->warning("\n");
    $self->write_logfile_entry( '#' x 24 . " WARNING " . '#' x 25 . "\n" );
}

sub resume_logfile {

    # Write the closing banner line and switch the prefix back on.
    my $self = shift;
    $self->write_logfile_entry( '#' x 60 . "\n" );
    $self->{_use_prefix} = 1;
}

sub we_are_at_the_last_line {

    # Note the end of input.  From now on log entries are written
    # without line information.
    my $self = shift;
    unless ( $self->{_wrote_line_information_string} ) {
        $self->write_logfile_entry("Last line\n\n");
    }
    $self->{_at_end_of_file} = 1;
}

# record some stuff in case we go down in flames
sub black_box {

    # Remember the current line (and output line number) for use by later
    # log entries.  The line itself is echoed to the log, shortened to 35
    # characters, if at least 'logfile-gap' lines have gone by since the
    # last echo or if it starts a sub or package.
    my $self = shift;
    my ( $line_of_tokens, $output_line_number ) = @_;
    my $input_line        = $line_of_tokens->{_line_text};
    my $input_line_number = $line_of_tokens->{_line_number};

    # save line information in case we have to write a logfile message
    $self->{_line_of_tokens}                = $line_of_tokens;
    $self->{_output_line_number}            = $output_line_number;
    $self->{_wrote_line_information_string} = 0;

    my $last_input_line_written = $self->{_last_input_line_written};
    my $rOpts                   = $self->{_rOpts};
    if (
        (
            ( $input_line_number - $last_input_line_written ) >=
            $rOpts->{'logfile-gap'}
        )
        || ( $input_line =~ /^\s*(sub|package)\s+(\w+)/ )
      )
    {
        my $rlevels                      = $line_of_tokens->{_rlevels};
        my $structural_indentation_level = $$rlevels[0];
        $self->{_last_input_line_written} = $input_line_number;
        ( my $out_str = $input_line ) =~ s/^\s*//;
        chomp $out_str;

        # one '.' per indentation level replaces the leading whitespace
        $out_str = ( '.' x $structural_indentation_level ) . $out_str;

        if ( length($out_str) > 35 ) {
            $out_str = substr( $out_str, 0, 35 ) . " ....";
        }
        $self->logfile_output( "", "$out_str\n" );
    }
}

sub write_logfile_entry {
    my $self = shift;

    # add leading >>> to avoid confusing error messages and code
    $self->logfile_output( ">>>", "@_" );
}

sub write_column_headings {

    # Put the explanation of the log columns into the log; done once,
    # just before the first entry which carries line information.
    my $self = shift;

    $self->{_wrote_column_headings} = 1;
    my $routput_array = $self->{_output_array};
    push @{$routput_array}, <<EOM;
The nesting depths in the table below are at the start of the lines.
The indicated output line numbers are not always exact.
ci = levels of continuation indentation; bk = 1 if in BLOCK, 0 if not.

in:out indent c b nesting code + messages; (messages begin with >>>)
lines levels i k (code begins with one '.' per indent level)
------ ----- - - -------- -------------------------------------------
EOM
}

sub make_line_information_string {

    # make columns of information when a logfile message needs to go out
    # Returns an empty string if no line has been recorded by black_box
    # (or its line number is 0).
    my $self                    = shift;
    my $line_of_tokens          = $self->{_line_of_tokens};
    my $input_line_number       = $line_of_tokens->{_line_number};
    my $line_information_string = "";
    if ($input_line_number) {

        my $output_line_number   = $self->{_output_line_number};
        my $brace_depth          = $line_of_tokens->{_curly_brace_depth};
        my $paren_depth          = $line_of_tokens->{_paren_depth};
        my $square_bracket_depth = $line_of_tokens->{_square_bracket_depth};
        my $python_indentation_level =
          $line_of_tokens->{_python_indentation_level};
        my $rlevels         = $line_of_tokens->{_rlevels};
        my $rnesting_tokens = $line_of_tokens->{_rnesting_tokens};
        my $rci_levels      = $line_of_tokens->{_rci_levels};
        my $rnesting_blocks = $line_of_tokens->{_rnesting_blocks};

        my $structural_indentation_level = $$rlevels[0];

        $self->write_column_headings() unless $self->{_wrote_column_headings};

        # keep logfile columns aligned for scripts up to 999 lines;
        # for longer scripts it doesn't really matter
        my $extra_space = "";
        $extra_space .=
            ( $input_line_number < 10 )  ? " "
          : ( $input_line_number < 100 ) ? " "
          :                                "";
        $extra_space .=
            ( $output_line_number < 10 )  ? " "
          : ( $output_line_number < 100 ) ? " "
          :                                 "";

        # there are 2 possible nesting strings:
        # the original which looks like this: (0 [1 {2
        # the new one, which looks like this: {{[
        # the new one is easier to read, and shows the order, but
        # could be arbitrarily long, so we use it unless it is too long
        my $nesting_string =
          "($paren_depth [$square_bracket_depth {$brace_depth";
        my $nesting_string_new = $$rnesting_tokens[0];

        my $ci_level = $$rci_levels[0];
        if ( $ci_level > 9 ) { $ci_level = '*' }
        my $bk = ( $$rnesting_blocks[0] =~ /1$/ ) ? '1' : '0';

        if ( length($nesting_string_new) <= 8 ) {
            $nesting_string =
              $nesting_string_new . " " x ( 8 - length($nesting_string_new) );
        }
        if ( $python_indentation_level < 0 ) { $python_indentation_level = 0 }
        $line_information_string =
"L$input_line_number:$output_line_number$extra_space i$python_indentation_level:$structural_indentation_level $ci_level $bk $nesting_string";
    }
    return $line_information_string;
}

sub logfile_output {

    # Store one entry for the log.  The log is collected in memory and is
    # only written to a file by finish().
    #   $prompt - text placed between the line information and the message
    #   $msg    - the message
    # Nothing is stored while log output is blocked.
    my $self = shift;
    my ( $prompt, $msg ) = @_;
    return if ( $self->{_block_log_output} );

    my $routput_array = $self->{_output_array};
    if ( $self->{_at_end_of_file} || !$self->{_use_prefix} ) {
        push @{$routput_array}, "$msg";
    }
    else {
        my $line_information_string = $self->make_line_information_string();
        $self->{_wrote_line_information_string} = 1;

        if ($line_information_string) {
            push @{$routput_array}, "$line_information_string $prompt$msg";
        }
        else {
            push @{$routput_array}, "$msg";
        }
    }
}

# number of brace errors counted so far
sub get_saw_brace_error {
    my $self = shift;
    return $self->{_saw_brace_error};
}

# count a brace error without issuing a warning
sub increment_brace_error {
    my $self = shift;
    $self->{_saw_brace_error}++;
}

sub brace_warning {

    # Count a brace error and pass the message on to warning(), but only
    # for the first BRACE_WARNING_LIMIT errors.  When the limit is
    # reached a final notice is issued.
    my $self = shift;
    use constant BRACE_WARNING_LIMIT => 10;
    my $saw_brace_error = $self->{_saw_brace_error};

    if ( $saw_brace_error < BRACE_WARNING_LIMIT ) {
        $self->warning(@_);
    }
    $saw_brace_error++;
    $self->{_saw_brace_error} = $saw_brace_error;

    if ( $saw_brace_error == BRACE_WARNING_LIMIT ) {
        $self->warning("No further warnings of this type will be given\n");
    }
}

sub complain {

    # handle non-critical warning messages based on input flag
    my $self  = shift;
    my $rOpts = $self->{_rOpts};

    # these appear in .ERR output only if -w flag is used
    if ( $rOpts->{'warning-output'} ) {
        $self->warning(@_);
    }

    # otherwise, they go to the .LOG file
    else {
        $self->{_complaint_count}++;
        $self->write_logfile_entry(@_);
    }
}

sub warning {

    # report errors to .ERR file (or stdout)
    # Nothing at all is done (or counted) if the 'quiet' option is set.
    # Only the first WARNING_LIMIT messages are written, but all are
    # counted.
    my $self = shift;
    use constant WARNING_LIMIT => 50;

    my $rOpts = $self->{_rOpts};
    unless ( $rOpts->{'quiet'} ) {

        my $warning_count = $self->{_warning_count};

        # the output stream is opened when the first warning arrives
        unless ($warning_count) {
            my $warning_file = $self->{_warning_file};
            my $fh_warnings;
            if ( $rOpts->{'standard-error-output'} ) {
                $fh_warnings = *STDERR;
            }
            else {
                ( $fh_warnings, my $filename ) =
                  Perl::Tidy::streamhandle( $warning_file, 'w' );
                $fh_warnings or die("couldn't open $filename $!\n");
                warn "## Please see file $filename\n";
            }
            $self->{_fh_warnings} = $fh_warnings;
        }

        my $fh_warnings = $self->{_fh_warnings};
        if ( $warning_count < WARNING_LIMIT ) {
            if ( $self->get_use_prefix() > 0 ) {
                my $input_line_number =
                  Perl::Tidy::Tokenizer::get_input_line_number();
                $fh_warnings->print("$input_line_number:\t@_");
                $self->write_logfile_entry("WARNING: @_");
            }
            else {
                $fh_warnings->print(@_);
                $self->write_logfile_entry(@_);
            }
        }
        $warning_count++;
        $self->{_warning_count} = $warning_count;

        if ( $warning_count == WARNING_LIMIT ) {
            $fh_warnings->print("No further warnings will be given\n");
        }
    }
}

# programming bug codes:
# -1 = no bug
# 0 = maybe, not sure.
# 1 = definitely
sub report_possible_bug {

    # raise the bug code from 'no bug' to 'maybe'; a 'definitely' which
    # was recorded earlier is kept
    my $self         = shift;
    my $saw_code_bug = $self->{_saw_code_bug};
    $self->{_saw_code_bug} = ( $saw_code_bug < 0 ) ? 0 : $saw_code_bug;
}

sub report_definite_bug {
    my $self = shift;
    $self->{_saw_code_bug} = 1;
}

sub ask_user_for_bug_report {

    # Issue a request for a bug report, through warning(), if a bug was
    # recorded:
    #  - possible bug (code 0): only if the input file syntax was good
    #  - definite bug (code 1): always; the wording depends on whether
    #    -extrude was used
    #   $infile_syntax_ok - flag as returned by Perl::Tidy::check_syntax
    #   $formatter        - asked for its added semicolon count; the call
    #                       is made in an eval, so an object without that
    #                       method is acceptable
    my $self = shift;

    my ( $infile_syntax_ok, $formatter ) = @_;
    my $saw_code_bug = $self->{_saw_code_bug};
    if ( ( $saw_code_bug == 0 ) && ( $infile_syntax_ok == 1 ) ) {
        $self->warning(<<EOM);

You may have encountered a code bug in perltidy. If you think so, and
the problem is not listed in the BUGS file at
http://perltidy.sourceforge.net, please report it so that it can be
corrected. Include the smallest possible script which has the problem,
along with the .LOG file. See the manual pages for contact information.
Thank you!
EOM

    }
    elsif ( $saw_code_bug == 1 ) {
        if ( $self->{_saw_extrude} ) {
            $self->warning(<<EOM);

You may have encountered a bug in perltidy. However, since you are using the
-extrude option, the problem may be with perl or one of its modules, which have
occasional problems with this type of file. If you believe that the
problem is with perltidy, and the problem is not listed in the BUGS file at
http://perltidy.sourceforge.net, please report it so that it can be corrected.
Include the smallest possible script which has the problem, along with the .LOG
file. See the manual pages for contact information.
Thank you!
EOM
        }
        else {
            $self->warning(<<EOM);

Oops, you seem to have encountered a bug in perltidy. Please check the
BUGS file at http://perltidy.sourceforge.net. If the problem is not
listed there, please report it so that it can be corrected. Include the
smallest possible script which produces this message, along with the
.LOG file if appropriate. See the manual pages for contact information.
Your efforts are appreciated.
Thank you!
EOM
            my $added_semicolon_count = 0;
            eval {
                $added_semicolon_count =
                  $formatter->get_added_semicolon_count();
            };
            if ( $added_semicolon_count > 0 ) {
                $self->warning(<<EOM);

The log file shows that perltidy added $added_semicolon_count semicolons.
Please rerun with -nasc to see if that is the cause of the syntax error. Even
if that is the problem, please report it so that it can be fixed.
EOM

            }
        }
    }
}

sub finish {

    # called after all formatting to summarize errors
    # The collected log is written out only if a bug was recorded (a
    # possible bug counts only when the input syntax was good) or if the
    # 'logfile' option is set.
    my $self = shift;
    my ( $infile_syntax_ok, $formatter ) = @_;

    my $rOpts         = $self->{_rOpts};
    my $warning_count = $self->{_warning_count};
    my $saw_code_bug  = $self->{_saw_code_bug};

    my $save_logfile =
         ( $saw_code_bug == 0 && $infile_syntax_ok == 1 )
      || $saw_code_bug == 1
      || $rOpts->{'logfile'};
    my $log_file = $self->{_log_file};
    if ($warning_count) {
        if ($save_logfile) {
            $self->block_log_output();    # avoid echoing this to the logfile
            $self->warning(
                "The logfile $log_file may contain useful information\n");
            $self->unblock_log_output();
        }

        if ( $self->{_complaint_count} > 0 ) {
            $self->warning(
"To see $self->{_complaint_count} non-critical warnings rerun with -w\n"
            );
        }

        if ( $self->{_saw_brace_error}
            && ( $rOpts->{'logfile-gap'} > 1 || !$save_logfile ) )
        {
            $self->warning("To save a full .LOG file rerun with -g\n");
        }
    }
    $self->ask_user_for_bug_report( $infile_syntax_ok, $formatter );

    if ($save_logfile) {

        # a log which cannot be opened is silently skipped
        my ( $fh, $filename ) = Perl::Tidy::streamhandle( $log_file, 'w' );
        if ($fh) {
            my $routput_array = $self->{_output_array};
            foreach ( @{$routput_array} ) { $fh->print($_) }
            eval { $fh->close() };
        }
    }
}

#####################################################################
#
# The Perl::Tidy::DevNull class is a null output sink: it offers the
# 'print' and 'close' methods of an output stream but discards
# everything it is given.
#
#####################################################################

package Perl::Tidy::DevNull;

# Constructor; the object carries no state.
sub new {
    my ($class) = @_;
    my $self = {};
    return bless $self, $class;
}

# Accept and ignore any arguments.
sub print { return; }

# Nothing was opened, so there is nothing to close.
sub close { return; }
# pre section go out to a temporary string 4064 my $pre_string; 4065 $html_pre_fh = Perl::Tidy::IOScalar->new( \$pre_string, 'w' ); 4066 push @pre_string_stack, \$pre_string; 4067 } 4068 4069 # pod text gets diverted if the 'pod2html' is used 4070 my $html_pod_fh; 4071 my $pod_string; 4072 if ( $rOpts->{'pod2html'} ) { 4073 if ( $rOpts->{'html-pre-only'} ) { 4074 undef $rOpts->{'pod2html'}; 4075 } 4076 else { 4077 eval "use Pod::Html"; 4078 if ($@) { 4079 warn 4080"unable to find Pod::Html; cannot use pod2html\n-npod disables this message\n"; 4081 undef $rOpts->{'pod2html'}; 4082 } 4083 else { 4084 $html_pod_fh = Perl::Tidy::IOScalar->new( \$pod_string, 'w' ); 4085 } 4086 } 4087 } 4088 4089 my $toc_filename; 4090 my $src_filename; 4091 if ( $rOpts->{'frames'} ) { 4092 unless ($extension) { 4093 warn 4094"cannot use frames without a specified output extension; ignoring -frm\n"; 4095 undef $rOpts->{'frames'}; 4096 } 4097 else { 4098 $toc_filename = $input_file . $html_toc_extension . $extension; 4099 $src_filename = $input_file . $html_src_extension . 
sub add_toc_item {

    # Register one entry for the html table of contents and drop the
    # matching anchor into the <pre> stream.  The anchor is written even
    # when no table of contents is wanted.
    #
    #   $name - the text of the entry (package or sub name, or marker)
    #   $type - 'package', 'sub', '__END__', '__DATA__' or 'EOF'
    #
    # The final call must have type 'EOF'; it terminates the list.  If
    # 'EOF' arrives before any other entry, nothing at all is written.
    my ( $self, $name, $type ) = @_;

    my $toc_fh        = $self->{_html_toc_fh};
    my $pre_fh        = $self->{_html_pre_fh};
    my $rname_count   = $self->{_rtoc_name_count};
    my $ritem_count   = $self->{_rtoc_item_count};
    my $rlevel        = $self->{_rlast_level};
    my $ropen_package = $self->{_rin_toc_package};
    my $rpackages     = $self->{_rpackage_stack};

    # Subs are listed in a sublist under their package.  These two
    # helpers are the only code that opens or closes such a sublist.
    my $close_sublist = sub {
        return unless ${$ropen_package};
        $toc_fh->print("</ul>\n</li>\n");
        ${$ropen_package} = "";
    };

    my $open_sublist = sub {
        my ( $anchor, $package ) = @_;
        $close_sublist->();    # a no-op when no sublist is open
        $toc_fh->print(<<EOM);
<li><a href=\"#$anchor\">package $package</a>
<ul>
EOM
        ${$ropen_package} = $package;
    };

    # The very first real item opens the table of contents
    if ( !${$ritem_count} ) {
        return if $type eq 'EOF';
        $toc_fh->print( <<"TOC_END");
<!-- BEGIN CODE INDEX --><a name="code-index"></a>
<ul>
TOC_END
    }
    ${$ritem_count}++;

    # Build the anchor name: packages are prefixed with 'package-' and
    # repeated names (compared without regard to case) get '-1', '-2',
    # ... appended.  With pod2html a trailing '-' is added as well, to
    # stay clear of the anchors which pod2html creates.
    my $anchor = $type eq 'package' ? "package-$name" : $name;
    my $times_seen = $rname_count->{ lc $anchor }++;
    $anchor .= "-$times_seen" if $times_seen;
    $anchor .= '-'            if $rOpts->{'pod2html'};

    # A sub must sit in the sublist of its own package ('main' if none
    # is recorded at the current level)
    if ( $type eq 'sub' ) {
        my $package = $rpackages->[ ${$rlevel} ] || 'main';

        if ( ${$ropen_package} && ${$ropen_package} ne $package ) {
            $close_sublist->();
        }
        if ( !${$ropen_package} ) {
            $open_sublist->( $anchor, $package );
        }
    }

    # Write the toc entry itself
    if ( $type eq 'package' ) {
        $open_sublist->( $anchor, $name );
    }
    else {

        # anything which is neither package nor sub ends a sublist
        $close_sublist->() unless $type eq 'sub';
        $toc_fh->print("<li><a href=\"#$anchor\">$name</a></li>\n");
    }

    # Anchor for this location in the code section
    $pre_fh->print("<a name=\"$anchor\"></a>");

    # 'EOF' closes the table of contents
    if ( $type eq 'EOF' ) {
        $toc_fh->print( <<"TOC_END");
</ul>
<!-- END CODE INDEX -->
TOC_END
    }
}
4258 # When adding NEW_TOKENS: update this hash table 4259 # short names => long names 4260 %short_to_long_names = ( 4261 'n' => 'numeric', 4262 'p' => 'paren', 4263 'q' => 'quote', 4264 's' => 'structure', 4265 'c' => 'comment', 4266 'v' => 'v-string', 4267 'cm' => 'comma', 4268 'w' => 'bareword', 4269 'co' => 'colon', 4270 'pu' => 'punctuation', 4271 'i' => 'identifier', 4272 'j' => 'label', 4273 'h' => 'here-doc-target', 4274 'hh' => 'here-doc-text', 4275 'k' => 'keyword', 4276 'sc' => 'semicolon', 4277 'm' => 'subroutine', 4278 'pd' => 'pod-text', 4279 ); 4280 4281 # Now we have to map actual token types into one of the above short 4282 # names; any token types not mapped will get 'punctuation' 4283 # properties. 4284 4285 # The values of this hash table correspond to the keys of the 4286 # previous hash table. 4287 # The keys of this hash table are token types and can be seen 4288 # by running with --dump-token-types (-dtt). 4289 4290 # When adding NEW_TOKENS: update this hash table 4291 # $type => $short_name 4292 %token_short_names = ( 4293 '#' => 'c', 4294 'n' => 'n', 4295 'v' => 'v', 4296 'k' => 'k', 4297 'F' => 'k', 4298 'Q' => 'q', 4299 'q' => 'q', 4300 'J' => 'j', 4301 'j' => 'j', 4302 'h' => 'h', 4303 'H' => 'hh', 4304 'w' => 'w', 4305 ',' => 'cm', 4306 '=>' => 'cm', 4307 ';' => 'sc', 4308 ':' => 'co', 4309 'f' => 'sc', 4310 '(' => 'p', 4311 ')' => 'p', 4312 'M' => 'm', 4313 'P' => 'pd', 4314 'A' => 'co', 4315 ); 4316 4317 # These token types will all be called identifiers for now 4318 # FIXME: need to separate user defined modules as separate type 4319 my @identifier = qw" i t U C Y Z G :: "; 4320 @token_short_names{@identifier} = ('i') x scalar(@identifier); 4321 4322 # These token types will be called 'structure' 4323 my @structure = qw" { } "; 4324 @token_short_names{@structure} = ('s') x scalar(@structure); 4325 4326 # OLD NOTES: save for reference 4327 # Any of these could be added later if it would be useful. 
sub make_getopt_long_names {

    # Append the html-related option specifications to the list
    # referenced by $rgetopt_names.  Called as a class method; the
    # class name itself is not used.
    my ( $class, $rgetopt_names ) = @_;

    # color, italic and bold switches for every long token name
    foreach my $long_name ( values %short_to_long_names ) {
        push @{$rgetopt_names},
          "html-color-$long_name=s",
          "html-italic-$long_name!",
          "html-bold-$long_name!";
    }

    # all other html options
    push @{$rgetopt_names}, qw(
      html-color-background=s
      html-linked-style-sheet=s
      nohtml-style-sheets
      html-pre-only
      html-line-numbers
      html-entities!
      stylesheet
      html-table-of-contents!
      pod2html!
      frames!
      html-toc-extension=s
      html-src-extension=s
    );

    # Pod::Html parameters:
    push @{$rgetopt_names}, qw(
      backlink=s
      cachedir=s
      htmlroot=s
      libpods=s
      podpath=s
      podroot=s
      title=s
    );

    # Pod::Html parameters with leading 'pod' which will be removed
    # before the call to Pod::Html
    push @{$rgetopt_names}, qw(
      podquiet!
      podverbose!
      podrecurse!
      podflush
      podheader!
      podindex!
    );
}
sub check_options {

    # This will be called once after options have been parsed.
    # It stores the options hash in the class variable $rOpts, fills in
    # default color/bold/italic properties, builds the per-token lookup
    # tables, and handles the style sheet options:
    #   - 'stylesheet': write the style sheet to STDOUT and exit
    #   - 'html-linked-style-sheet': remember the link name and create
    #     the file if it does not exist yet
    # Dies if the name given after -css starts with a '-'.
    my $class = shift;
    $rOpts = shift;

    # X11 color names for default settings that seemed to look ok
    # (these color names are only used for programming clarity; the hex
    # numbers are actually written)
    use constant ForestGreen   => "#228B22";
    use constant SaddleBrown   => "#8B4513";
    use constant magenta4      => "#8B008B";
    use constant IndianRed3    => "#CD5555";
    use constant DeepSkyBlue4  => "#00688B";
    use constant MediumOrchid3 => "#B452CD";
    use constant black         => "#000000";
    use constant white         => "#FFFFFF";
    use constant red           => "#FF0000";

    # set default color, bold, italic properties
    # anything not listed here will be given the default (punctuation) color --
    # these types currently not listed and get default: ws pu s sc cm co p
    # When adding NEW_TOKENS: add an entry here if you don't want defaults

    # set_default_properties( $short_name, default_color, bold?, italic? );
    set_default_properties( 'c',  ForestGreen,   0, 0 );
    set_default_properties( 'pd', ForestGreen,   0, 1 );
    set_default_properties( 'k',  magenta4,      1, 0 );    # was SaddleBrown
    set_default_properties( 'q',  IndianRed3,    0, 0 );
    set_default_properties( 'hh', IndianRed3,    0, 1 );
    set_default_properties( 'h',  IndianRed3,    1, 0 );
    set_default_properties( 'i',  DeepSkyBlue4,  0, 0 );
    set_default_properties( 'w',  black,         0, 0 );
    set_default_properties( 'n',  MediumOrchid3, 0, 0 );
    set_default_properties( 'v',  MediumOrchid3, 0, 0 );
    set_default_properties( 'j',  IndianRed3,    1, 0 );
    set_default_properties( 'm',  red,           1, 0 );

    set_default_color( 'html-color-background',  white );
    set_default_color( 'html-color-punctuation', black );

    # setup property lookup tables for tokens based on their short names
    # every token type has a short name, and will use these tables
    # to do the html markup
    while ( my ( $short_name, $long_name ) = each %short_to_long_names ) {
        $html_color{$short_name}  = $rOpts->{"html-color-$long_name"};
        $html_bold{$short_name}   = $rOpts->{"html-bold-$long_name"};
        $html_italic{$short_name} = $rOpts->{"html-italic-$long_name"};
    }

    # write style sheet to STDOUT and exit if requested
    # (note that the exit status is 1 here)
    if ( defined( $rOpts->{'stylesheet'} ) ) {
        write_style_sheet_file('-');
        exit 1;
    }

    # make sure user gives a file name after -css
    if ( defined( $rOpts->{'html-linked-style-sheet'} ) ) {
        $css_linkname = $rOpts->{'html-linked-style-sheet'};
        if ( $css_linkname =~ /^-/ ) {
            die "You must specify a valid filename after -css\n";
        }
    }

    # check for conflict; -css wins.  The message goes out with 'warn',
    # like the other diagnostics issued from this package.
    if ( $css_linkname && $rOpts->{'nohtml-style-sheets'} ) {
        $rOpts->{'nohtml-style-sheets'} = 0;
        warn "You can't specify both -css and -nss; -nss ignored\n";
    }

    # write a style sheet file if necessary
    if ($css_linkname) {

        # if the selected filename exists, don't write, because user may
        # have done some work by hand to create it; use backup name instead
        # Also, this will avoid a potential disaster in which the user
        # forgets to specify the style sheet, like this:
        #    perltidy -html -css myfile1.pl myfile2.pl
        # This would cause myfile1.pl to be parsed as the style sheet by
        # GetOpts
        my $css_filename = $css_linkname;
        unless ( -e $css_filename ) {
            write_style_sheet_file($css_filename);
        }
    }

    # html entities are not used unless the 'html-entities' option is set
    $missing_html_entities = 1 unless $rOpts->{'html-entities'};
}

sub write_style_sheet_file {

    # Write the default style sheet to the named file; dies if the file
    # cannot be opened.
    # NOTE(review): the open mode is part of the string handed to
    # IO::File->new, and check_options passes '-' as the name to send
    # the style sheet to STDOUT -- keep this form unless '-' is handled
    # explicitly.
    my $css_filename = shift;
    my $fh;
    unless ( $fh = IO::File->new("> $css_filename") ) {
        die "can't open $css_filename: $!\n";
    }
    write_style_sheet_data($fh);
    eval { $fh->close };
}
sub set_default_color {

    # make sure that options hash $rOpts->{$key} contains a valid color:
    # a true value already stored under $key is kept, otherwise $color
    # is used; either way the value is normalized with check_RGB
    my ( $key, $color ) = @_;
    if ( $rOpts->{$key} ) { $color = $rOpts->{$key} }
    $rOpts->{$key} = check_RGB($color);
}

sub check_RGB {

    # if color is a 6 digit hex RGB value, prepend a #, otherwise
    # assume that it is a valid ascii color name and return it unchanged
    my ($color) = @_;
    if ( $color =~ /^[0-9a-fA-F]{6,6}$/ ) { $color = "#$color" }
    return $color;
}

sub set_default_properties {

    # Install the default color, bold and italic settings for the token
    # class with the given short name.  Bold and italic values which are
    # already defined in the options hash are left alone.
    my ( $short_name, $color, $bold, $italic ) = @_;

    set_default_color( "html-color-$short_to_long_names{$short_name}", $color );
    my $key;
    $key = "html-bold-$short_to_long_names{$short_name}";
    $rOpts->{$key} = ( defined $rOpts->{$key} ) ? $rOpts->{$key} : $bold;
    $key = "html-italic-$short_to_long_names{$short_name}";
    $rOpts->{$key} = ( defined $rOpts->{$key} ) ? $rOpts->{$key} : $italic;
}

sub pod_to_html {

    # Use Pod::Html to process the pod and make the page
    # then merge the perltidy code sections into it.
    # return 1 if success, 0 otherwise
    #
    #   $pod_string        - the pod text to be converted
    #   $css_string        - style sheet text or link, may be empty
    #   $toc_string        - perltidy's own table of contents, may be empty
    #   $rpre_string_stack - ref to array of refs to the <pre> sections
    my $self = shift;
    my ( $pod_string, $css_string, $toc_string, $rpre_string_stack ) = @_;
    my $title        = $self->{_title};
    my $success_flag = 0;

    # don't try to use pod2html if no pod
    unless ($pod_string) {
        return $success_flag;
    }

    # Pod::Html requires a real temporary filename
    # If we are making a frame, we have a name available
    # Otherwise, we have to find one
    my $tmpfile;
    if ( $rOpts->{'frames'} ) {
        $tmpfile = $self->{_toc_filename};
    }
    else {
        $tmpfile = Perl::Tidy::make_temporary_filename();
    }
    my $fh_tmp = IO::File->new( $tmpfile, 'w' );
    unless ($fh_tmp) {
        warn "unable to open temporary file $tmpfile; cannot use pod2html\n";
        return $success_flag;
    }

    #------------------------------------------------------------------
    # Warning: a temporary file is open; we have to clean up if
    # things go bad.  From here on all returns should be by going to
    # RETURN so that the temporary file gets unlinked.
    #------------------------------------------------------------------

    # write the pod text to the temporary file
    $fh_tmp->print($pod_string);
    $fh_tmp->close();

    # Hand off the pod to pod2html.
    # Note that we can use the same temporary filename for input and output
    # because of the way pod2html works.
    {

        my @args;
        push @args, "--infile=$tmpfile", "--outfile=$tmpfile", "--title=$title";
        my $kw;

        # Flags with string args:
        # "backlink=s", "cachedir=s", "htmlroot=s", "libpods=s",
        # "podpath=s", "podroot=s"
        # Note: -css=s is handled by perltidy itself
        foreach $kw (qw(backlink cachedir htmlroot libpods podpath podroot)) {
            if ( $rOpts->{$kw} ) { push @args, "--$kw=$rOpts->{$kw}" }
        }

        # Toggle switches; these have extra leading 'pod'
        # "header!", "index!", "recurse!", "quiet!", "verbose!"
        foreach $kw (qw(podheader podindex podrecurse podquiet podverbose)) {
            my $kwd = $kw;    # allows us to strip 'pod'
            if ( $rOpts->{$kw} ) { $kwd =~ s/^pod//; push @args, "--$kwd" }
            elsif ( defined( $rOpts->{$kw} ) ) {
                $kwd =~ s/^pod//;
                push @args, "--no$kwd";
            }
        }

        # "flush",
        $kw = 'podflush';
        if ( $rOpts->{$kw} ) { $kw =~ s/^pod//; push @args, "--$kw" }

        # Must clean up if pod2html dies (it can);
        # Be careful not to overwrite callers __DIE__ routine
        local $SIG{__DIE__} = sub {
            print $_[0];
            unlink $tmpfile if -e $tmpfile;
            exit 1;
        };

        pod2html(@args);
    }
    $fh_tmp = IO::File->new( $tmpfile, 'r' );
    unless ($fh_tmp) {

        # this error shouldn't happen ... we just used this filename
        warn "unable to open temporary file $tmpfile; cannot use pod2html\n";
        goto RETURN;
    }

    my $html_fh = $self->{_html_fh};
    my @toc;
    my $in_toc;
    my $no_print;

    # This routine will write the html selectively and store the toc
    my $html_print = sub {
        foreach (@_) {
            $html_fh->print($_) unless ($no_print);
            if ($in_toc) { push @toc, $_ }
        }
    };

    # loop over lines of html output from pod2html and merge in
    # the necessary perltidy html sections
    my ( $saw_body, $saw_index, $saw_body_end );

    # test for definedness so that a last line which is false as a
    # string (such as '0' without a newline) is not lost
    while ( defined( my $line = $fh_tmp->getline() ) ) {

        if ( $line =~ /^\s*<html>\s*$/i ) {
            my $date = localtime;
            $html_print->("<!-- Generated by perltidy on $date -->\n");
            $html_print->($line);
        }

        # Copy the perltidy css, if any, after <body> tag
        elsif ( $line =~ /^\s*<body.*>\s*$/i ) {
            $saw_body = 1;
            $html_print->($css_string) if $css_string;
            $html_print->($line);

            # add a top anchor and heading
            $html_print->("<a name=\"-top-\"></a>\n");
            $title = escape_html($title);
            $html_print->("<h1>$title</h1>\n");
        }
        elsif ( $line =~ /^\s*<!-- INDEX BEGIN -->\s*$/i ) {
            $in_toc = 1;

            # when frames are used, an extra table of contents in the
            # contents panel is confusing, so don't print it
            $no_print = $rOpts->{'frames'}
              || !$rOpts->{'html-table-of-contents'};
            $html_print->("<h2>Doc Index:</h2>\n") if $rOpts->{'frames'};
            $html_print->($line);
        }

        # Copy the perltidy toc, if any, after the Pod::Html toc
        elsif ( $line =~ /^\s*<!-- INDEX END -->\s*$/i ) {
            $saw_index = 1;
            $html_print->($line);
            if ($toc_string) {
                $html_print->("<hr />\n") if $rOpts->{'frames'};
                $html_print->("<h2>Code Index:</h2>\n");

                # $in_toc is still set here, so these lines are also
                # collected in @toc by $html_print
                my @code_toc = map { $_ .= "\n" } split /\n/, $toc_string;
                $html_print->(@code_toc);
            }
            $in_toc   = 0;
            $no_print = 0;
        }

        # Copy one perltidy section after each marker
        elsif ( $line =~ /^(.*)<!-- pERLTIDY sECTION -->(.*)$/ ) {

            # copy the captures right away; they are not safe to hold
            # across other calls
            my ( $before_marker, $after_marker ) = ( $1, $2 );
            $line = $after_marker;
            $html_print->($before_marker) if $before_marker;

            # Intermingle code and pod sections if we saw multiple =cut's.
            if ( $self->{_pod_cut_count} > 1 ) {
                my $rpre_string = shift(@$rpre_string_stack);

                # the stack may be empty; an undefined reference must
                # not be dereferenced
                if ( $rpre_string && $$rpre_string ) {
                    $html_print->('<pre>');
                    $html_print->($$rpre_string);
                    $html_print->('</pre>');
                }
                else {

                    # shouldn't happen: we stored a string before writing
                    # each marker.
                    warn
"Problem merging html stream with pod2html; order may be wrong\n";
                }
                $html_print->($line);
            }

            # If didn't see multiple =cut lines, we'll put the pod out first
            # and then the code, because it's less confusing.
            else {

                # since we are not intermixing code and pod, we don't need
                # or want any <hr> lines which separated pod and code
                $html_print->($line) unless ( $line =~ /^\s*<hr>\s*$/i );
            }
        }

        # Copy any remaining code section before the </body> tag
        elsif ( $line =~ /^\s*<\/body>\s*$/i ) {
            $saw_body_end = 1;
            if (@$rpre_string_stack) {
                unless ( $self->{_pod_cut_count} > 1 ) {
                    $html_print->('<hr />');
                }
                while ( my $rpre_string = shift(@$rpre_string_stack) ) {
                    $html_print->('<pre>');
                    $html_print->($$rpre_string);
                    $html_print->('</pre>');
                }
            }
            $html_print->($line);
        }
        else {
            $html_print->($line);
        }
    }

    $success_flag = 1;
    unless ($saw_body) {
        warn "Did not see <body> in pod2html output\n";
        $success_flag = 0;
    }
    unless ($saw_body_end) {
        warn "Did not see </body> in pod2html output\n";
        $success_flag = 0;
    }
    unless ($saw_index) {
        warn "Did not find INDEX END in pod2html output\n";
        $success_flag = 0;
    }

  RETURN:

    # Release the read handle on the temporary file before it is
    # unlinked; some systems will not remove a file which is open.
    # ($fh_tmp is undefined if the file could not be reopened.)
    eval { $fh_tmp->close() } if $fh_tmp;

    # NOTE(review): when we arrive here by 'goto', $html_fh has not been
    # assigned yet, so this close does nothing and the output stream
    # stays open.
    eval { $html_fh->close() };

    # note that we have to unlink tmpfile before making frames
    # because the tmpfile may be one of the names used for frames
    unlink $tmpfile if -e $tmpfile;
    if ( $success_flag && $rOpts->{'frames'} ) {
        $self->make_frame( \@toc );
    }
    return $success_flag;
}
sub write_toc_html {

    # write a separate html table of contents file for frames
    #
    #   $title          - page title, used as given (the caller in this
    #                     file passes it already escaped)
    #   $toc_filename   - name of the file to write
    #   $src_basename   - file name which the toc links point into
    #   $rtoc           - ref to array of toc lines; the anchors in
    #                     these lines are rewritten IN PLACE
    #   $src_frame_name - name of the frame used as the link target
    #
    # Returns the first rewritten anchor, or undef if there is none.
    # Dies if the file cannot be opened.
    my ( $title, $toc_filename, $src_basename, $rtoc, $src_frame_name ) = @_;
    my $fh = IO::File->new( $toc_filename, 'w' )
      or die "Cannot open $toc_filename:$!\n";
    $fh->print(<<EOM);
<html>
<head>
<title>$title</title>
</head>
<body>
<h1><a href=\"$src_basename#-top-" target="$src_frame_name">$title</a></h1>
EOM

    my $first_anchor =
      change_anchor_names( $rtoc, $src_basename, "$src_frame_name" );
    $fh->print( join "", @$rtoc );

    $fh->print(<<EOM);
</body>
</html>
EOM

    # flush the file now instead of waiting for $fh to go out of scope
    eval { $fh->close() };

    return $first_anchor;
}

sub write_frame_html {

    # write the html frameset file which ties together the table of
    # contents panel and the source panel
    #
    #   $title          - page title, used as given
    #   $frame_filename - name of the file to write
    #   $top_basename   - optional master index file; if true, it is
    #                     shown in an extra panel above the toc
    #   $toc_basename   - file shown in the table of contents panel
    #   $src_basename   - file shown in the source panel
    #   $src_frame_name - name given to the source frame
    #
    # Dies if the file cannot be opened.
    my (
        $title,        $frame_filename, $top_basename,
        $toc_basename, $src_basename,   $src_frame_name
    ) = @_;

    # the message names the file which we actually tried to open
    my $fh = IO::File->new( $frame_filename, 'w' )
      or die "Cannot open $frame_filename:$!\n";

    $fh->print(<<EOM);
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Frameset//EN"
    "http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd">
<?xml version="1.0" encoding="iso-8859-1" ?>
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>$title</title>
</head>
EOM

    # two left panels, one right, if master index file
    if ($top_basename) {
        $fh->print(<<EOM);
<frameset cols="20%,80%">
<frameset rows="30%,70%">
<frame src = "$top_basename" />
<frame src = "$toc_basename" />
</frameset>
EOM
    }

    # one left panel, one right, if no master index file
    else {
        $fh->print(<<EOM);
<frameset cols="20%,*">
<frame src = "$toc_basename" />
EOM
    }
    $fh->print(<<EOM);
<frame src = "$src_basename" name = "$src_frame_name" />
<noframes>
<body>
<p>If you see this message, you are using a non-frame-capable web client.</p>
<p>This document contains:</p>
<ul>
<li><a href="$toc_basename">A table of contents</a></li>
<li><a href="$src_basename">The source code</a></li>
</ul>
</body>
</noframes>
</frameset>
</html>
EOM

    # flush the file now instead of waiting for $fh to go out of scope
    eval { $fh->close() };
}

sub change_anchor_names {

    # add a filename and target to anchors
    # also return the first anchor
    #
    #   $rlines   - ref to array of html lines, modified IN PLACE
    #   $filename - file name placed in front of each '#name'
    #   $target   - value for the 'target' attribute
    #
    # Only the last anchor on a line is rewritten, and a rewritten line
    # always ends with a newline.  Returns undef if nothing matched.
    my ( $rlines, $filename, $target ) = @_;
    my $first_anchor;
    foreach my $line (@$rlines) {

        #  We're looking for lines like this:
        #  <LI><A HREF="#synopsis">SYNOPSIS</A></LI>
        #  ----  -       --------  -----------------
        #  $1              $4            $5
        # Any file name in front of the '#' and any other attributes of
        # the tag are dropped.
        if ( $line =~ /^(.*)<a(.*)href\s*=\s*"([^#]*)#([^"]+)"[^>]*>(.*)$/i ) {
            my $pre  = $1;
            my $name = $4;
            my $post = $5;
            my $href = "$filename#$name";
            $line = "$pre<a href=\"$href\" target=\"$target\">$post\n";
            unless ($first_anchor) { $first_anchor = $href }
        }
    }
    return $first_anchor;
}
5011 if ( $rOpts->{pod2html} 5012 && $self->{_pod_cut_count} >= 1 5013 && $rOpts->{'html-color-background'} eq '#FFFFFF' ) 5014 { 5015 $rOpts->{'html-pre-color-background'} = '#F0F0F0'; 5016 } 5017 5018 # put the css or its link into a string, if used 5019 my $css_string; 5020 my $fh_css = Perl::Tidy::IOScalar->new( \$css_string, 'w' ); 5021 5022 # use css linked to another file 5023 if ( $rOpts->{'html-linked-style-sheet'} ) { 5024 $fh_css->print( 5025 qq(<link rel="stylesheet" href="$css_linkname" type="text/css" />) 5026 ); 5027 } 5028 5029 # use css embedded in this file 5030 elsif ( !$rOpts->{'nohtml-style-sheets'} ) { 5031 $fh_css->print( <<'ENDCSS'); 5032<style type="text/css"> 5033<!-- 5034ENDCSS 5035 write_style_sheet_data($fh_css); 5036 $fh_css->print( <<"ENDCSS"); 5037--> 5038</style> 5039ENDCSS 5040 } 5041 5042 # ----------------------------------------------------------- 5043 # path 2: use pod2html if requested 5044 # If we fail for some reason, continue on to path 3 5045 # ----------------------------------------------------------- 5046 if ( $rOpts->{'pod2html'} ) { 5047 my $rpod_string = $self->{_rpod_string}; 5048 $self->pod_to_html( $$rpod_string, $css_string, $$rtoc_string, 5049 $rpre_string_stack ) 5050 && return; 5051 } 5052 5053 # -------------------------------------------------- 5054 # path 3: write code in html, with pod only in italics 5055 # -------------------------------------------------- 5056 my $input_file = $self->{_input_file}; 5057 my $title = escape_html($input_file); 5058 my $date = localtime; 5059 $html_fh->print( <<"HTML_START"); 5060<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" 5061 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> 5062<!-- Generated by perltidy on $date --> 5063<html xmlns="http://www.w3.org/1999/xhtml"> 5064<head> 5065<title>$title</title> 5066HTML_START 5067 5068 # output the css, if used 5069 if ($css_string) { 5070 $html_fh->print($css_string); 5071 $html_fh->print( <<"ENDCSS"); 
5072</head> 5073<body> 5074ENDCSS 5075 } 5076 else { 5077 5078 $html_fh->print( <<"HTML_START"); 5079</head> 5080<body bgcolor=\"$rOpts->{'html-color-background'}\" text=\"$rOpts->{'html-color-punctuation'}\"> 5081HTML_START 5082 } 5083 5084 $html_fh->print("<a name=\"-top-\"></a>\n"); 5085 $html_fh->print( <<"EOM"); 5086<h1>$title</h1> 5087EOM 5088 5089 # copy the table of contents 5090 if ( $$rtoc_string 5091 && !$rOpts->{'frames'} 5092 && $rOpts->{'html-table-of-contents'} ) 5093 { 5094 $html_fh->print($$rtoc_string); 5095 } 5096 5097 # copy the pre section(s) 5098 my $fname_comment = $input_file; 5099 $fname_comment =~ s/--+/-/g; # protect HTML comment tags 5100 $html_fh->print( <<"END_PRE"); 5101<hr /> 5102<!-- contents of filename: $fname_comment --> 5103<pre> 5104END_PRE 5105 5106 foreach my $rpre_string (@$rpre_string_stack) { 5107 $html_fh->print($$rpre_string); 5108 } 5109 5110 # and finish the html page 5111 $html_fh->print( <<"HTML_END"); 5112</pre> 5113</body> 5114</html> 5115HTML_END 5116 eval { $html_fh->close() }; # could be object without close method 5117 5118 if ( $rOpts->{'frames'} ) { 5119 my @toc = map { $_ .= "\n" } split /\n/, $$rtoc_string; 5120 $self->make_frame( \@toc ); 5121 } 5122} 5123 5124sub markup_tokens { 5125 my $self = shift; 5126 my ( $rtokens, $rtoken_type, $rlevels ) = @_; 5127 my ( @colored_tokens, $j, $string, $type, $token, $level ); 5128 my $rlast_level = $self->{_rlast_level}; 5129 my $rpackage_stack = $self->{_rpackage_stack}; 5130 5131 for ( $j = 0 ; $j < @$rtoken_type ; $j++ ) { 5132 $type = $$rtoken_type[$j]; 5133 $token = $$rtokens[$j]; 5134 $level = $$rlevels[$j]; 5135 $level = 0 if ( $level < 0 ); 5136 5137 #------------------------------------------------------- 5138 # Update the package stack. The package stack is needed to keep 5139 # the toc correct because some packages may be declared within 5140 # blocks and go out of scope when we leave the block. 
5141 #------------------------------------------------------- 5142 if ( $level > $$rlast_level ) { 5143 unless ( $rpackage_stack->[ $level - 1 ] ) { 5144 $rpackage_stack->[ $level - 1 ] = 'main'; 5145 } 5146 $rpackage_stack->[$level] = $rpackage_stack->[ $level - 1 ]; 5147 } 5148 elsif ( $level < $$rlast_level ) { 5149 my $package = $rpackage_stack->[$level]; 5150 unless ($package) { $package = 'main' } 5151 5152 # if we change packages due to a nesting change, we 5153 # have to make an entry in the toc 5154 if ( $package ne $rpackage_stack->[ $level + 1 ] ) { 5155 $self->add_toc_item( $package, 'package' ); 5156 } 5157 } 5158 $$rlast_level = $level; 5159 5160 #------------------------------------------------------- 5161 # Intercept a sub name here; split it 5162 # into keyword 'sub' and sub name; and add an 5163 # entry in the toc 5164 #------------------------------------------------------- 5165 if ( $type eq 'i' && $token =~ /^(sub\s+)(\w.*)$/ ) { 5166 $token = $self->markup_html_element( $1, 'k' ); 5167 push @colored_tokens, $token; 5168 $token = $2; 5169 $type = 'M'; 5170 5171 # but don't include sub declarations in the toc; 5172 # these wlll have leading token types 'i;' 5173 my $signature = join "", @$rtoken_type; 5174 unless ( $signature =~ /^i;/ ) { 5175 my $subname = $token; 5176 $subname =~ s/[\s\(].*$//; # remove any attributes and prototype 5177 $self->add_toc_item( $subname, 'sub' ); 5178 } 5179 } 5180 5181 #------------------------------------------------------- 5182 # Intercept a package name here; split it 5183 # into keyword 'package' and name; add to the toc, 5184 # and update the package stack 5185 #------------------------------------------------------- 5186 if ( $type eq 'i' && $token =~ /^(package\s+)(\w.*)$/ ) { 5187 $token = $self->markup_html_element( $1, 'k' ); 5188 push @colored_tokens, $token; 5189 $token = $2; 5190 $type = 'i'; 5191 $self->add_toc_item( "$token", 'package' ); 5192 $rpackage_stack->[$level] = $token; 5193 } 5194 5195 
$token = $self->markup_html_element( $token, $type ); 5196 push @colored_tokens, $token; 5197 } 5198 return ( \@colored_tokens ); 5199} 5200 5201sub markup_html_element { 5202 my $self = shift; 5203 my ( $token, $type ) = @_; 5204 5205 return $token if ( $type eq 'b' ); # skip a blank token 5206 return $token if ( $token =~ /^\s*$/ ); # skip a blank line 5207 $token = escape_html($token); 5208 5209 # get the short abbreviation for this token type 5210 my $short_name = $token_short_names{$type}; 5211 if ( !defined($short_name) ) { 5212 $short_name = "pu"; # punctuation is default 5213 } 5214 5215 # handle style sheets.. 5216 if ( !$rOpts->{'nohtml-style-sheets'} ) { 5217 if ( $short_name ne 'pu' ) { 5218 $token = qq(<span class="$short_name">) . $token . "</span>"; 5219 } 5220 } 5221 5222 # handle no style sheets.. 5223 else { 5224 my $color = $html_color{$short_name}; 5225 5226 if ( $color && ( $color ne $rOpts->{'html-color-punctuation'} ) ) { 5227 $token = qq(<font color="$color">) . $token . 
"</font>"; 5228 } 5229 if ( $html_italic{$short_name} ) { $token = "<i>$token</i>" } 5230 if ( $html_bold{$short_name} ) { $token = "<b>$token</b>" } 5231 } 5232 return $token; 5233} 5234 5235sub escape_html { 5236 5237 my $token = shift; 5238 if ($missing_html_entities) { 5239 $token =~ s/\&/&/g; 5240 $token =~ s/\</</g; 5241 $token =~ s/\>/>/g; 5242 $token =~ s/\"/"/g; 5243 } 5244 else { 5245 HTML::Entities::encode_entities($token); 5246 } 5247 return $token; 5248} 5249 5250sub finish_formatting { 5251 5252 # called after last line 5253 my $self = shift; 5254 $self->close_html_file(); 5255 return; 5256} 5257 5258sub write_line { 5259 5260 my $self = shift; 5261 return unless $self->{_html_file_opened}; 5262 my $html_pre_fh = $self->{_html_pre_fh}; 5263 my ($line_of_tokens) = @_; 5264 my $line_type = $line_of_tokens->{_line_type}; 5265 my $input_line = $line_of_tokens->{_line_text}; 5266 my $line_number = $line_of_tokens->{_line_number}; 5267 chomp $input_line; 5268 5269 # markup line of code.. 5270 my $html_line; 5271 if ( $line_type eq 'CODE' ) { 5272 my $rtoken_type = $line_of_tokens->{_rtoken_type}; 5273 my $rtokens = $line_of_tokens->{_rtokens}; 5274 my $rlevels = $line_of_tokens->{_rlevels}; 5275 5276 if ( $input_line =~ /(^\s*)/ ) { 5277 $html_line = $1; 5278 } 5279 else { 5280 $html_line = ""; 5281 } 5282 my ($rcolored_tokens) = 5283 $self->markup_tokens( $rtokens, $rtoken_type, $rlevels ); 5284 $html_line .= join '', @$rcolored_tokens; 5285 } 5286 5287 # markup line of non-code.. 
5288 else { 5289 my $line_character; 5290 if ( $line_type eq 'HERE' ) { $line_character = 'H' } 5291 elsif ( $line_type eq 'HERE_END' ) { $line_character = 'h' } 5292 elsif ( $line_type eq 'FORMAT' ) { $line_character = 'H' } 5293 elsif ( $line_type eq 'FORMAT_END' ) { $line_character = 'h' } 5294 elsif ( $line_type eq 'SYSTEM' ) { $line_character = 'c' } 5295 elsif ( $line_type eq 'END_START' ) { 5296 $line_character = 'k'; 5297 $self->add_toc_item( '__END__', '__END__' ); 5298 } 5299 elsif ( $line_type eq 'DATA_START' ) { 5300 $line_character = 'k'; 5301 $self->add_toc_item( '__DATA__', '__DATA__' ); 5302 } 5303 elsif ( $line_type =~ /^POD/ ) { 5304 $line_character = 'P'; 5305 if ( $rOpts->{'pod2html'} ) { 5306 my $html_pod_fh = $self->{_html_pod_fh}; 5307 if ( $line_type eq 'POD_START' ) { 5308 5309 my $rpre_string_stack = $self->{_rpre_string_stack}; 5310 my $rpre_string = $rpre_string_stack->[-1]; 5311 5312 # if we have written any non-blank lines to the 5313 # current pre section, start writing to a new output 5314 # string 5315 if ( $$rpre_string =~ /\S/ ) { 5316 my $pre_string; 5317 $html_pre_fh = 5318 Perl::Tidy::IOScalar->new( \$pre_string, 'w' ); 5319 $self->{_html_pre_fh} = $html_pre_fh; 5320 push @$rpre_string_stack, \$pre_string; 5321 5322 # leave a marker in the pod stream so we know 5323 # where to put the pre section we just 5324 # finished. 5325 my $for_html = '=for html'; # don't confuse pod utils 5326 $html_pod_fh->print(<<EOM); 5327 5328$for_html 5329<!-- pERLTIDY sECTION --> 5330 5331EOM 5332 } 5333 5334 # otherwise, just clear the current string and start 5335 # over 5336 else { 5337 $$rpre_string = ""; 5338 $html_pod_fh->print("\n"); 5339 } 5340 } 5341 $html_pod_fh->print( $input_line . 
"\n" ); 5342 if ( $line_type eq 'POD_END' ) { 5343 $self->{_pod_cut_count}++; 5344 $html_pod_fh->print("\n"); 5345 } 5346 return; 5347 } 5348 } 5349 else { $line_character = 'Q' } 5350 $html_line = $self->markup_html_element( $input_line, $line_character ); 5351 } 5352 5353 # add the line number if requested 5354 if ( $rOpts->{'html-line-numbers'} ) { 5355 my $extra_space .= 5356 ( $line_number < 10 ) ? " " 5357 : ( $line_number < 100 ) ? " " 5358 : ( $line_number < 1000 ) ? " " 5359 : ""; 5360 $html_line = $extra_space . $line_number . " " . $html_line; 5361 } 5362 5363 # write the line 5364 $html_pre_fh->print("$html_line\n"); 5365} 5366 5367##################################################################### 5368# 5369# The Perl::Tidy::Formatter package adds indentation, whitespace, and 5370# line breaks to the token stream 5371# 5372# WARNING: This is not a real class for speed reasons. Only one 5373# Formatter may be used. 5374# 5375##################################################################### 5376 5377package Perl::Tidy::Formatter; 5378 5379BEGIN { 5380 5381 # Caution: these debug flags produce a lot of output 5382 # They should all be 0 except when debugging small scripts 5383 use constant FORMATTER_DEBUG_FLAG_BOND => 0; 5384 use constant FORMATTER_DEBUG_FLAG_BREAK => 0; 5385 use constant FORMATTER_DEBUG_FLAG_CI => 0; 5386 use constant FORMATTER_DEBUG_FLAG_FLUSH => 0; 5387 use constant FORMATTER_DEBUG_FLAG_FORCE => 0; 5388 use constant FORMATTER_DEBUG_FLAG_LIST => 0; 5389 use constant FORMATTER_DEBUG_FLAG_NOBREAK => 0; 5390 use constant FORMATTER_DEBUG_FLAG_OUTPUT => 0; 5391 use constant FORMATTER_DEBUG_FLAG_SPARSE => 0; 5392 use constant FORMATTER_DEBUG_FLAG_STORE => 0; 5393 use constant FORMATTER_DEBUG_FLAG_UNDOBP => 0; 5394 use constant FORMATTER_DEBUG_FLAG_WHITE => 0; 5395 5396 my $debug_warning = sub { 5397 print "FORMATTER_DEBUGGING with key $_[0]\n"; 5398 }; 5399 5400 FORMATTER_DEBUG_FLAG_BOND && $debug_warning->('BOND'); 5401 
FORMATTER_DEBUG_FLAG_BREAK && $debug_warning->('BREAK'); 5402 FORMATTER_DEBUG_FLAG_CI && $debug_warning->('CI'); 5403 FORMATTER_DEBUG_FLAG_FLUSH && $debug_warning->('FLUSH'); 5404 FORMATTER_DEBUG_FLAG_FORCE && $debug_warning->('FORCE'); 5405 FORMATTER_DEBUG_FLAG_LIST && $debug_warning->('LIST'); 5406 FORMATTER_DEBUG_FLAG_NOBREAK && $debug_warning->('NOBREAK'); 5407 FORMATTER_DEBUG_FLAG_OUTPUT && $debug_warning->('OUTPUT'); 5408 FORMATTER_DEBUG_FLAG_SPARSE && $debug_warning->('SPARSE'); 5409 FORMATTER_DEBUG_FLAG_STORE && $debug_warning->('STORE'); 5410 FORMATTER_DEBUG_FLAG_UNDOBP && $debug_warning->('UNDOBP'); 5411 FORMATTER_DEBUG_FLAG_WHITE && $debug_warning->('WHITE'); 5412} 5413 5414use Carp; 5415use vars qw{ 5416 5417 @gnu_stack 5418 $max_gnu_stack_index 5419 $gnu_position_predictor 5420 $line_start_index_to_go 5421 $last_indentation_written 5422 $last_unadjusted_indentation 5423 $last_leading_token 5424 5425 $saw_VERSION_in_this_file 5426 $saw_END_or_DATA_ 5427 5428 @gnu_item_list 5429 $max_gnu_item_index 5430 $gnu_sequence_number 5431 $last_output_indentation 5432 %last_gnu_equals 5433 %gnu_comma_count 5434 %gnu_arrow_count 5435 5436 @block_type_to_go 5437 @type_sequence_to_go 5438 @container_environment_to_go 5439 @bond_strength_to_go 5440 @forced_breakpoint_to_go 5441 @lengths_to_go 5442 @levels_to_go 5443 @leading_spaces_to_go 5444 @reduced_spaces_to_go 5445 @matching_token_to_go 5446 @mate_index_to_go 5447 @nesting_blocks_to_go 5448 @ci_levels_to_go 5449 @nesting_depth_to_go 5450 @nobreak_to_go 5451 @old_breakpoint_to_go 5452 @tokens_to_go 5453 @types_to_go 5454 5455 %saved_opening_indentation 5456 5457 $max_index_to_go 5458 $comma_count_in_batch 5459 $old_line_count_in_batch 5460 $last_nonblank_index_to_go 5461 $last_nonblank_type_to_go 5462 $last_nonblank_token_to_go 5463 $last_last_nonblank_index_to_go 5464 $last_last_nonblank_type_to_go 5465 $last_last_nonblank_token_to_go 5466 @nonblank_lines_at_depth 5467 $starting_in_quote 5468 $ending_in_quote 5469 
5470 $in_format_skipping_section 5471 $format_skipping_pattern_begin 5472 $format_skipping_pattern_end 5473 5474 $forced_breakpoint_count 5475 $forced_breakpoint_undo_count 5476 @forced_breakpoint_undo_stack 5477 %postponed_breakpoint 5478 5479 $tabbing 5480 $embedded_tab_count 5481 $first_embedded_tab_at 5482 $last_embedded_tab_at 5483 $deleted_semicolon_count 5484 $first_deleted_semicolon_at 5485 $last_deleted_semicolon_at 5486 $added_semicolon_count 5487 $first_added_semicolon_at 5488 $last_added_semicolon_at 5489 $first_tabbing_disagreement 5490 $last_tabbing_disagreement 5491 $in_tabbing_disagreement 5492 $tabbing_disagreement_count 5493 $input_line_tabbing 5494 5495 $last_line_type 5496 $last_line_leading_type 5497 $last_line_leading_level 5498 $last_last_line_leading_level 5499 5500 %block_leading_text 5501 %block_opening_line_number 5502 $csc_new_statement_ok 5503 $accumulating_text_for_block 5504 $leading_block_text 5505 $rleading_block_if_elsif_text 5506 $leading_block_text_level 5507 $leading_block_text_length_exceeded 5508 $leading_block_text_line_length 5509 $leading_block_text_line_number 5510 $closing_side_comment_prefix_pattern 5511 $closing_side_comment_list_pattern 5512 5513 $last_nonblank_token 5514 $last_nonblank_type 5515 $last_last_nonblank_token 5516 $last_last_nonblank_type 5517 $last_nonblank_block_type 5518 $last_output_level 5519 %is_do_follower 5520 %is_if_brace_follower 5521 %space_after_keyword 5522 $rbrace_follower 5523 $looking_for_else 5524 %is_last_next_redo_return 5525 %is_other_brace_follower 5526 %is_else_brace_follower 5527 %is_anon_sub_brace_follower 5528 %is_anon_sub_1_brace_follower 5529 %is_sort_map_grep 5530 %is_sort_map_grep_eval 5531 %is_sort_map_grep_eval_do 5532 %is_block_without_semicolon 5533 %is_if_unless 5534 %is_and_or 5535 %is_assignment 5536 %is_chain_operator 5537 %is_if_unless_and_or_last_next_redo_return 5538 %is_until_while_for_if_elsif_else 5539 5540 @has_broken_sublist 5541 @dont_align 5542 
@want_comma_break 5543 5544 $is_static_block_comment 5545 $index_start_one_line_block 5546 $semicolons_before_block_self_destruct 5547 $index_max_forced_break 5548 $input_line_number 5549 $diagnostics_object 5550 $vertical_aligner_object 5551 $logger_object 5552 $file_writer_object 5553 $formatter_self 5554 @ci_stack 5555 $last_line_had_side_comment 5556 %want_break_before 5557 %outdent_keyword 5558 $static_block_comment_pattern 5559 $static_side_comment_pattern 5560 %opening_vertical_tightness 5561 %closing_vertical_tightness 5562 %closing_token_indentation 5563 5564 %opening_token_right 5565 %stack_opening_token 5566 %stack_closing_token 5567 5568 $block_brace_vertical_tightness_pattern 5569 5570 $rOpts_add_newlines 5571 $rOpts_add_whitespace 5572 $rOpts_block_brace_tightness 5573 $rOpts_block_brace_vertical_tightness 5574 $rOpts_brace_left_and_indent 5575 $rOpts_comma_arrow_breakpoints 5576 $rOpts_break_at_old_keyword_breakpoints 5577 $rOpts_break_at_old_comma_breakpoints 5578 $rOpts_break_at_old_logical_breakpoints 5579 $rOpts_break_at_old_ternary_breakpoints 5580 $rOpts_closing_side_comment_else_flag 5581 $rOpts_closing_side_comment_maximum_text 5582 $rOpts_continuation_indentation 5583 $rOpts_cuddled_else 5584 $rOpts_delete_old_whitespace 5585 $rOpts_fuzzy_line_length 5586 $rOpts_indent_columns 5587 $rOpts_line_up_parentheses 5588 $rOpts_maximum_fields_per_table 5589 $rOpts_maximum_line_length 5590 $rOpts_short_concatenation_item_length 5591 $rOpts_swallow_optional_blank_lines 5592 $rOpts_ignore_old_breakpoints 5593 $rOpts_format_skipping 5594 $rOpts_space_function_paren 5595 $rOpts_space_keyword_paren 5596 $rOpts_keep_interior_semicolons 5597 5598 $half_maximum_line_length 5599 5600 %is_opening_type 5601 %is_closing_type 5602 %is_keyword_returning_list 5603 %tightness 5604 %matching_token 5605 $rOpts 5606 %right_bond_strength 5607 %left_bond_strength 5608 %binary_ws_rules 5609 %want_left_space 5610 %want_right_space 5611 %is_digraph 5612 %is_trigraph 5613 
$bli_pattern 5614 $bli_list_string 5615 %is_closing_type 5616 %is_opening_type 5617 %is_closing_token 5618 %is_opening_token 5619}; 5620 5621BEGIN { 5622 5623 # default list of block types for which -bli would apply 5624 $bli_list_string = 'if else elsif unless while for foreach do : sub'; 5625 5626 @_ = qw( 5627 .. :: << >> ** && .. || // -> => += -= .= %= &= |= ^= *= <> 5628 <= >= == =~ !~ != ++ -- /= x= 5629 ); 5630 @is_digraph{@_} = (1) x scalar(@_); 5631 5632 @_ = qw( ... **= <<= >>= &&= ||= //= <=> ); 5633 @is_trigraph{@_} = (1) x scalar(@_); 5634 5635 @_ = qw( 5636 = **= += *= &= <<= &&= 5637 -= /= |= >>= ||= //= 5638 .= %= ^= 5639 x= 5640 ); 5641 @is_assignment{@_} = (1) x scalar(@_); 5642 5643 @_ = qw( 5644 grep 5645 keys 5646 map 5647 reverse 5648 sort 5649 split 5650 ); 5651 @is_keyword_returning_list{@_} = (1) x scalar(@_); 5652 5653 @_ = qw(is if unless and or err last next redo return); 5654 @is_if_unless_and_or_last_next_redo_return{@_} = (1) x scalar(@_); 5655 5656 # always break after a closing curly of these block types: 5657 @_ = qw(until while for if elsif else); 5658 @is_until_while_for_if_elsif_else{@_} = (1) x scalar(@_); 5659 5660 @_ = qw(last next redo return); 5661 @is_last_next_redo_return{@_} = (1) x scalar(@_); 5662 5663 @_ = qw(sort map grep); 5664 @is_sort_map_grep{@_} = (1) x scalar(@_); 5665 5666 @_ = qw(sort map grep eval); 5667 @is_sort_map_grep_eval{@_} = (1) x scalar(@_); 5668 5669 @_ = qw(sort map grep eval do); 5670 @is_sort_map_grep_eval_do{@_} = (1) x scalar(@_); 5671 5672 @_ = qw(if unless); 5673 @is_if_unless{@_} = (1) x scalar(@_); 5674 5675 @_ = qw(and or err); 5676 @is_and_or{@_} = (1) x scalar(@_); 5677 5678 # Identify certain operators which often occur in chains. 
5679 # Note: the minus (-) causes a side effect of padding of the first line in 5680 # something like this (by sub set_logical_padding): 5681 # Checkbutton => 'Transmission checked', 5682 # -variable => \$TRANS 5683 # This usually improves appearance so it seems ok. 5684 @_ = qw(&& || and or : ? . + - * /); 5685 @is_chain_operator{@_} = (1) x scalar(@_); 5686 5687 # We can remove semicolons after blocks preceded by these keywords 5688 @_ = 5689 qw(BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue if elsif else 5690 unless while until for foreach); 5691 @is_block_without_semicolon{@_} = (1) x scalar(@_); 5692 5693 # 'L' is token for opening { at hash key 5694 @_ = qw" L { ( [ "; 5695 @is_opening_type{@_} = (1) x scalar(@_); 5696 5697 # 'R' is token for closing } at hash key 5698 @_ = qw" R } ) ] "; 5699 @is_closing_type{@_} = (1) x scalar(@_); 5700 5701 @_ = qw" { ( [ "; 5702 @is_opening_token{@_} = (1) x scalar(@_); 5703 5704 @_ = qw" } ) ] "; 5705 @is_closing_token{@_} = (1) x scalar(@_); 5706} 5707 5708# whitespace codes 5709use constant WS_YES => 1; 5710use constant WS_OPTIONAL => 0; 5711use constant WS_NO => -1; 5712 5713# Token bond strengths. 5714use constant NO_BREAK => 10000; 5715use constant VERY_STRONG => 100; 5716use constant STRONG => 2.1; 5717use constant NOMINAL => 1.1; 5718use constant WEAK => 0.8; 5719use constant VERY_WEAK => 0.55; 5720 5721# values for testing indexes in output array 5722use constant UNDEFINED_INDEX => -1; 5723 5724# Maximum number of little messages; probably need not be changed. 5725use constant MAX_NAG_MESSAGES => 6; 5726 5727# increment between sequence numbers for each type 5728# For example, ?: pairs might have numbers 7,11,15,... 
5729use constant TYPE_SEQUENCE_INCREMENT => 4; 5730 5731{ 5732 5733 # methods to count instances 5734 my $_count = 0; 5735 sub get_count { $_count; } 5736 sub _increment_count { ++$_count } 5737 sub _decrement_count { --$_count } 5738} 5739 5740sub trim { 5741 5742 # trim leading and trailing whitespace from a string 5743 $_[0] =~ s/\s+$//; 5744 $_[0] =~ s/^\s+//; 5745 return $_[0]; 5746} 5747 5748sub split_words { 5749 5750 # given a string containing words separated by whitespace, 5751 # return the list of words 5752 my ($str) = @_; 5753 return unless $str; 5754 $str =~ s/\s+$//; 5755 $str =~ s/^\s+//; 5756 return split( /\s+/, $str ); 5757} 5758 5759# interface to Perl::Tidy::Logger routines 5760sub warning { 5761 if ($logger_object) { 5762 $logger_object->warning(@_); 5763 } 5764} 5765 5766sub complain { 5767 if ($logger_object) { 5768 $logger_object->complain(@_); 5769 } 5770} 5771 5772sub write_logfile_entry { 5773 if ($logger_object) { 5774 $logger_object->write_logfile_entry(@_); 5775 } 5776} 5777 5778sub black_box { 5779 if ($logger_object) { 5780 $logger_object->black_box(@_); 5781 } 5782} 5783 5784sub report_definite_bug { 5785 if ($logger_object) { 5786 $logger_object->report_definite_bug(); 5787 } 5788} 5789 5790sub get_saw_brace_error { 5791 if ($logger_object) { 5792 $logger_object->get_saw_brace_error(); 5793 } 5794} 5795 5796sub we_are_at_the_last_line { 5797 if ($logger_object) { 5798 $logger_object->we_are_at_the_last_line(); 5799 } 5800} 5801 5802# interface to Perl::Tidy::Diagnostics routine 5803sub write_diagnostics { 5804 5805 if ($diagnostics_object) { 5806 $diagnostics_object->write_diagnostics(@_); 5807 } 5808} 5809 5810sub get_added_semicolon_count { 5811 my $self = shift; 5812 return $added_semicolon_count; 5813} 5814 5815sub DESTROY { 5816 $_[0]->_decrement_count(); 5817} 5818 5819sub new { 5820 5821 my $class = shift; 5822 5823 # we are given an object with a write_line() method to take lines 5824 my %defaults = ( 5825 sink_object => 
undef, 5826 diagnostics_object => undef, 5827 logger_object => undef, 5828 ); 5829 my %args = ( %defaults, @_ ); 5830 5831 $logger_object = $args{logger_object}; 5832 $diagnostics_object = $args{diagnostics_object}; 5833 5834 # we create another object with a get_line() and peek_ahead() method 5835 my $sink_object = $args{sink_object}; 5836 $file_writer_object = 5837 Perl::Tidy::FileWriter->new( $sink_object, $rOpts, $logger_object ); 5838 5839 # initialize the leading whitespace stack to negative levels 5840 # so that we can never run off the end of the stack 5841 $gnu_position_predictor = 0; # where the current token is predicted to be 5842 $max_gnu_stack_index = 0; 5843 $max_gnu_item_index = -1; 5844 $gnu_stack[0] = new_lp_indentation_item( 0, -1, -1, 0, 0 ); 5845 @gnu_item_list = (); 5846 $last_output_indentation = 0; 5847 $last_indentation_written = 0; 5848 $last_unadjusted_indentation = 0; 5849 $last_leading_token = ""; 5850 5851 $saw_VERSION_in_this_file = !$rOpts->{'pass-version-line'}; 5852 $saw_END_or_DATA_ = 0; 5853 5854 @block_type_to_go = (); 5855 @type_sequence_to_go = (); 5856 @container_environment_to_go = (); 5857 @bond_strength_to_go = (); 5858 @forced_breakpoint_to_go = (); 5859 @lengths_to_go = (); # line length to start of ith token 5860 @levels_to_go = (); 5861 @matching_token_to_go = (); 5862 @mate_index_to_go = (); 5863 @nesting_blocks_to_go = (); 5864 @ci_levels_to_go = (); 5865 @nesting_depth_to_go = (0); 5866 @nobreak_to_go = (); 5867 @old_breakpoint_to_go = (); 5868 @tokens_to_go = (); 5869 @types_to_go = (); 5870 @leading_spaces_to_go = (); 5871 @reduced_spaces_to_go = (); 5872 5873 @dont_align = (); 5874 @has_broken_sublist = (); 5875 @want_comma_break = (); 5876 5877 @ci_stack = (""); 5878 $first_tabbing_disagreement = 0; 5879 $last_tabbing_disagreement = 0; 5880 $tabbing_disagreement_count = 0; 5881 $in_tabbing_disagreement = 0; 5882 $input_line_tabbing = undef; 5883 5884 $last_line_type = ""; 5885 $last_last_line_leading_level = 0; 
5886 $last_line_leading_level = 0; 5887 $last_line_leading_type = '#'; 5888 5889 $last_nonblank_token = ';'; 5890 $last_nonblank_type = ';'; 5891 $last_last_nonblank_token = ';'; 5892 $last_last_nonblank_type = ';'; 5893 $last_nonblank_block_type = ""; 5894 $last_output_level = 0; 5895 $looking_for_else = 0; 5896 $embedded_tab_count = 0; 5897 $first_embedded_tab_at = 0; 5898 $last_embedded_tab_at = 0; 5899 $deleted_semicolon_count = 0; 5900 $first_deleted_semicolon_at = 0; 5901 $last_deleted_semicolon_at = 0; 5902 $added_semicolon_count = 0; 5903 $first_added_semicolon_at = 0; 5904 $last_added_semicolon_at = 0; 5905 $last_line_had_side_comment = 0; 5906 $is_static_block_comment = 0; 5907 %postponed_breakpoint = (); 5908 5909 # variables for adding side comments 5910 %block_leading_text = (); 5911 %block_opening_line_number = (); 5912 $csc_new_statement_ok = 1; 5913 5914 %saved_opening_indentation = (); 5915 $in_format_skipping_section = 0; 5916 5917 reset_block_text_accumulator(); 5918 5919 prepare_for_new_input_lines(); 5920 5921 $vertical_aligner_object = 5922 Perl::Tidy::VerticalAligner->initialize( $rOpts, $file_writer_object, 5923 $logger_object, $diagnostics_object ); 5924 5925 if ( $rOpts->{'entab-leading-whitespace'} ) { 5926 write_logfile_entry( 5927"Leading whitespace will be entabbed with $rOpts->{'entab-leading-whitespace'} spaces per tab\n" 5928 ); 5929 } 5930 elsif ( $rOpts->{'tabs'} ) { 5931 write_logfile_entry("Indentation will be with a tab character\n"); 5932 } 5933 else { 5934 write_logfile_entry( 5935 "Indentation will be with $rOpts->{'indent-columns'} spaces\n"); 5936 } 5937 5938 # This was the start of a formatter referent, but object-oriented 5939 # coding has turned out to be too slow here. 
5940 $formatter_self = {}; 5941 5942 bless $formatter_self, $class; 5943 5944 # Safety check..this is not a class yet 5945 if ( _increment_count() > 1 ) { 5946 confess 5947"Attempt to create more than 1 object in $class, which is not a true class yet\n"; 5948 } 5949 return $formatter_self; 5950} 5951 5952sub prepare_for_new_input_lines { 5953 5954 $gnu_sequence_number++; # increment output batch counter 5955 %last_gnu_equals = (); 5956 %gnu_comma_count = (); 5957 %gnu_arrow_count = (); 5958 $line_start_index_to_go = 0; 5959 $max_gnu_item_index = UNDEFINED_INDEX; 5960 $index_max_forced_break = UNDEFINED_INDEX; 5961 $max_index_to_go = UNDEFINED_INDEX; 5962 $last_nonblank_index_to_go = UNDEFINED_INDEX; 5963 $last_nonblank_type_to_go = ''; 5964 $last_nonblank_token_to_go = ''; 5965 $last_last_nonblank_index_to_go = UNDEFINED_INDEX; 5966 $last_last_nonblank_type_to_go = ''; 5967 $last_last_nonblank_token_to_go = ''; 5968 $forced_breakpoint_count = 0; 5969 $forced_breakpoint_undo_count = 0; 5970 $rbrace_follower = undef; 5971 $lengths_to_go[0] = 0; 5972 $old_line_count_in_batch = 1; 5973 $comma_count_in_batch = 0; 5974 $starting_in_quote = 0; 5975 5976 destroy_one_line_block(); 5977} 5978 5979sub write_line { 5980 5981 my $self = shift; 5982 my ($line_of_tokens) = @_; 5983 5984 my $line_type = $line_of_tokens->{_line_type}; 5985 my $input_line = $line_of_tokens->{_line_text}; 5986 5987 # _line_type codes are: 5988 # SYSTEM - system-specific code before hash-bang line 5989 # CODE - line of perl code (including comments) 5990 # POD_START - line starting pod, such as '=head' 5991 # POD - pod documentation text 5992 # POD_END - last line of pod section, '=cut' 5993 # HERE - text of here-document 5994 # HERE_END - last line of here-doc (target word) 5995 # FORMAT - format section 5996 # FORMAT_END - last line of format section, '.' 
5997 # DATA_START - __DATA__ line 5998 # DATA - unidentified text following __DATA__ 5999 # END_START - __END__ line 6000 # END - unidentified text following __END__ 6001 # ERROR - we are in big trouble, probably not a perl script 6002 6003 # put a blank line after an =cut which comes before __END__ and __DATA__ 6004 # (required by podchecker) 6005 if ( $last_line_type eq 'POD_END' && !$saw_END_or_DATA_ ) { 6006 $file_writer_object->reset_consecutive_blank_lines(); 6007 if ( $input_line !~ /^\s*$/ ) { want_blank_line() } 6008 } 6009 6010 # handle line of code.. 6011 if ( $line_type eq 'CODE' ) { 6012 6013 # let logger see all non-blank lines of code 6014 if ( $input_line !~ /^\s*$/ ) { 6015 my $output_line_number = 6016 $vertical_aligner_object->get_output_line_number(); 6017 black_box( $line_of_tokens, $output_line_number ); 6018 } 6019 print_line_of_tokens($line_of_tokens); 6020 } 6021 6022 # handle line of non-code.. 6023 else { 6024 6025 # set special flags 6026 my $skip_line = 0; 6027 my $tee_line = 0; 6028 if ( $line_type =~ /^POD/ ) { 6029 6030 # Pod docs should have a preceding blank line. But be 6031 # very careful in __END__ and __DATA__ sections, because: 6032 # 1. the user may be using this section for any purpose whatsoever 6033 # 2. the blank counters are not active there 6034 # It should be safe to request a blank line between an 6035 # __END__ or __DATA__ and an immediately following '=head' 6036 # type line, (types END_START and DATA_START), but not for 6037 # any other lines of type END or DATA. 
# remainder of the non-code (POD / __END__ / __DATA__) line handling;
# statements unchanged
            if ( $rOpts->{'delete-pod'} ) { $skip_line = 1; }
            if ( $rOpts->{'tee-pod'} )    { $tee_line  = 1; }
            if (  !$skip_line
                && $line_type eq 'POD_START'
                && $last_line_type !~ /^(END|DATA)$/ )
            {
                want_blank_line();
            }
        }

        # leave the blank counters in a predictable state
        # after __END__ or __DATA__
        elsif ( $line_type =~ /^(END_START|DATA_START)$/ ) {
            $file_writer_object->reset_consecutive_blank_lines();
            $saw_END_or_DATA_ = 1;
        }

        # write unindented non-code line
        if ( !$skip_line ) {
            if ($tee_line) { $file_writer_object->tee_on() }
            write_unindented_line($input_line);
            if ($tee_line) { $file_writer_object->tee_off() }
        }
    }
    $last_line_type = $line_type;
}

sub create_one_line_block {

    # Record the state of a one-line block from the two arguments:
    #   $_[0] - stored in $index_start_one_line_block
    #   $_[1] - stored in $semicolons_before_block_self_destruct
    $index_start_one_line_block            = $_[0];
    $semicolons_before_block_self_destruct = $_[1];
}

sub destroy_one_line_block {

    # Reset the one-line block state set by 'create_one_line_block':
    # the start index goes back to UNDEFINED_INDEX and the semicolon
    # count back to zero.
    $index_start_one_line_block            = UNDEFINED_INDEX;
    $semicolons_before_block_self_destruct = 0;
}

sub leading_spaces_to_go {

    # return the number of indentation spaces for a token in the output stream;
    # these were previously stored by 'set_leading_whitespace'.
    # given: $_[0] = index of the token in the '_to_go' arrays

    return get_SPACES( $leading_spaces_to_go[ $_[0] ] );

}

sub get_SPACES {

    # return the number of leading spaces associated with an indentation
    # variable.  $indentation is either a constant number of spaces (which
    # is returned unchanged) or an object with a get_SPACES method (whose
    # result is returned).
    my $indentation = shift;
    return ref($indentation) ? $indentation->get_SPACES() : $indentation;
}

sub get_RECOVERABLE_SPACES {

    # return the number of spaces (+ means shift right, - means shift left)
    # that we would like to shift a group of lines with the same indentation
    # to get them to line up with their opening parens.
    # A plain (non-reference) indentation value has nothing to recover,
    # so 0 is returned for it.
    my $indentation = shift;
    return ref($indentation) ? $indentation->get_RECOVERABLE_SPACES() : 0;
}

sub get_AVAILABLE_SPACES_to_go {

    # given: $_[0] = index of a token in the '_to_go' arrays
    # return the number of available leading spaces associated with the
    # indentation stored for that token.  The stored item is either a
    # constant number of spaces (result is 0) or an object with a
    # get_AVAILABLE_SPACES method.
    my $item = $leading_spaces_to_go[ $_[0] ];

    return ref($item) ? $item->get_AVAILABLE_SPACES() : 0;
}

sub new_lp_indentation_item {

    # this is an interface to the IndentationItem class
    #
    # given:  $spaces, $level, $ci_level, $available_spaces, $align_paren
    # return: a new Perl::Tidy::IndentationItem built from these values
    #         plus the current batch state ($gnu_sequence_number,
    #         $max_gnu_stack_index, $line_start_index_to_go)
    my ( $spaces, $level, $ci_level, $available_spaces, $align_paren ) = @_;

    # A negative level implies not to store the item in the item_list
    my $index = 0;
    if ( $level >= 0 ) { $index = ++$max_gnu_item_index; }

    my $item = Perl::Tidy::IndentationItem->new(
        $spaces,      $level,
        $ci_level,    $available_spaces,
        $index,       $gnu_sequence_number,
        $align_paren, $max_gnu_stack_index,
        $line_start_index_to_go,
    );

    if ( $level >= 0 ) {
        $gnu_item_list[$max_gnu_item_index] = $item;
    }

    return $item;
}

sub set_leading_whitespace {

    # This routine defines leading whitespace
    # given: the level and continuation_level of a token,
    # define: space count of leading string which would apply if it
    # were the first token of a new line.
    #
    # The result is stored in $leading_spaces_to_go[$max_index_to_go] and
    # $reduced_spaces_to_go[$max_index_to_go]: plain numbers in the
    # standard scheme, indentation item objects in the -lp scheme.

    my ( $level, $ci_level, $in_continued_quote ) = @_;

    # modify for -bli, which adds one continuation indentation for
    # opening braces
    if (   $rOpts_brace_left_and_indent
        && $max_index_to_go == 0
        && $block_type_to_go[$max_index_to_go] =~ /$bli_pattern/o )
    {
        $ci_level++;
    }

    # patch to avoid trouble when input file has negative indentation.
    # other logic should catch this error.
    if ( $level < 0 ) { $level = 0 }

    #-------------------------------------------
    # handle the standard indentation scheme
    #-------------------------------------------
    unless ($rOpts_line_up_parentheses) {
        my $space_count =
          $ci_level * $rOpts_continuation_indentation +
          $level * $rOpts_indent_columns;
        my $ci_spaces =
          ( $ci_level == 0 ) ? 0 : $rOpts_continuation_indentation;

        if ($in_continued_quote) {
            $space_count = 0;
            $ci_spaces   = 0;
        }
        $leading_spaces_to_go[$max_index_to_go] = $space_count;
        $reduced_spaces_to_go[$max_index_to_go] = $space_count - $ci_spaces;
        return;
    }

    #-------------------------------------------------------------
    # handle case of -lp indentation..
    #-------------------------------------------------------------

    # The continued_quote flag means that this is the first token of a
    # line, and it is the continuation of some kind of multi-line quote
    # or pattern.  It requires special treatment because it must have no
    # added leading whitespace. So we create a special indentation item
    # which is not in the stack.
    if ($in_continued_quote) {
        my $space_count     = 0;
        my $available_space = 0;
        $level = -1;    # flag to prevent storing in item_list
        $leading_spaces_to_go[$max_index_to_go] =
          $reduced_spaces_to_go[$max_index_to_go] =
          new_lp_indentation_item( $space_count, $level, $ci_level,
            $available_space, 0 );
        return;
    }

    # get the top state from the stack
    my $space_count      = $gnu_stack[$max_gnu_stack_index]->get_SPACES();
    my $current_level    = $gnu_stack[$max_gnu_stack_index]->get_LEVEL();
    my $current_ci_level = $gnu_stack[$max_gnu_stack_index]->get_CI_LEVEL();

    my $type        = $types_to_go[$max_index_to_go];
    my $token       = $tokens_to_go[$max_index_to_go];
    my $total_depth = $nesting_depth_to_go[$max_index_to_go];

    if ( $type eq '{' || $type eq '(' ) {

        # start fresh comma and arrow counts for the container being opened
        $gnu_comma_count{ $total_depth + 1 } = 0;
        $gnu_arrow_count{ $total_depth + 1 } = 0;

        # If we come to an opening token after an '=' token of some type,
        # see if it would be helpful to 'break' after the '=' to save space
        my $last_equals = $last_gnu_equals{$total_depth};
        if ( $last_equals && $last_equals > $line_start_index_to_go ) {

            # find the position if we break at the '='
            my $i_test = $last_equals;
            if ( $types_to_go[ $i_test + 1 ] eq 'b' ) { $i_test++ }

            # TESTING
            ##my $too_close = ($i_test==$max_index_to_go-1);

            my $test_position = total_line_length( $i_test, $max_index_to_go );

            if (

                # the equals is not just before an open paren (testing)
                ##!$too_close &&

                # if we are beyond the midpoint
                $gnu_position_predictor > $half_maximum_line_length

                # or we are beyond the 1/4 point and there was an old
                # break at the equals
                || (
                    $gnu_position_predictor > $half_maximum_line_length / 2
                    && (
                        $old_breakpoint_to_go[$last_equals]
                        || (   $last_equals > 0
                            && $old_breakpoint_to_go[ $last_equals - 1 ] )
                        || (   $last_equals > 1
                            && $types_to_go[ $last_equals - 1 ] eq 'b'
                            && $old_breakpoint_to_go[ $last_equals - 2 ] )
                    )
                )
              )
            {

                # then make the switch -- note that we do not set a real
                # breakpoint here because we may not really need one; sub
                # scan_list will do that if necessary
                $line_start_index_to_go = $i_test + 1;
                $gnu_position_predictor = $test_position;
            }
        }
    }

    # Check for decreasing depth ..
    # Note that one token may have both decreasing and then increasing
    # depth. For example, (level, ci) can go from (1,1) to (2,0).  So,
    # in this example we would first go back to (1,0) then up to (2,0)
    # in a single call.
    if ( $level < $current_level || $ci_level < $current_ci_level ) {

        # loop to find the first entry at or completely below this level
        my ( $lev, $ci_lev );
        while (1) {
            if ($max_gnu_stack_index) {

                # save index of token which closes this level
                $gnu_stack[$max_gnu_stack_index]->set_CLOSED($max_index_to_go);

                # Undo any extra indentation if we saw no commas
                my $available_spaces =
                  $gnu_stack[$max_gnu_stack_index]->get_AVAILABLE_SPACES();

                my $comma_count = 0;
                my $arrow_count = 0;
                if ( $type eq '}' || $type eq ')' ) {
                    $comma_count = $gnu_comma_count{$total_depth};
                    $arrow_count = $gnu_arrow_count{$total_depth};
                    $comma_count = 0 unless $comma_count;
                    $arrow_count = 0 unless $arrow_count;
                }
                $gnu_stack[$max_gnu_stack_index]->set_COMMA_COUNT($comma_count);
                $gnu_stack[$max_gnu_stack_index]->set_ARROW_COUNT($arrow_count);

                if ( $available_spaces > 0 ) {

                    if ( $comma_count <= 0 || $arrow_count > 0 ) {

                        my $i = $gnu_stack[$max_gnu_stack_index]->get_INDEX();
                        my $seqno =
                          $gnu_stack[$max_gnu_stack_index]
                          ->get_SEQUENCE_NUMBER();

                        # Be sure this item was created in this batch.  This
                        # should be true because we delete any available
                        # space from open items at the end of each batch.
                        if (   $gnu_sequence_number != $seqno
                            || $i > $max_gnu_item_index )
                        {
                            warning(
"Program bug with -lp. seqno=$seqno should be $gnu_sequence_number and i=$i should be less than max=$max_gnu_item_index\n"
                            );
                            report_definite_bug();
                        }

                        else {

                            # no '=>' seen: give the space up for good;
                            # otherwise only tentatively
                            if ( $arrow_count == 0 ) {
                                $gnu_item_list[$i]
                                  ->permanently_decrease_AVAILABLE_SPACES(
                                    $available_spaces);
                            }
                            else {
                                $gnu_item_list[$i]
                                  ->tentatively_decrease_AVAILABLE_SPACES(
                                    $available_spaces);
                            }

                            # shift every item created after this one
                            my $j;
                            for (
                                $j = $i + 1 ;
                                $j <= $max_gnu_item_index ;
                                $j++
                              )
                            {
                                $gnu_item_list[$j]
                                  ->decrease_SPACES($available_spaces);
                            }
                        }
                    }
                }

                # go down one level
                --$max_gnu_stack_index;
                $lev    = $gnu_stack[$max_gnu_stack_index]->get_LEVEL();
                $ci_lev = $gnu_stack[$max_gnu_stack_index]->get_CI_LEVEL();

                # stop when we reach a level at or below the current level
                if ( $lev <= $level && $ci_lev <= $ci_level ) {
                    $space_count =
                      $gnu_stack[$max_gnu_stack_index]->get_SPACES();
                    $current_level    = $lev;
                    $current_ci_level = $ci_lev;
                    last;
                }
            }

            # reached bottom of stack .. should never happen because
            # only negative levels can get here, and $level was forced
            # to be positive above.
            else {
                warning(
"program bug with -lp: stack_error. level=$level; lev=$lev; ci_level=$ci_level; ci_lev=$ci_lev; rerun with -nlp\n"
                );
                report_definite_bug();
                last;
            }
        }
    }

    # handle increasing depth
    if ( $level > $current_level || $ci_level > $current_ci_level ) {

        # Compute the standard incremental whitespace.  This will be
        # the minimum incremental whitespace that will be used.  This
        # choice results in a smooth transition between the gnu-style
        # and the standard style.
        my $standard_increment =
          ( $level - $current_level ) * $rOpts_indent_columns +
          ( $ci_level - $current_ci_level ) * $rOpts_continuation_indentation;

        # Now we have to define how much extra incremental space
        # ("$available_space") we want.  This extra space will be
        # reduced as necessary when long lines are encountered or when
        # it becomes clear that we do not have a good list.
        my $available_space = 0;
        my $align_paren     = 0;

        # initialization on empty stack..
        if ( $max_gnu_stack_index == 0 ) {
            $space_count = $level * $rOpts_indent_columns;
        }

        # if this is a BLOCK, add the standard increment
        elsif ($last_nonblank_block_type) {
            $space_count += $standard_increment;
        }

        # if last nonblank token was not structural indentation,
        # just use standard increment
        elsif ( $last_nonblank_type ne '{' ) {
            $space_count += $standard_increment;
        }

        # otherwise use the space to the first non-blank level change token
        else {

            $space_count = $gnu_position_predictor;

            my $min_gnu_indentation =
              $gnu_stack[$max_gnu_stack_index]->get_SPACES();

            $available_space = $space_count - $min_gnu_indentation;
            if ( $available_space >= $standard_increment ) {
                $min_gnu_indentation += $standard_increment;
            }
            elsif ( $available_space > 1 ) {
                $min_gnu_indentation += $available_space + 1;
            }
            elsif ( $last_nonblank_token =~ /^[\{\[\(]$/ ) {
                if ( ( $tightness{$last_nonblank_token} < 2 ) ) {
                    $min_gnu_indentation += 2;
                }
                else {
                    $min_gnu_indentation += 1;
                }
            }
            else {
                $min_gnu_indentation += $standard_increment;
            }
            $available_space = $space_count - $min_gnu_indentation;

            if ( $available_space < 0 ) {
                $space_count     = $min_gnu_indentation;
                $available_space = 0;
            }
            $align_paren = 1;
        }

        # update state, but not on a blank token
        if ( $types_to_go[$max_index_to_go] ne 'b' ) {

            $gnu_stack[$max_gnu_stack_index]->set_HAVE_CHILD(1);

            ++$max_gnu_stack_index;
            $gnu_stack[$max_gnu_stack_index] =
              new_lp_indentation_item( $space_count, $level, $ci_level,
                $available_space, $align_paren );

            # If the opening paren is beyond the half-line length, then
            # we will use the minimum (standard) indentation.  This will
            # help avoid problems associated with running out of space
            # near the end of a line.  As a result, in deeply nested
            # lists, there will be some indentations which are limited
            # to this minimum standard indentation. But the most deeply
            # nested container will still probably be able to shift its
            # parameters to the right for proper alignment, so in most
            # cases this will not be noticeable.
            if (   $available_space > 0
                && $space_count > $half_maximum_line_length )
            {
                $gnu_stack[$max_gnu_stack_index]
                  ->tentatively_decrease_AVAILABLE_SPACES($available_space);
            }
        }
    }

    # Count commas and look for non-list characters.  Once we see a
    # non-list character, we give up and don't look for any more commas.
    if ( $type eq '=>' ) {
        $gnu_arrow_count{$total_depth}++;

        # tentatively treating '=>' like '=' for estimating breaks
        # TODO: this could use some experimentation
        $last_gnu_equals{$total_depth} = $max_index_to_go;
    }

    elsif ( $type eq ',' ) {
        $gnu_comma_count{$total_depth}++;
    }

    elsif ( $is_assignment{$type} ) {
        $last_gnu_equals{$total_depth} = $max_index_to_go;
    }

    # this token might start a new line
    # if this is a non-blank..
    if ( $type ne 'b' ) {

        # and if ..
        if (

            # this is the first nonblank token of the line
            $max_index_to_go == 1 && $types_to_go[0] eq 'b'

            # or previous character was one of these:
            || $last_nonblank_type_to_go =~ /^([\:\?\,f])$/

            # or previous character was opening and this does not close it
            || ( $last_nonblank_type_to_go eq '{' && $type ne '}' )
            || ( $last_nonblank_type_to_go eq '(' and $type ne ')' )

            # or this token is one of these:
            || $type =~ /^([\.]|\|\||\&\&)$/

            # or this is a closing structure
            || (   $last_nonblank_type_to_go eq '}'
                && $last_nonblank_token_to_go eq $last_nonblank_type_to_go )

            # or previous token was keyword 'return'
            || (
                $last_nonblank_type_to_go eq 'k'
                && (   $last_nonblank_token_to_go eq 'return'
                    && $type ne '{' )
            )

            # or starting a new line at certain keywords is fine
            || (   $type eq 'k'
                && $is_if_unless_and_or_last_next_redo_return{$token} )

            # or this is after an assignment after a closing structure
            || (
                $is_assignment{$last_nonblank_type_to_go}
                && (
                    $last_last_nonblank_type_to_go =~ /^[\}\)\]]$/

                    # and it is significantly to the right
                    || $gnu_position_predictor > $half_maximum_line_length
                )
            )
          )
        {
            check_for_long_gnu_style_lines();
            $line_start_index_to_go = $max_index_to_go;

            # back up 1 token if we want to break before that type
            # otherwise, we may strand tokens like '?' or ':' on a line
            if ( $line_start_index_to_go > 0 ) {
                if ( $last_nonblank_type_to_go eq 'k' ) {

                    if ( $want_break_before{$last_nonblank_token_to_go} ) {
                        $line_start_index_to_go--;
                    }
                }
                elsif ( $want_break_before{$last_nonblank_type_to_go} ) {
                    $line_start_index_to_go--;
                }
            }
        }
    }

    # remember the predicted position of this token on the output line
    if ( $max_index_to_go > $line_start_index_to_go ) {
        $gnu_position_predictor =
          total_line_length( $line_start_index_to_go, $max_index_to_go );
    }
    else {
        $gnu_position_predictor = $space_count +
          token_sequence_length( $max_index_to_go, $max_index_to_go );
    }

    # store the indentation object for this token
    # this allows us to manipulate the leading whitespace
    # (in case we have to reduce indentation to fit a line) without
    # having to change any token values
    $leading_spaces_to_go[$max_index_to_go] = $gnu_stack[$max_gnu_stack_index];
    $reduced_spaces_to_go[$max_index_to_go] =
      ( $max_gnu_stack_index > 0 && $ci_level )
      ? $gnu_stack[ $max_gnu_stack_index - 1 ]
      : $gnu_stack[$max_gnu_stack_index];
    return;
}

sub check_for_long_gnu_style_lines {

    # look at the current estimated maximum line length, and
    # remove some whitespace if it exceeds the desired maximum
    #
    # Takes no arguments.  On success it lowers $gnu_position_predictor
    # by the number of spaces removed.

    # this is only for the '-lp' style
    return unless ($rOpts_line_up_parentheses);

    # nothing can be done if no stack items defined for this line
    return if ( $max_gnu_item_index == UNDEFINED_INDEX );

    # see if we have exceeded the maximum desired line length
    # keep 2 extra free because they are needed in some cases
    # (result of trial-and-error testing)
    my $spaces_needed =
      $gnu_position_predictor - $rOpts_maximum_line_length + 2;

    return if ( $spaces_needed < 0 );

    # We are over the limit, so try to remove a requested number of
    # spaces from leading whitespace.  We are only allowed to remove
    # from whitespace items created on this batch, since others have
    # already been used and cannot be undone.
    my @candidates = ();
    my $i;

    # loop over all whitespace items created for the current batch
    for ( $i = 0 ; $i <= $max_gnu_item_index ; $i++ ) {
        my $item = $gnu_item_list[$i];

        # item must still be open to be a candidate (otherwise it
        # cannot influence the current token)
        next if ( $item->get_CLOSED() >= 0 );

        my $available_spaces = $item->get_AVAILABLE_SPACES();

        if ( $available_spaces > 0 ) {
            push( @candidates, [ $i, $available_spaces ] );
        }
    }

    return unless (@candidates);

    # sort by available whitespace so that we can remove whitespace
    # from the maximum available first
    @candidates = sort { $b->[1] <=> $a->[1] } @candidates;

    # keep removing whitespace until we are done or have no more
    my $candidate;
    foreach $candidate (@candidates) {
        my ( $i, $available_spaces ) = @{$candidate};
        my $deleted_spaces =
          ( $available_spaces > $spaces_needed )
          ? $spaces_needed
          : $available_spaces;

        # remove the incremental space from this item
        $gnu_item_list[$i]->decrease_AVAILABLE_SPACES($deleted_spaces);

        # remember which item gave up the space; $i advances below
        my $i_debug = $i;

        # update the leading whitespace of this item and all items
        # that came after it
        for ( ; $i <= $max_gnu_item_index ; $i++ ) {

            my $old_spaces = $gnu_item_list[$i]->get_SPACES();
            if ( $old_spaces > $deleted_spaces ) {
                $gnu_item_list[$i]->decrease_SPACES($deleted_spaces);
            }

            # shouldn't happen except for code bug:
            else {
                my $level        = $gnu_item_list[$i_debug]->get_LEVEL();
                my $ci_level     = $gnu_item_list[$i_debug]->get_CI_LEVEL();
                my $old_level    = $gnu_item_list[$i]->get_LEVEL();
                my $old_ci_level = $gnu_item_list[$i]->get_CI_LEVEL();

                # the first lev/ci pair describes the item that gave up
                # the space, the second pair the item being reduced
                warning(
"program bug with -lp: want to delete $deleted_spaces from item $i, but old=$old_spaces deleted: lev=$level ci=$ci_level deleted: level=$old_level ci=$old_ci_level\n"
                );
                report_definite_bug();
            }
        }
        $gnu_position_predictor -= $deleted_spaces;
        $spaces_needed          -= $deleted_spaces;
        last unless ( $spaces_needed > 0 );
    }
}

sub finish_lp_batch {

    # This routine is called once after each output stream batch is
    # finished to undo indentation for all incomplete -lp
    # indentation levels.  It is too risky to leave a level open,
    # because then we can't backtrack in case of a long line to follow.
    # This means that comments and blank lines will disrupt this
    # indentation style.  But the vertical aligner may be able to
    # get the space back if there are side comments.

    # this is only for the 'lp' style
    return unless ($rOpts_line_up_parentheses);

    # nothing can be done if no stack items defined for this line
    return if ( $max_gnu_item_index == UNDEFINED_INDEX );

    # loop over all whitespace items created for the current batch
    my $i;
    for ( $i = 0 ; $i <= $max_gnu_item_index ; $i++ ) {
        my $item = $gnu_item_list[$i];

        # only look for open items
        next if ( $item->get_CLOSED() >= 0 );

        # Tentatively remove all of the available space
        # (The vertical aligner will try to get it back later)
        my $available_spaces = $item->get_AVAILABLE_SPACES();
        if ( $available_spaces > 0 ) {

            # delete incremental space for this item
            $gnu_item_list[$i]
              ->tentatively_decrease_AVAILABLE_SPACES($available_spaces);

            # Reduce the total indentation space of any nodes that follow
            # Note that any such nodes must necessarily be dependents
            # of this node.
            foreach ( $i + 1 .. $max_gnu_item_index ) {
                $gnu_item_list[$_]->decrease_SPACES($available_spaces);
            }
        }
    }
    return;
}

sub reduce_lp_indentation {

    # reduce the leading whitespace at token $i if possible by $spaces_wanted
    # (a large value of $spaces_wanted will remove all excess space)
    # NOTE: to be called from scan_list only for a sequence of tokens
    # contained between opening and closing parens/braces/brackets
    #
    # returns the number of spaces actually removed (0 if none)

    my ( $i, $spaces_wanted ) = @_;
    my $deleted_spaces = 0;

    my $item             = $leading_spaces_to_go[$i];
    my $available_spaces = $item->get_AVAILABLE_SPACES();

    # remove space only if the item can supply all that is wanted, or if
    # it has no child item
    if (
        $available_spaces > 0
        && ( ( $spaces_wanted <= $available_spaces )
            || !$item->get_HAVE_CHILD() )
      )
    {

        # we'll remove these spaces, but mark them as recoverable
        $deleted_spaces =
          $item->tentatively_decrease_AVAILABLE_SPACES($spaces_wanted);
    }

    return $deleted_spaces;
}

sub token_sequence_length {

    # return length of tokens ($ifirst .. $ilast) including first & last
    # returns 0 if $ifirst > $ilast
    # The length is a difference of two entries of @lengths_to_go; a
    # negative $ifirst means "from the start", so nothing is subtracted.
    my $ifirst = shift;
    my $ilast  = shift;
    return 0 if ( $ilast < 0 || $ifirst > $ilast );
    return $lengths_to_go[ $ilast + 1 ] if ( $ifirst < 0 );
    return $lengths_to_go[ $ilast + 1 ] - $lengths_to_go[$ifirst];
}

sub total_line_length {

    # return length of a line of tokens ($ifirst .. $ilast), i.e. the
    # leading spaces of token $ifirst plus the length of the tokens.
    # A negative $ifirst is treated as 0.
    my $ifirst = shift;
    my $ilast  = shift;
    if ( $ifirst < 0 ) { $ifirst = 0 }

    return leading_spaces_to_go($ifirst) +
      token_sequence_length( $ifirst, $ilast );
}

sub excess_line_length {

    # return number of characters by which a line of tokens ($ifirst..$ilast)
    # exceeds the allowable line length.  The result is negative when the
    # line is shorter than $rOpts_maximum_line_length.
    my $ifirst = shift;
    my $ilast  = shift;

    # total_line_length applies the same clamp of a negative $ifirst to 0
    return total_line_length( $ifirst, $ilast ) - $rOpts_maximum_line_length;
}

sub finish_formatting {

    # flush buffer and write any informative messages
    #
    # The messages go to the logfile and summarize added and deleted
    # semicolons, embedded tabs, and indentation disagreements.
    my $self = shift;

    flush();
    $file_writer_object->decrement_output_line_number()
      ;    # fix up line number since it was incremented
    we_are_at_the_last_line();
    if ( $added_semicolon_count > 0 ) {
        my $first = ( $added_semicolon_count > 1 ) ? "First" : "";
        my $what =
          ( $added_semicolon_count > 1 ) ? "semicolons were" : "semicolon was";
        write_logfile_entry("$added_semicolon_count $what added:\n");
        write_logfile_entry(
            " $first at input line $first_added_semicolon_at\n");

        if ( $added_semicolon_count > 1 ) {
            write_logfile_entry(
                " Last at input line $last_added_semicolon_at\n");
        }
        write_logfile_entry(" (Use -nasc to prevent semicolon addition)\n");
        write_logfile_entry("\n");
    }

    if ( $deleted_semicolon_count > 0 ) {
        my $first = ( $deleted_semicolon_count > 1 ) ? "First" : "";
        my $what =
          ( $deleted_semicolon_count > 1 )
          ? "semicolons were"
          : "semicolon was";
        write_logfile_entry(
            "$deleted_semicolon_count unnecessary $what deleted:\n");
        write_logfile_entry(
            " $first at input line $first_deleted_semicolon_at\n");

        if ( $deleted_semicolon_count > 1 ) {
            write_logfile_entry(
                " Last at input line $last_deleted_semicolon_at\n");
        }
        write_logfile_entry(" (Use -ndsc to prevent semicolon deletion)\n");
        write_logfile_entry("\n");
    }

    if ( $embedded_tab_count > 0 ) {
        my $first = ( $embedded_tab_count > 1 ) ? "First" : "";
        my $what =
          ( $embedded_tab_count > 1 )
          ? "quotes or patterns"
          : "quote or pattern";
        write_logfile_entry("$embedded_tab_count $what had embedded tabs:\n");
        write_logfile_entry(
"This means the display of this script could vary with device or software\n"
        );
        write_logfile_entry(" $first at input line $first_embedded_tab_at\n");

        if ( $embedded_tab_count > 1 ) {
            write_logfile_entry(
                " Last at input line $last_embedded_tab_at\n");
        }
        write_logfile_entry("\n");
    }

    if ($first_tabbing_disagreement) {
        write_logfile_entry(
"First indentation disagreement seen at input line $first_tabbing_disagreement\n"
        );
    }

    if ($in_tabbing_disagreement) {
        write_logfile_entry(
"Ending with indentation disagreement which started at input line $in_tabbing_disagreement\n"
        );
    }
    else {

        if ($last_tabbing_disagreement) {

            write_logfile_entry(
"Last indentation disagreement seen at input line $last_tabbing_disagreement\n"
            );
        }
        else {
            write_logfile_entry("No indentation disagreement seen\n");
        }
    }
    write_logfile_entry("\n");

    $vertical_aligner_object->report_anything_unusual();

    $file_writer_object->report_line_length_errors();
}

sub check_options {

    # This routine is called to check the Opts hash after it is defined

    ($rOpts) = @_;
    my ( $tabbing_string, $tab_msg );

    make_static_block_comment_pattern();
    make_static_side_comment_pattern();
    make_closing_side_comment_prefix();
    make_closing_side_comment_list_pattern();
    $format_skipping_pattern_begin =
      make_format_skipping_pattern( 'format-skipping-begin', '#<<<' );
    $format_skipping_pattern_end =
      make_format_skipping_pattern( 'format-skipping-end', '#>>>' );

    # If closing side comments ARE selected, then we can safely
    # delete old closing side comments unless closing side comment
    # warnings are
requested. This is a good idea because it will 6869 # eliminate any old csc's which fall below the line count threshold. 6870 # We cannot do this if warnings are turned on, though, because we 6871 # might delete some text which has been added. So that must 6872 # be handled when comments are created. 6873 if ( $rOpts->{'closing-side-comments'} ) { 6874 if ( !$rOpts->{'closing-side-comment-warnings'} ) { 6875 $rOpts->{'delete-closing-side-comments'} = 1; 6876 } 6877 } 6878 6879 # If closing side comments ARE NOT selected, but warnings ARE 6880 # selected and we ARE DELETING csc's, then we will pretend to be 6881 # adding with a huge interval. This will force the comments to be 6882 # generated for comparison with the old comments, but not added. 6883 elsif ( $rOpts->{'closing-side-comment-warnings'} ) { 6884 if ( $rOpts->{'delete-closing-side-comments'} ) { 6885 $rOpts->{'delete-closing-side-comments'} = 0; 6886 $rOpts->{'closing-side-comments'} = 1; 6887 $rOpts->{'closing-side-comment-interval'} = 100000000; 6888 } 6889 } 6890 6891 make_bli_pattern(); 6892 make_block_brace_vertical_tightness_pattern(); 6893 6894 if ( $rOpts->{'line-up-parentheses'} ) { 6895 6896 if ( $rOpts->{'indent-only'} 6897 || !$rOpts->{'add-newlines'} 6898 || !$rOpts->{'delete-old-newlines'} ) 6899 { 6900 warn <<EOM; 6901----------------------------------------------------------------------- 6902Conflict: -lp conflicts with -io, -fnl, -nanl, or -ndnl; ignoring -lp 6903 6904The -lp indentation logic requires that perltidy be able to coordinate 6905arbitrarily large numbers of line breakpoints. This isn't possible 6906with these flags. 
Sometimes an acceptable workaround is to use -wocb=3 6907----------------------------------------------------------------------- 6908EOM 6909 $rOpts->{'line-up-parentheses'} = 0; 6910 } 6911 } 6912 6913 # At present, tabs are not compatable with the line-up-parentheses style 6914 # (it would be possible to entab the total leading whitespace 6915 # just prior to writing the line, if desired). 6916 if ( $rOpts->{'line-up-parentheses'} && $rOpts->{'tabs'} ) { 6917 warn <<EOM; 6918Conflict: -t (tabs) cannot be used with the -lp option; ignoring -t; see -et. 6919EOM 6920 $rOpts->{'tabs'} = 0; 6921 } 6922 6923 # Likewise, tabs are not compatable with outdenting.. 6924 if ( $rOpts->{'outdent-keywords'} && $rOpts->{'tabs'} ) { 6925 warn <<EOM; 6926Conflict: -t (tabs) cannot be used with the -okw options; ignoring -t; see -et. 6927EOM 6928 $rOpts->{'tabs'} = 0; 6929 } 6930 6931 if ( $rOpts->{'outdent-labels'} && $rOpts->{'tabs'} ) { 6932 warn <<EOM; 6933Conflict: -t (tabs) cannot be used with the -ola option; ignoring -t; see -et. 
6934EOM 6935 $rOpts->{'tabs'} = 0; 6936 } 6937 6938 if ( !$rOpts->{'space-for-semicolon'} ) { 6939 $want_left_space{'f'} = -1; 6940 } 6941 6942 if ( $rOpts->{'space-terminal-semicolon'} ) { 6943 $want_left_space{';'} = 1; 6944 } 6945 6946 # implement outdenting preferences for keywords 6947 %outdent_keyword = (); 6948 unless ( @_ = split_words( $rOpts->{'outdent-keyword-okl'} ) ) { 6949 @_ = qw(next last redo goto return); # defaults 6950 } 6951 6952 # FUTURE: if not a keyword, assume that it is an identifier 6953 foreach (@_) { 6954 if ( $Perl::Tidy::Tokenizer::is_keyword{$_} ) { 6955 $outdent_keyword{$_} = 1; 6956 } 6957 else { 6958 warn "ignoring '$_' in -okwl list; not a perl keyword"; 6959 } 6960 } 6961 6962 # implement user whitespace preferences 6963 if ( @_ = split_words( $rOpts->{'want-left-space'} ) ) { 6964 @want_left_space{@_} = (1) x scalar(@_); 6965 } 6966 6967 if ( @_ = split_words( $rOpts->{'want-right-space'} ) ) { 6968 @want_right_space{@_} = (1) x scalar(@_); 6969 } 6970 6971 if ( @_ = split_words( $rOpts->{'nowant-left-space'} ) ) { 6972 @want_left_space{@_} = (-1) x scalar(@_); 6973 } 6974 6975 if ( @_ = split_words( $rOpts->{'nowant-right-space'} ) ) { 6976 @want_right_space{@_} = (-1) x scalar(@_); 6977 } 6978 if ( $rOpts->{'dump-want-left-space'} ) { 6979 dump_want_left_space(*STDOUT); 6980 exit 1; 6981 } 6982 6983 if ( $rOpts->{'dump-want-right-space'} ) { 6984 dump_want_right_space(*STDOUT); 6985 exit 1; 6986 } 6987 6988 # default keywords for which space is introduced before an opening paren 6989 # (at present, including them messes up vertical alignment) 6990 @_ = qw(my local our and or err eq ne if else elsif until 6991 unless while for foreach return switch case given when); 6992 @space_after_keyword{@_} = (1) x scalar(@_); 6993 6994 # allow user to modify these defaults 6995 if ( @_ = split_words( $rOpts->{'space-after-keyword'} ) ) { 6996 @space_after_keyword{@_} = (1) x scalar(@_); 6997 } 6998 6999 if ( @_ = split_words( 
$rOpts->{'nospace-after-keyword'} ) ) { 7000 @space_after_keyword{@_} = (0) x scalar(@_); 7001 } 7002 7003 # implement user break preferences 7004 my @all_operators = qw(% + - * / x != == >= <= =~ !~ < > | & 7005 = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x= 7006 . : ? && || and or err xor 7007 ); 7008 7009 my $break_after = sub { 7010 foreach my $tok (@_) { 7011 if ( $tok eq '?' ) { $tok = ':' } # patch to coordinate ?/: 7012 my $lbs = $left_bond_strength{$tok}; 7013 my $rbs = $right_bond_strength{$tok}; 7014 if ( defined($lbs) && defined($rbs) && $lbs < $rbs ) { 7015 ( $right_bond_strength{$tok}, $left_bond_strength{$tok} ) = 7016 ( $lbs, $rbs ); 7017 } 7018 } 7019 }; 7020 7021 my $break_before = sub { 7022 foreach my $tok (@_) { 7023 my $lbs = $left_bond_strength{$tok}; 7024 my $rbs = $right_bond_strength{$tok}; 7025 if ( defined($lbs) && defined($rbs) && $rbs < $lbs ) { 7026 ( $right_bond_strength{$tok}, $left_bond_strength{$tok} ) = 7027 ( $lbs, $rbs ); 7028 } 7029 } 7030 }; 7031 7032 $break_after->(@all_operators) if ( $rOpts->{'break-after-all-operators'} ); 7033 $break_before->(@all_operators) 7034 if ( $rOpts->{'break-before-all-operators'} ); 7035 7036 $break_after->( split_words( $rOpts->{'want-break-after'} ) ); 7037 $break_before->( split_words( $rOpts->{'want-break-before'} ) ); 7038 7039 # make note if breaks are before certain key types 7040 %want_break_before = (); 7041 foreach my $tok ( @all_operators, ',' ) { 7042 $want_break_before{$tok} = 7043 $left_bond_strength{$tok} < $right_bond_strength{$tok}; 7044 } 7045 7046 # Coordinate ?/: breaks, which must be similar 7047 if ( !$want_break_before{':'} ) { 7048 $want_break_before{'?'} = $want_break_before{':'}; 7049 $right_bond_strength{'?'} = $right_bond_strength{':'} + 0.01; 7050 $left_bond_strength{'?'} = NO_BREAK; 7051 } 7052 7053 # Define here tokens which may follow the closing brace of a do statement 7054 # on the same line, as in: 7055 # } while ( $something); 7056 @_ = qw(until 
while unless if ; : ); 7057 push @_, ','; 7058 @is_do_follower{@_} = (1) x scalar(@_); 7059 7060 # These tokens may follow the closing brace of an if or elsif block. 7061 # In other words, for cuddled else we want code to look like: 7062 # } elsif ( $something) { 7063 # } else { 7064 if ( $rOpts->{'cuddled-else'} ) { 7065 @_ = qw(else elsif); 7066 @is_if_brace_follower{@_} = (1) x scalar(@_); 7067 } 7068 else { 7069 %is_if_brace_follower = (); 7070 } 7071 7072 # nothing can follow the closing curly of an else { } block: 7073 %is_else_brace_follower = (); 7074 7075 # what can follow a multi-line anonymous sub definition closing curly: 7076 @_ = qw# ; : => or and && || ~~ !~~ ) #; 7077 push @_, ','; 7078 @is_anon_sub_brace_follower{@_} = (1) x scalar(@_); 7079 7080 # what can follow a one-line anonynomous sub closing curly: 7081 # one-line anonumous subs also have ']' here... 7082 # see tk3.t and PP.pm 7083 @_ = qw# ; : => or and && || ) ] ~~ !~~ #; 7084 push @_, ','; 7085 @is_anon_sub_1_brace_follower{@_} = (1) x scalar(@_); 7086 7087 # What can follow a closing curly of a block 7088 # which is not an if/elsif/else/do/sort/map/grep/eval/sub 7089 # Testfiles: 'Toolbar.pm', 'Menubar.pm', bless.t, '3rules.pl' 7090 @_ = qw# ; : => or and && || ) #; 7091 push @_, ','; 7092 7093 # allow cuddled continue if cuddled else is specified 7094 if ( $rOpts->{'cuddled-else'} ) { push @_, 'continue'; } 7095 7096 @is_other_brace_follower{@_} = (1) x scalar(@_); 7097 7098 $right_bond_strength{'{'} = WEAK; 7099 $left_bond_strength{'{'} = VERY_STRONG; 7100 7101 # make -l=0 equal to -l=infinite 7102 if ( !$rOpts->{'maximum-line-length'} ) { 7103 $rOpts->{'maximum-line-length'} = 1000000; 7104 } 7105 7106 # make -lbl=0 equal to -lbl=infinite 7107 if ( !$rOpts->{'long-block-line-count'} ) { 7108 $rOpts->{'long-block-line-count'} = 1000000; 7109 } 7110 7111 my $ole = $rOpts->{'output-line-ending'}; 7112 if ($ole) { 7113 my %endings = ( 7114 dos => "\015\012", 7115 win => "\015\012", 7116 
mac => "\015", 7117 unix => "\012", 7118 ); 7119 $ole = lc $ole; 7120 unless ( $rOpts->{'output-line-ending'} = $endings{$ole} ) { 7121 my $str = join " ", keys %endings; 7122 die <<EOM; 7123Unrecognized line ending '$ole'; expecting one of: $str 7124EOM 7125 } 7126 if ( $rOpts->{'preserve-line-endings'} ) { 7127 warn "Ignoring -ple; conflicts with -ole\n"; 7128 $rOpts->{'preserve-line-endings'} = undef; 7129 } 7130 } 7131 7132 # hashes used to simplify setting whitespace 7133 %tightness = ( 7134 '{' => $rOpts->{'brace-tightness'}, 7135 '}' => $rOpts->{'brace-tightness'}, 7136 '(' => $rOpts->{'paren-tightness'}, 7137 ')' => $rOpts->{'paren-tightness'}, 7138 '[' => $rOpts->{'square-bracket-tightness'}, 7139 ']' => $rOpts->{'square-bracket-tightness'}, 7140 ); 7141 %matching_token = ( 7142 '{' => '}', 7143 '(' => ')', 7144 '[' => ']', 7145 '?' => ':', 7146 ); 7147 7148 # frequently used parameters 7149 $rOpts_add_newlines = $rOpts->{'add-newlines'}; 7150 $rOpts_add_whitespace = $rOpts->{'add-whitespace'}; 7151 $rOpts_block_brace_tightness = $rOpts->{'block-brace-tightness'}; 7152 $rOpts_block_brace_vertical_tightness = 7153 $rOpts->{'block-brace-vertical-tightness'}; 7154 $rOpts_brace_left_and_indent = $rOpts->{'brace-left-and-indent'}; 7155 $rOpts_comma_arrow_breakpoints = $rOpts->{'comma-arrow-breakpoints'}; 7156 $rOpts_break_at_old_ternary_breakpoints = 7157 $rOpts->{'break-at-old-ternary-breakpoints'}; 7158 $rOpts_break_at_old_comma_breakpoints = 7159 $rOpts->{'break-at-old-comma-breakpoints'}; 7160 $rOpts_break_at_old_keyword_breakpoints = 7161 $rOpts->{'break-at-old-keyword-breakpoints'}; 7162 $rOpts_break_at_old_logical_breakpoints = 7163 $rOpts->{'break-at-old-logical-breakpoints'}; 7164 $rOpts_closing_side_comment_else_flag = 7165 $rOpts->{'closing-side-comment-else-flag'}; 7166 $rOpts_closing_side_comment_maximum_text = 7167 $rOpts->{'closing-side-comment-maximum-text'}; 7168 $rOpts_continuation_indentation = $rOpts->{'continuation-indentation'}; 7169 
$rOpts_cuddled_else = $rOpts->{'cuddled-else'}; 7170 $rOpts_delete_old_whitespace = $rOpts->{'delete-old-whitespace'}; 7171 $rOpts_fuzzy_line_length = $rOpts->{'fuzzy-line-length'}; 7172 $rOpts_indent_columns = $rOpts->{'indent-columns'}; 7173 $rOpts_line_up_parentheses = $rOpts->{'line-up-parentheses'}; 7174 $rOpts_maximum_fields_per_table = $rOpts->{'maximum-fields-per-table'}; 7175 $rOpts_maximum_line_length = $rOpts->{'maximum-line-length'}; 7176 $rOpts_short_concatenation_item_length = 7177 $rOpts->{'short-concatenation-item-length'}; 7178 $rOpts_swallow_optional_blank_lines = 7179 $rOpts->{'swallow-optional-blank-lines'}; 7180 $rOpts_ignore_old_breakpoints = $rOpts->{'ignore-old-breakpoints'}; 7181 $rOpts_format_skipping = $rOpts->{'format-skipping'}; 7182 $rOpts_space_function_paren = $rOpts->{'space-function-paren'}; 7183 $rOpts_space_keyword_paren = $rOpts->{'space-keyword-paren'}; 7184 $rOpts_keep_interior_semicolons = $rOpts->{'keep-interior-semicolons'}; 7185 $half_maximum_line_length = $rOpts_maximum_line_length / 2; 7186 7187 # Note that both opening and closing tokens can access the opening 7188 # and closing flags of their container types. 
7189 %opening_vertical_tightness = ( 7190 '(' => $rOpts->{'paren-vertical-tightness'}, 7191 '{' => $rOpts->{'brace-vertical-tightness'}, 7192 '[' => $rOpts->{'square-bracket-vertical-tightness'}, 7193 ')' => $rOpts->{'paren-vertical-tightness'}, 7194 '}' => $rOpts->{'brace-vertical-tightness'}, 7195 ']' => $rOpts->{'square-bracket-vertical-tightness'}, 7196 ); 7197 7198 %closing_vertical_tightness = ( 7199 '(' => $rOpts->{'paren-vertical-tightness-closing'}, 7200 '{' => $rOpts->{'brace-vertical-tightness-closing'}, 7201 '[' => $rOpts->{'square-bracket-vertical-tightness-closing'}, 7202 ')' => $rOpts->{'paren-vertical-tightness-closing'}, 7203 '}' => $rOpts->{'brace-vertical-tightness-closing'}, 7204 ']' => $rOpts->{'square-bracket-vertical-tightness-closing'}, 7205 ); 7206 7207 # assume flag for '>' same as ')' for closing qw quotes 7208 %closing_token_indentation = ( 7209 ')' => $rOpts->{'closing-paren-indentation'}, 7210 '}' => $rOpts->{'closing-brace-indentation'}, 7211 ']' => $rOpts->{'closing-square-bracket-indentation'}, 7212 '>' => $rOpts->{'closing-paren-indentation'}, 7213 ); 7214 7215 %opening_token_right = ( 7216 '(' => $rOpts->{'opening-paren-right'}, 7217 '{' => $rOpts->{'opening-hash-brace-right'}, 7218 '[' => $rOpts->{'opening-square-bracket-right'}, 7219 ); 7220 7221 %stack_opening_token = ( 7222 '(' => $rOpts->{'stack-opening-paren'}, 7223 '{' => $rOpts->{'stack-opening-hash-brace'}, 7224 '[' => $rOpts->{'stack-opening-square-bracket'}, 7225 ); 7226 7227 %stack_closing_token = ( 7228 ')' => $rOpts->{'stack-closing-paren'}, 7229 '}' => $rOpts->{'stack-closing-hash-brace'}, 7230 ']' => $rOpts->{'stack-closing-square-bracket'}, 7231 ); 7232} 7233 7234sub make_static_block_comment_pattern { 7235 7236 # create the pattern used to identify static block comments 7237 $static_block_comment_pattern = '^\s*##'; 7238 7239 # allow the user to change it 7240 if ( $rOpts->{'static-block-comment-prefix'} ) { 7241 my $prefix = 
$rOpts->{'static-block-comment-prefix'}; 7242 $prefix =~ s/^\s*//; 7243 my $pattern = $prefix; 7244 7245 # user may give leading caret to force matching left comments only 7246 if ( $prefix !~ /^\^#/ ) { 7247 if ( $prefix !~ /^#/ ) { 7248 die 7249"ERROR: the -sbcp prefix is '$prefix' but must begin with '#' or '^#'\n"; 7250 } 7251 $pattern = '^\s*' . $prefix; 7252 } 7253 eval "'##'=~/$pattern/"; 7254 if ($@) { 7255 die 7256"ERROR: the -sbc prefix '$prefix' causes the invalid regex '$pattern'\n"; 7257 } 7258 $static_block_comment_pattern = $pattern; 7259 } 7260} 7261 7262sub make_format_skipping_pattern { 7263 my ( $opt_name, $default ) = @_; 7264 my $param = $rOpts->{$opt_name}; 7265 unless ($param) { $param = $default } 7266 $param =~ s/^\s*//; 7267 if ( $param !~ /^#/ ) { 7268 die "ERROR: the $opt_name parameter '$param' must begin with '#'\n"; 7269 } 7270 my $pattern = '^' . $param . '\s'; 7271 eval "'#'=~/$pattern/"; 7272 if ($@) { 7273 die 7274"ERROR: the $opt_name parameter '$param' causes the invalid regex '$pattern'\n"; 7275 } 7276 return $pattern; 7277} 7278 7279sub make_closing_side_comment_list_pattern { 7280 7281 # turn any input list into a regex for recognizing selected block types 7282 $closing_side_comment_list_pattern = '^\w+'; 7283 if ( defined( $rOpts->{'closing-side-comment-list'} ) 7284 && $rOpts->{'closing-side-comment-list'} ) 7285 { 7286 $closing_side_comment_list_pattern = 7287 make_block_pattern( '-cscl', $rOpts->{'closing-side-comment-list'} ); 7288 } 7289} 7290 7291sub make_bli_pattern { 7292 7293 if ( defined( $rOpts->{'brace-left-and-indent-list'} ) 7294 && $rOpts->{'brace-left-and-indent-list'} ) 7295 { 7296 $bli_list_string = $rOpts->{'brace-left-and-indent-list'}; 7297 } 7298 7299 $bli_pattern = make_block_pattern( '-blil', $bli_list_string ); 7300} 7301 7302sub make_block_brace_vertical_tightness_pattern { 7303 7304 # turn any input list into a regex for recognizing selected block types 7305 $block_brace_vertical_tightness_pattern 
= 7306 '^((if|else|elsif|unless|while|for|foreach|do|\w+:)$|sub)'; 7307 7308 if ( defined( $rOpts->{'block-brace-vertical-tightness-list'} ) 7309 && $rOpts->{'block-brace-vertical-tightness-list'} ) 7310 { 7311 $block_brace_vertical_tightness_pattern = 7312 make_block_pattern( '-bbvtl', 7313 $rOpts->{'block-brace-vertical-tightness-list'} ); 7314 } 7315} 7316 7317sub make_block_pattern { 7318 7319 # given a string of block-type keywords, return a regex to match them 7320 # The only tricky part is that labels are indicated with a single ':' 7321 # and the 'sub' token text may have additional text after it (name of 7322 # sub). 7323 # 7324 # Example: 7325 # 7326 # input string: "if else elsif unless while for foreach do : sub"; 7327 # pattern: '^((if|else|elsif|unless|while|for|foreach|do|\w+:)$|sub)'; 7328 7329 my ( $abbrev, $string ) = @_; 7330 my @list = split_words($string); 7331 my @words = (); 7332 my %seen; 7333 for my $i (@list) { 7334 next if $seen{$i}; 7335 $seen{$i} = 1; 7336 if ( $i eq 'sub' ) { 7337 } 7338 elsif ( $i eq ':' ) { 7339 push @words, '\w+:'; 7340 } 7341 elsif ( $i =~ /^\w/ ) { 7342 push @words, $i; 7343 } 7344 else { 7345 warn "unrecognized block type $i after $abbrev, ignoring\n"; 7346 } 7347 } 7348 my $pattern = '(' . join( '|', @words ) . ')$'; 7349 if ( $seen{'sub'} ) { 7350 $pattern = '(' . $pattern . '|sub)'; 7351 } 7352 $pattern = '^' . $pattern; 7353 return $pattern; 7354} 7355 7356sub make_static_side_comment_pattern { 7357 7358 # create the pattern used to identify static side comments 7359 $static_side_comment_pattern = '^##'; 7360 7361 # allow the user to change it 7362 if ( $rOpts->{'static-side-comment-prefix'} ) { 7363 my $prefix = $rOpts->{'static-side-comment-prefix'}; 7364 $prefix =~ s/^\s*//; 7365 my $pattern = '^' . 
$prefix; 7366 eval "'##'=~/$pattern/"; 7367 if ($@) { 7368 die 7369"ERROR: the -sscp prefix '$prefix' causes the invalid regex '$pattern'\n"; 7370 } 7371 $static_side_comment_pattern = $pattern; 7372 } 7373} 7374 7375sub make_closing_side_comment_prefix { 7376 7377 # Be sure we have a valid closing side comment prefix 7378 my $csc_prefix = $rOpts->{'closing-side-comment-prefix'}; 7379 my $csc_prefix_pattern; 7380 if ( !defined($csc_prefix) ) { 7381 $csc_prefix = '## end'; 7382 $csc_prefix_pattern = '^##\s+end'; 7383 } 7384 else { 7385 my $test_csc_prefix = $csc_prefix; 7386 if ( $test_csc_prefix !~ /^#/ ) { 7387 $test_csc_prefix = '#' . $test_csc_prefix; 7388 } 7389 7390 # make a regex to recognize the prefix 7391 my $test_csc_prefix_pattern = $test_csc_prefix; 7392 7393 # escape any special characters 7394 $test_csc_prefix_pattern =~ s/([^#\s\w])/\\$1/g; 7395 7396 $test_csc_prefix_pattern = '^' . $test_csc_prefix_pattern; 7397 7398 # allow exact number of intermediate spaces to vary 7399 $test_csc_prefix_pattern =~ s/\s+/\\s\+/g; 7400 7401 # make sure we have a good pattern 7402 # if we fail this we probably have an error in escaping 7403 # characters. 
7404 eval "'##'=~/$test_csc_prefix_pattern/"; 7405 if ($@) { 7406 7407 # shouldn't happen..must have screwed up escaping, above 7408 report_definite_bug(); 7409 warn 7410"Program Error: the -cscp prefix '$csc_prefix' caused the invalid regex '$csc_prefix_pattern'\n"; 7411 7412 # just warn and keep going with defaults 7413 warn "Please consider using a simpler -cscp prefix\n"; 7414 warn "Using default -cscp instead; please check output\n"; 7415 } 7416 else { 7417 $csc_prefix = $test_csc_prefix; 7418 $csc_prefix_pattern = $test_csc_prefix_pattern; 7419 } 7420 } 7421 $rOpts->{'closing-side-comment-prefix'} = $csc_prefix; 7422 $closing_side_comment_prefix_pattern = $csc_prefix_pattern; 7423} 7424 7425sub dump_want_left_space { 7426 my $fh = shift; 7427 local $" = "\n"; 7428 print $fh <<EOM; 7429These values are the main control of whitespace to the left of a token type; 7430They may be altered with the -wls parameter. 7431For a list of token types, use perltidy --dump-token-types (-dtt) 7432 1 means the token wants a space to its left 7433-1 means the token does not want a space to its left 7434------------------------------------------------------------------------ 7435EOM 7436 foreach ( sort keys %want_left_space ) { 7437 print $fh "$_\t$want_left_space{$_}\n"; 7438 } 7439} 7440 7441sub dump_want_right_space { 7442 my $fh = shift; 7443 local $" = "\n"; 7444 print $fh <<EOM; 7445These values are the main control of whitespace to the right of a token type; 7446They may be altered with the -wrs parameter. 
7447For a list of token types, use perltidy --dump-token-types (-dtt) 7448 1 means the token wants a space to its right 7449-1 means the token does not want a space to its right 7450------------------------------------------------------------------------ 7451EOM 7452 foreach ( sort keys %want_right_space ) { 7453 print $fh "$_\t$want_right_space{$_}\n"; 7454 } 7455} 7456 7457{ # begin is_essential_whitespace 7458 7459 my %is_sort_grep_map; 7460 my %is_for_foreach; 7461 7462 BEGIN { 7463 7464 @_ = qw(sort grep map); 7465 @is_sort_grep_map{@_} = (1) x scalar(@_); 7466 7467 @_ = qw(for foreach); 7468 @is_for_foreach{@_} = (1) x scalar(@_); 7469 7470 } 7471 7472 sub is_essential_whitespace { 7473 7474 # Essential whitespace means whitespace which cannot be safely deleted 7475 # without risking the introduction of a syntax error. 7476 # We are given three tokens and their types: 7477 # ($tokenl, $typel) is the token to the left of the space in question 7478 # ($tokenr, $typer) is the token to the right of the space in question 7479 # ($tokenll, $typell) is previous nonblank token to the left of $tokenl 7480 # 7481 # This is a slow routine but is not needed too often except when -mangle 7482 # is used. 7483 # 7484 # Note: This routine should almost never need to be changed. It is 7485 # for avoiding syntax problems rather than for formatting. 7486 my ( $tokenll, $typell, $tokenl, $typel, $tokenr, $typer ) = @_; 7487 7488 my $result = 7489 7490 # never combine two bare words or numbers 7491 # examples: and ::ok(1) 7492 # return ::spw(...) 
7493 # for bla::bla:: abc 7494 # example is "%overload:: and" in files Dumpvalue.pm or colonbug.pl 7495 # $input eq"quit" to make $inputeq"quit" 7496 # my $size=-s::SINK if $file; <==OK but we won't do it 7497 # don't join something like: for bla::bla:: abc 7498 # example is "%overload:: and" in files Dumpvalue.pm or colonbug.pl 7499 ( ( $tokenl =~ /([\'\w]|\:\:)$/ ) && ( $tokenr =~ /^([\'\w]|\:\:)/ ) ) 7500 7501 # do not combine a number with a concatination dot 7502 # example: pom.caputo: 7503 # $vt100_compatible ? "\e[0;0H" : ('-' x 78 . "\n"); 7504 || ( ( $typel eq 'n' ) && ( $tokenr eq '.' ) ) 7505 || ( ( $typer eq 'n' ) && ( $tokenl eq '.' ) ) 7506 7507 # do not join a minus with a bare word, because you might form 7508 # a file test operator. Example from Complex.pm: 7509 # if (CORE::abs($z - i) < $eps); "z-i" would be taken as a file test. 7510 || ( ( $tokenl eq '-' ) && ( $tokenr =~ /^[_A-Za-z]$/ ) ) 7511 7512 # and something like this could become ambiguous without space 7513 # after the '-': 7514 # use constant III=>1; 7515 # $a = $b - III; 7516 # and even this: 7517 # $a = - III; 7518 || ( ( $tokenl eq '-' ) 7519 && ( $typer =~ /^[wC]$/ && $tokenr =~ /^[_A-Za-z]/ ) ) 7520 7521 # '= -' should not become =- or you will get a warning 7522 # about reversed -= 7523 # || ($tokenr eq '-') 7524 7525 # keep a space between a quote and a bareword to prevent the 7526 # bareword from becomming a quote modifier. 7527 || ( ( $typel eq 'Q' ) && ( $tokenr =~ /^[a-zA-Z_]/ ) ) 7528 7529 # keep a space between a token ending in '$' and any word; 7530 # this caused trouble: "die @$ if $@" 7531 || ( ( $typel eq 'i' && $tokenl =~ /\$$/ ) 7532 && ( $tokenr =~ /^[a-zA-Z_]/ ) ) 7533 7534 # perl is very fussy about spaces before << 7535 || ( $tokenr =~ /^\<\</ ) 7536 7537 # avoid combining tokens to create new meanings. Example: 7538 # $a+ +$b must not become $a++$b 7539 || ( $is_digraph{ $tokenl . $tokenr } ) 7540 || ( $is_trigraph{ $tokenl . 
$tokenr } ) 7541 7542 # another example: do not combine these two &'s: 7543 # allow_options & &OPT_EXECCGI 7544 || ( $is_digraph{ $tokenl . substr( $tokenr, 0, 1 ) } ) 7545 7546 # don't combine $$ or $# with any alphanumeric 7547 # (testfile mangle.t with --mangle) 7548 || ( ( $tokenl =~ /^\$[\$\#]$/ ) && ( $tokenr =~ /^\w/ ) ) 7549 7550 # retain any space after possible filehandle 7551 # (testfiles prnterr1.t with --extrude and mangle.t with --mangle) 7552 || ( $typel eq 'Z' ) 7553 7554 # Perl is sensitive to whitespace after the + here: 7555 # $b = xvals $a + 0.1 * yvals $a; 7556 || ( $typell eq 'Z' && $typel =~ /^[\/\?\+\-\*]$/ ) 7557 7558 # keep paren separate in 'use Foo::Bar ()' 7559 || ( $tokenr eq '(' 7560 && $typel eq 'w' 7561 && $typell eq 'k' 7562 && $tokenll eq 'use' ) 7563 7564 # keep any space between filehandle and paren: 7565 # file mangle.t with --mangle: 7566 || ( $typel eq 'Y' && $tokenr eq '(' ) 7567 7568 # retain any space after here doc operator ( hereerr.t) 7569 || ( $typel eq 'h' ) 7570 7571 # be careful with a space around ++ and --, to avoid ambiguity as to 7572 # which token it applies 7573 || ( ( $typer =~ /^(pp|mm)$/ ) && ( $tokenl !~ /^[\;\{\(\[]/ ) ) 7574 || ( ( $typel =~ /^(\+\+|\-\-)$/ ) && ( $tokenr !~ /^[\;\}\)\]]/ ) ) 7575 7576 # need space after foreach my; for example, this will fail in 7577 # older versions of Perl: 7578 # foreach my$ft(@filetypes)... 7579 || ( 7580 $tokenl eq 'my' 7581 7582 # /^(for|foreach)$/ 7583 && $is_for_foreach{$tokenll} 7584 && $tokenr =~ /^\$/ 7585 ) 7586 7587 # must have space between grep and left paren; "grep(" will fail 7588 || ( $tokenr eq '(' && $is_sort_grep_map{$tokenl} ) 7589 7590 # don't stick numbers next to left parens, as in: 7591 #use Mail::Internet 1.28 (); (see Entity.pm, Head.pm, Test.pm) 7592 || ( ( $typel eq 'n' ) && ( $tokenr eq '(' ) ) 7593 7594 # We must be sure that a space between a ? and a quoted string 7595 # remains if the space before the ? remains. 
[Loca.pm, lockarea] 7596 # ie, 7597 # $b=join $comma ? ',' : ':', @_; # ok 7598 # $b=join $comma?',' : ':', @_; # ok! 7599 # $b=join $comma ?',' : ':', @_; # error! 7600 # Not really required: 7601 ## || ( ( $typel eq '?' ) && ( $typer eq 'Q' ) ) 7602 7603 # do not remove space between an '&' and a bare word because 7604 # it may turn into a function evaluation, like here 7605 # between '&' and 'O_ACCMODE', producing a syntax error [File.pm] 7606 # $opts{rdonly} = (($opts{mode} & O_ACCMODE) == O_RDONLY); 7607 || ( ( $typel eq '&' ) && ( $tokenr =~ /^[a-zA-Z_]/ ) ) 7608 7609 ; # the value of this long logic sequence is the result we want 7610 return $result; 7611 } 7612} 7613 7614sub set_white_space_flag { 7615 7616 # This routine examines each pair of nonblank tokens and 7617 # sets values for array @white_space_flag. 7618 # 7619 # $white_space_flag[$j] is a flag indicating whether a white space 7620 # BEFORE token $j is needed, with the following values: 7621 # 7622 # -1 do not want a space before token $j 7623 # 0 optional space or $j is a whitespace 7624 # 1 want a space before token $j 7625 # 7626 # 7627 # The values for the first token will be defined based 7628 # upon the contents of the "to_go" output array. 7629 # 7630 # Note: retain debug print statements because they are usually 7631 # required after adding new token types. 7632 7633 BEGIN { 7634 7635 # initialize these global hashes, which control the use of 7636 # whitespace around tokens: 7637 # 7638 # %binary_ws_rules 7639 # %want_left_space 7640 # %want_right_space 7641 # %space_after_keyword 7642 # 7643 # Many token types are identical to the tokens themselves. 7644 # See the tokenizer for a complete list. 
Here are some special types: 7645 # k = perl keyword 7646 # f = semicolon in for statement 7647 # m = unary minus 7648 # p = unary plus 7649 # Note that :: is excluded since it should be contained in an identifier 7650 # Note that '->' is excluded because it never gets space 7651 # parentheses and brackets are excluded since they are handled specially 7652 # curly braces are included but may be overridden by logic, such as 7653 # newline logic. 7654 7655 # NEW_TOKENS: create a whitespace rule here. This can be as 7656 # simple as adding your new letter to @spaces_both_sides, for 7657 # example. 7658 7659 @_ = qw" L { ( [ "; 7660 @is_opening_type{@_} = (1) x scalar(@_); 7661 7662 @_ = qw" R } ) ] "; 7663 @is_closing_type{@_} = (1) x scalar(@_); 7664 7665 my @spaces_both_sides = qw" 7666 + - * / % ? = . : x < > | & ^ .. << >> ** && .. || // => += -= 7667 .= %= x= &= |= ^= *= <> <= >= == =~ !~ /= != ... <<= >>= ~~ !~~ 7668 &&= ||= //= <=> A k f w F n C Y U G v 7669 "; 7670 7671 my @spaces_left_side = qw" 7672 t ! 
~ m p { \ h pp mm Z j 7673 "; 7674 push( @spaces_left_side, '#' ); # avoids warning message 7675 7676 my @spaces_right_side = qw" 7677 ; } ) ] R J ++ -- **= 7678 "; 7679 push( @spaces_right_side, ',' ); # avoids warning message 7680 @want_left_space{@spaces_both_sides} = (1) x scalar(@spaces_both_sides); 7681 @want_right_space{@spaces_both_sides} = 7682 (1) x scalar(@spaces_both_sides); 7683 @want_left_space{@spaces_left_side} = (1) x scalar(@spaces_left_side); 7684 @want_right_space{@spaces_left_side} = (-1) x scalar(@spaces_left_side); 7685 @want_left_space{@spaces_right_side} = 7686 (-1) x scalar(@spaces_right_side); 7687 @want_right_space{@spaces_right_side} = 7688 (1) x scalar(@spaces_right_side); 7689 $want_left_space{'L'} = WS_NO; 7690 $want_left_space{'->'} = WS_NO; 7691 $want_right_space{'->'} = WS_NO; 7692 $want_left_space{'**'} = WS_NO; 7693 $want_right_space{'**'} = WS_NO; 7694 7695 # hash type information must stay tightly bound 7696 # as in : ${xxxx} 7697 $binary_ws_rules{'i'}{'L'} = WS_NO; 7698 $binary_ws_rules{'i'}{'{'} = WS_YES; 7699 $binary_ws_rules{'k'}{'{'} = WS_YES; 7700 $binary_ws_rules{'U'}{'{'} = WS_YES; 7701 $binary_ws_rules{'i'}{'['} = WS_NO; 7702 $binary_ws_rules{'R'}{'L'} = WS_NO; 7703 $binary_ws_rules{'R'}{'{'} = WS_NO; 7704 $binary_ws_rules{'t'}{'L'} = WS_NO; 7705 $binary_ws_rules{'t'}{'{'} = WS_NO; 7706 $binary_ws_rules{'}'}{'L'} = WS_NO; 7707 $binary_ws_rules{'}'}{'{'} = WS_NO; 7708 $binary_ws_rules{'$'}{'L'} = WS_NO; 7709 $binary_ws_rules{'$'}{'{'} = WS_NO; 7710 $binary_ws_rules{'@'}{'L'} = WS_NO; 7711 $binary_ws_rules{'@'}{'{'} = WS_NO; 7712 $binary_ws_rules{'='}{'L'} = WS_YES; 7713 7714 # the following includes ') {' 7715 # as in : if ( xxx ) { yyy } 7716 $binary_ws_rules{']'}{'L'} = WS_NO; 7717 $binary_ws_rules{']'}{'{'} = WS_NO; 7718 $binary_ws_rules{')'}{'{'} = WS_YES; 7719 $binary_ws_rules{')'}{'['} = WS_NO; 7720 $binary_ws_rules{']'}{'['} = WS_NO; 7721 $binary_ws_rules{']'}{'{'} = WS_NO; 7722 $binary_ws_rules{'}'}{'['} = 
WS_NO; 7723 $binary_ws_rules{'R'}{'['} = WS_NO; 7724 7725 $binary_ws_rules{']'}{'++'} = WS_NO; 7726 $binary_ws_rules{']'}{'--'} = WS_NO; 7727 $binary_ws_rules{')'}{'++'} = WS_NO; 7728 $binary_ws_rules{')'}{'--'} = WS_NO; 7729 7730 $binary_ws_rules{'R'}{'++'} = WS_NO; 7731 $binary_ws_rules{'R'}{'--'} = WS_NO; 7732 7733 ######################################################## 7734 # should no longer be necessary (see niek.pl) 7735 ##$binary_ws_rules{'k'}{':'} = WS_NO; # keep colon with label 7736 ##$binary_ws_rules{'w'}{':'} = WS_NO; 7737 ######################################################## 7738 $binary_ws_rules{'i'}{'Q'} = WS_YES; 7739 $binary_ws_rules{'n'}{'('} = WS_YES; # occurs in 'use package n ()' 7740 7741 # FIXME: we need to split 'i' into variables and functions 7742 # and have no space for functions but space for variables. For now, 7743 # I have a special patch in the special rules below 7744 $binary_ws_rules{'i'}{'('} = WS_NO; 7745 7746 $binary_ws_rules{'w'}{'('} = WS_NO; 7747 $binary_ws_rules{'w'}{'{'} = WS_YES; 7748 } 7749 my ( $jmax, $rtokens, $rtoken_type, $rblock_type ) = @_; 7750 my ( $last_token, $last_type, $last_block_type, $token, $type, 7751 $block_type ); 7752 my (@white_space_flag); 7753 my $j_tight_closing_paren = -1; 7754 7755 if ( $max_index_to_go >= 0 ) { 7756 $token = $tokens_to_go[$max_index_to_go]; 7757 $type = $types_to_go[$max_index_to_go]; 7758 $block_type = $block_type_to_go[$max_index_to_go]; 7759 } 7760 else { 7761 $token = ' '; 7762 $type = 'b'; 7763 $block_type = ''; 7764 } 7765 7766 # loop over all tokens 7767 my ( $j, $ws ); 7768 7769 for ( $j = 0 ; $j <= $jmax ; $j++ ) { 7770 7771 if ( $$rtoken_type[$j] eq 'b' ) { 7772 $white_space_flag[$j] = WS_OPTIONAL; 7773 next; 7774 } 7775 7776 # set a default value, to be changed as needed 7777 $ws = undef; 7778 $last_token = $token; 7779 $last_type = $type; 7780 $last_block_type = $block_type; 7781 $token = $$rtokens[$j]; 7782 $type = $$rtoken_type[$j]; 7783 $block_type = 
$$rblock_type[$j]; 7784 7785 #--------------------------------------------------------------- 7786 # section 1: 7787 # handle space on the inside of opening braces 7788 #--------------------------------------------------------------- 7789 7790 # /^[L\{\(\[]$/ 7791 if ( $is_opening_type{$last_type} ) { 7792 7793 $j_tight_closing_paren = -1; 7794 7795 # let's keep empty matched braces together: () {} [] 7796 # except for BLOCKS 7797 if ( $token eq $matching_token{$last_token} ) { 7798 if ($block_type) { 7799 $ws = WS_YES; 7800 } 7801 else { 7802 $ws = WS_NO; 7803 } 7804 } 7805 else { 7806 7807 # we're considering the right of an opening brace 7808 # tightness = 0 means always pad inside with space 7809 # tightness = 1 means pad inside if "complex" 7810 # tightness = 2 means never pad inside with space 7811 7812 my $tightness; 7813 if ( $last_type eq '{' 7814 && $last_token eq '{' 7815 && $last_block_type ) 7816 { 7817 $tightness = $rOpts_block_brace_tightness; 7818 } 7819 else { $tightness = $tightness{$last_token} } 7820 7821 if ( $tightness <= 0 ) { 7822 $ws = WS_YES; 7823 } 7824 elsif ( $tightness > 1 ) { 7825 $ws = WS_NO; 7826 } 7827 else { 7828 7829 # Patch to count '-foo' as single token so that 7830 # each of $a{-foo} and $a{foo} and $a{'foo'} do 7831 # not get spaces with default formatting. 7832 my $j_here = $j; 7833 ++$j_here 7834 if ( $token eq '-' 7835 && $last_token eq '{' 7836 && $$rtoken_type[ $j + 1 ] eq 'w' ); 7837 7838 # $j_next is where a closing token should be if 7839 # the container has a single token 7840 my $j_next = 7841 ( $$rtoken_type[ $j_here + 1 ] eq 'b' ) 7842 ? 
$j_here + 2 7843 : $j_here + 1; 7844 my $tok_next = $$rtokens[$j_next]; 7845 my $type_next = $$rtoken_type[$j_next]; 7846 7847 # for tightness = 1, if there is just one token 7848 # within the matching pair, we will keep it tight 7849 if ( 7850 $tok_next eq $matching_token{$last_token} 7851 7852 # but watch out for this: [ [ ] (misc.t) 7853 && $last_token ne $token 7854 ) 7855 { 7856 7857 # remember where to put the space for the closing paren 7858 $j_tight_closing_paren = $j_next; 7859 $ws = WS_NO; 7860 } 7861 else { 7862 $ws = WS_YES; 7863 } 7864 } 7865 } 7866 } # done with opening braces and brackets 7867 my $ws_1 = $ws 7868 if FORMATTER_DEBUG_FLAG_WHITE; 7869 7870 #--------------------------------------------------------------- 7871 # section 2: 7872 # handle space on inside of closing brace pairs 7873 #--------------------------------------------------------------- 7874 7875 # /[\}\)\]R]/ 7876 if ( $is_closing_type{$type} ) { 7877 7878 if ( $j == $j_tight_closing_paren ) { 7879 7880 $j_tight_closing_paren = -1; 7881 $ws = WS_NO; 7882 } 7883 else { 7884 7885 if ( !defined($ws) ) { 7886 7887 my $tightness; 7888 if ( $type eq '}' && $token eq '}' && $block_type ) { 7889 $tightness = $rOpts_block_brace_tightness; 7890 } 7891 else { $tightness = $tightness{$token} } 7892 7893 $ws = ( $tightness > 1 ) ? 
WS_NO : WS_YES; 7894 } 7895 } 7896 } 7897 7898 my $ws_2 = $ws 7899 if FORMATTER_DEBUG_FLAG_WHITE; 7900 7901 #--------------------------------------------------------------- 7902 # section 3: 7903 # use the binary table 7904 #--------------------------------------------------------------- 7905 if ( !defined($ws) ) { 7906 $ws = $binary_ws_rules{$last_type}{$type}; 7907 } 7908 my $ws_3 = $ws 7909 if FORMATTER_DEBUG_FLAG_WHITE; 7910 7911 #--------------------------------------------------------------- 7912 # section 4: 7913 # some special cases 7914 #--------------------------------------------------------------- 7915 if ( $token eq '(' ) { 7916 7917 # This will have to be tweaked as tokenization changes. 7918 # We usually want a space at '} (', for example: 7919 # map { 1 * $_; } ( $y, $M, $w, $d, $h, $m, $s ); 7920 # 7921 # But not others: 7922 # &{ $_->[1] }( delete $_[$#_]{ $_->[0] } ); 7923 # At present, the above & block is marked as type L/R so this case 7924 # won't go through here. 7925 if ( $last_type eq '}' ) { $ws = WS_YES } 7926 7927 # NOTE: some older versions of Perl had occasional problems if 7928 # spaces are introduced between keywords or functions and opening 7929 # parens. So the default is not to do this except is certain 7930 # cases. The current Perl seems to tolerate spaces. 7931 7932 # Space between keyword and '(' 7933 elsif ( $last_type eq 'k' ) { 7934 $ws = WS_NO 7935 unless ( $rOpts_space_keyword_paren 7936 || $space_after_keyword{$last_token} ); 7937 } 7938 7939 # Space between function and '(' 7940 # ----------------------------------------------------- 7941 # 'w' and 'i' checks for something like: 7942 # myfun( &myfun( ->myfun( 7943 # ----------------------------------------------------- 7944 elsif (( $last_type =~ /^[wU]$/ ) 7945 || ( $last_type =~ /^[wi]$/ && $last_token =~ /^(\&|->)/ ) ) 7946 { 7947 $ws = WS_NO unless ($rOpts_space_function_paren); 7948 } 7949 7950 # space between something like $i and ( in 7951 # for $i ( 0 .. 
20 ) { 7952 # FIXME: eventually, type 'i' needs to be split into multiple 7953 # token types so this can be a hardwired rule. 7954 elsif ( $last_type eq 'i' && $last_token =~ /^[\$\%\@]/ ) { 7955 $ws = WS_YES; 7956 } 7957 7958 # allow constant function followed by '()' to retain no space 7959 elsif ( $last_type eq 'C' && $$rtokens[ $j + 1 ] eq ')' ) { 7960 $ws = WS_NO; 7961 } 7962 } 7963 7964 # patch for SWITCH/CASE: make space at ']{' optional 7965 # since the '{' might begin a case or when block 7966 elsif ( ( $token eq '{' && $type ne 'L' ) && $last_token eq ']' ) { 7967 $ws = WS_OPTIONAL; 7968 } 7969 7970 # keep space between 'sub' and '{' for anonymous sub definition 7971 if ( $type eq '{' ) { 7972 if ( $last_token eq 'sub' ) { 7973 $ws = WS_YES; 7974 } 7975 7976 # this is needed to avoid no space in '){' 7977 if ( $last_token eq ')' && $token eq '{' ) { $ws = WS_YES } 7978 7979 # avoid any space before the brace or bracket in something like 7980 # @opts{'a','b',...} 7981 if ( $last_type eq 'i' && $last_token =~ /^\@/ ) { 7982 $ws = WS_NO; 7983 } 7984 } 7985 7986 elsif ( $type eq 'i' ) { 7987 7988 # never a space before -> 7989 if ( $token =~ /^\-\>/ ) { 7990 $ws = WS_NO; 7991 } 7992 } 7993 7994 # retain any space between '-' and bare word 7995 elsif ( $type eq 'w' || $type eq 'C' ) { 7996 $ws = WS_OPTIONAL if $last_type eq '-'; 7997 7998 # never a space before -> 7999 if ( $token =~ /^\-\>/ ) { 8000 $ws = WS_NO; 8001 } 8002 } 8003 8004 # retain any space between '-' and bare word 8005 # example: avoid space between 'USER' and '-' here: 8006 # $myhash{USER-NAME}='steve'; 8007 elsif ( $type eq 'm' || $type eq '-' ) { 8008 $ws = WS_OPTIONAL if ( $last_type eq 'w' ); 8009 } 8010 8011 # always space before side comment 8012 elsif ( $type eq '#' ) { $ws = WS_YES if $j > 0 } 8013 8014 # always preserver whatever space was used after a possible 8015 # filehandle (except _) or here doc operator 8016 if ( 8017 $type ne '#' 8018 && ( ( $last_type eq 'Z' && $last_token 
ne '_' ) 8019 || $last_type eq 'h' ) 8020 ) 8021 { 8022 $ws = WS_OPTIONAL; 8023 } 8024 8025 my $ws_4 = $ws 8026 if FORMATTER_DEBUG_FLAG_WHITE; 8027 8028 #--------------------------------------------------------------- 8029 # section 5: 8030 # default rules not covered above 8031 #--------------------------------------------------------------- 8032 # if we fall through to here, 8033 # look at the pre-defined hash tables for the two tokens, and 8034 # if (they are equal) use the common value 8035 # if (either is zero or undef) use the other 8036 # if (either is -1) use it 8037 # That is, 8038 # left vs right 8039 # 1 vs 1 --> 1 8040 # 0 vs 0 --> 0 8041 # -1 vs -1 --> -1 8042 # 8043 # 0 vs -1 --> -1 8044 # 0 vs 1 --> 1 8045 # 1 vs 0 --> 1 8046 # -1 vs 0 --> -1 8047 # 8048 # -1 vs 1 --> -1 8049 # 1 vs -1 --> -1 8050 if ( !defined($ws) ) { 8051 my $wl = $want_left_space{$type}; 8052 my $wr = $want_right_space{$last_type}; 8053 if ( !defined($wl) ) { $wl = 0 } 8054 if ( !defined($wr) ) { $wr = 0 } 8055 $ws = ( ( $wl == $wr ) || ( $wl == -1 ) || !$wr ) ? $wl : $wr; 8056 } 8057 8058 if ( !defined($ws) ) { 8059 $ws = 0; 8060 write_diagnostics( 8061 "WS flag is undefined for tokens $last_token $token\n"); 8062 } 8063 8064 # Treat newline as a whitespace. Otherwise, we might combine 8065 # 'Send' and '-recipients' here according to the above rules: 8066 # my $msg = new Fax::Send 8067 # -recipients => $to, 8068 # -data => $data; 8069 if ( $ws == 0 && $j == 0 ) { $ws = 1 } 8070 8071 if ( ( $ws == 0 ) 8072 && $j > 0 8073 && $j < $jmax 8074 && ( $last_type !~ /^[Zh]$/ ) ) 8075 { 8076 8077 # If this happens, we have a non-fatal but undesirable 8078 # hole in the above rules which should be patched. 
write_diagnostics(
                "WS flag is zero for tokens $last_token $token\n");
        }
        $white_space_flag[$j] = $ws;

        FORMATTER_DEBUG_FLAG_WHITE && do {
            my $str = substr( $last_token, 0, 15 );
            $str .= ' ' x ( 16 - length($str) );
            if ( !defined($ws_1) ) { $ws_1 = "*" }
            if ( !defined($ws_2) ) { $ws_2 = "*" }
            if ( !defined($ws_3) ) { $ws_3 = "*" }
            if ( !defined($ws_4) ) { $ws_4 = "*" }
            print
"WHITE: i=$j $str $last_type $type $ws_1 : $ws_2 : $ws_3 : $ws_4 : $ws \n";
        };
    }
    return \@white_space_flag;
}

{    # begin print_line_of_tokens

    my $rtoken_type;
    my $rtokens;
    my $rlevels;
    my $rslevels;
    my $rblock_type;
    my $rcontainer_type;
    my $rcontainer_environment;
    my $rtype_sequence;
    my $input_line;
    my $rnesting_tokens;
    my $rci_levels;
    my $rnesting_blocks;

    my $in_quote;
    my $python_indentation_level;

    # These local token variables are stored by store_token_to_go:
    my $block_type;
    my $ci_level;
    my $container_environment;
    my $container_type;
    my $in_continued_quote;
    my $level;
    my $nesting_blocks;
    my $no_internal_newlines;
    my $slevel;
    my $token;
    my $type;
    my $type_sequence;

    # extract_token($j)
    #
    # Make token number $j of the current input line the "current token"
    # by copying its entry from each of the per-line array references
    # ($rtokens, $rtoken_type, ...) into the shared closure variables.
    # The two state variables $in_continued_quote and
    # $no_internal_newlines are not assigned here.
    sub extract_token {
        my ($jtok) = @_;

        # the token itself
        $token         = $rtokens->[$jtok];
        $type          = $rtoken_type->[$jtok];
        $type_sequence = $rtype_sequence->[$jtok];

        # the block and container it belongs to
        $block_type            = $rblock_type->[$jtok];
        $container_type        = $rcontainer_type->[$jtok];
        $container_environment = $rcontainer_environment->[$jtok];
        $nesting_blocks        = $rnesting_blocks->[$jtok];

        # its indentation data
        $level    = $rlevels->[$jtok];
        $slevel   = $rslevels->[$jtok];
        $ci_level = $rci_levels->[$jtok];
    }

    {

        # One-deep snapshot of the current-token variables.  The field
        # order in the two routines below must be kept the same.
        my @saved_token;

        # Take a snapshot of all twelve current-token variables,
        # replacing any earlier snapshot.
        sub save_current_token {

            @saved_token = (
                $block_type,
                $ci_level,
                $container_environment,
                $container_type,
                $in_continued_quote,
                $level,
                $nesting_blocks,
                $no_internal_newlines,
                $slevel,
                $token,
                $type,
                $type_sequence,
            );
        }

        # Copy the most recent snapshot back into the current-token
        # variables.
        sub restore_current_token {
            (
                $block_type,
                $ci_level,
                $container_environment,
                $container_type,
                $in_continued_quote,
                $level,
                $nesting_blocks,
                $no_internal_newlines,
                $slevel,
                $token,
                $type,
                $type_sequence,
            ) = @saved_token;
        }
    }

    # store_token_to_go( [$force_nobreak] )
    #
    # Append the current token to the batch being collected in the
    # '_to_go' arrays.  This is called once for each output token.
    # The no-break flag stored with the token is 1 if a true argument
    # is given; otherwise it is the current value of
    # $no_internal_newlines.
    sub store_token_to_go {

        my $nobreak = $_[0] ? 1 : $no_internal_newlines;

        # open a new slot at the end of the batch
        my $i_new = ++$max_index_to_go;

        # values taken from the current token
        $tokens_to_go[$i_new]                = $token;
        $types_to_go[$i_new]                 = $type;
        $nobreak_to_go[$i_new]               = $nobreak;
        $block_type_to_go[$i_new]            = $block_type;
        $type_sequence_to_go[$i_new]         = $type_sequence;
        $container_environment_to_go[$i_new] = $container_environment;
        $nesting_blocks_to_go[$i_new]        = $nesting_blocks;
        $ci_levels_to_go[$i_new]             = $ci_level;

        # fields which always start out with fixed values
        $old_breakpoint_to_go[$i_new]    = 0;
        $forced_breakpoint_to_go[$i_new] = 0;
        $mate_index_to_go[$i_new]        = -1;
        $matching_token_to_go[$i_new]    = '';
        $bond_strength_to_go[$i_new]     = 0;

        # Note: negative levels are currently retained as a diagnostic so that
        # the 'final indentation level' is correctly reported for bad scripts.
        # But this means that every use of $level as an index must be checked.
        # If this becomes too much of a problem, we might give up and just clip
        # them at zero.
        ## $levels_to_go[$max_index_to_go] = ( $level > 0 ) ? $level : 0;
        $levels_to_go[$i_new] = $level;

        # a negative nesting depth, on the other hand, is clipped at zero
        $nesting_depth_to_go[$i_new] = ( $slevel >= 0 ) ? $slevel : 0;

        # running total of token lengths; the entry after this token is
        # the entry for this token plus the length of this token
        $lengths_to_go[ $i_new + 1 ] = $lengths_to_go[$i_new] + length($token);

        # Define the indentation that this token would have if it started
        # a new line.  We have to do this now because we need to know this
        # when considering one-line blocks.
        set_leading_whitespace( $level, $ci_level, $in_continued_quote );

        # shift the record of the two most recent nonblank tokens
        if ( $type ne 'b' ) {
            (
                $last_last_nonblank_index_to_go,
                $last_last_nonblank_type_to_go,
                $last_last_nonblank_token_to_go,
              )
              = (
                $last_nonblank_index_to_go,
                $last_nonblank_type_to_go,
                $last_nonblank_token_to_go,
              );
            (
                $last_nonblank_index_to_go,
                $last_nonblank_type_to_go,
                $last_nonblank_token_to_go,
            ) = ( $max_index_to_go, $type, $token );

            $comma_count_in_batch++ if ( $type eq ',' );
        }

        FORMATTER_DEBUG_FLAG_STORE && do {
            my ( $a, $b, $c ) = caller();
            print
"STORE: from $a $c: storing token $token type $type lev=$level slev=$slevel at $max_index_to_go\n";
        };
    }

    # insert_new_token_to_go( $token, $type, $slevel, $no_internal_newlines )
    #
    # Store an extra token which is not in the input token list.  The
    # four arguments temporarily replace the corresponding current-token
    # variables; the level, nesting blocks, continuation level and
    # container environment are copied from the token now at
    # $max_index_to_go.  The current-token variables are put back as
    # they were before returning.
    sub insert_new_token_to_go {

        save_current_token();
        ( $token, $type, $slevel, $no_internal_newlines ) = @_;

        # there should already be a token in the batch to copy from;
        # note that we only warn and then continue if there is not
        warning("code bug: bad call to insert_new_token_to_go\n")
          if ( $max_index_to_go == UNDEFINED_INDEX );

        my $i_prev = $max_index_to_go;

        # FIXME: it seems to be necessary to use the next, rather than
        # previous, value of this variable when creating a new blank (align.t)
        #my $slevel = $nesting_depth_to_go[$max_index_to_go];
        ( $level, $nesting_blocks, $ci_level, $container_environment ) = (
            $levels_to_go[$i_prev],
            $nesting_blocks_to_go[$i_prev],
            $ci_levels_to_go[$i_prev],
            $container_environment_to_go[$i_prev],
        );

        # the new token is not in a continued quote, and it has no block
        # type or sequence number
        ( $in_continued_quote, $block_type, $type_sequence ) = ( 0, "", "" );

        store_token_to_go();
        restore_current_token();
        return;
    }

    sub print_line_of_tokens {

        my $line_of_tokens = shift;

        # This routine is called once per input line to process all of
        # the tokens on that line.  This is the first stage of
        # beautification.
        #
        # Full-line comments and blank lines may be processed immediately.
        #
        # For normal lines of code, the tokens are stored one-by-one,
        # via calls to 'sub store_token_to_go', until a known line break
        # point is reached.  Then, the batch of collected tokens is
        # passed along to 'sub output_line_to_go' for further
        # processing.  This routine decides if there should be
        # whitespace between each pair of non-white tokens, so later
        # routines only need to decide on any additional line breaks.
        # Any whitespace is initially a single space character.  Later,
        # the vertical aligner may expand that to be multiple space
        # characters if necessary for alignment.
8274 8275 # extract input line number for error messages 8276 $input_line_number = $line_of_tokens->{_line_number}; 8277 8278 $rtoken_type = $line_of_tokens->{_rtoken_type}; 8279 $rtokens = $line_of_tokens->{_rtokens}; 8280 $rlevels = $line_of_tokens->{_rlevels}; 8281 $rslevels = $line_of_tokens->{_rslevels}; 8282 $rblock_type = $line_of_tokens->{_rblock_type}; 8283 $rcontainer_type = $line_of_tokens->{_rcontainer_type}; 8284 $rcontainer_environment = $line_of_tokens->{_rcontainer_environment}; 8285 $rtype_sequence = $line_of_tokens->{_rtype_sequence}; 8286 $input_line = $line_of_tokens->{_line_text}; 8287 $rnesting_tokens = $line_of_tokens->{_rnesting_tokens}; 8288 $rci_levels = $line_of_tokens->{_rci_levels}; 8289 $rnesting_blocks = $line_of_tokens->{_rnesting_blocks}; 8290 8291 $in_continued_quote = $starting_in_quote = 8292 $line_of_tokens->{_starting_in_quote}; 8293 $in_quote = $line_of_tokens->{_ending_in_quote}; 8294 $ending_in_quote = $in_quote; 8295 $python_indentation_level = 8296 $line_of_tokens->{_python_indentation_level}; 8297 8298 my $j; 8299 my $j_next; 8300 my $jmax; 8301 my $next_nonblank_token; 8302 my $next_nonblank_token_type; 8303 my $rwhite_space_flag; 8304 8305 $jmax = @$rtokens - 1; 8306 $block_type = ""; 8307 $container_type = ""; 8308 $container_environment = ""; 8309 $type_sequence = ""; 8310 $no_internal_newlines = 1 - $rOpts_add_newlines; 8311 $is_static_block_comment = 0; 8312 8313 # Handle a continued quote.. 8314 if ($in_continued_quote) { 8315 8316 # A line which is entirely a quote or pattern must go out 8317 # verbatim. Note: the \n is contained in $input_line. 
8318 if ( $jmax <= 0 ) { 8319 if ( ( $input_line =~ "\t" ) ) { 8320 note_embedded_tab(); 8321 } 8322 write_unindented_line("$input_line"); 8323 $last_line_had_side_comment = 0; 8324 return; 8325 } 8326 8327 # prior to version 20010406, perltidy had a bug which placed 8328 # continuation indentation before the last line of some multiline 8329 # quotes and patterns -- exactly the lines passing this way. 8330 # To help find affected lines in scripts run with these 8331 # versions, run with '-chk', and it will warn of any quotes or 8332 # patterns which might have been modified by these early 8333 # versions. 8334 if ( $rOpts->{'check-multiline-quotes'} && $input_line =~ /^ / ) { 8335 warning( 8336"-chk: please check this line for extra leading whitespace\n" 8337 ); 8338 } 8339 } 8340 8341 # Write line verbatim if we are in a formatting skip section 8342 if ($in_format_skipping_section) { 8343 write_unindented_line("$input_line"); 8344 $last_line_had_side_comment = 0; 8345 8346 # Note: extra space appended to comment simplifies pattern matching 8347 if ( $jmax == 0 8348 && $$rtoken_type[0] eq '#' 8349 && ( $$rtokens[0] . " " ) =~ /$format_skipping_pattern_end/o ) 8350 { 8351 $in_format_skipping_section = 0; 8352 write_logfile_entry("Exiting formatting skip section\n"); 8353 } 8354 return; 8355 } 8356 8357 # See if we are entering a formatting skip section 8358 if ( $rOpts_format_skipping 8359 && $jmax == 0 8360 && $$rtoken_type[0] eq '#' 8361 && ( $$rtokens[0] . " " ) =~ /$format_skipping_pattern_begin/o ) 8362 { 8363 flush(); 8364 $in_format_skipping_section = 1; 8365 write_logfile_entry("Entering formatting skip section\n"); 8366 write_unindented_line("$input_line"); 8367 $last_line_had_side_comment = 0; 8368 return; 8369 } 8370 8371 # delete trailing blank tokens 8372 if ( $jmax > 0 && $$rtoken_type[$jmax] eq 'b' ) { $jmax-- } 8373 8374 # Handle a blank line.. 
8375 if ( $jmax < 0 ) { 8376 8377 # For the 'swallow-optional-blank-lines' option, we delete all 8378 # old blank lines and let the blank line rules generate any 8379 # needed blanks. 8380 if ( !$rOpts_swallow_optional_blank_lines ) { 8381 flush(); 8382 $file_writer_object->write_blank_code_line(); 8383 $last_line_leading_type = 'b'; 8384 } 8385 $last_line_had_side_comment = 0; 8386 return; 8387 } 8388 8389 # see if this is a static block comment (starts with ## by default) 8390 my $is_static_block_comment_without_leading_space = 0; 8391 if ( $jmax == 0 8392 && $$rtoken_type[0] eq '#' 8393 && $rOpts->{'static-block-comments'} 8394 && $input_line =~ /$static_block_comment_pattern/o ) 8395 { 8396 $is_static_block_comment = 1; 8397 $is_static_block_comment_without_leading_space = 8398 substr( $input_line, 0, 1 ) eq '#'; 8399 } 8400 8401 # Check for comments which are line directives 8402 # Treat exactly as static block comments without leading space 8403 # reference: perlsyn, near end, section Plain Old Comments (Not!) 8404 # example: '# line 42 "new_filename.plx"' 8405 if ( 8406 $jmax == 0 8407 && $$rtoken_type[0] eq '#' 8408 && $input_line =~ /^\# \s* 8409 line \s+ (\d+) \s* 8410 (?:\s("?)([^"]+)\2)? \s* 8411 $/x 8412 ) 8413 { 8414 $is_static_block_comment = 1; 8415 $is_static_block_comment_without_leading_space = 1; 8416 } 8417 8418 # create a hanging side comment if appropriate 8419 if ( 8420 $jmax == 0 8421 && $$rtoken_type[0] eq '#' # only token is a comment 8422 && $last_line_had_side_comment # last line had side comment 8423 && $input_line =~ /^\s/ # there is some leading space 8424 && !$is_static_block_comment # do not make static comment hanging 8425 && $rOpts->{'hanging-side-comments'} # user is allowing this 8426 ) 8427 { 8428 8429 # We will insert an empty qw string at the start of the token list 8430 # to force this comment to be a side comment. The vertical aligner 8431 # should then line it up with the previous side comment. 
8432 unshift @$rtoken_type, 'q'; 8433 unshift @$rtokens, ''; 8434 unshift @$rlevels, $$rlevels[0]; 8435 unshift @$rslevels, $$rslevels[0]; 8436 unshift @$rblock_type, ''; 8437 unshift @$rcontainer_type, ''; 8438 unshift @$rcontainer_environment, ''; 8439 unshift @$rtype_sequence, ''; 8440 unshift @$rnesting_tokens, $$rnesting_tokens[0]; 8441 unshift @$rci_levels, $$rci_levels[0]; 8442 unshift @$rnesting_blocks, $$rnesting_blocks[0]; 8443 $jmax = 1; 8444 } 8445 8446 # remember if this line has a side comment 8447 $last_line_had_side_comment = 8448 ( $jmax > 0 && $$rtoken_type[$jmax] eq '#' ); 8449 8450 # Handle a block (full-line) comment.. 8451 if ( ( $jmax == 0 ) && ( $$rtoken_type[0] eq '#' ) ) { 8452 8453 if ( $rOpts->{'delete-block-comments'} ) { return } 8454 8455 if ( $rOpts->{'tee-block-comments'} ) { 8456 $file_writer_object->tee_on(); 8457 } 8458 8459 destroy_one_line_block(); 8460 output_line_to_go(); 8461 8462 # output a blank line before block comments 8463 if ( 8464 $last_line_leading_type !~ /^[#b]$/ 8465 && $rOpts->{'blanks-before-comments'} # only if allowed 8466 && ! 8467 $is_static_block_comment # never before static block comments 8468 ) 8469 { 8470 flush(); # switching to new output stream 8471 $file_writer_object->write_blank_code_line(); 8472 $last_line_leading_type = 'b'; 8473 } 8474 8475 # TRIM COMMENTS -- This could be turned off as a option 8476 $$rtokens[0] =~ s/\s*$//; # trim right end 8477 8478 if ( 8479 $rOpts->{'indent-block-comments'} 8480 && ( !$rOpts->{'indent-spaced-block-comments'} 8481 || $input_line =~ /^\s+/ ) 8482 && !$is_static_block_comment_without_leading_space 8483 ) 8484 { 8485 extract_token(0); 8486 store_token_to_go(); 8487 output_line_to_go(); 8488 } 8489 else { 8490 flush(); # switching to new output stream 8491 $file_writer_object->write_code_line( $$rtokens[0] . 
"\n" ); 8492 $last_line_leading_type = '#'; 8493 } 8494 if ( $rOpts->{'tee-block-comments'} ) { 8495 $file_writer_object->tee_off(); 8496 } 8497 return; 8498 } 8499 8500 # compare input/output indentation except for continuation lines 8501 # (because they have an unknown amount of initial blank space) 8502 # and lines which are quotes (because they may have been outdented) 8503 # Note: this test is placed here because we know the continuation flag 8504 # at this point, which allows us to avoid non-meaningful checks. 8505 my $structural_indentation_level = $$rlevels[0]; 8506 compare_indentation_levels( $python_indentation_level, 8507 $structural_indentation_level ) 8508 unless ( $python_indentation_level < 0 8509 || ( $$rci_levels[0] > 0 ) 8510 || ( ( $python_indentation_level == 0 ) && $$rtoken_type[0] eq 'Q' ) 8511 ); 8512 8513 # Patch needed for MakeMaker. Do not break a statement 8514 # in which $VERSION may be calculated. See MakeMaker.pm; 8515 # this is based on the coding in it. 8516 # The first line of a file that matches this will be eval'd: 8517 # /([\$*])(([\w\:\']*)\bVERSION)\b.*\=/ 8518 # Examples: 8519 # *VERSION = \'1.01'; 8520 # ( $VERSION ) = '$Revision: 1.73 $ ' =~ /\$Revision:\s+([^\s]+)/; 8521 # We will pass such a line straight through without breaking 8522 # it unless -npvl is used 8523 8524 my $is_VERSION_statement = 0; 8525 8526 if ( 8527 !$saw_VERSION_in_this_file 8528 && $input_line =~ /VERSION/ # quick check to reject most lines 8529 && $input_line =~ /([\$*])(([\w\:\']*)\bVERSION)\b.*\=/ 8530 ) 8531 { 8532 $saw_VERSION_in_this_file = 1; 8533 $is_VERSION_statement = 1; 8534 write_logfile_entry("passing VERSION line; -npvl deactivates\n"); 8535 $no_internal_newlines = 1; 8536 } 8537 8538 # take care of indentation-only 8539 # NOTE: In previous versions we sent all qw lines out immediately here. 8540 # No longer doing this: also write a line which is entirely a 'qw' list 8541 # to allow stacking of opening and closing tokens. 
Note that interior 8542 # qw lines will still go out at the end of this routine. 8543 if ( $rOpts->{'indent-only'} ) { 8544 flush(); 8545 trim($input_line); 8546 8547 extract_token(0); 8548 $token = $input_line; 8549 $type = 'q'; 8550 $block_type = ""; 8551 $container_type = ""; 8552 $container_environment = ""; 8553 $type_sequence = ""; 8554 store_token_to_go(); 8555 output_line_to_go(); 8556 return; 8557 } 8558 8559 push( @$rtokens, ' ', ' ' ); # making $j+2 valid simplifies coding 8560 push( @$rtoken_type, 'b', 'b' ); 8561 ($rwhite_space_flag) = 8562 set_white_space_flag( $jmax, $rtokens, $rtoken_type, $rblock_type ); 8563 8564 # find input tabbing to allow checks for tabbing disagreement 8565 ## not used for now 8566 ##$input_line_tabbing = ""; 8567 ##if ( $input_line =~ /^(\s*)/ ) { $input_line_tabbing = $1; } 8568 8569 # if the buffer hasn't been flushed, add a leading space if 8570 # necessary to keep essential whitespace. This is really only 8571 # necessary if we are squeezing out all ws. 8572 if ( $max_index_to_go >= 0 ) { 8573 8574 $old_line_count_in_batch++; 8575 8576 if ( 8577 is_essential_whitespace( 8578 $last_last_nonblank_token, 8579 $last_last_nonblank_type, 8580 $tokens_to_go[$max_index_to_go], 8581 $types_to_go[$max_index_to_go], 8582 $$rtokens[0], 8583 $$rtoken_type[0] 8584 ) 8585 ) 8586 { 8587 my $slevel = $$rslevels[0]; 8588 insert_new_token_to_go( ' ', 'b', $slevel, 8589 $no_internal_newlines ); 8590 } 8591 } 8592 8593 # If we just saw the end of an elsif block, write nag message 8594 # if we do not see another elseif or an else. 
8595 if ($looking_for_else) { 8596 8597 unless ( $$rtokens[0] =~ /^(elsif|else)$/ ) { 8598 write_logfile_entry("(No else block)\n"); 8599 } 8600 $looking_for_else = 0; 8601 } 8602 8603 # This is a good place to kill incomplete one-line blocks 8604 if ( ( $semicolons_before_block_self_destruct == 0 ) 8605 && ( $max_index_to_go >= 0 ) 8606 && ( $types_to_go[$max_index_to_go] eq ';' ) 8607 && ( $$rtokens[0] ne '}' ) ) 8608 { 8609 destroy_one_line_block(); 8610 output_line_to_go(); 8611 } 8612 8613 # loop to process the tokens one-by-one 8614 $type = 'b'; 8615 $token = ""; 8616 8617 foreach $j ( 0 .. $jmax ) { 8618 8619 # pull out the local values for this token 8620 extract_token($j); 8621 8622 if ( $type eq '#' ) { 8623 8624 # trim trailing whitespace 8625 # (there is no option at present to prevent this) 8626 $token =~ s/\s*$//; 8627 8628 if ( 8629 $rOpts->{'delete-side-comments'} 8630 8631 # delete closing side comments if necessary 8632 || ( $rOpts->{'delete-closing-side-comments'} 8633 && $token =~ /$closing_side_comment_prefix_pattern/o 8634 && $last_nonblank_block_type =~ 8635 /$closing_side_comment_list_pattern/o ) 8636 ) 8637 { 8638 if ( $types_to_go[$max_index_to_go] eq 'b' ) { 8639 unstore_token_to_go(); 8640 } 8641 last; 8642 } 8643 } 8644 8645 # If we are continuing after seeing a right curly brace, flush 8646 # buffer unless we see what we are looking for, as in 8647 # } else ... 8648 if ( $rbrace_follower && $type ne 'b' ) { 8649 8650 unless ( $rbrace_follower->{$token} ) { 8651 output_line_to_go(); 8652 } 8653 $rbrace_follower = undef; 8654 } 8655 8656 $j_next = ( $$rtoken_type[ $j + 1 ] eq 'b' ) ? 
$j + 2 : $j + 1; 8657 $next_nonblank_token = $$rtokens[$j_next]; 8658 $next_nonblank_token_type = $$rtoken_type[$j_next]; 8659 8660 #-------------------------------------------------------- 8661 # Start of section to patch token text 8662 #-------------------------------------------------------- 8663 8664 # Modify certain tokens here for whitespace 8665 # The following is not yet done, but could be: 8666 # sub (x x x) 8667 if ( $type =~ /^[wit]$/ ) { 8668 8669 # Examples: 8670 # change '$ var' to '$var' etc 8671 # '-> new' to '->new' 8672 if ( $token =~ /^([\$\&\%\*\@]|\-\>)\s/ ) { 8673 $token =~ s/\s*//g; 8674 } 8675 8676 if ( $token =~ /^sub/ ) { $token =~ s/\s+/ /g } 8677 } 8678 8679 # change 'LABEL :' to 'LABEL:' 8680 elsif ( $type eq 'J' ) { $token =~ s/\s+//g } 8681 8682 # patch to add space to something like "x10" 8683 # This avoids having to split this token in the pre-tokenizer 8684 elsif ( $type eq 'n' ) { 8685 if ( $token =~ /^x\d+/ ) { $token =~ s/x/x / } 8686 } 8687 8688 elsif ( $type eq 'Q' ) { 8689 note_embedded_tab() if ( $token =~ "\t" ); 8690 8691 # make note of something like '$var = s/xxx/yyy/;' 8692 # in case it should have been '$var =~ s/xxx/yyy/;' 8693 if ( 8694 $token =~ /^(s|tr|y|m|\/)/ 8695 && $last_nonblank_token =~ /^(=|==|!=)$/ 8696 8697 # precededed by simple scalar 8698 && $last_last_nonblank_type eq 'i' 8699 && $last_last_nonblank_token =~ /^\$/ 8700 8701 # followed by some kind of termination 8702 # (but give complaint if we can's see far enough ahead) 8703 && $next_nonblank_token =~ /^[; \)\}]$/ 8704 8705 # scalar is not decleared 8706 && !( 8707 $types_to_go[0] eq 'k' 8708 && $tokens_to_go[0] =~ /^(my|our|local)$/ 8709 ) 8710 ) 8711 { 8712 my $guess = substr( $last_nonblank_token, 0, 1 ) . 
'~'; 8713 complain( 8714"Note: be sure you want '$last_nonblank_token' instead of '$guess' here\n" 8715 ); 8716 } 8717 } 8718 8719 # trim blanks from right of qw quotes 8720 # (To avoid trimming qw quotes use -ntqw; the tokenizer handles this) 8721 elsif ( $type eq 'q' ) { 8722 $token =~ s/\s*$//; 8723 note_embedded_tab() if ( $token =~ "\t" ); 8724 } 8725 8726 #-------------------------------------------------------- 8727 # End of section to patch token text 8728 #-------------------------------------------------------- 8729 8730 # insert any needed whitespace 8731 if ( ( $type ne 'b' ) 8732 && ( $max_index_to_go >= 0 ) 8733 && ( $types_to_go[$max_index_to_go] ne 'b' ) 8734 && $rOpts_add_whitespace ) 8735 { 8736 my $ws = $$rwhite_space_flag[$j]; 8737 8738 if ( $ws == 1 ) { 8739 insert_new_token_to_go( ' ', 'b', $slevel, 8740 $no_internal_newlines ); 8741 } 8742 } 8743 8744 # Do not allow breaks which would promote a side comment to a 8745 # block comment. In order to allow a break before an opening 8746 # or closing BLOCK, followed by a side comment, those sections 8747 # of code will handle this flag separately. 8748 my $side_comment_follows = ( $next_nonblank_token_type eq '#' ); 8749 my $is_opening_BLOCK = 8750 ( $type eq '{' 8751 && $token eq '{' 8752 && $block_type 8753 && $block_type ne 't' ); 8754 my $is_closing_BLOCK = 8755 ( $type eq '}' 8756 && $token eq '}' 8757 && $block_type 8758 && $block_type ne 't' ); 8759 8760 if ( $side_comment_follows 8761 && !$is_opening_BLOCK 8762 && !$is_closing_BLOCK ) 8763 { 8764 $no_internal_newlines = 1; 8765 } 8766 8767 # We're only going to handle breaking for code BLOCKS at this 8768 # (top) level. Other indentation breaks will be handled by 8769 # sub scan_list, which is better suited to dealing with them. 8770 if ($is_opening_BLOCK) { 8771 8772 # Tentatively output this token. This is required before 8773 # calling starting_one_line_block. We may have to unstore 8774 # it, though, if we have to break before it. 
8775 store_token_to_go($side_comment_follows); 8776 8777 # Look ahead to see if we might form a one-line block 8778 my $too_long = 8779 starting_one_line_block( $j, $jmax, $level, $slevel, 8780 $ci_level, $rtokens, $rtoken_type, $rblock_type ); 8781 clear_breakpoint_undo_stack(); 8782 8783 # to simplify the logic below, set a flag to indicate if 8784 # this opening brace is far from the keyword which introduces it 8785 my $keyword_on_same_line = 1; 8786 if ( ( $max_index_to_go >= 0 ) 8787 && ( $last_nonblank_type eq ')' ) ) 8788 { 8789 if ( $block_type =~ /^(if|else|elsif)$/ 8790 && ( $tokens_to_go[0] eq '}' ) 8791 && $rOpts_cuddled_else ) 8792 { 8793 $keyword_on_same_line = 1; 8794 } 8795 elsif ( ( $slevel < $nesting_depth_to_go[0] ) || $too_long ) 8796 { 8797 $keyword_on_same_line = 0; 8798 } 8799 } 8800 8801 # decide if user requested break before '{' 8802 my $want_break = 8803 8804 # use -bl flag if not a sub block of any type 8805 $block_type !~ /^sub/ 8806 ? $rOpts->{'opening-brace-on-new-line'} 8807 8808 # use -sbl flag unless this is an anonymous sub block 8809 : $block_type !~ /^sub\W*$/ 8810 ? $rOpts->{'opening-sub-brace-on-new-line'} 8811 8812 # do not break for anonymous subs 8813 : 0; 8814 8815 # Break before an opening '{' ... 
8816 if ( 8817 8818 # if requested 8819 $want_break 8820 8821 # and we were unable to start looking for a block, 8822 && $index_start_one_line_block == UNDEFINED_INDEX 8823 8824 # or if it will not be on same line as its keyword, so that 8825 # it will be outdented (eval.t, overload.t), and the user 8826 # has not insisted on keeping it on the right 8827 || ( !$keyword_on_same_line 8828 && !$rOpts->{'opening-brace-always-on-right'} ) 8829 8830 ) 8831 { 8832 8833 # but only if allowed 8834 unless ($no_internal_newlines) { 8835 8836 # since we already stored this token, we must unstore it 8837 unstore_token_to_go(); 8838 8839 # then output the line 8840 output_line_to_go(); 8841 8842 # and now store this token at the start of a new line 8843 store_token_to_go($side_comment_follows); 8844 } 8845 } 8846 8847 # Now update for side comment 8848 if ($side_comment_follows) { $no_internal_newlines = 1 } 8849 8850 # now output this line 8851 unless ($no_internal_newlines) { 8852 output_line_to_go(); 8853 } 8854 } 8855 8856 elsif ($is_closing_BLOCK) { 8857 8858 # If there is a pending one-line block .. 8859 if ( $index_start_one_line_block != UNDEFINED_INDEX ) { 8860 8861 # we have to terminate it if.. 8862 if ( 8863 8864 # it is too long (final length may be different from 8865 # initial estimate). note: must allow 1 space for this token 8866 excess_line_length( $index_start_one_line_block, 8867 $max_index_to_go ) >= 0 8868 8869 # or if it has too many semicolons 8870 || ( $semicolons_before_block_self_destruct == 0 8871 && $last_nonblank_type ne ';' ) 8872 ) 8873 { 8874 destroy_one_line_block(); 8875 } 8876 } 8877 8878 # put a break before this closing curly brace if appropriate 8879 unless ( $no_internal_newlines 8880 || $index_start_one_line_block != UNDEFINED_INDEX ) 8881 { 8882 8883 # add missing semicolon if ... 
8884 # there are some tokens 8885 if ( 8886 ( $max_index_to_go > 0 ) 8887 8888 # and we don't have one 8889 && ( $last_nonblank_type ne ';' ) 8890 8891 # patch until some block type issues are fixed: 8892 # Do not add semi-colon for block types '{', 8893 # '}', and ';' because we cannot be sure yet 8894 # that this is a block and not an anonomyous 8895 # hash (blktype.t, blktype1.t) 8896 && ( $block_type !~ /^[\{\};]$/ ) 8897 8898 # it seems best not to add semicolons in these 8899 # special block types: sort|map|grep 8900 && ( !$is_sort_map_grep{$block_type} ) 8901 8902 # and we are allowed to do so. 8903 && $rOpts->{'add-semicolons'} 8904 ) 8905 { 8906 8907 save_current_token(); 8908 $token = ';'; 8909 $type = ';'; 8910 $level = $levels_to_go[$max_index_to_go]; 8911 $slevel = $nesting_depth_to_go[$max_index_to_go]; 8912 $nesting_blocks = 8913 $nesting_blocks_to_go[$max_index_to_go]; 8914 $ci_level = $ci_levels_to_go[$max_index_to_go]; 8915 $block_type = ""; 8916 $container_type = ""; 8917 $container_environment = ""; 8918 $type_sequence = ""; 8919 8920 # Note - we remove any blank AFTER extracting its 8921 # parameters such as level, etc, above 8922 if ( $types_to_go[$max_index_to_go] eq 'b' ) { 8923 unstore_token_to_go(); 8924 } 8925 store_token_to_go(); 8926 8927 note_added_semicolon(); 8928 restore_current_token(); 8929 } 8930 8931 # then write out everything before this closing curly brace 8932 output_line_to_go(); 8933 8934 } 8935 8936 # Now update for side comment 8937 if ($side_comment_follows) { $no_internal_newlines = 1 } 8938 8939 # store the closing curly brace 8940 store_token_to_go(); 8941 8942 # ok, we just stored a closing curly brace. Often, but 8943 # not always, we want to end the line immediately. 8944 # So now we have to check for special cases. 8945 8946 # if this '}' successfully ends a one-line block.. 
8947 my $is_one_line_block = 0; 8948 my $keep_going = 0; 8949 if ( $index_start_one_line_block != UNDEFINED_INDEX ) { 8950 8951 # Remember the type of token just before the 8952 # opening brace. It would be more general to use 8953 # a stack, but this will work for one-line blocks. 8954 $is_one_line_block = 8955 $types_to_go[$index_start_one_line_block]; 8956 8957 # we have to actually make it by removing tentative 8958 # breaks that were set within it 8959 undo_forced_breakpoint_stack(0); 8960 set_nobreaks( $index_start_one_line_block, 8961 $max_index_to_go - 1 ); 8962 8963 # then re-initialize for the next one-line block 8964 destroy_one_line_block(); 8965 8966 # then decide if we want to break after the '}' .. 8967 # We will keep going to allow certain brace followers as in: 8968 # do { $ifclosed = 1; last } unless $losing; 8969 # 8970 # But make a line break if the curly ends a 8971 # significant block: 8972 if ( 8973 $is_block_without_semicolon{$block_type} 8974 8975 # if needless semicolon follows we handle it later 8976 && $next_nonblank_token ne ';' 8977 ) 8978 { 8979 output_line_to_go() unless ($no_internal_newlines); 8980 } 8981 } 8982 8983 # set string indicating what we need to look for brace follower 8984 # tokens 8985 if ( $block_type eq 'do' ) { 8986 $rbrace_follower = \%is_do_follower; 8987 } 8988 elsif ( $block_type =~ /^(if|elsif|unless)$/ ) { 8989 $rbrace_follower = \%is_if_brace_follower; 8990 } 8991 elsif ( $block_type eq 'else' ) { 8992 $rbrace_follower = \%is_else_brace_follower; 8993 } 8994 8995 # added eval for borris.t 8996 elsif ($is_sort_map_grep_eval{$block_type} 8997 || $is_one_line_block eq 'G' ) 8998 { 8999 $rbrace_follower = undef; 9000 $keep_going = 1; 9001 } 9002 9003 # anonymous sub 9004 elsif ( $block_type =~ /^sub\W*$/ ) { 9005 9006 if ($is_one_line_block) { 9007 $rbrace_follower = \%is_anon_sub_1_brace_follower; 9008 } 9009 else { 9010 $rbrace_follower = \%is_anon_sub_brace_follower; 9011 } 9012 } 9013 9014 # None of the 
above: specify what can follow a closing 9015 # brace of a block which is not an 9016 # if/elsif/else/do/sort/map/grep/eval 9017 # Testfiles: 9018 # 'Toolbar.pm', 'Menubar.pm', bless.t, '3rules.pl', 'break1.t 9019 else { 9020 $rbrace_follower = \%is_other_brace_follower; 9021 } 9022 9023 # See if an elsif block is followed by another elsif or else; 9024 # complain if not. 9025 if ( $block_type eq 'elsif' ) { 9026 9027 if ( $next_nonblank_token_type eq 'b' ) { # end of line? 9028 $looking_for_else = 1; # ok, check on next line 9029 } 9030 else { 9031 9032 unless ( $next_nonblank_token =~ /^(elsif|else)$/ ) { 9033 write_logfile_entry("No else block :(\n"); 9034 } 9035 } 9036 } 9037 9038 # keep going after certain block types (map,sort,grep,eval) 9039 # added eval for borris.t 9040 if ($keep_going) { 9041 9042 # keep going 9043 } 9044 9045 # if no more tokens, postpone decision until re-entring 9046 elsif ( ( $next_nonblank_token_type eq 'b' ) 9047 && $rOpts_add_newlines ) 9048 { 9049 unless ($rbrace_follower) { 9050 output_line_to_go() unless ($no_internal_newlines); 9051 } 9052 } 9053 9054 elsif ($rbrace_follower) { 9055 9056 unless ( $rbrace_follower->{$next_nonblank_token} ) { 9057 output_line_to_go() unless ($no_internal_newlines); 9058 } 9059 $rbrace_follower = undef; 9060 } 9061 9062 else { 9063 output_line_to_go() unless ($no_internal_newlines); 9064 } 9065 9066 } # end treatment of closing block token 9067 9068 # handle semicolon 9069 elsif ( $type eq ';' ) { 9070 9071 # kill one-line blocks with too many semicolons 9072 $semicolons_before_block_self_destruct--; 9073 if ( 9074 ( $semicolons_before_block_self_destruct < 0 ) 9075 || ( $semicolons_before_block_self_destruct == 0 9076 && $next_nonblank_token_type !~ /^[b\}]$/ ) 9077 ) 9078 { 9079 destroy_one_line_block(); 9080 } 9081 9082 # Remove unnecessary semicolons, but not after bare 9083 # blocks, where it could be unsafe if the brace is 9084 # mistokenized. 
9085 if ( 9086 ( 9087 $last_nonblank_token eq '}' 9088 && ( 9089 $is_block_without_semicolon{ 9090 $last_nonblank_block_type} 9091 || $last_nonblank_block_type =~ /^sub\s+\w/ 9092 || $last_nonblank_block_type =~ /^\w+:$/ ) 9093 ) 9094 || $last_nonblank_type eq ';' 9095 ) 9096 { 9097 9098 if ( 9099 $rOpts->{'delete-semicolons'} 9100 9101 # don't delete ; before a # because it would promote it 9102 # to a block comment 9103 && ( $next_nonblank_token_type ne '#' ) 9104 ) 9105 { 9106 note_deleted_semicolon(); 9107 output_line_to_go() 9108 unless ( $no_internal_newlines 9109 || $index_start_one_line_block != UNDEFINED_INDEX ); 9110 next; 9111 } 9112 else { 9113 write_logfile_entry("Extra ';'\n"); 9114 } 9115 } 9116 store_token_to_go(); 9117 9118 output_line_to_go() 9119 unless ( $no_internal_newlines 9120 || ( $rOpts_keep_interior_semicolons && $j < $jmax ) 9121 || ( $next_nonblank_token eq '}' ) ); 9122 9123 } 9124 9125 # handle here_doc target string 9126 elsif ( $type eq 'h' ) { 9127 $no_internal_newlines = 9128 1; # no newlines after seeing here-target 9129 destroy_one_line_block(); 9130 store_token_to_go(); 9131 } 9132 9133 # handle all other token types 9134 else { 9135 9136 # if this is a blank... 
9137 if ( $type eq 'b' ) { 9138 9139 # make it just one character 9140 $token = ' ' if $rOpts_add_whitespace; 9141 9142 # delete it if unwanted by whitespace rules 9143 # or we are deleting all whitespace 9144 my $ws = $$rwhite_space_flag[ $j + 1 ]; 9145 if ( ( defined($ws) && $ws == -1 ) 9146 || $rOpts_delete_old_whitespace ) 9147 { 9148 9149 # unless it might make a syntax error 9150 next 9151 unless is_essential_whitespace( 9152 $last_last_nonblank_token, 9153 $last_last_nonblank_type, 9154 $tokens_to_go[$max_index_to_go], 9155 $types_to_go[$max_index_to_go], 9156 $$rtokens[ $j + 1 ], 9157 $$rtoken_type[ $j + 1 ] 9158 ); 9159 } 9160 } 9161 store_token_to_go(); 9162 } 9163 9164 # remember two previous nonblank OUTPUT tokens 9165 if ( $type ne '#' && $type ne 'b' ) { 9166 $last_last_nonblank_token = $last_nonblank_token; 9167 $last_last_nonblank_type = $last_nonblank_type; 9168 $last_nonblank_token = $token; 9169 $last_nonblank_type = $type; 9170 $last_nonblank_block_type = $block_type; 9171 } 9172 9173 # unset the continued-quote flag since it only applies to the 9174 # first token, and we want to resume normal formatting if 9175 # there are additional tokens on the line 9176 $in_continued_quote = 0; 9177 9178 } # end of loop over all tokens in this 'line_of_tokens' 9179 9180 # we have to flush .. 
        if (

            # if there is a side comment
            ( ( $type eq '#' ) && !$rOpts->{'delete-side-comments'} )

            # if this line ends in a quote
            # NOTE: This is critically important for ensuring that quoted lines
            # do not get processed by things like -sot and -sct
            || $in_quote

            # if this is a VERSION statement
            || $is_VERSION_statement

            # to keep a label on one line if that is how it is now
            || ( ( $type eq 'J' ) && ( $max_index_to_go == 0 ) )

            # if we are instructed to keep all old line breaks
            || !$rOpts->{'delete-old-newlines'}
          )
        {
            destroy_one_line_block();
            output_line_to_go();
        }

        # mark old line breakpoints in current output stream
        if ( $max_index_to_go >= 0 && !$rOpts_ignore_old_breakpoints ) {
            $old_breakpoint_to_go[$max_index_to_go] = 1;
        }
    }    # end sub print_line_of_tokens
}    # end print_line_of_tokens

# sub output_line_to_go sends one logical line of tokens on down the
# pipeline to the VerticalAligner package, breaking the line into continuation
# lines as necessary.  The line of tokens is ready to go in the "to_go"
# arrays.
#
# Takes no arguments; it works entirely on the file-level "to_go" arrays and
# option variables.  Outline of what the code below does, in order:
#   1. If a one-line block is pending, only record a tentative forced
#      breakpoint at the last token and return.
#   2. Optionally build a closing side comment, match container tokens and
#      notify the -lp logic that a batch is being finished.
#   3. If the batch ends in a block brace, force breaks at earlier closing
#      block braces at the same level (chains of blocks on one line).
#   4. Trim one leading/trailing blank token, decide whether a blank line is
#      wanted before the batch, and update the per-depth line counters.
#   5. Choose between a single output line and multiple continuation lines,
#      then pass the line fragments to send_lines_to_vertical_aligner.
#   6. Reset the batch with prepare_for_new_input_lines and write any
#      -cscw block comment returned in step 2.
sub output_line_to_go {

    # debug stuff; this routine can be called from many points
    FORMATTER_DEBUG_FLAG_OUTPUT && do {
        my ( $a, $b, $c ) = caller;
        write_diagnostics(
"OUTPUT: output_line_to_go called: $a $c $last_nonblank_type $last_nonblank_token, one_line=$index_start_one_line_block, tokens to write=$max_index_to_go\n"
        );
        my $output_str = join "", @tokens_to_go[ 0 .. $max_index_to_go ];
        write_diagnostics("$output_str\n");
    };

    # just set a tentative breakpoint if we might be in a one-line block
    if ( $index_start_one_line_block != UNDEFINED_INDEX ) {
        set_forced_breakpoint($max_index_to_go);
        return;
    }

    my $cscw_block_comment;
    $cscw_block_comment = add_closing_side_comment()
      if ( $rOpts->{'closing-side-comments'} && $max_index_to_go >= 0 );

    match_opening_and_closing_tokens();

    # tell the -lp option we are outputting a batch so it can close
    # any unfinished items in its stack
    finish_lp_batch();

    # If this line ends in a code block brace, set breaks at any
    # previous closing code block braces to breakup a chain of code
    # blocks on one line.  This is very rare but can happen for
    # user-defined subs.  For example we might be looking at this:
    #  BOOL { $server_data{uptime} > 0; } NUM { $server_data{load}; } STR {
    my $saw_good_break = 0;    # flag to force breaks even if short line
    if (

        # looking for opening or closing block brace
        $block_type_to_go[$max_index_to_go]

        # but not one of these which are never duplicated on a line:
        # until|while|for|if|elsif|else
        && !$is_block_without_semicolon{ $block_type_to_go[$max_index_to_go] }
      )
    {

        # NOTE(review): $lev is read from @nesting_depth_to_go but is
        # compared against @levels_to_go in the loop below -- confirm that
        # the two scales are meant to be interchangeable here.
        my $lev = $nesting_depth_to_go[$max_index_to_go];

        # Walk backwards from the end and
        # set break at any closing block braces at the same level.
        # But quit if we are not in a chain of blocks.
        for ( my $i = $max_index_to_go - 1 ; $i >= 0 ; $i-- ) {
            last if ( $levels_to_go[$i] < $lev );    # stop at a lower level
            next if ( $levels_to_go[$i] > $lev );    # skip past higher level

            if ( $block_type_to_go[$i] ) {
                if ( $tokens_to_go[$i] eq '}' ) {
                    set_forced_breakpoint($i);
                    $saw_good_break = 1;
                }
            }

            # quit if we see anything besides words, function, blanks
            # at this level
            elsif ( $types_to_go[$i] !~ /^[\(\)Gwib]$/ ) { last }
        }
    }

    my $imin = 0;
    my $imax = $max_index_to_go;

    # trim any blank tokens
    # (at most one blank is dropped from each end of the batch)
    if ( $max_index_to_go >= 0 ) {
        if ( $types_to_go[$imin] eq 'b' ) { $imin++ }
        if ( $types_to_go[$imax] eq 'b' ) { $imax-- }
    }

    # anything left to write?
    if ( $imin <= $imax ) {

        # add a blank line before certain key types
        # (skipped when the previous line led with a comment or a blank)
        if ( $last_line_leading_type !~ /^[#b]/ ) {
            my $want_blank    = 0;
            my $leading_token = $tokens_to_go[$imin];
            my $leading_type  = $types_to_go[$imin];

            # blank lines before subs except declarations and one-liners
            # MCONVERSION LOCATION - for sub tokenization change
            if ( $leading_token =~ /^(sub\s)/ && $leading_type eq 'i' ) {
                $want_blank = ( $rOpts->{'blanks-before-subs'} )
                  && (
                    terminal_type( \@types_to_go, \@block_type_to_go, $imin,
                        $imax ) !~ /^[\;\}]$/
                  );
            }

            # break before all package declarations
            # MCONVERSION LOCATION - for tokenization change
            elsif ($leading_token =~ /^(package\s)/
                && $leading_type eq 'i' )
            {
                $want_blank = ( $rOpts->{'blanks-before-subs'} );
            }

            # break before certain key blocks except one-liners
            # (this starts a second, independent if/elsif chain; its
            # conditions cannot match a token matched by the chain above)
            if ( $leading_token =~ /^(BEGIN|END)$/ && $leading_type eq 'k' ) {
                $want_blank = ( $rOpts->{'blanks-before-subs'} )
                  && (
                    terminal_type( \@types_to_go, \@block_type_to_go, $imin,
                        $imax ) ne '}'
                  );
            }

            # Break before certain block types if we haven't had a
            # break at this level for a while.  This is the
            # difficult decision..
            elsif ($leading_token =~ /^(unless|if|while|until|for|foreach)$/
                && $leading_type eq 'k' )
            {
                my $lc = $nonblank_lines_at_depth[$last_line_leading_level];
                if ( !defined($lc) ) { $lc = 0 }

                $want_blank =
                     $rOpts->{'blanks-before-blocks'}
                  && $lc >= $rOpts->{'long-block-line-count'}
                  && $file_writer_object->get_consecutive_nonblank_lines() >=
                  $rOpts->{'long-block-line-count'}
                  && (
                    terminal_type( \@types_to_go, \@block_type_to_go, $imin,
                        $imax ) ne '}'
                  );
            }

            if ($want_blank) {

                # future: send blank line down normal path to VerticalAligner
                Perl::Tidy::VerticalAligner::flush();
                $file_writer_object->write_blank_code_line();
            }
        }

        # update blank line variables and count number of consecutive
        # non-blank, non-comment lines at this level
        $last_last_line_leading_level = $last_line_leading_level;
        $last_line_leading_level      = $levels_to_go[$imin];
        if ( $last_line_leading_level < 0 ) { $last_line_leading_level = 0 }
        $last_line_leading_type = $types_to_go[$imin];
        if (   $last_line_leading_level == $last_last_line_leading_level
            && $last_line_leading_type ne 'b'
            && $last_line_leading_type ne '#'
            && defined( $nonblank_lines_at_depth[$last_line_leading_level] ) )
        {
            $nonblank_lines_at_depth[$last_line_leading_level]++;
        }
        else {
            $nonblank_lines_at_depth[$last_line_leading_level] = 1;
        }

        FORMATTER_DEBUG_FLAG_FLUSH && do {
            my ( $package, $file, $line ) = caller;
            print
"FLUSH: flushing from $package $file $line, types= $types_to_go[$imin] to $types_to_go[$imax]\n";
        };

        # add a couple of extra terminal blank tokens
        pad_array_to_go();

        # set all forced breakpoints for good list formatting
        my $is_long_line = excess_line_length( $imin, $max_index_to_go ) > 0;

        if (
            $max_index_to_go > 0
            && (
                   $is_long_line
                || $old_line_count_in_batch > 1
                || is_unbalanced_batch()
                || (
                    $comma_count_in_batch
                    && (   $rOpts_maximum_fields_per_table > 0
                        || $rOpts_comma_arrow_breakpoints == 0 )
                )
            )
          )
        {
            $saw_good_break ||= scan_list();
        }

        # let $ri_first and $ri_last be references to lists of
        # first and last tokens of line fragments to output..
        my ( $ri_first, $ri_last );

        # write a single line if..
        if (

            # we aren't allowed to add any newlines
            !$rOpts_add_newlines

            # or, we don't already have an interior breakpoint
            # and we didn't see a good breakpoint
            || (
                   !$forced_breakpoint_count
                && !$saw_good_break

                # and this line is 'short'
                && !$is_long_line
            )
          )
        {

            # (the array references are autovivified by these assignments)
            @$ri_first = ($imin);
            @$ri_last  = ($imax);
        }

        # otherwise use multiple lines
        else {

            ( $ri_first, $ri_last, my $colon_count ) =
              set_continuation_breaks($saw_good_break);

            break_all_chain_tokens( $ri_first, $ri_last );

            break_equals( $ri_first, $ri_last );

            # now we do a correction step to clean this up a bit
            # (The only time we would not do this is for debugging)
            if ( $rOpts->{'recombine'} ) {
                ( $ri_first, $ri_last ) =
                  recombine_breakpoints( $ri_first, $ri_last );
            }

            insert_final_breaks( $ri_first, $ri_last ) if $colon_count;
        }

        # do corrector step if -lp option is used
        my $do_not_pad = 0;
        if ($rOpts_line_up_parentheses) {
            $do_not_pad = correct_lp_indentation( $ri_first, $ri_last );
        }
        send_lines_to_vertical_aligner( $ri_first, $ri_last, $do_not_pad );
    }
    prepare_for_new_input_lines();

    # output any new -cscw block comment
    if ($cscw_block_comment) {
        flush();
        $file_writer_object->write_code_line( $cscw_block_comment . "\n" );
    }
}

# Record that a semicolon was added at the current input line: remember the
# line number as the most recent (and, for the first one, the first)
# addition, bump the counter, and note it in the logfile.
sub note_added_semicolon {
    $last_added_semicolon_at = $input_line_number;
    if ( $added_semicolon_count == 0 ) {
        $first_added_semicolon_at = $last_added_semicolon_at;
    }
    $added_semicolon_count++;
    write_logfile_entry("Added ';' here\n");
}

# Record that a semicolon was deleted at the current input line; same
# bookkeeping as note_added_semicolon but for the "deleted" counters.
sub note_deleted_semicolon {
    $last_deleted_semicolon_at = $input_line_number;
    if ( $deleted_semicolon_count == 0 ) {
        $first_deleted_semicolon_at = $last_deleted_semicolon_at;
    }
    $deleted_semicolon_count++;
    write_logfile_entry("Deleted unnecessary ';'\n");    # i hope ;)
}

# Record an embedded tab seen at the current input line.  The first and last
# line numbers are remembered, and a logfile entry is written only for the
# first MAX_NAG_MESSAGES occurrences.
sub note_embedded_tab {
    $embedded_tab_count++;
    $last_embedded_tab_at = $input_line_number;
    if ( !$first_embedded_tab_at ) {
        $first_embedded_tab_at = $last_embedded_tab_at;
    }

    if ( $embedded_tab_count <= MAX_NAG_MESSAGES ) {
        write_logfile_entry("Embedded tabs in quote or pattern\n");
    }
}

sub starting_one_line_block {

    # after seeing an opening curly brace, look for the closing brace
    # and see if the entire block will fit on a line.  This routine is
    # not always right because it uses the old whitespace, so a check
    # is made later (at the closing brace) to make sure we really
    # have a one-line block.  We have to do this preliminary check,
    # though, because otherwise we would always break at a semicolon
    # within a one-line block if the block contains multiple statements.
9501 9502 my ( $j, $jmax, $level, $slevel, $ci_level, $rtokens, $rtoken_type, 9503 $rblock_type ) 9504 = @_; 9505 9506 # kill any current block - we can only go 1 deep 9507 destroy_one_line_block(); 9508 9509 # return value: 9510 # 1=distance from start of block to opening brace exceeds line length 9511 # 0=otherwise 9512 9513 my $i_start = 0; 9514 9515 # shouldn't happen: there must have been a prior call to 9516 # store_token_to_go to put the opening brace in the output stream 9517 if ( $max_index_to_go < 0 ) { 9518 warning("program bug: store_token_to_go called incorrectly\n"); 9519 report_definite_bug(); 9520 } 9521 else { 9522 9523 # cannot use one-line blocks with cuddled else else/elsif lines 9524 if ( ( $tokens_to_go[0] eq '}' ) && $rOpts_cuddled_else ) { 9525 return 0; 9526 } 9527 } 9528 9529 my $block_type = $$rblock_type[$j]; 9530 9531 # find the starting keyword for this block (such as 'if', 'else', ...) 9532 9533 if ( $block_type =~ /^[\{\}\;\:]$/ ) { 9534 $i_start = $max_index_to_go; 9535 } 9536 9537 elsif ( $last_last_nonblank_token_to_go eq ')' ) { 9538 9539 # For something like "if (xxx) {", the keyword "if" will be 9540 # just after the most recent break. This will be 0 unless 9541 # we have just killed a one-line block and are starting another. 
9542 # (doif.t) 9543 $i_start = $index_max_forced_break + 1; 9544 if ( $types_to_go[$i_start] eq 'b' ) { 9545 $i_start++; 9546 } 9547 9548 unless ( $tokens_to_go[$i_start] eq $block_type ) { 9549 return 0; 9550 } 9551 } 9552 9553 # the previous nonblank token should start these block types 9554 elsif ( 9555 ( $last_last_nonblank_token_to_go eq $block_type ) 9556 || ( $block_type =~ /^sub/ 9557 && $last_last_nonblank_token_to_go =~ /^sub/ ) 9558 ) 9559 { 9560 $i_start = $last_last_nonblank_index_to_go; 9561 } 9562 9563 # patch for SWITCH/CASE to retain one-line case/when blocks 9564 elsif ( $block_type eq 'case' || $block_type eq 'when' ) { 9565 $i_start = $index_max_forced_break + 1; 9566 if ( $types_to_go[$i_start] eq 'b' ) { 9567 $i_start++; 9568 } 9569 unless ( $tokens_to_go[$i_start] eq $block_type ) { 9570 return 0; 9571 } 9572 } 9573 9574 else { 9575 return 1; 9576 } 9577 9578 my $pos = total_line_length( $i_start, $max_index_to_go ) - 1; 9579 9580 my $i; 9581 9582 # see if length is too long to even start 9583 if ( $pos > $rOpts_maximum_line_length ) { 9584 return 1; 9585 } 9586 9587 for ( $i = $j + 1 ; $i <= $jmax ; $i++ ) { 9588 9589 # old whitespace could be arbitrarily large, so don't use it 9590 if ( $$rtoken_type[$i] eq 'b' ) { $pos += 1 } 9591 else { $pos += length( $$rtokens[$i] ) } 9592 9593 # Return false result if we exceed the maximum line length, 9594 if ( $pos > $rOpts_maximum_line_length ) { 9595 return 0; 9596 } 9597 9598 # or encounter another opening brace before finding the closing brace. 9599 elsif ($$rtokens[$i] eq '{' 9600 && $$rtoken_type[$i] eq '{' 9601 && $$rblock_type[$i] ) 9602 { 9603 return 0; 9604 } 9605 9606 # if we find our closing brace.. 9607 elsif ($$rtokens[$i] eq '}' 9608 && $$rtoken_type[$i] eq '}' 9609 && $$rblock_type[$i] ) 9610 { 9611 9612 # be sure any trailing comment also fits on the line 9613 my $i_nonblank = 9614 ( $$rtoken_type[ $i + 1 ] eq 'b' ) ? 
$i + 2 : $i + 1; 9615 9616 if ( $$rtoken_type[$i_nonblank] eq '#' ) { 9617 $pos += length( $$rtokens[$i_nonblank] ); 9618 9619 if ( $i_nonblank > $i + 1 ) { 9620 $pos += length( $$rtokens[ $i + 1 ] ); 9621 } 9622 9623 if ( $pos > $rOpts_maximum_line_length ) { 9624 return 0; 9625 } 9626 } 9627 9628 # ok, it's a one-line block 9629 create_one_line_block( $i_start, 20 ); 9630 return 0; 9631 } 9632 9633 # just keep going for other characters 9634 else { 9635 } 9636 } 9637 9638 # Allow certain types of new one-line blocks to form by joining 9639 # input lines. These can be safely done, but for other block types, 9640 # we keep old one-line blocks but do not form new ones. It is not 9641 # always a good idea to make as many one-line blocks as possible, 9642 # so other types are not done. The user can always use -mangle. 9643 if ( $is_sort_map_grep_eval{$block_type} ) { 9644 create_one_line_block( $i_start, 1 ); 9645 } 9646 9647 return 0; 9648} 9649 9650sub unstore_token_to_go { 9651 9652 # remove most recent token from output stream 9653 if ( $max_index_to_go > 0 ) { 9654 $max_index_to_go--; 9655 } 9656 else { 9657 $max_index_to_go = UNDEFINED_INDEX; 9658 } 9659 9660} 9661 9662sub want_blank_line { 9663 flush(); 9664 $file_writer_object->want_blank_line(); 9665} 9666 9667sub write_unindented_line { 9668 flush(); 9669 $file_writer_object->write_line( $_[0] ); 9670} 9671 9672sub undo_lp_ci { 9673 9674 # If there is a single, long parameter within parens, like this: 9675 # 9676 # $self->command( "/msg " 9677 # . $infoline->chan 9678 # . " You said $1, but did you know that it's square was " 9679 # . $1 * $1 . " ?" ); 9680 # 9681 # we can remove the continuation indentation of the 2nd and higher lines 9682 # to achieve this effect, which is more pleasing: 9683 # 9684 # $self->command("/msg " 9685 # . $infoline->chan 9686 # . " You said $1, but did you know that it's square was " 9687 # . $1 * $1 . 
" ?"); 9688 9689 my ( $line_open, $i_start, $closing_index, $ri_first, $ri_last ) = @_; 9690 my $max_line = @$ri_first - 1; 9691 9692 # must be multiple lines 9693 return unless $max_line > $line_open; 9694 9695 my $lev_start = $levels_to_go[$i_start]; 9696 my $ci_start_plus = 1 + $ci_levels_to_go[$i_start]; 9697 9698 # see if all additional lines in this container have continuation 9699 # indentation 9700 my $n; 9701 my $line_1 = 1 + $line_open; 9702 for ( $n = $line_1 ; $n <= $max_line ; ++$n ) { 9703 my $ibeg = $$ri_first[$n]; 9704 my $iend = $$ri_last[$n]; 9705 if ( $ibeg eq $closing_index ) { $n--; last } 9706 return if ( $lev_start != $levels_to_go[$ibeg] ); 9707 return if ( $ci_start_plus != $ci_levels_to_go[$ibeg] ); 9708 last if ( $closing_index <= $iend ); 9709 } 9710 9711 # we can reduce the indentation of all continuation lines 9712 my $continuation_line_count = $n - $line_open; 9713 @ci_levels_to_go[ @$ri_first[ $line_1 .. $n ] ] = 9714 (0) x ($continuation_line_count); 9715 @leading_spaces_to_go[ @$ri_first[ $line_1 .. $n ] ] = 9716 @reduced_spaces_to_go[ @$ri_first[ $line_1 .. $n ] ]; 9717} 9718 9719sub set_logical_padding { 9720 9721 # Look at a batch of lines and see if extra padding can improve the 9722 # alignment when there are certain leading operators. Here is an 9723 # example, in which some extra space is introduced before 9724 # '( $year' to make it line up with the subsequent lines: 9725 # 9726 # if ( ( $Year < 1601 ) 9727 # || ( $Year > 2899 ) 9728 # || ( $EndYear < 1601 ) 9729 # || ( $EndYear > 2899 ) ) 9730 # { 9731 # &Error_OutOfRange; 9732 # } 9733 # 9734 my ( $ri_first, $ri_last ) = @_; 9735 my $max_line = @$ri_first - 1; 9736 9737 my ( $ibeg, $ibeg_next, $ibegm, $iend, $iendm, $ipad, $line, $pad_spaces, 9738 $tok_next, $type_next, $has_leading_op_next, $has_leading_op ); 9739 9740 # looking at each line of this batch.. 9741 foreach $line ( 0 .. 
$max_line - 1 ) { 9742 9743 # see if the next line begins with a logical operator 9744 $ibeg = $$ri_first[$line]; 9745 $iend = $$ri_last[$line]; 9746 $ibeg_next = $$ri_first[ $line + 1 ]; 9747 $tok_next = $tokens_to_go[$ibeg_next]; 9748 $type_next = $types_to_go[$ibeg_next]; 9749 9750 $has_leading_op_next = ( $tok_next =~ /^\w/ ) 9751 ? $is_chain_operator{$tok_next} # + - * / : ? && || 9752 : $is_chain_operator{$type_next}; # and, or 9753 9754 next unless ($has_leading_op_next); 9755 9756 # next line must not be at lesser depth 9757 next 9758 if ( $nesting_depth_to_go[$ibeg] > $nesting_depth_to_go[$ibeg_next] ); 9759 9760 # identify the token in this line to be padded on the left 9761 $ipad = undef; 9762 9763 # handle lines at same depth... 9764 if ( $nesting_depth_to_go[$ibeg] == $nesting_depth_to_go[$ibeg_next] ) { 9765 9766 # if this is not first line of the batch ... 9767 if ( $line > 0 ) { 9768 9769 # and we have leading operator.. 9770 next if $has_leading_op; 9771 9772 # Introduce padding if.. 9773 # 1. the previous line is at lesser depth, or 9774 # 2. the previous line ends in an assignment 9775 # 3. the previous line ends in a 'return' 9776 # 4. the previous line ends in a comma 9777 # Example 1: previous line at lesser depth 9778 # if ( ( $Year < 1601 ) # <- we are here but 9779 # || ( $Year > 2899 ) # list has not yet 9780 # || ( $EndYear < 1601 ) # collapsed vertically 9781 # || ( $EndYear > 2899 ) ) 9782 # { 9783 # 9784 # Example 2: previous line ending in assignment: 9785 # $leapyear = 9786 # $year % 4 ? 0 # <- We are here 9787 # : $year % 100 ? 1 9788 # : $year % 400 ? 0 9789 # : 1; 9790 # 9791 # Example 3: previous line ending in comma: 9792 # push @expr, 9793 # /test/ ? undef 9794 # : eval($_) ? 1 9795 # : eval($_) ? 
1 9796 # : 0; 9797 9798 # be sure levels agree (do not indent after an indented 'if') 9799 next if ( $levels_to_go[$ibeg] ne $levels_to_go[$ibeg_next] ); 9800 9801 # allow padding on first line after a comma but only if: 9802 # (1) this is line 2 and 9803 # (2) there are at more than three lines and 9804 # (3) lines 3 and 4 have the same leading operator 9805 # These rules try to prevent padding within a long 9806 # comma-separated list. 9807 my $ok_comma; 9808 if ( $types_to_go[$iendm] eq ',' 9809 && $line == 1 9810 && $max_line > 2 ) 9811 { 9812 my $ibeg_next_next = $$ri_first[ $line + 2 ]; 9813 my $tok_next_next = $tokens_to_go[$ibeg_next_next]; 9814 $ok_comma = $tok_next_next eq $tok_next; 9815 } 9816 9817 next 9818 unless ( 9819 $is_assignment{ $types_to_go[$iendm] } 9820 || $ok_comma 9821 || ( $nesting_depth_to_go[$ibegm] < 9822 $nesting_depth_to_go[$ibeg] ) 9823 || ( $types_to_go[$iendm] eq 'k' 9824 && $tokens_to_go[$iendm] eq 'return' ) 9825 ); 9826 9827 # we will add padding before the first token 9828 $ipad = $ibeg; 9829 } 9830 9831 # for first line of the batch.. 9832 else { 9833 9834 # WARNING: Never indent if first line is starting in a 9835 # continued quote, which would change the quote. 9836 next if $starting_in_quote; 9837 9838 # if this is text after closing '}' 9839 # then look for an interior token to pad 9840 if ( $types_to_go[$ibeg] eq '}' ) { 9841 9842 } 9843 9844 # otherwise, we might pad if it looks really good 9845 else { 9846 9847 # we might pad token $ibeg, so be sure that it 9848 # is at the same depth as the next line. 9849 next 9850 if ( $nesting_depth_to_go[$ibeg] != 9851 $nesting_depth_to_go[$ibeg_next] ); 9852 9853 # We can pad on line 1 of a statement if at least 3 9854 # lines will be aligned. Otherwise, it 9855 # can look very confusing. 9856 9857 # We have to be careful not to pad if there are too few 9858 # lines. 
The current rule is: 9859 # (1) in general we require at least 3 consecutive lines 9860 # with the same leading chain operator token, 9861 # (2) but an exception is that we only require two lines 9862 # with leading colons if there are no more lines. For example, 9863 # the first $i in the following snippet would get padding 9864 # by the second rule: 9865 # 9866 # $i == 1 ? ( "First", "Color" ) 9867 # : $i == 2 ? ( "Then", "Rarity" ) 9868 # : ( "Then", "Name" ); 9869 9870 if ( $max_line > 1 ) { 9871 my $leading_token = $tokens_to_go[$ibeg_next]; 9872 my $tokens_differ; 9873 9874 # never indent line 1 of a '.' series because 9875 # previous line is most likely at same level. 9876 # TODO: we should also look at the leasing_spaces 9877 # of the last output line and skip if it is same 9878 # as this line. 9879 next if ( $leading_token eq '.' ); 9880 9881 my $count = 1; 9882 foreach my $l ( 2 .. 3 ) { 9883 last if ( $line + $l > $max_line ); 9884 my $ibeg_next_next = $$ri_first[ $line + $l ]; 9885 if ( $tokens_to_go[$ibeg_next_next] ne 9886 $leading_token ) 9887 { 9888 $tokens_differ = 1; 9889 last; 9890 } 9891 $count++; 9892 } 9893 next if ($tokens_differ); 9894 next if ( $count < 3 && $leading_token ne ':' ); 9895 $ipad = $ibeg; 9896 } 9897 else { 9898 next; 9899 } 9900 } 9901 } 9902 } 9903 9904 # find interior token to pad if necessary 9905 if ( !defined($ipad) ) { 9906 9907 for ( my $i = $ibeg ; ( $i < $iend ) && !$ipad ; $i++ ) { 9908 9909 # find any unclosed container 9910 next 9911 unless ( $type_sequence_to_go[$i] 9912 && $mate_index_to_go[$i] > $iend ); 9913 9914 # find next nonblank token to pad 9915 $ipad = $i + 1; 9916 if ( $types_to_go[$ipad] eq 'b' ) { 9917 $ipad++; 9918 last if ( $ipad > $iend ); 9919 } 9920 } 9921 last unless $ipad; 9922 } 9923 9924 # next line must not be at greater depth 9925 my $iend_next = $$ri_last[ $line + 1 ]; 9926 next 9927 if ( $nesting_depth_to_go[ $iend_next + 1 ] > 9928 $nesting_depth_to_go[$ipad] ); 9929 9930 # lines must 
be somewhat similar to be padded.. 9931 my $inext_next = $ibeg_next + 1; 9932 if ( $types_to_go[$inext_next] eq 'b' ) { 9933 $inext_next++; 9934 } 9935 my $type = $types_to_go[$ipad]; 9936 my $type_next = $types_to_go[ $ipad + 1 ]; 9937 9938 # see if there are multiple continuation lines 9939 my $logical_continuation_lines = 1; 9940 if ( $line + 2 <= $max_line ) { 9941 my $leading_token = $tokens_to_go[$ibeg_next]; 9942 my $ibeg_next_next = $$ri_first[ $line + 2 ]; 9943 if ( $tokens_to_go[$ibeg_next_next] eq $leading_token 9944 && $nesting_depth_to_go[$ibeg_next] eq 9945 $nesting_depth_to_go[$ibeg_next_next] ) 9946 { 9947 $logical_continuation_lines++; 9948 } 9949 } 9950 9951 # see if leading types match 9952 my $types_match = $types_to_go[$inext_next] eq $type; 9953 my $matches_without_bang; 9954 9955 # if first line has leading ! then compare the following token 9956 if ( !$types_match && $type eq '!' ) { 9957 $types_match = $matches_without_bang = 9958 $types_to_go[$inext_next] eq $types_to_go[ $ipad + 1 ]; 9959 } 9960 9961 if ( 9962 9963 # either we have multiple continuation lines to follow 9964 # and we are not padding the first token 9965 ( $logical_continuation_lines > 1 && $ipad > 0 ) 9966 9967 # or.. 9968 || ( 9969 9970 # types must match 9971 $types_match 9972 9973 # and keywords must match if keyword 9974 && !( 9975 $type eq 'k' 9976 && $tokens_to_go[$ipad] ne $tokens_to_go[$inext_next] 9977 ) 9978 ) 9979 ) 9980 { 9981 9982 #----------------------begin special checks-------------- 9983 # 9984 # SPECIAL CHECK 1: 9985 # A check is needed before we can make the pad. 9986 # If we are in a list with some long items, we want each 9987 # item to stand out. So in the following example, the 9988 # first line begining with '$casefold->' would look good 9989 # padded to align with the next line, but then it 9990 # would be indented more than the last line, so we 9991 # won't do it. 
9992 # 9993 # ok( 9994 # $casefold->{code} eq '0041' 9995 # && $casefold->{status} eq 'C' 9996 # && $casefold->{mapping} eq '0061', 9997 # 'casefold 0x41' 9998 # ); 9999 # 10000 # Note: 10001 # It would be faster, and almost as good, to use a comma 10002 # count, and not pad if comma_count > 1 and the previous 10003 # line did not end with a comma. 10004 # 10005 my $ok_to_pad = 1; 10006 10007 my $ibg = $$ri_first[ $line + 1 ]; 10008 my $depth = $nesting_depth_to_go[ $ibg + 1 ]; 10009 10010 # just use simplified formula for leading spaces to avoid 10011 # needless sub calls 10012 my $lsp = $levels_to_go[$ibg] + $ci_levels_to_go[$ibg]; 10013 10014 # look at each line beyond the next .. 10015 my $l = $line + 1; 10016 foreach $l ( $line + 2 .. $max_line ) { 10017 my $ibg = $$ri_first[$l]; 10018 10019 # quit looking at the end of this container 10020 last 10021 if ( $nesting_depth_to_go[ $ibg + 1 ] < $depth ) 10022 || ( $nesting_depth_to_go[$ibg] < $depth ); 10023 10024 # cannot do the pad if a later line would be 10025 # outdented more 10026 if ( $levels_to_go[$ibg] + $ci_levels_to_go[$ibg] < $lsp ) { 10027 $ok_to_pad = 0; 10028 last; 10029 } 10030 } 10031 10032 # don't pad if we end in a broken list 10033 if ( $l == $max_line ) { 10034 my $i2 = $$ri_last[$l]; 10035 if ( $types_to_go[$i2] eq '#' ) { 10036 my $i1 = $$ri_first[$l]; 10037 next 10038 if ( 10039 terminal_type( \@types_to_go, \@block_type_to_go, $i1, 10040 $i2 ) eq ',' 10041 ); 10042 } 10043 } 10044 10045 # SPECIAL CHECK 2: 10046 # a minus may introduce a quoted variable, and we will 10047 # add the pad only if this line begins with a bare word, 10048 # such as for the word 'Button' here: 10049 # [ 10050 # Button => "Print letter \"~$_\"", 10051 # -command => [ sub { print "$_[0]\n" }, $_ ], 10052 # -accelerator => "Meta+$_" 10053 # ]; 10054 # 10055 # On the other hand, if 'Button' is quoted, it looks best 10056 # not to pad: 10057 # [ 10058 # 'Button' => "Print letter \"~$_\"", 10059 # -command => [ sub { 
print "$_[0]\n" }, $_ ], 10060 # -accelerator => "Meta+$_" 10061 # ]; 10062 if ( $types_to_go[$ibeg_next] eq 'm' ) { 10063 $ok_to_pad = 0 if $types_to_go[$ibeg] eq 'Q'; 10064 } 10065 10066 next unless $ok_to_pad; 10067 10068 #----------------------end special check--------------- 10069 10070 my $length_1 = total_line_length( $ibeg, $ipad - 1 ); 10071 my $length_2 = total_line_length( $ibeg_next, $inext_next - 1 ); 10072 $pad_spaces = $length_2 - $length_1; 10073 10074 # If the first line has a leading ! and the second does 10075 # not, then remove one space to try to align the next 10076 # leading characters, which are often the same. For example: 10077 # if ( !$ts 10078 # || $ts == $self->Holder 10079 # || $self->Holder->Type eq "Arena" ) 10080 # 10081 # This usually helps readability, but if there are subsequent 10082 # ! operators things will still get messed up. For example: 10083 # 10084 # if ( !exists $Net::DNS::typesbyname{$qtype} 10085 # && exists $Net::DNS::classesbyname{$qtype} 10086 # && !exists $Net::DNS::classesbyname{$qclass} 10087 # && exists $Net::DNS::typesbyname{$qclass} ) 10088 # We can't fix that. 
sub correct_lp_indentation {

    # When the -lp option is used, we need to make a last pass through
    # each line to correct the indentation positions in case they differ
    # from the predictions.  This is necessary because perltidy uses a
    # predictor/corrector method for aligning with opening parens.  The
    # predictor is usually good, but sometimes stumbles.  The corrector
    # tries to patch things up once the actual opening paren locations
    # are known.
    #
    # Parameters:
    #   $ri_first - ref to array of the first token index of each line
    #   $ri_last  - ref to array of the last token index of each line
    #
    # Returns the flag $do_not_pad (0 or 1); see the note below.
    my ( $ri_first, $ri_last ) = @_;
    my $do_not_pad = 0;

    #  Note on flag '$do_not_pad':
    #  The flag is set below when an unvisited paren-aligned indentation
    #  item is the first token of the second line of the batch (line
    #  index 1).  The situation to avoid is one where the aligner inserts
    #  whitespace before the '=' of that line to align it with a previous
    #  '=', because that pushes the opening '(' forward beyond where we
    #  want it and the parens become mis-aligned:
    #
    #  $mkFloor::currentRoom = '';
    #  $mkFloor::c_entry     = $c->Entry(
    #                                 -width        => '10',
    #                                 -relief       => 'sunken',
    #                                 ...
    #                                 );
    #
    #  We leave it to the aligner to decide how to do this.

    # first remove continuation indentation if appropriate
    my $max_line = @$ri_first - 1;

    # looking at each line of this batch..
    my ( $ibeg, $iend );
    my $line;
    foreach $line ( 0 .. $max_line ) {
        $ibeg = $$ri_first[$line];
        $iend = $$ri_last[$line];

        # looking at each token in this output line..
        my $i;
        foreach $i ( $ibeg .. $iend ) {

            # Each indentation item is examined once, at the first token
            # which refers to it; the MARKED flag records the visit.

            # looking for next unvisited indentation item
            my $indentation = $leading_spaces_to_go[$i];
            if ( !$indentation->get_MARKED() ) {
                $indentation->set_MARKED(1);

                # looking for indentation item for which we are aligning
                # with parens, braces, and brackets
                next unless ( $indentation->get_ALIGN_PAREN() );

                # skip closed container on this line: the previous
                # nonblank token is a sequenced token whose mate index
                # is not beyond the end of this line
                if ( $i > $ibeg ) {
                    my $im = $i - 1;
                    if ( $types_to_go[$im] eq 'b' && $im > $ibeg ) { $im-- }
                    if (   $type_sequence_to_go[$im]
                        && $mate_index_to_go[$im] <= $iend )
                    {
                        next;
                    }
                }

                if ( $line == 1 && $i == $ibeg ) {
                    $do_not_pad = 1;
                }

                # Ok, let's see what the error is and try to fix it
                my $actual_pos;
                my $predicted_pos = $indentation->get_SPACES();
                if ( $i > $ibeg ) {

                    # token is mid-line - use length to previous token
                    $actual_pos = total_line_length( $ibeg, $i - 1 );

                    # for mid-line token, we must check to see if all
                    # additional lines have continuation indentation,
                    # and remove it if so.  Otherwise, we do not get
                    # good alignment.
                    my $closing_index = $indentation->get_CLOSED();
                    if ( $closing_index > $iend ) {
                        my $ibeg_next = $$ri_first[ $line + 1 ];
                        if ( $ci_levels_to_go[$ibeg_next] > 0 ) {
                            undo_lp_ci( $line, $i, $closing_index, $ri_first,
                                $ri_last );
                        }
                    }
                }
                elsif ( $line > 0 ) {

                    # handle case where token starts a new line;
                    # use length of previous line
                    my $ibegm = $$ri_first[ $line - 1 ];
                    my $iendm = $$ri_last[ $line - 1 ];
                    $actual_pos = total_line_length( $ibegm, $iendm );

                    # follow -pt style
                    ++$actual_pos
                      if ( $types_to_go[ $iendm + 1 ] eq 'b' );
                }
                else {

                    # token is first character of first line of batch
                    $actual_pos = $predicted_pos;
                }

                # positive means the real position is to the right of the
                # predicted one
                my $move_right = $actual_pos - $predicted_pos;

                # done if no error to correct (gnu2.t)
                if ( $move_right == 0 ) {
                    $indentation->set_RECOVERABLE_SPACES($move_right);
                    next;
                }

                # if we have not seen closure for this indentation in
                # this batch, we can only pass on a request to the
                # vertical aligner
                my $closing_index = $indentation->get_CLOSED();

                if ( $closing_index < 0 ) {
                    $indentation->set_RECOVERABLE_SPACES($move_right);
                    next;
                }

                # If necessary, look ahead to see if there is really any
                # leading whitespace dependent on this whitespace, and
                # also find the longest line using this whitespace.
                # Since it is always safe to move left if there are no
                # dependents, we only need to do this if we may have
                # dependent nodes or need to move right.

                my $right_margin = 0;
                my $have_child   = $indentation->get_HAVE_CHILD();

                # Distinct indentation objects seen on the lines covered
                # by this container, keyed by their stringified
                # reference; $line_count counts those lines.
                my %saw_indentation;
                my $line_count = 1;
                $saw_indentation{$indentation} = $indentation;

                if ( $have_child || $move_right > 0 ) {

                    # NOTE(review): $have_child is cleared here but is not
                    # read again within this sub.
                    $have_child = 0;
                    my $max_length = 0;
                    if ( $i == $ibeg ) {
                        $max_length = total_line_length( $ibeg, $iend );
                    }

                    # look ahead at the rest of the lines of this batch..
                    my $line_t;
                    foreach $line_t ( $line + 1 .. $max_line ) {
                        my $ibeg_t = $$ri_first[$line_t];
                        my $iend_t = $$ri_last[$line_t];

                        # stop at the line on which the container closes
                        last if ( $closing_index <= $ibeg_t );

                        # remember all different indentation objects
                        my $indentation_t = $leading_spaces_to_go[$ibeg_t];
                        $saw_indentation{$indentation_t} = $indentation_t;
                        $line_count++;

                        # remember longest line in the group
                        my $length_t = total_line_length( $ibeg_t, $iend_t );
                        if ( $length_t > $max_length ) {
                            $max_length = $length_t;
                        }
                    }

                    # room left before the longest line hits the limit
                    $right_margin = $rOpts_maximum_line_length - $max_length;
                    if ( $right_margin < 0 ) { $right_margin = 0 }
                }

                # grep in scalar context: number of comma tokens on this line
                my $first_line_comma_count =
                  grep { $_ eq ',' } @types_to_go[ $ibeg .. $iend ];
                my $comma_count = $indentation->get_COMMA_COUNT();
                my $arrow_count = $indentation->get_ARROW_COUNT();

                # This is a simple approximate test for vertical alignment:
                # if we broke just after an opening paren, brace, bracket,
                # and there are 2 or more commas in the first line,
                # and there are no '=>'s,
                # then we are probably vertically aligned.  We could set
                # an exact flag in sub scan_list, but this is good
                # enough.
                my $indentation_count = keys %saw_indentation;
                my $is_vertically_aligned =
                  (      $i == $ibeg
                      && $first_line_comma_count > 1
                      && $indentation_count == 1
                      && ( $arrow_count == 0 || $arrow_count == $line_count ) );

                # Make the move if possible ..
                if (

                    # we can always move left
                    $move_right < 0

                    # but we should only move right if we are sure it will
                    # not spoil vertical alignment
                    || ( $comma_count == 0 )
                    || ( $comma_count > 0 && !$is_vertically_aligned )
                  )
                {

                    # a move to the right is capped at the right margin
                    my $move =
                      ( $move_right <= $right_margin )
                      ? $move_right
                      : $right_margin;

                    # apply the same shift to every indentation object seen
                    foreach ( keys %saw_indentation ) {
                        $saw_indentation{$_}
                          ->permanently_decrease_AVAILABLE_SPACES( -$move );
                    }
                }

                # Otherwise, record what we want and the vertical aligner
                # will try to recover it.
                else {
                    $indentation->set_RECOVERABLE_SPACES($move_right);
                }
            }
        }
    }
    return $do_not_pad;
}
# flush pushes everything still waiting in the pipeline out to the
# vertical aligner, so that lines coming from an alternate source can
# be written in the correct order.
sub flush {
    destroy_one_line_block();
    output_line_to_go();
    Perl::Tidy::VerticalAligner::flush();
}

sub reset_block_text_accumulator {

    # Stop accumulating -csc leading text and clear the accumulator state.
    # Text collected after an 'if' or 'elsif' is first saved so that it
    # can be appended after a later 'else'.
    push @{$rleading_block_if_elsif_text}, $leading_block_text
      if ( $accumulating_text_for_block
        && $accumulating_text_for_block =~ /^(if|elsif)$/ );

    $accumulating_text_for_block = $leading_block_text = "";

    $leading_block_text_level = $leading_block_text_length_exceeded =
      $leading_block_text_line_number = 0;
    $leading_block_text_line_length = 0;
}

sub set_block_text_accumulator {

    # Begin accumulating -csc leading text for the block keyword found at
    # token index $i.
    my ($i) = @_;

    my $keyword = $tokens_to_go[$i];
    my $level   = $levels_to_go[$i];

    $accumulating_text_for_block = $keyword;

    # anything other than an 'els..' keyword starts a new if-chain
    unless ( $keyword =~ /^els/ ) {
        $rleading_block_if_elsif_text = [];
    }

    $leading_block_text                 = "";
    $leading_block_text_length_exceeded = 0;
    $leading_block_text_level           = $level;
    $leading_block_text_line_number =
      $vertical_aligner_object->get_output_line_number();

    # this will contain the column number of the last character
    # of the closing side comment
    $leading_block_text_line_length =
      length($keyword) +
      length( $rOpts->{'closing-side-comment-prefix'} ) +
      $level * $rOpts_indent_columns + 3;
}
sub accumulate_block_text {

    # Append the text of token $i to the leading text being accumulated
    # for a closing side comment (-csc), ignoring any side comments.
    # When the text would become too long, '...' is appended once and
    # accumulation stops.
    my $i = shift;

    if (   $accumulating_text_for_block
        && !$leading_block_text_length_exceeded
        && $types_to_go[$i] ne '#' )
    {

        # the first token of a batch gets one extra leading space
        my $added_length = length( $tokens_to_go[$i] );
        if ( $i == 0 ) { $added_length += 1 }
        my $new_line_length = $leading_block_text_line_length + $added_length;

        my $text_length = length($leading_block_text);
        my $text_limit  = $rOpts_closing_side_comment_maximum_text;

        # We can add this text if we must not have already exceeded the
        # text length limit, and either:
        #   the new total line length is below the line length limit,
        #   or the new text length is below the text length limit
        #   (ie, we may allow one token to exceed the text length limit)
        my $can_append = $text_length < $text_limit
          && ( $new_line_length < $rOpts_maximum_line_length
            || $text_length + $added_length < $text_limit );

        # UNLESS: we are adding a closing paren before the brace we seek.
        # This is an attempt to avoid situations where the ... to be
        # added are longer than the omitted right paren, as in:
        #
        #   foreach my $item (@a_rather_long_variable_name_here) {
        #      &whatever;
        #   } ## end foreach my $item (@a_rather_long_variable_name_here...
        if ( !$can_append && $tokens_to_go[$i] eq ')' ) {

            # the sought block brace must be one or two tokens ahead
            foreach my $ahead ( 1, 2 ) {
                my $k = $i + $ahead;
                next if ( $k > $max_index_to_go );
                if ( $block_type_to_go[$k] eq $accumulating_text_for_block ) {
                    $can_append = 1;
                    last;
                }
            }
        }

        if ($can_append) {

            # add an extra space at each newline, then the token text
            $leading_block_text .= ' ' if ( $i == 0 );
            $leading_block_text .= $tokens_to_go[$i];
            $leading_block_text_line_length = $new_line_length;
        }

        # show that text was truncated if necessary
        elsif ( $types_to_go[$i] ne 'b' ) {
            $leading_block_text_length_exceeded = 1;
            $leading_block_text .= '...';
        }
    }
}
{
    my %is_if_elsif_else_unless_while_until_for_foreach;

    BEGIN {

        # These block types may have text between the keyword and opening
        # curly.  Note: 'else' does not, but must be included to allow trailing
        # if/elsif text to be appended.
        # patch for SWITCH/CASE: added 'case' and 'when'
        # (a lexical list is used here rather than overwriting @_)
        my @q = qw(if elsif else unless while until for foreach case when);
        @is_if_elsif_else_unless_while_until_for_foreach{@q} = (1) x scalar(@q);
    }

    sub accumulate_csc_text {

        # Called once per output buffer when -csc is used.  Accumulates
        # the text placed after certain closing block braces.
        #
        # Takes no arguments; it scans tokens 0 .. $max_index_to_go.
        #
        # Returns the list:
        #   $terminal_type        - type of the last nonblank, non-comment
        #                           token ('b' if there is none)
        #   $i_terminal           - index of that token
        #   $i_block_leading_text - index of the '}' owning the leading
        #                           text, or -1
        #   $block_leading_text   - the leading text of the last '}'
        #   $block_line_count     - how many lines the block spans

        my $block_leading_text = "";    # the leading text of the last '}'
        my $rblock_leading_if_elsif_text;
        my $i_block_leading_text =
          -1;    # index of token owning block_leading_text
        my $block_line_count    = 100;    # how many lines the block spans
        my $terminal_type       = 'b';    # type of last nonblank token
        my $i_terminal          = 0;      # index of last nonblank token
        my $terminal_block_type = "";

        for my $i ( 0 .. $max_index_to_go ) {
            my $type       = $types_to_go[$i];
            my $block_type = $block_type_to_go[$i];
            my $token      = $tokens_to_go[$i];

            # remember last nonblank token type
            if ( $type ne '#' && $type ne 'b' ) {
                $terminal_type       = $type;
                $terminal_block_type = $block_type;
                $i_terminal          = $i;
            }

            my $type_sequence = $type_sequence_to_go[$i];
            if ( $block_type && $type_sequence ) {

                if ( $token eq '}' ) {

                    # restore any leading text saved when we entered this block
                    if ( defined( $block_leading_text{$type_sequence} ) ) {
                        ( $block_leading_text, $rblock_leading_if_elsif_text ) =
                          @{ $block_leading_text{$type_sequence} };
                        $i_block_leading_text = $i;
                        delete $block_leading_text{$type_sequence};
                        $rleading_block_if_elsif_text =
                          $rblock_leading_if_elsif_text;
                    }

                    # if we run into a '}' then we probably started accumulating
                    # at something like a trailing 'if' clause..no harm done.
                    if (   $accumulating_text_for_block
                        && $levels_to_go[$i] <= $leading_block_text_level )
                    {
                        reset_block_text_accumulator();
                    }

                    # A missing opening line number shouldn't be possible,
                    # but it is not a significant problem: the default
                    # line count is then left unchanged.
                    if ( defined( $block_opening_line_number{$type_sequence} ) )
                    {
                        my $output_line_number =
                          $vertical_aligner_object->get_output_line_number();
                        $block_line_count =
                          $output_line_number -
                          $block_opening_line_number{$type_sequence} + 1;
                        delete $block_opening_line_number{$type_sequence};
                    }
                }

                elsif ( $token eq '{' ) {

                    my $line_number =
                      $vertical_aligner_object->get_output_line_number();
                    $block_opening_line_number{$type_sequence} = $line_number;

                    # Finding a '{' of a different block type than the one
                    # we are accumulating for shouldn't happen, but it is
                    # not a serious error; it is simply ignored.
                    if (   $accumulating_text_for_block
                        && $levels_to_go[$i] == $leading_block_text_level
                        && $accumulating_text_for_block eq $block_type )
                    {

                        # save any leading text before we enter this block
                        $block_leading_text{$type_sequence} = [
                            $leading_block_text,
                            $rleading_block_if_elsif_text
                        ];
                        $block_opening_line_number{$type_sequence} =
                          $leading_block_text_line_number;
                        reset_block_text_accumulator();
                    }
                }
            }

            # Note: no /o modifier on this match.  The pattern is an
            # option-dependent variable, and /o would freeze the first
            # value seen for the life of the process.
            if (   $type eq 'k'
                && $csc_new_statement_ok
                && $is_if_elsif_else_unless_while_until_for_foreach{$token}
                && $token =~ /$closing_side_comment_list_pattern/ )
            {
                set_block_text_accumulator($i);
            }
            else {

                # note: ignoring type 'q' because of tricks being played
                # with 'q' for hanging side comments
                if ( $type ne 'b' && $type ne '#' && $type ne 'q' ) {
                    $csc_new_statement_ok =
                      ( $block_type || $type eq 'J' || $type eq ';' );
                }
                if (   $type eq ';'
                    && $accumulating_text_for_block
                    && $levels_to_go[$i] == $leading_block_text_level )
                {
                    reset_block_text_accumulator();
                }
                else {
                    accumulate_block_text($i);
                }
            }
        }

        # Treat an 'else' block specially by adding preceding 'if' and
        # 'elsif' text.  Otherwise, the 'end else' is not helpful,
        # especially for cuddled-else formatting.
        if ( $terminal_block_type =~ /^els/ && $rblock_leading_if_elsif_text ) {
            $block_leading_text =
              make_else_csc_text( $i_terminal, $terminal_block_type,
                $block_leading_text, $rblock_leading_if_elsif_text );
        }

        return ( $terminal_type, $i_terminal, $i_block_leading_text,
            $block_leading_text, $block_line_count );
    }
}
sub make_else_csc_text {

    # Build the additional -csc text for an 'else' and, optionally, an
    # 'elsif', as selected by $rOpts_closing_side_comment_else_flag:
    #
    #  = 0 add 'if' text to trailing else
    #  = 1 same as 0 plus:
    #      add 'if' to 'elsif's if can fit in line length
    #      add last 'elsif' to trailing else if can fit in one line
    #  = 2 same as 1 but do not check if exceed line length
    #
    # Parameters:
    #   $i_terminal         - index of the terminal token of the line
    #   $block_type         - block type of that token ('else' or 'elsif')
    #   $block_leading_text - leading text already collected for the block
    #   $rif_elsif_text     - a reference to a list of all previous closing
    #                         side comments created for this if block
    #
    # Returns the closing side comment text to use.
    my ( $i_terminal, $block_type, $block_leading_text, $rif_elsif_text ) = @_;

    my $else_flag = $rOpts_closing_side_comment_else_flag;
    my $text      = $block_leading_text;

    # an 'elsif' gets nothing extra at flag level 0
    return $text if ( $block_type eq 'elsif' && $else_flag == 0 );

    # nothing to add if no previous text was saved
    my $count = scalar @{$rif_elsif_text};
    return $text unless ($count);

    my $is_else = ( $block_type eq 'else' );
    my $if_text = '[ if' . $rif_elsif_text->[0];

    # always show the leading 'if' text on 'else'
    $text .= $if_text if ($is_else);

    # see if that's all
    return $text if ( $else_flag == 0 );

    # the most recent 'elsif' text, with a marker for any skipped ones
    my $last_elsif_text =
        ( $count > 2 ) ? ' [...' . ' [elsif' . $rif_elsif_text->[ $count - 1 ]
      : ( $count > 1 ) ? ' [elsif' . $rif_elsif_text->[ $count - 1 ]
      :                  "";

    # tentatively append one more item
    my $short_text = $text;
    $text .= $is_else ? $last_elsif_text : ' ' . $if_text;

    # all done if no length checks requested
    return $text if ( $else_flag == 2 );

    # undo it if line length exceeded
    my $line_length =
      length($text) +
      length($block_type) +
      length( $rOpts->{'closing-side-comment-prefix'} ) +
      $levels_to_go[$i_terminal] * $rOpts_indent_columns + 3;

    return ( $line_length > $rOpts_maximum_line_length )
      ? $short_text
      : $text;
}
sub add_closing_side_comment {

    # Add closing side comments after closing block braces if -csc used.
    #
    # Takes no arguments; works on the current batch of tokens.
    #
    # Returns undef, or, when -cscw replaced an existing closing side
    # comment which differed, a block comment holding the old text.
    my $cscw_block_comment;

    #---------------------------------------------------------------
    # Step 1: loop through all tokens of this line to accumulate
    # the text needed to create the closing side comments. Also see
    # how the line ends.
    #---------------------------------------------------------------

    my ( $terminal_type, $i_terminal, $i_block_leading_text,
        $block_leading_text, $block_line_count )
      = accumulate_csc_text();

    #---------------------------------------------------------------
    # Step 2: make the closing side comment if this ends a block
    #---------------------------------------------------------------
    my $have_side_comment = $i_terminal != $max_index_to_go;

    # Note: the two pattern matches below deliberately have no /o
    # modifier.  The patterns are option-dependent variables, and /o
    # would freeze the first value seen for the life of the process.

    # if this line might end in a block closure..
    if (
        $terminal_type eq '}'

        # ..and either
        && (

            # the block is long enough
            ( $block_line_count >= $rOpts->{'closing-side-comment-interval'} )

            # or there is an existing comment to check
            || (   $have_side_comment
                && $rOpts->{'closing-side-comment-warnings'} )
        )

        # .. and if this is one of the types of interest
        && $block_type_to_go[$i_terminal] =~
        /$closing_side_comment_list_pattern/

        # .. but not an anonymous sub
        # These are not normally of interest, and their closing braces are
        # often followed by commas or semicolons anyway.  This also avoids
        # possible erratic output due to line numbering inconsistencies
        # in the cases where their closing braces terminate a line.
        && $block_type_to_go[$i_terminal] ne 'sub'

        # ..and the corresponding opening brace must not be in this batch
        # (because we do not need to tag one-line blocks, although this
        # should also be caught with a positive -csci value)
        && $mate_index_to_go[$i_terminal] < 0

        # ..and either
        && (

            # this is the last token (line doesn't have a side comment)
            !$have_side_comment

            # or the old side comment is a closing side comment
            || $tokens_to_go[$max_index_to_go] =~
            /$closing_side_comment_prefix_pattern/
        )
      )
    {

        # then make the closing side comment text
        my $token =
"$rOpts->{'closing-side-comment-prefix'} $block_type_to_go[$i_terminal]";

        # append any extra descriptive text collected above
        if ( $i_block_leading_text == $i_terminal ) {
            $token .= $block_leading_text;
        }
        $token =~ s/\s*$//;    # trim any trailing whitespace

        # handle case of existing closing side comment
        if ($have_side_comment) {

            # warn if requested and tokens differ significantly
            if ( $rOpts->{'closing-side-comment-warnings'} ) {
                my $old_csc = $tokens_to_go[$max_index_to_go];
                my $new_csc = $token;

                # trim trailing '...', remembering whether it was there;
                # $1 is only read if the substitution actually matched
                my $new_trailing_dots =
                  ( $new_csc =~ s/(\.\.\.)\s*$// ) ? $1 : undef;
                $old_csc =~ s/\.\.\.\s*$//;
                $new_csc =~ s/\s+//g;    # trim all whitespace
                $old_csc =~ s/\s+//g;

                # Patch to handle multiple closing side comments at
                # else and elsif's.  These have become too complicated
                # to check, so if we see an indication of
                # '[ if' or '[ # elsif', then assume they were made
                # by perltidy.
                if ( $block_type_to_go[$i_terminal] eq 'else' ) {
                    if ( $old_csc =~ /\[\s*elsif/ ) { $old_csc = $new_csc }
                }
                elsif ( $block_type_to_go[$i_terminal] eq 'elsif' ) {
                    if ( $old_csc =~ /\[\s*if/ ) { $old_csc = $new_csc }
                }

                # if old comment is contained in new comment,
                # only compare the common part.
                if ( length($new_csc) > length($old_csc) ) {
                    $new_csc = substr( $new_csc, 0, length($old_csc) );
                }

                # if the new comment is shorter and has been limited,
                # only compare the common part.
                if ( length($new_csc) < length($old_csc) && $new_trailing_dots )
                {
                    $old_csc = substr( $old_csc, 0, length($new_csc) );
                }

                # any remaining difference?
                if ( $new_csc ne $old_csc ) {

                    # just leave the old comment if we are below the threshold
                    # for creating side comments
                    if ( $block_line_count <
                        $rOpts->{'closing-side-comment-interval'} )
                    {
                        $token = undef;
                    }

                    # otherwise we'll make a note of it
                    else {

                        warning(
"perltidy -cscw replaced: $tokens_to_go[$max_index_to_go]\n"
                        );

                     # save the old side comment in a new trailing block comment
                        my ( $day, $month, $year ) = (localtime)[ 3, 4, 5 ];
                        $year  += 1900;
                        $month += 1;
                        $cscw_block_comment =
"## perltidy -cscw $year-$month-$day: $tokens_to_go[$max_index_to_go]";
                    }
                }
                else {

                    # No differences.. we can safely delete old comment if we
                    # are below the threshold
                    if ( $block_line_count <
                        $rOpts->{'closing-side-comment-interval'} )
                    {
                        $token = undef;
                        unstore_token_to_go()
                          if ( $types_to_go[$max_index_to_go] eq '#' );
                        unstore_token_to_go()
                          if ( $types_to_go[$max_index_to_go] eq 'b' );
                    }
                }
            }

            # switch to the new csc (unless we deleted it!)
            $tokens_to_go[$max_index_to_go] = $token if $token;
        }

        # handle case of NO existing closing side comment
        else {

            # insert the new side comment into the output token stream,
            # at the nesting depth of the current last token
            my $type                 = '#';
            my $slevel               = $nesting_depth_to_go[$max_index_to_go];
            my $no_internal_newlines = 0;

            # first insert a blank token
            insert_new_token_to_go( ' ', 'b', $slevel, $no_internal_newlines );

            # then the side comment
            insert_new_token_to_go( $token, $type, $slevel,
                $no_internal_newlines );
        }
    }
    return $cscw_block_comment;
}
sub previous_nonblank_token {

    # Return the text of the token before index $i, stepping back over
    # one blank token if necessary.  Returns "" if there is no such token.
    my ($i) = @_;

    my $k = $i - 1;
    $k-- if ( $k >= 0 && $types_to_go[$k] eq 'b' );
    return "" if ( $k < 0 );

    my $token = $tokens_to_go[$k];
    return $token unless ( $token eq '->' );

    # prepend any sub name to an isolated -> to avoid unwanted alignments
    # [test case is test8/penco.pl]
    my $kk = $k - 1;
    return $token if ( $kk < 0 || $types_to_go[$kk] eq 'b' );
    return $tokens_to_go[$kk] . $token;
}
sub send_lines_to_vertical_aligner {

    # Ship each output line of the current batch to the vertical aligner.
    #
    # Parameters:
    #   $ri_first   - ref to array of the first token index of each line
    #   $ri_last    - ref to array of the last token index of each line
    #   $do_not_pad - flag passed along with the first line only
    my ( $ri_first, $ri_last, $do_not_pad ) = @_;

    # ref to indentations for each line
    my $rindentation_list = [0];

    # define the array @matching_token_to_go for the output tokens
    # which will be non-blank for each special token (such as =>)
    # for which alignment is required.
    set_vertical_alignment_markers( $ri_first, $ri_last );

    # flush before a long (multi-line) if statement to avoid unwanted
    # alignment
    if (   @$ri_first > 1
        && $types_to_go[0] eq 'k'
        && $tokens_to_go[0] =~ /^(if|unless)$/ )
    {
        Perl::Tidy::VerticalAligner::flush();
    }

    set_logical_padding( $ri_first, $ri_last );

    # loop to prepare each line for shipment
    my $n_last_line = $#{$ri_first};
    my $in_comma_list;
    foreach my $n ( 0 .. $n_last_line ) {
        my ( $ibeg, $iend ) = ( $ri_first->[$n], $ri_last->[$n] );

        my ( $rtokens, $rfields, $rpatterns ) =
          make_alignment_patterns( $ibeg, $iend );

        my (
            $indentation,   $lev,
            $level_end,     $terminal_type,
            $is_semicolon_terminated, $is_outdented_line
          )
          = set_adjusted_indentation( $ibeg, $iend, $rfields, $rpatterns,
            $ri_first, $ri_last, $rindentation_list );

        # We will allow outdenting of long lines which are long quotes,
        # if allowed, or which are long block comments, if allowed, but
        # not if this is a static block comment.
        my $first_type = $types_to_go[$ibeg];
        my $outdent_long_lines =
          ( $first_type eq 'Q' && $rOpts->{'outdent-long-quotes'} )
          || ( $first_type eq '#'
            && $rOpts->{'outdent-long-comments'}
            && !$is_static_block_comment );

        my $level_jump =
          $nesting_depth_to_go[ $iend + 1 ] - $nesting_depth_to_go[$ibeg];

        my $rvertical_tightness_flags =
          set_vertical_tightness_flags( $n, $n_last_line, $ibeg, $iend,
            $ri_first, $ri_last );

        # flush an outdented line to avoid any unwanted vertical alignment
        if ($is_outdented_line) {
            Perl::Tidy::VerticalAligner::flush();
        }

        # A line which starts with ':' or follows a line ending in ':'
        # is the terminal part of a ternary if the statement ends here
        # or the level drops.
        my $follows_colon = $tokens_to_go[$ibeg] eq ':'
          || ( $n > 0 && $tokens_to_go[ $ri_last->[ $n - 1 ] ] eq ':' );
        my $is_terminal_ternary = 0;
        if (
            $follows_colon
            && (   ( $terminal_type eq ';' && $level_end <= $lev )
                || ( $level_end < $lev ) )
          )
        {
            $is_terminal_ternary = 1;
        }

        # send this new line down the pipe
        my $forced_breakpoint = $forced_breakpoint_to_go[$iend];
        Perl::Tidy::VerticalAligner::append_line(
            $lev,
            $level_end,
            $indentation,
            $rfields,
            $rtokens,
            $rpatterns,
            $forced_breakpoint || $in_comma_list,
            $outdent_long_lines,
            $is_terminal_ternary,
            $is_semicolon_terminated,
            $do_not_pad,
            $rvertical_tightness_flags,
            $level_jump,
        );

        # remember for the next line whether this one ended at a forced
        # break after a comma
        $in_comma_list =
          $tokens_to_go[$iend] eq ',' && $forced_breakpoint_to_go[$iend];

        # flush an outdented line to avoid any unwanted vertical alignment
        if ($is_outdented_line) {
            Perl::Tidy::VerticalAligner::flush();
        }

        # the flag applies to the first line only
        $do_not_pad = 0;
    }

    # remember indentation of lines containing opening containers for
    # later use by sub set_adjusted_indentation
    save_opening_indentation( $ri_first, $ri_last, $rindentation_list );
}
# set_adjusted_indentation
    save_opening_indentation( $ri_first, $ri_last, $rindentation_list );
}

{    # begin make_alignment_patterns

    # Lookup tables private to sub make_alignment_patterns; they are
    # filled in once, at compile time, by the BEGIN block below.
    my %block_type_map;
    my %keyword_map;

    BEGIN {

        # map related block names into a common name to
        # allow alignment
        %block_type_map = (
            'unless'  => 'if',
            'else'    => 'if',
            'elsif'   => 'if',
            'when'    => 'if',
            'default' => 'if',
            'case'    => 'if',
            'sort'    => 'map',
            'grep'    => 'map',
        );

        # map certain keywords to the same 'if' class to align
        # long if/elsif sequences. [elsif.pl]
        %keyword_map = (
            'unless'  => 'if',
            'else'    => 'if',
            'elsif'   => 'if',
            'when'    => 'given',
            'default' => 'given',
            'case'    => 'switch',

            # treat an 'undef' similar to numbers and quotes
            'undef' => 'Q',
        );
    }

    sub make_alignment_patterns {

        # Here we do some important preliminary work for the
        # vertical aligner.  We create three arrays for one
        # output line. These arrays contain strings that can
        # be tested by the vertical aligner to see if
        # consecutive lines can be aligned vertically.
        #
        # given:
        #   $ibeg, $iend - index of the first and last token of the
        #                  output line in the '_to_go' arrays
        #
        # returns references to three arrays, in this order.
        # The three arrays are indexed on the vertical
        # alignment fields and are:
        # @tokens - a list of any vertical alignment tokens for this line.
        #   These are tokens, such as '=' '&&' '#' etc which
        #   we might want to align vertically.  These are
        #   decorated with various information such as
        #   nesting depth to prevent unwanted vertical
        #   alignment matches.
        # @fields - the actual text of the line between the vertical alignment
        #   tokens.
        # @patterns - a modified list of token types, one for each alignment
        #   field.  These should normally each match before alignment is
        #   allowed, even when the alignment tokens match.
        my ( $ibeg, $iend ) = @_;
        my @tokens   = ();
        my @fields   = ();
        my @patterns = ();
        my $i_start  = $ibeg;    # index of first token of the current field
        my $i;

        # Stack, indexed by $depth, describing the parens which open and
        # close on this line.  Slot 0 is for tokens outside of any such paren.
        # NOTE(review): @multiple_comma_arrows is filled in below but is not
        # read anywhere in this routine.
        my $depth                 = 0;
        my @container_name        = ("");
        my @multiple_comma_arrows = (undef);

        my $j = 0;    # field index

        $patterns[0] = "";
        for $i ( $ibeg .. $iend ) {

            # Keep track of containers balanced on this line only.
            # These are used below to prevent unwanted cross-line alignments.
            # Unbalanced containers already avoid aligning across
            # container boundaries.
            if ( $tokens_to_go[$i] eq '(' ) {

                # if container is balanced on this line...
                my $i_mate = $mate_index_to_go[$i];
                if ( $i_mate > $i && $i_mate <= $iend ) {
                    $depth++;
                    my $seqno = $type_sequence_to_go[$i];
                    my $count = comma_arrow_count($seqno);
                    $multiple_comma_arrows[$depth] = $count && $count > 1;

                    # Append the previous token name to make the container name
                    # more unique.  This name will also be given to any commas
                    # within this container, and it helps avoid undesirable
                    # alignments of different types of containers.
                    my $name = previous_nonblank_token($i);
                    $name =~ s/^->//;
                    $container_name[$depth] = "+" . $name;

                    # Make the container name even more unique if necessary.
                    # If we are not vertically aligning this opening paren,
                    # append a character count to avoid bad alignment because
                    # it usually looks bad to align commas within containers
                    # for which the opening parens do not align.  Here
                    # is an example of very BAD alignment of commas (because
                    # the atan2 functions are not all aligned):
                    #    $XY =
                    #      $X * $RTYSQP1 * atan2( $X, $RTYSQP1 ) +
                    #      $Y * $RTXSQP1 * atan2( $Y, $RTXSQP1 ) -
                    #      $X * atan2( $X,            1 ) -
                    #      $Y * atan2( $Y,            1 );
                    #
                    # On the other hand, it is usually okay to align commas if
                    # opening parens align, such as:
                    #    glVertex3d( $cx + $s * $xs, $cy,            $z );
                    #    glVertex3d( $cx,            $cy + $s * $ys, $z );
                    #    glVertex3d( $cx - $s * $xs, $cy,            $z );
                    #    glVertex3d( $cx,            $cy - $s * $ys, $z );
                    #
                    # To distinguish between these situations, we will
                    # append the length of the line from the previous matching
                    # token, or beginning of line, to the function name.  This
                    # will allow the vertical aligner to reject undesirable
                    # matches.

                    # if we are not aligning on this paren...
                    if ( $matching_token_to_go[$i] eq '' ) {

                        # Sum length from previous alignment, or start of line.
                        # Note that we have to sum token lengths here because
                        # padding has been done and so array $lengths_to_go
                        # is now wrong.
                        my $len =
                          length(
                            join( '', @tokens_to_go[ $i_start .. $i - 1 ] ) );
                        $len += leading_spaces_to_go($i_start)
                          if ( $i_start == $ibeg );

                        # tack length onto the container name to make unique
                        $container_name[$depth] .= "-" . $len;
                    }
                }
            }
            elsif ( $tokens_to_go[$i] eq ')' ) {

                # never go below the outermost slot of the name stack
                $depth-- if $depth > 0;
            }

            # if we find a new synchronization token, we are done with
            # a field
            if ( $i > $i_start && $matching_token_to_go[$i] ne '' ) {

                # $raw_tok keeps the undecorated token for the tests below;
                # $tok accumulates the decorations
                my $tok = my $raw_tok = $matching_token_to_go[$i];

                # make separators in different nesting depths unique
                # by appending the nesting depth digit.
                if ( $raw_tok ne '#' ) {
                    $tok .= "$nesting_depth_to_go[$i]";
                }

                # also decorate commas with any container name to avoid
                # unwanted cross-line alignments.
                if ( $raw_tok eq ',' || $raw_tok eq '=>' ) {
                    if ( $container_name[$depth] ) {
                        $tok .= $container_name[$depth];
                    }
                }

                # Patch to avoid aligning leading and trailing if, unless.
                # Mark trailing if, unless statements with container names.
                # This makes them different from leading if, unless which
                # are not so marked at present.  If we ever need to name
                # them too, we could use ci to distinguish them.
                # Example problem to avoid:
                #    return ( 2, "DBERROR" )
                #      if ( $retval == 2 );
                #    if   ( scalar @_ ) {
                #        my ( $a, $b, $c, $d, $e, $f ) = @_;
                #    }
                if ( $raw_tok eq '(' ) {
                    my $ci = $ci_levels_to_go[$ibeg];
                    if (   $container_name[$depth] =~ /^\+(if|unless)/
                        && $ci )
                    {
                        $tok .= $container_name[$depth];
                    }
                }

                # Decorate block braces with block types to avoid
                # unwanted alignments such as the following:
                # foreach ( @{$routput_array} ) { $fh->print($_) }
                # eval                          { $fh->close() };
                if ( $raw_tok eq '{' && $block_type_to_go[$i] ) {
                    my $block_type = $block_type_to_go[$i];

                    # map certain related block types to allow
                    # else blocks to align
                    $block_type = $block_type_map{$block_type}
                      if ( defined( $block_type_map{$block_type} ) );

                    # remove sub names to allow one-line sub braces to align
                    # regardless of name
                    if ( $block_type =~ /^sub / ) { $block_type = 'sub' }

                    # allow all control-type blocks to align
                    if ( $block_type =~ /^[A-Z]+$/ ) { $block_type = 'BEGIN' }

                    $tok .= $block_type;
                }

                # concatenate the text of the consecutive tokens to form
                # the field
                push( @fields,
                    join( '', @tokens_to_go[ $i_start .. $i - 1 ] ) );

                # store the alignment token for this field
                push( @tokens, $tok );

                # get ready for the next batch
                $i_start = $i;
                $j++;
                $patterns[$j] = "";
            }

            # continue accumulating tokens
            # handle non-keywords..
            if ( $types_to_go[$i] ne 'k' ) {
                my $type = $types_to_go[$i];

                # Mark most things before arrows as a quote to
                # get them to line up. Testfile: mixed.pl.
                if ( ( $i < $iend - 1 ) && ( $type =~ /^[wnC]$/ ) ) {
                    my $next_type = $types_to_go[ $i + 1 ];
                    my $i_next_nonblank =
                      ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 );

                    if ( $types_to_go[$i_next_nonblank] eq '=>' ) {
                        $type = 'Q';

                        # Patch to ignore leading minus before words,
                        # by changing pattern 'mQ' into just 'Q',
                        # so that we can align things like this:
                        #  Button   => "Print letter \"~$_\"",
                        #  -command => [ sub { print "$_[0]\n" }, $_ ],
                        if ( $patterns[$j] eq 'm' ) { $patterns[$j] = "" }
                    }
                }

                # patch to make numbers and quotes align
                if ( $type eq 'n' ) { $type = 'Q' }

                # patch to ignore any ! in patterns
                if ( $type eq '!' ) { $type = '' }

                $patterns[$j] .= $type;
            }

            # for keywords we have to use the actual text
            else {

                my $tok = $tokens_to_go[$i];

                # but map certain keywords to a common string to allow
                # alignment.
                $tok = $keyword_map{$tok}
                  if ( defined( $keyword_map{$tok} ) );
                $patterns[$j] .= $tok;
            }
        }

        # done with this line .. join text of tokens to make the last field
        push( @fields, join( '', @tokens_to_go[ $i_start .. $iend ] ) );
        return ( \@tokens, \@fields, \@patterns );
    }

}    # end make_alignment_patterns

{    # begin unmatched_indexes

    # closure to keep track of unbalanced containers.
# State shared by the routines in this block.  All three are rebuilt
    # for every batch by sub match_opening_and_closing_tokens:
    #   - indexes of opening tokens whose closing token is not in the batch
    #   - indexes of closing tokens whose opening token is not in the batch
    #   - number of '=>' tokens counted for each container sequence number
    my @unmatched_opening_indexes_in_this_batch;
    my @unmatched_closing_indexes_in_this_batch;
    my %comma_arrow_count;

    sub is_unbalanced_batch {

        # Returns the total number of unmatched opening and closing tokens
        # in the current batch (zero, i.e. false, if the batch is balanced).
        return scalar(@unmatched_opening_indexes_in_this_batch) +
          scalar(@unmatched_closing_indexes_in_this_batch);
    }

    sub comma_arrow_count {

        # Returns the number of '=>' tokens recorded for the container with
        # sequence number $seqno, or undef if none were recorded.
        my ($seqno) = @_;
        return $comma_arrow_count{$seqno};
    }

    sub match_opening_and_closing_tokens {

        # Pair up the opening and closing container tokens of this batch by
        # filling in @mate_index_to_go, and note which ones have no partner
        # in the batch.  This must wait until all tokens have been stored
        # because unstoring of tokens would otherwise cause trouble.

        @unmatched_opening_indexes_in_this_batch = ();
        @unmatched_closing_indexes_in_this_batch = ();
        %comma_arrow_count                       = ();

        for my $index ( 0 .. $max_index_to_go ) {

            # A token without a sequence number is only of interest if it
            # is a '=>': count it for the innermost container still open.
            if ( !$type_sequence_to_go[$index] ) {
                next unless ( $tokens_to_go[$index] eq '=>' );
                next unless (@unmatched_opening_indexes_in_this_batch);
                my $i_container = $unmatched_opening_indexes_in_this_batch[-1];
                my $seqno       = $type_sequence_to_go[$i_container];
                $comma_arrow_count{$seqno}++;
                next;
            }

            my $tok = $tokens_to_go[$index];

            # an opening token just goes on the stack
            if ( $tok =~ /^[\(\[\{\?]$/ ) {
                push @unmatched_opening_indexes_in_this_batch, $index;
                next;
            }

            # anything else with a sequence number must be a closing token
            next unless ( $tok =~ /^[\)\]\}\:]$/ );

            # nothing on the stack: the opening token was in an earlier batch
            my $i_open = pop @unmatched_opening_indexes_in_this_batch;
            if ( !defined($i_open) || $i_open < 0 ) {
                push @unmatched_closing_indexes_in_this_batch, $index;
                next;
            }

            # top of stack is the partner only if sequence numbers agree
            if ( $type_sequence_to_go[$i_open] == $type_sequence_to_go[$index] )
            {
                $mate_index_to_go[$index]  = $i_open;
                $mate_index_to_go[$i_open] = $index;
            }

            # otherwise put the opening token back; both stay unmatched
            else {
                push @unmatched_opening_indexes_in_this_batch, $i_open;
                push @unmatched_closing_indexes_in_this_batch, $index;
            }
        }
    }

    sub save_opening_indentation {

        # To be called after each batch of tokens has been output.  It
        # records, by sequence number, the indentation data of the lines
        # holding the unmatched opening tokens of the batch, where
        # sub get_opening_indentation can find it for a later batch.

        my ( $ri_first, $ri_last, $rindentation_list ) = @_;

        # An unmatched closing token of this batch has now been seen and
        # will not come around again, so its saved entry can be dropped
        # to keep the hash small.
        for my $i_closing (@unmatched_closing_indexes_in_this_batch) {
            my $seqno = $type_sequence_to_go[$i_closing];
            delete $saved_opening_indentation{$seqno};
        }

        # An unmatched opening token may be closed in a later batch, so
        # remember what sub lookup_opening_indentation returns for it.
        for my $i_opening (@unmatched_opening_indexes_in_this_batch) {
            my $seqno        = $type_sequence_to_go[$i_opening];
            my @opening_data = lookup_opening_indentation( $i_opening,
                $ri_first, $ri_last, $rindentation_list );
            $saved_opening_indentation{$seqno} = [@opening_data];
        }
    }
}    # end unmatched_indexes

sub get_opening_indentation {

    # get the indentation of the line which output the opening token
    # corresponding to a given closing token in the current output batch.
    #
    # given:
    # $i_closing - index in this line of a closing token ')' '}' or ']'
    #
    # $ri_first - reference to list of the first index $i for each output
    #               line in this batch
    # $ri_last - reference to list of the last index $i for each output line
    #              in this batch
    # $rindentation_list - reference to a list containing the indentation
    #            used for each line.
#
    # return (a list of four values):
    #   -the indentation of the line which contained the opening token
    #    which matches the closing token at index $i_closing
    #   -and its offset (number of columns) from the start of the line
    #   -a flag which is true if the opening token was the first token
    #    of its line
    #   -a flag which is 1 if the opening token was found, and 0 if it
    #    was not; in that case the first three values are all 0
    #
    my ( $i_closing, $ri_first, $ri_last, $rindentation_list ) = @_;

    # first, see if the opening token is in the current batch
    my $i_opening = $mate_index_to_go[$i_closing];
    if ( $i_opening >= 0 ) {

        # it is..look up the indentation
        my ( $indent, $offset, $is_leading ) =
          lookup_opening_indentation( $i_opening, $ri_first, $ri_last,
            $rindentation_list );
        return ( $indent, $offset, $is_leading, 1 );
    }

    # if not, it should have been stored in the hash by a previous batch
    my $seqno = $type_sequence_to_go[$i_closing];
    if ( $seqno && $saved_opening_indentation{$seqno} ) {
        my ( $indent, $offset, $is_leading ) =
          @{ $saved_opening_indentation{$seqno} };
        return ( $indent, $offset, $is_leading, 1 );
    }

    # Nothing was saved.  Either there is no sequence number, so it must be
    # an unbalanced container, or there is some kind of serious error
    # (example is badfile.t).  Both cases give the same 'not found' result.
    return ( 0, 0, 0, 0 );
}

sub lookup_opening_indentation {

    # get the indentation of the line in the current output batch
    # which output a selected opening token
    #
    # given:
    #   $i_opening - index of an opening token in the current output batch
    #                whose line indentation we need
    #   $ri_first - reference to list of the first index $i for each output
    #               line in this batch
    #   $ri_last - reference to list of the last index $i for each output line
    #              in this batch
    #   $rindentation_list - reference to a list containing the indentation
    #            used for each line.  (NOTE: the first slot in
    #            this list is the last returned line number, and this is
    #            followed by the list of indentations).
    #
    # return (a list of three values):
    #   -the indentation of the line which contained token $i_opening
    #   -and its offset (number of columns) from the start of the line
    #   -a flag which is true if $i_opening is the first token of its line
    #
    # side effect: slot 0 of @{$rindentation_list} is updated with the
    # line number found, to give the next call a starting point.

    my ( $i_opening, $ri_first, $ri_last, $rindentation_list ) = @_;

    my $nline = $rindentation_list->[0];    # line number of previous lookup

    # reset line location if necessary
    $nline = 0 if ( $i_opening < $ri_first->[$nline] );

    # find the correct line
    if ( $i_opening <= $ri_last->[-1] ) {
        while ( $i_opening > $ri_last->[$nline] ) { $nline++; }
    }

    # error - token index is out of bounds - shouldn't happen
    else {
        warning(
"non-fatal program bug in lookup_opening_indentation - index out of range\n"
        );
        report_definite_bug();

        # fall back on the last line of the batch
        $nline = $#{$ri_last};
    }

    $rindentation_list->[0] =
      $nline;    # save line number to start looking next call
    my $ibeg       = $ri_first->[$nline];
    my $offset     = token_sequence_length( $ibeg, $i_opening ) - 1;
    my $is_leading = ( $ibeg == $i_opening );
    return ( $rindentation_list->[ $nline + 1 ], $offset, $is_leading );
}

{
    my %is_if_elsif_else_unless_while_until_for_foreach;

    BEGIN {

        # These block types may have text between the keyword and opening
        # curly.  Note: 'else' does not, but must be included to allow trailing
        # if/elsif text to be appended.
# patch for SWITCH/CASE: added 'case' and 'when'
        @_ = qw(if elsif else unless while until for foreach case when);
        @is_if_elsif_else_unless_while_until_for_foreach{@_} = (1) x scalar(@_);
    }

    sub set_adjusted_indentation {

        # This routine has the final say regarding the actual indentation of
        # a line.  It starts with the basic indentation which has been
        # defined for the leading token, and then takes into account any
        # options that the user has set regarding special indenting and
        # outdenting.
        #
        # given:
        #   $ibeg, $iend - index of the first and last token of the line
        #   $rfields, $rpatterns - references to the alignment fields and
        #       patterns of the line; only the first element of each is
        #       examined here
        #   $ri_first, $ri_last, $rindentation_list - passed on to
        #       sub get_opening_indentation; the indentation chosen here
        #       is also pushed onto @{$rindentation_list}
        #
        # returns the list:
        #   ( $indentation, $lev, $level_end, $terminal_type,
        #     $is_semicolon_terminated, $is_outdented_line )

        my ( $ibeg, $iend, $rfields, $rpatterns, $ri_first, $ri_last,
            $rindentation_list )
          = @_;

        # we need to know the last token of this line
        my ( $terminal_type, $i_terminal ) =
          terminal_type( \@types_to_go, \@block_type_to_go, $ibeg, $iend );

        my $is_outdented_line = 0;

        my $is_semicolon_terminated = $terminal_type eq ';'
          && $nesting_depth_to_go[$iend] < $nesting_depth_to_go[$ibeg];

        ##########################################################
        # Section 1: set a flag and a default indentation
        #
        # Most lines are indented according to the initial token.
        # But it is common to outdent to the level just after the
        # terminal token in certain cases...
        # adjust_indentation flag:
        #       0 - do not adjust
        #       1 - outdent
        #       2 - vertically align with opening token
        #       3 - indent
        ##########################################################
        my $adjust_indentation         = 0;
        my $default_adjust_indentation = $adjust_indentation;

        my (
            $opening_indentation, $opening_offset,
            $is_leading,          $opening_exists
        );

        # if we are at a closing token of some type..
        if ( $types_to_go[$ibeg] =~ /^[\)\}\]]$/ ) {

            # get the indentation of the line containing the corresponding
            # opening token
            (
                $opening_indentation, $opening_offset,
                $is_leading,          $opening_exists
              )
              = get_opening_indentation( $ibeg, $ri_first, $ri_last,
                $rindentation_list );

            # First set the default behavior:
            # default behavior is to outdent closing lines
            # of the form:   ");  };  ];  )->xxx;"
            if (
                $is_semicolon_terminated

                # and 'cuddled parens' of the form:   ")->pack("
                || (
                       $terminal_type eq '('
                    && $types_to_go[$ibeg] eq ')'
                    && ( $nesting_depth_to_go[$iend] + 1 ==
                        $nesting_depth_to_go[$ibeg] )
                )
              )
            {
                $adjust_indentation = 1;
            }

            # TESTING: outdent something like '),'
            if (
                $terminal_type eq ','

                # allow just one character before the comma
                && $i_terminal == $ibeg + 1

                # require LIST environment; otherwise, we may outdent too much --
                # this can happen in calls without parentheses (overload.t);
                && $container_environment_to_go[$i_terminal] eq 'LIST'
              )
            {
                $adjust_indentation = 1;
            }

            # undo continuation indentation of a terminal closing token if
            # it is the last token before a level decrease.  This will allow
            # a closing token to line up with its opening counterpart, and
            # avoids an indentation jump larger than 1 level.
            if (   $types_to_go[$i_terminal] =~ /^[\}\]\)R]$/
                && $i_terminal == $ibeg )
            {

                # NOTE(review): $ci is not used within this block
                my $ci        = $ci_levels_to_go[$ibeg];
                my $lev       = $levels_to_go[$ibeg];
                my $next_type = $types_to_go[ $ibeg + 1 ];
                my $i_next_nonblank =
                  ( ( $next_type eq 'b' ) ? $ibeg + 2 : $ibeg + 1 );
                if (   $i_next_nonblank <= $max_index_to_go
                    && $levels_to_go[$i_next_nonblank] < $lev )
                {
                    $adjust_indentation = 1;
                }
            }

            # remember the default; Section 2 falls back on it if vertical
            # alignment with the opening token (flag value 2) does not work
            $default_adjust_indentation = $adjust_indentation;

            # Now modify default behavior according to user request:
            # handle option to indent non-blocks of the form );  };  ];
            # But don't do special indentation to something like ')->pack('
            if ( !$block_type_to_go[$ibeg] ) {
                my $cti = $closing_token_indentation{ $tokens_to_go[$ibeg] };
                if ( $cti == 1 ) {
                    if (   $i_terminal <= $ibeg + 1
                        || $is_semicolon_terminated )
                    {
                        $adjust_indentation = 2;
                    }
                    else {
                        $adjust_indentation = 0;
                    }
                }
                elsif ( $cti == 2 ) {
                    if ($is_semicolon_terminated) {
                        $adjust_indentation = 3;
                    }
                    else {
                        $adjust_indentation = 0;
                    }
                }
                elsif ( $cti == 3 ) {
                    $adjust_indentation = 3;
                }
            }

            # handle option to indent blocks
            else {
                if (
                    $rOpts->{'indent-closing-brace'}
                    && (
                        $i_terminal == $ibeg    #  isolated terminal '}'
                        || $is_semicolon_terminated
                    )
                  )                             #  } xxxx ;
                {
                    $adjust_indentation = 3;
                }
            }
        }

        # if at ');', '};', '>;', and '];' of a terminal qw quote
        elsif ($$rpatterns[0] =~ /^qb*;$/
            && $$rfields[0] =~ /^([\)\}\]\>]);$/ )
        {

            # $1 is the closing delimiter captured from the field text above
            if ( $closing_token_indentation{$1} == 0 ) {
                $adjust_indentation = 1;
            }
            else {
                $adjust_indentation = 3;
            }
        }

        # if line begins with a ':', align it with any
        # previous line leading with corresponding ?
        elsif ( $types_to_go[$ibeg] eq ':' ) {
            (
                $opening_indentation, $opening_offset,
                $is_leading,          $opening_exists
              )
              = get_opening_indentation( $ibeg, $ri_first, $ri_last,
                $rindentation_list );
            if ($is_leading) { $adjust_indentation = 2; }
        }

        ##########################################################
        # Section 2: set indentation according to flag set above
        #
        # Select the indentation object to define leading
        # whitespace.  If we are outdenting something like '} } );'
        # then we want to use one level below the last token
        # ($i_terminal) in order to get it to fully outdent through
        # all levels.
        ##########################################################
        my $indentation;
        my $lev;
        my $level_end = $levels_to_go[$iend];

        if ( $adjust_indentation == 0 ) {
            $indentation = $leading_spaces_to_go[$ibeg];
            $lev         = $levels_to_go[$ibeg];
        }
        elsif ( $adjust_indentation == 1 ) {
            $indentation = $reduced_spaces_to_go[$i_terminal];
            $lev         = $levels_to_go[$i_terminal];
        }

        # handle indented closing token which aligns with opening token
        elsif ( $adjust_indentation == 2 ) {

            # handle option to align closing token with opening token
            $lev = $levels_to_go[$ibeg];

            # calculate spaces needed to align with opening token
            my $space_count =
              get_SPACES($opening_indentation) + $opening_offset;

            # Indent less than the previous line.
            #
            # Problem: For -lp we don't exactly know what it was if there
            # were recoverable spaces sent to the aligner.  A good solution
            # would be to force a flush of the vertical alignment buffer, so
            # that we would know.  For now, this rule is used for -lp:
            #
            # When the last line did not start with a closing token we will
            # be optimistic that the aligner will recover everything wanted.
            #
            # This rule will prevent us from breaking a hierarchy of closing
            # tokens, and in a worst case will leave a closing paren too far
            # indented, but this is better than frequently leaving it not
            # indented enough.
            my $last_spaces = get_SPACES($last_indentation_written);
            if ( $last_leading_token !~ /^[\}\]\)]$/ ) {
                $last_spaces +=
                  get_RECOVERABLE_SPACES($last_indentation_written);
            }

            # reset the indentation to the new space count if it works
            # only options are all or none: nothing in-between looks good
            $lev = $levels_to_go[$ibeg];
            if ( $space_count < $last_spaces ) {
                if ($rOpts_line_up_parentheses) {

                    # NOTE(review): this 'my' shadows the outer $lev, which
                    # already holds the same value
                    my $lev = $levels_to_go[$ibeg];
                    $indentation =
                      new_lp_indentation_item( $space_count, $lev, 0, 0, 0 );
                }
                else {
                    $indentation = $space_count;
                }
            }

            # revert to default if it doesn't work
            else {

                # NOTE(review): $space_count is not read again after this
                # assignment
                $space_count = leading_spaces_to_go($ibeg);
                if ( $default_adjust_indentation == 0 ) {
                    $indentation = $leading_spaces_to_go[$ibeg];
                }
                elsif ( $default_adjust_indentation == 1 ) {
                    $indentation = $reduced_spaces_to_go[$i_terminal];
                    $lev         = $levels_to_go[$i_terminal];
                }
            }
        }

        # Full indentation of closing tokens (-icb and -icp or -cti=2)
        else {

            # handle -icb (indented closing code block braces)
            # Updated method for indented block braces: indent one full level if
            # there is no continuation indentation.  This will occur for major
            # structures such as sub, if, else, but not for things like map
            # blocks.
            #
            # Note: only code blocks without continuation indentation are
            # handled here (if, else, unless, ..). In the following snippet,
            # the terminal brace of the sort block will have continuation
            # indentation as shown so it will not be handled by the coding
            # here.  We would have to undo the continuation indentation to do
            # this, but it probably looks ok as is.  This is a possible future
            # update for semicolon terminated lines.
            #
            #     if ($sortby eq 'date' or $sortby eq 'size') {
            #         @files = sort {
            #             $file_data{$a}{$sortby} <=> $file_data{$b}{$sortby}
            #                 or $a cmp $b
            #                 } @files;
            #         }
            #
            if (   $block_type_to_go[$ibeg]
                && $ci_levels_to_go[$i_terminal] == 0 )
            {
                my $spaces = get_SPACES( $leading_spaces_to_go[$i_terminal] );
                $indentation = $spaces + $rOpts_indent_columns;

                # NOTE: for -lp we could create a new indentation object, but
                # there is probably no need to do it
            }

            # handle -icp and any -icb block braces which fall through above
            # test such as the 'sort' block mentioned above.
            else {

                # There are currently two ways to handle -icp...
                # One way is to use the indentation of the previous line:
                # $indentation = $last_indentation_written;

                # The other way is to use the indentation that the previous line
                # would have had if it hadn't been adjusted:
                $indentation = $last_unadjusted_indentation;

                # Current method: use the minimum of the two. This avoids
                # inconsistent indentation.
                if ( get_SPACES($last_indentation_written) <
                    get_SPACES($indentation) )
                {
                    $indentation = $last_indentation_written;
                }
            }

            # use previous indentation but use own level
            # to cause list to be flushed properly
            $lev = $levels_to_go[$ibeg];
        }

        # remember indentation except for multi-line quotes, which get
        # no indentation
        unless ( $ibeg == 0 && $starting_in_quote ) {
            $last_indentation_written    = $indentation;
            $last_unadjusted_indentation = $leading_spaces_to_go[$ibeg];
            $last_leading_token          = $tokens_to_go[$ibeg];
        }

        # be sure lines with leading closing tokens are not outdented more
        # than the line which contained the corresponding opening token.

        #############################################################
        # updated per bug report in alex_bug.pl: we must not
        # mess with the indentation of closing logical braces so
        # we must treat something like '} else {' as if it were
        # an isolated brace.  The test used before this update was:
        #   my $is_isolated_block_brace = ( $iend == $ibeg )
        #     && $block_type_to_go[$ibeg];
        #############################################################
        my $is_isolated_block_brace = $block_type_to_go[$ibeg]
          && ( $iend == $ibeg
            || $is_if_elsif_else_unless_while_until_for_foreach{
                $block_type_to_go[$ibeg] } );

        # only do this for a ':' which is aligned with its leading '?'
        my $is_unaligned_colon = $types_to_go[$ibeg] eq ':' && !$is_leading;
        if (   defined($opening_indentation)
            && !$is_isolated_block_brace
            && !$is_unaligned_colon )
        {
            if ( get_SPACES($opening_indentation) > get_SPACES($indentation) ) {
                $indentation = $opening_indentation;
            }
        }

        # remember the indentation of each line of this batch
        push @{$rindentation_list}, $indentation;

        # outdent lines with certain leading tokens...
        if (

            # must be first word of this batch
            $ibeg == 0

            # and ...
            && (

                # certain leading keywords if requested
                (
                       $rOpts->{'outdent-keywords'}
                    && $types_to_go[$ibeg] eq 'k'
                    && $outdent_keyword{ $tokens_to_go[$ibeg] }
                )

                # or labels if requested
                || ( $rOpts->{'outdent-labels'} && $types_to_go[$ibeg] eq 'J' )

                # or static block comments if requested
                || (   $types_to_go[$ibeg] eq '#'
                    && $rOpts->{'outdent-static-block-comments'}
                    && $is_static_block_comment )
            )
          )

        {
            my $space_count = leading_spaces_to_go($ibeg);
            if ( $space_count > 0 ) {
                $space_count -= $rOpts_continuation_indentation;
                $is_outdented_line = 1;
                if ( $space_count < 0 ) { $space_count = 0 }

                # do not promote a spaced static block comment to non-spaced;
                # this is not normally necessary but could be for some
                # unusual user inputs (such as -ci = -i)
                if ( $types_to_go[$ibeg] eq '#' && $space_count == 0 ) {
                    $space_count = 1;
                }

                if ($rOpts_line_up_parentheses) {
                    $indentation =
                      new_lp_indentation_item( $space_count, $lev, 0, 0, 0 );
                }
                else {
                    $indentation = $space_count;
                }
            }
        }

        return ( $indentation, $lev, $level_end, $terminal_type,
            $is_semicolon_terminated, $is_outdented_line );
    }
}

sub set_vertical_tightness_flags {

    my ( $n, $n_last_line, $ibeg, $iend, $ri_first, $ri_last ) = @_;

    # Define vertical tightness controls for the nth line of a batch.
    # We create an array of parameters which tell the vertical aligner
    # if we should combine this line with the next line to achieve the
    # desired vertical tightness.
The array of parameters contains: 11917 # 11918 # [0] type: 1=is opening tok 2=is closing tok 3=is opening block brace 11919 # [1] flag: if opening: 1=no multiple steps, 2=multiple steps ok 11920 # if closing: spaces of padding to use 11921 # [2] sequence number of container 11922 # [3] valid flag: do not append if this flag is false. Will be 11923 # true if appropriate -vt flag is set. Otherwise, Will be 11924 # made true only for 2 line container in parens with -lp 11925 # 11926 # These flags are used by sub set_leading_whitespace in 11927 # the vertical aligner 11928 11929 my $rvertical_tightness_flags = [ 0, 0, 0, 0, 0, 0 ]; 11930 11931 # For non-BLOCK tokens, we will need to examine the next line 11932 # too, so we won't consider the last line. 11933 if ( $n < $n_last_line ) { 11934 11935 # see if last token is an opening token...not a BLOCK... 11936 my $ibeg_next = $$ri_first[ $n + 1 ]; 11937 my $token_end = $tokens_to_go[$iend]; 11938 my $iend_next = $$ri_last[ $n + 1 ]; 11939 if ( 11940 $type_sequence_to_go[$iend] 11941 && !$block_type_to_go[$iend] 11942 && $is_opening_token{$token_end} 11943 && ( 11944 $opening_vertical_tightness{$token_end} > 0 11945 11946 # allow 2-line method call to be closed up 11947 || ( $rOpts_line_up_parentheses 11948 && $token_end eq '(' 11949 && $iend > $ibeg 11950 && $types_to_go[ $iend - 1 ] ne 'b' ) 11951 ) 11952 ) 11953 { 11954 11955 # avoid multiple jumps in nesting depth in one line if 11956 # requested 11957 my $ovt = $opening_vertical_tightness{$token_end}; 11958 my $iend_next = $$ri_last[ $n + 1 ]; 11959 unless ( 11960 $ovt < 2 11961 && ( $nesting_depth_to_go[ $iend_next + 1 ] != 11962 $nesting_depth_to_go[$ibeg_next] ) 11963 ) 11964 { 11965 11966 # If -vt flag has not been set, mark this as invalid 11967 # and aligner will validate it if it sees the closing paren 11968 # within 2 lines. 
11969 my $valid_flag = $ovt; 11970 @{$rvertical_tightness_flags} = 11971 ( 1, $ovt, $type_sequence_to_go[$iend], $valid_flag ); 11972 } 11973 } 11974 11975 # see if first token of next line is a closing token... 11976 # ..and be sure this line does not have a side comment 11977 my $token_next = $tokens_to_go[$ibeg_next]; 11978 if ( $type_sequence_to_go[$ibeg_next] 11979 && !$block_type_to_go[$ibeg_next] 11980 && $is_closing_token{$token_next} 11981 && $types_to_go[$iend] !~ '#' ) # for safety, shouldn't happen! 11982 { 11983 my $ovt = $opening_vertical_tightness{$token_next}; 11984 my $cvt = $closing_vertical_tightness{$token_next}; 11985 if ( 11986 11987 # never append a trailing line like )->pack( 11988 # because it will throw off later alignment 11989 ( 11990 $nesting_depth_to_go[$ibeg_next] == 11991 $nesting_depth_to_go[ $iend_next + 1 ] + 1 11992 ) 11993 && ( 11994 $cvt == 2 11995 || ( 11996 $container_environment_to_go[$ibeg_next] ne 'LIST' 11997 && ( 11998 $cvt == 1 11999 12000 # allow closing up 2-line method calls 12001 || ( $rOpts_line_up_parentheses 12002 && $token_next eq ')' ) 12003 ) 12004 ) 12005 ) 12006 ) 12007 { 12008 12009 # decide which trailing closing tokens to append.. 12010 my $ok = 0; 12011 if ( $cvt == 2 || $iend_next == $ibeg_next ) { $ok = 1 } 12012 else { 12013 my $str = join( '', 12014 @types_to_go[ $ibeg_next + 1 .. $ibeg_next + 2 ] ); 12015 12016 # append closing token if followed by comment or ';' 12017 if ( $str =~ /^b?[#;]/ ) { $ok = 1 } 12018 } 12019 12020 if ($ok) { 12021 my $valid_flag = $cvt; 12022 @{$rvertical_tightness_flags} = ( 12023 2, 12024 $tightness{$token_next} == 2 ? 0 : 1, 12025 $type_sequence_to_go[$ibeg_next], $valid_flag, 12026 ); 12027 } 12028 } 12029 } 12030 12031 # Opening Token Right 12032 # If requested, move an isolated trailing opening token to the end of 12033 # the previous line which ended in a comma. 
We could do this 12034 # in sub recombine_breakpoints but that would cause problems 12035 # with -lp formatting. The problem is that indentation will 12036 # quickly move far to the right in nested expressions. By 12037 # doing it after indentation has been set, we avoid changes 12038 # to the indentation. Actual movement of the token takes place 12039 # in sub write_leader_and_string. 12040 if ( 12041 $opening_token_right{ $tokens_to_go[$ibeg_next] } 12042 12043 # previous line is not opening 12044 # (use -sot to combine with it) 12045 && !$is_opening_token{$token_end} 12046 12047 # previous line ended in one of these 12048 # (add other cases if necessary; '=>' and '.' are not necessary 12049 ##&& ($is_opening_token{$token_end} || $token_end eq ',') 12050 && !$block_type_to_go[$ibeg_next] 12051 12052 # this is a line with just an opening token 12053 && ( $iend_next == $ibeg_next 12054 || $iend_next == $ibeg_next + 2 12055 && $types_to_go[$iend_next] eq '#' ) 12056 12057 # looks bad if we align vertically with the wrong container 12058 && $tokens_to_go[$ibeg] ne $tokens_to_go[$ibeg_next] 12059 ) 12060 { 12061 my $valid_flag = 1; 12062 my $spaces = ( $types_to_go[ $ibeg_next - 1 ] eq 'b' ) ? 
1 : 0; 12063 @{$rvertical_tightness_flags} = 12064 ( 2, $spaces, $type_sequence_to_go[$ibeg_next], $valid_flag, ); 12065 } 12066 12067 # Stacking of opening and closing tokens 12068 my $stackable; 12069 my $token_beg_next = $tokens_to_go[$ibeg_next]; 12070 12071 # patch to make something like 'qw(' behave like an opening paren 12072 # (aran.t) 12073 if ( $types_to_go[$ibeg_next] eq 'q' ) { 12074 if ( $token_beg_next =~ /^qw\s*([\[\(\{])$/ ) { 12075 $token_beg_next = $1; 12076 } 12077 } 12078 12079 if ( $is_closing_token{$token_end} 12080 && $is_closing_token{$token_beg_next} ) 12081 { 12082 $stackable = $stack_closing_token{$token_beg_next} 12083 unless ( $block_type_to_go[$ibeg_next] ) 12084 ; # shouldn't happen; just checking 12085 } 12086 elsif ($is_opening_token{$token_end} 12087 && $is_opening_token{$token_beg_next} ) 12088 { 12089 $stackable = $stack_opening_token{$token_beg_next} 12090 unless ( $block_type_to_go[$ibeg_next] ) 12091 ; # shouldn't happen; just checking 12092 } 12093 12094 if ($stackable) { 12095 12096 my $is_semicolon_terminated; 12097 if ( $n + 1 == $n_last_line ) { 12098 my ( $terminal_type, $i_terminal ) = terminal_type( 12099 \@types_to_go, \@block_type_to_go, 12100 $ibeg_next, $iend_next 12101 ); 12102 $is_semicolon_terminated = $terminal_type eq ';' 12103 && $nesting_depth_to_go[$iend_next] < 12104 $nesting_depth_to_go[$ibeg_next]; 12105 } 12106 12107 # this must be a line with just an opening token 12108 # or end in a semicolon 12109 if ( 12110 $is_semicolon_terminated 12111 || ( $iend_next == $ibeg_next 12112 || $iend_next == $ibeg_next + 2 12113 && $types_to_go[$iend_next] eq '#' ) 12114 ) 12115 { 12116 my $valid_flag = 1; 12117 my $spaces = ( $types_to_go[ $ibeg_next - 1 ] eq 'b' ) ? 
1 : 0;
                @{$rvertical_tightness_flags} =
                  ( 2, $spaces, $type_sequence_to_go[$ibeg_next], $valid_flag,
                  );
            }
        }
    }

    # Check for a last line with isolated opening BLOCK curly
    elsif ($rOpts_block_brace_vertical_tightness
        && $ibeg eq $iend
        && $types_to_go[$iend] eq '{'
        && $block_type_to_go[$iend] =~
        /$block_brace_vertical_tightness_pattern/o )
    {
        @{$rvertical_tightness_flags} =
          ( 3, $rOpts_block_brace_vertical_tightness, 0, 1 );
    }

    # pack in the sequence numbers of the ends of this line
    $rvertical_tightness_flags->[4] = get_seqno($ibeg);
    $rvertical_tightness_flags->[5] = get_seqno($iend);
    return $rvertical_tightness_flags;
}

sub get_seqno {

    # get opening and closing sequence numbers of a token for the vertical
    # aligner.  Assign qw quotes a value to allow qw opening and closing tokens
    # to be treated somewhat like opening and closing tokens for stacking
    # tokens by the vertical aligner.
    #
    # Parameter:
    #   $ii - index of the token in the current batch arrays
    #         (@type_sequence_to_go, @types_to_go, @tokens_to_go)
    #
    # Returns:
    #   $type_sequence_to_go[$ii], except that a token of type 'q' which
    #   passes one of the qw tests below returns the special value -1.
    my ($ii) = @_;
    my $seqno = $type_sequence_to_go[$ii];
    if ( $types_to_go[$ii] eq 'q' ) {

        # special sequence number used to mark a qw quote
        my $SEQ_QW = -1;

        # not at index 0: mark a quote which starts with 'qw' followed
        # (after optional whitespace) by an opening '(', '{' or '['
        if ( $ii > 0 ) {
            $seqno = $SEQ_QW if ( $tokens_to_go[$ii] =~ /^qw\s*[\(\{\[]/ );
        }

        # otherwise (index 0): mark a quote whose text ends with a closing
        # ')', '}' or ']', but only when $ending_in_quote is false.
        # NOTE(review): $ending_in_quote is set elsewhere in the file;
        # presumably it means the batch stops inside a multi-line quote --
        # confirm against its definition.
        else {
            if ( !$ending_in_quote ) {
                $seqno = $SEQ_QW if ( $tokens_to_go[$ii] =~ /[\)\}\]]$/ );
            }
        }
    }
    return ($seqno);
}

# The bare block below gives sub set_vertical_alignment_markers two private
# lookup hashes which are filled once, at compile time, by the BEGIN block.
{
    my %is_vertical_alignment_type;
    my %is_vertical_alignment_keyword;

    BEGIN {

        # token types which may receive an alignment marker
        # (tested below with $is_vertical_alignment_type{$type})
        @_ = qw#
          = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x=
          { ? : => =~ && || // ~~ !~~
          #;
        @is_vertical_alignment_type{@_} = (1) x scalar(@_);

        # keywords (tokens of type 'k') which may receive an alignment marker
        # (tested below with $is_vertical_alignment_keyword{$token})
        @_ = qw(if unless and or err eq ne for foreach while until);
        @is_vertical_alignment_keyword{@_} = (1) x scalar(@_);
    }

    sub set_vertical_alignment_markers {

        # This routine takes the first step toward vertical alignment of the
        # lines of output text.  It looks for certain tokens which can serve as
        # vertical alignment markers (such as an '=').
        #
        # Method: We look at each token $i in this output batch and set
        # $matching_token_to_go[$i] equal to those tokens at which we would
        # accept vertical alignment.
        #
        # Parameters:
        #   $ri_first - reference to an array holding, for each output line
        #               of the batch, the index of its first token
        #   $ri_last  - reference to an array holding, for each output line
        #               of the batch, the index of its last token
        #
        # Result: there is no meaningful return value; the markers are
        # stored in @matching_token_to_go ('' means no alignment).

        # nothing to do if we aren't allowed to change whitespace
        # (every marker in the batch is cleared; @_ is not even read)
        if ( !$rOpts_add_whitespace ) {
            for my $i ( 0 .. $max_index_to_go ) {
                $matching_token_to_go[$i] = '';
            }
            return;
        }

        my ( $ri_first, $ri_last ) = @_;

        # remember the index of last nonblank token before any sidecomment:
        # if the batch ends in a side comment, the pre-decrement in the
        # condition steps back over the comment, and the inner decrement
        # steps back over a blank which precedes it
        my $i_terminal = $max_index_to_go;
        if ( $types_to_go[$i_terminal] eq '#' ) {
            if ( $i_terminal > 0 && $types_to_go[ --$i_terminal ] eq 'b' ) {
                if ( $i_terminal > 0 ) { --$i_terminal }
            }
        }

        # look at each line of this batch..
        my $last_vertical_alignment_before_index;
        my $vert_last_nonblank_type;
        my $vert_last_nonblank_token;
        my $vert_last_nonblank_block_type;
        my $max_line = @$ri_first - 1;
        my ( $i, $type, $token, $block_type, $alignment_type );
        my ( $ibeg, $iend, $line );

        foreach $line ( 0 .. $max_line ) {

            # token range of this line; the per-line state is reset so that
            # nothing carries over from the previous line
            $ibeg = $$ri_first[$line];
            $iend = $$ri_last[$line];
            $last_vertical_alignment_before_index = -1;
            $vert_last_nonblank_type = '';
            $vert_last_nonblank_token = '';
            $vert_last_nonblank_block_type = '';

            # look at each token in this output line..
            foreach $i ( $ibeg .. $iend ) {
                $alignment_type = '';
                $type = $types_to_go[$i];
                $block_type = $block_type_to_go[$i];
                $token = $tokens_to_go[$i];

                # check for flag indicating that we should not align
                # this token.  A true value already stored for this token
                # is treated as that flag: it is cleared and the token is
                # skipped.  Note that 'next' also skips the bookkeeping at
                # the bottom of the loop, so a flagged token does not
                # update the $vert_last_nonblank_* variables.
                if ( $matching_token_to_go[$i] ) {
                    $matching_token_to_go[$i] = '';
                    next;
                }

                #--------------------------------------------------------
                # First see if we want to align BEFORE this token
                #--------------------------------------------------------

                # The order of the tests in this if/elsif chain matters:
                # the branches with empty bodies deliberately reject a token
                # before any later test can accept it.

                # The first possible token that we can align before
                # is index 2 because: 1) it doesn't normally make sense to
                # align before the first token and 2) the second
                # token must be a blank if we are to align before
                # the third
                if ( $i < $ibeg + 2 ) { }

                # must follow a blank token
                elsif ( $types_to_go[ $i - 1 ] ne 'b' ) { }

                # align a side comment --
                elsif ( $type eq '#' ) {

                    unless (

                        # it is a static side comment
                        (
                            $rOpts->{'static-side-comments'}
                            && $token =~ /$static_side_comment_pattern/o
                        )

                        # or a closing side comment
                        || ( $vert_last_nonblank_block_type
                            && $token =~
                            /$closing_side_comment_prefix_pattern/o )
                      )
                    {
                        $alignment_type = $type;
                    }    ## Example of a static side comment
                }

                # otherwise, do not align two in a row to create a
                # blank field
                elsif ( $last_vertical_alignment_before_index == $i - 2 ) { }

                # align before one of these keywords
                # (within a line, since $i>1)
                elsif ( $type eq 'k' ) {

                    # /^(if|unless|and|or|eq|ne)$/
                    if ( $is_vertical_alignment_keyword{$token} ) {
                        $alignment_type = $token;
                    }
                }

                # align before one of these types..
                # Note: add '.' after new vertical aligner is operational
                elsif ( $is_vertical_alignment_type{$type} ) {
                    $alignment_type = $token;

                    # Do not align a terminal token.  Although it might
                    # occasionally look ok to do this, it has been found to be
                    # a good general rule.  The main problems are:
                    # (1) that the terminal token (such as an = or :) might get
                    # moved far to the right where it is hard to see because
                    # nothing follows it, and
                    # (2) doing so may prevent other good alignments.
                    if ( $i == $iend || $i >= $i_terminal ) {
                        $alignment_type = "";
                    }

                    # Do not align leading ': (' or '. ('.  This would prevent
                    # alignment in something like the following:
                    #   $extra_space .=
                    #       ( $input_line_number < 10 ) ? " "
                    #     : ( $input_line_number < 100 ) ? " "
                    #     : "";
                    # or
                    #  $code =
                    #      ( $case_matters ? $accessor : " lc($accessor) " )
                    #    . ( $yesno ? " eq " : " ne " )
                    if ( $i == $ibeg + 2
                        && $types_to_go[$ibeg] =~ /^[\.\:]$/
                        && $types_to_go[ $i - 1 ] eq 'b' )
                    {
                        $alignment_type = "";
                    }

                    # For a paren after keyword, only align something like this:
                    #    if    ( $a ) { &a }
                    #    elsif ( $b ) { &b }
                    if ( $token eq '(' && $vert_last_nonblank_type eq 'k' ) {
                        $alignment_type = ""
                          unless $vert_last_nonblank_token =~
                          /^(if|unless|elsif)$/;
                    }

                    # be sure the alignment tokens are unique
                    # This didn't work well: reason not determined
                    # if ($token ne $type) {$alignment_type .= $type}
                }

                # NOTE: This is deactivated because it causes the previous
                # if/elsif alignment to fail
                #elsif ( $type eq '}' && $token eq '}' && $block_type_to_go[$i])
                #{ $alignment_type = $type; }

                # remember where the last 'align before' marker was placed;
                # it is used by the 'two in a row' test above
                if ($alignment_type) {
                    $last_vertical_alignment_before_index = $i;
                }

                #--------------------------------------------------------
                # Next see if we want to align AFTER the previous nonblank
                #--------------------------------------------------------

                # We want to line up ',' and interior ';' tokens, with the added
                # space AFTER these tokens.  (Note: interior ';' is included
                # because it may occur in short blocks).
                if (

                    # we haven't already set it
                    !$alignment_type

                    # and it's not the first token of the line
                    && ( $i > $ibeg )

                    # and it follows a blank
                    && $types_to_go[ $i - 1 ] eq 'b'

                    # and previous token IS one of these:
                    && ( $vert_last_nonblank_type =~ /^[\,\;]$/ )

                    # and it's NOT one of these
                    && ( $type !~ /^[b\#\)\]\}]$/ )

                    # then go ahead and align
                  )

                {

                    # the marker is the type of the previous nonblank
                    # token, i.e. ',' or ';'
                    $alignment_type = $vert_last_nonblank_type;
                }

                #--------------------------------------------------------
                # then store the value
                #--------------------------------------------------------
                $matching_token_to_go[$i] = $alignment_type;

                # track the most recent nonblank token of this line
                if ( $type ne 'b' ) {
                    $vert_last_nonblank_type = $type;
                    $vert_last_nonblank_token = $token;
                    $vert_last_nonblank_block_type = $block_type;
                }
            }
        }
    }
}

sub terminal_type {

    #    returns type of last token on this line (terminal token), as follows:
    #    returns # for a full-line comment
    #    returns ' ' for a blank line
    #    otherwise returns final token type
    #
    # Parameters:
    #   $rtype       - reference to an array of token types
    #   $rblock_type - reference to an array of block types, indexed like
    #                  @$rtype
    #   $ibeg, $iend - indexes of the first and last tokens of the line
    #
    # Returns:
    #   in list context:   ( $terminal_type, $index_of_that_token )
    #   in scalar context: $terminal_type only
    #   For a full-line comment and for a blank line the index is $ibeg.
    #   A terminal '}' which has no block type, or whose block type is in
    #   %is_sort_map_grep_eval_do, is reported as type 'b'.

    my ( $rtype, $rblock_type, $ibeg, $iend ) = @_;

    # check for full-line comment..
    if ( $$rtype[$ibeg] eq '#' ) {
        return wantarray ? ( $$rtype[$ibeg], $ibeg ) : $$rtype[$ibeg];
    }
    else {

        # start at end and walk backwards..
        for ( my $i = $iend ; $i >= $ibeg ; $i-- ) {

            # skip past any side comment and blanks
            next if ( $$rtype[$i] eq 'b' );
            next if ( $$rtype[$i] eq '#' );

            # found it..make sure it is a BLOCK termination,
            # but hide a terminal } after sort/grep/map because it is not
            # necessarily the end of the line.  (terminal.t)
            my $terminal_type = $$rtype[$i];
            if (
                $terminal_type eq '}'
                && ( !$$rblock_type[$i]
                    || ( $is_sort_map_grep_eval_do{ $$rblock_type[$i] } ) )
              )
            {
                $terminal_type = 'b';
            }

            # the first token which is neither a blank nor a comment
            # decides the result
            return wantarray ? ( $terminal_type, $i ) : $terminal_type;
        }

        # empty line
        return wantarray ? ( ' ', $ibeg ) : ' ';
    }
}

{
    my %is_good_keyword_breakpoint;
    my %is_lt_gt_le_ge;

    sub set_bond_strengths {

        BEGIN {

            @_ = qw(if unless while until for foreach);
            @is_good_keyword_breakpoint{@_} = (1) x scalar(@_);

            @_ = qw(lt gt le ge);
            @is_lt_gt_le_ge{@_} = (1) x scalar(@_);

            ###############################################################
            # NOTE: NO_BREAK's set here are HINTS which may not be honored;
            # essential NO_BREAKS's must be enforced in section 2, below.
            ###############################################################

            # adding NEW_TOKENS: add a left and right bond strength by
            # mimmicking what is done for an existing token type.  You
            # can skip this step at first and take the default, then
            # tweak later to get desired results.

            # The bond strengths should roughly follow precenence order where
            # possible.  If you make changes, please check the results very
            # carefully on a variety of scripts.
12458 12459 # no break around possible filehandle 12460 $left_bond_strength{'Z'} = NO_BREAK; 12461 $right_bond_strength{'Z'} = NO_BREAK; 12462 12463 # never put a bare word on a new line: 12464 # example print (STDERR, "bla"); will fail with break after ( 12465 $left_bond_strength{'w'} = NO_BREAK; 12466 12467 # blanks always have infinite strength to force breaks after real tokens 12468 $right_bond_strength{'b'} = NO_BREAK; 12469 12470 # try not to break on exponentation 12471 @_ = qw" ** .. ... <=> "; 12472 @left_bond_strength{@_} = (STRONG) x scalar(@_); 12473 @right_bond_strength{@_} = (STRONG) x scalar(@_); 12474 12475 # The comma-arrow has very low precedence but not a good break point 12476 $left_bond_strength{'=>'} = NO_BREAK; 12477 $right_bond_strength{'=>'} = NOMINAL; 12478 12479 # ok to break after label 12480 $left_bond_strength{'J'} = NO_BREAK; 12481 $right_bond_strength{'J'} = NOMINAL; 12482 $left_bond_strength{'j'} = STRONG; 12483 $right_bond_strength{'j'} = STRONG; 12484 $left_bond_strength{'A'} = STRONG; 12485 $right_bond_strength{'A'} = STRONG; 12486 12487 $left_bond_strength{'->'} = STRONG; 12488 $right_bond_strength{'->'} = VERY_STRONG; 12489 12490 # breaking AFTER modulus operator is ok: 12491 @_ = qw" % "; 12492 @left_bond_strength{@_} = (STRONG) x scalar(@_); 12493 @right_bond_strength{@_} = 12494 ( 0.1 * NOMINAL + 0.9 * STRONG ) x scalar(@_); 12495 12496 # Break AFTER math operators * and / 12497 @_ = qw" * / x "; 12498 @left_bond_strength{@_} = (STRONG) x scalar(@_); 12499 @right_bond_strength{@_} = (NOMINAL) x scalar(@_); 12500 12501 # Break AFTER weakest math operators + and - 12502 # Make them weaker than * but a bit stronger than '.' 
12503 @_ = qw" + - "; 12504 @left_bond_strength{@_} = (STRONG) x scalar(@_); 12505 @right_bond_strength{@_} = 12506 ( 0.91 * NOMINAL + 0.09 * WEAK ) x scalar(@_); 12507 12508 # breaking BEFORE these is just ok: 12509 @_ = qw" >> << "; 12510 @right_bond_strength{@_} = (STRONG) x scalar(@_); 12511 @left_bond_strength{@_} = (NOMINAL) x scalar(@_); 12512 12513 # breaking before the string concatenation operator seems best 12514 # because it can be hard to see at the end of a line 12515 $right_bond_strength{'.'} = STRONG; 12516 $left_bond_strength{'.'} = 0.9 * NOMINAL + 0.1 * WEAK; 12517 12518 @_ = qw"} ] ) "; 12519 @left_bond_strength{@_} = (STRONG) x scalar(@_); 12520 @right_bond_strength{@_} = (NOMINAL) x scalar(@_); 12521 12522 # make these a little weaker than nominal so that they get 12523 # favored for end-of-line characters 12524 @_ = qw"!= == =~ !~ ~~ !~~"; 12525 @left_bond_strength{@_} = (STRONG) x scalar(@_); 12526 @right_bond_strength{@_} = 12527 ( 0.9 * NOMINAL + 0.1 * WEAK ) x scalar(@_); 12528 12529 # break AFTER these 12530 @_ = qw" < > | & >= <="; 12531 @left_bond_strength{@_} = (VERY_STRONG) x scalar(@_); 12532 @right_bond_strength{@_} = 12533 ( 0.8 * NOMINAL + 0.2 * WEAK ) x scalar(@_); 12534 12535 # breaking either before or after a quote is ok 12536 # but bias for breaking before a quote 12537 $left_bond_strength{'Q'} = NOMINAL; 12538 $right_bond_strength{'Q'} = NOMINAL + 0.02; 12539 $left_bond_strength{'q'} = NOMINAL; 12540 $right_bond_strength{'q'} = NOMINAL; 12541 12542 # starting a line with a keyword is usually ok 12543 $left_bond_strength{'k'} = NOMINAL; 12544 12545 # we usually want to bond a keyword strongly to what immediately 12546 # follows, rather than leaving it stranded at the end of a line 12547 $right_bond_strength{'k'} = STRONG; 12548 12549 $left_bond_strength{'G'} = NOMINAL; 12550 $right_bond_strength{'G'} = STRONG; 12551 12552 # it is good to break AFTER various assignment operators 12553 @_ = qw( 12554 = **= += *= &= <<= &&= 
12555 -= /= |= >>= ||= //= 12556 .= %= ^= 12557 x= 12558 ); 12559 @left_bond_strength{@_} = (STRONG) x scalar(@_); 12560 @right_bond_strength{@_} = 12561 ( 0.4 * WEAK + 0.6 * VERY_WEAK ) x scalar(@_); 12562 12563 # break BEFORE '&&' and '||' and '//' 12564 # set strength of '||' to same as '=' so that chains like 12565 # $a = $b || $c || $d will break before the first '||' 12566 $right_bond_strength{'||'} = NOMINAL; 12567 $left_bond_strength{'||'} = $right_bond_strength{'='}; 12568 12569 # same thing for '//' 12570 $right_bond_strength{'//'} = NOMINAL; 12571 $left_bond_strength{'//'} = $right_bond_strength{'='}; 12572 12573 # set strength of && a little higher than || 12574 $right_bond_strength{'&&'} = NOMINAL; 12575 $left_bond_strength{'&&'} = $left_bond_strength{'||'} + 0.1; 12576 12577 $left_bond_strength{';'} = VERY_STRONG; 12578 $right_bond_strength{';'} = VERY_WEAK; 12579 $left_bond_strength{'f'} = VERY_STRONG; 12580 12581 # make right strength of for ';' a little less than '=' 12582 # to make for contents break after the ';' to avoid this: 12583 # for ( $j = $number_of_fields - 1 ; $j < $item_count ; $j += 12584 # $number_of_fields ) 12585 # and make it weaker than ',' and 'and' too 12586 $right_bond_strength{'f'} = VERY_WEAK - 0.03; 12587 12588 # The strengths of ?/: should be somewhere between 12589 # an '=' and a quote (NOMINAL), 12590 # make strength of ':' slightly less than '?' to help 12591 # break long chains of ? 
: after the colons 12592 $left_bond_strength{':'} = 0.4 * WEAK + 0.6 * NOMINAL; 12593 $right_bond_strength{':'} = NO_BREAK; 12594 $left_bond_strength{'?'} = $left_bond_strength{':'} + 0.01; 12595 $right_bond_strength{'?'} = NO_BREAK; 12596 12597 $left_bond_strength{','} = VERY_STRONG; 12598 $right_bond_strength{','} = VERY_WEAK; 12599 12600 # Set bond strengths of certain keywords 12601 # make 'or', 'err', 'and' slightly weaker than a ',' 12602 $left_bond_strength{'and'} = VERY_WEAK - 0.01; 12603 $left_bond_strength{'or'} = VERY_WEAK - 0.02; 12604 $left_bond_strength{'err'} = VERY_WEAK - 0.02; 12605 $left_bond_strength{'xor'} = NOMINAL; 12606 $right_bond_strength{'and'} = NOMINAL; 12607 $right_bond_strength{'or'} = NOMINAL; 12608 $right_bond_strength{'err'} = NOMINAL; 12609 $right_bond_strength{'xor'} = STRONG; 12610 } 12611 12612 # patch-its always ok to break at end of line 12613 $nobreak_to_go[$max_index_to_go] = 0; 12614 12615 # adding a small 'bias' to strengths is a simple way to make a line 12616 # break at the first of a sequence of identical terms. 
For example, 12617 # to force long string of conditional operators to break with 12618 # each line ending in a ':', we can add a small number to the bond 12619 # strength of each ':' 12620 my $colon_bias = 0; 12621 my $amp_bias = 0; 12622 my $bar_bias = 0; 12623 my $and_bias = 0; 12624 my $or_bias = 0; 12625 my $dot_bias = 0; 12626 my $f_bias = 0; 12627 my $code_bias = -.01; 12628 my $type = 'b'; 12629 my $token = ' '; 12630 my $last_type; 12631 my $last_nonblank_type = $type; 12632 my $last_nonblank_token = $token; 12633 my $delta_bias = 0.0001; 12634 my $list_str = $left_bond_strength{'?'}; 12635 12636 my ( $block_type, $i_next, $i_next_nonblank, $next_nonblank_token, 12637 $next_nonblank_type, $next_token, $next_type, $total_nesting_depth, 12638 ); 12639 12640 # preliminary loop to compute bond strengths 12641 for ( my $i = 0 ; $i <= $max_index_to_go ; $i++ ) { 12642 $last_type = $type; 12643 if ( $type ne 'b' ) { 12644 $last_nonblank_type = $type; 12645 $last_nonblank_token = $token; 12646 } 12647 $type = $types_to_go[$i]; 12648 12649 # strength on both sides of a blank is the same 12650 if ( $type eq 'b' && $last_type ne 'b' ) { 12651 $bond_strength_to_go[$i] = $bond_strength_to_go[ $i - 1 ]; 12652 next; 12653 } 12654 12655 $token = $tokens_to_go[$i]; 12656 $block_type = $block_type_to_go[$i]; 12657 $i_next = $i + 1; 12658 $next_type = $types_to_go[$i_next]; 12659 $next_token = $tokens_to_go[$i_next]; 12660 $total_nesting_depth = $nesting_depth_to_go[$i_next]; 12661 $i_next_nonblank = ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 ); 12662 $next_nonblank_type = $types_to_go[$i_next_nonblank]; 12663 $next_nonblank_token = $tokens_to_go[$i_next_nonblank]; 12664 12665 # Some token chemistry... The decision about where to break a 12666 # line depends upon a "bond strength" between tokens. The LOWER 12667 # the bond strength, the MORE likely a break. 
The strength 12668 # values are based on trial-and-error, and need to be tweaked 12669 # occasionally to get desired results. Things to keep in mind 12670 # are: 12671 # 1. relative strengths are important. small differences 12672 # in strengths can make big formatting differences. 12673 # 2. each indentation level adds one unit of bond strength 12674 # 3. a value of NO_BREAK makes an unbreakable bond 12675 # 4. a value of VERY_WEAK is the strength of a ',' 12676 # 5. values below NOMINAL are considered ok break points 12677 # 6. values above NOMINAL are considered poor break points 12678 # We are computing the strength of the bond between the current 12679 # token and the NEXT token. 12680 my $bond_str = VERY_STRONG; # a default, high strength 12681 12682 #--------------------------------------------------------------- 12683 # section 1: 12684 # use minimum of left and right bond strengths if defined; 12685 # digraphs and trigraphs like to break on their left 12686 #--------------------------------------------------------------- 12687 my $bsr = $right_bond_strength{$type}; 12688 12689 if ( !defined($bsr) ) { 12690 12691 if ( $is_digraph{$type} || $is_trigraph{$type} ) { 12692 $bsr = STRONG; 12693 } 12694 else { 12695 $bsr = VERY_STRONG; 12696 } 12697 } 12698 12699 # define right bond strengths of certain keywords 12700 if ( $type eq 'k' && defined( $right_bond_strength{$token} ) ) { 12701 $bsr = $right_bond_strength{$token}; 12702 } 12703 elsif ( $token eq 'ne' or $token eq 'eq' ) { 12704 $bsr = NOMINAL; 12705 } 12706 my $bsl = $left_bond_strength{$next_nonblank_type}; 12707 12708 # set terminal bond strength to the nominal value 12709 # this will cause good preceding breaks to be retained 12710 if ( $i_next_nonblank > $max_index_to_go ) { 12711 $bsl = NOMINAL; 12712 } 12713 12714 if ( !defined($bsl) ) { 12715 12716 if ( $is_digraph{$next_nonblank_type} 12717 || $is_trigraph{$next_nonblank_type} ) 12718 { 12719 $bsl = WEAK; 12720 } 12721 else { 12722 $bsl = 
VERY_STRONG; 12723 } 12724 } 12725 12726 # define right bond strengths of certain keywords 12727 if ( $next_nonblank_type eq 'k' 12728 && defined( $left_bond_strength{$next_nonblank_token} ) ) 12729 { 12730 $bsl = $left_bond_strength{$next_nonblank_token}; 12731 } 12732 elsif ($next_nonblank_token eq 'ne' 12733 or $next_nonblank_token eq 'eq' ) 12734 { 12735 $bsl = NOMINAL; 12736 } 12737 elsif ( $is_lt_gt_le_ge{$next_nonblank_token} ) { 12738 $bsl = 0.9 * NOMINAL + 0.1 * STRONG; 12739 } 12740 12741 # Note: it might seem that we would want to keep a NO_BREAK if 12742 # either token has this value. This didn't work, because in an 12743 # arrow list, it prevents the comma from separating from the 12744 # following bare word (which is probably quoted by its arrow). 12745 # So necessary NO_BREAK's have to be handled as special cases 12746 # in the final section. 12747 $bond_str = ( $bsr < $bsl ) ? $bsr : $bsl; 12748 my $bond_str_1 = $bond_str; 12749 12750 #--------------------------------------------------------------- 12751 # section 2: 12752 # special cases 12753 #--------------------------------------------------------------- 12754 12755 # allow long lines before final { in an if statement, as in: 12756 # if (.......... 12757 # ..........) 12758 # { 12759 # 12760 # Otherwise, the line before the { tends to be too short. 
12761 if ( $type eq ')' ) { 12762 if ( $next_nonblank_type eq '{' ) { 12763 $bond_str = VERY_WEAK + 0.03; 12764 } 12765 } 12766 12767 elsif ( $type eq '(' ) { 12768 if ( $next_nonblank_type eq '{' ) { 12769 $bond_str = NOMINAL; 12770 } 12771 } 12772 12773 # break on something like '} (', but keep this stronger than a ',' 12774 # example is in 'howe.pl' 12775 elsif ( $type eq 'R' or $type eq '}' ) { 12776 if ( $next_nonblank_type eq '(' ) { 12777 $bond_str = 0.8 * VERY_WEAK + 0.2 * WEAK; 12778 } 12779 } 12780 12781 #----------------------------------------------------------------- 12782 # adjust bond strength bias 12783 #----------------------------------------------------------------- 12784 12785 # TESTING: add any bias set by sub scan_list at old comma 12786 # break points. 12787 elsif ( $type eq ',' ) { 12788 $bond_str += $bond_strength_to_go[$i]; 12789 } 12790 12791 elsif ( $type eq 'f' ) { 12792 $bond_str += $f_bias; 12793 $f_bias += $delta_bias; 12794 } 12795 12796 # in long ?: conditionals, bias toward just one set per line (colon.t) 12797 elsif ( $type eq ':' ) { 12798 if ( !$want_break_before{$type} ) { 12799 $bond_str += $colon_bias; 12800 $colon_bias += $delta_bias; 12801 } 12802 } 12803 12804 if ( $next_nonblank_type eq ':' 12805 && $want_break_before{$next_nonblank_type} ) 12806 { 12807 $bond_str += $colon_bias; 12808 $colon_bias += $delta_bias; 12809 } 12810 12811 # if leading '.' is used, align all but 'short' quotes; 12812 # the idea is to not place something like "\n" on a single line. 12813 elsif ( $next_nonblank_type eq '.' ) { 12814 if ( $want_break_before{'.'} ) { 12815 unless ( 12816 $last_nonblank_type eq '.' 
12817 && ( 12818 length($token) <= 12819 $rOpts_short_concatenation_item_length ) 12820 && ( $token !~ /^[\)\]\}]$/ ) 12821 ) 12822 { 12823 $dot_bias += $delta_bias; 12824 } 12825 $bond_str += $dot_bias; 12826 } 12827 } 12828 elsif ($next_nonblank_type eq '&&' 12829 && $want_break_before{$next_nonblank_type} ) 12830 { 12831 $bond_str += $amp_bias; 12832 $amp_bias += $delta_bias; 12833 } 12834 elsif ($next_nonblank_type eq '||' 12835 && $want_break_before{$next_nonblank_type} ) 12836 { 12837 $bond_str += $bar_bias; 12838 $bar_bias += $delta_bias; 12839 } 12840 elsif ( $next_nonblank_type eq 'k' ) { 12841 12842 if ( $next_nonblank_token eq 'and' 12843 && $want_break_before{$next_nonblank_token} ) 12844 { 12845 $bond_str += $and_bias; 12846 $and_bias += $delta_bias; 12847 } 12848 elsif ($next_nonblank_token =~ /^(or|err)$/ 12849 && $want_break_before{$next_nonblank_token} ) 12850 { 12851 $bond_str += $or_bias; 12852 $or_bias += $delta_bias; 12853 } 12854 12855 # FIXME: needs more testing 12856 elsif ( $is_keyword_returning_list{$next_nonblank_token} ) { 12857 $bond_str = $list_str if ( $bond_str > $list_str ); 12858 } 12859 elsif ( $token eq 'err' 12860 && !$want_break_before{$token} ) 12861 { 12862 $bond_str += $or_bias; 12863 $or_bias += $delta_bias; 12864 } 12865 } 12866 12867 if ( $type eq ':' 12868 && !$want_break_before{$type} ) 12869 { 12870 $bond_str += $colon_bias; 12871 $colon_bias += $delta_bias; 12872 } 12873 elsif ( $type eq '&&' 12874 && !$want_break_before{$type} ) 12875 { 12876 $bond_str += $amp_bias; 12877 $amp_bias += $delta_bias; 12878 } 12879 elsif ( $type eq '||' 12880 && !$want_break_before{$type} ) 12881 { 12882 $bond_str += $bar_bias; 12883 $bar_bias += $delta_bias; 12884 } 12885 elsif ( $type eq 'k' ) { 12886 12887 if ( $token eq 'and' 12888 && !$want_break_before{$token} ) 12889 { 12890 $bond_str += $and_bias; 12891 $and_bias += $delta_bias; 12892 } 12893 elsif ( $token eq 'or' 12894 && !$want_break_before{$token} ) 12895 { 12896 $bond_str += 
$or_bias; 12897 $or_bias += $delta_bias; 12898 } 12899 } 12900 12901 # keep matrix and hash indices together 12902 # but make them a little below STRONG to allow breaking open 12903 # something like {'some-word'}{'some-very-long-word'} at the }{ 12904 # (bracebrk.t) 12905 if ( ( $type eq ']' or $type eq 'R' ) 12906 && ( $next_nonblank_type eq '[' or $next_nonblank_type eq 'L' ) 12907 ) 12908 { 12909 $bond_str = 0.9 * STRONG + 0.1 * NOMINAL; 12910 } 12911 12912 if ( $next_nonblank_token =~ /^->/ ) { 12913 12914 # increase strength to the point where a break in the following 12915 # will be after the opening paren rather than at the arrow: 12916 # $a->$b($c); 12917 if ( $type eq 'i' ) { 12918 $bond_str = 1.45 * STRONG; 12919 } 12920 12921 elsif ( $type =~ /^[\)\]\}R]$/ ) { 12922 $bond_str = 0.1 * STRONG + 0.9 * NOMINAL; 12923 } 12924 12925 # otherwise make strength before an '->' a little over a '+' 12926 else { 12927 if ( $bond_str <= NOMINAL ) { 12928 $bond_str = NOMINAL + 0.01; 12929 } 12930 } 12931 } 12932 12933 if ( $token eq ')' && $next_nonblank_token eq '[' ) { 12934 $bond_str = 0.2 * STRONG + 0.8 * NOMINAL; 12935 } 12936 12937 # map1.t -- correct for a quirk in perl 12938 if ( $token eq '(' 12939 && $next_nonblank_type eq 'i' 12940 && $last_nonblank_type eq 'k' 12941 && $is_sort_map_grep{$last_nonblank_token} ) 12942 12943 # /^(sort|map|grep)$/ ) 12944 { 12945 $bond_str = NO_BREAK; 12946 } 12947 12948 # extrude.t: do not break before paren at: 12949 # -l pid_filename( 12950 if ( $last_nonblank_type eq 'F' && $next_nonblank_token eq '(' ) { 12951 $bond_str = NO_BREAK; 12952 } 12953 12954 # good to break after end of code blocks 12955 if ( $type eq '}' && $block_type ) { 12956 12957 $bond_str = 0.5 * WEAK + 0.5 * VERY_WEAK + $code_bias; 12958 $code_bias += $delta_bias; 12959 } 12960 12961 if ( $type eq 'k' ) { 12962 12963 # allow certain control keywords to stand out 12964 if ( $next_nonblank_type eq 'k' 12965 && $is_last_next_redo_return{$token} ) 12966 { 
12967 $bond_str = 0.45 * WEAK + 0.55 * VERY_WEAK; 12968 } 12969 12970# Don't break after keyword my. This is a quick fix for a 12971# rare problem with perl. An example is this line from file 12972# Container.pm: 12973# foreach my $question( Debian::DebConf::ConfigDb::gettree( $this->{'question'} ) ) 12974 12975 if ( $token eq 'my' ) { 12976 $bond_str = NO_BREAK; 12977 } 12978 12979 } 12980 12981 # good to break before 'if', 'unless', etc 12982 if ( $is_if_brace_follower{$next_nonblank_token} ) { 12983 $bond_str = VERY_WEAK; 12984 } 12985 12986 if ( $next_nonblank_type eq 'k' ) { 12987 12988 # keywords like 'unless', 'if', etc, within statements 12989 # make good breaks 12990 if ( $is_good_keyword_breakpoint{$next_nonblank_token} ) { 12991 $bond_str = VERY_WEAK / 1.05; 12992 } 12993 } 12994 12995 # try not to break before a comma-arrow 12996 elsif ( $next_nonblank_type eq '=>' ) { 12997 if ( $bond_str < STRONG ) { $bond_str = STRONG } 12998 } 12999 13000 #---------------------------------------------------------------------- 13001 # only set NO_BREAK's from here on 13002 #---------------------------------------------------------------------- 13003 if ( $type eq 'C' or $type eq 'U' ) { 13004 13005 # use strict requires that bare word and => not be separated 13006 if ( $next_nonblank_type eq '=>' ) { 13007 $bond_str = NO_BREAK; 13008 } 13009 13010 # Never break between a bareword and a following paren because 13011 # perl may give an error. 
For example, if a break is placed 13012 # between 'to_filehandle' and its '(' the following line will 13013 # give a syntax error [Carp.pm]: my( $no) =fileno( 13014 # to_filehandle( $in)) ; 13015 if ( $next_nonblank_token eq '(' ) { 13016 $bond_str = NO_BREAK; 13017 } 13018 } 13019 13020 # use strict requires that bare word within braces not start new line 13021 elsif ( $type eq 'L' ) { 13022 13023 if ( $next_nonblank_type eq 'w' ) { 13024 $bond_str = NO_BREAK; 13025 } 13026 } 13027 13028 # in older version of perl, use strict can cause problems with 13029 # breaks before bare words following opening parens. For example, 13030 # this will fail under older versions if a break is made between 13031 # '(' and 'MAIL': 13032 # use strict; 13033 # open( MAIL, "a long filename or command"); 13034 # close MAIL; 13035 elsif ( $type eq '{' ) { 13036 13037 if ( $token eq '(' && $next_nonblank_type eq 'w' ) { 13038 13039 # but it's fine to break if the word is followed by a '=>' 13040 # or if it is obviously a sub call 13041 my $i_next_next_nonblank = $i_next_nonblank + 1; 13042 my $next_next_type = $types_to_go[$i_next_next_nonblank]; 13043 if ( $next_next_type eq 'b' 13044 && $i_next_nonblank < $max_index_to_go ) 13045 { 13046 $i_next_next_nonblank++; 13047 $next_next_type = $types_to_go[$i_next_next_nonblank]; 13048 } 13049 13050 ##if ( $next_next_type ne '=>' ) { 13051 # these are ok: '->xxx', '=>', '(' 13052 13053 # We'll check for an old breakpoint and keep a leading 13054 # bareword if it was that way in the input file. 13055 # Presumably it was ok that way. 
For example, the 13056 # following would remain unchanged: 13057 # 13058 # @months = ( 13059 # January, February, March, April, 13060 # May, June, July, August, 13061 # September, October, November, December, 13062 # ); 13063 # 13064 # This should be sufficient: 13065 if ( !$old_breakpoint_to_go[$i] 13066 && ( $next_next_type eq ',' || $next_next_type eq '}' ) 13067 ) 13068 { 13069 $bond_str = NO_BREAK; 13070 } 13071 } 13072 } 13073 13074 elsif ( $type eq 'w' ) { 13075 13076 if ( $next_nonblank_type eq 'R' ) { 13077 $bond_str = NO_BREAK; 13078 } 13079 13080 # use strict requires that bare word and => not be separated 13081 if ( $next_nonblank_type eq '=>' ) { 13082 $bond_str = NO_BREAK; 13083 } 13084 } 13085 13086 # in fact, use strict hates bare words on any new line. For 13087 # example, a break before the underscore here provokes the 13088 # wrath of use strict: 13089 # if ( -r $fn && ( -s _ || $AllowZeroFilesize)) { 13090 elsif ( $type eq 'F' ) { 13091 $bond_str = NO_BREAK; 13092 } 13093 13094 # use strict does not allow separating type info from trailing { } 13095 # testfile is readmail.pl 13096 elsif ( $type eq 't' or $type eq 'i' ) { 13097 13098 if ( $next_nonblank_type eq 'L' ) { 13099 $bond_str = NO_BREAK; 13100 } 13101 } 13102 13103 # Do not break between a possible filehandle and a ? or / and do 13104 # not introduce a break after it if there is no blank 13105 # (extrude.t) 13106 elsif ( $type eq 'Z' ) { 13107 13108 # dont break.. 13109 if ( 13110 13111 # if there is no blank and we do not want one. 
Examples: 13112 # print $x++ # do not break after $x 13113 # print HTML"HELLO" # break ok after HTML 13114 ( 13115 $next_type ne 'b' 13116 && defined( $want_left_space{$next_type} ) 13117 && $want_left_space{$next_type} == WS_NO 13118 ) 13119 13120 # or we might be followed by the start of a quote 13121 || $next_nonblank_type =~ /^[\/\?]$/ 13122 ) 13123 { 13124 $bond_str = NO_BREAK; 13125 } 13126 } 13127 13128 # Do not break before a possible file handle 13129 if ( $next_nonblank_type eq 'Z' ) { 13130 $bond_str = NO_BREAK; 13131 } 13132 13133 # As a defensive measure, do not break between a '(' and a 13134 # filehandle. In some cases, this can cause an error. For 13135 # example, the following program works: 13136 # my $msg="hi!\n"; 13137 # print 13138 # ( STDOUT 13139 # $msg 13140 # ); 13141 # 13142 # But this program fails: 13143 # my $msg="hi!\n"; 13144 # print 13145 # ( 13146 # STDOUT 13147 # $msg 13148 # ); 13149 # 13150 # This is normally only a problem with the 'extrude' option 13151 if ( $next_nonblank_type eq 'Y' && $token eq '(' ) { 13152 $bond_str = NO_BREAK; 13153 } 13154 13155 # Breaking before a ++ can cause perl to guess wrong. For 13156 # example the following line will cause a syntax error 13157 # with -extrude if we break between '$i' and '++' [fixstyle2] 13158 # print( ( $i++ & 1 ) ? $_ : ( $change{$_} || $_ ) ); 13159 elsif ( $next_nonblank_type eq '++' ) { 13160 $bond_str = NO_BREAK; 13161 } 13162 13163 # Breaking before a ? before a quote can cause trouble if 13164 # they are not separated by a blank. 13165 # Example: a syntax error occurs if you break before the ? here 13166 # my$logic=join$all?' && ':' || ',@regexps; 13167 # From: Professional_Perl_Programming_Code/multifind.pl 13168 elsif ( $next_nonblank_type eq '?' ) { 13169 $bond_str = NO_BREAK 13170 if ( $types_to_go[ $i_next_nonblank + 1 ] eq 'Q' ); 13171 } 13172 13173 # Breaking before a . 
followed by a number 13174 # can cause trouble if there is no intervening space 13175 # Example: a syntax error occurs if you break before the .2 here 13176 # $str .= pack($endian.2, ensurrogate($ord)); 13177 # From: perl58/Unicode.pm 13178 elsif ( $next_nonblank_type eq '.' ) { 13179 $bond_str = NO_BREAK 13180 if ( $types_to_go[ $i_next_nonblank + 1 ] eq 'n' ); 13181 } 13182 13183 # patch to put cuddled elses back together when on multiple 13184 # lines, as in: } \n else \n { \n 13185 if ($rOpts_cuddled_else) { 13186 13187 if ( ( $token eq 'else' ) && ( $next_nonblank_type eq '{' ) 13188 || ( $type eq '}' ) && ( $next_nonblank_token eq 'else' ) ) 13189 { 13190 $bond_str = NO_BREAK; 13191 } 13192 } 13193 13194 # keep '}' together with ';' 13195 if ( ( $token eq '}' ) && ( $next_nonblank_type eq ';' ) ) { 13196 $bond_str = NO_BREAK; 13197 } 13198 13199 # never break between sub name and opening paren 13200 if ( ( $type eq 'w' ) && ( $next_nonblank_token eq '(' ) ) { 13201 $bond_str = NO_BREAK; 13202 } 13203 13204 #--------------------------------------------------------------- 13205 # section 3: 13206 # now take nesting depth into account 13207 #--------------------------------------------------------------- 13208 # final strength incorporates the bond strength and nesting depth 13209 my $strength; 13210 13211 if ( defined($bond_str) && !$nobreak_to_go[$i] ) { 13212 if ( $total_nesting_depth > 0 ) { 13213 $strength = $bond_str + $total_nesting_depth; 13214 } 13215 else { 13216 $strength = $bond_str; 13217 } 13218 } 13219 else { 13220 $strength = NO_BREAK; 13221 } 13222 13223 # always break after side comment 13224 if ( $type eq '#' ) { $strength = 0 } 13225 13226 $bond_strength_to_go[$i] = $strength; 13227 13228 FORMATTER_DEBUG_FLAG_BOND && do { 13229 my $str = substr( $token, 0, 15 ); 13230 $str .= ' ' x ( 16 - length($str) ); 13231 print 13232"BOND: i=$i $str $type $next_nonblank_type depth=$total_nesting_depth strength=$bond_str_1 -> $bond_str -> $strength \n"; 
13233 }; 13234 } 13235 } 13236 13237} 13238 13239sub pad_array_to_go { 13240 13241 # to simplify coding in scan_list and set_bond_strengths, it helps 13242 # to create some extra blank tokens at the end of the arrays 13243 $tokens_to_go[ $max_index_to_go + 1 ] = ''; 13244 $tokens_to_go[ $max_index_to_go + 2 ] = ''; 13245 $types_to_go[ $max_index_to_go + 1 ] = 'b'; 13246 $types_to_go[ $max_index_to_go + 2 ] = 'b'; 13247 $nesting_depth_to_go[ $max_index_to_go + 1 ] = 13248 $nesting_depth_to_go[$max_index_to_go]; 13249 13250 # /^[R\}\)\]]$/ 13251 if ( $is_closing_type{ $types_to_go[$max_index_to_go] } ) { 13252 if ( $nesting_depth_to_go[$max_index_to_go] <= 0 ) { 13253 13254 # shouldn't happen: 13255 unless ( get_saw_brace_error() ) { 13256 warning( 13257"Program bug in scan_list: hit nesting error which should have been caught\n" 13258 ); 13259 report_definite_bug(); 13260 } 13261 } 13262 else { 13263 $nesting_depth_to_go[ $max_index_to_go + 1 ] -= 1; 13264 } 13265 } 13266 13267 # /^[L\{\(\[]$/ 13268 elsif ( $is_opening_type{ $types_to_go[$max_index_to_go] } ) { 13269 $nesting_depth_to_go[ $max_index_to_go + 1 ] += 1; 13270 } 13271} 13272 13273{ # begin scan_list 13274 13275 my ( 13276 $block_type, $current_depth, 13277 $depth, $i, 13278 $i_last_nonblank_token, $last_colon_sequence_number, 13279 $last_nonblank_token, $last_nonblank_type, 13280 $last_old_breakpoint_count, $minimum_depth, 13281 $next_nonblank_block_type, $next_nonblank_token, 13282 $next_nonblank_type, $old_breakpoint_count, 13283 $starting_breakpoint_count, $starting_depth, 13284 $token, $type, 13285 $type_sequence, 13286 ); 13287 13288 my ( 13289 @breakpoint_stack, @breakpoint_undo_stack, 13290 @comma_index, @container_type, 13291 @identifier_count_stack, @index_before_arrow, 13292 @interrupted_list, @item_count_stack, 13293 @last_comma_index, @last_dot_index, 13294 @last_nonblank_type, @old_breakpoint_count_stack, 13295 @opening_structure_index_stack, @rfor_semicolon_list, 13296 
@has_old_logical_breakpoints, @rand_or_list, 13297 @i_equals, 13298 ); 13299 13300 # routine to define essential variables when we go 'up' to 13301 # a new depth 13302 sub check_for_new_minimum_depth { 13303 my $depth = shift; 13304 if ( $depth < $minimum_depth ) { 13305 13306 $minimum_depth = $depth; 13307 13308 # these arrays need not retain values between calls 13309 $breakpoint_stack[$depth] = $starting_breakpoint_count; 13310 $container_type[$depth] = ""; 13311 $identifier_count_stack[$depth] = 0; 13312 $index_before_arrow[$depth] = -1; 13313 $interrupted_list[$depth] = 1; 13314 $item_count_stack[$depth] = 0; 13315 $last_nonblank_type[$depth] = ""; 13316 $opening_structure_index_stack[$depth] = -1; 13317 13318 $breakpoint_undo_stack[$depth] = undef; 13319 $comma_index[$depth] = undef; 13320 $last_comma_index[$depth] = undef; 13321 $last_dot_index[$depth] = undef; 13322 $old_breakpoint_count_stack[$depth] = undef; 13323 $has_old_logical_breakpoints[$depth] = 0; 13324 $rand_or_list[$depth] = []; 13325 $rfor_semicolon_list[$depth] = []; 13326 $i_equals[$depth] = -1; 13327 13328 # these arrays must retain values between calls 13329 if ( !defined( $has_broken_sublist[$depth] ) ) { 13330 $dont_align[$depth] = 0; 13331 $has_broken_sublist[$depth] = 0; 13332 $want_comma_break[$depth] = 0; 13333 } 13334 } 13335 } 13336 13337 # routine to decide which commas to break at within a container; 13338 # returns: 13339 # $bp_count = number of comma breakpoints set 13340 # $do_not_break_apart = a flag indicating if container need not 13341 # be broken open 13342 sub set_comma_breakpoints { 13343 13344 my $dd = shift; 13345 my $bp_count = 0; 13346 my $do_not_break_apart = 0; 13347 13348 # anything to do? 13349 if ( $item_count_stack[$dd] ) { 13350 13351 # handle commas not in containers... 13352 if ( $dont_align[$dd] ) { 13353 do_uncontained_comma_breaks($dd); 13354 } 13355 13356 # handle commas within containers... 
13357 else { 13358 my $fbc = $forced_breakpoint_count; 13359 13360 # always open comma lists not preceded by keywords, 13361 # barewords, identifiers (that is, anything that doesn't 13362 # look like a function call) 13363 my $must_break_open = $last_nonblank_type[$dd] !~ /^[kwiU]$/; 13364 13365 set_comma_breakpoints_do( 13366 $dd, 13367 $opening_structure_index_stack[$dd], 13368 $i, 13369 $item_count_stack[$dd], 13370 $identifier_count_stack[$dd], 13371 $comma_index[$dd], 13372 $next_nonblank_type, 13373 $container_type[$dd], 13374 $interrupted_list[$dd], 13375 \$do_not_break_apart, 13376 $must_break_open, 13377 ); 13378 $bp_count = $forced_breakpoint_count - $fbc; 13379 $do_not_break_apart = 0 if $must_break_open; 13380 } 13381 } 13382 return ( $bp_count, $do_not_break_apart ); 13383 } 13384 13385 sub do_uncontained_comma_breaks { 13386 13387 # Handle commas not in containers... 13388 # This is a catch-all routine for commas that we 13389 # don't know what to do with because the don't fall 13390 # within containers. We will bias the bond strength 13391 # to break at commas which ended lines in the input 13392 # file. This usually works better than just trying 13393 # to put as many items on a line as possible. A 13394 # downside is that if the input file is garbage it 13395 # won't work very well. However, the user can always 13396 # prevent following the old breakpoints with the 13397 # -iob flag. 
13398 my $dd = shift; 13399 my $bias = -.01; 13400 foreach my $ii ( @{ $comma_index[$dd] } ) { 13401 if ( $old_breakpoint_to_go[$ii] ) { 13402 $bond_strength_to_go[$ii] = $bias; 13403 13404 # reduce bias magnitude to force breaks in order 13405 $bias *= 0.99; 13406 } 13407 } 13408 13409 # Also put a break before the first comma if 13410 # (1) there was a break there in the input, and 13411 # (2) that was exactly one previous break in the input 13412 # 13413 # For example, we will follow the user and break after 13414 # 'print' in this snippet: 13415 # print 13416 # "conformability (Not the same dimension)\n", 13417 # "\t", $have, " is ", text_unit($hu), "\n", 13418 # "\t", $want, " is ", text_unit($wu), "\n", 13419 # ; 13420 my $i_first_comma = $comma_index[$dd]->[0]; 13421 if ( $old_breakpoint_to_go[$i_first_comma] ) { 13422 my $level_comma = $levels_to_go[$i_first_comma]; 13423 my $ibreak = -1; 13424 my $obp_count = 0; 13425 for ( my $ii = $i_first_comma - 1 ; $ii >= 0 ; $ii -= 1 ) { 13426 if ( $old_breakpoint_to_go[$ii] ) { 13427 $obp_count++; 13428 last if ( $obp_count > 1 ); 13429 $ibreak = $ii 13430 if ( $levels_to_go[$ii] == $level_comma ); 13431 } 13432 } 13433 if ( $ibreak >= 0 && $obp_count == 1 ) { 13434 set_forced_breakpoint($ibreak); 13435 } 13436 } 13437 } 13438 13439 my %is_logical_container; 13440 13441 BEGIN { 13442 @_ = qw# if elsif unless while and or err not && | || ? : ! 
#; 13443 @is_logical_container{@_} = (1) x scalar(@_); 13444 } 13445 13446 sub set_for_semicolon_breakpoints { 13447 my $dd = shift; 13448 foreach ( @{ $rfor_semicolon_list[$dd] } ) { 13449 set_forced_breakpoint($_); 13450 } 13451 } 13452 13453 sub set_logical_breakpoints { 13454 my $dd = shift; 13455 if ( 13456 $item_count_stack[$dd] == 0 13457 && $is_logical_container{ $container_type[$dd] } 13458 13459 # TESTING: 13460 || $has_old_logical_breakpoints[$dd] 13461 ) 13462 { 13463 13464 # Look for breaks in this order: 13465 # 0 1 2 3 13466 # or and || && 13467 foreach my $i ( 0 .. 3 ) { 13468 if ( $rand_or_list[$dd][$i] ) { 13469 foreach ( @{ $rand_or_list[$dd][$i] } ) { 13470 set_forced_breakpoint($_); 13471 } 13472 13473 # break at any 'if' and 'unless' too 13474 foreach ( @{ $rand_or_list[$dd][4] } ) { 13475 set_forced_breakpoint($_); 13476 } 13477 $rand_or_list[$dd] = []; 13478 last; 13479 } 13480 } 13481 } 13482 } 13483 13484 sub is_unbreakable_container { 13485 13486 # never break a container of one of these types 13487 # because bad things can happen (map1.t) 13488 my $dd = shift; 13489 $is_sort_map_grep{ $container_type[$dd] }; 13490 } 13491 13492 sub scan_list { 13493 13494 # This routine is responsible for setting line breaks for all lists, 13495 # so that hierarchical structure can be displayed and so that list 13496 # items can be vertically aligned. The output of this routine is 13497 # stored in the array @forced_breakpoint_to_go, which is used to set 13498 # final breakpoints. 
13499 13500 $starting_depth = $nesting_depth_to_go[0]; 13501 13502 $block_type = ' '; 13503 $current_depth = $starting_depth; 13504 $i = -1; 13505 $last_colon_sequence_number = -1; 13506 $last_nonblank_token = ';'; 13507 $last_nonblank_type = ';'; 13508 $last_nonblank_block_type = ' '; 13509 $last_old_breakpoint_count = 0; 13510 $minimum_depth = $current_depth + 1; # forces update in check below 13511 $old_breakpoint_count = 0; 13512 $starting_breakpoint_count = $forced_breakpoint_count; 13513 $token = ';'; 13514 $type = ';'; 13515 $type_sequence = ''; 13516 13517 check_for_new_minimum_depth($current_depth); 13518 13519 my $is_long_line = excess_line_length( 0, $max_index_to_go ) > 0; 13520 my $want_previous_breakpoint = -1; 13521 13522 my $saw_good_breakpoint; 13523 my $i_line_end = -1; 13524 my $i_line_start = -1; 13525 13526 # loop over all tokens in this batch 13527 while ( ++$i <= $max_index_to_go ) { 13528 if ( $type ne 'b' ) { 13529 $i_last_nonblank_token = $i - 1; 13530 $last_nonblank_type = $type; 13531 $last_nonblank_token = $token; 13532 $last_nonblank_block_type = $block_type; 13533 } 13534 $type = $types_to_go[$i]; 13535 $block_type = $block_type_to_go[$i]; 13536 $token = $tokens_to_go[$i]; 13537 $type_sequence = $type_sequence_to_go[$i]; 13538 my $next_type = $types_to_go[ $i + 1 ]; 13539 my $next_token = $tokens_to_go[ $i + 1 ]; 13540 my $i_next_nonblank = ( ( $next_type eq 'b' ) ? 
$i + 2 : $i + 1 ); 13541 $next_nonblank_type = $types_to_go[$i_next_nonblank]; 13542 $next_nonblank_token = $tokens_to_go[$i_next_nonblank]; 13543 $next_nonblank_block_type = $block_type_to_go[$i_next_nonblank]; 13544 13545 # set break if flag was set 13546 if ( $want_previous_breakpoint >= 0 ) { 13547 set_forced_breakpoint($want_previous_breakpoint); 13548 $want_previous_breakpoint = -1; 13549 } 13550 13551 $last_old_breakpoint_count = $old_breakpoint_count; 13552 if ( $old_breakpoint_to_go[$i] ) { 13553 $i_line_end = $i; 13554 $i_line_start = $i_next_nonblank; 13555 13556 $old_breakpoint_count++; 13557 13558 # Break before certain keywords if user broke there and 13559 # this is a 'safe' break point. The idea is to retain 13560 # any preferred breaks for sequential list operations, 13561 # like a schwartzian transform. 13562 if ($rOpts_break_at_old_keyword_breakpoints) { 13563 if ( 13564 $next_nonblank_type eq 'k' 13565 && $is_keyword_returning_list{$next_nonblank_token} 13566 && ( $type =~ /^[=\)\]\}Riw]$/ 13567 || $type eq 'k' 13568 && $is_keyword_returning_list{$token} ) 13569 ) 13570 { 13571 13572 # we actually have to set this break next time through 13573 # the loop because if we are at a closing token (such 13574 # as '}') which forms a one-line block, this break might 13575 # get undone. 13576 $want_previous_breakpoint = $i; 13577 } 13578 } 13579 } 13580 next if ( $type eq 'b' ); 13581 $depth = $nesting_depth_to_go[ $i + 1 ]; 13582 13583 # safety check - be sure we always break after a comment 13584 # Shouldn't happen .. an error here probably means that the 13585 # nobreak flag did not get turned off correctly during 13586 # formatting. 
13587 if ( $type eq '#' ) { 13588 if ( $i != $max_index_to_go ) { 13589 warning( 13590"Non-fatal program bug: backup logic needed to break after a comment\n" 13591 ); 13592 report_definite_bug(); 13593 $nobreak_to_go[$i] = 0; 13594 set_forced_breakpoint($i); 13595 } 13596 } 13597 13598 # Force breakpoints at certain tokens in long lines. 13599 # Note that such breakpoints will be undone later if these tokens 13600 # are fully contained within parens on a line. 13601 if ( 13602 13603 # break before a keyword within a line 13604 $type eq 'k' 13605 && $i > 0 13606 13607 # if one of these keywords: 13608 && $token =~ /^(if|unless|while|until|for)$/ 13609 13610 # but do not break at something like '1 while' 13611 && ( $last_nonblank_type ne 'n' || $i > 2 ) 13612 13613 # and let keywords follow a closing 'do' brace 13614 && $last_nonblank_block_type ne 'do' 13615 13616 && ( 13617 $is_long_line 13618 13619 # or container is broken (by side-comment, etc) 13620 || ( $next_nonblank_token eq '(' 13621 && $mate_index_to_go[$i_next_nonblank] < $i ) 13622 ) 13623 ) 13624 { 13625 set_forced_breakpoint( $i - 1 ); 13626 } 13627 13628 # remember locations of '||' and '&&' for possible breaks if we 13629 # decide this is a long logical expression. 
13630 if ( $type eq '||' ) { 13631 push @{ $rand_or_list[$depth][2] }, $i; 13632 ++$has_old_logical_breakpoints[$depth] 13633 if ( ( $i == $i_line_start || $i == $i_line_end ) 13634 && $rOpts_break_at_old_logical_breakpoints ); 13635 } 13636 elsif ( $type eq '&&' ) { 13637 push @{ $rand_or_list[$depth][3] }, $i; 13638 ++$has_old_logical_breakpoints[$depth] 13639 if ( ( $i == $i_line_start || $i == $i_line_end ) 13640 && $rOpts_break_at_old_logical_breakpoints ); 13641 } 13642 elsif ( $type eq 'f' ) { 13643 push @{ $rfor_semicolon_list[$depth] }, $i; 13644 } 13645 elsif ( $type eq 'k' ) { 13646 if ( $token eq 'and' ) { 13647 push @{ $rand_or_list[$depth][1] }, $i; 13648 ++$has_old_logical_breakpoints[$depth] 13649 if ( ( $i == $i_line_start || $i == $i_line_end ) 13650 && $rOpts_break_at_old_logical_breakpoints ); 13651 } 13652 13653 # break immediately at 'or's which are probably not in a logical 13654 # block -- but we will break in logical breaks below so that 13655 # they do not add to the forced_breakpoint_count 13656 elsif ( $token eq 'or' ) { 13657 push @{ $rand_or_list[$depth][0] }, $i; 13658 ++$has_old_logical_breakpoints[$depth] 13659 if ( ( $i == $i_line_start || $i == $i_line_end ) 13660 && $rOpts_break_at_old_logical_breakpoints ); 13661 if ( $is_logical_container{ $container_type[$depth] } ) { 13662 } 13663 else { 13664 if ($is_long_line) { set_forced_breakpoint($i) } 13665 elsif ( ( $i == $i_line_start || $i == $i_line_end ) 13666 && $rOpts_break_at_old_logical_breakpoints ) 13667 { 13668 $saw_good_breakpoint = 1; 13669 } 13670 } 13671 } 13672 elsif ( $token eq 'if' || $token eq 'unless' ) { 13673 push @{ $rand_or_list[$depth][4] }, $i; 13674 if ( ( $i == $i_line_start || $i == $i_line_end ) 13675 && $rOpts_break_at_old_logical_breakpoints ) 13676 { 13677 set_forced_breakpoint($i); 13678 } 13679 } 13680 } 13681 elsif ( $is_assignment{$type} ) { 13682 $i_equals[$depth] = $i; 13683 } 13684 13685 if ($type_sequence) { 13686 13687 # handle any postponed 
closing breakpoints 13688 if ( $token =~ /^[\)\]\}\:]$/ ) { 13689 if ( $type eq ':' ) { 13690 $last_colon_sequence_number = $type_sequence; 13691 13692 # TESTING: retain break at a ':' line break 13693 if ( ( $i == $i_line_start || $i == $i_line_end ) 13694 && $rOpts_break_at_old_ternary_breakpoints ) 13695 { 13696 13697 # TESTING: 13698 set_forced_breakpoint($i); 13699 13700 # break at previous '=' 13701 if ( $i_equals[$depth] > 0 ) { 13702 set_forced_breakpoint( $i_equals[$depth] ); 13703 $i_equals[$depth] = -1; 13704 } 13705 } 13706 } 13707 if ( defined( $postponed_breakpoint{$type_sequence} ) ) { 13708 my $inc = ( $type eq ':' ) ? 0 : 1; 13709 set_forced_breakpoint( $i - $inc ); 13710 delete $postponed_breakpoint{$type_sequence}; 13711 } 13712 } 13713 13714 # set breaks at ?/: if they will get separated (and are 13715 # not a ?/: chain), or if the '?' is at the end of the 13716 # line 13717 elsif ( $token eq '?' ) { 13718 my $i_colon = $mate_index_to_go[$i]; 13719 if ( 13720 $i_colon <= 0 # the ':' is not in this batch 13721 || $i == 0 # this '?' is the first token of the line 13722 || $i == 13723 $max_index_to_go # or this '?' is the last token 13724 ) 13725 { 13726 13727 # don't break at a '?' if preceded by ':' on 13728 # this line of previous ?/: pair on this line. 13729 # This is an attempt to preserve a chain of ?/: 13730 # expressions (elsif2.t). And don't break if 13731 # this has a side comment. 13732 set_forced_breakpoint($i) 13733 unless ( 13734 $type_sequence == ( 13735 $last_colon_sequence_number + 13736 TYPE_SEQUENCE_INCREMENT 13737 ) 13738 || $tokens_to_go[$max_index_to_go] eq '#' 13739 ); 13740 set_closing_breakpoint($i); 13741 } 13742 } 13743 } 13744 13745#print "LISTX sees: i=$i type=$type tok=$token block=$block_type depth=$depth\n"; 13746 13747 #------------------------------------------------------------ 13748 # Handle Increasing Depth.. 
13749 # 13750 # prepare for a new list when depth increases 13751 # token $i is a '(','{', or '[' 13752 #------------------------------------------------------------ 13753 if ( $depth > $current_depth ) { 13754 13755 $breakpoint_stack[$depth] = $forced_breakpoint_count; 13756 $breakpoint_undo_stack[$depth] = $forced_breakpoint_undo_count; 13757 $has_broken_sublist[$depth] = 0; 13758 $identifier_count_stack[$depth] = 0; 13759 $index_before_arrow[$depth] = -1; 13760 $interrupted_list[$depth] = 0; 13761 $item_count_stack[$depth] = 0; 13762 $last_comma_index[$depth] = undef; 13763 $last_dot_index[$depth] = undef; 13764 $last_nonblank_type[$depth] = $last_nonblank_type; 13765 $old_breakpoint_count_stack[$depth] = $old_breakpoint_count; 13766 $opening_structure_index_stack[$depth] = $i; 13767 $rand_or_list[$depth] = []; 13768 $rfor_semicolon_list[$depth] = []; 13769 $i_equals[$depth] = -1; 13770 $want_comma_break[$depth] = 0; 13771 $container_type[$depth] = 13772 ( $last_nonblank_type =~ /^(k|=>|&&|\|\||\?|\:|\.)$/ ) 13773 ? $last_nonblank_token 13774 : ""; 13775 $has_old_logical_breakpoints[$depth] = 0; 13776 13777 # if line ends here then signal closing token to break 13778 if ( $next_nonblank_type eq 'b' || $next_nonblank_type eq '#' ) 13779 { 13780 set_closing_breakpoint($i); 13781 } 13782 13783 # Not all lists of values should be vertically aligned.. 13784 $dont_align[$depth] = 13785 13786 # code BLOCKS are handled at a higher level 13787 ( $block_type ne "" ) 13788 13789 # certain paren lists 13790 || ( $type eq '(' ) && ( 13791 13792 # it does not usually look good to align a list of 13793 # identifiers in a parameter list, as in: 13794 # my($var1, $var2, ...) 13795 # (This test should probably be refined, for now I'm just 13796 # testing for any keyword) 13797 ( $last_nonblank_type eq 'k' ) 13798 13799 # a trailing '(' usually indicates a non-list 13800 || ( $next_nonblank_type eq '(' ) 13801 ); 13802 13803 # patch to outdent opening brace of long if/for/.. 
13804 # statements (like this one). See similar coding in 13805 # set_continuation breaks. We have also catch it here for 13806 # short line fragments which otherwise will not go through 13807 # set_continuation_breaks. 13808 if ( 13809 $block_type 13810 13811 # if we have the ')' but not its '(' in this batch.. 13812 && ( $last_nonblank_token eq ')' ) 13813 && $mate_index_to_go[$i_last_nonblank_token] < 0 13814 13815 # and user wants brace to left 13816 && !$rOpts->{'opening-brace-always-on-right'} 13817 13818 && ( $type eq '{' ) # should be true 13819 && ( $token eq '{' ) # should be true 13820 ) 13821 { 13822 set_forced_breakpoint( $i - 1 ); 13823 } 13824 } 13825 13826 #------------------------------------------------------------ 13827 # Handle Decreasing Depth.. 13828 # 13829 # finish off any old list when depth decreases 13830 # token $i is a ')','}', or ']' 13831 #------------------------------------------------------------ 13832 elsif ( $depth < $current_depth ) { 13833 13834 check_for_new_minimum_depth($depth); 13835 13836 # force all outer logical containers to break after we see on 13837 # old breakpoint 13838 $has_old_logical_breakpoints[$depth] ||= 13839 $has_old_logical_breakpoints[$current_depth]; 13840 13841 # Patch to break between ') {' if the paren list is broken. 13842 # There is similar logic in set_continuation_breaks for 13843 # non-broken lists. 
13844 if ( $token eq ')' 13845 && $next_nonblank_block_type 13846 && $interrupted_list[$current_depth] 13847 && $next_nonblank_type eq '{' 13848 && !$rOpts->{'opening-brace-always-on-right'} ) 13849 { 13850 set_forced_breakpoint($i); 13851 } 13852 13853#print "LISTY sees: i=$i type=$type tok=$token block=$block_type depth=$depth next=$next_nonblank_type next_block=$next_nonblank_block_type inter=$interrupted_list[$current_depth]\n"; 13854 13855 # set breaks at commas if necessary 13856 my ( $bp_count, $do_not_break_apart ) = 13857 set_comma_breakpoints($current_depth); 13858 13859 my $i_opening = $opening_structure_index_stack[$current_depth]; 13860 my $saw_opening_structure = ( $i_opening >= 0 ); 13861 13862 # this term is long if we had to break at interior commas.. 13863 my $is_long_term = $bp_count > 0; 13864 13865 # ..or if the length between opening and closing parens exceeds 13866 # allowed line length 13867 if ( !$is_long_term && $saw_opening_structure ) { 13868 my $i_opening_minus = find_token_starting_list($i_opening); 13869 13870 # Note: we have to allow for one extra space after a 13871 # closing token so that we do not strand a comma or 13872 # semicolon, hence the '>=' here (oneline.t) 13873 $is_long_term = 13874 excess_line_length( $i_opening_minus, $i ) >= 0; 13875 } 13876 13877 # We've set breaks after all comma-arrows. 
Now we have to 13878 # undo them if this can be a one-line block 13879 # (the only breakpoints set will be due to comma-arrows) 13880 if ( 13881 13882 # user doesn't require breaking after all comma-arrows 13883 ( $rOpts_comma_arrow_breakpoints != 0 ) 13884 13885 # and if the opening structure is in this batch 13886 && $saw_opening_structure 13887 13888 # and either on the same old line 13889 && ( 13890 $old_breakpoint_count_stack[$current_depth] == 13891 $last_old_breakpoint_count 13892 13893 # or user wants to form long blocks with arrows 13894 || $rOpts_comma_arrow_breakpoints == 2 13895 ) 13896 13897 # and we made some breakpoints between the opening and closing 13898 && ( $breakpoint_undo_stack[$current_depth] < 13899 $forced_breakpoint_undo_count ) 13900 13901 # and this block is short enough to fit on one line 13902 # Note: use < because need 1 more space for possible comma 13903 && !$is_long_term 13904 13905 ) 13906 { 13907 undo_forced_breakpoint_stack( 13908 $breakpoint_undo_stack[$current_depth] ); 13909 } 13910 13911 # now see if we have any comma breakpoints left 13912 my $has_comma_breakpoints = 13913 ( $breakpoint_stack[$current_depth] != 13914 $forced_breakpoint_count ); 13915 13916 # update broken-sublist flag of the outer container 13917 $has_broken_sublist[$depth] = 13918 $has_broken_sublist[$depth] 13919 || $has_broken_sublist[$current_depth] 13920 || $is_long_term 13921 || $has_comma_breakpoints; 13922 13923# Having come to the closing ')', '}', or ']', now we have to decide if we 13924# should 'open up' the structure by placing breaks at the opening and 13925# closing containers. This is a tricky decision. Here are some of the 13926# basic considerations: 13927# 13928# -If this is a BLOCK container, then any breakpoints will have already 13929# been set (and according to user preferences), so we need do nothing here. 
13930# 13931# -If we have a comma-separated list for which we can align the list items, 13932# then we need to do so because otherwise the vertical aligner cannot 13933# currently do the alignment. 13934# 13935# -If this container does itself contain a container which has been broken 13936# open, then it should be broken open to properly show the structure. 13937# 13938# -If there is nothing to align, and no other reason to break apart, 13939# then do not do it. 13940# 13941# We will not break open the parens of a long but 'simple' logical expression. 13942# For example: 13943# 13944# This is an example of a simple logical expression and its formatting: 13945# 13946# if ( $bigwasteofspace1 && $bigwasteofspace2 13947# || $bigwasteofspace3 && $bigwasteofspace4 ) 13948# 13949# Most people would prefer this than the 'spacey' version: 13950# 13951# if ( 13952# $bigwasteofspace1 && $bigwasteofspace2 13953# || $bigwasteofspace3 && $bigwasteofspace4 13954# ) 13955# 13956# To illustrate the rules for breaking logical expressions, consider: 13957# 13958# FULLY DENSE: 13959# if ( $opt_excl 13960# and ( exists $ids_excl_uc{$id_uc} 13961# or grep $id_uc =~ /$_/, @ids_excl_uc )) 13962# 13963# This is on the verge of being difficult to read. The current default is to 13964# open it up like this: 13965# 13966# DEFAULT: 13967# if ( 13968# $opt_excl 13969# and ( exists $ids_excl_uc{$id_uc} 13970# or grep $id_uc =~ /$_/, @ids_excl_uc ) 13971# ) 13972# 13973# This is a compromise which tries to avoid being too dense and to spacey. 13974# A more spaced version would be: 13975# 13976# SPACEY: 13977# if ( 13978# $opt_excl 13979# and ( 13980# exists $ids_excl_uc{$id_uc} 13981# or grep $id_uc =~ /$_/, @ids_excl_uc 13982# ) 13983# ) 13984# 13985# Some people might prefer the spacey version -- an option could be added. The 13986# innermost expression contains a long block '( exists $ids_... ')'. 
13987# 13988# Here is how the logic goes: We will force a break at the 'or' that the 13989# innermost expression contains, but we will not break apart its opening and 13990# closing containers because (1) it contains no multi-line sub-containers itself, 13991# and (2) there is no alignment to be gained by breaking it open like this 13992# 13993# and ( 13994# exists $ids_excl_uc{$id_uc} 13995# or grep $id_uc =~ /$_/, @ids_excl_uc 13996# ) 13997# 13998# (although this looks perfectly ok and might be good for long expressions). The 13999# outer 'if' container, though, contains a broken sub-container, so it will be 14000# broken open to avoid too much density. Also, since it contains no 'or's, there 14001# will be a forced break at its 'and'. 14002 14003 # set some flags telling something about this container.. 14004 my $is_simple_logical_expression = 0; 14005 if ( $item_count_stack[$current_depth] == 0 14006 && $saw_opening_structure 14007 && $tokens_to_go[$i_opening] eq '(' 14008 && $is_logical_container{ $container_type[$current_depth] } 14009 ) 14010 { 14011 14012 # This seems to be a simple logical expression with 14013 # no existing breakpoints. Set a flag to prevent 14014 # opening it up. 14015 if ( !$has_comma_breakpoints ) { 14016 $is_simple_logical_expression = 1; 14017 } 14018 14019 # This seems to be a simple logical expression with 14020 # breakpoints (broken sublists, for example). Break 14021 # at all 'or's and '||'s. 14022 else { 14023 set_logical_breakpoints($current_depth); 14024 } 14025 } 14026 14027 if ( $is_long_term 14028 && @{ $rfor_semicolon_list[$current_depth] } ) 14029 { 14030 set_for_semicolon_breakpoints($current_depth); 14031 14032 # open up a long 'for' or 'foreach' container to allow 14033 # leading term alignment unless -lp is used. 
14034 $has_comma_breakpoints = 1 14035 unless $rOpts_line_up_parentheses; 14036 } 14037 14038 if ( 14039 14040 # breaks for code BLOCKS are handled at a higher level 14041 !$block_type 14042 14043 # we do not need to break at the top level of an 'if' 14044 # type expression 14045 && !$is_simple_logical_expression 14046 14047 ## modification to keep ': (' containers vertically tight; 14048 ## but probably better to let user set -vt=1 to avoid 14049 ## inconsistency with other paren types 14050 ## && ($container_type[$current_depth] ne ':') 14051 14052 # otherwise, we require one of these reasons for breaking: 14053 && ( 14054 14055 # - this term has forced line breaks 14056 $has_comma_breakpoints 14057 14058 # - the opening container is separated from this batch 14059 # for some reason (comment, blank line, code block) 14060 # - this is a non-paren container spanning multiple lines 14061 || !$saw_opening_structure 14062 14063 # - this is a long block contained in another breakable 14064 # container 14065 || ( $is_long_term 14066 && $container_environment_to_go[$i_opening] ne 14067 'BLOCK' ) 14068 ) 14069 ) 14070 { 14071 14072 # For -lp option, we must put a breakpoint before 14073 # the token which has been identified as starting 14074 # this indentation level. This is necessary for 14075 # proper alignment. 
14076 if ( $rOpts_line_up_parentheses && $saw_opening_structure ) 14077 { 14078 my $item = $leading_spaces_to_go[ $i_opening + 1 ]; 14079 if ( $i_opening + 1 < $max_index_to_go 14080 && $types_to_go[ $i_opening + 1 ] eq 'b' ) 14081 { 14082 $item = $leading_spaces_to_go[ $i_opening + 2 ]; 14083 } 14084 if ( defined($item) ) { 14085 my $i_start_2 = $item->get_STARTING_INDEX(); 14086 if ( 14087 defined($i_start_2) 14088 14089 # we are breaking after an opening brace, paren, 14090 # so don't break before it too 14091 && $i_start_2 ne $i_opening 14092 ) 14093 { 14094 14095 # Only break for breakpoints at the same 14096 # indentation level as the opening paren 14097 my $test1 = $nesting_depth_to_go[$i_opening]; 14098 my $test2 = $nesting_depth_to_go[$i_start_2]; 14099 if ( $test2 == $test1 ) { 14100 set_forced_breakpoint( $i_start_2 - 1 ); 14101 } 14102 } 14103 } 14104 } 14105 14106 # break after opening structure. 14107 # note: break before closing structure will be automatic 14108 if ( $minimum_depth <= $current_depth ) { 14109 14110 set_forced_breakpoint($i_opening) 14111 unless ( $do_not_break_apart 14112 || is_unbreakable_container($current_depth) ); 14113 14114 # break at '.' of lower depth level before opening token 14115 if ( $last_dot_index[$depth] ) { 14116 set_forced_breakpoint( $last_dot_index[$depth] ); 14117 } 14118 14119 # break before opening structure if preeced by another 14120 # closing structure and a comma. This is normally 14121 # done by the previous closing brace, but not 14122 # if it was a one-line block. 14123 if ( $i_opening > 2 ) { 14124 my $i_prev = 14125 ( $types_to_go[ $i_opening - 1 ] eq 'b' ) 14126 ? $i_opening - 2 14127 : $i_opening - 1; 14128 14129 if ( $types_to_go[$i_prev] eq ',' 14130 && $types_to_go[ $i_prev - 1 ] =~ /^[\)\}]$/ ) 14131 { 14132 set_forced_breakpoint($i_prev); 14133 } 14134 14135 # also break before something like ':(' or '?(' 14136 # if appropriate. 
14137 elsif ( 14138 $types_to_go[$i_prev] =~ /^([k\:\?]|&&|\|\|)$/ ) 14139 { 14140 my $token_prev = $tokens_to_go[$i_prev]; 14141 if ( $want_break_before{$token_prev} ) { 14142 set_forced_breakpoint($i_prev); 14143 } 14144 } 14145 } 14146 } 14147 14148 # break after comma following closing structure 14149 if ( $next_type eq ',' ) { 14150 set_forced_breakpoint( $i + 1 ); 14151 } 14152 14153 # break before an '=' following closing structure 14154 if ( 14155 $is_assignment{$next_nonblank_type} 14156 && ( $breakpoint_stack[$current_depth] != 14157 $forced_breakpoint_count ) 14158 ) 14159 { 14160 set_forced_breakpoint($i); 14161 } 14162 14163 # break at any comma before the opening structure Added 14164 # for -lp, but seems to be good in general. It isn't 14165 # obvious how far back to look; the '5' below seems to 14166 # work well and will catch the comma in something like 14167 # push @list, myfunc( $param, $param, .. 14168 14169 my $icomma = $last_comma_index[$depth]; 14170 if ( defined($icomma) && ( $i_opening - $icomma ) < 5 ) { 14171 unless ( $forced_breakpoint_to_go[$icomma] ) { 14172 set_forced_breakpoint($icomma); 14173 } 14174 } 14175 } # end logic to open up a container 14176 14177 # Break open a logical container open if it was already open 14178 elsif ($is_simple_logical_expression 14179 && $has_old_logical_breakpoints[$current_depth] ) 14180 { 14181 set_logical_breakpoints($current_depth); 14182 } 14183 14184 # Handle long container which does not get opened up 14185 elsif ($is_long_term) { 14186 14187 # must set fake breakpoint to alert outer containers that 14188 # they are complex 14189 set_fake_breakpoint(); 14190 } 14191 } 14192 14193 #------------------------------------------------------------ 14194 # Handle this token 14195 #------------------------------------------------------------ 14196 14197 $current_depth = $depth; 14198 14199 # handle comma-arrow 14200 if ( $type eq '=>' ) { 14201 next if ( $last_nonblank_type eq '=>' ); 14202 next if 
$rOpts_break_at_old_comma_breakpoints; 14203 next if $rOpts_comma_arrow_breakpoints == 3; 14204 $want_comma_break[$depth] = 1; 14205 $index_before_arrow[$depth] = $i_last_nonblank_token; 14206 next; 14207 } 14208 14209 elsif ( $type eq '.' ) { 14210 $last_dot_index[$depth] = $i; 14211 } 14212 14213 # Turn off alignment if we are sure that this is not a list 14214 # environment. To be safe, we will do this if we see certain 14215 # non-list tokens, such as ';', and also the environment is 14216 # not a list. Note that '=' could be in any of the = operators 14217 # (lextest.t). We can't just use the reported environment 14218 # because it can be incorrect in some cases. 14219 elsif ( ( $type =~ /^[\;\<\>\~]$/ || $is_assignment{$type} ) 14220 && $container_environment_to_go[$i] ne 'LIST' ) 14221 { 14222 $dont_align[$depth] = 1; 14223 $want_comma_break[$depth] = 0; 14224 $index_before_arrow[$depth] = -1; 14225 } 14226 14227 # now just handle any commas 14228 next unless ( $type eq ',' ); 14229 14230 $last_dot_index[$depth] = undef; 14231 $last_comma_index[$depth] = $i; 14232 14233 # break here if this comma follows a '=>' 14234 # but not if there is a side comment after the comma 14235 if ( $want_comma_break[$depth] ) { 14236 14237 if ( $next_nonblank_type =~ /^[\)\}\]R]$/ ) { 14238 $want_comma_break[$depth] = 0; 14239 $index_before_arrow[$depth] = -1; 14240 next; 14241 } 14242 14243 set_forced_breakpoint($i) unless ( $next_nonblank_type eq '#' ); 14244 14245 # break before the previous token if it looks safe 14246 # Example of something that we will not try to break before: 14247 # DBI::SQL_SMALLINT() => $ado_consts->{adSmallInt}, 14248 # Also we don't want to break at a binary operator (like +): 14249 # $c->createOval( 14250 # $x + $R, $y + 14251 # $R => $x - $R, 14252 # $y - $R, -fill => 'black', 14253 # ); 14254 my $ibreak = $index_before_arrow[$depth] - 1; 14255 if ( $ibreak > 0 14256 && $tokens_to_go[ $ibreak + 1 ] !~ /^[\)\}\]]$/ ) 14257 { 14258 if ( 
$tokens_to_go[$ibreak] eq '-' ) { $ibreak-- } 14259 if ( $types_to_go[$ibreak] eq 'b' ) { $ibreak-- } 14260 if ( $types_to_go[$ibreak] =~ /^[,wiZCUG\(\{\[]$/ ) { 14261 14262 # don't break pointer calls, such as the following: 14263 # File::Spec->curdir => 1, 14264 # (This is tokenized as adjacent 'w' tokens) 14265 if ( $tokens_to_go[ $ibreak + 1 ] !~ /^->/ ) { 14266 set_forced_breakpoint($ibreak); 14267 } 14268 } 14269 } 14270 14271 $want_comma_break[$depth] = 0; 14272 $index_before_arrow[$depth] = -1; 14273 14274 # handle list which mixes '=>'s and ','s: 14275 # treat any list items so far as an interrupted list 14276 $interrupted_list[$depth] = 1; 14277 next; 14278 } 14279 14280 # break after all commas above starting depth 14281 if ( $depth < $starting_depth && !$dont_align[$depth] ) { 14282 set_forced_breakpoint($i) unless ( $next_nonblank_type eq '#' ); 14283 next; 14284 } 14285 14286 # add this comma to the list.. 14287 my $item_count = $item_count_stack[$depth]; 14288 if ( $item_count == 0 ) { 14289 14290 # but do not form a list with no opening structure 14291 # for example: 14292 14293 # open INFILE_COPY, ">$input_file_copy" 14294 # or die ("very long message"); 14295 14296 if ( ( $opening_structure_index_stack[$depth] < 0 ) 14297 && $container_environment_to_go[$i] eq 'BLOCK' ) 14298 { 14299 $dont_align[$depth] = 1; 14300 } 14301 } 14302 14303 $comma_index[$depth][$item_count] = $i; 14304 ++$item_count_stack[$depth]; 14305 if ( $last_nonblank_type =~ /^[iR\]]$/ ) { 14306 $identifier_count_stack[$depth]++; 14307 } 14308 } 14309 14310 #------------------------------------------- 14311 # end of loop over all tokens in this batch 14312 #------------------------------------------- 14313 14314 # set breaks for any unfinished lists .. 
14315 for ( my $dd = $current_depth ; $dd >= $minimum_depth ; $dd-- ) { 14316 14317 $interrupted_list[$dd] = 1; 14318 $has_broken_sublist[$dd] = 1 if ( $dd < $current_depth ); 14319 set_comma_breakpoints($dd); 14320 set_logical_breakpoints($dd) 14321 if ( $has_old_logical_breakpoints[$dd] ); 14322 set_for_semicolon_breakpoints($dd); 14323 14324 # break open container... 14325 my $i_opening = $opening_structure_index_stack[$dd]; 14326 set_forced_breakpoint($i_opening) 14327 unless ( 14328 is_unbreakable_container($dd) 14329 14330 # Avoid a break which would place an isolated ' or " 14331 # on a line 14332 || ( $type eq 'Q' 14333 && $i_opening >= $max_index_to_go - 2 14334 && $token =~ /^['"]$/ ) 14335 ); 14336 } 14337 14338 # Return a flag indicating if the input file had some good breakpoints. 14339 # This flag will be used to force a break in a line shorter than the 14340 # allowed line length. 14341 if ( $has_old_logical_breakpoints[$current_depth] ) { 14342 $saw_good_breakpoint = 1; 14343 } 14344 return $saw_good_breakpoint; 14345 } 14346} # end scan_list 14347
sub find_token_starting_list {

    # When testing to see if a block will fit on one line, some
    # previous token(s) may also need to be on the line; particularly
    # if this is a sub call.  So we will look back at least one
    # token.  NOTE: This isn't perfect, but not critical, because
    # if we mis-identify a block, it will be wrapped and therefore
    # fixed the next time it is formatted.
    #
    # Given:
    #   $i_opening_paren = index in the current batch of the opening
    #                      token of a container
    # Returns:
    #   the index at which a length estimate for that container should
    #   start.  This is never greater than $i_opening_paren.
    my $i_opening_paren = shift;
    my $i_opening_minus = $i_opening_paren;
    my $im1             = $i_opening_paren - 1;
    my $im2             = $i_opening_paren - 2;

    # Positions before the start of the batch are treated as blanks.
    # Without the guard on $im1, an opening token at index 0 would read
    # $types_to_go[-1], which is the LAST token of the batch, and the
    # type 'k' branch below could then return -1.
    my $typem1 = $im1 >= 0 ? $types_to_go[$im1] : 'b';
    my $typem2 = $im2 >= 0 ? $types_to_go[$im2] : 'b';

    # directly after a comma: start at the opening token itself
    if ( $typem1 eq ',' || ( $typem1 eq 'b' && $typem2 eq ',' ) ) {
        $i_opening_minus = $i_opening_paren;
    }

    # a paren: include the term in front of it (it may be a sub call)
    elsif ( $tokens_to_go[$i_opening_paren] eq '(' ) {
        $i_opening_minus = $im1 if $im1 >= 0;

        # walk back to improve length estimate
        for ( my $j = $im1 ; $j >= 0 ; $j-- ) {
            last if ( $types_to_go[$j] =~ /^[\(\[\{L\}\]\)Rb,]$/ );
            $i_opening_minus = $j;
        }

        # do not start the estimate on a blank
        if ( $types_to_go[$i_opening_minus] eq 'b' ) { $i_opening_minus++ }
    }

    # any other opening token: include a type 'k' token (presumably a
    # keyword) which comes just before it, with or without a blank between
    elsif ( $typem1 eq 'k' ) { $i_opening_minus = $im1 }
    elsif ( $typem1 eq 'b' && $im2 >= 0 && $types_to_go[$im2] eq 'k' ) {
        $i_opening_minus = $im2;
    }
    return $i_opening_minus;
}
14382 14383{ # begin set_comma_breakpoints_do 14384 14385 my %is_keyword_with_special_leading_term; 14386 14387 BEGIN { 14388 14389 # These keywords have prototypes which allow a special leading item 14390 # followed by a list 14391 @_ = 14392 qw(formline grep kill map printf sprintf push chmod join pack unshift); 14393 @is_keyword_with_special_leading_term{@_} = (1) x scalar(@_); 14394 } 14395 14396 sub set_comma_breakpoints_do { 14397 14398 # Given a list with some commas, set breakpoints at some of the 14399 # commas, if necessary, to make it easy to read. 
This list is 14400 # an example: 14401 my ( 14402 $depth, $i_opening_paren, $i_closing_paren, 14403 $item_count, $identifier_count, $rcomma_index, 14404 $next_nonblank_type, $list_type, $interrupted, 14405 $rdo_not_break_apart, $must_break_open, 14406 ) = @_; 14407 14408 # nothing to do if no commas seen 14409 return if ( $item_count < 1 ); 14410 my $i_first_comma = $$rcomma_index[0]; 14411 my $i_true_last_comma = $$rcomma_index[ $item_count - 1 ]; 14412 my $i_last_comma = $i_true_last_comma; 14413 if ( $i_last_comma >= $max_index_to_go ) { 14414 $i_last_comma = $$rcomma_index[ --$item_count - 1 ]; 14415 return if ( $item_count < 1 ); 14416 } 14417 14418 #--------------------------------------------------------------- 14419 # find lengths of all items in the list to calculate page layout 14420 #--------------------------------------------------------------- 14421 my $comma_count = $item_count; 14422 my @item_lengths; 14423 my @i_term_begin; 14424 my @i_term_end; 14425 my @i_term_comma; 14426 my $i_prev_plus; 14427 my @max_length = ( 0, 0 ); 14428 my $first_term_length; 14429 my $i = $i_opening_paren; 14430 my $is_odd = 1; 14431 14432 for ( my $j = 0 ; $j < $comma_count ; $j++ ) { 14433 $is_odd = 1 - $is_odd; 14434 $i_prev_plus = $i + 1; 14435 $i = $$rcomma_index[$j]; 14436 14437 my $i_term_end = 14438 ( $types_to_go[ $i - 1 ] eq 'b' ) ? $i - 2 : $i - 1; 14439 my $i_term_begin = 14440 ( $types_to_go[$i_prev_plus] eq 'b' ) 14441 ? 
$i_prev_plus + 1 14442 : $i_prev_plus; 14443 push @i_term_begin, $i_term_begin; 14444 push @i_term_end, $i_term_end; 14445 push @i_term_comma, $i; 14446 14447 # note: currently adding 2 to all lengths (for comma and space) 14448 my $length = 14449 2 + token_sequence_length( $i_term_begin, $i_term_end ); 14450 push @item_lengths, $length; 14451 14452 if ( $j == 0 ) { 14453 $first_term_length = $length; 14454 } 14455 else { 14456 14457 if ( $length > $max_length[$is_odd] ) { 14458 $max_length[$is_odd] = $length; 14459 } 14460 } 14461 } 14462 14463 # now we have to make a distinction between the comma count and item 14464 # count, because the item count will be one greater than the comma 14465 # count if the last item is not terminated with a comma 14466 my $i_b = 14467 ( $types_to_go[ $i_last_comma + 1 ] eq 'b' ) 14468 ? $i_last_comma + 1 14469 : $i_last_comma; 14470 my $i_e = 14471 ( $types_to_go[ $i_closing_paren - 1 ] eq 'b' ) 14472 ? $i_closing_paren - 2 14473 : $i_closing_paren - 1; 14474 my $i_effective_last_comma = $i_last_comma; 14475 14476 my $last_item_length = token_sequence_length( $i_b + 1, $i_e ); 14477 14478 if ( $last_item_length > 0 ) { 14479 14480 # add 2 to length because other lengths include a comma and a blank 14481 $last_item_length += 2; 14482 push @item_lengths, $last_item_length; 14483 push @i_term_begin, $i_b + 1; 14484 push @i_term_end, $i_e; 14485 push @i_term_comma, undef; 14486 14487 my $i_odd = $item_count % 2; 14488 14489 if ( $last_item_length > $max_length[$i_odd] ) { 14490 $max_length[$i_odd] = $last_item_length; 14491 } 14492 14493 $item_count++; 14494 $i_effective_last_comma = $i_e + 1; 14495 14496 if ( $types_to_go[ $i_b + 1 ] =~ /^[iR\]]$/ ) { 14497 $identifier_count++; 14498 } 14499 } 14500 14501 #--------------------------------------------------------------- 14502 # End of length calculations 14503 #--------------------------------------------------------------- 14504 14505 
#--------------------------------------------------------------- 14506 # Compound List Rule 1: 14507 # Break at (almost) every comma for a list containing a broken 14508 # sublist. This has higher priority than the Interrupted List 14509 # Rule. 14510 #--------------------------------------------------------------- 14511 if ( $has_broken_sublist[$depth] ) { 14512 14513 # Break at every comma except for a comma between two 14514 # simple, small terms. This prevents long vertical 14515 # columns of, say, just 0's. 14516 my $small_length = 10; # 2 + actual maximum length wanted 14517 14518 # We'll insert a break in long runs of small terms to 14519 # allow alignment in uniform tables. 14520 my $skipped_count = 0; 14521 my $columns = table_columns_available($i_first_comma); 14522 my $fields = int( $columns / $small_length ); 14523 if ( $rOpts_maximum_fields_per_table 14524 && $fields > $rOpts_maximum_fields_per_table ) 14525 { 14526 $fields = $rOpts_maximum_fields_per_table; 14527 } 14528 my $max_skipped_count = $fields - 1; 14529 14530 my $is_simple_last_term = 0; 14531 my $is_simple_next_term = 0; 14532 foreach my $j ( 0 .. $item_count ) { 14533 $is_simple_last_term = $is_simple_next_term; 14534 $is_simple_next_term = 0; 14535 if ( $j < $item_count 14536 && $i_term_end[$j] == $i_term_begin[$j] 14537 && $item_lengths[$j] <= $small_length ) 14538 { 14539 $is_simple_next_term = 1; 14540 } 14541 next if $j == 0; 14542 if ( $is_simple_last_term 14543 && $is_simple_next_term 14544 && $skipped_count < $max_skipped_count ) 14545 { 14546 $skipped_count++; 14547 } 14548 else { 14549 $skipped_count = 0; 14550 my $i = $i_term_comma[ $j - 1 ]; 14551 last unless defined $i; 14552 set_forced_breakpoint($i); 14553 } 14554 } 14555 14556 # always break at the last comma if this list is 14557 # interrupted; we wouldn't want to leave a terminal '{', for 14558 # example. 
14559 if ($interrupted) { set_forced_breakpoint($i_true_last_comma) } 14560 return; 14561 } 14562 14563#my ( $a, $b, $c ) = caller(); 14564#print "LISTX: in set_list $a $c interupt=$interrupted count=$item_count 14565#i_first = $i_first_comma i_last=$i_last_comma max=$max_index_to_go\n"; 14566#print "depth=$depth has_broken=$has_broken_sublist[$depth] is_multi=$is_multiline opening_paren=($i_opening_paren) \n"; 14567 14568 #--------------------------------------------------------------- 14569 # Interrupted List Rule: 14570 # A list is is forced to use old breakpoints if it was interrupted 14571 # by side comments or blank lines, or requested by user. 14572 #--------------------------------------------------------------- 14573 if ( $rOpts_break_at_old_comma_breakpoints 14574 || $interrupted 14575 || $i_opening_paren < 0 ) 14576 { 14577 copy_old_breakpoints( $i_first_comma, $i_true_last_comma ); 14578 return; 14579 } 14580 14581 #--------------------------------------------------------------- 14582 # Looks like a list of items. We have to look at it and size it up. 14583 #--------------------------------------------------------------- 14584 14585 my $opening_token = $tokens_to_go[$i_opening_paren]; 14586 my $opening_environment = 14587 $container_environment_to_go[$i_opening_paren]; 14588 14589 #------------------------------------------------------------------- 14590 # Return if this will fit on one line 14591 #------------------------------------------------------------------- 14592 14593 my $i_opening_minus = find_token_starting_list($i_opening_paren); 14594 return 14595 unless excess_line_length( $i_opening_minus, $i_closing_paren ) > 0; 14596 14597 #------------------------------------------------------------------- 14598 # Now we know that this block spans multiple lines; we have to set 14599 # at least one breakpoint -- real or fake -- as a signal to break 14600 # open any outer containers. 
14601 #------------------------------------------------------------------- 14602 set_fake_breakpoint(); 14603 14604 # be sure we do not extend beyond the current list length 14605 if ( $i_effective_last_comma >= $max_index_to_go ) { 14606 $i_effective_last_comma = $max_index_to_go - 1; 14607 } 14608 14609 # Set a flag indicating if we need to break open to keep -lp 14610 # items aligned. This is necessary if any of the list terms 14611 # exceeds the available space after the '('. 14612 my $need_lp_break_open = $must_break_open; 14613 if ( $rOpts_line_up_parentheses && !$must_break_open ) { 14614 my $columns_if_unbroken = $rOpts_maximum_line_length - 14615 total_line_length( $i_opening_minus, $i_opening_paren ); 14616 $need_lp_break_open = 14617 ( $max_length[0] > $columns_if_unbroken ) 14618 || ( $max_length[1] > $columns_if_unbroken ) 14619 || ( $first_term_length > $columns_if_unbroken ); 14620 } 14621 14622 # Specify if the list must have an even number of fields or not. 14623 # It is generally safest to assume an even number, because the 14624 # list items might be a hash list. But if we can be sure that 14625 # it is not a hash, then we can allow an odd number for more 14626 # flexibility. 14627 my $odd_or_even = 2; # 1 = odd field count ok, 2 = want even count 14628 14629 if ( $identifier_count >= $item_count - 1 14630 || $is_assignment{$next_nonblank_type} 14631 || ( $list_type && $list_type ne '=>' && $list_type !~ /^[\:\?]$/ ) 14632 ) 14633 { 14634 $odd_or_even = 1; 14635 } 14636 14637 # do we have a long first term which should be 14638 # left on a line by itself? 
14639 my $use_separate_first_term = ( 14640 $odd_or_even == 1 # only if we can use 1 field/line 14641 && $item_count > 3 # need several items 14642 && $first_term_length > 14643 2 * $max_length[0] - 2 # need long first term 14644 && $first_term_length > 14645 2 * $max_length[1] - 2 # need long first term 14646 ); 14647 14648 # or do we know from the type of list that the first term should 14649 # be placed alone? 14650 if ( !$use_separate_first_term ) { 14651 if ( $is_keyword_with_special_leading_term{$list_type} ) { 14652 $use_separate_first_term = 1; 14653 14654 # should the container be broken open? 14655 if ( $item_count < 3 ) { 14656 if ( $i_first_comma - $i_opening_paren < 4 ) { 14657 $$rdo_not_break_apart = 1; 14658 } 14659 } 14660 elsif ($first_term_length < 20 14661 && $i_first_comma - $i_opening_paren < 4 ) 14662 { 14663 my $columns = table_columns_available($i_first_comma); 14664 if ( $first_term_length < $columns ) { 14665 $$rdo_not_break_apart = 1; 14666 } 14667 } 14668 } 14669 } 14670 14671 # if so, 14672 if ($use_separate_first_term) { 14673 14674 # ..set a break and update starting values 14675 $use_separate_first_term = 1; 14676 set_forced_breakpoint($i_first_comma); 14677 $i_opening_paren = $i_first_comma; 14678 $i_first_comma = $$rcomma_index[1]; 14679 $item_count--; 14680 return if $comma_count == 1; 14681 shift @item_lengths; 14682 shift @i_term_begin; 14683 shift @i_term_end; 14684 shift @i_term_comma; 14685 } 14686 14687 # if not, update the metrics to include the first term 14688 else { 14689 if ( $first_term_length > $max_length[0] ) { 14690 $max_length[0] = $first_term_length; 14691 } 14692 } 14693 14694 # Field width parameters 14695 my $pair_width = ( $max_length[0] + $max_length[1] ); 14696 my $max_width = 14697 ( $max_length[0] > $max_length[1] ) ? 
$max_length[0] : $max_length[1]; 14698 14699 # Number of free columns across the page width for laying out tables 14700 my $columns = table_columns_available($i_first_comma); 14701 14702 # Estimated maximum number of fields which fit this space 14703 # This will be our first guess 14704 my $number_of_fields_max = 14705 maximum_number_of_fields( $columns, $odd_or_even, $max_width, 14706 $pair_width ); 14707 my $number_of_fields = $number_of_fields_max; 14708 14709 # Find the best-looking number of fields 14710 # and make this our second guess if possible 14711 my ( $number_of_fields_best, $ri_ragged_break_list, 14712 $new_identifier_count ) 14713 = study_list_complexity( \@i_term_begin, \@i_term_end, \@item_lengths, 14714 $max_width ); 14715 14716 if ( $number_of_fields_best != 0 14717 && $number_of_fields_best < $number_of_fields_max ) 14718 { 14719 $number_of_fields = $number_of_fields_best; 14720 } 14721 14722 # ---------------------------------------------------------------------- 14723 # If we are crowded and the -lp option is being used, try to 14724 # undo some indentation 14725 # ---------------------------------------------------------------------- 14726 if ( 14727 $rOpts_line_up_parentheses 14728 && ( 14729 $number_of_fields == 0 14730 || ( $number_of_fields == 1 14731 && $number_of_fields != $number_of_fields_best ) 14732 ) 14733 ) 14734 { 14735 my $available_spaces = get_AVAILABLE_SPACES_to_go($i_first_comma); 14736 if ( $available_spaces > 0 ) { 14737 14738 my $spaces_wanted = $max_width - $columns; # for 1 field 14739 14740 if ( $number_of_fields_best == 0 ) { 14741 $number_of_fields_best = 14742 get_maximum_fields_wanted( \@item_lengths ); 14743 } 14744 14745 if ( $number_of_fields_best != 1 ) { 14746 my $spaces_wanted_2 = 14747 1 + $pair_width - $columns; # for 2 fields 14748 if ( $available_spaces > $spaces_wanted_2 ) { 14749 $spaces_wanted = $spaces_wanted_2; 14750 } 14751 } 14752 14753 if ( $spaces_wanted > 0 ) { 14754 my $deleted_spaces = 14755 
reduce_lp_indentation( $i_first_comma, $spaces_wanted ); 14756 14757 # redo the math 14758 if ( $deleted_spaces > 0 ) { 14759 $columns = table_columns_available($i_first_comma); 14760 $number_of_fields_max = 14761 maximum_number_of_fields( $columns, $odd_or_even, 14762 $max_width, $pair_width ); 14763 $number_of_fields = $number_of_fields_max; 14764 14765 if ( $number_of_fields_best == 1 14766 && $number_of_fields >= 1 ) 14767 { 14768 $number_of_fields = $number_of_fields_best; 14769 } 14770 } 14771 } 14772 } 14773 } 14774 14775 # try for one column if two won't work 14776 if ( $number_of_fields <= 0 ) { 14777 $number_of_fields = int( $columns / $max_width ); 14778 } 14779 14780 # The user can place an upper bound on the number of fields, 14781 # which can be useful for doing maintenance on tables 14782 if ( $rOpts_maximum_fields_per_table 14783 && $number_of_fields > $rOpts_maximum_fields_per_table ) 14784 { 14785 $number_of_fields = $rOpts_maximum_fields_per_table; 14786 } 14787 14788 # How many columns (characters) and lines would this container take 14789 # if no additional whitespace were added? 14790 my $packed_columns = token_sequence_length( $i_opening_paren + 1, 14791 $i_effective_last_comma + 1 ); 14792 if ( $columns <= 0 ) { $columns = 1 } # avoid divide by zero 14793 my $packed_lines = 1 + int( $packed_columns / $columns ); 14794 14795 # are we an item contained in an outer list? 14796 my $in_hierarchical_list = $next_nonblank_type =~ /^[\}\,]$/; 14797 14798 if ( $number_of_fields <= 0 ) { 14799 14800# #--------------------------------------------------------------- 14801# # We're in trouble. We can't find a single field width that works. 14802# # There is no simple answer here; we may have a single long list 14803# # item, or many. 
14804# #--------------------------------------------------------------- 14805# 14806# In many cases, it may be best to not force a break if there is just one 14807# comma, because the standard continuation break logic will do a better 14808# job without it. 14809# 14810# In the common case that all but one of the terms can fit 14811# on a single line, it may look better not to break open the 14812# containing parens. Consider, for example 14813# 14814# $color = 14815# join ( '/', 14816# sort { $color_value{$::a} <=> $color_value{$::b}; } 14817# keys %colors ); 14818# 14819# which will look like this with the container broken: 14820# 14821# $color = join ( 14822# '/', 14823# sort { $color_value{$::a} <=> $color_value{$::b}; } keys %colors 14824# ); 14825# 14826# Here is an example of this rule for a long last term: 14827# 14828# log_message( 0, 256, 128, 14829# "Number of routes in adj-RIB-in to be considered: $peercount" ); 14830# 14831# And here is an example with a long first term: 14832# 14833# $s = sprintf( 14834# "%2d wallclock secs (%$f usr %$f sys + %$f cusr %$f csys = %$f CPU)", 14835# $r, $pu, $ps, $cu, $cs, $tt 14836# ) 14837# if $style eq 'all'; 14838 14839 my $i_last_comma = $$rcomma_index[ $comma_count - 1 ]; 14840 my $long_last_term = excess_line_length( 0, $i_last_comma ) <= 0; 14841 my $long_first_term = 14842 excess_line_length( $i_first_comma + 1, $max_index_to_go ) <= 0; 14843 14844 # break at every comma ... 14845 if ( 14846 14847 # if requested by user or is best looking 14848 $number_of_fields_best == 1 14849 14850 # or if this is a sublist of a larger list 14851 || $in_hierarchical_list 14852 14853 # or if multiple commas and we dont have a long first or last 14854 # term 14855 || ( $comma_count > 1 14856 && !( $long_last_term || $long_first_term ) ) 14857 ) 14858 { 14859 foreach ( 0 .. 
$comma_count - 1 ) { 14860 set_forced_breakpoint( $$rcomma_index[$_] ); 14861 } 14862 } 14863 elsif ($long_last_term) { 14864 14865 set_forced_breakpoint($i_last_comma); 14866 $$rdo_not_break_apart = 1 unless $must_break_open; 14867 } 14868 elsif ($long_first_term) { 14869 14870 set_forced_breakpoint($i_first_comma); 14871 } 14872 else { 14873 14874 # let breaks be defined by default bond strength logic 14875 } 14876 return; 14877 } 14878 14879 # -------------------------------------------------------- 14880 # We have a tentative field count that seems to work. 14881 # How many lines will this require? 14882 # -------------------------------------------------------- 14883 my $formatted_lines = $item_count / ($number_of_fields); 14884 if ( $formatted_lines != int $formatted_lines ) { 14885 $formatted_lines = 1 + int $formatted_lines; 14886 } 14887 14888 # So far we've been trying to fill out to the right margin. But 14889 # compact tables are easier to read, so let's see if we can use fewer 14890 # fields without increasing the number of lines. 14891 $number_of_fields = 14892 compactify_table( $item_count, $number_of_fields, $formatted_lines, 14893 $odd_or_even ); 14894 14895 # How many spaces across the page will we fill? 
14896 my $columns_per_line = 14897 ( int $number_of_fields / 2 ) * $pair_width + 14898 ( $number_of_fields % 2 ) * $max_width; 14899 14900 my $formatted_columns; 14901 14902 if ( $number_of_fields > 1 ) { 14903 $formatted_columns = 14904 ( $pair_width * ( int( $item_count / 2 ) ) + 14905 ( $item_count % 2 ) * $max_width ); 14906 } 14907 else { 14908 $formatted_columns = $max_width * $item_count; 14909 } 14910 if ( $formatted_columns < $packed_columns ) { 14911 $formatted_columns = $packed_columns; 14912 } 14913 14914 my $unused_columns = $formatted_columns - $packed_columns; 14915 14916 # set some empirical parameters to help decide if we should try to 14917 # align; high sparsity does not look good, especially with few lines 14918 my $sparsity = ($unused_columns) / ($formatted_columns); 14919 my $max_allowed_sparsity = 14920 ( $item_count < 3 ) ? 0.1 14921 : ( $packed_lines == 1 ) ? 0.15 14922 : ( $packed_lines == 2 ) ? 0.4 14923 : 0.7; 14924 14925 # Begin check for shortcut methods, which avoid treating a list 14926 # as a table for relatively small parenthesized lists. These 14927 # are usually easier to read if not formatted as tables. 14928 if ( 14929 $packed_lines <= 2 # probably can fit in 2 lines 14930 && $item_count < 9 # doesn't have too many items 14931 && $opening_environment eq 'BLOCK' # not a sub-container 14932 && $opening_token eq '(' # is paren list 14933 ) 14934 { 14935 14936 # Shortcut method 1: for -lp and just one comma: 14937 # This is a no-brainer, just break at the comma. 14938 if ( 14939 $rOpts_line_up_parentheses # -lp 14940 && $item_count == 2 # two items, one comma 14941 && !$must_break_open 14942 ) 14943 { 14944 my $i_break = $$rcomma_index[0]; 14945 set_forced_breakpoint($i_break); 14946 $$rdo_not_break_apart = 1; 14947 set_non_alignment_flags( $comma_count, $rcomma_index ); 14948 return; 14949 14950 } 14951 14952 # method 2 is for most small ragged lists which might look 14953 # best if not displayed as a table. 
14954 if ( 14955 ( $number_of_fields == 2 && $item_count == 3 ) 14956 || ( 14957 $new_identifier_count > 0 # isn't all quotes 14958 && $sparsity > 0.15 14959 ) # would be fairly spaced gaps if aligned 14960 ) 14961 { 14962 14963 my $break_count = set_ragged_breakpoints( \@i_term_comma, 14964 $ri_ragged_break_list ); 14965 ++$break_count if ($use_separate_first_term); 14966 14967 # NOTE: we should really use the true break count here, 14968 # which can be greater if there are large terms and 14969 # little space, but usually this will work well enough. 14970 unless ($must_break_open) { 14971 14972 if ( $break_count <= 1 ) { 14973 $$rdo_not_break_apart = 1; 14974 } 14975 elsif ( $rOpts_line_up_parentheses && !$need_lp_break_open ) 14976 { 14977 $$rdo_not_break_apart = 1; 14978 } 14979 } 14980 set_non_alignment_flags( $comma_count, $rcomma_index ); 14981 return; 14982 } 14983 14984 } # end shortcut methods 14985 14986 # debug stuff 14987 14988 FORMATTER_DEBUG_FLAG_SPARSE && do { 14989 print 14990"SPARSE:cols=$columns commas=$comma_count items:$item_count ids=$identifier_count pairwidth=$pair_width fields=$number_of_fields lines packed: $packed_lines packed_cols=$packed_columns fmtd:$formatted_lines cols /line:$columns_per_line unused:$unused_columns fmtd:$formatted_columns sparsity=$sparsity allow=$max_allowed_sparsity\n"; 14991 14992 }; 14993 14994 #--------------------------------------------------------------- 14995 # Compound List Rule 2: 14996 # If this list is too long for one line, and it is an item of a 14997 # larger list, then we must format it, regardless of sparsity 14998 # (ian.t). One reason that we have to do this is to trigger 14999 # Compound List Rule 1, above, which causes breaks at all commas of 15000 # all outer lists. In this way, the structure will be properly 15001 # displayed. 
15002 #--------------------------------------------------------------- 15003 15004 # Decide if this list is too long for one line unless broken 15005 my $total_columns = table_columns_available($i_opening_paren); 15006 my $too_long = $packed_columns > $total_columns; 15007 15008 # For a paren list, include the length of the token just before the 15009 # '(' because this is likely a sub call, and we would have to 15010 # include the sub name on the same line as the list. This is still 15011 # imprecise, but not too bad. (steve.t) 15012 if ( !$too_long && $i_opening_paren > 0 && $opening_token eq '(' ) { 15013 15014 $too_long = excess_line_length( $i_opening_minus, 15015 $i_effective_last_comma + 1 ) > 0; 15016 } 15017 15018 # FIXME: For an item after a '=>', try to include the length of the 15019 # thing before the '=>'. This is crude and should be improved by 15020 # actually looking back token by token. 15021 if ( !$too_long && $i_opening_paren > 0 && $list_type eq '=>' ) { 15022 my $i_opening_minus = $i_opening_paren - 4; 15023 if ( $i_opening_minus >= 0 ) { 15024 $too_long = excess_line_length( $i_opening_minus, 15025 $i_effective_last_comma + 1 ) > 0; 15026 } 15027 } 15028 15029 # Always break lists contained in '[' and '{' if too long for 1 line, 15030 # and always break lists which are too long and part of a more complex 15031 # structure. 15032 my $must_break_open_container = $must_break_open 15033 || ( $too_long 15034 && ( $in_hierarchical_list || $opening_token ne '(' ) ); 15035 15036#print "LISTX: next=$next_nonblank_type avail cols=$columns packed=$packed_columns must format = $must_break_open_container too-long=$too_long opening=$opening_token list_type=$list_type formatted_lines=$formatted_lines packed=$packed_lines max_sparsity= $max_allowed_sparsity sparsity=$sparsity \n"; 15037 15038 #--------------------------------------------------------------- 15039 # The main decision: 15040 # Now decide if we will align the data into aligned columns. 
Do not 15041 # attempt to align columns if this is a tiny table or it would be 15042 # too spaced. It seems that the more packed lines we have, the 15043 # sparser the list that can be allowed and still look ok. 15044 #--------------------------------------------------------------- 15045 15046 if ( ( $formatted_lines < 3 && $packed_lines < $formatted_lines ) 15047 || ( $formatted_lines < 2 ) 15048 || ( $unused_columns > $max_allowed_sparsity * $formatted_columns ) 15049 ) 15050 { 15051 15052 #--------------------------------------------------------------- 15053 # too sparse: would look ugly if aligned in a table; 15054 #--------------------------------------------------------------- 15055 15056 # use old breakpoints if this is a 'big' list 15057 # FIXME: goal is to improve set_ragged_breakpoints so that 15058 # this is not necessary. 15059 if ( $packed_lines > 2 && $item_count > 10 ) { 15060 write_logfile_entry("List sparse: using old breakpoints\n"); 15061 copy_old_breakpoints( $i_first_comma, $i_last_comma ); 15062 } 15063 15064 # let the continuation logic handle it if 2 lines 15065 else { 15066 15067 my $break_count = set_ragged_breakpoints( \@i_term_comma, 15068 $ri_ragged_break_list ); 15069 ++$break_count if ($use_separate_first_term); 15070 15071 unless ($must_break_open_container) { 15072 if ( $break_count <= 1 ) { 15073 $$rdo_not_break_apart = 1; 15074 } 15075 elsif ( $rOpts_line_up_parentheses && !$need_lp_break_open ) 15076 { 15077 $$rdo_not_break_apart = 1; 15078 } 15079 } 15080 set_non_alignment_flags( $comma_count, $rcomma_index ); 15081 } 15082 return; 15083 } 15084 15085 #--------------------------------------------------------------- 15086 # go ahead and format as a table 15087 #--------------------------------------------------------------- 15088 write_logfile_entry( 15089 "List: auto formatting with $number_of_fields fields/row\n"); 15090 15091 my $j_first_break = 15092 $use_separate_first_term ? 
sub set_non_alignment_flags {

    # Flag a run of commas so that they will not be vertically aligned.
    #
    #   $comma_count  = number of leading entries of @$rcomma_index to flag
    #   $rcomma_index = ref to array of comma token indexes
    #
    # For each of those commas a 1 is stored in @matching_token_to_go at the
    # comma's index.
    my ( $comma_count, $rcomma_index ) = @_;
    for my $j ( 0 .. $comma_count - 1 ) {
        $matching_token_to_go[ $rcomma_index->[$j] ] = 1;
    }
}

sub study_list_complexity {

    # Look for complex tables which should be formatted with one term per line.
    #
    # Input:
    #   $ri_term_begin = ref to array of index of first token of each item
    #   $ri_term_end   = ref to array of index of last token of each item
    #   $ritem_lengths = ref to array of item lengths
    #   $max_width     = width of the widest item
    #
    # Returns a list of three values, in this order:
    #   $number_of_fields_best = suggested number of fields.  It starts as
    #     $rOpts_maximum_fields_per_table and is forced to 1 when more than
    #     half of the items are complex and the widest item exceeds 12
    #     (unless the starting value is 2).
    #   \@i_ragged_break_list  = item numbers after which a break would avoid
    #     lines which are hard to read
    #   $identifier_count      = number of items whose first token is not of
    #     type 'q', 'Q', 'w' or '-'
    my ( $ri_term_begin, $ri_term_end, $ritem_lengths, $max_width ) = @_;
    my $item_count            = @{$ri_term_begin};
    my $complex_item_count    = 0;
    my $number_of_fields_best = $rOpts_maximum_fields_per_table;
    my $i_max                 = @{$ritem_lengths} - 1;

    # item numbers of the two most recent ragged breaks; the initial values
    # can never look like "the previous item"
    my $i_last_last_break = -3;
    my $i_last_break      = -2;
    my @i_ragged_break_list;

    # empirical thresholds on the weighted length of an item
    my $definitely_complex = 30;
    my $definitely_simple  = 12;
    my $quote_count        = 0;

    for my $i ( 0 .. $i_max ) {
        my $ib = $ri_term_begin->[$i];
        my $ie = $ri_term_end->[$i];

        # define complexity: start with the actual term length
        my $weighted_length = ( $ritem_lengths->[$i] - 2 );

        # quotes, and also barewords and leading minus signs, are counted
        # so that they can be excluded from the identifier count
        my $is_quote = 0;
        if ( $types_to_go[$ib] =~ /^[qQ]$/ ) {
            $is_quote = 1;
            $quote_count++;
        }
        elsif ( $types_to_go[$ib] =~ /^[w\-]$/ ) {
            $quote_count++;
        }

        # single-token item: complex only if it is a quote containing
        # whitespace.  (The indexes are numbers, so compare numerically.)
        if ( $ib == $ie ) {
            if ( $is_quote && $tokens_to_go[$ib] =~ /\s/ ) {
                $complex_item_count++;
                $weighted_length *= 2;
            }
        }

        # multi-token item: complex if it contains a token of type 'b';
        # a '..' token adds a fixed penalty
        else {
            if ( grep { $_ eq 'b' } @types_to_go[ $ib .. $ie ] ) {
                $complex_item_count++;
                $weighted_length *= 2;
            }
            if ( grep { $_ eq '..' } @types_to_go[ $ib .. $ie ] ) {
                $weighted_length += 4;
            }
        }

        # add weight for extra tokens.
        $weighted_length += 2 * ( $ie - $ib );

        # now mark a ragged break after this item if it is 'long and
        # complex':
        if ( $weighted_length >= $definitely_complex ) {

            # if we broke after the previous term
            # then break before it too
            if (   $i_last_break == $i - 1
                && $i > 1
                && $i_last_last_break != $i - 2 )
            {

                ## FIXME: don't strand a small term
                pop @i_ragged_break_list;
                push @i_ragged_break_list, $i - 2;
                push @i_ragged_break_list, $i - 1;
            }

            push @i_ragged_break_list, $i;
            $i_last_last_break = $i_last_break;
            $i_last_break      = $i;
        }

        # don't break before a small last term -- it will
        # not look good on a line by itself.
        elsif ($i == $i_max
            && $i_last_break == $i - 1
            && $weighted_length <= $definitely_simple )
        {
            pop @i_ragged_break_list;
        }
    }

    my $identifier_count = $i_max + 1 - $quote_count;

    # Need more tuning here..
    if (   $max_width > 12
        && $complex_item_count > $item_count / 2
        && $number_of_fields_best != 2 )
    {
        $number_of_fields_best = 1;
    }

    return ( $number_of_fields_best, \@i_ragged_break_list, $identifier_count );
}

sub get_maximum_fields_wanted {

    # Not all tables look good with more than one field of items.
    # This routine looks at a table and decides if it should be
    # formatted with just one field or not.
    # This coding is still under development.
    #
    #   $ritem_lengths = ref to array of item lengths
    #
    # Returns 1 if a single field is suggested, otherwise 0.
    my ($ritem_lengths) = @_;

    # For just a few items, we tentatively assume just 1 field.
    my $item_count = @{$ritem_lengths};
    return (1) if ( $item_count <= 5 );

    # For larger tables, look at it both ways and see what looks best:
    #   $total_variation_1 = sum of |length change| between adjacent items,
    #   @total_variation_2 = the same sum taken separately over the items at
    #     even positions and the items at odd positions, i.e. down each
    #     column of a two-field table.
    my $total_variation_1 = 0;
    my @total_variation_2 = ( 0, 0 );
    my @last_length_2     = ( undef, undef );
    my $last_length;
    for my $j ( 0 .. $item_count - 1 ) {
        my $column = $j % 2;
        my $length = $ritem_lengths->[$j];

        if ( defined($last_length) ) {
            $total_variation_1 += abs( $length - $last_length );
        }
        $last_length = $length;

        if ( defined( $last_length_2[$column] ) ) {
            $total_variation_2[$column] +=
              abs( $length - $last_length_2[$column] );
        }
        $last_length_2[$column] = $length;
    }
    my $total_variation_2 = $total_variation_2[0] + $total_variation_2[1];

    # Two fields must be clearly smoother than one to be allowed; shorter
    # tables must pass a stricter test.  There are more than 5 items here.
    my $factor = ( $item_count > 10 ) ? 1 : 0.75;
    return ( $total_variation_2 < $factor * $total_variation_1 ) ? (0) : (1);
}

sub table_columns_available {

    # Return the number of columns available for a table.
    #
    #   $i_first_comma = token index which is passed to leading_spaces_to_go
    #     to get the amount to subtract from the maximum line length
    my $i_first_comma = shift;
    my $columns =
      $rOpts_maximum_line_length - leading_spaces_to_go($i_first_comma);

    # Patch: the vertical formatter does not line up lines whose lengths
    # exactly equal the available line length because of allowances
    # that must be made for side comments.  Therefore, the number of
    # available columns is reduced by 1 character.
    $columns -= 1;
    return $columns;
}

sub maximum_number_of_fields {

    # how many fields will fit in the available space?
    #
    #   $columns     = number of columns available
    #   $odd_or_even = 1 if an odd number of fields is allowed; any other
    #                  value gives an even number of fields
    #   $max_width   = width of a single (unpaired) field
    #   $pair_width  = width of a pair of fields; must be nonzero
    #
    # Returns twice the number of whole pairs that fit, plus one if odd
    # counts are allowed and one more single field also fits.
    my ( $columns, $odd_or_even, $max_width, $pair_width ) = @_;
    my $max_pairs        = int( $columns / $pair_width );
    my $number_of_fields = $max_pairs * 2;
    if (   $odd_or_even == 1
        && $max_pairs * $pair_width + $max_width <= $columns )
    {
        $number_of_fields++;
    }
    return $number_of_fields;
}

sub compactify_table {

    # given a table with a certain number of fields and a certain number
    # of lines, see if reducing the number of fields will make it look
    # better.
    #
    #   $item_count       = number of items in the table
    #   $number_of_fields = current number of fields per line
    #   $formatted_lines  = number of lines in the table
    #   $odd_or_even      = step by which the field count is reduced
    #
    # Returns the smallest field count, reached in steps of $odd_or_even,
    # for which $formatted_lines lines still hold all of the items; the
    # input field count is returned if no reduction is possible.
    my ( $item_count, $number_of_fields, $formatted_lines, $odd_or_even ) = @_;

    # a step which is not positive would make the search below run forever
    return $number_of_fields unless ( $odd_or_even && $odd_or_even > 0 );

    if ( $number_of_fields >= $odd_or_even * 2 && $formatted_lines > 0 ) {
        my $min_fields = $number_of_fields;
        while ($min_fields >= $odd_or_even
            && $min_fields * $formatted_lines >= $item_count )
        {
            $number_of_fields = $min_fields;
            $min_fields -= $odd_or_even;
        }
    }
    return $number_of_fields;
}

sub set_ragged_breakpoints {

    # Set breakpoints in a list that cannot be formatted nicely as a
    # table.
    #
    #   $ri_term_comma        = ref to array giving, for each item number,
    #                           the token index of its comma
    #   $ri_ragged_break_list = ref to array of item numbers to break after
    #
    # Returns the number of breakpoints requested.  An item whose comma
    # index is false (undefined or 0) is skipped and is not counted.
    my ( $ri_term_comma, $ri_ragged_break_list ) = @_;

    my $break_count = 0;
    foreach my $i_term ( @{$ri_ragged_break_list} ) {
        my $i_comma = $ri_term_comma->[$i_term];
        next unless ($i_comma);
        set_forced_breakpoint($i_comma);
        $break_count++;
    }
    return $break_count;
}

sub copy_old_breakpoints {

    # Request a forced breakpoint at every token index in the range
    # $i_first_comma .. $i_last_comma at which @old_breakpoint_to_go is set.
    my ( $i_first_comma, $i_last_comma ) = @_;
    for my $i ( $i_first_comma .. $i_last_comma ) {
        if ( $old_breakpoint_to_go[$i] ) {
            set_forced_breakpoint($i);
        }
    }
}

sub set_nobreaks {

    # Set @nobreak_to_go to 1 for every token index in the range $i .. $j.
    # Nothing is changed unless 0 <= $i <= $j <= $max_index_to_go.
    my ( $i, $j ) = @_;
    if ( $i >= 0 && $i <= $j && $j <= $max_index_to_go ) {

        FORMATTER_DEBUG_FLAG_NOBREAK && do {
            my ( $pkg, $file, $line ) = caller();
            print(
"NOBREAK: forced_breakpoint $forced_breakpoint_count from $pkg $line with i=$i max=$max_index_to_go type=$types_to_go[$i]\n"
            );
        };

        @nobreak_to_go[ $i .. $j ] = (1) x ( $j - $i + 1 );
    }

    # shouldn't happen; non-critical error
    else {
        FORMATTER_DEBUG_FLAG_NOBREAK && do {
            my ( $pkg, $file, $line ) = caller();
            print(
"NOBREAK ERROR: from $pkg $line with i=$i j=$j max=$max_index_to_go\n"
            );
        };
    }
}

sub set_fake_breakpoint {

    # Just bump up the breakpoint count as a signal that there are breaks.
    # This is useful if we have breaks but may want to postpone deciding where
    # to make them.
    $forced_breakpoint_count++;
}

sub set_forced_breakpoint {

    # Request a forced breakpoint at token index $i.  The request is
    # ignored if $i is undefined or negative, if the adjusted index is out
    # of range, or if @nobreak_to_go is nonzero at the final index.
    my $i = shift;

    return unless defined $i && $i >= 0;

    # when called with certain tokens, use bond strengths to decide
    # if we break before or after it
    my $token = $tokens_to_go[$i];

    if ( $token =~ /^(?:[\=\.\,\:\?]|and|or|xor|&&|\|\|)$/ ) {
        if ( $want_break_before{$token} ) { $i-- }
    }

    # breaks are forced before 'if' and 'unless'
    elsif ( $is_if_unless{$token} ) { $i-- }

    if ( $i >= 0 && $i <= $max_index_to_go ) {

        # if the token here is a blank, step back one more token
        my $i_nonblank = ( $types_to_go[$i] ne 'b' ) ? $i : $i - 1;

        FORMATTER_DEBUG_FLAG_FORCE && do {
            my ( $pkg, $file, $line ) = caller();
            print
"FORCE forced_breakpoint $forced_breakpoint_count from $pkg $line with i=$i_nonblank max=$max_index_to_go tok=$tokens_to_go[$i_nonblank] type=$types_to_go[$i_nonblank] nobr=$nobreak_to_go[$i_nonblank]\n";
        };

        if ( $i_nonblank >= 0 && $nobreak_to_go[$i_nonblank] == 0 ) {
            $forced_breakpoint_to_go[$i_nonblank] = 1;

            if ( $i_nonblank > $index_max_forced_break ) {
                $index_max_forced_break = $i_nonblank;
            }
            $forced_breakpoint_count++;

            # remember this break so that it can be undone later
            $forced_breakpoint_undo_stack[ $forced_breakpoint_undo_count++ ] =
              $i_nonblank;

            # if we break at an opening container..break at the closing
            if ( $tokens_to_go[$i_nonblank] =~ /^[\{\[\(\?]$/ ) {
                set_closing_breakpoint($i_nonblank);
            }
        }
    }
}

sub clear_breakpoint_undo_stack {

    # Empty the stack of breakpoints which can be undone.
    $forced_breakpoint_undo_count = 0;
}

sub undo_forced_breakpoint_stack {

    # Remove the forced breakpoints recorded on the undo stack, from the
    # top of the stack down to (and including) stack position $i_start.
    # A negative $i_start is reported as a program bug and treated as 0.
    my $i_start = shift;
    if ( $i_start < 0 ) {

        # keep the bad value for the message before it is replaced
        my $i_bad = $i_start;
        $i_start = 0;
        my ( $pkg, $file, $line ) = caller();
        warning(
"Program Bug: undo_forced_breakpoint_stack from $pkg $line has i=$i_bad "
        );
    }

    while ( $forced_breakpoint_undo_count > $i_start ) {
        my $i =
          $forced_breakpoint_undo_stack[ --$forced_breakpoint_undo_count ];
        if ( $i >= 0 && $i <= $max_index_to_go ) {
            $forced_breakpoint_to_go[$i] = 0;
            $forced_breakpoint_count--;

            FORMATTER_DEBUG_FLAG_UNDOBP && do {
                my ( $pkg, $file, $line ) = caller();
                print(
"UNDOBP: undo forced_breakpoint i=$i $forced_breakpoint_undo_count from $pkg $line max=$max_index_to_go\n"
                );
            };
        }

        # shouldn't happen, but not a critical error
        else {
            FORMATTER_DEBUG_FLAG_UNDOBP && do {
                my ( $pkg, $file, $line ) = caller();
                print(
"Program Bug: undo_forced_breakpoint from $pkg $line has i=$i but max=$max_index_to_go"
                );
            };
        }
    }
}
15511 # 15512 # $ri_beg = ref to array of BEGinning indexes of each line 15513 # $ri_end = ref to array of ENDing indexes of each line 15514 my ( $ri_beg, $ri_end ) = @_; 15515 15516 my $more_to_do = 1; 15517 15518 # We keep looping over all of the lines of this batch 15519 # until there are no more possible recombinations 15520 my $nmax_last = @$ri_end; 15521 while ($more_to_do) { 15522 my $n_best = 0; 15523 my $bs_best; 15524 my $n; 15525 my $nmax = @$ri_end - 1; 15526 15527 # safety check for infinite loop 15528 unless ( $nmax < $nmax_last ) { 15529 15530 # shouldn't happen because splice below decreases nmax on each pass: 15531 # but i get paranoid sometimes 15532 die "Program bug-infinite loop in recombine breakpoints\n"; 15533 } 15534 $nmax_last = $nmax; 15535 $more_to_do = 0; 15536 my $previous_outdentable_closing_paren; 15537 my $leading_amp_count = 0; 15538 my $this_line_is_semicolon_terminated; 15539 15540 # loop over all remaining lines in this batch 15541 for $n ( 1 .. $nmax ) { 15542 15543 #---------------------------------------------------------- 15544 # If we join the current pair of lines, 15545 # line $n-1 will become the left part of the joined line 15546 # line $n will become the right part of the joined line 15547 # 15548 # Here are Indexes of the endpoint tokens of the two lines: 15549 # 15550 # -----line $n-1--- | -----line $n----- 15551 # $ibeg_1 $iend_1 | $ibeg_2 $iend_2 15552 # ^ 15553 # | 15554 # We want to decide if we should remove the line break 15555 # betwen the tokens at $iend_1 and $ibeg_2 15556 # 15557 # We will apply a number of ad-hoc tests to see if joining 15558 # here will look ok. The code will just issue a 'next' 15559 # command if the join doesn't look good. If we get through 15560 # the gauntlet of tests, the lines will be recombined. 
15561 #---------------------------------------------------------- 15562 # 15563 # beginning and ending tokens of the lines we are working on 15564 my $ibeg_1 = $$ri_beg[ $n - 1 ]; 15565 my $iend_1 = $$ri_end[ $n - 1 ]; 15566 my $iend_2 = $$ri_end[$n]; 15567 my $ibeg_2 = $$ri_beg[$n]; 15568 15569 my $ibeg_nmax = $$ri_beg[$nmax]; 15570 15571 # some beginning indexes of other lines, which may not exist 15572 my $ibeg_0 = $n > 1 ? $$ri_beg[ $n - 2 ] : -1; 15573 my $ibeg_3 = $n < $nmax ? $$ri_beg[ $n + 1 ] : -1; 15574 my $ibeg_4 = $n + 2 <= $nmax ? $$ri_beg[ $n + 2 ] : -1; 15575 15576 my $bs_tweak = 0; 15577 15578 #my $depth_increase=( $nesting_depth_to_go[$ibeg_2] - 15579 # $nesting_depth_to_go[$ibeg_1] ); 15580 15581##print "RECOMBINE: n=$n imid=$iend_1 if=$ibeg_1 type=$types_to_go[$ibeg_1] =$tokens_to_go[$ibeg_1] next_type=$types_to_go[$ibeg_2] next_tok=$tokens_to_go[$ibeg_2]\n"; 15582 15583 # If line $n is the last line, we set some flags and 15584 # do any special checks for it 15585 if ( $n == $nmax ) { 15586 15587 # a terminal '{' should stay where it is 15588 next if $types_to_go[$ibeg_2] eq '{'; 15589 15590 # set flag if statement $n ends in ';' 15591 $this_line_is_semicolon_terminated = 15592 $types_to_go[$iend_2] eq ';' 15593 15594 # with possible side comment 15595 || ( $types_to_go[$iend_2] eq '#' 15596 && $iend_2 - $ibeg_2 >= 2 15597 && $types_to_go[ $iend_2 - 2 ] eq ';' 15598 && $types_to_go[ $iend_2 - 1 ] eq 'b' ); 15599 } 15600 15601 #---------------------------------------------------------- 15602 # Section 1: examine token at $iend_1 (right end of first line 15603 # of pair) 15604 #---------------------------------------------------------- 15605 15606 # an isolated '}' may join with a ';' terminated segment 15607 if ( $types_to_go[$iend_1] eq '}' ) { 15608 15609 # Check for cases where combining a semicolon terminated 15610 # statement with a previous isolated closing paren will 15611 # allow the combined line to be outdented. 
This is 15612 # generally a good move. For example, we can join up 15613 # the last two lines here: 15614 # ( 15615 # $dev, $ino, $mode, $nlink, $uid, $gid, $rdev, 15616 # $size, $atime, $mtime, $ctime, $blksize, $blocks 15617 # ) 15618 # = stat($file); 15619 # 15620 # to get: 15621 # ( 15622 # $dev, $ino, $mode, $nlink, $uid, $gid, $rdev, 15623 # $size, $atime, $mtime, $ctime, $blksize, $blocks 15624 # ) = stat($file); 15625 # 15626 # which makes the parens line up. 15627 # 15628 # Another example, from Joe Matarazzo, probably looks best 15629 # with the 'or' clause appended to the trailing paren: 15630 # $self->some_method( 15631 # PARAM1 => 'foo', 15632 # PARAM2 => 'bar' 15633 # ) or die "Some_method didn't work"; 15634 # 15635 $previous_outdentable_closing_paren = 15636 $this_line_is_semicolon_terminated # ends in ';' 15637 && $ibeg_1 == $iend_1 # only one token on last line 15638 && $tokens_to_go[$iend_1] eq 15639 ')' # must be structural paren 15640 15641 # only &&, ||, and : if no others seen 15642 # (but note: our count made below could be wrong 15643 # due to intervening comments) 15644 && ( $leading_amp_count == 0 15645 || $types_to_go[$ibeg_2] !~ /^(:|\&\&|\|\|)$/ ) 15646 15647 # but leading colons probably line up with with a 15648 # previous colon or question (count could be wrong). 15649 && $types_to_go[$ibeg_2] ne ':' 15650 15651 # only one step in depth allowed. this line must not 15652 # begin with a ')' itself. 15653 && ( $nesting_depth_to_go[$iend_1] == 15654 $nesting_depth_to_go[$iend_2] + 1 ); 15655 15656 next 15657 unless ( 15658 $previous_outdentable_closing_paren 15659 15660 # handle '.' and '?' 
specially below 15661 || ( $types_to_go[$ibeg_2] =~ /^[\.\?]$/ ) 15662 ); 15663 } 15664 15665 # do not recombine lines with ending &&, ||, 15666 elsif ( $is_amp_amp{ $types_to_go[$iend_1] } ) { 15667 next unless $want_break_before{ $types_to_go[$iend_1] }; 15668 } 15669 15670 # keep a terminal colon 15671 elsif ( $types_to_go[$iend_1] eq ':' ) { 15672 next unless $want_break_before{ $types_to_go[$iend_1] }; 15673 } 15674 15675 # Identify and recombine a broken ?/: chain 15676 elsif ( $types_to_go[$iend_1] eq '?' ) { 15677 15678 # Do not recombine different levels 15679 next 15680 if ( $levels_to_go[$ibeg_1] ne $levels_to_go[$ibeg_2] ); 15681 15682 # do not recombine unless next line ends in : 15683 next unless $types_to_go[$iend_2] eq ':'; 15684 } 15685 15686 # for lines ending in a comma... 15687 elsif ( $types_to_go[$iend_1] eq ',' ) { 15688 15689 # Do not recombine at comma which is following the 15690 # input bias. 15691 # TODO: might be best to make a special flag 15692 next if ( $old_breakpoint_to_go[$iend_1] ); 15693 15694 # an isolated '},' may join with an identifier + ';' 15695 # this is useful for the class of a 'bless' statement (bless.t) 15696 if ( $types_to_go[$ibeg_1] eq '}' 15697 && $types_to_go[$ibeg_2] eq 'i' ) 15698 { 15699 next 15700 unless ( ( $ibeg_1 == ( $iend_1 - 1 ) ) 15701 && ( $iend_2 == ( $ibeg_2 + 1 ) ) 15702 && $this_line_is_semicolon_terminated ); 15703 15704 # override breakpoint 15705 $forced_breakpoint_to_go[$iend_1] = 0; 15706 } 15707 15708 # but otherwise .. 15709 else { 15710 15711 # do not recombine after a comma unless this will leave 15712 # just 1 more line 15713 next unless ( $n + 1 >= $nmax ); 15714 15715 # do not recombine if there is a change in indentation depth 15716 next 15717 if ( 15718 $levels_to_go[$iend_1] != $levels_to_go[$iend_2] ); 15719 15720 # do not recombine a "complex expression" after a 15721 # comma. "complex" means no parens. 15722 my $saw_paren; 15723 foreach my $ii ( $ibeg_2 .. 
$iend_2 ) { 15724 if ( $tokens_to_go[$ii] eq '(' ) { 15725 $saw_paren = 1; 15726 last; 15727 } 15728 } 15729 next if $saw_paren; 15730 } 15731 } 15732 15733 # opening paren.. 15734 elsif ( $types_to_go[$iend_1] eq '(' ) { 15735 15736 # No longer doing this 15737 } 15738 15739 elsif ( $types_to_go[$iend_1] eq ')' ) { 15740 15741 # No longer doing this 15742 } 15743 15744 # keep a terminal for-semicolon 15745 elsif ( $types_to_go[$iend_1] eq 'f' ) { 15746 next; 15747 } 15748 15749 # if '=' at end of line ... 15750 elsif ( $is_assignment{ $types_to_go[$iend_1] } ) { 15751 15752 my $is_short_quote = 15753 ( $types_to_go[$ibeg_2] eq 'Q' 15754 && $ibeg_2 == $iend_2 15755 && length( $tokens_to_go[$ibeg_2] ) < 15756 $rOpts_short_concatenation_item_length ); 15757 my $is_ternary = 15758 ( $types_to_go[$ibeg_1] eq '?' 15759 && ( $ibeg_3 >= 0 && $types_to_go[$ibeg_3] eq ':' ) ); 15760 15761 # always join an isolated '=', a short quote, or if this 15762 # will put ?/: at start of adjacent lines 15763 if ( $ibeg_1 != $iend_1 15764 && !$is_short_quote 15765 && !$is_ternary ) 15766 { 15767 next 15768 unless ( 15769 ( 15770 15771 # unless we can reduce this to two lines 15772 $nmax < $n + 2 15773 15774 # or three lines, the last with a leading semicolon 15775 || ( $nmax == $n + 2 15776 && $types_to_go[$ibeg_nmax] eq ';' ) 15777 15778 # or the next line ends with a here doc 15779 || $types_to_go[$iend_2] eq 'h' 15780 15781 # or the next line ends in an open paren or brace 15782 # and the break hasn't been forced [dima.t] 15783 || ( !$forced_breakpoint_to_go[$iend_1] 15784 && $types_to_go[$iend_2] eq '{' ) 15785 ) 15786 15787 # do not recombine if the two lines might align well 15788 # this is a very approximate test for this 15789 && ( $ibeg_3 >= 0 15790 && $types_to_go[$ibeg_2] ne 15791 $types_to_go[$ibeg_3] ) 15792 ); 15793 15794 # -lp users often prefer this: 15795 # my $title = function($env, $env, $sysarea, 15796 # "bubba Borrower Entry"); 15797 # so we will recombine if -lp 
is used we have ending 15798 # comma 15799 if ( !$rOpts_line_up_parentheses 15800 || $types_to_go[$iend_2] ne ',' ) 15801 { 15802 15803 # otherwise, scan the rhs line up to last token for 15804 # complexity. Note that we are not counting the last 15805 # token in case it is an opening paren. 15806 my $tv = 0; 15807 my $depth = $nesting_depth_to_go[$ibeg_2]; 15808 for ( my $i = $ibeg_2 + 1 ; $i < $iend_2 ; $i++ ) { 15809 if ( $nesting_depth_to_go[$i] != $depth ) { 15810 $tv++; 15811 last if ( $tv > 1 ); 15812 } 15813 $depth = $nesting_depth_to_go[$i]; 15814 } 15815 15816 # ok to recombine if no level changes before last token 15817 if ( $tv > 0 ) { 15818 15819 # otherwise, do not recombine if more than two 15820 # level changes. 15821 next if ( $tv > 1 ); 15822 15823 # check total complexity of the two adjacent lines 15824 # that will occur if we do this join 15825 my $istop = 15826 ( $n < $nmax ) ? $$ri_end[ $n + 1 ] : $iend_2; 15827 for ( my $i = $iend_2 ; $i <= $istop ; $i++ ) { 15828 if ( $nesting_depth_to_go[$i] != $depth ) { 15829 $tv++; 15830 last if ( $tv > 2 ); 15831 } 15832 $depth = $nesting_depth_to_go[$i]; 15833 } 15834 15835 # do not recombine if total is more than 2 level changes 15836 next if ( $tv > 2 ); 15837 } 15838 } 15839 } 15840 15841 unless ( $tokens_to_go[$ibeg_2] =~ /^[\{\(\[]$/ ) { 15842 $forced_breakpoint_to_go[$iend_1] = 0; 15843 } 15844 } 15845 15846 # for keywords.. 
15847 elsif ( $types_to_go[$iend_1] eq 'k' ) { 15848 15849 # make major control keywords stand out 15850 # (recombine.t) 15851 next 15852 if ( 15853 15854 #/^(last|next|redo|return)$/ 15855 $is_last_next_redo_return{ $tokens_to_go[$iend_1] } 15856 15857 # but only if followed by multiple lines 15858 && $n < $nmax 15859 ); 15860 15861 if ( $is_and_or{ $tokens_to_go[$iend_1] } ) { 15862 next 15863 unless $want_break_before{ $tokens_to_go[$iend_1] }; 15864 } 15865 } 15866 15867 # handle trailing + - * / 15868 elsif ( $is_math_op{ $types_to_go[$iend_1] } ) { 15869 15870 # combine lines if next line has single number 15871 # or a short term followed by same operator 15872 my $i_next_nonblank = $ibeg_2; 15873 my $i_next_next = $i_next_nonblank + 1; 15874 $i_next_next++ if ( $types_to_go[$i_next_next] eq 'b' ); 15875 my $number_follows = $types_to_go[$i_next_nonblank] eq 'n' 15876 && ( 15877 $i_next_nonblank == $iend_2 15878 || ( $i_next_next == $iend_2 15879 && $is_math_op{ $types_to_go[$i_next_next] } ) 15880 || $types_to_go[$i_next_next] eq ';' 15881 ); 15882 15883 # find token before last operator of previous line 15884 my $iend_1_minus = $iend_1; 15885 $iend_1_minus-- 15886 if ( $iend_1_minus > $ibeg_1 ); 15887 $iend_1_minus-- 15888 if ( $types_to_go[$iend_1_minus] eq 'b' 15889 && $iend_1_minus > $ibeg_1 ); 15890 15891 my $short_term_follows = 15892 ( $types_to_go[$iend_2] eq $types_to_go[$iend_1] 15893 && $types_to_go[$iend_1_minus] =~ /^[in]$/ 15894 && $iend_2 <= $ibeg_2 + 2 15895 && length( $tokens_to_go[$ibeg_2] ) < 15896 $rOpts_short_concatenation_item_length ); 15897 15898 next 15899 unless ( $number_follows || $short_term_follows ); 15900 } 15901 15902 #---------------------------------------------------------- 15903 # Section 2: Now examine token at $ibeg_2 (left end of second 15904 # line of pair) 15905 #---------------------------------------------------------- 15906 15907 # join lines identified above as capable of 15908 # causing an outdented line with 
leading closing paren 15909 if ($previous_outdentable_closing_paren) { 15910 $forced_breakpoint_to_go[$iend_1] = 0; 15911 } 15912 15913 # do not recombine lines with leading : 15914 elsif ( $types_to_go[$ibeg_2] eq ':' ) { 15915 $leading_amp_count++; 15916 next if $want_break_before{ $types_to_go[$ibeg_2] }; 15917 } 15918 15919 # handle lines with leading &&, || 15920 elsif ( $is_amp_amp{ $types_to_go[$ibeg_2] } ) { 15921 15922 $leading_amp_count++; 15923 15924 # ok to recombine if it follows a ? or : 15925 # and is followed by an open paren.. 15926 my $ok = 15927 ( $is_ternary{ $types_to_go[$ibeg_1] } 15928 && $tokens_to_go[$iend_2] eq '(' ) 15929 15930 # or is followed by a ? or : at same depth 15931 # 15932 # We are looking for something like this. We can 15933 # recombine the && line with the line above to make the 15934 # structure more clear: 15935 # return 15936 # exists $G->{Attr}->{V} 15937 # && exists $G->{Attr}->{V}->{$u} 15938 # ? %{ $G->{Attr}->{V}->{$u} } 15939 # : (); 15940 # 15941 # We should probably leave something like this alone: 15942 # return 15943 # exists $G->{Attr}->{E} 15944 # && exists $G->{Attr}->{E}->{$u} 15945 # && exists $G->{Attr}->{E}->{$u}->{$v} 15946 # ? %{ $G->{Attr}->{E}->{$u}->{$v} } 15947 # : (); 15948 # so that we either have all of the &&'s (or ||'s) 15949 # on one line, as in the first example, or break at 15950 # each one as in the second example. However, it 15951 # sometimes makes things worse to check for this because 15952 # it prevents multiple recombinations. So this is not done. 15953 || ( $ibeg_3 >= 0 15954 && $is_ternary{ $types_to_go[$ibeg_3] } 15955 && $nesting_depth_to_go[$ibeg_3] == 15956 $nesting_depth_to_go[$ibeg_2] ); 15957 15958 next if !$ok && $want_break_before{ $types_to_go[$ibeg_2] }; 15959 $forced_breakpoint_to_go[$iend_1] = 0; 15960 15961 # tweak the bond strength to give this joint priority 15962 # over ? 
and : 15963 $bs_tweak = 0.25; 15964 } 15965 15966 # Identify and recombine a broken ?/: chain 15967 elsif ( $types_to_go[$ibeg_2] eq '?' ) { 15968 15969 # Do not recombine different levels 15970 my $lev = $levels_to_go[$ibeg_2]; 15971 next if ( $lev ne $levels_to_go[$ibeg_1] ); 15972 15973 # Do not recombine a '?' if either next line or 15974 # previous line does not start with a ':'. The reasons 15975 # are that (1) no alignment of the ? will be possible 15976 # and (2) the expression is somewhat complex, so the 15977 # '?' is harder to see in the interior of the line. 15978 my $follows_colon = 15979 $ibeg_1 >= 0 && $types_to_go[$ibeg_1] eq ':'; 15980 my $precedes_colon = 15981 $ibeg_3 >= 0 && $types_to_go[$ibeg_3] eq ':'; 15982 next unless ( $follows_colon || $precedes_colon ); 15983 15984 # we will always combining a ? line following a : line 15985 if ( !$follows_colon ) { 15986 15987 # ...otherwise recombine only if it looks like a chain. 15988 # we will just look at a few nearby lines to see if 15989 # this looks like a chain. 15990 my $local_count = 0; 15991 foreach my $ii ( $ibeg_0, $ibeg_1, $ibeg_3, $ibeg_4 ) { 15992 $local_count++ 15993 if $ii >= 0 15994 && $types_to_go[$ii] eq ':' 15995 && $levels_to_go[$ii] == $lev; 15996 } 15997 next unless ( $local_count > 1 ); 15998 } 15999 $forced_breakpoint_to_go[$iend_1] = 0; 16000 } 16001 16002 # do not recombine lines with leading '.' 16003 elsif ( $types_to_go[$ibeg_2] =~ /^(\.)$/ ) { 16004 my $i_next_nonblank = $ibeg_2 + 1; 16005 if ( $types_to_go[$i_next_nonblank] eq 'b' ) { 16006 $i_next_nonblank++; 16007 } 16008 16009 next 16010 unless ( 16011 16012 # ... unless there is just one and we can reduce 16013 # this to two lines if we do. For example, this 16014 # 16015 # 16016 # $bodyA .= 16017 # '($dummy, $pat) = &get_next_tex_cmd;' . '$args .= $pat;' 16018 # 16019 # looks better than this: 16020 # $bodyA .= '($dummy, $pat) = &get_next_tex_cmd;' 16021 # . 
'$args .= $pat;' 16022 16023 ( 16024 $n == 2 16025 && $n == $nmax 16026 && $types_to_go[$ibeg_1] ne $types_to_go[$ibeg_2] 16027 ) 16028 16029 # ... or this would strand a short quote , like this 16030 # . "some long qoute" 16031 # . "\n"; 16032 || ( $types_to_go[$i_next_nonblank] eq 'Q' 16033 && $i_next_nonblank >= $iend_2 - 1 16034 && length( $tokens_to_go[$i_next_nonblank] ) < 16035 $rOpts_short_concatenation_item_length ) 16036 ); 16037 } 16038 16039 # handle leading keyword.. 16040 elsif ( $types_to_go[$ibeg_2] eq 'k' ) { 16041 16042 # handle leading "or" 16043 if ( $tokens_to_go[$ibeg_2] eq 'or' ) { 16044 next 16045 unless ( 16046 $this_line_is_semicolon_terminated 16047 && ( 16048 16049 # following 'if' or 'unless' or 'or' 16050 $types_to_go[$ibeg_1] eq 'k' 16051 && $is_if_unless{ $tokens_to_go[$ibeg_1] } 16052 16053 # important: only combine a very simple or 16054 # statement because the step below may have 16055 # combined a trailing 'and' with this or, 16056 # and we do not want to then combine 16057 # everything together 16058 && ( $iend_2 - $ibeg_2 <= 7 ) 16059 ) 16060 ); 16061 } 16062 16063 # handle leading 'and' 16064 elsif ( $tokens_to_go[$ibeg_2] eq 'and' ) { 16065 16066 # Decide if we will combine a single terminal 'and' 16067 # after an 'if' or 'unless'. 
16068 16069 # This looks best with the 'and' on the same 16070 # line as the 'if': 16071 # 16072 # $a = 1 16073 # if $seconds and $nu < 2; 16074 # 16075 # But this looks better as shown: 16076 # 16077 # $a = 1 16078 # if !$this->{Parents}{$_} 16079 # or $this->{Parents}{$_} eq $_; 16080 # 16081 next 16082 unless ( 16083 $this_line_is_semicolon_terminated 16084 && ( 16085 16086 # following 'if' or 'unless' or 'or' 16087 $types_to_go[$ibeg_1] eq 'k' 16088 && ( $is_if_unless{ $tokens_to_go[$ibeg_1] } 16089 || $tokens_to_go[$ibeg_1] eq 'or' ) 16090 ) 16091 ); 16092 } 16093 16094 # handle leading "if" and "unless" 16095 elsif ( $is_if_unless{ $tokens_to_go[$ibeg_2] } ) { 16096 16097 # FIXME: This is still experimental..may not be too useful 16098 next 16099 unless ( 16100 $this_line_is_semicolon_terminated 16101 16102 # previous line begins with 'and' or 'or' 16103 && $types_to_go[$ibeg_1] eq 'k' 16104 && $is_and_or{ $tokens_to_go[$ibeg_1] } 16105 16106 ); 16107 } 16108 16109 # handle all other leading keywords 16110 else { 16111 16112 # keywords look best at start of lines, 16113 # but combine things like "1 while" 16114 unless ( $is_assignment{ $types_to_go[$iend_1] } ) { 16115 next 16116 if ( ( $types_to_go[$iend_1] ne 'k' ) 16117 && ( $tokens_to_go[$ibeg_2] ne 'while' ) ); 16118 } 16119 } 16120 } 16121 16122 # similar treatment of && and || as above for 'and' and 'or': 16123 # NOTE: This block of code is currently bypassed because 16124 # of a previous block but is retained for possible future use. 
16125 elsif ( $is_amp_amp{ $types_to_go[$ibeg_2] } ) { 16126 16127 # maybe looking at something like: 16128 # unless $TEXTONLY || $item =~ m%</?(hr>|p>|a|img)%i; 16129 16130 next 16131 unless ( 16132 $this_line_is_semicolon_terminated 16133 16134 # previous line begins with an 'if' or 'unless' keyword 16135 && $types_to_go[$ibeg_1] eq 'k' 16136 && $is_if_unless{ $tokens_to_go[$ibeg_1] } 16137 16138 ); 16139 } 16140 16141 # handle leading + - * / 16142 elsif ( $is_math_op{ $types_to_go[$ibeg_2] } ) { 16143 my $i_next_nonblank = $ibeg_2 + 1; 16144 if ( $types_to_go[$i_next_nonblank] eq 'b' ) { 16145 $i_next_nonblank++; 16146 } 16147 16148 my $i_next_next = $i_next_nonblank + 1; 16149 $i_next_next++ if ( $types_to_go[$i_next_next] eq 'b' ); 16150 16151 my $is_number = ( 16152 $types_to_go[$i_next_nonblank] eq 'n' 16153 && ( $i_next_nonblank >= $iend_2 - 1 16154 || $types_to_go[$i_next_next] eq ';' ) 16155 ); 16156 16157 my $iend_1_nonblank = 16158 $types_to_go[$iend_1] eq 'b' ? $iend_1 - 1 : $iend_1; 16159 my $iend_2_nonblank = 16160 $types_to_go[$iend_2] eq 'b' ? $iend_2 - 1 : $iend_2; 16161 16162 my $is_short_term = 16163 ( $types_to_go[$ibeg_2] eq $types_to_go[$ibeg_1] 16164 && $types_to_go[$iend_2_nonblank] =~ /^[in]$/ 16165 && $types_to_go[$iend_1_nonblank] =~ /^[in]$/ 16166 && $iend_2_nonblank <= $ibeg_2 + 2 16167 && length( $tokens_to_go[$iend_2_nonblank] ) < 16168 $rOpts_short_concatenation_item_length ); 16169 16170 # Combine these lines if this line is a single 16171 # number, or if it is a short term with same 16172 # operator as the previous line. For example, in 16173 # the following code we will combine all of the 16174 # short terms $A, $B, $C, $D, $E, $F, together 16175 # instead of leaving them one per line: 16176 # my $time = 16177 # $A * $B * $C * $D * $E * $F * 16178 # ( 2. * $eps * $sigma * $area ) * 16179 # ( 1. / $tcold**3 - 1. / $thot**3 ); 16180 # This can be important in math-intensive code. 
16181 next 16182 unless ( 16183 $is_number 16184 || $is_short_term 16185 16186 # or if we can reduce this to two lines if we do. 16187 || ( $n == 2 16188 && $n == $nmax 16189 && $types_to_go[$ibeg_1] ne $types_to_go[$ibeg_2] ) 16190 ); 16191 } 16192 16193 # handle line with leading = or similar 16194 elsif ( $is_assignment{ $types_to_go[$ibeg_2] } ) { 16195 next unless $n == 1; 16196 next 16197 unless ( 16198 16199 # unless we can reduce this to two lines 16200 $nmax == 2 16201 16202 # or three lines, the last with a leading semicolon 16203 || ( $nmax == 3 && $types_to_go[$ibeg_nmax] eq ';' ) 16204 16205 # or the next line ends with a here doc 16206 || $types_to_go[$iend_2] eq 'h' 16207 ); 16208 } 16209 16210 #---------------------------------------------------------- 16211 # Section 3: 16212 # Combine the lines if we arrive here and it is possible 16213 #---------------------------------------------------------- 16214 16215 # honor hard breakpoints 16216 next if ( $forced_breakpoint_to_go[$iend_1] > 0 ); 16217 16218 my $bs = $bond_strength_to_go[$iend_1] + $bs_tweak; 16219 16220 # combined line cannot be too long 16221 next 16222 if excess_line_length( $ibeg_1, $iend_2 ) > 0; 16223 16224 # do not recombine if we would skip in indentation levels 16225 if ( $n < $nmax ) { 16226 my $if_next = $$ri_beg[ $n + 1 ]; 16227 next 16228 if ( 16229 $levels_to_go[$ibeg_1] < $levels_to_go[$ibeg_2] 16230 && $levels_to_go[$ibeg_2] < $levels_to_go[$if_next] 16231 16232 # but an isolated 'if (' is undesirable 16233 && !( 16234 $n == 1 16235 && $iend_1 - $ibeg_1 <= 2 16236 && $types_to_go[$ibeg_1] eq 'k' 16237 && $tokens_to_go[$ibeg_1] eq 'if' 16238 && $tokens_to_go[$iend_1] ne '(' 16239 ) 16240 ); 16241 } 16242 16243 # honor no-break's 16244 next if ( $bs == NO_BREAK ); 16245 16246 # remember the pair with the greatest bond strength 16247 if ( !$n_best ) { 16248 $n_best = $n; 16249 $bs_best = $bs; 16250 } 16251 else { 16252 16253 if ( $bs > $bs_best ) { 16254 $n_best = $n; 16255 
sub break_all_chain_tokens {

    # Scan the current breakpoints looking for breaks at certain "chain
    # operators" (. : && || + etc) which often occur repeatedly in a long
    # statement.  If we see a break at any one, break at all similar tokens
    # within the same container.
    #
    # Input:
    #   $ri_left  - ref to array of indexes of the first token of each line
    #   $ri_right - ref to array of indexes of the last token of each line
    #
    # Any new breaks are applied to these two arrays by calling
    # insert_additional_breaks.  There is no meaningful return value.
    #
    # Method:
    #   1. Collect the chain operator types found at the left ends of
    #      lines (for operators we break before) and at the right ends of
    #      lines (for operators we break after).
    #   2. Collect all interior tokens having one of those types.
    #   3. For each interior token which is in the same container as one
    #      of the line-end tokens of the same type, request a new break.
    my ( $ri_left, $ri_right ) = @_;

    my %saw_chain_type;
    my %left_chain_type;
    my %right_chain_type;
    my %interior_chain_type;
    my $nmax = @$ri_right - 1;

    # scan the left and right end tokens of all lines
    my $count = 0;
    for my $n ( 0 .. $nmax ) {
        my $il    = $$ri_left[$n];
        my $ir    = $$ri_right[$n];
        my $typel = $types_to_go[$il];
        my $typer = $types_to_go[$ir];
        $typel = '+' if ( $typel eq '-' );    # treat + and - the same
        $typer = '+' if ( $typer eq '-' );
        $typel = '*' if ( $typel eq '/' );    # treat * and / the same
        $typer = '*' if ( $typer eq '/' );
        my $tokenl = $tokens_to_go[$il];
        my $tokenr = $tokens_to_go[$ir];

        # Note that a '?' at either end skips the rest of this line
        if ( $is_chain_operator{$tokenl} && $want_break_before{$typel} ) {
            next if ( $typel eq '?' );
            push @{ $left_chain_type{$typel} }, $il;
            $saw_chain_type{$typel} = 1;
            $count++;
        }
        if ( $is_chain_operator{$tokenr} && !$want_break_before{$typer} ) {
            next if ( $typer eq '?' );
            push @{ $right_chain_type{$typer} }, $ir;
            $saw_chain_type{$typer} = 1;
            $count++;
        }
    }
    return unless $count;

    # now look for any interior tokens of the same types
    $count = 0;
    for my $n ( 0 .. $nmax ) {
        my $il = $$ri_left[$n];
        my $ir = $$ri_right[$n];
        for ( my $i = $il + 1 ; $i < $ir ; $i++ ) {
            my $type = $types_to_go[$i];
            $type = '+' if ( $type eq '-' );
            $type = '*' if ( $type eq '/' );
            if ( $saw_chain_type{$type} ) {
                push @{ $interior_chain_type{$type} }, $i;
                $count++;
            }
        }
    }
    return unless $count;

    # now make a list of all new break points
    my @insert_list;

    # Loop over all chain types.  The keys are sorted because the 'last'
    # below makes the result depend on the order in which the types are
    # visited; the order returned by 'keys' alone is not specified and
    # could make the formatted output vary from run to run.
    foreach my $type ( sort keys %saw_chain_type ) {

        # quit if just ONE continuation line with leading .  For example--
        # print LATEXFILE '\framebox{\parbox[c][' . $h . '][t]{' . $w . '}{'
        #  . $contents;
        last if ( $nmax == 1 && $type =~ /^[\.\+]$/ );

        # loop over all interior chain tokens
        foreach my $itest ( @{ $interior_chain_type{$type} } ) {

            # loop over all left end tokens of same type
            if ( $left_chain_type{$type} ) {
                next if $nobreak_to_go[ $itest - 1 ];
                foreach my $i ( @{ $left_chain_type{$type} } ) {
                    next unless in_same_container( $i, $itest );
                    push @insert_list, $itest - 1;

                    # Break at matching ? if this : is at a different level.
                    # For example, the ? before $THRf_DEAD in the following
                    # should get a break if its : gets a break.
                    #
                    # my $flags =
                    #     ( $_ & 1 ) ? ( $_ & 4 ) ? $THRf_DEAD : $THRf_ZOMBIE
                    #   : ( $_ & 4 ) ? $THRf_R_DETACHED
                    #   :              $THRf_R_JOINABLE;
                    if (   $type eq ':'
                        && $levels_to_go[$i] != $levels_to_go[$itest] )
                    {
                        my $i_question = $mate_index_to_go[$itest];
                        if ( $i_question > 0 ) {
                            push @insert_list, $i_question - 1;
                        }
                    }

                    # one matching line-end token is enough
                    last;
                }
            }

            # loop over all right end tokens of same type
            if ( $right_chain_type{$type} ) {
                next if $nobreak_to_go[$itest];
                foreach my $i ( @{ $right_chain_type{$type} } ) {
                    next unless in_same_container( $i, $itest );
                    push @insert_list, $itest;

                    # break at matching ? if this : is at a different level
                    if (   $type eq ':'
                        && $levels_to_go[$i] != $levels_to_go[$itest] )
                    {
                        my $i_question = $mate_index_to_go[$itest];
                        if ( $i_question >= 0 ) {
                            push @insert_list, $i_question;
                        }
                    }

                    # one matching line-end token is enough
                    last;
                }
            }
        }
    }

    # insert any new break points
    if (@insert_list) {
        insert_additional_breaks( \@insert_list, $ri_left, $ri_right );
    }
}
sub break_equals {

    # Look for an assignment operator on the first line of a statement
    # which would be a good place for an extra break.  For example, in
    #
    #    $HOME = $ENV{HOME}
    #      || $ENV{LOGDIR}
    #      || $pw[7]
    #      || die "no home directory for user $<";
    #
    # we can break at the '=' to get this, which is a little nicer:
    #
    #    $HOME =
    #         $ENV{HOME}
    #      || $ENV{LOGDIR}
    #      || $pw[7]
    #      || die "no home directory for user $<";
    #
    # The logic here follows the logic in set_logical_padding, which
    # will add the padding in the second line to improve alignment.
    #
    # Input:
    #   $ri_left  - ref to array of indexes of the first token of each line
    #   $ri_right - ref to array of indexes of the last token of each line
    # New breaks are made by calling insert_additional_breaks.
    my ( $ri_left, $ri_right ) = @_;

    # we need at least three lines
    return unless ( @$ri_right >= 3 );

    # The second and third lines (n=1 and n=2) must both begin with the
    # same chain operator at the same nesting depth.
    my ( $lead_token, $lead_depth );
    foreach my $line ( 1, 2 ) {
        my $i_lead = $ri_left->[$line];
        my $type   = $types_to_go[$i_lead];
        my $token  = $tokens_to_go[$i_lead];
        my $depth  = $nesting_depth_to_go[$i_lead];

        # word operators (and, or) are looked up by token;
        # symbol operators (+ - * / : ? && ||) are looked up by type
        my $operator_key = ( $token =~ /^\w/ ) ? $token : $type;
        return unless ( $is_chain_operator{$operator_key} );

        if ( $line > 1 ) {
            return if ( $token ne $lead_token || $depth ne $lead_depth );
        }
        ( $lead_token, $lead_depth ) = ( $token, $depth );
    }

    # Collect break requests at interior assignment operators of the
    # first line which are at the depth of the operator chain.
    my $i_line_beg = $ri_left->[0];
    my $i_line_end = $ri_right->[0];
    my @new_breaks;
    foreach my $i ( reverse( $i_line_beg + 1 .. $i_line_end - 1 ) ) {
        my $type = $types_to_go[$i];
        next unless ( $is_assignment{$type} );
        next unless ( $nesting_depth_to_go[$i] eq $lead_depth );
        push @new_breaks, ( $want_break_before{$type} ? $i - 1 : $i );
    }

    # Break after a 'return' followed by a chain of operators
    #  return ( $^O !~ /win32|dos/i )
    #    && ( $^O ne 'VMS' )
    #    && ( $^O ne 'OS2' )
    #    && ( $^O ne 'MacOS' );
    # To give:
    #  return
    #       ( $^O !~ /win32|dos/i )
    #    && ( $^O ne 'VMS' )
    #    && ( $^O ne 'OS2' )
    #    && ( $^O ne 'MacOS' );
    my $i_return = 0;
    my $is_leading_return =
         $types_to_go[$i_return] eq 'k'
      && $tokens_to_go[$i_return] eq 'return'
      && $i_line_end > $i_line_beg
      && $nesting_depth_to_go[$i_return] eq $lead_depth;
    push( @new_breaks, $i_return ) if ($is_leading_return);

    return unless (@new_breaks);

    # One final check...
    # Scan the second and third lines and give up if either has an
    # assignment at the chain depth.  We want to avoid breaking at an =
    # to make something like this:
    #   unless ( $icon =
    #           $html_icons{"$type-$state"}
    #        or $icon = $html_icons{$type}
    #        or $icon = $html_icons{$state} )
    foreach my $line ( 1, 2 ) {
        foreach my $i ( $ri_left->[$line] + 1 .. $ri_right->[$line] ) {
            my $type = $types_to_go[$i];
            next unless ( $is_assignment{$type} );
            return if ( $nesting_depth_to_go[$i] eq $lead_depth );
        }
    }

    # ok, insert the new break points
    insert_additional_breaks( \@new_breaks, $ri_left, $ri_right );
}
sub insert_final_breaks {

    # For long ternary chains: if the '?' belonging to the first ':' found
    # at a line end lies in the interior of a line, look backwards from
    # that '?' for a good place to add one more break.
    #
    # Input:
    #   $ri_left  - ref to array of indexes of the first token of each line
    #   $ri_right - ref to array of indexes of the last token of each line
    # At most one new break is made, by calling insert_additional_breaks.
    my ( $ri_left, $ri_right ) = @_;

    # Scan the left and right end tokens of the lines, in order, for the
    # first ':'.  Give up if a '?' is found at a line end before it.
    my $i_first_colon = -1;
    foreach my $line ( 0 .. @$ri_right - 1 ) {
        my $i_beg    = $ri_left->[$line];
        my $i_end    = $ri_right->[$line];
        my $type_beg = $types_to_go[$i_beg];
        my $type_end = $types_to_go[$i_end];

        return if ( $type_beg eq '?' || $type_end eq '?' );

        if ( $type_beg eq ':' ) {
            $i_first_colon = $i_beg;
            last;
        }
        if ( $type_end eq ':' ) {
            $i_first_colon = $i_end;
            last;
        }
    }
    return unless ( $i_first_colon > 0 );

    # locate the '?' which goes with this ':'
    my $i_question = $mate_index_to_go[$i_first_colon];
    return unless ( $i_question > 0 );

    # Search backwards from the '?' for the nearest good breakpoint.
    # For now, a good break is either a comma or a 'return'.
    foreach my $ii ( reverse( 0 .. $i_question - 1 ) ) {
        my $type  = $types_to_go[$ii];
        my $token = $tokens_to_go[$ii];

        my $is_comma  = ( $type eq ',' );
        my $is_return = ( $type eq 'k' && $token eq 'return' );
        next unless ( $is_comma || $is_return );
        next unless ( in_same_container( $ii, $i_question ) );

        # insert the new break point and quit
        insert_additional_breaks( [$ii], $ri_left, $ri_right );
        last;
    }
}
sub in_same_container {

    # Return 1 if the tokens at indexes $i1 and $i2 are in the same
    # container and are not separated by a comma, ? or : (or by '||' or
    # 'or').  Otherwise return nothing.  When the token at $i1 is a ':'
    # only a comma counts as a separator.  A '=>' is treated as a comma.
    my ( $i1, $i2 ) = @_;

    # the reference type and depth are those of the first index given
    my $ref_type  = $types_to_go[$i1];
    my $ref_depth = $nesting_depth_to_go[$i1];
    return if ( $nesting_depth_to_go[$i2] != $ref_depth );

    # put the two indexes in increasing order
    my ( $i_lo, $i_hi ) = ( $i2 < $i1 ) ? ( $i2, $i1 ) : ( $i1, $i2 );

    ###########################################################
    # This is potentially a very slow routine and not critical.
    # For safety just give up for large differences.
    # See test file 'infinite_loop.txt'
    # TODO: replace this loop with a data structure
    ###########################################################
    return if ( $i_hi - $i_lo > 200 );

    my $is_colon = ( $ref_type eq ':' );

    foreach my $i ( $i_lo + 1 .. $i_hi - 1 ) {
        my $depth = $nesting_depth_to_go[$i];

        # we have left the container if the depth drops
        return if ( $depth < $ref_depth );

        # ignore anything in a deeper container
        next if ( $depth > $ref_depth );

        my $tok = $tokens_to_go[$i];
        if ( $tok eq '=>' ) { $tok = ',' }    # treat => same as ,

        # a comma is a separator for all token types
        return if ( $tok =~ /^,$/ );

        # Example: we would not want to break at any of these .'s
        #  : "<A HREF=\"#item_" . htmlify( 0, $s2 ) . "\">$str</A>"
        next if ($is_colon);
        return if ( $tok =~ /^[:?]$/ || $tok eq '||' || $tok eq 'or' );
    }
    return 1;
}
16605 16606 # Method: 16607 # This routine is part of series of routines which adjust line 16608 # lengths. It is only called if a statement is longer than the 16609 # maximum line length, or if a preliminary scanning located 16610 # desirable break points. Sub scan_list has already looked at 16611 # these tokens and set breakpoints (in array 16612 # $forced_breakpoint_to_go[$i]) where it wants breaks (for example 16613 # after commas, after opening parens, and before closing parens). 16614 # This routine will honor these breakpoints and also add additional 16615 # breakpoints as necessary to keep the line length below the maximum 16616 # requested. It bases its decision on where the 'bond strength' is 16617 # lowest. 16618 16619 # Output: returns references to the arrays: 16620 # @i_first 16621 # @i_last 16622 # which contain the indexes $i of the first and last tokens on each 16623 # line. 16624 16625 # In addition, the array: 16626 # $forced_breakpoint_to_go[$i] 16627 # may be updated to be =1 for any index $i after which there must be 16628 # a break. This signals later routines not to undo the breakpoint. 
16629 16630 my $saw_good_break = shift; 16631 my @i_first = (); # the first index to output 16632 my @i_last = (); # the last index to output 16633 my @i_colon_breaks = (); # needed to decide if we have to break at ?'s 16634 if ( $types_to_go[0] eq ':' ) { push @i_colon_breaks, 0 } 16635 16636 set_bond_strengths(); 16637 16638 my $imin = 0; 16639 my $imax = $max_index_to_go; 16640 if ( $types_to_go[$imin] eq 'b' ) { $imin++ } 16641 if ( $types_to_go[$imax] eq 'b' ) { $imax-- } 16642 my $i_begin = $imin; # index for starting next iteration 16643 16644 my $leading_spaces = leading_spaces_to_go($imin); 16645 my $line_count = 0; 16646 my $last_break_strength = NO_BREAK; 16647 my $i_last_break = -1; 16648 my $max_bias = 0.001; 16649 my $tiny_bias = 0.0001; 16650 my $leading_alignment_token = ""; 16651 my $leading_alignment_type = ""; 16652 16653 # see if any ?/:'s are in order 16654 my $colons_in_order = 1; 16655 my $last_tok = ""; 16656 my @colon_list = grep /^[\?\:]$/, @tokens_to_go[ 0 .. $max_index_to_go ]; 16657 my $colon_count = @colon_list; 16658 foreach (@colon_list) { 16659 if ( $_ eq $last_tok ) { $colons_in_order = 0; last } 16660 $last_tok = $_; 16661 } 16662 16663 # This is a sufficient but not necessary condition for colon chain 16664 my $is_colon_chain = ( $colons_in_order && @colon_list > 2 ); 16665 16666 #------------------------------------------------------- 16667 # BEGINNING of main loop to set continuation breakpoints 16668 # Keep iterating until we reach the end 16669 #------------------------------------------------------- 16670 while ( $i_begin <= $imax ) { 16671 my $lowest_strength = NO_BREAK; 16672 my $starting_sum = $lengths_to_go[$i_begin]; 16673 my $i_lowest = -1; 16674 my $i_test = -1; 16675 my $lowest_next_token = ''; 16676 my $lowest_next_type = 'b'; 16677 my $i_lowest_next_nonblank = -1; 16678 16679 #------------------------------------------------------- 16680 # BEGINNING of inner loop to find the best next breakpoint 16681 
#------------------------------------------------------- 16682 for ( $i_test = $i_begin ; $i_test <= $imax ; $i_test++ ) { 16683 my $type = $types_to_go[$i_test]; 16684 my $token = $tokens_to_go[$i_test]; 16685 my $next_type = $types_to_go[ $i_test + 1 ]; 16686 my $next_token = $tokens_to_go[ $i_test + 1 ]; 16687 my $i_next_nonblank = 16688 ( ( $next_type eq 'b' ) ? $i_test + 2 : $i_test + 1 ); 16689 my $next_nonblank_type = $types_to_go[$i_next_nonblank]; 16690 my $next_nonblank_token = $tokens_to_go[$i_next_nonblank]; 16691 my $next_nonblank_block_type = $block_type_to_go[$i_next_nonblank]; 16692 my $strength = $bond_strength_to_go[$i_test]; 16693 my $must_break = 0; 16694 16695 # FIXME: TESTING: Might want to be able to break after these 16696 # force an immediate break at certain operators 16697 # with lower level than the start of the line 16698 if ( 16699 ( 16700 $next_nonblank_type =~ /^(\.|\&\&|\|\|)$/ 16701 || ( $next_nonblank_type eq 'k' 16702 && $next_nonblank_token =~ /^(and|or)$/ ) 16703 ) 16704 && ( $nesting_depth_to_go[$i_begin] > 16705 $nesting_depth_to_go[$i_next_nonblank] ) 16706 ) 16707 { 16708 set_forced_breakpoint($i_next_nonblank); 16709 } 16710 16711 if ( 16712 16713 # Try to put a break where requested by scan_list 16714 $forced_breakpoint_to_go[$i_test] 16715 16716 # break between ) { in a continued line so that the '{' can 16717 # be outdented 16718 # See similar logic in scan_list which catches instances 16719 # where a line is just something like ') {' 16720 || ( $line_count 16721 && ( $token eq ')' ) 16722 && ( $next_nonblank_type eq '{' ) 16723 && ($next_nonblank_block_type) 16724 && !$rOpts->{'opening-brace-always-on-right'} ) 16725 16726 # There is an implied forced break at a terminal opening brace 16727 || ( ( $type eq '{' ) && ( $i_test == $imax ) ) 16728 ) 16729 { 16730 16731 # Forced breakpoints must sometimes be overridden, for example 16732 # because of a side comment causing a NO_BREAK. 
It is easier 16733 # to catch this here than when they are set. 16734 if ( $strength < NO_BREAK ) { 16735 $strength = $lowest_strength - $tiny_bias; 16736 $must_break = 1; 16737 } 16738 } 16739 16740 # quit if a break here would put a good terminal token on 16741 # the next line and we already have a possible break 16742 if ( 16743 !$must_break 16744 && ( $next_nonblank_type =~ /^[\;\,]$/ ) 16745 && ( 16746 ( 16747 $leading_spaces + 16748 $lengths_to_go[ $i_next_nonblank + 1 ] - 16749 $starting_sum 16750 ) > $rOpts_maximum_line_length 16751 ) 16752 ) 16753 { 16754 last if ( $i_lowest >= 0 ); 16755 } 16756 16757 # Avoid a break which would strand a single punctuation 16758 # token. For example, we do not want to strand a leading 16759 # '.' which is followed by a long quoted string. 16760 if ( 16761 !$must_break 16762 && ( $i_test == $i_begin ) 16763 && ( $i_test < $imax ) 16764 && ( $token eq $type ) 16765 && ( 16766 ( 16767 $leading_spaces + 16768 $lengths_to_go[ $i_test + 1 ] - 16769 $starting_sum 16770 ) <= $rOpts_maximum_line_length 16771 ) 16772 ) 16773 { 16774 $i_test++; 16775 16776 if ( ( $i_test < $imax ) && ( $next_type eq 'b' ) ) { 16777 $i_test++; 16778 } 16779 redo; 16780 } 16781 16782 if ( ( $strength <= $lowest_strength ) && ( $strength < NO_BREAK ) ) 16783 { 16784 16785 # break at previous best break if it would have produced 16786 # a leading alignment of certain common tokens, and it 16787 # is different from the latest candidate break 16788 last 16789 if ($leading_alignment_type); 16790 16791 # Force at least one breakpoint if old code had good 16792 # break It is only called if a breakpoint is required or 16793 # desired. This will probably need some adjustments 16794 # over time. A goal is to try to be sure that, if a new 16795 # side comment is introduced into formated text, then 16796 # the same breakpoints will occur. 
scbreak.t 16797 last 16798 if ( 16799 $i_test == $imax # we are at the end 16800 && !$forced_breakpoint_count # 16801 && $saw_good_break # old line had good break 16802 && $type =~ /^[#;\{]$/ # and this line ends in 16803 # ';' or side comment 16804 && $i_last_break < 0 # and we haven't made a break 16805 && $i_lowest > 0 # and we saw a possible break 16806 && $i_lowest < $imax - 1 # (but not just before this ;) 16807 && $strength - $lowest_strength < 0.5 * WEAK # and it's good 16808 ); 16809 16810 $lowest_strength = $strength; 16811 $i_lowest = $i_test; 16812 $lowest_next_token = $next_nonblank_token; 16813 $lowest_next_type = $next_nonblank_type; 16814 $i_lowest_next_nonblank = $i_next_nonblank; 16815 last if $must_break; 16816 16817 # set flags to remember if a break here will produce a 16818 # leading alignment of certain common tokens 16819 if ( $line_count > 0 16820 && $i_test < $imax 16821 && ( $lowest_strength - $last_break_strength <= $max_bias ) 16822 ) 16823 { 16824 my $i_last_end = $i_begin - 1; 16825 if ( $types_to_go[$i_last_end] eq 'b' ) { $i_last_end -= 1 } 16826 my $tok_beg = $tokens_to_go[$i_begin]; 16827 my $type_beg = $types_to_go[$i_begin]; 16828 if ( 16829 16830 # check for leading alignment of certain tokens 16831 ( 16832 $tok_beg eq $next_nonblank_token 16833 && $is_chain_operator{$tok_beg} 16834 && ( $type_beg eq 'k' 16835 || $type_beg eq $tok_beg ) 16836 && $nesting_depth_to_go[$i_begin] >= 16837 $nesting_depth_to_go[$i_next_nonblank] 16838 ) 16839 16840 || ( $tokens_to_go[$i_last_end] eq $token 16841 && $is_chain_operator{$token} 16842 && ( $type eq 'k' || $type eq $token ) 16843 && $nesting_depth_to_go[$i_last_end] >= 16844 $nesting_depth_to_go[$i_test] ) 16845 ) 16846 { 16847 $leading_alignment_token = $next_nonblank_token; 16848 $leading_alignment_type = $next_nonblank_type; 16849 } 16850 } 16851 } 16852 16853 my $too_long = 16854 ( $i_test >= $imax ) 16855 ? 
1 16856 : ( 16857 ( 16858 $leading_spaces + 16859 $lengths_to_go[ $i_test + 2 ] - 16860 $starting_sum 16861 ) > $rOpts_maximum_line_length 16862 ); 16863 16864 FORMATTER_DEBUG_FLAG_BREAK 16865 && print 16866"BREAK: testing i = $i_test imax=$imax $types_to_go[$i_test] $next_nonblank_type leading sp=($leading_spaces) next length = $lengths_to_go[$i_test+2] too_long=$too_long str=$strength\n"; 16867 16868 # allow one extra terminal token after exceeding line length 16869 # if it would strand this token. 16870 if ( $rOpts_fuzzy_line_length 16871 && $too_long 16872 && ( $i_lowest == $i_test ) 16873 && ( length($token) > 1 ) 16874 && ( $next_nonblank_type =~ /^[\;\,]$/ ) ) 16875 { 16876 $too_long = 0; 16877 } 16878 16879 last 16880 if ( 16881 ( $i_test == $imax ) # we're done if no more tokens, 16882 || ( 16883 ( $i_lowest >= 0 ) # or no more space and we have a break 16884 && $too_long 16885 ) 16886 ); 16887 } 16888 16889 #------------------------------------------------------- 16890 # END of inner loop to find the best next breakpoint 16891 # Now decide exactly where to put the breakpoint 16892 #------------------------------------------------------- 16893 16894 # it's always ok to break at imax if no other break was found 16895 if ( $i_lowest < 0 ) { $i_lowest = $imax } 16896 16897 # semi-final index calculation 16898 my $i_next_nonblank = ( 16899 ( $types_to_go[ $i_lowest + 1 ] eq 'b' ) 16900 ? $i_lowest + 2 16901 : $i_lowest + 1 16902 ); 16903 my $next_nonblank_type = $types_to_go[$i_next_nonblank]; 16904 my $next_nonblank_token = $tokens_to_go[$i_next_nonblank]; 16905 16906 #------------------------------------------------------- 16907 # ?/: rule 1 : if a break here will separate a '?' on this 16908 # line from its closing ':', then break at the '?' instead. 16909 #------------------------------------------------------- 16910 my $i; 16911 foreach $i ( $i_begin + 1 .. $i_lowest - 1 ) { 16912 next unless ( $tokens_to_go[$i] eq '?' 
); 16913 16914 # do not break if probable sequence of ?/: statements 16915 next if ($is_colon_chain); 16916 16917 # do not break if statement is broken by side comment 16918 next 16919 if ( 16920 $tokens_to_go[$max_index_to_go] eq '#' 16921 && terminal_type( \@types_to_go, \@block_type_to_go, 0, 16922 $max_index_to_go ) !~ /^[\;\}]$/ 16923 ); 16924 16925 # no break needed if matching : is also on the line 16926 next 16927 if ( $mate_index_to_go[$i] >= 0 16928 && $mate_index_to_go[$i] <= $i_next_nonblank ); 16929 16930 $i_lowest = $i; 16931 if ( $want_break_before{'?'} ) { $i_lowest-- } 16932 last; 16933 } 16934 16935 #------------------------------------------------------- 16936 # END of inner loop to find the best next breakpoint: 16937 # Break the line after the token with index i=$i_lowest 16938 #------------------------------------------------------- 16939 16940 # final index calculation 16941 $i_next_nonblank = ( 16942 ( $types_to_go[ $i_lowest + 1 ] eq 'b' ) 16943 ? $i_lowest + 2 16944 : $i_lowest + 1 16945 ); 16946 $next_nonblank_type = $types_to_go[$i_next_nonblank]; 16947 $next_nonblank_token = $tokens_to_go[$i_next_nonblank]; 16948 16949 FORMATTER_DEBUG_FLAG_BREAK 16950 && print "BREAK: best is i = $i_lowest strength = $lowest_strength\n"; 16951 16952 #------------------------------------------------------- 16953 # ?/: rule 2 : if we break at a '?', then break at its ':' 16954 # 16955 # Note: this rule is also in sub scan_list to handle a break 16956 # at the start and end of a line (in case breaks are dictated 16957 # by side comments). 16958 #------------------------------------------------------- 16959 if ( $next_nonblank_type eq '?' ) { 16960 set_closing_breakpoint($i_next_nonblank); 16961 } 16962 elsif ( $types_to_go[$i_lowest] eq '?' 
) { 16963 set_closing_breakpoint($i_lowest); 16964 } 16965 16966 #------------------------------------------------------- 16967 # ?/: rule 3 : if we break at a ':' then we save 16968 # its location for further work below. We may need to go 16969 # back and break at its '?'. 16970 #------------------------------------------------------- 16971 if ( $next_nonblank_type eq ':' ) { 16972 push @i_colon_breaks, $i_next_nonblank; 16973 } 16974 elsif ( $types_to_go[$i_lowest] eq ':' ) { 16975 push @i_colon_breaks, $i_lowest; 16976 } 16977 16978 # here we should set breaks for all '?'/':' pairs which are 16979 # separated by this line 16980 16981 $line_count++; 16982 16983 # save this line segment, after trimming blanks at the ends 16984 push( @i_first, 16985 ( $types_to_go[$i_begin] eq 'b' ) ? $i_begin + 1 : $i_begin ); 16986 push( @i_last, 16987 ( $types_to_go[$i_lowest] eq 'b' ) ? $i_lowest - 1 : $i_lowest ); 16988 16989 # set a forced breakpoint at a container opening, if necessary, to 16990 # signal a break at a closing container. Excepting '(' for now. 
16991 if ( $tokens_to_go[$i_lowest] =~ /^[\{\[]$/ 16992 && !$forced_breakpoint_to_go[$i_lowest] ) 16993 { 16994 set_closing_breakpoint($i_lowest); 16995 } 16996 16997 # get ready to go again 16998 $i_begin = $i_lowest + 1; 16999 $last_break_strength = $lowest_strength; 17000 $i_last_break = $i_lowest; 17001 $leading_alignment_token = ""; 17002 $leading_alignment_type = ""; 17003 $lowest_next_token = ''; 17004 $lowest_next_type = 'b'; 17005 17006 if ( ( $i_begin <= $imax ) && ( $types_to_go[$i_begin] eq 'b' ) ) { 17007 $i_begin++; 17008 } 17009 17010 # update indentation size 17011 if ( $i_begin <= $imax ) { 17012 $leading_spaces = leading_spaces_to_go($i_begin); 17013 } 17014 } 17015 17016 #------------------------------------------------------- 17017 # END of main loop to set continuation breakpoints 17018 # Now go back and make any necessary corrections 17019 #------------------------------------------------------- 17020 17021 #------------------------------------------------------- 17022 # ?/: rule 4 -- if we broke at a ':', then break at 17023 # corresponding '?' unless this is a chain of ?: expressions 17024 #------------------------------------------------------- 17025 if (@i_colon_breaks) { 17026 17027 # using a simple method for deciding if we are in a ?/: chain -- 17028 # this is a chain if it has multiple ?/: pairs all in order; 17029 # otherwise not. 
# Note that if line starts in a ':' we count that above as a break
        my $is_chain = ( $colons_in_order && @i_colon_breaks > 1 );

        unless ($is_chain) {
            my @insert_list = ();
            foreach (@i_colon_breaks) {
                my $i_question = $mate_index_to_go[$_];
                if ( $i_question >= 0 ) {
                    if ( $want_break_before{'?'} ) {
                        $i_question--;
                        if (   $i_question > 0
                            && $types_to_go[$i_question] eq 'b' )
                        {
                            $i_question--;
                        }
                    }

                    if ( $i_question >= 0 ) {
                        push @insert_list, $i_question;
                    }
                }
                insert_additional_breaks( \@insert_list, \@i_first, \@i_last );
            }
        }
    }
    return ( \@i_first, \@i_last, $colon_count );
}

sub insert_additional_breaks {

    # Add line breaks at requested locations after sub
    # set_continuation_breaks has made preliminary breaks.
    #
    # Parameters:
    #   $ri_break_list - ref to list of token indexes; a break is wanted
    #                    just after each of these tokens
    #   $ri_first      - ref to list holding the first token index of
    #                    each line
    #   $ri_last       - ref to list holding the last token index of
    #                    each line
    #
    # @$ri_first and @$ri_last are modified in place: a line which
    # contains a requested break is replaced by two lines.  A request
    # which does not fall strictly inside a line is silently ignored.
    # Nothing useful is returned.

    my ( $ri_break_list, $ri_first, $ri_last ) = @_;
    my $line_number = 0;

    # the requests are handled in increasing order so that $line_number
    # only ever has to move forward
    foreach my $i_break_left ( sort { $a <=> $b } @$ri_break_list ) {

        my $i_f = $ri_first->[$line_number];
        my $i_l = $ri_last->[$line_number];

        # advance to the line which ends after the requested break
        while ( $i_break_left >= $i_l ) {
            $line_number++;

            # shouldn't happen unless caller passes bad indexes
            if ( $line_number >= @$ri_last ) {
                warning(
"Non-fatal program bug: couldn't set break at $i_break_left\n"
                );
                report_definite_bug();
                return;
            }
            $i_f = $ri_first->[$line_number];
            $i_l = $ri_last->[$line_number];
        }

        # the new line starts at the next token, skipping one blank
        my $i_break_right = $i_break_left + 1;
        if ( $types_to_go[$i_break_right] eq 'b' ) { $i_break_right++ }

        # split the line only if both halves stay within its old limits
        if (   $i_break_left >= $i_f
            && $i_break_left < $i_l
            && $i_break_right > $i_f
            && $i_break_right <= $i_l )
        {
            splice( @$ri_first, $line_number, 1, ( $i_f, $i_break_right ) );
            splice( @$ri_last, $line_number, 1, ( $i_break_left, $i_l ) );
        }
    }
}

sub set_closing_breakpoint {

    # Set a breakpoint at the closing token which matches the token at
    # index $i_break.  This is used to break at a ':' which matches a
    # '?', and sub set_continuation_breaks also calls it for an opening
    # '{' or '['.
    #
    # Parameter:
    #   $i_break - index of the opening token in the '_to_go' arrays
    my $i_break = shift;

    if ( $mate_index_to_go[$i_break] >= 0 ) {

        # CAUTION: infinite recursion possible here:
        #   set_closing_breakpoint calls set_forced_breakpoint, and
        #   set_forced_breakpoint call set_closing_breakpoint
        #   ( test files attrib.t, BasicLyx.pm.html).
        # Don't reduce the '2' in the statement below
        if ( $mate_index_to_go[$i_break] > $i_break + 2 ) {

            # break before } ] and ), but sub set_forced_breakpoint will
            # decide to break before or after a ? and :
            my $inc = ( $tokens_to_go[$i_break] eq '?' ) ? 0 : 1;
            set_forced_breakpoint( $mate_index_to_go[$i_break] - $inc );
        }
    }
    else {

        # No mate index is recorded for this token, so just flag its
        # sequence number in %postponed_breakpoint.
        # NOTE(review): presumably the closing token is not in the
        # current batch and the flag is acted on when it arrives --
        # confirm against the code which reads %postponed_breakpoint.
        my $type_sequence = $type_sequence_to_go[$i_break];
        if ($type_sequence) {
            $postponed_breakpoint{$type_sequence} = 1;
        }
    }
}

# check to see if output line tabbing agrees with input line
# this can be very useful for debugging a script which has an extra
# or missing brace
sub compare_indentation_levels {

    # Parameters:
    #   $python_indentation_level     - the level reported as 'input' in
    #                                   the log message
    #   $structural_indentation_level - the level reported as 'output'
    #
    # The start and end of each run of disagreeing lines is written to
    # the log file, for up to MAX_NAG_MESSAGES runs.

    my ( $python_indentation_level, $structural_indentation_level ) = @_;
    if ( ( $python_indentation_level ne $structural_indentation_level ) ) {
        $last_tabbing_disagreement = $input_line_number;

        # only the first line of a run of disagreements is reported
        unless ($in_tabbing_disagreement) {
            $tabbing_disagreement_count++;

            if ( $tabbing_disagreement_count <= MAX_NAG_MESSAGES ) {
                write_logfile_entry(
"Start indentation disagreement: input=$python_indentation_level; output=$structural_indentation_level\n"
                );
            }
            $in_tabbing_disagreement    = $input_line_number;
            $first_tabbing_disagreement = $in_tabbing_disagreement
              unless ($first_tabbing_disagreement);
        }
    }
    else {

        # the levels agree again, so close any open run
        if ($in_tabbing_disagreement) {

            if ( $tabbing_disagreement_count <= MAX_NAG_MESSAGES ) {
                write_logfile_entry(
"End indentation disagreement from input line $in_tabbing_disagreement\n"
                );

                if ( $tabbing_disagreement_count == MAX_NAG_MESSAGES ) {
                    write_logfile_entry(
                        "No further tabbing disagreements will be noted\n");
                }
            }
            $in_tabbing_disagreement = 0;
        }
    }
}

#####################################################################
#
# the Perl::Tidy::IndentationItem class supplies items which contain
# how much whitespace should be used at the start of a line
#
#####################################################################

package Perl::Tidy::IndentationItem;

# Indexes for indentation items
use constant SPACES             => 0;     # total leading white spaces
use constant LEVEL              => 1;     # the indentation 'level'
use constant CI_LEVEL           => 2;     # the 'continuation level'
use constant AVAILABLE_SPACES   => 3;     # how many left spaces available
                                          # for this level
use constant CLOSED             => 4;     # index where we saw closing '}'
use constant COMMA_COUNT        => 5;     # how many commas at this level?
use constant SEQUENCE_NUMBER    => 6;     # output batch number
use constant INDEX              => 7;     # index in output batch list
use constant HAVE_CHILD         => 8;     # any dependents?
use constant RECOVERABLE_SPACES => 9;     # how many spaces to the right
                                          # we would like to move to get
                                          # alignment (negative if left)
use constant ALIGN_PAREN        => 10;    # do we want to try to align
                                          # with an opening structure?
use constant MARKED         => 11;    # if visited by corrector logic
use constant STACK_DEPTH    => 12;    # indentation nesting depth
use constant STARTING_INDEX => 13;    # first token index of this level
use constant ARROW_COUNT    => 14;    # how many =>'s

sub new {

    # Build one 'indentation_item', the record which describes a single
    # level of leading whitespace when the '-lp' indentation is used.
    # The item is a blessed anonymous array; each slot is addressed by
    # one of the index constants of this package.
    my (
        $class,               $spaces,           $level,
        $ci_level,            $available_spaces, $index,
        $gnu_sequence_number, $align_paren,      $stack_depth,
        $starting_index,
    ) = @_;

    my @item;

    # slots supplied by the caller
    $item[SPACES]           = $spaces;
    $item[LEVEL]            = $level;
    $item[CI_LEVEL]         = $ci_level;
    $item[AVAILABLE_SPACES] = $available_spaces;
    $item[SEQUENCE_NUMBER]  = $gnu_sequence_number;
    $item[INDEX]            = $index;
    $item[ALIGN_PAREN]      = $align_paren;
    $item[STACK_DEPTH]      = $stack_depth;
    $item[STARTING_INDEX]   = $starting_index;

    # slots which always start with a fixed value
    $item[CLOSED]             = -1;
    $item[COMMA_COUNT]        = 0;
    $item[HAVE_CHILD]         = 0;
    $item[RECOVERABLE_SPACES] = 0;
    $item[MARKED]             = 0;
    $item[ARROW_COUNT]        = 0;

    return bless \@item, $class;
}

sub permanently_decrease_AVAILABLE_SPACES {

    # Permanently remove up to $spaces_needed of the available
    # indentation spaces of one indentation item and return the number
    # actually removed.  The recoverable space count is reset to zero.
    # NOTE: if there are child nodes, their total SPACES must be reduced
    # by the caller.

    my ( $item, $spaces_needed ) = @_;

    # we cannot remove more than is available
    my $deleted_spaces = $item->get_AVAILABLE_SPACES();
    if ( $deleted_spaces > $spaces_needed ) {
        $deleted_spaces = $spaces_needed;
    }

    $item->decrease_AVAILABLE_SPACES($deleted_spaces);
    $item->decrease_SPACES($deleted_spaces);
    $item->set_RECOVERABLE_SPACES(0);

    return $deleted_spaces;
}

sub tentatively_decrease_AVAILABLE_SPACES {

    # Tentatively remove up to $spaces_needed of the available
    # indentation spaces of one indentation item and return the number
    # actually removed.  The removed amount is added to the recoverable
    # space count because we may want to undo this later.  NOTE: if
    # there are child nodes, their total SPACES must be reduced by the
    # caller.
    my ( $item, $spaces_needed ) = @_;

    # we cannot remove more than is available
    my $deleted_spaces = $item->get_AVAILABLE_SPACES();
    if ( $deleted_spaces > $spaces_needed ) {
        $deleted_spaces = $spaces_needed;
    }

    $item->decrease_AVAILABLE_SPACES($deleted_spaces);
    $item->decrease_SPACES($deleted_spaces);
    $item->increase_RECOVERABLE_SPACES($deleted_spaces);
    return $deleted_spaces;
}

# The 'get_' methods below return the current value of one slot.  The
# 'set_', 'increase_' and 'decrease_' methods change a slot only when
# given a defined value, and always return the resulting slot value.

sub get_STACK_DEPTH { return $_[0]->[STACK_DEPTH] }

sub get_SPACES { return $_[0]->[SPACES] }

sub get_MARKED { return $_[0]->[MARKED] }

sub set_MARKED {
    my ( $self, $value ) = @_;
    $self->[MARKED] = $value if defined($value);
    return $self->[MARKED];
}

sub get_AVAILABLE_SPACES { return $_[0]->[AVAILABLE_SPACES] }

sub decrease_SPACES {
    my ( $self, $value ) = @_;
    $self->[SPACES] -= $value if defined($value);
    return $self->[SPACES];
}

sub decrease_AVAILABLE_SPACES {
    my ( $self, $value ) = @_;
    $self->[AVAILABLE_SPACES] -= $value if defined($value);
    return $self->[AVAILABLE_SPACES];
}

sub get_ALIGN_PAREN { return $_[0]->[ALIGN_PAREN] }

sub get_RECOVERABLE_SPACES { return $_[0]->[RECOVERABLE_SPACES] }

sub set_RECOVERABLE_SPACES {
    my ( $self, $value ) = @_;
    $self->[RECOVERABLE_SPACES] = $value if defined($value);
    return $self->[RECOVERABLE_SPACES];
}

sub increase_RECOVERABLE_SPACES {
    my ( $self, $value ) = @_;
    $self->[RECOVERABLE_SPACES] += $value if defined($value);
    return $self->[RECOVERABLE_SPACES];
}

sub get_CI_LEVEL { return $_[0]->[CI_LEVEL] }

sub get_LEVEL { return $_[0]->[LEVEL] }

sub get_SEQUENCE_NUMBER { return $_[0]->[SEQUENCE_NUMBER] }

sub get_INDEX { return $_[0]->[INDEX] }

sub get_STARTING_INDEX { return $_[0]->[STARTING_INDEX] }

sub set_HAVE_CHILD {
    my ( $self, $value ) = @_;
    $self->[HAVE_CHILD] = $value if defined($value);
    return $self->[HAVE_CHILD];
}

sub get_HAVE_CHILD { return $_[0]->[HAVE_CHILD] }

sub set_ARROW_COUNT {
    my ( $self, $value ) = @_;
    $self->[ARROW_COUNT] = $value if defined($value);
    return $self->[ARROW_COUNT];
}

sub get_ARROW_COUNT { return $_[0]->[ARROW_COUNT] }

sub set_COMMA_COUNT {
    my ( $self, $value ) = @_;
    $self->[COMMA_COUNT] = $value if defined($value);
    return $self->[COMMA_COUNT];
}

sub get_COMMA_COUNT { return $_[0]->[COMMA_COUNT] }

sub set_CLOSED {
    my ( $self, $value ) = @_;
    $self->[CLOSED] = $value if defined($value);
    return $self->[CLOSED];
}

sub get_CLOSED { return $_[0]->[CLOSED] }

#####################################################################
#
# the Perl::Tidy::VerticalAligner::Line class supplies an object to
# contain a single output line
#
#####################################################################

package Perl::Tidy::VerticalAligner::Line;

{

    use strict;
    use Carp;

    # Symbolic indexes into the blessed array which holds one line
    use constant JMAX                      => 0;
    use constant JMAX_ORIGINAL_LINE        => 1;
    use constant RTOKENS                   => 2;
    use constant RFIELDS                   => 3;
    use constant RPATTERNS                 => 4;
    use constant INDENTATION               => 5;
    use constant LEADING_SPACE_COUNT       => 6;
    use constant OUTDENT_LONG_LINES        => 7;
    use constant LIST_TYPE                 => 8;
    use constant IS_HANGING_SIDE_COMMENT   => 9;
    use constant RALIGNMENTS               => 10;
    use constant MAXIMUM_LINE_LENGTH       => 11;
    use constant RVERTICAL_TIGHTNESS_FLAGS => 12;

    # Correspondence between constructor argument names and indexes
    my %_index_map;
    $_index_map{jmax}                      = JMAX;
    $_index_map{jmax_original_line}        = JMAX_ORIGINAL_LINE;
    $_index_map{rtokens}                   = RTOKENS;
    $_index_map{rfields}                   = RFIELDS;
    $_index_map{rpatterns}                 = RPATTERNS;
    $_index_map{indentation}               = INDENTATION;
    $_index_map{leading_space_count}       = LEADING_SPACE_COUNT;
    $_index_map{outdent_long_lines}        = OUTDENT_LONG_LINES;
    $_index_map{list_type}                 = LIST_TYPE;
    $_index_map{is_hanging_side_comment}   = IS_HANGING_SIDE_COMMENT;
    $_index_map{ralignments}               = RALIGNMENTS;
    $_index_map{maximum_line_length}       = MAXIMUM_LINE_LENGTH;
    $_index_map{rvertical_tightness_flags} = RVERTICAL_TIGHTNESS_FLAGS;

    # Values used for slots which are not given to the constructor.
    # Note that the RALIGNMENTS entry is one array shared by the whole
    # class; sub new never hands it out to a new object.
    my @_default_data = ();
    $_default_data[JMAX]                      = undef;
    $_default_data[JMAX_ORIGINAL_LINE]        = undef;
    $_default_data[RTOKENS]                   = undef;
    $_default_data[RFIELDS]                   = undef;
    $_default_data[RPATTERNS]                 = undef;
    $_default_data[INDENTATION]               = undef;
    $_default_data[LEADING_SPACE_COUNT]       = undef;
    $_default_data[OUTDENT_LONG_LINES]        = undef;
    $_default_data[LIST_TYPE]                 = undef;
    $_default_data[IS_HANGING_SIDE_COMMENT]   = undef;
    $_default_data[RALIGNMENTS]               = [];
    $_default_data[MAXIMUM_LINE_LENGTH]       = undef;
    $_default_data[RVERTICAL_TIGHTNESS_FLAGS] = undef;

    {

        # methods to count object population
        my $_count = 0;
        sub get_count        { $_count; }
        sub _increment_count { ++$_count }
        sub _decrement_count { --$_count }
    }

    # Constructor may be called as a class method
    #
    # Arguments are name => value pairs, with the names being the keys
    # of %_index_map.  A slot which is not named in the call is copied
    # from the invoking object when sub new is called as an object
    # method, and otherwise takes the class default.
    sub new {
        my ( $caller, %arg ) = @_;
        my $caller_is_obj = ref($caller);
        my $class         = $caller_is_obj || $caller;
        my $self          = bless [], $class;

        foreach my $name ( keys %_index_map ) {
            my $index = $_index_map{$name};
            if    ( exists $arg{$name} ) { $self->[$index] = $arg{$name} }
            elsif ($caller_is_obj)       { $self->[$index] = $caller->[$index] }
            else { $self->[$index] = $_default_data[$index] }
        }

        # The loop above would give a brand new line the class-wide
        # default array for RALIGNMENTS.  set_alignment and
        # set_alignments write into that array in place, so a change
        # made through one line would show up in every other line.
        # Give each brand new line a list of its own instead.
        unless ( $caller_is_obj || exists $arg{ralignments} ) {
            $self->[RALIGNMENTS] = [];
        }

        $self->_increment_count();
        return $self;
    }

    sub DESTROY {
        $_[0]->_decrement_count();
    }

    # simple read accessors
    sub get_jmax                      { $_[0]->[JMAX] }
    sub get_jmax_original_line        { $_[0]->[JMAX_ORIGINAL_LINE] }
    sub get_rtokens                   { $_[0]->[RTOKENS] }
    sub get_rfields                   { $_[0]->[RFIELDS] }
    sub get_rpatterns                 { $_[0]->[RPATTERNS] }
    sub get_indentation               { $_[0]->[INDENTATION] }
    sub get_leading_space_count       { $_[0]->[LEADING_SPACE_COUNT] }
    sub get_outdent_long_lines        { $_[0]->[OUTDENT_LONG_LINES] }
    sub get_list_type                 { $_[0]->[LIST_TYPE] }
    sub get_is_hanging_side_comment   { $_[0]->[IS_HANGING_SIDE_COMMENT] }
    sub get_rvertical_tightness_flags { $_[0]->[RVERTICAL_TIGHTNESS_FLAGS] }

    # The next methods take a field index $j as first argument and pass
    # the request on to the alignment object stored for that field
    sub set_column     { $_[0]->[RALIGNMENTS]->[ $_[1] ]->set_column( $_[2] ) }
    sub get_alignment  { $_[0]->[RALIGNMENTS]->[ $_[1] ] }
    sub get_alignments { @{ $_[0]->[RALIGNMENTS] } }
    sub get_column     { $_[0]->[RALIGNMENTS]->[ $_[1] ]->get_column() }

    sub get_starting_column {
        $_[0]->[RALIGNMENTS]->[ $_[1] ]->get_starting_column();
    }

    sub increment_column {
        $_[0]->[RALIGNMENTS]->[ $_[1] ]->increment_column( $_[2] );
    }

    # replace the contents of the alignment list with the given list
    sub set_alignments { my $self = shift; @{ $self->[RALIGNMENTS] } = @_; }

    # width of field $j, using the current columns
    sub current_field_width {
        my $self = shift;
        my ($j) = @_;
        if ( $j == 0 ) {
            return $self->get_column($j);
        }
        else {
            return $self->get_column($j) - $self->get_column( $j - 1 );
        }
    }

    # how far column $j has moved from its starting column
    sub field_width_growth {
        my $self = shift;
        my $j    = shift;
        return $self->get_column($j) - $self->get_starting_column($j);
    }

    # width of field $j, using the starting columns
    sub starting_field_width {
        my $self = shift;
        my $j    = shift;
        if ( $j == 0 ) {
            return $self->get_starting_column($j);
        }
        else {
            return $self->get_starting_column($j) -
              $self->get_starting_column( $j - 1 );
        }
    }

    # widen field $j by $pad; columns $j through jmax all move by $pad
    sub increase_field_width {

        my $self = shift;
        my ( $j, $pad ) = @_;
        my $jmax = $self->get_jmax();
        for my $k ( $j .. $jmax ) {
            $self->increment_column( $k, $pad );
        }
    }

    # maximum line length less the column of the last field
    sub get_available_space_on_right {
        my $self = shift;
        my $jmax = $self->get_jmax();
        return $self->[MAXIMUM_LINE_LENGTH] - $self->get_column($jmax);
    }

    # simple write accessors
    sub set_jmax                    { $_[0]->[JMAX]                    = $_[1] }
    sub set_jmax_original_line      { $_[0]->[JMAX_ORIGINAL_LINE]      = $_[1] }
    sub set_rtokens                 { $_[0]->[RTOKENS]                 = $_[1] }
    sub set_rfields                 { $_[0]->[RFIELDS]                 = $_[1] }
    sub set_rpatterns               { $_[0]->[RPATTERNS]               = $_[1] }
    sub set_indentation             { $_[0]->[INDENTATION]             = $_[1] }
    sub set_leading_space_count     { $_[0]->[LEADING_SPACE_COUNT]     = $_[1] }
    sub set_outdent_long_lines      { $_[0]->[OUTDENT_LONG_LINES]      = $_[1] }
    sub set_list_type               { $_[0]->[LIST_TYPE]               = $_[1] }
    sub set_is_hanging_side_comment { $_[0]->[IS_HANGING_SIDE_COMMENT] = $_[1] }
    sub set_alignment { $_[0]->[RALIGNMENTS]->[ $_[1] ] = $_[2] }

}

#####################################################################
#
# the
# Perl::Tidy::VerticalAligner::Alignment class holds information
# on a single column being aligned
#
#####################################################################
package Perl::Tidy::VerticalAligner::Alignment;

{

    use strict;

    #use Carp;

    # Slot numbers within the blessed array which holds one alignment
    use constant COLUMN          => 0;    # the current column number
    use constant STARTING_COLUMN => 1;    # column number when created
    use constant MATCHING_TOKEN  => 2;    # what token we are matching
    use constant STARTING_LINE   => 3;    # the line index of creation
    use constant ENDING_LINE     => 4;    # the most recent line to use it
    use constant SAVED_COLUMN    => 5;    # copy of the column made by
                                          # save_column
    use constant SERIAL_NUMBER   => 6;    # unique number for this alignment
                                          # (just its index in an array)

    # Constructor argument name => slot number
    my %_index_map = (
        column          => COLUMN,
        starting_column => STARTING_COLUMN,
        matching_token  => MATCHING_TOKEN,
        starting_line   => STARTING_LINE,
        ending_line     => ENDING_LINE,
        saved_column    => SAVED_COLUMN,
        serial_number   => SERIAL_NUMBER,
    );

    # Every slot defaults to undef
    my @_default_data = (undef) x scalar( keys %_index_map );

    # class population count
    {
        my $_count = 0;
        sub get_count        { $_count; }
        sub _increment_count { ++$_count }
        sub _decrement_count { --$_count }
    }

    # constructor
    #
    # Arguments are name => value pairs, the names being the keys of
    # %_index_map.  A slot which is not named takes the value held by
    # the invoking object when this is called as an object method, and
    # the default otherwise.
    sub new {
        my ( $caller, %arg ) = @_;
        my $caller_is_obj = ref($caller);
        my $class         = $caller_is_obj || $caller;
        no strict "refs";
        my $self = bless [], $class;

        foreach my $name ( keys %_index_map ) {
            my $slot = $_index_map{$name};
            my $value;
            if ( exists $arg{$name} ) {
                $value = $arg{$name};
            }
            elsif ($caller_is_obj) {
                $value = $caller->[$slot];
            }
            else {
                $value = $_default_data[$slot];
            }
            $self->[$slot] = $value;
        }
        $self->_increment_count();
        return $self;
    }

    sub DESTROY {
        my $self = shift;
        $self->_decrement_count();
    }

    # read accessors
    sub get_column          { my $self = shift; return $self->[COLUMN] }
    sub get_starting_column { my $self = shift; return $self->[STARTING_COLUMN] }
    sub get_matching_token  { my $self = shift; return $self->[MATCHING_TOKEN] }
    sub get_starting_line   { my $self = shift; return $self->[STARTING_LINE] }
    sub get_ending_line     { my $self = shift; return $self->[ENDING_LINE] }
    sub get_serial_number   { my $self = shift; return $self->[SERIAL_NUMBER] }

    # write accessors; each returns the value now held in the slot
    sub set_column {
        my ( $self, $value ) = @_;
        return $self->[COLUMN] = $value;
    }

    sub set_starting_column {
        my ( $self, $value ) = @_;
        return $self->[STARTING_COLUMN] = $value;
    }

    sub set_matching_token {
        my ( $self, $value ) = @_;
        return $self->[MATCHING_TOKEN] = $value;
    }

    sub set_starting_line {
        my ( $self, $value ) = @_;
        return $self->[STARTING_LINE] = $value;
    }

    sub set_ending_line {
        my ( $self, $value ) = @_;
        return $self->[ENDING_LINE] = $value;
    }

    # move the column by the given amount
    sub increment_column {
        my ( $self, $amount ) = @_;
        return $self->[COLUMN] += $amount;
    }

    # remember the current column, and go back to the remembered one
    sub save_column {
        my $self = shift;
        return $self->[SAVED_COLUMN] = $self->[COLUMN];
    }

    sub restore_column {
        my $self = shift;
        return $self->[COLUMN] = $self->[SAVED_COLUMN];
    }

}

package Perl::Tidy::VerticalAligner;

# The Perl::Tidy::VerticalAligner package collects output lines and
# attempts to line up certain common tokens, such as => and #, which are
# identified by the calling routine.
#
# There are two main routines: append_line and flush.  Append acts as a
# storage buffer, collecting lines into a group which can be vertically
# aligned.  When alignment is no longer possible or desirable, it dumps
# the group to flush.
#
#     append_line -----> flush
#
#     collects          writes
#     vertical          one
#     groups            group

BEGIN {

    # Caution: these debug flags produce a lot of output
    # They should all be 0 except when debugging small scripts

    use constant VALIGN_DEBUG_FLAG_APPEND  => 0;
    use constant VALIGN_DEBUG_FLAG_APPEND0 => 0;
    use constant VALIGN_DEBUG_FLAG_TERNARY => 0;

    my $debug_warning = sub {
        print "VALIGN_DEBUGGING with key $_[0]\n";
    };

    # announce every debug flag which has been switched on
    VALIGN_DEBUG_FLAG_APPEND  && $debug_warning->('APPEND');
    VALIGN_DEBUG_FLAG_APPEND0 && $debug_warning->('APPEND0');
    VALIGN_DEBUG_FLAG_TERNARY && $debug_warning->('TERNARY');

}

use vars qw(
  $vertical_aligner_self
  $current_line
  $maximum_alignment_index
  $ralignment_list
  $maximum_jmax_seen
  $minimum_jmax_seen
  $previous_minimum_jmax_seen
  $previous_maximum_jmax_seen
  $maximum_line_index
  $group_level
  $group_type
  $group_maximum_gap
  $marginal_match
  $last_group_level_written
  $last_leading_space_count
  $extra_indent_ok
  $zero_count
  @group_lines
  $last_comment_column
  $last_side_comment_line_number
  $last_side_comment_length
  $last_side_comment_level
  $outdented_line_count
  $first_outdented_line_at
  $last_outdented_line_at
  $diagnostics_object
  $logger_object
  $file_writer_object
  @side_comment_history
  $comment_leading_space_count
  $is_matching_terminal_line

  $cached_line_text
  $cached_line_type
  $cached_line_flag
  $cached_seqno
  $cached_line_valid
  $cached_line_leading_space_count
  $cached_seqno_string

  $seqno_string
  $last_nonblank_seqno_string

  $rOpts

  $rOpts_maximum_line_length
  $rOpts_continuation_indentation
  $rOpts_indent_columns
  $rOpts_tabs
  $rOpts_entab_leading_whitespace
  $rOpts_valign

  $rOpts_fixed_position_side_comment
  $rOpts_minimum_space_to_comment

);

sub initialize {

    # Reset all of the package variables of the vertical aligner and
    # return a new, empty, blessed hash.
    #
    # Parameters, after the class name:
    #   $rOpts              - ref to hash of options; the frequently
    #                         used ones are copied into $rOpts_ variables
    #   $file_writer_object - stored in the package variable of the same
    #                         name
    #   $logger_object      - used by the logger interface routines of
    #                         this package when it is true
    #   $diagnostics_object - used by sub write_diagnostics when it is
    #                         true

    my $class;

    ( $class, $rOpts, $file_writer_object, $logger_object, $diagnostics_object )
      = @_;

    # variables describing the entire space group:
    $ralignment_list            = [];
    $group_level                = 0;
    $last_group_level_written   = -1;
    $extra_indent_ok            = 0;    # can we move all lines to the right?
    $last_side_comment_length   = 0;
    $maximum_jmax_seen          = 0;
    $minimum_jmax_seen          = 0;
    $previous_minimum_jmax_seen = 0;
    $previous_maximum_jmax_seen = 0;

    # variables describing each line of the group
    @group_lines = ();                  # list of all lines in group

    $outdented_line_count          = 0;
    $first_outdented_line_at       = 0;
    $last_outdented_line_at        = 0;
    $last_side_comment_line_number = 0;
    $last_side_comment_level       = -1;
    $is_matching_terminal_line     = 0;

    # most recent 3 side comments; [ line number, column ]
    $side_comment_history[0] = [ -300, 0 ];
    $side_comment_history[1] = [ -200, 0 ];
    $side_comment_history[2] = [ -100, 0 ];

    # write_leader_and_string cache:
    $cached_line_text                = "";
    $cached_line_type                = 0;
    $cached_line_flag                = 0;
    $cached_seqno                    = 0;
    $cached_line_valid               = 0;
    $cached_line_leading_space_count = 0;
    $cached_seqno_string             = "";

    # string of sequence numbers joined together
    $seqno_string               = "";
    $last_nonblank_seqno_string = "";

    # frequently used parameters
    $rOpts_indent_columns           = $rOpts->{'indent-columns'};
    $rOpts_tabs                     = $rOpts->{'tabs'};
    $rOpts_entab_leading_whitespace = $rOpts->{'entab-leading-whitespace'};
    $rOpts_fixed_position_side_comment =
      $rOpts->{'fixed-position-side-comment'};
    $rOpts_minimum_space_to_comment = $rOpts->{'minimum-space-to-comment'};
    $rOpts_maximum_line_length      = $rOpts->{'maximum-line-length'};
    $rOpts_valign                   = $rOpts->{'valign'};

    forget_side_comment();

    initialize_for_new_group();

    $vertical_aligner_self = {};
    bless $vertical_aligner_self, $class;
    return $vertical_aligner_self;
}

sub initialize_for_new_group {

    # reset the variables which describe the group being collected
    $maximum_line_index      = -1;      # lines in the current group
    $maximum_alignment_index = -1;      # alignments in current group
    $zero_count              = 0;       # count consecutive lines without tokens
    $current_line            = undef;   # line being matched for alignment
    $group_maximum_gap       = 0;       # largest gap introduced
    $group_type              = "";
    $marginal_match          = 0;
    $comment_leading_space_count = 0;
    $last_leading_space_count    = 0;
}

# interface to Perl::Tidy::Diagnostics routines
sub write_diagnostics {
    if ($diagnostics_object) {
        $diagnostics_object->write_diagnostics(@_);
    }
}

# interface to Perl::Tidy::Logger routines
sub warning {
    if ($logger_object) {
        $logger_object->warning(@_);
    }
}

sub write_logfile_entry {
    if ($logger_object) {
        $logger_object->write_logfile_entry(@_);
    }
}

sub report_definite_bug {
    if ($logger_object) {
        $logger_object->report_definite_bug();
    }
}

sub get_SPACES {

    # return the number of leading spaces associated with an indentation
    # variable $indentation is either a constant number of spaces or an
    # object with a get_SPACES method.
    my $indentation = shift;
    return ref($indentation) ? $indentation->get_SPACES() : $indentation;
}

sub get_RECOVERABLE_SPACES {

    # return the number of spaces (+ means shift right, - means shift left)
    # that we would like to shift a group of lines with the same indentation
    # to get them to line up with their opening parens
    # (this is 0 when $indentation is not an object)
    my $indentation = shift;
    return ref($indentation) ? $indentation->get_RECOVERABLE_SPACES() : 0;
}

sub get_STACK_DEPTH {

    # return the stack depth of an indentation object, or 0 when
    # $indentation is not an object
    my $indentation = shift;
    return ref($indentation) ? $indentation->get_STACK_DEPTH() : 0;
}

sub make_alignment {
    my ( $col, $token ) = @_;

    # make one new alignment at column $col which aligns token $token
    # The new alignment is appended to @$ralignment_list and its index
    # in that list becomes its serial number.  The alignment object is
    # returned.
    ++$maximum_alignment_index;
    my $alignment = Perl::Tidy::VerticalAligner::Alignment->new(
        column          => $col,
        starting_column => $col,
        matching_token  => $token,
        starting_line   => $maximum_line_index,
        ending_line     => $maximum_line_index,
        serial_number   => $maximum_alignment_index,
    );
    $ralignment_list->[$maximum_alignment_index] = $alignment;
    return $alignment;
}

sub dump_alignments {

    # debug routine: print a table of all alignments of the current
    # group, one alignment per line
    print
"Current Alignments:\ni\ttoken\tstarting_column\tcolumn\tstarting_line\tending_line\n";
    for my $i ( 0 .. $maximum_alignment_index ) {
        my $column          = $ralignment_list->[$i]->get_column();
        my $starting_column = $ralignment_list->[$i]->get_starting_column();
        my $matching_token  = $ralignment_list->[$i]->get_matching_token();
        my $starting_line   = $ralignment_list->[$i]->get_starting_line();
        my $ending_line     = $ralignment_list->[$i]->get_ending_line();
        print
"$i\t$matching_token\t$starting_column\t$column\t$starting_line\t$ending_line\n";
    }
}

sub save_alignment_columns {

    # have every alignment of the group remember its current column
    for my $i ( 0 .. $maximum_alignment_index ) {
        $ralignment_list->[$i]->save_column();
    }
}

sub restore_alignment_columns {

    # put every alignment of the group back at its remembered column
    for my $i ( 0 .. $maximum_alignment_index ) {
        $ralignment_list->[$i]->restore_column();
    }
}

sub forget_side_comment {
    $last_comment_column = 0;
}

sub append_line {

    # sub append is called to place one line in the current vertical group.
    #
    # The input parameters are:
    #     $level = indentation level of this line
    #     $rfields = reference to array of fields
    #     $rpatterns = reference to array of patterns, one per field
    #     $rtokens = reference to array of tokens starting fields 1,2,..
    #
    # Here is an example of what this package does.  In this example,
    # we are trying to line up both the '=>' and the '#'.
    #
    #         '18' => 'grave',    #   \`
    #         '19' => 'acute',    #   `'
    #         '20' => 'caron',    #   \v
    # <-tabs-><f1-><--field 2 ---><-f3->
    # |            |              |    |
    # |            |              |    |
    # col1        col2         col3 col4
    #
    # The calling routine has already broken the entire line into 3 fields as
    # indicated.  (So the work of identifying promising common tokens has
    # already been done).
    #
    # In this example, there will be 2 tokens being matched: '=>' and '#'.
    # They are the leading parts of fields 2 and 3, but we do need to know
    # what they are so that we can dump a group of lines when these tokens
    # change.
    #
    # The fields contain the actual characters of each field.  The patterns
    # are like the fields, but they contain mainly token types instead
    # of tokens, so they have fewer characters.  They are used to be
    # sure we are matching fields of similar type.
    #
    # In this example, there will be 4 column indexes being adjusted.  The
    # first one is always at zero.
The interior columns are at the start of 17989 # the matching tokens, and the last one tracks the maximum line length. 17990 # 17991 # Basically, each time a new line comes in, it joins the current vertical 17992 # group if possible. Otherwise it causes the current group to be dumped 17993 # and a new group is started. 17994 # 17995 # For each new group member, the column locations are increased, as 17996 # necessary, to make room for the new fields. When the group is finally 17997 # output, these column numbers are used to compute the amount of spaces of 17998 # padding needed for each field. 17999 # 18000 # Programming note: the fields are assumed not to have any tab characters. 18001 # Tabs have been previously removed except for tabs in quoted strings and 18002 # side comments. Tabs in these fields can mess up the column counting. 18003 # The log file warns the user if there are any such tabs. 18004 18005 my ( 18006 $level, $level_end, 18007 $indentation, $rfields, 18008 $rtokens, $rpatterns, 18009 $is_forced_break, $outdent_long_lines, 18010 $is_terminal_ternary, $is_terminal_statement, 18011 $do_not_pad, $rvertical_tightness_flags, 18012 $level_jump, 18013 ) = @_; 18014 18015 # number of fields is $jmax 18016 # number of tokens between fields is $jmax-1 18017 my $jmax = $#{$rfields}; 18018 18019 my $leading_space_count = get_SPACES($indentation); 18020 18021 # set outdented flag to be sure we either align within statements or 18022 # across statement boundaries, but not both. 
18023 my $is_outdented = $last_leading_space_count > $leading_space_count; 18024 $last_leading_space_count = $leading_space_count; 18025 18026 # Patch: undo for hanging side comment 18027 my $is_hanging_side_comment = 18028 ( $jmax == 1 && $rtokens->[0] eq '#' && $rfields->[0] =~ /^\s*$/ ); 18029 $is_outdented = 0 if $is_hanging_side_comment; 18030 18031 VALIGN_DEBUG_FLAG_APPEND0 && do { 18032 print 18033"APPEND0: entering lines=$maximum_line_index new #fields= $jmax, leading_count=$leading_space_count last_cmt=$last_comment_column force=$is_forced_break\n"; 18034 }; 18035 18036 # Validate cached line if necessary: If we can produce a container 18037 # with just 2 lines total by combining an existing cached opening 18038 # token with the closing token to follow, then we will mark both 18039 # cached flags as valid. 18040 if ($rvertical_tightness_flags) { 18041 if ( $maximum_line_index <= 0 18042 && $cached_line_type 18043 && $cached_seqno 18044 && $rvertical_tightness_flags->[2] 18045 && $rvertical_tightness_flags->[2] == $cached_seqno ) 18046 { 18047 $rvertical_tightness_flags->[3] ||= 1; 18048 $cached_line_valid ||= 1; 18049 } 18050 } 18051 18052 # do not join an opening block brace with an unbalanced line 18053 # unless requested with a flag value of 2 18054 if ( $cached_line_type == 3 18055 && $maximum_line_index < 0 18056 && $cached_line_flag < 2 18057 && $level_jump != 0 ) 18058 { 18059 $cached_line_valid = 0; 18060 } 18061 18062 # patch until new aligner is finished 18063 if ($do_not_pad) { my_flush() } 18064 18065 # shouldn't happen: 18066 if ( $level < 0 ) { $level = 0 } 18067 18068 # do not align code across indentation level changes 18069 # or if vertical alignment is turned off for debugging 18070 if ( $level != $group_level || $is_outdented || !$rOpts_valign ) { 18071 18072 # we are allowed to shift a group of lines to the right if its 18073 # level is greater than the previous and next group 18074 $extra_indent_ok = 18075 ( $level < $group_level && 
$last_group_level_written < $group_level ); 18076 18077 my_flush(); 18078 18079 # If we know that this line will get flushed out by itself because 18080 # of level changes, we can leave the extra_indent_ok flag set. 18081 # That way, if we get an external flush call, we will still be 18082 # able to do some -lp alignment if necessary. 18083 $extra_indent_ok = ( $is_terminal_statement && $level > $group_level ); 18084 18085 $group_level = $level; 18086 18087 # wait until after the above flush to get the leading space 18088 # count because it may have been changed if the -icp flag is in 18089 # effect 18090 $leading_space_count = get_SPACES($indentation); 18091 18092 } 18093 18094 # -------------------------------------------------------------------- 18095 # Patch to collect outdentable block COMMENTS 18096 # -------------------------------------------------------------------- 18097 my $is_blank_line = ""; 18098 my $is_block_comment = ( $jmax == 0 && $rfields->[0] =~ /^#/ ); 18099 if ( $group_type eq 'COMMENT' ) { 18100 if ( 18101 ( 18102 $is_block_comment 18103 && $outdent_long_lines 18104 && $leading_space_count == $comment_leading_space_count 18105 ) 18106 || $is_blank_line 18107 ) 18108 { 18109 $group_lines[ ++$maximum_line_index ] = $rfields->[0]; 18110 return; 18111 } 18112 else { 18113 my_flush(); 18114 } 18115 } 18116 18117 # -------------------------------------------------------------------- 18118 # add dummy fields for terminal ternary 18119 # -------------------------------------------------------------------- 18120 my $j_terminal_match; 18121 if ( $is_terminal_ternary && $current_line ) { 18122 $j_terminal_match = 18123 fix_terminal_ternary( $rfields, $rtokens, $rpatterns ); 18124 $jmax = @{$rfields} - 1; 18125 } 18126 18127 # -------------------------------------------------------------------- 18128 # add dummy fields for else statement 18129 # -------------------------------------------------------------------- 18130 if ( $rfields->[0] =~ /^else\s*$/ 
18131 && $current_line 18132 && $level_jump == 0 ) 18133 { 18134 $j_terminal_match = fix_terminal_else( $rfields, $rtokens, $rpatterns ); 18135 $jmax = @{$rfields} - 1; 18136 } 18137 18138 # -------------------------------------------------------------------- 18139 # Step 1. Handle simple line of code with no fields to match. 18140 # -------------------------------------------------------------------- 18141 if ( $jmax <= 0 ) { 18142 $zero_count++; 18143 18144 if ( $maximum_line_index >= 0 18145 && !get_RECOVERABLE_SPACES( $group_lines[0]->get_indentation() ) ) 18146 { 18147 18148 # flush the current group if it has some aligned columns.. 18149 if ( $group_lines[0]->get_jmax() > 1 ) { my_flush() } 18150 18151 # flush current group if we are just collecting side comments.. 18152 elsif ( 18153 18154 # ...and we haven't seen a comment lately 18155 ( $zero_count > 3 ) 18156 18157 # ..or if this new line doesn't fit to the left of the comments 18158 || ( ( $leading_space_count + length( $$rfields[0] ) ) > 18159 $group_lines[0]->get_column(0) ) 18160 ) 18161 { 18162 my_flush(); 18163 } 18164 } 18165 18166 # patch to start new COMMENT group if this comment may be outdented 18167 if ( $is_block_comment 18168 && $outdent_long_lines 18169 && $maximum_line_index < 0 ) 18170 { 18171 $group_type = 'COMMENT'; 18172 $comment_leading_space_count = $leading_space_count; 18173 $group_lines[ ++$maximum_line_index ] = $rfields->[0]; 18174 return; 18175 } 18176 18177 # just write this line directly if no current group, no side comment, 18178 # and no space recovery is needed. 
18179 if ( $maximum_line_index < 0 && !get_RECOVERABLE_SPACES($indentation) ) 18180 { 18181 write_leader_and_string( $leading_space_count, $$rfields[0], 0, 18182 $outdent_long_lines, $rvertical_tightness_flags ); 18183 return; 18184 } 18185 } 18186 else { 18187 $zero_count = 0; 18188 } 18189 18190 # programming check: (shouldn't happen) 18191 # an error here implies an incorrect call was made 18192 if ( $jmax > 0 && ( $#{$rtokens} != ( $jmax - 1 ) ) ) { 18193 warning( 18194"Program bug in Perl::Tidy::VerticalAligner - number of tokens = $#{$rtokens} should be one less than number of fields: $#{$rfields})\n" 18195 ); 18196 report_definite_bug(); 18197 } 18198 18199 # -------------------------------------------------------------------- 18200 # create an object to hold this line 18201 # -------------------------------------------------------------------- 18202 my $new_line = new Perl::Tidy::VerticalAligner::Line( 18203 jmax => $jmax, 18204 jmax_original_line => $jmax, 18205 rtokens => $rtokens, 18206 rfields => $rfields, 18207 rpatterns => $rpatterns, 18208 indentation => $indentation, 18209 leading_space_count => $leading_space_count, 18210 outdent_long_lines => $outdent_long_lines, 18211 list_type => "", 18212 is_hanging_side_comment => $is_hanging_side_comment, 18213 maximum_line_length => $rOpts->{'maximum-line-length'}, 18214 rvertical_tightness_flags => $rvertical_tightness_flags, 18215 ); 18216 18217 # Initialize a global flag saying if the last line of the group should 18218 # match end of group and also terminate the group. There should be no 18219 # returns between here and where the flag is handled at the bottom. 18220 my $col_matching_terminal = 0; 18221 if ( defined($j_terminal_match) ) { 18222 18223 # remember the column of the terminal ? 
or { to match with 18224 $col_matching_terminal = $current_line->get_column($j_terminal_match); 18225 18226 # set global flag for sub decide_if_aligned 18227 $is_matching_terminal_line = 1; 18228 } 18229 18230 # -------------------------------------------------------------------- 18231 # It simplifies things to create a zero length side comment 18232 # if none exists. 18233 # -------------------------------------------------------------------- 18234 make_side_comment( $new_line, $level_end ); 18235 18236 # -------------------------------------------------------------------- 18237 # Decide if this is a simple list of items. 18238 # There are 3 list types: none, comma, comma-arrow. 18239 # We use this below to be less restrictive in deciding what to align. 18240 # -------------------------------------------------------------------- 18241 if ($is_forced_break) { 18242 decide_if_list($new_line); 18243 } 18244 18245 if ($current_line) { 18246 18247 # -------------------------------------------------------------------- 18248 # Allow hanging side comment to join current group, if any 18249 # This will help keep side comments aligned, because otherwise we 18250 # will have to start a new group, making alignment less likely. 18251 # -------------------------------------------------------------------- 18252 join_hanging_comment( $new_line, $current_line ) 18253 if $is_hanging_side_comment; 18254 18255 # -------------------------------------------------------------------- 18256 # If there is just one previous line, and it has more fields 18257 # than the new line, try to join fields together to get a match with 18258 # the new line. At the present time, only a single leading '=' is 18259 # allowed to be compressed out. 
This is useful in rare cases where 18260 # a table is forced to use old breakpoints because of side comments, 18261 # and the table starts out something like this: 18262 # my %MonthChars = ('0', 'Jan', # side comment 18263 # '1', 'Feb', 18264 # '2', 'Mar', 18265 # Eliminating the '=' field will allow the remaining fields to line up. 18266 # This situation does not occur if there are no side comments 18267 # because scan_list would put a break after the opening '('. 18268 # -------------------------------------------------------------------- 18269 eliminate_old_fields( $new_line, $current_line ); 18270 18271 # -------------------------------------------------------------------- 18272 # If the new line has more fields than the current group, 18273 # see if we can match the first fields and combine the remaining 18274 # fields of the new line. 18275 # -------------------------------------------------------------------- 18276 eliminate_new_fields( $new_line, $current_line ); 18277 18278 # -------------------------------------------------------------------- 18279 # Flush previous group unless all common tokens and patterns match.. 
18280 # -------------------------------------------------------------------- 18281 check_match( $new_line, $current_line ); 18282 18283 # -------------------------------------------------------------------- 18284 # See if there is space for this line in the current group (if any) 18285 # -------------------------------------------------------------------- 18286 if ($current_line) { 18287 check_fit( $new_line, $current_line ); 18288 } 18289 } 18290 18291 # -------------------------------------------------------------------- 18292 # Append this line to the current group (or start new group) 18293 # -------------------------------------------------------------------- 18294 accept_line($new_line); 18295 18296 # Future update to allow this to vary: 18297 $current_line = $new_line if ( $maximum_line_index == 0 ); 18298 18299 # output this group if it ends in a terminal else or ternary line 18300 if ( defined($j_terminal_match) ) { 18301 18302 # if there is only one line in the group (maybe due to failure to match 18303 # perfectly with previous lines), then align the ? or { of this 18304 # terminal line with the previous one unless that would make the line 18305 # too long 18306 if ( $maximum_line_index == 0 ) { 18307 my $col_now = $current_line->get_column($j_terminal_match); 18308 my $pad = $col_matching_terminal - $col_now; 18309 my $padding_available = 18310 $current_line->get_available_space_on_right(); 18311 if ( $pad > 0 && $pad <= $padding_available ) { 18312 $current_line->increase_field_width( $j_terminal_match, $pad ); 18313 } 18314 } 18315 my_flush(); 18316 $is_matching_terminal_line = 0; 18317 } 18318 18319 # -------------------------------------------------------------------- 18320 # Step 8. 
# Some old debugging stuff
    # --------------------------------------------------------------------
    VALIGN_DEBUG_FLAG_APPEND && do {
        print "APPEND fields:";
        dump_array(@$rfields);
        print "APPEND tokens:";
        dump_array(@$rtokens);
        print "APPEND patterns:";
        dump_array(@$rpatterns);
        dump_alignments();
    };

    return;
}

sub join_hanging_comment {

    # Try to let a hanging side comment line join the current group.
    #
    # Given:
    #   $line     = the new line object
    #   $old_line = the line object it is being compared with
    #
    # The new line qualifies if it has exactly two fields, the token
    # starting the second field is '#', the first field is empty or
    # blank, and the old line has more fields than the new line.  In that
    # case the comment is moved out to the field index of the old line's
    # last field, the intervening fields are filled with a single blank
    # and their tokens and patterns are set to empty strings.
    #
    # Returns 1 if the line was modified, 0 otherwise.
    my ( $line, $old_line ) = @_;

    # must be 2 fields
    my $jmax = $line->get_jmax();
    if ( $jmax != 1 ) { return 0 }

    # the second field is a comment..
    my $rtokens = $line->get_rtokens();
    if ( $rtokens->[0] ne '#' ) { return 0 }

    # the first field is empty...
    my $rfields = $line->get_rfields();
    if ( $rfields->[0] !~ /^\s*$/ ) { return 0 }

    # the current line has more fields
    my $old_jmax = $old_line->get_jmax();
    if ( $old_jmax <= $jmax ) { return 0 }

    my $rpatterns = $line->get_rpatterns();

    # mark the line and give it the same number of fields as the old line
    $line->set_is_hanging_side_comment(1);
    $line->set_jmax($old_jmax);

    # move the comment, its token and its pattern out to the last position
    $rfields->[$old_jmax]         = $rfields->[1];
    $rtokens->[ $old_jmax - 1 ]   = $rtokens->[0];
    $rpatterns->[ $old_jmax - 1 ] = $rpatterns->[0];

    # fill in the intervening fields
    foreach my $j ( 1 .. $old_jmax - 1 ) {
        $rfields->[$j]         = " ";  # NOTE: caused glitch unless 1 blank, why?
        $rtokens->[ $j - 1 ]   = "";
        $rpatterns->[ $j - 1 ] = "";
    }
    return 1;
}

sub eliminate_old_fields {

    my $new_line = shift;
    my $jmax     = $new_line->get_jmax();
    if ( $jmax > $maximum_jmax_seen ) { $maximum_jmax_seen = $jmax }
    if ( $jmax < $minimum_jmax_seen ) { $minimum_jmax_seen = $jmax }

    # there must be one previous line
    return unless ( $maximum_line_index == 0 );

    my $old_line            = shift;
    my $maximum_field_index = $old_line->get_jmax();

    ###############################################
    # this line must have fewer fields
    return unless $maximum_field_index > $jmax;
    ###############################################

    # Identify specific cases where field elimination is allowed:
    # case=1: both lines have comma-separated lists, and the first
    #         line has an equals
    # case=2: both lines have leading equals

    # case 1 is the default
    my $case = 1;

    # See if case 2: both lines have leading '='
    # We'll require smiliar leading patterns in this case
    my $old_rtokens   = $old_line->get_rtokens();
    my $rtokens       = $new_line->get_rtokens();
    my $rpatterns     = $new_line->get_rpatterns();
    my $old_rpatterns = $old_line->get_rpatterns();
    if (   $rtokens->[0] =~ /^=\d*$/
        && $old_rtokens->[0] eq $rtokens->[0]
        && $old_rpatterns->[0] eq $rpatterns->[0] )
    {
        $case = 2;
    }

    # not too many fewer fields in new line for case 1
    return unless ( $case != 1 || $maximum_field_index - 2 <= $jmax );

    # case 1 must have side comment
    my $old_rfields = $old_line->get_rfields();
    return
      if ( $case == 1
        && length( $$old_rfields[$maximum_field_index] ) == 0 );

    my $rfields = $new_line->get_rfields();

    my $hid_equals = 0;

    my @new_alignments        = ();
    my @new_fields            = ();
    my
@new_matching_patterns = (); 18419 my @new_matching_tokens = (); 18420 18421 my $j = 0; 18422 my $k; 18423 my $current_field = ''; 18424 my $current_pattern = ''; 18425 18426 # loop over all old tokens 18427 my $in_match = 0; 18428 for ( $k = 0 ; $k < $maximum_field_index ; $k++ ) { 18429 $current_field .= $$old_rfields[$k]; 18430 $current_pattern .= $$old_rpatterns[$k]; 18431 last if ( $j > $jmax - 1 ); 18432 18433 if ( $$old_rtokens[$k] eq $$rtokens[$j] ) { 18434 $in_match = 1; 18435 $new_fields[$j] = $current_field; 18436 $new_matching_patterns[$j] = $current_pattern; 18437 $current_field = ''; 18438 $current_pattern = ''; 18439 $new_matching_tokens[$j] = $$old_rtokens[$k]; 18440 $new_alignments[$j] = $old_line->get_alignment($k); 18441 $j++; 18442 } 18443 else { 18444 18445 if ( $$old_rtokens[$k] =~ /^\=\d*$/ ) { 18446 last if ( $case == 2 ); # avoid problems with stuff 18447 # like: $a=$b=$c=$d; 18448 $hid_equals = 1; 18449 } 18450 last 18451 if ( $in_match && $case == 1 ) 18452 ; # disallow gaps in matching field types in case 1 18453 } 18454 } 18455 18456 # Modify the current state if we are successful. 18457 # We must exactly reach the ends of both lists for success. 
if (   ( $j == $jmax )
        && ( $current_field eq '' )
        && ( $case != 1 || $hid_equals ) )
    {
        $k = $maximum_field_index;
        $current_field   .= $$old_rfields[$k];
        $current_pattern .= $$old_rpatterns[$k];
        $new_fields[$j]            = $current_field;
        $new_matching_patterns[$j] = $current_pattern;

        $new_alignments[$j]  = $old_line->get_alignment($k);
        $maximum_field_index = $j;

        $old_line->set_alignments(@new_alignments);
        $old_line->set_jmax($jmax);
        $old_line->set_rtokens( \@new_matching_tokens );
        $old_line->set_rfields( \@new_fields );
        $old_line->set_rpatterns( \@$rpatterns );
    }
}

# create an empty side comment if none exists
sub make_side_comment {

    # Given:
    #   $new_line  = the line object being added
    #   $level_end = level value supplied by the caller (append_line)
    #
    # If the last token of the line is not '#', a '#' token, an empty
    # field and a '#' pattern are appended and both jmax values of the
    # line are increased by one.  Otherwise the line already has a side
    # comment, and the file-level bookkeeping variables
    # $last_side_comment_line_number and $last_side_comment_level are
    # updated, after first forgetting the old side comment location if it
    # has become stale.
    my ( $new_line, $level_end ) = @_;
    my $jmax    = $new_line->get_jmax();
    my $rtokens = $new_line->get_rtokens();

    my $has_side_comment =
      ( $jmax != 0 ) && ( $rtokens->[ $jmax - 1 ] eq '#' );

    # line has a side comment..
    if ($has_side_comment) {

        my $line_number = $vertical_aligner_self->get_output_line_number();
        my $rfields     = $new_line->get_rfields();

        # don't remember old side comment location for very long
        my $is_too_old =
          ( $line_number - $last_side_comment_line_number > 12 );

        # and don't remember comment location across block level changes
        my $is_block_exit = !$is_too_old
          && ( $level_end < $last_side_comment_level )
          && ( $rfields->[0] =~ /^}/ );

        if ( $is_too_old || $is_block_exit ) {
            forget_side_comment();
        }
        $last_side_comment_line_number = $line_number;
        $last_side_comment_level       = $level_end;
    }

    # line does not have a side comment, so add a zero length one
    else {
        my $rfields   = $new_line->get_rfields();
        my $rpatterns = $new_line->get_rpatterns();
        my $jnew      = $jmax + 1;
        $rtokens->[$jmax]   = '#';
        $rfields->[$jnew]   = '';
        $rpatterns->[$jnew] = '#';
        $new_line->set_jmax($jnew);
        $new_line->set_jmax_original_line($jnew);
    }
}

sub decide_if_list {

    # Set the list type of a line object.
    #
    # A list will be taken to be a line with a forced break in which all
    # of the field separators are commas or comma-arrows (except for the
    # trailing #).  The caller (append_line) only calls this sub for lines
    # with a forced break.
    #
    # List separator tokens are things like ',3' or '=>2', where the
    # trailing digit is the nesting depth.  Braces are also accepted for
    # the tokens after the first one, to allow nested list items.
    #
    # If the first token is a list separator, the list type of the line is
    # set to that first token, or to an empty string if any of the tokens
    # at indexes 1 through jmax-2 is not an accepted separator.  Otherwise
    # the list type of the line is not touched.
    my ($line) = @_;

    my $rtokens     = $line->get_rtokens();
    my $first_token = $rtokens->[0];
    if ( $first_token =~ /^(\,|=>)/ ) {
        my $jmax      = $line->get_jmax();
        my $list_type = $first_token;

        for my $j ( 1 .. $jmax - 2 ) {
            next if ( $rtokens->[$j] =~ /^(\,|=>|\{)/ );

            # found something which does not belong in a list
            $list_type = "";
            last;
        }
        $line->set_list_type($list_type);
    }
}

sub eliminate_new_fields {

    # If the new line has more fields than the old line, see if the
    # leading tokens and patterns of the two lines agree.  If they do,
    # all of the extra fields of the new line are appended to the field
    # just before the side comment, so that the new line ends up with the
    # same number of fields as the old line.
    #
    # Given:
    #   $new_line = the new line object (its field, pattern and token
    #               arrays are modified in place)
    #   $old_line = the line object it is being compared with
    #
    # Uses the file-level variables $maximum_line_index,
    # $previous_maximum_jmax_seen and $minimum_jmax_seen.

    return if ( $maximum_line_index < 0 );
    my ( $new_line, $old_line ) = @_;
    my $jmax = $new_line->get_jmax();

    # both lines start with the same '=' token?
    my $old_rtokens = $old_line->get_rtokens();
    my $rtokens     = $new_line->get_rtokens();
    my $is_assignment =
      ( $rtokens->[0] =~ /^=\d*$/ ) && ( $old_rtokens->[0] eq $rtokens->[0] );

    # must be monotonic variation
    return if ( !$is_assignment && $previous_maximum_jmax_seen > $jmax );

    # must be more fields in the new line
    my $old_jmax = $old_line->get_jmax();
    return if ( $old_jmax >= $jmax );

    if ( !$is_assignment ) {

        # only if monotonic
        return
          if ( $old_line->get_jmax_original_line() != $minimum_jmax_seen );

        # never combine fields of a comma list
        return
          if ( $old_jmax <= 1 || $new_line->get_list_type() =~ /^,/ );
    }

    my $rfields       = $new_line->get_rfields();
    my $rpatterns     = $new_line->get_rpatterns();
    my $old_rpatterns = $old_line->get_rpatterns();

    # index of the field which will absorb the extra fields
    my $jlast = $old_jmax - 1;

    # loop over all OLD tokens except comment and check match
    my $match = 1;
    foreach my $k ( 0 .. $jlast - 1 ) {
        next
          if ( $old_rtokens->[$k] eq $rtokens->[$k]
            && $old_rpatterns->[$k] eq $rpatterns->[$k] );
        $match = 0;
        last;
    }

    # first tokens agree, so combine extra new tokens
    if ($match) {
        foreach my $k ( $old_jmax .. $jmax - 1 ) {
            $rfields->[$jlast] .= $rfields->[$k];
            $rfields->[$k] = "";
            $rpatterns->[$jlast] .= $rpatterns->[$k];
            $rpatterns->[$k] = "";
        }

        # the side comment moves down to the last field of the old line
        $rtokens->[$jlast]      = '#';
        $rfields->[$old_jmax]   = $rfields->[$jmax];
        $rpatterns->[$old_jmax] = $rpatterns->[$jmax];
        $jmax                   = $old_jmax;
    }
    $new_line->set_jmax($jmax);
}

sub fix_terminal_ternary {

    # Add empty fields as necessary to align a ternary term
    # like this:
    #
    #  my $leapyear =
    #      $year % 4   ? 0
    #    : $year % 100 ? 1
    #    : $year % 400 ? 0
    #    :               1;
    #
    # returns 1 if the terminal item should be indented

    my ( $rfields, $rtokens, $rpatterns ) = @_;

    my $jmax        = @{$rfields} - 1;
    my $old_line    = $group_lines[$maximum_line_index];
    my $rfields_old = $old_line->get_rfields();

    my $rpatterns_old       = $old_line->get_rpatterns();
    my $rtokens_old         = $old_line->get_rtokens();
    my $maximum_field_index = $old_line->get_jmax();

    # look for the question mark after the :
    my ($jquestion);
    my $depth_question;
    my $pad = "";
    for ( my $j = 0 ; $j < $maximum_field_index ; $j++ ) {
        my $tok = $rtokens_old->[$j];
        if ( $tok =~ /^\?(\d+)$/ ) {
            $depth_question = $1;

            # depth must be correct
            next unless ( $depth_question eq $group_level );

            $jquestion = $j;
            if ( $rfields_old->[ $j + 1 ] =~ /^(\?\s*)/ ) {
                $pad = " " x length($1);
            }
            else {
                return;    # shouldn't happen
            }
            last;
        }
    }
    return unless ( defined($jquestion) );    # shouldn't happen

    # Now splice the tokens and patterns of the previous line
    # into the else line to insure a match.  Add empty fields
    # as necessary.
18657 my $jadd = $jquestion; 18658 18659 # Work on copies of the actual arrays in case we have 18660 # to return due to an error 18661 my @fields = @{$rfields}; 18662 my @patterns = @{$rpatterns}; 18663 my @tokens = @{$rtokens}; 18664 18665 VALIGN_DEBUG_FLAG_TERNARY && do { 18666 local $" = '><'; 18667 print "CURRENT FIELDS=<@{$rfields_old}>\n"; 18668 print "CURRENT TOKENS=<@{$rtokens_old}>\n"; 18669 print "CURRENT PATTERNS=<@{$rpatterns_old}>\n"; 18670 print "UNMODIFIED FIELDS=<@{$rfields}>\n"; 18671 print "UNMODIFIED TOKENS=<@{$rtokens}>\n"; 18672 print "UNMODIFIED PATTERNS=<@{$rpatterns}>\n"; 18673 }; 18674 18675 # handle cases of leading colon on this line 18676 if ( $fields[0] =~ /^(:\s*)(.*)$/ ) { 18677 18678 my ( $colon, $therest ) = ( $1, $2 ); 18679 18680 # Handle sub-case of first field with leading colon plus additional code 18681 # This is the usual situation as at the '1' below: 18682 # ... 18683 # : $year % 400 ? 0 18684 # : 1; 18685 if ($therest) { 18686 18687 # Split the first field after the leading colon and insert padding. 18688 # Note that this padding will remain even if the terminal value goes 18689 # out on a separate line. This does not seem to look to bad, so no 18690 # mechanism has been included to undo it. 18691 my $field1 = shift @fields; 18692 unshift @fields, ( $colon, $pad . $therest ); 18693 18694 # change the leading pattern from : to ? 18695 return unless ( $patterns[0] =~ s/^\:/?/ ); 18696 18697 # install leading tokens and patterns of existing line 18698 unshift( @tokens, @{$rtokens_old}[ 0 .. $jquestion ] ); 18699 unshift( @patterns, @{$rpatterns_old}[ 0 .. $jquestion ] ); 18700 18701 # insert appropriate number of empty fields 18702 splice( @fields, 1, 0, ('') x $jadd ) if $jadd; 18703 } 18704 18705 # handle sub-case of first field just equal to leading colon. 18706 # This can happen for example in the example below where 18707 # the leading '(' would create a new alignment token 18708 # : ( $name =~ /[]}]$/ ) ? 
( $mname = $name ) 18709 # : ( $mname = $name . '->' ); 18710 else { 18711 18712 return unless ( $jmax > 0 && $tokens[0] ne '#' ); # shouldn't happen 18713 18714 # prepend a leading ? onto the second pattern 18715 $patterns[1] = "?b" . $patterns[1]; 18716 18717 # pad the second field 18718 $fields[1] = $pad . $fields[1]; 18719 18720 # install leading tokens and patterns of existing line, replacing 18721 # leading token and inserting appropriate number of empty fields 18722 splice( @tokens, 0, 1, @{$rtokens_old}[ 0 .. $jquestion ] ); 18723 splice( @patterns, 1, 0, @{$rpatterns_old}[ 1 .. $jquestion ] ); 18724 splice( @fields, 1, 0, ('') x $jadd ) if $jadd; 18725 } 18726 } 18727 18728 # Handle case of no leading colon on this line. This will 18729 # be the case when -wba=':' is used. For example, 18730 # $year % 400 ? 0 : 18731 # 1; 18732 else { 18733 18734 # install leading tokens and patterns of existing line 18735 $patterns[0] = '?' . 'b' . $patterns[0]; 18736 unshift( @tokens, @{$rtokens_old}[ 0 .. $jquestion ] ); 18737 unshift( @patterns, @{$rpatterns_old}[ 0 .. $jquestion ] ); 18738 18739 # insert appropriate number of empty fields 18740 $jadd = $jquestion + 1; 18741 $fields[0] = $pad . $fields[0]; 18742 splice( @fields, 0, 0, ('') x $jadd ) if $jadd; 18743 } 18744 18745 VALIGN_DEBUG_FLAG_TERNARY && do { 18746 local $" = '><'; 18747 print "MODIFIED TOKENS=<@tokens>\n"; 18748 print "MODIFIED PATTERNS=<@patterns>\n"; 18749 print "MODIFIED FIELDS=<@fields>\n"; 18750 }; 18751 18752 # all ok .. 
# update the arrays
    @{$rfields}   = @fields;
    @{$rtokens}   = @tokens;
    @{$rpatterns} = @patterns;

    # force a flush after this line
    return $jquestion;
}

sub fix_terminal_else {

    # Add empty fields as necessary to align a balanced terminal
    # else block to a previous if/elsif/unless block,
    # like this:
    #
    #  if   ( 1 || $x ) { print "ok 13\n"; }
    #  else             { print "not ok 13\n"; }
    #
    # Given references to the field, token and pattern arrays of the
    # 'else' line:
    #   $rfields, $rtokens, $rpatterns
    # these arrays are modified in place by splicing in the leading tokens
    # and patterns of the file-level $current_line plus a matching number
    # of empty fields.
    #
    # Returns the index of the opening block brace token in the token list
    # of $current_line if the arrays were modified (the caller,
    # append_line, saves this in $j_terminal_match).  Returns nothing (a
    # bare return, so undef in scalar context) if no change was made.
    my ( $rfields, $rtokens, $rpatterns ) = @_;
    my $jmax = @{$rfields} - 1;
    return unless ( $jmax > 0 );

    # check for balanced else block following if/elsif/unless
    my $rfields_old = $current_line->get_rfields();

    # TBD: add handling for 'case'
    return unless ( $rfields_old->[0] =~ /^(if|elsif|unless)\s*$/ );

    # look for the opening brace after the else, and extract the depth
    my $tok_brace = $rtokens->[0];
    my $depth_brace;
    if ( $tok_brace =~ /^\{(\d+)/ ) { $depth_brace = $1; }

    # probably:  "else  # side_comment"
    else { return }

    my $rpatterns_old       = $current_line->get_rpatterns();
    my $rtokens_old         = $current_line->get_rtokens();
    my $maximum_field_index = $current_line->get_jmax();

    # be sure the previous if/elsif is followed by an opening paren
    # at the same depth as the brace of the else block
    my $jparen    = 0;
    my $tok_paren = '(' . $depth_brace;
    my $tok_test  = $rtokens_old->[$jparen];
    return unless ( $tok_test eq $tok_paren );    # shouldn't happen

    # Now find the opening block brace
    my ($jbrace);
    for ( my $j = 1 ; $j < $maximum_field_index ; $j++ ) {
        my $tok = $rtokens_old->[$j];
        if ( $tok eq $tok_brace ) {
            $jbrace = $j;
            last;
        }
    }
    return unless ( defined($jbrace) );           # shouldn't happen

    # Now splice the tokens and patterns of the previous line
    # into the else line to ensure a match.  Add empty fields
    # as necessary.
    my $jadd = $jbrace - $jparen;
    splice( @{$rtokens},   0, 0, @{$rtokens_old}[ $jparen .. $jbrace - 1 ] );
    splice( @{$rpatterns}, 1, 0, @{$rpatterns_old}[ $jparen + 1 .. $jbrace ] );
    splice( @{$rfields}, 1, 0, ('') x $jadd );

    # Do not force a flush after this line if it follows a case.  With the
    # if/elsif/unless test above this cannot currently happen, but the
    # return is made explicit so that the sub can never fall off the end
    # and implicitly return the result of the pattern match.
    if ( $rfields_old->[0] =~ /^case\s*$/ ) { return }

    # force a flush after this line
    return $jbrace;
}

{    # sub check_match
    my %is_good_alignment;

    BEGIN {

        # Vertically aligning on certain "good" tokens is usually okay
        # so we can be less restrictive in marginal cases.
        @_ = qw( { ? => = );
        push @_, (',');
        @is_good_alignment{@_} = (1) x scalar(@_);
    }

    sub check_match {

        # See if the current line matches the current vertical alignment group.
        # If not, flush the current group.
18840 my $new_line = shift; 18841 my $old_line = shift; 18842 18843 # uses global variables: 18844 # $previous_minimum_jmax_seen 18845 # $maximum_jmax_seen 18846 # $maximum_line_index 18847 # $marginal_match 18848 my $jmax = $new_line->get_jmax(); 18849 my $maximum_field_index = $old_line->get_jmax(); 18850 18851 # flush if this line has too many fields 18852 if ( $jmax > $maximum_field_index ) { goto NO_MATCH } 18853 18854 # flush if adding this line would make a non-monotonic field count 18855 if ( 18856 ( $maximum_field_index > $jmax ) # this has too few fields 18857 && ( 18858 ( $previous_minimum_jmax_seen < 18859 $jmax ) # and wouldn't be monotonic 18860 || ( $old_line->get_jmax_original_line() != $maximum_jmax_seen ) 18861 ) 18862 ) 18863 { 18864 goto NO_MATCH; 18865 } 18866 18867 # otherwise see if this line matches the current group 18868 my $jmax_original_line = $new_line->get_jmax_original_line(); 18869 my $is_hanging_side_comment = $new_line->get_is_hanging_side_comment(); 18870 my $rtokens = $new_line->get_rtokens(); 18871 my $rfields = $new_line->get_rfields(); 18872 my $rpatterns = $new_line->get_rpatterns(); 18873 my $list_type = $new_line->get_list_type(); 18874 18875 my $group_list_type = $old_line->get_list_type(); 18876 my $old_rpatterns = $old_line->get_rpatterns(); 18877 my $old_rtokens = $old_line->get_rtokens(); 18878 18879 my $jlimit = $jmax - 1; 18880 if ( $maximum_field_index > $jmax ) { 18881 $jlimit = $jmax_original_line; 18882 --$jlimit unless ( length( $new_line->get_rfields()->[$jmax] ) ); 18883 } 18884 18885 # handle comma-separated lists .. 18886 if ( $group_list_type && ( $list_type eq $group_list_type ) ) { 18887 for my $j ( 0 .. $jlimit ) { 18888 my $old_tok = $$old_rtokens[$j]; 18889 next unless $old_tok; 18890 my $new_tok = $$rtokens[$j]; 18891 next unless $new_tok; 18892 18893 # lists always match ... 
18894 # unless they would align any '=>'s with ','s 18895 goto NO_MATCH 18896 if ( $old_tok =~ /^=>/ && $new_tok =~ /^,/ 18897 || $new_tok =~ /^=>/ && $old_tok =~ /^,/ ); 18898 } 18899 } 18900 18901 # do detailed check for everything else except hanging side comments 18902 elsif ( !$is_hanging_side_comment ) { 18903 18904 my $leading_space_count = $new_line->get_leading_space_count(); 18905 18906 my $max_pad = 0; 18907 my $min_pad = 0; 18908 my $saw_good_alignment; 18909 18910 for my $j ( 0 .. $jlimit ) { 18911 18912 my $old_tok = $$old_rtokens[$j]; 18913 my $new_tok = $$rtokens[$j]; 18914 18915 # Note on encoding used for alignment tokens: 18916 # ------------------------------------------- 18917 # Tokens are "decorated" with information which can help 18918 # prevent unwanted alignments. Consider for example the 18919 # following two lines: 18920 # local ( $xn, $xd ) = split( '/', &'rnorm(@_) ); 18921 # local ( $i, $f ) = &'bdiv( $xn, $xd ); 18922 # There are three alignment tokens in each line, a comma, 18923 # an =, and a comma. In the first line these three tokens 18924 # are encoded as: 18925 # ,4+local-18 =3 ,4+split-7 18926 # and in the second line they are encoded as 18927 # ,4+local-18 =3 ,4+&'bdiv-8 18928 # Tokens always at least have token name and nesting 18929 # depth. So in this example the ='s are at depth 3 and 18930 # the ,'s are at depth 4. This prevents aligning tokens 18931 # of different depths. Commas contain additional 18932 # information, as follows: 18933 # , {depth} + {container name} - {spaces to opening paren} 18934 # This allows us to reject matching the rightmost commas 18935 # in the above two lines, since they are for different 18936 # function calls. This encoding is done in 18937 # 'sub send_lines_to_vertical_aligner'. 18938 18939 # Pick off actual token. 18940 # Everything up to the first digit is the actual token. 
18941 my $alignment_token = $new_tok; 18942 if ( $alignment_token =~ /^([^\d]+)/ ) { $alignment_token = $1 } 18943 18944 # see if the decorated tokens match 18945 my $tokens_match = $new_tok eq $old_tok 18946 18947 # Exception for matching terminal : of ternary statement.. 18948 # consider containers prefixed by ? and : a match 18949 || ( $new_tok =~ /^,\d*\+\:/ && $old_tok =~ /^,\d*\+\?/ ); 18950 18951 # No match if the alignment tokens differ... 18952 if ( !$tokens_match ) { 18953 18954 # ...Unless this is a side comment 18955 if ( 18956 $j == $jlimit 18957 18958 # and there is either at least one alignment token 18959 # or this is a single item following a list. This 18960 # latter rule is required for 'December' to join 18961 # the following list: 18962 # my (@months) = ( 18963 # '', 'January', 'February', 'March', 18964 # 'April', 'May', 'June', 'July', 18965 # 'August', 'September', 'October', 'November', 18966 # 'December' 18967 # ); 18968 # If it doesn't then the -lp formatting will fail. 18969 && ( $j > 0 || $old_tok =~ /^,/ ) 18970 ) 18971 { 18972 $marginal_match = 1 18973 if ( $marginal_match == 0 18974 && $maximum_line_index == 0 ); 18975 last; 18976 } 18977 18978 goto NO_MATCH; 18979 } 18980 18981 # Calculate amount of padding required to fit this in. 18982 # $pad is the number of spaces by which we must increase 18983 # the current field to squeeze in this field. 18984 my $pad = 18985 length( $$rfields[$j] ) - $old_line->current_field_width($j); 18986 if ( $j == 0 ) { $pad += $leading_space_count; } 18987 18988 # remember max pads to limit marginal cases 18989 if ( $alignment_token ne '#' ) { 18990 if ( $pad > $max_pad ) { $max_pad = $pad } 18991 if ( $pad < $min_pad ) { $min_pad = $pad } 18992 } 18993 if ( $is_good_alignment{$alignment_token} ) { 18994 $saw_good_alignment = 1; 18995 } 18996 18997 # If patterns don't match, we have to be careful... 
18998 if ( $$old_rpatterns[$j] ne $$rpatterns[$j] ) { 18999 19000 # flag this as a marginal match since patterns differ 19001 $marginal_match = 1 19002 if ( $marginal_match == 0 && $maximum_line_index == 0 ); 19003 19004 # We have to be very careful about aligning commas 19005 # when the pattern's don't match, because it can be 19006 # worse to create an alignment where none is needed 19007 # than to omit one. Here's an example where the ','s 19008 # are not in named continers. The first line below 19009 # should not match the next two: 19010 # ( $a, $b ) = ( $b, $r ); 19011 # ( $x1, $x2 ) = ( $x2 - $q * $x1, $x1 ); 19012 # ( $y1, $y2 ) = ( $y2 - $q * $y1, $y1 ); 19013 if ( $alignment_token eq ',' ) { 19014 19015 # do not align commas unless they are in named containers 19016 goto NO_MATCH unless ( $new_tok =~ /[A-Za-z]/ ); 19017 } 19018 19019 # do not align parens unless patterns match; 19020 # large ugly spaces can occur in math expressions. 19021 elsif ( $alignment_token eq '(' ) { 19022 19023 # But we can allow a match if the parens don't 19024 # require any padding. 19025 if ( $pad != 0 ) { goto NO_MATCH } 19026 } 19027 19028 # Handle an '=' alignment with different patterns to 19029 # the left. 19030 elsif ( $alignment_token eq '=' ) { 19031 19032 # It is best to be a little restrictive when 19033 # aligning '=' tokens. Here is an example of 19034 # two lines that we will not align: 19035 # my $variable=6; 19036 # $bb=4; 19037 # The problem is that one is a 'my' declaration, 19038 # and the other isn't, so they're not very similar. 19039 # We will filter these out by comparing the first 19040 # letter of the pattern. This is crude, but works 19041 # well enough. 19042 if ( 19043 substr( $$old_rpatterns[$j], 0, 1 ) ne 19044 substr( $$rpatterns[$j], 0, 1 ) ) 19045 { 19046 goto NO_MATCH; 19047 } 19048 19049 # If we pass that test, we'll call it a marginal match. 
19050 # Here is an example of a marginal match: 19051 # $done{$$op} = 1; 19052 # $op = compile_bblock($op); 19053 # The left tokens are both identifiers, but 19054 # one accesses a hash and the other doesn't. 19055 # We'll let this be a tentative match and undo 19056 # it later if we don't find more than 2 lines 19057 # in the group. 19058 elsif ( $maximum_line_index == 0 ) { 19059 $marginal_match = 19060 2; # =2 prevents being undone below 19061 } 19062 } 19063 } 19064 19065 # Don't let line with fewer fields increase column widths 19066 # ( align3.t ) 19067 if ( $maximum_field_index > $jmax ) { 19068 19069 # Exception: suspend this rule to allow last lines to join 19070 if ( $pad > 0 ) { goto NO_MATCH; } 19071 } 19072 } ## end for my $j ( 0 .. $jlimit) 19073 19074 # Turn off the "marginal match" flag in some cases... 19075 # A "marginal match" occurs when the alignment tokens agree 19076 # but there are differences in the other tokens (patterns). 19077 # If we leave the marginal match flag set, then the rule is that we 19078 # will align only if there are more than two lines in the group. 19079 # We will turn of the flag if we almost have a match 19080 # and either we have seen a good alignment token or we 19081 # just need a small pad (2 spaces) to fit. These rules are 19082 # the result of experimentation. Tokens which misaligned by just 19083 # one or two characters are annoying. On the other hand, 19084 # large gaps to less important alignment tokens are also annoying. 19085 if ( $marginal_match == 1 19086 && $jmax == $maximum_field_index 19087 && ( $saw_good_alignment || ( $max_pad < 3 && $min_pad > -3 ) ) 19088 ) 19089 { 19090 $marginal_match = 0; 19091 } 19092 ##print "marginal=$marginal_match saw=$saw_good_alignment jmax=$jmax max=$maximum_field_index maxpad=$max_pad minpad=$min_pad\n"; 19093 } 19094 19095 # We have a match (even if marginal). 
# If the current line has fewer fields than the current group
# but otherwise matches, copy the remaining group fields to
# make it a perfect match.
        if ( $maximum_field_index > $jmax ) {
            my $comment = $$rfields[$jmax];
            for $jmax ( $jlimit .. $maximum_field_index ) {
                $$rtokens[$jmax]     = $$old_rtokens[$jmax];
                $$rfields[ ++$jmax ] = '';
                $$rpatterns[$jmax]   = $$old_rpatterns[$jmax];
            }
            $$rfields[$jmax] = $comment;
            $new_line->set_jmax($jmax);
        }
        return;

      NO_MATCH:
        ##print "BUBBA: no match jmax=$jmax max=$maximum_field_index $group_list_type lines=$maximum_line_index token=$$old_rtokens[0]\n";
        my_flush();
        return;
    }
}

sub check_fit {

    # Try to squeeze the fields of $new_line into the current column
    # widths of $old_line, widening fields of $old_line where needed.
    #
    # Parameters:
    #   $new_line - line object being tested against the group
    #   $old_line - line object whose field widths define the group
    #
    # Returns nothing.  If some field needs more padding than is
    # available to the right, the saved alignment columns are restored
    # and the current group is flushed.
    #
    # Uses global variables:
    #   $maximum_line_index
    #   $group_maximum_gap
    return unless ( $maximum_line_index >= 0 );
    my $new_line = shift;
    my $old_line = shift;

    my $jmax                = $new_line->get_jmax();
    my $leading_space_count = $new_line->get_leading_space_count();
    my $rfields             = $new_line->get_rfields();

    my $padding_available = $old_line->get_available_space_on_right();

    # save current columns in case this doesn't work
    save_alignment_columns();

    my $maximum_field_index = $old_line->get_jmax();
    for my $j ( 0 .. $jmax ) {

        # $pad is the number of spaces by which field $j of the group
        # would have to grow to hold this line's field; the first field
        # also has to hold the leading whitespace
        my $pad =
          length( $$rfields[$j] ) - $old_line->current_field_width($j);

        if ( $j == 0 ) {
            $pad += $leading_space_count;
        }

        # remember largest gap of the group, excluding gap to side comment
        if (   $pad < 0
            && $group_maximum_gap < -$pad
            && $j > 0
            && $j < $jmax - 1 )
        {
            $group_maximum_gap = -$pad;
        }

        # a field which already fits needs no further work
        next if $pad < 0;

        ## This patch helps sometimes, but it doesn't check to see if
        ## the line is too long even without the side comment.  It needs
        ## to be reworked.
        ##don't let a long token with no trailing side comment push
        ##side comments out, or end a group.  (sidecmt1.t)
        ##next if ($j==$jmax-1 && length($$rfields[$jmax])==0);

        # This line will need space; lets see if we want to accept it..
        # Not if this won't fit.  (Previously, there were upper bounds
        # placed on padding here (maximum_whitespace_columns), but they
        # were not really helpful.)
        if ( $pad > $padding_available ) {

            # revert to starting state then flush; things didn't work out
            restore_alignment_columns();
            my_flush();
            last;
        }

        # patch to avoid excessive gaps in previous lines,
        # due to a line of fewer fields.
        #   return join( ".",
        #       $self->{"dfi"},  $self->{"aa"}, $self->rsvd,     $self->{"rd"},
        #       $self->{"area"}, $self->{"id"}, $self->{"sel"} );
        next if ( $jmax < $maximum_field_index && $j == $jmax - 1 );

        # looks ok, squeeze this field in
        $old_line->increase_field_width( $j, $pad );
        $padding_available -= $pad;

        # remember largest gap of the group, excluding gap to side comment
        if ( $pad > $group_maximum_gap && $j > 0 && $j < $jmax - 1 ) {
            $group_maximum_gap = $pad;
        }
    }
}

sub accept_line {

    # The current line either starts a new alignment group or is
    # accepted into the current alignment group.
    #
    # Parameters:
    #   $new_line - line object to append to @group_lines
    #
    # Uses global variables:
    #   @group_lines
    #   $maximum_line_index
    #   $maximum_jmax_seen, $minimum_jmax_seen
    #   $previous_maximum_jmax_seen, $previous_minimum_jmax_seen
    my $new_line = shift;
    $group_lines[ ++$maximum_line_index ] = $new_line;

    # initialize field lengths if starting new group
    if ( $maximum_line_index == 0 ) {

        my $jmax    = $new_line->get_jmax();
        my $rfields = $new_line->get_rfields();
        my $rtokens = $new_line->get_rtokens();

        # $col is the running column at the end of each field
        my $col = $new_line->get_leading_space_count();

        for my $j ( 0 .. $jmax ) {
            $col += length( $$rfields[$j] );

            # create initial alignments for the new group;
            # the last field has no alignment token after it
            my $token = "";
            if ( $j < $jmax ) { $token = $$rtokens[$j] }
            my $alignment = make_alignment( $col, $token );
            $new_line->set_alignment( $j, $alignment );
        }

        $maximum_jmax_seen = $jmax;
        $minimum_jmax_seen = $jmax;
    }

    # use previous alignments otherwise
    else {
        my @new_alignments =
          $group_lines[ $maximum_line_index - 1 ]->get_alignments();
        $new_line->set_alignments(@new_alignments);
    }

    # remember group jmax extremes for next call to append_line
    $previous_minimum_jmax_seen = $minimum_jmax_seen;
    $previous_maximum_jmax_seen = $maximum_jmax_seen;
}

sub dump_array {

    # debug routine to dump array contents;
    # prints the arguments as "(a)(b)(c)" on one line
    local $" = ')(';
    print "(@_)\n";
}

# flush() sends the current Perl::Tidy::VerticalAligner group down the
# pipeline to Perl::Tidy::FileWriter.
# This is the external flush, which also empties the cache
sub flush {

    # With no lines buffered in the current group, write out any cached
    # line and clear the cache; otherwise hand off to my_flush().
    if ( $maximum_line_index < 0 ) {
        if ($cached_line_type) {
            $seqno_string = $cached_seqno_string;
            entab_and_output( $cached_line_text,
                $cached_line_leading_space_count,
                $last_group_level_written );
            $cached_line_type    = 0;
            $cached_line_text    = "";
            $cached_seqno_string = "";
        }
    }
    else {
        my_flush();
    }
}

# This is the internal flush, which leaves the cache intact
sub my_flush {

    # Write out all lines of the current group (either a group of
    # comment strings or a group of code line objects) and then start
    # a new, empty group.  Does nothing if the group is empty.
    return if ( $maximum_line_index < 0 );

    # handle a group of comment lines
    if ( $group_type eq 'COMMENT' ) {

        VALIGN_DEBUG_FLAG_APPEND0 && do {
            my ( $a, $b, $c ) = caller();
            print
"APPEND0: Flush called from $a $b $c for COMMENT group: lines=$maximum_line_index \n";

        };
        my $leading_space_count = $comment_leading_space_count;

        # NOTE(review): the returned string is not used below; the call
        # is kept because get_leading_string is not fully visible here
        # and may have side effects - confirm before removing.
        my $leading_string = get_leading_string($leading_space_count);

        # reduce the leading space count if any lines are too long;
        # in a comment group the elements of @group_lines are strings
        my $max_excess = 0;
        for my $i ( 0 .. $maximum_line_index ) {
            my $str = $group_lines[$i];
            my $excess =
              length($str) + $leading_space_count - $rOpts_maximum_line_length;
            if ( $excess > $max_excess ) {
                $max_excess = $excess;
            }
        }

        if ( $max_excess > 0 ) {
            $leading_space_count -= $max_excess;
            if ( $leading_space_count < 0 ) { $leading_space_count = 0 }
            $last_outdented_line_at =
              $file_writer_object->get_output_line_number();
            unless ($outdented_line_count) {
                $first_outdented_line_at = $last_outdented_line_at;
            }
            $outdented_line_count += ( $maximum_line_index + 1 );
        }

        # write the group of lines
        my $outdent_long_lines = 0;
        for my $i ( 0 .. $maximum_line_index ) {
            write_leader_and_string( $leading_space_count, $group_lines[$i], 0,
                $outdent_long_lines, "" );
        }
    }

    # handle a group of code lines
    else {

        VALIGN_DEBUG_FLAG_APPEND0 && do {
            my $group_list_type = $group_lines[0]->get_list_type();
            my ( $a, $b, $c ) = caller();
            my $maximum_field_index = $group_lines[0]->get_jmax();
            print
"APPEND0: Flush called from $a $b $c fields=$maximum_field_index list=$group_list_type lines=$maximum_line_index extra=$extra_indent_ok\n";

        };

        # some small groups are best left unaligned
        my $do_not_align = decide_if_aligned();

        # optimize side comment location
        $do_not_align = adjust_side_comment($do_not_align);

        # recover spaces for -lp option if possible
        my $extra_leading_spaces = get_extra_leading_spaces();

        # all lines of this group have the same basic leading spacing
        my $group_leader_length = $group_lines[0]->get_leading_space_count();

        # add extra leading spaces if helpful
        my $min_ci_gap = improve_continuation_indentation( $do_not_align,
            $group_leader_length );

        # loop to output all lines
        for my $i ( 0 .. $maximum_line_index ) {
            my $line = $group_lines[$i];
            write_vertically_aligned_line( $line, $min_ci_gap, $do_not_align,
                $group_leader_length, $extra_leading_spaces );
        }
    }
    initialize_for_new_group();
}

sub decide_if_aligned {

    # Decide whether a two-line group should be left unaligned.
    # Returns a true value if the group should NOT be aligned.
    # Returns nothing (false) for groups which do not have exactly two
    # lines and for a matching terminal line.

    # Do not try to align two lines which are not really similar
    return unless $maximum_line_index == 1;
    return if ($is_matching_terminal_line);

    my $group_list_type = $group_lines[0]->get_list_type();

    my $do_not_align = (

        # always align lists
        !$group_list_type

          && (

            # don't align if it was just a marginal match
            $marginal_match

            # don't align two lines with big gap
            || $group_maximum_gap > 12

            # or lines with differing number of alignment tokens
            # TODO: this could be improved.  It occasionally rejects
            # good matches.
            || $previous_maximum_jmax_seen != $previous_minimum_jmax_seen
          )
    );

    # But try to convert them into a simple comment group if the first line
    # has a side comment
    my $rfields             = $group_lines[0]->get_rfields();
    my $maximum_field_index = $group_lines[0]->get_jmax();
    if (   $do_not_align
        && ( $maximum_line_index > 0 )
        && ( length( $$rfields[$maximum_field_index] ) > 0 ) )
    {
        combine_fields();
        $do_not_align = 0;
    }
    return $do_not_align;
}

sub adjust_side_comment {

    # Try to move the side comment column of the current group.
    #
    # Parameters:
    #   $do_not_align - true if the group is not going to be aligned
    #
    # Returns the (possibly cleared) $do_not_align flag.
    my $do_not_align = shift;

    # let's see if we can move the side comment field out a little
    # to improve readability (the last field is always a side comment field)
    my $have_side_comment       = 0;
    my $first_side_comment_line = -1;
    my $maximum_field_index     = $group_lines[0]->get_jmax();
    for my $i ( 0 .. $maximum_line_index ) {
        my $line = $group_lines[$i];

        if ( length( $line->get_rfields()->[$maximum_field_index] ) ) {
            $have_side_comment       = 1;
            $first_side_comment_line = $i;
            last;
        }
    }

    my $kmax = $maximum_field_index + 1;

    if ($have_side_comment) {

        my $line = $group_lines[0];

        # the maximum space without exceeding the line length:
        my $avail = $line->get_available_space_on_right();

        # try to use the previous comment column
        my $side_comment_column = $line->get_column( $kmax - 2 );
        my $move                = $last_comment_column - $side_comment_column;

##        my $sc_line0 = $side_comment_history[0]->[0];
##        my $sc_col0  = $side_comment_history[0]->[1];
##        my $sc_line1 = $side_comment_history[1]->[0];
##        my $sc_col1  = $side_comment_history[1]->[1];
##        my $sc_line2 = $side_comment_history[2]->[0];
##        my $sc_col2  = $side_comment_history[2]->[1];
##
##        # FUTURE UPDATES:
##        # Be sure to ignore 'do not align' and  '} # end comments'
##        # Find first $move > 0 and $move <= $avail as follows:
##        # 1. try sc_col1 if sc_col1 == sc_col0 && (line-sc_line0) < 12
##        # 2. try sc_col2 if (line-sc_line2) < 12
##        # 3. try min possible space, plus up to 8,
##        # 4. try min possible space

        if ( $kmax > 0 && !$do_not_align ) {

            # but if this doesn't work, give up and use the minimum space
            if ( $move > $avail ) {
                $move = $rOpts_minimum_space_to_comment - 1;
            }

            # but we want some minimum space to the comment
            my $min_move = $rOpts_minimum_space_to_comment - 1;
            if (   $move >= 0
                && $last_side_comment_length > 0
                && ( $first_side_comment_line == 0 )
                && $group_level == $last_group_level_written )
            {
                $min_move = 0;
            }

            if ( $move < $min_move ) {
                $move = $min_move;
            }

            # previously, an upper bound was placed on $move here,
            # (maximum_space_to_comment), but it was not helpful

            # don't exceed the available space
            if ( $move > $avail ) { $move = $avail }

            # we can only increase space, never decrease
            if ( $move > 0 ) {
                $line->increase_field_width( $maximum_field_index - 1, $move );
            }

            # remember this column for the next group
            $last_comment_column = $line->get_column( $kmax - 2 );
        }
        else {

            # try to at least line up the existing side comment location
            if ( $kmax > 0 && $move > 0 && $move < $avail ) {
                $line->increase_field_width( $maximum_field_index - 1, $move );
                $do_not_align = 0;
            }

            # reset side comment column if we can't align
            else {
                forget_side_comment();
            }
        }
    }
    return $do_not_align;
}

sub improve_continuation_indentation {
    my ( $do_not_align, $group_leader_length ) = @_;

    # See if we can increase the continuation indentation
    # to move all continuation lines closer to the next field
    # (unless it is a comment).
    #
    # '$min_ci_gap' is the extra indentation that we may need to introduce.
    # We will only introduce this to fields which already have some ci.
    # Without this variable, we would occasionally get something like this
    # (Complex.pm):
    #
    # use overload '+' => \&plus,
    #   '-'            => \&minus,
    #   '*'            => \&multiply,
    #   ...
    #   'tan'          => \&tan,
    #   'atan2'        => \&atan2,
    #
    # Whereas with this variable, we can shift variables over to get this:
    #
    # use overload '+' => \&plus,
    #          '-'     => \&minus,
    #          '*'     => \&multiply,
    #          ...
    #          'tan'   => \&tan,
    #          'atan2' => \&atan2,

    ## BUB: Deactivated####################
    # The trouble with this patch is that it may, for example,
    # move in some 'or's or ':'s, and leave some out, so that the
    # left edge alignment suffers.
    # Everything after this return is intentionally unreachable; it is
    # retained so that the patch can be re-enabled.
    return 0;
    ###########################################

    my $maximum_field_index = $group_lines[0]->get_jmax();

    my $min_ci_gap = $rOpts_maximum_line_length;
    if ( $maximum_field_index > 1 && !$do_not_align ) {

        for my $i ( 0 .. $maximum_line_index ) {
            my $line                = $group_lines[$i];
            my $leading_space_count = $line->get_leading_space_count();
            my $rfields             = $line->get_rfields();

            my $gap =
              $line->get_column(0) -
              $leading_space_count -
              length( $$rfields[0] );

            if ( $leading_space_count > $group_leader_length ) {
                if ( $gap < $min_ci_gap ) { $min_ci_gap = $gap }
            }
        }

        if ( $min_ci_gap >= $rOpts_maximum_line_length ) {
            $min_ci_gap = 0;
        }
    }
    else {
        $min_ci_gap = 0;
    }
    return $min_ci_gap;
}

sub write_vertically_aligned_line {

    # Join the fields of one line of the group into a single string,
    # padding each field out to its alignment column, and send the
    # result to write_leader_and_string.
    #
    # Parameters:
    #   $line                 - line object to write
    #   $min_ci_gap           - extra leading spaces for lines indented
    #                           more than the group leader
    #   $do_not_align         - true to suppress alignment padding
    #   $group_leader_length  - leading space count of the first line
    #   $extra_leading_spaces - extra indentation added to every line
    my ( $line, $min_ci_gap, $do_not_align, $group_leader_length,
        $extra_leading_spaces )
      = @_;
    my $rfields                   = $line->get_rfields();
    my $leading_space_count       = $line->get_leading_space_count();
    my $outdent_long_lines        = $line->get_outdent_long_lines();
    my $maximum_field_index       = $line->get_jmax();
    my $rvertical_tightness_flags = $line->get_rvertical_tightness_flags();

    # add any extra spaces
    if ( $leading_space_count > $group_leader_length ) {
        $leading_space_count += $min_ci_gap;
    }

    my $str = $$rfields[0];

    # loop to concatenate all fields of this line and needed padding
    my $total_pad_count = 0;
    for my $j ( 1 .. $maximum_field_index ) {

        # skip zero-length side comments
        last
          if ( ( $j == $maximum_field_index )
            && ( !defined( $$rfields[$j] ) || ( length( $$rfields[$j] ) == 0 ) )
          );

        # compute spaces of padding before this field
        my $col = $line->get_column( $j - 1 );
        my $pad = $col - ( length($str) + $leading_space_count );

        if ($do_not_align) {
            $pad =
              ( $j < $maximum_field_index )
              ? 0
              : $rOpts_minimum_space_to_comment - 1;
        }

        # if the -fpsc flag is set, move the side comment to the selected
        # column if and only if it is possible, ignoring constraints on
        # line length and minimum space to comment
        if ( $rOpts_fixed_position_side_comment && $j == $maximum_field_index )
        {
            my $newpad = $pad + $rOpts_fixed_position_side_comment - $col - 1;
            if ( $newpad >= 0 ) { $pad = $newpad; }
        }

        # accumulate the padding
        if ( $pad > 0 ) { $total_pad_count += $pad; }

        # add this field; an undefined field is reported and then
        # handled as an empty field so that it is never passed to length()
        my $field = $$rfields[$j];
        if ( !defined $field ) {
            write_diagnostics("UNDEFined field at j=$j\n");
            $field = '';
        }

        # only add padding when we have a finite field;
        # this avoids extra terminal spaces if we have empty fields
        if ( length($field) > 0 ) {
            $str .= ' ' x $total_pad_count;
            $total_pad_count = 0;
            $str .= $field;
        }
        else {
            $total_pad_count = 0;
        }

        # update side comment history buffer
        if ( $j == $maximum_field_index ) {
            my $lineno = $file_writer_object->get_output_line_number();
            shift @side_comment_history;
            push @side_comment_history, [ $lineno, $col ];
        }
    }

    # an undefined side comment field counts as zero length
    my $side_comment = $$rfields[$maximum_field_index];
    my $side_comment_length =
      defined($side_comment) ? length($side_comment) : 0;

    # ship this line off
    write_leader_and_string( $leading_space_count + $extra_leading_spaces,
        $str, $side_comment_length, $outdent_long_lines,
        $rvertical_tightness_flags );
}

sub get_extra_leading_spaces {

    #----------------------------------------------------------
    # Define any extra indentation space (for the -lp option).
    # Here is why:
    # If a list has side comments, sub scan_list must dump the
    # list before it sees everything.  When this happens, it sets
    # the indentation to the standard scheme, but notes how
    # many spaces it would have liked to use.  We may be able
    # to recover that space here in the event that all of the
    # lines of a list are back together again.
    #
    # Returns the number of extra leading spaces to use (0 if none).
    #----------------------------------------------------------

    my $extra_leading_spaces = 0;
    if ($extra_indent_ok) {
        my $object = $group_lines[0]->get_indentation();
        if ( ref($object) ) {
            my $extra_indentation_spaces_wanted =
              get_RECOVERABLE_SPACES($object);

            # all indentation objects must be the same
            for my $i ( 1 .. $maximum_line_index ) {
                if ( $object != $group_lines[$i]->get_indentation() ) {
                    $extra_indentation_spaces_wanted = 0;
                    last;
                }
            }

            if ($extra_indentation_spaces_wanted) {

                # the maximum space without exceeding the line length:
                my $avail = $group_lines[0]->get_available_space_on_right();
                $extra_leading_spaces =
                  ( $avail > $extra_indentation_spaces_wanted )
                  ? $extra_indentation_spaces_wanted
                  : $avail;

                # update the indentation object because with -icp the terminal
                # ');' will use the same adjustment.
                $object->permanently_decrease_AVAILABLE_SPACES(
                    -$extra_leading_spaces );
            }
        }
    }
    return $extra_leading_spaces;
}

sub combine_fields {

    # combine all fields except for the comment field  ( sidecmt.t )
    # Afterwards every line of the group has two fields: field 0 holds
    # the concatenated code fields and field 1 holds the former last field.
    # Uses global variables:
    #  @group_lines
    #  $maximum_line_index
    my $maximum_field_index = $group_lines[0]->get_jmax();
    for my $j ( 0 .. $maximum_line_index ) {
        my $line    = $group_lines[$j];
        my $rfields = $line->get_rfields();
        foreach ( 1 .. $maximum_field_index - 1 ) {
            $$rfields[0] .= $$rfields[$_];
        }
        $$rfields[1] = $$rfields[$maximum_field_index];

        $line->set_jmax(1);
        $line->set_column( 0, 0 );
        $line->set_column( 1, 0 );

    }
    $maximum_field_index = 1;

    # widen the two remaining fields so that every line fits
    for my $j ( 0 .. $maximum_line_index ) {
        my $line    = $group_lines[$j];
        my $rfields = $line->get_rfields();
        for my $k ( 0 .. $maximum_field_index ) {
            my $pad = length( $$rfields[$k] ) - $line->current_field_width($k);
            if ( $k == 0 ) {
                $pad += $group_lines[$j]->get_leading_space_count();
            }

            if ( $pad > 0 ) { $line->increase_field_width( $k, $pad ) }

        }
    }
}

sub get_output_line_number {

    # the output line number reported to a caller is the number of items
    # written plus the number of items in the buffer
    my $self = shift;
    return 1 + $maximum_line_index +
      $file_writer_object->get_output_line_number();
}

sub write_leader_and_string {

    # Write one output line, or cache it so that a following line can
    # be appended to it (vertical tightness).
    #
    # Parameters:
    #   $leading_space_count       - number of leading spaces for $str
    #   $str                       - line text without leading whitespace
    #   $side_comment_length       - length of the side comment in $str
    #   $outdent_long_lines        - true to outdent a line which is too long
    #   $rvertical_tightness_flags - array ref of recombination data
    #                                (see below), or a false value
    my ( $leading_space_count, $str, $side_comment_length, $outdent_long_lines,
        $rvertical_tightness_flags )
      = @_;

    # handle outdenting of long lines:
    if ($outdent_long_lines) {
        my $excess =
          length($str) -
          $side_comment_length +
          $leading_space_count -
          $rOpts_maximum_line_length;
        if ( $excess > 0 ) {
            $leading_space_count = 0;
            $last_outdented_line_at =
              $file_writer_object->get_output_line_number();

            unless ($outdented_line_count) {
                $first_outdented_line_at = $last_outdented_line_at;
            }
            $outdented_line_count++;
        }
    }

    # Make preliminary leading whitespace.  It could get changed
    # later by entabbing, so we have to keep track of any changes
    # to the leading_space_count from here on.
    my $leading_string =
      $leading_space_count > 0 ? ( ' ' x $leading_space_count ) : "";

    # Unpack any recombination data; it was packed by
    # sub send_lines_to_vertical_aligner. Contents:
    #
    #   [0] type: 1=opening  2=closing  3=opening block brace
    #   [1] flag: if opening: 1=no multiple steps, 2=multiple steps ok
    #             if closing: spaces of padding to use
    #   [2] sequence number of container
    #   [3] valid flag: do not append if this flag is false
    #
    # Two more elements are unpacked here as $seqno_beg and $seqno_end.
    my ( $open_or_close, $tightness_flag, $seqno, $valid, $seqno_beg,
        $seqno_end );
    if ($rvertical_tightness_flags) {
        (
            $open_or_close, $tightness_flag, $seqno, $valid, $seqno_beg,
            $seqno_end
        ) = @{$rvertical_tightness_flags};
    }

    $seqno_string = $seqno_end;

    # handle any cached line ..
    # either append this line to it or write it out
    if ( length($cached_line_text) ) {

        if ( !$cached_line_valid ) {
            entab_and_output( $cached_line_text,
                $cached_line_leading_space_count,
                $last_group_level_written );
        }

        # handle cached line with opening container token
        elsif ( $cached_line_type == 1 || $cached_line_type == 3 ) {

            my $gap = $leading_space_count - length($cached_line_text);

            # handle option of just one tight opening per line:
            if ( $cached_line_flag == 1 ) {
                if ( defined($open_or_close) && $open_or_close == 1 ) {
                    $gap = -1;
                }
            }

            if ( $gap >= 0 ) {
                $leading_string      = $cached_line_text . ' ' x $gap;
                $leading_space_count = $cached_line_leading_space_count;
                $seqno_string        = $cached_seqno_string . ':' . $seqno_beg;
            }
            else {
                entab_and_output( $cached_line_text,
                    $cached_line_leading_space_count,
                    $last_group_level_written );
            }
        }

        # handle cached line to place before this closing container token
        else {
            my $test_line = $cached_line_text . ' ' x $cached_line_flag . $str;

            if ( length($test_line) <= $rOpts_maximum_line_length ) {

                $seqno_string = $cached_seqno_string . ':' . $seqno_beg;

                # Patch to outdent closing tokens ending in ');'
                # If we are joining a line like ');' to a previous stacked
                # set of closing tokens, then decide if we may outdent the
                # combined stack to the indentation of the ');'.  Since we
                # should not normally outdent any of the other tokens more than
                # the indentation of the lines that contained them, we will
                # only do this if all of the corresponding opening
                # tokens were on the same line.  This can happen with
                # -sot and -sct.  For example, it is ok here:
                #   __PACKAGE__->load_components( qw(
                #         PK::Auto
                #         Core
                #   ));
                #
                #   But, for example, we do not outdent in this example because
                #   that would put the closing sub brace out farther than the
                #   opening sub brace:
                #
                #   perltidy -sot -sct
                #   $c->Tk::bind(
                #       '<Control-f>' => sub {
                #           my ($c) = @_;
                #           my $e = $c->XEvent;
                #           itemsUnderArea $c;
                #       } );
                #
                if ( $str =~ /^\);/ && $cached_line_text =~ /^[\)\}\]\s]*$/ ) {

                    # The way to tell this is if the stacked sequence numbers
                    # of this output line are the reverse of the stacked
                    # sequence numbers of the previous non-blank line of
                    # sequence numbers.  So we can join if the previous
                    # nonblank string of tokens is the mirror image.  For
                    # example if stack )}] is 13:8:6 then we are looking for a
                    # leading stack like [{( which is 6:8:13 We only need to
                    # check the two ends, because the intermediate tokens must
                    # fall in order.  Note on speed: having to split on colons
                    # and eliminate multiple colons might appear to be slow,
                    # but it's not an issue because we almost never come
                    # through here.  In a typical file we don't.
                    $seqno_string               =~ s/^:+//;
                    $last_nonblank_seqno_string =~ s/^:+//;
                    $seqno_string               =~ s/:+/:/g;
                    $last_nonblank_seqno_string =~ s/:+/:/g;

                    # how many spaces can we outdent?
                    my $diff =
                      $cached_line_leading_space_count - $leading_space_count;
                    if (   $diff > 0
                        && length($seqno_string)
                        && length($last_nonblank_seqno_string) ==
                        length($seqno_string) )
                    {
                        my @seqno_last =
                          ( split ':', $last_nonblank_seqno_string );
                        my @seqno_now = ( split ':', $seqno_string );
                        if (   $seqno_now[-1] == $seqno_last[0]
                            && $seqno_now[0] == $seqno_last[-1] )
                        {

                            # OK to outdent ..
                            # for absolute safety, be sure we only remove
                            # whitespace
                            my $ws = substr( $test_line, 0, $diff );
                            if ( ( length($ws) == $diff ) && $ws =~ /^\s+$/ ) {

                                $test_line = substr( $test_line, $diff );
                                $cached_line_leading_space_count -= $diff;
                            }

                            # shouldn't happen, but not critical:
                            ##else {
                            ## ERROR transferring indentation here
                            ##}
                        }
                    }
                }

                $str                 = $test_line;
                $leading_string      = "";
                $leading_space_count = $cached_line_leading_space_count;
            }
            else {
                entab_and_output( $cached_line_text,
                    $cached_line_leading_space_count,
                    $last_group_level_written );
            }
        }
    }
    $cached_line_type = 0;
    $cached_line_text = "";

    # make the line to be written
    my $line = $leading_string . $str;

    # write or cache this line; a line is only cached if it has
    # recombination data and no side comment
    if ( !$open_or_close || $side_comment_length > 0 ) {
        entab_and_output( $line, $leading_space_count, $group_level );
    }
    else {
        $cached_line_text                = $line;
        $cached_line_type                = $open_or_close;
        $cached_line_flag                = $tightness_flag;
        $cached_seqno                    = $seqno;
        $cached_line_valid               = $valid;
        $cached_line_leading_space_count = $leading_space_count;
        $cached_seqno_string             = $seqno_string;
    }

    $last_group_level_written = $group_level;
    $last_side_comment_length = $side_comment_length;
    $extra_indent_ok          = 0;
}

sub entab_and_output {

    # Replace leading spaces with tabs if requested, then send the line
    # to the file writer.
    #
    # Parameters:
    #   $line                - complete line text, without newline
    #   $leading_space_count - number of leading spaces in $line
    #   $level               - indentation level used for the one tab
    #                          per level option
    my ( $line, $leading_space_count, $level ) = @_;

    # The line is currently correct if there is no tabbing (recommended!)
    # We may have to lop off some leading spaces and replace with tabs.
    if ( $leading_space_count > 0 ) {

        # replacement for the leading spaces; stays undefined when no
        # entabbing is requested
        my $leading_string;

        # Nothing to do if no tabs
        if ( !( $rOpts_tabs || $rOpts_entab_leading_whitespace )
            || $rOpts_indent_columns <= 0 )
        {

            # nothing to do
        }

        # Handle entab option
        elsif ($rOpts_entab_leading_whitespace) {
            my $space_count =
              $leading_space_count % $rOpts_entab_leading_whitespace;
            my $tab_count =
              int( $leading_space_count / $rOpts_entab_leading_whitespace );
            $leading_string = "\t" x $tab_count . ' ' x $space_count;
        }

        # Handle option of one tab per level
        else {
            $leading_string = ( "\t" x $level );
            my $space_count =
              $leading_space_count - $level * $rOpts_indent_columns;

            # shouldn't happen:
            if ( $space_count < 0 ) {
                warning(
"Error entabbing in entab_and_output: for level=$level count=$leading_space_count\n"
                );
                $leading_string = ( ' ' x $leading_space_count );
            }
            else {
                $leading_string .= ( ' ' x $space_count );
            }
        }

        if ( defined $leading_string ) {
            if ( $line =~ /^\s{$leading_space_count,$leading_space_count}/ ) {
                substr( $line, 0, $leading_space_count ) = $leading_string;
            }
            else {

                # REMOVE AFTER TESTING
                # shouldn't happen - program error counting whitespace
                # we'll skip entabbing
                warning(
"Error entabbing in entab_and_output: expected count=$leading_space_count\n"
                );
            }
        }
    }
    $file_writer_object->write_code_line( $line . "\n" );
    if ($seqno_string) {
        $last_nonblank_seqno_string = $seqno_string;
    }
}

{    # begin get_leading_string

    my @leading_string_cache;

    sub get_leading_string {

        # define the leading whitespace string for this line..
20029 my $leading_whitespace_count = shift; 20030 20031 # Handle case of zero whitespace, which includes multi-line quotes 20032 # (which may have a finite level; this prevents tab problems) 20033 if ( $leading_whitespace_count <= 0 ) { 20034 return ""; 20035 } 20036 20037 # look for previous result 20038 elsif ( $leading_string_cache[$leading_whitespace_count] ) { 20039 return $leading_string_cache[$leading_whitespace_count]; 20040 } 20041 20042 # must compute a string for this number of spaces 20043 my $leading_string; 20044 20045 # Handle simple case of no tabs 20046 if ( !( $rOpts_tabs || $rOpts_entab_leading_whitespace ) 20047 || $rOpts_indent_columns <= 0 ) 20048 { 20049 $leading_string = ( ' ' x $leading_whitespace_count ); 20050 } 20051 20052 # Handle entab option 20053 elsif ($rOpts_entab_leading_whitespace) { 20054 my $space_count = 20055 $leading_whitespace_count % $rOpts_entab_leading_whitespace; 20056 my $tab_count = int( 20057 $leading_whitespace_count / $rOpts_entab_leading_whitespace ); 20058 $leading_string = "\t" x $tab_count . 
' ' x $space_count; 20059 } 20060 20061 # Handle option of one tab per level 20062 else { 20063 $leading_string = ( "\t" x $group_level ); 20064 my $space_count = 20065 $leading_whitespace_count - $group_level * $rOpts_indent_columns; 20066 20067 # shouldn't happen: 20068 if ( $space_count < 0 ) { 20069 warning( 20070"Error in append_line: for level=$group_level count=$leading_whitespace_count\n" 20071 ); 20072 $leading_string = ( ' ' x $leading_whitespace_count ); 20073 } 20074 else { 20075 $leading_string .= ( ' ' x $space_count ); 20076 } 20077 } 20078 $leading_string_cache[$leading_whitespace_count] = $leading_string; 20079 return $leading_string; 20080 } 20081} # end get_leading_string 20082 20083sub report_anything_unusual { 20084 my $self = shift; 20085 if ( $outdented_line_count > 0 ) { 20086 write_logfile_entry( 20087 "$outdented_line_count long lines were outdented:\n"); 20088 write_logfile_entry( 20089 " First at output line $first_outdented_line_at\n"); 20090 20091 if ( $outdented_line_count > 1 ) { 20092 write_logfile_entry( 20093 " Last at output line $last_outdented_line_at\n"); 20094 } 20095 write_logfile_entry( 20096 " use -noll to prevent outdenting, -l=n to increase line length\n" 20097 ); 20098 write_logfile_entry("\n"); 20099 } 20100} 20101 20102##################################################################### 20103# 20104# the Perl::Tidy::FileWriter class writes the output file 20105# 20106##################################################################### 20107 20108package Perl::Tidy::FileWriter; 20109 20110# Maximum number of little messages; probably need not be changed. 
use constant MAX_NAG_MESSAGES => 6;

# Forward a message to the logger object, if one was supplied to new().
sub write_logfile_entry {
    my $self          = shift;
    my $logger_object = $self->{_logger_object};
    if ($logger_object) {
        $logger_object->write_logfile_entry(@_);
    }
}

# Constructor.
#   $line_sink_object - object whose write_line() receives each output line
#                       (its tee_on()/tee_off() are also called, see below)
#   $rOpts            - reference to the options hash; this class reads
#                       'maximum-consecutive-blank-lines',
#                       'maximum-line-length' and 'indent-columns'
#   $logger_object    - optional logger; may be undefined
sub new {
    my $class = shift;
    my ( $line_sink_object, $rOpts, $logger_object ) = @_;

    bless {
        _line_sink_object           => $line_sink_object,
        _logger_object              => $logger_object,
        _rOpts                      => $rOpts,
        _output_line_number         => 1,
        _consecutive_blank_lines    => 0,
        _consecutive_nonblank_lines => 0,
        _first_line_length_error    => 0,
        _max_line_length_error      => 0,
        _last_line_length_error     => 0,
        _first_line_length_error_at => 0,
        _max_line_length_error_at   => 0,
        _last_line_length_error_at  => 0,
        _line_length_error_count    => 0,
        _max_output_line_length     => 0,
        _max_output_line_length_at  => 0,
    }, $class;
}

# tee_on / tee_off simply delegate to the line sink object
sub tee_on {
    my $self = shift;
    $self->{_line_sink_object}->tee_on();
}

sub tee_off {
    my $self = shift;
    $self->{_line_sink_object}->tee_off();
}

# Returns the number which the NEXT output line will have (the counter
# starts at 1 and is incremented as each newline-terminated line is written).
sub get_output_line_number {
    my $self = shift;
    return $self->{_output_line_number};
}

sub decrement_output_line_number {
    my $self = shift;
    $self->{_output_line_number}--;
}

sub get_consecutive_nonblank_lines {
    my $self = shift;
    return $self->{_consecutive_nonblank_lines};
}

sub reset_consecutive_blank_lines {
    my $self = shift;
    $self->{_consecutive_blank_lines} = 0;
}

# Write a blank line unless the previous output line was already blank
sub want_blank_line {
    my $self = shift;
    unless ( $self->{_consecutive_blank_lines} ) {
        $self->write_blank_code_line();
    }
}

# Write one blank line, unless that would exceed the limit set by
# 'maximum-consecutive-blank-lines'
sub write_blank_code_line {
    my $self  = shift;
    my $rOpts = $self->{_rOpts};
    return
      if ( $self->{_consecutive_blank_lines} >=
        $rOpts->{'maximum-consecutive-blank-lines'} );
    $self->{_consecutive_blank_lines}++;
    $self->{_consecutive_nonblank_lines} = 0;
    $self->write_line("\n");
}

# Write a line of code, keeping the blank / nonblank run counters up to
# date.  A whitespace-only line is silently dropped if it would exceed
# the 'maximum-consecutive-blank-lines' limit.
sub write_code_line {
    my $self = shift;
    my $line = shift;

    if ( $line =~ /^\s*$/ ) {
        my $rOpts = $self->{_rOpts};
        return
          if ( $self->{_consecutive_blank_lines} >=
            $rOpts->{'maximum-consecutive-blank-lines'} );
        $self->{_consecutive_blank_lines}++;
        $self->{_consecutive_nonblank_lines} = 0;
    }
    else {
        $self->{_consecutive_blank_lines} = 0;
        $self->{_consecutive_nonblank_lines}++;
    }
    $self->write_line($line);
}

# Send a line to the line sink and collect line length statistics for
# report_line_length_errors().
sub write_line {
    my $self = shift;
    my $line = shift;

    # TODO: go through and see if the test is necessary here
    if ( $line =~ /\n$/ ) { $self->{_output_line_number}++; }

    $self->{_line_sink_object}->write_line($line);

    # This calculation of excess line length ignores any internal tabs.
    # The '- 1' allows for the trailing newline.  Each leading tab is
    # counted as 'indent-columns' characters wide.
    my $rOpts  = $self->{_rOpts};
    my $exceed = length($line) - $rOpts->{'maximum-line-length'} - 1;
    if ( $line =~ /^\t+/g ) {
        $exceed += pos($line) * ( $rOpts->{'indent-columns'} - 1 );
    }

    # Note that we just incremented output line number to future value
    # so we must subtract 1 for current line number
    if ( length($line) > 1 + $self->{_max_output_line_length} ) {
        $self->{_max_output_line_length}    = length($line) - 1;
        $self->{_max_output_line_length_at} = $self->{_output_line_number} - 1;
    }

    if ( $exceed > 0 ) {
        my $output_line_number = $self->{_output_line_number};
        $self->{_last_line_length_error}    = $exceed;
        $self->{_last_line_length_error_at} = $output_line_number - 1;
        if ( $self->{_line_length_error_count} == 0 ) {
            $self->{_first_line_length_error}    = $exceed;
            $self->{_first_line_length_error_at} = $output_line_number - 1;
        }

        if ( $self->{_last_line_length_error} >
            $self->{_max_line_length_error} )
        {
            $self->{_max_line_length_error}    = $exceed;
            $self->{_max_line_length_error_at} = $output_line_number - 1;
        }

        # only the first few occurrences are logged individually
        if ( $self->{_line_length_error_count} < MAX_NAG_MESSAGES ) {
            $self->write_logfile_entry(
                "Line length exceeded by $exceed characters\n");
        }
        $self->{_line_length_error_count}++;
    }

}

# Write a summary of the line length statistics gathered by write_line()
# to the log file.
sub report_line_length_errors {
    my $self                    = shift;
    my $rOpts                   = $self->{_rOpts};
    my $line_length_error_count = $self->{_line_length_error_count};
    if ( $line_length_error_count == 0 ) {
        $self->write_logfile_entry(
            "No lines exceeded $rOpts->{'maximum-line-length'} characters\n");
        my $max_output_line_length    = $self->{_max_output_line_length};
        my $max_output_line_length_at = $self->{_max_output_line_length_at};
        $self->write_logfile_entry(
" Maximum output line length was $max_output_line_length at line $max_output_line_length_at\n"
        );

    }
    else {

        my $word = ( $line_length_error_count > 1 ) ? "s" : "";
        $self->write_logfile_entry(
"$line_length_error_count output line$word exceeded $rOpts->{'maximum-line-length'} characters:\n"
        );

        $word = ( $line_length_error_count > 1 ) ? "First" : "";
        my $first_line_length_error    = $self->{_first_line_length_error};
        my $first_line_length_error_at = $self->{_first_line_length_error_at};
        $self->write_logfile_entry(
" $word at line $first_line_length_error_at by $first_line_length_error characters\n"
        );

        if ( $line_length_error_count > 1 ) {
            my $max_line_length_error     = $self->{_max_line_length_error};
            my $max_line_length_error_at  = $self->{_max_line_length_error_at};
            my $last_line_length_error    = $self->{_last_line_length_error};
            my $last_line_length_error_at = $self->{_last_line_length_error_at};
            $self->write_logfile_entry(
" Maximum at line $max_line_length_error_at by $max_line_length_error characters\n"
            );
            $self->write_logfile_entry(
" Last at line $last_line_length_error_at by $last_line_length_error characters\n"
            );
        }
    }
}

#####################################################################
#
# The Perl::Tidy::Debugger class shows line tokenization
#
#####################################################################

package Perl::Tidy::Debugger;

# Constructor.  The debug file named $filename is not opened here; it is
# opened on the first call to write_debug_entry().
sub new {

    my ( $class, $filename ) = @_;

    bless {
        _debug_file        => $filename,
        _debug_file_opened => 0,
        _fh                => undef,
    }, $class;
}

# Open the debug file for writing and write a one-line header to it.
# If the file cannot be opened a warning is issued and no handle is
# stored.  The 'opened' flag is set in either case so that the open is
# attempted (and the warning issued) only once.
sub really_open_debug_file {

    my $self       = shift;
    my $debug_file = $self->{_debug_file};
    my $fh         = IO::File->new("> $debug_file");
    $self->{_debug_file_opened} = 1;
    $self->{_fh}                = $fh;

    unless ($fh) {
        warn("can't open $debug_file: $!\n");

        # there is no handle to print the header to
        return;
    }
    print $fh
      "Use -dump-token-types (-dtt) to get a list of token type codes\n";
}

# Close the debug file if it was successfully opened.
sub close_debug_file {

    my $self = shift;
    my $fh   = $self->{_fh};
    if ( $self->{_debug_file_opened} && $fh ) {

        # best effort; a failure to close is deliberately ignored
        eval { $fh->close() };
    }
}

sub write_debug_entry {

    # This is a debug dump routine which may be modified as necessary
    # to dump tokens on a line-by-line basis.  The output will be written
    # to the .DEBUG file when the -D flag is entered.
    #
    # The argument is a reference to a 'line_of_tokens' hash.  Nothing
    # is written if the debug file could not be opened.
    my $self           = shift;
    my $line_of_tokens = shift;

    my $input_line        = $line_of_tokens->{_line_text};
    my $rtoken_type       = $line_of_tokens->{_rtoken_type};
    my $rtokens           = $line_of_tokens->{_rtokens};
    my $rlevels           = $line_of_tokens->{_rlevels};
    my $rslevels          = $line_of_tokens->{_rslevels};
    my $rblock_type       = $line_of_tokens->{_rblock_type};
    my $input_line_number = $line_of_tokens->{_line_number};
    my $line_type         = $line_of_tokens->{_line_type};

    my $token_str              = "$input_line_number: ";
    my $reconstructed_original = "$input_line_number: ";
    my $block_str              = "$input_line_number: ";

    #$token_str .= "$line_type: ";
    #$reconstructed_original .= "$line_type: ";

    my $pattern   = "";
    my @next_char = ( '"', '"' );
    my $i_next    = 0;
    unless ( $self->{_debug_file_opened} ) { $self->really_open_debug_file() }
    my $fh = $self->{_fh};

    # the open failed (a warning has already been issued); skip the dump
    return unless ($fh);

    for my $j ( 0 .. $#{$rtoken_type} ) {

        # testing patterns
        if ( $rtoken_type->[$j] eq 'k' ) {
            $pattern .= $rtokens->[$j];
        }
        else {
            $pattern .= $rtoken_type->[$j];
        }
        $reconstructed_original .= $rtokens->[$j];
        $block_str              .= "($rblock_type->[$j])";
        my $num      = length( $rtokens->[$j] );
        my $type_str = $rtoken_type->[$j];

        # be sure there are no blank tokens (shouldn't happen)
        # This can only happen if a programming error has been made
        # because all valid tokens are non-blank
        if ( $type_str eq ' ' ) {
            print $fh "BLANK TOKEN on the next line\n";
            $type_str = $next_char[$i_next];
            $i_next   = 1 - $i_next;
        }

        # a one-character type is repeated to span the width of its token,
        # so that the type line lines up under the reconstructed text
        if ( length($type_str) == 1 ) {
            $type_str = $type_str x $num;
        }
        $token_str .= $type_str;
    }

    # Write what you want here ...
    # print $fh "$input_line\n";
    # print $fh "$pattern\n";
    print $fh "$reconstructed_original\n";
    print $fh "$token_str\n";

    #print $fh "$block_str\n";
}

#####################################################################
#
# The Perl::Tidy::LineBuffer class supplies a 'get_line()'
# method for returning the next line to be parsed, as well as a
# 'peek_ahead()' method
#
# The input parameter is an object with a 'get_line()' method
# which returns the next line to be parsed
#
#####################################################################

package Perl::Tidy::LineBuffer;

sub new {

    my $class              = shift;
    my $line_source_object = shift;

    return bless {
        _line_source_object => $line_source_object,
        _rlookahead_buffer  => [],
    }, $class;
}

# Return the line at position $buffer_index of the lookahead buffer
# without consuming it.  If the index is beyond the end of the buffer,
# exactly one more line is read from the source and appended to the
# buffer, and that line is returned.
sub peek_ahead {
    my $self               = shift;
    my $buffer_index       = shift;
    my $line               = undef;
    my $line_source_object = $self->{_line_source_object};
    my $rlookahead_buffer  = $self->{_rlookahead_buffer};
    if ( $buffer_index < scalar(@$rlookahead_buffer) ) {
        $line = $$rlookahead_buffer[$buffer_index];
    }
    else {
        $line = $line_source_object->get_line();
        push( @$rlookahead_buffer, $line );
    }
    return $line;
}

# Return the next line: lines already read by peek_ahead() are handed
# out first, then lines are taken directly from the source object.
sub get_line {
    my $self               = shift;
    my $line               = undef;
    my $line_source_object = $self->{_line_source_object};
    my $rlookahead_buffer  = $self->{_rlookahead_buffer};

    if ( scalar(@$rlookahead_buffer) ) {
        $line = shift @$rlookahead_buffer;
    }
    else {
        $line = $line_source_object->get_line();
    }
    return $line;
}

########################################################################
#
# the Perl::Tidy::Tokenizer package is essentially a filter which
# reads lines of perl source code from a source object and provides
# corresponding tokenized lines through its get_line() method.  Lines
# flow from the source_object to the caller like this:
#
# source_object --> LineBuffer_object --> Tokenizer -->  calling routine
#   get_line()         get_line()           get_line()     line_of_tokens
#
# The source object can be any object with a get_line() method which
# supplies one line (a character string) per call.
# The LineBuffer object is created by the Tokenizer.
# The Tokenizer returns a reference to a data structure 'line_of_tokens'
# containing one tokenized line for each call to its get_line() method.
#
# WARNING: This is not a real class yet.  Only one tokenizer may be used.
#
########################################################################

package Perl::Tidy::Tokenizer;

BEGIN {

    # Caution: these debug flags produce a lot of output
    # They should all be 0 except when debugging small scripts

    use constant TOKENIZER_DEBUG_FLAG_EXPECT   => 0;
    use constant TOKENIZER_DEBUG_FLAG_NSCAN    => 0;
    use constant TOKENIZER_DEBUG_FLAG_QUOTE    => 0;
    use constant TOKENIZER_DEBUG_FLAG_SCAN_ID  => 0;
    use constant TOKENIZER_DEBUG_FLAG_TOKENIZE => 0;

    my $debug_warning = sub {
        print "TOKENIZER_DEBUGGING with key $_[0]\n";
    };

    TOKENIZER_DEBUG_FLAG_EXPECT   && $debug_warning->('EXPECT');
    TOKENIZER_DEBUG_FLAG_NSCAN    && $debug_warning->('NSCAN');
    TOKENIZER_DEBUG_FLAG_QUOTE    && $debug_warning->('QUOTE');
    TOKENIZER_DEBUG_FLAG_SCAN_ID  && $debug_warning->('SCAN_ID');
    TOKENIZER_DEBUG_FLAG_TOKENIZE && $debug_warning->('TOKENIZE');

}

use Carp;

# PACKAGE VARIABLES for processing an entire FILE.
# the single tokenizer object (assigned in new())
use vars qw( $tokenizer_self );

# state carried from token to token and line to line
use vars qw(
  $last_nonblank_token
  $last_nonblank_type
  $last_nonblank_block_type
  $statement_type
  $in_attribute_list
  $current_package
  $context
);

# symbol tables built up while scanning the file
use vars qw(
  %is_constant
  %is_user_function
  %user_function_prototype
  %is_block_function
  %is_block_list_function
  %saw_function_definition
);

# container depths and the per-depth bookkeeping arrays
use vars qw(
  $brace_depth
  $paren_depth
  $square_bracket_depth

  @current_depth
  @total_depth
  $total_depth
  @nesting_sequence_number
  @current_sequence_number
  @paren_type
  @paren_semicolon_count
  @paren_structural_type
  @brace_type
  @brace_structural_type
  @brace_statement_type
  @brace_context
  @brace_package
  @square_bracket_type
  @square_bracket_structural_type
  @depth_array
  @nested_ternary_flag
  @starting_line_of_current_depth
);

# GLOBAL CONSTANTS for routines in this package
use vars qw(
  %is_indirect_object_taker
  %is_block_operator
  %expecting_operator_token
  %expecting_operator_types
  %expecting_term_types
  %expecting_term_token
  %is_digraph
  %is_file_test_operator
  %is_trigraph
  %is_valid_token_type
  %is_keyword
  %is_code_block_token
  %really_want_term
  @opening_brace_names
  @closing_brace_names
  %is_keyword_taking_list
  %is_q_qq_qw_qx_qr_s_y_tr_m
);

# possible values of operator_expected()
use constant TERM     => -1;
use constant UNKNOWN  => 0;
use constant OPERATOR => 1;

# possible values of context
use constant SCALAR_CONTEXT  => -1;
use constant UNKNOWN_CONTEXT => 0;
use constant LIST_CONTEXT    => 1;

# Maximum number of little messages; probably need not be changed.
20592use constant MAX_NAG_MESSAGES => 6; 20593 20594{ 20595 20596 # methods to count instances 20597 my $_count = 0; 20598 sub get_count { $_count; } 20599 sub _increment_count { ++$_count } 20600 sub _decrement_count { --$_count } 20601} 20602 20603sub DESTROY { 20604 $_[0]->_decrement_count(); 20605} 20606 20607sub new { 20608 20609 my $class = shift; 20610 20611 # Note: 'tabs' and 'indent_columns' are temporary and should be 20612 # removed asap 20613 my %defaults = ( 20614 source_object => undef, 20615 debugger_object => undef, 20616 diagnostics_object => undef, 20617 logger_object => undef, 20618 starting_level => undef, 20619 indent_columns => 4, 20620 tabs => 0, 20621 look_for_hash_bang => 0, 20622 trim_qw => 1, 20623 look_for_autoloader => 1, 20624 look_for_selfloader => 1, 20625 starting_line_number => 1, 20626 ); 20627 my %args = ( %defaults, @_ ); 20628 20629 # we are given an object with a get_line() method to supply source lines 20630 my $source_object = $args{source_object}; 20631 20632 # we create another object with a get_line() and peek_ahead() method 20633 my $line_buffer_object = Perl::Tidy::LineBuffer->new($source_object); 20634 20635 # Tokenizer state data is as follows: 20636 # _rhere_target_list reference to list of here-doc targets 20637 # _here_doc_target the target string for a here document 20638 # _here_quote_character the type of here-doc quoting (" ' ` or none) 20639 # to determine if interpolation is done 20640 # _quote_target character we seek if chasing a quote 20641 # _line_start_quote line where we started looking for a long quote 20642 # _in_here_doc flag indicating if we are in a here-doc 20643 # _in_pod flag set if we are in pod documentation 20644 # _in_error flag set if we saw severe error (binary in script) 20645 # _in_data flag set if we are in __DATA__ section 20646 # _in_end flag set if we are in __END__ section 20647 # _in_format flag set if we are in a format description 20648 # _in_attribute_list flag telling if we are 
looking for attributes 20649 # _in_quote flag telling if we are chasing a quote 20650 # _starting_level indentation level of first line 20651 # _input_tabstr string denoting one indentation level of input file 20652 # _know_input_tabstr flag indicating if we know _input_tabstr 20653 # _line_buffer_object object with get_line() method to supply source code 20654 # _diagnostics_object place to write debugging information 20655 # _unexpected_error_count error count used to limit output 20656 # _lower_case_labels_at line numbers where lower case labels seen 20657 $tokenizer_self = { 20658 _rhere_target_list => [], 20659 _in_here_doc => 0, 20660 _here_doc_target => "", 20661 _here_quote_character => "", 20662 _in_data => 0, 20663 _in_end => 0, 20664 _in_format => 0, 20665 _in_error => 0, 20666 _in_pod => 0, 20667 _in_attribute_list => 0, 20668 _in_quote => 0, 20669 _quote_target => "", 20670 _line_start_quote => -1, 20671 _starting_level => $args{starting_level}, 20672 _know_starting_level => defined( $args{starting_level} ), 20673 _tabs => $args{tabs}, 20674 _indent_columns => $args{indent_columns}, 20675 _look_for_hash_bang => $args{look_for_hash_bang}, 20676 _trim_qw => $args{trim_qw}, 20677 _input_tabstr => "", 20678 _know_input_tabstr => -1, 20679 _last_line_number => $args{starting_line_number} - 1, 20680 _saw_perl_dash_P => 0, 20681 _saw_perl_dash_w => 0, 20682 _saw_use_strict => 0, 20683 _saw_v_string => 0, 20684 _look_for_autoloader => $args{look_for_autoloader}, 20685 _look_for_selfloader => $args{look_for_selfloader}, 20686 _saw_autoloader => 0, 20687 _saw_selfloader => 0, 20688 _saw_hash_bang => 0, 20689 _saw_end => 0, 20690 _saw_data => 0, 20691 _saw_negative_indentation => 0, 20692 _started_tokenizing => 0, 20693 _line_buffer_object => $line_buffer_object, 20694 _debugger_object => $args{debugger_object}, 20695 _diagnostics_object => $args{diagnostics_object}, 20696 _logger_object => $args{logger_object}, 20697 _unexpected_error_count => 0, 20698 
_started_looking_for_here_target_at => 0, 20699 _nearly_matched_here_target_at => undef, 20700 _line_text => "", 20701 _rlower_case_labels_at => undef, 20702 }; 20703 20704 prepare_for_a_new_file(); 20705 find_starting_indentation_level(); 20706 20707 bless $tokenizer_self, $class; 20708 20709 # This is not a full class yet, so die if an attempt is made to 20710 # create more than one object. 20711 20712 if ( _increment_count() > 1 ) { 20713 confess 20714"Attempt to create more than 1 object in $class, which is not a true class yet\n"; 20715 } 20716 20717 return $tokenizer_self; 20718 20719} 20720 20721# interface to Perl::Tidy::Logger routines 20722sub warning { 20723 my $logger_object = $tokenizer_self->{_logger_object}; 20724 if ($logger_object) { 20725 $logger_object->warning(@_); 20726 } 20727} 20728 20729sub complain { 20730 my $logger_object = $tokenizer_self->{_logger_object}; 20731 if ($logger_object) { 20732 $logger_object->complain(@_); 20733 } 20734} 20735 20736sub write_logfile_entry { 20737 my $logger_object = $tokenizer_self->{_logger_object}; 20738 if ($logger_object) { 20739 $logger_object->write_logfile_entry(@_); 20740 } 20741} 20742 20743sub interrupt_logfile { 20744 my $logger_object = $tokenizer_self->{_logger_object}; 20745 if ($logger_object) { 20746 $logger_object->interrupt_logfile(); 20747 } 20748} 20749 20750sub resume_logfile { 20751 my $logger_object = $tokenizer_self->{_logger_object}; 20752 if ($logger_object) { 20753 $logger_object->resume_logfile(); 20754 } 20755} 20756 20757sub increment_brace_error { 20758 my $logger_object = $tokenizer_self->{_logger_object}; 20759 if ($logger_object) { 20760 $logger_object->increment_brace_error(); 20761 } 20762} 20763 20764sub report_definite_bug { 20765 my $logger_object = $tokenizer_self->{_logger_object}; 20766 if ($logger_object) { 20767 $logger_object->report_definite_bug(); 20768 } 20769} 20770 20771sub brace_warning { 20772 my $logger_object = $tokenizer_self->{_logger_object}; 20773 if 
($logger_object) { 20774 $logger_object->brace_warning(@_); 20775 } 20776} 20777 20778sub get_saw_brace_error { 20779 my $logger_object = $tokenizer_self->{_logger_object}; 20780 if ($logger_object) { 20781 $logger_object->get_saw_brace_error(); 20782 } 20783 else { 20784 0; 20785 } 20786} 20787 20788# interface to Perl::Tidy::Diagnostics routines 20789sub write_diagnostics { 20790 if ( $tokenizer_self->{_diagnostics_object} ) { 20791 $tokenizer_self->{_diagnostics_object}->write_diagnostics(@_); 20792 } 20793} 20794 20795sub report_tokenization_errors { 20796 20797 my $self = shift; 20798 20799 my $level = get_indentation_level(); 20800 if ( $level != $tokenizer_self->{_starting_level} ) { 20801 warning("final indentation level: $level\n"); 20802 } 20803 20804 check_final_nesting_depths(); 20805 20806 if ( $tokenizer_self->{_look_for_hash_bang} 20807 && !$tokenizer_self->{_saw_hash_bang} ) 20808 { 20809 warning( 20810 "hit EOF without seeing hash-bang line; maybe don't need -x?\n"); 20811 } 20812 20813 if ( $tokenizer_self->{_in_format} ) { 20814 warning("hit EOF while in format description\n"); 20815 } 20816 20817 if ( $tokenizer_self->{_in_pod} ) { 20818 20819 # Just write log entry if this is after __END__ or __DATA__ 20820 # because this happens to often, and it is not likely to be 20821 # a parsing error. 
20822 if ( $tokenizer_self->{_saw_data} || $tokenizer_self->{_saw_end} ) { 20823 write_logfile_entry( 20824"hit eof while in pod documentation (no =cut seen)\n\tthis can cause trouble with some pod utilities\n" 20825 ); 20826 } 20827 20828 else { 20829 complain( 20830"hit eof while in pod documentation (no =cut seen)\n\tthis can cause trouble with some pod utilities\n" 20831 ); 20832 } 20833 20834 } 20835 20836 if ( $tokenizer_self->{_in_here_doc} ) { 20837 my $here_doc_target = $tokenizer_self->{_here_doc_target}; 20838 my $started_looking_for_here_target_at = 20839 $tokenizer_self->{_started_looking_for_here_target_at}; 20840 if ($here_doc_target) { 20841 warning( 20842"hit EOF in here document starting at line $started_looking_for_here_target_at with target: $here_doc_target\n" 20843 ); 20844 } 20845 else { 20846 warning( 20847"hit EOF in here document starting at line $started_looking_for_here_target_at with empty target string\n" 20848 ); 20849 } 20850 my $nearly_matched_here_target_at = 20851 $tokenizer_self->{_nearly_matched_here_target_at}; 20852 if ($nearly_matched_here_target_at) { 20853 warning( 20854"NOTE: almost matched at input line $nearly_matched_here_target_at except for whitespace\n" 20855 ); 20856 } 20857 } 20858 20859 if ( $tokenizer_self->{_in_quote} ) { 20860 my $line_start_quote = $tokenizer_self->{_line_start_quote}; 20861 my $quote_target = $tokenizer_self->{_quote_target}; 20862 my $what = 20863 ( $tokenizer_self->{_in_attribute_list} ) 20864 ? 
"attribute list" 20865 : "quote/pattern"; 20866 warning( 20867"hit EOF seeking end of $what starting at line $line_start_quote ending in $quote_target\n" 20868 ); 20869 } 20870 20871 unless ( $tokenizer_self->{_saw_perl_dash_w} ) { 20872 if ( $] < 5.006 ) { 20873 write_logfile_entry("Suggest including '-w parameter'\n"); 20874 } 20875 else { 20876 write_logfile_entry("Suggest including 'use warnings;'\n"); 20877 } 20878 } 20879 20880 if ( $tokenizer_self->{_saw_perl_dash_P} ) { 20881 write_logfile_entry("Use of -P parameter for defines is discouraged\n"); 20882 } 20883 20884 unless ( $tokenizer_self->{_saw_use_strict} ) { 20885 write_logfile_entry("Suggest including 'use strict;'\n"); 20886 } 20887 20888 # it is suggested that lables have at least one upper case character 20889 # for legibility and to avoid code breakage as new keywords are introduced 20890 if ( $tokenizer_self->{_rlower_case_labels_at} ) { 20891 my @lower_case_labels_at = 20892 @{ $tokenizer_self->{_rlower_case_labels_at} }; 20893 write_logfile_entry( 20894 "Suggest using upper case characters in label(s)\n"); 20895 local $" = ')('; 20896 write_logfile_entry(" defined at line(s): (@lower_case_labels_at)\n"); 20897 } 20898} 20899 20900sub report_v_string { 20901 20902 # warn if this version can't handle v-strings 20903 my $tok = shift; 20904 unless ( $tokenizer_self->{_saw_v_string} ) { 20905 $tokenizer_self->{_saw_v_string} = $tokenizer_self->{_last_line_number}; 20906 } 20907 if ( $] < 5.006 ) { 20908 warning( 20909"Found v-string '$tok' but v-strings are not implemented in your version of perl; see Camel 3 book ch 2\n" 20910 ); 20911 } 20912} 20913 20914sub get_input_line_number { 20915 return $tokenizer_self->{_last_line_number}; 20916} 20917 20918# returns the next tokenized line 20919sub get_line { 20920 20921 my $self = shift; 20922 20923 # USES GLOBAL VARIABLES: $tokenizer_self, $brace_depth, 20924 # $square_bracket_depth, $paren_depth 20925 20926 my $input_line = 
$tokenizer_self->{_line_buffer_object}->get_line(); 20927 $tokenizer_self->{_line_text} = $input_line; 20928 20929 return undef unless ($input_line); 20930 20931 my $input_line_number = ++$tokenizer_self->{_last_line_number}; 20932 20933 # Find and remove what characters terminate this line, including any 20934 # control r 20935 my $input_line_separator = ""; 20936 if ( chomp($input_line) ) { $input_line_separator = $/ } 20937 20938 # TODO: what other characters should be included here? 20939 if ( $input_line =~ s/((\r|\035|\032)+)$// ) { 20940 $input_line_separator = $2 . $input_line_separator; 20941 } 20942 20943 # for backwards compatability we keep the line text terminated with 20944 # a newline character 20945 $input_line .= "\n"; 20946 $tokenizer_self->{_line_text} = $input_line; # update 20947 20948 # create a data structure describing this line which will be 20949 # returned to the caller. 20950 20951 # _line_type codes are: 20952 # SYSTEM - system-specific code before hash-bang line 20953 # CODE - line of perl code (including comments) 20954 # POD_START - line starting pod, such as '=head' 20955 # POD - pod documentation text 20956 # POD_END - last line of pod section, '=cut' 20957 # HERE - text of here-document 20958 # HERE_END - last line of here-doc (target word) 20959 # FORMAT - format section 20960 # FORMAT_END - last line of format section, '.' 20961 # DATA_START - __DATA__ line 20962 # DATA - unidentified text following __DATA__ 20963 # END_START - __END__ line 20964 # END - unidentified text following __END__ 20965 # ERROR - we are in big trouble, probably not a perl script 20966 20967 # Other variables: 20968 # _curly_brace_depth - depth of curly braces at start of line 20969 # _square_bracket_depth - depth of square brackets at start of line 20970 # _paren_depth - depth of parens at start of line 20971 # _starting_in_quote - this line continues a multi-line quote 20972 # (so don't trim leading blanks!) 
20973 # _ending_in_quote - this line ends in a multi-line quote 20974 # (so don't trim trailing blanks!) 20975 my $line_of_tokens = { 20976 _line_type => 'EOF', 20977 _line_text => $input_line, 20978 _line_number => $input_line_number, 20979 _rtoken_type => undef, 20980 _rtokens => undef, 20981 _rlevels => undef, 20982 _rslevels => undef, 20983 _rblock_type => undef, 20984 _rcontainer_type => undef, 20985 _rcontainer_environment => undef, 20986 _rtype_sequence => undef, 20987 _rnesting_tokens => undef, 20988 _rci_levels => undef, 20989 _rnesting_blocks => undef, 20990 _python_indentation_level => -1, ## 0, 20991 _starting_in_quote => 0, # to be set by subroutine 20992 _ending_in_quote => 0, 20993 _curly_brace_depth => $brace_depth, 20994 _square_bracket_depth => $square_bracket_depth, 20995 _paren_depth => $paren_depth, 20996 _quote_character => '', 20997 }; 20998 20999 # must print line unchanged if we are in a here document 21000 if ( $tokenizer_self->{_in_here_doc} ) { 21001 21002 $line_of_tokens->{_line_type} = 'HERE'; 21003 my $here_doc_target = $tokenizer_self->{_here_doc_target}; 21004 my $here_quote_character = $tokenizer_self->{_here_quote_character}; 21005 my $candidate_target = $input_line; 21006 chomp $candidate_target; 21007 if ( $candidate_target eq $here_doc_target ) { 21008 $tokenizer_self->{_nearly_matched_here_target_at} = undef; 21009 $line_of_tokens->{_line_type} = 'HERE_END'; 21010 write_logfile_entry("Exiting HERE document $here_doc_target\n"); 21011 21012 my $rhere_target_list = $tokenizer_self->{_rhere_target_list}; 21013 if (@$rhere_target_list) { # there can be multiple here targets 21014 ( $here_doc_target, $here_quote_character ) = 21015 @{ shift @$rhere_target_list }; 21016 $tokenizer_self->{_here_doc_target} = $here_doc_target; 21017 $tokenizer_self->{_here_quote_character} = 21018 $here_quote_character; 21019 write_logfile_entry( 21020 "Entering HERE document $here_doc_target\n"); 21021 
$tokenizer_self->{_nearly_matched_here_target_at} = undef; 21022 $tokenizer_self->{_started_looking_for_here_target_at} = 21023 $input_line_number; 21024 } 21025 else { 21026 $tokenizer_self->{_in_here_doc} = 0; 21027 $tokenizer_self->{_here_doc_target} = ""; 21028 $tokenizer_self->{_here_quote_character} = ""; 21029 } 21030 } 21031 21032 # check for error of extra whitespace 21033 # note for PERL6: leading whitespace is allowed 21034 else { 21035 $candidate_target =~ s/\s*$//; 21036 $candidate_target =~ s/^\s*//; 21037 if ( $candidate_target eq $here_doc_target ) { 21038 $tokenizer_self->{_nearly_matched_here_target_at} = 21039 $input_line_number; 21040 } 21041 } 21042 return $line_of_tokens; 21043 } 21044 21045 # must print line unchanged if we are in a format section 21046 elsif ( $tokenizer_self->{_in_format} ) { 21047 21048 if ( $input_line =~ /^\.[\s#]*$/ ) { 21049 write_logfile_entry("Exiting format section\n"); 21050 $tokenizer_self->{_in_format} = 0; 21051 $line_of_tokens->{_line_type} = 'FORMAT_END'; 21052 } 21053 else { 21054 $line_of_tokens->{_line_type} = 'FORMAT'; 21055 } 21056 return $line_of_tokens; 21057 } 21058 21059 # must print line unchanged if we are in pod documentation 21060 elsif ( $tokenizer_self->{_in_pod} ) { 21061 21062 $line_of_tokens->{_line_type} = 'POD'; 21063 if ( $input_line =~ /^=cut/ ) { 21064 $line_of_tokens->{_line_type} = 'POD_END'; 21065 write_logfile_entry("Exiting POD section\n"); 21066 $tokenizer_self->{_in_pod} = 0; 21067 } 21068 if ( $input_line =~ /^\#\!.*perl\b/ ) { 21069 warning( 21070 "Hash-bang in pod can cause older versions of perl to fail! \n" 21071 ); 21072 } 21073 21074 return $line_of_tokens; 21075 } 21076 21077 # must print line unchanged if we have seen a severe error (i.e., we 21078 # are seeing illegal tokens and connot continue. Syntax errors do 21079 # not pass this route). 
Calling routine can decide what to do, but 21080 # the default can be to just pass all lines as if they were after __END__ 21081 elsif ( $tokenizer_self->{_in_error} ) { 21082 $line_of_tokens->{_line_type} = 'ERROR'; 21083 return $line_of_tokens; 21084 } 21085 21086 # print line unchanged if we are __DATA__ section 21087 elsif ( $tokenizer_self->{_in_data} ) { 21088 21089 # ...but look for POD 21090 # Note that the _in_data and _in_end flags remain set 21091 # so that we return to that state after seeing the 21092 # end of a pod section 21093 if ( $input_line =~ /^=(?!cut)/ ) { 21094 $line_of_tokens->{_line_type} = 'POD_START'; 21095 write_logfile_entry("Entering POD section\n"); 21096 $tokenizer_self->{_in_pod} = 1; 21097 return $line_of_tokens; 21098 } 21099 else { 21100 $line_of_tokens->{_line_type} = 'DATA'; 21101 return $line_of_tokens; 21102 } 21103 } 21104 21105 # print line unchanged if we are in __END__ section 21106 elsif ( $tokenizer_self->{_in_end} ) { 21107 21108 # ...but look for POD 21109 # Note that the _in_data and _in_end flags remain set 21110 # so that we return to that state after seeing the 21111 # end of a pod section 21112 if ( $input_line =~ /^=(?!cut)/ ) { 21113 $line_of_tokens->{_line_type} = 'POD_START'; 21114 write_logfile_entry("Entering POD section\n"); 21115 $tokenizer_self->{_in_pod} = 1; 21116 return $line_of_tokens; 21117 } 21118 else { 21119 $line_of_tokens->{_line_type} = 'END'; 21120 return $line_of_tokens; 21121 } 21122 } 21123 21124 # check for a hash-bang line if we haven't seen one 21125 if ( !$tokenizer_self->{_saw_hash_bang} ) { 21126 if ( $input_line =~ /^\#\!.*perl\b/ ) { 21127 $tokenizer_self->{_saw_hash_bang} = $input_line_number; 21128 21129 # check for -w and -P flags 21130 if ( $input_line =~ /^\#\!.*perl\s.*-.*P/ ) { 21131 $tokenizer_self->{_saw_perl_dash_P} = 1; 21132 } 21133 21134 if ( $input_line =~ /^\#\!.*perl\s.*-.*w/ ) { 21135 $tokenizer_self->{_saw_perl_dash_w} = 1; 21136 } 21137 21138 if ( ( 
$input_line_number > 1 ) 21139 && ( !$tokenizer_self->{_look_for_hash_bang} ) ) 21140 { 21141 21142 # this is helpful for VMS systems; we may have accidentally 21143 # tokenized some DCL commands 21144 if ( $tokenizer_self->{_started_tokenizing} ) { 21145 warning( 21146"There seems to be a hash-bang after line 1; do you need to run with -x ?\n" 21147 ); 21148 } 21149 else { 21150 complain("Useless hash-bang after line 1\n"); 21151 } 21152 } 21153 21154 # Report the leading hash-bang as a system line 21155 # This will prevent -dac from deleting it 21156 else { 21157 $line_of_tokens->{_line_type} = 'SYSTEM'; 21158 return $line_of_tokens; 21159 } 21160 } 21161 } 21162 21163 # wait for a hash-bang before parsing if the user invoked us with -x 21164 if ( $tokenizer_self->{_look_for_hash_bang} 21165 && !$tokenizer_self->{_saw_hash_bang} ) 21166 { 21167 $line_of_tokens->{_line_type} = 'SYSTEM'; 21168 return $line_of_tokens; 21169 } 21170 21171 # a first line of the form ': #' will be marked as SYSTEM 21172 # since lines of this form may be used by tcsh 21173 if ( $input_line_number == 1 && $input_line =~ /^\s*\:\s*\#/ ) { 21174 $line_of_tokens->{_line_type} = 'SYSTEM'; 21175 return $line_of_tokens; 21176 } 21177 21178 # now we know that it is ok to tokenize the line... 
21179 # the line tokenizer will modify any of these private variables: 21180 # _rhere_target_list 21181 # _in_data 21182 # _in_end 21183 # _in_format 21184 # _in_error 21185 # _in_pod 21186 # _in_quote 21187 my $ending_in_quote_last = $tokenizer_self->{_in_quote}; 21188 tokenize_this_line($line_of_tokens); 21189 21190 # Now finish defining the return structure and return it 21191 $line_of_tokens->{_ending_in_quote} = $tokenizer_self->{_in_quote}; 21192 21193 # handle severe error (binary data in script) 21194 if ( $tokenizer_self->{_in_error} ) { 21195 $tokenizer_self->{_in_quote} = 0; # to avoid any more messages 21196 warning("Giving up after error\n"); 21197 $line_of_tokens->{_line_type} = 'ERROR'; 21198 reset_indentation_level(0); # avoid error messages 21199 return $line_of_tokens; 21200 } 21201 21202 # handle start of pod documentation 21203 if ( $tokenizer_self->{_in_pod} ) { 21204 21205 # This gets tricky..above a __DATA__ or __END__ section, perl 21206 # accepts '=cut' as the start of pod section. But afterwards, 21207 # only pod utilities see it and they may ignore an =cut without 21208 # leading =head. In any case, this isn't good. 21209 if ( $input_line =~ /^=cut\b/ ) { 21210 if ( $tokenizer_self->{_saw_data} || $tokenizer_self->{_saw_end} ) { 21211 complain("=cut while not in pod ignored\n"); 21212 $tokenizer_self->{_in_pod} = 0; 21213 $line_of_tokens->{_line_type} = 'POD_END'; 21214 } 21215 else { 21216 $line_of_tokens->{_line_type} = 'POD_START'; 21217 complain( 21218"=cut starts a pod section .. 
this can fool pod utilities.\n" 21219 ); 21220 write_logfile_entry("Entering POD section\n"); 21221 } 21222 } 21223 21224 else { 21225 $line_of_tokens->{_line_type} = 'POD_START'; 21226 write_logfile_entry("Entering POD section\n"); 21227 } 21228 21229 return $line_of_tokens; 21230 } 21231 21232 # update indentation levels for log messages 21233 if ( $input_line !~ /^\s*$/ ) { 21234 my $rlevels = $line_of_tokens->{_rlevels}; 21235 my $structural_indentation_level = $$rlevels[0]; 21236 my ( $python_indentation_level, $msg ) = 21237 find_indentation_level( $input_line, $structural_indentation_level ); 21238 if ($msg) { write_logfile_entry("$msg") } 21239 if ( $tokenizer_self->{_know_input_tabstr} == 1 ) { 21240 $line_of_tokens->{_python_indentation_level} = 21241 $python_indentation_level; 21242 } 21243 } 21244 21245 # see if this line contains here doc targets 21246 my $rhere_target_list = $tokenizer_self->{_rhere_target_list}; 21247 if (@$rhere_target_list) { 21248 21249 my ( $here_doc_target, $here_quote_character ) = 21250 @{ shift @$rhere_target_list }; 21251 $tokenizer_self->{_in_here_doc} = 1; 21252 $tokenizer_self->{_here_doc_target} = $here_doc_target; 21253 $tokenizer_self->{_here_quote_character} = $here_quote_character; 21254 write_logfile_entry("Entering HERE document $here_doc_target\n"); 21255 $tokenizer_self->{_started_looking_for_here_target_at} = 21256 $input_line_number; 21257 } 21258 21259 # NOTE: __END__ and __DATA__ statements are written unformatted 21260 # because they can theoretically contain additional characters 21261 # which are not tokenized (and cannot be read with <DATA> either!). 
21262 if ( $tokenizer_self->{_in_data} ) { 21263 $line_of_tokens->{_line_type} = 'DATA_START'; 21264 write_logfile_entry("Starting __DATA__ section\n"); 21265 $tokenizer_self->{_saw_data} = 1; 21266 21267 # keep parsing after __DATA__ if use SelfLoader was seen 21268 if ( $tokenizer_self->{_saw_selfloader} ) { 21269 $tokenizer_self->{_in_data} = 0; 21270 write_logfile_entry( 21271 "SelfLoader seen, continuing; -nlsl deactivates\n"); 21272 } 21273 21274 return $line_of_tokens; 21275 } 21276 21277 elsif ( $tokenizer_self->{_in_end} ) { 21278 $line_of_tokens->{_line_type} = 'END_START'; 21279 write_logfile_entry("Starting __END__ section\n"); 21280 $tokenizer_self->{_saw_end} = 1; 21281 21282 # keep parsing after __END__ if use AutoLoader was seen 21283 if ( $tokenizer_self->{_saw_autoloader} ) { 21284 $tokenizer_self->{_in_end} = 0; 21285 write_logfile_entry( 21286 "AutoLoader seen, continuing; -nlal deactivates\n"); 21287 } 21288 return $line_of_tokens; 21289 } 21290 21291 # now, finally, we know that this line is type 'CODE' 21292 $line_of_tokens->{_line_type} = 'CODE'; 21293 21294 # remember if we have seen any real code 21295 if ( !$tokenizer_self->{_started_tokenizing} 21296 && $input_line !~ /^\s*$/ 21297 && $input_line !~ /^\s*#/ ) 21298 { 21299 $tokenizer_self->{_started_tokenizing} = 1; 21300 } 21301 21302 if ( $tokenizer_self->{_debugger_object} ) { 21303 $tokenizer_self->{_debugger_object}->write_debug_entry($line_of_tokens); 21304 } 21305 21306 # Note: if keyword 'format' occurs in this line code, it is still CODE 21307 # (keyword 'format' need not start a line) 21308 if ( $tokenizer_self->{_in_format} ) { 21309 write_logfile_entry("Entering format section\n"); 21310 } 21311 21312 if ( $tokenizer_self->{_in_quote} 21313 and ( $tokenizer_self->{_line_start_quote} < 0 ) ) 21314 { 21315 21316 #if ( ( my $quote_target = get_quote_target() ) !~ /^\s*$/ ) { 21317 if ( 21318 ( my $quote_target = $tokenizer_self->{_quote_target} ) !~ /^\s*$/ ) 21319 { 21320 
$tokenizer_self->{_line_start_quote} = $input_line_number; 21321 write_logfile_entry( 21322 "Start multi-line quote or pattern ending in $quote_target\n"); 21323 } 21324 } 21325 elsif ( ( $tokenizer_self->{_line_start_quote} >= 0 ) 21326 and !$tokenizer_self->{_in_quote} ) 21327 { 21328 $tokenizer_self->{_line_start_quote} = -1; 21329 write_logfile_entry("End of multi-line quote or pattern\n"); 21330 } 21331 21332 # we are returning a line of CODE 21333 return $line_of_tokens; 21334} 21335 21336sub find_starting_indentation_level { 21337 21338 # USES GLOBAL VARIABLES: $tokenizer_self 21339 my $starting_level = 0; 21340 my $know_input_tabstr = -1; # flag for find_indentation_level 21341 21342 # use value if given as parameter 21343 if ( $tokenizer_self->{_know_starting_level} ) { 21344 $starting_level = $tokenizer_self->{_starting_level}; 21345 } 21346 21347 # if we know there is a hash_bang line, the level must be zero 21348 elsif ( $tokenizer_self->{_look_for_hash_bang} ) { 21349 $tokenizer_self->{_know_starting_level} = 1; 21350 } 21351 21352 # otherwise figure it out from the input file 21353 else { 21354 my $line; 21355 my $i = 0; 21356 my $structural_indentation_level = -1; # flag for find_indentation_level 21357 21358 my $msg = ""; 21359 while ( $line = 21360 $tokenizer_self->{_line_buffer_object}->peek_ahead( $i++ ) ) 21361 { 21362 21363 # if first line is #! 
then assume starting level is zero 21364 if ( $i == 1 && $line =~ /^\#\!/ ) { 21365 $starting_level = 0; 21366 last; 21367 } 21368 next if ( $line =~ /^\s*#/ ); # must not be comment 21369 next if ( $line =~ /^\s*$/ ); # must not be blank 21370 ( $starting_level, $msg ) = 21371 find_indentation_level( $line, $structural_indentation_level ); 21372 if ($msg) { write_logfile_entry("$msg") } 21373 last; 21374 } 21375 $msg = "Line $i implies starting-indentation-level = $starting_level\n"; 21376 21377 if ( $starting_level > 0 ) { 21378 21379 my $input_tabstr = $tokenizer_self->{_input_tabstr}; 21380 if ( $input_tabstr eq "\t" ) { 21381 $msg .= "by guessing input tabbing uses 1 tab per level\n"; 21382 } 21383 else { 21384 my $cols = length($input_tabstr); 21385 $msg .= 21386 "by guessing input tabbing uses $cols blanks per level\n"; 21387 } 21388 } 21389 write_logfile_entry("$msg"); 21390 } 21391 $tokenizer_self->{_starting_level} = $starting_level; 21392 reset_indentation_level($starting_level); 21393} 21394 21395# Find indentation level given a input line. At the same time, try to 21396# figure out the input tabbing scheme. 21397# 21398# There are two types of calls: 21399# 21400# Type 1: $structural_indentation_level < 0 21401# In this case we have to guess $input_tabstr to figure out the level. 21402# 21403# Type 2: $structural_indentation_level >= 0 21404# In this case the level of this line is known, and this routine can 21405# update the tabbing string, if still unknown, to make the level correct. 21406 21407sub find_indentation_level { 21408 my ( $line, $structural_indentation_level ) = @_; 21409 21410 # USES GLOBAL VARIABLES: $tokenizer_self 21411 my $level = 0; 21412 my $msg = ""; 21413 21414 my $know_input_tabstr = $tokenizer_self->{_know_input_tabstr}; 21415 my $input_tabstr = $tokenizer_self->{_input_tabstr}; 21416 21417 # find leading whitespace 21418 my $leading_whitespace = ( $line =~ /^(\s*)/ ) ? 
$1 : ""; 21419 21420 # make first guess at input tabbing scheme if necessary 21421 if ( $know_input_tabstr < 0 ) { 21422 21423 $know_input_tabstr = 0; 21424 21425 if ( $tokenizer_self->{_tabs} ) { 21426 $input_tabstr = "\t"; 21427 if ( length($leading_whitespace) > 0 ) { 21428 if ( $leading_whitespace !~ /\t/ ) { 21429 21430 my $cols = $tokenizer_self->{_indent_columns}; 21431 21432 if ( length($leading_whitespace) < $cols ) { 21433 $cols = length($leading_whitespace); 21434 } 21435 $input_tabstr = " " x $cols; 21436 } 21437 } 21438 } 21439 else { 21440 $input_tabstr = " " x $tokenizer_self->{_indent_columns}; 21441 21442 if ( length($leading_whitespace) > 0 ) { 21443 if ( $leading_whitespace =~ /^\t/ ) { 21444 $input_tabstr = "\t"; 21445 } 21446 } 21447 } 21448 $tokenizer_self->{_know_input_tabstr} = $know_input_tabstr; 21449 $tokenizer_self->{_input_tabstr} = $input_tabstr; 21450 } 21451 21452 # determine the input tabbing scheme if possible 21453 if ( ( $know_input_tabstr == 0 ) 21454 && ( length($leading_whitespace) > 0 ) 21455 && ( $structural_indentation_level > 0 ) ) 21456 { 21457 my $saved_input_tabstr = $input_tabstr; 21458 21459 # check for common case of one tab per indentation level 21460 if ( $leading_whitespace eq "\t" x $structural_indentation_level ) { 21461 if ( $leading_whitespace eq "\t" x $structural_indentation_level ) { 21462 $input_tabstr = "\t"; 21463 $msg = "Guessing old indentation was tab character\n"; 21464 } 21465 } 21466 21467 else { 21468 21469 # detab any tabs based on 8 blanks per tab 21470 my $entabbed = ""; 21471 if ( $leading_whitespace =~ s/^\t+/ /g ) { 21472 $entabbed = "entabbed"; 21473 } 21474 21475 # now compute tabbing from number of spaces 21476 my $columns = 21477 length($leading_whitespace) / $structural_indentation_level; 21478 if ( $columns == int $columns ) { 21479 $msg = 21480 "Guessing old indentation was $columns $entabbed spaces\n"; 21481 } 21482 else { 21483 $columns = int $columns; 21484 $msg = 21485"old 
indentation is unclear, using $columns $entabbed spaces\n"; 21486 } 21487 $input_tabstr = " " x $columns; 21488 } 21489 $know_input_tabstr = 1; 21490 $tokenizer_self->{_know_input_tabstr} = $know_input_tabstr; 21491 $tokenizer_self->{_input_tabstr} = $input_tabstr; 21492 21493 # see if mistakes were made 21494 if ( ( $tokenizer_self->{_starting_level} > 0 ) 21495 && !$tokenizer_self->{_know_starting_level} ) 21496 { 21497 21498 if ( $input_tabstr ne $saved_input_tabstr ) { 21499 complain( 21500"I made a bad starting level guess; rerun with a value for -sil \n" 21501 ); 21502 } 21503 } 21504 } 21505 21506 # use current guess at input tabbing to get input indentation level 21507 # 21508 # Patch to handle a common case of entabbed leading whitespace 21509 # If the leading whitespace equals 4 spaces and we also have 21510 # tabs, detab the input whitespace assuming 8 spaces per tab. 21511 if ( length($input_tabstr) == 4 ) { 21512 $leading_whitespace =~ s/^\t+/ /g; 21513 } 21514 21515 if ( ( my $len_tab = length($input_tabstr) ) > 0 ) { 21516 my $pos = 0; 21517 21518 while ( substr( $leading_whitespace, $pos, $len_tab ) eq $input_tabstr ) 21519 { 21520 $pos += $len_tab; 21521 $level++; 21522 } 21523 } 21524 return ( $level, $msg ); 21525} 21526 21527# This is a currently unused debug routine 21528sub dump_functions { 21529 21530 my $fh = *STDOUT; 21531 my ( $pkg, $sub ); 21532 foreach $pkg ( keys %is_user_function ) { 21533 print $fh "\nnon-constant subs in package $pkg\n"; 21534 21535 foreach $sub ( keys %{ $is_user_function{$pkg} } ) { 21536 my $msg = ""; 21537 if ( $is_block_list_function{$pkg}{$sub} ) { 21538 $msg = 'block_list'; 21539 } 21540 21541 if ( $is_block_function{$pkg}{$sub} ) { 21542 $msg = 'block'; 21543 } 21544 print $fh "$sub $msg\n"; 21545 } 21546 } 21547 21548 foreach $pkg ( keys %is_constant ) { 21549 print $fh "\nconstants and constant subs in package $pkg\n"; 21550 21551 foreach $sub ( keys %{ $is_constant{$pkg} } ) { 21552 print $fh "$sub\n"; 
21553 } 21554 } 21555} 21556 21557sub ones_count { 21558 21559 # count number of 1's in a string of 1's and 0's 21560 # example: ones_count("010101010101") gives 6 21561 return ( my $cis = $_[0] ) =~ tr/1/0/; 21562} 21563 21564sub prepare_for_a_new_file { 21565 21566 # previous tokens needed to determine what to expect next 21567 $last_nonblank_token = ';'; # the only possible starting state which 21568 $last_nonblank_type = ';'; # will make a leading brace a code block 21569 $last_nonblank_block_type = ''; 21570 21571 # scalars for remembering statement types across multiple lines 21572 $statement_type = ''; # '' or 'use' or 'sub..' or 'case..' 21573 $in_attribute_list = 0; 21574 21575 # scalars for remembering where we are in the file 21576 $current_package = "main"; 21577 $context = UNKNOWN_CONTEXT; 21578 21579 # hashes used to remember function information 21580 %is_constant = (); # user-defined constants 21581 %is_user_function = (); # user-defined functions 21582 %user_function_prototype = (); # their prototypes 21583 %is_block_function = (); 21584 %is_block_list_function = (); 21585 %saw_function_definition = (); 21586 21587 # variables used to track depths of various containers 21588 # and report nesting errors 21589 $paren_depth = 0; 21590 $brace_depth = 0; 21591 $square_bracket_depth = 0; 21592 @current_depth[ 0 .. $#closing_brace_names ] = 21593 (0) x scalar @closing_brace_names; 21594 $total_depth = 0; 21595 @total_depth = (); 21596 @nesting_sequence_number[ 0 .. $#closing_brace_names ] = 21597 ( 0 .. 
$#closing_brace_names ); 21598 @current_sequence_number = (); 21599 $paren_type[$paren_depth] = ''; 21600 $paren_semicolon_count[$paren_depth] = 0; 21601 $paren_structural_type[$brace_depth] = ''; 21602 $brace_type[$brace_depth] = ';'; # identify opening brace as code block 21603 $brace_structural_type[$brace_depth] = ''; 21604 $brace_statement_type[$brace_depth] = ""; 21605 $brace_context[$brace_depth] = UNKNOWN_CONTEXT; 21606 $brace_package[$paren_depth] = $current_package; 21607 $square_bracket_type[$square_bracket_depth] = ''; 21608 $square_bracket_structural_type[$square_bracket_depth] = ''; 21609 21610 initialize_tokenizer_state(); 21611} 21612 21613{ # begin tokenize_this_line 21614 21615 use constant BRACE => 0; 21616 use constant SQUARE_BRACKET => 1; 21617 use constant PAREN => 2; 21618 use constant QUESTION_COLON => 3; 21619 21620 # TV1: scalars for processing one LINE. 21621 # Re-initialized on each entry to sub tokenize_this_line. 21622 my ( 21623 $block_type, $container_type, $expecting, 21624 $i, $i_tok, $input_line, 21625 $input_line_number, $last_nonblank_i, $max_token_index, 21626 $next_tok, $next_type, $peeked_ahead, 21627 $prototype, $rhere_target_list, $rtoken_map, 21628 $rtoken_type, $rtokens, $tok, 21629 $type, $type_sequence, $indent_flag, 21630 ); 21631 21632 # TV2: refs to ARRAYS for processing one LINE 21633 # Re-initialized on each call. 21634 my $routput_token_list = []; # stack of output token indexes 21635 my $routput_token_type = []; # token types 21636 my $routput_block_type = []; # types of code block 21637 my $routput_container_type = []; # paren types, such as if, elsif, .. 21638 my $routput_type_sequence = []; # nesting sequential number 21639 my $routput_indent_flag = []; # 21640 21641 # TV3: SCALARS for quote variables. These are initialized with a 21642 # subroutine call and continually updated as lines are processed. 
my (
    $in_quote,
    $quote_type,
    $quote_character,
    $quote_pos,
    $quote_depth,
    $quoted_string_1,
    $quoted_string_2,
    $allowed_quote_modifiers,
);

# TV4: SCALARS for multi-line identifiers and
# statements. These are initialized with a subroutine call
# and continually updated as lines are processed.
my (
    $id_scan_state,
    $identifier,
    $want_paren,
    $indented_if_level,
);

# TV5: SCALARS for tracking indentation level.
# Initialized once and continually updated as lines are
# processed.
my (
    $nesting_token_string,
    $nesting_type_string,
    $nesting_block_string,
    $nesting_block_flag,
    $nesting_list_string,
    $nesting_list_flag,
    $ci_string_in_tokenizer,
    $continuation_string_in_tokenizer,
    $in_statement_continuation,
    $level_in_tokenizer,
    $slevel_in_tokenizer,
    $rslevel_stack,
);

# TV6: SCALARS for remembering several previous
# tokens. Initialized once and continually updated as
# lines are processed.
21666 my ( 21667 $last_nonblank_container_type, $last_nonblank_type_sequence, 21668 $last_last_nonblank_token, $last_last_nonblank_type, 21669 $last_last_nonblank_block_type, $last_last_nonblank_container_type, 21670 $last_last_nonblank_type_sequence, $last_nonblank_prototype, 21671 ); 21672 21673 # ---------------------------------------------------------------- 21674 # beginning of tokenizer variable access and manipulation routines 21675 # ---------------------------------------------------------------- 21676 21677 sub initialize_tokenizer_state { 21678 21679 # TV1: initialized on each call 21680 # TV2: initialized on each call 21681 # TV3: 21682 $in_quote = 0; 21683 $quote_type = 'Q'; 21684 $quote_character = ""; 21685 $quote_pos = 0; 21686 $quote_depth = 0; 21687 $quoted_string_1 = ""; 21688 $quoted_string_2 = ""; 21689 $allowed_quote_modifiers = ""; 21690 21691 # TV4: 21692 $id_scan_state = ''; 21693 $identifier = ''; 21694 $want_paren = ""; 21695 $indented_if_level = 0; 21696 21697 # TV5: 21698 $nesting_token_string = ""; 21699 $nesting_type_string = ""; 21700 $nesting_block_string = '1'; # initially in a block 21701 $nesting_block_flag = 1; 21702 $nesting_list_string = '0'; # initially not in a list 21703 $nesting_list_flag = 0; # initially not in a list 21704 $ci_string_in_tokenizer = ""; 21705 $continuation_string_in_tokenizer = "0"; 21706 $in_statement_continuation = 0; 21707 $level_in_tokenizer = 0; 21708 $slevel_in_tokenizer = 0; 21709 $rslevel_stack = []; 21710 21711 # TV6: 21712 $last_nonblank_container_type = ''; 21713 $last_nonblank_type_sequence = ''; 21714 $last_last_nonblank_token = ';'; 21715 $last_last_nonblank_type = ';'; 21716 $last_last_nonblank_block_type = ''; 21717 $last_last_nonblank_container_type = ''; 21718 $last_last_nonblank_type_sequence = ''; 21719 $last_nonblank_prototype = ""; 21720 } 21721 21722 sub save_tokenizer_state { 21723 21724 my $rTV1 = [ 21725 $block_type, $container_type, $expecting, 21726 $i, $i_tok, $input_line, 
21727 $input_line_number, $last_nonblank_i, $max_token_index, 21728 $next_tok, $next_type, $peeked_ahead, 21729 $prototype, $rhere_target_list, $rtoken_map, 21730 $rtoken_type, $rtokens, $tok, 21731 $type, $type_sequence, $indent_flag, 21732 ]; 21733 21734 my $rTV2 = [ 21735 $routput_token_list, $routput_token_type, 21736 $routput_block_type, $routput_container_type, 21737 $routput_type_sequence, $routput_indent_flag, 21738 ]; 21739 21740 my $rTV3 = [ 21741 $in_quote, $quote_type, 21742 $quote_character, $quote_pos, 21743 $quote_depth, $quoted_string_1, 21744 $quoted_string_2, $allowed_quote_modifiers, 21745 ]; 21746 21747 my $rTV4 = 21748 [ $id_scan_state, $identifier, $want_paren, $indented_if_level ]; 21749 21750 my $rTV5 = [ 21751 $nesting_token_string, $nesting_type_string, 21752 $nesting_block_string, $nesting_block_flag, 21753 $nesting_list_string, $nesting_list_flag, 21754 $ci_string_in_tokenizer, $continuation_string_in_tokenizer, 21755 $in_statement_continuation, $level_in_tokenizer, 21756 $slevel_in_tokenizer, $rslevel_stack, 21757 ]; 21758 21759 my $rTV6 = [ 21760 $last_nonblank_container_type, 21761 $last_nonblank_type_sequence, 21762 $last_last_nonblank_token, 21763 $last_last_nonblank_type, 21764 $last_last_nonblank_block_type, 21765 $last_last_nonblank_container_type, 21766 $last_last_nonblank_type_sequence, 21767 $last_nonblank_prototype, 21768 ]; 21769 return [ $rTV1, $rTV2, $rTV3, $rTV4, $rTV5, $rTV6 ]; 21770 } 21771 21772 sub restore_tokenizer_state { 21773 my ($rstate) = @_; 21774 my ( $rTV1, $rTV2, $rTV3, $rTV4, $rTV5, $rTV6 ) = @{$rstate}; 21775 ( 21776 $block_type, $container_type, $expecting, 21777 $i, $i_tok, $input_line, 21778 $input_line_number, $last_nonblank_i, $max_token_index, 21779 $next_tok, $next_type, $peeked_ahead, 21780 $prototype, $rhere_target_list, $rtoken_map, 21781 $rtoken_type, $rtokens, $tok, 21782 $type, $type_sequence, $indent_flag, 21783 ) = @{$rTV1}; 21784 21785 ( 21786 $routput_token_list, $routput_token_type, 21787 
$routput_block_type, $routput_container_type, 21788 $routput_type_sequence, $routput_type_sequence, 21789 ) = @{$rTV2}; 21790 21791 ( 21792 $in_quote, $quote_type, $quote_character, $quote_pos, $quote_depth, 21793 $quoted_string_1, $quoted_string_2, $allowed_quote_modifiers, 21794 ) = @{$rTV3}; 21795 21796 ( $id_scan_state, $identifier, $want_paren, $indented_if_level ) = 21797 @{$rTV4}; 21798 21799 ( 21800 $nesting_token_string, $nesting_type_string, 21801 $nesting_block_string, $nesting_block_flag, 21802 $nesting_list_string, $nesting_list_flag, 21803 $ci_string_in_tokenizer, $continuation_string_in_tokenizer, 21804 $in_statement_continuation, $level_in_tokenizer, 21805 $slevel_in_tokenizer, $rslevel_stack, 21806 ) = @{$rTV5}; 21807 21808 ( 21809 $last_nonblank_container_type, 21810 $last_nonblank_type_sequence, 21811 $last_last_nonblank_token, 21812 $last_last_nonblank_type, 21813 $last_last_nonblank_block_type, 21814 $last_last_nonblank_container_type, 21815 $last_last_nonblank_type_sequence, 21816 $last_nonblank_prototype, 21817 ) = @{$rTV6}; 21818 } 21819 21820 sub get_indentation_level { 21821 21822 # patch to avoid reporting error if indented if is not terminated 21823 if ($indented_if_level) { return $level_in_tokenizer - 1 } 21824 return $level_in_tokenizer; 21825 } 21826 21827 sub reset_indentation_level { 21828 $level_in_tokenizer = $_[0]; 21829 $slevel_in_tokenizer = $_[0]; 21830 push @{$rslevel_stack}, $slevel_in_tokenizer; 21831 } 21832 21833 sub peeked_ahead { 21834 $peeked_ahead = defined( $_[0] ) ? 
$_[0] : $peeked_ahead; 21835 } 21836 21837 # ------------------------------------------------------------ 21838 # end of tokenizer variable access and manipulation routines 21839 # ------------------------------------------------------------ 21840 21841 # ------------------------------------------------------------ 21842 # beginning of various scanner interface routines 21843 # ------------------------------------------------------------ 21844 sub scan_replacement_text { 21845 21846 # check for here-docs in replacement text invoked by 21847 # a substitution operator with executable modifier 'e'. 21848 # 21849 # given: 21850 # $replacement_text 21851 # return: 21852 # $rht = reference to any here-doc targets 21853 my ($replacement_text) = @_; 21854 21855 # quick check 21856 return undef unless ( $replacement_text =~ /<</ ); 21857 21858 write_logfile_entry("scanning replacement text for here-doc targets\n"); 21859 21860 # save the logger object for error messages 21861 my $logger_object = $tokenizer_self->{_logger_object}; 21862 21863 # localize all package variables 21864 local ( 21865 $tokenizer_self, $last_nonblank_token, 21866 $last_nonblank_type, $last_nonblank_block_type, 21867 $statement_type, $in_attribute_list, 21868 $current_package, $context, 21869 %is_constant, %is_user_function, 21870 %user_function_prototype, %is_block_function, 21871 %is_block_list_function, %saw_function_definition, 21872 $brace_depth, $paren_depth, 21873 $square_bracket_depth, @current_depth, 21874 @total_depth, $total_depth, 21875 @nesting_sequence_number, @current_sequence_number, 21876 @paren_type, @paren_semicolon_count, 21877 @paren_structural_type, @brace_type, 21878 @brace_structural_type, @brace_statement_type, 21879 @brace_context, @brace_package, 21880 @square_bracket_type, @square_bracket_structural_type, 21881 @depth_array, @starting_line_of_current_depth, 21882 @nested_ternary_flag, 21883 ); 21884 21885 # save all lexical variables 21886 my $rstate = 
save_tokenizer_state(); 21887 _decrement_count(); # avoid error check for multiple tokenizers 21888 21889 # make a new tokenizer 21890 my $rOpts = {}; 21891 my $rpending_logfile_message; 21892 my $source_object = 21893 Perl::Tidy::LineSource->new( \$replacement_text, $rOpts, 21894 $rpending_logfile_message ); 21895 my $tokenizer = Perl::Tidy::Tokenizer->new( 21896 source_object => $source_object, 21897 logger_object => $logger_object, 21898 starting_line_number => $input_line_number, 21899 ); 21900 21901 # scan the replacement text 21902 1 while ( $tokenizer->get_line() ); 21903 21904 # remove any here doc targets 21905 my $rht = undef; 21906 if ( $tokenizer_self->{_in_here_doc} ) { 21907 $rht = []; 21908 push @{$rht}, 21909 [ 21910 $tokenizer_self->{_here_doc_target}, 21911 $tokenizer_self->{_here_quote_character} 21912 ]; 21913 if ( $tokenizer_self->{_rhere_target_list} ) { 21914 push @{$rht}, @{ $tokenizer_self->{_rhere_target_list} }; 21915 $tokenizer_self->{_rhere_target_list} = undef; 21916 } 21917 $tokenizer_self->{_in_here_doc} = undef; 21918 } 21919 21920 # now its safe to report errors 21921 $tokenizer->report_tokenization_errors(); 21922 21923 # restore all tokenizer lexical variables 21924 restore_tokenizer_state($rstate); 21925 21926 # return the here doc targets 21927 return $rht; 21928 } 21929 21930 sub scan_bare_identifier { 21931 ( $i, $tok, $type, $prototype ) = 21932 scan_bare_identifier_do( $input_line, $i, $tok, $type, $prototype, 21933 $rtoken_map, $max_token_index ); 21934 } 21935 21936 sub scan_identifier { 21937 ( $i, $tok, $type, $id_scan_state, $identifier ) = 21938 scan_identifier_do( $i, $id_scan_state, $identifier, $rtokens, 21939 $max_token_index, $expecting ); 21940 } 21941 21942 sub scan_id { 21943 ( $i, $tok, $type, $id_scan_state ) = 21944 scan_id_do( $input_line, $i, $tok, $rtokens, $rtoken_map, 21945 $id_scan_state, $max_token_index ); 21946 } 21947 21948 sub scan_number { 21949 my $number; 21950 ( $i, $type, $number ) = 21951 
scan_number_do( $input_line, $i, $rtoken_map, $type,
        $max_token_index );

    # the '+' and '-' handlers below test this with defined() to see
    # whether a number was found
    return $number;
}

# a sub to warn if token found where term expected
#
# Reports the current token through unexpected() when a TERM is expected
# and the previous nonblank token type is listed in %really_want_term.
# The trailing '1;' makes the sub yield a true value when a report was
# made; the '^' handler below relies on that.
sub error_if_expecting_TERM {
    if ( $expecting == TERM ) {
        if ( $really_want_term{$last_nonblank_type} ) {
            unexpected( $tok, "term", $i_tok, $last_nonblank_i, $rtoken_map,
                $rtoken_type, $input_line );
            1;
        }
    }
}

# a sub to warn if token found where operator expected
#
# given:
#   $_[0] - optional text naming the offending thing (for example
#           "Scalar" or "String"); the current token is used if omitted
#
# When the offending token is the first one on the line, an additional
# hint about a missing ';' on the line above is written.  Yields 1 when
# a report was made.
sub error_if_expecting_OPERATOR {
    if ( $expecting == OPERATOR ) {
        my $thing = defined $_[0] ? $_[0] : $tok;
        unexpected( $thing, "operator", $i_tok, $last_nonblank_i,
            $rtoken_map, $rtoken_type, $input_line );
        if ( $i_tok == 0 ) {
            interrupt_logfile();
            warning("Missing ';' above?\n");
            resume_logfile();
        }
        1;
    }
}

# ------------------------------------------------------------
# end scanner interfaces
# ------------------------------------------------------------

# keywords which open a C-style or list-style loop
my %is_for_foreach;
@_ = qw(for foreach);
@is_for_foreach{@_} = (1) x scalar(@_);

# declarators which may be followed by a ':' attribute (see ':' below)
my %is_my_our;
@_ = qw(my our);
@is_my_our{@_} = (1) x scalar(@_);

# These keywords may introduce blocks after parenthesized expressions,
# in the form:
# keyword ( .... ) { BLOCK }
# patch for SWITCH/CASE: added 'switch' 'case' 'given' 'when'
my %is_blocktype_with_paren;
@_ = qw(if elsif unless while until for foreach switch case given when);
@is_blocktype_with_paren{@_} = (1) x scalar(@_);

# ------------------------------------------------------------
# begin hash of code for handling most token types
# ------------------------------------------------------------

# Each key is the text of a token and each value is a code reference
# holding the special handling for that token.  The handlers take no
# arguments; they read and update the shared tokenizer variables
# ($type, $tok, $i, $expecting, the depth counters and stacks, ...).
# NOTE(review): the place where these handlers are invoked is outside
# this part of the file - presumably the main loop of
# tokenize_this_line; confirm there before changing any handler.
my $tokenization_code = {

    # no special code for these types yet, but syntax checks
    # could be added

## '!' => undef,
## '!=' => undef,
## '!~' => undef,
## '%=' => undef,
## '&&=' => undef,
## '&=' => undef,
## '+=' => undef,
## '-=' => undef,
## '..' => undef,
## '..' => undef,
## '...' => undef,
## '.=' => undef,
## '<<=' => undef,
## '<=' => undef,
## '<=>' => undef,
## '<>' => undef,
## '=' => undef,
## '==' => undef,
## '=~' => undef,
## '>=' => undef,
## '>>' => undef,
## '>>=' => undef,
## '\\' => undef,
## '^=' => undef,
## '|=' => undef,
## '||=' => undef,
## '//=' => undef,
## '~' => undef,
## '~~' => undef,
## '!~~' => undef,

    # '>' and '|' only make sense where an operator is allowed
    '>' => sub {
        error_if_expecting_TERM()
          if ( $expecting == TERM );
    },
    '|' => sub {
        error_if_expecting_TERM()
          if ( $expecting == TERM );
    },
    '$' => sub {

        # start looking for a scalar
        error_if_expecting_OPERATOR("Scalar")
          if ( $expecting == OPERATOR );
        scan_identifier();

        # remember that the script touches the warning flag
        if ( $identifier eq '$^W' ) {
            $tokenizer_self->{_saw_perl_dash_w} = 1;
        }

        # Check for identifier in indirect object slot
        # (vorboard.pl, sort.t). Something like:
        # /^(print|printf|sort|exec|system)$/
        if (
            $is_indirect_object_taker{$last_nonblank_token}

            || ( ( $last_nonblank_token eq '(' )
                && $is_indirect_object_taker{ $paren_type[$paren_depth] } )
            || ( $last_nonblank_type =~ /^[Uw]$/ )    # possible object
          )
        {
            $type = 'Z';
        }
    },

    # opening paren: record what kind of container it is, check for an
    # unexpected '(', update the nesting depth and decide whether the
    # paren is structural
    '(' => sub {

        ++$paren_depth;
        $paren_semicolon_count[$paren_depth] = 0;

        # a container type requested by an earlier token takes priority
        if ($want_paren) {
            $container_type = $want_paren;
            $want_paren = "";
        }
        else {
            $container_type = $last_nonblank_token;

            # We can check for a syntax error here of unexpected '(',
            # but this is going to get messy...
            if (
                $expecting == OPERATOR

                # be sure this is not a method call of the form
                # &method(...), $method->(..), &{method}(...),
                # $ref[2](list) is ok & short for $ref[2]->(list)
                # NOTE: at present, braces in something like &{ xxx }
                # are not marked as a block, we might have a method call
                && $last_nonblank_token !~ /^([\]\}\&]|\-\>)/

              )
            {

                # ref: camel 3 p 703.
                if ( $last_last_nonblank_token eq 'do' ) {
                    complain(
"do SUBROUTINE is deprecated; consider & or -> notation\n"
                    );
                }
                else {

                    # if this is an empty list, (), then it is not an
                    # error; for example, we might have a constant pi and
                    # invoke it with pi() or just pi;
                    my ( $next_nonblank_token, $i_next ) =
                      find_next_nonblank_token( $i, $rtokens,
                        $max_token_index );
                    if ( $next_nonblank_token ne ')' ) {
                        my $hint;
                        error_if_expecting_OPERATOR('(');

                        # add a hint for the two most likely causes
                        if ( $last_nonblank_type eq 'C' ) {
                            $hint =
                              "$last_nonblank_token has a void prototype\n";
                        }
                        elsif ( $last_nonblank_type eq 'i' ) {
                            if ( $i_tok > 0
                                && $last_nonblank_token =~ /^\$/ )
                            {
                                $hint =
"Do you mean '$last_nonblank_token->(' ?\n";
                            }
                        }
                        if ($hint) {
                            interrupt_logfile();
                            warning($hint);
                            resume_logfile();
                        }
                    } ## end if ( $next_nonblank_token...
                } ## end else [ if ( $last_last_nonblank_token...
            } ## end if ( $expecting == OPERATOR...
        }
        $paren_type[$paren_depth] = $container_type;
        ( $type_sequence, $indent_flag ) =
          increase_nesting_depth( PAREN, $$rtoken_map[$i_tok] );

        # propagate types down through nested parens
        # for example: the second paren in 'if ((' would be structural
        # since the first is.

        if ( $last_nonblank_token eq '(' ) {
            $type = $last_nonblank_type;
        }

        # We exclude parens as structural after a ',' because it
        # causes subtle problems with continuation indentation for
        # something like this, where the first 'or' will not get
        # indented.
        #
        # assert(
        # __LINE__,
        # ( not defined $check )
        # or ref $check
        # or $check eq "new"
        # or $check eq "old",
        # );
        #
        # Likewise, we exclude parens where a statement can start
        # because of problems with continuation indentation, like
        # these:
        #
        # ($firstline =~ /^#\!.*perl/)
        # and (print $File::Find::name, "\n")
        # and (return 1);
        #
        # (ref($usage_fref) =~ /CODE/)
        # ? &$usage_fref
        # : (&blast_usage, &blast_params, &blast_general_params);

        else {
            $type = '{';
        }

        if ( $last_nonblank_type eq ')' ) {
            warning(
                "Syntax error? found token '$last_nonblank_type' then '('\n"
            );
        }

        # saved so that the matching ')' can be given the matching type
        $paren_structural_type[$paren_depth] = $type;

    },

    # closing paren: update the nesting depth, match the type chosen for
    # the opening paren, and check the ';' count of a 'for' container
    ')' => sub {
        ( $type_sequence, $indent_flag ) =
          decrease_nesting_depth( PAREN, $$rtoken_map[$i_tok] );

        if ( $paren_structural_type[$paren_depth] eq '{' ) {
            $type = '}';
        }

        $container_type = $paren_type[$paren_depth];

        # /^(for|foreach)$/
        if ( $is_for_foreach{ $paren_type[$paren_depth] } ) {
            my $num_sc = $paren_semicolon_count[$paren_depth];
            if ( $num_sc > 0 && $num_sc != 2 ) {
                warning("Expected 2 ';' in 'for(;;)' but saw $num_sc\n");
            }
        }

        # never let the depth go negative on unbalanced input
        if ( $paren_depth > 0 ) { $paren_depth-- }
    },
    ',' => sub {
        if ( $last_nonblank_type eq ',' ) {
            complain("Repeated ','s \n");
        }

        # patch for operator_expected: note if we are in the list (use.t)
        if ( $statement_type eq 'use' ) { $statement_type = '_use' }
## FIXME: need to move this elsewhere, perhaps check after a '('
## elsif ($last_nonblank_token eq '(') {
## warning("Leading ','s illegal in some versions of perl\n");
## }
    },

    # a semicolon ends the statement, so the context and statement type
    # are reset
    ';' => sub {
        $context = UNKNOWN_CONTEXT;
        $statement_type = '';

        # /^(for|foreach)$/
        if ( $is_for_foreach{ $paren_type[$paren_depth] } )
        {    # mark ; in for loop

            # Be careful: we do not want a semicolon such as the
            # following to be included:
            #
            # for (sort {strcoll($a,$b);} keys %investments) {

            # only count the ';' if no brace or square bracket has been
            # opened since the 'for' paren
            if ( $brace_depth == $depth_array[PAREN][BRACE][$paren_depth]
                && $square_bracket_depth ==
                $depth_array[PAREN][SQUARE_BRACKET][$paren_depth] )
            {

                $type = 'f';
                $paren_semicolon_count[$paren_depth]++;
            }
        }

    },

    # the three simple quote characters start a quote of type 'Q' which
    # takes no trailing modifiers
    '"' => sub {
        error_if_expecting_OPERATOR("String")
          if ( $expecting == OPERATOR );
        $in_quote = 1;
        $type = 'Q';
        $allowed_quote_modifiers = "";
    },
    "'" => sub {
        error_if_expecting_OPERATOR("String")
          if ( $expecting == OPERATOR );
        $in_quote = 1;
        $type = 'Q';
        $allowed_quote_modifiers = "";
    },
    '`' => sub {
        error_if_expecting_OPERATOR("String")
          if ( $expecting == OPERATOR );
        $in_quote = 1;
        $type = 'Q';
        $allowed_quote_modifiers = "";
    },

    # start of a pattern, or a divide?
    '/' => sub {
        my $is_pattern;

        if ( $expecting == UNKNOWN ) {    # indeterminate, must guess..
            my $msg;
            ( $is_pattern, $msg ) =
              guess_if_pattern_or_division( $i, $rtokens, $rtoken_map,
                $max_token_index );

            if ($msg) {
                write_diagnostics("DIVIDE:$msg\n");
                write_logfile_entry($msg);
            }
        }
        else { $is_pattern = ( $expecting == TERM ) }

        if ($is_pattern) {
            $in_quote = 1;
            $type = 'Q';
            $allowed_quote_modifiers = '[cgimosxp]';
        }
        else {    # not a pattern; check for a /= token

            if ( $$rtokens[ $i + 1 ] eq '=' ) {    # form token /=
                $i++;
                $tok = '/=';
                $type = $tok;
            }

            #DEBUG - collecting info on what tokens follow a divide
            # for development of guessing algorithm
            #if ( numerator_expected( $i, $rtokens, $max_token_index ) < 0 ) {
            # #write_diagnostics( "DIVIDE? $input_line\n" );
            #}
        }
    },
    '{' => sub {

        # if we just saw a ')', we will label this block with
        # its type. We need to do this to allow sub
        # code_block_type to determine if this brace starts a
        # code block or anonymous hash. (The type of a paren
        # pair is the preceding token, such as 'if', 'else',
        # etc).
        $container_type = "";

        # ATTRS: for a '{' following an attribute list, reset
        # things to look like we just saw the sub name
        if ( $statement_type =~ /^sub/ ) {
            $last_nonblank_token = $statement_type;
            $last_nonblank_type = 'i';
            $statement_type = "";
        }

        # patch for SWITCH/CASE: hide these keywords from an immediately
        # following opening brace
        elsif ( ( $statement_type eq 'case' || $statement_type eq 'when' )
            && $statement_type eq $last_nonblank_token )
        {
            $last_nonblank_token = ";";
        }

        elsif ( $last_nonblank_token eq ')' ) {

            # $paren_depth has already been decremented by the ')'
            # handler, so the paren pair just closed is one level up
            $last_nonblank_token = $paren_type[ $paren_depth + 1 ];

            # defensive move in case of a nesting error (pbug.t)
            # in which this ')' had no previous '('
            # this nesting error will have been caught
            if ( !defined($last_nonblank_token) ) {
                $last_nonblank_token = 'if';
            }

            # check for syntax error here;
            unless ( $is_blocktype_with_paren{$last_nonblank_token} ) {
                my $list = join( ' ', sort keys %is_blocktype_with_paren );
                warning(
                    "syntax error at ') {', didn't see one of: $list\n");
            }
        }

        # patch for paren-less for/foreach glitch, part 2.
        # see note below under 'qw'
        elsif ($last_nonblank_token eq 'qw'
            && $is_for_foreach{$want_paren} )
        {
            $last_nonblank_token = $want_paren;
            if ( $last_last_nonblank_token eq $want_paren ) {
                warning(
"syntax error at '$want_paren .. {' -- missing \$ loop variable\n"
                );

            }
            $want_paren = "";
        }

        # now identify which of the three possible types of
        # curly braces we have: hash index container, anonymous
        # hash reference, or code block.

        # non-structural (hash index) curly brace pair
        # get marked 'L' and 'R'
        if ( is_non_structural_brace() ) {
            $type = 'L';

            # patch for SWITCH/CASE:
            # allow paren-less identifier after 'when'
            # if the brace is preceded by a space
            if ( $statement_type eq 'when'
                && $last_nonblank_type eq 'i'
                && $last_last_nonblank_type eq 'k'
                && ( $i_tok == 0 || $rtoken_type->[ $i_tok - 1 ] eq 'b' ) )
            {
                $type = '{';
                $block_type = $statement_type;
            }
        }

        # code and anonymous hash have the same type, '{', but are
        # distinguished by 'block_type',
        # which will be blank for an anonymous hash
        else {

            $block_type = code_block_type( $i_tok, $rtokens, $rtoken_type,
                $max_token_index );

            # patch to promote bareword type to function taking block
            if ( $block_type
                && $last_nonblank_type eq 'w'
                && $last_nonblank_i >= 0 )
            {
                if ( $routput_token_type->[$last_nonblank_i] eq 'w' ) {
                    $routput_token_type->[$last_nonblank_i] = 'G';
                }
            }

            # patch for SWITCH/CASE: if we find a stray opening block brace
            # where we might accept a 'case' or 'when' block, then take it
            if ( $statement_type eq 'case'
                || $statement_type eq 'when' )
            {
                if ( !$block_type || $block_type eq '}' ) {
                    $block_type = $statement_type;
                }
            }
        }

        # save everything the matching '}' will need to restore
        $brace_type[ ++$brace_depth ] = $block_type;
        $brace_package[$brace_depth] = $current_package;
        ( $type_sequence, $indent_flag ) =
          increase_nesting_depth( BRACE, $$rtoken_map[$i_tok] );
        $brace_structural_type[$brace_depth] = $type;
        $brace_context[$brace_depth] = $context;
        $brace_statement_type[$brace_depth] = $statement_type;
    },

    # closing brace: restore the values saved by the matching '{'
    '}' => sub {
        $block_type = $brace_type[$brace_depth];
        if ($block_type) { $statement_type = '' }
        if ( defined( $brace_package[$brace_depth] ) ) {
            $current_package = $brace_package[$brace_depth];
        }

        # can happen on brace error (caught elsewhere)
        else {
        }
        ( $type_sequence, $indent_flag ) =
          decrease_nesting_depth( BRACE, $$rtoken_map[$i_tok] );

        if ( $brace_structural_type[$brace_depth] eq 'L' ) {
            $type = 'R';
        }

        # propagate type information for 'do' and 'eval' blocks.
        # This is necessary to enable us to know if an operator
        # or term is expected next
        if ( $is_block_operator{ $brace_type[$brace_depth] } ) {
            $tok = $brace_type[$brace_depth];
        }

        $context = $brace_context[$brace_depth];
        $statement_type = $brace_statement_type[$brace_depth];

        # never let the depth go negative on unbalanced input
        if ( $brace_depth > 0 ) { $brace_depth--; }
    },
    '&' => sub {    # maybe sub call? start looking

        # We have to check for sub call unless we are sure we
        # are expecting an operator. This example from s2p
        # got mistaken as a q operator in an early version:
        # print BODY &q(<<'EOT');
        if ( $expecting != OPERATOR ) {
            scan_identifier();
        }
        else {
        }
    },
    '<' => sub {    # angle operator or less than?

        if ( $expecting != OPERATOR ) {
            ( $i, $type ) =
              find_angle_operator_termination( $input_line, $i, $rtoken_map,
                $expecting, $max_token_index );

        }
        else {
        }
    },
    '?' => sub {    # ?: conditional or starting pattern?

        my $is_pattern;

        if ( $expecting == UNKNOWN ) {

            my $msg;
            ( $is_pattern, $msg ) =
              guess_if_pattern_or_conditional( $i, $rtokens, $rtoken_map,
                $max_token_index );

            if ($msg) { write_logfile_entry($msg) }
        }
        else { $is_pattern = ( $expecting == TERM ) }

        if ($is_pattern) {
            $in_quote = 1;
            $type = 'Q';
            $allowed_quote_modifiers = '[cgimosxp]';
        }
        else {

            # a conditional '?' opens a nesting level which is closed
            # by the ':' handler
            ( $type_sequence, $indent_flag ) =
              increase_nesting_depth( QUESTION_COLON,
                $$rtoken_map[$i_tok] );
        }
    },
    '*' => sub {    # typeglob, or multiply?

        if ( $expecting == TERM ) {
            scan_identifier();
        }
        else {

            # combine with following pre-tokens to form '*=', '**'
            # or '**='
            if ( $$rtokens[ $i + 1 ] eq '=' ) {
                $tok = '*=';
                $type = $tok;
                $i++;
            }
            elsif ( $$rtokens[ $i + 1 ] eq '*' ) {
                $tok = '**';
                $type = $tok;
                $i++;
                if ( $$rtokens[ $i + 1 ] eq '=' ) {
                    $tok = '**=';
                    $type = $tok;
                    $i++;
                }
            }
        }
    },
    '.' => sub {    # what kind of . ?

        if ( $expecting != OPERATOR ) {
            scan_number();

            # the type is still '.' if the scan did not retype the token
            if ( $type eq '.' ) {
                error_if_expecting_TERM()
                  if ( $expecting == TERM );
            }
        }
        else {
        }
    },
    ':' => sub {

        # if this is the first nonblank character, call it a label
        # since perl seems to just swallow it
        if ( $input_line_number == 1 && $last_nonblank_i == -1 ) {
            $type = 'J';
        }

        # ATTRS: check for a ':' which introduces an attribute list
        # (this might eventually get its own token type)
        elsif ( $statement_type =~ /^sub/ ) {
            $type = 'A';
            $in_attribute_list = 1;
        }

        # check for scalar attribute, such as
        # my $foo : shared = 1;
        elsif ($is_my_our{$statement_type}
            && $current_depth[QUESTION_COLON] == 0 )
        {
            $type = 'A';
            $in_attribute_list = 1;
        }

        # otherwise, it should be part of a ?/: operator
        else {
            ( $type_sequence, $indent_flag ) =
              decrease_nesting_depth( QUESTION_COLON,
                $$rtoken_map[$i_tok] );
            if ( $last_nonblank_token eq '?' ) {
                warning("Syntax error near ? :\n");
            }
        }
    },
    '+' => sub {    # what kind of plus?

        if ( $expecting == TERM ) {
            my $number = scan_number();

            # unary plus is safest assumption if not a number
            if ( !defined($number) ) { $type = 'p'; }
        }
        elsif ( $expecting == OPERATOR ) {
        }
        else {
            if ( $next_type eq 'w' ) { $type = 'p' }
        }
    },
    '@' => sub {

        error_if_expecting_OPERATOR("Array")
          if ( $expecting == OPERATOR );
        scan_identifier();
    },
    '%' => sub {    # hash or modulo?

        # first guess is hash if no following blank
        if ( $expecting == UNKNOWN ) {
            if ( $next_type ne 'b' ) { $expecting = TERM }
        }
        if ( $expecting == TERM ) {
            scan_identifier();
        }
    },
    '[' => sub {
        $square_bracket_type[ ++$square_bracket_depth ] =
          $last_nonblank_token;
        ( $type_sequence, $indent_flag ) =
          increase_nesting_depth( SQUARE_BRACKET, $$rtoken_map[$i_tok] );

        # It may seem odd, but structural square brackets have
        # type '{' and '}'. This simplifies the indentation logic.
        if ( !is_non_structural_brace() ) {
            $type = '{';
        }
        $square_bracket_structural_type[$square_bracket_depth] = $type;
    },
    ']' => sub {
        ( $type_sequence, $indent_flag ) =
          decrease_nesting_depth( SQUARE_BRACKET, $$rtoken_map[$i_tok] );

        # match the type given to the opening bracket
        if ( $square_bracket_structural_type[$square_bracket_depth] eq '{' )
        {
            $type = '}';
        }

        # never let the depth go negative on unbalanced input
        if ( $square_bracket_depth > 0 ) { $square_bracket_depth--; }
    },
    '-' => sub {    # what kind of minus?

        # possible file test operator, such as -e
        if ( ( $expecting != OPERATOR )
            && $is_file_test_operator{$next_tok} )
        {
            my ( $next_nonblank_token, $i_next ) =
              find_next_nonblank_token( $i + 1, $rtokens,
                $max_token_index );

            # check for a quoted word like "-w=>xx";
            # it is sufficient to just check for a following '='
            if ( $next_nonblank_token eq '=' ) {
                $type = 'm';
            }
            else {

                # join the '-' and the letter into one token of type 'F'
                $i++;
                $tok .= $next_tok;
                $type = 'F';
            }
        }
        elsif ( $expecting == TERM ) {
            my $number = scan_number();

            # maybe part of bareword token? unary is safest
            if ( !defined($number) ) { $type = 'm'; }

        }
        elsif ( $expecting == OPERATOR ) {
        }
        else {

            if ( $next_type eq 'w' ) {
                $type = 'm';
            }
        }
    },

    '^' => sub {

        # check for special variables like ${^WARNING_BITS}
        if ( $expecting == TERM ) {

            # FIXME: this should work but will not catch errors
            # because we also have to be sure that previous token is
            # a type character ($,@,%).
            if ( $last_nonblank_token eq '{'
                && ( $next_tok =~ /^[A-Za-z_]/ ) )
            {

                if ( $next_tok eq 'W' ) {
                    $tokenizer_self->{_saw_perl_dash_w} = 1;
                }

                # join the '^' and the following word into one token
                $tok = $tok . $next_tok;
                $i = $i + 1;
                $type = 'w';
            }

            else {

                # only add the milder complaint if no error was reported
                unless ( error_if_expecting_TERM() ) {

                    # Something like this is valid but strange:
                    # undef ^I;
                    complain("The '^' seems unusual here\n");
                }
            }
        }
    },

    '::' => sub {    # probably a sub call
        scan_bare_identifier();
    },
    '<<' => sub {    # maybe a here-doc?
        return
          unless ( $i < $max_token_index )
          ;    # here-doc not possible if end of line

        if ( $expecting != OPERATOR ) {
            my ( $found_target, $here_doc_target, $here_quote_character,
                $saw_error );
            (
                $found_target, $here_doc_target, $here_quote_character, $i,
                $saw_error
              )
              = find_here_doc( $expecting, $i, $rtokens, $rtoken_map,
                $max_token_index );

            if ($found_target) {

                # queue the target on the list of here-docs for this line
                push @{$rhere_target_list},
                  [ $here_doc_target, $here_quote_character ];
                $type = 'h';
                if ( length($here_doc_target) > 80 ) {
                    my $truncated = substr( $here_doc_target, 0, 80 );
                    complain("Long here-target: '$truncated' ...\n");
                }
                elsif ( $here_doc_target !~ /^[A-Z_]\w+$/ ) {
                    complain(
                        "Unconventional here-target: '$here_doc_target'\n"
                    );
                }
            }
            elsif ( $expecting == TERM ) {
                unless ($saw_error) {

                    # shouldn't happen..
                    warning("Program bug; didn't find here doc target\n");
                    report_definite_bug();
                }
            }
        }
        else {
        }
    },
    '->' => sub {

        # if -> points to a bare word, we must scan for an identifier,
        # otherwise something like ->y would look like the y operator
        scan_identifier();
    },

    # type = 'pp' for pre-increment, '++' for post-increment
    '++' => sub {
        if ( $expecting == TERM ) { $type = 'pp' }
        elsif ( $expecting == UNKNOWN ) {

            # undecided: call it a pre-increment if a '$' comes next
            my ( $next_nonblank_token, $i_next ) =
              find_next_nonblank_token( $i, $rtokens, $max_token_index );
            if ( $next_nonblank_token eq '$' ) { $type = 'pp' }
        }
    },

    '=>' => sub {
        if ( $last_nonblank_type eq $tok ) {
            complain("Repeated '=>'s \n");
        }

        # patch for operator_expected: note if we are in the list (use.t)
        # TODO: make version numbers a new token type
        if ( $statement_type eq 'use' ) { $statement_type = '_use' }
    },
# (closing entries of the $tokenization_code hash)

    # '--' gets type 'mm' when it is a pre-decrement and keeps type '--'
    # when it is a post-decrement
    '--' => sub {
        if ( $expecting == TERM ) {
            $type = 'mm';
        }
        elsif ( $expecting == UNKNOWN ) {

            # undecided: call it a pre-decrement if a '$' comes next
            my ( $next_nonblank_token, $i_next ) =
              find_next_nonblank_token( $i, $rtokens, $max_token_index );
            $type = 'mm' if ( $next_nonblank_token eq '$' );
        }
    },

    # the binary operators '&&', '||' and '//' are questioned wherever a
    # term is expected
    '&&' => sub {
        if ( $expecting == TERM ) { error_if_expecting_TERM() }
    },

    '||' => sub {
        if ( $expecting == TERM ) { error_if_expecting_TERM() }
    },

    '//' => sub {
        if ( $expecting == TERM ) { error_if_expecting_TERM() }
    },
};

# ------------------------------------------------------------
# end hash of code for handling individual token types
# ------------------------------------------------------------

# closing container token => its opening partner
my %matching_start_token = (
    '}' => '{',
    ']' => '[',
    ')' => '(',
);

# Each lookup table below is filled in the same way: the words are
# placed in @_ and a hash slice sets every one of them to 1.

# These block types terminate statements and do not need a trailing
# semicolon
# patched for SWITCH/CASE:
my %is_zero_continuation_block_type;
@_ = qw( } { BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue ;
  if elsif else unless while until for foreach switch case given when);
@is_zero_continuation_block_type{@_} = (1) x @_;

my %is_not_zero_continuation_block_type;
@_ = qw(sort grep map do eval);
@is_not_zero_continuation_block_type{@_} = (1) x @_;

my %is_logical_container;
@_ = qw(if elsif unless while and or err not && ! || for foreach);
@is_logical_container{@_} = (1) x @_;

my %is_binary_type;
@_ = qw(|| &&);
@is_binary_type{@_} = (1) x @_;

my %is_binary_keyword;
@_ = qw(and or err eq ne cmp);
@is_binary_keyword{@_} = (1) x @_;

# opening container types; 'L' is the type of an opening '{' at a
# hash key
my %is_opening_type;
@_ = qw" L { ( [ ";
@is_opening_type{@_} = (1) x @_;

# closing container types; 'R' is the type of a closing '}' at a
# hash key
my %is_closing_type;
@_ = qw" R } ) ] ";
@is_closing_type{@_} = (1) x @_;

my %is_redo_last_next_goto;
@_ = qw(redo last next goto);
@is_redo_last_next_goto{@_} = (1) x @_;

my %is_use_require;
@_ = qw(use require);
@is_use_require{@_} = (1) x @_;

my %is_sub_package;
@_ = qw(sub package);
@is_sub_package{@_} = (1) x @_;

# For each of these keywords, the name of the flag in $tokenizer_self
# which goes with it:
my %is_format_END_DATA = (
    'format'   => '_in_format',
    '__END__'  => '_in_end',
    '__DATA__' => '_in_data',
);

# Character class of the modifiers accepted after each quote-like
# operator (an empty string means none are accepted).
# ref: camel 3 p 147,
# but perl may accept undocumented flags
# perl 5.10 adds 'p' (preserve)
my %quote_modifiers = (
    's'  => '[cegimosxp]',
    'y'  => '[cds]',
    'tr' => '[cds]',
    'm'  => '[cgimosxp]',
    'qr' => '[imosxp]',
    'q'  => "",
    'qq' => "",
    'qw' => "",
    'qx' => "",
);

# table showing how many quoted things to look for after quote operator..
22859 # s, y, tr have 2 (pattern and replacement) 22860 # others have 1 (pattern only) 22861 my %quote_items = ( 22862 's' => 2, 22863 'y' => 2, 22864 'tr' => 2, 22865 'm' => 1, 22866 'qr' => 1, 22867 'q' => 1, 22868 'qq' => 1, 22869 'qw' => 1, 22870 'qx' => 1, 22871 ); 22872 22873 sub tokenize_this_line { 22874 22875 # This routine breaks a line of perl code into tokens which are of use in 22876 # indentation and reformatting. One of my goals has been to define tokens 22877 # such that a newline may be inserted between any pair of tokens without 22878 # changing or invalidating the program. This version comes close to this, 22879 # although there are necessarily a few exceptions which must be caught by 22880 # the formatter. Many of these involve the treatment of bare words. 22881 # 22882 # The tokens and their types are returned in arrays. See previous 22883 # routine for their names. 22884 # 22885 # See also the array "valid_token_types" in the BEGIN section for an 22886 # up-to-date list. 22887 # 22888 # To simplify things, token types are either a single character, or they 22889 # are identical to the tokens themselves. 22890 # 22891 # As a debugging aid, the -D flag creates a file containing a side-by-side 22892 # comparison of the input string and its tokenization for each line of a file. 22893 # This is an invaluable debugging aid. 22894 # 22895 # In addition to tokens, and some associated quantities, the tokenizer 22896 # also returns flags indication any special line types. These include 22897 # quotes, here_docs, formats. 22898 # 22899 # ----------------------------------------------------------------------- 22900 # 22901 # How to add NEW_TOKENS: 22902 # 22903 # New token types will undoubtedly be needed in the future both to keep up 22904 # with changes in perl and to help adapt the tokenizer to other applications. 22905 # 22906 # Here are some notes on the minimal steps. 
I wrote these notes while 22907 # adding the 'v' token type for v-strings, which are things like version 22908 # numbers 5.6.0, and ip addresses, and will use that as an example. ( You 22909 # can use your editor to search for the string "NEW_TOKENS" to find the 22910 # appropriate sections to change): 22911 # 22912 # *. Try to talk somebody else into doing it! If not, .. 22913 # 22914 # *. Make a backup of your current version in case things don't work out! 22915 # 22916 # *. Think of a new, unused character for the token type, and add to 22917 # the array @valid_token_types in the BEGIN section of this package. 22918 # For example, I used 'v' for v-strings. 22919 # 22920 # *. Implement coding to recognize the $type of the token in this routine. 22921 # This is the hardest part, and is best done by immitating or modifying 22922 # some of the existing coding. For example, to recognize v-strings, I 22923 # patched 'sub scan_bare_identifier' to recognize v-strings beginning with 22924 # 'v' and 'sub scan_number' to recognize v-strings without the leading 'v'. 22925 # 22926 # *. Update sub operator_expected. This update is critically important but 22927 # the coding is trivial. Look at the comments in that routine for help. 22928 # For v-strings, which should behave like numbers, I just added 'v' to the 22929 # regex used to handle numbers and strings (types 'n' and 'Q'). 22930 # 22931 # *. Implement a 'bond strength' rule in sub set_bond_strengths in 22932 # Perl::Tidy::Formatter for breaking lines around this token type. You can 22933 # skip this step and take the default at first, then adjust later to get 22934 # desired results. For adding type 'v', I looked at sub bond_strength and 22935 # saw that number type 'n' was using default strengths, so I didn't do 22936 # anything. I may tune it up someday if I don't like the way line 22937 # breaks with v-strings look. 22938 # 22939 # *. 
Implement a 'whitespace' rule in sub set_white_space_flag in 22940 # Perl::Tidy::Formatter. For adding type 'v', I looked at this routine 22941 # and saw that type 'n' used spaces on both sides, so I just added 'v' 22942 # to the array @spaces_both_sides. 22943 # 22944 # *. Update HtmlWriter package so that users can colorize the token as 22945 # desired. This is quite easy; see comments identified by 'NEW_TOKENS' in 22946 # that package. For v-strings, I initially chose to use a default color 22947 # equal to the default for numbers, but it might be nice to change that 22948 # eventually. 22949 # 22950 # *. Update comments in Perl::Tidy::Tokenizer::dump_token_types. 22951 # 22952 # *. Run lots and lots of debug tests. Start with special files designed 22953 # to test the new token type. Run with the -D flag to create a .DEBUG 22954 # file which shows the tokenization. When these work ok, test as many old 22955 # scripts as possible. Start with all of the '.t' files in the 'test' 22956 # directory of the distribution file. Compare .tdy output with previous 22957 # version and updated version to see the differences. Then include as 22958 # many more files as possible. My own technique has been to collect a huge 22959 # number of perl scripts (thousands!) into one directory and run perltidy 22960 # *, then run diff between the output of the previous version and the 22961 # current version. 22962 # 22963 # *. For another example, search for the smartmatch operator '~~' 22964 # with your editor to see where updates were made for it. 
22965 # 22966 # ----------------------------------------------------------------------- 22967 22968 my $line_of_tokens = shift; 22969 my ($untrimmed_input_line) = $line_of_tokens->{_line_text}; 22970 22971 # patch while coding change is underway 22972 # make callers private data to allow access 22973 # $tokenizer_self = $caller_tokenizer_self; 22974 22975 # extract line number for use in error messages 22976 $input_line_number = $line_of_tokens->{_line_number}; 22977 22978 # reinitialize for multi-line quote 22979 $line_of_tokens->{_starting_in_quote} = $in_quote && $quote_type eq 'Q'; 22980 22981 # check for pod documentation 22982 if ( ( $untrimmed_input_line =~ /^=[A-Za-z_]/ ) ) { 22983 22984 # must not be in multi-line quote 22985 # and must not be in an eqn 22986 if ( !$in_quote and ( operator_expected( 'b', '=', 'b' ) == TERM ) ) 22987 { 22988 $tokenizer_self->{_in_pod} = 1; 22989 return; 22990 } 22991 } 22992 22993 $input_line = $untrimmed_input_line; 22994 22995 chomp $input_line; 22996 22997 # trim start of this line unless we are continuing a quoted line 22998 # do not trim end because we might end in a quote (test: deken4.pl) 22999 # Perl::Tidy::Formatter will delete needless trailing blanks 23000 unless ( $in_quote && ( $quote_type eq 'Q' ) ) { 23001 $input_line =~ s/^\s*//; # trim left end 23002 } 23003 23004 # update the copy of the line for use in error messages 23005 # This must be exactly what we give the pre_tokenizer 23006 $tokenizer_self->{_line_text} = $input_line; 23007 23008 # re-initialize for the main loop 23009 $routput_token_list = []; # stack of output token indexes 23010 $routput_token_type = []; # token types 23011 $routput_block_type = []; # types of code block 23012 $routput_container_type = []; # paren types, such as if, elsif, .. 
23013 $routput_type_sequence = []; # nesting sequential number 23014 23015 $rhere_target_list = []; 23016 23017 $tok = $last_nonblank_token; 23018 $type = $last_nonblank_type; 23019 $prototype = $last_nonblank_prototype; 23020 $last_nonblank_i = -1; 23021 $block_type = $last_nonblank_block_type; 23022 $container_type = $last_nonblank_container_type; 23023 $type_sequence = $last_nonblank_type_sequence; 23024 $indent_flag = 0; 23025 $peeked_ahead = 0; 23026 23027 # tokenization is done in two stages.. 23028 # stage 1 is a very simple pre-tokenization 23029 my $max_tokens_wanted = 0; # this signals pre_tokenize to get all tokens 23030 23031 # a little optimization for a full-line comment 23032 if ( !$in_quote && ( $input_line =~ /^#/ ) ) { 23033 $max_tokens_wanted = 1 # no use tokenizing a comment 23034 } 23035 23036 # start by breaking the line into pre-tokens 23037 ( $rtokens, $rtoken_map, $rtoken_type ) = 23038 pre_tokenize( $input_line, $max_tokens_wanted ); 23039 23040 $max_token_index = scalar(@$rtokens) - 1; 23041 push( @$rtokens, ' ', ' ', ' ' ); # extra whitespace simplifies logic 23042 push( @$rtoken_map, 0, 0, 0 ); # shouldn't be referenced 23043 push( @$rtoken_type, 'b', 'b', 'b' ); 23044 23045 # initialize for main loop 23046 for $i ( 0 .. 
$max_token_index + 3 ) { 23047 $routput_token_type->[$i] = ""; 23048 $routput_block_type->[$i] = ""; 23049 $routput_container_type->[$i] = ""; 23050 $routput_type_sequence->[$i] = ""; 23051 $routput_indent_flag->[$i] = 0; 23052 } 23053 $i = -1; 23054 $i_tok = -1; 23055 23056 # ------------------------------------------------------------ 23057 # begin main tokenization loop 23058 # ------------------------------------------------------------ 23059 23060 # we are looking at each pre-token of one line and combining them 23061 # into tokens 23062 while ( ++$i <= $max_token_index ) { 23063 23064 if ($in_quote) { # continue looking for end of a quote 23065 $type = $quote_type; 23066 23067 unless ( @{$routput_token_list} ) 23068 { # initialize if continuation line 23069 push( @{$routput_token_list}, $i ); 23070 $routput_token_type->[$i] = $type; 23071 23072 } 23073 $tok = $quote_character unless ( $quote_character =~ /^\s*$/ ); 23074 23075 # scan for the end of the quote or pattern 23076 ( 23077 $i, $in_quote, $quote_character, $quote_pos, $quote_depth, 23078 $quoted_string_1, $quoted_string_2 23079 ) 23080 = do_quote( 23081 $i, $in_quote, $quote_character, 23082 $quote_pos, $quote_depth, $quoted_string_1, 23083 $quoted_string_2, $rtokens, $rtoken_map, 23084 $max_token_index 23085 ); 23086 23087 # all done if we didn't find it 23088 last if ($in_quote); 23089 23090 # save pattern and replacement text for rescanning 23091 my $qs1 = $quoted_string_1; 23092 my $qs2 = $quoted_string_2; 23093 23094 # re-initialize for next search 23095 $quote_character = ''; 23096 $quote_pos = 0; 23097 $quote_type = 'Q'; 23098 $quoted_string_1 = ""; 23099 $quoted_string_2 = ""; 23100 last if ( ++$i > $max_token_index ); 23101 23102 # look for any modifiers 23103 if ($allowed_quote_modifiers) { 23104 23105 # check for exact quote modifiers 23106 if ( $$rtokens[$i] =~ /^[A-Za-z_]/ ) { 23107 my $str = $$rtokens[$i]; 23108 my $saw_modifier_e; 23109 while ( $str =~ /\G$allowed_quote_modifiers/gc ) 
{ 23110 my $pos = pos($str); 23111 my $char = substr( $str, $pos - 1, 1 ); 23112 $saw_modifier_e ||= ( $char eq 'e' ); 23113 } 23114 23115 # For an 'e' quote modifier we must scan the replacement 23116 # text for here-doc targets. 23117 if ($saw_modifier_e) { 23118 23119 my $rht = scan_replacement_text($qs1); 23120 23121 # Change type from 'Q' to 'h' for quotes with 23122 # here-doc targets so that the formatter (see sub 23123 # print_line_of_tokens) will not make any line 23124 # breaks after this point. 23125 if ($rht) { 23126 push @{$rhere_target_list}, @{$rht}; 23127 $type = 'h'; 23128 if ( $i_tok < 0 ) { 23129 my $ilast = $routput_token_list->[-1]; 23130 $routput_token_type->[$ilast] = $type; 23131 } 23132 } 23133 } 23134 23135 if ( defined( pos($str) ) ) { 23136 23137 # matched 23138 if ( pos($str) == length($str) ) { 23139 last if ( ++$i > $max_token_index ); 23140 } 23141 23142 # Looks like a joined quote modifier 23143 # and keyword, maybe something like 23144 # s/xxx/yyy/gefor @k=... 23145 # Example is "galgen.pl". Would have to split 23146 # the word and insert a new token in the 23147 # pre-token list. This is so rare that I haven't 23148 # done it. Will just issue a warning citation. 23149 23150 # This error might also be triggered if my quote 23151 # modifier characters are incomplete 23152 else { 23153 warning(<<EOM); 23154 23155Partial match to quote modifier $allowed_quote_modifiers at word: '$str' 23156Please put a space between quote modifiers and trailing keywords. 
23157EOM 23158 23159 # print "token $$rtokens[$i]\n"; 23160 # my $num = length($str) - pos($str); 23161 # $$rtokens[$i]=substr($$rtokens[$i],pos($str),$num); 23162 # print "continuing with new token $$rtokens[$i]\n"; 23163 23164 # skipping past this token does least damage 23165 last if ( ++$i > $max_token_index ); 23166 } 23167 } 23168 else { 23169 23170 # example file: rokicki4.pl 23171 # This error might also be triggered if my quote 23172 # modifier characters are incomplete 23173 write_logfile_entry( 23174"Note: found word $str at quote modifier location\n" 23175 ); 23176 } 23177 } 23178 23179 # re-initialize 23180 $allowed_quote_modifiers = ""; 23181 } 23182 } 23183 23184 unless ( $tok =~ /^\s*$/ ) { 23185 23186 # try to catch some common errors 23187 if ( ( $type eq 'n' ) && ( $tok ne '0' ) ) { 23188 23189 if ( $last_nonblank_token eq 'eq' ) { 23190 complain("Should 'eq' be '==' here ?\n"); 23191 } 23192 elsif ( $last_nonblank_token eq 'ne' ) { 23193 complain("Should 'ne' be '!=' here ?\n"); 23194 } 23195 } 23196 23197 $last_last_nonblank_token = $last_nonblank_token; 23198 $last_last_nonblank_type = $last_nonblank_type; 23199 $last_last_nonblank_block_type = $last_nonblank_block_type; 23200 $last_last_nonblank_container_type = 23201 $last_nonblank_container_type; 23202 $last_last_nonblank_type_sequence = 23203 $last_nonblank_type_sequence; 23204 $last_nonblank_token = $tok; 23205 $last_nonblank_type = $type; 23206 $last_nonblank_prototype = $prototype; 23207 $last_nonblank_block_type = $block_type; 23208 $last_nonblank_container_type = $container_type; 23209 $last_nonblank_type_sequence = $type_sequence; 23210 $last_nonblank_i = $i_tok; 23211 } 23212 23213 # store previous token type 23214 if ( $i_tok >= 0 ) { 23215 $routput_token_type->[$i_tok] = $type; 23216 $routput_block_type->[$i_tok] = $block_type; 23217 $routput_container_type->[$i_tok] = $container_type; 23218 $routput_type_sequence->[$i_tok] = $type_sequence; 23219 $routput_indent_flag->[$i_tok] = 
$indent_flag; 23220 } 23221 my $pre_tok = $$rtokens[$i]; # get the next pre-token 23222 my $pre_type = $$rtoken_type[$i]; # and type 23223 $tok = $pre_tok; 23224 $type = $pre_type; # to be modified as necessary 23225 $block_type = ""; # blank for all tokens except code block braces 23226 $container_type = ""; # blank for all tokens except some parens 23227 $type_sequence = ""; # blank for all tokens except ?/: 23228 $indent_flag = 0; 23229 $prototype = ""; # blank for all tokens except user defined subs 23230 $i_tok = $i; 23231 23232 # this pre-token will start an output token 23233 push( @{$routput_token_list}, $i_tok ); 23234 23235 # continue gathering identifier if necessary 23236 # but do not start on blanks and comments 23237 if ( $id_scan_state && $pre_type !~ /[b#]/ ) { 23238 23239 if ( $id_scan_state =~ /^(sub|package)/ ) { 23240 scan_id(); 23241 } 23242 else { 23243 scan_identifier(); 23244 } 23245 23246 last if ($id_scan_state); 23247 next if ( ( $i > 0 ) || $type ); 23248 23249 # didn't find any token; start over 23250 $type = $pre_type; 23251 $tok = $pre_tok; 23252 } 23253 23254 # handle whitespace tokens.. 23255 next if ( $type eq 'b' ); 23256 my $prev_tok = $i > 0 ? $$rtokens[ $i - 1 ] : ' '; 23257 my $prev_type = $i > 0 ? $$rtoken_type[ $i - 1 ] : 'b'; 23258 23259 # Build larger tokens where possible, since we are not in a quote. 23260 # 23261 # First try to assemble digraphs. The following tokens are 23262 # excluded and handled specially: 23263 # '/=' is excluded because the / might start a pattern. 23264 # 'x=' is excluded since it might be $x=, with $ on previous line 23265 # '**' and *= might be typeglobs of punctuation variables 23266 # I have allowed tokens starting with <, such as <=, 23267 # because I don't think these could be valid angle operators. 23268 # test file: storrs4.pl 23269 my $test_tok = $tok . 
$$rtokens[ $i + 1 ]; 23270 my $combine_ok = $is_digraph{$test_tok}; 23271 23272 # check for special cases which cannot be combined 23273 if ($combine_ok) { 23274 23275 # '//' must be defined_or operator if an operator is expected. 23276 # TODO: Code for other ambiguous digraphs (/=, x=, **, *=) 23277 # could be migrated here for clarity 23278 if ( $test_tok eq '//' ) { 23279 my $next_type = $$rtokens[ $i + 1 ]; 23280 my $expecting = 23281 operator_expected( $prev_type, $tok, $next_type ); 23282 $combine_ok = 0 unless ( $expecting == OPERATOR ); 23283 } 23284 } 23285 23286 if ( 23287 $combine_ok 23288 && ( $test_tok ne '/=' ) # might be pattern 23289 && ( $test_tok ne 'x=' ) # might be $x 23290 && ( $test_tok ne '**' ) # typeglob? 23291 && ( $test_tok ne '*=' ) # typeglob? 23292 ) 23293 { 23294 $tok = $test_tok; 23295 $i++; 23296 23297 # Now try to assemble trigraphs. Note that all possible 23298 # perl trigraphs can be constructed by appending a character 23299 # to a digraph. 23300 $test_tok = $tok . $$rtokens[ $i + 1 ]; 23301 23302 if ( $is_trigraph{$test_tok} ) { 23303 $tok = $test_tok; 23304 $i++; 23305 } 23306 } 23307 23308 $type = $tok; 23309 $next_tok = $$rtokens[ $i + 1 ]; 23310 $next_type = $$rtoken_type[ $i + 1 ]; 23311 23312 TOKENIZER_DEBUG_FLAG_TOKENIZE && do { 23313 local $" = ')('; 23314 my @debug_list = ( 23315 $last_nonblank_token, $tok, 23316 $next_tok, $brace_depth, 23317 $brace_type[$brace_depth], $paren_depth, 23318 $paren_type[$paren_depth] 23319 ); 23320 print "TOKENIZE:(@debug_list)\n"; 23321 }; 23322 23323 # turn off attribute list on first non-blank, non-bareword 23324 if ( $pre_type ne 'w' ) { $in_attribute_list = 0 } 23325 23326 ############################################################### 23327 # We have the next token, $tok. 
23328 # Now we have to examine this token and decide what it is 23329 # and define its $type 23330 # 23331 # section 1: bare words 23332 ############################################################### 23333 23334 if ( $pre_type eq 'w' ) { 23335 $expecting = operator_expected( $prev_type, $tok, $next_type ); 23336 my ( $next_nonblank_token, $i_next ) = 23337 find_next_nonblank_token( $i, $rtokens, $max_token_index ); 23338 23339 # ATTRS: handle sub and variable attributes 23340 if ($in_attribute_list) { 23341 23342 # treat bare word followed by open paren like qw( 23343 if ( $next_nonblank_token eq '(' ) { 23344 $in_quote = $quote_items{'q'}; 23345 $allowed_quote_modifiers = $quote_modifiers{'q'}; 23346 $type = 'q'; 23347 $quote_type = 'q'; 23348 next; 23349 } 23350 23351 # handle bareword not followed by open paren 23352 else { 23353 $type = 'w'; 23354 next; 23355 } 23356 } 23357 23358 # quote a word followed by => operator 23359 if ( $next_nonblank_token eq '=' ) { 23360 23361 if ( $$rtokens[ $i_next + 1 ] eq '>' ) { 23362 if ( $is_constant{$current_package}{$tok} ) { 23363 $type = 'C'; 23364 } 23365 elsif ( $is_user_function{$current_package}{$tok} ) { 23366 $type = 'U'; 23367 $prototype = 23368 $user_function_prototype{$current_package}{$tok}; 23369 } 23370 elsif ( $tok =~ /^v\d+$/ ) { 23371 $type = 'v'; 23372 report_v_string($tok); 23373 } 23374 else { $type = 'w' } 23375 23376 next; 23377 } 23378 } 23379 23380 # quote a bare word within braces..like xxx->{s}; note that we 23381 # must be sure this is not a structural brace, to avoid 23382 # mistaking {s} in the following for a quoted bare word: 23383 # for(@[){s}bla}BLA} 23384 # Also treat q in something like var{-q} as a bare word, not qoute operator 23385 ##if ( ( $last_nonblank_type eq 'L' ) 23386 ## && ( $next_nonblank_token eq '}' ) ) 23387 if ( 23388 $next_nonblank_token eq '}' 23389 && ( 23390 $last_nonblank_type eq 'L' 23391 || ( $last_nonblank_type eq 'm' 23392 && $last_last_nonblank_type eq 'L' ) 
23393 ) 23394 ) 23395 { 23396 $type = 'w'; 23397 next; 23398 } 23399 23400 # a bare word immediately followed by :: is not a keyword; 23401 # use $tok_kw when testing for keywords to avoid a mistake 23402 my $tok_kw = $tok; 23403 if ( $$rtokens[ $i + 1 ] eq ':' && $$rtokens[ $i + 2 ] eq ':' ) 23404 { 23405 $tok_kw .= '::'; 23406 } 23407 23408 # handle operator x (now we know it isn't $x=) 23409 if ( ( $tok =~ /^x\d*$/ ) && ( $expecting == OPERATOR ) ) { 23410 if ( $tok eq 'x' ) { 23411 23412 if ( $$rtokens[ $i + 1 ] eq '=' ) { # x= 23413 $tok = 'x='; 23414 $type = $tok; 23415 $i++; 23416 } 23417 else { 23418 $type = 'x'; 23419 } 23420 } 23421 23422 # FIXME: Patch: mark something like x4 as an integer for now 23423 # It gets fixed downstream. This is easier than 23424 # splitting the pretoken. 23425 else { 23426 $type = 'n'; 23427 } 23428 } 23429 23430 elsif ( ( $tok eq 'strict' ) 23431 and ( $last_nonblank_token eq 'use' ) ) 23432 { 23433 $tokenizer_self->{_saw_use_strict} = 1; 23434 scan_bare_identifier(); 23435 } 23436 23437 elsif ( ( $tok eq 'warnings' ) 23438 and ( $last_nonblank_token eq 'use' ) ) 23439 { 23440 $tokenizer_self->{_saw_perl_dash_w} = 1; 23441 23442 # scan as identifier, so that we pick up something like: 23443 # use warnings::register 23444 scan_bare_identifier(); 23445 } 23446 23447 elsif ( 23448 $tok eq 'AutoLoader' 23449 && $tokenizer_self->{_look_for_autoloader} 23450 && ( 23451 $last_nonblank_token eq 'use' 23452 23453 # these regexes are from AutoSplit.pm, which we want 23454 # to mimic 23455 || $input_line =~ /^\s*(use|require)\s+AutoLoader\b/ 23456 || $input_line =~ /\bISA\s*=.*\bAutoLoader\b/ 23457 ) 23458 ) 23459 { 23460 write_logfile_entry("AutoLoader seen, -nlal deactivates\n"); 23461 $tokenizer_self->{_saw_autoloader} = 1; 23462 $tokenizer_self->{_look_for_autoloader} = 0; 23463 scan_bare_identifier(); 23464 } 23465 23466 elsif ( 23467 $tok eq 'SelfLoader' 23468 && $tokenizer_self->{_look_for_selfloader} 23469 && ( 
$last_nonblank_token eq 'use' 23470 || $input_line =~ /^\s*(use|require)\s+SelfLoader\b/ 23471 || $input_line =~ /\bISA\s*=.*\bSelfLoader\b/ ) 23472 ) 23473 { 23474 write_logfile_entry("SelfLoader seen, -nlsl deactivates\n"); 23475 $tokenizer_self->{_saw_selfloader} = 1; 23476 $tokenizer_self->{_look_for_selfloader} = 0; 23477 scan_bare_identifier(); 23478 } 23479 23480 elsif ( ( $tok eq 'constant' ) 23481 and ( $last_nonblank_token eq 'use' ) ) 23482 { 23483 scan_bare_identifier(); 23484 my ( $next_nonblank_token, $i_next ) = 23485 find_next_nonblank_token( $i, $rtokens, 23486 $max_token_index ); 23487 23488 if ($next_nonblank_token) { 23489 23490 if ( $is_keyword{$next_nonblank_token} ) { 23491 warning( 23492"Attempting to define constant '$next_nonblank_token' which is a perl keyword\n" 23493 ); 23494 } 23495 23496 # FIXME: could check for error in which next token is 23497 # not a word (number, punctuation, ..) 23498 else { 23499 $is_constant{$current_package} 23500 {$next_nonblank_token} = 1; 23501 } 23502 } 23503 } 23504 23505 # various quote operators 23506 elsif ( $is_q_qq_qw_qx_qr_s_y_tr_m{$tok} ) { 23507 if ( $expecting == OPERATOR ) { 23508 23509 # patch for paren-less for/foreach glitch, part 1 23510 # perl will accept this construct as valid: 23511 # 23512 # foreach my $key qw\Uno Due Tres Quadro\ { 23513 # print "Set $key\n"; 23514 # } 23515 unless ( $tok eq 'qw' && $is_for_foreach{$want_paren} ) 23516 { 23517 error_if_expecting_OPERATOR(); 23518 } 23519 } 23520 $in_quote = $quote_items{$tok}; 23521 $allowed_quote_modifiers = $quote_modifiers{$tok}; 23522 23523 # All quote types are 'Q' except possibly qw quotes. 23524 # qw quotes are special in that they may generally be trimmed 23525 # of leading and trailing whitespace. So they are given a 23526 # separate type, 'q', unless requested otherwise. 23527 $type = 23528 ( $tok eq 'qw' && $tokenizer_self->{_trim_qw} ) 23529 ? 
'q' 23530 : 'Q'; 23531 $quote_type = $type; 23532 } 23533 23534 # check for a statement label 23535 elsif ( 23536 ( $next_nonblank_token eq ':' ) 23537 && ( $$rtokens[ $i_next + 1 ] ne ':' ) 23538 && ( $i_next <= $max_token_index ) # colon on same line 23539 && label_ok() 23540 ) 23541 { 23542 if ( $tok !~ /[A-Z]/ ) { 23543 push @{ $tokenizer_self->{_rlower_case_labels_at} }, 23544 $input_line_number; 23545 } 23546 $type = 'J'; 23547 $tok .= ':'; 23548 $i = $i_next; 23549 next; 23550 } 23551 23552 # 'sub' || 'package' 23553 elsif ( $is_sub_package{$tok_kw} ) { 23554 error_if_expecting_OPERATOR() 23555 if ( $expecting == OPERATOR ); 23556 scan_id(); 23557 } 23558 23559 # Note on token types for format, __DATA__, __END__: 23560 # It simplifies things to give these type ';', so that when we 23561 # start rescanning we will be expecting a token of type TERM. 23562 # We will switch to type 'k' before outputting the tokens. 23563 elsif ( $is_format_END_DATA{$tok_kw} ) { 23564 $type = ';'; # make tokenizer look for TERM next 23565 $tokenizer_self->{ $is_format_END_DATA{$tok_kw} } = 1; 23566 last; 23567 } 23568 23569 elsif ( $is_keyword{$tok_kw} ) { 23570 $type = 'k'; 23571 23572 # Since for and foreach may not be followed immediately 23573 # by an opening paren, we have to remember which keyword 23574 # is associated with the next '(' 23575 if ( $is_for_foreach{$tok} ) { 23576 if ( new_statement_ok() ) { 23577 $want_paren = $tok; 23578 } 23579 } 23580 23581 # recognize 'use' statements, which are special 23582 elsif ( $is_use_require{$tok} ) { 23583 $statement_type = $tok; 23584 error_if_expecting_OPERATOR() 23585 if ( $expecting == OPERATOR ); 23586 } 23587 23588 # remember my and our to check for trailing ": shared" 23589 elsif ( $is_my_our{$tok} ) { 23590 $statement_type = $tok; 23591 } 23592 23593 # Check for misplaced 'elsif' and 'else', but allow isolated 23594 # else or elsif blocks to be formatted. 
This is indicated 23595 # by a last noblank token of ';' 23596 elsif ( $tok eq 'elsif' ) { 23597 if ( $last_nonblank_token ne ';' 23598 && $last_nonblank_block_type !~ 23599 /^(if|elsif|unless)$/ ) 23600 { 23601 warning( 23602"expecting '$tok' to follow one of 'if|elsif|unless'\n" 23603 ); 23604 } 23605 } 23606 elsif ( $tok eq 'else' ) { 23607 23608 # patched for SWITCH/CASE 23609 if ( $last_nonblank_token ne ';' 23610 && $last_nonblank_block_type !~ 23611 /^(if|elsif|unless|case|when)$/ ) 23612 { 23613 warning( 23614"expecting '$tok' to follow one of 'if|elsif|unless|case|when'\n" 23615 ); 23616 } 23617 } 23618 elsif ( $tok eq 'continue' ) { 23619 if ( $last_nonblank_token ne ';' 23620 && $last_nonblank_block_type !~ 23621 /(^(\{|\}|;|while|until|for|foreach)|:$)/ ) 23622 { 23623 23624 # note: ';' '{' and '}' in list above 23625 # because continues can follow bare blocks; 23626 # ':' is labeled block 23627 # 23628 ############################################ 23629 # NOTE: This check has been deactivated because 23630 # continue has an alternative usage for given/when 23631 # blocks in perl 5.10 23632 ## warning("'$tok' should follow a block\n"); 23633 ############################################ 23634 } 23635 } 23636 23637 # patch for SWITCH/CASE if 'case' and 'when are 23638 # treated as keywords. 
23639 elsif ( $tok eq 'when' || $tok eq 'case' ) { 23640 $statement_type = $tok; # next '{' is block 23641 } 23642 23643 # indent trailing if/unless/while/until 23644 # outdenting will be handled by later indentation loop 23645 if ( $tok =~ /^(if|unless|while|until)$/ 23646 && $next_nonblank_token ne '(' ) 23647 { 23648 $indent_flag = 1; 23649 } 23650 } 23651 23652 # check for inline label following 23653 # /^(redo|last|next|goto)$/ 23654 elsif (( $last_nonblank_type eq 'k' ) 23655 && ( $is_redo_last_next_goto{$last_nonblank_token} ) ) 23656 { 23657 $type = 'j'; 23658 next; 23659 } 23660 23661 # something else -- 23662 else { 23663 23664 scan_bare_identifier(); 23665 if ( $type eq 'w' ) { 23666 23667 if ( $expecting == OPERATOR ) { 23668 23669 # don't complain about possible indirect object 23670 # notation. 23671 # For example: 23672 # package main; 23673 # sub new($) { ... } 23674 # $b = new A::; # calls A::new 23675 # $c = new A; # same thing but suspicious 23676 # This will call A::new but we have a 'new' in 23677 # main:: which looks like a constant. 23678 # 23679 if ( $last_nonblank_type eq 'C' ) { 23680 if ( $tok !~ /::$/ ) { 23681 complain(<<EOM); 23682Expecting operator after '$last_nonblank_token' but found bare word '$tok' 23683 Maybe indirectet object notation? 
23684EOM 23685 } 23686 } 23687 else { 23688 error_if_expecting_OPERATOR("bareword"); 23689 } 23690 } 23691 23692 # mark bare words immediately followed by a paren as 23693 # functions 23694 $next_tok = $$rtokens[ $i + 1 ]; 23695 if ( $next_tok eq '(' ) { 23696 $type = 'U'; 23697 } 23698 23699 # underscore after file test operator is file handle 23700 if ( $tok eq '_' && $last_nonblank_type eq 'F' ) { 23701 $type = 'Z'; 23702 } 23703 23704 # patch for SWITCH/CASE if 'case' and 'when are 23705 # not treated as keywords: 23706 if ( 23707 ( 23708 $tok eq 'case' 23709 && $brace_type[$brace_depth] eq 'switch' 23710 ) 23711 || ( $tok eq 'when' 23712 && $brace_type[$brace_depth] eq 'given' ) 23713 ) 23714 { 23715 $statement_type = $tok; # next '{' is block 23716 $type = 'k'; # for keyword syntax coloring 23717 } 23718 23719 # patch for SWITCH/CASE if switch and given not keywords 23720 # Switch is not a perl 5 keyword, but we will gamble 23721 # and mark switch followed by paren as a keyword. This 23722 # is only necessary to get html syntax coloring nice, 23723 # and does not commit this as being a switch/case. 
23724 if ( $next_nonblank_token eq '(' 23725 && ( $tok eq 'switch' || $tok eq 'given' ) ) 23726 { 23727 $type = 'k'; # for keyword syntax coloring 23728 } 23729 } 23730 } 23731 } 23732 23733 ############################################################### 23734 # section 2: strings of digits 23735 ############################################################### 23736 elsif ( $pre_type eq 'd' ) { 23737 $expecting = operator_expected( $prev_type, $tok, $next_type ); 23738 error_if_expecting_OPERATOR("Number") 23739 if ( $expecting == OPERATOR ); 23740 my $number = scan_number(); 23741 if ( !defined($number) ) { 23742 23743 # shouldn't happen - we should always get a number 23744 warning("non-number beginning with digit--program bug\n"); 23745 report_definite_bug(); 23746 } 23747 } 23748 23749 ############################################################### 23750 # section 3: all other tokens 23751 ############################################################### 23752 23753 else { 23754 last if ( $tok eq '#' ); 23755 my $code = $tokenization_code->{$tok}; 23756 if ($code) { 23757 $expecting = 23758 operator_expected( $prev_type, $tok, $next_type ); 23759 $code->(); 23760 redo if $in_quote; 23761 } 23762 } 23763 } 23764 23765 # ----------------------------- 23766 # end of main tokenization loop 23767 # ----------------------------- 23768 23769 if ( $i_tok >= 0 ) { 23770 $routput_token_type->[$i_tok] = $type; 23771 $routput_block_type->[$i_tok] = $block_type; 23772 $routput_container_type->[$i_tok] = $container_type; 23773 $routput_type_sequence->[$i_tok] = $type_sequence; 23774 $routput_indent_flag->[$i_tok] = $indent_flag; 23775 } 23776 23777 unless ( ( $type eq 'b' ) || ( $type eq '#' ) ) { 23778 $last_last_nonblank_token = $last_nonblank_token; 23779 $last_last_nonblank_type = $last_nonblank_type; 23780 $last_last_nonblank_block_type = $last_nonblank_block_type; 23781 $last_last_nonblank_container_type = $last_nonblank_container_type; 23782 
$last_last_nonblank_type_sequence = $last_nonblank_type_sequence; 23783 $last_nonblank_token = $tok; 23784 $last_nonblank_type = $type; 23785 $last_nonblank_block_type = $block_type; 23786 $last_nonblank_container_type = $container_type; 23787 $last_nonblank_type_sequence = $type_sequence; 23788 $last_nonblank_prototype = $prototype; 23789 } 23790 23791 # reset indentation level if necessary at a sub or package 23792 # in an attempt to recover from a nesting error 23793 if ( $level_in_tokenizer < 0 ) { 23794 if ( $input_line =~ /^\s*(sub|package)\s+(\w+)/ ) { 23795 reset_indentation_level(0); 23796 brace_warning("resetting level to 0 at $1 $2\n"); 23797 } 23798 } 23799 23800 # all done tokenizing this line ... 23801 # now prepare the final list of tokens and types 23802 23803 my @token_type = (); # stack of output token types 23804 my @block_type = (); # stack of output code block types 23805 my @container_type = (); # stack of output code container types 23806 my @type_sequence = (); # stack of output type sequence numbers 23807 my @tokens = (); # output tokens 23808 my @levels = (); # structural brace levels of output tokens 23809 my @slevels = (); # secondary nesting levels of output tokens 23810 my @nesting_tokens = (); # string of tokens leading to this depth 23811 my @nesting_types = (); # string of token types leading to this depth 23812 my @nesting_blocks = (); # string of block types leading to this depth 23813 my @nesting_lists = (); # string of list types leading to this depth 23814 my @ci_string = (); # string needed to compute continuation indentation 23815 my @container_environment = (); # BLOCK or LIST 23816 my $container_environment = ''; 23817 my $im = -1; # previous $i value 23818 my $num; 23819 my $ci_string_sum = ones_count($ci_string_in_tokenizer); 23820 23821# Computing Token Indentation 23822# 23823# The final section of the tokenizer forms tokens and also computes 23824# parameters needed to find indentation. 
It is much easier to do it 23825# in the tokenizer than elsewhere. Here is a brief description of how 23826# indentation is computed. Perl::Tidy computes indentation as the sum 23827# of 2 terms: 23828# 23829# (1) structural indentation, such as if/else/elsif blocks 23830# (2) continuation indentation, such as long parameter call lists. 23831# 23832# These are occasionally called primary and secondary indentation. 23833# 23834# Structural indentation is introduced by tokens of type '{', although 23835# the actual tokens might be '{', '(', or '['. Structural indentation 23836# is of two types: BLOCK and non-BLOCK. Default structural indentation 23837# is 4 characters if the standard indentation scheme is used. 23838# 23839# Continuation indentation is introduced whenever a line at BLOCK level 23840# is broken before its termination. Default continuation indentation 23841# is 2 characters in the standard indentation scheme. 23842# 23843# Both types of indentation may be nested arbitrarily deep and 23844# interlaced. The distinction between the two is somewhat arbitrary. 23845# 23846# For each token, we will define two variables which would apply if 23847# the current statement were broken just before that token, so that 23848# that token started a new line: 23849# 23850# $level = the structural indentation level, 23851# $ci_level = the continuation indentation level 23852# 23853# The total indentation will be $level * (4 spaces) + $ci_level * (2 spaces), 23854# assuming defaults. However, in some special cases it is customary 23855# to modify $ci_level from this strict value. 23856# 23857# The total structural indentation is easy to compute by adding and 23858# subtracting 1 from a saved value as types '{' and '}' are seen. The 23859# running value of this variable is $level_in_tokenizer. 23860# 23861# The total continuation is much more difficult to compute, and requires 23862# several variables. 
These variables are: 23863# 23864# $ci_string_in_tokenizer = a string of 1's and 0's indicating, for 23865# each indentation level, if there are intervening open secondary 23866# structures just prior to that level. 23867# $continuation_string_in_tokenizer = a string of 1's and 0's indicating 23868# if the last token at that level is "continued", meaning that it 23869# is not the first token of an expression. 23870# $nesting_block_string = a string of 1's and 0's indicating, for each 23871# indentation level, if the level is of type BLOCK or not. 23872# $nesting_block_flag = the most recent 1 or 0 of $nesting_block_string 23873# $nesting_list_string = a string of 1's and 0's indicating, for each 23874# indentation level, if it is appropriate for list formatting. 23875# If so, continuation indentation is used to indent long list items. 23876# $nesting_list_flag = the most recent 1 or 0 of $nesting_list_string 23877# @{$rslevel_stack} = a stack of total nesting depths at each 23878# structural indentation level, where "total nesting depth" means 23879# the nesting depth that would occur if every nesting token -- '{', '[', 23880# and '(' -- , regardless of context, is used to compute a nesting 23881# depth. 23882 23883 #my $nesting_block_flag = ($nesting_block_string =~ /1$/); 23884 #my $nesting_list_flag = ($nesting_list_string =~ /1$/); 23885 23886 my ( $ci_string_i, $level_i, $nesting_block_string_i, 23887 $nesting_list_string_i, $nesting_token_string_i, 23888 $nesting_type_string_i, ); 23889 23890 foreach $i ( @{$routput_token_list} ) 23891 { # scan the list of pre-tokens indexes 23892 23893 # self-checking for valid token types 23894 my $type = $routput_token_type->[$i]; 23895 my $forced_indentation_flag = $routput_indent_flag->[$i]; 23896 23897 # See if we should undo the $forced_indentation_flag. 23898 # Forced indentation after 'if', 'unless', 'while' and 'until' 23899 # expressions without trailing parens is optional and doesn't 23900 # always look good.
It is usually okay for a trailing logical 23901 # expression, but if the expression is a function call, code block, 23902 # or some kind of list it puts in an unwanted extra indentation 23903 # level which is hard to remove. 23904 # 23905 # Example where extra indentation looks ok: 23906 # return 1 23907 # if $det_a < 0 and $det_b > 0 23908 # or $det_a > 0 and $det_b < 0; 23909 # 23910 # Example where extra indentation is not needed because 23911 # the eval brace also provides indentation: 23912 # print "not " if defined eval { 23913 # reduce { die if $b > 2; $a + $b } 0, 1, 2, 3, 4; 23914 # }; 23915 # 23916 # The following rule works fairly well: 23917 # Undo the flag if the end of this line, or start of the next 23918 # line, is an opening container token or a comma. 23919 # This almost always works, but if not after another pass it will 23920 # be stable. 23921 if ( $forced_indentation_flag && $type eq 'k' ) { 23922 my $ixlast = -1; 23923 my $ilast = $routput_token_list->[$ixlast]; 23924 my $toklast = $routput_token_type->[$ilast]; 23925 if ( $toklast eq '#' ) { 23926 $ixlast--; 23927 $ilast = $routput_token_list->[$ixlast]; 23928 $toklast = $routput_token_type->[$ilast]; 23929 } 23930 if ( $toklast eq 'b' ) { 23931 $ixlast--; 23932 $ilast = $routput_token_list->[$ixlast]; 23933 $toklast = $routput_token_type->[$ilast]; 23934 } 23935 if ( $toklast =~ /^[\{,]$/ ) { 23936 $forced_indentation_flag = 0; 23937 } 23938 else { 23939 ( $toklast, my $i_next ) = 23940 find_next_nonblank_token( $max_token_index, $rtokens, 23941 $max_token_index ); 23942 if ( $toklast =~ /^[\{,]$/ ) { 23943 $forced_indentation_flag = 0; 23944 } 23945 } 23946 } 23947 23948 # if we are already in an indented if, see if we should outdent 23949 if ($indented_if_level) { 23950 23951 # don't try to nest trailing if's - shouldn't happen 23952 if ( $type eq 'k' ) { 23953 $forced_indentation_flag = 0; 23954 } 23955 23956 # check for the normal case - outdenting at next ';' 23957 elsif ( $type eq ';' 
) { 23958 if ( $level_in_tokenizer == $indented_if_level ) { 23959 $forced_indentation_flag = -1; 23960 $indented_if_level = 0; 23961 } 23962 } 23963 23964 # handle case of missing semicolon 23965 elsif ( $type eq '}' ) { 23966 if ( $level_in_tokenizer == $indented_if_level ) { 23967 $indented_if_level = 0; 23968 23969 # TBD: This could be a subroutine call 23970 $level_in_tokenizer--; 23971 if ( @{$rslevel_stack} > 1 ) { 23972 pop( @{$rslevel_stack} ); 23973 } 23974 if ( length($nesting_block_string) > 1 ) 23975 { # true for valid script 23976 chop $nesting_block_string; 23977 chop $nesting_list_string; 23978 } 23979 23980 } 23981 } 23982 } 23983 23984 my $tok = $$rtokens[$i]; # the token, but ONLY if same as pretoken 23985 $level_i = $level_in_tokenizer; 23986 23987 # This can happen by running perltidy on non-scripts 23988 # although it could also be bug introduced by programming change. 23989 # Perl silently accepts a 032 (^Z) and takes it as the end 23990 if ( !$is_valid_token_type{$type} ) { 23991 my $val = ord($type); 23992 warning( 23993 "unexpected character decimal $val ($type) in script\n"); 23994 $tokenizer_self->{_in_error} = 1; 23995 } 23996 23997 # ---------------------------------------------------------------- 23998 # TOKEN TYPE PATCHES 23999 # output __END__, __DATA__, and format as type 'k' instead of ';' 24000 # to make html colors correct, etc. 
24001 my $fix_type = $type; 24002 if ( $type eq ';' && $tok =~ /\w/ ) { $fix_type = 'k' } 24003 24004 # output anonymous 'sub' as keyword 24005 if ( $type eq 't' && $tok eq 'sub' ) { $fix_type = 'k' } 24006 24007 # ----------------------------------------------------------------- 24008 24009 $nesting_token_string_i = $nesting_token_string; 24010 $nesting_type_string_i = $nesting_type_string; 24011 $nesting_block_string_i = $nesting_block_string; 24012 $nesting_list_string_i = $nesting_list_string; 24013 24014 # set primary indentation levels based on structural braces 24015 # Note: these are set so that the leading braces have a HIGHER 24016 # level than their CONTENTS, which is convenient for indentation 24017 # Also, define continuation indentation for each token. 24018 if ( $type eq '{' || $type eq 'L' || $forced_indentation_flag > 0 ) 24019 { 24020 24021 # use environment before updating 24022 $container_environment = 24023 $nesting_block_flag ? 'BLOCK' 24024 : $nesting_list_flag ? 'LIST' 24025 : ""; 24026 24027 # if the difference between total nesting levels is not 1, 24028 # there are intervening non-structural nesting types between 24029 # this '{' and the previous unclosed '{' 24030 my $intervening_secondary_structure = 0; 24031 if ( @{$rslevel_stack} ) { 24032 $intervening_secondary_structure = 24033 $slevel_in_tokenizer - $rslevel_stack->[-1]; 24034 } 24035 24036 # Continuation Indentation 24037 # 24038 # Having tried setting continuation indentation both in the formatter and 24039 # in the tokenizer, I can say that setting it in the tokenizer is much, 24040 # much easier. The formatter already has too much to do, and can't 24041 # make decisions on line breaks without knowing what 'ci' will be at 24042 # arbitrary locations. 24043 # 24044 # But a problem with setting the continuation indentation (ci) here 24045 # in the tokenizer is that we do not know where line breaks will actually 24046 # be. 
As a result, we don't know if we should propagate continuation 24047 # indentation to higher levels of structure. 24048 # 24049 # For nesting of only structural indentation, we never need to do this. 24050 # For example, in a long if statement, like this 24051 # 24052 # if ( !$output_block_type[$i] 24053 # && ($in_statement_continuation) ) 24054 # { <--outdented 24055 # do_something(); 24056 # } 24057 # 24058 # the second line has ci but we do normally give the lines within the BLOCK 24059 # any ci. This would be true if we had blocks nested arbitrarily deeply. 24060 # 24061 # But consider something like this, where we have created a break after 24062 # an opening paren on line 1, and the paren is not (currently) a 24063 # structural indentation token: 24064 # 24065 # my $file = $menubar->Menubutton( 24066 # qw/-text File -underline 0 -menuitems/ => [ 24067 # [ 24068 # Cascade => '~View', 24069 # -menuitems => [ 24070 # ... 24071 # 24072 # The second line has ci, so it would seem reasonable to propagate it 24073 # down, giving the third line 1 ci + 1 indentation. This suggests the 24074 # following rule, which is currently used to propagating ci down: if there 24075 # are any non-structural opening parens (or brackets, or braces), before 24076 # an opening structural brace, then ci is propagated down, and otherwise 24077 # not. The variable $intervening_secondary_structure contains this 24078 # information for the current token, and the string 24079 # "$ci_string_in_tokenizer" is a stack of previous values of this 24080 # variable. 24081 24082 # save the current states 24083 push( @{$rslevel_stack}, 1 + $slevel_in_tokenizer ); 24084 $level_in_tokenizer++; 24085 24086 if ($forced_indentation_flag) { 24087 24088 # break BEFORE '?' when there is forced indentation 24089 if ( $type eq '?' 
) { $level_i = $level_in_tokenizer; } 24090 if ( $type eq 'k' ) { 24091 $indented_if_level = $level_in_tokenizer; 24092 } 24093 } 24094 24095 if ( $routput_block_type->[$i] ) { 24096 $nesting_block_flag = 1; 24097 $nesting_block_string .= '1'; 24098 } 24099 else { 24100 $nesting_block_flag = 0; 24101 $nesting_block_string .= '0'; 24102 } 24103 24104 # we will use continuation indentation within containers 24105 # which are not blocks and not logical expressions 24106 my $bit = 0; 24107 if ( !$routput_block_type->[$i] ) { 24108 24109 # propagate flag down at nested open parens 24110 if ( $routput_container_type->[$i] eq '(' ) { 24111 $bit = 1 if $nesting_list_flag; 24112 } 24113 24114 # use list continuation if not a logical grouping 24115 # /^(if|elsif|unless|while|and|or|not|&&|!|\|\||for|foreach)$/ 24116 else { 24117 $bit = 1 24118 unless 24119 $is_logical_container{ $routput_container_type->[$i] 24120 }; 24121 } 24122 } 24123 $nesting_list_string .= $bit; 24124 $nesting_list_flag = $bit; 24125 24126 $ci_string_in_tokenizer .= 24127 ( $intervening_secondary_structure != 0 ) ? '1' : '0'; 24128 $ci_string_sum = ones_count($ci_string_in_tokenizer); 24129 $continuation_string_in_tokenizer .= 24130 ( $in_statement_continuation > 0 ) ? '1' : '0'; 24131 24132 # Sometimes we want to give an opening brace continuation indentation, 24133 # and sometimes not. For code blocks, we don't do it, so that the leading 24134 # '{' gets outdented, like this: 24135 # 24136 # if ( !$output_block_type[$i] 24137 # && ($in_statement_continuation) ) 24138 # { <--outdented 24139 # 24140 # For other types, we will give them continuation indentation. 
For example, 24141 # here is how a list looks with the opening paren indented: 24142 # 24143 # @LoL = 24144 # ( [ "fred", "barney" ], [ "george", "jane", "elroy" ], 24145 # [ "homer", "marge", "bart" ], ); 24146 # 24147 # This looks best when 'ci' is one-half of the indentation (i.e., 2 and 4) 24148 24149 my $total_ci = $ci_string_sum; 24150 if ( 24151 !$routput_block_type->[$i] # patch: skip for BLOCK 24152 && ($in_statement_continuation) 24153 && !( $forced_indentation_flag && $type eq ':' ) 24154 ) 24155 { 24156 $total_ci += $in_statement_continuation 24157 unless ( $ci_string_in_tokenizer =~ /1$/ ); 24158 } 24159 24160 $ci_string_i = $total_ci; 24161 $in_statement_continuation = 0; 24162 } 24163 24164 elsif ($type eq '}' 24165 || $type eq 'R' 24166 || $forced_indentation_flag < 0 ) 24167 { 24168 24169 # only a nesting error in the script would prevent popping here 24170 if ( @{$rslevel_stack} > 1 ) { pop( @{$rslevel_stack} ); } 24171 24172 $level_i = --$level_in_tokenizer; 24173 24174 # restore previous level values 24175 if ( length($nesting_block_string) > 1 ) 24176 { # true for valid script 24177 chop $nesting_block_string; 24178 $nesting_block_flag = ( $nesting_block_string =~ /1$/ ); 24179 chop $nesting_list_string; 24180 $nesting_list_flag = ( $nesting_list_string =~ /1$/ ); 24181 24182 chop $ci_string_in_tokenizer; 24183 $ci_string_sum = ones_count($ci_string_in_tokenizer); 24184 24185 $in_statement_continuation = 24186 chop $continuation_string_in_tokenizer; 24187 24188 # zero continuation flag at terminal BLOCK '}' which 24189 # ends a statement. 24190 if ( $routput_block_type->[$i] ) { 24191 24192 # ...These include non-anonymous subs 24193 # note: could be sub ::abc { or sub 'abc 24194 if ( $routput_block_type->[$i] =~ m/^sub\s*/gc ) { 24195 24196 # note: older versions of perl require the /gc modifier 24197 # here or else the \G does not work. 
24198 if ( $routput_block_type->[$i] =~ /\G('|::|\w)/gc ) 24199 { 24200 $in_statement_continuation = 0; 24201 } 24202 } 24203 24204# ...and include all block types except user subs with 24205# block prototypes and these: (sort|grep|map|do|eval) 24206# /^(\}|\{|BEGIN|END|CHECK|INIT|AUTOLOAD|DESTROY|UNITCHECK|continue|;|if|elsif|else|unless|while|until|for|foreach)$/ 24207 elsif ( 24208 $is_zero_continuation_block_type{ 24209 $routput_block_type->[$i] } ) 24210 { 24211 $in_statement_continuation = 0; 24212 } 24213 24214 # ..but these are not terminal types: 24215 # /^(sort|grep|map|do|eval)$/ ) 24216 elsif ( 24217 $is_not_zero_continuation_block_type{ 24218 $routput_block_type->[$i] } ) 24219 { 24220 } 24221 24222 # ..and a block introduced by a label 24223 # /^\w+\s*:$/gc ) { 24224 elsif ( $routput_block_type->[$i] =~ /:$/ ) { 24225 $in_statement_continuation = 0; 24226 } 24227 24228 # user function with block prototype 24229 else { 24230 $in_statement_continuation = 0; 24231 } 24232 } 24233 24234 # If we are in a list, then 24235 # we must set continuatoin indentation at the closing 24236 # paren of something like this (paren after $check): 24237 # assert( 24238 # __LINE__, 24239 # ( not defined $check ) 24240 # or ref $check 24241 # or $check eq "new" 24242 # or $check eq "old", 24243 # ); 24244 elsif ( $tok eq ')' ) { 24245 $in_statement_continuation = 1 24246 if $routput_container_type->[$i] =~ /^[;,\{\}]$/; 24247 } 24248 24249 elsif ( $tok eq ';' ) { $in_statement_continuation = 0 } 24250 } 24251 24252 # use environment after updating 24253 $container_environment = 24254 $nesting_block_flag ? 'BLOCK' 24255 : $nesting_list_flag ? 'LIST' 24256 : ""; 24257 $ci_string_i = $ci_string_sum + $in_statement_continuation; 24258 $nesting_block_string_i = $nesting_block_string; 24259 $nesting_list_string_i = $nesting_list_string; 24260 } 24261 24262 # not a structural indentation type.. 24263 else { 24264 24265 $container_environment = 24266 $nesting_block_flag ? 
'BLOCK' 24267 : $nesting_list_flag ? 'LIST' 24268 : ""; 24269 24270 # zero the continuation indentation at certain tokens so 24271 # that they will be at the same level as its container. For 24272 # commas, this simplifies the -lp indentation logic, which 24273 # counts commas. For ?: it makes them stand out. 24274 if ($nesting_list_flag) { 24275 if ( $type =~ /^[,\?\:]$/ ) { 24276 $in_statement_continuation = 0; 24277 } 24278 } 24279 24280 # be sure binary operators get continuation indentation 24281 if ( 24282 $container_environment 24283 && ( $type eq 'k' && $is_binary_keyword{$tok} 24284 || $is_binary_type{$type} ) 24285 ) 24286 { 24287 $in_statement_continuation = 1; 24288 } 24289 24290 # continuation indentation is sum of any open ci from previous 24291 # levels plus the current level 24292 $ci_string_i = $ci_string_sum + $in_statement_continuation; 24293 24294 # update continuation flag ... 24295 # if this isn't a blank or comment.. 24296 if ( $type ne 'b' && $type ne '#' ) { 24297 24298 # and we are in a BLOCK 24299 if ($nesting_block_flag) { 24300 24301 # the next token after a ';' and label starts a new stmt 24302 if ( $type eq ';' || $type eq 'J' ) { 24303 $in_statement_continuation = 0; 24304 } 24305 24306 # otherwise, we are continuing the current statement 24307 else { 24308 $in_statement_continuation = 1; 24309 } 24310 } 24311 24312 # if we are not in a BLOCK.. 
24313 else { 24314 24315 # do not use continuation indentation if not list 24316 # environment (could be within if/elsif clause) 24317 if ( !$nesting_list_flag ) { 24318 $in_statement_continuation = 0; 24319 } 24320 24321 # otherwise, the next token after a ',' starts a new term 24322 elsif ( $type eq ',' ) { 24323 $in_statement_continuation = 0; 24324 } 24325 24326 # otherwise, we are continuing the current term 24327 else { 24328 $in_statement_continuation = 1; 24329 } 24330 } 24331 } 24332 } 24333 24334 if ( $level_in_tokenizer < 0 ) { 24335 unless ( $tokenizer_self->{_saw_negative_indentation} ) { 24336 $tokenizer_self->{_saw_negative_indentation} = 1; 24337 warning("Starting negative indentation\n"); 24338 } 24339 } 24340 24341 # set secondary nesting levels based on all continment token types 24342 # Note: these are set so that the nesting depth is the depth 24343 # of the PREVIOUS TOKEN, which is convenient for setting 24344 # the stength of token bonds 24345 my $slevel_i = $slevel_in_tokenizer; 24346 24347 # /^[L\{\(\[]$/ 24348 if ( $is_opening_type{$type} ) { 24349 $slevel_in_tokenizer++; 24350 $nesting_token_string .= $tok; 24351 $nesting_type_string .= $type; 24352 } 24353 24354 # /^[R\}\)\]]$/ 24355 elsif ( $is_closing_type{$type} ) { 24356 $slevel_in_tokenizer--; 24357 my $char = chop $nesting_token_string; 24358 24359 if ( $char ne $matching_start_token{$tok} ) { 24360 $nesting_token_string .= $char . 
$tok; 24361 $nesting_type_string .= $type; 24362 } 24363 else { 24364 chop $nesting_type_string; 24365 } 24366 } 24367 24368 push( @block_type, $routput_block_type->[$i] ); 24369 push( @ci_string, $ci_string_i ); 24370 push( @container_environment, $container_environment ); 24371 push( @container_type, $routput_container_type->[$i] ); 24372 push( @levels, $level_i ); 24373 push( @nesting_tokens, $nesting_token_string_i ); 24374 push( @nesting_types, $nesting_type_string_i ); 24375 push( @slevels, $slevel_i ); 24376 push( @token_type, $fix_type ); 24377 push( @type_sequence, $routput_type_sequence->[$i] ); 24378 push( @nesting_blocks, $nesting_block_string ); 24379 push( @nesting_lists, $nesting_list_string ); 24380 24381 # now form the previous token 24382 if ( $im >= 0 ) { 24383 $num = 24384 $$rtoken_map[$i] - $$rtoken_map[$im]; # how many characters 24385 24386 if ( $num > 0 ) { 24387 push( @tokens, 24388 substr( $input_line, $$rtoken_map[$im], $num ) ); 24389 } 24390 } 24391 $im = $i; 24392 } 24393 24394 $num = length($input_line) - $$rtoken_map[$im]; # make the last token 24395 if ( $num > 0 ) { 24396 push( @tokens, substr( $input_line, $$rtoken_map[$im], $num ) ); 24397 } 24398 24399 $tokenizer_self->{_in_attribute_list} = $in_attribute_list; 24400 $tokenizer_self->{_in_quote} = $in_quote; 24401 $tokenizer_self->{_quote_target} = 24402 $in_quote ? 
matching_end_token($quote_character) : "";
        $tokenizer_self->{_rhere_target_list} = $rhere_target_list;

        $line_of_tokens->{_rtoken_type}            = \@token_type;
        $line_of_tokens->{_rtokens}                = \@tokens;
        $line_of_tokens->{_rblock_type}            = \@block_type;
        $line_of_tokens->{_rcontainer_type}        = \@container_type;
        $line_of_tokens->{_rcontainer_environment} = \@container_environment;
        $line_of_tokens->{_rtype_sequence}         = \@type_sequence;
        $line_of_tokens->{_rlevels}                = \@levels;
        $line_of_tokens->{_rslevels}               = \@slevels;
        $line_of_tokens->{_rnesting_tokens}        = \@nesting_tokens;
        $line_of_tokens->{_rci_levels}             = \@ci_string;
        $line_of_tokens->{_rnesting_blocks}        = \@nesting_blocks;

        return;
    }
}    # end tokenize_this_line

#########i#############################################################
# Tokenizer routines which assist in identifying token types
#######################################################################

sub operator_expected {

    # Many perl symbols have two or more meanings.  For example, '<<'
    # can be a shift operator or a here-doc operator.  The
    # interpretation of these symbols depends on the current state of
    # the tokenizer, which may either be expecting a term or an
    # operator.  For this example, a << would be a shift if an operator
    # is expected, and a here-doc if a term is expected.  This routine
    # is called to make this decision for any current token.  It returns
    # one of three possible values:
    #
    #     OPERATOR - operator expected (or at least, not a term)
    #     UNKNOWN  - can't tell
    #     TERM     - a term is expected (or at least, not an operator)
    #
    # The decision is based on what has been seen so far.  This
    # information is stored in the "$last_nonblank_type" and
    # "$last_nonblank_token" variables.  For example, if the
    # $last_nonblank_type is '=~', then we are expecting a TERM, whereas
    # if $last_nonblank_type is 'n' (numeric), we are expecting an
    # OPERATOR.
    #
    # If a UNKNOWN is returned, the calling routine must guess. A major
    # goal of this tokenizer is to minimize the possibility of returning
    # UNKNOWN, because a wrong guess can spoil the formatting of a
    # script.
    #
    # adding NEW_TOKENS: it is critically important that this routine be
    # updated to allow it to determine if an operator or term is to be
    # expected after the new token.  Doing this simply involves adding
    # the new token character to one of the regexes in this routine or
    # to one of the hash lists
    # that it uses, which are initialized in the BEGIN section.
    #
    # Arguments:
    #   $prev_type - type of the token preceding $tok; only tested
    #                against 'b' (blank) here
    #   $tok       - the current token text
    #   $next_type - type of the token following $tok; only tested
    #                against 'b' (blank) and '/' here
    #
    # USES GLOBAL VARIABLES: $last_nonblank_type, $last_nonblank_token,
    # $statement_type

    my ( $prev_type, $tok, $next_type ) = @_;

    my $op_expected = UNKNOWN;

#print "tok=$tok last type=$last_nonblank_type last tok=$last_nonblank_token\n";

# Note: function prototype is available for token type 'U' for future
# program development.  It contains the leading and trailing parens,
# and no blanks.  It might be used to eliminate token type 'C', for
# example (prototype = '()'). Thus:
# if ($last_nonblank_type eq 'U') {
#     print "previous token=$last_nonblank_token  type=$last_nonblank_type prototype=$last_nonblank_prototype\n";
# }

    # A possible filehandle (or object) requires some care...
    if ( $last_nonblank_type eq 'Z' ) {

        # angle.t
        if ( $last_nonblank_token =~ /^[A-Za-z_]/ ) {
            $op_expected = UNKNOWN;
        }

        # For possible file handle like "$a", Perl uses weird parsing rules.
        # For example:
        # print $a/2,"/hi";   - division
        # print $a / 2,"/hi"; - division
        # print $a/ 2,"/hi";  - division
        # print $a /2,"/hi";  - pattern (and error)!
        elsif ( ( $prev_type eq 'b' ) && ( $next_type ne 'b' ) ) {
            $op_expected = TERM;
        }

        # Note when an operation is being done where a
        # filehandle might be expected, since a change in whitespace
        # could change the interpretation of the statement.
        else {
            if ( $tok =~ /^([x\/\+\-\*\%\&\.\?\<]|\>\>)$/ ) {
                complain("operator in print statement not recommended\n");
                $op_expected = OPERATOR;
            }
        }
    }

    # handle something after 'do' and 'eval'
    elsif ( $is_block_operator{$last_nonblank_token} ) {

        # something like $a = eval "expression";
        #                          ^
        if ( $last_nonblank_type eq 'k' ) {
            $op_expected = TERM;    # expression or list mode following keyword
        }

        # something like $a = do { BLOCK } / 2;
        #                                  ^
        else {
            $op_expected = OPERATOR;    # block mode following }
        }
    }

    # handle bare word..
    elsif ( $last_nonblank_type eq 'w' ) {

        # unfortunately, we can't tell what type of token to expect next
        # after most bare words
        $op_expected = UNKNOWN;
    }

    # operator, but not term possible after these types
    # Note: moved ')' from type to token because parens in list context
    # get marked as '{' '}' now.  This is a minor glitch in the following:
    #    my %opts = (ref $_[0] eq 'HASH') ? %{shift()} : ();
    #
    elsif (( $last_nonblank_type =~ /^[\]RnviQh]$/ )
        || ( $last_nonblank_token =~ /^(\)|\$|\-\>)/ ) )
    {
        $op_expected = OPERATOR;

        # in a 'use' statement, numbers and v-strings are not true
        # numbers, so to avoid incorrect error messages, we will
        # mark them as unknown for now (use.t)
        # TODO: it would be much nicer to create a new token V for VERSION
        # number in a use statement.  Then this could be a check on type V
        # and related patches which change $statement_type for '=>'
        # and ',' could be removed.  Further, it would clean things up to
        # scan the 'use' statement with a separate subroutine.
        if (   ( $statement_type eq 'use' )
            && ( $last_nonblank_type =~ /^[nv]$/ ) )
        {
            $op_expected = UNKNOWN;
        }
    }

    # no operator after many keywords, such as "die", "warn", etc
    elsif ( $expecting_term_token{$last_nonblank_token} ) {

        # patch for dor.t (defined or).
        # perl functions which may be unary operators
        # TODO: This list is incomplete, and these should be put
        # into a hash.
        #
        # The alternation is grouped so that both anchors apply to every
        # keyword.  Ungrouped, /^eof|undef|shift|pop$/ means
        # (^eof) | undef | shift | (pop$), which also matches unrelated
        # tokens such as 'unshift'.
        if (   $tok eq '/'
            && $next_type          eq '/'
            && $last_nonblank_type eq 'k'
            && $last_nonblank_token =~ /^(?:eof|undef|shift|pop)$/ )
        {
            $op_expected = OPERATOR;
        }
        else {
            $op_expected = TERM;
        }
    }

    # no operator after things like + - **  (i.e., other operators)
    elsif ( $expecting_term_types{$last_nonblank_type} ) {
        $op_expected = TERM;
    }

    # a few operators, like "time", have an empty prototype () and so
    # take no parameters but produce a value to operate on
    elsif ( $expecting_operator_token{$last_nonblank_token} ) {
        $op_expected = OPERATOR;
    }

    # post-increment and decrement produce values to be operated on
    elsif ( $expecting_operator_types{$last_nonblank_type} ) {
        $op_expected = OPERATOR;
    }

    # no value to operate on after sub block
    elsif ( $last_nonblank_token =~ /^sub\s/ ) { $op_expected = TERM; }

    # a right brace here indicates the end of a simple block.
    # all non-structural right braces have type 'R'
    # all braces associated with block operator keywords have been given those
    # keywords as "last_nonblank_token" and caught above.
    # (This statement is order dependent, and must come after checking
    # $last_nonblank_token).
    elsif ( $last_nonblank_type eq '}' ) {

        # patch for dor.t (defined or).
        if (   $tok eq '/'
            && $next_type eq '/'
            && $last_nonblank_token eq ']' )
        {
            $op_expected = OPERATOR;
        }
        else {
            $op_expected = TERM;
        }
    }

    # something else..what did I forget?
    else {

        # collecting diagnostics on unknown operator types..see what was missed
        $op_expected = UNKNOWN;
        write_diagnostics(
"OP: unknown after type=$last_nonblank_type  token=$last_nonblank_token\n"
        );
    }

    TOKENIZER_DEBUG_FLAG_EXPECT && do {
        print
"EXPECT: returns $op_expected for last type $last_nonblank_type token $last_nonblank_token\n";
    };
    return $op_expected;
}

sub new_statement_ok {

    # return true if the current token can start a new statement,
    # i.e. if a label would be allowed here or the previous nonblank
    # token was itself a label (type 'J')
    # USES GLOBAL VARIABLES: $last_nonblank_type

    return label_ok()    # a label would be ok here

      || $last_nonblank_type eq 'J';    # or we follow a label

}

sub label_ok {

    # Decide if a bare word followed by a colon here is a label.
    # Returns a true value if a label is acceptable at this point.
    # USES GLOBAL VARIABLES: $last_nonblank_token, $last_nonblank_type,
    # $brace_depth, @brace_type

    # if it follows an opening or closing code block curly brace..
    if ( ( $last_nonblank_token eq '{' || $last_nonblank_token eq '}' )
        && $last_nonblank_type eq $last_nonblank_token )
    {

        # it is a label if and only if the curly encloses a code block
        return $brace_type[$brace_depth];
    }

    # otherwise, it is a label if and only if it follows a ';'
    # (real or fake)
    else {
        return ( $last_nonblank_type eq ';' );
    }
}

sub code_block_type {

    # Decide if this is a block of code, and its type.
    # Must be called only when $type = $token = '{'
    # The problem is to distinguish between the start of a block of code
    # and the start of an anonymous hash reference
    # Returns "" if not code block, otherwise returns 'last_nonblank_token'
    # to indicate the type of code block.  (For example, 'last_nonblank_token'
    # might be 'if' for an if block, 'else' for an else block, etc).
    #
    # The arguments ( $i, $rtokens, $rtoken_type, $max_token_index ) are
    # not examined here; they are passed through unchanged to
    # decide_if_code_block when a look-ahead is needed.
    #
    # USES GLOBAL VARIABLES: $last_nonblank_token, $last_nonblank_type,
    # $last_nonblank_block_type, $brace_depth, @brace_type

    # handle case of multiple '{'s

# print "BLOCK_TYPE EXAMINING: type=$last_nonblank_type tok=$last_nonblank_token\n";

    my ( $i, $rtokens, $rtoken_type, $max_token_index ) = @_;
    if (   $last_nonblank_token eq '{'
        && $last_nonblank_type eq $last_nonblank_token )
    {

        # opening brace where a statement may appear is probably
        # a code block but might be an anonymous hash reference
        if ( $brace_type[$brace_depth] ) {
            return decide_if_code_block( $i, $rtokens, $rtoken_type,
                $max_token_index );
        }

        # cannot start a code block within an anonymous hash
        else {
            return "";
        }
    }

    elsif ( $last_nonblank_token eq ';' ) {

        # an opening brace where a statement may appear is probably
        # a code block but might be an anonymous hash reference
        return decide_if_code_block( $i, $rtokens, $rtoken_type,
            $max_token_index );
    }

    # handle case of '}{'
    elsif ($last_nonblank_token eq '}'
        && $last_nonblank_type eq $last_nonblank_token )
    {

        # a } { situation ...
        # could be hash reference after code block..(blktype1.t)
        if ($last_nonblank_block_type) {
            return decide_if_code_block( $i, $rtokens, $rtoken_type,
                $max_token_index );
        }

        # must be a block if it follows a closing hash reference
        else {
            return $last_nonblank_token;
        }
    }

    # NOTE: braces after type characters start code blocks, but for
    # simplicity these are not identified as such.  See also
    # sub is_non_structural_brace.
    # elsif ( $last_nonblank_type eq 't' ) {
    #    return $last_nonblank_token;
    # }

    # brace after label:
    elsif ( $last_nonblank_type eq 'J' ) {
        return $last_nonblank_token;
    }

# otherwise, look at previous token.  This must be a code block if
# it follows any of these:
# /^(BEGIN|END|CHECK|INIT|AUTOLOAD|DESTROY|UNITCHECK|continue|if|elsif|else|unless|do|while|until|eval|for|foreach|map|grep|sort)$/
    elsif ( $is_code_block_token{$last_nonblank_token} ) {
        return $last_nonblank_token;
    }

    # or a sub definition
    elsif ( ( $last_nonblank_type eq 'i' || $last_nonblank_type eq 't' )
        && $last_nonblank_token =~ /^sub\b/ )
    {
        return $last_nonblank_token;
    }

    # user-defined subs with block parameters (like grep/map/eval)
    elsif ( $last_nonblank_type eq 'G' ) {
        return $last_nonblank_token;
    }

    # check bareword
    elsif ( $last_nonblank_type eq 'w' ) {
        return decide_if_code_block( $i, $rtokens, $rtoken_type,
            $max_token_index );
    }

    # anything else must be anonymous hash reference
    else {
        return "";
    }
}

sub decide_if_code_block {

    # We are at a '{' where a statement may appear.
    # We must decide if this brace starts an anonymous hash or a code
    # block.
    # return "" if anonymous hash, and $last_nonblank_token otherwise
    #
    # Arguments:
    #   $i               - index of the '{' in the token arrays
    #   $rtokens         - ref to array of tokens on this line
    #   $rtoken_type     - ref to array of the corresponding token types
    #   $max_token_index - index of the last token on this line
    #
    # USES GLOBAL VARIABLES: $last_nonblank_token
    my ( $i, $rtokens, $rtoken_type, $max_token_index ) = @_;
    my ( $next_nonblank_token, $i_next ) =
      find_next_nonblank_token( $i, $rtokens, $max_token_index );

    # initialize to be code BLOCK
    my $code_block_type = $last_nonblank_token;

    # Check for the common case of an empty anonymous hash reference:
    # Maybe something like sub { { } }
    if ( $next_nonblank_token eq '}' ) {
        $code_block_type = "";
    }

    else {

        # To guess if this '{' is an anonymous hash reference, look ahead
        # and test as follows:
        #
        # it is a hash reference if next come:
        #   - a string or digit followed by a comma or =>
        #   - bareword followed by =>
        # otherwise it is a code block
        #
        # Examples of anonymous hash ref:
        # {'aa',};
        # {1,2}
        #
        # Examples of code blocks:
        # {1; print "hello\n", 1;}
        # {$a,1};

        # We are only going to look ahead one more (nonblank/comment) line.
        # Strange formatting could cause a bad guess, but that's unlikely.
        my @pre_types  = @$rtoken_type[ $i + 1 .. $max_token_index ];
        my @pre_tokens = @$rtokens[ $i + 1 .. $max_token_index ];
        my ( $rpre_tokens, $rpre_types ) =
          peek_ahead_for_n_nonblank_pre_tokens(20);    # 20 is arbitrary but
                                                       # generous, and prevents
                                                       # wasting lots of
                                                       # time in mangled files
        if ( defined($rpre_types) && @$rpre_types ) {
            push @pre_types,  @$rpre_types;
            push @pre_tokens, @$rpre_tokens;
        }

        # put a sentinel token to simplify stopping the search
        push @pre_types, '}';

        my $jbeg = 0;
        $jbeg = 1 if $pre_types[0] eq 'b';

        # first look for one of these
        #  - bareword
        #  - bareword with leading -
        #  - digit
        #  - quoted string
        my $j = $jbeg;
        if ( $pre_types[$j] =~ /^[\'\"]/ ) {

            # find the closing quote; don't worry about escapes.
            # On success, $j is left just past the closing quote; if no
            # closing quote is found $j is unchanged and the '{' stays
            # a code block.
            my $quote_mark = $pre_types[$j];
            for ( my $k = $j + 1 ; $k < $#pre_types ; $k++ ) {
                if ( $pre_types[$k] eq $quote_mark ) {
                    $j = $k + 1;
                    last;
                }
            }
        }
        elsif ( $pre_types[$j] eq 'd' ) {
            $j++;
        }
        elsif ( $pre_types[$j] eq 'w' ) {
            unless ( $is_keyword{ $pre_tokens[$j] } ) {
                $j++;
            }
        }
        elsif ( $pre_types[$j] eq '-' && $pre_types[ ++$j ] eq 'w' ) {
            $j++;
        }
        if ( $j > $jbeg ) {

            $j++ if $pre_types[$j] eq 'b';

            # it's a hash ref if a comma or => follow next
            if ( $pre_types[$j] eq ','
                || ( $pre_types[$j] eq '=' && $pre_types[ ++$j ] eq '>' ) )
            {
                $code_block_type = "";
            }
        }
    }

    return $code_block_type;
}

sub unexpected {

    # report unexpected token type and show where it is
    #
    # Arguments:
    #   $found           - description of what was found
    #   $expecting       - description of what was expected
    #   $i_tok           - index of the offending pretoken
    #   $last_nonblank_i - index of the previous nonblank pretoken, or
    #                      negative if there is none
    #   $rpretoken_map   - ref to array of pretoken starting positions
    #                      within $input_line
    #   $rpretoken_type  - ref to array of pretoken types
    #   $input_line      - text of the line being tokenized
    #
    # Nothing is reported once the running error count exceeds
    # MAX_NAG_MESSAGES.
    #
    # USES GLOBAL VARIABLES: $tokenizer_self
    my ( $found, $expecting, $i_tok, $last_nonblank_i, $rpretoken_map,
        $rpretoken_type, $input_line )
      = @_;

    if ( ++$tokenizer_self->{_unexpected_error_count} <= MAX_NAG_MESSAGES ) {
        my $msg = "found $found where $expecting expected";
        my $pos = $$rpretoken_map[$i_tok];
        interrupt_logfile();
        my $input_line_number = $tokenizer_self->{_last_line_number};
        my ( $offset, $numbered_line, $underline ) =
          make_numbered_line( $input_line_number, $input_line, $pos );

        # mark the offending token with a caret
        $underline = write_on_underline( $underline, $pos - $offset, '^' );

        my $trailer = "";
        if ( ( $i_tok > 0 ) && ( $last_nonblank_i >= 0 ) ) {
            my $pos_prev = $$rpretoken_map[$last_nonblank_i];
            my $num;

            # do not underline a blank separating the two tokens
            if ( $$rpretoken_type[ $i_tok - 1 ] eq 'b' ) {
                $num = $$rpretoken_map[ $i_tok - 1 ] - $pos_prev;
            }
            else {
                $num = $pos - $pos_prev;
            }

            # limit the underline to the 40 characters before the caret
            if ( $num > 40 ) { $num = 40; $pos_prev = $pos - 40; }

            $underline =
              write_on_underline( $underline, $pos_prev - $offset, '-' x $num );
            $trailer = " (previous token underlined)";
        }
        warning( $numbered_line . "\n" );
        warning( $underline . "\n" );
        warning( $msg . $trailer . "\n" );
        resume_logfile();
    }
}

sub is_non_structural_brace {

    # Decide if a brace or bracket is structural or non-structural
    # by looking at the previous token and type.
    # The value of the final expression (true if non-structural) is the
    # return value.
    # USES GLOBAL VARIABLES: $last_nonblank_type, $last_nonblank_token

    # EXPERIMENTAL: Mark slices as structural; idea was to improve formatting.
    # Tentatively deactivated because it caused the wrong operator expectation
    # for this code:
    #      $user = @vars[1] / 100;
    # Must update sub operator_expected before re-implementing.
    # if ( $last_nonblank_type eq 'i' && $last_nonblank_token =~ /^@/ ) {
    #    return 0;
    # }

    # NOTE: braces after type characters start code blocks, but for
    # simplicity these are not identified as such.  See also
    # sub code_block_type
    # if ($last_nonblank_type eq 't') {return 0}

    # otherwise, it is non-structural if it is decorated
    # by type information.
    # For example, the '{' here is non-structural:   ${xxx}
    (
        $last_nonblank_token =~ /^([\$\@\*\&\%\)]|->|::)/

          # or if we follow a hash or array closing curly brace or bracket
          # For example, the second '{' in this is non-structural: $a{'x'}{'y'}
          # because the first '}' would have been given type 'R'
          || $last_nonblank_type =~ /^([R\]])$/
    );
}

#########i#############################################################
# Tokenizer routines for tracking container nesting depths
#######################################################################

# The following routines keep track of nesting depths of the nesting
# types, ( [ { and ?.  This is necessary for determining the indentation
# level, and also for debugging programs.  Not only do they keep track of
# nesting depths of the individual brace types, but they check that each
# of the other brace types is balanced within matching pairs.  For
# example, if the program sees this sequence:
#
#         {  ( ( ) }
#
# then it can determine that there is an extra left paren somewhere
# between the { and the }.  And so on with every other possible
# combination of outer and inner brace types.  For another
# example:
#
#         ( [ ..... ]  ] )
#
# which has an extra ] within the parens.
#
# The brace types have indexes 0 .. 3 which are indexes into
# the matrices.
#
# The pair ? : are treated as just another nesting type, with ? acting
# as the opening brace and : acting as the closing brace.
#
# The matrix
#
#         $depth_array[$a][$b][ $current_depth[$a] ] = $current_depth[$b];
#
# saves the nesting depth of brace type $b (where $b is either of the other
# nesting types) when brace type $a enters a new depth.
# When this depth decreases, a check is made that the current depth of brace
# types $b is unchanged, or otherwise there must have been an error.  This can
# be very useful for localizing errors, particularly when perl runs to
# the end of a large file (such as this one) and announces that there
# is a problem somewhere.
#
# A numerical sequence number is maintained for every nesting type,
# so that each matching pair can be uniquely identified in a simple
# way.

sub increase_nesting_depth {

    # Record entry into a new nesting level of type $aa.
    #   $aa  - index of the nesting type
    #   $pos - position stored (with the current line number and text) as
    #          the starting location of this depth, for later error reports
    # Returns ( $seqno, $indent ): the new sequence number for this
    # container and a flag which is 1 only for a nested ternary.
    my ( $aa, $pos ) = @_;

    # USES GLOBAL VARIABLES: $tokenizer_self, @current_depth,
    # @current_sequence_number, @depth_array, @starting_line_of_current_depth
    my $bb;
    $current_depth[$aa]++;
    $total_depth++;
    $total_depth[$aa][ $current_depth[$aa] ] = $total_depth;
    my $input_line_number = $tokenizer_self->{_last_line_number};
    my $input_line        = $tokenizer_self->{_line_text};

    # Sequence numbers increment by number of items.  This keeps
    # a unique set of numbers but still allows the relative location
    # of any type to be determined.
    $nesting_sequence_number[$aa] += scalar(@closing_brace_names);
    my $seqno = $nesting_sequence_number[$aa];
    $current_sequence_number[$aa][ $current_depth[$aa] ] = $seqno;

    $starting_line_of_current_depth[$aa][ $current_depth[$aa] ] =
      [ $input_line_number, $input_line, $pos ];

    # remember the current depth of every other nesting type so that
    # balance can be checked when this level is closed
    for $bb ( 0 .. $#closing_brace_names ) {
        next if ( $bb == $aa );
        $depth_array[$aa][$bb][ $current_depth[$aa] ] = $current_depth[$bb];
    }

    # set a flag for indenting a nested ternary statement
    my $indent = 0;
    if ( $aa == QUESTION_COLON ) {
        $nested_ternary_flag[ $current_depth[$aa] ] = 0;
        if ( $current_depth[$aa] > 1 ) {
            if ( $nested_ternary_flag[ $current_depth[$aa] - 1 ] == 0 ) {
                my $pdepth = $total_depth[$aa][ $current_depth[$aa] - 1 ];
                if ( $pdepth == $total_depth - 1 ) {
                    $indent = 1;
                    $nested_ternary_flag[ $current_depth[$aa] - 1 ] = -1;
                }
            }
        }
    }
    return ( $seqno, $indent );
}

sub decrease_nesting_depth {

    # Record exit from a nesting level of type $aa and check that every
    # other nesting type is at the depth it had when this level was entered.
    #   $aa  - index of the nesting type
    #   $pos - position of the closing token, used in error reports
    # Returns ( $seqno, $outdent ): the sequence number saved when this
    # level was opened (0 if there was no opening token) and the
    # nested-ternary flag saved for this level (0 for other types).
    my ( $aa, $pos ) = @_;

    # USES GLOBAL VARIABLES: $tokenizer_self, @current_depth,
    # @current_sequence_number, @depth_array, @starting_line_of_current_depth
    my $bb;
    my $seqno             = 0;
    my $input_line_number = $tokenizer_self->{_last_line_number};
    my $input_line        = $tokenizer_self->{_line_text};

    my $outdent = 0;
    $total_depth--;
    if ( $current_depth[$aa] > 0 ) {

        # set a flag for un-indenting after seeing a nested ternary statement
        $seqno = $current_sequence_number[$aa][ $current_depth[$aa] ];
        if ( $aa == QUESTION_COLON ) {
            $outdent = $nested_ternary_flag[ $current_depth[$aa] ];
        }

        # check that any brace types $bb contained within are balanced
        for $bb ( 0 .. $#closing_brace_names ) {
            next if ( $bb == $aa );

            unless ( $depth_array[$aa][$bb][ $current_depth[$aa] ] ==
                $current_depth[$bb] )
            {
                my $diff =
                  $current_depth[$bb] -
                  $depth_array[$aa][$bb][ $current_depth[$aa] ];

                # don't whine too many times
                my $saw_brace_error = get_saw_brace_error();
                if (
                    $saw_brace_error <= MAX_NAG_MESSAGES

                    # if too many closing types have occurred, we probably
                    # already caught this error
                    && ( ( $diff > 0 ) || ( $saw_brace_error <= 0 ) )
                  )
                {
                    interrupt_logfile();
                    my $rsl =
                      $starting_line_of_current_depth[$aa]
                      [ $current_depth[$aa] ];
                    my $sl  = $$rsl[0];
                    my $rel = [ $input_line_number, $input_line, $pos ];
                    my $el  = $$rel[0];
                    my ($ess);

                    if ( $diff == 1 || $diff == -1 ) {
                        $ess = '';
                    }
                    else {
                        $ess = 's';
                    }
                    my $bname =
                      ( $diff > 0 )
                      ? $opening_brace_names[$bb]
                      : $closing_brace_names[$bb];
                    write_error_indicator_pair( @$rsl, '^' );
                    my $msg = <<"EOM";
Found $diff extra $bname$ess between $opening_brace_names[$aa] on line $sl and $closing_brace_names[$aa] on line $el
EOM

                    if ( $diff > 0 ) {
                        my $rml =
                          $starting_line_of_current_depth[$bb]
                          [ $current_depth[$bb] ];
                        my $ml = $$rml[0];
                        $msg .=
" The most recent un-matched $bname is on line $ml\n";
                        write_error_indicator_pair( @$rml, '^' );
                    }
                    write_error_indicator_pair( @$rel, '^' );
                    warning($msg);
                    resume_logfile();
                }
                increment_brace_error();
            }
        }
        $current_depth[$aa]--;
    }
    else {

        # closing token with no opening token of this type
        my $saw_brace_error = get_saw_brace_error();
        if ( $saw_brace_error <= MAX_NAG_MESSAGES ) {
            my $msg = <<"EOM";
There is no previous $opening_brace_names[$aa] to match a $closing_brace_names[$aa] on line $input_line_number
EOM
            indicate_error( $msg, $input_line_number, $input_line, $pos, '^' );
        }
        increment_brace_error();
    }
    return ( $seqno, $outdent );
}

sub check_final_nesting_depths {

    # Report every nesting type whose depth is still nonzero, showing
    # where the most recent unmatched opening token was seen.
    my ($aa);

    # USES GLOBAL VARIABLES: @current_depth, @starting_line_of_current_depth

    for $aa ( 0 .. $#closing_brace_names ) {

        if ( $current_depth[$aa] ) {
            my $rsl =
              $starting_line_of_current_depth[$aa][ $current_depth[$aa] ];
            my $sl  = $$rsl[0];
            my $msg = <<"EOM";
Final nesting depth of $opening_brace_names[$aa]s is $current_depth[$aa]
The most recent un-matched $opening_brace_names[$aa] is on line $sl
EOM
            indicate_error( $msg, @$rsl, '^' );
            increment_brace_error();
        }
    }
}

#########i#############################################################
# Tokenizer routines for looking ahead in input stream
#######################################################################

sub peek_ahead_for_n_nonblank_pre_tokens {

    # returns next n pretokens if they exist
    # returns undef's if hits eof without seeing any pretokens
    # USES GLOBAL VARIABLES: $tokenizer_self
    my $max_pretokens = shift;
    my $line;
    my $i = 0;
    my ( $rpre_tokens, $rmap, $rpre_types );

    # only the first non-blank, non-comment line ahead is pre-tokenized
    while ( $line = $tokenizer_self->{_line_buffer_object}->peek_ahead( $i++ ) )
    {
        $line =~ s/^\s*//;    # trim leading blanks
        next if ( length($line) <= 0 );    # skip blank
        next if ( $line =~ /^#/ );         # skip comment
        ( $rpre_tokens, $rmap, $rpre_types ) =
          pre_tokenize( $line, $max_pretokens );
        last;
    }
    return ( $rpre_tokens, $rpre_types );
}

# look ahead for next non-blank, non-comment line of code
sub peek_ahead_for_nonblank_token {

    # Stores up to two pretokens of that line in @$rtokens, beginning at
    # index $max_token_index + 2, stopping early at a pretoken containing
    # a newline.  Returns $rtokens.
    # USES GLOBAL VARIABLES: $tokenizer_self
    my ( $rtokens, $max_token_index ) = @_;
    my $line;
    my $i = 0;

    while ( $line = $tokenizer_self->{_line_buffer_object}->peek_ahead( $i++ ) )
    {
        $line =~ s/^\s*//;    # trim leading blanks
        next if ( length($line) <= 0 );    # skip blank
        next if ( $line =~ /^#/ );         # skip comment
        my ( $rtok, $rmap, $rtype ) =
          pre_tokenize( $line, 2 );        # only need 2 pre-tokens
        my $j = $max_token_index + 1;
        my $tok;

        foreach $tok (@$rtok) {
            last if ( $tok =~ "\n" );
            $$rtokens[ ++$j ] = $tok;
        }
        last;
    }
    return $rtokens;
}

#########i#############################################################
# Tokenizer guessing routines for ambiguous situations
#######################################################################

sub guess_if_pattern_or_conditional {

    # this routine is called when we have encountered a ? following an
    # unknown bareword, and we must decide if it starts a pattern or not
    # input parameters:
    #   $i - token index of the ? starting possible pattern
    # output parameters:
    #   $is_pattern = 0 if probably not pattern,  =1 if probably a pattern
    #   msg = a warning or diagnostic message
    # USES GLOBAL VARIABLES: $last_nonblank_token
    my ( $i, $rtokens, $rtoken_map, $max_token_index ) = @_;
    my $is_pattern = 0;
    my $msg        = "guessing that ? after $last_nonblank_token starts a ";

    if ( $i >= $max_token_index ) {
        $msg .= "conditional (no end to pattern found on the line)\n";
    }
    else {
        my $ibeg = $i;

        # look for a possible ending ? on this line..
        my $in_quote        = 1;
        my $quote_depth     = 0;
        my $quote_character = '';
        my $quote_pos       = 0;
        my $quoted_string;
        (
            $i, $in_quote, $quote_character, $quote_pos, $quote_depth,
            $quoted_string
          )
          = follow_quoted_string( $ibeg, $in_quote, $rtokens, $quote_character,
            $quote_pos, $quote_depth, $max_token_index );

        if ($in_quote) {

            # we didn't find an ending ? on this line,
            # so we bias towards conditional
            $is_pattern = 0;
            $msg .= "conditional (no ending ? on this line)\n";
        }

        # we found an ending ?, so we bias towards a pattern
        else {

            if ( pattern_expected( $i, $rtokens, $max_token_index ) >= 0 ) {
                $is_pattern = 1;
                $msg .= "pattern (found ending ? and pattern expected)\n";
            }
            else {

                # NOTE(review): $is_pattern stays 0 on this path although
                # the message says "pattern" -- confirm which is intended.
                $msg .= "pattern (uncertain, but found ending ?)\n";
            }
        }
    }
    return ( $is_pattern, $msg );
}

sub guess_if_pattern_or_division {

    # this routine is called when we have encountered a / following an
    # unknown bareword, and we must decide if it starts a pattern or is a
    # division
    # input parameters:
    #   $i - token index of the / starting possible pattern
    # output parameters:
    #   $is_pattern = 0 if probably division,  =1 if probably a pattern
    #   msg = a warning or diagnostic message
    # USES GLOBAL VARIABLES: $last_nonblank_token
    my ( $i, $rtokens, $rtoken_map, $max_token_index ) = @_;
    my $is_pattern = 0;
    my $msg        = "guessing that / after $last_nonblank_token starts a ";

    if ( $i >= $max_token_index ) {

        # the reason must be appended to $msg; a bare string here would be
        # a constant in void context and the explanation would be lost
        $msg .= "division (no end to pattern found on the line)\n";
    }
    else {
        my $ibeg = $i;
        my $divide_expected =
          numerator_expected( $i, $rtokens, $max_token_index );

        # look for a possible ending / on this line..
        my $in_quote        = 1;
        my $quote_depth     = 0;
        my $quote_character = '';
        my $quote_pos       = 0;
        my $quoted_string;
        (
            $i, $in_quote, $quote_character, $quote_pos, $quote_depth,
            $quoted_string
          )
          = follow_quoted_string( $ibeg, $in_quote, $rtokens, $quote_character,
            $quote_pos, $quote_depth, $max_token_index );

        if ($in_quote) {

            # we didn't find an ending / on this line,
            # so we bias towards division
            if ( $divide_expected >= 0 ) {
                $is_pattern = 0;
                $msg .= "division (no ending / on this line)\n";
            }
            else {

                # append, like every other branch, so that the message
                # keeps its "guessing that / after ..." prefix
                $msg .= "multi-line pattern (division not possible)\n";
                $is_pattern = 1;
            }

        }

        # we found an ending /, so we bias towards a pattern
        else {

            if ( pattern_expected( $i, $rtokens, $max_token_index ) >= 0 ) {

                if ( $divide_expected >= 0 ) {

                    # both are possible: decide by how far away the
                    # matching / is
                    if ( $i - $ibeg > 60 ) {
                        $msg .= "division (matching / too distant)\n";
                        $is_pattern = 0;
                    }
                    else {
                        $msg .= "pattern (but division possible too)\n";
                        $is_pattern = 1;
                    }
                }
                else {
                    $is_pattern = 1;
                    $msg .= "pattern (division not possible)\n";
                }
            }
            else {

                if ( $divide_expected >= 0 ) {
                    $is_pattern = 0;
                    $msg .= "division (pattern not possible)\n";
                }
                else {
                    $is_pattern = 1;
                    $msg .=
                      "pattern (uncertain, but division would not work here)\n";
                }
            }
        }
    }
    return ( $is_pattern, $msg );
}

# try to resolve here-doc vs. shift by looking ahead for
# non-code or the end token (currently only looks for end token)
# returns 1 if it is probably a here doc, 0 if not
# (and -1 if end of file is reached without seeing the target)
sub guess_if_here_doc {

    # This is how many lines we will search for a target as part of the
    # guessing strategy.  It is a constant because there is probably
    # little reason to change it.
    # USES GLOBAL VARIABLES: $tokenizer_self, $current_package
    # %is_constant,
    use constant HERE_DOC_WINDOW => 40;

    my $next_token        = shift;
    my $here_doc_expected = 0;
    my $line;
    my $k   = 0;
    my $msg = "checking <<";

    while ( $line = $tokenizer_self->{_line_buffer_object}->peek_ahead( $k++ ) )
    {
        chomp $line;

        # match the target literally; \Q..\E keeps any regex
        # metacharacters in the token from being interpreted
        if ( $line =~ /^\Q$next_token\E$/ ) {
            $msg .= " -- found target $next_token ahead $k lines\n";
            $here_doc_expected = 1;    # got it
            last;
        }
        last if ( $k >= HERE_DOC_WINDOW );
    }

    unless ($here_doc_expected) {

        if ( !defined($line) ) {
            $here_doc_expected = -1;    # hit eof without seeing target
            $msg .= " -- must be shift; target $next_token not in file\n";

        }
        else {                          # still unsure..taking a wild guess

            if ( !$is_constant{$current_package}{$next_token} ) {
                $here_doc_expected = 1;
                $msg .=
" -- guessing it's a here-doc ($next_token not a constant)\n";
            }
            else {
                $msg .=
" -- guessing it's a shift ($next_token is a constant)\n";
            }
        }
    }
    write_logfile_entry($msg);
    return $here_doc_expected;
}

#########i#############################################################
# Tokenizer Routines for scanning identifiers and related items
#######################################################################

sub scan_bare_identifier_do {

    # this routine is called to scan a token starting with an alphanumeric
    # variable or package separator, :: or '.
    # Returns ( $i, $tok, $type, $prototype ).
    # USES GLOBAL VARIABLES: $current_package, $last_nonblank_token,
    # $last_nonblank_type,@paren_type, $paren_depth

    my ( $input_line, $i, $tok, $type, $prototype, $rtoken_map,
        $max_token_index )
      = @_;
    my $i_begin = $i;
    my $package = undef;

    my $i_beg = $i;

    # we have to back up one pretoken at a :: since each : is one pretoken
    if ( $tok eq '::' ) { $i_beg-- }
    if ( $tok eq '->' ) { $i_beg-- }
    my $pos_beg = $$rtoken_map[$i_beg];
    pos($input_line) = $pos_beg;

    #  Examples:
    #   A::B::C
    #   A::
    #   ::A
    #   A'B
    if ( $input_line =~ m/\G\s*((?:\w*(?:'|::)))*(?:(?:->)?(\w+))?/gc ) {

        my $pos  = pos($input_line);
        my $numc = $pos - $pos_beg;
        $tok = substr( $input_line, $pos_beg, $numc );

        # type 'w' includes anything without leading type info
        # ($,%,@,*) including something like abc::def::ghi
        $type = 'w';

        my $sub_name = "";
        if ( defined($2) ) { $sub_name = $2; }
        if ( defined($1) ) {
            $package = $1;

            # patch: don't allow isolated package name which just ends
            # in the old style package separator (single quote).  Example:
            #   use CGI':all';
            if ( !($sub_name) && substr( $package, -1, 1 ) eq '\'' ) {
                $pos--;
            }

            $package =~ s/\'/::/g;
            if ( $package =~ /^\:/ ) { $package = 'main' . $package }
            $package =~ s/::$//;
        }
        else {
            $package = $current_package;

            if ( $is_keyword{$tok} ) {
                $type = 'k';
            }
        }

        # if it is a bareword..
        if ( $type eq 'w' ) {

            # check for v-string with leading 'v' type character
            # (This seems to have precedence over filehandle, type 'Y')
            if ( $tok =~ /^v\d[_\d]*$/ ) {

                # we only have the first part - something like 'v101' -
                # look for more
                if ( $input_line =~ m/\G(\.\d[_\d]*)+/gc ) {
                    $pos  = pos($input_line);
                    $numc = $pos - $pos_beg;
                    $tok  = substr( $input_line, $pos_beg, $numc );
                }
                $type = 'v';

                # warn if this version can't handle v-strings
                report_v_string($tok);
            }

            elsif ( $is_constant{$package}{$sub_name} ) {
                $type = 'C';
            }

            # bareword after sort has implied empty prototype; for example:
            # @sorted = sort numerically ( 53, 29, 11, 32, 7 );
            # This has priority over whatever the user has specified.
            elsif ($last_nonblank_token eq 'sort'
                && $last_nonblank_type eq 'k' )
            {
                $type = 'Z';
            }

            # Note: strangely, perl does not seem to really let you create
            # functions which act like eval and do, in the sense that eval
            # and do may have operators following the final }, but any operators
            # that you create with prototype (&) apparently do not allow
            # trailing operators, only terms.  This seems strange.
            # If this ever changes, here is the update
            # to make perltidy behave accordingly:

            # elsif ( $is_block_function{$package}{$tok} ) {
            #    $tok='eval'; # patch to do braces like eval  - doesn't work
            #    $type = 'k';
            #}
            # FIXME: This could become a separate type to allow for different
            # future behavior:
            elsif ( $is_block_function{$package}{$sub_name} ) {
                $type = 'G';
            }

            elsif ( $is_block_list_function{$package}{$sub_name} ) {
                $type = 'G';
            }
            elsif ( $is_user_function{$package}{$sub_name} ) {
                $type      = 'U';
                $prototype = $user_function_prototype{$package}{$sub_name};
            }

            # check for indirect object
            elsif (

                # added 2001-03-27: must not be followed immediately by '('
                # see fhandle.t
                ( $input_line !~ m/\G\(/gc )

                # and
                && (

                    # preceded by keyword like 'print', 'printf' and friends
                    $is_indirect_object_taker{$last_nonblank_token}

                    # or preceded by something like 'print(' or 'printf('
                    || (
                        ( $last_nonblank_token eq '(' )
                        && $is_indirect_object_taker{ $paren_type[$paren_depth]
                        }

                    )
                )
              )
            {

                # may not be indirect object unless followed by a space
                if ( $input_line =~ m/\G\s+/gc ) {
                    $type = 'Y';

                    # Abandon Hope ...
                    # Perl's indirect object notation is a very bad
                    # thing and can cause subtle bugs, especially for
                    # beginning programmers.  And I haven't even been
                    # able to figure out a sane warning scheme which
                    # doesn't get in the way of good scripts.

                    # Complain if a filehandle has any lower case
                    # letters.  This is suggested good practice.
                    # Use 'sub_name' because something like
                    # main::MYHANDLE is ok for filehandle
                    if ( $sub_name =~ /[a-z]/ ) {

                        # could be bug caused by older perltidy if
                        # followed by '('
                        if ( $input_line =~ m/\G\s*\(/gc ) {
                            complain(
"Caution: unknown word '$tok' in indirect object slot\n"
                            );
                        }
                    }
                }

                # bareword not followed by a space -- may not be filehandle
                # (may be function call defined in a 'use' statement)
                else {
                    $type = 'Z';
                }
            }
        }

        # Now we must convert back from character position
        # to pre_token index.
        # I don't think an error flag can occur here ..but who knows
        my $error;
        ( $i, $error ) =
          inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index );
        if ($error) {
            warning("scan_bare_identifier: Possibly invalid tokenization\n");
        }
    }

    # no match but line not blank - could be syntax error
    # perl will take '::' alone without complaint
    else {
        $type = 'w';

        # change this warning to log message if it becomes annoying
        warning("didn't find identifier after leading ::\n");
    }
    return ( $i, $tok, $type, $prototype );
}

sub scan_id_do {

# This is the new scanner and will eventually replace scan_identifier.
# Only type 'sub' and 'package' are implemented.
# Token types $ * % @ & -> are not yet implemented.
#
# Scan identifier following a type token.
# The type of call depends on $id_scan_state: $id_scan_state = ''
# for starting call, in which case $tok must be the token defining
# the type.
#
# If the type token is the last nonblank token on the line, a value
# of $id_scan_state = $tok is returned, indicating that further
# calls must be made to get the identifier.  If the type token is
# not the last nonblank token on the line, the identifier is
# scanned and handled and a value of '' is returned.
# USES GLOBAL VARIABLES: $current_package, $last_nonblank_token, $in_attribute_list,
# $statement_type, $tokenizer_self

    my ( $input_line, $i, $tok, $rtokens, $rtoken_map, $id_scan_state,
        $max_token_index )
      = @_;
    my $type = '';
    my ( $i_beg, $pos_beg );

    #print "NSCAN:entering i=$i, tok=$tok, type=$type, state=$id_scan_state\n";
    #my ($a,$b,$c) = caller;
    #print "NSCAN: scan_id called with tok=$tok $a $b $c\n";

    # on re-entry, start scanning at first token on the line
    if ($id_scan_state) {
        $i_beg = $i;
        $type  = '';
    }

    # on initial entry, start scanning just after type token
    else {
        $i_beg         = $i + 1;
        $id_scan_state = $tok;
        $type          = 't';
    }

    # find $i_beg = index of next nonblank token,
    # and handle empty lines
    my $blank_line          = 0;
    my $next_nonblank_token = $$rtokens[$i_beg];
    if ( $i_beg > $max_token_index ) {
        $blank_line = 1;
    }
    else {

        # only a '#' immediately after a '$' is not a comment
        if ( $next_nonblank_token eq '#' ) {
            unless ( $tok eq '$' ) {
                $blank_line = 1;
            }
        }

        if ( $next_nonblank_token =~ /^\s/ ) {
            ( $next_nonblank_token, $i_beg ) =
              find_next_nonblank_token_on_this_line( $i_beg, $rtokens,
                $max_token_index );
            if ( $next_nonblank_token =~ /(^#|^\s*$)/ ) {
                $blank_line = 1;
            }
        }
    }

    # handle non-blank line; identifier, if any, must follow
    unless ($blank_line) {

        if ( $id_scan_state eq 'sub' ) {
            ( $i, $tok, $type, $id_scan_state ) = do_scan_sub(
                $input_line, $i,             $i_beg,
                $tok,        $type,          $rtokens,
                $rtoken_map, $id_scan_state, $max_token_index
            );
        }

        elsif ( $id_scan_state eq 'package' ) {
            ( $i, $tok, $type ) =
              do_scan_package( $input_line, $i, $i_beg, $tok, $type, $rtokens,
                $rtoken_map, $max_token_index );
            $id_scan_state = '';
        }

        else {
            warning("invalid token in scan_id: $tok\n");
            $id_scan_state = '';
        }
    }

    if ( $id_scan_state && ( !defined($type) || !$type ) ) {

        # shouldn't happen:
        warning(
"Program bug in scan_id: undefined type but scan_state=$id_scan_state\n"
        );
        report_definite_bug();
    }

    TOKENIZER_DEBUG_FLAG_NSCAN && do {
        print
          "NSCAN: returns i=$i, tok=$tok, type=$type, state=$id_scan_state\n";
    };
    return ( $i, $tok, $type, $id_scan_state );
}

sub check_prototype {

    # Record what a sub declaration tells us about $subname in $package.
    #   $proto   - prototype text including parens, or undef if none
    #   $package - package name; nothing is recorded if undefined
    #   $subname - sub name; nothing is recorded if undefined
    #
    # no prototype       : marked as a user function
    # empty prototype () : marked as a constant
    # otherwise          : marked as a user function and the prototype is
    #                      saved; a prototype containing '&' also marks the
    #                      sub as a block function or block list function
    my ( $proto, $package, $subname ) = @_;
    return unless ( defined($package) && defined($subname) );
    if ( defined($proto) ) {

        # strip the enclosing parens and surrounding blanks
        $proto =~ s/^\s*\(\s*//;
        $proto =~ s/\s*\)$//;
        if ($proto) {
            $is_user_function{$package}{$subname}        = 1;
            $user_function_prototype{$package}{$subname} = "($proto)";

            # prototypes containing '&' must be treated specially..
            if ( $proto =~ /\&/ ) {

                # right curly braces of prototypes ending in
                # '&' may be followed by an operator
                if ( $proto =~ /\&$/ ) {
                    $is_block_function{$package}{$subname} = 1;
                }

                # right curly braces of prototypes NOT ending in
                # '&' may NOT be followed by an operator
                else {
                    $is_block_list_function{$package}{$subname} = 1;
                }
            }
        }
        else {
            $is_constant{$package}{$subname} = 1;
        }
    }
    else {
        $is_user_function{$package}{$subname} = 1;
    }
}

sub do_scan_package {

    # do_scan_package parses a package name
    # it is called with $i_beg equal to the index of the first nonblank
    # token following a 'package' token.
    # Returns ( $i, $tok, $type ); on a successful match $current_package
    # is updated, $tok becomes 'package ' plus the matched text and $type
    # is 'i', otherwise $type is 'k'.
    # USES GLOBAL VARIABLES: $current_package,

    my ( $input_line, $i, $i_beg, $tok, $type, $rtokens, $rtoken_map,
        $max_token_index )
      = @_;
    my $package = undef;
    my $pos_beg = $$rtoken_map[$i_beg];
    pos($input_line) = $pos_beg;

    # handle non-blank line; package name, if any, must follow
    if ( $input_line =~ m/\G\s*((?:\w*(?:'|::))*\w+)/gc ) {
        $package = ( defined($1) && $1 ) ? $1 : 'main';

        # normalize: old style ' separator becomes ::, a leading ::
        # means main::, and any trailing :: is dropped
        $package =~ s/\'/::/g;
        if ( $package =~ /^\:/ ) { $package = 'main' . $package }
        $package =~ s/::$//;
        my $pos  = pos($input_line);
        my $numc = $pos - $pos_beg;
        $tok  = 'package ' . substr( $input_line, $pos_beg, $numc );
        $type = 'i';

        # Now we must convert back from character position
        # to pre_token index.
        # I don't think an error flag can occur here ..but ?
        my $error;
        ( $i, $error ) =
          inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index );
        if ($error) { warning("Possibly invalid package\n") }
        $current_package = $package;

        # check for error
        my ( $next_nonblank_token, $i_next ) =
          find_next_nonblank_token( $i, $rtokens, $max_token_index );
        if ( $next_nonblank_token !~ /^[;\}]$/ ) {
            warning(
                "Unexpected '$next_nonblank_token' after package name '$tok'\n"
            );
        }
    }

    # no match but line not blank --
    # could be a label with name package, like package:  , for example.
    else {
        $type = 'k';
    }

    return ( $i, $tok, $type );
}

sub scan_identifier_do {

    # This routine assembles tokens into identifiers.  It maintains a
    # scan state, id_scan_state.  It updates id_scan_state based upon
    # current id_scan_state and token, and returns an updated
    # id_scan_state and the next index after the identifier.
25815 # USES GLOBAL VARIABLES: $context, $last_nonblank_token, 25816 # $last_nonblank_type 25817 25818 my ( $i, $id_scan_state, $identifier, $rtokens, $max_token_index, 25819 $expecting ) 25820 = @_; 25821 my $i_begin = $i; 25822 my $type = ''; 25823 my $tok_begin = $$rtokens[$i_begin]; 25824 if ( $tok_begin eq ':' ) { $tok_begin = '::' } 25825 my $id_scan_state_begin = $id_scan_state; 25826 my $identifier_begin = $identifier; 25827 my $tok = $tok_begin; 25828 my $message = ""; 25829 25830 # these flags will be used to help figure out the type: 25831 my $saw_alpha = ( $tok =~ /^[A-Za-z_]/ ); 25832 my $saw_type; 25833 25834 # allow old package separator (') except in 'use' statement 25835 my $allow_tick = ( $last_nonblank_token ne 'use' ); 25836 25837 # get started by defining a type and a state if necessary 25838 unless ($id_scan_state) { 25839 $context = UNKNOWN_CONTEXT; 25840 25841 # fixup for digraph 25842 if ( $tok eq '>' ) { 25843 $tok = '->'; 25844 $tok_begin = $tok; 25845 } 25846 $identifier = $tok; 25847 25848 if ( $tok eq '$' || $tok eq '*' ) { 25849 $id_scan_state = '$'; 25850 $context = SCALAR_CONTEXT; 25851 } 25852 elsif ( $tok eq '%' || $tok eq '@' ) { 25853 $id_scan_state = '$'; 25854 $context = LIST_CONTEXT; 25855 } 25856 elsif ( $tok eq '&' ) { 25857 $id_scan_state = '&'; 25858 } 25859 elsif ( $tok eq 'sub' or $tok eq 'package' ) { 25860 $saw_alpha = 0; # 'sub' is considered type info here 25861 $id_scan_state = '$'; 25862 $identifier .= ' '; # need a space to separate sub from sub name 25863 } 25864 elsif ( $tok eq '::' ) { 25865 $id_scan_state = 'A'; 25866 } 25867 elsif ( $tok =~ /^[A-Za-z_]/ ) { 25868 $id_scan_state = ':'; 25869 } 25870 elsif ( $tok eq '->' ) { 25871 $id_scan_state = '$'; 25872 } 25873 else { 25874 25875 # shouldn't happen 25876 my ( $a, $b, $c ) = caller; 25877 warning("Program Bug: scan_identifier given bad token = $tok \n"); 25878 warning(" called from sub $a line: $c\n"); 25879 report_definite_bug(); 25880 } 25881 $saw_type = 
!$saw_alpha; 25882 } 25883 else { 25884 $i--; 25885 $saw_type = ( $tok =~ /([\$\%\@\*\&])/ ); 25886 } 25887 25888 # now loop to gather the identifier 25889 my $i_save = $i; 25890 25891 while ( $i < $max_token_index ) { 25892 $i_save = $i unless ( $tok =~ /^\s*$/ ); 25893 $tok = $$rtokens[ ++$i ]; 25894 25895 if ( ( $tok eq ':' ) && ( $$rtokens[ $i + 1 ] eq ':' ) ) { 25896 $tok = '::'; 25897 $i++; 25898 } 25899 25900 if ( $id_scan_state eq '$' ) { # starting variable name 25901 25902 if ( $tok eq '$' ) { 25903 25904 $identifier .= $tok; 25905 25906 # we've got a punctuation variable if end of line (punct.t) 25907 if ( $i == $max_token_index ) { 25908 $type = 'i'; 25909 $id_scan_state = ''; 25910 last; 25911 } 25912 } 25913 elsif ( $tok =~ /^[A-Za-z_]/ ) { # alphanumeric .. 25914 $saw_alpha = 1; 25915 $id_scan_state = ':'; # now need :: 25916 $identifier .= $tok; 25917 } 25918 elsif ( $tok eq "'" && $allow_tick ) { # alphanumeric .. 25919 $saw_alpha = 1; 25920 $id_scan_state = ':'; # now need :: 25921 $identifier .= $tok; 25922 25923 # Perl will accept leading digits in identifiers, 25924 # although they may not always produce useful results. 25925 # Something like $main::0 is ok. 
But this also works: 25926 # 25927 # sub howdy::123::bubba{ print "bubba $54321!\n" } 25928 # howdy::123::bubba(); 25929 # 25930 } 25931 elsif ( $tok =~ /^[0-9]/ ) { # numeric 25932 $saw_alpha = 1; 25933 $id_scan_state = ':'; # now need :: 25934 $identifier .= $tok; 25935 } 25936 elsif ( $tok eq '::' ) { 25937 $id_scan_state = 'A'; 25938 $identifier .= $tok; 25939 } 25940 elsif ( ( $tok eq '#' ) && ( $identifier eq '$' ) ) { # $#array 25941 $identifier .= $tok; # keep same state, a $ could follow 25942 } 25943 elsif ( $tok eq '{' ) { 25944 25945 # check for something like ${#} or ${�} 25946 if ( $identifier eq '$' 25947 && $i + 2 <= $max_token_index 25948 && $$rtokens[ $i + 2 ] eq '}' 25949 && $$rtokens[ $i + 1 ] !~ /[\s\w]/ ) 25950 { 25951 my $next2 = $$rtokens[ $i + 2 ]; 25952 my $next1 = $$rtokens[ $i + 1 ]; 25953 $identifier .= $tok . $next1 . $next2; 25954 $i += 2; 25955 $id_scan_state = ''; 25956 last; 25957 } 25958 25959 # skip something like ${xxx} or ->{ 25960 $id_scan_state = ''; 25961 25962 # if this is the first token of a line, any tokens for this 25963 # identifier have already been accumulated 25964 if ( $identifier eq '$' || $i == 0 ) { $identifier = ''; } 25965 $i = $i_save; 25966 last; 25967 } 25968 25969 # space ok after leading $ % * & @ 25970 elsif ( $tok =~ /^\s*$/ ) { 25971 25972 if ( $identifier =~ /^[\$\%\*\&\@]/ ) { 25973 25974 if ( length($identifier) > 1 ) { 25975 $id_scan_state = ''; 25976 $i = $i_save; 25977 $type = 'i'; # probably punctuation variable 25978 last; 25979 } 25980 else { 25981 25982 # spaces after $'s are common, and space after @ 25983 # is harmless, so only complain about space 25984 # after other type characters. Space after $ and 25985 # @ will be removed in formatting. Report space 25986 # after % and * because they might indicate a 25987 # parsing error. In other words '% ' might be a 25988 # modulo operator. Delete this warning if it 25989 # gets annoying. 
25990 if ( $identifier !~ /^[\@\$]$/ ) { 25991 $message = 25992 "Space in identifier, following $identifier\n"; 25993 } 25994 } 25995 } 25996 25997 # else: 25998 # space after '->' is ok 25999 } 26000 elsif ( $tok eq '^' ) { 26001 26002 # check for some special variables like $^W 26003 if ( $identifier =~ /^[\$\*\@\%]$/ ) { 26004 $identifier .= $tok; 26005 $id_scan_state = 'A'; 26006 26007 # Perl accepts '$^]' or '@^]', but 26008 # there must not be a space before the ']'. 26009 my $next1 = $$rtokens[ $i + 1 ]; 26010 if ( $next1 eq ']' ) { 26011 $i++; 26012 $identifier .= $next1; 26013 $id_scan_state = ""; 26014 last; 26015 } 26016 } 26017 else { 26018 $id_scan_state = ''; 26019 } 26020 } 26021 else { # something else 26022 26023 # check for various punctuation variables 26024 if ( $identifier =~ /^[\$\*\@\%]$/ ) { 26025 $identifier .= $tok; 26026 } 26027 26028 elsif ( $identifier eq '$#' ) { 26029 26030 if ( $tok eq '{' ) { $type = 'i'; $i = $i_save } 26031 26032 # perl seems to allow just these: $#: $#- $#+ 26033 elsif ( $tok =~ /^[\:\-\+]$/ ) { 26034 $type = 'i'; 26035 $identifier .= $tok; 26036 } 26037 else { 26038 $i = $i_save; 26039 write_logfile_entry( 'Use of $# is deprecated' . "\n" ); 26040 } 26041 } 26042 elsif ( $identifier eq '$$' ) { 26043 26044 # perl does not allow references to punctuation 26045 # variables without braces. For example, this 26046 # won't work: 26047 # $:=\4; 26048 # $a = $$:; 26049 # You would have to use 26050 # $a = ${$:}; 26051 26052 $i = $i_save; 26053 if ( $tok eq '{' ) { $type = 't' } 26054 else { $type = 'i' } 26055 } 26056 elsif ( $identifier eq '->' ) { 26057 $i = $i_save; 26058 } 26059 else { 26060 $i = $i_save; 26061 if ( length($identifier) == 1 ) { $identifier = ''; } 26062 } 26063 $id_scan_state = ''; 26064 last; 26065 } 26066 } 26067 elsif ( $id_scan_state eq '&' ) { # starting sub call? 26068 26069 if ( $tok =~ /^[\$A-Za-z_]/ ) { # alphanumeric .. 
26070 $id_scan_state = ':'; # now need :: 26071 $saw_alpha = 1; 26072 $identifier .= $tok; 26073 } 26074 elsif ( $tok eq "'" && $allow_tick ) { # alphanumeric .. 26075 $id_scan_state = ':'; # now need :: 26076 $saw_alpha = 1; 26077 $identifier .= $tok; 26078 } 26079 elsif ( $tok =~ /^[0-9]/ ) { # numeric..see comments above 26080 $id_scan_state = ':'; # now need :: 26081 $saw_alpha = 1; 26082 $identifier .= $tok; 26083 } 26084 elsif ( $tok =~ /^\s*$/ ) { # allow space 26085 } 26086 elsif ( $tok eq '::' ) { # leading :: 26087 $id_scan_state = 'A'; # accept alpha next 26088 $identifier .= $tok; 26089 } 26090 elsif ( $tok eq '{' ) { 26091 if ( $identifier eq '&' || $i == 0 ) { $identifier = ''; } 26092 $i = $i_save; 26093 $id_scan_state = ''; 26094 last; 26095 } 26096 else { 26097 26098 # punctuation variable? 26099 # testfile: cunningham4.pl 26100 # 26101 # We have to be careful here. If we are in an unknown state, 26102 # we will reject the punctuation variable. In the following 26103 # example the '&' is a binary opeator but we are in an unknown 26104 # state because there is no sigil on 'Prima', so we don't 26105 # know what it is. But it is a bad guess that 26106 # '&~' is a punction variable. 
26107 # $self->{text}->{colorMap}->[ 26108 # Prima::PodView::COLOR_CODE_FOREGROUND 26109 # & ~tb::COLOR_INDEX ] = 26110 # $sec->{ColorCode} 26111 if ( $identifier eq '&' && $expecting ) { 26112 $identifier .= $tok; 26113 } 26114 else { 26115 $identifier = ''; 26116 $i = $i_save; 26117 $type = '&'; 26118 } 26119 $id_scan_state = ''; 26120 last; 26121 } 26122 } 26123 elsif ( $id_scan_state eq 'A' ) { # looking for alpha (after ::) 26124 26125 if ( $tok =~ /^[A-Za-z_]/ ) { # found it 26126 $identifier .= $tok; 26127 $id_scan_state = ':'; # now need :: 26128 $saw_alpha = 1; 26129 } 26130 elsif ( $tok eq "'" && $allow_tick ) { 26131 $identifier .= $tok; 26132 $id_scan_state = ':'; # now need :: 26133 $saw_alpha = 1; 26134 } 26135 elsif ( $tok =~ /^[0-9]/ ) { # numeric..see comments above 26136 $identifier .= $tok; 26137 $id_scan_state = ':'; # now need :: 26138 $saw_alpha = 1; 26139 } 26140 elsif ( ( $identifier =~ /^sub / ) && ( $tok =~ /^\s*$/ ) ) { 26141 $id_scan_state = '('; 26142 $identifier .= $tok; 26143 } 26144 elsif ( ( $identifier =~ /^sub / ) && ( $tok eq '(' ) ) { 26145 $id_scan_state = ')'; 26146 $identifier .= $tok; 26147 } 26148 else { 26149 $id_scan_state = ''; 26150 $i = $i_save; 26151 last; 26152 } 26153 } 26154 elsif ( $id_scan_state eq ':' ) { # looking for :: after alpha 26155 26156 if ( $tok eq '::' ) { # got it 26157 $identifier .= $tok; 26158 $id_scan_state = 'A'; # now require alpha 26159 } 26160 elsif ( $tok =~ /^[A-Za-z_]/ ) { # more alphanumeric is ok here 26161 $identifier .= $tok; 26162 $id_scan_state = ':'; # now need :: 26163 $saw_alpha = 1; 26164 } 26165 elsif ( $tok =~ /^[0-9]/ ) { # numeric..see comments above 26166 $identifier .= $tok; 26167 $id_scan_state = ':'; # now need :: 26168 $saw_alpha = 1; 26169 } 26170 elsif ( $tok eq "'" && $allow_tick ) { # tick 26171 26172 if ( $is_keyword{$identifier} ) { 26173 $id_scan_state = ''; # that's all 26174 $i = $i_save; 26175 } 26176 else { 26177 $identifier .= $tok; 26178 } 26179 } 26180 
elsif ( ( $identifier =~ /^sub / ) && ( $tok =~ /^\s*$/ ) ) { 26181 $id_scan_state = '('; 26182 $identifier .= $tok; 26183 } 26184 elsif ( ( $identifier =~ /^sub / ) && ( $tok eq '(' ) ) { 26185 $id_scan_state = ')'; 26186 $identifier .= $tok; 26187 } 26188 else { 26189 $id_scan_state = ''; # that's all 26190 $i = $i_save; 26191 last; 26192 } 26193 } 26194 elsif ( $id_scan_state eq '(' ) { # looking for ( of prototype 26195 26196 if ( $tok eq '(' ) { # got it 26197 $identifier .= $tok; 26198 $id_scan_state = ')'; # now find the end of it 26199 } 26200 elsif ( $tok =~ /^\s*$/ ) { # blank - keep going 26201 $identifier .= $tok; 26202 } 26203 else { 26204 $id_scan_state = ''; # that's all - no prototype 26205 $i = $i_save; 26206 last; 26207 } 26208 } 26209 elsif ( $id_scan_state eq ')' ) { # looking for ) to end 26210 26211 if ( $tok eq ')' ) { # got it 26212 $identifier .= $tok; 26213 $id_scan_state = ''; # all done 26214 last; 26215 } 26216 elsif ( $tok =~ /^[\s\$\%\\\*\@\&\;]/ ) { 26217 $identifier .= $tok; 26218 } 26219 else { # probable error in script, but keep going 26220 warning("Unexpected '$tok' while seeking end of prototype\n"); 26221 $identifier .= $tok; 26222 } 26223 } 26224 else { # can get here due to error in initialization 26225 $id_scan_state = ''; 26226 $i = $i_save; 26227 last; 26228 } 26229 } 26230 26231 if ( $id_scan_state eq ')' ) { 26232 warning("Hit end of line while seeking ) to end prototype\n"); 26233 } 26234 26235 # once we enter the actual identifier, it may not extend beyond 26236 # the end of the current line 26237 if ( $id_scan_state =~ /^[A\:\(\)]/ ) { 26238 $id_scan_state = ''; 26239 } 26240 if ( $i < 0 ) { $i = 0 } 26241 26242 unless ($type) { 26243 26244 if ($saw_type) { 26245 26246 if ($saw_alpha) { 26247 if ( $identifier =~ /^->/ && $last_nonblank_type eq 'w' ) { 26248 $type = 'w'; 26249 } 26250 else { $type = 'i' } 26251 } 26252 elsif ( $identifier eq '->' ) { 26253 $type = '->'; 26254 } 26255 elsif ( 26256 ( 
length($identifier) > 1 ) 26257 26258 # In something like '@$=' we have an identifier '@$' 26259 # In something like '$${' we have type '$$' (and only 26260 # part of an identifier) 26261 && !( $identifier =~ /\$$/ && $tok eq '{' ) 26262 && ( $identifier !~ /^(sub |package )$/ ) 26263 ) 26264 { 26265 $type = 'i'; 26266 } 26267 else { $type = 't' } 26268 } 26269 elsif ($saw_alpha) { 26270 26271 # type 'w' includes anything without leading type info 26272 # ($,%,@,*) including something like abc::def::ghi 26273 $type = 'w'; 26274 } 26275 else { 26276 $type = ''; 26277 } # this can happen on a restart 26278 } 26279 26280 if ($identifier) { 26281 $tok = $identifier; 26282 if ($message) { write_logfile_entry($message) } 26283 } 26284 else { 26285 $tok = $tok_begin; 26286 $i = $i_begin; 26287 } 26288 26289 TOKENIZER_DEBUG_FLAG_SCAN_ID && do { 26290 my ( $a, $b, $c ) = caller; 26291 print 26292"SCANID: called from $a $b $c with tok, i, state, identifier =$tok_begin, $i_begin, $id_scan_state_begin, $identifier_begin\n"; 26293 print 26294"SCANID: returned with tok, i, state, identifier =$tok, $i, $id_scan_state, $identifier\n"; 26295 }; 26296 return ( $i, $tok, $type, $id_scan_state, $identifier ); 26297} 26298 26299{ 26300 26301 # saved package and subnames in case prototype is on separate line 26302 my ( $package_saved, $subname_saved ); 26303 26304 sub do_scan_sub { 26305 26306 # do_scan_sub parses a sub name and prototype 26307 # it is called with $i_beg equal to the index of the first nonblank 26308 # token following a 'sub' token. 26309 26310 # TODO: add future error checks to be sure we have a valid 26311 # sub name. For example, 'sub &doit' is wrong. Also, be sure 26312 # a name is given if and only if a non-anonymous sub is 26313 # appropriate. 
# USES GLOBAL VARS: $current_package, $last_nonblank_token,
        # $in_attribute_list, %saw_function_definition,
        # $statement_type
        # It also reads $tokenizer_self->{_last_line_number} and reads and
        # writes the enclosing-scope lexicals $package_saved/$subname_saved.
        #
        # Parameters (positional):
        #   $input_line      - text of the line being scanned
        #   $i               - pre-token index; updated on a match
        #   $i_beg           - index of first nonblank token after 'sub'
        #   $tok, $type      - current token text and type; updated on a match
        #   $rtokens         - reference to the array of pre-tokens
        #   $rtoken_map      - reference to the array giving the character
        #                      position in $input_line of each pre-token
        #   $id_scan_state   - ignored on input (reset to "" below)
        #   $max_token_index - index of the last pre-token on this line
        #
        # Returns the list ( $i, $tok, $type, $id_scan_state ).
        # $id_scan_state is 'sub' only when a '(' was seen as the next
        # token without a prototype having been parsed, meaning we must be
        # called again to pick up the prototype.

        my (
            $input_line, $i,             $i_beg,
            $tok,        $type,          $rtokens,
            $rtoken_map, $id_scan_state, $max_token_index
        ) = @_;
        $id_scan_state = "";    # normally we get everything in one call
        my $subname = undef;
        my $package = undef;
        my $proto   = undef;
        my $attrs   = undef;
        my $match;

        # start the \G-anchored matches at the first token after 'sub'
        my $pos_beg = $$rtoken_map[$i_beg];
        pos($input_line) = $pos_beg;

        # sub NAME PROTO ATTRS
        if (
            $input_line =~ m/\G\s*
        ((?:\w*(?:'|::))*)  # package - something that ends in :: or '
        (\w+)               # NAME    - required
        (\s*\([^){]*\))?    # PROTO   - something in parens
        (\s*:)?             # ATTRS   - leading : of attribute list
        /gcx
          )
        {
            $match   = 1;
            $subname = $2;
            $proto   = $3;
            $attrs   = $4;

            # an explicit package qualifier wins over the current package;
            # old-style ' separators are normalized to '::', a leading
            # '::' is taken to mean 'main', and the trailing '::' is
            # removed
            $package = ( defined($1) && $1 ) ? $1 : $current_package;
            $package =~ s/\'/::/g;
            if ( $package =~ /^\:/ ) { $package = 'main' . $package }
            $package =~ s/::$//;

            # the token text is 'sub ' plus everything the regex consumed
            my $pos  = pos($input_line);
            my $numc = $pos - $pos_beg;
            $tok = 'sub ' . substr( $input_line, $pos_beg, $numc );
            $type = 'i';
        }

        # Look for prototype/attributes not preceded on this line by subname;
        # This might be an anonymous sub with attributes,
        # or a prototype on a separate line from its sub name
        elsif (
            $input_line =~ m/\G(\s*\([^){]*\))?  # PROTO
            (\s*:)?                              # ATTRS leading ':'
            /gcx
            && ( $1 || $2 )
          )
        {
            $match = 1;
            $proto = $1;
            $attrs = $2;

            # Handle prototype on separate line from subname
            if ($subname_saved) {
                $package = $package_saved;
                $subname = $subname_saved;
                $tok     = $last_nonblank_token;
            }
            $type = 'i';
        }

        if ($match) {

            # ATTRS: if there are attributes, back up and let the ':' be
            # found later by the scanner.
            my $pos = pos($input_line);
            if ($attrs) {
                $pos -= length($attrs);
            }

            my $next_nonblank_token = $tok;

            # catch case of line with leading ATTR ':' after anonymous sub
            if ( $pos == $pos_beg && $tok eq ':' ) {
                $type              = 'A';
                $in_attribute_list = 1;
            }

            # We must convert back from character position
            # to pre_token index.
            else {

                # I don't think an error flag can occur here ..but ?
                my $error;
                ( $i, $error ) = inverse_pretoken_map( $i, $pos, $rtoken_map,
                    $max_token_index );
                if ($error) { warning("Possibly invalid sub\n") }

                # check for multiple definitions of a sub
                ( $next_nonblank_token, my $i_next ) =
                  find_next_nonblank_token_on_this_line( $i, $rtokens,
                    $max_token_index );
            }

            if ( $next_nonblank_token =~ /^(\s*|#)$/ )
            {    # skip blank or side comment
                my ( $rpre_tokens, $rpre_types ) =
                  peek_ahead_for_n_nonblank_pre_tokens(1);
                if ( defined($rpre_tokens) && @$rpre_tokens ) {
                    $next_nonblank_token = $rpre_tokens->[0];
                }
                else {

                    # nothing was returned by the peek; use '}', which
                    # falls into the do-nothing branch below
                    $next_nonblank_token = '}';
                }
            }

            # the saved names are only kept when a prototype is still
            # expected on a following line (the '(' branch below)
            $package_saved = "";
            $subname_saved = "";
            if ( $next_nonblank_token eq '{' ) {
                if ($subname) {

                    # Check for multiple definitions of a sub, but
                    # it is ok to have multiple sub BEGIN, etc,
                    # so we do not complain if name is all caps
                    if (   $saw_function_definition{$package}{$subname}
                        && $subname !~ /^[A-Z]+$/ )
                    {
                        my $lno = $saw_function_definition{$package}{$subname};
                        warning(
"already saw definition of 'sub $subname' in package '$package' at line $lno\n"
                        );
                    }

                    # remember where this definition was seen
                    $saw_function_definition{$package}{$subname} =
                      $tokenizer_self->{_last_line_number};
                }
            }
            elsif ( $next_nonblank_token eq ';' ) {
            }
            elsif ( $next_nonblank_token eq '}' ) {
            }

            # ATTRS - if an attribute list follows, remember the name
            # of the sub so the next opening brace can be labeled.
            # Setting 'statement_type' causes any ':'s to introduce
            # attributes.
            elsif ( $next_nonblank_token eq ':' ) {
                $statement_type = $tok;
            }

            # see if PROTO follows on another line:
            elsif ( $next_nonblank_token eq '(' ) {
                if ( $attrs || $proto ) {
                    warning(
"unexpected '(' after definition or declaration of sub '$subname'\n"
                    );
                }
                else {
                    $id_scan_state  = 'sub';    # we must come back to get proto
                    $statement_type = $tok;
                    $package_saved  = $package;
                    $subname_saved  = $subname;
                }
            }
            elsif ($next_nonblank_token) {      # EOF technically ok
                warning(
"expecting ':' or ';' or '{' after definition or declaration of sub '$subname' but saw '$next_nonblank_token'\n"
                );
            }
            check_prototype( $proto, $package, $subname );
        }

        # no match but line not blank
        else {
        }
        return ( $i, $tok, $type, $id_scan_state );
    }
}

#########i###############################################################
# Tokenizer utility routines which may use CONSTANTS but no other GLOBALS
#########################################################################

sub find_next_nonblank_token {
    my ( $i, $rtokens, $max_token_index ) = @_;

    if ( $i >= $max_token_index ) {
        if (
!peeked_ahead() ) {
            peeked_ahead(1);
            $rtokens =
              peek_ahead_for_nonblank_token( $rtokens, $max_token_index );
        }
    }
    my $next_nonblank_token = $$rtokens[ ++$i ];

    if ( $next_nonblank_token =~ /^\s*$/ ) {
        $next_nonblank_token = $$rtokens[ ++$i ];
    }
    return ( $next_nonblank_token, $i );
}

sub numerator_expected {

    # Filter for a possible numerator, used when guessing whether a '/'
    # starts a pattern or is a division operator.  The decision is made
    # from the next nonblank token after index $i (a directly following
    # '=' is stepped over first, to handle '/=').
    #
    # Returns one of three values (the convention for every variable or
    # routine whose name ends in _expected):
    #    1 - yes
    #    0 - can't tell
    #   -1 - no
    my ( $i, $rtokens, $max_token_index ) = @_;

    # handle /=
    if ( $$rtokens[ $i + 1 ] eq '=' ) { $i++ }

    my ($token_after) =
      find_next_nonblank_token( $i, $rtokens, $max_token_index );

    # something that can begin a term follows: '(', '$', a word
    # character, '.' or '@'
    return 1 if ( $token_after =~ /(\(|\$|\w|\.|\@)/ );

    # nothing but blanks follows, so there is no evidence either way
    return 0 if ( $token_after =~ /^\s*$/ );

    return -1;
}

sub pattern_expected {

    # Filter for a possible pattern.  It looks at the token after a
    # possible pattern and tries to determine if that token could end a
    # pattern.
    #
    # Returns:
    #    1 - yes
    #    0 - can't tell
    #   -1 - no
    #
    # NOTE(review): both tests below are loose on purpose or by accident:
    # any token that merely starts with one of the modifier letters is
    # skipped, and the keyword alternatives are unanchored, so a token
    # such as 'for' matches through 'or'.  Confirm before tightening.
    my ( $i, $rtokens, $max_token_index ) = @_;

    # skip possible modifier
    if ( $$rtokens[ $i + 1 ] =~ /^[cgimosxp]/ ) { $i++ }

    my ($token_after) =
      find_next_nonblank_token( $i, $rtokens, $max_token_index );

    # list of tokens which may follow a pattern
    # (can probably be expanded)
    return 1
      if ( $token_after =~ /(\)|\}|\;|\&\&|\|\||and|or|while|if|unless)/ );

    return 0 if ( $token_after =~ /^\s*$/ );

    return -1;
}

sub find_next_nonblank_token_on_this_line {

    # Return the list ( $token, $index ) for the next nonblank pre-token
    # after index $i, never looking past $max_token_index.
    #
    # If $i is already at (or beyond) the last index, the token is the
    # empty string and the index is returned unchanged.  If the only
    # token left on the line is blank, that blank token is returned.
    my ( $i, $rtokens, $max_token_index ) = @_;

    # nothing left on this line
    return ( "", $i ) if ( $i >= $max_token_index );

    my $next_nonblank_token = $$rtokens[ ++$i ];

    # step over a single blank token if another token follows it
    if ( $next_nonblank_token =~ /^\s*$/ && $i < $max_token_index ) {
        $next_nonblank_token = $$rtokens[ ++$i ];
    }
    return ( $next_nonblank_token, $i );
}

sub find_angle_operator_termination {

    # We are looking at a '<' and want to know if it is an angle operator.
# We are to return the list ( $i, $type ):
    #   $i    = pretoken index of ending '>' if found, $i_beg otherwise
    #   $type = 'Q' if found, '<' otherwise
    #
    # Parameters (positional):
    #   $input_line      - text of the line being scanned
    #   $i_beg           - pretoken index of the '<'
    #   $rtoken_map      - reference to the array giving the character
    #                      position in $input_line of each pretoken
    #   $expecting       - TERM or UNKNOWN (anything else is reported as
    #                      a program bug)
    #   $max_token_index - index of the last pretoken on this line
    my ( $input_line, $i_beg, $rtoken_map, $expecting, $max_token_index ) = @_;
    my $i    = $i_beg;
    my $type = '<';

    # begin searching just after the '<'
    pos($input_line) = 1 + $$rtoken_map[$i];

    my $filter;

    # we just have to find the next '>' if a term is expected
    if ( $expecting == TERM ) { $filter = '[\>]' }

    # we have to guess if we don't know what is expected
    elsif ( $expecting == UNKNOWN ) { $filter = '[\>\;\=\#\|\<]' }

    # shouldn't happen - we shouldn't be here if operator is expected.
    # Return the "not found" result now instead of going on to search
    # with an undefined filter.
    else {
        warning("Program Bug in find_angle_operator_termination\n");
        return ( $i, $type );
    }

    # To illustrate what we might be looking at, in case we are
    # guessing, here are some examples of valid angle operators
    # (or file globs):
    #  <tmp_imp/*>
    #  <FH>
    #  <$fh>
    #  <*.c *.h>
    #  <_>
    #  <jskdfjskdfj* op/* jskdjfjkosvk*> ( glob.t)
    #  <${PREFIX}*img*.$IMAGE_TYPE>
    #  <img*.$IMAGE_TYPE>
    #  <Timg*.$IMAGE_TYPE>
    #  <$LATEX2HTMLVERSIONS${dd}html[1-9].[0-9].pl>
    #
    # Here are some examples of lines which do not have angle operators:
    #  return undef unless $self->[2]++ < $#{$self->[1]};
    #  < 2  || @$t >
    #
    # the following line from dlister.pl caused trouble:
    #  print'~'x79,"\n",$D<1024?"0.$D":$D>>10,"K, $C files\n\n\n";
    #
    # If the '<' starts an angle operator, it must end on this line and
    # it must not have certain characters like ';' and '=' in it.  I use
    # this to limit the testing.  This filter should be improved if
    # possible.

    if ( $input_line =~ /($filter)/g ) {

        if ( $1 eq '>' ) {

            # We MAY have found an angle operator termination if we get
            # here, but we need to do more to be sure we haven't been
            # fooled.
            my $pos = pos($input_line);

            # text from the '<' through the '>' inclusive
            my $pos_beg = $$rtoken_map[$i];
            my $str = substr( $input_line, $pos_beg, ( $pos - $pos_beg ) );

            # Reject if the closing '>' follows a '-' as in:
            # if ( VERSION < 5.009 && $op-> name eq 'aassign' ) { }
            if ( $expecting == UNKNOWN ) {
                my $check = substr( $input_line, $pos - 2, 1 );
                if ( $check eq '-' ) {
                    return ( $i, $type );
                }
            }

            ######################################debug#####
            #write_diagnostics( "ANGLE? :$str\n");
            #print "ANGLE: found $1 at pos=$pos str=$str check=$check\n";
            ######################################debug#####
            $type = 'Q';
            my $error;
            ( $i, $error ) =
              inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index );

            # It may be possible that a quote ends midway in a pretoken.
            # If this happens, it may be necessary to split the pretoken.
            if ($error) {
                warning(
                    "Possible tokinization error..please check this line\n");
                report_possible_bug();
            }

            # Now let's see where we stand....
            # OK if math op not possible
            if ( $expecting == TERM ) {
            }

            # OK if there are no more than 2 pre-tokens inside
            # (not possible to write 2 token math between < and >)
            # This catches most common cases
            elsif ( $i <= $i_beg + 3 ) {
                write_diagnostics("ANGLE(1 or 2 tokens): $str\n");
            }

            # Not sure..
            else {

                # Let's try a Brace Test: any braces inside must balance.
                # tr/// with an empty replacement list only counts.
                my $br = ( $str =~ tr/{// ) - ( $str =~ tr/}// );
                my $sb = ( $str =~ tr/[// ) - ( $str =~ tr/]// );
                my $pr = ( $str =~ tr/(// ) - ( $str =~ tr/)// );

                # if braces do not balance - not angle operator
                if ( $br || $sb || $pr ) {
                    $i    = $i_beg;
                    $type = '<';
                    write_diagnostics(
                        "NOT ANGLE (BRACE={$br ($pr [$sb ):$str\n");
                }

                # we should keep doing more checks here...to be continued
                # Tentatively accepting this as a valid angle operator.
                # There are lots more things that can be checked.
                else {
                    write_diagnostics(
                        "ANGLE-Guessing yes: $str expecting=$expecting\n");
                    write_logfile_entry("Guessing angle operator here: $str\n");
                }
            }
        }

        # didn't find ending >
        # NOTE(review): when $expecting == TERM the filter can only match
        # '>', so this warning looks unreachable, and a line with no '>'
        # at all produces no warning.  Left as is; confirm the intent.
        else {
            if ( $expecting == TERM ) {
                warning("No ending > for angle operator\n");
            }
        }
    }
    return ( $i, $type );
}

sub scan_number_do {

    #  scan a number in any of the formats that Perl accepts
    #  Underbars (_) are allowed in decimal numbers.
#  input parameters -
    #   $input_line  - the string to scan
    #   $i           - pre_token index to start scanning
    #   $rtoken_map  - reference to the pre_token map giving starting
    #                  character position in $input_line of token $i
    #   $input_type  - token type to hand back when no number is found
    #   $max_token_index - index of the last pre_token; passed on to
    #                  inverse_pretoken_map
    #  output parameters, returned as the list ( $i, $type, $number ) -
    #    $i      - last pre_token index of the number just scanned
    #              (unchanged if no number was found)
    #    $type   - 'v' for a v-string, 'n' for any other number,
    #              or $input_type if no number was found
    #    number  - the number (characters); or undef if not a number

    my ( $input_line, $i, $rtoken_map, $input_type, $max_token_index ) = @_;
    my $pos_beg = $$rtoken_map[$i];
    my $pos;
    my $i_begin = $i;
    my $number  = undef;
    my $type    = $input_type;

    my $first_char = substr( $input_line, $pos_beg, 1 );

    # Look for bad starting characters; Shouldn't happen..
    if ( $first_char !~ /[\d\.\+\-Ee]/ ) {
        warning("Program bug - scan_number given character $first_char\n");
        report_definite_bug();
        return ( $i, $type, $number );
    }

    # The three attempts below are tried in order; each one starts over
    # at $pos_beg and is skipped once $number has been set.

    # handle v-string without leading 'v' character ('Two Dot' rule)
    # (vstring.t)
    # TODO: v-strings may contain underscores
    pos($input_line) = $pos_beg;
    if ( $input_line =~ /\G((\d+)?\.\d+(\.\d+)+)/g ) {
        $pos = pos($input_line);
        my $numc = $pos - $pos_beg;
        $number = substr( $input_line, $pos_beg, $numc );
        $type = 'v';
        report_v_string($number);
    }

    # handle octal, hex, binary
    if ( !defined($number) ) {
        pos($input_line) = $pos_beg;
        if ( $input_line =~ /\G[+-]?0((x[0-9a-fA-F_]+)|([0-7_]+)|(b[01_]+))/g )
        {
            $pos = pos($input_line);
            my $numc = $pos - $pos_beg;
            $number = substr( $input_line, $pos_beg, $numc );
            $type = 'n';
        }
    }

    # handle decimal
    # (every part of this pattern is optional, so it can match the empty
    # string; the digit filter further down rejects such results)
    if ( !defined($number) ) {
        pos($input_line) = $pos_beg;

        if ( $input_line =~ /\G([+-]?[\d_]*(\.[\d_]*)?([Ee][+-]?(\d+))?)/g ) {
            $pos = pos($input_line);

            # watch out for things like 0..40 which would give 0. by this;
            if (   ( substr( $input_line, $pos - 1, 1 ) eq '.' )
                && ( substr( $input_line, $pos, 1 ) eq '.' ) )
            {
                $pos--;
            }
            my $numc = $pos - $pos_beg;
            $number = substr( $input_line, $pos_beg, $numc );
            $type = 'n';
        }
    }

    # filter out non-numbers like e + - . e2  .e3 +e6
    # the rule: at least one digit, and any 'e' must be preceded by a digit
    if (
        $number !~ /\d/    # no digits
        || (   $number =~ /^(.*)[eE]/
            && $1 !~ /\d/ )    # or no digits before the 'e'
      )
    {
        $number = undef;
        $type   = $input_type;
        return ( $i, $type, $number );
    }

    # Found a number; now we must convert back from character position
    # to pre_token index. An error here implies user syntax error.
    # An example would be an invalid octal number like '009'.
    my $error;
    ( $i, $error ) =
      inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index );
    if ($error) { warning("Possibly invalid number\n") }

    return ( $i, $type, $number );
}

sub inverse_pretoken_map {

    # Starting with the current pre_token index $i, scan forward until
    # finding the index of the next pre_token whose position is $pos.
    my ( $i, $pos, $rtoken_map, $max_token_index ) = @_;
    my $error = 0;

    while ( ++$i <= $max_token_index ) {

        if ( $pos <= $$rtoken_map[$i] ) {

            # Let the calling routine handle errors in which we do not
            # land on a pre-token boundary.  It can happen by running
            # perltidy on some non-perl scripts, for example.
if ( $pos < $$rtoken_map[$i] ) { $error = 1 }

            # step back to the last pre-token that starts before $pos
            $i--;
            last;
        }
    }
    return ( $i, $error );
}

sub find_here_doc {

    # find the target of a here document, if any
    # input parameters:
    #   $expecting - TERM or UNKNOWN are tested for below
    #   $i - token index of the second < of <<
    #   ($i must be less than the last token index if this is called)
    #   $rtokens - reference to the array of pre-tokens
    #   $rtoken_map - not used in this routine
    #   $max_token_index - index of the last pre-token on this line
    # output parameters, returned as a list in this order:
    #   $found_target = 0 didn't find target; =1 found target
    #   HERE_TARGET - the target string (may be empty string)
    #   $here_quote_character - quote character around the target, or ''
    #   $i - unchanged if not here doc,
    #    or index of the last token of the here target
    #   $saw_error - flag noting unbalanced quote on here target
    my ( $expecting, $i, $rtokens, $rtoken_map, $max_token_index ) = @_;
    my $ibeg                 = $i;
    my $found_target         = 0;
    my $here_doc_target      = '';
    my $here_quote_character = '';
    my $saw_error            = 0;
    my ( $next_nonblank_token, $i_next_nonblank, $next_token );
    $next_token = $$rtokens[ $i + 1 ];

    # perl allows a backslash before the target string (heredoc.t)
    my $backslash = 0;
    if ( $next_token eq '\\' ) {
        $backslash  = 1;
        $next_token = $$rtokens[ $i + 2 ];
    }

    ( $next_nonblank_token, $i_next_nonblank ) =
      find_next_nonblank_token_on_this_line( $i, $rtokens, $max_token_index );

    # quoted target: follow the quote to its end on this line
    if ( $next_nonblank_token =~ /[\'\"\`]/ ) {

        my $in_quote    = 1;
        my $quote_depth = 0;
        my $quote_pos   = 0;
        my $quoted_string;

        # $here_quote_character is still '' here, so follow_quoted_string
        # takes the delimiter from the first nonblank token it finds
        (
            $i, $in_quote, $here_quote_character, $quote_pos, $quote_depth,
            $quoted_string
          )
          = follow_quoted_string( $i_next_nonblank, $in_quote, $rtokens,
            $here_quote_character, $quote_pos, $quote_depth, $max_token_index );

        if ($in_quote) {    # didn't find end of quote, so no target found
            $i = $ibeg;
            if ( $expecting == TERM ) {
                warning(
"Did not find here-doc string terminator ($here_quote_character) before end of line \n"
                );
                $saw_error = 1;
            }
        }
        else {              # found ending quote
            my $j;
            $found_target = 1;

            # rebuild the target from the tokens between the two quotes
            my $tokj;
            for ( $j = $i_next_nonblank + 1 ; $j < $i ; $j++ ) {
                $tokj = $$rtokens[$j];

                # we have to remove any backslash before the quote character
                # so that the here-doc-target exactly matches this string
                next
                  if ( $tokj eq "\\"
                    && $j < $i - 1
                    && $$rtokens[ $j + 1 ] eq $here_quote_character );
                $here_doc_target .= $tokj;
            }
        }
    }

    # blank after '<<' where a term is expected: empty target
    elsif ( ( $next_token =~ /^\s*$/ ) and ( $expecting == TERM ) ) {
        $found_target = 1;
        write_logfile_entry(
            "found blank here-target after <<; suggest using \"\"\n");
        $i = $ibeg;
    }
    elsif ( $next_token =~ /^\w/ ) {    # simple bareword or integer after <<

        # when we cannot tell what is expected, let guess_if_here_doc
        # decide; otherwise assume this is a here-doc
        my $here_doc_expected;
        if ( $expecting == UNKNOWN ) {
            $here_doc_expected = guess_if_here_doc($next_token);
        }
        else {
            $here_doc_expected = 1;
        }

        if ($here_doc_expected) {
            $found_target    = 1;
            $here_doc_target = $next_token;
            $i               = $ibeg + 1;
        }

    }
    else {

        if ( $expecting == TERM ) {
            $found_target = 1;
            write_logfile_entry("Note: bare here-doc operator <<\n");
        }
        else {
            $i = $ibeg;
        }
    }

    # patch to neglect any prepended backslash
    if ( $found_target && $backslash ) { $i++ }

    return ( $found_target, $here_doc_target, $here_quote_character, $i,
        $saw_error );
}

sub do_quote {

    # follow (or continue following) quoted string(s)
    # $in_quote return code:
    #   0 - ok, found end
    #   1 - still must find end of quote whose target is $quote_character
    #   2 - still looking for end of first of two quotes
    #
    # Returns updated strings:
    #  $quoted_string_1 = quoted string seen while in_quote=1
    #  $quoted_string_2 = quoted string seen
#                     while in_quote=2
    #
    # The value returned is the list
    #   ( $i, $in_quote, $quote_character, $quote_pos, $quote_depth,
    #     $quoted_string_1, $quoted_string_2 )
    my (
        $i,               $in_quote,    $quote_character,
        $quote_pos,       $quote_depth, $quoted_string_1,
        $quoted_string_2, $rtokens,     $rtoken_map,
        $max_token_index
    ) = @_;

    # text handed back by the most recent follow_quoted_string call
    my $piece;

    # two quotes/quoted_string_1s to follow: work on the first of them
    if ( $in_quote == 2 ) {
        ( $i, $in_quote, $quote_character, $quote_pos, $quote_depth, $piece )
          = follow_quoted_string( $i, $in_quote, $rtokens, $quote_character,
            $quote_pos, $quote_depth, $max_token_index );
        $quoted_string_2 .= $piece;

        if ( $in_quote != 1 ) {

            # the first quote did not end on this line
            $quoted_string_2 .= "\n";
        }
        else {

            # The first quote ended.  After a bracketing delimiter, step
            # past its closing character, then clear the quote character
            # so that the next call finds a new opening delimiter.
            if ( $quote_character =~ /[\{\[\<\(]/ ) { $i++ }
            $quote_character = '';
        }
    }

    # one (more) quote to follow
    if ( $in_quote == 1 ) {
        ( $i, $in_quote, $quote_character, $quote_pos, $quote_depth, $piece )
          = follow_quoted_string( $i, $in_quote, $rtokens, $quote_character,
            $quote_pos, $quote_depth, $max_token_index );
        $quoted_string_1 .= $piece;

        # still unterminated at the end of this line
        $quoted_string_1 .= "\n" if ( $in_quote == 1 );
    }

    return ( $i, $in_quote, $quote_character, $quote_pos, $quote_depth,
        $quoted_string_1, $quoted_string_2 );
}

sub follow_quoted_string {

    # scan for a specific token, skipping escaped characters
    # if the quote character is blank, use the first non-blank character
    # input parameters:
    #   $rtokens = reference to the array of tokens
    #   $i = the token index of the first character to search
    #   $in_quote = number of quoted strings being followed
    #   $beginning_tok = the starting quote character
    #   $quote_pos = index to check next for alphanumeric delimiter
    # output parameters:
    #   $i = the token index of the ending quote character
    #   $in_quote =
decremented if found end, unchanged if not 27033 # $beginning_tok = the starting quote character 27034 # $quote_pos = index to check next for alphanumeric delimiter 27035 # $quote_depth = nesting depth, since delimiters '{ ( [ <' can be nested. 27036 # $quoted_string = the text of the quote (without quotation tokens) 27037 my ( $i_beg, $in_quote, $rtokens, $beginning_tok, $quote_pos, $quote_depth, 27038 $max_token_index ) 27039 = @_; 27040 my ( $tok, $end_tok ); 27041 my $i = $i_beg - 1; 27042 my $quoted_string = ""; 27043 27044 TOKENIZER_DEBUG_FLAG_QUOTE && do { 27045 print 27046"QUOTE entering with quote_pos = $quote_pos i=$i beginning_tok =$beginning_tok\n"; 27047 }; 27048 27049 # get the corresponding end token 27050 if ( $beginning_tok !~ /^\s*$/ ) { 27051 $end_tok = matching_end_token($beginning_tok); 27052 } 27053 27054 # a blank token means we must find and use the first non-blank one 27055 else { 27056 my $allow_quote_comments = ( $i < 0 ) ? 1 : 0; # i<0 means we saw a <cr> 27057 27058 while ( $i < $max_token_index ) { 27059 $tok = $$rtokens[ ++$i ]; 27060 27061 if ( $tok !~ /^\s*$/ ) { 27062 27063 if ( ( $tok eq '#' ) && ($allow_quote_comments) ) { 27064 $i = $max_token_index; 27065 } 27066 else { 27067 27068 if ( length($tok) > 1 ) { 27069 if ( $quote_pos <= 0 ) { $quote_pos = 1 } 27070 $beginning_tok = substr( $tok, $quote_pos - 1, 1 ); 27071 } 27072 else { 27073 $beginning_tok = $tok; 27074 $quote_pos = 0; 27075 } 27076 $end_tok = matching_end_token($beginning_tok); 27077 $quote_depth = 1; 27078 last; 27079 } 27080 } 27081 else { 27082 $allow_quote_comments = 1; 27083 } 27084 } 27085 } 27086 27087 # There are two different loops which search for the ending quote 27088 # character. 
In the rare case of an alphanumeric quote delimiter, we 27089 # have to look through alphanumeric tokens character-by-character, since 27090 # the pre-tokenization process combines multiple alphanumeric 27091 # characters, whereas for a non-alphanumeric delimiter, only tokens of 27092 # length 1 can match. 27093 27094 ################################################################### 27095 # Case 1 (rare): loop for case of alphanumeric quote delimiter.. 27096 # "quote_pos" is the position the current word to begin searching 27097 ################################################################### 27098 if ( $beginning_tok =~ /\w/ ) { 27099 27100 # Note this because it is not recommended practice except 27101 # for obfuscated perl contests 27102 if ( $in_quote == 1 ) { 27103 write_logfile_entry( 27104 "Note: alphanumeric quote delimiter ($beginning_tok) \n"); 27105 } 27106 27107 while ( $i < $max_token_index ) { 27108 27109 if ( $quote_pos == 0 || ( $i < 0 ) ) { 27110 $tok = $$rtokens[ ++$i ]; 27111 27112 if ( $tok eq '\\' ) { 27113 27114 # retain backslash unless it hides the end token 27115 $quoted_string .= $tok 27116 unless $$rtokens[ $i + 1 ] eq $end_tok; 27117 $quote_pos++; 27118 last if ( $i >= $max_token_index ); 27119 $tok = $$rtokens[ ++$i ]; 27120 } 27121 } 27122 my $old_pos = $quote_pos; 27123 27124 unless ( defined($tok) && defined($end_tok) && defined($quote_pos) ) 27125 { 27126 27127 } 27128 $quote_pos = 1 + index( $tok, $end_tok, $quote_pos ); 27129 27130 if ( $quote_pos > 0 ) { 27131 27132 $quoted_string .= 27133 substr( $tok, $old_pos, $quote_pos - $old_pos - 1 ); 27134 27135 $quote_depth--; 27136 27137 if ( $quote_depth == 0 ) { 27138 $in_quote--; 27139 last; 27140 } 27141 } 27142 else { 27143 $quoted_string .= substr( $tok, $old_pos ); 27144 } 27145 } 27146 } 27147 27148 ######################################################################## 27149 # Case 2 (normal): loop for case of a non-alphanumeric quote delimiter.. 
sub indicate_error {

    # Issue a warning message, then show the offending input line together
    # with a marker string written beneath the error position.  Logfile
    # output is interrupted while this is done and resumed afterwards.
    #   $msg         = text of the warning
    #   $line_number = number of the input line
    #   $input_line  = text of the input line
    #   $pos         = character position of the error in the line
    #   $caret       = marker string to write under that position
    my ( $msg, $line_number, $input_line, $pos, $caret ) = @_;

    interrupt_logfile();
    warning($msg);
    write_error_indicator_pair( $line_number, $input_line, $pos, $caret );
    resume_logfile();
}

sub write_error_indicator_pair {

    # Write two warning lines: the numbered (and possibly shortened) input
    # line, and below it an underline carrying the marker string.
    my ( $line_number, $input_line, $pos, $caret ) = @_;

    my ( $offset, $numbered_line, $marker_line ) =
      make_numbered_line( $line_number, $input_line, $pos );

    # $offset converts a position in the input line to a position in the
    # displayed line
    $marker_line = write_on_underline( $marker_line, $pos - $offset, $caret );

    warning("$numbered_line\n");

    # no need to show the blanks to the right of the marker
    $marker_line =~ s/\s*$//;
    warning("$marker_line\n");
}
sub make_numbered_line {

    # Build a short display form of an input line for an error message,
    #     $lineno: sub_string
    # where sub_string is a window of the line which contains the position
    # of interest.  Text cut off at either end is replaced by '...'.
    #
    # Example with the end of a long line cut off:
    #
    #  2: (One of QAML 2.0's authors is a member of the World Wide Web Con ...
    #
    # Example with the start of the line cut off:
    #
    #  2: ... er of the World Wide Web Consortium's
    #
    # input parameters are:
    #  $lineno = line number
    #  $str = the text of the line
    #  $pos = position of interest (the error) : 0 = first character
    #
    # We return :
    #  - $offset = an offset which corrects the position in case we only
    #    display part of a line, such that $pos-$offset is the effective
    #    position from the start of the displayed line.
    #  - $numbered_line = the numbered line as above,
    #  - $underline = a blank 'underline' which is all spaces with the same
    #    number of characters as the numbered line.

    my ( $lineno, $str, $pos ) = @_;

    my $window = 68;    # number of characters of text to show
    my $marker = 4;     # length of the '... ' and ' ...' markers

    # start the window 40 characters before a position far into the line
    my $offset = 0;
    if ( $pos >= 60 ) { $offset = $pos - 40 }

    my $cut_right = ( length($str) - $offset - $window > 0 );
    my $cut_left  = ( $offset != 0 );

    my $shown = $str;
    if ($cut_left) {

        # the leading marker stands in for the first characters of the window
        $shown =
          $cut_right
          ? substr( $str, $offset + $marker, $window - $marker )
          : substr( $str, $offset + $marker );
        $shown = "... " . $shown;
    }
    elsif ($cut_right) {
        $shown = substr( $str, 0, $window - $marker );
    }
    if ($cut_right) { $shown .= " ..." }

    # the line number prefix shifts the displayed text to the right
    my $prefix        = sprintf( "%d: ", $lineno );
    my $numbered_line = $prefix . $shown;
    my $underline     = " " x length($numbered_line);
    return ( $offset - length($prefix), $numbered_line, $underline );
}
sub write_on_underline {

    # Overwrite part of an 'underline' string with a marker.  The underline
    # starts out as a string of blanks with the same length as the numbered
    # line of code shown above it, and markers are written onto it to show
    # where an error is.  In this example the string '---^' has been written
    # just below the line of bad code:
    #
    # 2: (One of QAML 2.0's authors is a member of the World Wide Web Con ...
    #                 ---^
    #
    # which takes two calls:
    #   First call:  $pos=19, pos_chr=^
    #   Second call: $pos=16, pos_chr=---
    #
    # input parameters:
    #   $underline = the current underline string
    #   $pos       = position in the underline at which to write
    #   $pos_chr   = the string to write there
    # returns the updated underline.  The underline never grows: a marker
    # which would run past its end is cut short.

    my ( $underline, $pos, $pos_chr ) = @_;

    # a position outside of the underline is an error..shouldn't happen
    return $underline if ( $pos < 0 || $pos > length($underline) );

    # keep only as much of the marker as fits to the right of $pos
    my $room = length($underline) - $pos;
    if ( length($pos_chr) > $room ) {
        $pos_chr = substr( $pos_chr, 0, $room );
    }

    substr( $underline, $pos, length($pos_chr) ) = $pos_chr;
    return $underline;
}
sub pre_tokenize {

    # Split a string, $str, into a sequence of preliminary tokens.  The
    # kinds of tokens produced are:
    #   words       (type='w'),   example: 'max_tokens_wanted'
    #   digits      (type = 'd'), example: '0755'
    #   whitespace  (type = 'b'), example: '   '
    #   any other single character (i.e. punct; type = the character itself).
    # Nothing finer can be done at this stage because the text might be
    # inside of a quoted string or pattern.  The caller sets
    # $max_tokens_wanted to 0 to get all tokens.
    #
    # Returns references to three arrays:
    #   the tokens themselves
    #   the string position of the start of each token (this array has one
    #     more entry than the others: the position after the last token)
    #   the type of each token
    my ( $str, $max_tokens_wanted ) = @_;

    my @tokens    = ();
    my @token_map = (0);
    my @type      = ();

    while (1) {
        my ( $token, $token_type );

        # whitespace
        if ( $str =~ /\G(\s+)/gc ) { ( $token, $token_type ) = ( $1, 'b' ) }

        # numbers
        # note that this must come before words!
        elsif ( $str =~ /\G(\d+)/gc ) { ( $token, $token_type ) = ( $1, 'd' ) }

        # words
        elsif ( $str =~ /\G(\w+)/gc ) { ( $token, $token_type ) = ( $1, 'w' ) }

        # single-character punctuation
        elsif ( $str =~ /\G(\W)/gc ) { ( $token, $token_type ) = ( $1, $1 ) }

        # nothing left to match
        else { last }

        push @type,      $token_type;
        push @tokens,    $token;
        push @token_map, pos($str);

        # a count which starts at 0 never reaches 0, so gives all tokens
        last if ( --$max_tokens_wanted == 0 );
    }

    return ( \@tokens, \@token_map, \@type );
}

sub show_tokens {

    # this is an old debug routine: print the index, length, string
    # position and text of each token, one token per line
    my ( $rtokens, $rtoken_map ) = @_;

    foreach my $i ( 0 .. $#{$rtokens} ) {
        my $token = $rtokens->[$i];
        my $len   = length($token);
        print "$i:$len:$rtoken_map->[$i]:$token:\n";
    }
}

sub matching_end_token {

    # find closing character for a pattern: the partner of an opening
    # bracket, or else the same character
    my $beginning_token = shift;

    my %closing_token_for = (
        '{' => '}',
        '[' => ']',
        '<' => '>',
        '(' => ')',
    );

    return exists $closing_token_for{$beginning_token}
      ? $closing_token_for{$beginning_token}
      : $beginning_token;
}
sub dump_token_types {

    # Print a description of the token types and line types used by the
    # tokenizer.
    #   $class = the class name (not used)
    #   $fh    = the file handle to print to
    my $class = shift;
    my $fh    = shift;

    # This should be the latest list of token types in use
    # adding NEW_TOKENS: add a comment here
    # The operator lists below mirror the digraph and trigraph tables of
    # valid token types set up in the BEGIN block of this file, which
    # include '~~' and '!~~'.
    print $fh <<'END_OF_LIST';

Here is a list of the token types currently used for lines of type 'CODE'.
For the following tokens, the "type" of a token is just the token itself.

.. :: << >> ** && .. || // -> => += -= .= %= &= |= ^= *= <>
( ) <= >= == =~ !~ != ++ -- /= x= ~~
... **= <<= >>= &&= ||= //= <=> !~~
, + - / * | % ! x ~ = \ ? : . < > ^ &

The following additional token types are defined:

 type    meaning
    b    blank (white space)
    {    indent: opening structural curly brace or square bracket or paren
         (code block, anonymous hash reference, or anonymous array reference)
    }    outdent: right structural curly brace or square bracket or paren
    [    left non-structural square bracket (enclosing an array index)
    ]    right non-structural square bracket
    (    left non-structural paren (all but a list right of an =)
    )    right non-structural paren
    L    left non-structural curly brace (enclosing a key)
    R    right non-structural curly brace
    ;    terminal semicolon
    f    indicates a semicolon in a "for" statement
    h    here_doc operator <<
    #    a comment
    Q    indicates a quote or pattern
    q    indicates a qw quote block
    k    a perl keyword
    C    user-defined constant or constant function (with void prototype = ())
    U    user-defined function taking parameters
    G    user-defined function taking block parameter (like grep/map/eval)
    M    (unused, but reserved for subroutine definition name)
    P    (unused, but -html uses it to label pod text)
    t    type indicator such as %,$,@,*,&,sub
    w    bare word (perhaps a subroutine call)
    i    identifier of some type (with leading %, $, @, *, &, sub, -> )
    n    a number
    v    a v-string
    F    a file test operator (like -e)
    Y    File handle
    Z    identifier in indirect object slot: may be file handle, object
    J    LABEL:  code block label
    j    LABEL after next, last, redo, goto
    p    unary +
    m    unary -
    pp   pre-increment operator ++
    mm   pre-decrement operator --
    A    : used as attribute separator

    Here are the '_line_type' codes used internally:
    SYSTEM         - system-specific code before hash-bang line
    CODE           - line of perl code (including comments)
    POD_START      - line starting pod, such as '=head'
    POD            - pod documentation text
    POD_END        - last line of pod section, '=cut'
    HERE           - text of here-document
    HERE_END       - last line of here-doc (target word)
    FORMAT         - format section
    FORMAT_END     - last line of format section, '.'
    DATA_START     - __DATA__ line
    DATA           - unidentified text following __DATA__
    END_START      - __END__ line
    END            - unidentified text following __END__
    ERROR          - we are in big trouble, probably not a perl script
END_OF_LIST
}
BEGIN {

    # Set up the lookup tables of the tokenizer.  Each table is a hash in
    # which every listed word or token type has the value 1.
    my $set_flags = sub {
        my ( $rhash, @keys ) = @_;
        @{$rhash}{@keys} = (1) x scalar(@keys);
        return;
    };

    # These names are used in error messages
    @opening_brace_names = qw# '{' '[' '(' '?' #;
    @closing_brace_names = qw# '}' ']' ')' ':' #;

    my @digraphs = qw(
      .. :: << >> ** && .. || // -> => += -= .= %= &= |= ^= *= <>
      <= >= == =~ !~ != ++ -- /= x= ~~
    );
    $set_flags->( \%is_digraph, @digraphs );

    my @trigraphs = qw( ... **= <<= >>= &&= ||= //= <=> !~~ );
    $set_flags->( \%is_trigraph, @trigraphs );

    # make a hash of all valid token types for self-checking the tokenizer
    # (adding NEW_TOKENS : select a new character and add to this list)
    my @valid_token_types = qw#
      A b C G L R f h Q k t w i q n p m F pp mm U j J Y Z v
      { } ( ) [ ] ; + - / * | % ! x ~ = \ ? : . < > ^ &
      #;
    push( @valid_token_types, @digraphs, @trigraphs, '#', ',' );
    $set_flags->( \%is_valid_token_type, @valid_token_types );

    # a list of file test letters, as in -e (Table 3-4 of 'camel 3')
    $set_flags->(
        \%is_file_test_operator,
        qw( A B C M O R S T W X b c d e f g k l o p r s t u w x z)
    );

    # these functions have prototypes of the form (&), so when they are
    # followed by a block, that block MAY BE followed by an operator.
    $set_flags->( \%is_block_operator, qw( do eval ) );

    # these functions allow an identifier in the indirect object slot
    $set_flags->(
        \%is_indirect_object_taker,
        qw( print printf sort exec system say)
    );

    # These tokens may precede a code block
    # patched for SWITCH/CASE
    $set_flags->(
        \%is_code_block_token,
        qw( BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue if elsif else
          unless do while until eval for foreach map grep sort
          switch case given when)
    );

    # The list of keywords is built up in pieces
    my @Keywords = ();

    # keywords and tokens after which a value or pattern is expected,
    # but not an operator.  In other words, these should consume terms
    # to their right, or at least they are not expected to be followed
    # immediately by operators.
    my @value_requestor = qw(
      AUTOLOAD BEGIN CHECK DESTROY END EQ GE GT INIT LE LT NE UNITCHECK
      abs accept alarm and atan2
      bind binmode bless break
      caller chdir chmod chomp chop chown chr chroot close closedir cmp
      connect continue cos crypt
      dbmclose dbmopen defined delete die dump
      each else elsif eof eq exec exists exit exp
      fcntl fileno flock for foreach formline
      ge getc getgrgid getgrnam gethostbyaddr gethostbyname getnetbyaddr
      getnetbyname getpeername getpgrp getpriority getprotobyname
      getprotobynumber getpwnam getpwuid getservbyname getservbyport
      getsockname getsockopt glob gmtime goto grep gt
      hex
      if index int ioctl
      join
      keys kill
      last lc lcfirst le length link listen local localtime lock log lstat lt
      map mkdir msgctl msgget msgrcv msgsnd my
      ne next no not
      oct open opendir or ord our
      pack pipe pop pos print printf prototype push
      quotemeta
      rand read readdir readlink readline readpipe recv redo ref rename
      require reset return reverse rewinddir rindex rmdir
      scalar seek seekdir select semctl semget semop send sethostent
      setnetent setpgrp setpriority setprotoent setservent setsockopt shift
      shmctl shmget shmread shmwrite shutdown sin sleep socket socketpair
      sort splice split sprintf sqrt srand stat study substr symlink syscall
      sysopen sysread sysseek system syswrite
      tell telldir tie tied truncate
      uc ucfirst umask undef unless unlink unpack unshift untie until use
      utime
      values vec
      waitpid warn while write
      xor

      switch case given when err say
    );

    # patched above for SWITCH/CASE given/when err say
    # 'err' is a fairly safe addition.
    # TODO: 'default' still needed if appropriate
    # 'use feature' seen, but perltidy works ok without it.
    # Concerned that 'default' could break code.
    push( @Keywords, @value_requestor );

    # These are treated the same but are not keywords:
    push( @value_requestor, qw( constant vars ) );

    $set_flags->( \%expecting_term_token, @value_requestor );

    # this list contains keywords which do not look for arguments,
    # so that they might be followed by an operator, or at least
    # not a term.
    my @operator_requestor = qw(
      endgrent endhostent endnetent endprotoent endpwent endservent
      fork
      getgrent gethostent getlogin getnetent getppid getprotoent getpwent
      getservent
      setgrent setpwent
      time times
      wait wantarray
    );

    push( @Keywords, @operator_requestor );

    # These are treated the same but are not considered keywords:
    push( @operator_requestor, qw( STDERR STDIN STDOUT ) );

    $set_flags->( \%expecting_operator_token, @operator_requestor );

    # these token TYPES expect trailing operator but not a term
    # note: ++ and -- are post-increment and decrement, 'C' = constant
    $set_flags->( \%expecting_operator_types, qw( ++ -- C <> q ) );

    # these token TYPES consume values (terms)
    # note: pp and mm are pre-increment and decrement
    # f=semicolon in for,  F=file test operator
    my @value_requestor_type = qw#
      L { ( [ ~ !~ =~ ; . .. ... A : && ! || // = + - x
      **= += -= .= /= *= %= x= &= |= ^= <<= >>= &&= ||= //=
      <= >= == != => \ > < % * / ? & | ** <=> ~~ !~~
      f F pp mm Y p m U J G j >> << ^ t
      #;

    # (perl doesn't like a ',' in a qw block)
    push( @value_requestor_type, ',' );
    $set_flags->( \%expecting_term_types, @value_requestor_type );

    # Note: the following valid token types are not assigned here to
    # hashes requesting to be followed by values or terms, but are
    # instead currently hard-coded into sub operator_expected:
    # ) -> :: Q R Z ] b h i k n v w } #

    # For simple syntax checking, it is nice to have a list of operators which
    # will really be unhappy if not followed by a term.  This includes most
    # of the above...
    %really_want_term = %expecting_term_types;

    # with these exceptions...
    #   U = user sub, depends on prototype
    #   F = file test works on $_ if no following term
    #   Y = indirect object, too risky to check syntax; let perl do it
    foreach my $exception (qw( U F Y )) {
        delete $really_want_term{$exception};
    }

    $set_flags->( \%is_q_qq_qw_qx_qr_s_y_tr_m, qw(q qq qw qx qr s y tr m) );

    # These keywords are handled specially in the tokenizer code:
    push( @Keywords,
        qw( do eval format m package q qq qr qw qx s sub tr y ) );

    # Keywords after which list formatting may be used
    # WARNING: do not include |map|grep|eval or perl may die on
    # syntax errors (map1.t).
    my @keyword_taking_list = qw(
      and
      chmod chomp chop chown
      dbmopen die
      elsif exec
      fcntl for foreach formline
      getsockopt
      if index ioctl
      join
      kill
      local
      msgctl msgrcv msgsnd my
      open or our
      pack print printf push
      read readpipe recv return reverse rindex
      seek select semctl semget send setpriority setsockopt shmctl shmget
      shmread shmwrite socket socketpair sort splice split sprintf substr
      syscall sysopen sysread sysseek system syswrite
      tie
      unless unlink unpack unshift until
      vec
      warn while
    );
    $set_flags->( \%is_keyword_taking_list, @keyword_taking_list );

    # These are not used in any way yet
    #    my @unused_keywords = qw(
    #      CORE
    #     __FILE__
    #     __LINE__
    #     __PACKAGE__
    #     );

    #  The list of keywords was extracted from function 'keyword' in
    #  perl file toke.c version 5.005.03, using this utility, plus a
    #  little editing: (file getkwd.pl):
    #  while (<>) { while (/\"(.*)\"/g) { print "$1\n"; } }
    #  Add 'get' prefix where necessary, then split into the above lists.
    #  This list should be updated as necessary.
    #  The list should not contain these special variables:
    #  ARGV DATA ENV SIG STDERR STDIN STDOUT
    #  __DATA__ __END__

    $set_flags->( \%is_keyword, @Keywords );
}
27934 # The list should not contain these special variables: 27935 # ARGV DATA ENV SIG STDERR STDIN STDOUT 27936 # __DATA__ __END__ 27937 27938 @is_keyword{@Keywords} = (1) x scalar(@Keywords); 27939} 279401; 27941__END__ 27942 27943=head1 NAME 27944 27945Perl::Tidy - Parses and beautifies perl source 27946 27947=head1 SYNOPSIS 27948 27949 use Perl::Tidy; 27950 27951 Perl::Tidy::perltidy( 27952 source => $source, 27953 destination => $destination, 27954 stderr => $stderr, 27955 argv => $argv, 27956 perltidyrc => $perltidyrc, 27957 logfile => $logfile, 27958 errorfile => $errorfile, 27959 formatter => $formatter, # callback object (see below) 27960 dump_options => $dump_options, 27961 dump_options_type => $dump_options_type, 27962 ); 27963 27964=head1 DESCRIPTION 27965 27966This module makes the functionality of the perltidy utility available to perl 27967scripts. Any or all of the input parameters may be omitted, in which case the 27968@ARGV array will be used to provide input parameters as described 27969in the perltidy(1) man page. 27970 27971For example, the perltidy script is basically just this: 27972 27973 use Perl::Tidy; 27974 Perl::Tidy::perltidy(); 27975 27976The module accepts input and output streams by a variety of methods. 27977The following list of parameters may be any of the following: a 27978filename, an ARRAY reference, a SCALAR reference, or an object with 27979either a B<getline> or B<print> method, as appropriate. 
27980 27981 source - the source of the script to be formatted 27982 destination - the destination of the formatted output 27983 stderr - standard error output 27984 perltidyrc - the .perltidyrc file 27985 logfile - the .LOG file stream, if any 27986 errorfile - the .ERR file stream, if any 27987 dump_options - ref to a hash to receive parameters (see below), 27988 dump_options_type - controls contents of dump_options 27989 dump_getopt_flags - ref to a hash to receive Getopt flags 27990 dump_options_category - ref to a hash giving category of options 27991 dump_abbreviations - ref to a hash giving all abbreviations 27992 27993The following chart illustrates the logic used to decide how to 27994treat a parameter. 27995 27996 ref($param) $param is assumed to be: 27997 ----------- --------------------- 27998 undef a filename 27999 SCALAR ref to string 28000 ARRAY ref to array 28001 (other) object with getline (if source) or print method 28002 28003If the parameter is an object, and the object has a B<close> method, that 28004close method will be called at the end of the stream. 28005 28006=over 4 28007 28008=item source 28009 28010If the B<source> parameter is given, it defines the source of the 28011input stream. 28012 28013=item destination 28014 28015If the B<destination> parameter is given, it will be used to define the 28016file or memory location to receive output of perltidy. 28017 28018=item stderr 28019 28020The B<stderr> parameter allows the calling program to capture the output 28021to what would otherwise go to the standard error output device. 28022 28023=item perltidyrc 28024 28025If the B<perltidyrc> file is given, it will be used instead of any 28026F<.perltidyrc> configuration file that would otherwise be used. 28027 28028=item argv 28029 28030If the B<argv> parameter is given, it will be used instead of the 28031B<@ARGV> array. The B<argv> parameter may be a string, a reference to a 28032string, or a reference to an array. 
If it is a string or reference to a 28033string, it will be parsed into an array of items just as if it were a 28034command line string. 28035 28036=item dump_options 28037 28038If the B<dump_options> parameter is given, it must be the reference to a hash. 28039In this case, the parameters contained in any perltidyrc configuration file 28040will be placed in this hash and perltidy will return immediately. This is 28041equivalent to running perltidy with --dump-options, except that the parameters 28042are returned in a hash rather than dumped to standard output. Also, by default 28043only the parameters in the perltidyrc file are returned, but this can be 28044changed (see the next parameter). This parameter provides a convenient method 28045for external programs to read a perltidyrc file. An example program using 28046this feature, F<perltidyrc_dump.pl>, is included in the distribution. 28047 28048Any combination of the B<dump_> parameters may be used together. 28049 28050=item dump_options_type 28051 28052This parameter is a string which can be used to control the parameters placed 28053in the hash reference supplied by B<dump_options>. The possible values are 28054'perltidyrc' (default) and 'full'. The 'full' parameter causes both the 28055default options plus any options found in a perltidyrc file to be returned. 28056 28057=item dump_getopt_flags 28058 28059If the B<dump_getopt_flags> parameter is given, it must be the reference to a 28060hash. This hash will receive all of the parameters that perltidy understands 28061and flags that are passed to Getopt::Long. This parameter may be 28062used alone or with the B<dump_options> flag. Perltidy will 28063exit immediately after filling this hash. See the demo program 28064F<perltidyrc_dump.pl> for example usage. 28065 28066=item dump_options_category 28067 28068If the B<dump_options_category> parameter is given, it must be the reference to a 28069hash. 
This hash will receive a hash with keys equal to all long parameter names 28070and values equal to the title of the corresponding section of the perltidy manual. 28071See the demo program F<perltidyrc_dump.pl> for example usage. 28072 28073=item dump_abbreviations 28074 28075If the B<dump_abbreviations> parameter is given, it must be the reference to a 28076hash. This hash will receive all abbreviations used by Perl::Tidy. See the 28077demo program F<perltidyrc_dump.pl> for example usage. 28078 28079=back 28080 28081=head1 EXAMPLE 28082 28083The following example passes perltidy a snippet as a reference 28084to a string and receives the result back in a reference to 28085an array. 28086 28087 use Perl::Tidy; 28088 28089 # some messy source code to format 28090 my $source = <<'EOM'; 28091 use strict; 28092 my @editors=('Emacs', 'Vi '); my $rand = rand(); 28093 print "A poll of 10 random programmers gave these results:\n"; 28094 foreach(0..10) { 28095 my $i=int ($rand+rand()); 28096 print " $editors[$i] users are from Venus" . ", " . 28097 "$editors[1-$i] users are from Mars" . 28098 "\n"; 28099 } 28100 EOM 28101 28102 # We'll pass it as ref to SCALAR and receive it in a ref to ARRAY 28103 my @dest; 28104 perltidy( source => \$source, destination => \@dest ); 28105 foreach (@dest) {print} 28106 28107=head1 Using the B<formatter> Callback Object 28108 28109The B<formatter> parameter is an optional callback object which allows 28110the calling program to receive tokenized lines directly from perltidy for 28111further specialized processing. When this parameter is used, the two 28112formatting options which are built into perltidy (beautification or 28113html) are ignored. The following diagram illustrates the logical flow: 28114 28115 |-- (normal route) -> code beautification 28116 caller->perltidy->|-- (-html flag ) -> create html 28117 |-- (formatter given)-> callback to write_line 28118 28119This can be useful for processing perl scripts in some way. 
The 28120parameter C<$formatter> in the perltidy call, 28121 28122 formatter => $formatter, 28123 28124is an object created by the caller with a C<write_line> method which 28125will accept and process tokenized lines, one line per call. Here is 28126a simple example of a C<write_line> which merely prints the line number, 28127the line type (as determined by perltidy), and the text of the line: 28128 28129 sub write_line { 28130 28131 # This is called from perltidy line-by-line 28132 my $self = shift; 28133 my $line_of_tokens = shift; 28134 my $line_type = $line_of_tokens->{_line_type}; 28135 my $input_line_number = $line_of_tokens->{_line_number}; 28136 my $input_line = $line_of_tokens->{_line_text}; 28137 print "$input_line_number:$line_type:$input_line"; 28138 } 28139 28140The complete program, B<perllinetype>, is contained in the examples section of 28141the source distribution. As this example shows, the callback method 28142receives a parameter B<$line_of_tokens>, which is a reference to a hash 28143of other useful information. This example uses these hash entries: 28144 28145 $line_of_tokens->{_line_number} - the line number (1,2,...) 28146 $line_of_tokens->{_line_text} - the text of the line 28147 $line_of_tokens->{_line_type} - the type of the line, one of: 28148 28149 SYSTEM - system-specific code before hash-bang line 28150 CODE - line of perl code (including comments) 28151 POD_START - line starting pod, such as '=head' 28152 POD - pod documentation text 28153 POD_END - last line of pod section, '=cut' 28154 HERE - text of here-document 28155 HERE_END - last line of here-doc (target word) 28156 FORMAT - format section 28157 FORMAT_END - last line of format section, '.' 
28158 DATA_START - __DATA__ line 28159 DATA - unidentified text following __DATA__ 28160 END_START - __END__ line 28161 END - unidentified text following __END__ 28162 ERROR - we are in big trouble, probably not a perl script 28163 28164Most applications will be only interested in lines of type B<CODE>. For 28165another example, let's write a program which checks for one of the 28166so-called I<naughty matching variables> C<$`>, C<$&>, and C<$'>, which 28167can slow down processing. Here is a B<write_line>, from the example 28168program B<find_naughty.pl>, which does that: 28169 28170 sub write_line { 28171 28172 # This is called back from perltidy line-by-line 28173 # We're looking for $`, $&, and $' 28174 my ( $self, $line_of_tokens ) = @_; 28175 28176 # pull out some stuff we might need 28177 my $line_type = $line_of_tokens->{_line_type}; 28178 my $input_line_number = $line_of_tokens->{_line_number}; 28179 my $input_line = $line_of_tokens->{_line_text}; 28180 my $rtoken_type = $line_of_tokens->{_rtoken_type}; 28181 my $rtokens = $line_of_tokens->{_rtokens}; 28182 chomp $input_line; 28183 28184 # skip comments, pod, etc 28185 return if ( $line_type ne 'CODE' ); 28186 28187 # loop over tokens looking for $`, $&, and $' 28188 for ( my $j = 0 ; $j < @$rtoken_type ; $j++ ) { 28189 28190 # we only want to examine token types 'i' (identifier) 28191 next unless $$rtoken_type[$j] eq 'i'; 28192 28193 # pull out the actual token text 28194 my $token = $$rtokens[$j]; 28195 28196 # and check it 28197 if ( $token =~ /^\$[\`\&\']$/ ) { 28198 print STDERR 28199 "$input_line_number: $token\n"; 28200 } 28201 } 28202 } 28203 28204This example pulls out these tokenization variables from the $line_of_tokens 28205hash reference: 28206 28207 $rtoken_type = $line_of_tokens->{_rtoken_type}; 28208 $rtokens = $line_of_tokens->{_rtokens}; 28209 28210The variable C<$rtoken_type> is a reference to an array of token type codes, 28211and C<$rtokens> is a reference to a corresponding array of 
token text. 28212These are obviously only defined for lines of type B<CODE>. 28213Perltidy classifies tokens into types, and has a brief code for each type. 28214You can get a complete list at any time by running perltidy from the 28215command line with 28216 28217 perltidy --dump-token-types 28218 28219In the present example, we are only looking for tokens of type B<i> 28220(identifiers), so the for loop skips past all other types. When an 28221identifier is found, its actual text is checked to see if it is one 28222being sought. If so, the above write_line prints the token and its 28223line number. 28224 28225The B<formatter> feature is relatively new in perltidy, and further 28226documentation needs to be written to complete its description. However, 28227several example programs have been written and can be found in the 28228B<examples> section of the source distribution. Probably the best way 28229to get started is to find one of the examples which most closely matches 28230your application and start modifying it. 28231 28232For help with perltidy's peculiar way of breaking lines into tokens, you 28233might run, from the command line, 28234 28235 perltidy -D filename 28236 28237where F<filename> is a short script of interest. This will produce 28238F<filename.DEBUG> with interleaved lines of text and their token types. 28239The B<-D> flag has been in perltidy from the beginning for this purpose. 28240If you want to see the code which creates this file, it is 28241C<write_debug_entry> in Tidy.pm. 28242 28243=head1 EXPORT 28244 28245 &perltidy 28246 28247=head1 CREDITS 28248 28249Thanks to Hugh Myers who developed the initial modular interface 28250to perltidy. 28251 28252=head1 VERSION 28253 28254This man page documents Perl::Tidy version 20071205. 28255 28256=head1 AUTHOR 28257 28258 Steve Hancock 28259 perltidy at users.sourceforge.net 28260 28261=head1 SEE ALSO 28262 28263The perltidy(1) man page describes all of the features of perltidy.
It 28264can be found at http://perltidy.sourceforge.net. 28265 28266=cut 28267