1#!/usr/bin/perl -w 2 3# Break long quoted strings in perl code into smaller pieces 4# This version only breaks at blanks. See sub break_at_blanks to 5# customize. 6# 7# usage: 8# break_long_quotes.pl -ln myfile.pl >myfile.new 9# 10# where n specifies the maximum quote length. 11 12# NOTES: 13# 1. Use with caution - has not been extensively tested 14# 15# 2. The output is not beautified so that you can use diff to see what 16# changed. If all is ok, run the output through perltidy to clean it up. 17# 18# 3. This version only breaks single-line quotes contained within 19# either single or double quotes. 20 21# Steve Hancock, Sept 28, 2006 22# 23use strict; 24use Getopt::Std; 25$| = 1; 26use vars qw($opt_l $opt_h); 27 28my $usage = <<EOM; 29 usage: break_long_quotes.pl [ -ln ] filename >outfile 30 where n=line length (default 72) 31EOM 32 33getopts('hl:') or die "$usage"; 34if ($opt_h) { die $usage } 35if ( !defined $opt_l ) { 36 $opt_l = 70; 37} 38else { 39 $opt_l =~ /^\d+$/ or die "$usage"; 40} 41 42unless ( @ARGV == 1 ) { die $usage } 43my $file = $ARGV[0]; 44scan_file( $file, $opt_l ); 45 46sub scan_file { 47 my ( $file, $line_length ) = @_; 48 use Perl::Tidy; 49 use IO::File; 50 my $fh = IO::File->new( $file, 'r' ); 51 unless ($fh) { die "cannot open '$file': $!\n" } 52 my $formatter = MyWriter->new($line_length); 53 54 perltidy( 55 'formatter' => $formatter, # callback object 56 'source' => $fh, 57 'argv' => "-npro -se", # dont need .perltidyrc 58 # errors to STDOUT 59 ); 60 $fh->close(); 61} ## end sub scan_file 62 63##################################################################### 64# 65# This is a class with a write_line() method which receives 66# tokenized lines from perltidy 67# 68##################################################################### 69 70package MyWriter; 71 72sub new { 73 my ( $class, $line_length ) = @_; 74 my $comment_block = ""; 75 bless { 76 _rcomment_block => \$comment_block, 77 _maximum_comment_length => 0, 78 _max_quote_length => $line_length, 79 _in_hanging_side_comment => 0, 80 }, $class; 81} ## end sub new 82 83sub write_line { 84 85 # This is called from perltidy line-by-line 86 # We will look for quotes and fix them up if necessary 87 my $self = shift; 88 my $line_of_tokens = shift; 89 my $line_type = $line_of_tokens->{_line_type}; 90 my $input_line_number = $line_of_tokens->{_line_number}; 91 my $input_line = $line_of_tokens->{_line_text}; # the orignal line 92 my $rtoken_type = $line_of_tokens->{_rtoken_type}; # type of tokens 93 my $rtokens = $line_of_tokens->{_rtokens}; # text of tokens 94 my $starting_in_quote = 95 $line_of_tokens->{_starting_in_quote}; # text of tokens 96 my $ending_in_quote = $line_of_tokens->{_ending_in_quote}; # text of tokens 97 my $max_quote_length = $self->{_max_quote_length}; 98 chomp $input_line; 99 100 # look in lines of CODE (and not POD for example) 101 if ( $line_type eq 'CODE' && @$rtoken_type ) { 102 103 my $jmax = @$rtoken_type - 1; 104 105 # find leading whitespace 106 my $leading_whitespace = ( $input_line =~ /^(\s*)/ ) ? $1 : ""; 107 if ($starting_in_quote) {$leading_whitespace=""}; 108 my $new_line = $leading_whitespace; 109 110 # loop over tokens looking for quotes (token type Q) 111 for ( my $j = 0 ; $j <= $jmax ; $j++ ) { 112 113 # pull out the actual token text 114 my $token = $$rtokens[$j]; 115 116 # look for long quoted strings on a single line 117 # (multiple line quotes not currently handled) 118 if ( $$rtoken_type[$j] eq 'Q' 119 && !( $j == 0 && $starting_in_quote ) 120 && !( $j == $jmax && $ending_in_quote ) 121 && ( length($token) > $max_quote_length ) ) 122 { 123 my $quote_char = substr( $token, 0, 1 ); 124 if ( $quote_char eq '"' || $quote_char eq '\'' ) { 125 126 # safety check - shouldn't happen 127 my $check_char = substr( $token, -1, 1 ); 128 if ( $check_char ne $quote_char ) { 129 die <<EOM; 130programming error at line $input_line 131starting quote character is <<$quote_char>> but ending quote character is <<$check_char>> 132quoted string is: 133$token 134EOM 135 } ## end if ( $check_char ne $quote_char) 136 $token = 137 break_at_blanks( $token, $quote_char, $max_quote_length ); 138 } ## end if ( $quote_char eq '"'... 139 } ## end if ( $$rtoken_type[$j]... 140 $new_line .= $token; 141 } ## end for ( my $j = 0 ; $j <=... 142 143 # substitude the modified line for the original line 144 $input_line = $new_line; 145 } ## end if ( $line_type eq 'CODE') 146 147 # print the line 148 $self->print($input_line."\n"); 149 return; 150} ## end sub write_line 151 152sub break_at_blanks { 153 154 # break a string at one or more spaces so that the longest substring is 155 # less than the desired length (if possible). 156 my ( $str, $quote_char, $max_length ) = @_; 157 my $blank = ' '; 158 my $prev_char = ""; 159 my @break_after_pos; 160 my $quote_pos = -1; 161 while ( ( $quote_pos = index( $str, $blank, 1 + $quote_pos ) ) >= 0 ) { 162 163 # as a precaution, do not break if preceded by a backslash 164 if ( $quote_pos > 0 ) { 165 next if ( substr( $str, $quote_pos - 1, 1 ) eq '\\' ); 166 } 167 push @break_after_pos, $quote_pos; 168 } ## end while ( ( $quote_pos = index... 169 push @break_after_pos, length($str); 170 171 my $starting_pos = 0; 172 my $new_str = ""; 173 for ( my $i = 1 ; $i < @break_after_pos ; $i++ ) { 174 my $pos = $break_after_pos[$i]; 175 my $length = $pos - $starting_pos; 176 if ( $length > $max_length - 1 ) { 177 $pos = $break_after_pos[ $i - 1 ]; 178 $new_str .= substr( $str, $starting_pos, $pos - $starting_pos + 1 ) 179 . "$quote_char . $quote_char"; 180 $starting_pos = $pos + 1; 181 } ## end if ( $length > $max_length... 182 } ## end for ( my $i = 1 ; $i < ... 183 my $pos = length($str); 184 $new_str .= substr( $str, $starting_pos, $pos ); 185 return $new_str; 186} ## end sub break_at_blanks 187 188sub print { 189 my ( $self, $input_line ) = @_; 190 print $input_line; 191} 192 193# called once after the last line of a file 194sub finish_formatting { 195 my $self = shift; 196 $self->flush_comments(); 197} 198