1#!/usr/bin/perl -w
2
3# Break long quoted strings in perl code into smaller pieces
4# This version only breaks at blanks.  See sub break_at_blanks to
5# customize.
6#
7# usage:
8# break_long_quotes.pl -ln myfile.pl >myfile.new
9#
10# where n specifies the maximum quote length.
11
12# NOTES:
13# 1. Use with caution - has not been extensively tested
14#
15# 2. The output is not beautified so that you can use diff to see what
16# changed.  If all is ok, run the output through perltidy to clean it up.
17#
18# 3. This version only breaks single-line quotes contained within
19# either single or double quotes.
20
21# Steve Hancock, Sept 28, 2006
22#
23use strict;
24use Getopt::Std;
25$| = 1;
26use vars qw($opt_l $opt_h);
27
28my $usage = <<EOM;
29   usage: break_long_quotes.pl [ -ln ] filename >outfile
30          where n=line length (default 72)
31EOM
32
33getopts('hl:') or die "$usage";
34if ($opt_h) { die $usage }
35if ( !defined $opt_l ) {
36    $opt_l = 70;
37}
38else {
39    $opt_l =~ /^\d+$/ or die "$usage";
40}
41
42unless ( @ARGV == 1 ) { die $usage }
43my $file = $ARGV[0];
44scan_file( $file, $opt_l );
45
46sub scan_file {
47    my ( $file, $line_length ) = @_;
48    use Perl::Tidy;
49    use IO::File;
50    my $fh = IO::File->new( $file, 'r' );
51    unless ($fh) { die "cannot open '$file': $!\n" }
52    my $formatter = MyWriter->new($line_length);
53
54    perltidy(
55        'formatter' => $formatter,     # callback object
56        'source'    => $fh,
57        'argv'      => "-npro -se",    # dont need .perltidyrc
58                                       # errors to STDOUT
59    );
60    $fh->close();
61} ## end sub scan_file
62
63#####################################################################
64#
65# This is a class with a write_line() method which receives
66# tokenized lines from perltidy
67#
68#####################################################################
69
70package MyWriter;
71
72sub new {
73    my ( $class, $line_length ) = @_;
74    my $comment_block = "";
75    bless {
76        _rcomment_block          => \$comment_block,
77        _maximum_comment_length  => 0,
78        _max_quote_length        => $line_length,
79        _in_hanging_side_comment => 0,
80    }, $class;
81} ## end sub new
82
83sub write_line {
84
85    # This is called from perltidy line-by-line
86    # We will look for quotes and fix them up if necessary
87    my $self              = shift;
88    my $line_of_tokens    = shift;
89    my $line_type         = $line_of_tokens->{_line_type};
90    my $input_line_number = $line_of_tokens->{_line_number};
91    my $input_line        = $line_of_tokens->{_line_text};    # the orignal line
92    my $rtoken_type       = $line_of_tokens->{_rtoken_type};  # type of tokens
93    my $rtokens           = $line_of_tokens->{_rtokens};      # text of tokens
94    my $starting_in_quote =
95      $line_of_tokens->{_starting_in_quote};                  # text of tokens
96    my $ending_in_quote  = $line_of_tokens->{_ending_in_quote}; # text of tokens
97    my $max_quote_length = $self->{_max_quote_length};
98    chomp $input_line;
99
100    # look in lines of CODE (and not POD for example)
101    if ( $line_type eq 'CODE' && @$rtoken_type ) {
102
103        my $jmax = @$rtoken_type - 1;
104
105        # find leading whitespace
106        my $leading_whitespace = ( $input_line =~ /^(\s*)/ ) ? $1 : "";
107        if ($starting_in_quote) {$leading_whitespace=""};
108        my $new_line = $leading_whitespace;
109
110        # loop over tokens looking for quotes (token type Q)
111        for ( my $j = 0 ; $j <= $jmax ; $j++ ) {
112
113            # pull out the actual token text
114            my $token = $$rtokens[$j];
115
116            # look for long quoted strings on a single line
117            # (multiple line quotes not currently handled)
118            if (   $$rtoken_type[$j] eq 'Q'
119                && !( $j == 0     && $starting_in_quote )
120                && !( $j == $jmax && $ending_in_quote )
121                && ( length($token) > $max_quote_length ) )
122            {
123                my $quote_char = substr( $token, 0, 1 );
124                if ( $quote_char eq '"' || $quote_char eq '\'' ) {
125
126                    # safety check - shouldn't happen
127                    my $check_char = substr( $token, -1, 1 );
128                    if ( $check_char ne $quote_char ) {
129                        die <<EOM;
130programming error at line $input_line
131starting quote character is <<$quote_char>> but ending quote character is <<$check_char>>
132quoted string is:
133$token
134EOM
135                    } ## end if ( $check_char ne $quote_char)
136                    $token =
137                      break_at_blanks( $token, $quote_char, $max_quote_length );
138                } ## end if ( $quote_char eq '"'...
139            } ## end if ( $$rtoken_type[$j]...
140            $new_line .= $token;
141        } ## end for ( my $j = 0 ; $j <=...
142
143        # substitude the modified line for the original line
144        $input_line = $new_line;
145    } ## end if ( $line_type eq 'CODE')
146
147    # print the line
148    $self->print($input_line."\n");
149    return;
150} ## end sub write_line
151
152sub break_at_blanks {
153
154    # break a string at one or more spaces so that the longest substring is
155    # less than the desired length (if possible).
156    my ( $str, $quote_char, $max_length ) = @_;
157    my $blank     = ' ';
158    my $prev_char = "";
159    my @break_after_pos;
160    my $quote_pos = -1;
161    while ( ( $quote_pos = index( $str, $blank, 1 + $quote_pos ) ) >= 0 ) {
162
163        # as a precaution, do not break if preceded by a backslash
164        if ( $quote_pos > 0 ) {
165            next if ( substr( $str, $quote_pos - 1, 1 ) eq '\\' );
166        }
167        push @break_after_pos, $quote_pos;
168    } ## end while ( ( $quote_pos = index...
169    push @break_after_pos, length($str);
170
171    my $starting_pos = 0;
172    my $new_str      = "";
173    for ( my $i = 1 ; $i < @break_after_pos ; $i++ ) {
174        my $pos    = $break_after_pos[$i];
175        my $length = $pos - $starting_pos;
176        if ( $length > $max_length - 1 ) {
177            $pos = $break_after_pos[ $i - 1 ];
178            $new_str .= substr( $str, $starting_pos, $pos - $starting_pos + 1 )
179              . "$quote_char . $quote_char";
180            $starting_pos = $pos + 1;
181        } ## end if ( $length > $max_length...
182    } ## end for ( my $i = 1 ; $i < ...
183    my $pos = length($str);
184    $new_str .= substr( $str, $starting_pos, $pos );
185    return $new_str;
186} ## end sub break_at_blanks
187
188sub print {
189    my ( $self, $input_line ) = @_;
190    print $input_line;
191}
192
193# called once after the last line of a file
194sub finish_formatting {
195    my $self = shift;
196    $self->flush_comments();
197}
198