1#!/usr/bin/perl -w
2#
3# Clean a text file -- or directory of text files -- of stealth whitespace.
4# WARNING: this can be a highly destructive operation.  Use with caution.
5#
6
7use bytes;
8use File::Basename;
9
# Default options
#
# Maximum visual line width, in columns, before a "line exceeds" warning
# is printed for a line.  A value of 0 disables the width check.  It can
# be overridden on the command line with "-width N" (or "-w N").
$max_width = 79;
12
# Normalize runs of spaces that are immediately followed by a tab.
#
# Tab stops are assumed every 8 columns.  Spaces directly in front of a
# tab are either dropped or replaced by tabs, so that the text after the
# tab stays in the same column.  Spaces that are not followed by a tab
# (including spaces at the very end of the string) are kept unchanged.
# The column counter restarts after every "\n" or "\r".
#
# Takes the text to clean and returns the cleaned copy.
sub clean_space_tabs($)
{
    no bytes;			# Tab alignment depends on characters

    my ($text)  = @_;
    my $out     = '';
    my $col     = 0;		# Column of the next character written to $out
    my $pending = 0;		# Spaces seen but not yet written to $out

    foreach my $ch (split(//, $text)) {
	if ($ch eq ' ') {
	    # Hold the space back until we know what follows it
	    $pending++;
	    next;
	}

	if ($ch eq "\t") {
	    # The held-back spaces are absorbed into however many tabs
	    # are needed to reach the tab stop the original text reached.
	    my $stop = ($col + $pending + 8) & ~7;
	    $out .= "\t" x (($stop >> 3) - ($col >> 3));
	    $col = $stop;
	    $pending = 0;
	    next;
	}

	# Any other character: the held-back spaces are real, emit them
	$out .= (' ' x $pending) . $ch;
	if ($ch eq "\n" || $ch eq "\r") {
	    $col = 0;
	} else {
	    $col += $pending + 1;
	}
	$pending = 0;
    }

    return $out . (' ' x $pending);
}
52
# Compute the visual width of a string.
#
# The string may hold several lines separated by "\n"; the result is the
# width, in columns, of the widest one.  A tab advances to the next
# multiple of 8 columns; every other character (apart from "\n", which
# ends the line) counts as one column.
sub strwidth($) {
    no bytes;			# Tab alignment depends on characters

    my ($text) = @_;
    my $widest = 0;		# Widest completed line so far
    my $col    = 0;		# Width of the line being scanned

    foreach my $ch (split(//, $text)) {
	if ($ch eq "\n") {
	    $widest = $col if ($col > $widest);
	    $col = 0;
	} elsif ($ch eq "\t") {
	    $col = ($col + 8) & ~7;
	} else {
	    $col++;
	}
    }

    # The final line may not be newline-terminated
    return ($col > $widest) ? $col : $widest;
}
77
# Program name, used as the prefix of diagnostics.
$name = basename($0);

# Files to process, in command-line order.
@files = ();

# Parse the command line.
#
#   -width N, -w N   set the maximum line width ($max_width); N must be a
#                    non-negative integer, and 0 disables the width check
#   -anything-else   print a usage message and exit with status 1
#   other arguments  are taken as file names
#
# A lexical loop variable is used rather than $a, which is reserved for
# sort comparison blocks.
while (defined(my $arg = shift(@ARGV))) {
    if ($arg !~ /^-/) {
	push(@files, $arg);
	next;
    }

    # Only the width option is known, and it requires a numeric value.
    # A missing or non-numeric value is a usage error rather than being
    # silently treated as 0.
    my $width = ($arg eq '-width' || $arg eq '-w') ? shift(@ARGV) : undef;

    if (!defined($width) || $width !~ /^[0-9]+\z/) {
	print STDERR "Usage: $name [-width #] files...\n";
	exit 1;
    }

    $max_width = $width + 0;
}
94
# Clean every file named on the command line, in place.
#
# For each file:
#   1. skip it unless it is a regular file that can be opened read/write;
#   2. skip it if it contains a NUL byte (treated as binary);
#   3. strip trailing whitespace and carriage returns from each line,
#      clean up space-before-tab sequences, and drop blank lines at the
#      end of the file;
#   4. warn about lines wider than $max_width (when it is non-zero);
#   5. rewrite and truncate the file, but only if the content changed.
foreach $f ( @files ) {
    print STDERR "$name: $f\n";

    if (! -f $f) {
	print STDERR "$f: not a file\n";
	next;
    }

    if (!open(FILE, '+<', $f)) {
	print STDERR "$name: Cannot open file: $f: $!\n";
	next;
    }

    binmode FILE;

    # First, verify that it is not a binary file; consider any file
    # with a zero byte to be a binary file.  Is there any better, or
    # additional, heuristic that should be applied?
    my $is_binary = 0;
    my ($data, $nread);

    # read() returns 0 at end of file and undef on error; both end the loop
    while (($nread = read(FILE, $data, 65536))) {
	if ($data =~ /\0/) {
	    $is_binary = 1;
	    last;
	}
    }

    if (!defined($nread)) {
	print STDERR "$name: $f: read error: $!\n";
	close(FILE);
	next;
    }

    if ($is_binary) {
	print STDERR "$name: $f: binary file\n";
	close(FILE);		# Do not leave the handle open when skipping
	next;
    }

    if (!seek(FILE, 0, 0)) {
	print STDERR "$name: $f: cannot rewind: $!\n";
	close(FILE);
	next;
    }

    my @blanks  = ();		# Blank lines seen since the last non-blank line
    my @lines   = ();		# Cleaned lines to be written back
    my $lineno  = 0;
    my $changed = 0;		# True once any line differs from its original

    while (defined(my $line = <FILE>)) {
	$lineno++;

	my $orig = $line;
	$line =~ s/[ \t\r]*$//;		# Remove trailing spaces
	$line = clean_space_tabs($line);

	# Compare content rather than byte counts: some rewrites (such as
	# a single space before a tab at column 7) keep the length.
	$changed = 1 if ($line ne $orig);

	if ($line eq "\n") {
	    # Hold blank lines back; they are only kept if a non-blank
	    # line follows them.
	    push(@blanks, $line);
	} else {
	    push(@lines, @blanks, $line);
	    @blanks = ();
	}

	my $l_width = strwidth($line);
	if ($max_width && $l_width > $max_width) {
	    print STDERR
		"$f:$lineno: line exceeds $max_width characters ($l_width)\n";
	}
    }

    # Any blanks at the end of the file are discarded, which also counts
    # as a change to the file.

    if ($changed || @blanks) {
	# Only write to the file if changed
	seek(FILE, 0, 0)
	    or die "$name: Failed to rewind modified file: $f: $!\n";
	print FILE @lines
	    or die "$name: Failed to write modified file: $f: $!\n";

	# tell() reports failure as -1, not undef
	my $where = tell(FILE);
	if (!defined($where) || $where < 0 || !truncate(FILE, $where)) {
	    die "$name: Failed to truncate modified file: $f: $!\n";
	}
    }

    # Buffered write errors may only surface when the handle is closed
    if (!close(FILE)) {
	print STDERR "$name: $f: close failed: $!\n";
    }
}
177