1#!/usr/bin/env perl
2# SPDX-License-Identifier: GPL-2.0
3#
4# Clean a patch file -- or directory of patch files -- of stealth whitespace.
5# WARNING: this can be a highly destructive operation.  Use with caution.
6#
7
8use warnings;
9use bytes;
10use File::Basename;
11
# Default options
# $max_width is the widest visual width an added line may have before a
# warning is printed; a value of 0 disables the check.  It can be
# overridden on the command line with -width (or -w).
$max_width = 79;
14
# Tidy up runs of spaces that are immediately followed by a tab.  Such
# spaces are either dropped (when the tab alone reaches the same tab
# stop) or converted into extra tabs (when they cross one or more
# 8-column tab stops).  Spaces that are not followed by a tab are kept.
#
# Takes the text to clean and returns the cleaned copy.
sub clean_space_tabs($)
{
    no bytes;			# Columns are counted in characters

    my ($input) = @_;
    my $output = '';
    my $column = 0;		# Column reached by what is in $output
    my $pending = 0;		# Spaces seen but not yet emitted

    foreach my $ch (split(//, $input)) {
	if ($ch eq ' ') {
	    # Hold the space back until we know what follows it
	    $pending++;
	    next;
	}

	if ($ch eq "\t") {
	    # Emit as many tabs as there are tab stops between the
	    # current column and the stop this tab lands on.
	    my $next_stop = ($column + $pending + 8) & ~7;
	    $output .= "\t" x (($next_stop >> 3) - ($column >> 3));
	    $column = $next_stop;
	    $pending = 0;
	    next;
	}

	# Any other character: the held-back spaces are real content
	$output .= (' ' x $pending) . $ch;
	if ($ch eq "\n" || $ch eq "\r") {
	    $column = 0;
	} else {
	    $column += $pending + 1;
	}
	$pending = 0;
    }

    # Spaces at the very end of the input are passed through untouched
    return $output . (' ' x $pending);
}
54
# Return the visual width of a string: the column count of its widest
# line, with tabs advancing to the next multiple of 8 and every other
# character except newline counting as one column.
sub strwidth($) {
    no bytes;			# Columns are counted in characters

    my ($text) = @_;
    my $column = 0;		# Width of the line currently being scanned
    my $widest = 0;		# Widest completed line so far

    foreach my $ch (split(//, $text)) {
	if ($ch eq "\n") {
	    $widest = $column if ($column > $widest);
	    $column = 0;
	} elsif ($ch eq "\t") {
	    $column = ($column + 8) & ~7;
	} else {
	    $column++;
	}
    }

    # The last line may not be newline-terminated
    return ($column > $widest) ? $column : $widest;
}
79
# Program name used as the prefix of every diagnostic
$name = basename($0);

# Files to clean, in command-line order
@files = ();

# Parse the command line.  Anything starting with '-' is an option; the
# only one recognised is -width (or -w), which takes a non-negative
# integer argument.  An unknown option, or a -width whose argument is
# missing or not a number, prints the usage message and exits with
# status 1 instead of silently setting the width to 0.  Every other
# argument is taken as a file name.
while (defined(my $arg = shift(@ARGV))) {
    if ($arg =~ /^-/) {
	my $width;
	if ($arg eq '-width' || $arg eq '-w') {
	    $width = shift(@ARGV);
	}

	if (defined($width) && $width =~ /\A[0-9]+\z/) {
	    $max_width = $width+0;
	} else {
	    print STDERR "Usage: $name [-width #] files...\n";
	    exit 1;
	}
    } else {
	push(@files, $arg);
    }
}
96
# Unified-diff hunk header: "@@ -start[,count] +start[,count] @@tail".
# Captures: old start, old count, new start, new count, tail (the text
# after the closing "@@", without the final newline).  Either count may
# be omitted, which in the unified format means a one-line range.
my $hunk_header_re = qr/^\@\@\s+
			\-([0-9]+)(?:,([0-9]+))?\s+
			\+([0-9]+)(?:,([0-9]+))?\s
			\@\@(.*)$/x;

# Clean every file named on the command line, in place.
#
# For each file: skip it unless it is a regular, non-binary file; walk it
# line by line; inside each hunk strip trailing whitespace from added
# ("+") lines, clean their space-tab sequences, warn about added lines
# wider than $max_width, and drop blank added lines at the very end of
# the hunk (fixing up the header's new-side count).  The file is rewritten
# only if something changed and the patch parsed cleanly.
foreach my $f ( @files ) {
    print STDERR "$name: $f\n";

    if (! -f $f) {
	print STDERR "$f: not a file\n";
	next;
    }

    my $fh;
    if (!open($fh, '+<', $f)) {
	print STDERR "$name: Cannot open file: $f: $!\n";
	next;
    }

    binmode $fh;

    # First, verify that it is not a binary file; consider any file
    # with a zero byte to be a binary file.  Is there any better, or
    # additional, heuristic that should be applied?
    my $is_binary = 0;
    my $data;

    while (read($fh, $data, 65536) > 0) {
	if ($data =~ /\0/) {
	    $is_binary = 1;
	    last;
	}
    }

    if ($is_binary) {
	print STDERR "$name: $f: binary file\n";
	close($fh);
	next;
    }

    seek($fh, 0, 0);

    my $lineno = 0;
    my @lines = ();		# Output file contents, one line per element
    my @hunk_lines = ();	# Header plus body of the hunk being read

    # Fields of the current hunk's header, kept for rewriting it
    my ($h_mstart, $h_mlin, $h_pstart, $h_plin, $h_tail);

    # Old-side and new-side lines still expected in the current hunk
    my $minus_lines = 0;
    my $plus_lines = 0;

    my $in_hunk = 0;
    my $changed = 0;		# Set as soon as the output differs
    my $err = 0;

    while ( defined(my $line = <$fh>) ) {
	$lineno++;

	if (!$in_hunk) {
	    my $is_header = ($line =~ $hunk_header_re);
	    if ($is_header) {
		($h_mstart, $h_pstart, $h_tail) = ($1, $3, $5);
		# An omitted count means exactly one line
		$h_mlin = defined($2) ? $2+0 : 1;
		$h_plin = defined($4) ? $4+0 : 1;
		$minus_lines = $h_mlin;
		$plus_lines = $h_plin;
	    }

	    if ($is_header && ($minus_lines || $plus_lines)) {
		$in_hunk = 1;
		@hunk_lines = ($line);
	    } else {
		# Not a hunk header, or a header for an empty hunk with
		# no body to follow: copy the line through unchanged.
		push(@lines, $line);
	    }
	} else {
	    # We're in a hunk

	    if ($line =~ /^\+/) {
		$plus_lines--;

		my $text = substr($line, 1);
		$text =~ s/[ \t\r]*$//;		# Remove trailing spaces
		$text = clean_space_tabs($text);

		my $l_width = strwidth($text);
		if ($max_width && $l_width > $max_width) {
		    print STDERR
			"$f:$lineno: adds line exceeds $max_width ",
			"characters ($l_width)\n";
		}

		my $cleaned = '+'.$text;
		$changed = 1 if ($cleaned ne $line);
		push(@hunk_lines, $cleaned);
	    } elsif ($line =~ /^\-/) {
		$minus_lines--;
		push(@hunk_lines, $line);
	    } elsif ($line =~ /^ /) {
		$plus_lines--;
		$minus_lines--;
		push(@hunk_lines, $line);
	    } elsif ($line =~ /^\\/) {
		# "\ No newline at end of file" marker: it annotates the
		# previous line and is not counted in the header ranges.
		push(@hunk_lines, $line);
	    } else {
		print STDERR "$name: $f: malformed patch\n";
		$err = 1;
		last;
	    }

	    if ($plus_lines < 0 || $minus_lines < 0) {
		print STDERR "$name: $f: malformed patch\n";
		$err = 1;
		last;
	    } elsif ($plus_lines == 0 && $minus_lines == 0) {
		# End of a hunk.  Walk the body backwards, dropping blank
		# added lines until the first context or added line that
		# has content; removed lines do not stop the search.
		my @h = ();
		my $adj = 0;
		my $done = 0;

		for (my $i = scalar(@hunk_lines)-1; $i > 0; $i--) {
		    my $l = $hunk_lines[$i];
		    if (!$done && $l eq "+\n") {
			$adj++; # Skip this line
		    } else {
			$done = 1 if ($l =~ /^[ +]/);
			unshift(@h, $l);
		    }
		}

		my $header = $hunk_lines[0];
		@hunk_lines = ();	# Free memory

		if ($adj) {
		    # Lines were dropped: the new-side count must shrink
		    $header = sprintf("@@ -%d,%d +%d,%d @@%s\n",
				      $h_mstart, $h_mlin, $h_pstart,
				      $h_plin-$adj, $h_tail);
		    $changed = 1;
		}

		# Transfer to the output array
		push(@lines, $header, @h);

		$in_hunk = 0;
	    }
	}
    }

    # Input ended in the middle of a hunk (and this was not already
    # reported from inside the loop)
    if ($in_hunk && !$err) {
	print STDERR "$name: $f: malformed patch\n";
	$err = 1;
    }

    if (!$err && $changed) {
	# Only write to the file if changed
	seek($fh, 0, 0)
	    or die "$name: Failed to write modified file: $f: $!\n";
	print {$fh} @lines
	    or die "$name: Failed to write modified file: $f: $!\n";

	my $where = tell($fh);
	if ( !defined($where) || !truncate($fh, $where) ) {
	    die "$name: Failed to truncate modified file: $f: $!\n";
	}
    }

    close($fh);
}
261