1#!/usr/bin/env perl
2# SPDX-License-Identifier: GPL-2.0
3#
4# Treewide grep for references to files under doc, and report
5# non-existing files in stderr.
6
7use warnings;
8use strict;
9use Getopt::Long qw(:config no_auto_abbrev);
10
# NOTE: only add entries here when a file is gone but the text still has
# to mention the former documentation file, for example to give credit
# for the original work.
# Keys are the referencing file, values are the reference to accept.
my %false_positives = (
);

# Strip everything before the last directory component, keeping
# "<dir>/<script>"; the result is later compared against the paths
# printed by git grep so that this script can skip itself.
(my $scriptname = $0) =~ s{.*/([^/]+/)}{$1};
19
# Parse arguments
#   --fix    try to rewrite broken references automatically
#   --warn   report broken references as "Warning: ..." lines
#   --help   print the usage line and exit
my $help = 0;
my $fix = 0;
my $warn = 0;

# Parse the command line before anything else so that --help also works
# outside a git tree. Getopt::Long reports unknown options on stderr and
# returns false; show the usage text in that case instead of carrying on
# with a half-understood command line.
GetOptions(
	'fix' => \$fix,
	'warn' => \$warn,
	'h|help|usage' => \$help,
) or $help = 1;

if ($help != 0) {
	print "$scriptname [--help] [--fix] [--warn]\n";
	exit -1;
}

# The checks below rely on "git grep". ".git" is a directory in a plain
# clone but a regular file in linked worktrees and submodules, so only
# test for its existence.
if (! -e ".git") {
	print "Warning: can't check if file exists, as this is not a git tree\n";
	exit 0;
}
40
# Step 1: find broken references
print "Finding broken references. This may take a while...  " if ($fix);

# Broken plain-path references, counted per reference (filled in later
# and consumed by the auto-fix step).
my %broken_ref;

# Number of broken :doc: references seen so far
my $doc_fix = 0;

# Check the Sphinx :doc:`...` roles used under doc/.
# The command is passed as a list, so no shell is involved and the
# backtick in the pattern needs no quoting.
open(my $doc_refs_fh, '-|', 'git', 'grep', ':doc:`', 'doc/')
     or die "Failed to run git grep";
while (my $line = <$doc_refs_fh>) {
	# git grep prints "<file>:<matching line>"
	next if ($line !~ m,^([^:]+):(.*),);

	my $file = $1;
	my $text = $2;

	# Directory of the referencing file, with a trailing slash
	my $d = $file;
	$d =~ s,(.*/).*,$1,;

	# A single line may carry several :doc: roles; check each of them
	while ($text =~ m,:doc:`([^`]+)`,g) {
		my $doc_ref = $1;

		# For the "Title <target>" form keep only the target
		my $f = $doc_ref;
		$f =~ s,.*\<([^\>]+)\>,$1,;

		if ($f =~ m,^/,) {
			# Sphinx resolves absolute document names against the
			# source directory (assumed to be doc/ here), not
			# against the referencing file.
			$f = "doc$f.rst";
		} else {
			$f = "$d$f.rst";
		}

		next if (grep -e, glob("$f"));

		# Only warn once that --fix cannot repair this kind of reference
		if ($fix && !$doc_fix) {
			print STDERR "\nWARNING: Currently, can't fix broken :doc:`` fields\n";
		}
		$doc_fix++;

		# Report the file that contains the broken reference
		print STDERR "$file: :doc:`$doc_ref`\n";
	}
}
close $doc_refs_fh;
73
# Look for plain "doc/..." paths mentioned anywhere in the tree and check
# that each one exists. The command is passed as a list so that no shell
# is involved.
open(my $file_refs_fh, '-|', 'git', 'grep', 'doc/')
     or die "Failed to run git grep";
while (my $line = <$file_refs_fh>) {
	# git grep prints "<file>:<matching line>"
	next if ($line !~ m/^([^:]+):(.*)/);

	my $f = $1;
	my $ln = $2;

	# On linux-next, discard the Next/ directory
	next if ($f =~ m,^Next/,);

	# Makefiles and scripts contain nasty expressions to parse docs
	next if ($f =~ m/Makefile/ || $f =~ m/\.sh$/);

	# Skip this script
	next if ($f eq $scriptname);

	# Ignore the dir where documentation will be built
	next if ($ln =~ m,\b(\S*)doc/output,);

	if ($ln =~ m,\b(\S*)(doc/[A-Za-z0-9\_\.\,\~/\*\[\]\?+-]*)(.*),) {
		my $prefix = $1;	# non-blank text glued in front of doc/
		my $ref = $2;		# the doc/... path itself
		my $extra = $3;		# remainder of the line

		# some file references are like:
		# /usr/src/linux/doc/DMA-{API,mapping}.txt
		# For now, ignore them
		next if ($extra =~ m/^\{/);

		# Remove footnotes at the end like:
		# doc/devicetree/dt-object-internal.txt[1]
		$ref =~ s/(txt|rst)\[\d+]$/$1/;

		# Remove ending ']' without any '['
		$ref =~ s/\].*// if (!($ref =~ m/\[/));

		# Remove punctuation marks at the end
		$ref =~ s/[\,\.]+$//;

		# Reference including its prefix, minus common decorations
		# (<file: / ref: markers, leading quotes, $(VAR)/ prefixes)
		my $fulref = "$prefix$ref";

		$fulref =~ s/^(\<file|ref)://;
		$fulref =~ s/^[\'\`]+//;
		$fulref =~ s,^\$\(.*\)/,,;

		# Remove URL false-positives
		next if ($fulref =~ m/^http/);

		# Check if exists, evaluating wildcards
		next if (grep -e, glob("$ref $fulref"));

		# Accept relative doc paths for tools/
		if ($f =~ m/tools/) {
			my $path = $f;
			$path =~ s,(.*)/.*,$1,;
			next if (grep -e, glob("$path/$ref $path/../$ref $path/$fulref"));
		}

		# Discard known false-positives
		if (defined($false_positives{$f})) {
			next if ($false_positives{$f} eq $fulref);
		}

		if ($fix) {
			# Only queue references the auto-fixer is allowed to touch
			if (!($ref =~ m/(scripts|Kconfig|Kbuild)/)) {
				$broken_ref{$ref}++;
			}
		} elsif ($warn) {
			print STDERR "Warning: $f references a file that doesn't exist: $fulref\n";
		} else {
			print STDERR "$f: $fulref\n";
		}
	}
}
close $file_refs_fh;
152
exit 0 if (!$fix);

# Run "find <dir> -iname <pattern>" without a shell, so that glob
# characters in the pattern reach find untouched. Returns the list of
# matching paths, one per output line (empty if find could not be run).
sub _find_iname {
	my ($dir, $pattern) = @_;

	open(my $fh, '-|', 'find', $dir, '-iname', $pattern) or do {
		warn "Failed to run find: $!\n";
		return;
	};
	chomp(my @found = <$fh>);
	close $fh;

	return @found;
}

# List the tracked files that contain $text as a literal string.
sub _files_mentioning {
	my ($text) = @_;

	open(my $fh, '-|', 'git', 'grep', '-l', '-F', '-e', $text) or do {
		warn "Failed to run git grep: $!\n";
		return;
	};
	chomp(my @files = <$fh>);
	close $fh;

	return @files;
}

# Replace every literal occurrence of $old by $new inside $file.
# The file is handled as raw bytes and only rewritten if it changed.
sub _replace_in_file {
	my ($file, $old, $new) = @_;

	open(my $in, '<:raw', $file) or do {
		warn "Can't read $file: $!\n";
		return;
	};
	my $content = do { local $/; <$in> };
	close $in;
	return if (!defined($content));

	# \Q...\E: the reference is a path, not a regular expression
	return if (!($content =~ s/\Q$old\E/$new/g));

	open(my $out, '>:raw', $file) or do {
		warn "Can't write $file: $!\n";
		return;
	};
	print $out $content;
	close $out or warn "Error while writing $file: $!\n";

	return;
}

# Step 2: Seek for file name alternatives
print "Auto-fixing broken references. Please double-check the results\n";

foreach my $ref (keys %broken_ref) {
	my $basedir = ".";
	# On translations, only seek inside the translations directory
	$basedir  = $1 if ($ref =~ m,(doc/translations/[^/]+),);

	# get just the basename
	my $new = $ref;
	$new =~ s,.*/,,;

	# Candidate replacements found so far
	my @found;

	# usual reason for breakage: DT file moved around
	if ($ref =~ /devicetree/) {
		# usual reason for breakage: DT file renamed to .yaml
		my $new_ref = $ref;
		$new_ref =~ s/\.txt$/.yaml/;
		@found = ($new_ref) if (-f $new_ref);

		if (!@found) {
			my $search = $new;
			@found = _find_iname("doc/device-tree-bindings/", "*$search*")
				if (length($search));

			# Manufacturer name may have changed: retry without the
			# "vendor," prefix, but only if there was one to strip
			if (!@found && $search =~ s/^.*,//) {
				@found = _find_iname("doc/device-tree-bindings/", "*$search*")
					if (length($search));
			}
		}
	}

	# usual reason for breakage: file renamed to .rst
	if (!@found) {
		$new =~ s/\.txt$/.rst/;
		@found = _find_iname($basedir, $new) if (length($new));
	}

	# usual reason for breakage: use dash or underline
	# (searching again is pointless when the name has neither; a further
	# "same name elsewhere" search with this same pattern and directory
	# would likewise only repeat this one, so none is done)
	if (!@found && $new =~ s/[-_]/[-_]/g) {
		@found = _find_iname($basedir, $new);
	}

	if (!@found) {
		print STDERR "ERROR: Didn't find a replacement for $ref\n";
	} elsif (scalar(@found) > 1) {
		print STDERR "WARNING: Won't auto-replace, as found multiple files close to $ref:\n";
		foreach my $j (@found) {
			# Drop the "./" that find prepends when searching "."
			$j =~ s,^\./,,;
			print STDERR "    $j\n";
		}
	} else {
		my $f = $found[0];
		$f =~ s,^\./,,;
		print "INFO: Replacing $ref to $f\n";
		foreach my $j (_files_mentioning($ref)) {
			_replace_in_file($j, $ref, $f);
		}
	}
}
227