whereis.c revision 102072
1/*
 * Copyright © 2002, Jörg Wunsch
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 *    notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 *    notice, this list of conditions and the following disclaimer in the
11 *    documentation and/or other materials provided with the distribution.
12 *
13 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
14 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
15 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
16 * DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT,
17 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
18 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
19 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
21 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
22 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
23 * POSSIBILITY OF SUCH DAMAGE.
24 */
25
26/*
27 * 4.3BSD UI-compatible whereis(1) utility.  Rewritten from scratch
28 * since the original 4.3BSD version suffers legal problems that
29 * prevent it from being redistributed, and since the 4.4BSD version
30 * was pretty inferior in functionality.
31 */
32
33#include <sys/types.h>
34
35__FBSDID("$FreeBSD: head/usr.bin/whereis/whereis.c 102072 2002-08-18 18:21:18Z johan $");
36
37#include <sys/stat.h>
38#include <sys/sysctl.h>
39
40#include <dirent.h>
41#include <err.h>
42#include <errno.h>
43#include <regex.h>
44#include <stdio.h>
45#include <stdlib.h>
46#include <string.h>
47#include <sysexits.h>
48#include <unistd.h>
49
50#include "pathnames.h"
51
52typedef const char *ccharp;
53
54int opt_b, opt_m, opt_q, opt_s, opt_u, opt_x;
55ccharp *bindirs, *mandirs, *sourcedirs;
56char **query;
57
58const char *sourcepath = PATH_SOURCES;
59
60char	*colonify(ccharp *);
61int	 contains(ccharp *, const char *);
62void	 decolonify(char *, ccharp **, int *);
63void	 defaults(void);
64void	 scanopts(int, char **);
65void	 usage(void);
66
67/*
68 * Throughout this program, a number of strings are dynamically
69 * allocated but never freed.  Their memory is written to when
70 * splitting the strings into string lists which will later be
71 * processed.  Since it's important that those string lists remain
72 * valid even after the functions allocating the memory returned,
73 * those functions cannot free them.  They could be freed only at end
74 * of main(), which is pretty pointless anyway.
75 *
 * The overall amount of memory to be allocated for processing the
 * strings is not expected to exceed a few kilobytes.  For that
 * reason, allocation can in practice be assumed to succeed (within
 * a virtual memory environment), so we simply bail out using
 * abort(3) in case of an allocation failure.
81 */
82
/*
 * Print the command synopsis through errx(3) and exit with status
 * EX_USAGE.  This function does not return.
 */
void
usage(void)
{
	static const char synopsis[] =
	    "usage: whereis [-bmqsux] [-BMS dir... -f] name ...";

	errx(EX_USAGE, "%s", synopsis);
}
89
90/*
91 * Scan options passed to program.
92 *
93 * Note that the -B/-M/-S options expect a list of directory
94 * names that must be terminated with -f.
95 */
96void
97scanopts(int argc, char **argv)
98{
99	int c, i, opt_f;
100	ccharp **dirlist;
101
102	opt_f = 0;
103	while ((c = getopt(argc, argv, "BMSbfmqsux")) != -1)
104		switch (c) {
105		case 'B':
106			dirlist = &bindirs;
107			goto dolist;
108
109		case 'M':
110			dirlist = &mandirs;
111			goto dolist;
112
113		case 'S':
114			dirlist = &sourcedirs;
115		  dolist:
116			i = 0;
117			*dirlist = realloc(*dirlist, (i + 1) * sizeof(char *));
118			(*dirlist)[i] = NULL;
119			while (optind < argc &&
120			       strcmp(argv[optind], "-f") != 0 &&
121			       strcmp(argv[optind], "-B") != 0 &&
122			       strcmp(argv[optind], "-M") != 0 &&
123			       strcmp(argv[optind], "-S") != 0) {
124				decolonify(argv[optind], dirlist, &i);
125				optind++;
126			}
127			break;
128
129		case 'b':
130			opt_b = 1;
131			break;
132
133		case 'f':
134			goto breakout;
135
136		case 'm':
137			opt_m = 1;
138			break;
139
140		case 'q':
141			opt_q = 1;
142			break;
143
144		case 's':
145			opt_s = 1;
146			break;
147
148		case 'u':
149			opt_u = 1;
150			break;
151
152		case 'x':
153			opt_x = 1;
154			break;
155
156		default:
157			usage();
158		}
159  breakout:
160	if (optind == argc)
161		usage();
162	query = argv + optind;
163}
164
165/*
166 * Find out whether string `s' is contained in list `cpp'.
167 */
168int
169contains(ccharp *cpp, const char *s)
170{
171	ccharp cp;
172
173	if (cpp == NULL)
174		return (0);
175
176	while ((cp = *cpp) != NULL) {
177		if (strcmp(cp, s) == 0)
178			return (1);
179		cpp++;
180	}
181	return (0);
182}
183
184/*
185 * Split string `s' at colons, and pass it to the string list pointed
186 * to by `cppp' (which has `*ip' elements).  Note that the original
187 * string is modified by replacing the colon with a NUL byte.  The
188 * partial string is only added if it has a length greater than 0, and
189 * if it's not already contained in the string list.
190 */
191void
192decolonify(char *s, ccharp **cppp, int *ip)
193{
194	char *cp;
195
196	while ((cp = strchr(s, ':')), *s != '\0') {
197		if (cp)
198			*cp = '\0';
199		if (strlen(s) && !contains(*cppp, s)) {
200			*cppp = realloc(*cppp, (*ip + 2) * sizeof(char *));
201			if (cppp == NULL)
202				abort();
203			(*cppp)[*ip] = s;
204			(*cppp)[*ip + 1] = NULL;
205			(*ip)++;
206		}
207		if (cp)
208			s = cp + 1;
209		else
210			break;
211	}
212}
213
214/*
215 * Join string list `cpp' into a colon-separated string.
216 */
217char *
218colonify(ccharp *cpp)
219{
220	size_t s;
221	char *cp;
222	int i;
223
224	if (cpp == NULL)
225		return (0);
226
227	for (s = 0, i = 0; cpp[i] != NULL; i++)
228		s += strlen(cpp[i]) + 1;
229	if ((cp = malloc(s + 1)) == NULL)
230		abort();
231	for (i = 0, *cp = '\0'; cpp[i] != NULL; i++) {
232		strcat(cp, cpp[i]);
233		strcat(cp, ":");
234	}
235	cp[s - 1] = '\0';		/* eliminate last colon */
236
237	return (cp);
238}
239
240/*
241 * Provide defaults for all options and directory lists.
242 */
243void
244defaults(void)
245{
246	size_t s;
247	char *b, buf[BUFSIZ], *cp;
248	int nele;
249	FILE *p;
250	DIR *dir;
251	struct stat sb;
252	struct dirent *dirp;
253
254	/* default to -bms if none has been specified */
255	if (!opt_b && !opt_m && !opt_s)
256		opt_b = opt_m = opt_s = 1;
257
258	/* -b defaults to default path + /usr/libexec +
259	 * /usr/games + user's path */
260	if (!bindirs) {
261		if (sysctlbyname("user.cs_path", (void *)NULL, &s,
262				 (void *)NULL, 0) == -1)
263			err(EX_OSERR, "sysctlbyname(\"user.cs_path\")");
264		if ((b = malloc(s + 1)) == NULL)
265			abort();
266		if (sysctlbyname("user.cs_path", b, &s, (void *)NULL, 0) == -1)
267			err(EX_OSERR, "sysctlbyname(\"user.cs_path\")");
268		nele = 0;
269		decolonify(b, &bindirs, &nele);
270		bindirs = realloc(bindirs, (nele + 3) * sizeof(char *));
271		if (bindirs == NULL)
272			abort();
273		bindirs[nele++] = PATH_LIBEXEC;
274		bindirs[nele++] = PATH_GAMES;
275		bindirs[nele] = NULL;
276		if ((cp = getenv("PATH")) != NULL) {
277			/* don't destroy the original environment... */
278			if ((b = malloc(strlen(cp) + 1)) == NULL)
279				abort();
280			strcpy(b, cp);
281			decolonify(b, &bindirs, &nele);
282		}
283	}
284
285	/* -m defaults to $(manpath) */
286	if (!mandirs) {
287		if ((p = popen(MANPATHCMD, "r")) == NULL)
288			err(EX_OSERR, "cannot execute manpath command");
289		if (fgets(buf, BUFSIZ - 1, p) == NULL ||
290		    pclose(p))
291			err(EX_OSERR, "error processing manpath results");
292		if ((b = strchr(buf, '\n')) != NULL)
293			*b = '\0';
294		if ((b = malloc(strlen(buf) + 1)) == NULL)
295			abort();
296		strcpy(b, buf);
297		nele = 0;
298		decolonify(b, &mandirs, &nele);
299	}
300
301	/* -s defaults to precompiled list, plus subdirs of /usr/ports */
302	if (!sourcedirs) {
303		if ((b = malloc(strlen(sourcepath) + 1)) == NULL)
304			abort();
305		strcpy(b, sourcepath);
306		nele = 0;
307		decolonify(b, &sourcedirs, &nele);
308
309		if (stat(PATH_PORTS, &sb) == -1) {
310			if (errno == ENOENT)
311				/* no /usr/ports, we are done */
312				return;
313			err(EX_OSERR, "stat(" PATH_PORTS ")");
314		}
315		if ((sb.st_mode & S_IFMT) != S_IFDIR)
316			/* /usr/ports is not a directory, ignore */
317			return;
318		if (access(PATH_PORTS, R_OK | X_OK) != 0)
319			return;
320		if ((dir = opendir(PATH_PORTS)) == NULL)
321			err(EX_OSERR, "opendir" PATH_PORTS ")");
322		while ((dirp = readdir(dir)) != NULL) {
323			if (dirp->d_name[0] == '.' ||
324			    strcmp(dirp->d_name, "CVS") == 0)
325				/* ignore dot entries and CVS subdir */
326				continue;
327			if ((b = malloc(sizeof PATH_PORTS + 1 + dirp->d_namlen))
328			    == NULL)
329				abort();
330			strcpy(b, PATH_PORTS);
331			strcat(b, "/");
332			strcat(b, dirp->d_name);
333			if (stat(b, &sb) == -1 ||
334			    (sb.st_mode & S_IFMT) != S_IFDIR ||
335			    access(b, R_OK | X_OK) != 0) {
336				free(b);
337				continue;
338			}
339			sourcedirs = realloc(sourcedirs,
340					     (nele + 2) * sizeof(char *));
341			if (sourcedirs == NULL)
342				abort();
343			sourcedirs[nele++] = b;
344			sourcedirs[nele] = NULL;
345		}
346		closedir(dir);
347	}
348}
349
350int
351main(int argc, char **argv)
352{
353	int unusual, i, printed;
354	char *bin, buf[BUFSIZ], *cp, *cp2, *man, *name, *src;
355	ccharp *dp;
356	size_t s;
357	struct stat sb;
358	regex_t re, re2;
359	regmatch_t matches[2];
360	regoff_t rlen;
361	FILE *p;
362
363	scanopts(argc, argv);
364	defaults();
365
366	if (mandirs == NULL)
367		opt_m = 0;
368	if (bindirs == NULL)
369		opt_b = 0;
370	if (sourcedirs == NULL)
371		opt_s = 0;
372	if (opt_m + opt_b + opt_s == 0)
373		errx(EX_DATAERR, "no directories to search");
374
375	if (opt_m) {
376		setenv("MANPATH", colonify(mandirs), 1);
377		if ((i = regcomp(&re, MANWHEREISMATCH, REG_EXTENDED)) != 0) {
378			regerror(i, &re, buf, BUFSIZ - 1);
379			errx(EX_UNAVAILABLE, "regcomp(%s) failed: %s",
380			     MANWHEREISMATCH, buf);
381		}
382	}
383
384	for (; (name = *query) != NULL; query++) {
385		/* strip leading path name component */
386		if ((cp = strrchr(name, '/')) != NULL)
387			name = cp + 1;
388		/* strip SCCS or RCS suffix/prefix */
389		if (strlen(name) > 2 && strncmp(name, "s.", 2) == 0)
390			name += 2;
391		if ((s = strlen(name)) > 2 && strcmp(name + s - 2, ",v") == 0)
392			name[s - 2] = '\0';
393		/* compression suffix */
394		s = strlen(name);
395		if (s > 2 &&
396		    (strcmp(name + s - 2, ".z") == 0 ||
397		     strcmp(name + s - 2, ".Z") == 0))
398			name[s - 2] = '\0';
399		else if (s > 3 &&
400			 strcmp(name + s - 3, ".gz") == 0)
401			name[s - 3] = '\0';
402		else if (s > 4 &&
403			 strcmp(name + s - 4, ".bz2") == 0)
404			name[s - 4] = '\0';
405
406		unusual = 0;
407		bin = man = src = NULL;
408		s = strlen(name);
409
410		if (opt_b) {
411			/*
412			 * Binaries have to match exactly, and must be regular
413			 * executable files.
414			 */
415			unusual++;
416			for (dp = bindirs; *dp != NULL; dp++) {
417				cp = malloc(strlen(*dp) + 1 + s + 1);
418				if (cp == NULL)
419					abort();
420				strcpy(cp, *dp);
421				strcat(cp, "/");
422				strcat(cp, name);
423				if (stat(cp, &sb) == 0 &&
424				    (sb.st_mode & S_IFMT) == S_IFREG &&
425				    (sb.st_mode & (S_IXUSR | S_IXGRP | S_IXOTH))
426				    != 0) {
427					unusual--;
428					bin = cp;
429					break;
430				}
431				free(cp);
432			}
433		}
434
435		if (opt_m) {
436			/*
437			 * Ask the man command to perform the search for us.
438			 */
439			unusual++;
440			cp = malloc(sizeof MANWHEREISCMD - 2 + s);
441			if (cp == NULL)
442				abort();
443			sprintf(cp, MANWHEREISCMD, name);
444			if ((p = popen(cp, "r")) != NULL &&
445			    fgets(buf, BUFSIZ - 1, p) != NULL &&
446			    pclose(p) == 0) {
447				unusual--;
448				if ((cp2 = strchr(buf, '\n')) != NULL)
449					*cp2 = '\0';
450				if (regexec(&re, buf, 2, matches, 0) == 0 &&
451				    (rlen = matches[1].rm_eo - matches[1].rm_so)
452				    > 0) {
453					/*
454					 * man -w found compressed
455					 * page, need to pick up
456					 * source page name.
457					 */
458					cp2 = malloc(rlen + 1);
459					if (cp2 == NULL)
460						abort();
461					memcpy(cp2, buf + matches[1].rm_so,
462					       rlen);
463					cp2[rlen] = '\0';
464					man = cp2;
465				} else {
466					/*
467					 * man -w found plain source
468					 * page, use it.
469					 */
470					s = strlen(buf);
471					cp2 = malloc(s + 1);
472					if (cp2 == NULL)
473						abort();
474					strcpy(cp2, buf);
475					man = cp2;
476				}
477			}
478			free(cp);
479		}
480
481		if (opt_s) {
482			/*
483			 * Sources match if a subdir with the exact
484			 * name is found.
485			 */
486			unusual++;
487			for (dp = sourcedirs; *dp != NULL; dp++) {
488				cp = malloc(strlen(*dp) + 1 + s + 1);
489				if (cp == NULL)
490					abort();
491				strcpy(cp, *dp);
492				strcat(cp, "/");
493				strcat(cp, name);
494				if (stat(cp, &sb) == 0 &&
495				    (sb.st_mode & S_IFMT) == S_IFDIR) {
496					unusual--;
497					src = cp;
498					break;
499				}
500				free(cp);
501			}
502			/*
503			 * If still not found, ask locate to search it
504			 * for us.  This will find sources for things
505			 * like lpr that are well hidden in the
506			 * /usr/src tree, but takes a lot longer.
507			 * Thus, option -x (`expensive') prevents this
508			 * search.
509			 *
510			 * Do only match locate output that starts
511			 * with one of our source directories, and at
512			 * least one further level of subdirectories.
513			 */
514			if (opt_x || src)
515				goto done_sources;
516
517			cp = malloc(sizeof LOCATECMD - 2 + s);
518			if (cp == NULL)
519				abort();
520			sprintf(cp, LOCATECMD, name);
521			if ((p = popen(cp, "r")) == NULL)
522				goto done_sources;
523			while (src == NULL &&
524			       (fgets(buf, BUFSIZ - 1, p)) != NULL) {
525				if ((cp2 = strchr(buf, '\n')) != NULL)
526					*cp2 = '\0';
527				for (dp = sourcedirs;
528				     src == NULL && *dp != NULL;
529				     dp++) {
530					cp2 = malloc(strlen(*dp) + 9);
531					if (cp2 == NULL)
532						abort();
533					strcpy(cp2, "^");
534					strcat(cp2, *dp);
535					strcat(cp2, "/[^/]+/");
536					if ((i = regcomp(&re2, cp2,
537							 REG_EXTENDED|REG_NOSUB))
538					    != 0) {
539						regerror(i, &re, buf,
540							 BUFSIZ - 1);
541						errx(EX_UNAVAILABLE,
542						     "regcomp(%s) failed: %s",
543						     cp2, buf);
544					}
545					free(cp2);
546					if (regexec(&re2, buf, 0,
547						    (regmatch_t *)NULL, 0)
548					    == 0) {
549						unusual--;
550						src = buf;
551					}
552					regfree(&re2);
553				}
554			}
555			pclose(p);
556			free(cp);
557		}
558	  done_sources:
559
560		if (opt_u && !unusual)
561			continue;
562
563		printed = 0;
564		if (!opt_q) {
565			printf("%s:", name);
566			printed++;
567		}
568		if (bin) {
569			if (printed++)
570				putchar(' ');
571			fputs(bin, stdout);
572		}
573		if (man) {
574			if (printed++)
575				putchar(' ');
576			fputs(man, stdout);
577		}
578		if (src) {
579			if (printed++)
580				putchar(' ');
581			fputs(src, stdout);
582		}
583		if (printed)
584			putchar('\n');
585	}
586
587	if (opt_m)
588		regfree(&re);
589
590	return (0);
591}
592