1/*
 * Copyright © 2002, Jörg Wunsch
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 *    notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 *    notice, this list of conditions and the following disclaimer in the
11 *    documentation and/or other materials provided with the distribution.
12 *
13 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
14 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
15 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
16 * DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT,
17 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
18 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
19 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
21 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
22 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
23 * POSSIBILITY OF SUCH DAMAGE.
24 */
25
26/*
27 * 4.3BSD UI-compatible whereis(1) utility.  Rewritten from scratch
28 * since the original 4.3BSD version suffers legal problems that
29 * prevent it from being redistributed, and since the 4.4BSD version
30 * was pretty inferior in functionality.
31 */
32
33#include <sys/types.h>
34
35__FBSDID("$FreeBSD$");
36
37#include <sys/stat.h>
38#include <sys/sysctl.h>
39
40#include <dirent.h>
41#include <err.h>
42#include <errno.h>
43#include <locale.h>
44#include <regex.h>
45#include <stdio.h>
46#include <stdlib.h>
47#include <string.h>
48#include <sysexits.h>
49#include <unistd.h>
50
51#include "pathnames.h"
52
/*
 * Bits of the per-query `unusual' mask built in main(): a bit is set
 * when the corresponding search was requested, and cleared again once
 * that search produced at least one hit.
 */
#define	NO_BIN_FOUND	1
#define	NO_MAN_FOUND	2
#define	NO_SRC_FOUND	4

/* Element type of the NULL-terminated directory lists below. */
typedef const char *ccharp;

/* Command-line flags; each is set to 1 by scanopts() for its letter. */
int opt_a, opt_b, opt_m, opt_q, opt_s, opt_u, opt_x;
/*
 * NULL-terminated lists of directories to search.  They are filled in
 * by scanopts() for -B/-M/-S, or by defaults() when still NULL.
 */
ccharp *bindirs, *mandirs, *sourcedirs;
/* Remaining command-line arguments (the names to look up). */
char **query;

/* Colon-separated default source path, split by defaults(). */
const char *sourcepath = PATH_SOURCES;

/* Forward declarations of the functions defined in this file. */
char	*colonify(ccharp *);
int	 contains(ccharp *, const char *);
void	 decolonify(char *, ccharp **, int *);
void	 defaults(void);
void	 scanopts(int, char **);
void	 usage(void);
71
72/*
73 * Throughout this program, a number of strings are dynamically
74 * allocated but never freed.  Their memory is written to when
75 * splitting the strings into string lists which will later be
76 * processed.  Since it's important that those string lists remain
77 * valid even after the functions allocating the memory returned,
78 * those functions cannot free them.  They could be freed only at end
79 * of main(), which is pretty pointless anyway.
80 *
 * The overall amount of memory to be allocated for processing the
 * strings is not expected to exceed a few kilobytes.  For that
 * reason, allocation can generally be assumed to succeed (within
 * a virtual memory environment), thus we simply bail out using
 * abort(3) in case of an allocation failure.
86 */
87
/*
 * Print the command synopsis on standard error and terminate the
 * program with the EX_USAGE exit status.  Does not return.
 */
void
usage(void)
{
	static const char synopsis[] =
	    "usage: whereis [-abmqsux] [-BMS dir ... -f] program ...\n";

	(void)fputs(synopsis, stderr);
	exit(EX_USAGE);
}
95
96/*
97 * Scan options passed to program.
98 *
99 * Note that the -B/-M/-S options expect a list of directory
100 * names that must be terminated with -f.
101 */
102void
103scanopts(int argc, char **argv)
104{
105	int c, i;
106	ccharp **dirlist;
107
108	while ((c = getopt(argc, argv, "BMSabfmqsux")) != -1)
109		switch (c) {
110		case 'B':
111			dirlist = &bindirs;
112			goto dolist;
113
114		case 'M':
115			dirlist = &mandirs;
116			goto dolist;
117
118		case 'S':
119			dirlist = &sourcedirs;
120		  dolist:
121			i = 0;
122			*dirlist = realloc(*dirlist, (i + 1) * sizeof(char *));
123			(*dirlist)[i] = NULL;
124			while (optind < argc &&
125			       strcmp(argv[optind], "-f") != 0 &&
126			       strcmp(argv[optind], "-B") != 0 &&
127			       strcmp(argv[optind], "-M") != 0 &&
128			       strcmp(argv[optind], "-S") != 0) {
129				decolonify(argv[optind], dirlist, &i);
130				optind++;
131			}
132			break;
133
134		case 'a':
135			opt_a = 1;
136			break;
137
138		case 'b':
139			opt_b = 1;
140			break;
141
142		case 'f':
143			goto breakout;
144
145		case 'm':
146			opt_m = 1;
147			break;
148
149		case 'q':
150			opt_q = 1;
151			break;
152
153		case 's':
154			opt_s = 1;
155			break;
156
157		case 'u':
158			opt_u = 1;
159			break;
160
161		case 'x':
162			opt_x = 1;
163			break;
164
165		default:
166			usage();
167		}
168  breakout:
169	if (optind == argc)
170		usage();
171	query = argv + optind;
172}
173
/*
 * Tell whether the NULL-terminated string list `cpp' has an element
 * equal to `s'.  Returns 1 on a match, 0 otherwise; a NULL list is
 * treated like an empty one.
 */
int
contains(ccharp *cpp, const char *s)
{
	size_t idx;

	if (cpp == NULL)
		return (0);

	for (idx = 0; cpp[idx] != NULL; idx++)
		if (strcmp(cpp[idx], s) == 0)
			return (1);

	return (0);
}
192
193/*
194 * Split string `s' at colons, and pass it to the string list pointed
195 * to by `cppp' (which has `*ip' elements).  Note that the original
196 * string is modified by replacing the colon with a NUL byte.  The
197 * partial string is only added if it has a length greater than 0, and
198 * if it's not already contained in the string list.
199 */
200void
201decolonify(char *s, ccharp **cppp, int *ip)
202{
203	char *cp;
204
205	while ((cp = strchr(s, ':')), *s != '\0') {
206		if (cp)
207			*cp = '\0';
208		if (strlen(s) && !contains(*cppp, s)) {
209			*cppp = realloc(*cppp, (*ip + 2) * sizeof(char *));
210			if (cppp == NULL)
211				abort();
212			(*cppp)[*ip] = s;
213			(*cppp)[*ip + 1] = NULL;
214			(*ip)++;
215		}
216		if (cp)
217			s = cp + 1;
218		else
219			break;
220	}
221}
222
/*
 * Join the NULL-terminated string list `cpp' into one newly allocated,
 * colon-separated string.
 *
 * Returns NULL if `cpp' itself is NULL, and an empty string for a list
 * without elements.  The caller owns the returned memory.  Aborts on
 * allocation failure.
 */
char *
colonify(ccharp *cpp)
{
	size_t len, total;
	char *cp, *wp;
	int i;

	if (cpp == NULL)
		return (NULL);

	/*
	 * Every element needs its own length plus one byte, which is
	 * either the colon following it or, for the last element, the
	 * terminating NUL.
	 */
	for (total = 0, i = 0; cpp[i] != NULL; i++)
		total += strlen(cpp[i]) + 1;
	/* an empty list still needs room for the NUL */
	if ((cp = malloc(total > 0 ? total : 1)) == NULL)
		abort();

	wp = cp;
	for (i = 0; cpp[i] != NULL; i++) {
		if (i > 0)
			*wp++ = ':';
		len = strlen(cpp[i]);
		memcpy(wp, cpp[i], len);
		wp += len;
	}
	*wp = '\0';

	return (cp);
}
248
/*
 * Provide defaults for all options and directory lists that have not
 * been set on the command line.
 *
 * If none of -b, -m, -s was given, all three searches are enabled.
 * Each directory list that is still NULL is then filled in:
 *
 *   bindirs     the "user.cs_path" sysctl value, PATH_LIBEXEC,
 *               PATH_GAMES, and the directories in $PATH
 *   mandirs     the first output line of MANPATHCMD
 *   sourcedirs  `sourcepath', plus the readable subdirectories of
 *               PATH_PORTS (see the filter below)
 *
 * System call failures terminate the program through err(3);
 * allocation failures abort.
 */
void
defaults(void)
{
	size_t s;
	char *b, buf[BUFSIZ], *cp;
	ccharp *np;
	int nele;
	FILE *p;
	DIR *dir;
	struct stat sb;
	struct dirent *dirp;

	/* default to -bms if none has been specified */
	if (!opt_b && !opt_m && !opt_s)
		opt_b = opt_m = opt_s = 1;

	/* -b defaults to default path + /usr/libexec +
	 * /usr/games + user's path */
	if (!bindirs) {
		/* first call only asks for the required buffer size */
		if (sysctlbyname("user.cs_path", (void *)NULL, &s,
				 (void *)NULL, 0) == -1)
			err(EX_OSERR, "sysctlbyname(\"user.cs_path\")");
		if ((b = malloc(s + 1)) == NULL)
			abort();
		if (sysctlbyname("user.cs_path", b, &s, (void *)NULL, 0) == -1)
			err(EX_OSERR, "sysctlbyname(\"user.cs_path\")");
		nele = 0;
		decolonify(b, &bindirs, &nele);
		/* two more entries plus the terminating NULL */
		np = realloc(bindirs, (nele + 3) * sizeof(*np));
		if (np == NULL)
			abort();
		bindirs = np;
		bindirs[nele++] = PATH_LIBEXEC;
		bindirs[nele++] = PATH_GAMES;
		bindirs[nele] = NULL;
		if ((cp = getenv("PATH")) != NULL) {
			/* don't destroy the original environment... */
			if ((b = malloc(strlen(cp) + 1)) == NULL)
				abort();
			strcpy(b, cp);
			decolonify(b, &bindirs, &nele);
		}
	}

	/* -m defaults to $(manpath) */
	if (!mandirs) {
		if ((p = popen(MANPATHCMD, "r")) == NULL)
			err(EX_OSERR, "cannot execute manpath command");
		if (fgets(buf, BUFSIZ - 1, p) == NULL ||
		    pclose(p))
			err(EX_OSERR, "error processing manpath results");
		if ((b = strchr(buf, '\n')) != NULL)
			*b = '\0';
		/* the list will point into this copy, so it is not freed */
		if ((b = malloc(strlen(buf) + 1)) == NULL)
			abort();
		strcpy(b, buf);
		nele = 0;
		decolonify(b, &mandirs, &nele);
	}

	/* -s defaults to precompiled list, plus subdirs of /usr/ports */
	if (!sourcedirs) {
		if ((b = malloc(strlen(sourcepath) + 1)) == NULL)
			abort();
		strcpy(b, sourcepath);
		nele = 0;
		decolonify(b, &sourcedirs, &nele);

		if (stat(PATH_PORTS, &sb) == -1) {
			if (errno == ENOENT)
				/* no /usr/ports, we are done */
				return;
			err(EX_OSERR, "stat(" PATH_PORTS ")");
		}
		if ((sb.st_mode & S_IFMT) != S_IFDIR)
			/* /usr/ports is not a directory, ignore */
			return;
		if (access(PATH_PORTS, R_OK | X_OK) != 0)
			return;
		if ((dir = opendir(PATH_PORTS)) == NULL)
			err(EX_OSERR, "opendir(" PATH_PORTS ")");
		while ((dirp = readdir(dir)) != NULL) {
			/*
			 * Not everything below PATH_PORTS is of
			 * interest.  First, all dot files and
			 * directories (e. g. .snap) can be ignored.
			 * Also, all subdirectories starting with a
			 * capital letter are not going to be
			 * examined, as they are used for internal
			 * purposes (Mk, Tools, ...).  This also
			 * matches a possible CVS subdirectory.
			 * Finally, the distfiles subdirectory is also
			 * special, and should not be considered to
			 * avoid false matches.
			 */
			if (dirp->d_name[0] == '.' ||
			    /*
			     * isupper() not used on purpose: the
			     * check is supposed to default to the C
			     * locale instead of the current user's
			     * locale.
			     */
			    (dirp->d_name[0] >= 'A' && dirp->d_name[0] <= 'Z') ||
			    strcmp(dirp->d_name, "distfiles") == 0)
				continue;
			/* sizeof covers the NUL; one more byte for the '/' */
			if ((b = malloc(sizeof PATH_PORTS + 1 +
			    strlen(dirp->d_name))) == NULL)
				abort();
			strcpy(b, PATH_PORTS);
			strcat(b, "/");
			strcat(b, dirp->d_name);
			/* only keep searchable directories */
			if (stat(b, &sb) == -1 ||
			    (sb.st_mode & S_IFMT) != S_IFDIR ||
			    access(b, R_OK | X_OK) != 0) {
				free(b);
				continue;
			}
			np = realloc(sourcedirs, (nele + 2) * sizeof(*np));
			if (np == NULL)
				abort();
			sourcedirs = np;
			sourcedirs[nele++] = b;
			sourcedirs[nele] = NULL;
		}
		closedir(dir);
	}
}
377
378int
379main(int argc, char **argv)
380{
381	int unusual, i, printed;
382	char *bin, buf[BUFSIZ], *cp, *cp2, *man, *name, *src;
383	ccharp *dp;
384	size_t nlen, olen, s;
385	struct stat sb;
386	regex_t re, re2;
387	regmatch_t matches[2];
388	regoff_t rlen;
389	FILE *p;
390
391	setlocale(LC_ALL, "");
392
393	scanopts(argc, argv);
394	defaults();
395
396	if (mandirs == NULL)
397		opt_m = 0;
398	if (bindirs == NULL)
399		opt_b = 0;
400	if (sourcedirs == NULL)
401		opt_s = 0;
402	if (opt_m + opt_b + opt_s == 0)
403		errx(EX_DATAERR, "no directories to search");
404
405	if (opt_m) {
406		setenv("MANPATH", colonify(mandirs), 1);
407		if ((i = regcomp(&re, MANWHEREISMATCH, REG_EXTENDED)) != 0) {
408			regerror(i, &re, buf, BUFSIZ - 1);
409			errx(EX_UNAVAILABLE, "regcomp(%s) failed: %s",
410			     MANWHEREISMATCH, buf);
411		}
412	}
413
414	for (; (name = *query) != NULL; query++) {
415		/* strip leading path name component */
416		if ((cp = strrchr(name, '/')) != NULL)
417			name = cp + 1;
418		/* strip SCCS or RCS suffix/prefix */
419		if (strlen(name) > 2 && strncmp(name, "s.", 2) == 0)
420			name += 2;
421		if ((s = strlen(name)) > 2 && strcmp(name + s - 2, ",v") == 0)
422			name[s - 2] = '\0';
423		/* compression suffix */
424		s = strlen(name);
425		if (s > 2 &&
426		    (strcmp(name + s - 2, ".z") == 0 ||
427		     strcmp(name + s - 2, ".Z") == 0))
428			name[s - 2] = '\0';
429		else if (s > 3 &&
430			 strcmp(name + s - 3, ".gz") == 0)
431			name[s - 3] = '\0';
432		else if (s > 4 &&
433			 strcmp(name + s - 4, ".bz2") == 0)
434			name[s - 4] = '\0';
435
436		unusual = 0;
437		bin = man = src = NULL;
438		s = strlen(name);
439
440		if (opt_b) {
441			/*
442			 * Binaries have to match exactly, and must be regular
443			 * executable files.
444			 */
445			unusual = unusual | NO_BIN_FOUND;
446			for (dp = bindirs; *dp != NULL; dp++) {
447				cp = malloc(strlen(*dp) + 1 + s + 1);
448				if (cp == NULL)
449					abort();
450				strcpy(cp, *dp);
451				strcat(cp, "/");
452				strcat(cp, name);
453				if (stat(cp, &sb) == 0 &&
454				    (sb.st_mode & S_IFMT) == S_IFREG &&
455				    (sb.st_mode & (S_IXUSR | S_IXGRP | S_IXOTH))
456				    != 0) {
457					unusual = unusual & ~NO_BIN_FOUND;
458					if (bin == NULL) {
459						bin = strdup(cp);
460					} else {
461						olen = strlen(bin);
462						nlen = strlen(cp);
463						bin = realloc(bin,
464							      olen + nlen + 2);
465						if (bin == 0)
466							abort();
467						strcat(bin, " ");
468						strcat(bin, cp);
469					}
470					if (!opt_a) {
471						free(cp);
472						break;
473					}
474				}
475				free(cp);
476			}
477		}
478
479		if (opt_m) {
480			/*
481			 * Ask the man command to perform the search for us.
482			 */
483			unusual = unusual | NO_MAN_FOUND;
484			if (opt_a)
485				cp = malloc(sizeof MANWHEREISALLCMD - 2 + s);
486			else
487				cp = malloc(sizeof MANWHEREISCMD - 2 + s);
488
489			if (cp == NULL)
490				abort();
491
492			if (opt_a)
493				sprintf(cp, MANWHEREISALLCMD, name);
494			else
495				sprintf(cp, MANWHEREISCMD, name);
496
497			if ((p = popen(cp, "r")) != NULL) {
498
499				while (fgets(buf, BUFSIZ - 1, p) != NULL) {
500					unusual = unusual & ~NO_MAN_FOUND;
501
502					if ((cp2 = strchr(buf, '\n')) != NULL)
503						*cp2 = '\0';
504					if (regexec(&re, buf, 2,
505						    matches, 0) == 0 &&
506					    (rlen = matches[1].rm_eo -
507					     matches[1].rm_so) > 0) {
508						/*
509						 * man -w found formated
510						 * page, need to pick up
511						 * source page name.
512						 */
513						cp2 = malloc(rlen + 1);
514						if (cp2 == NULL)
515							abort();
516						memcpy(cp2,
517						       buf + matches[1].rm_so,
518						       rlen);
519						cp2[rlen] = '\0';
520					} else {
521						/*
522						 * man -w found plain source
523						 * page, use it.
524						 */
525						s = strlen(buf);
526						cp2 = malloc(s + 1);
527						if (cp2 == NULL)
528							abort();
529						strcpy(cp2, buf);
530					}
531
532					if (man == NULL) {
533						man = strdup(cp2);
534					} else {
535						olen = strlen(man);
536						nlen = strlen(cp2);
537						man = realloc(man,
538							      olen + nlen + 2);
539						if (man == 0)
540							abort();
541						strcat(man, " ");
542						strcat(man, cp2);
543					}
544
545					free(cp2);
546
547					if (!opt_a)
548						break;
549				}
550				pclose(p);
551				free(cp);
552			}
553		}
554
555		if (opt_s) {
556			/*
557			 * Sources match if a subdir with the exact
558			 * name is found.
559			 */
560			unusual = unusual | NO_SRC_FOUND;
561			for (dp = sourcedirs; *dp != NULL; dp++) {
562				cp = malloc(strlen(*dp) + 1 + s + 1);
563				if (cp == NULL)
564					abort();
565				strcpy(cp, *dp);
566				strcat(cp, "/");
567				strcat(cp, name);
568				if (stat(cp, &sb) == 0 &&
569				    (sb.st_mode & S_IFMT) == S_IFDIR) {
570					unusual = unusual & ~NO_SRC_FOUND;
571					if (src == NULL) {
572						src = strdup(cp);
573					} else {
574						olen = strlen(src);
575						nlen = strlen(cp);
576						src = realloc(src,
577							      olen + nlen + 2);
578						if (src == 0)
579							abort();
580						strcat(src, " ");
581						strcat(src, cp);
582					}
583					if (!opt_a) {
584						free(cp);
585						break;
586					}
587				}
588				free(cp);
589			}
590			/*
591			 * If still not found, ask locate to search it
592			 * for us.  This will find sources for things
593			 * like lpr that are well hidden in the
594			 * /usr/src tree, but takes a lot longer.
595			 * Thus, option -x (`expensive') prevents this
596			 * search.
597			 *
598			 * Do only match locate output that starts
599			 * with one of our source directories, and at
600			 * least one further level of subdirectories.
601			 */
602			if (opt_x || (src && !opt_a))
603				goto done_sources;
604
605			cp = malloc(sizeof LOCATECMD - 2 + s);
606			if (cp == NULL)
607				abort();
608			sprintf(cp, LOCATECMD, name);
609			if ((p = popen(cp, "r")) == NULL)
610				goto done_sources;
611			while ((src == NULL || opt_a) &&
612			       (fgets(buf, BUFSIZ - 1, p)) != NULL) {
613				if ((cp2 = strchr(buf, '\n')) != NULL)
614					*cp2 = '\0';
615				for (dp = sourcedirs;
616				     (src == NULL || opt_a) && *dp != NULL;
617				     dp++) {
618					cp2 = malloc(strlen(*dp) + 9);
619					if (cp2 == NULL)
620						abort();
621					strcpy(cp2, "^");
622					strcat(cp2, *dp);
623					strcat(cp2, "/[^/]+/");
624					if ((i = regcomp(&re2, cp2,
625							 REG_EXTENDED|REG_NOSUB))
626					    != 0) {
627						regerror(i, &re, buf,
628							 BUFSIZ - 1);
629						errx(EX_UNAVAILABLE,
630						     "regcomp(%s) failed: %s",
631						     cp2, buf);
632					}
633					free(cp2);
634					if (regexec(&re2, buf, 0,
635						    (regmatch_t *)NULL, 0)
636					    == 0) {
637						unusual = unusual &
638						          ~NO_SRC_FOUND;
639						if (src == NULL) {
640							src = strdup(buf);
641						} else {
642							olen = strlen(src);
643							nlen = strlen(buf);
644							src = realloc(src,
645								      olen +
646								      nlen + 2);
647							if (src == 0)
648								abort();
649							strcat(src, " ");
650							strcat(src, buf);
651						}
652					}
653					regfree(&re2);
654				}
655			}
656			pclose(p);
657			free(cp);
658		}
659	  done_sources:
660
661		if (opt_u && !unusual)
662			continue;
663
664		printed = 0;
665		if (!opt_q) {
666			printf("%s:", name);
667			printed++;
668		}
669		if (bin) {
670			if (printed++)
671				putchar(' ');
672			fputs(bin, stdout);
673		}
674		if (man) {
675			if (printed++)
676				putchar(' ');
677			fputs(man, stdout);
678		}
679		if (src) {
680			if (printed++)
681				putchar(' ');
682			fputs(src, stdout);
683		}
684		if (printed)
685			putchar('\n');
686	}
687
688	if (opt_m)
689		regfree(&re);
690
691	return (0);
692}
693