1/*-
2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3 *
 * Copyright © 2002, Jörg Wunsch
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18 * DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT,
19 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
21 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
23 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
24 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
25 * POSSIBILITY OF SUCH DAMAGE.
26 */
27
28/*
29 * 4.3BSD UI-compatible whereis(1) utility.  Rewritten from scratch
30 * since the original 4.3BSD version suffers legal problems that
31 * prevent it from being redistributed, and since the 4.4BSD version
32 * was pretty inferior in functionality.
33 */
34
35#include <sys/types.h>
36
37__FBSDID("$FreeBSD$");
38
39#include <sys/stat.h>
40#include <sys/sysctl.h>
41
42#include <dirent.h>
43#include <err.h>
44#include <errno.h>
45#include <locale.h>
46#include <regex.h>
47#include <stdio.h>
48#include <stdlib.h>
49#include <string.h>
50#include <sysexits.h>
51#include <unistd.h>
52
53#include "pathnames.h"
54
55#define	NO_BIN_FOUND	1
56#define	NO_MAN_FOUND	2
57#define	NO_SRC_FOUND	4
58
59typedef const char *ccharp;
60
61static int opt_a, opt_b, opt_m, opt_q, opt_s, opt_u, opt_x;
62static ccharp *bindirs, *mandirs, *sourcedirs;
63static char **query;
64
65static const char *sourcepath = PATH_SOURCES;
66
67static char	*colonify(ccharp *);
68static int	 contains(ccharp *, const char *);
69static void	 decolonify(char *, ccharp **, int *);
70static void	 defaults(void);
71static void	 scanopts(int, char **);
72static void	 usage(void);
73
/*
 * Throughout this program, a number of strings are dynamically
 * allocated but never freed.  Their memory is written to when
 * splitting the strings into string lists which will later be
 * processed.  Since it's important that those string lists remain
 * valid even after the functions allocating the memory have returned,
 * those functions cannot free them.  They could be freed only at the
 * end of main(), which is pretty pointless anyway.
 *
 * The overall amount of memory to be allocated for processing the
 * strings is not expected to exceed a few kilobytes.  For that
 * reason, allocation can generally be assumed to succeed (within
 * a virtual memory environment), thus we simply bail out using
 * abort(3) in case of an allocation failure.
 */
89
/*
 * Print the command-line synopsis on stderr and terminate the program
 * with exit status EX_USAGE.  This function does not return.
 */
static void
usage(void)
{
	static const char synopsis[] =
	    "usage: whereis [-abmqsux] [-BMS dir ... -f] program ...\n";

	(void)fputs(synopsis, stderr);
	exit(EX_USAGE);
}
97
98/*
99 * Scan options passed to program.
100 *
101 * Note that the -B/-M/-S options expect a list of directory
102 * names that must be terminated with -f.
103 */
104static void
105scanopts(int argc, char **argv)
106{
107	int c, i;
108	ccharp **dirlist;
109
110	while ((c = getopt(argc, argv, "BMSabfmqsux")) != -1)
111		switch (c) {
112		case 'B':
113			dirlist = &bindirs;
114			goto dolist;
115
116		case 'M':
117			dirlist = &mandirs;
118			goto dolist;
119
120		case 'S':
121			dirlist = &sourcedirs;
122		  dolist:
123			i = 0;
124			*dirlist = realloc(*dirlist, (i + 1) * sizeof(char *));
125			(*dirlist)[i] = NULL;
126			while (optind < argc &&
127			       strcmp(argv[optind], "-f") != 0 &&
128			       strcmp(argv[optind], "-B") != 0 &&
129			       strcmp(argv[optind], "-M") != 0 &&
130			       strcmp(argv[optind], "-S") != 0) {
131				decolonify(argv[optind], dirlist, &i);
132				optind++;
133			}
134			break;
135
136		case 'a':
137			opt_a = 1;
138			break;
139
140		case 'b':
141			opt_b = 1;
142			break;
143
144		case 'f':
145			goto breakout;
146
147		case 'm':
148			opt_m = 1;
149			break;
150
151		case 'q':
152			opt_q = 1;
153			break;
154
155		case 's':
156			opt_s = 1;
157			break;
158
159		case 'u':
160			opt_u = 1;
161			break;
162
163		case 'x':
164			opt_x = 1;
165			break;
166
167		default:
168			usage();
169		}
170  breakout:
171	if (optind == argc)
172		usage();
173	query = argv + optind;
174}
175
176/*
177 * Find out whether string `s' is contained in list `cpp'.
178 */
179static int
180contains(ccharp *cpp, const char *s)
181{
182	ccharp cp;
183
184	if (cpp == NULL)
185		return (0);
186
187	while ((cp = *cpp) != NULL) {
188		if (strcmp(cp, s) == 0)
189			return (1);
190		cpp++;
191	}
192	return (0);
193}
194
195/*
196 * Split string `s' at colons, and pass it to the string list pointed
197 * to by `cppp' (which has `*ip' elements).  Note that the original
198 * string is modified by replacing the colon with a NUL byte.  The
199 * partial string is only added if it has a length greater than 0, and
200 * if it's not already contained in the string list.
201 */
202static void
203decolonify(char *s, ccharp **cppp, int *ip)
204{
205	char *cp;
206
207	while ((cp = strchr(s, ':')), *s != '\0') {
208		if (cp)
209			*cp = '\0';
210		if (strlen(s) && !contains(*cppp, s)) {
211			*cppp = realloc(*cppp, (*ip + 2) * sizeof(char *));
212			if (*cppp == NULL)
213				abort();
214			(*cppp)[*ip] = s;
215			(*cppp)[*ip + 1] = NULL;
216			(*ip)++;
217		}
218		if (cp)
219			s = cp + 1;
220		else
221			break;
222	}
223}
224
225/*
226 * Join string list `cpp' into a colon-separated string.
227 */
228static char *
229colonify(ccharp *cpp)
230{
231	size_t s;
232	char *cp;
233	int i;
234
235	if (cpp == NULL)
236		return (0);
237
238	for (s = 0, i = 0; cpp[i] != NULL; i++)
239		s += strlen(cpp[i]) + 1;
240	if ((cp = malloc(s + 1)) == NULL)
241		abort();
242	for (i = 0, *cp = '\0'; cpp[i] != NULL; i++) {
243		strcat(cp, cpp[i]);
244		strcat(cp, ":");
245	}
246	cp[s - 1] = '\0';		/* eliminate last colon */
247
248	return (cp);
249}
250
251/*
252 * Provide defaults for all options and directory lists.
253 */
254static void
255defaults(void)
256{
257	size_t s;
258	char *b, buf[BUFSIZ], *cp;
259	int nele;
260	FILE *p;
261	DIR *dir;
262	struct stat sb;
263	struct dirent *dirp;
264
265	/* default to -bms if none has been specified */
266	if (!opt_b && !opt_m && !opt_s)
267		opt_b = opt_m = opt_s = 1;
268
269	/* -b defaults to default path + /usr/libexec +
270	 * user's path */
271	if (!bindirs) {
272		if (sysctlbyname("user.cs_path", (void *)NULL, &s,
273				 (void *)NULL, 0) == -1)
274			err(EX_OSERR, "sysctlbyname(\"user.cs_path\")");
275		if ((b = malloc(s + 1)) == NULL)
276			abort();
277		if (sysctlbyname("user.cs_path", b, &s, (void *)NULL, 0) == -1)
278			err(EX_OSERR, "sysctlbyname(\"user.cs_path\")");
279		nele = 0;
280		decolonify(b, &bindirs, &nele);
281		bindirs = realloc(bindirs, (nele + 2) * sizeof(char *));
282		if (bindirs == NULL)
283			abort();
284		bindirs[nele++] = PATH_LIBEXEC;
285		bindirs[nele] = NULL;
286		if ((cp = getenv("PATH")) != NULL) {
287			/* don't destroy the original environment... */
288			b = strdup(cp);
289			if (b == NULL)
290				abort();
291			decolonify(b, &bindirs, &nele);
292		}
293	}
294
295	/* -m defaults to $(manpath) */
296	if (!mandirs) {
297		if ((p = popen(MANPATHCMD, "r")) == NULL)
298			err(EX_OSERR, "cannot execute manpath command");
299		if (fgets(buf, BUFSIZ - 1, p) == NULL ||
300		    pclose(p))
301			err(EX_OSERR, "error processing manpath results");
302		if ((b = strchr(buf, '\n')) != NULL)
303			*b = '\0';
304		b = strdup(buf);
305		if (b == NULL)
306			abort();
307		nele = 0;
308		decolonify(b, &mandirs, &nele);
309	}
310
311	/* -s defaults to precompiled list, plus subdirs of /usr/ports */
312	if (!sourcedirs) {
313		b = strdup(sourcepath);
314		if (b == NULL)
315			abort();
316		nele = 0;
317		decolonify(b, &sourcedirs, &nele);
318
319		if (stat(PATH_PORTS, &sb) == -1) {
320			if (errno == ENOENT)
321				/* no /usr/ports, we are done */
322				return;
323			err(EX_OSERR, "stat(" PATH_PORTS ")");
324		}
325		if ((sb.st_mode & S_IFMT) != S_IFDIR)
326			/* /usr/ports is not a directory, ignore */
327			return;
328		if (access(PATH_PORTS, R_OK | X_OK) != 0)
329			return;
330		if ((dir = opendir(PATH_PORTS)) == NULL)
331			err(EX_OSERR, "opendir" PATH_PORTS ")");
332		while ((dirp = readdir(dir)) != NULL) {
333			/*
334			 * Not everything below PATH_PORTS is of
335			 * interest.  First, all dot files and
336			 * directories (e. g. .snap) can be ignored.
337			 * Also, all subdirectories starting with a
338			 * capital letter are not going to be
339			 * examined, as they are used for internal
340			 * purposes (Mk, Tools, ...).  This also
341			 * matches a possible CVS subdirectory.
342			 * Finally, the distfiles subdirectory is also
343			 * special, and should not be considered to
344			 * avoid false matches.
345			 */
346			if (dirp->d_name[0] == '.' ||
347			    /*
348			     * isupper() not used on purpose: the
349			     * check is supposed to default to the C
350			     * locale instead of the current user's
351			     * locale.
352			     */
353			    (dirp->d_name[0] >= 'A' && dirp->d_name[0] <= 'Z') ||
354			    strcmp(dirp->d_name, "distfiles") == 0)
355				continue;
356			if ((b = malloc(sizeof PATH_PORTS + 1 + dirp->d_namlen))
357			    == NULL)
358				abort();
359			strcpy(b, PATH_PORTS);
360			strcat(b, "/");
361			strcat(b, dirp->d_name);
362			if (stat(b, &sb) == -1 ||
363			    (sb.st_mode & S_IFMT) != S_IFDIR ||
364			    access(b, R_OK | X_OK) != 0) {
365				free(b);
366				continue;
367			}
368			sourcedirs = realloc(sourcedirs,
369					     (nele + 2) * sizeof(char *));
370			if (sourcedirs == NULL)
371				abort();
372			sourcedirs[nele++] = b;
373			sourcedirs[nele] = NULL;
374		}
375		closedir(dir);
376	}
377}
378
379int
380main(int argc, char **argv)
381{
382	int unusual, i, printed;
383	char *bin, buf[BUFSIZ], *cp, *cp2, *man, *name, *src;
384	ccharp *dp;
385	size_t nlen, olen, s;
386	struct stat sb;
387	regex_t re, re2;
388	regmatch_t matches[2];
389	regoff_t rlen;
390	FILE *p;
391
392	setlocale(LC_ALL, "");
393
394	scanopts(argc, argv);
395	defaults();
396
397	if (mandirs == NULL)
398		opt_m = 0;
399	if (bindirs == NULL)
400		opt_b = 0;
401	if (sourcedirs == NULL)
402		opt_s = 0;
403	if (opt_m + opt_b + opt_s == 0)
404		errx(EX_DATAERR, "no directories to search");
405
406	if (opt_m) {
407		setenv("MANPATH", colonify(mandirs), 1);
408		if ((i = regcomp(&re, MANWHEREISMATCH, REG_EXTENDED)) != 0) {
409			regerror(i, &re, buf, BUFSIZ - 1);
410			errx(EX_UNAVAILABLE, "regcomp(%s) failed: %s",
411			     MANWHEREISMATCH, buf);
412		}
413	}
414
415	for (; (name = *query) != NULL; query++) {
416		/* strip leading path name component */
417		if ((cp = strrchr(name, '/')) != NULL)
418			name = cp + 1;
419		/* strip SCCS or RCS suffix/prefix */
420		if (strlen(name) > 2 && strncmp(name, "s.", 2) == 0)
421			name += 2;
422		if ((s = strlen(name)) > 2 && strcmp(name + s - 2, ",v") == 0)
423			name[s - 2] = '\0';
424		/* compression suffix */
425		s = strlen(name);
426		if (s > 2 &&
427		    (strcmp(name + s - 2, ".z") == 0 ||
428		     strcmp(name + s - 2, ".Z") == 0))
429			name[s - 2] = '\0';
430		else if (s > 3 &&
431			 strcmp(name + s - 3, ".gz") == 0)
432			name[s - 3] = '\0';
433		else if (s > 4 &&
434			 strcmp(name + s - 4, ".bz2") == 0)
435			name[s - 4] = '\0';
436
437		unusual = 0;
438		bin = man = src = NULL;
439		s = strlen(name);
440
441		if (opt_b) {
442			/*
443			 * Binaries have to match exactly, and must be regular
444			 * executable files.
445			 */
446			unusual = unusual | NO_BIN_FOUND;
447			for (dp = bindirs; *dp != NULL; dp++) {
448				cp = malloc(strlen(*dp) + 1 + s + 1);
449				if (cp == NULL)
450					abort();
451				strcpy(cp, *dp);
452				strcat(cp, "/");
453				strcat(cp, name);
454				if (stat(cp, &sb) == 0 &&
455				    (sb.st_mode & S_IFMT) == S_IFREG &&
456				    (sb.st_mode & (S_IXUSR | S_IXGRP | S_IXOTH))
457				    != 0) {
458					unusual = unusual & ~NO_BIN_FOUND;
459					if (bin == NULL) {
460						bin = strdup(cp);
461					} else {
462						olen = strlen(bin);
463						nlen = strlen(cp);
464						bin = realloc(bin,
465							      olen + nlen + 2);
466						if (bin == NULL)
467							abort();
468						strcat(bin, " ");
469						strcat(bin, cp);
470					}
471					if (!opt_a) {
472						free(cp);
473						break;
474					}
475				}
476				free(cp);
477			}
478		}
479
480		if (opt_m) {
481			/*
482			 * Ask the man command to perform the search for us.
483			 */
484			unusual = unusual | NO_MAN_FOUND;
485			if (opt_a)
486				cp = malloc(sizeof MANWHEREISALLCMD - 2 + s);
487			else
488				cp = malloc(sizeof MANWHEREISCMD - 2 + s);
489
490			if (cp == NULL)
491				abort();
492
493			if (opt_a)
494				sprintf(cp, MANWHEREISALLCMD, name);
495			else
496				sprintf(cp, MANWHEREISCMD, name);
497
498			if ((p = popen(cp, "r")) != NULL) {
499
500				while (fgets(buf, BUFSIZ - 1, p) != NULL) {
501					unusual = unusual & ~NO_MAN_FOUND;
502
503					if ((cp2 = strchr(buf, '\n')) != NULL)
504						*cp2 = '\0';
505					if (regexec(&re, buf, 2,
506						    matches, 0) == 0 &&
507					    (rlen = matches[1].rm_eo -
508					     matches[1].rm_so) > 0) {
509						/*
510						 * man -w found formatted
511						 * page, need to pick up
512						 * source page name.
513						 */
514						cp2 = malloc(rlen + 1);
515						if (cp2 == NULL)
516							abort();
517						memcpy(cp2,
518						       buf + matches[1].rm_so,
519						       rlen);
520						cp2[rlen] = '\0';
521					} else {
522						/*
523						 * man -w found plain source
524						 * page, use it.
525						 */
526						cp2 = strdup(buf);
527						if (cp2 == NULL)
528							abort();
529					}
530
531					if (man == NULL) {
532						man = strdup(cp2);
533					} else {
534						olen = strlen(man);
535						nlen = strlen(cp2);
536						man = realloc(man,
537							      olen + nlen + 2);
538						if (man == NULL)
539							abort();
540						strcat(man, " ");
541						strcat(man, cp2);
542					}
543
544					free(cp2);
545
546					if (!opt_a)
547						break;
548				}
549				pclose(p);
550				free(cp);
551			}
552		}
553
554		if (opt_s) {
555			/*
556			 * Sources match if a subdir with the exact
557			 * name is found.
558			 */
559			unusual = unusual | NO_SRC_FOUND;
560			for (dp = sourcedirs; *dp != NULL; dp++) {
561				cp = malloc(strlen(*dp) + 1 + s + 1);
562				if (cp == NULL)
563					abort();
564				strcpy(cp, *dp);
565				strcat(cp, "/");
566				strcat(cp, name);
567				if (stat(cp, &sb) == 0 &&
568				    (sb.st_mode & S_IFMT) == S_IFDIR) {
569					unusual = unusual & ~NO_SRC_FOUND;
570					if (src == NULL) {
571						src = strdup(cp);
572					} else {
573						olen = strlen(src);
574						nlen = strlen(cp);
575						src = realloc(src,
576							      olen + nlen + 2);
577						if (src == NULL)
578							abort();
579						strcat(src, " ");
580						strcat(src, cp);
581					}
582					if (!opt_a) {
583						free(cp);
584						break;
585					}
586				}
587				free(cp);
588			}
589			/*
590			 * If still not found, ask locate to search it
591			 * for us.  This will find sources for things
592			 * like lpr that are well hidden in the
593			 * /usr/src tree, but takes a lot longer.
594			 * Thus, option -x (`expensive') prevents this
595			 * search.
596			 *
597			 * Do only match locate output that starts
598			 * with one of our source directories, and at
599			 * least one further level of subdirectories.
600			 */
601			if (opt_x || (src && !opt_a))
602				goto done_sources;
603
604			cp = malloc(sizeof LOCATECMD - 2 + s);
605			if (cp == NULL)
606				abort();
607			sprintf(cp, LOCATECMD, name);
608			if ((p = popen(cp, "r")) == NULL)
609				goto done_sources;
610			while ((src == NULL || opt_a) &&
611			       (fgets(buf, BUFSIZ - 1, p)) != NULL) {
612				if ((cp2 = strchr(buf, '\n')) != NULL)
613					*cp2 = '\0';
614				for (dp = sourcedirs;
615				     (src == NULL || opt_a) && *dp != NULL;
616				     dp++) {
617					cp2 = malloc(strlen(*dp) + 9);
618					if (cp2 == NULL)
619						abort();
620					strcpy(cp2, "^");
621					strcat(cp2, *dp);
622					strcat(cp2, "/[^/]+/");
623					if ((i = regcomp(&re2, cp2,
624							 REG_EXTENDED|REG_NOSUB))
625					    != 0) {
626						regerror(i, &re, buf,
627							 BUFSIZ - 1);
628						errx(EX_UNAVAILABLE,
629						     "regcomp(%s) failed: %s",
630						     cp2, buf);
631					}
632					free(cp2);
633					if (regexec(&re2, buf, 0,
634						    (regmatch_t *)NULL, 0)
635					    == 0) {
636						unusual = unusual &
637						          ~NO_SRC_FOUND;
638						if (src == NULL) {
639							src = strdup(buf);
640						} else {
641							olen = strlen(src);
642							nlen = strlen(buf);
643							src = realloc(src,
644								      olen +
645								      nlen + 2);
646							if (src == NULL)
647								abort();
648							strcat(src, " ");
649							strcat(src, buf);
650						}
651					}
652					regfree(&re2);
653				}
654			}
655			pclose(p);
656			free(cp);
657		}
658	  done_sources:
659
660		if (opt_u && !unusual)
661			continue;
662
663		printed = 0;
664		if (!opt_q) {
665			printf("%s:", name);
666			printed++;
667		}
668		if (bin) {
669			if (printed++)
670				putchar(' ');
671			fputs(bin, stdout);
672		}
673		if (man) {
674			if (printed++)
675				putchar(' ');
676			fputs(man, stdout);
677		}
678		if (src) {
679			if (printed++)
680				putchar(' ');
681			fputs(src, stdout);
682		}
683		if (printed)
684			putchar('\n');
685	}
686
687	if (opt_m)
688		regfree(&re);
689
690	return (0);
691}
692