1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License").  You may not use this file except in compliance
7 * with the License.
8 *
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
13 *
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
19 *
20 * CDDL HEADER END
21 */
22/*
23 * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24 * Use is subject to license terms.
25 */
26
27/*
28 * Portions Copyright 2007-2013 Apple Inc.
29 */
30
31#pragma ident	"@(#)autod_nfs.c	1.126	05/06/08 SMI"
32
33#include <stdio.h>
34#include <unistd.h>
35#include <stdlib.h>
36#include <ctype.h>
37#include <syslog.h>
38#include <string.h>
39#include <deflt.h>
40#include <sys/param.h>
41#include <sys/types.h>
42#include <sys/time.h>
43#include <sys/stat.h>
44#include <sys/wait.h>
45#include <sys/socket.h>
46#include <netinet/in.h>
47#include <signal.h>
48#include <sys/signal.h>
49#include <oncrpc/rpc.h>
50#include <arpa/inet.h>
51#include <netdb.h>
52#include <errno.h>
53#include "mount.h"
54#include <mntopts.h>
55#include <locale.h>
56#include <setjmp.h>
57#include <sys/socket.h>
58#include <pthread.h>
59#include <limits.h>
60#include <sys/sockio.h>
61#include <net/if.h>
62#include <ifaddrs.h>
63#include <assert.h>
64
65#include "autofs_types.h"
66#include "automount.h"
67#include "auto_mntopts.h"
68#include "replica.h"
69#include "nfs.h"
70#include "nfs_subr.h"
71
72#include "umount_by_fsid.h"
73
74#define	MAXHOSTS	512
75
76/*
77 * host cache states
78 */
79#define	NOHOST		0	/* host not found in the cache */
80#define	GOODHOST	1	/* host was OK last time we checked */
81#define	DEADHOST	2	/* host was dead last time we checked */
82#define	NXHOST		3	/* host didn't exist last time we checked */
83
84struct cache_entry {
85	struct	cache_entry *cache_next;
86	char	*cache_host;
87	time_t	cache_time;
88	int	cache_state;
89	rpcvers_t cache_reqvers;
90	rpcvers_t cache_outvers;
91	char	*cache_proto;
92};
93
94static struct cache_entry *cache_head = NULL;
95pthread_rwlock_t cache_lock;	/* protect the cache chain */
96
97static int nfsmount(struct mapfs *, char *, char *, boolean_t, fsid_t ,
98		    au_asid_t, fsid_t *, uint32_t *);
99#ifdef HAVE_LOFS
100static int is_nfs_port(char *);
101#endif
102
103static struct mapfs *enum_servers(struct mapent *, char *);
104static struct mapfs *get_mysubnet_servers(struct mapfs *);
105static int subnet_test(int, int, char *);
106
107struct mapfs *add_mfs(struct mapfs *, int, struct mapfs **, struct mapfs **);
108void free_mfs(struct mapfs *);
109static void dump_mfs(struct mapfs *, char *, int);
110static char *dump_distance(struct mapfs *);
111static void cache_free(struct cache_entry *);
112static int cache_check(const char *, rpcvers_t *, const char *);
113static void cache_enter(const char *, rpcvers_t, rpcvers_t, const char *, int);
114
115#ifdef CACHE_DEBUG
116static void trace_host_cache();
117#endif /* CACHE_DEBUG */
118
119static int rpc_timeout = 20;
120
121#ifdef CACHE_DEBUG
122/*
123 * host cache counters. These variables do not need to be protected
124 * by mutex's. They have been added to measure the utility of the
125 * goodhost/deadhost cache in the lazy hierarchical mounting scheme.
126 */
127static int host_cache_accesses = 0;
128static int host_cache_lookups = 0;
129static int nxhost_cache_hits = 0;
130static int deadhost_cache_hits = 0;
131static int goodhost_cache_hits = 0;
132#endif /* CACHE_DEBUG */
133
/*
 * These are the defaults (range) for the client when determining
 * which NFS version to use when probing the server.
 * They are only used when the vers mount option is not given, and
 * they may be reset if /etc/default/nfs is configured to do so.
 */
140static rpcvers_t vers_max_default = NFS_VER3;
141static rpcvers_t vers_min_default = NFS_VER2;
142
143int
144mount_nfs(struct mapent *me, char *mntpnt, char *prevhost, boolean_t isdirect,
145	  fsid_t mntpnt_fsid, au_asid_t asid, fsid_t *fsidp,
146	  uint32_t *retflags)
147{
148#ifdef HAVE_LOFS
149	struct mapfs *mfs, *mp;
150#else
151	struct mapfs *mfs;
152#endif
153	int err = -1;
154
155	mfs = enum_servers(me, prevhost);
156	if (mfs == NULL)
157		return (ENOENT);
158
159#ifdef HAVE_LOFS
160	/*
161	 * Try loopback if we have something on localhost; if nothing
162	 * works, we will fall back to NFS
163	 */
164	if (is_nfs_port(me->map_mntopts)) {
165		for (mp = mfs; mp; mp = mp->mfs_next) {
166			if (self_check(mp->mfs_host)) {
167				err = loopbackmount(mp->mfs_dir,
168					mntpnt, me->map_mntopts);
169				if (err) {
170					mp->mfs_ignore = 1;
171				} else {
172					break;
173				}
174			}
175		}
176	}
177#endif
178	if (err) {
179		err = nfsmount(mfs, mntpnt, me->map_mntopts, isdirect,
180			       mntpnt_fsid, asid, fsidp, retflags);
181		if (err && trace > 1) {
182			trace_prt(1, "	Couldn't mount %s:%s, err=%d\n",
183				mfs->mfs_host, mfs->mfs_dir, err);
184		}
185	}
186	free_mfs(mfs);
187	return (err);
188}
189
/*
 * Associates an address-family constant with a printable label; used
 * for the table of families probed by get_mysubnet_servers().
 */
struct aftype {
	int afnum;	/* address family constant, e.g. AF_INET */
	char *name;	/* label shown in trace output, e.g. "IPv4" */
};
194
195static struct mapfs *
196get_mysubnet_servers(struct mapfs *mfs_in)
197{
198	struct mapfs *mfs, *p, *mfs_head = NULL, *mfs_tail = NULL;
199
200	static const struct aftype aflist[] = {
201		{ AF_INET, "IPv4" },
202#ifdef HAVE_IPV6_SUPPORT
203		{ AF_INET6, "IPv6" }
204#endif
205	};
206#define N_AFS	(sizeof aflist / sizeof aflist[0])
207	struct hostent *hp;
208	char **nb;
209	int res;
210	int af;
211	int err;
212	u_int i;
213
214	for (mfs = mfs_in; mfs; mfs = mfs->mfs_next) {
215		for (i = 0; i < N_AFS; i++) {
216			af = aflist[i].afnum;
217			hp = getipnodebyname(mfs->mfs_host, af, AI_DEFAULT, &err);
218			if (hp == NULL)
219				continue;
220			if (hp->h_addrtype != af) {
221				freehostent(hp);
222				continue;
223			}
224
225			/*
226			 * For each address for this host see if it's on our
227			 * local subnet.
228			 */
229
230			res = 0;
231			for (nb = &hp->h_addr_list[0]; *nb != NULL; nb++) {
232				if ((res = subnet_test(af, hp->h_length, *nb)) != 0) {
233					p = add_mfs(mfs, DIST_MYNET,
234						&mfs_head, &mfs_tail);
235					if (!p) {
236						freehostent(hp);
237						return (NULL);
238					}
239					break;
240				}
241			}  /* end of every host address */
242			if (trace > 2) {
243				trace_prt(1, "get_mysubnet_servers: host=%s "
244					"netid=%s res=%s\n", mfs->mfs_host,
245					aflist[i].name, res == 1?"SUC":"FAIL");
246			}
247
248			freehostent(hp);
249		} /* end of while */
250
251	} /* end of every map */
252
253	return (mfs_head);
254
255}
256
257/*
258 * XXX - there's no SIOC to get at in_localaddr() or in6_localaddr();
259 * we might have to do getaddrlist() and reimplement it ourselves.
260 * Note that the answer can change over time....
261 */
/*
 * Compare the first "len" bytes of "a" and "b", looking only at the
 * bits that are set in the corresponding byte of "mask".
 * Returns 1 if all masked bytes are equal, 0 at the first difference.
 */
static int
masked_eq(char *a, char *b, char *mask, int len)
{
	char *end = mask + len;

	/* advance all three cursors together until a masked bit differs */
	while (mask < end && ((*a ^ *b) & *mask) == 0) {
		a++;
		b++;
		mask++;
	}
	return (mask == end);
}
275
276static int
277subnet_test(int af, int len, char *addr)
278{
279	struct ifaddrs *ifalist, *ifa;
280	char *if_inaddr, *if_inmask, *if_indstaddr;
281
282	if (getifaddrs(&ifalist))
283		return (0);
284
285	for (ifa = ifalist; ifa != NULL; ifa = ifa->ifa_next) {
286		if (ifa->ifa_addr->sa_family != af)
287			continue;
288		if_inaddr = (af == AF_INET) ?
289		    (char *) &(((struct sockaddr_in *)(ifa->ifa_addr))->sin_addr) :
290		    (char *) &(((struct sockaddr_in6 *)(ifa->ifa_addr))->sin6_addr);
291		if (ifa->ifa_dstaddr) {
292			if_indstaddr = (af == AF_INET) ?
293			     (char *) &(((struct sockaddr_in *)(ifa->ifa_dstaddr))->sin_addr) :
294			     (char *) &(((struct sockaddr_in6 *)(ifa->ifa_dstaddr))->sin6_addr);
295		} else
296			if_indstaddr = NULL;
297
298		if (ifa->ifa_netmask == NULL) {
299			if (memcmp(if_inaddr, addr, len) == 0 ||
300			    (if_indstaddr && memcmp(if_indstaddr, addr, len) == 0)) {
301				freeifaddrs(ifalist);
302				return (1);
303			}
304		} else {
305			if_inmask = (af == AF_INET) ?
306			    (char *) &(((struct sockaddr_in *)(ifa->ifa_netmask))->sin_addr) :
307			    (char *) &(((struct sockaddr_in6 *)(ifa->ifa_netmask))->sin6_addr);
308			if (ifa->ifa_flags & IFF_POINTOPOINT) {
309				if (if_indstaddr && memcmp(if_indstaddr, addr, len) == 0) {
310					freeifaddrs(ifalist);
311					return (1);
312				}
313			} else {
314				if (masked_eq(if_inaddr, addr, if_inmask, len)) {
315					freeifaddrs(ifalist);
316					return (1);
317				}
318			}
319		}
320	}
321	freeifaddrs(ifalist);
322	return (0);
323}
324
325/*
326 * ping a bunch of hosts at once and sort by who responds first
327 */
328static struct mapfs *
329sort_servers(struct mapfs *mfs_in, int timeout)
330{
331	struct mapfs *m1 = NULL;
332	enum clnt_stat clnt_stat;
333
334	if (!mfs_in)
335		return (NULL);
336
337	clnt_stat = nfs_cast(mfs_in, &m1, timeout);
338
339	if (!m1) {
340		char buff[2048] = {'\0'};
341		const char *ellipsis = "";
342
343		for (m1 = mfs_in; m1; m1 = m1->mfs_next) {
344			if (strlcat(buff, m1->mfs_host, sizeof buff) >=
345			    sizeof buff) {
346				ellipsis = "...";
347				break;
348			}
349			if (m1->mfs_next) {
350				if (strlcat(buff, ",", sizeof buff) >=
351				    sizeof buff) {
352					ellipsis = "...";
353					break;
354				}
355			}
356		}
357
358		syslog(LOG_ERR, "servers %s%s not responding: %s",
359			buff, ellipsis, clnt_sperrno(clnt_stat));
360	}
361
362	return (m1);
363}
364
365/*
366 * Add a mapfs entry to the list described by *mfs_head and *mfs_tail,
367 * provided it is not marked "ignored" and isn't a dupe of ones we've
368 * already seen.
369 */
370struct mapfs *
371add_mfs(struct mapfs *mfs, int distance, struct mapfs **mfs_head,
372	struct mapfs **mfs_tail)
373{
374	struct mapfs *tmp, *new;
375	void bcopy();
376
377	for (tmp = *mfs_head; tmp; tmp = tmp->mfs_next)
378		if ((strcmp(tmp->mfs_host, mfs->mfs_host) == 0 &&
379		    strcmp(tmp->mfs_dir, mfs->mfs_dir) == 0) ||
380			mfs->mfs_ignore)
381			return (*mfs_head);
382	new = (struct mapfs *)malloc(sizeof (struct mapfs));
383	if (!new) {
384		syslog(LOG_ERR, "Memory allocation failed: %m");
385		return (NULL);
386	}
387	bcopy(mfs, new, sizeof (struct mapfs));
388	new->mfs_next = NULL;
389	if (distance)
390		new->mfs_distance = distance;
391	if (!*mfs_head)
392		*mfs_tail = *mfs_head = new;
393	else {
394		(*mfs_tail)->mfs_next = new;
395		*mfs_tail = new;
396	}
397	return (*mfs_head);
398}
399
400static void
401dump_mfs(struct mapfs *mfs, char *message, int level)
402{
403	struct mapfs *m1;
404
405	if (trace <= level)
406		return;
407
408	trace_prt(1, "%s", message);
409	if (!mfs) {
410		trace_prt(0, "mfs is null\n");
411		return;
412	}
413	for (m1 = mfs; m1; m1 = m1->mfs_next)
414		trace_prt(0, "\t%s[%s] ", m1->mfs_host, dump_distance(m1));
415	trace_prt(0, "\n");
416}
417
418static char *
419dump_distance(struct mapfs *mfs)
420{
421	switch (mfs->mfs_distance) {
422	case 0:			return ("zero");
423	case DIST_SELF:		return ("self");
424	case DIST_MYSUB:	return ("mysub");
425	case DIST_MYNET:	return ("mynet");
426	case DIST_OTHER:	return ("other");
427	default:		return ("other");
428	}
429}
430
431/*
432 * Walk linked list "raw", building a new list consisting of members
433 * NOT found in list "filter", returning the result.
434 */
435static struct mapfs *
436filter_mfs(struct mapfs *raw, struct mapfs *filter)
437{
438	struct mapfs *mfs, *p, *mfs_head = NULL, *mfs_tail = NULL;
439	int skip;
440
441	if (!raw)
442		return (NULL);
443	for (mfs = raw; mfs; mfs = mfs->mfs_next) {
444		for (skip = 0, p = filter; p; p = p->mfs_next) {
445			if (strcmp(p->mfs_host, mfs->mfs_host) == 0 &&
446			    strcmp(p->mfs_dir, mfs->mfs_dir) == 0) {
447				skip = 1;
448				break;
449			}
450		}
451		if (skip)
452			continue;
453		p = add_mfs(mfs, 0, &mfs_head, &mfs_tail);
454		if (!p)
455			return (NULL);
456	}
457	return (mfs_head);
458}
459
460/*
461 * Walk a linked list of mapfs structs, freeing each member.
462 */
463void
464free_mfs(struct mapfs *mfs)
465{
466	struct mapfs *tmp;
467
468	while (mfs) {
469		tmp = mfs->mfs_next;
470		free(mfs);
471		mfs = tmp;
472	}
473}
474
475/*
476 * New code for NFS client failover: we need to carry and sort
477 * lists of server possibilities rather than return a single
478 * entry.  It preserves previous behaviour of sorting first by
479 * locality (loopback-or-preferred/subnet/net/other) and then
480 * by ping times.  We'll short-circuit this process when we
481 * have ENOUGH or more entries.
482 */
483static struct mapfs *
484enum_servers(struct mapent *me, char *preferred)
485{
486	struct mapfs *p, *m1, *m2, *mfs_head = NULL, *mfs_tail = NULL;
487
488	/*
489	 * Short-circuit for simple cases.
490	 */
491	if (!me->map_fs->mfs_next) {
492		p = add_mfs(me->map_fs, DIST_OTHER, &mfs_head, &mfs_tail);
493		if (!p)
494			return (NULL);
495		return (mfs_head);
496	}
497
498	dump_mfs(me->map_fs, "	enum_servers: mapent: ", 2);
499
500	/*
501	 * get addresses & see if any are myself
502	 * or were mounted from previously in a
503	 * hierarchical mount.
504	 */
505	if (trace > 2)
506		trace_prt(1, "	enum_servers: looking for pref/self\n");
507	for (m1 = me->map_fs; m1; m1 = m1->mfs_next) {
508		if (m1->mfs_ignore)
509			continue;
510		if (self_check(m1->mfs_host) ||
511		    strcmp(m1->mfs_host, preferred) == 0) {
512			p = add_mfs(m1, DIST_SELF, &mfs_head, &mfs_tail);
513			if (!p)
514				return (NULL);
515		}
516	}
517	if (trace > 2 && m1)
518		trace_prt(1, "	enum_servers: pref/self found, %s\n",
519			m1->mfs_host);
520
521	/*
522	 * look for entries on this subnet
523	 */
524	dump_mfs(me->map_fs, "	enum_servers: input of get_mysubnet_servers: ", 2);
525	m1 = get_mysubnet_servers(me->map_fs);
526	dump_mfs(m1, "	enum_servers: output of get_mysubnet_servers: ", 3);
527	if (m1 && m1->mfs_next) {
528		m2 = sort_servers(m1, rpc_timeout / 2);
529		dump_mfs(m2, "	enum_servers: output of sort_servers: ", 3);
530		free_mfs(m1);
531		m1 = m2;
532	}
533
534	for (m2 = m1; m2; m2 = m2->mfs_next) {
535		p = add_mfs(m2, 0, &mfs_head, &mfs_tail);
536		if (!p)
537			return (NULL);
538	}
539	if (m1)
540		free_mfs(m1);
541
542	/*
543	 * add the rest of the entries at the end
544	 */
545	m1 = filter_mfs(me->map_fs, mfs_head);
546	dump_mfs(m1, "	enum_servers: etc: output of filter_mfs: ", 3);
547	m2 = sort_servers(m1, rpc_timeout / 2);
548	dump_mfs(m2, "	enum_servers: etc: output of sort_servers: ", 3);
549	if (m1)
550		free_mfs(m1);
551	m1 = m2;
552	for (m2 = m1; m2; m2 = m2->mfs_next) {
553		p = add_mfs(m2, DIST_OTHER, &mfs_head, &mfs_tail);
554		if (!p)
555			return (NULL);
556	}
557	if (m1)
558		free_mfs(m1);
559
560	dump_mfs(mfs_head, "  enum_servers: output: ", 1);
561	return (mfs_head);
562}
563
564static const struct mntopt mopts_nfs[] = {
565	MOPT_NFS
566};
567
568static int
569nfsmount(struct mapfs *mfs_in, char *mntpnt, char *opts, boolean_t isdirect,
570	 fsid_t mntpnt_fsid, au_asid_t asid, fsid_t *fsidp,
571	 uint32_t *retflags)
572{
573	mntoptparse_t mp;
574	int flags, altflags;
575	struct stat stbuf;
576	rpcvers_t vers, versmin; /* used to negotiate nfs version in pingnfs */
577				/* and mount version with mountd */
578	rpcvers_t nfsvers;	/* version in map options, 0 if not there */
579	long optval;
580	static time_t prevmsg = 0;
581
582	int i;
583	char *nfs_proto = NULL;
584	long nfs_port = 0;
585	char *host, *dir;
586	struct mapfs *mfs = NULL;
587	int last_error = 0;
588	int replicated;
589	int entries = 0;
590	int v2cnt = 0, v3cnt = 0, v4cnt = 0;
591	int v2near = 0, v3near = 0, v4near = 0;
592	char *mount_resource = NULL;
593	int mrlen = 0;
594	ushort_t thisport;
595
596	dump_mfs(mfs_in, "  nfsmount: input: ", 2);
597	replicated = (mfs_in->mfs_next != NULL);
598
599	if (trace > 1) {
600		trace_prt(1, "	nfsmount: mount on %s %s:\n",
601			mntpnt, opts);
602		for (mfs = mfs_in; mfs; mfs = mfs->mfs_next)
603			trace_prt(1, "	  %s:%s\n",
604				mfs->mfs_host, mfs->mfs_dir);
605	}
606
607	/*
608	 * Make sure mountpoint is safe to mount on
609	 *
610	 * XXX - if we do a stat() on the mount point of a direct
611	 * mount, that'll trigger the mount, so do that only for
612	 * an indirect mount.
613	 *
614	 * XXX - why bother doing it at all?  Won't the program
615	 * we run just fail if it doesn't exist?
616	 */
617	if (!isdirect && lstat(mntpnt, &stbuf) < 0) {
618		syslog(LOG_ERR, "Couldn't stat %s: %m", mntpnt);
619		return (ENOENT);
620	}
621
622	/*
623	 * Parse mount options.
624	 */
625	flags = altflags = 0;
626	getmnt_silent = 1;
627	mp = getmntopts(opts, mopts_nfs, &flags, &altflags);
628	if (mp == NULL) {
629		syslog(LOG_ERR, "Couldn't parse mount options \"%s\": %m",
630		    opts);
631		last_error = ENOENT;
632		goto ret;
633	}
634
635	/*
636	 * Get protocol specified in options list, if any.
637	 * XXX - process NFS_MNT_TCP and NFS_MNT_UDP?
638	 */
639	if (altflags & NFS_MNT_PROTO) {
640		const char *nfs_proto_opt;
641
642		nfs_proto_opt = getmntoptstr(mp, "proto");
643		if (nfs_proto_opt == NULL) {
644			freemntopts(mp);
645			last_error = ENOENT;
646			goto ret;
647		}
648		nfs_proto = strdup(nfs_proto_opt);
649	}
650
651	/*
652	 * Get port specified in options list, if any.
653	 */
654	if (altflags & NFS_MNT_PORT) {
655		nfs_port = getmntoptnum(mp, "port");
656		if (nfs_port < 1) {
657			syslog(LOG_ERR, "%s: invalid port number", mntpnt);
658			freemntopts(mp);
659			last_error = ENOENT;
660			goto ret;
661		}
662		if (nfs_port > USHRT_MAX) {
663			syslog(LOG_ERR, "%s: invalid port number %ld", mntpnt, nfs_port);
664			freemntopts(mp);
665			last_error = ENOENT;
666			goto ret;
667		}
668	} else
669		nfs_port = 0;	/* "unspecified" */
670
671	if (altflags & (NFS_MNT_VERS|NFS_MNT_NFSVERS)) {
672		optval = get_nfs_vers(mp, altflags);
673		if (optval == 0) {
674			/* Error. */
675			syslog(LOG_ERR, "%s: invalid NFS version number", mntpnt);
676			freemntopts(mp);
677			last_error = ENOENT;
678			goto ret;
679		}
680		nfsvers = (rpcvers_t)optval;
681	} else
682		nfsvers = 0;	/* "unspecified" */
683	if (set_versrange(nfsvers, &vers, &versmin) != 0) {
684		syslog(LOG_ERR, "Incorrect NFS version specified for %s",
685			mntpnt);
686		freemntopts(mp);
687		last_error = ENOENT;
688		goto ret;
689	}
690	freemntopts(mp);
691
692	entries = 0;
693	for (mfs = mfs_in; mfs; mfs = mfs->mfs_next) {
694		if (!mfs->mfs_ignore) {
695			entries++;
696			host = mfs->mfs_host;
697		}
698	}
699
700	if (entries == 1) {
701		/*
702		 * Make sure the server is responding before attempting a mount.
703		 * This up-front check can potentially avoid a hang if a mount
704		 * from this server is hierarchical and in the process of being
705		 * force unmounted.
706		 */
707		i = pingnfs(host, &vers, versmin, 0, NULL, nfs_proto);
708		if (i != RPC_SUCCESS) {
709			if (prevmsg < time((time_t) NULL)) {
710				prevmsg = time((time_t) NULL) + 5; // throttle these msgs
711				if (i == RPC_PROGVERSMISMATCH) {
712					syslog(LOG_ERR, "NFS server %s protocol version mismatch", host);
713				} else {
714					syslog(LOG_ERR, "NFS server %s not responding", host);
715				}
716			}
717			last_error = ENOENT;
718			goto out;
719		}
720
721	} else if (entries > 1) {
722		/*
723		 * We have more than one resource.
724		 * Walk the whole list of resources, pinging and
725		 * collecting version info, and choose one to
726		 * mount.
727		 *
728		 * If we have a version preference, this is easy; we'll
729		 * just reject anything that doesn't match.
730		 *
731		 * If not, we want to try to provide the best compromise
732		 * that considers proximity, preference for a higher version,
733		 * sorted order, and number of replicas.  We will count
734		 * the number of V2 and V3 replicas and also the number
735		 * which are "near", i.e. the localhost or on the same
736		 * subnet.
737		 *
738		 * XXX - this really belongs in mount_nfs.
739		 */
740		entries = 0;
741		for (mfs = mfs_in; mfs; mfs = mfs->mfs_next) {
742			if (mfs->mfs_ignore)
743				continue;
744
745			host = mfs->mfs_host;
746
747			if (mfs->mfs_flags & MFS_URL) {
748				char *path;
749				int pathlen;
750
751				if (nfs_port != 0 && mfs->mfs_port != 0 &&
752				    (uint_t)nfs_port != mfs->mfs_port) {
753
754					syslog(LOG_ERR, "nfsmount: port (%u) in nfs URL"
755						" not the same as port (%ld) in port "
756						"option\n", mfs->mfs_port, nfs_port);
757					last_error = EIO;
758					goto out;
759				} else if (nfs_port != 0)
760					thisport = nfs_port;
761				else
762					thisport = mfs->mfs_port;
763
764				dir = mfs->mfs_dir;
765
766				/*
767				 * Back off to a conventional mount.
768				 *
769				 * URL's can contain escape characters. Get
770				 * rid of them.
771				 */
772				pathlen = (int) strlen(dir) + 2;
773				path = malloc(pathlen);
774
775				if (path == NULL) {
776					syslog(LOG_ERR, "nfsmount: no memory");
777					last_error = EIO;
778					goto out;
779				}
780
781				strlcpy(path, dir, pathlen);
782				URLparse(path);
783				mfs->mfs_dir = path;
784				mfs->mfs_flags |= MFS_ALLOC_DIR;
785				mfs->mfs_flags &= ~MFS_URL;
786			}
787
788			i = pingnfs(host, &vers, versmin, 0, NULL, nfs_proto);
789			if (i != RPC_SUCCESS) {
790				if (i == RPC_PROGVERSMISMATCH) {
791					syslog(LOG_ERR, "server %s: NFS "
792						"protocol version mismatch",
793						host);
794				} else {
795					syslog(LOG_ERR, "server %s not "
796						"responding", host);
797				}
798				mfs->mfs_ignore = 1;
799				last_error = ENOENT;
800				continue;
801			}
802			if (nfsvers != 0 && (rpcvers_t)nfsvers != vers) {
803				if (nfs_proto == NULL)
804					syslog(LOG_ERR,
805						"NFS version %d "
806						"not supported by %s",
807						nfsvers, host);
808				else
809					syslog(LOG_ERR,
810						"NFS version %d "
811						"with proto %s "
812						"not supported by %s",
813						nfsvers, nfs_proto, host);
814				mfs->mfs_ignore = 1;
815				last_error = ENOENT;
816				continue;
817			}
818
819			entries++;
820
821			switch (vers) {
822#ifdef NFS_V4_DEFAULT
823			case NFS_VER4: v4cnt++; break;
824#endif
825			case NFS_VER3: v3cnt++; break;
826			case NFS_VER2: v2cnt++; break;
827			default: break;
828			}
829
830			/*
831			 * It's not clear how useful this stuff is if
832			 * we are using webnfs across the internet, but it
833			 * can't hurt.
834			 */
835			if (mfs->mfs_distance &&
836			    mfs->mfs_distance <= DIST_MYSUB) {
837				switch (vers) {
838#ifdef NFS_V4_DEFAULT
839				case NFS_VER4: v4near++; break;
840#endif
841				case NFS_VER3: v3near++; break;
842				case NFS_VER2: v2near++; break;
843				default: break;
844				}
845			}
846
847			/*
848			 * If the mount is not replicated, we don't want to
849			 * ping every entry, so we'll stop here.  This means
850			 * that we may have to go back to "nextentry" above
851			 * to consider another entry if there we can't get
852			 * all the way to mount(2) with this one.
853			 */
854			if (!replicated)
855				break;
856		}
857
858		if (nfsvers == 0) {
859			/*
860			 * Choose the NFS version.
861			 * We prefer higher versions, but will choose a one-
862			 * version downgrade in service if we can use a local
863			 * network interface and avoid a router.
864			 */
865#ifdef NFS_V4_DEFAULT
866			if (v4cnt && v4cnt >= v3cnt && (v4near || !v3near))
867				nfsvers = NFS_VER4;
868			else
869#endif
870			if (v3cnt && v3cnt >= v2cnt && (v3near || !v2near))
871				nfsvers = NFS_VER3;
872			else
873				nfsvers = NFS_VER2;
874			if (trace > 2)
875				trace_prt(1,
876				"  nfsmount: v4=%d[%d],v3=%d[%d],v2=%d[%d] => v%u.\n",
877				v4cnt, v4near,
878				v3cnt, v3near,
879				v2cnt, v2near, nfsvers);
880		}
881	}
882
883	/*
884	 * Find the first entry not marked as "ignore",
885	 * and mount that.
886	 */
887	for (mfs = mfs_in; mfs; mfs = mfs->mfs_next) {
888		if (!mfs->mfs_ignore)
889			break;
890	}
891
892	/*
893	 * Did we get through all possibilities without success?
894	 */
895	if (!mfs)
896		goto out;
897
898	/*
899	 * Whew; do the mount, at last.
900	 * We just call mount_generic() so it runs the NFS mount
901	 * program; that way, we don't have to know the same
902	 * stuff about mounting NFS that mount_nfs does.
903	 */
904	mrlen = (int) (strlen(mfs->mfs_host) + strlen(mfs->mfs_dir)) + 2;
905	mount_resource = malloc(mrlen);
906	if (mount_resource == NULL) {
907		last_error = errno;
908		goto out;
909	}
910	strlcpy(mount_resource, mfs->mfs_host, mrlen);
911	strlcat(mount_resource, ":", mrlen);
912	strlcat(mount_resource, mfs->mfs_dir, mrlen);
913	/*
914	 * Note we must mount as root for NFS because hierarchical mounts
915	 * will almost certainly not work.
916	 */
917	last_error = mount_generic(mount_resource, "nfs", opts, nfsvers,
918	    mntpnt, isdirect, FALSE, mntpnt_fsid, 0, asid, fsidp,
919	    retflags);
920
921	free(mount_resource);
922
923out:
924ret:
925	if (nfs_proto)
926		free(nfs_proto);
927
928	for (mfs = mfs_in; mfs; mfs = mfs->mfs_next) {
929
930		if (mfs->mfs_flags & MFS_ALLOC_DIR) {
931			free(mfs->mfs_dir);
932			mfs->mfs_dir = NULL;
933			mfs->mfs_flags &= ~MFS_ALLOC_DIR;
934		}
935
936		if (mfs->mfs_args != NULL) {
937			free(mfs->mfs_args);
938			mfs->mfs_args = NULL;
939		}
940	}
941
942	return (last_error);
943}
944
945int
946get_nfs_vers(mntoptparse_t mp, int altflags)
947{
948	const char *optstrval;
949
950	/*
951	 * "vers=" takes precedence over "nfsvers="; arguably,
952	 * we should let the last one specified in the option
953	 * string win, but getmntopts() doesn't support that.
954	 */
955	if (altflags & NFS_MNT_VERS)
956		optstrval = getmntoptstr(mp, "vers");
957	else if (altflags & NFS_MNT_NFSVERS)
958		optstrval = getmntoptstr(mp, "nfsvers");
959	else {
960		/*
961		 * We shouldn't be called if neither of them are set.
962		 */
963		return 0;		/* neither vers= nor nfsvers= specified */
964	}
965
966	if (optstrval == NULL)
967		return 0;		/* no version specified */
968	if (strcmp(optstrval, "2") == 0)
969		return NFS_VER2;	/* NFSv2 */
970	else if (strcmp(optstrval, "3") == 0)
971		return NFS_VER3;	/* NFSv3 */
972	else if (strncmp(optstrval, "4", 1) == 0)
973		return NFS_VER4;	/* "4*" means NFSv4 */
974	else
975		return 0;		/* invalid version */
976}
977
978/*
979 * This routine has the same definition as clnt_create_vers(),
980 * except it takes an additional timeout parameter - a pointer to
981 * a timeval structure.  A NULL value for the pointer indicates
982 * that the default timeout value should be used.
983 */
984static CLIENT *
985clnt_create_vers_timed(const char *hostname, const rpcprog_t prog,
986    rpcvers_t *vers_out, const rpcvers_t vers_low, const rpcvers_t vers_high,
987    const char *proto, struct timeval *tp)
988{
989	CLIENT *clnt;
990	struct timeval to;
991	enum clnt_stat rpc_stat;
992	struct rpc_err rpcerr;
993	rpcvers_t v_low, v_high;
994
995	clnt = clnt_create_timeout(hostname, prog, vers_high, proto, tp);
996	if (clnt == NULL)
997		return (NULL);
998	if (tp == NULL) {
999		to.tv_sec = 10;
1000		to.tv_usec = 0;
1001	} else
1002		to = *tp;
1003
1004	rpc_stat = clnt_call(clnt, NULLPROC, (xdrproc_t)xdr_void,
1005			NULL, (xdrproc_t)xdr_void, NULL, to);
1006	if (rpc_stat == RPC_SUCCESS) {
1007		*vers_out = vers_high;
1008		return (clnt);
1009	}
1010	v_low = vers_low;
1011	v_high = vers_high;
1012	while (rpc_stat == RPC_PROGVERSMISMATCH && v_high > v_low) {
1013		unsigned int minvers, maxvers;
1014
1015		clnt_geterr(clnt, &rpcerr);
1016		minvers = rpcerr.re_vers.low;
1017		maxvers = rpcerr.re_vers.high;
1018		if (maxvers < v_high)
1019			v_high = maxvers;
1020		else
1021			v_high--;
1022		if (minvers > v_low)
1023			v_low = minvers;
1024		if (v_low > v_high) {
1025			goto error;
1026		}
1027		clnt_destroy(clnt);
1028		clnt = clnt_create_timeout(hostname, prog, v_high, proto, tp);
1029		if (clnt == NULL)
1030			return (NULL);
1031		rpc_stat = clnt_call(clnt, NULLPROC, (xdrproc_t)xdr_void,
1032				NULL, (xdrproc_t)xdr_void,
1033				NULL, to);
1034		if (rpc_stat == RPC_SUCCESS) {
1035			*vers_out = v_high;
1036			return (clnt);
1037		}
1038	}
1039	clnt_geterr(clnt, &rpcerr);
1040
1041error:
1042	rpc_createerr.cf_stat = rpc_stat;
1043	rpc_createerr.cf_error = rpcerr;
1044	clnt_destroy(clnt);
1045	return (NULL);
1046}
1047
1048/*
1049 * Create a client handle for a well known service or a specific port on
1050 * host. This routine bypasses rpcbind and can be use to construct a client
1051 * handle to services that are not registered with rpcbind or where the remote
1052 * rpcbind is not available, e.g., the remote rpcbind port is blocked by a
1053 * firewall. We construct a client handle and then ping the service's NULL
1054 * proc to see that the service is really available. If the caller supplies
1055 * a non zero port number, the service name is ignored and the port will be
1056 * used. A non-zero port number limits the protocol family to inet or inet6.
1057 */
1058
1059static CLIENT *
1060clnt_create_service_timed(const char *host, const char *service,
1061			const rpcprog_t prog, const rpcvers_t vers,
1062			const ushort_t port, const char *proto,
1063			const struct timeval *tmout)
1064{
1065	CLIENT *clnt = NULL;
1066	struct timeval to;
1067	struct hostent *h;
1068	struct servent *se;
1069	struct protoent *p;
1070	struct sockaddr_in sin;
1071	int sock;
1072
1073	if (tmout == NULL) {
1074		to.tv_sec = 10;
1075		to.tv_usec = 0;
1076	} else
1077		to = *tmout;
1078
1079	if (host == NULL) {
1080		rpc_createerr.cf_stat = RPC_SYSTEMERROR;
1081		rpc_createerr.cf_error.re_errno = EINVAL;
1082		return (NULL);
1083	}
1084
1085	rpc_createerr.cf_stat = RPC_SUCCESS;
1086	h = gethostbyname(host);
1087	if (h == NULL) {
1088		rpc_createerr.cf_stat = RPC_UNKNOWNHOST;
1089		return (NULL);
1090	}
1091
1092	if (h->h_addrtype != AF_INET) {
1093		/*
1094		 * Only support INET for now.
1095		 * XXX - need IPv6 as well.
1096		 */
1097		rpc_createerr.cf_stat = RPC_SYSTEMERROR;
1098		rpc_createerr.cf_error.re_errno = EAFNOSUPPORT;
1099		return (NULL);
1100	}
1101
1102	bzero((char *)&sin, sizeof sin);
1103	sin.sin_family = h->h_addrtype;
1104	if (port == 0) {
1105		/*
1106		 * We were not given an explicit port number;
1107		 * attempt to get the port number for the
1108		 * service.
1109		 */
1110		if (service == NULL) {
1111			rpc_createerr.cf_stat = RPC_SYSTEMERROR;
1112			rpc_createerr.cf_error.re_errno = EINVAL;
1113			return (NULL);
1114		}
1115		se = getservbyname(service, proto);
1116		if (se == NULL) {
1117			rpc_createerr.cf_stat = RPC_PROGNOTREGISTERED;
1118			return (NULL);
1119		}
1120		sin.sin_port = se->s_port;
1121	} else
1122		sin.sin_port = port;
1123	bcopy(h->h_addr, (char*)&sin.sin_addr, h->h_length);
1124	p = getprotobyname(proto);
1125	if (p == NULL) {
1126		rpc_createerr.cf_stat = RPC_UNKNOWNPROTO;
1127		rpc_createerr.cf_error.re_errno = EPFNOSUPPORT;
1128		return (NULL);
1129	}
1130	sock = RPC_ANYSOCK;
1131	switch (p->p_proto) {
1132	case IPPROTO_UDP:
1133		clnt = clntudp_create(&sin, prog, vers, to, &sock);
1134		if (clnt == NULL)
1135			return (NULL);
1136		break;
1137	case IPPROTO_TCP:
1138		clnt = clnttcp_create(&sin, prog, vers, &sock, 0, 0);
1139		if (clnt == NULL)
1140			return (NULL);
1141		break;
1142	default:
1143		rpc_createerr.cf_stat = RPC_SYSTEMERROR;
1144		rpc_createerr.cf_error.re_errno = EPFNOSUPPORT;
1145		return (NULL);
1146	}
1147
1148	/*
1149	 * Check if we can reach the server with this clnt handle
1150	 * Other clnt_create calls do a ping by contacting the
1151	 * remote rpcbind, here will just try to execute the service's
1152	 * NULL proc.
1153	 */
1154
1155	rpc_createerr.cf_stat = clnt_call(clnt, NULLPROC,
1156					(xdrproc_t)xdr_void, 0,
1157					(xdrproc_t)xdr_void, 0, to);
1158
1159	if (rpc_createerr.cf_stat != RPC_SUCCESS) {
1160		clnt_geterr(clnt, &rpc_createerr.cf_error);
1161		clnt_destroy(clnt);
1162		return (NULL);
1163	}
1164
1165	return (clnt);
1166}
1167
1168/*
1169 * Sends a null call to the remote host's (NFS program, versp). versp
1170 * may be "NULL" in which case the default maximum version is used.
1171 * Upon return, versp contains the maximum version supported iff versp!= NULL.
1172 */
1173enum clnt_stat
1174pingnfs(
1175	const char *hostpart,
1176	rpcvers_t *versp,
1177	rpcvers_t versmin,
1178	ushort_t port,			/* may be zeor */
1179	const char *path,
1180	const char *proto)
1181{
1182	CLIENT *cl = NULL;
1183	enum clnt_stat clnt_stat;
1184	rpcvers_t versmax;	/* maximum version to try against server */
1185	rpcvers_t outvers;	/* version supported by host on last call */
1186	rpcvers_t vers_to_try;	/* to try different versions against host */
1187	const char *hostname = hostpart;
1188	char *hostcopy = NULL;
1189	char *pathcopy;
1190	struct timeval tv = {10, 0};
1191
1192	if (path != NULL && strcmp(hostname, "nfs") == 0 &&
1193	    strncmp(path, "//", 2) == 0) {
1194		char *sport;
1195
1196		hostcopy = strdup(path+2);
1197
1198		if (hostcopy == NULL) {
1199			syslog(LOG_ERR, "pingnfs: memory allocation failed");
1200			return (RPC_SYSTEMERROR);
1201		}
1202
1203		pathcopy = strchr(hostcopy, '/');
1204
1205		/*
1206		 * This cannot happen. If it does, give up
1207		 * on the ping as this is obviously a corrupt
1208		 * entry.
1209		 */
1210		if (pathcopy == NULL) {
1211			free(hostcopy);
1212			return (RPC_SUCCESS);
1213		}
1214
1215		/*
1216		 * Probable end point of host string.
1217		 */
1218		*pathcopy = '\0';
1219
1220		sport = strchr(hostname, ':');
1221
1222		if (sport != NULL && sport < pathcopy) {
1223
1224			/*
1225			 * Actual end point of host string.
1226			 */
1227			*sport = '\0';
1228			port = htons((ushort_t)atoi(sport+1));
1229		}
1230		hostname = hostcopy;
1231		path = pathcopy;
1232	}
1233
1234	/* Pick up the default versions and then set them appropriately */
1235	if (versp) {
1236		versmax = *versp;
1237		/* use versmin passed in */
1238	} else {
1239		set_versrange(0, &versmax, &versmin);
1240	}
1241
1242	if (proto &&
1243	    strcasecmp(proto, "udp") == 0 &&
1244	    versmax == NFS_VER4) {
1245		/*
1246		 * No V4-over-UDP for you.
1247		 */
1248		if (versmin == NFS_VER4) {
1249			if (versp) {
1250				*versp = versmax - 1;
1251				return (RPC_SUCCESS);
1252			}
1253			return (RPC_PROGUNAVAIL);
1254		} else {
1255			versmax--;
1256		}
1257	}
1258
1259	if (versp)
1260		*versp = versmax;
1261
1262	switch (cache_check(hostname, versp, proto)) {
1263	case GOODHOST:
1264		if (hostcopy != NULL)
1265			free(hostcopy);
1266		return (RPC_SUCCESS);
1267	case DEADHOST:
1268		if (hostcopy != NULL)
1269			free(hostcopy);
1270		return (RPC_TIMEDOUT);
1271	case NXHOST:
1272		if (hostcopy != NULL)
1273			free(hostcopy);
1274		return (RPC_UNKNOWNHOST);
1275	case NOHOST:
1276	default:
1277		break;
1278	}
1279
1280	vers_to_try = versmax;
1281
1282	/*
1283	 * check the host's version within the timeout
1284	 */
1285	if (trace > 1)
1286		trace_prt(1, "	ping: %s request vers=%d min=%d\n",
1287				hostname, versmax, versmin);
1288
1289	do {
1290		outvers = vers_to_try;
1291		/*
1292		 * If NFSv4, we give the port number explicitly so that we
1293		 * avoid talking to the portmapper.
1294		 */
1295		if (vers_to_try == NFS_VER4) {
1296			if (trace > 4) {
1297				trace_prt(1, "  pingnfs: Trying ping via TCP\n");
1298			}
1299
1300			if ((cl = clnt_create_service_timed(hostname, "nfs",
1301							    NFS_PROG,
1302							    vers_to_try,
1303							    port, "tcp",
1304							    &tv))
1305			    != NULL) {
1306				outvers = vers_to_try;
1307				break;
1308			}
1309			if (trace > 4) {
1310				trace_prt(1, "  pingnfs: Can't ping via TCP"
1311					" %s: RPC error=%d\n",
1312					hostname, rpc_createerr.cf_stat);
1313			}
1314
1315		} else {
1316			if ((cl = clnt_create_vers_timed(hostname, NFS_PROG,
1317				&outvers, versmin, vers_to_try,
1318				"udp", &tv))
1319				!= NULL)
1320				break;
1321			if (trace > 4) {
1322				trace_prt(1, "  pingnfs: Can't ping via UDP"
1323					" %s: RPC error=%d\n",
1324					hostname, rpc_createerr.cf_stat);
1325			}
1326			if (rpc_createerr.cf_stat == RPC_UNKNOWNHOST ||
1327				rpc_createerr.cf_stat == RPC_TIMEDOUT)
1328				break;
1329			if (rpc_createerr.cf_stat == RPC_PROGNOTREGISTERED) {
1330				if (trace > 4) {
1331					trace_prt(1, "  pingnfs: Trying ping "
1332						"via TCP\n");
1333				}
1334				if ((cl = clnt_create_vers_timed(hostname,
1335					NFS_PROG, &outvers,
1336					versmin, vers_to_try,
1337					"tcp", &tv)) != NULL)
1338					break;
1339				if (trace > 4) {
1340					trace_prt(1, "  pingnfs: Can't ping "
1341						"via TCP %s: "
1342						"RPC error=%d\n",
1343						hostname,
1344						rpc_createerr.cf_stat);
1345				}
1346			}
1347		}
1348
1349		/*
1350		 * backoff and return lower version to retry the ping.
1351		 * XXX we should be more careful and handle
1352		 * RPC_PROGVERSMISMATCH here, because that error is handled
1353		 * in clnt_create_vers(). It's not done to stay in sync
1354		 * with the nfs mount command.
1355		 */
1356		vers_to_try--;
1357		if (vers_to_try < versmin)
1358			break;
1359		if (versp != NULL) {	/* recheck the cache */
1360			*versp = vers_to_try;
1361			if (trace > 4) {
1362				trace_prt(1,
1363				    "  pingnfs: check cache: vers=%d\n",
1364				    *versp);
1365			}
1366			switch (cache_check(hostname, versp, proto)) {
1367			case GOODHOST:
1368				if (hostcopy != NULL)
1369					free(hostcopy);
1370				return (RPC_SUCCESS);
1371			case DEADHOST:
1372				if (hostcopy != NULL)
1373					free(hostcopy);
1374				return (RPC_TIMEDOUT);
1375			case NXHOST:
1376				if (hostcopy != NULL)
1377					free(hostcopy);
1378				return (RPC_UNKNOWNHOST);
1379			case NOHOST:
1380			default:
1381				break;
1382			}
1383		}
1384		if (trace > 4) {
1385			trace_prt(1, "  pingnfs: Try version=%d\n",
1386				vers_to_try);
1387		}
1388	} while (cl == NULL);
1389
1390
1391	if (cl == NULL) {
1392		if (verbose)
1393			syslog(LOG_ERR, "pingnfs: %s%s",
1394				hostname, clnt_spcreateerror(""));
1395		clnt_stat = rpc_createerr.cf_stat;
1396	} else {
1397		clnt_destroy(cl);
1398		clnt_stat = RPC_SUCCESS;
1399	}
1400
1401	if (trace > 1)
1402		clnt_stat == RPC_SUCCESS ?
1403			trace_prt(1, "	pingnfs OK: nfs version=%d\n", outvers):
1404			trace_prt(1, "	pingnfs FAIL: can't get nfs version\n");
1405
1406	switch (clnt_stat) {
1407
1408	case RPC_SUCCESS:
1409		cache_enter(hostname, versmax, outvers, proto, GOODHOST);
1410		if (versp != NULL)
1411			*versp = outvers;
1412		break;
1413
1414	case RPC_UNKNOWNHOST:
1415		cache_enter(hostname, versmax, versmax, proto, NXHOST);
1416		break;
1417
1418	default:
1419		cache_enter(hostname, versmax, versmax, proto, DEADHOST);
1420		break;
1421	}
1422
1423	if (hostcopy != NULL)
1424		free(hostcopy);
1425
1426	return (clnt_stat);
1427}
1428
1429#ifdef HAVE_LOFS
1430#define	MNTTYPE_LOFS	"lofs"
1431
1432int
1433loopbackmount(fsname, dir, mntopts)
1434	char *fsname;		/* Directory being mounted */
1435	char *dir;		/* Directory being mounted on */
1436	char *mntopts;
1437{
1438	struct mnttab mnt;
1439	int flags = 0;
1440	char fstype[] = MNTTYPE_LOFS;
1441	int dirlen;
1442	struct stat st;
1443	char optbuf[AUTOFS_MAXOPTSLEN];
1444
1445	dirlen = strlen(dir);
1446	if (dir[dirlen-1] == ' ')
1447		dirlen--;
1448
1449	if (dirlen == strlen(fsname) &&
1450		strncmp(fsname, dir, dirlen) == 0) {
1451		syslog(LOG_ERR,
1452			"Mount of %s on %s would result in deadlock, aborted\n",
1453			fsname, dir);
1454		return (RET_ERR);
1455	}
1456	mnt.mnt_mntopts = mntopts;
1457	if (hasmntopt(&mnt, MNTOPT_RO) != NULL)
1458		flags |= MS_RDONLY;
1459
1460	(void) strlcpy(optbuf, mntopts, sizeof (optbuf));
1461
1462	if (trace > 1)
1463		trace_prt(1,
1464			"  loopbackmount: fsname=%s, dir=%s, flags=%d\n",
1465			fsname, dir, flags);
1466
1467	if (mount(fsname, dir, flags | MS_DATA | MS_OPTIONSTR, fstype,
1468	    NULL, 0, optbuf, sizeof (optbuf)) < 0) {
1469		syslog(LOG_ERR, "Mount of %s on %s: %m", fsname, dir);
1470		return (RET_ERR);
1471	}
1472
1473	if (stat(dir, &st) == 0) {
1474		if (trace > 1) {
1475			trace_prt(1,
1476			    "  loopbackmount of %s on %s dev=%x rdev=%x OK\n",
1477			    fsname, dir, st.st_dev, st.st_rdev);
1478		}
1479	} else {
1480		if (trace > 1) {
1481			trace_prt(1,
1482			    "  loopbackmount of %s on %s OK\n", fsname, dir);
1483			trace_prt(1, "	stat of %s failed\n", dir);
1484		}
1485	}
1486
1487	return (0);
1488}
1489#endif
1490
1491/*
1492 * Put a new entry in the cache chain by prepending it to the front.
1493 * If there isn't enough memory then just give up.
1494 */
1495static void
1496cache_enter(host, reqvers, outvers, proto, state)
1497	const char *host;
1498	rpcvers_t reqvers;
1499	rpcvers_t outvers;
1500	const char *proto;
1501	int state;
1502{
1503	struct cache_entry *entry;
1504	int cache_time = 30;	/* sec */
1505
1506	timenow = time(NULL);
1507
1508	entry = (struct cache_entry *)malloc(sizeof (struct cache_entry));
1509	if (entry == NULL)
1510		return;
1511	(void) memset((caddr_t)entry, 0, sizeof (struct cache_entry));
1512	entry->cache_host = strdup(host);
1513	if (entry->cache_host == NULL) {
1514		cache_free(entry);
1515		return;
1516	}
1517	entry->cache_reqvers = reqvers;
1518	entry->cache_outvers = outvers;
1519	entry->cache_proto = (proto == NULL ? NULL : strdup(proto));
1520	entry->cache_state = state;
1521	entry->cache_time = timenow + cache_time;
1522	(void) pthread_rwlock_wrlock(&cache_lock);
1523#ifdef CACHE_DEBUG
1524	host_cache_accesses++;		/* up host cache access counter */
1525#endif /* CACHE DEBUG */
1526	entry->cache_next = cache_head;
1527	cache_head = entry;
1528	(void) pthread_rwlock_unlock(&cache_lock);
1529}
1530
1531static int
1532cache_check(host, versp, proto)
1533	const char *host;
1534	rpcvers_t *versp;
1535	const char *proto;
1536{
1537	int state = NOHOST;
1538	struct cache_entry *ce, *prev;
1539
1540	timenow = time(NULL);
1541
1542	(void) pthread_rwlock_rdlock(&cache_lock);
1543
1544#ifdef CACHE_DEBUG
1545	/* Increment the lookup and access counters for the host cache */
1546	host_cache_accesses++;
1547	host_cache_lookups++;
1548	if ((host_cache_lookups%1000) == 0)
1549		trace_host_cache();
1550#endif /* CACHE DEBUG */
1551
1552	for (ce = cache_head; ce; ce = ce->cache_next) {
1553		if (timenow > ce->cache_time) {
1554			(void) pthread_rwlock_unlock(&cache_lock);
1555			(void) pthread_rwlock_wrlock(&cache_lock);
1556			for (prev = NULL, ce = cache_head; ce;
1557				prev = ce, ce = ce->cache_next) {
1558				if (timenow > ce->cache_time) {
1559					cache_free(ce);
1560					if (prev)
1561						prev->cache_next = NULL;
1562					else
1563						cache_head = NULL;
1564					break;
1565				}
1566			}
1567			(void) pthread_rwlock_unlock(&cache_lock);
1568			return (state);
1569		}
1570		if (strcmp(host, ce->cache_host) != 0)
1571			continue;
1572		if ((proto == NULL && ce->cache_proto != NULL) ||
1573		    (proto != NULL && ce->cache_proto == NULL))
1574			continue;
1575		if (proto != NULL &&
1576		    strcmp(proto, ce->cache_proto) != 0)
1577			continue;
1578
1579		if (versp == NULL ||
1580			(versp != NULL && *versp == ce->cache_reqvers) ||
1581			(versp != NULL && *versp == ce->cache_outvers)) {
1582				if (versp != NULL)
1583					*versp = ce->cache_outvers;
1584				state = ce->cache_state;
1585
1586				/* increment the host cache hit counters */
1587#ifdef CACHE_DEBUG
1588				switch (state) {
1589
1590				case GOODHOST:
1591					goodhost_cache_hits++;
1592					break;
1593
1594				case DEADHOST:
1595					deadhost_cache_hits++;
1596					break;
1597
1598				case NXHOST:
1599					nxhost_cache_hits++;
1600					break;
1601#endif /* CACHE_DEBUG */
1602				(void) pthread_rwlock_unlock(&cache_lock);
1603				return (state);
1604		}
1605	}
1606	(void) pthread_rwlock_unlock(&cache_lock);
1607	return (state);
1608}
1609
1610/*
1611 * Free a cache entry and all entries
1612 * further down the chain since they
1613 * will also be expired.
1614 */
1615static void
1616cache_free(entry)
1617	struct cache_entry *entry;
1618{
1619	struct cache_entry *ce, *next = NULL;
1620
1621	for (ce = entry; ce; ce = next) {
1622		if (ce->cache_host)
1623			free(ce->cache_host);
1624		if (ce->cache_proto)
1625			free(ce->cache_proto);
1626		next = ce->cache_next;
1627		free(ce);
1628	}
1629}
1630
1631static void
1632cache_flush(void)
1633{
1634	(void) pthread_rwlock_wrlock(&cache_lock);
1635	cache_free(cache_head);
1636	cache_head = NULL;
1637	(void) pthread_rwlock_unlock(&cache_lock);
1638}
1639
/*
 * Flush the caches kept by this file: signal cleanup_start_cv, wait on
 * cleanup_done_cv, then empty the host (ping) cache and call
 * flush_host_name_cache().
 *
 * Presumably another thread waits on cleanup_start_cv, does its cleanup
 * and signals cleanup_done_cv - confirm against the code that owns those
 * condition variables.
 *
 * NOTE(review): the wait is not wrapped in a predicate loop, so a spurious
 * wakeup, or a "done" signal sent before this thread starts waiting, is
 * not detected here - confirm whether the owning thread guards against it.
 */
void
flush_caches(void)
{
	pthread_mutex_lock(&cleanup_lock);
	/* Request a cleanup pass, then block until it is reported done. */
	pthread_cond_signal(&cleanup_start_cv);
	(void) pthread_cond_wait(&cleanup_done_cv, &cleanup_lock);
	pthread_mutex_unlock(&cleanup_lock);
	/* These run after cleanup_lock has been released. */
	cache_flush();
	flush_host_name_cache();
}
1650
1651#ifdef HAVE_LOFS
1652/*
1653 * Returns 1, if port option is NFS_PORT or
1654 *	nfsd is running on the port given
1655 * Returns 0, if both port is not NFS_PORT and nfsd is not
1656 *	running on the port.
1657 */
1658
1659static int
1660is_nfs_port(char *opts)
1661{
1662	mntoptparse_t mp;
1663	int flags, altflags;
1664	long nfs_port = 0;
1665#if 0
1666	struct servent sv;
1667	char buf[256];
1668#endif
1669	int got_port;
1670
1671	/*
1672	 * Parse mount options.
1673	 */
1674	flags = altflags = 0;
1675	getmnt_silent = 1;
1676	mp = getmntopts(opts, mopts_nfs, &flags, &altflags);
1677	if (mp == NULL) {
1678		syslog(LOG_ERR, "Couldn't parse mount options \"%s\": %m",
1679		    opts);
1680		return (0);
1681	}
1682
1683	/*
1684	 * Get port specified in options list, if any.
1685	 */
1686	got_port = (altflags & NFS_MNT_PORT);
1687	if (got_port) {
1688		nfs_port = getmntoptnum(mp, "port");
1689		if (nfs_port == -1) {
1690			syslog(LOG_ERR, "Invalid port number in \"%s\"",
1691			    opts);
1692			freemntopts(mp);
1693			return (0);
1694		}
1695		if (nfs_port > USHRT_MAX) {
1696			syslog(LOG_ERR, "Invalid port number %ld in \"%s\"",
1697			    nfs_port, opts);
1698			freemntopts(mp);
1699			return (0);
1700		}
1701	}
1702	freemntopts(mp);
1703
1704	/*
1705	 * if no port specified or it is same as NFS_PORT return nfs
1706	 * To use any other daemon the port number should be different
1707	 */
1708	if (!got_port || nfs_port == NFS_PORT)
1709		return (1);
1710#if 0
1711	/*
1712	 * If daemon is nfsd, return nfs
1713	 * XXX - we don't have getservbyport_r(), and it's not clear
1714	 * that this does anything useful - the only port that should
1715	 * map to "nfsd" is 2049, i.e. NFS_PORT.
1716	 */
1717	if (getservbyport_r(nfs_port, NULL, &sv, buf, 256) == &sv &&
1718		strcmp(sv.s_name, "nfsd") == 0)
1719		return (1);
1720#endif
1721
1722	/*
1723	 * daemon is not nfs
1724	 */
1725	return (0);
1726}
1727#endif
1728
1729
1730/*
1731 * Attempt to figure out which version of NFS to use in pingnfs().  If
1732 * the version number was specified (i.e., non-zero), then use it.
1733 * Otherwise, default to the compiled-in default or the default as set
1734 * by the /etc/default/nfs configuration (as read by read_default().
1735 */
1736int
1737set_versrange(rpcvers_t nfsvers, rpcvers_t *vers, rpcvers_t *versmin)
1738{
1739	switch (nfsvers) {
1740	case 0:
1741		*vers = vers_max_default;
1742		*versmin = vers_min_default;
1743		break;
1744	case NFS_VER4:
1745		*vers = NFS_VER4;
1746		*versmin = NFS_VER4;
1747		break;
1748	case NFS_VER3:
1749		*vers = NFS_VER3;
1750		*versmin = NFS_VER3;
1751		break;
1752	case NFS_VER2:
1753		*vers = NFS_VER2;
1754		*versmin = NFS_VER2;
1755		break;
1756	default:
1757		return (-1);
1758	}
1759	return (0);
1760}
1761
1762#ifdef CACHE_DEBUG
1763/*
1764 * trace_host_cache()
1765 * traces the host cache values at desired points
1766 */
1767static void
1768trace_host_cache()
1769{
1770	syslog(LOG_ERR,
1771		"host_cache: accesses=%d lookups=%d deadhits=%d goodhits=%d\n",
1772		host_cache_accesses, host_cache_lookups, deadhost_cache_hits,
1773		goodhost_cache_hits);
1774}
1775#endif /* CACHE_DEBUG */
1776