1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License").  You may not use this file except in compliance
7 * with the License.
8 *
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
13 *
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
19 *
20 * CDDL HEADER END
21 */
22/*
23 * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24 * Use is subject to license terms.
25 */
26
27/*
28 * Portions Copyright 2007-2013 Apple Inc.
29 */
30
31#pragma ident	"@(#)auto_subr.c	1.95	05/12/19 SMI"
32
33#include <mach/task.h>
34#include <mach/task_special_ports.h>
35#include <mach/thread_act.h>
36#include <mach/vm_map.h>
37#include <vm/vm_map.h>
38#include <vm/vm_kern.h>
39
40#include <sys/param.h>
41#include <sys/systm.h>
42#include <sys/kernel.h>
43#include <sys/file.h>
44#include <sys/stat.h>
45#include <sys/buf.h>
46#include <sys/proc.h>
47#include <sys/conf.h>
48#include <sys/mount.h>
49#include <sys/vnode.h>
50#include <sys/lock.h>
51#include <sys/malloc.h>
52#include <sys/dirent.h>
53#include <sys/namei.h>
54#include <sys/kauth.h>
55#include <sys/attr.h>
56#include <sys/time.h>
57#include <sys/uio.h>
58#include <sys/vm.h>
59#include <sys/errno.h>
60#include <vfs/vfs_support.h>
61
62#include <kern/assert.h>
63#include <kern/locks.h>
64#include <kern/clock.h>
65
66#include <IOKit/IOLib.h>
67
68#ifdef DEBUG
69#include <stdarg.h>
70#endif
71
72#include "autofs.h"
73#include "triggers.h"
74#include "triggers_priv.h"
75#include "autofs_kern.h"
76#include "autofs_protUser.h"
77
78#define	TYPICALPATH_MAX	64
79
80/*
81 * List of subtriggers to be planted on a mount.
82 */
typedef struct subtrigger {
	/*
	 * NOTE(review): auto_do_subtrigger_mount() reads a trigger's
	 * ti_private as a "struct mounta *" while auto_do_submount() reads
	 * it as a "subtrigger_t *"; if both can be handed the same object,
	 * mounta must remain the first member - confirm before reordering.
	 */
	struct mounta mounta;	/* struct mounta from the action list entry for the subtrigger */
	int inplace;		/* is this subtrigger currently in place? */
	struct subtrigger *next;	/* next entry in the singly-linked list; NULL at the end */
} subtrigger_t;
88
/* Builds a subtrigger list from an action list, freeing the action list entries. */
static subtrigger_t *auto_make_subtriggers(action_list *);
/* Releases a subtrigger list that could not be handed to a mount's trigger callback. */
static void auto_free_subtriggers(subtrigger_t *);
/* Callback registered with vfs_settriggercallback(); receives the subtrigger list as its data. */
static void auto_trigger_callback(mount_t, vfs_trigger_callback_op_t,
     void *, vfs_context_t);
/* NOTE(review): defined outside this excerpt; presumably plants the listed subtriggers on the mount - confirm. */
static void auto_plant_subtriggers(mount_t, subtrigger_t *, vfs_context_t);
94
95/*
96 * Parameters passed to an autofs mount thread.
97 */
struct autofs_callargs {
	struct trigger_callargs fnc_t;	/* common args */
};

/*
 * Shorthand for reaching the members of the embedded
 * struct trigger_callargs, e.g. argsp->fnc_vp rather than
 * argsp->fnc_t.tc_vp.
 */
#define fnc_vp			fnc_t.tc_vp
#define fnc_this_fsid		fnc_t.tc_this_fsid
#define fnc_ti			fnc_t.tc_ti
#define fnc_origin		fnc_t.tc_origin

/*
 * fnc_uid is filled in by autofs_trigger_get_mount_args(); fnc_uid and
 * fnc_asid are passed to autofs_mount(), which fills in fnc_mounted_fsid
 * and fnc_retflags (see auto_mount_request()).
 */
#define fnc_uid			fnc_t.tc_uid
#define fnc_asid		fnc_t.tc_asid
#define fnc_mounted_fsid	fnc_t.tc_mounted_fsid
#define fnc_retflags		fnc_t.tc_retflags
111
/*
 * Forward declaration; the definition appears later in this file.
 * Sends a mount request to automountd and, for an AUTOFS_ACTION reply,
 * registers the subtriggers described by the returned action list.
 */
static int auto_mount_request(struct autofs_callargs *, char *, char *,
    char *, int, char *, char *, boolean_t, boolean_t, boolean_t *);
114
115/*
116 * Unless we're an automounter (in which case, the process that caused
117 * us to be asked to do something already has a reader lock on the fninfo_t,
118 * and some other process might be asking for a writer lock, which would
119 * prevent us from getting a reader lock, and thus would prevent us from
120 * finishing the mount, and thus would prevent the process that caused us
121 * to be asked to mount the file system from releasing the reader lock,
122 * so everybody's deadlocked), get a reader lock on the fninfo_t.
123 */
124void
125auto_fninfo_lock_shared(fninfo_t *fnip, int pid)
126{
127	if (!auto_is_automounter(pid))
128		lck_rw_lock_shared(fnip->fi_rwlock);
129}
130
131/*
132 * Release the shared lock, unless we're an automounter, in which case
133 * we don't have one.
134 */
135void
136auto_fninfo_unlock_shared(fninfo_t *fnip, int pid)
137{
138	if (!auto_is_automounter(pid))
139		lck_rw_unlock_shared(fnip->fi_rwlock);
140}
141
142/*
143 * Checks whether a given mount_t is autofs.
144 */
145int
146auto_is_autofs(mount_t mp)
147{
148	struct vfsstatfs *vfsstat;
149	size_t typename_len;
150	static const char autofs_typename[] = MNTTYPE_AUTOFS;
151
152	vfsstat = vfs_statfs(mp);
153	typename_len = strlen(vfsstat->f_fstypename) + 1;
154	if (typename_len != sizeof autofs_typename)
155		return (0);	/* no, the strings aren't even the same length */
156	if (bcmp(autofs_typename, vfsstat->f_fstypename, typename_len) != 0)
157		return (0);	/* same length, different contents */
158	return (1);	/* same length, same contents */
159}
160
161/*
162 * Greasy hack to handle home directory mounting.
163 *
164 * If a process is a home directory mounter, then the first unmount it does
 * of automounted file systems causes the trigger on which the mount was done
166 * to be marked as "home directory mount in progress", and no fsctl
167 * operations will trigger a mount on the last component of a pathname.
168 *
169 * We support an fsctl that also sets "home directory mount in progress"
170 * on a vnode; only home directory mounter processes may use this, and
171 * they may only do it once.
172 *
173 * The /dev/autofs_homedirmounter device holds onto the vnode for the
174 * trigger in question; when the device is closed, the vnode's "home
175 * directory mount in progress" state is cleared.  This means that, if
176 * the home directory mounter process dies, things go back to normal
177 * for that trigger.
178 *
179 * All attempts to trigger a mount on a vnode marked as "home directory
180 * mount in progress" fail with ENOENT, so that nobody will trigger a
181 * mount while the mount is in progress; this closes a race condition
182 * that we've seen in 6932244/7482727.
183 *
184 * As the vnode is held onto, we can set its owner and it'll persist.
185 */
186
187/*
188 * Called when a trigger is rearmed.
189 * Mark it as having a home directory mount in progress on it if
190 * appropriate.
191 */
192static void
193auto_rearm(vnode_t vp, int pid)
194{
195	int error;
196	fnnode_t *fnp;
197
198	error = auto_mark_vnode_homedirmount(vp, pid, 0);
199
200	/*
201	 * If we get EINVAL, it means we're not a home directory mounter
202	 * process, in which case we want to revert the owner of the
203	 * trigger back to root, so unprivileged processes can't mount
204	 * on it.  (If we are a home directory mounter process, we
205	 * want to leave the owner as is, so that whoever triggered
206	 * the mount on the vnode can do another mount on it; they,
207	 * and root, are the only people who could have done the unmount.)
208	 *
209	 * If we get EBUSY, it means we're a home directory mounter
210	 * process that has already unmounted an automounted file
211	 * system, in which case we've made a mistake; log that.
212	 *
213	 * If we get ENOENT, it means we're a home directory mounter
214	 * process and we couldn't get a reference on the trigger;
215	 * log that.
216	 */
217	if (error == EINVAL) {
218		fnp = vntofn(vp);
219		lck_mtx_lock(fnp->fn_lock);
220		fnp->fn_uid = 0;
221		lck_mtx_unlock(fnp->fn_lock);
222	} else if (error == EBUSY)
223		IOLog("auto_rearm: process %d has already unmounted an automounted file system\n",
224		    pid);
225	else if (error == ENOENT)
226		IOLog("auto_rearm: can't get a reference on vnode %p\n", vp);
227}
228
229int
230auto_lookup_request(fninfo_t *fnip, char *name, int namelen, char *subdir,
231    vfs_context_t context, int *node_type, boolean_t *lu_verbose)
232{
233	boolean_t isdirect;
234	mach_port_t automount_port;
235	int error;
236	kern_return_t ret;
237
238	AUTOFS_DPRINT((4, "auto_lookup_request: path=%s name=%.*s\n",
239	    fnip->fi_path, namelen, name));
240
241	isdirect = fnip->fi_flags & MF_DIRECT ? TRUE : FALSE;
242
243	AUTOFS_DPRINT((4, "auto_lookup_request: using key=%.*s, subdir=%s\n",
244	    keylen, key, subdir));
245
246	error = auto_get_automountd_port(&automount_port);
247	if (error)
248		return error;
249	ret = autofs_lookup(automount_port, fnip->fi_map, fnip->fi_path,
250	    name, namelen, subdir, fnip->fi_opts, isdirect,
251	    kauth_cred_getuid(vfs_context_ucred(context)), &error, node_type,
252	    lu_verbose);
253	auto_release_port(automount_port);
254	if (ret != KERN_SUCCESS) {
255		if (ret == MACH_RCV_INTERRUPTED || ret == MACH_SEND_INTERRUPTED) {
256			error = EINTR;	// interrupted by a signal
257		} else {
258			IOLog("autofs: autofs_lookup failed, status 0x%08x\n", ret);
259			error = EIO;		/* XXX - process Mach errors */
260		}
261	}
262	AUTOFS_DPRINT((5, "auto_lookup_request: path=%s name=%.*s error=%d\n",
263	    fnip->fi_path, namelen, name, error));
264	return error;
265}
266
267static int
268get_key_and_subdirectory(struct fninfo *fnip, char *name, int namelen,
269    fnnode_t *parentp, char **keyp, int *keylenp, char **subdirp,
270    char *pathbuf, size_t pathbuflen)
271{
272	fnnode_t *rootfnp, *stopfnp = NULL;
273	struct autofs_globals *fngp;
274	char *subdir;
275
276	/*
277	 * XXX - can we just test fnip->fi_flags & MF_SUBTRIGGER here,
278	 * so that we do the "build the subdirectory" stuff if
279	 * MF_SUBTRIGGER isn't set and don't do it if it's set?
280	 */
281	if (fnip->fi_subdir[0] == '\0') {
282		rootfnp = vntofn(fnip->fi_rootvp);
283		fngp = rootfnp->fn_globals;
284
285		/*
286		 * Build the subdirectory for this fnnode.
287		 */
288		subdir = &pathbuf[pathbuflen];
289		*--subdir = '\0';	/* trailing NUL */
290
291		if (!(fnip->fi_flags & MF_DIRECT)) {
292			/*
293			 * This is an indirect map.  The key will
294			 * be the name of the directory in the path
295			 * right under the top-level directory of the
296			 * map, and the subdirectory will be the rest
297			 * of that path.
298			 *
299			 * That means we stop adding names to the
300			 * subdirectory path when we find a directory
301			 * *below* the root directory for the map,
302			 * as that directory's name is the key, not
303			 * part of the subdirectory.  That directory's
304			 * parent is the root directory for this mount.
305			 */
306			stopfnp = rootfnp;
307		}
308		for (;;) {
309			/*
310			 * Don't walk up to the Root Of All Evil^W
311			 * top-level autofs mounts.
312			 */
313			if (parentp == fngp->fng_rootfnnodep)
314				break;
315
316			/*
317			 * Don't walk up to any directory where
318			 * we should stop.
319			 */
320			if (parentp == stopfnp)
321				break;
322
323			/*
324			 * Add this component to the path.
325			 */
326			subdir -= namelen;
327			if (subdir < pathbuf)
328				return (ENAMETOOLONG);
329			memcpy(subdir, name, namelen);
330
331			subdir--;
332			if (subdir < pathbuf)
333				return (ENAMETOOLONG);
334			*subdir = '/';
335
336			name = parentp->fn_name;
337			namelen = parentp->fn_namelen;
338			if (parentp->fn_parent == parentp)
339				break;
340			parentp = parentp->fn_parent;
341		}
342	} else
343		subdir = fnip->fi_subdir;
344
345	/*
346	 * For direct maps, the key for an entry is the mount point
347	 * for the map.  For indirect maps, it's the name of the
348	 * first component in the pathname past the root of the
349	 * mount, which is what we've found.
350	 */
351	if (fnip->fi_flags & MF_DIRECT) {
352		*keyp = fnip->fi_key;
353		*keylenp = fnip->fi_keylen;
354	} else {
355		*keyp = name;
356		*keylenp = namelen;
357	}
358	*subdirp = subdir;
359	return (0);
360}
361
362int
363auto_lookup_aux(struct fninfo *fnip, fnnode_t *parentp, char *name, int namelen,
364    vfs_context_t context, int *node_type)
365{
366	int error = 0;
367	char pathbuf[MAXPATHLEN];
368	char *key;
369	int keylen;
370	char *subdir;
371	struct autofs_globals *fngp;
372	boolean_t lu_verbose;
373
374	/*
375	 * If we're the automounter or a child of the automounter,
376	 * don't wait for us to finish what we're doing, and
377	 * don't ask ourselves to do anything - just say we succeeded.
378	 */
379	if (auto_is_automounter(vfs_context_pid(context)))
380		return (0);
381
382	/*
383	 * Find the appropriate key and subdirectory to pass to
384	 * auto_lookup_request().
385	 */
386	error = get_key_and_subdirectory(fnip, name, namelen, parentp,
387	    &key, &keylen, &subdir, pathbuf, sizeof(pathbuf));
388	if (error != 0)
389		return (error);
390
391	error = auto_lookup_request(fnip, key, keylen, subdir, context,
392	    node_type, &lu_verbose);
393	if (error == 0) {
394		fngp = vntofn(fnip->fi_rootvp)->fn_globals;
395		fngp->fng_verbose = lu_verbose;
396	}
397
398	return (error);
399}
400
401int
402auto_readdir_aux(struct fninfo *fnip, fnnode_t *dirp, off_t offset,
403    u_int alloc_count, int64_t *return_offset, boolean_t *return_eof,
404    byte_buffer *return_buffer, mach_msg_type_number_t *return_bufcount)
405{
406	int error = 0;
407	char pathbuf[MAXPATHLEN];
408	char *key;
409	int keylen;
410	char *subdir;
411	mach_port_t automount_port;
412	boolean_t isdirect;
413	kern_return_t ret;
414
415	isdirect = fnip->fi_flags & MF_DIRECT ? TRUE : FALSE;
416
417	error = auto_get_automountd_port(&automount_port);
418	if (error)
419		goto done;
420
421	if (dirp == vntofn(fnip->fi_rootvp) && !isdirect) {
422		/*
423		 * This is the top-level directory of an indirect map
424		 * mount, so, to read the contents of the directory,
425		 * we should enumerate the map.
426		 *
427		 * XXX - what if this is really for a direct map
428		 * with nothing mounted atop it, but with stuff
429		 * mounted in directories underneath it, so it's
430		 * "really" an indirect map?  That requires a
431		 * subdir readdir.
432		 */
433		ret = autofs_readdir(automount_port, fnip->fi_map,
434		    offset, alloc_count, &error,
435		    return_offset, return_eof, return_buffer,
436		return_bufcount);
437	} else {
438		/*
439		 * This is a directory under a top-level directory of
440		 * an indirect map, or the top-level directory of a
441		 * direct map (which can have things underneath it),
442		 * so we should look up the map entry for the indirect
443		 * map's top-level directory or for the direct map
444		 * and extract subdirectory information from it.
445		 */
446
447		/*
448		 * Find the appropriate key and subdirectory to pass to
449		 * autofs_readsubdir().
450		 */
451		error = get_key_and_subdirectory(fnip, dirp->fn_name,
452		    dirp->fn_namelen, dirp->fn_parent, &key, &keylen, &subdir,
453		    pathbuf, sizeof(pathbuf));
454		if (error != 0) {
455			auto_release_port(automount_port);
456			goto done;
457		}
458
459		ret = autofs_readsubdir(automount_port, fnip->fi_map,
460		    key, keylen, subdir, fnip->fi_opts,
461		    (uint32_t) dirp->fn_nodeid, offset, alloc_count, &error,
462		    return_offset, return_eof, return_buffer,
463		return_bufcount);
464	}
465
466	auto_release_port(automount_port);
467	if (ret != KERN_SUCCESS) {
468		if (ret == MACH_RCV_INTERRUPTED || ret == MACH_SEND_INTERRUPTED) {
469			error = EINTR;	// interrupted by a signal
470		} else {
471			IOLog("autofs: autofs_readdir failed, status 0x%08x\n", ret);
472			error = EIO;
473		}
474	}
475
476done:
477	AUTOFS_DPRINT((5, "auto_readdir_aux: path=%s name=%.*s subdir=%s error=%d\n",
478	    fnip->fi_path, keylen, key, subdir, error));
479	return (error);
480}
481
482static int
483auto_check_homedirmount(vnode_t vp)
484{
485	fnnode_t *fnp = vntofn(vp);
486
487	return (fnp->fn_flags & MF_HOMEDIRMOUNT);
488}
489
490/*
491 * For doing mounts atop an autofs node that's a trigger.
492 */
493static void *
494autofs_trigger_get_mount_args(__unused vnode_t vp, vfs_context_t ctx, int *errp)
495{
496	struct autofs_callargs *argsp;
497
498	/*
499	 * Allocate the args structure.
500	 */
501	MALLOC(argsp, struct autofs_callargs *, sizeof (*argsp), M_AUTOFS,
502	    M_WAITOK);
503
504	/*
505	 * Get the UID for the process that triggered the mount, so
506	 * we do the mount as that user.
507	 */
508	argsp->fnc_uid = kauth_cred_getuid(vfs_context_ucred(ctx));
509
510	*errp = 0;
511	return (argsp);
512}
513
514/*
515 * Starting point for thread to handle mount requests with automountd.
516 */
517static int
518auto_do_mount(void *arg)
519{
520	struct autofs_callargs *argsp = arg;
521	vnode_t vp;
522	fnnode_t *fnp;
523	struct fninfo *fnip;
524	char *key;
525	int keylen;
526	boolean_t isdirect, issubtrigger;
527	boolean_t mr_verbose;
528	int error;
529	char pathbuf[MAXPATHLEN];
530	char *subdir;
531	struct autofs_globals *fngp;
532
533	vp = argsp->fnc_vp;
534	fnp = vntofn(vp);
535	fnip = vfstofni(vnode_mount(vp));
536
537	/*
538	 * This is in a kernel thread, so the PID is 0.
539	 */
540	auto_fninfo_lock_shared(fnip, 0);
541
542	/*
543	 * Is this in the process of being unmounted?  If so, give
544	 * up, so that we aren't holding an iocount or usecount on
545	 * the vnode, and the unmount can finish.
546	 */
547	if (fnip->fi_flags & MF_UNMOUNTING) {
548		auto_fninfo_unlock_shared(fnip, 0);
549		return (ENOENT);
550	}
551
552	/*
553	 * Find the appropriate key and subdirectory to pass to
554	 * auto_mount_request().
555	 */
556	error = get_key_and_subdirectory(fnip, fnp->fn_name,
557	    fnp->fn_namelen, fnp->fn_parent, &key, &keylen,
558	    &subdir, pathbuf, sizeof(pathbuf));
559	if (error != 0) {
560		auto_fninfo_unlock_shared(fnip, 0);
561		return (error);
562	}
563
564        /* <13595777> Keep from racing with homedirmounter */
565	lck_mtx_lock(fnp->fn_mnt_lock);
566
567	/*
568	 * Set the UID of the mount point to the UID of the process on
569	 * whose behalf we're doing the mount; the mount might have to
570	 * be done as that user if it requires authentication as that
571	 * user.
572	 */
573	lck_mtx_lock(fnp->fn_lock);
574	fnp->fn_uid = argsp->fnc_uid;
575	lck_mtx_unlock(fnp->fn_lock);
576
577	isdirect = (fnip->fi_flags & MF_DIRECT) ? TRUE : FALSE;
578	issubtrigger = (fnip->fi_flags & MF_SUBTRIGGER) ? TRUE : FALSE;
579
580	error = auto_mount_request(argsp, fnip->fi_map, fnip->fi_path,
581	    key, keylen, subdir, fnip->fi_opts, isdirect, issubtrigger,
582	    &mr_verbose);
583	if (!error) {
584		/*
585		 * Change setting of "verbose" flag; references to
586		 * non-existent names in an autofs file system with
587		 * "=v" at the beginning turn verbosity on.
588		 */
589		fngp = vntofn(fnip->fi_rootvp)->fn_globals;
590		fngp->fng_verbose = mr_verbose;
591	}
592
593	if (error != 0) {
594		/*
595		 * Revert the ownership of the mount point.
596		 */
597		lck_mtx_lock(fnp->fn_lock);
598		fnp->fn_uid = 0;
599		lck_mtx_unlock(fnp->fn_lock);
600	}
601
602        /* <13595777> Keep from racing with homedirmounter */
603	lck_mtx_unlock(fnp->fn_mnt_lock);
604
605	auto_fninfo_unlock_shared(fnip, 0);
606
607	return (error);
608}
609
610static void
611autofs_trigger_rel_mount_args(void *data)
612{
613	struct autofs_callargs *argsp = data;
614
615	FREE(argsp, M_AUTOFS);
616}
617
618/*
619 * Starting point for thread to handle submount requests with
620 * automountd.  This is *not* mounting atop an autofs node,
621 * it's mounting atop a node for some file system mounted by
622 * autofs.
623 */
624static int
625auto_do_submount(void *arg)
626{
627	struct autofs_callargs *argsp = arg;
628	subtrigger_t *subtrigger = argsp->fnc_ti->ti_private;
629	struct mounta *m = &subtrigger->mounta;
630	char *key;
631	int keylen;
632	int error;
633	boolean_t mr_verbose;
634
635	key = m->key;
636	if (key != NULL)
637		keylen = (int)strlen(key);
638	else {
639		/*
640		 * automountd handed us a null string; presumably
641		 * that means the key is irrelevant, so use a
642		 * null string.
643		 */
644		key = "";
645		keylen = 0;
646	}
647	error = auto_mount_request(argsp, m->map, m->path, key, keylen,
648	    m->subdir, m->opts, TRUE, TRUE, &mr_verbose);
649
650	return (error);
651}
652
653/*
654 * For automounting an autofs trigger for a submount; see the comment in
655 * auto_plant_subtriggers() for the reason why we sometimes add that
656 * extra level of indirection.
657 */
658static int
659auto_mount_subtrigger_request(
660	char *mntpt,
661	char *submntpt,
662	char *path,
663	char *opts,
664	char *map,
665	char *subdir,
666	char *key,
667	uint32_t flags,
668	uint32_t mntflags,
669	int32_t direct,
670	fsid_t *fsidp)
671{
672	int error;
673	mach_port_t automount_port;
674	kern_return_t ret;
675	boolean_t top_level;
676
677	error = auto_get_automountd_port(&automount_port);
678	if (error)
679		return (error);
680	ret = autofs_mount_subtrigger(automount_port, mntpt, submntpt, path,
681	    opts, map, subdir, key, flags, mntflags, direct, fsidp,
682	    &top_level, &error);
683	auto_release_port(automount_port);
684	if (ret != KERN_SUCCESS) {
685		if (ret == MACH_RCV_INTERRUPTED || ret == MACH_SEND_INTERRUPTED) {
686			error = EINTR;	// interrupted by a signal
687		} else {
688			IOLog("autofs: autofs_mount_subtrigger failed, status 0x%08x\n",
689			  ret);
690			error = EIO;		/* XXX - process Mach errors */
691		}
692	}
693	return (error);
694}
695
696/*
697 * For doing mounts atop an autofs node that's a trigger.
698 */
699static void *
700autofs_subtrigger_get_mount_args(__unused vnode_t vp,
701    __unused vfs_context_t ctx, int *errp)
702{
703	struct trigger_callargs *argsp;
704
705	/*
706	 * Allocate the args structure.
707	 */
708	MALLOC(argsp, struct trigger_callargs *, sizeof (*argsp), M_AUTOFS,
709	    M_WAITOK);
710	*errp = 0;
711	return (argsp);
712}
713
714/*
715 * Starting point for thread to handle subtrigger mount requests with
716 * automountd.
717 */
718static int
719auto_do_subtrigger_mount(void *arg)
720{
721	struct trigger_callargs *tc = arg;
722	struct mounta *m = tc->tc_ti->ti_private;
723	char *mntpnt;
724	int error;
725
726	if (m->dir[0] == '.' && m->dir[1] == '\0') {
727		/*
728		 * mounting on the trigger node
729		 */
730		mntpnt = ".";
731	} else {
732		/*
733		 * ignore "./" in front of mountpoint
734		 */
735		mntpnt = m->dir + 2;
736	}
737
738	error = auto_mount_subtrigger_request(m->path, mntpnt, m->path,
739	    m->opts, m->map, m->subdir, m->key, m->flags, m->mntflags,
740	    m->isdirect, &tc->tc_mounted_fsid);
741	tc->tc_retflags = FALSE;	/* we mounted an autofs file system; it's not volfs or NFS, hard or otherwise */
742
743	return (error);
744}
745
746static void
747autofs_subtrigger_rel_mount_args(void *data)
748{
749	struct trigger_callargs *argsp = data;
750
751	FREE(argsp, M_AUTOFS);
752}
753
754static void
755auto_subtrigger_release(void *data)
756{
757	subtrigger_t *subtrigger = data;
758
759	/*
760	 * This is only called for external triggers.  If the trigger info
761	 * structure pointing to us is being released, it means that the
762	 * vnode with the external trigger in question is being released,
763	 * and so the external trigger is going away and would need to
764	 * be replanted.
765	 */
766	subtrigger->inplace = 0;
767}
768
769static int
770getstring(char **strp, uint8_t **inbufp, mach_msg_type_number_t *bytes_leftp)
771{
772	uint32_t stringlen;
773
774	if (*bytes_leftp < sizeof (uint32_t)) {
775		IOLog("Action list too short for string length");
776		return (EIO);
777	}
778	memcpy(&stringlen, *inbufp, sizeof (uint32_t));
779	*inbufp += sizeof (uint32_t);
780	*bytes_leftp -= (mach_msg_type_number_t)sizeof (uint32_t);
781	if (stringlen == 0xFFFFFFFF) {
782		/* Null pointer */
783		*strp = NULL;
784	} else {
785		if (*bytes_leftp < stringlen) {
786			IOLog("Action list too short for string data");
787			return (EIO);
788		}
789		MALLOC(*strp, char *, stringlen + 1, M_AUTOFS, M_WAITOK);
790		if (*strp == NULL) {
791			IOLog("No space for string data in action list");
792			return (ENOMEM);
793		}
794		memcpy(*strp, *inbufp, stringlen);
795		(*strp)[stringlen] = '\0';
796		*inbufp += stringlen;
797		*bytes_leftp -= stringlen;
798	}
799	return (0);
800}
801
802static int
803getint(int *intp, uint8_t **inbufp, mach_msg_type_number_t *bytes_leftp)
804{
805	if (*bytes_leftp < sizeof (int)) {
806		IOLog("Action list too short for int");
807		return (EIO);
808	}
809	memcpy(intp, *inbufp, sizeof (int));
810	*inbufp += sizeof (int);
811	*bytes_leftp -= (mach_msg_type_number_t)sizeof (int);
812	return (0);
813}
814
815static int
816getuint32(uint32_t *uintp, uint8_t **inbufp, mach_msg_type_number_t *bytes_leftp)
817{
818	if (*bytes_leftp < sizeof (uint32_t)) {
819		IOLog("Action list too short for uint32_t");
820		return (EIO);
821	}
822	memcpy(uintp, *inbufp, sizeof (uint32_t));
823	*inbufp += sizeof (uint32_t);
824	*bytes_leftp -= (mach_msg_type_number_t)sizeof (uint32_t);
825	return (0);
826}
827
828/*
829 * Free the strings pointed to by a struct mounta.
830 */
831static void
832free_mounta_strings(struct mounta *m)
833{
834	if (m->dir != NULL)
835		FREE(m->dir, M_AUTOFS);
836	if (m->opts != NULL)
837		FREE(m->opts, M_AUTOFS);
838	if (m->path != NULL)
839		FREE(m->path, M_AUTOFS);
840	if (m->map != NULL)
841		FREE(m->map, M_AUTOFS);
842	if (m->subdir != NULL)
843		FREE(m->subdir, M_AUTOFS);
844	if (m->trig_mntpnt != NULL)
845		FREE(m->trig_mntpnt, M_AUTOFS);
846	if (m->key != NULL)
847		FREE(m->key, M_AUTOFS);
848}
849
850static void
851free_action_list(action_list *alp)
852{
853	action_list *action, *next_action;
854
855	for (action = alp; action != NULL; action = next_action) {
856		next_action = action->next;
857		free_mounta_strings(&action->mounta);
858		FREE(action, M_AUTOFS);
859	}
860}
861
862static int
863auto_mount_request(
864	struct autofs_callargs *argsp,
865	char *map,
866	char *path,
867	char *key,
868	int keylen,
869	char *subdir,
870	char *opts,
871	boolean_t isdirect,
872	boolean_t issubtrigger,
873	boolean_t *mr_verbosep)
874{
875	mach_port_t automount_port;
876	int error;
877	int mr_type;
878	kern_return_t ret;
879	byte_buffer actions_buffer;
880	mach_msg_type_number_t actions_bufcount;
881	vm_map_offset_t map_data;
882	vm_offset_t data;
883	uint8_t *inbuf;
884	mach_msg_type_number_t bytes_left;
885	action_list *alphead, *alp, *prevalp;
886	subtrigger_t *subtriggers;
887
888	AUTOFS_DPRINT((4, "auto_mount_request: path=%s key=%.*s\n",
889	    path, keylen, key));
890
891	alphead = NULL;
892	error = auto_get_automountd_port(&automount_port);
893	if (error) {
894		goto done;
895	}
896	ret = autofs_mount(automount_port, map, path, key, keylen, subdir,
897	    opts, isdirect, issubtrigger, argsp->fnc_this_fsid,
898	    argsp->fnc_uid, argsp->fnc_asid, &mr_type,
899	    &argsp->fnc_mounted_fsid, &argsp->fnc_retflags,
900	    &actions_buffer, &actions_bufcount, &error, mr_verbosep);
901	auto_release_port(automount_port);
902	if (ret == KERN_SUCCESS) {
903		switch (mr_type) {
904		case AUTOFS_ACTION:
905			error = 0;
906
907			/*
908			 * Get the action list.
909			 */
910			ret = vm_map_copyout(kernel_map, &map_data,
911			    (vm_map_copy_t)actions_buffer);
912			if (ret != KERN_SUCCESS) {
913				if (ret == MACH_RCV_INTERRUPTED || ret == MACH_SEND_INTERRUPTED) {
914					error = EINTR;	// interrupted by a signal
915				} else {
916					/* XXX - deal with Mach errors */
917					IOLog("autofs: vm_map_copyout failed, status 0x%08x\n", ret);
918					error = EIO;
919				}
920				goto done;
921			}
922			data = CAST_DOWN(vm_offset_t, map_data);
923
924			/*
925			 * Deserialize the action list.
926			 */
927			prevalp = NULL;
928			inbuf = (uint8_t *)data;
929			bytes_left = actions_bufcount;
930			while (bytes_left != 0) {
931				MALLOC(alp, action_list *, sizeof(*alp),
932				    M_AUTOFS, M_WAITOK);
933				if (prevalp == NULL)
934					alphead = alp;
935				else
936					prevalp->next = alp;
937				bzero(alp, sizeof *alp);
938				error = getstring(&alp->mounta.dir, &inbuf,
939				    &bytes_left);
940				if (error)
941					break;
942				error = getstring(&alp->mounta.opts, &inbuf,
943				    &bytes_left);
944				if (error)
945					break;
946				error = getstring(&alp->mounta.path, &inbuf,
947				    &bytes_left);
948				if (error)
949					break;
950				error = getstring(&alp->mounta.map, &inbuf,
951				    &bytes_left);
952				if (error)
953					break;
954				error = getstring(&alp->mounta.subdir, &inbuf,
955				    &bytes_left);
956				if (error)
957					break;
958				error = getstring(&alp->mounta.trig_mntpnt, &inbuf,
959				    &bytes_left);
960				if (error)
961					break;
962				error = getint(&alp->mounta.flags, &inbuf,
963				    &bytes_left);
964				if (error)
965					break;
966				error = getint(&alp->mounta.mntflags, &inbuf,
967				    &bytes_left);
968				if (error)
969					break;
970				error = getuint32(&alp->mounta.isdirect, &inbuf,
971				    &bytes_left);
972				if (error)
973					break;
974				error = getuint32(&alp->mounta.needs_subtrigger, &inbuf,
975				    &bytes_left);
976				if (error)
977					break;
978				error = getstring(&alp->mounta.key, &inbuf,
979				    &bytes_left);
980				if (error)
981					break;
982				prevalp = alp;
983			}
984			vm_deallocate(kernel_map, data, actions_bufcount);
985			if (error) {
986				free_action_list(alphead);
987				alphead = NULL;
988				goto done;
989			}
990
991			/*
992			 * If there are any submounts to be lazily done
993			 * atop what was mounted, set the callback for
994			 * planting triggers for those submounts.  That
995			 * will cause the callback to be called in order
996			 * to plant the triggers for the first time.
997			 */
998			if (alphead != NULL) {
999				/*
1000				 * XXX - sanity-check the actions?
1001				 */
1002				subtriggers = auto_make_subtriggers(alphead);
1003
1004				/*
1005				 * Set the trigger callback for this mount,
1006				 * with the subtrigger list as the data
1007				 * to pass to it.  This will cause the
1008				 * callback to be called with VTC_REPLACE,
1009				 * to plant the triggers for the first
1010				 * time; the file system cannot be
1011				 * unmounted while that is in progress,
1012				 * so we will not have two threads in
1013				 * the callback on the same file system
1014				 * at the same time.
1015				 */
1016				error = vfs_settriggercallback(&argsp->fnc_mounted_fsid,
1017				    auto_trigger_callback, subtriggers, 0,
1018				    vfs_context_current());
1019				if (error == EBUSY) {
1020					/*
1021					 * This probably means it's getting
1022					 * unmounted out from under us.
1023					 * Free the subtrigger list, and
1024					 * treat that as ENOENT from the
1025					 * mount.
1026					 *
1027					 * (We'd get ENOENT if it had already
1028					 * been unmounted by the time we
1029					 * called vfs_settriggercallback().)
1030					 */
1031					auto_free_subtriggers(subtriggers);
1032					error = ENOENT;
1033				}
1034			}
1035			break;
1036		case AUTOFS_DONE:
1037			break;
1038		default:
1039			error = ENOENT;
1040			IOLog("auto_mount_request: unknown status %d\n",
1041			    mr_type);
1042			break;
1043		}
1044	} else {
1045		if (ret == MACH_RCV_INTERRUPTED || ret == MACH_SEND_INTERRUPTED) {
1046			error = EINTR;	// interrupted by a signal
1047		} else {
1048			IOLog("autofs: autofs_mount failed, status 0x%08x\n", ret);
1049			error = EIO;		/* XXX - process Mach errors */
1050		}
1051	}
1052
1053done:
1054	AUTOFS_DPRINT((5, "auto_mount_request: path=%s key=%.*s error=%d\n",
1055	    path, keylen, key, error));
1056	return (error);
1057}
1058
1059#if 0
1060/*
1061 * XXX - Solaris passes a bunch of other crap to automountd;
1062 * see the umntrequest structure.
1063 * Is any of that crap needed?
1064 * Yes - the file system type is used, as it special-cases
1065 * NFS.  For NFS, it also needs the port number, so it can
1066 * ping the server before trying to unmount it, so it doesn't
1067 * get stuck unmounting from an unresponsive server (I'm sure
1068 * having the mountd keep track of what stuff was mounted from
1069 * clients sounded like a good idea at the time; in retrospect,
1070 * it wasn't).  It also uses the "public" option and the
1071 * mount resource.
1072 */
1073static int
1074auto_send_unmount_request(
1075	fsid_t fsid,
1076	char *mntresource,
1077	char *mntpnt,
1078	char *fstype,
1079	char *mntopts)
1080{
1081	int error;
1082	mach_port_t automount_port;
1083	int status;
1084	kern_return_t ret;
1085
1086	AUTOFS_DPRINT((4, "\tauto_send_unmount_request: fstype=%s "
1087			" mntpnt=%s\n", fstype,	mntpnt));
1088	error = auto_get_automountd_port(&automount_port);
1089	if (error)
1090		goto done;
1091	ret = autofs_unmount(automount_port, fsid.val[0], fsid.val[1],
1092	    mntresource, mntpnt, fstype, mntopts, &status);
1093	auto_release_port(automount_port);
1094	if (ret == KERN_SUCCESS)
1095		error = status;
1096	else {
1097		if (ret == MACH_RCV_INTERRUPTED || ret == MACH_SEND_INTERRUPTED) {
1098			error = EINTR;	// interrupted by a signal
1099		} else {
1100			IOLog("autofs: autofs_unmount failed, status 0x%08x\n", ret);
1101			error = EIO;		/* XXX - process Mach errors */
1102		}
1103	}
1104
1105done:
1106	AUTOFS_DPRINT((5, "\tauto_send_unmount_request: error=%d\n", error));
1107
1108	return (error);
1109}
1110#endif
1111
1112/*
1113 * Create a subtrigger list from an action list.  Also frees the
1114 * action list entries in the process (but not the strings to
1115 * which they point, as those are now pointed to by the elements
1116 * in the subtrigger list.
1117 */
1118static subtrigger_t *
1119auto_make_subtriggers(action_list *alp)
1120{
1121	action_list *p, *pnext;
1122	subtrigger_t *subtriggers = NULL, *subtrigger, *prev_subtrigger = NULL;
1123
1124	for (p = alp; p != NULL; p = pnext) {
1125		pnext = p->next;
1126		MALLOC(subtrigger, subtrigger_t *, sizeof(subtrigger_t),
1127		    M_AUTOFS, M_WAITOK);
1128		subtrigger->mounta = p->mounta;	/* copies pointers */
1129		subtrigger->inplace = 0;	/* not planted yet */
1130		subtrigger->next = NULL;	/* end of the list, so far */
1131		if (prev_subtrigger == NULL) {
1132			/* First subtrigger - set the list head */
1133			subtriggers = subtrigger;
1134		} else {
1135			/* Not the first - append to the list */
1136			prev_subtrigger->next = subtrigger;
1137		}
1138		prev_subtrigger = subtrigger;
1139		FREE(p, M_AUTOFS);
1140	}
1141	return (subtriggers);
1142}
1143
1144/*
1145 * Free a subtrigger list.
1146 */
1147static void
1148auto_free_subtriggers(subtrigger_t *subtriggers)
1149{
1150	subtrigger_t *subtrigger, *next_subtrigger;
1151
1152	for (subtrigger = subtriggers; subtrigger != NULL;
1153	    subtrigger = next_subtrigger) {
1154		next_subtrigger = subtrigger->next;
1155		free_mounta_strings(&subtrigger->mounta);
1156		FREE(subtrigger, M_AUTOFS);
1157	}
1158}
1159
1160static void
1161auto_trigger_callback(mount_t mp, vfs_trigger_callback_op_t op, void *data,
1162    vfs_context_t ctx)
1163{
1164	subtrigger_t *subtriggers = data;
1165
1166	switch (op) {
1167
1168	case VTC_RELEASE:
1169		/*
1170		 * Release the subtrigger list.
1171		 */
1172		auto_free_subtriggers(subtriggers);
1173		break;
1174
1175	case VTC_REPLACE:
1176		/*
1177		 * Plant the triggers.
1178		 */
1179		auto_plant_subtriggers(mp, subtriggers, ctx);
1180		break;
1181	}
1182}
1183
1184static void
1185auto_plant_subtriggers(mount_t mp, subtrigger_t *subtriggers, vfs_context_t ctx)
1186{
1187	vnode_t dvp;
1188	subtrigger_t *subtrigger;
1189	struct mounta *m;
1190	int error;
1191	struct vnode_trigger_param vnt;
1192	struct vnode_trigger_info vti;
1193	trigger_info_t *ti;
1194
1195	AUTOFS_DPRINT((4, "auto_plant_subtriggers: subtriggers=%p\n", (void *)subtriggers));
1196
1197	/*
1198	 * XXX - this will hold an iocount on dvp; will that block any
1199	 * unmounts that might block us from finishing?
1200	 *
1201	 * Or do we already hold an iocount on it because it's the
1202	 * trigger we're resolving?
1203	 */
1204	dvp = vfs_vnodecovered(mp);
1205
1206	for (subtrigger = subtriggers; subtrigger != NULL;
1207	    subtrigger = subtrigger->next) {
1208		/*
1209		 * Is this subtrigger already (still) in place?  If so,
1210		 * skip it.
1211		 */
1212		if (subtrigger->inplace)
1213			continue;
1214
1215		m = &subtrigger->mounta;
1216
1217		AUTOFS_DPRINT((10, "\tsubtrigger on %s/%s\n",
1218		    vfs_statfs(mp)->f_mntonname, m->trig_mntpnt));
1219
1220		/*
1221		 * Tell the VFS layer the directory at mntpnt
1222		 * should be a trigger.
1223		 */
1224		if (m->needs_subtrigger) {
1225			/*
1226			 * The trigger causes an autofs subtrigger
1227			 * mount to be done, and the root of that
1228			 * submount is a trigger for the real
1229			 * file system.
1230			 *
1231			 * We do that for file systems other than
1232			 * NFS and autofs, so that we can do the
1233			 * mount as the user, and can do the
1234			 * home directory mount dance for it.
1235			 *
1236			 * XXX - this is necessary only if
1237			 * we can't do that with just a trigger;
1238			 * we might have to be able to do that
1239			 * with just a trigger for Dfs mounts.
1240			 */
1241			ti = trigger_new_autofs(&vnt, TF_AUTOFS,
1242			    auto_is_notrigger_process,
1243			    auto_is_nowait_process,
1244			    auto_is_homedirmounter_process,
1245			    NULL,
1246			    autofs_subtrigger_get_mount_args,
1247			    auto_do_subtrigger_mount,
1248			    autofs_subtrigger_rel_mount_args,
1249			    NULL,
1250			    auto_subtrigger_release, subtrigger);
1251		} else {
1252			/*
1253			 * The trigger will directly cause the
1254			 * mount to be done.
1255			 *
1256			 * We do that for NFS and autofs, because
1257			 * NFS mounts are done by a process running
1258			 * with root privilege (mount_nfs is set-UID
1259			 * root, so that it can get a privileged
1260			 * port if necessary, and NFS mounts are
1261			 * multi-user so they shouldn't appear to
1262			 * have been done by the user who happened
1263			 * to trigger the mount in any case), and
1264			 * autofs mounts are done directly in
1265			 * automountd, which runs as root.  NFS
1266			 * home directories don't get remounted
1267			 * at login (as they're multi-user mounts),
1268			 * and the whole notion of a home directory
1269			 * on a pseudo-file-system such as autofs
1270			 * is nonsensical, so we don't need to
1271			 * worry about the home directory mount
1272			 * dance for NFS or autofs.
1273			 *
1274			 * The whole two-level trigger thing is a
1275			 * kludge, so we don't do it if we don't
1276			 * have to.
1277			 */
1278			ti = trigger_new_autofs(&vnt, 0,
1279			    auto_is_notrigger_process,
1280			    auto_is_nowait_process,
1281			    auto_is_homedirmounter_process,
1282			    NULL,
1283			    autofs_trigger_get_mount_args,
1284			    auto_do_submount,
1285			    autofs_trigger_rel_mount_args,
1286			    NULL,
1287			    auto_subtrigger_release, subtrigger);
1288		}
1289		vti.vti_resolve_func = vnt.vnt_resolve_func;
1290		vti.vti_unresolve_func = vnt.vnt_unresolve_func;
1291		vti.vti_rearm_func = vnt.vnt_rearm_func;
1292		vti.vti_reclaim_func = vnt.vnt_reclaim_func;
1293		vti.vti_data = vnt.vnt_data;
1294		vti.vti_flags = vnt.vnt_flags;
1295
1296		error = vfs_addtrigger(mp, m->trig_mntpnt, &vti, ctx);
1297		if (error != 0) {
1298			trigger_free(ti);
1299			IOLog(
1300			    "autofs: vfs_addtrigger on %s/%s failed error=%d\n",
1301			    vfs_statfs(mp)->f_mntonname, m->trig_mntpnt, error);
1302			continue;
1303		}
1304
1305		/*
1306		 * This trigger is in place now.
1307		 */
1308		subtrigger->inplace = 1;
1309	}
1310
1311	/*
1312	 * Release the iocount we got above.
1313	 */
1314	vnode_put(dvp);
1315
1316	AUTOFS_DPRINT((5, "auto_plant_subtriggers: error=%d\n", error));
1317}
1318
1319int
1320auto_makefnnode(
1321	fnnode_t **fnpp,
1322	int node_type,
1323	mount_t mp,
1324	struct componentname *cnp,
1325	const char *name,
1326	vnode_t parent,
1327	int markroot,
1328	struct autofs_globals *fngp)
1329{
1330	int namelen;
1331	fnnode_t *fnp;
1332	struct fninfo *fnip;
1333	errno_t error;
1334	struct vnode_trigger_param vnt;
1335	vnode_t vp;
1336	char *tmpname;
1337	struct timeval now;
1338	/*
1339	 * autofs uses odd inode numbers
1340	 * automountd uses even inode numbers
1341	 */
1342	static ino_t nodeid = 3;
1343#ifdef DEBUG
1344	lck_attr_t *lckattr;
1345#endif
1346
1347	fnip = vfstofni(mp);
1348
1349	if (cnp != NULL) {
1350		name = cnp->cn_nameptr;
1351		namelen = cnp->cn_namelen;
1352	} else
1353		namelen = (int)strlen(name);
1354
1355	MALLOC(fnp, fnnode_t *, sizeof(fnnode_t), M_AUTOFS, M_WAITOK);
1356	bzero(fnp, sizeof(*fnp));
1357	fnp->fn_namelen = namelen;
1358	MALLOC(tmpname, char *, fnp->fn_namelen + 1, M_AUTOFS, M_WAITOK);
1359	bcopy(name, tmpname, namelen);
1360	tmpname[namelen] = '\0';
1361	fnp->fn_name = tmpname;
1362	/*
1363	 * ".." is added in auto_enter and auto_mount.
1364	 * "." is added in auto_mkdir and auto_mount.
1365	 */
1366	/*
1367	 * Note that fn_size and fn_linkcnt are already 0 since
1368	 * we zeroed out *fnp
1369	 */
1370	fnp->fn_mode = AUTOFS_MODE;
1371	microtime(&now);
1372	fnp->fn_crtime = fnp->fn_atime = fnp->fn_mtime = fnp->fn_ctime = now;
1373	lck_mtx_lock(autofs_nodeid_lock);
1374	/* XXX - does this need to be 2 for the root vnode? */
1375	fnp->fn_nodeid = nodeid;
1376	nodeid += 2;
1377	fnp->fn_globals = fngp;
1378	fngp->fng_fnnode_count++;
1379	lck_mtx_unlock(autofs_nodeid_lock);
1380
1381	bzero(&vnt, sizeof(struct vnode_trigger_param));
1382	vnt.vnt_params.vnfs_mp = mp;
1383	vnt.vnt_params.vnfs_vtype = (node_type & NT_SYMLINK) ? VLNK : VDIR;
1384	vnt.vnt_params.vnfs_str = MNTTYPE_AUTOFS;
1385	vnt.vnt_params.vnfs_dvp = parent;
1386	vnt.vnt_params.vnfs_fsnode = fnp;
1387	vnt.vnt_params.vnfs_vops = autofs_vnodeop_p;
1388	vnt.vnt_params.vnfs_markroot = markroot;
1389	vnt.vnt_params.vnfs_marksystem = 0;
1390	vnt.vnt_params.vnfs_rdev = 0;
1391	vnt.vnt_params.vnfs_filesize = 0;
1392	vnt.vnt_params.vnfs_cnp = cnp;
1393	vnt.vnt_params.vnfs_flags = VNFS_NOCACHE | VNFS_CANTCACHE;
1394
1395	if (node_type & NT_TRIGGER) {
1396		/*
1397		 * Home directory mounter processes should not trigger
1398		 * mounts on fsctl, so they can do the fsctl that
1399		 * marks a trigger as having a home directory mount
1400		 * in progress.
1401		 */
1402		fnp->fn_trigger_info = trigger_new_autofs(&vnt, 0,
1403		    auto_is_notrigger_process,
1404		    auto_is_nowait_process,
1405		    auto_is_homedirmounter_process,
1406		    auto_check_homedirmount,
1407		    autofs_trigger_get_mount_args,
1408		    auto_do_mount,
1409		    autofs_trigger_rel_mount_args,
1410		    auto_rearm,
1411		    NULL, NULL);
1412		if (node_type & NT_FORCEMOUNT)
1413			fnp->fn_trigger_info->ti_flags |= TF_FORCEMOUNT;
1414		error = vnode_create(VNCREATE_TRIGGER, VNCREATE_TRIGGER_SIZE,
1415		    &vnt, &vp);
1416	} else {
1417		error = vnode_create(VNCREATE_FLAVOR, VCREATESIZE,
1418		    &vnt.vnt_params, &vp);
1419	}
1420	if (error != 0) {
1421		AUTOFS_DPRINT((5, "auto_makefnnode failed with vnode_create error code %d\n", error));
1422		if (fnp->fn_trigger_info != NULL)
1423			trigger_free(fnp->fn_trigger_info);
1424		FREE(fnp->fn_name, M_TEMP);
1425		FREE(fnp, M_TEMP);
1426		return error;
1427	}
1428
1429	if (node_type & NT_SYMLINK) {
1430		char *tmp;
1431
1432		/*
1433		 * All autofs symlinks are links to "/".
1434		 */
1435		MALLOC(tmp, char *, 1, M_AUTOFS, M_WAITOK);
1436		bcopy("/", tmp, 1);
1437		fnp->fn_symlink = tmp;
1438		fnp->fn_symlinklen = 1;
1439	}
1440
1441	fnp->fn_vnode = vp;
1442	fnp->fn_vid = vnode_vid(vp);
1443
1444	/*
1445	 * Solaris has only one reference count on a vnode; when
1446	 * the count goes to zero, the vnode is inactivated.
1447	 *
1448	 * OS X has two reference counts, the iocount, for short-term
1449	 * holds within a system call, and the usecount, for long-term
1450	 * holds in another data structure.
1451	 *
1452	 * Releasing an iocount doesn't cause a check to be done for the
1453	 * reference counts going to zero, so that's not sufficient to
1454	 * get a vnode inactivated and recycled.  autofs expects vnodes
1455	 * to be inactivated as soon as possible; only vnodes that are
1456	 * mounted on or are otherwise being held onto (for example,
1457	 * because they are open or are a process's current or root
1458	 * directory) should stick around.  In particular, if you
1459	 * do a stat() on a directory, and the directory isn't open
1460	 * and isn't a current or root directory for any process, its
1461	 * vnode should be invalidated and recycled once the stat() is
1462	 * finished.
1463	 *
1464	 * To force this to happen, we add a usecount and then drop it.
1465	 * The vnode currently has no usecount and nobody yet has a
1466	 * pointer to it, so the usecount is zero; that means the
1467	 * usecount will go to 1 and then drop to zero.  That will
1468	 * set the VL_NEEDINACTIVE flag (as it has a non-zero iocount),
1469	 * causing the vnode to be inactivated when its iocount drops
1470	 * to zero.  Note that vnode_ref() can fail; if it does, we
1471	 * just don't do vnode_rele(), as that'll drive the usecount
1472	 * negative, and you get a "usecount -ve" crash.
1473	 *
1474	 * If we could just call vnode_setneedinactive(), we would, as
1475	 * it does all that we really want done and doesn't do any of
1476	 * the other stuff we don't care about, and it can't fail.
1477	 * However, we can't call vnode_setneedinactive(), as it's not
1478	 * exported from the kernel.
1479	 */
1480	if (vnode_ref(vp) == 0)
1481		vnode_rele(vp);
1482
1483#ifdef DEBUG
1484	/*
1485	 * Enable debugging on these locks.
1486	 */
1487	lckattr = lck_attr_alloc_init();
1488	lck_attr_setdebug(lckattr);
1489	fnp->fn_lock = lck_mtx_alloc_init(autofs_lck_grp, lckattr);
1490	fnp->fn_rwlock = lck_rw_alloc_init(autofs_lck_grp, lckattr);
1491	fnp->fn_mnt_lock = lck_mtx_alloc_init(autofs_lck_grp, lckattr);
1492	lck_attr_free(lckattr);
1493#else
1494	fnp->fn_lock = lck_mtx_alloc_init(autofs_lck_grp, NULL);
1495	fnp->fn_rwlock = lck_rw_alloc_init(autofs_lck_grp, NULL);
1496	fnp->fn_mnt_lock = lck_mtx_alloc_init(autofs_lck_grp, NULL);
1497#endif
1498	*fnpp = fnp;
1499
1500	return (0);
1501}
1502
1503
1504void
1505auto_freefnnode(fnnode_t *fnp)
1506{
1507	AUTOFS_DPRINT((4, "auto_freefnnode: fnp=%p\n", (void *)fnp));
1508
1509	assert(fnp->fn_linkcnt == 0);
1510	assert(!vnode_isinuse(vp, 1));
1511	assert(!vnode_isdir(vp) || fnp->fn_dirents == NULL);
1512	assert(fnp->fn_parent == NULL);
1513
1514	FREE(fnp->fn_name, M_AUTOFS);
1515	if (fnp->fn_symlink != NULL)
1516		FREE(fnp->fn_symlink, M_AUTOFS);
1517	lck_mtx_free(fnp->fn_lock, autofs_lck_grp);
1518	lck_rw_free(fnp->fn_rwlock, autofs_lck_grp);
1519	lck_mtx_free(fnp->fn_mnt_lock, autofs_lck_grp);
1520
1521	lck_mtx_lock(autofs_nodeid_lock);
1522	fnp->fn_globals->fng_fnnode_count--;
1523	lck_mtx_unlock(autofs_nodeid_lock);
1524	FREE(fnp, M_AUTOFS);
1525}
1526
1527/*
1528 * Remove the entry for *fnp from the list of directory entries of *dfnp.
1529 * Must be called with a write lock on *dfnp; it drops the write lock.
1530 */
1531void
1532auto_disconnect(
1533	fnnode_t *dfnp,
1534	fnnode_t *fnp)
1535{
1536	fnnode_t *tmp, **fnpp;
1537	vnode_t vp = fntovn(fnp);
1538	int isdir = vnode_isdir(vp);
1539	vnode_t dvp;
1540	struct vnode_attr vattr;
1541
1542	AUTOFS_DPRINT((4,
1543	    "auto_disconnect: dfnp=%p fnp=%p linkcnt=%d\n",
1544	    (void *)dfnp, (void *)fnp, fnp->fn_linkcnt));
1545
1546	assert(lck_rw_held_exclusive(dfnp->fn_rwlock));
1547	assert(fnp->fn_linkcnt == 1);
1548
1549	/*
1550	 * Decrement by 1 because we're removing the entry in dfnp.
1551	 */
1552	fnp->fn_linkcnt--;
1553
1554	/*
1555	 * only changed while holding parent's (dfnp) rw_lock
1556	 */
1557	fnp->fn_parent = NULL;
1558
1559	/*
1560	 * Remove the entry for this vnode from its parent directory.
1561	 */
1562	fnpp = &dfnp->fn_dirents;
1563	for (;;) {
1564		tmp = *fnpp;
1565		if (tmp == NULL) {
1566			panic(
1567			    "auto_disconnect: %p not in %p dirent list",
1568			    (void *)fnp, (void *)dfnp);
1569		}
1570		if (tmp == fnp) {
1571			*fnpp = tmp->fn_next;	/* remove it from the list */
1572			assert(!vnode_isinuse(vp, 1));
1573			if (isdir) {
1574				/*
1575				 * Vnode being disconnected was a directory,
1576				 * so it had a ".." pointer to its parent;
1577				 * that's going away, so there's one less
1578				 * link to the parent, i.e. to this directory.
1579				 */
1580				dfnp->fn_linkcnt--;
1581			}
1582
1583			/*
1584			 * One less entry in this directory.
1585			 */
1586			dfnp->fn_direntcnt--;
1587			break;
1588		}
1589		fnpp = &tmp->fn_next;
1590	}
1591
1592	/*
1593	 * If the directory from which we removed this is one on which
1594	 * a readdir will only return names corresponding to the vnodes
1595	 * we have for it, and somebody cares whether something was
1596	 * removed from it, notify them.
1597	 */
1598	dvp = fntovn(dfnp);
1599	if (vnode_ismonitored(dvp) && auto_nobrowse(dvp)) {
1600		vfs_get_notify_attributes(&vattr);
1601		auto_get_attributes(dvp, &vattr);
1602		vnode_notify(dvp, VNODE_EVENT_WRITE, &vattr);
1603	}
1604
1605	/*
1606	 * Drop the write lock on the parent.
1607	 */
1608	lck_rw_unlock_exclusive(dfnp->fn_rwlock);
1609
1610	/*
1611	 * Drop the usecount we held on the parent.
1612	 * We do this after all use of dfnp, as dropping the usecount
1613	 * could cause the parent to be reclaimed.
1614	 */
1615	vnode_rele(fntovn(dfnp));
1616
1617	AUTOFS_DPRINT((5, "auto_disconnect: done\n"));
1618}
1619
1620/*
1621 * Add an entry to a directory.
1622 */
1623int
1624auto_enter(fnnode_t *dfnp, struct componentname *cnp, fnnode_t **fnpp)
1625{
1626	struct fnnode *cfnp, **spp = NULL;
1627	vnode_t vp, cvp;
1628	uint32_t cvid;
1629	off_t offset = 0;
1630	off_t diff;
1631	errno_t error;
1632
1633	AUTOFS_DPRINT((4, "auto_enter: dfnp=%p, name=%.*s ", (void *)dfnp,
1634	    cnp->cn_namelen, cnp->cn_nameptr));
1635
1636	lck_rw_lock_exclusive(dfnp->fn_rwlock);
1637
1638	cfnp = dfnp->fn_dirents;
1639	if (cfnp == NULL) {
1640		/*
1641		 * Parent directory is empty, so this is the first
1642		 * entry.
1643		 *
1644		 * The offset for the "." entry is 0, and the offset
1645		 * for the ".." entry is 1, so the offset for this
1646		 * entry is 2.
1647		 */
1648		spp = &dfnp->fn_dirents;
1649		offset = 2;
1650	}
1651
1652	/*
1653	 * See if there's already an entry with this name.
1654	 */
1655	for (; cfnp; cfnp = cfnp->fn_next) {
1656		if (cfnp->fn_namelen == cnp->cn_namelen &&
1657		    bcmp(cfnp->fn_name, cnp->cn_nameptr, cnp->cn_namelen) == 0) {
1658			/*
1659			 * There is, and this is it.
1660			 *
1661			 * Put and recycle the vnode for the fnnode we
1662			 * were handed, drop the write lock so that we
1663			 * don't block reclaims, do a vnode_getwithvid()
1664			 * for the fnnode we found and, if that succeeded,
1665			 * return EEXIST to indicate that we found and got
1666			 * that fnnode, otherwise return the error we
1667			 * got.
1668			 *
1669			 * We fetch the vnode from the fnnode we were
1670			 * handed before dropping its iocount, because
1671			 * dropping the iocount could cause it to be
1672			 * reclaimed, thus invalidating the fnnode at
1673			 * *fnpp.
1674			 *
1675			 * We fetch the vnode and its vid from the fnnode
1676			 * we found before dropping the write lock, as,
1677			 * when we drop that lock, the vnode might be
1678			 * reclaimed, freeing the fnnode.
1679			 */
1680			vp = fntovn(*fnpp);
1681			vnode_put(vp);
1682			vnode_recycle(vp);
1683			cvp = fntovn(cfnp);
1684			cvid = cfnp->fn_vid;
1685			lck_rw_done(dfnp->fn_rwlock);
1686			error = vnode_getwithvid(cvp, cvid);
1687			if (error == 0) {
1688				*fnpp = cfnp;
1689				error = EEXIST;
1690			}
1691			return (error);
1692		}
1693
1694		if (cfnp->fn_next != NULL) {
1695			diff = (off_t)
1696			    (cfnp->fn_next->fn_offset - cfnp->fn_offset);
1697			assert(diff != 0);
1698			if (diff > 1 && offset == 0) {
1699				offset = cfnp->fn_offset + 1;
1700				spp = &cfnp->fn_next;
1701			}
1702		} else if (offset == 0) {
1703			offset = cfnp->fn_offset + 1;
1704			spp = &cfnp->fn_next;
1705		}
1706	}
1707
1708	/*
1709	 * This fnnode will be pointing to its parent; grab a usecount
1710	 * on the parent.
1711	 */
1712	error = vnode_ref(fntovn(dfnp));
1713	if (error != 0) {
1714		lck_rw_done(dfnp->fn_rwlock);
1715		return (error);
1716	}
1717
1718	/*
1719	 * I don't hold the mutex on fnpp because I created it, and
1720	 * I'm already holding the writers lock for it's parent
1721	 * directory, therefore nobody can reference it without me first
1722	 * releasing the writers lock.
1723	 */
1724	(*fnpp)->fn_offset = offset;
1725	(*fnpp)->fn_next = *spp;
1726	*spp = *fnpp;
1727	(*fnpp)->fn_parent = dfnp;
1728	(*fnpp)->fn_linkcnt++;	/* parent now holds reference to entry */
1729
1730	/*
1731	 * dfnp->fn_linkcnt and dfnp->fn_direntcnt protected by dfnp->rw_lock
1732	 */
1733	if (vnode_isdir(fntovn(*fnpp))) {
1734		/*
1735		 * The new fnnode is a directory, and has a ".." entry
1736		 * for its parent.  Count that entry.
1737		 */
1738		dfnp->fn_linkcnt++;
1739	}
1740	dfnp->fn_direntcnt++;	/* count the directory entry for the new fnnode */
1741
1742	lck_rw_done(dfnp->fn_rwlock);
1743
1744	AUTOFS_DPRINT((5, "*fnpp=%p\n", (void *)*fnpp));
1745	return (0);
1746}
1747
1748fnnode_t *
1749auto_search(fnnode_t *dfnp, char *name, int namelen)
1750{
1751	vnode_t dvp;
1752	fnnode_t *p;
1753
1754	AUTOFS_DPRINT((4, "auto_search: dfnp=%p, name=%.*s...\n",
1755	    (void *)dfnp, namelen, name));
1756
1757	dvp = fntovn(dfnp);
1758	if (!vnode_isdir(dvp)) {
1759		panic("auto_search: dvp=%p not a directory", dvp);
1760	}
1761
1762	assert(lck_rw_held(dfnp->fn_rwlock));
1763	for (p = dfnp->fn_dirents; p != NULL; p = p->fn_next) {
1764		if (p->fn_namelen == namelen &&
1765		    bcmp(p->fn_name, name, namelen) == 0) {
1766			AUTOFS_DPRINT((5, "auto_search: success\n"));
1767			return (p);
1768		}
1769	}
1770
1771	AUTOFS_DPRINT((5, "auto_search: failure\n"));
1772	return (NULL);
1773}
1774
1775#ifdef DEBUG
/* Current debug level: set by auto_debug_set(), read by auto_dprint(). */
static int autofs_debug = 0;
1777
1778/*
1779 * Utilities used by both client and server
1780 * Standard levels:
1781 * 0) no debugging
1782 * 1) hard failures
1783 * 2) soft failures
1784 * 3) current test software
1785 * 4) main procedure entry points
1786 * 5) main procedure exit points
1787 * 6) utility procedure entry points
1788 * 7) utility procedure exit points
1789 * 8) obscure procedure entry points
1790 * 9) obscure procedure exit points
1791 * 10) random stuff
1792 * 11) all <= 1
1793 * 12) all <= 2
1794 * 13) all <= 3
1795 * ...
1796 */
1797/* PRINTFLIKE2 */
1798void
1799auto_dprint(int level, const char *fmt, ...)
1800{
1801	va_list args;
1802
1803	if (autofs_debug == level ||
1804	    (autofs_debug > 10 && (autofs_debug - 10) >= level)) {
1805		va_start(args, fmt);
1806		IOLogv(fmt, args);
1807		va_end(args);
1808	}
1809}
1810
/*
 * Set the debug level that auto_dprint() compares message levels
 * against.
 */
void
auto_debug_set(int level)
{
	autofs_debug = level;
}
1816#endif /* DEBUG */
1817